generate-cyclonedx 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. #!/usr/bin/env python3
  2. # SPDX-License-Identifier: GPL-2.0-or-later
  3. # This script converts the output of the show-info make target
  4. # to CycloneDX format.
  5. #
  6. # Example usage:
  7. # $ make show-info | utils/generate-cyclonedx > sbom.json
  8. import argparse
  9. import bz2
  10. import gzip
  11. import json
  12. import os
  13. from pathlib import Path
  14. import urllib.request
  15. import subprocess
  16. import sys
  17. import re
  18. CYCLONEDX_VERSION = "1.6"
  19. SPDX_SCHEMA_URL = f"https://raw.githubusercontent.com/CycloneDX/specification/{CYCLONEDX_VERSION}/schema/spdx.schema.json"
  20. brpath = Path(__file__).parent.parent
  21. cyclonedxpath = Path(os.getenv("BR2_DL_DIR", brpath / "dl")) / "cyclonedx"
  22. SPDX_SCHEMA_PATH = cyclonedxpath / f"spdx-{CYCLONEDX_VERSION}.schema.json"
  23. BR2_VERSION_FULL = (
  24. subprocess.check_output(
  25. ["make", "--no-print-directory", "-C", brpath, "print-version"]
  26. )
  27. .decode()
  28. .strip()
  29. )
  30. # Set of vulnerabilities that were addressed by a patch present in buildroot
  31. # tree. This set is used to set the analysis of the ignored CVEs to
  32. # 'resolved_with_pedigree'.
  33. VULN_WITH_PEDIGREE = set()
  34. SPDX_LICENSES = []
  35. if not SPDX_SCHEMA_PATH.exists():
  36. # Download the CycloneDX SPDX schema JSON, and cache it locally
  37. cyclonedxpath.mkdir(parents=True, exist_ok=True)
  38. urllib.request.urlretrieve(SPDX_SCHEMA_URL, SPDX_SCHEMA_PATH)
  39. try:
  40. with SPDX_SCHEMA_PATH.open() as f:
  41. SPDX_LICENSES = json.load(f).get("enum", [])
  42. except json.JSONDecodeError:
  43. # In case of error the license will just not be matched to the SPDX names
  44. # but the SBOM generation still work.
  45. print(f"Failed to load the SPDX licenses file: {SPDX_SCHEMA_PATH}", file=sys.stderr)
  46. def split_top_level_comma(subj):
  47. """Split a string at comma's, but do not split at comma's in between parentheses.
  48. Args:
  49. subj (str): String to be split.
  50. Returns:
  51. list: A list of substrings
  52. """
  53. counter = 0
  54. substring = ""
  55. for char in subj:
  56. if char == "," and counter == 0:
  57. yield substring
  58. substring = ""
  59. else:
  60. if char == "(":
  61. counter += 1
  62. elif char == ")":
  63. counter -= 1
  64. substring += char
  65. yield substring
  66. def cyclonedx_license(lic):
  67. """Given the name of a license, create an individual entry in
  68. CycloneDX format. In CycloneDX, the 'id' keyword is used for
  69. names that are recognized as SPDX License abbreviations. All other
  70. license names are placed under the 'name' keyword.
  71. Args:
  72. lic (str): Name of the license
  73. Returns:
  74. dict: An entry for the license in CycloneDX format.
  75. """
  76. key = "id" if lic in SPDX_LICENSES else "name"
  77. return {
  78. key: lic,
  79. }
  80. def cyclonedx_licenses(lic_list):
  81. """Create a licenses list formatted for a CycloneDX component
  82. Args:
  83. lic_list (str): A comma separated list of license names.
  84. Returns:
  85. dict: A dictionary with license information for the component,
  86. in CycloneDX format.
  87. """
  88. return {
  89. "licenses": [
  90. {"license": cyclonedx_license(lic.strip())} for lic in split_top_level_comma(lic_list)
  91. ]
  92. }
  93. def extract_cves_from_header(header: str) -> list[str]:
  94. """Extract CVE identifiers from the patch header.
  95. Args:
  96. header (str): Content of the header of a patch.
  97. Returns:
  98. list: Array of CVE identifier present in a patch header passed as
  99. argument.
  100. """
  101. PATCH_CVE_HEADER = "CVE: "
  102. return [
  103. line.partition(PATCH_CVE_HEADER)[2].strip()
  104. for line in header.splitlines()
  105. if line.startswith(PATCH_CVE_HEADER)
  106. ]
  107. def patch_retrieve_header(content: str) -> str:
  108. """Read the content of a patch and split the header from the content.
  109. Args:
  110. content (str): Patch content.
  111. Returns:
  112. str: Patch header content.
  113. """
  114. DIFF_LINE_REGEX = re.compile(r"^diff\s+(?:--git|-[-\w]+)\s+(\S+)\s+(\S+)$")
  115. INDEX_LINE_REGEX = re.compile(r"^Index:\s+(\S+)$")
  116. lines = content.split('\n')
  117. header = []
  118. for i, line in enumerate(lines):
  119. if DIFF_LINE_REGEX.match(line):
  120. # diff --git a/configure.ac b/configure.ac
  121. # index 1234..1234 100644
  122. # --- a/configure.ac
  123. # +++ b/configure.ac
  124. break
  125. elif INDEX_LINE_REGEX.match(line):
  126. # Index: <filename>
  127. # --- <filename>
  128. # +++ <filename>
  129. if i < len(lines) - 2 and lines[i + 1].startswith("===") and lines[i + 2].startswith("---"):
  130. break
  131. elif line.startswith("---"):
  132. # Some patches don't have a 'diff' tag just the --- +++ tuple.
  133. # Check next line is starting with '+++'
  134. # ex: package/berkeleydb/0001-cwd-db_config.patch
  135. if i < len(lines) - 2 and lines[i + 1].startswith("+++") and lines[i + 2].startswith("@@"):
  136. break
  137. else:
  138. header.append(line)
  139. return '\n'.join(header)
  140. def read_patch_file(patch_path: Path) -> str:
  141. """Read the content of a patch file, handling compression.
  142. Args:
  143. patch_path (Path): Patch path.
  144. Returns:
  145. str: Patch content.
  146. """
  147. if patch_path.suffix == ".gz":
  148. f = gzip.open(patch_path, mode="rt")
  149. elif patch_path.suffix == ".bz":
  150. f = bz2.open(patch_path, mode="rt")
  151. else:
  152. f = open(patch_path)
  153. content = f.read()
  154. f.close()
  155. return content
  156. def cyclonedx_patches(patch_list: list[str]):
  157. """Translate a list of patches from the show-info JSON to a list of
  158. patches in CycloneDX format.
  159. Args:
  160. patch_list (list): Array of patch relative paths for a given component.
  161. Returns:
  162. dict: Patch information in CycloneDX format.
  163. """
  164. patch_contents = []
  165. for patch in patch_list:
  166. patch_path = brpath / patch
  167. if patch_path.exists():
  168. try:
  169. content = read_patch_file(patch_path)
  170. except Exception:
  171. # If the patch can't be read it won't be added to
  172. # the resulting SBOM.
  173. print(f"Failed to handle patch: {patch}", file=sys.stderr)
  174. continue
  175. header = patch_retrieve_header(content)
  176. issue = {}
  177. cves = extract_cves_from_header(header)
  178. if cves:
  179. VULN_WITH_PEDIGREE.update(cves)
  180. issue = {
  181. "resolves": [
  182. {
  183. "type": "security",
  184. "name": cve
  185. } for cve in cves
  186. ]
  187. }
  188. patch_contents.append({
  189. "diff": {
  190. "text": {
  191. "content": content
  192. }
  193. },
  194. **issue
  195. })
  196. else:
  197. # If the patch is not a file it's a tarball or diff url passed
  198. # through the `<pkg-name>_PATCH` variable.
  199. patch_contents.append({
  200. "diff": {
  201. "url": patch
  202. }
  203. })
  204. return {
  205. "pedigree": {
  206. "patches": [{
  207. "type": "unofficial",
  208. **content
  209. } for content in patch_contents]
  210. },
  211. }
  212. def cyclonedx_component(name, comp):
  213. """Translate a component from the show-info output, to a component entry in CycloneDX format.
  214. Args:
  215. name (str): Key used for the package in the show-info output.
  216. comp (dict): Data about the package as a Python dictionary.
  217. Returns:
  218. dict: Component information in CycloneDX format.
  219. """
  220. return {
  221. "bom-ref": name,
  222. "type": "library",
  223. **({
  224. "name": comp["name"],
  225. } if "name" in comp else {}),
  226. **({
  227. "version": comp["version"],
  228. **(cyclonedx_licenses(comp["licenses"]) if "licenses" in comp else {}),
  229. } if not comp["virtual"] else {}),
  230. **({
  231. "cpe": comp["cpe-id"],
  232. } if "cpe-id" in comp else {}),
  233. **(cyclonedx_patches(comp["patches"]) if comp.get("patches") else {}),
  234. "properties": [{
  235. "name": "BR_TYPE",
  236. "value": comp["type"],
  237. }],
  238. }
  239. def cyclonedx_dependency(ref, depends):
  240. """Create JSON for dependency relationships between components.
  241. Args:
  242. ref (str): reference to a component bom-ref.
  243. depends (list): array of component bom-ref identifier to create the dependencies.
  244. Returns:
  245. dict: Dependency information in CycloneDX format.
  246. """
  247. return {
  248. "ref": ref,
  249. "dependsOn": sorted(depends),
  250. }
  251. def cyclonedx_vulnerabilities(show_info_dict):
  252. """Create a JSON list of vulnerabilities ignored by buildroot and associate
  253. the component for which they are solved.
  254. Args:
  255. show_info_dict (dict): The JSON output of the show-info
  256. command, parsed into a Python dictionary.
  257. Returns:
  258. list: Solved vulnerabilities list in CycloneDX format.
  259. """
  260. cves = {}
  261. for name, comp in show_info_dict.items():
  262. for cve in comp.get('ignore_cves', []):
  263. cves.setdefault(cve, []).append(name)
  264. return [{
  265. "id": cve,
  266. "analysis": {
  267. "state": "resolved_with_pedigree" if cve in VULN_WITH_PEDIGREE else "in_triage",
  268. "detail": f"The CVE '{cve}' has been marked as ignored by Buildroot"
  269. },
  270. "affects": [
  271. {"ref": bomref} for bomref in components
  272. ]
  273. } for cve, components in cves.items()]
  274. def br2_parse_deps_recursively(ref, show_info_dict, virtual=False, deps=[]):
  275. """Parse dependencies from the show-info output. This function will
  276. recursively collect all dependencies, and return a list where each dependency
  277. is stated at most once.
  278. The dependency on virtual package will collect the final dependency without
  279. including the virtual one.
  280. Args:
  281. ref (str): The identifier of the package for which the dependencies have
  282. to be looked up.
  283. show_info_dict (dict): The JSON output of the show-info
  284. command, parsed into a Python dictionary.
  285. Kwargs:
  286. deps (list): A list, to which dependencies will be appended. If set to None,
  287. a new empty list will be created. Defaults to None.
  288. Returns:
  289. list: A list of dependencies of the 'ref' package.
  290. """
  291. for dep in show_info_dict.get(ref, {}).get("dependencies", []):
  292. if dep not in deps:
  293. if virtual or show_info_dict.get(dep, {}).get("virtual") is False:
  294. deps.append(dep)
  295. br2_parse_deps_recursively(dep, show_info_dict, virtual, deps)
  296. return deps
  297. def main():
  298. parser = argparse.ArgumentParser(
  299. description='''Create a CycloneDX SBoM for the Buildroot configuration.
  300. Example usage: make show-info | utils/generate-cyclonedx > sbom.json
  301. '''
  302. )
  303. parser.add_argument("-i", "--in-file", nargs="?", type=argparse.FileType("r"),
  304. default=(None if sys.stdin.isatty() else sys.stdin))
  305. parser.add_argument("-o", "--out-file", nargs="?", type=argparse.FileType("w"),
  306. default=sys.stdout)
  307. parser.add_argument("--virtual", default=False, action='store_true',
  308. help="This option includes virtual packages to the CycloneDX output")
  309. parser.add_argument("--project-name", type=str, default="buildroot",
  310. help="Specify the project name to use in the SBOM metadata (default:'buildroot')")
  311. parser.add_argument("--project-version", type=str, default=f"{BR2_VERSION_FULL}",
  312. help="Specify the project version to use in the SBOM metadata (default: builroot version)")
  313. args = parser.parse_args()
  314. if args.in_file is None:
  315. parser.print_help()
  316. sys.exit(1)
  317. show_info_dict = json.load(args.in_file)
  318. # Remove rootfs and virtual packages if not explicitly included
  319. # from the cli arguments
  320. filtered_show_info_dict = {k: v for k, v in show_info_dict.items()
  321. if ("rootfs" not in v["type"]) and (args.virtual or v["virtual"] is False)}
  322. cyclonedx_dict = {
  323. "bomFormat": "CycloneDX",
  324. "$schema": f"http://cyclonedx.org/schema/bom-{CYCLONEDX_VERSION}.schema.json",
  325. "specVersion": f"{CYCLONEDX_VERSION}",
  326. "metadata": {
  327. "component": {
  328. "bom-ref": args.project_name,
  329. "name": args.project_name,
  330. "version": args.project_version,
  331. "type": "firmware",
  332. },
  333. "tools": {
  334. "components": [
  335. {
  336. "type": "application",
  337. "name": "Buildroot generate-cyclonedx",
  338. "version": f"{BR2_VERSION_FULL}",
  339. "licenses": [
  340. {
  341. "license": {
  342. "id": "GPL-2.0"
  343. }
  344. }
  345. ]
  346. }
  347. ],
  348. }
  349. },
  350. "components": [
  351. cyclonedx_component(name, comp) for name, comp in filtered_show_info_dict.items()
  352. ],
  353. "dependencies": [
  354. cyclonedx_dependency("buildroot", list(filtered_show_info_dict)),
  355. *[cyclonedx_dependency(ref, br2_parse_deps_recursively(ref, show_info_dict, args.virtual))
  356. for ref in filtered_show_info_dict],
  357. ],
  358. "vulnerabilities": cyclonedx_vulnerabilities(show_info_dict),
  359. }
  360. args.out_file.write(json.dumps(cyclonedx_dict, indent=2))
  361. args.out_file.write('\n')
  362. if __name__ == "__main__":
  363. main()