Skip to content

Commit 8153bbd

Browse files
committed
feat(bom): map and createreleases consider PURL qualifiers
This adds support for PURLs with qualifiers, introducing the following semantics: Only qualifiers specified in the BOM are compared. If entries are found where all of them match, only those are returned.
1 parent fc02a55 commit 8153bbd

13 files changed

+311
-30
lines changed

ChangeLog.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
## NEXT
99

1010
* `bom map`: The options `--dbx` and `-all` were replaced by `--matchmode`.
11-
* `bom map --matchmode full-search` allows full search for the best possible
12-
matches and report all of them, see the discussion in `Readme_Mapping.md`.
11+
* `bom map`: new `--matchmode` options `full-search` (report all best matches) and
12+
`qualifier-match` (consider PackageURL qualifiers). See `Readme_Mapping.md`.
1313

1414
## 2.9.1
1515

Readme_Mapping.md

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,20 +45,36 @@ and version etc.
4545

4646
## Notes on id mapping / PackageURL mapping
4747

48-
CaPyCli supports mapping releases by the PackageURL. As encoding of a
49-
PackageURL is not unique (some characters *may* use URL encoding, qualifiers
48+
CaPyCli supports mapping **releases** by the PackageURL. As encoding of a
49+
PackageURL is not unique (some characters may be percent-encoded, qualifiers
5050
can be given in random order etc.), we can't just do a string comparison, but
5151
instead *all* SW360 releases with PackageURLs (using external id `package-url`)
5252
are retrieved and decoded. When your input BOM specifies a `purl` field, then
5353
the PackageURL is compared field by field (type, namespace, name, version) for
5454
a `FULL_MATCH_BY_ID`.
5555

56-
Also, components will be mapped by PackageURL and if a match is found, the
56+
Also, **components** will be mapped by PackageURL and if a match is found, the
5757
`capycli:componentId` property will be added to the output BOM item. Components
5858
can be identified directly by their external id `package-url` or as fallback
5959
also by the `package-url`s of their releases.
6060

61-
PackageURL subpath and qualifiers are currently ignored during PURL matching.
61+
PackageURL **qualifiers** (like `?distro=alpine-3.21&package-id=3a23`) will be
62+
considered when using `bom map --matchmode qualifier-match`. In some cases,
63+
qualifiers are essential for correct mapping, but many scanners also include
64+
non-essential qualifiers in their SBOMs. And the distinction might be
65+
challenging: while `distro` is crucial for correct mapping of Alpine packages
66+
(same package release can have different patches in different Alpine releases),
67+
for Debian, `distro` is unnecessary since package versions are already
68+
unique. So we use the following rules to balance accuracy and practicality:
69+
70+
* Only the qualifiers specified in the input BOM are considered during matching,
71+
qualifiers only present in SW360 releases are ignored. So you can control
72+
matching by removing the unwanted qualifiers in your SBOM.
73+
* If one or more SW360 releases are found where *all* qualifiers specified in the
74+
input BOM match, *only* these releases are added to the output BOM. Otherwise,
75+
qualifiers will be ignored, so all release matches will be added.
76+
77+
PackageURL subpath is currently ignored during PURL matching.
6278

6379
## Example 1: Very Simple, Full Match
6480

capycli/bom/create_components.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ def update_release(self, cx_comp: Component, release_data: Dict[str, Any]) -> No
399399
bom_purl = packageurl.PackageURL.from_string(
400400
data["externalIds"][repository_type])
401401
sw360_purls = PurlUtils.get_purl_list_from_sw360_object(release_data)
402-
id_match = PurlUtils.contains(sw360_purls, bom_purl)
402+
id_match = PurlUtils.contains(sw360_purls, bom_purl, compare_qualifiers=True)
403403
except ValueError:
404404
pass
405405
if not id_match:

capycli/bom/map_bom.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ def __init__(self) -> None:
6060
self.purl_service: Optional[PurlService] = None
6161
self.no_match_by_name_only = True
6262
self.full_search = False
63+
self.qualifier_match = False
6364

6465
def is_id_match(self, release: Dict[str, Any], component: Component) -> bool:
6566
"""Determines whether this release is a match via identifier for the specified SBOM item"""
@@ -761,7 +762,9 @@ def map_bom_commons(self, component: Component) -> MapResult:
761762
# search release and component by purl which is independent of the component cache.
762763
if component.purl:
763764
result.component_hrefs = self.external_id_svc.search_components_by_purl(component.purl)
764-
result.release_hrefs = self.external_id_svc.search_releases_by_purl(component.purl)
765+
r = self.external_id_svc.search_releases_by_purl(component.purl, self.qualifier_match)
766+
result.release_hrefs = r["hrefs"]
767+
result.release_hrefs_results = r["results"]
765768

766769
return result
767770

@@ -849,6 +852,7 @@ def show_help(self) -> None:
849852
print(" --matchmode MATCHMODE matching mode, comma separated list of:")
850853
print(" full-search = report best matches, don't abort on first match (recommended)")
851854
print(" all-versions = also report matches for name, but different version")
855+
print(" qualifier-match = consider qualifiers for PURL matching")
852856
print(" ignore-debian = ignore Debian revision in version comparison, so SBOM")
853857
print(" version 3.1 will match SW360 version 3.1-3.debian")
854858
print(" -all deprecated, please use --matchmode all-versions")
@@ -904,6 +908,9 @@ def run(self, args: Any) -> None:
904908
if "full-search" in args.matchmode:
905909
self.full_search = True
906910

911+
if "qualifier-match" in args.matchmode:
912+
self.qualifier_match = True
913+
907914
print_text("Loading SBOM file", args.inputfile)
908915
try:
909916
sbom = CaPyCliBom.read_sbom(args.inputfile)

capycli/common/capycli_bom_support.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ class CycloneDxSupport():
6060
CDX_PROP_COMPONENT_ID = "capycli:componentId"
6161
CDX_PROP_FILENAME = "siemens:filename"
6262
CDX_PROP_MAPRESULT = "capycli:mapResult"
63+
CDX_PROP_MAPRESULT_BY_ID = "capycli:mapResultById"
6364
CDX_PROP_SW360_HREF = "capycli:sw360Href"
6465
CDX_PROP_SW360_URL = "capycli:sw360Url"
6566
CDX_PROP_REL_STATE = "capycli:releaseMainlineState"

capycli/common/map_result.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,20 @@
77
# -------------------------------------------------------------------------------
88

99
from typing import Any, List, Optional
10+
from enum import Enum
1011

1112
from cyclonedx.model.component import Component
1213

1314
from capycli.common.capycli_bom_support import CycloneDxSupport
1415

1516

17+
class MapResultByIdQualifiers(Enum):
    """Outcome of taking PackageURL qualifiers into account for an id match.

    The string values are what gets written into the SBOM, so they must stay
    stable.
    """

    # every qualifier given in the SBOM purl matched at least one candidate
    FULL_MATCH = "qualifiers-full-match"
    # no candidate matched all SBOM qualifiers, so qualifiers were disregarded
    IGNORED = "qualifiers-ignored"
    # NOTE(review): not produced by any code visible here - confirm intended use
    UNKNOWN = "qualifiers-unknown-match"
    # nothing to compare (no qualifiers in the SBOM purl or no candidates);
    # the empty value means "write no note"
    NO_QUALIFIER_MAPPING = ""
22+
23+
1624
class MapResult:
1725
"""Result of mapping a SBOM item to the list of releases"""
1826

@@ -50,8 +58,22 @@ def __init__(self, component: Optional[Component] = None) -> None:
5058
self.result: str = MapResult.NO_MATCH
5159
self._component_hrefs: List[str] = []
5260
self._release_hrefs: List[str] = []
61+
self._release_hrefs_results: List[str] = []
5362
self.releases: List[Any] = []
5463

64+
@property
def release_hrefs_results(self) -> List[str]:
    """Notes describing how the releases were found by identifier."""
    return self._release_hrefs_results

@release_hrefs_results.setter
def release_hrefs_results(self, value: List[str]) -> None:
    """Store the id-mapping notes and mirror them into the input component.

    The notes are written, joined by a single space, to the
    ``capycli:mapResultById`` property of the input component. Nothing is
    written when there is no input component or when `value` is empty; a
    property that already exists on the component is left untouched in
    that case.

    :param value: list of notes about the mapping by identifier
    """
    # typing.List (not list[str]) to stay consistent with the other
    # annotations in this module and with older interpreters
    self._release_hrefs_results = value
    if not self.input_component or not value:
        return
    CycloneDxSupport.update_or_set_property(
        self.input_component, CycloneDxSupport.CDX_PROP_MAPRESULT_BY_ID,
        " ".join(value))
76+
5577
@property
5678
def component_hrefs(self) -> List[str]:
5779
return self._component_hrefs

capycli/common/purl_service.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,10 @@ def build_purl_cache(self, purl_types: Any = tuple(), no_warnings: bool = True)
6060
if purl_types and purl.type not in purl_types:
6161
continue
6262
if not no_warnings:
63-
for e in self.purl_cache.get_by_version(purl):
63+
already_in_cache = self.purl_cache.get_by_version(purl)
64+
_, already_in_cache = PurlStore.filter_by_qualifiers(
65+
already_in_cache, purl)
66+
for e in already_in_cache:
6467
if e["purl"] == purl:
6568
print_yellow("-> Multiple entries for purl:", purl)
6669
print_yellow(
@@ -78,14 +81,18 @@ def build_purl_cache(self, purl_types: Any = tuple(), no_warnings: bool = True)
7881
print_yellow("-> Ignoring invalid purl entry in", entry["_links"]["self"]["href"])
7982
print_yellow(purl_string)
8083

81-
def search_releases_by_purl(self, purl: packageurl.PackageURL) -> List[str]:
84+
def search_releases_by_purl(self, purl: packageurl.PackageURL, qualifier_match: bool = False) -> Dict[str, Any]:
8285
"""Get SW360 releases by Package URL using the purl cache
8386
84-
:return: list of release urls
87+
:return: dict with the release hrefs under `hrefs` and a list of notes about the mapping under `results`
8588
"""
8689
self.build_purl_cache((purl.type,))
8790

8891
result = self.purl_cache.get_by_version(purl)
92+
if qualifier_match:
93+
qualifier_result, result = PurlStore.filter_by_qualifiers(result, purl)
94+
else:
95+
qualifier_result = None
8996
unique_hrefs = {r["href"] for r in result}
9097

9198
if len(unique_hrefs) > 1:
@@ -94,7 +101,12 @@ def search_releases_by_purl(self, purl: packageurl.PackageURL) -> List[str]:
94101
print_yellow(" Candidate", self.client.get_id_from_href(r["href"]),
95102
"has purl", r["purl"])
96103

97-
return list(unique_hrefs)
104+
search_result = {
105+
"hrefs": list(unique_hrefs),
106+
# can be extended with more details in the future
107+
"results": [qualifier_result.value] if (qualifier_result and qualifier_result.value) else []
108+
}
109+
return search_result
98110

99111
def search_components_by_purl(self, purl: packageurl.PackageURL) -> List[str]:
100112
"""

capycli/common/purl_store.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66
# SPDX-License-Identifier: MIT
77
# -------------------------------------------------------------------------------
88

9-
from typing import Any, Dict, List, Optional
9+
from typing import Any, Dict, List, Optional, Tuple
1010

1111
from packageurl import PackageURL
12+
from capycli.common.map_result import MapResultByIdQualifiers
1213

1314

1415
class PurlStore:
@@ -84,3 +85,27 @@ def get_by_version(self, purl: PackageURL) -> List[Dict[str, Any]]:
8485
return entries[purl.version]
8586

8687
return []
88+
89+
@staticmethod
def filter_by_qualifiers(entries: List[Dict[str, Any]], purl: PackageURL) -> Tuple[MapResultByIdQualifiers,
                                                                                   List[Dict[str, Any]]]:
    """
    Narrow down purl cache entries to those agreeing with the qualifiers of `purl`.

    Only the qualifiers present in `purl` are compared; additional qualifiers
    on an entry do not prevent a match. If no entry agrees with all of them,
    the unfiltered list is handed back so that qualifiers are effectively ignored.

    :param entries: A list of entries to filter as returned by get_by_version.
    :param purl: The PackageURL object containing qualifiers to match.
    :return: A tuple (qualifier_result, list of entries)
    """
    wanted = purl.qualifiers
    if not wanted or len(entries) == 0:
        # nothing to compare against
        return MapResultByIdQualifiers.NO_QUALIFIER_MAPPING, entries

    assert isinstance(wanted, dict)

    matching: List[Dict[str, Any]] = []
    for candidate in entries:
        candidate_qualifiers = candidate["purl"].qualifiers
        for key, value in wanted.items():
            if candidate_qualifiers.get(key) != value:
                break
        else:
            # loop ran to completion: every requested qualifier agreed
            matching.append(candidate)

    if not matching:
        return MapResultByIdQualifiers.IGNORED, entries
    return MapResultByIdQualifiers.FULL_MATCH, matching

capycli/common/purl_utils.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,23 @@ def parse_purls_from_external_id(purl_entries: Any) -> list: # type: ignore
5656
return []
5757

5858
@staticmethod
def contains(purls: list, search_purl: "packageurl.PackageURL",  # type: ignore
             compare_qualifiers: bool = False) -> bool:
    """
    Search the given PackageURL in the provided list

    Important: The matching is based on type, namespace, name and version.
    If `compare_qualifiers` is set, the qualifiers present in the search_purl
    must additionally be present with the same value in the list entry.
    Qualifiers only present in the list entry and the subpath are not considered.

    :param purls: list of PackageURL objects to search in
    :param search_purl: the PackageURL to look for
    :param compare_qualifiers: also compare the qualifiers of search_purl
    :return: True if at least one entry of the list matches
    """
    required = search_purl.qualifiers if compare_qualifiers else None
    if not isinstance(required, dict):
        # nothing to compare (flag not set or qualifiers not available as dict)
        required = {}

    for entry in purls:
        if (entry.type != search_purl.type
                or entry.namespace != search_purl.namespace
                or entry.name != search_purl.name
                or entry.version != search_purl.version):
            continue

        available = entry.qualifiers if isinstance(entry.qualifiers, dict) else {}
        if all(key in available and available[key] == value
               for key, value in required.items()):
            return True
        # qualifiers differ: keep looking, another entry with the same
        # version may still carry the requested qualifiers

    return False
7278

tests/test_bom_map2.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,108 @@ def test_map_bom_item_purl_release_conflict(self) -> None:
243243
else:
244244
assert False, "Unexpected release id"
245245

246+
@responses.activate
def test_map_bom_item_purl_release_w_qualifiers(self) -> None:
    """test bom mapping: search for releases by PURL with qualifiers

    Three releases with the same PURL version but different qualifiers are
    put into the purl cache. The SBOM item is then mapped four times:
    without qualifier matching, with qualifier matching, with an additional
    unmatched qualifier and qualifier matching off, and with the unmatched
    qualifier and qualifier matching on.
    """
    if not self.app.client:
        return

    self.app.purl_service = PurlService(self.app.client, cache={'maven': {
        'com.fasterxml.jackson.core': {'jackson-core': {
            None: [{
                "purl": PackageURL("maven", "com.fasterxml.jackson.core", "jackson-core"),
                "href": SW360_BASE_URL + "components/a035"}],
            "2.18.0": [
                {"purl": PackageURL("maven", "com.fasterxml.jackson.core", "jackson-core", version="2.18.0",
                                    qualifiers={"classifier": "sources"}),
                 "href": SW360_BASE_URL + "releases/1234"},
                {"purl": PackageURL("maven", "com.fasterxml.jackson.core", "jackson-core", version="2.18.0",
                                    qualifiers={"classifier": "javadoc"}),
                 "href": SW360_BASE_URL + "releases/1235"},
                {"purl": PackageURL("maven", "com.fasterxml.jackson.core", "jackson-core", version="2.18.0",
                                    qualifiers={"classifier": "sources", "packaging": "jar"}),
                 "href": SW360_BASE_URL + "releases/1236"}]}}}})

    self.app.releases = [{"Id": "1234", "ComponentId": "a035",
                          "Name": "Jackson Core", "Version": "2.18.0",
                          "ExternalIds": {
                              "package-url": "pkg:maven/com.fasterxml.jackson.core/jackson-core@2.18.0"
                              "?classifier=sources"}},
                         {"Id": "1235", "ComponentId": "a034",
                          "Name": "com.fasterxml.jackson.core:jackson-core", "Version": "2.18.0_javadoc",
                          "ExternalIds": {
                              "package-url": "pkg:maven/com.fasterxml.jackson.core/jackson-core@2.18.0"
                              "?classifier=javadoc"}},
                         {"Id": "1236", "ComponentId": "a034",
                          "Name": "com.fasterxml.jackson.core:jackson-core", "Version": "2.18.0_jar",
                          "ExternalIds": {
                              "package-url": "pkg:maven/com.fasterxml.jackson.core/jackson-core@2.18.0"
                              "?classifier=sources&packaging=jar"}}]

    bomitem = Component(
        name="jackson-core",
        version="2.18.0",
        purl=PackageURL.from_string("pkg:maven/com.fasterxml.jackson.core/jackson-core@2.18.0"
                                    "?classifier=sources"))

    self.app.full_search = True

    # 3 matches when ignoring qualifiers
    res = self.app.map_bom_item(bomitem, check_similar=False, result_required=False)
    assert res.result == MapResult.FULL_MATCH_BY_ID
    assert len(res.releases) == 3

    self.app.qualifier_match = True

    # 2 matches for classifier=sources
    bomitem.properties.clear()  # reset properties to remove results from previous mapping
    res = self.app.map_bom_item(bomitem, check_similar=False, result_required=False)
    assert res.result == MapResult.FULL_MATCH_BY_ID
    assert len(res.releases) == 2

    # the release hrefs are collected in a set, so do not rely on their order
    assert {r["Sw360Id"]: r["ComponentId"] for r in res.releases} == {
        "1234": "a035",
        "1236": "a034",
    }
    assert res.input_component is not None
    assert (
        CycloneDxSupport.get_property(res.input_component, CycloneDxSupport.CDX_PROP_MAPRESULT_BY_ID).value
        == "qualifiers-full-match")

    self.app.qualifier_match = False

    # bomitem has unknown qualifier -> all PURL version matches returned
    assert bomitem.purl is not None
    assert isinstance(bomitem.purl.qualifiers, dict)
    bomitem.purl.qualifiers["themorequalifiers"] = "thebetter"
    bomitem.properties.clear()  # reset properties to remove results from previous mapping
    res = self.app.map_bom_item(bomitem, check_similar=False, result_required=False)
    assert res.result == MapResult.FULL_MATCH_BY_ID
    assert len(res.releases) == 3
    # compare order-independently, see above
    assert sorted(r["Sw360Id"] for r in res.releases) == ["1234", "1235", "1236"]
    assert res.input_component is not None
    assert (
        CycloneDxSupport.get_property(res.input_component, CycloneDxSupport.CDX_PROP_MAPRESULT_BY_ID)
        is None)

    self.app.qualifier_match = True

    # no release matches all qualifiers -> qualifiers are ignored, all 3 returned
    bomitem.properties.clear()  # reset properties to remove results from previous mapping
    res = self.app.map_bom_item(bomitem, check_similar=False, result_required=False)
    assert len(res.releases) == 3
    assert res.input_component is not None
    assert (
        CycloneDxSupport.get_property(res.input_component, CycloneDxSupport.CDX_PROP_MAPRESULT_BY_ID).value
        == "qualifiers-ignored")
347+
246348
@responses.activate
247349
def test_map_bom_item_mixed_match(self) -> None:
248350
bomitem = Component(

0 commit comments

Comments
 (0)