From d0a81f676a911359c1fd62ca87821558c793ea90 Mon Sep 17 00:00:00 2001 From: Juanje Mendoza Date: Wed, 14 Jan 2026 07:58:58 +0100 Subject: [PATCH] parser dockerfile. Docs. Fixes #725 --- docs/dockerfiledoc.md | 30 +++ docs/supported_metadata_files.md | 2 + src/somef/parser/dockerfile_parser.py | 244 ++++++++++++++++++++--- src/somef/process_files.py | 31 ++- src/somef/test/test_JSON_export.py | 83 +------- src/somef/test/test_dockerfile_parser.py | 190 ++++++++++++++++++ src/somef/utils/constants.py | 14 +- 7 files changed, 460 insertions(+), 134 deletions(-) create mode 100644 docs/dockerfiledoc.md create mode 100644 src/somef/test/test_dockerfile_parser.py diff --git a/docs/dockerfiledoc.md b/docs/dockerfiledoc.md new file mode 100644 index 00000000..6dc668fd --- /dev/null +++ b/docs/dockerfiledoc.md @@ -0,0 +1,30 @@ +The following metadata fields can be extracted from a Dockerfile. +These fields are defined using Dockerfile `LABEL` instructions as described in the +[Dockerfile reference](https://docs.docker.com/reference/dockerfile/) and are interpreted +according to the OCI Image Specification, following the +[mapping for OCI image annotations](https://github.com/opencontainers/image-spec/blob/main/annotations.md#pre-defined-annotation-keys). 
+ +| Software metadata category | SOMEF metadata JSON path | DOCKERFILE metadata file field | +|-----------------------------|-----------------------------------------|------------------------------------| +| authors | authors[i].result.value | org.opencontainers.image.authors *(1)* | +| authors | authors[i].result.value | LABEL maintainer *(1)* | +| code_repository | code_repository[i].result.value | org.opencontainers.image.url | +| description | description[i].result.value | org.opencontainers.image.description | +| documentation | documentation[i].result.value | org.opencontainers.image.documentation | +| license | license[i].result.value | org.opencontainers.image.licenses | +| name | name[i].result.value | org.opencontainers.image.title | +| owner | owner[i].result.value | org.opencontainers.image.vendor | +| version | version[i].result.value | org.opencontainers.image.version | + + +--- + + +*(1)* +- Example: +``` +LABEL maintainer="The Prometheus Authors <prometheus-developers@googlegroups.com>" +LABEL org.opencontainers.image.authors="The Prometheus Authors" \ +``` + + diff --git a/docs/supported_metadata_files.md b/docs/supported_metadata_files.md index a02a035b..cec1fd85 100644 --- a/docs/supported_metadata_files.md +++ b/docs/supported_metadata_files.md @@ -24,6 +24,8 @@ SOMEF can extract metadata from a wide range of files commonly found in software | `*.gemspec` | Ruby | Manifest file serves as the package descriptor used in Ruby gem projects. |
[🔍](./gemspec.md)
| [📄](https://guides.rubygems.org/specification-reference/)| |[Example](https://github.com/rubygems/rubygems/blob/master/bundler/bundler.gemspec) | | `cargo.toml` | Rust | Manifest file serves as the package descriptor used in Rust projects |
[🔍](./cargo.md)
| [📄](https://doc.rust-lang.org/cargo/reference/manifest.html)| |[Example](https://github.com/rust-lang/cargo/blob/master/Cargo.toml) | | `*.cabal` | Haskell | Manifest file serving as the package descriptor for Haskell projects.|
[🔍](./cabal.md)
| [📄](https://cabal.readthedocs.io/en/3.10/cabal-package.html)| |[Example](https://github.com/haskell/cabal/blob/master/Cabal/Cabal.cabal) | +| `dockerfile` | Dockerfile | Build specification file for container images that can include software metadata via LABEL instructions (OCI specification).|
[🔍](./dockerfiledoc.md)
| [📄](https://docs.docker.com/reference/dockerfile/)| |[Example](https://github.com/FairwindsOps/nova/blob/master/Dockerfile) | + > **Note:** The general principles behind metadata mapping in SOMEF are based on the [CodeMeta crosswalk](https://github.com/codemeta/codemeta/blob/master/crosswalk.csv) and the [CodeMeta JSON-LD context](https://github.com/codemeta/codemeta/blob/master/codemeta.jsonld). > However, each supported file type may have specific characteristics and field interpretations. diff --git a/src/somef/parser/dockerfile_parser.py b/src/somef/parser/dockerfile_parser.py index a7ac96c2..d47cd449 100644 --- a/src/somef/parser/dockerfile_parser.py +++ b/src/somef/parser/dockerfile_parser.py @@ -2,46 +2,226 @@ import os import re from ..utils import constants +from ..process_results import Result + +def parse_dockerfile(file_path, metadata_result: Result, source): + + print(f"Extracting properties from Dockerfile: {file_path}") -def extract_dockerfile_maintainer(file_path): - print(f"Extracting maintainers from Dockerfile: {file_path}") - maintainers = [] - unique_maintainers = [] try: with open(file_path, "rb") as file: raw_data = file.read() - try: - content = raw_data.decode("utf-8") - except UnicodeDecodeError: - logging.warning(f"File {file_path} is not UTF-8 decodable. 
Skipping.") - return maintainers - - # not sure if should be better property author or a new property of maintainer - oci_match = re.findall( - constants.REGEXP_MAINTAINER_LABEL_OCI, - content, - re.IGNORECASE | re.MULTILINE + content = raw_data.decode("utf-8") + except (OSError, UnicodeDecodeError) as e: + logging.warning(f"Could not process Dockerfile {file_path}: {e}") + return None + + # print(content) + title_match = re.search( + constants.REGEXP_DOCKER_TITLE, + content, + re.IGNORECASE + ) + + if title_match: + title = title_match.group(1).strip() + if title: + metadata_result.add_result( + constants.CAT_NAME, + { + "value": title, + "type": constants.STRING + }, + 1, + constants.TECHNIQUE_CODE_CONFIG_PARSER, + source + ) + + description_match = re.search( + constants.REGEXP_DOCKER_DESCRIPTION, + content, + re.IGNORECASE + ) + + if description_match: + description = description_match.group(1).strip() + if description: + metadata_result.add_result( + constants.CAT_DESCRIPTION, + { + "value": description, + "type": constants.STRING + }, + 1, + constants.TECHNIQUE_CODE_CONFIG_PARSER, + source + ) + + licenses_match = re.search(constants.REGEXP_DOCKER_LICENSES, content, re.IGNORECASE) + if licenses_match: + license_info_spdx = detect_license_spdx(licenses_match.group(1).strip()) + + if license_info_spdx: + license_data = { + "value": licenses_match.group(1).strip(), + "spdx_id": license_info_spdx.get('spdx_id'), + "name": license_info_spdx.get('name'), + "type": constants.LICENSE + } + else: + license_data = { + "value": licenses_match.group(1).strip(), + "type": constants.LICENSE + } + metadata_result.add_result( + constants.CAT_LICENSE, + license_data, + 1, + constants.TECHNIQUE_CODE_CONFIG_PARSER, + source + ) + + + # source_match = re.search(constants.REGEXP_DOCKER_SOURCE, content, re.IGNORECASE) + # if source_match: + # properties[constants.PROP_SOURCE] = source_match.group(1).strip() + + url_match = re.search(constants.REGEXP_DOCKER_URL, content, 
re.IGNORECASE) + if url_match: + metadata_result.add_result( + constants.CAT_CODE_REPOSITORY, + { + "value": url_match.group(1).strip(), + "type": constants.URL + }, + 1, + constants.TECHNIQUE_CODE_CONFIG_PARSER, + source + ) + + version_match = re.search(constants.REGEXP_DOCKER_VERSION, content, re.IGNORECASE) + if version_match: + metadata_result.add_result( + constants.CAT_VERSION, + { + "value": version_match.group(1).strip(), + "type": constants.RELEASE, + "tag": version_match.group(1).strip() + }, + 1, + constants.TECHNIQUE_CODE_CONFIG_PARSER, + source ) - # LABEL maintainer free - label_match = re.findall( - constants.REGEXP_MAINTAINER_LABEL_FREE, - content, - re.IGNORECASE | re.MULTILINE + + documentation_match = re.search(constants.REGEXP_DOCKER_DOCUMENTATION, content, re.IGNORECASE) + if documentation_match: + metadata_result.add_result( + constants.CAT_DOCUMENTATION, + { + "value": documentation_match.group(1).strip(), + "type": constants.STRING + }, + 1, + constants.TECHNIQUE_CODE_CONFIG_PARSER, + source ) - # Deprecated maintainer - maintainer_match = re.findall( - constants.REGEXP_MAINTAINER, - content, - re.IGNORECASE | re.MULTILINE + + + vendor_match = re.search( + constants.REGEXP_DOCKER_VENDOR, + content, + re.IGNORECASE + ) + + if vendor_match: + vendor = vendor_match.group(1).strip() + if vendor: + if vendor and re.search(constants.REGEXP_LTD_INC, vendor, re.IGNORECASE): + type_vendor = "Organization" + else: + type_vendor = "Person" + + metadata_result.add_result( + constants.CAT_OWNER, + { + "value": vendor, + "type": type_vendor + }, + 1, + constants.TECHNIQUE_CODE_CONFIG_PARSER, + source + ) + + # Extract maintainers + maintainers = [] + unique_maintainers = [] + + maintainer_oci_match = re.findall( + constants.REGEXP_MAINTAINER_LABEL_OCI, + content, + re.IGNORECASE | re.MULTILINE + ) + # LABEL maintainer free + maintanainer_label_match = re.findall( + constants.REGEXP_MAINTAINER_LABEL_FREE, + content, + re.IGNORECASE | re.MULTILINE + ) + # 
Deprecated maintainer + maintainer_match = re.findall( + constants.REGEXP_MAINTAINER, + content, + re.IGNORECASE | re.MULTILINE + ) + maintainers.extend(maintainer_oci_match) + maintainers.extend(maintanainer_label_match) + maintainers.extend(maintainer_match) + + unique_maintainers = list({m.strip() for m in maintainers if m.strip()}) + + for maintainer in unique_maintainers: + metadata_result.add_result( + constants.CAT_AUTHORS, + { + "type": constants.AGENT, + "value": maintainer + }, + 1, + constants.TECHNIQUE_CODE_CONFIG_PARSER, + source ) - maintainers.extend(oci_match) - maintainers.extend(label_match) - maintainers.extend(maintainer_match) + return metadata_result + +def detect_license_spdx(license_text): + """ + Function that given a license text, infers the name and spdx id in a dockerfile + Parameters + ---------- + license_text + + Returns + ------- + A JSON dictionary with name and spdx id + """ + print("Detecting license from text:", license_text) + for license_name, license_info in constants.LICENSES_DICT.items(): + if re.search(license_info["regex"], license_text, re.IGNORECASE): + return { + "name": license_name, + "spdx_id": f"{license_info['spdx_id']}", + "@id": f"https://spdx.org/licenses/{license_info['spdx_id']}" + } - unique_maintainers = list({m.strip() for m in maintainers if m.strip()}) - except OSError: - logging.warning(f"Could not read Dockerfile {file_path}") + for license_name, license_info in constants.LICENSES_DICT.items(): + spdx_id = license_info["spdx_id"] + if re.search(rf'\b{re.escape(spdx_id)}\b', license_text, re.IGNORECASE): + return { + "name": license_name, + "spdx_id": spdx_id, + "@id": f"https://spdx.org/licenses/{spdx_id}" + } + return None - return unique_maintainers + \ No newline at end of file diff --git a/src/somef/process_files.py b/src/somef/process_files.py index b15d34d6..84e55560 100644 --- a/src/somef/process_files.py +++ b/src/somef/process_files.py @@ -21,7 +21,7 @@ from .parser.description_parser import 
parse_description_file from .parser.toml_parser import parse_toml_file from .parser.cabal_parser import parse_cabal_file -from .parser.dockerfile_parser import extract_dockerfile_maintainer +from .parser.dockerfile_parser import parse_dockerfile from chardet import detect @@ -76,20 +76,22 @@ def process_repository_files(repo_dir, metadata_result: Result, repo_type, owner if filename == "Dockerfile" or filename.lower() == "docker-compose.yml": docker_url = get_file_link(repo_type, file_path, owner, repo_name, repo_default_branch, repo_dir, repo_relative_path, filename) - if filename == "Dockerfile": - format_file = constants.FORMAT_DOCKERFILE - maintainers = extract_dockerfile_maintainer(os.path.join(repo_dir, file_path)) - else: - format_file = constants.FORMAT_DOCKER_COMPOSE - maintainers = None + + # full_path = os.path.join(repo_dir, file_path) result_value = { constants.PROP_VALUE: docker_url, constants.PROP_TYPE: constants.URL, - constants.PROP_FORMAT: format_file } - if maintainers: - result_value[constants.PROP_AUTHOR] = maintainers + + if filename == "Dockerfile": + format_file = constants.FORMAT_DOCKERFILE + result_value[constants.PROP_FORMAT] = format_file + metadata_result = parse_dockerfile(os.path.join(dir_path, filename), metadata_result, docker_url) + else: + format_file = constants.FORMAT_DOCKER_COMPOSE + + result_value[constants.PROP_FORMAT] = format_file metadata_result.add_result( constants.CAT_HAS_BUILD_FILE, @@ -98,14 +100,7 @@ def process_repository_files(repo_dir, metadata_result: Result, repo_type, owner constants.TECHNIQUE_FILE_EXPLORATION, docker_url ) - # metadata_result.add_result(constants.CAT_HAS_BUILD_FILE, - # { - # constants.PROP_VALUE: docker_url, - # constants.PROP_TYPE: constants.URL, - # constants.PROP_FORMAT: format_file - # }, - # 1, - # constants.TECHNIQUE_FILE_EXPLORATION, docker_url) + if filename.lower().endswith(".ipynb"): notebook_url = get_file_link(repo_type, file_path, owner, repo_name, repo_default_branch, repo_dir, 
repo_relative_path, filename) diff --git a/src/somef/test/test_JSON_export.py b/src/somef/test/test_JSON_export.py index 8572da73..befc47f2 100644 --- a/src/somef/test/test_JSON_export.py +++ b/src/somef/test/test_JSON_export.py @@ -470,85 +470,4 @@ def test_issue_859(self): os.remove(test_data_path + "test-859.json") - def test_issue_725(self): - """Checks if this repository has authors extracted from Dockerfile""" - - somef_cli.run_cli(threshold=0.8, - ignore_classifiers=False, - repo_url=None, - local_repo=test_data_repositories + "Fairwinds", - doc_src=None, - in_file=None, - output=test_data_path + "test_issue_725.json", - graph_out=None, - graph_format="turtle", - codemeta_out=None, - pretty=True, - missing=False, - readme_only=False) - - text_file = open(test_data_path + "test_issue_725.json", "r") - data = text_file.read() - text_file.close() - json_content = json.loads(data) - - has_built = json_content.get("has_build_file", []) - - authors = [] - for entry in has_built: - result = entry.get("result", {}) - if "author" in result: - authors.extend(result["author"]) - - expected_author = "FairwindsOps, Inc." - - assert expected_author in authors, ( - f"Expected author '{expected_author}' not found. 
" - f"Authors found: {authors}" - ) - os.remove(test_data_path + "test_issue_725.json") - - def test_issue_725_2(self): - """Checks if this repository has authors extracted from Dockerfile""" - - somef_cli.run_cli(threshold=0.8, - ignore_classifiers=False, - repo_url=None, - local_repo=test_data_repositories + "Prometeus", - doc_src=None, - in_file=None, - output=test_data_path + "test_issue_725_2.json", - graph_out=None, - graph_format="turtle", - codemeta_out=None, - pretty=True, - missing=False, - readme_only=False) - - text_file = open(test_data_path + "test_issue_725_2.json", "r") - data = text_file.read() - text_file.close() - json_content = json.loads(data) - - has_built = json_content.get("has_build_file", []) - - authors = [] - for entry in has_built: - result = entry.get("result", {}) - if "author" in result: - authors.extend(result["author"]) - - expected_author = "The Prometheus Authors" - - assert expected_author in authors, ( - f"Expected author '{expected_author}' not found. " - f"Authors found: {authors}" - ) - expected_count = 2 - assert len(authors) == expected_count, ( - f"Expected {expected_count} authors, but found {len(authors)}: {authors}" - ) - os.remove(test_data_path + "test_issue_725_2.json") - -if __name__ == '__main__': - unittest.main() + \ No newline at end of file diff --git a/src/somef/test/test_dockerfile_parser.py b/src/somef/test/test_dockerfile_parser.py new file mode 100644 index 00000000..fd95dfb7 --- /dev/null +++ b/src/somef/test/test_dockerfile_parser.py @@ -0,0 +1,190 @@ +import json +import os +import unittest +from pathlib import Path +from .. 
import somef_cli +from ..utils import constants + +test_data_path = str(Path(__file__).parent / "test_data") + os.path.sep +test_data_repositories = str(Path(__file__).parent / "test_data" / "repositories") + os.path.sep + +class TestDockerfileParser(unittest.TestCase): + + def test_issue_725(self): + """Checks if this repository has properties extracted from Dockerfile Fairwinds""" + + somef_cli.run_cli(threshold=0.8, + ignore_classifiers=False, + repo_url=None, + local_repo=test_data_repositories + "Fairwinds", + doc_src=None, + in_file=None, + output=test_data_path + "test_issue_725.json", + graph_out=None, + graph_format="turtle", + codemeta_out=None, + pretty=True, + missing=False, + readme_only=False) + + text_file = open(test_data_path + "test_issue_725.json", "r") + data = text_file.read() + text_file.close() + json_content = json.loads(data) + + owners = json_content.get("owner", []) + + code_parser_owners = [ + entry["result"]["value"] + for entry in owners + if entry.get("technique") == "code_parser" + ] + + assert "FairwindsOps, Inc." in code_parser_owners, ( + "Expected owner 'FairwindsOps, Inc.' extracted from Dockerfile " + f"with technique 'code_parser'. Found: {code_parser_owners}" + ) + + descriptions = json_content.get("description", []) + docker_descriptions = [ + entry["result"]["value"] + for entry in descriptions + if entry.get("technique") == "code_parser" + ] + + expected_description = ( + "Nova is a cli tool to find outdated or deprecated Helm charts " + "running in your Kubernetes cluster." 
+ ) + + assert expected_description in docker_descriptions, ( + "Expected description extracted from Dockerfile not found.\n" + f"Expected: {expected_description}\n" + f"Found: {docker_descriptions}" + ) + + documentation = json_content.get("documentation", []) + + doc_urls = [ + entry["result"]["value"] + for entry in documentation + if entry.get("technique") == "code_parser" + ] + + expected_doc = "https://nova.docs.fairwinds.com/" + + assert expected_doc in doc_urls, ( + f"Expected documentation URL '{expected_doc}' not found. " + f"Found: {doc_urls}" + ) + + authors = json_content.get("authors", []) + + author_values = [ + entry["result"]["value"] + for entry in authors + if entry.get("technique") == "code_parser" + ] + + expected_author = "FairwindsOps, Inc." + + assert expected_author in author_values, ( + f"Expected author '{expected_author}' not found. " + f"Authors found: {author_values}" + ) + os.remove(test_data_path + "test_issue_725.json") + + def test_issue_725_2(self): + """Checks if this repository has properties extracted from Dockerfile Prometeus""" + + somef_cli.run_cli(threshold=0.8, + ignore_classifiers=False, + repo_url=None, + local_repo=test_data_repositories + "Prometeus", + doc_src=None, + in_file=None, + output=test_data_path + "test_issue_725_2.json", + graph_out=None, + graph_format="turtle", + codemeta_out=None, + pretty=True, + missing=False, + readme_only=False) + + text_file = open(test_data_path + "test_issue_725_2.json", "r") + data = text_file.read() + text_file.close() + json_content = json.loads(data) + + code_repos = json_content.get("code_repository", []) + code_parser_repos = [ + entry["result"]["value"] + for entry in code_repos + if entry.get("technique") == "code_parser" + ] + + expected_repo = "https://github.com/prometheus/prometheus" + + assert expected_repo in code_parser_repos, ( + f"Expected code_repository '{expected_repo}' extracted with technique " + f"'code_parser'. 
Found: {code_parser_repos}" + ) + + licenses = json_content.get("license", []) + code_parser_licenses = [ + entry["result"] + for entry in licenses + if entry.get("technique") == "code_parser" + ] + + assert any( + lic.get("spdx_id") == "Apache-2.0" + for lic in code_parser_licenses + ), ( + "Expected license with SPDX ID 'Apache-2.0' extracted from Dockerfile " + f"using 'code_parser'. Found: {code_parser_licenses}" + ) + + descriptions = json_content.get("description", []) + code_parser_descriptions = [ + entry["result"]["value"] + for entry in descriptions + if entry.get("technique") == "code_parser" + ] + + expected_description = "The Prometheus monitoring system and time series database" + assert expected_description in code_parser_descriptions, ( + "Expected description extracted from Dockerfile not found.\n" + f"Expected: {expected_description}\n" + f"Found: {code_parser_descriptions}" + ) + + names = json_content.get("name", []) + code_parser_names = [ + entry["result"]["value"] + for entry in names + if entry.get("technique") == "code_parser" + ] + + expected_name = "Prometheus" + assert expected_name in code_parser_names, ( + f"Expected name '{expected_name}' extracted from Dockerfile " + f"using 'code_parser'. Found: {code_parser_names}" + ) + + documentation = json_content.get("documentation", []) + code_parser_docs = [ + entry["result"]["value"] + for entry in documentation + if entry.get("technique") == "code_parser" + ] + + expected_doc = "https://prometheus.io/docs" + assert expected_doc in code_parser_docs, ( + f"Expected documentation URL '{expected_doc}' extracted from Dockerfile " + f"using 'code_parser'. 
Found: {code_parser_docs}" + ) + os.remove(test_data_path + "test_issue_725_2.json") + +if __name__ == '__main__': + unittest.main() diff --git a/src/somef/utils/constants.py b/src/somef/utils/constants.py index 7db3d9e3..1829bc80 100644 --- a/src/somef/utils/constants.py +++ b/src/somef/utils/constants.py @@ -85,7 +85,8 @@ REGEXP_TITLE_NATURAL = r'["“](.+?)["”]' #License spdx -REGEXP_APACHE = r'(?i)apache\s+license\s*,?\s*version\s*2\.0' +# REGEXP_APACHE = r'(?i)apache\s+license\s*,?\s*version\s*2\.0' +REGEXP_APACHE = r'(?i)apache(?:\s+license)?\s*(?:,?\s*version\s*)?2\.0' REGEXP_GPL3 = r'(?i)gnu\s+general\s+public\s+license\s*,?\s*version\s*3\.0' REGEXP_MIT = r'(?i)mit\s+license' REGEXP_BSD2 = r'(?i)(bsd\s*-?\s*2-?clause(?:\s*license)?|redistribution\s+and\s+use\s+in\s+source\s+and\s+binary\s+forms)' @@ -460,4 +461,13 @@ class RepositoryType(Enum): # REGEXP_MAINTAINER_LABEL_OCI = r'^\s*LABEL\s+org\.opencontainers\.image\.authors\s*=\s*["\']?(.+?)["\']?\s*$' REGEXP_MAINTAINER_LABEL_OCI = r'^\s*LABEL\s+org\.opencontainers\.image\.authors\s*=\s*["\']([^"\'\\]+)["\']?\s*(?:\\)?\s*$' REGEXP_MAINTAINER_LABEL_FREE = r'^\s*LABEL\s+"?maintainer"?\s*=\s*["\']?(.+?)["\']?\s*$' -REGEXP_MAINTAINER = r'^\s*MAINTAINER\s+(.+)$' \ No newline at end of file +REGEXP_MAINTAINER = r'^\s*MAINTAINER\s+(.+)$' +REGEXP_DOCKER_TITLE = r'org\.opencontainers\.image\.title\s*=\s*"([^"]+)"' +REGEXP_DOCKER_DESCRIPTION = r'org\.opencontainers\.image\.description\s*=\s*"([^"]+)"' +REGEXP_DOCKER_LICENSES = r'org\.opencontainers\.image\.licenses\s*=\s*"([^"]+)"' +REGEXP_DOCKER_SOURCE = r'org\.opencontainers\.image\.source\s*=\s*"([^"]+)"' +REGEXP_DOCKER_URL = r'org\.opencontainers\.image\.url\s*=\s*"([^"]+)"' +REGEXP_DOCKER_VERSION = r'org\.opencontainers\.image\.version\s*=\s*"([^"]+)"' +REGEXP_DOCKER_DOCUMENTATION = r'org\.opencontainers\.image\.documentation\s*=\s*"([^"]+)"' +REGEXP_DOCKER_VENDOR = r'org\.opencontainers\.image\.vendor\s*=\s*"([^"]+)"' +REGEXP_DOCKER_CREATED_DATE = 
r'org\.opencontainers\.image\.created\s*=\s*"([^"]+)"'