55
66# pylint: disable=line-too-long
77
8- import re
98import json
10- from enum import Enum
9+ from azure . cli . core import decorators
1110
1211sensitive_data_warning_message = '[Warning] This output may compromise security by showing secrets. Learn more at: https://go.microsoft.com/fwlink/?linkid=2258669'
1312sensitive_data_detailed_warning_message = '[Warning] This output may compromise security by showing the following secrets: {}. Learn more at: https://go.microsoft.com/fwlink/?linkid=2258669'
1413
1514
class CredentialType(Enum):
    """Regex-based credential patterns with their redaction replacements.

    Every member value is a ``(regex, replacement, level, description)`` tuple;
    ``__init__`` unpacks it into the attributes of the same names.
    """

    # ([?&;])    - match character '?' or '&' or ';' as group 1, the prefix of the signature within a SAS token
    # sig=       - match the literal string 'sig='
    # [\w%/+-]+  - match any word character, '%', '/', '+', or '-' one or more times. This is the signature
    #              to redact. The '-' is placed last so it is a literal hyphen: written as '%-/' it forms a
    #              range that also matches '&' and swallows the following query parameter.
    SAS_TOKEN = (r'([?&;])sig=[\w%/+-]+', r'\1sig=_REDACTED_SAS_TOKEN_SIG_', 1, 'SAS token')
    # key=         - match the literal string 'key=', could be accountkey, primarykey, secondarykey, etc.
    # [\w%+/=-]+   - match any word character, '%', '+', '/', '=', or '-' one or more times.
    KEY = (r'key=[\w%+/=-]+', r'key=_REDACTED_KEY_', 1,
           'Several types of keys/secrets are passed with a query parameter "key"')
    # (?:eyJ0eXAi|eyJhbGci) - match the literal string 'eyJ0eXAi' or 'eyJhbGci' (non-capturing), the JWT prefix
    # [\w\-.~+/%]*          - match any word character, '-', '.', '~', '+', '/', or '%' zero or more times.
    JWT_TOKEN = (r'(?:eyJ0eXAi|eyJhbGci)[\w\-.~+/%]*', '_REDACTED_JWT_TOKEN_', 0, 'JWT token')
    # (bearer |bearer%20) - match the literal string 'bearer ' or 'bearer%20' as group 1
    # [\w\-.~+/]{100,}    - match any word character, '-', '.', '~', '+', or '/' one hundred or more times.
    BEARER_TOKEN = (r'(bearer |bearer%20)[\w\-.~+/]{100,}', r'\1_REDACTED_BEARER_TOKEN_', 0, 'Bearer token')
    # (ssh-rsa )            - match the literal string 'ssh-rsa ' as group 1, the prefix of an ssh key
    # AAAA[\w\-.~+/]{100,}  - match 'AAAA' followed by any word character, '-', '.', '~', '+', or '/'
    #                         one hundred or more times.
    SSH_KEY = (r'(ssh-rsa )AAAA[\w\-.~+/]{100,}', r'\1_REDACTED_SSH_KEY_', 1, 'SSH key')
    # [\w.%#+-]+                - match any word character, '.', '%', '#', '+', or '-' one or more times.
    # (%40|@)                   - match '@' or '%40' as group 1
    # ([a-z0-9.-]*\.[a-z]{2,})  - as group 2: letters, digits, '.', or '-' zero or more times, followed by
    #                             a '.' and two or more letters.
    EMAIL_ADDRESS = (r'[\w.%#+-]+(%40|@)([a-z0-9.-]*\.[a-z]{2,})', r'_REDACTED_EMAIL_\1\2', 99, 'Email address')
    # [0-9a-f]{8} - match any character in the range '0' to '9' or 'a' to 'f' exactly eight times.
    # -?          - match character '-' zero or one time.
    GUID = (r'([0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12})', '_REDACTED_GUID_', 999, 'GUID')
    # below regexes are shared by ADO cred scan, see definition:
    # https://github.com/microsoft/azure-pipelines-agent/blob/master/src/Microsoft.VisualStudio.Services.Agent/AdditionalMaskingRegexes.CredScan.cs
    AAD_CLIENT_APP = (r'[0-9A-Za-z-_~.]{3}7Q~[0-9A-Za-z-_~.]{31}\b|\b[0-9A-Za-z-_~.]{3}8Q~[0-9A-Za-z-_~.]{34}',
                      '_REDACTED_AAD_CLIENT_APP_', 99, 'AAD client app')
    SYMMETRIC_KEY_512 = (r'[0-9A-Za-z+/]{76}(APIM|ACDb|\+(ABa|AMC|ASt))[0-9A-Za-z+/]{5}[AQgw]==',
                         '_REDACTED_SYMMETRIC_KEY_', 1, '512-bit symmetric key')
    SYMMETRIC_KEY_256 = (r'[0-9A-Za-z+/]{33}(AIoT|\+(ASb|AEh|ARm))[A-P][0-9A-Za-z+/]{5}=',
                         '_REDACTED_SYMMETRIC_KEY_', 1, '256-bit symmetric key')
    AZURE_FUNCTION_KEY = (r'[0-9A-Za-z_\-]{44}AzFu[0-9A-Za-z\-_]{5}[AQgw]==',
                          '_REDACTED_AZURE_FUNCTION_KEY_', 1, 'Azure function key')
    AZURE_SEARCH_KEY = (r'[0-9A-Za-z]{42}AzSe[A-D][0-9A-Za-z]{5}',
                        '_REDACTED_AZURE_SEARCH_KEY_', 1, 'Azure search key')
    AZURE_CONTAINER_REGISTRY_KEY = (r'[0-9A-Za-z+/]{42}\+ACR[A-D][0-9A-Za-z+/]{5}',
                                    '_REDACTED_AZURE_CONTAINER_REGISTRY_KEY_', 1, 'Azure container registry key')
    AZURE_CACHE_FOR_REDIS_KEY = (r'[0-9A-Za-z]{33}AzCa[A-P][0-9A-Za-z]{5}=',
                                 '_REDACTED_AZURE_CACHE_FOR_REDIS_KEY_', 1, 'Azure cache for redis key')

    def __init__(self, regex, replacement, level=0, description=''):
        """Unpack the member's value tuple into named attributes.

        :param regex: Pattern that matches the credential.
        :param replacement: Substitution template applied when redacting a match.
        :param level: Integer level attached to the pattern (defaults to 0).
        :param description: Human-readable name of the credential kind.
        """
        self.regex = regex
        self.replacement = replacement
        self.level = level
        self.description = description
55-
56-
57- def is_containing_credential (content , is_file = False , max_level = 9 ):
@decorators.call_once
def get_secret_masker():
    """Build a ``SecretMasker`` from the 'HighConfidenceSecurityModels.json' patterns.

    The function is wrapped in ``decorators.call_once``; presumably that means the
    masker is constructed on the first call only -- confirm against the decorator.

    :return: A ``SecretMasker`` initialised with the loaded regex patterns.
    """
    # Imported at call time rather than at module import.
    from microsoft_security_utilities_secret_masker import SecretMasker, load_regex_patterns_from_json_file
    return SecretMasker(load_regex_patterns_from_json_file('HighConfidenceSecurityModels.json'))
21+
22+
23+ def is_containing_credential (content , is_file = False ):
5824 """Check if the given content contains credential or not.
5925
6026 :param content: The content or the file path.
@@ -75,11 +41,10 @@ def is_containing_credential(content, is_file=False, max_level=9):
7541 content = str (content )
7642 except ValueError :
7743 raise ValueError ('The content is not string or json object.' )
78- return any (re .search (cred_type .regex , content , flags = re .IGNORECASE | re .MULTILINE ) and cred_type .level <= max_level
79- for cred_type in CredentialType )
44+ return get_secret_masker ().detect_secrets (content )
8045
8146
82- def distinguish_credential (content , is_file = False , max_level = 9 ):
47+ def distinguish_credential (content , is_file = False ):
8348 """Distinguish which property contains credential from the given content.
8449
8550 :param content: The content(can be string or json object) or the file path.
@@ -92,29 +57,35 @@ def distinguish_credential(content, is_file=False, max_level=9):
9257 """
9358 containing_credential = False
9459 secret_property_names = set ()
60+ secret_names = set ()
9561 if is_file :
9662 with open (content , 'r' ) as f :
9763 content = json .load (f )
9864
9965 if isinstance (content , list ):
10066 for item in content :
101- _containing_credential , _secret_property_names = distinguish_credential (item , max_level = max_level )
67+ _containing_credential , _secret_property_names , _secret_names = distinguish_credential (item )
10268 containing_credential = containing_credential or _containing_credential
10369 secret_property_names .update (_secret_property_names )
104- return containing_credential , secret_property_names
70+ secret_names .update (_secret_names )
71+ return containing_credential , secret_property_names , secret_names
10572
10673 if isinstance (content , dict ):
10774 for key , value in content .items ():
108- _containing_credential , _secret_property_names = distinguish_credential (value , max_level = max_level )
75+ _containing_credential , _secret_property_names , _secret_names = distinguish_credential (value )
10976 containing_credential = containing_credential or _containing_credential
11077 secret_property_names .update (_secret_property_names )
11178 if _containing_credential :
11279 secret_property_names .add (key )
113- return containing_credential , secret_property_names
80+ secret_names .update (_secret_names )
81+ return containing_credential , secret_property_names , secret_names
11482
115- if is_containing_credential (content , max_level = max_level ):
83+ detections = is_containing_credential (content )
84+ if detections :
11685 containing_credential = True
117- return containing_credential , secret_property_names
86+ for detection in detections :
87+ secret_names .add (detection .name )
88+ return containing_credential , secret_property_names , secret_names
11889
11990
12091def redact_credential (content , is_file = False ):
@@ -146,7 +117,5 @@ def redact_credential(content, is_file=False):
146117 raise ValueError ('The content is not string or json object.' )
147118
148119
149- def redact_credential_for_string (string ):
150- for cred_type in CredentialType :
151- string = re .sub (cred_type .regex , cred_type .replacement , string , flags = re .IGNORECASE | re .MULTILINE )
152- return string
def redact_credential_for_string(content):
    """Pass ``content`` through the shared secret masker and return the result.

    :param content: The value handed to the masker's ``mask_secrets`` method.
    :return: Whatever ``mask_secrets`` returns for ``content``.
    """
    masker = get_secret_masker()
    return masker.mask_secrets(content)
0 commit comments