Skip to content

Commit 8d1807f

Browse files
committed
Sanitize S3 Bucket, Clean Output, Licenses 2025-07-23
Signed-off-by: Julio Jimenez <[email protected]>
1 parent ae2c1e0 commit 8d1807f

File tree

4 files changed

+115
-25
lines changed

4 files changed

+115
-25
lines changed

entrypoint.sh

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -590,8 +590,6 @@ EOF
590590
--data audience=wiz-api \
591591
"$WIZ_AUTH_ENDPOINT"); then
592592

593-
log_debug "Auth response: $auth_response"
594-
595593
# Extract access token
596594
local access_token
597595
if access_token=$(echo "$auth_response" | jq -r '.access_token // empty'); then
@@ -747,9 +745,9 @@ download_wiz_report_from_url() {
747745
log_success "ZIP file extracted successfully"
748746

749747
# Debug: Show what was extracted
750-
log_info "Extracted files:"
748+
log_debug "Extracted files:"
751749
find "$extract_dir" -type f | while read -r file; do
752-
log_info " - $(basename "$file") ($(file -b "$file" 2>/dev/null || echo "unknown type"))"
750+
log_debug " - $(basename "$file") ($(file -b "$file" 2>/dev/null || echo "unknown type"))"
753751
done
754752

755753
# Find JSON files in the extracted content
@@ -789,7 +787,7 @@ download_wiz_report_from_url() {
789787

790788
if [[ "$bom_format" == "CycloneDX" ]] || jq -e '.metadata.component' "$json_file" >/dev/null 2>&1; then
791789
cyclonedx_files+=("$json_file")
792-
log_info "$(basename "$json_file") is valid CycloneDX"
790+
log_debug "$(basename "$json_file") is valid CycloneDX"
793791
else
794792
log_warning "$(basename "$json_file") is not CycloneDX (format: $bom_format)"
795793
fi
@@ -1154,12 +1152,11 @@ merge_cyclonedx_sboms() {
11541152
local download_dir="$temp_dir/sboms"
11551153
mkdir -p "$download_dir"
11561154

1157-
# List all JSON files in the S3 bucket (excluding vulns/ directory)
1158-
log_info "Listing JSON files in S3 bucket (excluding vulns/ directory)..."
1155+
# List all JSON files in the S3 bucket
11591156
local s3_files
11601157

11611158
# Debug: Show raw S3 ls output
1162-
log_info "Raw S3 listing for bucket: $S3_BUCKET"
1159+
log_debug "Raw S3 listing for bucket: $S3_BUCKET"
11631160
if ! aws s3 ls "s3://$S3_BUCKET" --recursive; then
11641161
log_error "Failed to list files in S3 bucket: $S3_BUCKET"
11651162
log_error "Check bucket name and AWS permissions"
@@ -1177,13 +1174,10 @@ merge_cyclonedx_sboms() {
11771174
local json_files
11781175
json_files=$(echo "$all_files" | grep '\.json$' || true)
11791176
log_info "JSON files found: $(echo "$json_files" | wc -l) files"
1180-
1181-
s3_files=$(echo "$json_files" | grep -v 'vulns/' || true)
1182-
log_info "JSON files after excluding vulns/: $(echo "$s3_files" | wc -l) files"
1183-
1177+
11841178
# Also exclude the target S3_KEY file to avoid processing the merged output
11851179
local s3_key_basename=$(basename "${S3_KEY:-sbom.json}")
1186-
s3_files=$(echo "$s3_files" | grep -v "^${s3_key_basename}$" || true)
1180+
s3_files=$(echo "$json_files" | grep -v "^${s3_key_basename}$" || true)
11871181
log_info "JSON files after excluding target file ($s3_key_basename): $(echo "$s3_files" | wc -l) files"
11881182

11891183
# Apply include/exclude filters
@@ -1246,18 +1240,18 @@ merge_cyclonedx_sboms() {
12461240

12471241
local local_file="$download_dir/$(basename "$s3_key_to_merge")"  # NOTE(review): reconstructed — scrape showed "$(unknown)"; confirm against original commit
12481242

1249-
log_info "Downloading ($total_files/${#files_array[@]}): s3://$S3_BUCKET/$s3_key_to_merge"
1243+
log_debug "Downloading ($total_files/${#files_array[@]}): s3://$S3_BUCKET/$s3_key_to_merge"
12501244

12511245
# Try to download the file
12521246
if aws s3 cp "s3://$S3_BUCKET/$s3_key_to_merge" "$local_file"; then
12531247
log_success "Downloaded: $filename"
12541248

12551249
# Check if it's a valid CycloneDX SBOM
1256-
log_info "Validating CycloneDX format for: $filename"
1250+
log_debug "Validating CycloneDX format for: $filename"
12571251

12581252
# First check if it's valid JSON
12591253
if jq empty "$local_file" >/dev/null 2>&1; then
1260-
log_info "JSON validation passed for: $filename"
1254+
log_debug "JSON validation passed for: $filename"
12611255
else
12621256
log_warning "Skipping $filename - not valid JSON"
12631257
continue
@@ -1273,7 +1267,7 @@ merge_cyclonedx_sboms() {
12731267
bom_format="missing"
12741268
fi
12751269

1276-
log_info "File $filename has bomFormat: $bom_format"
1270+
log_debug "File $filename has bomFormat: $bom_format"
12771271

12781272
# Check if it's CycloneDX (also check for metadata.component as backup)
12791273
local is_cyclonedx=false
@@ -1587,16 +1581,16 @@ setup_clickhouse_table() {
15871581
# Use basic auth if username and password are provided
15881582
if [[ -n "${CLICKHOUSE_USERNAME:-}" ]] && [[ -n "${CLICKHOUSE_PASSWORD:-}" ]]; then
15891583
auth_params="-u ${CLICKHOUSE_USERNAME}:${CLICKHOUSE_PASSWORD}"
1590-
log_info "Using basic auth with username: ${CLICKHOUSE_USERNAME}"
1584+
log_debug "Using basic auth with username: ${CLICKHOUSE_USERNAME}"
15911585
elif [[ -n "${CLICKHOUSE_USERNAME:-}" ]]; then
15921586
auth_params="-u ${CLICKHOUSE_USERNAME}:"
1593-
log_info "Using basic auth with username only: ${CLICKHOUSE_USERNAME}"
1587+
log_debug "Using basic auth with username only: ${CLICKHOUSE_USERNAME}"
15941588
else
1595-
log_info "Using no authentication"
1589+
log_debug "Using no authentication"
15961590
fi
15971591

15981592
# Test connection first
1599-
log_info "Testing ClickHouse connection..."
1593+
log_debug "Testing ClickHouse connection..."
16001594
if ! curl -s ${auth_params} --data "SELECT 1" "${clickhouse_url}" > /dev/null; then
16011595
log_error "ClickHouse connection test failed"
16021596
log_error "Please verify your ClickHouse credentials and URL"

lib/sanitize.sh

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,33 @@ sanitize_url() {
7878
echo "$sanitized"
7979
}
8080

81+
# Sanitize S3 bucket names
82+
sanitize_s3_bucket() {
83+
local bucket="$1"
84+
85+
# S3 bucket names have specific rules
86+
local sanitized
87+
sanitized=$(echo "$bucket" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9.-]//g')
88+
89+
# Validate S3 bucket naming rules
90+
if [[ ! "$sanitized" =~ ^[a-z0-9][a-z0-9.-]{1,61}[a-z0-9]$ ]]; then
91+
log_error "Invalid S3 bucket name: $bucket"
92+
log_error "S3 bucket names must be 3-63 characters, lowercase, and contain only letters, numbers, dots, and hyphens"
93+
exit 1
94+
fi
95+
96+
# Additional S3 bucket rules
97+
if [[ "$sanitized" == *.* ]]; then
98+
# If contains dots, validate it's not IP-like
99+
if [[ "$sanitized" =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
100+
log_error "S3 bucket name cannot be formatted as IP address: $bucket"
101+
exit 1
102+
fi
103+
fi
104+
105+
echo "$sanitized"
106+
}
107+
81108
# Main sanitization function - sanitizes all environment variables
82109
sanitize_inputs() {
83110
log_debug "Sanitizing input parameters..."
@@ -190,10 +217,10 @@ sanitize_inputs() {
190217
log_debug "Sanitized AWS_DEFAULT_REGION: $AWS_DEFAULT_REGION"
191218
fi
192219

193-
# if [[ -n "${S3_BUCKET:-}" ]]; then
194-
# S3_BUCKET=$(sanitize_s3_bucket "$S3_BUCKET")
195-
# log_debug "Sanitized S3_BUCKET: $S3_BUCKET"
196-
# fi
220+
if [[ -n "${S3_BUCKET:-}" ]]; then
221+
S3_BUCKET=$(sanitize_s3_bucket "$S3_BUCKET")
222+
log_debug "Sanitized S3_BUCKET: $S3_BUCKET"
223+
fi
197224

198225
# if [[ -n "${S3_KEY:-}" ]]; then
199226
# S3_KEY=$(sanitize_s3_key "$S3_KEY")

license-mappings.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
{
2+
"@astrojs/markdown-remark": "MIT",
3+
"@astrojs/mdx": "MIT",
4+
"@astrojs/sitemap": "MIT",
5+
"@astrojs/starlight": "MIT",
6+
"@astrojs/telemetry": "MIT",
27
"@clickhouse/client": "Apache-2.0",
38
"@clickhouse/client-common": "Apache-2.0",
49
"@faker-js/faker": "MIT",

test/simple.bats

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,3 +453,67 @@ EOF
453453
[ "$status" -eq 0 ]
454454
[[ "$output" == "https://example.com" ]]
455455
}
456+
457+
# Test 43: sanitize_s3_bucket accepts valid bucket name
458+
@test "sanitize_s3_bucket accepts valid bucket name" {
459+
run sanitize_s3_bucket "my-test-bucket"
460+
[ "$status" -eq 0 ]
461+
[[ "$output" == "my-test-bucket" ]]
462+
}
463+
464+
# Test 44: sanitize_s3_bucket converts bucket name to lowercase
465+
@test "sanitize_s3_bucket converts to lowercase" {
466+
run sanitize_s3_bucket "My-Test-Bucket"
467+
[ "$status" -eq 0 ]
468+
[[ "$output" == "my-test-bucket" ]]
469+
}
470+
471+
# Test 45: sanitize_s3_bucket accepts bucket with dots
472+
@test "sanitize_s3_bucket accepts bucket with dots" {
473+
run sanitize_s3_bucket "my.test.bucket"
474+
[ "$status" -eq 0 ]
475+
[[ "$output" == "my.test.bucket" ]]
476+
}
477+
478+
# Test 46: sanitize_s3_bucket removes invalid characters
479+
@test "sanitize_s3_bucket removes invalid characters" {
480+
run sanitize_s3_bucket "my_test@bucket!"
481+
[ "$status" -eq 0 ]
482+
[[ "$output" == "mytestbucket" ]]
483+
}
484+
485+
# Test 47: sanitize_s3_bucket rejects short bucket name
486+
@test "sanitize_s3_bucket rejects too short name" {
487+
run sanitize_s3_bucket "ab"
488+
[ "$status" -eq 1 ]
489+
[[ "$output" == *"Invalid S3 bucket name"* ]]
490+
}
491+
492+
# Test 48: sanitize_s3_bucket rejects long bucket name
493+
@test "sanitize_s3_bucket rejects too long name" {
494+
local long_name=$(printf 'a%.0s' {1..70})
495+
run sanitize_s3_bucket "$long_name"
496+
[ "$status" -eq 1 ]
497+
[[ "$output" == *"Invalid S3 bucket name"* ]]
498+
}
499+
500+
# Test 49: sanitize_s3_bucket rejects IP-like format
501+
@test "sanitize_s3_bucket rejects IP-like format" {
502+
run sanitize_s3_bucket "192.168.1.1"
503+
[ "$status" -eq 1 ]
504+
[[ "$output" == *"cannot be formatted as IP address"* ]]
505+
}
506+
507+
# Test 50: sanitize_s3_bucket rejects bucket starting with dash
508+
@test "sanitize_s3_bucket rejects bucket starting with dash" {
509+
run sanitize_s3_bucket "-invalid-bucket"
510+
[ "$status" -eq 1 ]
511+
[[ "$output" == *"Invalid S3 bucket name"* ]]
512+
}
513+
514+
# Test 51: sanitize_s3_bucket rejects bucket ending with dash
515+
@test "sanitize_s3_bucket rejects bucket ending with dash" {
516+
run sanitize_s3_bucket "invalid-bucket-"
517+
[ "$status" -eq 1 ]
518+
[[ "$output" == *"Invalid S3 bucket name"* ]]
519+
}

Comments (0)