Skip to content

Commit fa8064f

Browse files
committed
ai: fix for irsa
1 parent 2929414 commit fa8064f

File tree

2 files changed

+130
-14
lines changed

2 files changed

+130
-14
lines changed

tools/dz-ai/internal/data/duck/config.go

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ import (
1313
)
1414

1515
// LoadS3ConfigFromEnv loads S3 configuration from environment variables.
16-
// Supports both AWS S3 and MinIO configurations.
16+
// Supports both AWS S3 and MinIO configurations, including IRSA (IAM Roles for Service Accounts).
1717
//
1818
// Environment variables:
19-
// - S3_ACCESS_KEY_ID or AWS_ACCESS_KEY_ID (required)
20-
// - S3_SECRET_ACCESS_KEY or AWS_SECRET_ACCESS_KEY (required)
19+
// - S3_ACCESS_KEY_ID or AWS_ACCESS_KEY_ID (optional, for IRSA leave unset to use IAM role)
20+
// - S3_SECRET_ACCESS_KEY or AWS_SECRET_ACCESS_KEY (optional, for IRSA leave unset to use IAM role)
2121
// - S3_ENDPOINT (optional, for MinIO: "http://localhost:9000")
2222
// - S3_REGION or AWS_REGION (optional, defaults to "us-east-1")
2323
// - S3_USE_SSL (optional, "true"/"false", auto-detected if S3_ENDPOINT is set)
@@ -30,23 +30,36 @@ import (
3030
// Otherwise, assumes AWS S3 and sets:
3131
// - UseSSL: true
3232
// - URLStyle: "virtual"
33+
//
34+
// For IRSA (IAM Roles for Service Accounts) in AWS EKS, leave both access key and secret
35+
// unset to use the IAM role credentials automatically.
3336
func LoadS3ConfigFromEnv() (*S3Config, error) {
3437
// Get access key (try S3_ prefix first, then AWS_ prefix)
3538
accessKeyID := os.Getenv("S3_ACCESS_KEY_ID")
3639
if accessKeyID == "" {
3740
accessKeyID = os.Getenv("AWS_ACCESS_KEY_ID")
3841
}
39-
if accessKeyID == "" {
40-
return nil, nil // Not an error, just not configured
41-
}
4242

4343
// Get secret key (try S3_ prefix first, then AWS_ prefix)
4444
secretAccessKey := os.Getenv("S3_SECRET_ACCESS_KEY")
4545
if secretAccessKey == "" {
4646
secretAccessKey = os.Getenv("AWS_SECRET_ACCESS_KEY")
4747
}
48-
if secretAccessKey == "" {
49-
return nil, fmt.Errorf("S3_ACCESS_KEY_ID or AWS_ACCESS_KEY_ID is set but S3_SECRET_ACCESS_KEY or AWS_SECRET_ACCESS_KEY is missing")
48+
49+
// If neither access key nor secret is set, return nil (not configured, will use IRSA/default credentials)
50+
if accessKeyID == "" && secretAccessKey == "" {
51+
return nil, nil // Not an error, just not configured - will use default AWS credentials chain (IRSA)
52+
}
53+
54+
// If only secret is set without access key, that's an error
55+
if accessKeyID == "" && secretAccessKey != "" {
56+
return nil, fmt.Errorf("S3_SECRET_ACCESS_KEY or AWS_SECRET_ACCESS_KEY is set but S3_ACCESS_KEY_ID or AWS_ACCESS_KEY_ID is missing")
57+
}
58+
59+
// If only access key is set without secret, that's also an error (inconsistent state)
60+
// For IRSA, both should be unset to use IAM role credentials
61+
if accessKeyID != "" && secretAccessKey == "" {
62+
return nil, fmt.Errorf("S3_ACCESS_KEY_ID or AWS_ACCESS_KEY_ID is set but S3_SECRET_ACCESS_KEY or AWS_SECRET_ACCESS_KEY is missing (for IRSA, leave both unset)")
5063
}
5164

5265
// Get endpoint (optional, for MinIO)
@@ -127,6 +140,10 @@ func EnsureMinIOBucket(ctx context.Context, log *slog.Logger, storageURI string,
127140
}
128141

129142
// Create S3 client
143+
// MinIO always requires explicit credentials
144+
if s3Config.AccessKeyID == "" || s3Config.SecretAccessKey == "" {
145+
return fmt.Errorf("MinIO requires both S3_ACCESS_KEY_ID and S3_SECRET_ACCESS_KEY to be set")
146+
}
130147
creds := credentials.NewStaticCredentialsProvider(
131148
s3Config.AccessKeyID,
132149
s3Config.SecretAccessKey,
@@ -181,12 +198,28 @@ func PrepareS3ConfigForStorageURI(ctx context.Context, log *slog.Logger, storage
181198
}
182199

183200
// Load S3 config from environment variables
201+
// If nil, that's OK - will use default AWS credentials chain (IRSA)
184202
s3Config, err := LoadS3ConfigFromEnv()
185203
if err != nil {
186204
return nil, fmt.Errorf("failed to load S3 configuration: %w", err)
187205
}
206+
// If s3Config is nil, create a minimal config with just region for IRSA
188207
if s3Config == nil {
189-
return nil, fmt.Errorf("S3 storage URI specified but S3 configuration not found in environment variables (S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY required)")
208+
region := os.Getenv("S3_REGION")
209+
if region == "" {
210+
region = os.Getenv("AWS_REGION")
211+
}
212+
if region == "" {
213+
region = "us-east-1" // Default region
214+
}
215+
s3Config = &S3Config{
216+
AccessKeyID: "", // Empty for IRSA
217+
SecretAccessKey: "", // Empty for IRSA
218+
Endpoint: "", // AWS S3
219+
Region: region,
220+
UseSSL: true, // AWS S3 default
221+
URLStyle: "virtual", // AWS S3 default
222+
}
190223
}
191224

192225
// If using localhost MinIO, ensure the bucket exists

tools/dz-ai/internal/data/duck/lake.go

Lines changed: 88 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,8 @@ func NewLake(ctx context.Context, log *slog.Logger, catalogName, catalogURI, sto
171171
return nil, fmt.Errorf("failed to create storage directory: %w", err)
172172
}
173173
} else if strings.HasPrefix(storageURI, "s3://") {
174-
// For S3 paths, we'll append the secret reference after creating the secret
174+
// For S3 paths, we'll reference the secret after creating it
175+
// The secret will be referenced in the DATA_PATH
175176
storagePath = storageURI
176177
useS3 = true
177178
} else {
@@ -219,6 +220,9 @@ func NewLake(ctx context.Context, log *slog.Logger, catalogName, catalogURI, sto
219220
}
220221

221222
// Create S3 secret
223+
// For IRSA (no explicit credentials), DuckDB's httpfs extension will use the default
224+
// AWS credentials chain via environment variables. We still create a secret with
225+
// region and other settings, but omit KEY_ID and SECRET.
222226
secretSQL := "CREATE SECRET IF NOT EXISTS s3_secret (TYPE s3"
223227
if cfg.AccessKeyID != "" {
224228
secretSQL += fmt.Sprintf(", KEY_ID '%s'", strings.ReplaceAll(cfg.AccessKeyID, "'", "''"))
@@ -239,22 +243,31 @@ func NewLake(ctx context.Context, log *slog.Logger, catalogName, catalogURI, sto
239243
}
240244
urlStyle := cfg.URLStyle
241245
if urlStyle == "" {
242-
urlStyle = "path" // Default to path style for MinIO compatibility
246+
// Default based on whether it's AWS S3 or MinIO
247+
if cfg.Endpoint == "" || strings.Contains(cfg.Endpoint, "amazonaws.com") {
248+
urlStyle = "virtual" // AWS S3 default
249+
} else {
250+
urlStyle = "path" // MinIO default
251+
}
243252
}
244253
secretSQL += fmt.Sprintf(", URL_STYLE '%s'", urlStyle)
245254
useSSL := cfg.UseSSL
246255
if cfg.Endpoint != "" && !strings.Contains(cfg.Endpoint, "amazonaws.com") {
247256
// Default to false for non-AWS endpoints (like MinIO)
248257
useSSL = false
258+
} else if cfg.Endpoint == "" {
259+
// AWS S3 default
260+
useSSL = true
249261
}
250262
secretSQL += fmt.Sprintf(", USE_SSL %t", useSSL)
251263
secretSQL += ")"
252264

253265
if _, err := db.Exec(secretSQL); err != nil {
254266
return nil, fmt.Errorf("failed to create S3 secret: %w", err)
255267
}
256-
// Note: The S3 secret is created and should be used automatically by ducklake
257-
// for S3 operations. The storagePath is passed as-is to DATA_PATH.
268+
// DuckDB's ducklake extension should automatically use the s3_secret for S3 operations
269+
// when the storage path is s3://. No need to reference it in the URI.
270+
// For IRSA, the secret (without KEY_ID/SECRET) will use the default AWS credentials chain
258271
log.Info("configured S3 storage", "endpoint", cfg.Endpoint, "region", cfg.Region)
259272
}
260273

@@ -280,7 +293,9 @@ func NewLake(ctx context.Context, log *slog.Logger, catalogName, catalogURI, sto
280293
break
281294
}
282295
if i < maxRetries-1 {
283-
log.Debug("postgres not ready, retrying attach", "attempt", i+1, "error", attachErr)
296+
// Sanitize error message to prevent password leakage
297+
errorMsg := sanitizeErrorForLogging(attachErr.Error())
298+
log.Debug("postgres not ready, retrying attach", "attempt", i+1, "error", errorMsg)
284299
// Use context-aware sleep to respect cancellation
285300
timer := time.NewTimer(retryDelay)
286301
select {
@@ -410,6 +425,74 @@ func validateStorageURI(uri string) error {
410425
return fmt.Errorf("storage URI must start with file:// or s3:// (got: %q)", uri)
411426
}
412427

428+
// sanitizeErrorForLogging redacts passwords and other sensitive information from error messages.
429+
func sanitizeErrorForLogging(errMsg string) string {
430+
// Redact libpq format passwords (password=secret)
431+
if strings.Contains(errMsg, "password=") {
432+
// Handle patterns like "password=secret" or "password='secret'" in space-separated or quoted strings
433+
parts := strings.Fields(errMsg)
434+
var sanitizedParts []string
435+
for _, part := range parts {
436+
if strings.HasPrefix(part, "password=") {
437+
// Extract the password value and redact it
438+
if idx := strings.Index(part, "="); idx != -1 {
439+
value := part[idx+1:]
440+
// Remove quotes if present
441+
value = strings.Trim(value, "'\"")
442+
if value != "" {
443+
sanitizedParts = append(sanitizedParts, "password=REDACTED")
444+
} else {
445+
sanitizedParts = append(sanitizedParts, part)
446+
}
447+
} else {
448+
sanitizedParts = append(sanitizedParts, part)
449+
}
450+
} else {
451+
sanitizedParts = append(sanitizedParts, part)
452+
}
453+
}
454+
return strings.Join(sanitizedParts, " ")
455+
}
456+
// Redact postgres:// URIs with passwords
457+
// Try to find postgres URIs and redact them using the existing function
458+
if strings.Contains(errMsg, "postgres://") || strings.Contains(errMsg, "postgresql://") {
459+
// For postgres URIs, try to extract and redact them
460+
// Look for patterns like "postgres://user:pass@host" or "postgresql://user:pass@host"
461+
// This is a simple heuristic - if we find @ after the scheme, there's likely a password
462+
if strings.Contains(errMsg, "@") {
463+
// Try to find the URI boundaries and redact
464+
// Simple approach: redact anything between :// and @ that contains :
465+
// This handles "postgres://user:password@host" -> "postgres://user:REDACTED@host"
466+
// Use a more careful approach with the existing RedactedCatalogURI if possible
467+
// For now, use a simple pattern replacement
468+
replaced := errMsg
469+
// Find and replace postgres://user:pass@ patterns
470+
for _, scheme := range []string{"postgres://", "postgresql://"} {
471+
if idx := strings.Index(replaced, scheme); idx != -1 {
472+
// Find the @ after the scheme
473+
afterScheme := replaced[idx+len(scheme):]
474+
if atIdx := strings.Index(afterScheme, "@"); atIdx != -1 {
475+
// Check if there's a : before @ (indicating password)
476+
credentials := afterScheme[:atIdx]
477+
if strings.Contains(credentials, ":") {
478+
// Split on : and redact the password part
479+
credParts := strings.SplitN(credentials, ":", 2)
480+
if len(credParts) == 2 {
481+
redactedCreds := credParts[0] + ":REDACTED"
482+
replaced = replaced[:idx+len(scheme)] + redactedCreds + afterScheme[atIdx:]
483+
errMsg = replaced
484+
break // Only process first occurrence
485+
}
486+
}
487+
}
488+
}
489+
}
490+
return replaced
491+
}
492+
}
493+
return errMsg
494+
}
495+
413496
// RedactedCatalogURI redacts sensitive information from catalog URIs for logging.
414497
// It redacts passwords from postgres:// URIs and libpq connection strings.
415498
func RedactedCatalogURI(uri string) string {

0 commit comments

Comments
 (0)