diff --git a/acceptance/bundle/telemetry/deploy-error-message/databricks.yml b/acceptance/bundle/telemetry/deploy-error-message/databricks.yml new file mode 100644 index 0000000000..a613c03529 --- /dev/null +++ b/acceptance/bundle/telemetry/deploy-error-message/databricks.yml @@ -0,0 +1,6 @@ +bundle: + name: test-bundle + +variables: + myvar: + description: a required variable diff --git a/acceptance/bundle/telemetry/deploy-error-message/out.test.toml b/acceptance/bundle/telemetry/deploy-error-message/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/bundle/telemetry/deploy-error-message/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/telemetry/deploy-error-message/output.txt b/acceptance/bundle/telemetry/deploy-error-message/output.txt new file mode 100644 index 0000000000..bb7f609143 --- /dev/null +++ b/acceptance/bundle/telemetry/deploy-error-message/output.txt @@ -0,0 +1,9 @@ + +>>> [CLI] bundle deploy +Error: no value assigned to required variable myvar. Variables are usually assigned in databricks.yml, and they can be overridden using "--var", the BUNDLE_VAR_myvar environment variable, or .databricks/bundle//variable-overrides.json + + +Exit code: 1 + +>>> cat out.requests.txt +no value assigned to required variable myvar. Variables are usually assigned in databricks.yml, and they can be overridden using "--var", the BUNDLE_VAR_myvar environment variable, or [REDACTED_REL_PATH](json) diff --git a/acceptance/bundle/telemetry/deploy-error-message/script b/acceptance/bundle/telemetry/deploy-error-message/script new file mode 100644 index 0000000000..8c5e853386 --- /dev/null +++ b/acceptance/bundle/telemetry/deploy-error-message/script @@ -0,0 +1,5 @@ +errcode trace $CLI bundle deploy + +trace cat out.requests.txt | jq -r 'select(has("path") and .path == "/telemetry-ext") | .body.protoLogs[] | fromjson | .entry.databricks_cli_log.bundle_deploy_event.error_message' + +rm out.requests.txt diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 4613a7a211..df39cb79d0 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -208,7 +208,6 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand return } - logDeployTelemetry(ctx, b) bundle.ApplyContext(ctx, b, scripts.Execute(config.ScriptPostDeploy)) } diff --git a/bundle/phases/telemetry.go b/bundle/phases/telemetry.go index 4584e9fc5e..5478ddb2a1 100644 --- a/bundle/phases/telemetry.go +++ b/bundle/phases/telemetry.go @@ -33,7 +33,17 @@ func getExecutionTimes(b *bundle.Bundle) []protos.IntMapEntry { return executionTimes } -func logDeployTelemetry(ctx context.Context, b *bundle.Bundle) { +// Maximum length of the error message included in telemetry. +const maxErrorMessageLength = 500 + +// LogDeployTelemetry logs a telemetry event for a bundle deploy command. +func LogDeployTelemetry(ctx context.Context, b *bundle.Bundle, errMsg string) { + errMsg = scrubForTelemetry(errMsg) + + if len(errMsg) > maxErrorMessageLength { + errMsg = errMsg[:maxErrorMessageLength] + } + resourcesCount := int64(0) _, err := dyn.MapByPattern(b.Config.Value(), dyn.NewPattern(dyn.Key("resources"), dyn.AnyKey(), dyn.AnyKey()), func(p dyn.Path, v dyn.Value) (dyn.Value, error) { resourcesCount++ @@ -149,6 +159,7 @@ func logDeployTelemetry(ctx context.Context, b *bundle.Bundle) { BundleDeployEvent: &protos.BundleDeployEvent{ BundleUuid: bundleUuid, DeploymentId: b.Metrics.DeploymentId.String(), + ErrorMessage: errMsg, ResourceCount: resourcesCount, ResourceJobCount: int64(len(b.Config.Resources.Jobs)), diff --git a/bundle/phases/telemetry_scrub.go b/bundle/phases/telemetry_scrub.go new file mode 100644 index 0000000000..a25bfbc131 --- /dev/null +++ b/bundle/phases/telemetry_scrub.go @@ -0,0 +1,153 @@ +package phases + +import ( + "path" + "regexp" + "strings" +) + +// Scrub sensitive information from error messages before sending to telemetry. +// Inspired by VS Code's telemetry path scrubbing and Sentry's @userpath pattern. +// +// Path regexes use [\s:,"'] as boundary characters to delimit where a path +// ends. While these characters are technically valid in file paths, in error +// messages they act as delimiters (e.g. "error: /path/to/file: not found", +// or "failed to read '/some/path', skipping"). This is a practical tradeoff: +// paths containing colons, commas, or quotes are extremely rare, and without +// these boundaries the regexes would over-match into surrounding message text. +// +// References: +// - VS Code: https://github.com/microsoft/vscode/blob/main/src/vs/platform/telemetry/common/telemetryUtils.ts +// - Sentry: https://github.com/getsentry/relay (PII rule: @userpath) +var ( + // Matches Windows absolute paths with at least two components + // (e.g., C:\foo\bar, D:/projects/myapp). + windowsAbsPathRegexp = regexp.MustCompile(`[A-Za-z]:[/\\][^\s:,"'/\\]+[/\\][^\s:,"']+`) + + // Matches Databricks workspace paths (/Workspace/...). + workspacePathRegexp = regexp.MustCompile(`(^|[\s:,"'])(/Workspace/[^\s:,"']+)`) + + // Matches absolute Unix paths with at least two components + // (e.g., /home/user/..., /tmp/foo, ~/.config/databricks). + absPathRegexp = regexp.MustCompile(`(^|[\s:,"'])(~?/[^\s:,"'/]+/[^\s:,"']+)`) + + // Matches relative paths: + // - Explicit: ./foo, ../foo + // - Dot-prefixed directories: .databricks/bundle/..., .cache/foo + explicitRelPathRegexp = regexp.MustCompile(`(^|[\s:,"'])((?:\.\.?|\.[a-zA-Z][^\s:,"'/]*)/[^\s:,"']+)`) + + // Matches implicit relative paths: at least two path components where + // the last component has a file extension (e.g., "resources/job.yml", + // "bundle/dev/state.json"). + implicitRelPathRegexp = regexp.MustCompile(`(^|[\s:,"'])([a-zA-Z0-9_][^\s:,"']*/[^\s:,"']*\.[a-zA-Z][^\s:,"']*)`) + + // Matches email addresses. Workspace paths in Databricks often contain + // emails (e.g., /Workspace/Users/user@example.com/.bundle/dev). + emailRegexp = regexp.MustCompile(`[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}`) +) + +// Known file extensions that are safe to retain in redacted paths. +// These help understand usage patterns without capturing sensitive information. +var knownExtensions = map[string]bool{ + // Configuration and data formats + ".yml": true, + ".yaml": true, + ".json": true, + ".toml": true, + ".cfg": true, + ".ini": true, + ".env": true, + ".xml": true, + ".properties": true, + ".conf": true, + + // Notebook and script languages + ".py": true, + ".r": true, + ".scala": true, + ".sql": true, + ".ipynb": true, + ".sh": true, + + // Web / Apps + ".js": true, + ".ts": true, + ".jsx": true, + ".tsx": true, + ".html": true, + ".css": true, + + // Terraform + ".tf": true, + ".hcl": true, + ".tfstate": true, + ".tfvars": true, + + // Build artifacts and archives + ".whl": true, + ".jar": true, + ".egg": true, + ".zip": true, + ".tar": true, + ".gz": true, + ".tgz": true, + ".dbc": true, + + // Data formats + ".txt": true, + ".csv": true, + ".md": true, + ".parquet": true, + ".avro": true, + + // Logs and locks + ".log": true, + ".lock": true, + + // Certificates and keys + ".pem": true, + ".crt": true, +} + +// scrubForTelemetry is a best-effort scrubber that removes sensitive path and +// PII information from error messages before they are sent to telemetry. +// The error message is treated as PII by the logging infrastructure but we +// scrub to avoid collecting more information than necessary. +func scrubForTelemetry(msg string) string { + // Redact absolute paths. + msg = replacePathRegexp(msg, windowsAbsPathRegexp, "[REDACTED_PATH]", false) + msg = replacePathRegexp(msg, workspacePathRegexp, "[REDACTED_WORKSPACE_PATH]", true) + msg = replacePathRegexp(msg, absPathRegexp, "[REDACTED_PATH]", true) + + // Redact relative paths. + msg = replacePathRegexp(msg, explicitRelPathRegexp, "[REDACTED_REL_PATH]", true) + msg = replacePathRegexp(msg, implicitRelPathRegexp, "[REDACTED_REL_PATH]", true) + + // Redact email addresses. + msg = emailRegexp.ReplaceAllString(msg, "[REDACTED_EMAIL]") + + return msg +} + +// replacePathRegexp replaces path matches with the given label, retaining +// known file extensions. When hasDelimiterGroup is true, the first character +// of the match is preserved as a delimiter prefix. +func replacePathRegexp(msg string, re *regexp.Regexp, label string, hasDelimiterGroup bool) string { + return re.ReplaceAllStringFunc(msg, func(match string) string { + prefix := "" + p := match + if hasDelimiterGroup && len(match) > 0 { + first := match[0] + if strings.ContainsRune(" \t\n:,\"'", rune(first)) { + prefix = match[:1] + p = match[1:] + } + } + + ext := path.Ext(p) + if knownExtensions[ext] { + return prefix + label + "(" + ext[1:] + ")" + } + return prefix + label + }) +} diff --git a/bundle/phases/telemetry_scrub_test.go b/bundle/phases/telemetry_scrub_test.go new file mode 100644 index 0000000000..b8bd2b73cc --- /dev/null +++ b/bundle/phases/telemetry_scrub_test.go @@ -0,0 +1,213 @@ +package phases + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestScrubForTelemetry_AbsolutePaths(t *testing.T) { + tests := []struct { + name string + msg string + expected string + }{ + { + name: "tmp path with known extension", + msg: "failed to write /tmp/bundle-xyz/state.json", + expected: "failed to write [REDACTED_PATH](json)", + }, + { + name: "var folders path without extension", + msg: "error reading /var/folders/7t/n_tz6x9d4xj91h48pf8md5zh0000gp/T/test123/file", + expected: "error reading [REDACTED_PATH]", + }, + { + name: "etc path with known extension", + msg: "config at /etc/databricks/config.json not found", + expected: "config at [REDACTED_PATH](json) not found", + }, + { + name: "macOS home path with known extension", + msg: "failed to read /Users/otheruser/some-project/file.yml", + expected: "failed to read [REDACTED_PATH](yml)", + }, + { + name: "Linux home path with known extension", + msg: "failed to read /home/runner/work/project/file.yml", + expected: "failed to read [REDACTED_PATH](yml)", + }, + { + name: "absolute path after colon delimiter", + msg: "error: /Users/jane/project/a.yml: not found, try again", + expected: "error: [REDACTED_PATH](yml): not found, try again", + }, + { + name: "multiple absolute paths", + msg: "path /home/user/project/a.yml and /home/user/project/b.yml", + expected: "path [REDACTED_PATH](yml) and [REDACTED_PATH](yml)", + }, + { + name: "Windows path with backslashes", + msg: `error at C:\Users\shreyas\project\file.yml`, + expected: "error at [REDACTED_PATH](yml)", + }, + { + name: "Windows path with forward slashes", + msg: "error at C:/Users/shreyas/project/file.yml", + expected: "error at [REDACTED_PATH](yml)", + }, + { + name: "Windows path with lowercase drive letter", + msg: `error at c:\Users\shreyas\project\file.yml`, + expected: "error at [REDACTED_PATH](yml)", + }, + { + name: "volume path with known extension", + msg: "artifact at /Volumes/catalog/schema/volume/artifact.whl", + expected: "artifact at [REDACTED_PATH](whl)", + }, + { + name: "dbfs path with known extension", + msg: "state at /dbfs/mnt/data/state.json", + expected: "state at [REDACTED_PATH](json)", + }, + { + name: "path with unknown extension", + msg: "error at /home/user/project/file.xyz", + expected: "error at [REDACTED_PATH]", + }, + { + name: "tilde home path with known extension", + msg: "reading ~/.databricks/config.json failed", + expected: "reading [REDACTED_PATH](json) failed", + }, + { + name: "single component path is not matched", + msg: "POST /telemetry-ext failed", + expected: "POST /telemetry-ext failed", + }, + { + name: "empty message", + msg: "", + expected: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.expected, scrubForTelemetry(tt.msg)) + }) + } +} + +func TestScrubForTelemetry_WorkspacePaths(t *testing.T) { + tests := []struct { + name string + msg string + expected string + }{ + { + name: "workspace user path without extension", + msg: "uploading to /Workspace/Users/dev/.bundle/files", + expected: "uploading to [REDACTED_WORKSPACE_PATH]", + }, + { + name: "workspace path with email", + msg: "error at /Workspace/Users/user@example.com/.bundle/dev", + expected: "error at [REDACTED_WORKSPACE_PATH]", + }, + { + name: "workspace shared path without extension", + msg: "cannot access /Workspace/Shared/project/notebook", + expected: "cannot access [REDACTED_WORKSPACE_PATH]", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.expected, scrubForTelemetry(tt.msg)) + }) + } +} + +func TestScrubForTelemetry_RelativePaths(t *testing.T) { + tests := []struct { + name string + msg string + expected string + }{ + { + name: "explicit relative path with ./", + msg: "failed to load ./resources/job.yml", + expected: "failed to load [REDACTED_REL_PATH](yml)", + }, + { + name: "explicit relative path with ../", + msg: "path ../parent/file.yml not allowed", + expected: "path [REDACTED_REL_PATH](yml) not allowed", + }, + { + name: "implicit relative path with extension", + msg: "failed to read resources/pipeline.yml: not found", + expected: "failed to read [REDACTED_REL_PATH](yml): not found", + }, + { + name: "dot-prefixed directory path with extension", + msg: "error reading .databricks/bundle/dev/variable-overrides.json", + expected: "error reading [REDACTED_REL_PATH](json)", + }, + { + name: "single filename without path separator is preserved", + msg: "failed to load databricks.yml", + expected: "failed to load databricks.yml", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.expected, scrubForTelemetry(tt.msg)) + }) + } +} + +func TestScrubForTelemetry_Emails(t *testing.T) { + tests := []struct { + name string + msg string + expected string + }{ + { + name: "email in message", + msg: "access denied for user@example.com in workspace", + expected: "access denied for [REDACTED_EMAIL] in workspace", + }, + { + name: "no email present", + msg: "some error without emails", + expected: "some error without emails", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.expected, scrubForTelemetry(tt.msg)) + }) + } +} + +func TestScrubForTelemetry_Combined(t *testing.T) { + msg := "failed to load /Users/shreyas/myproject/databricks.yml: " + + "workspace /Workspace/Users/shreyas@databricks.com/.bundle is invalid, " + + "also tried /home/other/fallback/config.yml, " + + "temp at /tmp/bundle-cache/state.json, " + + "see .databricks/bundle/dev/variable-overrides.json" + + expected := "failed to load [REDACTED_PATH](yml): " + + "workspace [REDACTED_WORKSPACE_PATH] is invalid, " + + "also tried [REDACTED_PATH](yml), " + + "temp at [REDACTED_PATH](json), " + + "see [REDACTED_REL_PATH](json)" + + assert.Equal(t, expected, scrubForTelemetry(msg)) +} diff --git a/cmd/bundle/utils/process.go b/cmd/bundle/utils/process.go index 8390548912..30e0beadde 100644 --- a/cmd/bundle/utils/process.go +++ b/cmd/bundle/utils/process.go @@ -80,7 +80,7 @@ func ProcessBundle(cmd *cobra.Command, opts ProcessOptions) (*bundle.Bundle, err return b, err } -func ProcessBundleRet(cmd *cobra.Command, opts ProcessOptions) (*bundle.Bundle, *statemgmt.StateDesc, error) { +func ProcessBundleRet(cmd *cobra.Command, opts ProcessOptions) (b *bundle.Bundle, stateDesc *statemgmt.StateDesc, retErr error) { var err error ctx := cmd.Context() if opts.SkipInitContext { @@ -93,7 +93,24 @@ func ProcessBundleRet(cmd *cobra.Command, opts ProcessOptions) (*bundle.Bundle, } // Load bundle config and apply target - b := root.MustConfigureBundle(cmd) + b = root.MustConfigureBundle(cmd) + + // Log deploy telemetry on all exit paths. This is a defer to ensure + // telemetry is logged even when the deploy command fails, for both + // diagnostic errors and regular Go errors. + if opts.Deploy { + defer func() { + if b == nil { + return + } + errMsg := logdiag.GetFirstErrorSummary(ctx) + if errMsg == "" && retErr != nil && !errors.Is(retErr, root.ErrAlreadyPrinted) { + errMsg = retErr.Error() + } + phases.LogDeployTelemetry(ctx, b, errMsg) + }() + } + if logdiag.HasError(ctx) { return b, nil, root.ErrAlreadyPrinted } @@ -147,8 +164,6 @@ func ProcessBundleRet(cmd *cobra.Command, opts ProcessOptions) (*bundle.Bundle, } } - var stateDesc *statemgmt.StateDesc - shouldReadState := opts.ReadState || opts.AlwaysPull || opts.InitIDs || opts.ErrorOnEmptyState || opts.PreDeployChecks || opts.Deploy || opts.ReadPlanPath != "" if shouldReadState { diff --git a/cmd/root/root.go b/cmd/root/root.go index e29d6df83c..6b6de2a9ba 100644 --- a/cmd/root/root.go +++ b/cmd/root/root.go @@ -179,15 +179,6 @@ Stack Trace: commandStr := commandString(cmd) ctx = cmd.Context() - // Log bundle deploy failures. Only log if we have successfully configured - // an authenticated Databricks client. We cannot log unauthenticated telemetry - // from the CLI yet. - if cmdctx.HasConfigUsed(ctx) && commandStr == "bundle_deploy" && exitCode != 0 { - telemetry.Log(ctx, protos.DatabricksCliLog{ - BundleDeployEvent: &protos.BundleDeployEvent{}, - }) - } - telemetryErr := telemetry.Upload(cmd.Context(), protos.ExecutionContext{ CmdExecID: cmdctx.ExecId(ctx), Version: build.GetInfo().Version, diff --git a/libs/logdiag/logdiag.go b/libs/logdiag/logdiag.go index e466576c87..896d105751 100644 --- a/libs/logdiag/logdiag.go +++ b/libs/logdiag/logdiag.go @@ -30,6 +30,9 @@ type LogDiagData struct { // If Collect is true, diagnostics are appended to Collected. Use SetCollected() to set. Collect bool Collected []diag.Diagnostic + + // Summary of the first error diagnostic logged, if any. + FirstErrorSummary string } // IsSetup returns whether InitContext() was already called. @@ -117,6 +120,16 @@ func FlushCollected(ctx context.Context) diag.Diagnostics { return result } +// GetFirstErrorSummary returns the summary of the first error diagnostic +// logged, or an empty string if no errors have been logged. +func GetFirstErrorSummary(ctx context.Context) string { + val := read(ctx) + val.mu.Lock() + defer val.mu.Unlock() + + return val.FirstErrorSummary +} + func LogDiag(ctx context.Context, d diag.Diagnostic) { val := read(ctx) val.mu.Lock() @@ -125,6 +138,9 @@ func LogDiag(ctx context.Context, d diag.Diagnostic) { switch d.Severity { case diag.Error: val.Errors += 1 + if val.FirstErrorSummary == "" { + val.FirstErrorSummary = d.Summary + } case diag.Warning: val.Warnings += 1 case diag.Recommendation: diff --git a/libs/telemetry/logger.go b/libs/telemetry/logger.go index c294fee17b..70132f69f0 100644 --- a/libs/telemetry/logger.go +++ b/libs/telemetry/logger.go @@ -76,6 +76,11 @@ func Upload(ctx context.Context, ec protos.ExecutionContext) error { protoLogs[i] = string(b) } + if !cmdctx.HasConfigUsed(ctx) { + log.Debugf(ctx, "no auth config available; skipping telemetry upload") + return nil + } + apiClient, err := client.New(cmdctx.ConfigUsed(ctx)) if err != nil { return fmt.Errorf("failed to create API client: %w", err) diff --git a/libs/telemetry/protos/bundle_deploy.go b/libs/telemetry/protos/bundle_deploy.go index ab1b3a46de..d9439437d9 100644 --- a/libs/telemetry/protos/bundle_deploy.go +++ b/libs/telemetry/protos/bundle_deploy.go @@ -7,6 +7,9 @@ type BundleDeployEvent struct { // UUID associated with the deployment. DeploymentId string `json:"deployment_id,omitempty"` + // Error message encountered during the bundle deploy command, if any. + ErrorMessage string `json:"error_message,omitempty"` + ResourceCount int64 `json:"resource_count"` ResourceJobCount int64 `json:"resource_job_count"` ResourcePipelineCount int64 `json:"resource_pipeline_count"`