|
| 1 | +//! CLI parameter capture for storing user-provided configuration, filtering out sensitive parameters. |
| 2 | +
|
| 3 | +use std::collections::HashMap; |
| 4 | + |
/// List of all known sensitive CLI parameters.
///
/// `is_sensitive` compares a parameter name against these entries by exact (case-sensitive)
/// equality before it falls back to substring patterns, and `capture_cli_params` replaces the
/// value of every parameter that `is_sensitive` accepts with `REDACTED_STR`.
///
/// The unit tests in this module additionally require every entry to correspond to an argument
/// discovered on the real command, so stale names must be removed from this list.
const SENSITIVE_PARAMS: &[&str] = &[
    // `aws-` prefixed parameters
    "aws-access-key-id",
    "aws-secret-access-key",
    "aws-session-token",
    "aws-credentials-file",
    "aws-endpoint",
    "aws-allow-http",
    "aws-default-region",
    "aws-skip-signature",
    // `azure-` prefixed parameters
    "azure-endpoint",
    "azure-allow-http",
    "azure-storage-account",
    "azure-storage-access-key",
    // `google-` prefixed parameters
    "google-service-account",
    // `tls-` prefixed parameters
    "tls-key",
    "tls-cert",
    // Remaining parameters
    "without-auth",
];
| 24 | + |
/// List of all known non-sensitive CLI parameters.
///
/// Only the tests read this list (hence the `dead_code` allowance); it is declared at module
/// level, next to `SENSITIVE_PARAMS`, for visibility. The tests use it to verify that every
/// discovered CLI argument has been explicitly categorized and that no listed name is stale.
///
/// Being listed here does not by itself prevent redaction: `is_sensitive` never consults this
/// list and still applies its substring patterns to every name.
#[allow(dead_code)]
const NON_SENSITIVE_PARAMS: &[&str] = &[
    // Core parameters
    "node-id",
    "http-bind",
    "max-http-request-size",
    "object-store",
    "data-dir",
    "verbose",
    "bucket",
    // Memory and performance parameters
    "exec-mem-pool-bytes",
    "num-datafusion-threads",
    "query-file-limit",
    "force-snapshot-mem-threshold",
    // WAL parameters
    "wal-flush-interval",
    "wal-snapshot-size",
    "wal-max-write-buffer-size",
    "wal-replay-fail-on-error",
    "wal-replay-concurrency-limit",
    "snapshotted-wal-files-to-keep",
    // Cache parameters
    "parquet-mem-cache-size",
    "parquet-mem-cache-prune-percentage",
    "parquet-mem-cache-prune-interval",
    "parquet-mem-cache-query-path-duration",
    "disable-parquet-mem-cache",
    "table-index-cache-max-entries",
    "table-index-cache-concurrency-limit",
    // Distinct cache parameters
    "distinct-cache-eviction-interval",
    // Last cache parameters
    "last-cache-eviction-interval",
    // Retention and deletion parameters
    "retention-check-interval",
    "delete-grace-period",
    "hard-delete-default-duration",
    // Generation configuration
    "gen1-duration",
    "gen1-lookback-duration",
    // Logging and tracing parameters
    "log-filter",
    "log-destination",
    "log-format",
    "query-log-size",
    "traces-exporter",
    "traces-exporter-jaeger-agent-host",
    "traces-exporter-jaeger-agent-port",
    "traces-exporter-jaeger-service-name",
    "traces-exporter-jaeger-trace-context-header-name",
    "traces-jaeger-debug-name",
    "traces-jaeger-tags",
    "traces-jaeger-max-msgs-per-second",
    // DataFusion parameters
    "datafusion-config",
    "datafusion-use-cached-parquet-loader",
    "datafusion-max-parquet-fanout",
    "datafusion-runtime-type",
    "datafusion-runtime-thread-priority",
    "datafusion-runtime-thread-keep-alive",
    "datafusion-runtime-disable-lifo-slot",
    "datafusion-runtime-event-interval",
    "datafusion-runtime-global-queue-interval",
    "datafusion-runtime-max-io-events-per-tick",
    "datafusion-runtime-max-blocking-threads",
    // Object store parameters
    "object-store-cache-endpoint",
    "object-store-connection-limit",
    "object-store-http2-only",
    "object-store-http2-max-frame-size",
    "object-store-max-retries",
    "object-store-retry-timeout",
    // Feature flags and modes
    // NOTE(review): `is_sensitive` matches the substring "auth", so the value of
    // "disable-authz" is redacted even though it is listed as non-sensitive — confirm intended.
    "disable-authz",
    // Telemetry
    "telemetry-endpoint",
    "disable-telemetry-upload",
    // TLS parameters
    "tls-minimum-version",
    // Python integration
    "plugin-dir",
    "virtual-env-location",
    "package-manager",
    // Other parameters
    "tcp-listener-file-path",
];
| 114 | + |
/// Placeholder that `capture_cli_params` stores instead of the real value of any parameter
/// classified as sensitive.
const REDACTED_STR: &str = "*******";
| 116 | + |
| 117 | +/// Capture user-provided CLI parameters with sensitive values redacted |
| 118 | +/// |
| 119 | +/// This function takes user-provided parameters extracted from ArgMatches |
| 120 | +/// and returns a JSON string where sensitive parameters show as "*******" |
| 121 | +/// while non-sensitive parameters show their actual values. |
| 122 | +pub(super) fn capture_cli_params(user_params: HashMap<String, String>) -> String { |
| 123 | + let mut params = HashMap::new(); |
| 124 | + |
| 125 | + for (key, value) in user_params { |
| 126 | + if is_sensitive(&key) { |
| 127 | + // Sensitive params show as asterisks |
| 128 | + params.insert(key, REDACTED_STR.to_string()); |
| 129 | + } else { |
| 130 | + // Non-sensitive params show actual value |
| 131 | + params.insert(key, value); |
| 132 | + } |
| 133 | + } |
| 134 | + |
| 135 | + serde_json::to_string(¶ms).unwrap_or_else(|_| "{}".to_string()) |
| 136 | +} |
| 137 | + |
| 138 | +/// Check if a parameter name contains sensitive information |
| 139 | +fn is_sensitive(param: &str) -> bool { |
| 140 | + // First check against our known sensitive parameters list |
| 141 | + if SENSITIVE_PARAMS.contains(¶m) { |
| 142 | + return true; |
| 143 | + } |
| 144 | + |
| 145 | + // Additional substring matches for parameter patterns (safety net) |
| 146 | + const SENSITIVE_PATTERNS: &[&str] = |
| 147 | + &["password", "secret", "token", "credential", "auth", "key"]; |
| 148 | + |
| 149 | + let param_lower = param.to_lowercase(); |
| 150 | + |
| 151 | + // Check substring matches for extra safety |
| 152 | + for pattern in SENSITIVE_PATTERNS { |
| 153 | + if param_lower.contains(pattern) { |
| 154 | + return true; |
| 155 | + } |
| 156 | + } |
| 157 | + |
| 158 | + false |
| 159 | +} |
| 160 | + |
#[cfg(test)]
mod tests {
    use clap::CommandFactory;
    use hashbrown::HashSet;

    use crate::commands::serve::Config;

    use super::*;

    /// Every name in `SENSITIVE_PARAMS` must come back redacted from `capture_cli_params`.
    #[test]
    fn test_sensitive_params_are_redacted() {
        let input: HashMap<String, String> = SENSITIVE_PARAMS
            .iter()
            .map(|name| (name.to_string(), "un-redacted".to_string()))
            .collect();

        let json = capture_cli_params(input);
        let parsed: HashMap<String, String> = serde_json::from_str(&json).unwrap();

        let expected_len = SENSITIVE_PARAMS.len();
        assert_eq!(
            parsed.len(),
            expected_len,
            "expected there to be {n} parsed entries",
            n = expected_len
        );

        for sensitive in SENSITIVE_PARAMS {
            assert_eq!(
                parsed.get(*sensitive).unwrap(),
                REDACTED_STR,
                "expected {REDACTED_STR} for '{sensitive}' argument"
            );
        }
    }

    /// Collect the display name of every argument of `cmd` and, recursively, of its
    /// subcommands into `args`.
    ///
    /// The display name is the long flag when there is one, otherwise the short flag,
    /// otherwise the argument id. The `help`, `version` and `help-all` arguments and the
    /// `help` subcommand are ignored.
    fn extract_all_arg_ids(cmd: &clap::Command, args: &mut HashSet<String>) {
        for arg in cmd.get_arguments() {
            let id = arg.get_id().as_str();

            // These are always present and are not user configuration.
            if matches!(id, "help" | "version" | "help-all") {
                continue;
            }

            let display_name = match (arg.get_long(), arg.get_short()) {
                (Some(long), _) => long.to_string(),
                (None, Some(short)) => short.to_string(),
                (None, None) => id.to_string(),
            };

            args.insert(display_name);
        }

        // Descend into every subcommand except the built-in `help`.
        for subcmd in cmd
            .get_subcommands()
            .filter(|sub| sub.get_name() != "help")
        {
            extract_all_arg_ids(subcmd, args);
        }
    }

    /// Every discovered CLI argument must be categorized, and every categorized name must
    /// exist on the real command.
    #[test]
    fn test_all_config_params_categorized() {
        let root = Config::command();

        // Prefer the `serve` subcommand when present; otherwise inspect the command itself.
        let target = root
            .get_subcommands()
            .find(|sub| sub.get_name() == "serve")
            .unwrap_or(&root);

        let mut discovered: HashSet<String> = HashSet::new();
        extract_all_arg_ids(target, &mut discovered);

        // Nothing found under the chosen command: fall back to the root.
        if discovered.is_empty() {
            extract_all_arg_ids(&root, &mut discovered);
        }

        // A name counts as categorized when it is in either list or is caught by the
        // substring matching in `is_sensitive`.
        let uncategorized: Vec<String> = discovered
            .iter()
            .filter(|name| {
                let name = name.as_str();
                !(NON_SENSITIVE_PARAMS.contains(&name)
                    || SENSITIVE_PARAMS.contains(&name)
                    || is_sensitive(name))
            })
            .cloned()
            .collect();

        assert!(
            uncategorized.is_empty(),
            "The following CLI parameters are not categorized as either sensitive or \
             non-sensitive:\n{}\n\n\
             Please add them to either NON_SENSITIVE_PARAMS or SENSITIVE_PARAMS constants \
             at the module level.",
            uncategorized.join("\n")
        );

        // The reverse direction: listed names that no longer exist on the command.
        let needlessly_categorized: Vec<&str> = NON_SENSITIVE_PARAMS
            .iter()
            .chain(SENSITIVE_PARAMS)
            .copied()
            .filter(|name| !discovered.contains(*name))
            .collect();

        assert!(
            needlessly_categorized.is_empty(),
            "The following CLI parameters were set as either sensitive or non-sensitive \
             but were not discovered in the actual command:\n{}\n\n\
             Please remove them from the NON_SENSITIVE_PARAMS or SENSITIVE_PARAMS constants.",
            needlessly_categorized.join("\n")
        );
    }
}
0 commit comments