Skip to content

Commit 3f7dcb3

Browse files
authored
chore: port influxdb_pro#1163 to Core (#26735)
1 parent b29c0cc commit 3f7dcb3

File tree

13 files changed

+666
-8
lines changed

13 files changed

+666
-8
lines changed

influxdb3/src/commands/serve.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ use object_store_metrics::ObjectStoreMetrics;
5757
use observability_deps::tracing::*;
5858
use panic_logging::SendPanicsToTracing;
5959
use parquet_file::storage::{ParquetStorage, StorageId};
60+
use std::collections::HashMap;
6061
use std::{env, num::NonZeroUsize, sync::Arc, time::Duration};
6162
use std::{path::Path, str::FromStr};
6263
use std::{path::PathBuf, process::Command};
@@ -88,6 +89,8 @@ pub const DEFAULT_ADMIN_TOKEN_RECOVERY_BIND_ADDR: &str = "127.0.0.1:8182";
8889

8990
pub const DEFAULT_TELEMETRY_ENDPOINT: &str = "https://telemetry.v3.influxdata.com";
9091

92+
mod cli_params;
93+
9194
#[derive(Debug, Error)]
9295
pub enum Error {
9396
#[error("Cannot parse object store config: {0}")]
@@ -656,7 +659,7 @@ fn ensure_directory_exists(p: &Path) {
656659
}
657660
}
658661

659-
pub async fn command(config: Config) -> Result<()> {
662+
pub async fn command(config: Config, user_params: HashMap<String, String>) -> Result<()> {
660663
// Check that both a cert file and key file are present if TLS is being set up
661664
match (&config.cert_file, &config.key_file) {
662665
(Some(_), None) | (None, Some(_)) => {
@@ -890,12 +893,16 @@ pub async fn command(config: Config) -> Result<()> {
890893
.await
891894
});
892895

896+
// Capture and filter CLI parameters
897+
let cli_params = cli_params::capture_cli_params(user_params);
898+
893899
let _ = catalog
894900
.register_node(
895901
&config.node_identifier_prefix,
896902
num_cpus as u64,
897903
vec![influxdb3_catalog::log::NodeMode::Core],
898904
process_uuid_getter,
905+
Some(cli_params),
899906
)
900907
.await
901908
.map_err(Error::InitializeCatalog)?;
Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
//! CLI parameter capture for storing user-provided configuration, filtering out sensitive parameters.
2+
3+
use std::collections::HashMap;
4+
5+
/// List of all known sensitive CLI parameters.
///
/// Names are compared verbatim (exact, case-sensitive match) by `is_sensitive`; the value of
/// any parameter listed here is replaced with `REDACTED_STR` by `capture_cli_params`.
///
/// NOTE(review): several entries (e.g. "aws-allow-http", "aws-default-region", "tls-cert") are
/// not secrets in themselves — presumably they are redacted out of caution; confirm before
/// moving any of them to the non-sensitive list.
6+
const SENSITIVE_PARAMS: &[&str] = &[
7+
"aws-access-key-id",
8+
"aws-secret-access-key",
9+
"aws-session-token",
10+
"aws-credentials-file",
11+
"aws-endpoint",
12+
"aws-allow-http",
13+
"aws-default-region",
14+
"aws-skip-signature",
15+
"azure-endpoint",
16+
"azure-allow-http",
17+
"azure-storage-account",
18+
"azure-storage-access-key",
19+
"google-service-account",
20+
"tls-key",
21+
"tls-cert",
22+
"without-auth",
23+
];
24+
25+
/// List of all known non-sensitive CLI parameters.
///
/// This list is only read by the unit tests, which check that every CLI argument is
/// categorized as either sensitive or non-sensitive.
26+
/// It is declared at module level (rather than inside the test module) for visibility, which
/// is why `dead_code` is allowed on it.
27+
#[allow(dead_code)]
28+
const NON_SENSITIVE_PARAMS: &[&str] = &[
29+
// Core parameters
30+
"node-id",
31+
"http-bind",
32+
"max-http-request-size",
33+
"object-store",
34+
"data-dir",
35+
"verbose",
36+
"bucket",
37+
// Memory and performance parameters
38+
"exec-mem-pool-bytes",
39+
"num-datafusion-threads",
40+
"query-file-limit",
41+
"force-snapshot-mem-threshold",
42+
// WAL parameters
43+
"wal-flush-interval",
44+
"wal-snapshot-size",
45+
"wal-max-write-buffer-size",
46+
"wal-replay-fail-on-error",
47+
"wal-replay-concurrency-limit",
48+
"snapshotted-wal-files-to-keep",
49+
// Cache parameters
50+
"parquet-mem-cache-size",
51+
"parquet-mem-cache-prune-percentage",
52+
"parquet-mem-cache-prune-interval",
53+
"parquet-mem-cache-query-path-duration",
54+
"disable-parquet-mem-cache",
55+
"table-index-cache-max-entries",
56+
"table-index-cache-concurrency-limit",
57+
// Distinct cache parameters
58+
"distinct-cache-eviction-interval",
59+
// Last cache parameters
60+
"last-cache-eviction-interval",
61+
// Retention and deletion parameters
62+
"retention-check-interval",
63+
"delete-grace-period",
64+
"hard-delete-default-duration",
65+
// Generation configuration
66+
"gen1-duration",
67+
"gen1-lookback-duration",
68+
// Logging and tracing parameters
69+
"log-filter",
70+
"log-destination",
71+
"log-format",
72+
"query-log-size",
73+
"traces-exporter",
74+
"traces-exporter-jaeger-agent-host",
75+
"traces-exporter-jaeger-agent-port",
76+
"traces-exporter-jaeger-service-name",
77+
"traces-exporter-jaeger-trace-context-header-name",
78+
"traces-jaeger-debug-name",
79+
"traces-jaeger-tags",
80+
"traces-jaeger-max-msgs-per-second",
81+
// DataFusion parameters
82+
"datafusion-config",
83+
"datafusion-use-cached-parquet-loader",
84+
"datafusion-max-parquet-fanout",
85+
"datafusion-runtime-type",
86+
"datafusion-runtime-thread-priority",
87+
"datafusion-runtime-thread-keep-alive",
88+
"datafusion-runtime-disable-lifo-slot",
89+
"datafusion-runtime-event-interval",
90+
"datafusion-runtime-global-queue-interval",
91+
"datafusion-runtime-max-io-events-per-tick",
92+
"datafusion-runtime-max-blocking-threads",
93+
// Object store parameters
94+
"object-store-cache-endpoint",
95+
"object-store-connection-limit",
96+
"object-store-http2-only",
97+
"object-store-http2-max-frame-size",
98+
"object-store-max-retries",
99+
"object-store-retry-timeout",
100+
// Feature flags and modes
// NOTE(review): "disable-authz" contains "auth", so the substring check in `is_sensitive`
// still redacts its value even though it is listed here — confirm this is intended.
101+
"disable-authz",
102+
// Telemetry
103+
"telemetry-endpoint",
104+
"disable-telemetry-upload",
105+
// TLS parameters
106+
"tls-minimum-version",
107+
// Python integration
108+
"plugin-dir",
109+
"virtual-env-location",
110+
"package-manager",
111+
// Other parameters
112+
"tcp-listener-file-path",
113+
];
114+
115+
/// Placeholder written in place of the value of every sensitive CLI parameter.
const REDACTED_STR: &str = "*******";
116+
117+
/// Capture user-provided CLI parameters with sensitive values redacted
118+
///
119+
/// This function takes user-provided parameters extracted from ArgMatches
120+
/// and returns a JSON string where sensitive parameters show as "*******"
121+
/// while non-sensitive parameters show their actual values.
122+
pub(super) fn capture_cli_params(user_params: HashMap<String, String>) -> String {
123+
let mut params = HashMap::new();
124+
125+
for (key, value) in user_params {
126+
if is_sensitive(&key) {
127+
// Sensitive params show as asterisks
128+
params.insert(key, REDACTED_STR.to_string());
129+
} else {
130+
// Non-sensitive params show actual value
131+
params.insert(key, value);
132+
}
133+
}
134+
135+
serde_json::to_string(&params).unwrap_or_else(|_| "{}".to_string())
136+
}
137+
138+
/// Check if a parameter name contains sensitive information
139+
fn is_sensitive(param: &str) -> bool {
140+
// First check against our known sensitive parameters list
141+
if SENSITIVE_PARAMS.contains(&param) {
142+
return true;
143+
}
144+
145+
// Additional substring matches for parameter patterns (safety net)
146+
const SENSITIVE_PATTERNS: &[&str] =
147+
&["password", "secret", "token", "credential", "auth", "key"];
148+
149+
let param_lower = param.to_lowercase();
150+
151+
// Check substring matches for extra safety
152+
for pattern in SENSITIVE_PATTERNS {
153+
if param_lower.contains(pattern) {
154+
return true;
155+
}
156+
}
157+
158+
false
159+
}
160+
161+
#[cfg(test)]
mod tests {
    use clap::CommandFactory;
    use hashbrown::HashSet;

    use crate::commands::serve::Config;

    use super::*;

    /// Runs `capture_cli_params` and parses its JSON output back into a map.
    fn capture_and_parse(params: HashMap<String, String>) -> HashMap<String, String> {
        let result = capture_cli_params(params);
        serde_json::from_str::<HashMap<String, String>>(&result).unwrap()
    }

    /// Every explicitly listed sensitive parameter must come back redacted.
    #[test]
    fn test_sensitive_params_are_redacted() {
        let mut params = HashMap::new();
        for sensitive in SENSITIVE_PARAMS {
            params.insert(sensitive.to_string(), "un-redacted".to_string());
        }
        let parsed = capture_and_parse(params);
        assert_eq!(
            parsed.len(),
            SENSITIVE_PARAMS.len(),
            "expected there to be {n} parsed entries",
            n = SENSITIVE_PARAMS.len()
        );
        for sensitive in SENSITIVE_PARAMS {
            assert_eq!(
                parsed.get(*sensitive).unwrap(),
                REDACTED_STR,
                "expected {REDACTED_STR} for '{sensitive}' argument"
            );
        }
    }

    /// Parameters that are neither listed as sensitive nor matched by a sensitive substring
    /// must keep their original values.
    #[test]
    fn test_non_sensitive_params_pass_through() {
        let params = HashMap::from([
            ("node-id".to_string(), "node-1".to_string()),
            ("http-bind".to_string(), "0.0.0.0:8181".to_string()),
        ]);
        let parsed = capture_and_parse(params.clone());
        assert_eq!(parsed, params);
    }

    /// A parameter missing from `SENSITIVE_PARAMS` is still redacted when its name contains
    /// one of the sensitive substrings, regardless of case.
    #[test]
    fn test_unlisted_params_matching_patterns_are_redacted() {
        let params = HashMap::from([
            ("some-new-password".to_string(), "hunter2".to_string()),
            ("Some-API-Key".to_string(), "abc123".to_string()),
        ]);
        let parsed = capture_and_parse(params);
        assert_eq!(parsed.get("some-new-password").unwrap(), REDACTED_STR);
        assert_eq!(parsed.get("Some-API-Key").unwrap(), REDACTED_STR);
    }

    /// Collect the display name of every argument of `cmd` and, recursively, of its
    /// subcommands into `args`.
    fn extract_all_arg_ids(cmd: &clap::Command, args: &mut HashSet<String>) {
        for arg in cmd.get_arguments() {
            let id = arg.get_id().as_str();

            // Skip help and version, which are always present
            if matches!(id, "help" | "version" | "help-all") {
                continue;
            }

            // Display name: the long form, else the short form, else the argument id
            let display_name = match (arg.get_long(), arg.get_short()) {
                (Some(long), _) => long.to_string(),
                (None, Some(short)) => short.to_string(),
                (None, None) => id.to_string(),
            };

            args.insert(display_name);
        }

        // Recursively process subcommands, except the generated `help` one
        for subcmd in cmd.get_subcommands() {
            if subcmd.get_name() != "help" {
                extract_all_arg_ids(subcmd, args);
            }
        }
    }

    /// Every argument of the serve command must be categorized, and every categorized name
    /// must correspond to a real argument.
    #[test]
    fn test_all_config_params_categorized() {
        // Get all arguments from the `serve` subcommand if there is one, else from the root
        let cmd = Config::command();
        let mut discovered_args = HashSet::new();
        extract_all_arg_ids(
            cmd.get_subcommands()
                .find(|c| c.get_name() == "serve")
                .unwrap_or(&cmd),
            &mut discovered_args,
        );

        // If nothing was discovered that way, check the root command
        if discovered_args.is_empty() {
            extract_all_arg_ids(&cmd, &mut discovered_args);
        }

        // An argument counts as categorized when it is in either list or is caught by the
        // substring matching in `is_sensitive`.
        let uncategorized: Vec<&str> = discovered_args
            .iter()
            .map(String::as_str)
            .filter(|arg| {
                !NON_SENSITIVE_PARAMS.contains(arg)
                    && !SENSITIVE_PARAMS.contains(arg)
                    && !is_sensitive(arg)
            })
            .collect();

        assert!(
            uncategorized.is_empty(),
            "The following CLI parameters are not categorized as either sensitive or \
             non-sensitive:\n{}\n\n\
             Please add them to either NON_SENSITIVE_PARAMS or SENSITIVE_PARAMS constants \
             at the module level.",
            uncategorized.join("\n")
        );

        // Names kept in either list that no longer correspond to a real argument
        let needlessly_categorized: Vec<&str> = NON_SENSITIVE_PARAMS
            .iter()
            .chain(SENSITIVE_PARAMS)
            .copied()
            .filter(|arg| !discovered_args.contains(*arg))
            .collect();

        assert!(
            needlessly_categorized.is_empty(),
            "The following CLI parameters were set as either sensitive or non-sensitive \
             but were not discovered in the actual command:\n{}\n\n\
             Please remove them from the NON_SENSITIVE_PARAMS or SENSITIVE_PARAMS constants.",
            needlessly_categorized.join("\n")
        );
    }
}

0 commit comments

Comments
 (0)