diff --git a/Cargo.lock b/Cargo.lock index d7b07545..74fcd561 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -66,7 +66,7 @@ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "codegraph-core" -version = "3.7.0" +version = "3.9.0" dependencies = [ "ignore", "napi", diff --git a/crates/codegraph-core/src/barrel_resolution.rs b/crates/codegraph-core/src/barrel_resolution.rs new file mode 100644 index 00000000..ad7e48f0 --- /dev/null +++ b/crates/codegraph-core/src/barrel_resolution.rs @@ -0,0 +1,191 @@ +//! Shared barrel-file resolution logic. +//! +//! Both `edge_builder.rs` (napi-driven) and `import_edges.rs` (SQLite-driven) +//! need to recursively resolve a symbol through barrel reexport chains. +//! This module extracts the common algorithm so both callers share a single +//! implementation. + +use std::collections::HashSet; + +/// Minimal view of a single reexport entry, borrowed from the caller's data. +pub struct ReexportRef<'a> { + pub source: &'a str, + pub names: &'a [String], + pub wildcard_reexport: bool, +} + +/// Trait that abstracts over the different context types in `edge_builder` and +/// `import_edges`. Each implementor provides access to its own reexport map +/// and definition index so the resolution algorithm stays generic. +pub trait BarrelContext { + /// Return the reexport entries for `barrel_path`, or `None` if the path + /// has no reexports. + fn reexports_for(&self, barrel_path: &str) -> Option>>; + + /// Return `true` if `file_path` contains a definition named `symbol`. + fn has_definition(&self, file_path: &str, symbol: &str) -> bool; +} + +/// Recursively resolve a symbol through barrel reexport chains. +/// +/// Mirrors `resolveBarrelExport()` in `resolve-imports.ts`. +/// The caller provides a `visited` set to prevent infinite loops on circular +/// reexport chains. 
+pub fn resolve_barrel_export<'a, C: BarrelContext>( + ctx: &'a C, + barrel_path: &str, + symbol_name: &str, + visited: &mut HashSet, +) -> Option { + if visited.contains(barrel_path) { + return None; + } + visited.insert(barrel_path.to_string()); + + let reexports = ctx.reexports_for(barrel_path)?; + + for re in &reexports { + // Named reexports (non-wildcard) + if !re.names.is_empty() && !re.wildcard_reexport { + if re.names.iter().any(|n| n == symbol_name) { + if ctx.has_definition(re.source, symbol_name) { + return Some(re.source.to_string()); + } + let deeper = resolve_barrel_export(ctx, re.source, symbol_name, visited); + if deeper.is_some() { + return deeper; + } + // Fallback: return source even if no definition found + return Some(re.source.to_string()); + } + continue; + } + + // Wildcard or empty-names reexports + if re.wildcard_reexport || re.names.is_empty() { + if ctx.has_definition(re.source, symbol_name) { + return Some(re.source.to_string()); + } + let deeper = resolve_barrel_export(ctx, re.source, symbol_name, visited); + if deeper.is_some() { + return deeper; + } + } + } + + None +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + + struct TestContext { + reexports: HashMap, bool)>>, + definitions: HashMap>, + } + + impl BarrelContext for TestContext { + fn reexports_for(&self, barrel_path: &str) -> Option>> { + self.reexports.get(barrel_path).map(|entries| { + entries + .iter() + .map(|(source, names, wildcard)| ReexportRef { + source: source.as_str(), + names: names.as_slice(), + wildcard_reexport: *wildcard, + }) + .collect() + }) + } + + fn has_definition(&self, file_path: &str, symbol: &str) -> bool { + self.definitions + .get(file_path) + .map_or(false, |defs| defs.contains(symbol)) + } + } + + #[test] + fn resolves_named_reexport() { + let mut reexports = HashMap::new(); + reexports.insert( + "src/index.ts".to_string(), + vec![("src/utils.ts".to_string(), vec!["foo".to_string()], false)], + ); + let mut 
definitions = HashMap::new(); + definitions.insert( + "src/utils.ts".to_string(), + HashSet::from(["foo".to_string()]), + ); + + let ctx = TestContext { reexports, definitions }; + let mut visited = HashSet::new(); + let result = resolve_barrel_export(&ctx, "src/index.ts", "foo", &mut visited); + assert_eq!(result.as_deref(), Some("src/utils.ts")); + } + + #[test] + fn resolves_wildcard_reexport() { + let mut reexports = HashMap::new(); + reexports.insert( + "src/index.ts".to_string(), + vec![("src/utils.ts".to_string(), vec![], true)], + ); + let mut definitions = HashMap::new(); + definitions.insert( + "src/utils.ts".to_string(), + HashSet::from(["bar".to_string()]), + ); + + let ctx = TestContext { reexports, definitions }; + let mut visited = HashSet::new(); + let result = resolve_barrel_export(&ctx, "src/index.ts", "bar", &mut visited); + assert_eq!(result.as_deref(), Some("src/utils.ts")); + } + + #[test] + fn resolves_transitive_chain() { + let mut reexports = HashMap::new(); + reexports.insert( + "src/index.ts".to_string(), + vec![("src/mid.ts".to_string(), vec![], true)], + ); + reexports.insert( + "src/mid.ts".to_string(), + vec![("src/deep.ts".to_string(), vec!["baz".to_string()], false)], + ); + let mut definitions = HashMap::new(); + definitions.insert( + "src/deep.ts".to_string(), + HashSet::from(["baz".to_string()]), + ); + + let ctx = TestContext { reexports, definitions }; + let mut visited = HashSet::new(); + let result = resolve_barrel_export(&ctx, "src/index.ts", "baz", &mut visited); + assert_eq!(result.as_deref(), Some("src/deep.ts")); + } + + #[test] + fn prevents_circular_reexport() { + let mut reexports = HashMap::new(); + reexports.insert( + "src/a.ts".to_string(), + vec![("src/b.ts".to_string(), vec![], true)], + ); + reexports.insert( + "src/b.ts".to_string(), + vec![("src/a.ts".to_string(), vec![], true)], + ); + + let ctx = TestContext { + reexports, + definitions: HashMap::new(), + }; + let mut visited = HashSet::new(); + let result = 
resolve_barrel_export(&ctx, "src/a.ts", "missing", &mut visited); + assert_eq!(result, None); + } +} diff --git a/crates/codegraph-core/src/build_pipeline.rs b/crates/codegraph-core/src/build_pipeline.rs index b565fe57..964e4356 100644 --- a/crates/codegraph-core/src/build_pipeline.rs +++ b/crates/codegraph-core/src/build_pipeline.rs @@ -18,6 +18,7 @@ use crate::change_detection; use crate::config::{BuildConfig, BuildOpts, BuildPathAliases}; +use crate::constants::{FAST_PATH_MAX_CHANGED_FILES, FAST_PATH_MIN_EXISTING_FILES}; use crate::file_collector; use crate::import_edges::{self, ImportEdgeContext}; use crate::import_resolution; @@ -116,58 +117,7 @@ pub fn run_pipeline( // ── Stage 2: Collect files ───────────────────────────────────────── let t0 = Instant::now(); - let collect_result = if let Some(ref scope) = opts.scope { - // Scoped rebuild - let files: Vec = scope - .iter() - .map(|f| { - let abs = Path::new(root_dir).join(normalize_path(f)); - abs.to_str().unwrap_or("").to_string() - }) - .filter(|f| Path::new(f).exists()) - .collect(); - file_collector::CollectResult { - directories: files - .iter() - .filter_map(|f| { - Path::new(f) - .parent() - .map(|p| p.to_str().unwrap_or("").to_string()) - }) - .collect(), - files, - } - } else if incremental && !force_full_rebuild { - // Try fast collect from DB + journal - let journal = journal::read_journal(root_dir); - let has_entries = - journal.valid && (!journal.changed.is_empty() || !journal.removed.is_empty()); - - if has_entries { - let db_files: Vec = conn - .prepare("SELECT file FROM file_hashes") - .and_then(|mut stmt| { - stmt.query_map([], |row| row.get::<_, String>(0)) - .map(|rows| rows.filter_map(|r| r.ok()).collect()) - }) - .unwrap_or_default(); - - if !db_files.is_empty() { - file_collector::try_fast_collect( - root_dir, - &db_files, - &journal.changed, - &journal.removed, - ) - } else { - file_collector::collect_files(root_dir, &config.ignore_dirs) - } - } else { - 
file_collector::collect_files(root_dir, &config.ignore_dirs) - } - } else { - file_collector::collect_files(root_dir, &config.ignore_dirs) - }; + let collect_result = collect_source_files(conn, root_dir, &config, &opts, incremental, force_full_rebuild); timing.collect_ms = t0.elapsed().as_secs_f64() * 1000.0; // ── Stage 3: Detect changes ──────────────────────────────────────── @@ -322,120 +272,11 @@ pub fn run_pipeline( timing.resolve_ms = t0.elapsed().as_secs_f64() * 1000.0; // ── Stage 6b: Re-parse barrel candidates (incremental only) ───────── - // Mirrors JS pipeline's findBarrelCandidates + reparseBarrelFiles. - // For incremental builds, barrel files (re-export-only index files) may - // not be in file_symbols because they weren't changed or reverse-deps. - // Without their symbols, barrel resolution in Stage 7 can't create the - // transitive import edges (e.g. app.js -> math.js through index.js). if !change_result.is_full_build { - // Find all barrel files from DB (files that have 'reexports' edges) - let barrel_files_in_db: HashSet = { - let rows: Vec = match conn.prepare( - "SELECT DISTINCT n1.file FROM edges e \ - JOIN nodes n1 ON e.source_id = n1.id \ - WHERE e.kind = 'reexports' AND n1.kind = 'file'", - ) { - Ok(mut stmt) => match stmt.query_map([], |row| row.get::<_, String>(0)) { - Ok(mapped) => mapped.filter_map(|r| r.ok()).collect(), - Err(_) => Vec::new(), - }, - Err(_) => Vec::new(), - }; - rows.into_iter().collect() - }; - - // Check which barrels are imported by parsed files but not in file_symbols - let mut barrel_paths_to_parse: Vec = Vec::new(); - for (_rel_path, symbols) in &file_symbols { - for imp in &symbols.imports { - // Look up resolved path from batch_resolved - let abs_file = Path::new(root_dir).join(_rel_path); - let fwd = abs_file.to_str().unwrap_or("").replace('\\', "/"); - let key = format!("{}|{}", fwd, imp.source); - if let Some(resolved) = batch_resolved.get(&key) { - if barrel_files_in_db.contains(resolved) && 
!file_symbols.contains_key(resolved) - { - let abs = Path::new(root_dir).join(resolved); - if abs.exists() { - barrel_paths_to_parse - .push(abs.to_str().unwrap_or("").to_string()); - } - } - } - } - } - - // Also find barrels that re-export FROM changed files - { - let changed_rel: Vec<&str> = file_symbols.keys().map(|s| s.as_str()).collect(); - if let Ok(mut stmt) = conn.prepare( - "SELECT DISTINCT n1.file FROM edges e \ - JOIN nodes n1 ON e.source_id = n1.id \ - JOIN nodes n2 ON e.target_id = n2.id \ - WHERE e.kind = 'reexports' AND n1.kind = 'file' AND n2.file = ?1", - ) { - for changed in &changed_rel { - if let Ok(rows) = stmt.query_map(rusqlite::params![changed], |row| { - row.get::<_, String>(0) - }) { - for row in rows.flatten() { - if !file_symbols.contains_key(&row) { - let abs = Path::new(root_dir).join(&row); - if abs.exists() { - barrel_paths_to_parse - .push(abs.to_str().unwrap_or("").to_string()); - } - } - } - } - } - } - } - - // Re-parse barrel files and merge into file_symbols - if !barrel_paths_to_parse.is_empty() { - barrel_paths_to_parse.sort(); - barrel_paths_to_parse.dedup(); - // Barrel files are re-export-only — no function bodies or dataflow, - // so skip dataflow/AST analysis to avoid unnecessary overhead. 
- let barrel_parsed = parallel::parse_files_parallel( - &barrel_paths_to_parse, - root_dir, - false, - false, - ); - for mut sym in barrel_parsed { - let rel = relative_path(root_dir, &sym.file); - sym.file = rel.clone(); - // Delete outgoing import/reexport edges for barrel files being re-parsed - // (scoped to import-related kinds to avoid dropping calls edges) - let _ = conn.execute( - "DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?1) \ - AND kind IN ('imports', 'reexports')", - rusqlite::params![&rel], - ); - // Re-resolve imports for the barrel file - let abs_str = - Path::new(root_dir).join(&rel).to_str().unwrap_or("").to_string(); - for imp in &sym.imports { - let input = ImportResolutionInput { - from_file: abs_str.clone(), - import_source: imp.source.clone(), - }; - let resolved_batch = import_resolution::resolve_imports_batch( - &[input], - root_dir, - &napi_aliases, - Some(&known_files), - ); - for r in &resolved_batch { - let key = format!("{}|{}", r.from_file, r.import_source); - batch_resolved.insert(key, r.resolved_path.clone()); - } - } - file_symbols.insert(rel, sym); - } - } + reparse_barrel_candidates( + conn, root_dir, &napi_aliases, &known_files, + &mut file_symbols, &mut batch_resolved, + ); } // ── Stage 7: Build edges ─────────────────────────────────────────── @@ -492,7 +333,7 @@ pub fn run_pipeline( // reverse-dep files added for edge rebuilding, which inflates the count // and would skip the fast path even for single-file incremental builds. 
let use_fast_path = - !change_result.is_full_build && parse_changes.len() <= 5 && existing_file_count > 20; + !change_result.is_full_build && parse_changes.len() <= FAST_PATH_MAX_CHANGED_FILES && existing_file_count > FAST_PATH_MIN_EXISTING_FILES; if use_fast_path { structure::update_changed_file_metrics( @@ -539,27 +380,7 @@ pub fn run_pipeline( // ── Stage 9: Finalize ────────────────────────────────────────────── let t0 = Instant::now(); - let node_count = conn - .query_row("SELECT COUNT(*) FROM nodes", [], |row| row.get::<_, i64>(0)) - .unwrap_or(0); - let edge_count = conn - .query_row("SELECT COUNT(*) FROM edges", [], |row| row.get::<_, i64>(0)) - .unwrap_or(0); - - // Persist build metadata - let version = env!("CARGO_PKG_VERSION"); - let meta_sql = "INSERT OR REPLACE INTO metadata (key, value) VALUES (?, ?)"; - if let Ok(mut stmt) = conn.prepare(meta_sql) { - let _ = stmt.execute(["engine", "native"]); - let _ = stmt.execute(["engine_version", version]); - let _ = stmt.execute(["codegraph_version", version]); - let _ = stmt.execute(["node_count", &node_count.to_string()]); - let _ = stmt.execute(["edge_count", &edge_count.to_string()]); - let _ = stmt.execute(["last_build", &now_ms().to_string()]); - } - - // Write journal header - journal::write_journal_header(root_dir, now_ms()); + let (node_count, edge_count) = finalize_build(conn, root_dir); timing.finalize_ms = t0.elapsed().as_secs_f64() * 1000.0; // Include total time in setup for overhead accounting. @@ -589,6 +410,217 @@ pub fn run_pipeline( }) } +/// Stage 2: Collect source files with strategy selection (scoped, journal-fast, or full). 
+fn collect_source_files( + conn: &Connection, + root_dir: &str, + config: &BuildConfig, + opts: &BuildOpts, + incremental: bool, + force_full_rebuild: bool, +) -> file_collector::CollectResult { + if let Some(ref scope) = opts.scope { + // Scoped rebuild + let files: Vec = scope + .iter() + .map(|f| { + let abs = Path::new(root_dir).join(normalize_path(f)); + abs.to_str().unwrap_or("").to_string() + }) + .filter(|f| Path::new(f).exists()) + .collect(); + file_collector::CollectResult { + directories: files + .iter() + .filter_map(|f| { + Path::new(f) + .parent() + .map(|p| p.to_str().unwrap_or("").to_string()) + }) + .collect(), + files, + } + } else if incremental && !force_full_rebuild { + // Try fast collect from DB + journal + let journal = journal::read_journal(root_dir); + let has_entries = + journal.valid && (!journal.changed.is_empty() || !journal.removed.is_empty()); + + if has_entries { + let db_files: Vec = conn + .prepare("SELECT file FROM file_hashes") + .and_then(|mut stmt| { + stmt.query_map([], |row| row.get::<_, String>(0)) + .map(|rows| rows.filter_map(|r| r.ok()).collect()) + }) + .unwrap_or_default(); + + if !db_files.is_empty() { + file_collector::try_fast_collect( + root_dir, + &db_files, + &journal.changed, + &journal.removed, + ) + } else { + file_collector::collect_files(root_dir, &config.ignore_dirs) + } + } else { + file_collector::collect_files(root_dir, &config.ignore_dirs) + } + } else { + file_collector::collect_files(root_dir, &config.ignore_dirs) + } +} + +/// Stage 6b: Re-parse barrel candidates for incremental builds. +/// +/// Barrel files (re-export-only index files) may not be in file_symbols because +/// they weren't changed or reverse-deps. Without their symbols, barrel resolution +/// in Stage 7 can't create transitive import edges. 
+fn reparse_barrel_candidates( + conn: &Connection, + root_dir: &str, + napi_aliases: &crate::types::PathAliases, + known_files: &HashSet, + file_symbols: &mut HashMap, + batch_resolved: &mut HashMap, +) { + // Find all barrel files from DB (files that have 'reexports' edges) + let barrel_files_in_db: HashSet = { + let rows: Vec = match conn.prepare( + "SELECT DISTINCT n1.file FROM edges e \ + JOIN nodes n1 ON e.source_id = n1.id \ + WHERE e.kind = 'reexports' AND n1.kind = 'file'", + ) { + Ok(mut stmt) => match stmt.query_map([], |row| row.get::<_, String>(0)) { + Ok(mapped) => mapped.filter_map(|r| r.ok()).collect(), + Err(_) => Vec::new(), + }, + Err(_) => Vec::new(), + }; + rows.into_iter().collect() + }; + + // Check which barrels are imported by parsed files but not in file_symbols + let mut barrel_paths_to_parse: Vec = Vec::new(); + for (_rel_path, symbols) in file_symbols.iter() { + for imp in &symbols.imports { + let abs_file = Path::new(root_dir).join(_rel_path); + let fwd = abs_file.to_str().unwrap_or("").replace('\\', "/"); + let key = format!("{}|{}", fwd, imp.source); + if let Some(resolved) = batch_resolved.get(&key) { + if barrel_files_in_db.contains(resolved) && !file_symbols.contains_key(resolved) + { + let abs = Path::new(root_dir).join(resolved); + if abs.exists() { + barrel_paths_to_parse + .push(abs.to_str().unwrap_or("").to_string()); + } + } + } + } + } + + // Also find barrels that re-export FROM changed files + { + let changed_rel: Vec<&str> = file_symbols.keys().map(|s| s.as_str()).collect(); + if let Ok(mut stmt) = conn.prepare( + "SELECT DISTINCT n1.file FROM edges e \ + JOIN nodes n1 ON e.source_id = n1.id \ + JOIN nodes n2 ON e.target_id = n2.id \ + WHERE e.kind = 'reexports' AND n1.kind = 'file' AND n2.file = ?1", + ) { + for changed in &changed_rel { + if let Ok(rows) = stmt.query_map(rusqlite::params![changed], |row| { + row.get::<_, String>(0) + }) { + for row in rows.flatten() { + if !file_symbols.contains_key(&row) { + let abs = 
Path::new(root_dir).join(&row); + if abs.exists() { + barrel_paths_to_parse + .push(abs.to_str().unwrap_or("").to_string()); + } + } + } + } + } + } + } + + // Re-parse barrel files and merge into file_symbols + if !barrel_paths_to_parse.is_empty() { + barrel_paths_to_parse.sort(); + barrel_paths_to_parse.dedup(); + // Barrel files are re-export-only — no function bodies or dataflow, + // so skip dataflow/AST analysis to avoid unnecessary overhead. + let barrel_parsed = parallel::parse_files_parallel( + &barrel_paths_to_parse, + root_dir, + false, + false, + ); + for mut sym in barrel_parsed { + let rel = relative_path(root_dir, &sym.file); + sym.file = rel.clone(); + // Delete outgoing import/reexport edges for barrel files being re-parsed + // (scoped to import-related kinds to avoid dropping calls edges) + let _ = conn.execute( + "DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?1) \ + AND kind IN ('imports', 'reexports')", + rusqlite::params![&rel], + ); + // Re-resolve imports for the barrel file + let abs_str = + Path::new(root_dir).join(&rel).to_str().unwrap_or("").to_string(); + for imp in &sym.imports { + let input = ImportResolutionInput { + from_file: abs_str.clone(), + import_source: imp.source.clone(), + }; + let resolved_batch = import_resolution::resolve_imports_batch( + &[input], + root_dir, + napi_aliases, + Some(known_files), + ); + for r in &resolved_batch { + let key = format!("{}|{}", r.from_file, r.import_source); + batch_resolved.insert(key, r.resolved_path.clone()); + } + } + file_symbols.insert(rel, sym); + } + } +} + +/// Stage 9: Finalize build — persist metadata, write journal, return counts. 
+fn finalize_build(conn: &Connection, root_dir: &str) -> (i64, i64) { + let node_count = conn + .query_row("SELECT COUNT(*) FROM nodes", [], |row| row.get::<_, i64>(0)) + .unwrap_or(0); + let edge_count = conn + .query_row("SELECT COUNT(*) FROM edges", [], |row| row.get::<_, i64>(0)) + .unwrap_or(0); + + // Persist build metadata + let version = env!("CARGO_PKG_VERSION"); + let meta_sql = "INSERT OR REPLACE INTO metadata (key, value) VALUES (?, ?)"; + if let Ok(mut stmt) = conn.prepare(meta_sql) { + let _ = stmt.execute(["engine", "native"]); + let _ = stmt.execute(["engine_version", version]); + let _ = stmt.execute(["codegraph_version", version]); + let _ = stmt.execute(["node_count", &node_count.to_string()]); + let _ = stmt.execute(["edge_count", &edge_count.to_string()]); + let _ = stmt.execute(["last_build", &now_ms().to_string()]); + } + + // Write journal header + journal::write_journal_header(root_dir, now_ms()); + (node_count, edge_count) +} + /// Check if engine/schema/version changed since last build (forces full rebuild). fn check_version_mismatch(conn: &Connection) -> bool { let get_meta = |key: &str| -> Option { diff --git a/crates/codegraph-core/src/constants.rs b/crates/codegraph-core/src/constants.rs index d1156147..5c7f4569 100644 --- a/crates/codegraph-core/src/constants.rs +++ b/crates/codegraph-core/src/constants.rs @@ -1,3 +1,30 @@ /// Maximum recursion depth for AST traversal to prevent stack overflow /// on deeply nested trees. Used by extractors, complexity, CFG, and dataflow. pub const MAX_WALK_DEPTH: usize = 200; + +// ─── Louvain community detection ──────────────────────────────────── + +/// Maximum number of coarsening levels in the Louvain algorithm. +pub const LOUVAIN_MAX_LEVELS: usize = 50; + +/// Maximum number of local-move passes per level before stopping. +pub const LOUVAIN_MAX_PASSES: usize = 20; + +/// Minimum modularity gain to accept a node move (avoids floating-point noise). 
+pub const LOUVAIN_MIN_GAIN: f64 = 1e-12; + +/// Default random seed for deterministic community detection. +pub const DEFAULT_RANDOM_SEED: u32 = 42; + +// ─── Dataflow analysis ────────────────────────────────────────────── + +/// Maximum character length for truncated dataflow expressions. +pub const DATAFLOW_TRUNCATION_LIMIT: usize = 120; + +// ─── Build pipeline ───────────────────────────────────────────────── + +/// Maximum number of changed files eligible for the incremental fast path. +pub const FAST_PATH_MAX_CHANGED_FILES: usize = 5; + +/// Minimum existing file count required before the fast path is considered. +pub const FAST_PATH_MIN_EXISTING_FILES: usize = 20; diff --git a/crates/codegraph-core/src/dataflow.rs b/crates/codegraph-core/src/dataflow.rs index af736be0..8e925f1d 100644 --- a/crates/codegraph-core/src/dataflow.rs +++ b/crates/codegraph-core/src/dataflow.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use tree_sitter::{Node, Tree}; -use crate::constants::MAX_WALK_DEPTH; +use crate::constants::{DATAFLOW_TRUNCATION_LIMIT, MAX_WALK_DEPTH}; use crate::types::{ DataflowArgFlow, DataflowAssignment, DataflowMutation, DataflowParam, DataflowResult, DataflowReturn, @@ -579,127 +579,143 @@ fn function_name<'a>(fn_node: &Node<'a>, rules: &DataflowRules, source: &[u8]) - None } -/// Extract parameter names using per-language strategy. 
-fn extract_param_names_strategy(node: &Node, strategy: ParamStrategy, source: &[u8]) -> Option> { - match strategy { - ParamStrategy::Default => None, - ParamStrategy::Python => { - let t = node.kind(); - if t == "typed_parameter" || t == "typed_default_parameter" { - let cursor = &mut node.walk(); - for c in node.named_children(cursor) { - if c.kind() == "identifier" { - return Some(vec![node_text(&c, source).to_string()]); - } - } - return Some(vec![]); - } - if t == "default_parameter" { - if let Some(name_node) = node.child_by_field_name("name") { - return Some(vec![node_text(&name_node, source).to_string()]); - } - return Some(vec![]); - } - if t == "list_splat_pattern" || t == "dictionary_splat_pattern" { - let cursor = &mut node.walk(); - for c in node.named_children(cursor) { - if c.kind() == "identifier" { - return Some(vec![node_text(&c, source).to_string()]); - } - } - return Some(vec![]); +// ── Per-language parameter extraction handlers ───────────────────────────── + +fn extract_params_python(node: &Node, source: &[u8]) -> Option> { + let t = node.kind(); + if t == "typed_parameter" || t == "typed_default_parameter" { + let cursor = &mut node.walk(); + for c in node.named_children(cursor) { + if c.kind() == "identifier" { + return Some(vec![node_text(&c, source).to_string()]); } - None } - ParamStrategy::Go => { - let t = node.kind(); - if t == "parameter_declaration" { - let mut names = Vec::new(); - let cursor = &mut node.walk(); - for c in node.named_children(cursor) { - if c.kind() == "identifier" { - names.push(node_text(&c, source).to_string()); - } - } - if !names.is_empty() { Some(names) } else { None } - } else if t == "variadic_parameter_declaration" { - node.child_by_field_name("name") - .map(|n| vec![node_text(&n, source).to_string()]) - } else { - None - } + return Some(vec![]); + } + if t == "default_parameter" { + if let Some(name_node) = node.child_by_field_name("name") { + return Some(vec![node_text(&name_node, source).to_string()]); 
} - ParamStrategy::Rust => { - let t = node.kind(); - if t == "parameter" { - if let Some(pat) = node.child_by_field_name("pattern") { - if pat.kind() == "identifier" { - return Some(vec![node_text(&pat, source).to_string()]); - } - } - return Some(vec![]); - } - if t == "identifier" { - return Some(vec![node_text(node, source).to_string()]); + return Some(vec![]); + } + if t == "list_splat_pattern" || t == "dictionary_splat_pattern" { + let cursor = &mut node.walk(); + for c in node.named_children(cursor) { + if c.kind() == "identifier" { + return Some(vec![node_text(&c, source).to_string()]); } - None } - ParamStrategy::Java => { - let t = node.kind(); - if t == "formal_parameter" || t == "spread_parameter" { - if let Some(name_node) = node.child_by_field_name("name") { - return Some(vec![node_text(&name_node, source).to_string()]); - } - return Some(vec![]); - } - if t == "identifier" { - return Some(vec![node_text(node, source).to_string()]); + return Some(vec![]); + } + None +} + +fn extract_params_go(node: &Node, source: &[u8]) -> Option> { + let t = node.kind(); + if t == "parameter_declaration" { + let mut names = Vec::new(); + let cursor = &mut node.walk(); + for c in node.named_children(cursor) { + if c.kind() == "identifier" { + names.push(node_text(&c, source).to_string()); } - None } - ParamStrategy::CSharp => { - let t = node.kind(); - if t == "parameter" { - if let Some(name_node) = node.child_by_field_name("name") { - return Some(vec![node_text(&name_node, source).to_string()]); - } - return Some(vec![]); - } - if t == "identifier" { - return Some(vec![node_text(node, source).to_string()]); + if !names.is_empty() { Some(names) } else { None } + } else if t == "variadic_parameter_declaration" { + node.child_by_field_name("name") + .map(|n| vec![node_text(&n, source).to_string()]) + } else { + None + } +} + +fn extract_params_rust(node: &Node, source: &[u8]) -> Option> { + let t = node.kind(); + if t == "parameter" { + if let Some(pat) = 
node.child_by_field_name("pattern") { + if pat.kind() == "identifier" { + return Some(vec![node_text(&pat, source).to_string()]); } - None } - ParamStrategy::Php => { - let t = node.kind(); - if t == "simple_parameter" || t == "variadic_parameter" { - if let Some(name_node) = node.child_by_field_name("name") { - return Some(vec![node_text(&name_node, source).to_string()]); - } - return Some(vec![]); - } - if t == "variable_name" { - return Some(vec![node_text(node, source).to_string()]); - } - None + return Some(vec![]); + } + if t == "identifier" { + return Some(vec![node_text(node, source).to_string()]); + } + None +} + +fn extract_params_java(node: &Node, source: &[u8]) -> Option> { + let t = node.kind(); + if t == "formal_parameter" || t == "spread_parameter" { + if let Some(name_node) = node.child_by_field_name("name") { + return Some(vec![node_text(&name_node, source).to_string()]); } - ParamStrategy::Ruby => { - let t = node.kind(); - if t == "identifier" { - return Some(vec![node_text(node, source).to_string()]); - } - if t == "optional_parameter" - || t == "keyword_parameter" - || t == "splat_parameter" - || t == "hash_splat_parameter" - { - if let Some(name_node) = node.child_by_field_name("name") { - return Some(vec![node_text(&name_node, source).to_string()]); - } - return Some(vec![]); - } - None + return Some(vec![]); + } + if t == "identifier" { + return Some(vec![node_text(node, source).to_string()]); + } + None +} + +fn extract_params_csharp(node: &Node, source: &[u8]) -> Option> { + let t = node.kind(); + if t == "parameter" { + if let Some(name_node) = node.child_by_field_name("name") { + return Some(vec![node_text(&name_node, source).to_string()]); + } + return Some(vec![]); + } + if t == "identifier" { + return Some(vec![node_text(node, source).to_string()]); + } + None +} + +fn extract_params_php(node: &Node, source: &[u8]) -> Option> { + let t = node.kind(); + if t == "simple_parameter" || t == "variadic_parameter" { + if let Some(name_node) 
= node.child_by_field_name("name") { + return Some(vec![node_text(&name_node, source).to_string()]); } + return Some(vec![]); + } + if t == "variable_name" { + return Some(vec![node_text(node, source).to_string()]); + } + None +} + +fn extract_params_ruby(node: &Node, source: &[u8]) -> Option> { + let t = node.kind(); + if t == "identifier" { + return Some(vec![node_text(node, source).to_string()]); + } + if t == "optional_parameter" + || t == "keyword_parameter" + || t == "splat_parameter" + || t == "hash_splat_parameter" + { + if let Some(name_node) = node.child_by_field_name("name") { + return Some(vec![node_text(&name_node, source).to_string()]); + } + return Some(vec![]); + } + None +} + +/// Extract parameter names using per-language strategy. +fn extract_param_names_strategy(node: &Node, strategy: ParamStrategy, source: &[u8]) -> Option> { + match strategy { + ParamStrategy::Default => None, + ParamStrategy::Python => extract_params_python(node, source), + ParamStrategy::Go => extract_params_go(node, source), + ParamStrategy::Rust => extract_params_rust(node, source), + ParamStrategy::Java => extract_params_java(node, source), + ParamStrategy::CSharp => extract_params_csharp(node, source), + ParamStrategy::Php => extract_params_php(node, source), + ParamStrategy::Ruby => extract_params_ruby(node, source), } } @@ -1196,7 +1212,7 @@ fn handle_var_declarator( var_name: n.clone(), caller_func: Some(func_name.clone()), source_call_name: callee.clone(), - expression: truncate(node_text(node, source), 120), + expression: truncate(node_text(node, source), DATAFLOW_TRUNCATION_LIMIT), line: node_line(node), }); scope @@ -1209,7 +1225,7 @@ fn handle_var_declarator( var_name: var_name.clone(), caller_func: Some(func_name), source_call_name: callee.clone(), - expression: truncate(node_text(node, source), 120), + expression: truncate(node_text(node, source), DATAFLOW_TRUNCATION_LIMIT), line: node_line(node), }); scope.locals.insert(var_name, LocalSource::CallReturn { 
callee }); @@ -1245,7 +1261,7 @@ fn handle_assignment( func_name: Some(func_name.clone()), receiver_name: receiver, binding_type: binding.as_ref().map(|b| b.binding_type.clone()), - mutating_expr: truncate(node_text(node, source), 120), + mutating_expr: truncate(node_text(node, source), DATAFLOW_TRUNCATION_LIMIT), line: node_line(node), }); } @@ -1264,7 +1280,7 @@ fn handle_assignment( var_name: var_name.clone(), caller_func: Some(func_name), source_call_name: callee.clone(), - expression: truncate(node_text(node, source), 120), + expression: truncate(node_text(node, source), DATAFLOW_TRUNCATION_LIMIT), line: node_line(node), }); if let Some(scope) = scope_stack.last_mut() { @@ -1340,7 +1356,7 @@ fn handle_call_expr( arg_name: Some(tracked.clone()), binding_type: binding.as_ref().map(|b| b.binding_type.clone()), confidence: conf, - expression: truncate(node_text(&arg_raw, source), 120), + expression: truncate(node_text(&arg_raw, source), DATAFLOW_TRUNCATION_LIMIT), line: node_line(node), }); } @@ -1442,7 +1458,7 @@ fn handle_expr_stmt_mutation( func_name, receiver_name: recv, binding_type: binding.as_ref().map(|b| b.binding_type.clone()), - mutating_expr: truncate(node_text(&expr, source), 120), + mutating_expr: truncate(node_text(&expr, source), DATAFLOW_TRUNCATION_LIMIT), line: node_line(node), }); } diff --git a/crates/codegraph-core/src/edge_builder.rs b/crates/codegraph-core/src/edge_builder.rs index 8d03dbd0..3a4194ac 100644 --- a/crates/codegraph-core/src/edge_builder.rs +++ b/crates/codegraph-core/src/edge_builder.rs @@ -2,6 +2,7 @@ use std::collections::{HashMap, HashSet}; use napi_derive::napi; +use crate::barrel_resolution::{self, BarrelContext, ReexportRef}; use crate::import_resolution; /// Kind sets for hierarchy edge resolution -- mirrors the JS constants in @@ -466,55 +467,25 @@ impl<'a> ImportEdgeContext<'a> { } } -/// Recursively resolve a symbol through barrel reexport chains. -/// Mirrors `resolveBarrelExport()` in resolve-imports.ts. 
-fn resolve_barrel_export<'a>( - ctx: &'a ImportEdgeContext<'a>, - barrel_path: &'a str, - symbol_name: &str, - visited: &mut HashSet<&'a str>, -) -> Option<&'a str> { - if visited.contains(barrel_path) { - return None; +impl<'a> BarrelContext for ImportEdgeContext<'a> { + fn reexports_for(&self, barrel_path: &str) -> Option>> { + self.reexport_map.get(barrel_path).map(|entries| { + entries + .iter() + .map(|re| ReexportRef { + source: re.source.as_str(), + names: &re.names, + wildcard_reexport: re.wildcard_reexport, + }) + .collect() + }) } - visited.insert(barrel_path); - let reexports = ctx.reexport_map.get(barrel_path)?; - - for re in reexports.iter() { - // Named reexports (non-wildcard) - if !re.names.is_empty() && !re.wildcard_reexport { - if re.names.iter().any(|n| n == symbol_name) { - if let Some(defs) = ctx.file_defs.get(re.source.as_str()) { - if defs.contains(symbol_name) { - return Some(re.source.as_str()); - } - let deeper = resolve_barrel_export(ctx, re.source.as_str(), symbol_name, visited); - if deeper.is_some() { - return deeper; - } - } - // Fallback: return source even if no definition found - return Some(re.source.as_str()); - } - continue; - } - - // Wildcard or empty-names reexports - if re.wildcard_reexport || re.names.is_empty() { - if let Some(defs) = ctx.file_defs.get(re.source.as_str()) { - if defs.contains(symbol_name) { - return Some(re.source.as_str()); - } - let deeper = resolve_barrel_export(ctx, re.source.as_str(), symbol_name, visited); - if deeper.is_some() { - return deeper; - } - } - } + fn has_definition(&self, file_path: &str, symbol: &str) -> bool { + self.file_defs + .get(file_path) + .map_or(false, |defs| defs.contains(symbol)) } - - None } /// Build import and barrel-through edges in Rust. 
@@ -583,7 +554,7 @@ pub fn build_import_edges( // Barrel resolution: if not reexport and target is a barrel file if !imp.reexport && ctx.barrel_set.contains(resolved_path) { - let mut resolved_sources: HashSet<&str> = HashSet::new(); + let mut resolved_sources: HashSet = HashSet::new(); for name in &imp.names { let clean_name = if name.starts_with("* as ") || name.starts_with("*\tas ") { // Strip "* as " or "*\tas " prefix (both exactly 5 bytes) @@ -594,12 +565,11 @@ pub fn build_import_edges( }; let mut visited = HashSet::new(); - let actual = resolve_barrel_export(&ctx, resolved_path, clean_name, &mut visited); + let actual = barrel_resolution::resolve_barrel_export(&ctx, resolved_path, clean_name, &mut visited); if let Some(actual_source) = actual { - if actual_source != resolved_path && !resolved_sources.contains(actual_source) { - resolved_sources.insert(actual_source); - if let Some(&actual_node_id) = ctx.file_node_map.get(actual_source) { + if actual_source != resolved_path && !resolved_sources.contains(&actual_source) { + if let Some(&actual_node_id) = ctx.file_node_map.get(actual_source.as_str()) { let barrel_kind = match edge_kind { "imports-type" => "imports-type", "dynamic-imports" => "dynamic-imports", @@ -613,6 +583,7 @@ pub fn build_import_edges( dynamic: 0, }); } + resolved_sources.insert(actual_source); } } } diff --git a/crates/codegraph-core/src/extractors/cpp.rs b/crates/codegraph-core/src/extractors/cpp.rs index 8fb30108..ab0d649c 100644 --- a/crates/codegraph-core/src/extractors/cpp.rs +++ b/crates/codegraph-core/src/extractors/cpp.rs @@ -208,180 +208,190 @@ fn extract_cpp_base_classes(node: &Node, source: &[u8], class_name: &str, symbol } } -fn match_cpp_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { - match node.kind() { - "function_definition" => { - if let Some(name) = extract_cpp_function_name(node, source) { - let parent_class = find_cpp_parent_class(node, source); - let full_name = match &parent_class { - 
Some(cls) => format!("{}.{}", cls, name), - None => name, - }; - let kind = if parent_class.is_some() { "method" } else { "function" }; - let children = extract_cpp_parameters(node, source); - symbols.definitions.push(Definition { - name: full_name, - kind: kind.to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: compute_all_metrics(node, source, "cpp"), - cfg: build_function_cfg(node, "cpp", source), - children: opt_children(children), - }); - } - } +// ── Per-node-kind handlers ────────────────────────────────────────────────── - "class_specifier" => { - if let Some(name_node) = node.child_by_field_name("name") { - let class_name = node_text(&name_node, source).to_string(); - let children = node.child_by_field_name("body") - .map(|body| extract_cpp_fields(&body, source)) - .unwrap_or_default(); - symbols.definitions.push(Definition { - name: class_name.clone(), - kind: "class".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: opt_children(children), - }); - extract_cpp_base_classes(node, source, &class_name, symbols); - } - } +fn handle_cpp_function_definition(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name) = extract_cpp_function_name(node, source) { + let parent_class = find_cpp_parent_class(node, source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name, + }; + let kind = if parent_class.is_some() { "method" } else { "function" }; + let children = extract_cpp_parameters(node, source); + symbols.definitions.push(Definition { + name: full_name, + kind: kind.to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "cpp"), + cfg: build_function_cfg(node, "cpp", source), + children: opt_children(children), + }); + } +} - "struct_specifier" => { - if let 
Some(name_node) = node.child_by_field_name("name") { - let struct_name = node_text(&name_node, source).to_string(); - let children = node.child_by_field_name("body") - .map(|body| extract_cpp_fields(&body, source)) - .unwrap_or_default(); - symbols.definitions.push(Definition { - name: struct_name.clone(), - kind: "struct".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: opt_children(children), - }); - extract_cpp_base_classes(node, source, &struct_name, symbols); +fn handle_cpp_class_specifier(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let class_name = node_text(&name_node, source).to_string(); + let children = node.child_by_field_name("body") + .map(|body| extract_cpp_fields(&body, source)) + .unwrap_or_default(); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + extract_cpp_base_classes(node, source, &class_name, symbols); + } +} + +fn handle_cpp_struct_specifier(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let struct_name = node_text(&name_node, source).to_string(); + let children = node.child_by_field_name("body") + .map(|body| extract_cpp_fields(&body, source)) + .unwrap_or_default(); + symbols.definitions.push(Definition { + name: struct_name.clone(), + kind: "struct".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + extract_cpp_base_classes(node, source, &struct_name, symbols); + } +} + +fn handle_cpp_enum_specifier(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = 
node.child_by_field_name("name") { + let children = extract_cpp_enum_constants(node, source); + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "enum".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + } +} + +fn handle_cpp_namespace_definition(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "namespace".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + } +} + +fn handle_cpp_type_definition(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let mut alias_name = None; + for i in (0..node.child_count()).rev() { + if let Some(child) = node.child(i) { + match child.kind() { + "type_identifier" | "identifier" | "primitive_type" => { + alias_name = Some(node_text(&child, source).to_string()); + break; + } + _ => {} } } + } + if let Some(name) = alias_name { + symbols.definitions.push(Definition { + name, + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + } +} - "enum_specifier" => { - if let Some(name_node) = node.child_by_field_name("name") { - let children = extract_cpp_enum_constants(node, source); - symbols.definitions.push(Definition { - name: node_text(&name_node, source).to_string(), - kind: "enum".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: opt_children(children), - }); - } +fn handle_cpp_preproc_include(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(path_node) = node.child_by_field_name("path") 
{ + let raw = node_text(&path_node, source); + let path = raw.trim_matches(|c| c == '"' || c == '<' || c == '>'); + if !path.is_empty() { + let last = path.split('/').last().unwrap_or(path); + let name = last.strip_suffix(".h") + .or_else(|| last.strip_suffix(".hpp")) + .unwrap_or(last); + let mut imp = Import::new(path.to_string(), vec![name.to_string()], start_line(node)); + imp.c_include = Some(true); + symbols.imports.push(imp); } + } +} - "namespace_definition" => { - if let Some(name_node) = node.child_by_field_name("name") { - symbols.definitions.push(Definition { - name: node_text(&name_node, source).to_string(), - kind: "namespace".to_string(), +fn handle_cpp_call_expression(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(fn_node) = node.child_by_field_name("function") { + match fn_node.kind() { + "identifier" | "qualified_identifier" | "scoped_identifier" => { + symbols.calls.push(Call { + name: node_text(&fn_node, source).to_string(), line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: None, + dynamic: None, + receiver: None, }); } - } - - "type_definition" => { - let mut alias_name = None; - for i in (0..node.child_count()).rev() { - if let Some(child) = node.child(i) { - match child.kind() { - "type_identifier" | "identifier" | "primitive_type" => { - alias_name = Some(node_text(&child, source).to_string()); - break; - } - _ => {} - } - } - } - if let Some(name) = alias_name { - symbols.definitions.push(Definition { + "field_expression" => { + let name = fn_node.child_by_field_name("field") + .map(|n| node_text(&n, source).to_string()) + .unwrap_or_else(|| node_text(&fn_node, source).to_string()); + let receiver = fn_node.child_by_field_name("argument") + .map(|n| node_text(&n, source).to_string()); + symbols.calls.push(Call { name, - kind: "type".to_string(), line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - 
cfg: None, - children: None, + dynamic: None, + receiver, }); } - } - - "preproc_include" => { - if let Some(path_node) = node.child_by_field_name("path") { - let raw = node_text(&path_node, source); - let path = raw.trim_matches(|c| c == '"' || c == '<' || c == '>'); - if !path.is_empty() { - let last = path.split('/').last().unwrap_or(path); - let name = last.strip_suffix(".h") - .or_else(|| last.strip_suffix(".hpp")) - .unwrap_or(last); - let mut imp = Import::new(path.to_string(), vec![name.to_string()], start_line(node)); - imp.c_include = Some(true); - symbols.imports.push(imp); - } - } - } - - "call_expression" => { - if let Some(fn_node) = node.child_by_field_name("function") { - match fn_node.kind() { - "identifier" | "qualified_identifier" | "scoped_identifier" => { - symbols.calls.push(Call { - name: node_text(&fn_node, source).to_string(), - line: start_line(node), - dynamic: None, - receiver: None, - }); - } - "field_expression" => { - let name = fn_node.child_by_field_name("field") - .map(|n| node_text(&n, source).to_string()) - .unwrap_or_else(|| node_text(&fn_node, source).to_string()); - let receiver = fn_node.child_by_field_name("argument") - .map(|n| node_text(&n, source).to_string()); - symbols.calls.push(Call { - name, - line: start_line(node), - dynamic: None, - receiver, - }); - } - _ => { - symbols.calls.push(Call { - name: node_text(&fn_node, source).to_string(), - line: start_line(node), - dynamic: None, - receiver: None, - }); - } - } + _ => { + symbols.calls.push(Call { + name: node_text(&fn_node, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); } } + } +} +fn match_cpp_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { + match node.kind() { + "function_definition" => handle_cpp_function_definition(node, source, symbols), + "class_specifier" => handle_cpp_class_specifier(node, source, symbols), + "struct_specifier" => handle_cpp_struct_specifier(node, source, symbols), + 
"enum_specifier" => handle_cpp_enum_specifier(node, source, symbols), + "namespace_definition" => handle_cpp_namespace_definition(node, source, symbols), + "type_definition" => handle_cpp_type_definition(node, source, symbols), + "preproc_include" => handle_cpp_preproc_include(node, source, symbols), + "call_expression" => handle_cpp_call_expression(node, source, symbols), _ => {} } } diff --git a/crates/codegraph-core/src/extractors/csharp.rs b/crates/codegraph-core/src/extractors/csharp.rs index 5a33c460..581344e8 100644 --- a/crates/codegraph-core/src/extractors/csharp.rs +++ b/crates/codegraph-core/src/extractors/csharp.rs @@ -292,41 +292,37 @@ fn extract_csharp_class_fields(node: &Node, source: &[u8]) -> Vec { let mut fields = Vec::new(); let body = node.child_by_field_name("body") .or_else(|| find_child(node, "declaration_list")); - if let Some(body) = body { - for i in 0..body.child_count() { - if let Some(child) = body.child(i) { - if child.kind() == "field_declaration" { - // Walk variable_declaration inside - for j in 0..child.child_count() { - if let Some(decl) = child.child(j) { - if decl.kind() == "variable_declaration" { - for k in 0..decl.child_count() { - if let Some(declarator) = decl.child(k) { - if declarator.kind() == "variable_declarator" { - if let Some(name_node) = declarator.child_by_field_name("name") - .or_else(|| declarator.child(0)) - { - if name_node.kind() == "identifier" { - fields.push(child_def( - node_text(&name_node, source).to_string(), - "property", - start_line(&child), - )); - } - } - } - } - } - } - } - } - } - } + let Some(body) = body else { return fields }; + for i in 0..body.child_count() { + let Some(child) = body.child(i) else { continue }; + if child.kind() == "field_declaration" { + collect_field_declarator_names(&child, source, &mut fields); } } fields } +fn collect_field_declarator_names(field: &Node, source: &[u8], fields: &mut Vec) { + for j in 0..field.child_count() { + let Some(decl) = field.child(j) else { 
continue }; + if decl.kind() != "variable_declaration" { continue; } + for k in 0..decl.child_count() { + let Some(declarator) = decl.child(k) else { continue }; + if declarator.kind() != "variable_declarator" { continue; } + let name_node = declarator.child_by_field_name("name") + .or_else(|| declarator.child(0)); + let Some(name_node) = name_node else { continue }; + if name_node.kind() == "identifier" { + fields.push(child_def( + node_text(&name_node, source).to_string(), + "property", + start_line(field), + )); + } + } + } +} + fn extract_csharp_enum_members(node: &Node, source: &[u8]) -> Vec { let mut members = Vec::new(); let body = node.child_by_field_name("body") diff --git a/crates/codegraph-core/src/extractors/elixir.rs b/crates/codegraph-core/src/extractors/elixir.rs index 975f2acb..8be46455 100644 --- a/crates/codegraph-core/src/extractors/elixir.rs +++ b/crates/codegraph-core/src/extractors/elixir.rs @@ -163,18 +163,8 @@ fn find_elixir_parent_module<'a>(node: &Node<'a>, source: &[u8]) -> Option(node: &Node<'a>, source: &[u8]) -> Option Option { + let gp = do_block.parent()?; + if gp.kind() != "call" { return None; } + let target = gp.child_by_field_name("target").or_else(|| gp.child(0))?; + if target.kind() != "identifier" || node_text(&target, source) != "defmodule" { + return None; + } + let args = find_child(&gp, "arguments")?; + let alias = find_child(&args, "alias")?; + Some(node_text(&alias, source).to_string()) +} + fn handle_defprotocol(node: &Node, source: &[u8], symbols: &mut FileSymbols) { let args = match find_child(node, "arguments") { Some(a) => a, diff --git a/crates/codegraph-core/src/extractors/go.rs b/crates/codegraph-core/src/extractors/go.rs index 3676e47d..92e73878 100644 --- a/crates/codegraph-core/src/extractors/go.rs +++ b/crates/codegraph-core/src/extractors/go.rs @@ -314,42 +314,27 @@ fn extract_go_type_name<'a>(type_node: &Node<'a>, source: &'a [u8]) -> Option<&' fn match_go_type_map(node: &Node, source: &[u8], symbols: &mut 
FileSymbols, _depth: usize) { match node.kind() { - "var_spec" => { - if let Some(type_node) = node.child_by_field_name("type") { - if let Some(type_name) = extract_go_type_name(&type_node, source) { - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - if child.kind() == "identifier" { - symbols.type_map.push(TypeMapEntry { - name: node_text(&child, source).to_string(), - type_name: type_name.to_string(), - }); - } - } - } - } - } - } - "parameter_declaration" => { - if let Some(type_node) = node.child_by_field_name("type") { - if let Some(type_name) = extract_go_type_name(&type_node, source) { - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - if child.kind() == "identifier" { - symbols.type_map.push(TypeMapEntry { - name: node_text(&child, source).to_string(), - type_name: type_name.to_string(), - }); - } - } - } - } - } + "var_spec" | "parameter_declaration" => { + collect_go_typed_identifiers(node, source, &mut symbols.type_map); } _ => {} } } +fn collect_go_typed_identifiers(node: &Node, source: &[u8], type_map: &mut Vec) { + let Some(type_node) = node.child_by_field_name("type") else { return }; + let Some(type_name) = extract_go_type_name(&type_node, source) else { return }; + for i in 0..node.child_count() { + let Some(child) = node.child(i) else { continue }; + if child.kind() == "identifier" { + type_map.push(TypeMapEntry { + name: node_text(&child, source).to_string(), + type_name: type_name.to_string(), + }); + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs index a5f6f199..ea29336f 100644 --- a/crates/codegraph-core/src/extractors/helpers.rs +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -389,6 +389,105 @@ pub fn walk_ast_nodes_with_config( walk_ast_nodes_with_config_depth(node, source, ast_nodes, config, 0); } +/// Classify a tree-sitter node against the language AST config. 
+/// Returns the AST kind string if matched, or `None` to skip. +fn classify_ast_node<'a>(kind: &str, config: &'a LangAstConfig) -> Option<&'a str> { + if config.new_types.contains(&kind) { + Some("new") + } else if config.throw_types.contains(&kind) { + Some("throw") + } else if config.await_types.contains(&kind) { + Some("await") + } else if config.string_types.contains(&kind) { + Some("string") + } else if config.regex_types.contains(&kind) { + Some("regex") + } else { + None + } +} + +/// Build an AstNode for a "new" expression. +fn build_new_node(node: &Node, source: &[u8]) -> AstNode { + AstNode { + kind: "new".to_string(), + name: extract_constructor_name(node, source), + line: start_line(node), + text: Some(truncate(node_text(node, source), AST_TEXT_MAX)), + receiver: None, + } +} + +/// Build an AstNode for a "throw" statement. +fn build_throw_node(node: &Node, source: &[u8], config: &LangAstConfig) -> AstNode { + AstNode { + kind: "throw".to_string(), + name: extract_throw_target(node, source, config), + line: start_line(node), + text: extract_child_expression_text(node, source), + receiver: None, + } +} + +/// Build an AstNode for an "await" expression. +fn build_await_node(node: &Node, source: &[u8]) -> AstNode { + AstNode { + kind: "await".to_string(), + name: extract_awaited_name(node, source), + line: start_line(node), + text: extract_child_expression_text(node, source), + receiver: None, + } +} + +/// Build an AstNode for a string literal. +/// Returns `None` if the string content is too short (< 2 chars). 
+fn build_string_node(node: &Node, source: &[u8], config: &LangAstConfig) -> Option { + let raw = node_text(node, source); + let kind = node.kind(); + let is_raw_string = kind.contains("raw_string"); + // Strip language prefix modifiers before quote chars: + // - C# verbatim `@"..."`, Rust raw strings `r"..."`, Python prefixes: r, b, f, u + let without_prefix = raw.trim_start_matches('@') + .trim_start_matches(|c: char| config.string_prefixes.contains(&c)); + let without_prefix = if is_raw_string { + without_prefix.trim_start_matches('r').trim_start_matches('#') + } else { + without_prefix + }; + let content = without_prefix + .trim_start_matches(|c: char| config.quote_chars.contains(&c)); + let content = if is_raw_string { + content.trim_end_matches('#') + } else { + content + }; + let content = content + .trim_end_matches(|c: char| config.quote_chars.contains(&c)); + if content.chars().count() < 2 { + return None; + } + Some(AstNode { + kind: "string".to_string(), + name: truncate(content, 100), + line: start_line(node), + text: Some(truncate(raw, AST_TEXT_MAX)), + receiver: None, + }) +} + +/// Build an AstNode for a regex literal. +fn build_regex_node(node: &Node, source: &[u8]) -> AstNode { + let raw = node_text(node, source); + AstNode { + kind: "regex".to_string(), + name: if raw.is_empty() { "?".to_string() } else { raw.to_string() }, + line: start_line(node), + text: Some(truncate(raw, AST_TEXT_MAX)), + receiver: None, + } +} + fn walk_ast_nodes_with_config_depth( node: &Node, source: &[u8], @@ -399,69 +498,38 @@ fn walk_ast_nodes_with_config_depth( if depth >= MAX_WALK_DEPTH { return; } - let kind = node.kind(); - if config.new_types.contains(&kind) { - let name = extract_constructor_name(node, source); - let text = truncate(node_text(node, source), AST_TEXT_MAX); - ast_nodes.push(AstNode { - kind: "new".to_string(), - name, - line: start_line(node), - text: Some(text), - receiver: None, - }); - // Fall through to recurse children (e.g. 
string args inside `new`) - } else if config.throw_types.contains(&kind) { - let name = extract_throw_target(node, source, config); - let text = extract_child_expression_text(node, source); - ast_nodes.push(AstNode { - kind: "throw".to_string(), - name, - line: start_line(node), - text, - receiver: None, - }); - // Fall through to recurse children (e.g. `new` inside `throw new ...`) - } else if config.await_types.contains(&kind) { - let name = extract_awaited_name(node, source); - let text = extract_child_expression_text(node, source); - ast_nodes.push(AstNode { - kind: "await".to_string(), - name, - line: start_line(node), - text, - receiver: None, - }); - // Fall through to recurse children — captures strings, etc. inside await expr. - } else if config.string_types.contains(&kind) { - let raw = node_text(node, source); - let is_raw_string = kind.contains("raw_string"); - // Strip language prefix modifiers before quote chars: - // - C# verbatim `@"..."` - // - Rust raw strings `r"..."`, `r#"..."#` - // - Python prefixes: r, b, f, u and combos like rb, fr - let without_prefix = raw.trim_start_matches('@') - .trim_start_matches(|c: char| config.string_prefixes.contains(&c)); - // For raw string node types (e.g. Rust `r#"..."#`), strip the `r` prefix - // and `#` delimiters. This must be conditional — the unconditional - // `.trim_start_matches('r')` that was here before double-stripped 'r' for - // languages like Python where 'r' is already in string_prefixes. 
- let without_prefix = if is_raw_string { - without_prefix.trim_start_matches('r').trim_start_matches('#') - } else { - without_prefix - }; - let content = without_prefix - .trim_start_matches(|c: char| config.quote_chars.contains(&c)); - let content = if is_raw_string { - content.trim_end_matches('#') - } else { - content - }; - let content = content - .trim_end_matches(|c: char| config.quote_chars.contains(&c)); - if content.chars().count() < 2 { + if let Some(ast_kind) = classify_ast_node(node.kind(), config) { + let skip_children_on_short_string; + match ast_kind { + "new" => { + ast_nodes.push(build_new_node(node, source)); + skip_children_on_short_string = false; + } + "throw" => { + ast_nodes.push(build_throw_node(node, source, config)); + skip_children_on_short_string = false; + } + "await" => { + ast_nodes.push(build_await_node(node, source)); + skip_children_on_short_string = false; + } + "string" => { + if let Some(ast_node) = build_string_node(node, source, config) { + ast_nodes.push(ast_node); + skip_children_on_short_string = false; + } else { + // Short string: recurse children then return early + skip_children_on_short_string = true; + } + } + "regex" => { + ast_nodes.push(build_regex_node(node, source)); + skip_children_on_short_string = false; + } + _ => { skip_children_on_short_string = false; } + } + if skip_children_on_short_string { for i in 0..node.child_count() { if let Some(child) = node.child(i) { walk_ast_nodes_with_config_depth(&child, source, ast_nodes, config, depth + 1); @@ -469,28 +537,6 @@ fn walk_ast_nodes_with_config_depth( } return; } - let name = truncate(content, 100); - let text = truncate(raw, AST_TEXT_MAX); - ast_nodes.push(AstNode { - kind: "string".to_string(), - name, - line: start_line(node), - text: Some(text), - receiver: None, - }); - // Fall through to recurse children (template strings may have nested expressions) - } else if config.regex_types.contains(&kind) { - let raw = node_text(node, source); - let name = if 
raw.is_empty() { "?".to_string() } else { raw.to_string() }; - let text = truncate(raw, AST_TEXT_MAX); - ast_nodes.push(AstNode { - kind: "regex".to_string(), - name, - line: start_line(node), - text: Some(text), - receiver: None, - }); - // Fall through to recurse children } for i in 0..node.child_count() { diff --git a/crates/codegraph-core/src/extractors/java.rs b/crates/codegraph-core/src/extractors/java.rs index d41554db..f187d6e5 100644 --- a/crates/codegraph-core/src/extractors/java.rs +++ b/crates/codegraph-core/src/extractors/java.rs @@ -372,57 +372,52 @@ fn extract_java_interfaces( symbols: &mut FileSymbols, ) { for i in 0..interfaces.child_count() { - if let Some(child) = interfaces.child(i) { - match child.kind() { - "type_identifier" | "identifier" => { - symbols.classes.push(ClassRelation { - name: class_name.to_string(), - extends: None, - implements: Some(node_text(&child, source).to_string()), - line: start_line(interfaces), - }); - } - "type_list" => { - for j in 0..child.child_count() { - if let Some(t) = child.child(j) { - match t.kind() { - "type_identifier" | "identifier" => { - symbols.classes.push(ClassRelation { - name: class_name.to_string(), - extends: None, - implements: Some(node_text(&t, source).to_string()), - line: start_line(interfaces), - }); - } - "generic_type" => { - if let Some(first) = t.child(0) { - symbols.classes.push(ClassRelation { - name: class_name.to_string(), - extends: None, - implements: Some( - node_text(&first, source).to_string(), - ), - line: start_line(interfaces), - }); - } - } - _ => {} - } - } - } + let Some(child) = interfaces.child(i) else { continue }; + match child.kind() { + "type_identifier" | "identifier" => { + push_implements(symbols, class_name, node_text(&child, source), interfaces); + } + "type_list" => { + collect_type_list_implements(&child, class_name, source, symbols, interfaces); + } + "generic_type" => { + if let Some(first) = child.child(0) { + push_implements(symbols, class_name, 
node_text(&first, source), interfaces); } - "generic_type" => { - if let Some(first) = child.child(0) { - symbols.classes.push(ClassRelation { - name: class_name.to_string(), - extends: None, - implements: Some(node_text(&first, source).to_string()), - line: start_line(interfaces), - }); - } + } + _ => {} + } + } +} + +fn push_implements(symbols: &mut FileSymbols, class_name: &str, iface_name: &str, line_node: &Node) { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: None, + implements: Some(iface_name.to_string()), + line: start_line(line_node), + }); +} + +fn collect_type_list_implements( + type_list: &Node, + class_name: &str, + source: &[u8], + symbols: &mut FileSymbols, + line_node: &Node, +) { + for j in 0..type_list.child_count() { + let Some(t) = type_list.child(j) else { continue }; + match t.kind() { + "type_identifier" | "identifier" => { + push_implements(symbols, class_name, node_text(&t, source), line_node); + } + "generic_type" => { + if let Some(first) = t.child(0) { + push_implements(symbols, class_name, node_text(&first, source), line_node); } - _ => {} } + _ => {} } } } diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index d92926f8..dff3b290 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -1087,70 +1087,76 @@ fn extract_dynamic_import_names(call_node: &Node, source: &[u8]) -> Vec Some(n) if n.kind() == "variable_declarator" => n, _ => return Vec::new(), }; - let name_node = match declarator.child_by_field_name("name") { - Some(n) => n, - None => return Vec::new(), + let Some(name_node) = declarator.child_by_field_name("name") else { + return Vec::new(); }; match name_node.kind() { - // const { a, b } = await import(...) 
- "object_pattern" => { - let mut names = Vec::new(); - for i in 0..name_node.child_count() { - if let Some(child) = name_node.child(i) { - if child.kind() == "shorthand_property_identifier_pattern" - || child.kind() == "shorthand_property_identifier" - { - names.push(node_text(&child, source).to_string()); - } else if child.kind() == "pair_pattern" || child.kind() == "pair" { - // { exportName: localAlias } → extract the key (export name), - // not the value (local alias). The key maps to the source - // module's export; the value is only the local binding. - if let Some(key) = child.child_by_field_name("key") { - names.push(node_text(&key, source).to_string()); - } - } else if child.kind() == "object_assignment_pattern" { - // Handle `{ a = 'default' }` — extract the left-hand binding - if let Some(left) = child.child_by_field_name("left") { - names.push(node_text(&left, source).to_string()); - } - } else if child.kind() == "rest_pattern" || child.kind() == "rest_element" { - // Handle `{ a, ...rest }` — extract the identifier inside the spread - if let Some(inner) = child.child(0) { - if inner.kind() == "identifier" { - names.push(node_text(&inner, source).to_string()); - } - } - } + "object_pattern" => collect_object_pattern_names(&name_node, source), + "identifier" => vec![node_text(&name_node, source).to_string()], + "array_pattern" => collect_array_pattern_names(&name_node, source), + _ => Vec::new(), + } +} + +/// Collect names from `const { a, b } = await import(...)` +fn collect_object_pattern_names(pattern: &Node, source: &[u8]) -> Vec { + let mut names = Vec::new(); + for i in 0..pattern.child_count() { + let Some(child) = pattern.child(i) else { continue }; + match child.kind() { + "shorthand_property_identifier_pattern" | "shorthand_property_identifier" => { + names.push(node_text(&child, source).to_string()); + } + "pair_pattern" | "pair" => { + // { exportName: localAlias } → extract the key (export name) + if let Some(key) = 
child.child_by_field_name("key") { + names.push(node_text(&key, source).to_string()); } } - names + "object_assignment_pattern" => { + // { a = 'default' } → extract the left-hand binding + if let Some(left) = child.child_by_field_name("left") { + names.push(node_text(&left, source).to_string()); + } + } + "rest_pattern" | "rest_element" => { + extract_rest_identifier(&child, source, &mut names); + } + _ => {} } - // const mod = await import(...) - "identifier" => vec![node_text(&name_node, source).to_string()], - // const [first, second] = await import(...) - "array_pattern" => { - let mut names = Vec::new(); - for i in 0..name_node.child_count() { - if let Some(child) = name_node.child(i) { - if child.kind() == "identifier" { - names.push(node_text(&child, source).to_string()); - } else if child.kind() == "assignment_pattern" { - if let Some(left) = child.child_by_field_name("left") { - names.push(node_text(&left, source).to_string()); - } - } else if child.kind() == "rest_pattern" || child.kind() == "rest_element" { - // Handle `[a, ...rest]` — extract the identifier inside the spread - if let Some(inner) = child.child(0) { - if inner.kind() == "identifier" { - names.push(node_text(&inner, source).to_string()); - } - } - } + } + names +} + +/// Collect names from `const [first, second] = await import(...)` +fn collect_array_pattern_names(pattern: &Node, source: &[u8]) -> Vec { + let mut names = Vec::new(); + for i in 0..pattern.child_count() { + let Some(child) = pattern.child(i) else { continue }; + match child.kind() { + "identifier" => { + names.push(node_text(&child, source).to_string()); + } + "assignment_pattern" => { + if let Some(left) = child.child_by_field_name("left") { + names.push(node_text(&left, source).to_string()); } } - names + "rest_pattern" | "rest_element" => { + extract_rest_identifier(&child, source, &mut names); + } + _ => {} + } + } + names +} + +/// Extract the identifier from a rest/spread element (`...rest` → `rest`) +fn 
extract_rest_identifier(rest_node: &Node, source: &[u8], names: &mut Vec) { + if let Some(inner) = rest_node.child(0) { + if inner.kind() == "identifier" { + names.push(node_text(&inner, source).to_string()); } - _ => Vec::new(), } } diff --git a/crates/codegraph-core/src/extractors/php.rs b/crates/codegraph-core/src/extractors/php.rs index d68ef68a..7681e986 100644 --- a/crates/codegraph-core/src/extractors/php.rs +++ b/crates/codegraph-core/src/extractors/php.rs @@ -323,33 +323,31 @@ fn extract_php_class_properties(node: &Node, source: &[u8]) -> Vec { let mut props = Vec::new(); let body = node.child_by_field_name("body") .or_else(|| find_child(node, "declaration_list")); - if let Some(body) = body { - for i in 0..body.child_count() { - if let Some(child) = body.child(i) { - if child.kind() == "property_declaration" { - // Walk property_element children - for j in 0..child.child_count() { - if let Some(elem) = child.child(j) { - if elem.kind() == "property_element" { - if let Some(name_node) = elem.child(0) { - if name_node.kind() == "variable_name" { - props.push(child_def( - node_text(&name_node, source).to_string(), - "property", - start_line(&child), - )); - } - } - } - } - } - } - } + let Some(body) = body else { return props }; + for i in 0..body.child_count() { + let Some(child) = body.child(i) else { continue }; + if child.kind() == "property_declaration" { + collect_property_element_names(&child, source, &mut props); } } props } +fn collect_property_element_names(decl: &Node, source: &[u8], props: &mut Vec) { + for j in 0..decl.child_count() { + let Some(elem) = decl.child(j) else { continue }; + if elem.kind() != "property_element" { continue; } + let Some(name_node) = elem.child(0) else { continue }; + if name_node.kind() == "variable_name" { + props.push(child_def( + node_text(&name_node, source).to_string(), + "property", + start_line(decl), + )); + } + } +} + fn extract_php_enum_cases(node: &Node, source: &[u8]) -> Vec { let mut cases = Vec::new(); 
let body = node.child_by_field_name("body") diff --git a/crates/codegraph-core/src/extractors/python.rs b/crates/codegraph-core/src/extractors/python.rs index eb2eab2f..61d631a8 100644 --- a/crates/codegraph-core/src/extractors/python.rs +++ b/crates/codegraph-core/src/extractors/python.rs @@ -259,39 +259,30 @@ fn extract_python_class_properties(class_node: &Node, source: &[u8]) -> Vec) { for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - if child.kind() == "expression_statement" { - if let Some(expr) = child.child(0) { - if expr.kind() == "assignment" { - if let Some(left) = expr.child_by_field_name("left") { - if left.kind() == "attribute" { - if let Some(obj) = left.child_by_field_name("object") { - if node_text(&obj, source) == "self" { - if let Some(attr) = left.child_by_field_name("attribute") { - let name = node_text(&attr, source); - // Avoid duplicates - if !props.iter().any(|p| p.name == name) { - props.push(child_def( - name.to_string(), - "property", - start_line(&child), - )); - } - } - } - } - } - } - } - } - } - // Recurse into blocks (if/for/etc inside __init__) - if child.kind() == "block" || child.kind() == "if_statement" - || child.kind() == "for_statement" || child.kind() == "while_statement" - { - collect_self_assignments(&child, source, props); - } + let Some(child) = node.child(i) else { continue }; + if child.kind() == "expression_statement" { + try_extract_self_assignment(&child, source, props); } + // Recurse into blocks (if/for/etc inside __init__) + if child.kind() == "block" || child.kind() == "if_statement" + || child.kind() == "for_statement" || child.kind() == "while_statement" + { + collect_self_assignments(&child, source, props); + } + } +} + +fn try_extract_self_assignment(stmt: &Node, source: &[u8], props: &mut Vec) { + let Some(expr) = stmt.child(0) else { return }; + if expr.kind() != "assignment" { return; } + let Some(left) = expr.child_by_field_name("left") else { return }; + if left.kind() != 
"attribute" { return; } + let Some(obj) = left.child_by_field_name("object") else { return }; + if node_text(&obj, source) != "self" { return; } + let Some(attr) = left.child_by_field_name("attribute") else { return }; + let name = node_text(&attr, source); + if !props.iter().any(|p| p.name == name) { + props.push(child_def(name.to_string(), "property", start_line(stmt))); } } diff --git a/crates/codegraph-core/src/extractors/ruby.rs b/crates/codegraph-core/src/extractors/ruby.rs index b74c5952..6402a381 100644 --- a/crates/codegraph-core/src/extractors/ruby.rs +++ b/crates/codegraph-core/src/extractors/ruby.rs @@ -224,35 +224,27 @@ fn extract_ruby_class_children(node: &Node, source: &[u8]) -> Vec { fn collect_ruby_class_children(node: &Node, source: &[u8], children: &mut Vec) { for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - match child.kind() { - // Instance variable assignment: @name = ... - "assignment" => { - if let Some(left) = child.child_by_field_name("left") { - if left.kind() == "instance_variable" { - let name = node_text(&left, source); - if !children.iter().any(|c| c.name == name) { - children.push(child_def( - name.to_string(), - "property", - start_line(&child), - )); - } - } - // UPPER_CASE = value → constant - if left.kind() == "constant" { - let name = node_text(&left, source); - children.push(child_def( - name.to_string(), - "constant", - start_line(&child), - )); - } - } - } - _ => {} + let Some(child) = node.child(i) else { continue }; + if child.kind() == "assignment" { + try_collect_ruby_assignment(&child, source, children); + } + } +} + +fn try_collect_ruby_assignment(assign: &Node, source: &[u8], children: &mut Vec) { + let Some(left) = assign.child_by_field_name("left") else { return }; + match left.kind() { + "instance_variable" => { + let name = node_text(&left, source); + if !children.iter().any(|c| c.name == name) { + children.push(child_def(name.to_string(), "property", start_line(assign))); } } + "constant" 
=> { + let name = node_text(&left, source); + children.push(child_def(name.to_string(), "constant", start_line(assign))); + } + _ => {} } } diff --git a/crates/codegraph-core/src/extractors/rust_lang.rs b/crates/codegraph-core/src/extractors/rust_lang.rs index 3d22416d..cc81bd3f 100644 --- a/crates/codegraph-core/src/extractors/rust_lang.rs +++ b/crates/codegraph-core/src/extractors/rust_lang.rs @@ -309,75 +309,63 @@ fn extract_rust_use_path(node: &Node, source: &[u8]) -> Vec<(String, Vec "use_list" => { let mut results = Vec::new(); for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - results.extend(extract_rust_use_path(&child, source)); - } + let Some(child) = node.child(i) else { continue }; + results.extend(extract_rust_use_path(&child, source)); } results } - - "scoped_use_list" => { - let path_node = node.child_by_field_name("path"); - let list_node = node.child_by_field_name("list"); - let prefix = path_node - .map(|p| node_text(&p, source).to_string()) - .unwrap_or_default(); - if let Some(list_node) = list_node { - let mut names = Vec::new(); - for i in 0..list_node.child_count() { - if let Some(child) = list_node.child(i) { - match child.kind() { - "identifier" | "self" => { - names.push(node_text(&child, source).to_string()); - } - "use_as_clause" => { - let name = child - .child_by_field_name("alias") - .or_else(|| child.child_by_field_name("name")) - .map(|n| node_text(&n, source).to_string()); - if let Some(name) = name { - names.push(name); - } - } - _ => {} - } - } - } - vec![(prefix, names)] - } else { - vec![(prefix, vec![])] - } - } - + "scoped_use_list" => extract_scoped_use_list(node, source), "use_as_clause" => { let name = node .child_by_field_name("alias") .or_else(|| node.child_by_field_name("name")) .map(|n| node_text(&n, source).to_string()); - vec![( - node_text(node, source).to_string(), - name.into_iter().collect(), - )] + vec![(node_text(node, source).to_string(), name.into_iter().collect())] } - "use_wildcard" 
=> { - let path_node = node.child_by_field_name("path"); - let src = path_node + let src = node.child_by_field_name("path") .map(|p| node_text(&p, source).to_string()) .unwrap_or_else(|| "*".to_string()); vec![(src, vec!["*".to_string()])] } - "scoped_identifier" | "identifier" => { let text = node_text(node, source).to_string(); let last_name = text.split("::").last().unwrap_or("").to_string(); vec![(text, vec![last_name])] } - _ => vec![], } } +fn extract_scoped_use_list(node: &Node, source: &[u8]) -> Vec<(String, Vec)> { + let prefix = node.child_by_field_name("path") + .map(|p| node_text(&p, source).to_string()) + .unwrap_or_default(); + let Some(list_node) = node.child_by_field_name("list") else { + return vec![(prefix, vec![])]; + }; + let mut names = Vec::new(); + for i in 0..list_node.child_count() { + let Some(child) = list_node.child(i) else { continue }; + match child.kind() { + "identifier" | "self" => { + names.push(node_text(&child, source).to_string()); + } + "use_as_clause" => { + let name = child + .child_by_field_name("alias") + .or_else(|| child.child_by_field_name("name")) + .map(|n| node_text(&n, source).to_string()); + if let Some(name) = name { + names.push(name); + } + } + _ => {} + } + } + vec![(prefix, names)] +} + fn extract_rust_type_name<'a>(type_node: &Node<'a>, source: &'a [u8]) -> Option<&'a str> { match type_node.kind() { "type_identifier" | "identifier" | "scoped_type_identifier" => Some(node_text(type_node, source)), diff --git a/crates/codegraph-core/src/extractors/scala.rs b/crates/codegraph-core/src/extractors/scala.rs index ddc2faa0..2f4373cc 100644 --- a/crates/codegraph-core/src/extractors/scala.rs +++ b/crates/codegraph-core/src/extractors/scala.rs @@ -193,142 +193,150 @@ fn extract_scala_import_path(node: &Node, source: &[u8]) -> String { path } -fn match_scala_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { - match node.kind() { - "class_definition" => { - let name_node = 
node.child_by_field_name("name") - .or_else(|| find_child(node, "identifier")); - if let Some(name_node) = name_node { - let class_name = node_text(&name_node, source).to_string(); - let children = extract_scala_class_members(node, source); - symbols.definitions.push(Definition { - name: class_name.clone(), - kind: "class".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: opt_children(children), - }); - extract_scala_extends(node, source, &class_name, symbols); - } - } +// ── Per-node-kind handlers ────────────────────────────────────────────────── + +fn handle_scala_class_definition(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = node.child_by_field_name("name") + .or_else(|| find_child(node, "identifier")); + if let Some(name_node) = name_node { + let class_name = node_text(&name_node, source).to_string(); + let children = extract_scala_class_members(node, source); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + extract_scala_extends(node, source, &class_name, symbols); + } +} + +fn handle_scala_trait_definition(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = node.child_by_field_name("name") + .or_else(|| find_child(node, "identifier")); + if let Some(name_node) = name_node { + let trait_name = node_text(&name_node, source).to_string(); + symbols.definitions.push(Definition { + name: trait_name.clone(), + kind: "interface".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + extract_scala_extends(node, source, &trait_name, symbols); + } +} + +fn handle_scala_object_definition(node: &Node, source: &[u8], symbols: &mut 
FileSymbols) { + let name_node = node.child_by_field_name("name") + .or_else(|| find_child(node, "identifier")); + if let Some(name_node) = name_node { + let obj_name = node_text(&name_node, source).to_string(); + let children = extract_scala_class_members(node, source); + symbols.definitions.push(Definition { + name: obj_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + extract_scala_extends(node, source, &obj_name, symbols); + } +} - "trait_definition" => { - let name_node = node.child_by_field_name("name") - .or_else(|| find_child(node, "identifier")); - if let Some(name_node) = name_node { - let trait_name = node_text(&name_node, source).to_string(); - symbols.definitions.push(Definition { - name: trait_name.clone(), - kind: "interface".to_string(), +fn handle_scala_function_definition(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = node.child_by_field_name("name") + .or_else(|| find_child(node, "identifier")); + if let Some(name_node) = name_node { + let parent_class = find_scala_parent_class(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name.to_string(), + }; + let kind = if parent_class.is_some() { "method" } else { "function" }; + let children = extract_scala_parameters(node, source); + symbols.definitions.push(Definition { + name: full_name, + kind: kind.to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "scala"), + cfg: build_function_cfg(node, "scala", source), + children: opt_children(children), + }); + } +} + +fn handle_scala_import_declaration(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let path = extract_scala_import_path(node, source); + if !path.is_empty() { + let 
last = path.split('.').last().unwrap_or("").to_string(); + let mut imp = Import::new(path, vec![last], start_line(node)); + imp.scala_import = Some(true); + symbols.imports.push(imp); + } +} + +fn handle_scala_call_expression(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(fn_node) = node.child_by_field_name("function") + .or_else(|| node.child(0)) + { + match fn_node.kind() { + "identifier" => { + symbols.calls.push(Call { + name: node_text(&fn_node, source).to_string(), line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: None, + dynamic: None, + receiver: None, }); - extract_scala_extends(node, source, &trait_name, symbols); } - } - - "object_definition" => { - let name_node = node.child_by_field_name("name") - .or_else(|| find_child(node, "identifier")); - if let Some(name_node) = name_node { - let obj_name = node_text(&name_node, source).to_string(); - let children = extract_scala_class_members(node, source); - symbols.definitions.push(Definition { - name: obj_name.clone(), - kind: "class".to_string(), + "field_expression" => { + let name = fn_node.child_by_field_name("field") + .or_else(|| fn_node.child_by_field_name("member")) + .map(|n| node_text(&n, source).to_string()) + .unwrap_or_else(|| node_text(&fn_node, source).to_string()); + let receiver = fn_node.child_by_field_name("value") + .or_else(|| fn_node.child(0)) + .map(|n| node_text(&n, source).to_string()); + symbols.calls.push(Call { + name, line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: opt_children(children), + dynamic: None, + receiver, }); - extract_scala_extends(node, source, &obj_name, symbols); } - } - - "function_definition" => { - let name_node = node.child_by_field_name("name") - .or_else(|| find_child(node, "identifier")); - if let Some(name_node) = name_node { - let parent_class = find_scala_parent_class(node, source); - 
let name = node_text(&name_node, source); - let full_name = match &parent_class { - Some(cls) => format!("{}.{}", cls, name), - None => name.to_string(), - }; - let kind = if parent_class.is_some() { "method" } else { "function" }; - let children = extract_scala_parameters(node, source); - symbols.definitions.push(Definition { - name: full_name, - kind: kind.to_string(), + _ => { + symbols.calls.push(Call { + name: node_text(&fn_node, source).to_string(), line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: compute_all_metrics(node, source, "scala"), - cfg: build_function_cfg(node, "scala", source), - children: opt_children(children), + dynamic: None, + receiver: None, }); } } + } +} - "import_declaration" => { - let path = extract_scala_import_path(node, source); - if !path.is_empty() { - let last = path.split('.').last().unwrap_or("").to_string(); - let mut imp = Import::new(path, vec![last], start_line(node)); - imp.scala_import = Some(true); - symbols.imports.push(imp); - } - } - - "call_expression" => { - if let Some(fn_node) = node.child_by_field_name("function") - .or_else(|| node.child(0)) - { - match fn_node.kind() { - "identifier" => { - symbols.calls.push(Call { - name: node_text(&fn_node, source).to_string(), - line: start_line(node), - dynamic: None, - receiver: None, - }); - } - "field_expression" => { - let name = fn_node.child_by_field_name("field") - .or_else(|| fn_node.child_by_field_name("member")) - .map(|n| node_text(&n, source).to_string()) - .unwrap_or_else(|| node_text(&fn_node, source).to_string()); - let receiver = fn_node.child_by_field_name("value") - .or_else(|| fn_node.child(0)) - .map(|n| node_text(&n, source).to_string()); - symbols.calls.push(Call { - name, - line: start_line(node), - dynamic: None, - receiver, - }); - } - _ => { - symbols.calls.push(Call { - name: node_text(&fn_node, source).to_string(), - line: start_line(node), - dynamic: None, - receiver: None, - }); - } - } - } - } - +fn 
match_scala_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { + match node.kind() { + "class_definition" => handle_scala_class_definition(node, source, symbols), + "trait_definition" => handle_scala_trait_definition(node, source, symbols), + "object_definition" => handle_scala_object_definition(node, source, symbols), + "function_definition" => handle_scala_function_definition(node, source, symbols), + "import_declaration" => handle_scala_import_declaration(node, source, symbols), + "call_expression" => handle_scala_call_expression(node, source, symbols), _ => {} } } diff --git a/crates/codegraph-core/src/extractors/zig.rs b/crates/codegraph-core/src/extractors/zig.rs index dfb6fa9c..9fb08272 100644 --- a/crates/codegraph-core/src/extractors/zig.rs +++ b/crates/codegraph-core/src/extractors/zig.rs @@ -74,87 +74,17 @@ fn extract_zig_params(func_node: &Node, source: &[u8]) -> Vec { } fn handle_zig_variable(node: &Node, source: &[u8], symbols: &mut FileSymbols) { - let name_node = match find_child(node, "identifier") { - Some(n) => n, - None => return, - }; + let Some(name_node) = find_child(node, "identifier") else { return }; let name = node_text(&name_node, source).to_string(); - // Check for struct/enum/union - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - match child.kind() { - "struct_declaration" => { - let members = extract_zig_container_fields(&child, source); - symbols.definitions.push(Definition { - name, - kind: "struct".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: opt_children(members), - }); - return; - } - "enum_declaration" => { - symbols.definitions.push(Definition { - name, - kind: "enum".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: None, - }); - return; - } - "union_declaration" => { - 
symbols.definitions.push(Definition { - name, - kind: "struct".to_string(), - line: start_line(node), - end_line: Some(end_line(node)), - decorators: None, - complexity: None, - cfg: None, - children: None, - }); - return; - } - _ => {} - } - } + // Check for struct/enum/union type definition + if try_handle_zig_type_def(node, source, symbols, &name) { + return; } - // Check for @import - for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - if child.kind() == "builtin_function" { - if let Some(builtin_id) = find_child(&child, "builtin_identifier") { - if node_text(&builtin_id, source) == "@import" { - if let Some(args) = find_child(&child, "arguments") { - for j in 0..args.child_count() { - if let Some(arg) = args.child(j) { - if arg.kind() == "string_literal" || arg.kind() == "string" { - let raw = node_text(&arg, source); - let source_path = raw.trim_matches('"').to_string(); - symbols.imports.push(Import::new( - source_path, - vec![name], - start_line(node), - )); - return; - } - } - } - } - } - } - } - } + // Check for @import binding + if try_handle_zig_import(node, source, symbols, name.clone()) { + return; } // Regular const/var @@ -171,6 +101,56 @@ fn handle_zig_variable(node: &Node, source: &[u8], symbols: &mut FileSymbols) { }); } +fn try_handle_zig_type_def(node: &Node, source: &[u8], symbols: &mut FileSymbols, name: &str) -> bool { + for i in 0..node.child_count() { + let Some(child) = node.child(i) else { continue }; + let (kind, children) = match child.kind() { + "struct_declaration" => ("struct", opt_children(extract_zig_container_fields(&child, source))), + "enum_declaration" => ("enum", None), + "union_declaration" => ("struct", None), + _ => continue, + }; + symbols.definitions.push(Definition { + name: name.to_string(), + kind: kind.to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children, + }); + return true; + } + false +} + +fn 
try_handle_zig_import(node: &Node, source: &[u8], symbols: &mut FileSymbols, name: String) -> bool { + for i in 0..node.child_count() { + let Some(child) = node.child(i) else { continue }; + if child.kind() != "builtin_function" { continue; } + if let Some(path) = extract_zig_import_path(&child, source) { + symbols.imports.push(Import::new(path, vec![name], start_line(node))); + return true; + } + } + false +} + +fn extract_zig_import_path(builtin: &Node, source: &[u8]) -> Option { + let builtin_id = find_child(builtin, "builtin_identifier")?; + if node_text(&builtin_id, source) != "@import" { return None; } + let args = find_child(builtin, "arguments")?; + for j in 0..args.child_count() { + let Some(arg) = args.child(j) else { continue }; + if arg.kind() == "string_literal" || arg.kind() == "string" { + let raw = node_text(&arg, source); + return Some(raw.trim_matches('"').to_string()); + } + } + None +} + fn extract_zig_container_fields(container: &Node, source: &[u8]) -> Vec { let mut fields = Vec::new(); for i in 0..container.child_count() { diff --git a/crates/codegraph-core/src/graph_algorithms.rs b/crates/codegraph-core/src/graph_algorithms.rs index 78dbf448..a30c269f 100644 --- a/crates/codegraph-core/src/graph_algorithms.rs +++ b/crates/codegraph-core/src/graph_algorithms.rs @@ -1,5 +1,6 @@ use std::collections::{HashMap, HashSet, VecDeque}; +use crate::constants::{DEFAULT_RANDOM_SEED, LOUVAIN_MAX_LEVELS, LOUVAIN_MAX_PASSES, LOUVAIN_MIN_GAIN}; use crate::types::GraphEdge; use napi_derive::napi; @@ -242,16 +243,25 @@ pub fn louvain_communities( &edges, &node_ids, resolution.unwrap_or(1.0), - random_seed.unwrap_or(42), + random_seed.unwrap_or(DEFAULT_RANDOM_SEED), ) } -fn louvain_impl( +/// Internal state for the Louvain multi-level loop. 
+struct LouvainState { + cur_n: usize, + cur_edges: HashMap<(usize, usize), f64>, + cur_degree: Vec, + original_community: Vec, + rng_state: u32, +} + +/// Build the initial index-based edge map and degree vector from raw edges. +fn louvain_init( edges: &[GraphEdge], node_ids: &[String], - resolution: f64, seed: u32, -) -> LouvainResult { +) -> (HashMap<(usize, usize), f64>, f64, LouvainState) { let n = node_ids.len(); let mut id_to_idx: HashMap<&str, usize> = HashMap::with_capacity(n); for (i, id) in node_ids.iter().enumerate() { @@ -274,168 +284,174 @@ fn louvain_impl( } let total_weight: f64 = edge_map.values().sum(); - if total_weight == 0.0 { - return LouvainResult { - assignments: node_ids - .iter() - .enumerate() - .map(|(i, id)| CommunityAssignment { - node: id.clone(), - community: i as i32, - }) - .collect(), - modularity: 0.0, - }; - } - - // original_community[i] tracks each original node's final community - let mut original_community: Vec = (0..n).collect(); - // Current level's graph - let mut cur_n = n; - let mut cur_edges = edge_map.clone(); - let mut cur_degree: Vec = vec![0.0; cur_n]; - for (&(src, tgt), &w) in &cur_edges { + let mut cur_degree: Vec = vec![0.0; n]; + for (&(src, tgt), &w) in &edge_map { cur_degree[src] += w; cur_degree[tgt] += w; } - // Seeded xorshift32 RNG - let mut rng_state: u32 = if seed == 0 { 1 } else { seed }; - let mut next_rand = || -> u32 { - rng_state ^= rng_state << 13; - rng_state ^= rng_state >> 17; - rng_state ^= rng_state << 5; - rng_state + let rng_state = if seed == 0 { 1 } else { seed }; + + let state = LouvainState { + cur_n: n, + cur_edges: edge_map.clone(), + cur_degree, + original_community: (0..n).collect(), + rng_state, }; - // m2 = 2 × total edge weight of the ORIGINAL graph — a constant across all levels. - // Recalculating from cur_edges would undercount because coarsening strips intra-community - // edges, inflating the penalty term and causing under-merging at coarser levels. 
- let total_m2: f64 = 2.0 * total_weight; + (edge_map, total_weight, state) +} - for _level in 0..50 { - if cur_edges.is_empty() { - break; - } +/// Xorshift32 PRNG step. +fn xorshift32(state: &mut u32) -> u32 { + *state ^= *state << 13; + *state ^= *state >> 17; + *state ^= *state << 5; + *state +} - // Build adjacency list - let mut adj: Vec> = vec![vec![]; cur_n]; - for (&(src, tgt), &w) in &cur_edges { - adj[src].push((tgt, w)); - adj[tgt].push((src, w)); - } +/// Local move phase: greedily reassign nodes to communities to maximize modularity. +/// Returns true if any node moved. +fn local_move_phase( + state: &mut LouvainState, + resolution: f64, + total_m2: f64, +) -> (Vec, bool) { + let cur_n = state.cur_n; + + // Build adjacency list + let mut adj: Vec> = vec![vec![]; cur_n]; + for (&(src, tgt), &w) in &state.cur_edges { + adj[src].push((tgt, w)); + adj[tgt].push((src, w)); + } - // Local phase: greedy modularity optimization - let mut level_comm: Vec = (0..cur_n).collect(); - let mut comm_total: Vec = cur_degree.clone(); + let mut level_comm: Vec = (0..cur_n).collect(); + let mut comm_total: Vec = state.cur_degree.clone(); - let mut order: Vec = (0..cur_n).collect(); - for i in (1..order.len()).rev() { - let j = next_rand() as usize % (i + 1); - order.swap(i, j); - } + // Shuffle visit order with seeded RNG + let mut order: Vec = (0..cur_n).collect(); + for i in (1..order.len()).rev() { + let j = xorshift32(&mut state.rng_state) as usize % (i + 1); + order.swap(i, j); + } - let mut any_moved = false; - for _pass in 0..20 { - let mut pass_moved = false; - for &node in &order { - let node_comm = level_comm[node]; - let node_deg = cur_degree[node]; + let mut any_moved = false; + for _pass in 0..LOUVAIN_MAX_PASSES { + let mut pass_moved = false; + for &node in &order { + let node_comm = level_comm[node]; + let node_deg = state.cur_degree[node]; - let mut comm_w: HashMap = HashMap::new(); - for &(neighbor, w) in &adj[node] { - 
*comm_w.entry(level_comm[neighbor]).or_insert(0.0) += w; - } + let mut comm_w: HashMap = HashMap::new(); + for &(neighbor, w) in &adj[node] { + *comm_w.entry(level_comm[neighbor]).or_insert(0.0) += w; + } - let w_own = *comm_w.get(&node_comm).unwrap_or(&0.0); - let remove_cost = - w_own - resolution * node_deg * (comm_total[node_comm] - node_deg) / total_m2; + let w_own = *comm_w.get(&node_comm).unwrap_or(&0.0); + let remove_cost = + w_own - resolution * node_deg * (comm_total[node_comm] - node_deg) / total_m2; - let mut best_comm = node_comm; - let mut best_gain: f64 = 0.0; + let mut best_comm = node_comm; + let mut best_gain: f64 = 0.0; - for (&target_comm, &w_target) in &comm_w { - if target_comm == node_comm { - continue; - } - let gain = w_target - - resolution * node_deg * comm_total[target_comm] / total_m2 - - remove_cost; - if gain > best_gain { - best_gain = gain; - best_comm = target_comm; - } + for (&target_comm, &w_target) in &comm_w { + if target_comm == node_comm { + continue; } - - if best_comm != node_comm && best_gain > 1e-12 { - comm_total[node_comm] -= node_deg; - comm_total[best_comm] += node_deg; - level_comm[node] = best_comm; - pass_moved = true; - any_moved = true; + let gain = w_target + - resolution * node_deg * comm_total[target_comm] / total_m2 + - remove_cost; + if gain > best_gain { + best_gain = gain; + best_comm = target_comm; } } - if !pass_moved { - break; + + if best_comm != node_comm && best_gain > LOUVAIN_MIN_GAIN { + comm_total[node_comm] -= node_deg; + comm_total[best_comm] += node_deg; + level_comm[node] = best_comm; + pass_moved = true; + any_moved = true; } } - - if !any_moved { + if !pass_moved { break; } + } - // Renumber communities contiguously - let mut comm_remap: HashMap = HashMap::new(); - let mut next_id: usize = 0; - for &c in &level_comm { - if !comm_remap.contains_key(&c) { - comm_remap.insert(c, next_id); - next_id += 1; - } - } - for c in level_comm.iter_mut() { - *c = comm_remap[c]; - } - let coarse_n = 
next_id; + (level_comm, any_moved) +} - if coarse_n == cur_n { - break; +/// Aggregation phase: renumber communities, compose original mapping, build coarse graph. +/// Returns false if no further coarsening is possible (convergence). +fn aggregation_phase( + state: &mut LouvainState, + level_comm: &mut Vec, +) -> bool { + // Renumber communities contiguously + let mut comm_remap: HashMap = HashMap::new(); + let mut next_id: usize = 0; + for &c in level_comm.iter() { + if !comm_remap.contains_key(&c) { + comm_remap.insert(c, next_id); + next_id += 1; } + } + for c in level_comm.iter_mut() { + *c = comm_remap[c]; + } + let coarse_n = next_id; - // Compose: update original_community through this level's assignments - for oc in original_community.iter_mut() { - *oc = level_comm[*oc]; - } + if coarse_n == state.cur_n { + return false; + } - // Build coarse graph for next level - let mut coarse_edge_map: HashMap<(usize, usize), f64> = HashMap::new(); - for (&(src, tgt), &w) in &cur_edges { - let cu = level_comm[src]; - let cv = level_comm[tgt]; - if cu == cv { - continue; - } - let key = if cu < cv { (cu, cv) } else { (cv, cu) }; - *coarse_edge_map.entry(key).or_insert(0.0) += w; - } + // Compose: update original_community through this level's assignments + for oc in state.original_community.iter_mut() { + *oc = level_comm[*oc]; + } - let mut coarse_degree: Vec = vec![0.0; coarse_n]; - for (i, °) in cur_degree.iter().enumerate() { - coarse_degree[level_comm[i]] += deg; + // Build coarse graph for next level + let mut coarse_edge_map: HashMap<(usize, usize), f64> = HashMap::new(); + for (&(src, tgt), &w) in &state.cur_edges { + let cu = level_comm[src]; + let cv = level_comm[tgt]; + if cu == cv { + continue; } + let key = if cu < cv { (cu, cv) } else { (cv, cu) }; + *coarse_edge_map.entry(key).or_insert(0.0) += w; + } - cur_n = coarse_n; - cur_edges = coarse_edge_map; - cur_degree = coarse_degree; + let mut coarse_degree: Vec = vec![0.0; coarse_n]; + for (i, °) in 
state.cur_degree.iter().enumerate() { + coarse_degree[level_comm[i]] += deg; } - // Compute modularity: Q = sum_c [ L_c / m - gamma * (k_c / 2m)^2 ] + state.cur_n = coarse_n; + state.cur_edges = coarse_edge_map; + state.cur_degree = coarse_degree; + + true +} + +/// Compute final modularity score: Q = sum_c [ L_c / m - gamma * (k_c / 2m)^2 ] +fn compute_modularity( + edge_map: &HashMap<(usize, usize), f64>, + original_community: &[usize], + total_weight: f64, + resolution: f64, + n: usize, +) -> f64 { let m = total_weight; let m2 = 2.0 * m; let mut orig_degree: Vec = vec![0.0; n]; - for (&(src, tgt), &w) in &edge_map { + for (&(src, tgt), &w) in edge_map { orig_degree[src] += w; orig_degree[tgt] += w; } @@ -447,7 +463,7 @@ fn louvain_impl( for (i, °) in orig_degree.iter().enumerate() { kc[original_community[i]] += deg; } - for (&(src, tgt), &w) in &edge_map { + for (&(src, tgt), &w) in edge_map { if original_community[src] == original_community[tgt] { lc[original_community[src]] += w; } @@ -459,13 +475,60 @@ fn louvain_impl( modularity += lc[c] / m - resolution * (kc[c] / m2).powi(2); } } + modularity +} + +fn louvain_impl( + edges: &[GraphEdge], + node_ids: &[String], + resolution: f64, + seed: u32, +) -> LouvainResult { + let n = node_ids.len(); + let (edge_map, total_weight, mut state) = louvain_init(edges, node_ids, seed); + + if total_weight == 0.0 { + return LouvainResult { + assignments: node_ids + .iter() + .enumerate() + .map(|(i, id)| CommunityAssignment { + node: id.clone(), + community: i as i32, + }) + .collect(), + modularity: 0.0, + }; + } + + // m2 = 2 x total edge weight of the ORIGINAL graph -- a constant across all levels. + // Recalculating from cur_edges would undercount because coarsening strips intra-community + // edges, inflating the penalty term and causing under-merging at coarser levels. 
+ let total_m2: f64 = 2.0 * total_weight; + + for _level in 0..LOUVAIN_MAX_LEVELS { + if state.cur_edges.is_empty() { + break; + } + + let (mut level_comm, any_moved) = local_move_phase(&mut state, resolution, total_m2); + if !any_moved { + break; + } + + if !aggregation_phase(&mut state, &mut level_comm) { + break; + } + } + + let modularity = compute_modularity(&edge_map, &state.original_community, total_weight, resolution, n); let assignments = node_ids .iter() .enumerate() .map(|(i, id)| CommunityAssignment { node: id.clone(), - community: original_community[i] as i32, + community: state.original_community[i] as i32, }) .collect(); diff --git a/crates/codegraph-core/src/import_edges.rs b/crates/codegraph-core/src/import_edges.rs index 1dfcd4d0..8d3966a7 100644 --- a/crates/codegraph-core/src/import_edges.rs +++ b/crates/codegraph-core/src/import_edges.rs @@ -4,6 +4,7 @@ //! the barrel detection from `resolve-imports.ts:isBarrelFile()`, and the //! recursive barrel export resolution from `resolveBarrelExport()`. +use crate::barrel_resolution::{self, BarrelContext, ReexportRef}; use crate::import_resolution; use crate::types::{FileSymbols, PathAliases}; use rusqlite::Connection; @@ -79,58 +80,36 @@ impl ImportEdgeContext { } /// Recursively resolve a barrel export to its actual source file. + /// + /// Delegates to the shared [`barrel_resolution::resolve_barrel_export`] algorithm. 
pub fn resolve_barrel_export( &self, barrel_path: &str, symbol_name: &str, visited: &mut HashSet<String>, ) -> Option<String> { - if visited.contains(barrel_path) { - return None; - } - visited.insert(barrel_path.to_string()); + barrel_resolution::resolve_barrel_export(self, barrel_path, symbol_name, visited) + } +} - let reexports = self.reexport_map.get(barrel_path)?; - for re in reexports { - // Named reexport (not wildcard) - if !re.names.is_empty() && !re.wildcard_reexport { - if re.names.iter().any(|n| n == symbol_name) { - if let Some(target_symbols) = self.file_symbols.get(&re.source) { - let has_def = target_symbols - .definitions - .iter() - .any(|d| d.name == symbol_name); - if has_def { - return Some(re.source.clone()); - } - let deeper = self.resolve_barrel_export(&re.source, symbol_name, visited); - if deeper.is_some() { - return deeper; - } - } - return Some(re.source.clone()); - } - continue; - } +impl BarrelContext for ImportEdgeContext { + fn reexports_for(&self, barrel_path: &str) -> Option<Vec<ReexportRef<'_>>> { + self.reexport_map.get(barrel_path).map(|entries| { + entries + .iter() + .map(|re| ReexportRef { + source: re.source.as_str(), + names: &re.names, + wildcard_reexport: re.wildcard_reexport, + }) + .collect() + }) + } - // Wildcard reexport or unnamed - if re.wildcard_reexport || re.names.is_empty() { - if let Some(target_symbols) = self.file_symbols.get(&re.source) { - let has_def = target_symbols - .definitions - .iter() - .any(|d| d.name == symbol_name); - if has_def { - return Some(re.source.clone()); - } - let deeper = self.resolve_barrel_export(&re.source, symbol_name, visited); - if deeper.is_some() { - return deeper; - } - } - } - } - None + fn has_definition(&self, file_path: &str, symbol: &str) -> bool { + self.file_symbols + .get(file_path) + .map_or(false, |s| s.definitions.iter().any(|d| d.name == symbol)) } } diff --git a/crates/codegraph-core/src/lib.rs b/crates/codegraph-core/src/lib.rs index 1b16b029..5fbe317d 100644 --- 
a/crates/codegraph-core/src/lib.rs +++ b/crates/codegraph-core/src/lib.rs @@ -1,5 +1,6 @@ pub mod analysis; pub mod ast_db; +pub mod barrel_resolution; pub mod build_pipeline; pub mod change_detection; pub mod cfg; diff --git a/generated/titan/titan-report-v3.9.0-2026-04-04T06-48-00.md b/generated/titan/titan-report-v3.9.0-2026-04-04T06-48-00.md new file mode 100644 index 00000000..73152023 --- /dev/null +++ b/generated/titan/titan-report-v3.9.0-2026-04-04T06-48-00.md @@ -0,0 +1,245 @@ +# Titan Audit Report + +**Version:** 3.9.0 +**Date:** 2026-04-04T10:11 UTC -> 2026-04-04T12:33 UTC +**Branch:** worktree-titan-run +**Target:** H:\Vscode\codegraph\.claude\worktrees\titan-run + +--- + +## Executive Summary + +This Titan run targeted the native Rust engine and key TypeScript hotspots across 37 audit targets spanning 13 batches. All 18 execution phases completed successfully with 0 failures. The pipeline was fresh throughout (zero drift from main). Key outcomes: the top 6 highest-bug-density functions were decomposed, bringing `run_pipeline` from 7.42 to 4.39 estimated bugs, `louvain_impl` from 2.72 to 0.43, and `buildGraph` from 3.62 to 0.20. A +1 function cycle regression introduced during barrel resolution extraction was root-caused and fixed. 
+ +--- + +## Pipeline Timeline + +| Phase | Duration | Notes | +|-------|----------|-------| +| RECON | 9.8 min | Mapped 558 files, 15489 symbols, 14 domains | +| GAUNTLET | 32.4 min | 54 files audited across 13 batches | +| SYNC | 13.7 min | 11 clusters, 5 abstractions, 18 phases planned | +| FORGE | 76.8 min | 17 commits, first at 6f4c52e, last at 9eacf7e | +| GATE | across forge | 7 runs, all pass | +| CLOSE | ~15 min | Report generation and PR splitting | +| **Total** | **~148 min** | **~2.5 hours** | + +--- + +## Metrics: Before & After + +| Metric | Baseline | Final | Delta | Trend | +|--------|----------|-------|-------|-------| +| Quality Score | 68 | 68* | 0 | -- | +| Total Files | 558 | 628 | +70 | (new extracted modules) | +| Total Symbols | 15489 | 15880 | +391 | (decomposed functions add symbols) | +| Total Edges | 30523 | 31335 | +812 | (new internal call edges) | +| Functions Above Threshold (cog>15) | 50** | 291*** | N/A | see note | +| Dead Symbols | 11741 | 11885 | +144 | (new extracted helpers not yet consumed cross-file) | +| Avg Halstead Bugs | 0.18 | 0.18 | 0 | -- | +| Avg Maintainability Index | 60.42 | 60.42 | 0 | -- | + +\* Quality score not recomputable from DB alone (requires CLI stats); baseline value carried forward. +\*\* Baseline "50" was from RECON's `--above-threshold` which uses default thresholds. The final "291" is raw DB count of cognitive>15 across all functions including Rust/scripts; not directly comparable. +\*\*\* The meaningful comparison is the targeted function improvements below. 
+ +### Complexity Improvement: Top Movers + +These are the functions specifically targeted by the Titan audit, showing before (GAUNTLET baseline) and after metrics: + +| Function | Bugs Before | Bugs After | Cog Before | Cog After | MI Before | MI After | +|----------|-------------|------------|------------|-----------|-----------|----------| +| run_pipeline | 7.42 | 4.39 | 110 | 29 | 22.9 | 34.1 | +| buildGraph | 3.62 | 0.20 | 180 | 8 | 22.6 | 64.1 | +| louvain_impl | 2.72 | 0.43 | 85 | 8 | 30.6 | 50.7 | +| match_cpp_node | 2.37 | 0.18 | -- | 1 | 20.3 | 55.3 | +| match_scala_node | 1.87 | 0.14 | -- | 1 | 24.7 | 57.9 | +| extract_param_names_strategy | 1.36 | 0.17 | 83 | 1 | 23.1 | 56.2 | +| watchProject | 1.30 | 0.12 | 59 | 4 | 38.9 | 57.9 | +| buildComplexityMetrics | 1.17 | 0.04 | 117 | 1 | 36.2 | 61.1 | +| classifyNodeRolesFull | -- | 0.48 | 27 | 4 | -- | 46.8 | +| classifyNodeRolesIncremental | -- | 0.89 | 27 | 4 | -- | 43.7 | +| createAstStoreVisitor | -- | 0.93 | 35 | 34 | -- | 40.5 | + +**Total estimated bug reduction across targeted functions: 22.24 -> 7.97 (-14.27, -64%)** + +### Remaining Hot Spots + +Functions still above thresholds after this run (carried forward for next Titan): + +| Function | File | Bugs | Cog | MI | +|----------|------|------|-----|----| +| run_pipeline | build_pipeline.rs | 4.39 | 29 | 34.1 | +| NativeDatabase.get_graph_stats | read_queries.rs | 4.21 | 30 | 25.8 | +| main | scripts/token-benchmark.ts | 2.44 | 33 | 32.0 | +| do_insert_nodes | insert_nodes.rs | 2.13 | 51 | 31.6 | +| build_and_insert_call_edges | build_pipeline.rs | 2.07 | 22 | 32.4 | +| match_c_node | extractors/c.rs | 1.90 | 31 | 26.8 | +| match_kotlin_node | extractors/kotlin.rs | 1.87 | 32 | 29.6 | +| CfgBuilder.process_try_catch | cfg.rs | 1.85 | 62 | 34.2 | +| match_swift_node | extractors/swift.rs | 1.76 | 24 | 29.1 | +| resolveBenchmarkSource | scripts/lib/bench-config.ts | 1.84 | 35 | 39.2 | + +--- + +## Audit Results Summary + +**Targets audited:** 54 files 
+**Pass:** 8 | **Warn:** 13 | **Fail:** 27 | **Decompose:** 6 + +### By Pillar + +| Pillar | Pass | Warn | Fail | +|--------|------|------|------| +| I -- Structural Purity | 8 | 6 | 40 | +| II -- Data & Type Sovereignty | 48 | 4 | 2 | +| III -- Ecosystem Synergy | 54 | 0 | 0 | +| IV -- Quality Vigil | 30 | 12 | 12 | + +### Most Common Violations + +1. **Rule 1 -- Cognitive complexity** (126 violations): Dominant issue across all domains. Most Rust extractors exceeded thresholds. +2. **Max nesting depth** (Rust extractors worst at nest 6-9): Deep match arm nesting in tree-sitter node dispatch. +3. **Magic numbers** (seed 42 in louvain.ts, various thresholds in Rust): Addressed in phase 2. +4. **Naming** (nn() vague, short abbreviations in risk.ts): Minor naming concerns. +5. **Dead code false positives**: Mostly codegraph limitations (error classes, barrel re-exports, type imports). + +--- + +## Changes Made + +### Commits: 17 + +| SHA | Message | Files | Domain | +|-----|---------|-------|--------| +| 6f4c52e | refactor(native): extract magic numbers to named constants | 5 | native-engine, graph-model | +| 74980eb | refactor: extract shared node-role classification from structure.ts | 1 | features | +| 41f7dfd | refactor: unify duplicate dataflow result builders | 1 | features | +| 8a08153 | refactor(native): extract shared barrel resolution into common module | 4 | native-engine | +| ac28911 | refactor(native): flatten deeply nested extractor match arms | 10 | native-engine | +| 7be28ce | refactor(native): decompose cpp and scala node matchers | 2 | native-engine | +| faa63c3 | refactor(native): decompose louvain_impl into init/move/aggregate phases | 1 | native-engine | +| 8f14f42 | refactor(native): split extract_param_names_strategy into per-language handlers | 1 | native-engine | +| dea81ca | refactor(native): decompose run_pipeline into stage functions | 1 | native-engine | +| 5988439 | refactor: decompose buildComplexityMetrics into native/wasm/merge 
sub-functions | 1 | features | +| 3f8537b | refactor: continue buildGraph decomposition into pipeline stages | 1 | domain-builder | +| f51fe4b | refactor: split presentation formatters into sub-renderers | 3 | presentation | +| 6d521cd | refactor: extract watcher debounce and journal logic | 1 | domain-core | +| c9433ed | refactor: reduce complexity in TS extractors and file-utils | 3 | extractors, shared | +| b11b075 | refactor: simplify AST store visitor and engine setup | 2 | ast-analysis | +| 8347867 | refactor(native): improve helper and barrel resolution quality | 2 | native-engine, domain-builder | +| 9eacf7e | fix: resolve +1 function cycle regression in barrel resolution | 1 | domain-builder | + +### PR Split Plan + +| PR # | URL | Title | Concern | Domain | Commits | Files | Depends On | +|------|-----|-------|---------|--------|---------|-------|------------| +| 1 | [#842](https://github.com/optave/ops-codegraph-tool/pull/842) | refactor(native): extract constants and shared barrel resolution | abstraction | native-engine | 2 | 9 | -- | +| 2 | [#843](https://github.com/optave/ops-codegraph-tool/pull/843) | refactor: DRY shared abstractions in TS features | abstraction | features | 2 | 2 | -- | +| 3 | [#844](https://github.com/optave/ops-codegraph-tool/pull/844) | refactor(native): flatten and decompose extractor match arms | decomposition | native-engine | 4 | 12 | PR #1 | +| 4 | [#845](https://github.com/optave/ops-codegraph-tool/pull/845) | refactor(native): decompose core Rust algorithms and pipeline | decomposition | native-engine | 5 | 3 | PR #1 | +| 5 | [#846](https://github.com/optave/ops-codegraph-tool/pull/846) | refactor: decompose TS complexity and build pipeline | decomposition | features, domain-builder | 4 | 2 | PR #2 | +| 6 | [#847](https://github.com/optave/ops-codegraph-tool/pull/847) | refactor: improve TS code quality across modules | quality_fix | presentation, extractors, ast-analysis, domain-core | 4 | 9 | -- | +| 7 | 
[#848](https://github.com/optave/ops-codegraph-tool/pull/848) | fix: resolve barrel resolution quality and cycle regression | quality_fix | native-engine, domain-builder | 4 | 3 | PR #1 | + +**Merge order:** PR #1 and #2 first (no deps), then #3, #4, #5, #6 (parallel), then #7 last. + +--- + +## Gate Validation History + +**Total runs:** 7 +**Pass:** 7 | **Warn:** 0 | **Fail:** 0 +**Rollbacks:** 0 + +### Failure Patterns + +No gate failures occurred. All 7 gate runs passed lint, build, and tests. Codegraph-specific checks (manifesto, cycles, complexity, blast radius) were skipped due to the WAL lock contention issue in worktrees (documented as tooling limitation). + +--- + +## Issues Discovered + +### Codegraph Bugs (3) + +1. **bug** -- Error class instantiation (`new ClassName()`) not tracked as consumption. All error hierarchy classes in `src/shared/errors.ts` appear dead despite 47 uses across 21 files. (Phase: gauntlet) +2. **bug** -- Role classification misses consumers through barrel re-exports. `queryName` in `inspect.ts` shows 0 consumers but is consumed via barrel chain. (Phase: gauntlet) +3. **bug** -- `shouldIgnore` and `isSupportedFile` in `constants.ts` classified as test-only despite production consumers in `watcher.ts`. (Phase: gauntlet) + +### Tooling Limitations (4) + +1. **limitation** -- `codegraph embed` failed: `@huggingface/transformers` not installed. DRY detection was grep-only. (Phase: recon) +2. **limitation** -- codegraph CLI commands hang/timeout in worktree (WAL lock contention from concurrent worktrees). Had to fall back to direct SQLite readonly queries. (Phase: gauntlet, sync) +3. **limitation** -- TypeScript interfaces classified as dead-unresolved because codegraph doesn't track type-level imports. (Phase: gauntlet) +4. **limitation** -- Constants `DEFAULT_WEIGHTS`, `ROLE_WEIGHTS` flagged as dead despite same-file consumption. Internal consumption not recognized. (Phase: gauntlet) + +### Process Suggestions (4) + +1. 
**suggestion** -- Rust files have no dead code detection via codegraph (no cross-file resolution for Rust). (Phase: gauntlet) +2. **suggestion** -- Rule 15 (structured logging) should exempt presentation/ layer where console.log is intended output. (Phase: gauntlet) +3. **suggestion** -- RECON should verify file existence when building batches. Batch 10 referenced non-existent `src/extractors/typescript.ts`. (Phase: gauntlet) +4. **suggestion** -- Batch 13 referenced non-existent `typescript.rs` and `terraform.rs` native extractors. (Phase: gauntlet) + +### Codebase Observations (3) + +1. **suggestion** -- `classifyNodeRolesFull` and `classifyNodeRolesIncremental` were near-duplicates (both cog=27). Addressed in this run. (Phase: gauntlet) +2. **suggestion** -- `buildNodeDataflowResult` and `buildNativeDataflowResult` were near-duplicate result builders. Addressed in this run. (Phase: gauntlet) +3. **suggestion** -- `tarjan` function re-exported via barrel but only consumed in tests. Consider removing re-export to reduce API surface. (Phase: gauntlet) + +--- + +## Domains Analyzed + +| Domain | Root Dirs | Files Audited | Status | +|--------|-----------|---------------|--------| +| native-engine | crates/codegraph-core/ | 22 | Decomposed: run_pipeline, louvain_impl, extract_param_names_strategy, match_cpp_node, match_scala_node. Flattened 10 extractor match arms. | +| domain-builder | src/domain/graph/builder/ | 5 | Decomposed: buildGraph. Fixed barrel resolution cycle. | +| domain-core | src/domain/ | 4 | Extracted: watcher debounce/journal logic. | +| features | src/features/ | 3 | Decomposed: buildComplexityMetrics. DRY: node-role classification, dataflow result builders. | +| extractors | src/extractors/ | 4 | Reduced complexity in JS/Go extractors and file-utils. | +| ast-analysis | src/ast-analysis/ | 4 | Simplified AST store visitor and engine setup. | +| graph-model | src/graph/ | 4 | Audited (pass/warn). Magic number extraction in louvain.ts. 
| +| presentation | src/presentation/ | 5 | Split formatters into sub-renderers. | +| shared | src/shared/ | 4 | Audited (mostly pass). Identified 3 codegraph bugs. | +| database | src/db/ | 0 | Not targeted this run. | +| cli | src/cli/ | 0 | Not targeted this run. | +| mcp | src/mcp/ | 0 | Not targeted this run. | +| infrastructure | src/infrastructure/ | 0 | Not targeted this run. | + +--- + +## Pipeline Freshness + +**Main at RECON:** 0e543e4 +**Main at CLOSE:** 0e543e4 +**Commits behind:** 0 +**Overall staleness:** fresh + +### Drift Events + +| Phase | Staleness | Impacted Targets | Action | +|-------|-----------|-----------------|--------| +| gauntlet | none | 0 | Continued normally | +| sync | none | 0 | Continued normally | +| close | none | 0 | Report generated normally | + +### Stale Targets + +None. All audit results reflect current main. + +--- + +## Recommendations for Next Run + +1. **Remaining Rust hot spots:** `do_insert_nodes` (bugs=2.13, cog=51), `build_and_insert_call_edges` (bugs=2.07), `CfgBuilder.process_try_catch` (bugs=1.85, cog=62), and the C/Kotlin/Swift node matchers should be the next Titan targets. + +2. **Fix codegraph bugs first:** The 3 codegraph bugs (error class consumption, barrel re-export traversal, type-only import resolution) inflate dead symbol counts and cause false role classifications. Fixing these before the next dead code cleanup run will produce accurate results. + +3. **WAL lock contention in worktrees:** The tooling limitation that forced fallback to direct SQLite queries should be investigated. All codegraph CLI commands hung in the worktree context. This impacts gate validation quality (manifesto/cycles/complexity checks were skipped). + +4. **Untargeted domains:** Database (src/db/), CLI (src/cli/), MCP (src/mcp/), and Infrastructure (src/infrastructure/) were not audited. The database layer has low cohesion (0.08) and should be prioritized. + +5. 
**run_pipeline still hot:** Even after decomposition, `run_pipeline` remains the highest-bug function (4.39). Further decomposition or restructuring of the Rust build pipeline would yield the most impact. + +6. **Scripts cleanup:** `token-benchmark.ts` and `bench-config.ts` have high complexity but are scripts, not production code. Consider whether they warrant cleanup effort. diff --git a/src/ast-analysis/engine.ts b/src/ast-analysis/engine.ts index 18e0e649..cc8c9d37 100644 --- a/src/ast-analysis/engine.ts +++ b/src/ast-analysis/engine.ts @@ -215,25 +215,37 @@ function runNativeAnalysis( } } +/** Index native results by line number and match to a definition by name. */ +function indexNativeByLine( + results: T[], +): Map { + const byLine = new Map(); + for (const r of results) { + if (!byLine.has(r.line)) byLine.set(r.line, []); + byLine.get(r.line)!.push(r); + } + return byLine; +} + +function matchNativeResult( + candidates: T[] | undefined, + defName: string, +): T | undefined { + if (!candidates) return undefined; + if (candidates.length === 1) return candidates[0]; + return candidates.find((r) => r.name === defName) ?? candidates[0]; +} + /** Store native complexity results on definitions, matched by line number. */ function storeNativeComplexityResults( results: NativeFunctionComplexityResult[], defs: Definition[], ): void { - const byLine = new Map(); - for (const r of results) { - if (!byLine.has(r.line)) byLine.set(r.line, []); - byLine.get(r.line)!.push(r); - } + const byLine = indexNativeByLine(results); for (const def of defs) { if ((def.kind === 'function' || def.kind === 'method') && def.line && !def.complexity) { - const candidates = byLine.get(def.line); - if (!candidates) continue; - const match = - candidates.length === 1 - ? candidates[0] - : (candidates.find((r) => r.name === def.name) ?? 
candidates[0]); + const match = matchNativeResult(byLine.get(def.line), def.name); if (!match) continue; const { complexity: c } = match; def.complexity = { @@ -284,11 +296,7 @@ function overrideCyclomaticFromCfg(def: Definition, cfgCyclomatic: number): void /** Store native CFG results on definitions, matched by line number. */ function storeNativeCfgResults(results: NativeFunctionCfgResult[], defs: Definition[]): void { - const byLine = new Map(); - for (const r of results) { - if (!byLine.has(r.line)) byLine.set(r.line, []); - byLine.get(r.line)!.push(r); - } + const byLine = indexNativeByLine(results); for (const def of defs) { if ( @@ -297,12 +305,7 @@ function storeNativeCfgResults(results: NativeFunctionCfgResult[], defs: Definit def.cfg !== null && !def.cfg?.blocks?.length ) { - const candidates = byLine.get(def.line); - if (!candidates) continue; - const match = - candidates.length === 1 - ? candidates[0] - : (candidates.find((r) => r.name === def.name) ?? candidates[0]); + const match = matchNativeResult(byLine.get(def.line), def.name); if (!match) continue; def.cfg = match.cfg; @@ -353,42 +356,61 @@ function reconcileCfgCyclomatic(fileSymbols: Map): void // ─── WASM pre-parse ───────────────────────────────────────────────────── +/** Check whether a single file needs a WASM tree for any enabled analysis pass. 
*/ +function fileNeedsWasmTree( + relPath: string, + symbols: ExtractorOutput, + flags: { doAst: boolean; doComplexity: boolean; doCfg: boolean; doDataflow: boolean }, +): boolean { + if (symbols._tree) return false; + const ext = path.extname(relPath).toLowerCase(); + const defs = symbols.definitions || []; + const lid = symbols._langId || ''; + + if ( + flags.doAst && + !Array.isArray(symbols.astNodes) && + (WALK_EXTENSIONS.has(ext) || AST_TYPE_MAPS.has(lid)) + ) + return true; + if ( + flags.doComplexity && + (COMPLEXITY_EXTENSIONS.has(ext) || COMPLEXITY_RULES.has(lid)) && + defs.some((d) => hasFuncBody(d) && !d.complexity) + ) + return true; + if ( + flags.doCfg && + (CFG_EXTENSIONS.has(ext) || CFG_RULES.has(lid)) && + defs.some((d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks)) + ) + return true; + if ( + flags.doDataflow && + !symbols.dataflow && + (DATAFLOW_EXTENSIONS.has(ext) || DATAFLOW_RULES.has(lid)) + ) + return true; + return false; +} + async function ensureWasmTreesIfNeeded( fileSymbols: Map, opts: AnalysisOpts, rootDir: string, ): Promise { - const doAst = opts.ast !== false; - const doComplexity = opts.complexity !== false; - const doCfg = opts.cfg !== false; - const doDataflow = opts.dataflow !== false; + const flags = { + doAst: opts.ast !== false, + doComplexity: opts.complexity !== false, + doCfg: opts.cfg !== false, + doDataflow: opts.dataflow !== false, + }; - if (!doAst && !doComplexity && !doCfg && !doDataflow) return; + if (!flags.doAst && !flags.doComplexity && !flags.doCfg && !flags.doDataflow) return; let needsWasmTrees = false; for (const [relPath, symbols] of fileSymbols) { - if (symbols._tree) continue; - const ext = path.extname(relPath).toLowerCase(); - const defs = symbols.definitions || []; - - // AST: need tree when native didn't provide non-call astNodes - const lid = symbols._langId || ''; - const needsAst = - doAst && - !Array.isArray(symbols.astNodes) && - (WALK_EXTENSIONS.has(ext) || 
AST_TYPE_MAPS.has(lid)); - const needsComplexity = - doComplexity && - (COMPLEXITY_EXTENSIONS.has(ext) || COMPLEXITY_RULES.has(lid)) && - defs.some((d) => hasFuncBody(d) && !d.complexity); - const needsCfg = - doCfg && - (CFG_EXTENSIONS.has(ext) || CFG_RULES.has(lid)) && - defs.some((d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks)); - const needsDataflow = - doDataflow && !symbols.dataflow && (DATAFLOW_EXTENSIONS.has(ext) || DATAFLOW_RULES.has(lid)); - - if (needsAst || needsComplexity || needsCfg || needsDataflow) { + if (fileNeedsWasmTree(relPath, symbols, flags)) { needsWasmTrees = true; break; } diff --git a/src/ast-analysis/visitors/ast-store-visitor.ts b/src/ast-analysis/visitors/ast-store-visitor.ts index 84d770f4..c21dd306 100644 --- a/src/ast-analysis/visitors/ast-store-visitor.ts +++ b/src/ast-analysis/visitors/ast-store-visitor.ts @@ -102,27 +102,25 @@ export function createAstStoreVisitor( return nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null; } - function resolveNameAndText( - node: TreeSitterNode, - kind: string, - ): { name: string | null | undefined; text: string | null; skip?: boolean } { - switch (kind) { - case 'new': - return { name: extractNewName(node), text: truncate(node.text) }; - case 'throw': - return { name: extractThrowName(node), text: extractExpressionText(node) }; - case 'await': - return { name: extractAwaitName(node), text: extractExpressionText(node) }; - case 'string': { - const content = node.text?.replace(/^['"`]|['"`]$/g, '') || ''; - if (content.length < 2) return { name: null, text: null, skip: true }; - return { name: truncate(content, 100), text: truncate(node.text) }; - } - case 'regex': - return { name: node.text || '?', text: truncate(node.text) }; - default: - return { name: undefined, text: null }; - } + type NameTextResult = { name: string | null | undefined; text: string | null; skip?: boolean }; + type KindHandler = (node: TreeSitterNode) => NameTextResult; + 
+ const kindHandlers: Record = { + new: (node) => ({ name: extractNewName(node), text: truncate(node.text) }), + throw: (node) => ({ name: extractThrowName(node), text: extractExpressionText(node) }), + await: (node) => ({ name: extractAwaitName(node), text: extractExpressionText(node) }), + string: (node) => { + const content = node.text?.replace(/^['"`]|['"`]$/g, '') || ''; + if (content.length < 2) return { name: null, text: null, skip: true }; + return { name: truncate(content, 100), text: truncate(node.text) }; + }, + regex: (node) => ({ name: node.text || '?', text: truncate(node.text) }), + }; + const defaultResult: NameTextResult = { name: undefined, text: null }; + + function resolveNameAndText(node: TreeSitterNode, kind: string): NameTextResult { + const handler = kindHandlers[kind]; + return handler ? handler(node) : defaultResult; } function collectNode(node: TreeSitterNode, kind: string): void { diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts index 40c86f0d..1e1767dd 100644 --- a/src/domain/graph/builder/pipeline.ts +++ b/src/domain/graph/builder/pipeline.ts @@ -251,6 +251,363 @@ function refreshJsDb(ctx: PipelineContext): void { ctx.db = openDb(ctx.dbPath); } +// ── Native orchestrator types ────────────────────────────────────────── + +interface NativeOrchestratorResult { + phases: Record; + earlyExit?: boolean; + nodeCount?: number; + edgeCount?: number; + fileCount?: number; + changedFiles?: string[]; + changedCount?: number; + removedCount?: number; + isFullBuild?: boolean; +} + +// ── Native orchestrator helpers ─────────────────────────────────────── + +/** Determine whether the native orchestrator should be skipped. Returns a reason string, or null if it should run. 
*/ +function shouldSkipNativeOrchestrator(ctx: PipelineContext): string | null { + if (process.env.CODEGRAPH_FORCE_JS_PIPELINE === '1') return 'CODEGRAPH_FORCE_JS_PIPELINE=1'; + if (ctx.forceFullRebuild) return 'forceFullRebuild'; + const orchestratorBuggy = !!ctx.engineVersion && semverCompare(ctx.engineVersion, '3.8.1') <= 0; + if (orchestratorBuggy) return `buggy addon ${ctx.engineVersion}`; + if (ctx.engineName !== 'native') return `engine=${ctx.engineName}`; + return null; +} + +/** Checkpoint WAL through rusqlite, close nativeDb, and reopen better-sqlite3. + * Returns false if the DB reopen fails (caller should return partial result). */ +function handoffWalAfterNativeBuild(ctx: PipelineContext): boolean { + try { + ctx.nativeDb!.exec('PRAGMA wal_checkpoint(TRUNCATE)'); + } catch { + /* ignore checkpoint errors */ + } + try { + ctx.nativeDb!.close(); + } catch { + /* ignore close errors */ + } + ctx.nativeDb = undefined; + try { + ctx.db.close(); + } catch { + /* ignore close errors */ + } + ctx.db = null!; // avoid closeDbPair operating on a stale handle + try { + ctx.db = openDb(ctx.dbPath); + return true; + } catch (reopenErr) { + warn(`Failed to reopen DB after native build: ${(reopenErr as Error).message}`); + return false; + } +} + +/** Reconstruct fileSymbols from the DB after a native orchestrator build. 
*/ +function reconstructFileSymbolsFromDb(ctx: PipelineContext): Map { + const allFileRows = ctx.db + .prepare( + 'SELECT file, name, kind, line, end_line as endLine FROM nodes WHERE file IS NOT NULL ORDER BY file, line', + ) + .all() as { + file: string; + name: string; + kind: string; + line: number; + endLine: number | null; + }[]; + + const allFileSymbols = new Map(); + for (const row of allFileRows) { + let entry = allFileSymbols.get(row.file); + if (!entry) { + entry = { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + typeMap: new Map(), + }; + allFileSymbols.set(row.file, entry); + } + entry.definitions.push({ + name: row.name, + kind: row.kind as Definition['kind'], + line: row.line, + endLine: row.endLine ?? undefined, + }); + } + + // Populate import/export counts from DB edges so buildStructure + // computes correct import_count/export_count in node_metrics. + // The extractor arrays aren't persisted to the DB, so we derive + // counts from edge data instead (#804). 
+ const importCountRows = ctx.db + .prepare( + `SELECT n.file, COUNT(*) AS cnt + FROM edges e JOIN nodes n ON e.source_id = n.id + WHERE e.kind IN ('imports', 'imports-type', 'dynamic-imports') + AND n.file IS NOT NULL + GROUP BY n.file`, + ) + .all() as { file: string; cnt: number }[]; + for (const row of importCountRows) { + const entry = allFileSymbols.get(row.file); + if (entry) entry.imports = new Array(row.cnt) as ExtractorOutput['imports']; + } + + const exportCountRows = ctx.db + .prepare( + `SELECT n_tgt.file, COUNT(DISTINCT n_tgt.id) AS cnt + FROM edges e + JOIN nodes n_tgt ON e.target_id = n_tgt.id + JOIN nodes n_src ON e.source_id = n_src.id + WHERE e.kind IN ('imports', 'imports-type', 'reexports') + AND n_tgt.file IS NOT NULL + AND n_src.file != n_tgt.file + GROUP BY n_tgt.file`, + ) + .all() as { file: string; cnt: number }[]; + for (const row of exportCountRows) { + const entry = allFileSymbols.get(row.file); + if (entry) entry.exports = new Array(row.cnt) as ExtractorOutput['exports']; + } + + return allFileSymbols; +} + +/** Run JS buildStructure() after native orchestrator to fill directory nodes + contains edges. 
*/ +async function runPostNativeStructure( + ctx: PipelineContext, + allFileSymbols: Map, +): Promise { + const structureStart = performance.now(); + try { + const directories = new Set(); + for (const relPath of allFileSymbols.keys()) { + const parts = relPath.split('/'); + for (let i = 1; i < parts.length; i++) { + directories.add(parts.slice(0, i).join('/')); + } + } + + const lineCountMap = new Map(); + const cachedLineCounts = ctx.db + .prepare( + `SELECT n.name AS file, m.line_count + FROM node_metrics m JOIN nodes n ON m.node_id = n.id + WHERE n.kind = 'file'`, + ) + .all() as Array<{ file: string; line_count: number }>; + for (const row of cachedLineCounts) { + lineCountMap.set(row.file, row.line_count); + } + + const changedFilePaths = null; // full rebuild — every directory gets nodes + const { buildStructure: buildStructureFn } = (await import( + '../../../features/structure.js' + )) as { + buildStructure: ( + db: typeof ctx.db, + fileSymbols: Map, + rootDir: string, + lineCountMap: Map, + directories: Set, + changedFiles: string[] | null, + ) => void; + }; + buildStructureFn( + ctx.db, + allFileSymbols, + ctx.rootDir, + lineCountMap, + directories, + changedFilePaths, + ); + debug('Structure phase completed after native orchestrator'); + } catch (err) { + warn(`Structure phase failed after native build: ${toErrorMessage(err)}`); + } + return performance.now() - structureStart; +} + +/** Run AST/complexity/CFG/dataflow analysis after native orchestrator. 
*/ +async function runPostNativeAnalysis( + ctx: PipelineContext, + allFileSymbols: Map, + changedFiles: string[] | undefined, +): Promise<{ astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number }> { + const timing = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 }; + + // Scope analysis fileSymbols to changed files only + let analysisFileSymbols: Map; + if (changedFiles && changedFiles.length > 0) { + analysisFileSymbols = new Map(); + for (const f of changedFiles) { + const entry = allFileSymbols.get(f); + if (entry) analysisFileSymbols.set(f, entry); + } + } else { + analysisFileSymbols = allFileSymbols; + } + + // Reopen nativeDb for analysis features (suspend/resume WAL pattern). + const native = loadNative(); + if (native?.NativeDatabase) { + try { + ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath); + if (ctx.engineOpts) ctx.engineOpts.nativeDb = ctx.nativeDb; + } catch { + ctx.nativeDb = undefined; + if (ctx.engineOpts) ctx.engineOpts.nativeDb = undefined; + } + } + + try { + const { runAnalyses: runAnalysesFn } = await import('../../../ast-analysis/engine.js'); + const result = await runAnalysesFn( + ctx.db, + analysisFileSymbols, + ctx.rootDir, + ctx.opts, + ctx.engineOpts, + ); + timing.astMs = result.astMs ?? 0; + timing.complexityMs = result.complexityMs ?? 0; + timing.cfgMs = result.cfgMs ?? 0; + timing.dataflowMs = result.dataflowMs ?? 0; + } catch (err) { + warn(`Analysis phases failed after native build: ${toErrorMessage(err)}`); + } + + // Close nativeDb after analyses + if (ctx.nativeDb) { + try { + ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)'); + } catch { + /* ignore checkpoint errors */ + } + try { + ctx.nativeDb.close(); + } catch { + /* ignore close errors */ + } + ctx.nativeDb = undefined; + if (ctx.engineOpts) ctx.engineOpts.nativeDb = undefined; + } + + return timing; +} + +/** Format timing result from native orchestrator phases + JS post-processing. 
*/ +function formatNativeTimingResult( + p: Record<string, number>, + structurePatchMs: number, + analysisTiming: { astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number }, +): BuildResult { + return { + phases: { + setupMs: +((p.setupMs ?? 0) + (p.collectMs ?? 0) + (p.detectMs ?? 0)).toFixed(1), + parseMs: +(p.parseMs ?? 0).toFixed(1), + insertMs: +(p.insertMs ?? 0).toFixed(1), + resolveMs: +(p.resolveMs ?? 0).toFixed(1), + edgesMs: +(p.edgesMs ?? 0).toFixed(1), + structureMs: +((p.structureMs ?? 0) + structurePatchMs).toFixed(1), + rolesMs: +(p.rolesMs ?? 0).toFixed(1), + astMs: +(analysisTiming.astMs ?? 0).toFixed(1), + complexityMs: +(analysisTiming.complexityMs ?? 0).toFixed(1), + cfgMs: +(analysisTiming.cfgMs ?? 0).toFixed(1), + dataflowMs: +(analysisTiming.dataflowMs ?? 0).toFixed(1), + finalizeMs: +(p.finalizeMs ?? 0).toFixed(1), + }, + }; +} + +/** Try the native build orchestrator. Returns a BuildResult on success, 'early-exit' when no changes were detected, or undefined to fall through to the JS pipeline. */ +async function tryNativeOrchestrator( + ctx: PipelineContext, +): Promise<BuildResult | 'early-exit' | undefined> { + const skipReason = shouldSkipNativeOrchestrator(ctx); + if (skipReason) { + debug(`Skipping native orchestrator: ${skipReason}`); + return undefined; + } + if (!ctx.nativeDb?.buildGraph) return undefined; + + const resultJson = ctx.nativeDb.buildGraph( + ctx.rootDir, + JSON.stringify(ctx.config), + JSON.stringify(ctx.aliases), + JSON.stringify(ctx.opts), + ); + const result = JSON.parse(resultJson) as NativeOrchestratorResult; + + if (result.earlyExit) { + info('No changes detected'); + closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb }); + return 'early-exit'; + } + + // Log incremental status to match JS pipeline output + const changed = result.changedCount ?? 0; + const removed = result.removedCount ?? 
0; + if (!result.isFullBuild && (changed > 0 || removed > 0)) { + info(`Incremental: ${changed} changed, ${removed} removed`); + } + + const p = result.phases; + + // Sync build_meta so JS-side version/engine checks work on next build. + setBuildMeta(ctx.db, { + engine: ctx.engineName, + engine_version: ctx.engineVersion || '', + codegraph_version: CODEGRAPH_VERSION, + schema_version: String(ctx.schemaVersion), + built_at: new Date().toISOString(), + node_count: String(result.nodeCount ?? 0), + edge_count: String(result.edgeCount ?? 0), + }); + + info( + `Native build orchestrator completed: ${result.nodeCount ?? 0} nodes, ${result.edgeCount ?? 0} edges, ${result.fileCount ?? 0} files`, + ); + + // ── Post-native structure + analysis ────────────────────────────── + let analysisTiming = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 }; + let structurePatchMs = 0; + const needsAnalysis = + ctx.opts.ast !== false || + ctx.opts.complexity !== false || + ctx.opts.cfg !== false || + ctx.opts.dataflow !== false; + // Always run JS structure — native fast-path guard can't be reliably detected. 
+ const needsStructure = true; + + if (needsAnalysis || needsStructure) { + if (!handoffWalAfterNativeBuild(ctx)) { + // DB reopen failed — return partial result + return formatNativeTimingResult(p, 0, analysisTiming); + } + + const allFileSymbols = reconstructFileSymbolsFromDb(ctx); + + if (needsStructure) { + structurePatchMs = await runPostNativeStructure(ctx, allFileSymbols); + } + + if (needsAnalysis) { + analysisTiming = await runPostNativeAnalysis(ctx, allFileSymbols, result.changedFiles); + } + } + + closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb }); + return formatNativeTimingResult(p, structurePatchMs, analysisTiming); +} + // ── Pipeline stages execution ─────────────────────────────────────────── async function runPipelineStages(ctx: PipelineContext): Promise { @@ -338,368 +695,13 @@ export async function buildGraph( // When available, run the entire build pipeline in Rust with zero // napi crossings (eliminates WAL dual-connection dance). Falls back // to the JS pipeline on failure or when native is unavailable. - // - // Native addon ≤3.8.0 has a path bug: file_symbols keys are absolute - // paths but known_files are relative, causing zero import/call edges. - // Native addon ≤3.8.1 has an incremental barrel bug: the Rust pipeline - // doesn't re-parse barrel files that are imported by changed files, - // causing missing barrel import edges and lost analysis data for - // reverse-dep files during incremental builds. - // Skip the orchestrator for affected versions (fixed in 3.9.0+). - const orchestratorBuggy = !!ctx.engineVersion && semverCompare(ctx.engineVersion, '3.8.1') <= 0; - const forceJs = - process.env.CODEGRAPH_FORCE_JS_PIPELINE === '1' || - ctx.forceFullRebuild || - orchestratorBuggy || - ctx.engineName !== 'native'; - if (forceJs) { - const reason = - process.env.CODEGRAPH_FORCE_JS_PIPELINE === '1' - ? 'CODEGRAPH_FORCE_JS_PIPELINE=1' - : ctx.forceFullRebuild - ? 'forceFullRebuild' - : orchestratorBuggy - ? 
`buggy addon ${ctx.engineVersion}` - : `engine=${ctx.engineName}`; - debug(`Skipping native orchestrator: ${reason}`); - } - if (!forceJs && ctx.nativeDb?.buildGraph) { - try { - const resultJson = ctx.nativeDb.buildGraph( - ctx.rootDir, - JSON.stringify(ctx.config), - JSON.stringify(ctx.aliases), - JSON.stringify(opts), - ); - const result = JSON.parse(resultJson) as { - phases: Record; - earlyExit?: boolean; - nodeCount?: number; - edgeCount?: number; - fileCount?: number; - changedFiles?: string[]; - changedCount?: number; - removedCount?: number; - isFullBuild?: boolean; - }; - - if (result.earlyExit) { - info('No changes detected'); - closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb }); - return; - } - - // Log incremental status to match JS pipeline output - const changed = result.changedCount ?? 0; - const removed = result.removedCount ?? 0; - if (!result.isFullBuild && (changed > 0 || removed > 0)) { - info(`Incremental: ${changed} changed, ${removed} removed`); - } - - // Map Rust timing fields to the JS BuildResult format. - // Rust handles collect+detect+parse+insert+resolve+edges+structure+roles. - const p = result.phases; - - // Sync build_meta so JS-side version/engine checks work on next build. - // Note: the Rust orchestrator also writes codegraph_version (using - // CARGO_PKG_VERSION). We intentionally overwrite it here with the npm - // package version so that the JS-side "version changed → full rebuild" - // detection (line ~97) compares against the authoritative JS version. - // The two versions are kept in lockstep by the release process. - setBuildMeta(ctx.db, { - engine: ctx.engineName, - engine_version: ctx.engineVersion || '', - codegraph_version: CODEGRAPH_VERSION, - schema_version: String(ctx.schemaVersion), - built_at: new Date().toISOString(), - node_count: String(result.nodeCount ?? 0), - edge_count: String(result.edgeCount ?? 0), - }); - - info( - `Native build orchestrator completed: ${result.nodeCount ?? 
0} nodes, ${result.edgeCount ?? 0} edges, ${result.fileCount ?? 0} files`, - ); - - // ── Run structure + analysis phases after native orchestrator ── - // Structure (directory nodes, contains edges, metrics) is not fully - // ported to Rust — the native pipeline only handles the small - // incremental fast path (≤5 changed files). For full builds and - // larger incremental builds, run JS buildStructure() to fill the gap. - // Analysis phases (AST, complexity, CFG, dataflow) are also not yet - // ported; run via JS engine after reconstructing fileSymbols from DB. - let analysisTiming = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 }; - let structurePatchMs = 0; - const needsAnalysis = - opts.ast !== false || - opts.complexity !== false || - opts.cfg !== false || - opts.dataflow !== false; - - // The native fast path only runs structure for small incremental - // builds: !isFullBuild && changedCount <= 5 && existingFileCount > 20. - // For all other cases (full builds, large incrementals), we must - // run JS buildStructure() to create directory nodes + contains edges (#804). - // Always run JS structure — the native fast-path has an additional - // existingFileCount > 20 guard that isn't reflected in the result JSON, - // so we can't reliably detect whether native actually ran structure. - const nativeHandledStructure = false; - const needsStructure = !nativeHandledStructure; - - if (needsAnalysis || needsStructure) { - // WAL handoff: checkpoint through rusqlite, close nativeDb, - // reopen better-sqlite3 with a fresh page cache (#715, #736). 
- try { - ctx.nativeDb!.exec('PRAGMA wal_checkpoint(TRUNCATE)'); - } catch { - /* ignore checkpoint errors */ - } - try { - ctx.nativeDb!.close(); - } catch { - /* ignore close errors */ - } - ctx.nativeDb = undefined; - try { - ctx.db.close(); - } catch { - /* ignore close errors */ - } - ctx.db = null!; // avoid closeDbPair operating on a stale handle - try { - ctx.db = openDb(ctx.dbPath); - } catch (reopenErr) { - warn(`Failed to reopen DB after native build: ${(reopenErr as Error).message}`); - // Native build succeeded but we can't run post-processing — return partial result - return { - phases: { - setupMs: +((p.setupMs ?? 0) + (p.collectMs ?? 0) + (p.detectMs ?? 0)).toFixed(1), - parseMs: +(p.parseMs ?? 0).toFixed(1), - insertMs: +(p.insertMs ?? 0).toFixed(1), - resolveMs: +(p.resolveMs ?? 0).toFixed(1), - edgesMs: +(p.edgesMs ?? 0).toFixed(1), - structureMs: +(p.structureMs ?? 0).toFixed(1), - rolesMs: +(p.rolesMs ?? 0).toFixed(1), - astMs: 0, - complexityMs: 0, - cfgMs: 0, - dataflowMs: 0, - finalizeMs: +(p.finalizeMs ?? 0).toFixed(1), - }, - }; - } - - // Reconstruct fileSymbols from DB. For structure we need ALL files - // (to build complete directory tree); for analysis we scope to - // changed files only. Load all files, then scope analysis later. - const allFileRows = ctx.db - .prepare( - 'SELECT file, name, kind, line, end_line as endLine FROM nodes WHERE file IS NOT NULL ORDER BY file, line', - ) - .all() as { - file: string; - name: string; - kind: string; - line: number; - endLine: number | null; - }[]; - - const allFileSymbols = new Map(); - for (const row of allFileRows) { - let entry = allFileSymbols.get(row.file); - if (!entry) { - entry = { - definitions: [], - calls: [], - imports: [], - classes: [], - exports: [], - typeMap: new Map(), - }; - allFileSymbols.set(row.file, entry); - } - entry.definitions.push({ - name: row.name, - kind: row.kind as Definition['kind'], - line: row.line, - endLine: row.endLine ?? 
undefined, - }); - } - - // Populate import/export counts from DB edges so buildStructure - // computes correct import_count/export_count in node_metrics. - // The extractor arrays aren't persisted to the DB, so we derive - // counts from edge data instead (#804). - const importCountRows = ctx.db - .prepare( - `SELECT n.file, COUNT(*) AS cnt - FROM edges e JOIN nodes n ON e.source_id = n.id - WHERE e.kind IN ('imports', 'imports-type', 'dynamic-imports') - AND n.file IS NOT NULL - GROUP BY n.file`, - ) - .all() as { file: string; cnt: number }[]; - for (const row of importCountRows) { - const entry = allFileSymbols.get(row.file); - if (entry) entry.imports = new Array(row.cnt) as ExtractorOutput['imports']; - } - // Export count: definitions in this file that are imported by other files - const exportCountRows = ctx.db - .prepare( - `SELECT n_tgt.file, COUNT(DISTINCT n_tgt.id) AS cnt - FROM edges e - JOIN nodes n_tgt ON e.target_id = n_tgt.id - JOIN nodes n_src ON e.source_id = n_src.id - WHERE e.kind IN ('imports', 'imports-type', 'reexports') - AND n_tgt.file IS NOT NULL - AND n_src.file != n_tgt.file - GROUP BY n_tgt.file`, - ) - .all() as { file: string; cnt: number }[]; - for (const row of exportCountRows) { - const entry = allFileSymbols.get(row.file); - if (entry) entry.exports = new Array(row.cnt) as ExtractorOutput['exports']; - } - - // ── Structure phase: directory nodes + contains edges (#804) ── - if (needsStructure) { - const structureStart = performance.now(); - try { - // Derive directories from file paths - const directories = new Set(); - for (const relPath of allFileSymbols.keys()) { - const parts = relPath.split('/'); - for (let i = 1; i < parts.length; i++) { - directories.add(parts.slice(0, i).join('/')); - } - } - - // Build line count map from DB metrics or file content - const lineCountMap = new Map(); - const cachedLineCounts = ctx.db - .prepare( - `SELECT n.name AS file, m.line_count - FROM node_metrics m JOIN nodes n ON m.node_id = n.id 
- WHERE n.kind = 'file'`, - ) - .all() as Array<{ file: string; line_count: number }>; - for (const row of cachedLineCounts) { - lineCountMap.set(row.file, row.line_count); - } - - // Native ran no structure at all — always do a full rebuild so - // every directory gets nodes + contains edges (#804). - const changedFilePaths = null; - - const { buildStructure: buildStructureFn } = (await import( - '../../../features/structure.js' - )) as { - buildStructure: ( - db: typeof ctx.db, - fileSymbols: Map, - rootDir: string, - lineCountMap: Map, - directories: Set, - changedFiles: string[] | null, - ) => void; - }; - buildStructureFn( - ctx.db, - allFileSymbols, - ctx.rootDir, - lineCountMap, - directories, - changedFilePaths, - ); - debug('Structure phase completed after native orchestrator'); - } catch (err) { - warn(`Structure phase failed after native build: ${toErrorMessage(err)}`); - } - structurePatchMs = performance.now() - structureStart; - } - - // ── Analysis phase ── - if (needsAnalysis) { - // Scope analysis fileSymbols to changed files only - const changedFiles = result.changedFiles; - let analysisFileSymbols: Map; - if (changedFiles && changedFiles.length > 0) { - analysisFileSymbols = new Map(); - for (const f of changedFiles) { - const entry = allFileSymbols.get(f); - if (entry) analysisFileSymbols.set(f, entry); - } - } else { - analysisFileSymbols = allFileSymbols; - } - - // Reopen nativeDb for analysis features (suspend/resume WAL pattern). 
- const native = loadNative(); - if (native?.NativeDatabase) { - try { - ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath); - if (ctx.engineOpts) ctx.engineOpts.nativeDb = ctx.nativeDb; - } catch { - ctx.nativeDb = undefined; - if (ctx.engineOpts) ctx.engineOpts.nativeDb = undefined; - } - } - - try { - const { runAnalyses: runAnalysesFn } = await import( - '../../../ast-analysis/engine.js' - ); - analysisTiming = await runAnalysesFn( - ctx.db, - analysisFileSymbols, - ctx.rootDir, - opts, - ctx.engineOpts, - ); - } catch (err) { - warn(`Analysis phases failed after native build: ${toErrorMessage(err)}`); - } - - // Close nativeDb after analyses - if (ctx.nativeDb) { - try { - ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)'); - } catch { - /* ignore checkpoint errors */ - } - try { - ctx.nativeDb.close(); - } catch { - /* ignore close errors */ - } - ctx.nativeDb = undefined; - if (ctx.engineOpts) ctx.engineOpts.nativeDb = undefined; - } - } - } - - closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb }); - return { - phases: { - setupMs: +((p.setupMs ?? 0) + (p.collectMs ?? 0) + (p.detectMs ?? 0)).toFixed(1), - parseMs: +(p.parseMs ?? 0).toFixed(1), - insertMs: +(p.insertMs ?? 0).toFixed(1), - resolveMs: +(p.resolveMs ?? 0).toFixed(1), - edgesMs: +(p.edgesMs ?? 0).toFixed(1), - structureMs: +((p.structureMs ?? 0) + structurePatchMs).toFixed(1), - rolesMs: +(p.rolesMs ?? 0).toFixed(1), - astMs: +(analysisTiming.astMs ?? 0).toFixed(1), - complexityMs: +(analysisTiming.complexityMs ?? 0).toFixed(1), - cfgMs: +(analysisTiming.cfgMs ?? 0).toFixed(1), - dataflowMs: +(analysisTiming.dataflowMs ?? 0).toFixed(1), - finalizeMs: +(p.finalizeMs ?? 
0).toFixed(1), - }, - }; - } catch (err) { - warn( - `Native build orchestrator failed, falling back to JS pipeline: ${toErrorMessage(err)}`, - ); - // Fall through to JS pipeline - } + try { + const nativeResult = await tryNativeOrchestrator(ctx); + if (nativeResult === 'early-exit') return; + if (nativeResult) return nativeResult; + } catch (err) { + warn(`Native build orchestrator failed, falling back to JS pipeline: ${toErrorMessage(err)}`); + // Fall through to JS pipeline } await runPipelineStages(ctx); diff --git a/src/domain/graph/builder/stages/resolve-imports.ts b/src/domain/graph/builder/stages/resolve-imports.ts index 5ab36d03..1ddb5219 100644 --- a/src/domain/graph/builder/stages/resolve-imports.ts +++ b/src/domain/graph/builder/stages/resolve-imports.ts @@ -180,6 +180,13 @@ export function isBarrelFile(ctx: PipelineContext, relPath: string): boolean { return reexports.length >= ownDefs; } +/** Check if a re-export source directly defines the symbol. */ +function sourceDefinesSymbol(ctx: PipelineContext, source: string, symbolName: string): boolean { + const targetSymbols = ctx.fileSymbols.get(source); + if (!targetSymbols) return false; + return targetSymbols.definitions.some((d) => d.name === symbolName); +} + export function resolveBarrelExport( ctx: PipelineContext, barrelPath: string, @@ -188,31 +195,24 @@ export function resolveBarrelExport( ): string | null { if (visited.has(barrelPath)) return null; visited.add(barrelPath); + const reexports = ctx.reexportMap.get(barrelPath) as ReexportEntry[] | undefined; if (!reexports) return null; + for (const re of reexports) { + // Named re-export: only follow if the symbol is in the export list if (re.names.length > 0 && !re.wildcardReexport) { - if (re.names.includes(symbolName)) { - const targetSymbols = ctx.fileSymbols.get(re.source); - if (targetSymbols) { - const hasDef = targetSymbols.definitions.some((d) => d.name === symbolName); - if (hasDef) return re.source; - const deeper = 
resolveBarrelExport(ctx, re.source, symbolName, visited); - if (deeper) return deeper; - } - return re.source; - } - continue; - } - if (re.wildcardReexport || re.names.length === 0) { - const targetSymbols = ctx.fileSymbols.get(re.source); - if (targetSymbols) { - const hasDef = targetSymbols.definitions.some((d) => d.name === symbolName); - if (hasDef) return re.source; - const deeper = resolveBarrelExport(ctx, re.source, symbolName, visited); - if (deeper) return deeper; - } + if (!re.names.includes(symbolName)) continue; + if (sourceDefinesSymbol(ctx, re.source, symbolName)) return re.source; + const deeper = resolveBarrelExport(ctx, re.source, symbolName, visited); + return deeper ?? re.source; } + + // Wildcard or namespace re-export: check if target defines the symbol + if (sourceDefinesSymbol(ctx, re.source, symbolName)) return re.source; + const deeper = resolveBarrelExport(ctx, re.source, symbolName, visited); + if (deeper) return deeper; } + return null; } diff --git a/src/domain/graph/watcher.ts b/src/domain/graph/watcher.ts index 0bc834b3..194f69c6 100644 --- a/src/domain/graph/watcher.ts +++ b/src/domain/graph/watcher.ts @@ -141,7 +141,8 @@ function collectTrackedFiles(dir: string, result: string[]): void { let entries: fs.Dirent[]; try { entries = fs.readdirSync(dir, { withFileTypes: true }); - } catch { + } catch (e: unknown) { + debug(`collectTrackedFiles: cannot read ${dir}: ${(e as Error).message}`); return; } for (const entry of entries) { @@ -155,10 +156,20 @@ function collectTrackedFiles(dir: string, result: string[]): void { } } -export async function watchProject( - rootDir: string, - opts: { engine?: string; poll?: boolean; pollInterval?: number } = {}, -): Promise { +/** Shared watcher state passed between setup and watcher sub-functions. 
*/ +interface WatcherContext { + rootDir: string; + db: ReturnType; + stmts: IncrementalStmts; + engineOpts: import('../../types.js').EngineOpts; + cache: ReturnType; + pending: Set; + timer: ReturnType | null; + debounceMs: number; +} + +/** Initialize DB, engine, cache, and statements for watch mode. */ +function setupWatcher(rootDir: string, opts: { engine?: string }): WatcherContext { const dbPath = path.join(rootDir, '.codegraph', 'graph.db'); if (!fs.existsSync(dbPath)) { throw new DbError('No graph.db found. Run `codegraph build` first.', { file: dbPath }); @@ -183,111 +194,124 @@ export async function watchProject( const stmts = prepareWatcherStatements(db); - const pending = new Set(); - let timer: ReturnType | null = null; - const DEBOUNCE_MS = 300; - - const usePoll = opts.poll ?? process.platform === 'win32'; - const POLL_INTERVAL_MS = opts.pollInterval ?? 2000; + return { + rootDir, + db, + stmts, + engineOpts, + cache, + pending: new Set(), + timer: null, + debounceMs: 300, + }; +} - info(`Watching ${rootDir} for changes${usePoll ? ' (polling mode)' : ''}...`); - info('Press Ctrl+C to stop.'); +/** Schedule debounced processing of pending files. */ +function scheduleDebouncedProcess(ctx: WatcherContext): void { + if (ctx.timer) clearTimeout(ctx.timer); + ctx.timer = setTimeout(async () => { + const files = [...ctx.pending]; + ctx.pending.clear(); + await processPendingFiles(files, ctx.db, ctx.rootDir, ctx.stmts, ctx.engineOpts, ctx.cache); + }, ctx.debounceMs); +} - let cleanup: () => void; +/** Start polling-based file watcher. Returns cleanup function. 
*/ +function startPollingWatcher(ctx: WatcherContext, pollIntervalMs: number): () => void { + const mtimeMap = new Map(); + + const initial: string[] = []; + collectTrackedFiles(ctx.rootDir, initial); + for (const f of initial) { + try { + mtimeMap.set(f, fs.statSync(f).mtimeMs); + } catch { + /* deleted between collect and stat */ + } + } + info(`Polling ${initial.length} tracked files every ${pollIntervalMs}ms`); - if (usePoll) { - // Polling mode: avoids native OS file watchers (NtNotifyChangeDirectoryFileEx) - // which can crash ReFS drivers on Windows Dev Drives. - const mtimeMap = new Map(); + const pollTimer = setInterval(() => { + const current: string[] = []; + collectTrackedFiles(ctx.rootDir, current); + const currentSet = new Set(current); - // Seed initial mtimes - const initial: string[] = []; - collectTrackedFiles(rootDir, initial); - for (const f of initial) { + for (const f of current) { try { - mtimeMap.set(f, fs.statSync(f).mtimeMs); + const mtime = fs.statSync(f).mtimeMs; + const prev = mtimeMap.get(f); + if (prev === undefined || mtime !== prev) { + mtimeMap.set(f, mtime); + ctx.pending.add(f); + } } catch { /* deleted between collect and stat */ } } - info(`Polling ${initial.length} tracked files every ${POLL_INTERVAL_MS}ms`); - - const pollTimer = setInterval(() => { - const current: string[] = []; - collectTrackedFiles(rootDir, current); - const currentSet = new Set(current); - - // Detect modified or new files - for (const f of current) { - try { - const mtime = fs.statSync(f).mtimeMs; - const prev = mtimeMap.get(f); - if (prev === undefined || mtime !== prev) { - mtimeMap.set(f, mtime); - pending.add(f); - } - } catch { - /* deleted between collect and stat */ - } - } - // Detect deleted files - for (const f of mtimeMap.keys()) { - if (!currentSet.has(f)) { - mtimeMap.delete(f); - pending.add(f); - } + for (const f of mtimeMap.keys()) { + if (!currentSet.has(f)) { + mtimeMap.delete(f); + ctx.pending.add(f); } + } - if (pending.size > 0) { - 
if (timer) clearTimeout(timer); - timer = setTimeout(async () => { - const files = [...pending]; - pending.clear(); - await processPendingFiles(files, db, rootDir, stmts, engineOpts, cache); - }, DEBOUNCE_MS); - } - }, POLL_INTERVAL_MS); - - cleanup = () => clearInterval(pollTimer); - } else { - // Native OS watcher — efficient but can trigger ReFS crashes on Windows Dev Drives. - // Use --poll if you experience BSOD/HYPERVISOR_ERROR on ReFS volumes. - const watcher = fs.watch(rootDir, { recursive: true }, (_eventType, filename) => { - if (!filename) return; - if (shouldIgnore(filename)) return; - if (!isTrackedExt(filename)) return; - - const fullPath = path.join(rootDir, filename); - pending.add(fullPath); - - if (timer) clearTimeout(timer); - timer = setTimeout(async () => { - const files = [...pending]; - pending.clear(); - await processPendingFiles(files, db, rootDir, stmts, engineOpts, cache); - }, DEBOUNCE_MS); - }); - - cleanup = () => watcher.close(); - } + if (ctx.pending.size > 0) { + scheduleDebouncedProcess(ctx); + } + }, pollIntervalMs); + + return () => clearInterval(pollTimer); +} + +/** Start native OS file watcher. Returns cleanup function. */ +function startNativeWatcher(ctx: WatcherContext): () => void { + const watcher = fs.watch(ctx.rootDir, { recursive: true }, (_eventType, filename) => { + if (!filename) return; + if (shouldIgnore(filename)) return; + if (!isTrackedExt(filename)) return; + ctx.pending.add(path.join(ctx.rootDir, filename)); + scheduleDebouncedProcess(ctx); + }); + + return () => watcher.close(); +} + +/** Register SIGINT handler to flush journal and clean up. 
*/ +function setupShutdownHandler(ctx: WatcherContext, cleanup: () => void): void { process.on('SIGINT', () => { info('Stopping watcher...'); cleanup(); - // Flush any pending file paths to journal before exit - if (pending.size > 0) { - const entries = [...pending].map((filePath) => ({ - file: normalizePath(path.relative(rootDir, filePath)), + if (ctx.pending.size > 0) { + const entries = [...ctx.pending].map((filePath) => ({ + file: normalizePath(path.relative(ctx.rootDir, filePath)), })); try { - appendJournalEntries(rootDir, entries); + appendJournalEntries(ctx.rootDir, entries); } catch (e: unknown) { debug(`Journal flush on exit failed (non-fatal): ${(e as Error).message}`); } } - if (cache) cache.clear(); - closeDb(db); + if (ctx.cache) ctx.cache.clear(); + closeDb(ctx.db); process.exit(0); }); } + +export async function watchProject( + rootDir: string, + opts: { engine?: string; poll?: boolean; pollInterval?: number } = {}, +): Promise { + const ctx = setupWatcher(rootDir, opts); + + const usePoll = opts.poll ?? process.platform === 'win32'; + const pollIntervalMs = opts.pollInterval ?? 2000; + + info(`Watching ${rootDir} for changes${usePoll ? ' (polling mode)' : ''}...`); + info('Press Ctrl+C to stop.'); + + const cleanup = usePoll ? startPollingWatcher(ctx, pollIntervalMs) : startNativeWatcher(ctx); + + setupShutdownHandler(ctx, cleanup); +} diff --git a/src/extractors/go.ts b/src/extractors/go.ts index 13124b26..6019b910 100644 --- a/src/extractors/go.ts +++ b/src/extractors/go.ts @@ -266,44 +266,69 @@ function handleTypedIdentifiers( } /** Infer type from a single RHS expression in a short var declaration. */ -function inferShortVarType( +/** x := Struct{...} — composite literal (confidence 1.0). 
*/ +function inferCompositeLiteral( varNode: TreeSitterNode, rhs: TreeSitterNode, typeMap: Map, -): void { - // x := Struct{...} — composite literal (confidence 1.0) - if (rhs.type === 'composite_literal') { - const typeNode = rhs.childForFieldName('type'); - if (typeNode) { - const typeName = extractGoTypeName(typeNode); - if (typeName) setTypeMapEntry(typeMap, varNode.text, typeName, 1.0); - } - } - // x := &Struct{...} — address-of composite literal (confidence 1.0) - if (rhs.type === 'unary_expression') { - const operand = rhs.childForFieldName('operand'); - if (operand && operand.type === 'composite_literal') { - const typeNode = operand.childForFieldName('type'); - if (typeNode) { - const typeName = extractGoTypeName(typeNode); - if (typeName) setTypeMapEntry(typeMap, varNode.text, typeName, 1.0); - } - } - } - // x := NewFoo() or x := pkg.NewFoo() — factory function (confidence 0.7) - if (rhs.type === 'call_expression') { - const fn = rhs.childForFieldName('function'); - if (fn && fn.type === 'selector_expression') { - const field = fn.childForFieldName('field'); - if (field?.text.startsWith('New')) { - const typeName = field.text.slice(3); - if (typeName) setTypeMapEntry(typeMap, varNode.text, typeName, 0.7); - } - } else if (fn && fn.type === 'identifier' && fn.text.startsWith('New')) { - const typeName = fn.text.slice(3); +): boolean { + if (rhs.type !== 'composite_literal') return false; + const typeNode = rhs.childForFieldName('type'); + if (!typeNode) return false; + const typeName = extractGoTypeName(typeNode); + if (typeName) setTypeMapEntry(typeMap, varNode.text, typeName, 1.0); + return true; +} + +/** x := &Struct{...} — address-of composite literal (confidence 1.0). 
*/ +function inferAddressOfComposite( + varNode: TreeSitterNode, + rhs: TreeSitterNode, + typeMap: Map, +): boolean { + if (rhs.type !== 'unary_expression') return false; + const operand = rhs.childForFieldName('operand'); + if (!operand || operand.type !== 'composite_literal') return false; + const typeNode = operand.childForFieldName('type'); + if (!typeNode) return false; + const typeName = extractGoTypeName(typeNode); + if (typeName) setTypeMapEntry(typeMap, varNode.text, typeName, 1.0); + return true; +} + +/** x := NewFoo() or x := pkg.NewFoo() — factory function (confidence 0.7). */ +function inferFactoryCall( + varNode: TreeSitterNode, + rhs: TreeSitterNode, + typeMap: Map, +): boolean { + if (rhs.type !== 'call_expression') return false; + const fn = rhs.childForFieldName('function'); + if (!fn) return false; + + if (fn.type === 'selector_expression') { + const field = fn.childForFieldName('field'); + if (field?.text.startsWith('New')) { + const typeName = field.text.slice(3); if (typeName) setTypeMapEntry(typeMap, varNode.text, typeName, 0.7); + return true; } + } else if (fn.type === 'identifier' && fn.text.startsWith('New')) { + const typeName = fn.text.slice(3); + if (typeName) setTypeMapEntry(typeMap, varNode.text, typeName, 0.7); + return true; } + return false; +} + +function inferShortVarType( + varNode: TreeSitterNode, + rhs: TreeSitterNode, + typeMap: Map, +): void { + if (inferCompositeLiteral(varNode, rhs, typeMap)) return; + if (inferAddressOfComposite(varNode, rhs, typeMap)) return; + inferFactoryCall(varNode, rhs, typeMap); } /** Handle short_var_declaration: x := Struct{}, x := &Struct{}, x := NewFoo(). 
*/ diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts index e699d085..9e62a678 100644 --- a/src/extractors/javascript.ts +++ b/src/extractors/javascript.ts @@ -202,6 +202,48 @@ function handleExportCapture( } } +function handleInterfaceCapture( + c: Record, + definitions: Definition[], +): void { + const ifaceNode = c.iface_node!; + const ifaceName = c.iface_name!.text; + definitions.push({ + name: ifaceName, + kind: 'interface', + line: ifaceNode.startPosition.row + 1, + endLine: nodeEndLine(ifaceNode), + }); + const body = + ifaceNode.childForFieldName('body') || + findChild(ifaceNode, 'interface_body') || + findChild(ifaceNode, 'object_type'); + if (body) extractInterfaceMethods(body, ifaceName, definitions); +} + +function handleTypeCapture(c: Record, definitions: Definition[]): void { + const typeNode = c.type_node!; + definitions.push({ + name: c.type_name!.text, + kind: 'type', + line: typeNode.startPosition.row + 1, + endLine: nodeEndLine(typeNode), + }); +} + +function handleImportCapture(c: Record, imports: Import[]): void { + const impNode = c.imp_node!; + const isTypeOnly = impNode.text.startsWith('import type'); + const modPath = c.imp_source!.text.replace(/['"]/g, ''); + const names = extractImportNames(impNode); + imports.push({ + source: modPath, + names, + line: impNode.startPosition.row + 1, + typeOnly: isTypeOnly, + }); +} + /** Dispatch a single query match to the appropriate handler. 
*/ function dispatchQueryMatch( c: Record, @@ -220,35 +262,11 @@ function dispatchQueryMatch( } else if (c.meth_node) { handleMethodCapture(c, definitions); } else if (c.iface_node) { - const ifaceName = c.iface_name!.text; - definitions.push({ - name: ifaceName, - kind: 'interface', - line: c.iface_node.startPosition.row + 1, - endLine: nodeEndLine(c.iface_node), - }); - const body = - c.iface_node.childForFieldName('body') || - findChild(c.iface_node, 'interface_body') || - findChild(c.iface_node, 'object_type'); - if (body) extractInterfaceMethods(body, ifaceName, definitions); + handleInterfaceCapture(c, definitions); } else if (c.type_node) { - definitions.push({ - name: c.type_name!.text, - kind: 'type', - line: c.type_node.startPosition.row + 1, - endLine: nodeEndLine(c.type_node), - }); + handleTypeCapture(c, definitions); } else if (c.imp_node) { - const isTypeOnly = c.imp_node.text.startsWith('import type'); - const modPath = c.imp_source!.text.replace(/['"]/g, ''); - const names = extractImportNames(c.imp_node); - imports.push({ - source: modPath, - names, - line: c.imp_node.startPosition.row + 1, - typeOnly: isTypeOnly, - }); + handleImportCapture(c, imports); } else if (c.exp_node) { handleExportCapture(c, exps, imports); } else if (c.callfn_node) { diff --git a/src/features/complexity.ts b/src/features/complexity.ts index ba9bde47..5514fa6c 100644 --- a/src/features/complexity.ts +++ b/src/features/complexity.ts @@ -535,75 +535,89 @@ function upsertAstComplexity( return 1; } -export async function buildComplexityMetrics( +/** Collect native bulk-insert rows from precomputed complexity data. + * Returns the rows array, or null if any definition is missing complexity + * (signalling that JS fallback is needed). 
*/ +function collectNativeBulkRows( + db: BetterSqlite3Database, + fileSymbols: Map, +): Array> | null { + const rows: Array> = []; + + for (const [relPath, symbols] of fileSymbols) { + for (const def of symbols.definitions) { + if (def.kind !== 'function' && def.kind !== 'method') continue; + if (!def.line) continue; + if (!def.complexity) return null; // needs JS fallback + const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); + if (!nodeId) continue; + const ch = def.complexity.halstead; + const cl = def.complexity.loc; + rows.push({ + nodeId, + cognitive: def.complexity.cognitive ?? 0, + cyclomatic: def.complexity.cyclomatic ?? 0, + maxNesting: def.complexity.maxNesting ?? 0, + loc: cl ? cl.loc : 0, + sloc: cl ? cl.sloc : 0, + commentLines: cl ? cl.commentLines : 0, + halsteadN1: ch ? ch.n1 : 0, + halsteadN2: ch ? ch.n2 : 0, + halsteadBigN1: ch ? ch.bigN1 : 0, + halsteadBigN2: ch ? ch.bigN2 : 0, + halsteadVocabulary: ch ? ch.vocabulary : 0, + halsteadLength: ch ? ch.length : 0, + halsteadVolume: ch ? ch.volume : 0, + halsteadDifficulty: ch ? ch.difficulty : 0, + halsteadEffort: ch ? ch.effort : 0, + halsteadBugs: ch ? ch.bugs : 0, + maintainabilityIndex: def.complexity.maintainabilityIndex ?? 0, + }); + } + } + + return rows; +} + +/** Try the native bulk-insert fast path. Returns true if all rows were + * inserted successfully (caller can return early). 
*/ +function tryNativeBulkInsert( db: BetterSqlite3Database, fileSymbols: Map, - rootDir: string, engineOpts?: { nativeDb?: { bulkInsertComplexity?(rows: Array>): number }; suspendJsDb?: () => void; resumeJsDb?: () => void; }, -): Promise { - // ── Native bulk-insert fast path ────────────────────────────────────── +): boolean { const nativeDb = engineOpts?.nativeDb; - if (nativeDb?.bulkInsertComplexity) { - const rows: Array> = []; - let needsJsFallback = false; + if (!nativeDb?.bulkInsertComplexity) return false; - for (const [relPath, symbols] of fileSymbols) { - for (const def of symbols.definitions) { - if (def.kind !== 'function' && def.kind !== 'method') continue; - if (!def.line) continue; - if (!def.complexity) { - needsJsFallback = true; - break; - } - const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); - if (!nodeId) continue; - const ch = def.complexity.halstead; - const cl = def.complexity.loc; - rows.push({ - nodeId, - cognitive: def.complexity.cognitive ?? 0, - cyclomatic: def.complexity.cyclomatic ?? 0, - maxNesting: def.complexity.maxNesting ?? 0, - loc: cl ? cl.loc : 0, - sloc: cl ? cl.sloc : 0, - commentLines: cl ? cl.commentLines : 0, - halsteadN1: ch ? ch.n1 : 0, - halsteadN2: ch ? ch.n2 : 0, - halsteadBigN1: ch ? ch.bigN1 : 0, - halsteadBigN2: ch ? ch.bigN2 : 0, - halsteadVocabulary: ch ? ch.vocabulary : 0, - halsteadLength: ch ? ch.length : 0, - halsteadVolume: ch ? ch.volume : 0, - halsteadDifficulty: ch ? ch.difficulty : 0, - halsteadEffort: ch ? ch.effort : 0, - halsteadBugs: ch ? ch.bugs : 0, - maintainabilityIndex: def.complexity.maintainabilityIndex ?? 
0, - }); - } - if (needsJsFallback) break; - } + const rows = collectNativeBulkRows(db, fileSymbols); + if (!rows || rows.length === 0) return false; - if (!needsJsFallback && rows.length > 0) { - let inserted: number; - try { - engineOpts?.suspendJsDb?.(); - inserted = nativeDb.bulkInsertComplexity(rows); - } finally { - engineOpts?.resumeJsDb?.(); - } - if (inserted === rows.length) { - info(`Complexity (native bulk): ${inserted} functions analyzed`); - return; - } - debug(`Native bulkInsertComplexity partial: ${inserted}/${rows.length} — falling back to JS`); - } + let inserted: number; + try { + engineOpts?.suspendJsDb?.(); + inserted = nativeDb.bulkInsertComplexity(rows); + } finally { + engineOpts?.resumeJsDb?.(); + } + + if (inserted === rows.length) { + info(`Complexity (native bulk): ${inserted} functions analyzed`); + return true; } + debug(`Native bulkInsertComplexity partial: ${inserted}/${rows.length} — falling back to JS`); + return false; +} - // ── JS fallback path ───────────────────────────────────────────────── +/** JS/WASM fallback: parse files and compute metrics via AST traversal. 
*/ +async function computeJsFallbackMetrics( + db: BetterSqlite3Database, + fileSymbols: Map, + rootDir: string, +): Promise { const { parsers, extToLang } = await initWasmParsersIfNeeded(fileSymbols); const { getParser } = await import('../domain/parser.js'); @@ -649,6 +663,20 @@ export async function buildComplexityMetrics( } } +export async function buildComplexityMetrics( + db: BetterSqlite3Database, + fileSymbols: Map, + rootDir: string, + engineOpts?: { + nativeDb?: { bulkInsertComplexity?(rows: Array>): number }; + suspendJsDb?: () => void; + resumeJsDb?: () => void; + }, +): Promise { + if (tryNativeBulkInsert(db, fileSymbols, engineOpts)) return; + await computeJsFallbackMetrics(db, fileSymbols, rootDir); +} + // ─── Query-Time Functions (re-exported from complexity-query.ts) ────────── // Split to separate query-time concerns (DB reads, filtering, pagination) // from compute-time concerns (AST traversal, metric algorithms). diff --git a/src/features/dataflow.ts b/src/features/dataflow.ts index 0b754716..d85bcb66 100644 --- a/src/features/dataflow.ts +++ b/src/features/dataflow.ts @@ -23,6 +23,7 @@ import { hasDataflowTable, openReadonlyOrFail, openReadonlyWithNative } from '.. 
import { ALL_SYMBOL_KINDS, normalizeSymbol } from '../domain/queries.js'; import { debug, info } from '../infrastructure/logger.js'; import { isTestFile } from '../infrastructure/test-filter.js'; +import type { NormalizedSymbol } from '../shared/normalize.js'; import { paginateResult } from '../shared/paginate.js'; import type { BetterSqlite3Database, NativeDatabase, NodeRow, TreeSitterNode } from '../types.js'; import { findNodes } from './shared/find-nodes.js'; @@ -438,85 +439,126 @@ function prepareDataflowStmts(db: BetterSqlite3Database): DataflowStmts { }; } -function buildNodeDataflowResult( - node: NodeRow, - stmts: DataflowStmts, - db: BetterSqlite3Database, - hc: Map, +// ─── Shared dataflow result builder ────────────────────────────────── + +/** Pre-mapped raw dataflow edge arrays shared between SQL and native paths. */ +interface RawDataflowEdges { + flowsTo: { + target: string; + kind: string; + file: string; + line: number; + paramIndex: number; + expression: string; + confidence: number; + }[]; + flowsFrom: { + source: string; + kind: string; + file: string; + line: number; + paramIndex: number; + expression: string; + confidence: number; + }[]; + returnConsumers: { + consumer: string; + kind: string; + file: string; + line: number; + expression: string; + }[]; + returnedBy: { producer: string; kind: string; file: string; line: number; expression: string }[]; + mutatesTargets: { target: string; expression: string; line: number }[]; + mutatedBy: { source: string; expression: string; line: number }[]; +} + +/** + * Build a unified dataflow result from pre-mapped edge data. + * Shared between the SQL and native code paths. 
+ */ +function buildDataflowResult( + sym: NormalizedSymbol, + edges: RawDataflowEdges, noTests: boolean, ): Record { - const sym = normalizeSymbol(node, db, hc); - - const flowsTo = stmts.flowsToOut.all(node.id).map((r: any) => ({ - target: r.target_name, - kind: r.target_kind, - file: r.target_file, - line: r.line, - paramIndex: r.param_index, - expression: r.expression, - confidence: r.confidence, - })); - - const flowsFrom = stmts.flowsToIn.all(node.id).map((r: any) => ({ - source: r.source_name, - kind: r.source_kind, - file: r.source_file, - line: r.line, - paramIndex: r.param_index, - expression: r.expression, - confidence: r.confidence, - })); - - const returnConsumers = stmts.returnsOut.all(node.id).map((r: any) => ({ - consumer: r.target_name, - kind: r.target_kind, - file: r.target_file, - line: r.line, - expression: r.expression, - })); - - const returnedBy = stmts.returnsIn.all(node.id).map((r: any) => ({ - producer: r.source_name, - kind: r.source_kind, - file: r.source_file, - line: r.line, - expression: r.expression, - })); - - const mutatesTargets = stmts.mutatesOut.all(node.id).map((r: any) => ({ - target: r.target_name, - expression: r.expression, - line: r.line, - })); - - const mutatedBy = stmts.mutatesIn.all(node.id).map((r: any) => ({ - source: r.source_name, - expression: r.expression, - line: r.line, - })); - if (noTests) { const filter = (arr: any[]) => arr.filter((r: any) => !isTestFile(r.file)); return { ...sym, - flowsTo: filter(flowsTo), - flowsFrom: filter(flowsFrom), - returns: returnConsumers.filter((r) => !isTestFile(r.file)), - returnedBy: returnedBy.filter((r) => !isTestFile(r.file)), - mutates: mutatesTargets, - mutatedBy, + flowsTo: filter(edges.flowsTo), + flowsFrom: filter(edges.flowsFrom), + returns: edges.returnConsumers.filter((r: any) => !isTestFile(r.file)), + returnedBy: edges.returnedBy.filter((r: any) => !isTestFile(r.file)), + mutates: edges.mutatesTargets, + mutatedBy: edges.mutatedBy, }; } return { ...sym, - 
flowsTo, - flowsFrom, - returns: returnConsumers, - returnedBy, - mutates: mutatesTargets, - mutatedBy, + flowsTo: edges.flowsTo, + flowsFrom: edges.flowsFrom, + returns: edges.returnConsumers, + returnedBy: edges.returnedBy, + mutates: edges.mutatesTargets, + mutatedBy: edges.mutatedBy, + }; +} + +function buildNodeDataflowResult( + node: NodeRow, + stmts: DataflowStmts, + db: BetterSqlite3Database, + hc: Map, + noTests: boolean, +): Record { + const sym = normalizeSymbol(node, db, hc); + const edges: RawDataflowEdges = { + flowsTo: stmts.flowsToOut.all(node.id).map((r: any) => ({ + target: r.target_name, + kind: r.target_kind, + file: r.target_file, + line: r.line, + paramIndex: r.param_index, + expression: r.expression, + confidence: r.confidence, + })), + flowsFrom: stmts.flowsToIn.all(node.id).map((r: any) => ({ + source: r.source_name, + kind: r.source_kind, + file: r.source_file, + line: r.line, + paramIndex: r.param_index, + expression: r.expression, + confidence: r.confidence, + })), + returnConsumers: stmts.returnsOut.all(node.id).map((r: any) => ({ + consumer: r.target_name, + kind: r.target_kind, + file: r.target_file, + line: r.line, + expression: r.expression, + })), + returnedBy: stmts.returnsIn.all(node.id).map((r: any) => ({ + producer: r.source_name, + kind: r.source_kind, + file: r.source_file, + line: r.line, + expression: r.expression, + })), + mutatesTargets: stmts.mutatesOut.all(node.id).map((r: any) => ({ + target: r.target_name, + expression: r.expression, + line: r.line, + })), + mutatedBy: stmts.mutatesIn.all(node.id).map((r: any) => ({ + source: r.source_name, + expression: r.expression, + line: r.line, + })), }; + return buildDataflowResult(sym, edges, noTests); } function buildNativeDataflowResult( @@ -528,72 +570,51 @@ function buildNativeDataflowResult( ): Record { const sym = normalizeSymbol(node, db, hc); const d = nativeDb.getDataflowEdges!(node.id); - - const flowsTo = d.flowsToOut.map((r: any) => ({ - target: r.name, - kind: 
r.kind, - file: r.file, - line: r.line, - paramIndex: r.paramIndex, - expression: r.expression, - confidence: r.confidence, - })); - const flowsFrom = d.flowsToIn.map((r: any) => ({ - source: r.name, - kind: r.kind, - file: r.file, - line: r.line, - paramIndex: r.paramIndex, - expression: r.expression, - confidence: r.confidence, - })); - const returnConsumers = d.returnsOut.map((r: any) => ({ - consumer: r.name, - kind: r.kind, - file: r.file, - line: r.line, - expression: r.expression, - })); - const returnedBy = d.returnsIn.map((r: any) => ({ - producer: r.name, - kind: r.kind, - file: r.file, - line: r.line, - expression: r.expression, - })); - const mutatesTargets = d.mutatesOut.map((r: any) => ({ - target: r.name, - expression: r.expression, - line: r.line, - })); - const mutatedBy = d.mutatesIn.map((r: any) => ({ - source: r.name, - expression: r.expression, - line: r.line, - })); - - if (noTests) { - const filter = (arr: any[]) => arr.filter((r: any) => !isTestFile(r.file)); - return { - ...sym, - flowsTo: filter(flowsTo), - flowsFrom: filter(flowsFrom), - returns: returnConsumers.filter((r: any) => !isTestFile(r.file)), - returnedBy: returnedBy.filter((r: any) => !isTestFile(r.file)), - mutates: mutatesTargets, - mutatedBy, - }; - } - - return { - ...sym, - flowsTo, - flowsFrom, - returns: returnConsumers, - returnedBy, - mutates: mutatesTargets, - mutatedBy, + const edges: RawDataflowEdges = { + flowsTo: d.flowsToOut.map((r: any) => ({ + target: r.name, + kind: r.kind, + file: r.file, + line: r.line, + paramIndex: r.paramIndex, + expression: r.expression, + confidence: r.confidence, + })), + flowsFrom: d.flowsToIn.map((r: any) => ({ + source: r.name, + kind: r.kind, + file: r.file, + line: r.line, + paramIndex: r.paramIndex, + expression: r.expression, + confidence: r.confidence, + })), + returnConsumers: d.returnsOut.map((r: any) => ({ + consumer: r.name, + kind: r.kind, + file: r.file, + line: r.line, + expression: r.expression, + })), + returnedBy: 
d.returnsIn.map((r: any) => ({ + producer: r.name, + kind: r.kind, + file: r.file, + line: r.line, + expression: r.expression, + })), + mutatesTargets: d.mutatesOut.map((r: any) => ({ + target: r.name, + expression: r.expression, + line: r.line, + })), + mutatedBy: d.mutatesIn.map((r: any) => ({ + source: r.name, + expression: r.expression, + line: r.line, + })), }; + return buildDataflowResult(sym, edges, noTests); } export function dataflowData( diff --git a/src/features/structure.ts b/src/features/structure.ts index 471fa6be..b361fb04 100644 --- a/src/features/structure.ts +++ b/src/features/structure.ts @@ -444,6 +444,77 @@ export function classifyNodeRoles( return classifyNodeRolesFull(db, emptySummary); } +// ─── Shared role-classification helpers ─────────────────────────────── + +/** + * Build a role summary and group node IDs by role from classifier output. + * Shared between full and incremental classification paths. + */ +function buildRoleSummary( + rows: { id: number }[], + leafRows: { id: number }[], + roleMap: Map, + emptySummary: RoleSummary, +): { summary: RoleSummary; idsByRole: Map } { + const summary: RoleSummary = { ...emptySummary }; + const idsByRole = new Map(); + + // Leaf kinds are always dead-leaf — skip classifier + if (leafRows.length > 0) { + const leafIds: number[] = []; + for (const row of leafRows) leafIds.push(row.id); + idsByRole.set('dead-leaf', leafIds); + summary.dead += leafRows.length; + summary['dead-leaf'] += leafRows.length; + } + + for (const row of rows) { + const role = roleMap.get(String(row.id)) || 'leaf'; + if (role.startsWith('dead')) summary.dead++; + summary[role] = (summary[role] || 0) + 1; + let ids = idsByRole.get(role); + if (!ids) { + ids = []; + idsByRole.set(role, ids); + } + ids.push(row.id); + } + + return { summary, idsByRole }; +} + +/** + * Batch-update node roles in the database. 
Executes a reset callback + * first (full resets all nodes, incremental resets only affected files), + * then writes new roles in chunks. + */ +function batchUpdateRoles( + db: BetterSqlite3Database, + idsByRole: Map, + resetFn: () => void, +): void { + const ROLE_CHUNK = 500; + const roleStmtCache = new Map(); + db.transaction(() => { + resetFn(); + for (const [role, ids] of idsByRole) { + for (let i = 0; i < ids.length; i += ROLE_CHUNK) { + const end = Math.min(i + ROLE_CHUNK, ids.length); + const chunkSize = end - i; + let stmt = roleStmtCache.get(chunkSize); + if (!stmt) { + const placeholders = Array.from({ length: chunkSize }, () => '?').join(','); + stmt = db.prepare(`UPDATE nodes SET role = ? WHERE id IN (${placeholders})`); + roleStmtCache.set(chunkSize, stmt); + } + const vals: unknown[] = [role]; + for (let j = i; j < end; j++) vals.push(ids[j]); + stmt.run(...vals); + } + } + })(); +} + function classifyNodeRolesFull(db: BetterSqlite3Database, emptySummary: RoleSummary): RoleSummary { // Leaf kinds (parameter, property) can never have callers/callees. // Classify them directly as dead-leaf without the expensive fan-in/fan-out JOINs. 
@@ -525,52 +596,11 @@ function classifyNodeRolesFull(db: BetterSqlite3Database, emptySummary: RoleSumm const roleMap = classifyRoles(classifierInput); - // Build summary and group updates by role for batch UPDATE - const summary: RoleSummary = { ...emptySummary }; - const idsByRole = new Map(); - - // Leaf kinds are always dead-leaf -- skip classifier - if (leafRows.length > 0) { - const leafIds: number[] = []; - for (const row of leafRows) leafIds.push(row.id); - idsByRole.set('dead-leaf', leafIds); - summary.dead += leafRows.length; - summary['dead-leaf'] += leafRows.length; - } - - for (const row of rows) { - const role = roleMap.get(String(row.id)) || 'leaf'; - if (role.startsWith('dead')) summary.dead++; - summary[role] = (summary[role] || 0) + 1; - let ids = idsByRole.get(role); - if (!ids) { - ids = []; - idsByRole.set(role, ids); - } - ids.push(row.id); - } + const { summary, idsByRole } = buildRoleSummary(rows, leafRows, roleMap, emptySummary); - // Batch UPDATE: one statement per role instead of one per node - const ROLE_CHUNK = 500; - const roleStmtCache = new Map(); - db.transaction(() => { + batchUpdateRoles(db, idsByRole, () => { db.prepare('UPDATE nodes SET role = NULL').run(); - for (const [role, ids] of idsByRole) { - for (let i = 0; i < ids.length; i += ROLE_CHUNK) { - const end = Math.min(i + ROLE_CHUNK, ids.length); - const chunkSize = end - i; - let stmt = roleStmtCache.get(chunkSize); - if (!stmt) { - const placeholders = Array.from({ length: chunkSize }, () => '?').join(','); - stmt = db.prepare(`UPDATE nodes SET role = ? WHERE id IN (${placeholders})`); - roleStmtCache.set(chunkSize, stmt); - } - const vals: unknown[] = [role]; - for (let j = i; j < end; j++) vals.push(ids[j]); - stmt.run(...vals); - } - } - })(); + }); return summary; } @@ -704,54 +734,14 @@ function classifyNodeRolesIncremental( const roleMap = classifyRoles(classifierInput, globalMedians); // 6. 
Build summary (only for affected nodes) and update only those nodes - const summary: RoleSummary = { ...emptySummary }; - const idsByRole = new Map(); + const { summary, idsByRole } = buildRoleSummary(rows, leafRows, roleMap, emptySummary); - // Leaf kinds are always dead-leaf -- skip classifier - if (leafRows.length > 0) { - const leafIds: number[] = []; - for (const row of leafRows) leafIds.push(row.id); - idsByRole.set('dead-leaf', leafIds); - summary.dead += leafRows.length; - summary['dead-leaf'] += leafRows.length; - } - - for (const row of rows) { - const role = roleMap.get(String(row.id)) || 'leaf'; - if (role.startsWith('dead')) summary.dead++; - summary[role] = (summary[role] || 0) + 1; - let ids = idsByRole.get(role); - if (!ids) { - ids = []; - idsByRole.set(role, ids); - } - ids.push(row.id); - } - - // Only update affected nodes — no global NULL reset - const ROLE_CHUNK = 500; - const roleStmtCache = new Map(); - db.transaction(() => { + batchUpdateRoles(db, idsByRole, () => { // Reset roles only for affected files' nodes db.prepare( `UPDATE nodes SET role = NULL WHERE file IN (${placeholders}) AND kind NOT IN ('file', 'directory')`, ).run(...allAffectedFiles); - for (const [role, ids] of idsByRole) { - for (let i = 0; i < ids.length; i += ROLE_CHUNK) { - const end = Math.min(i + ROLE_CHUNK, ids.length); - const chunkSize = end - i; - let stmt = roleStmtCache.get(chunkSize); - if (!stmt) { - const ph = Array.from({ length: chunkSize }, () => '?').join(','); - stmt = db.prepare(`UPDATE nodes SET role = ? 
WHERE id IN (${ph})`); - roleStmtCache.set(chunkSize, stmt); - } - const vals: unknown[] = [role]; - for (let j = i; j < end; j++) vals.push(ids[j]); - stmt.run(...vals); - } - } - })(); + }); return summary; } diff --git a/src/graph/algorithms/louvain.ts b/src/graph/algorithms/louvain.ts index f1c610b3..6cece3f5 100644 --- a/src/graph/algorithms/louvain.ts +++ b/src/graph/algorithms/louvain.ts @@ -12,6 +12,9 @@ import type { CodeGraph } from '../model.js'; import type { DetectClustersResult } from './leiden/index.js'; import { detectClusters } from './leiden/index.js'; +/** Default random seed for deterministic community detection. */ +const DEFAULT_RANDOM_SEED = 42; + export interface LouvainOptions { resolution?: number; maxLevels?: number; @@ -42,7 +45,7 @@ export function louvainCommunities(graph: CodeGraph, opts: LouvainOptions = {}): } const edges = graph.toEdgeArray(); const nodeIds = graph.nodeIds(); - const result = native.louvainCommunities(edges, nodeIds, resolution, 42); + const result = native.louvainCommunities(edges, nodeIds, resolution, DEFAULT_RANDOM_SEED); const assignments = new Map(); for (const entry of result.assignments) { assignments.set(entry.node, entry.community); @@ -57,7 +60,7 @@ export function louvainCommunities(graph: CodeGraph, opts: LouvainOptions = {}): function louvainJS(graph: CodeGraph, opts: LouvainOptions, resolution: number): LouvainResult { const result: DetectClustersResult = detectClusters(graph, { resolution, - randomSeed: 42, + randomSeed: DEFAULT_RANDOM_SEED, directed: false, ...(opts.maxLevels != null && { maxLevels: opts.maxLevels }), ...(opts.maxLocalPasses != null && { maxLocalPasses: opts.maxLocalPasses }), diff --git a/src/presentation/communities.ts b/src/presentation/communities.ts index eb8ecf07..6681f2ef 100644 --- a/src/presentation/communities.ts +++ b/src/presentation/communities.ts @@ -44,6 +44,48 @@ interface CommunitiesResult { drift: DriftAnalysis; } +function renderCommunityList(communityList: 
Community[]): void { + for (const c of communityList) { + const dirs = Object.entries(c.directories) + .sort((a, b) => b[1] - a[1]) + .map(([d, n]) => `${d} (${n})`) + .join(', '); + console.log(` Community ${c.id} (${c.size} members): ${dirs}`); + if (c.members) { + const shown = c.members.slice(0, 8); + for (const m of shown) { + const kind = m.kind ? ` [${m.kind}]` : ''; + console.log(` - ${m.name}${kind} ${m.file}`); + } + if (c.members.length > 8) { + console.log(` ... and ${c.members.length - 8} more`); + } + } + } +} + +function renderDriftAnalysis(d: DriftAnalysis, driftScore: number): void { + if (d.splitCandidates.length === 0 && d.mergeCandidates.length === 0) return; + + console.log(`\n# Drift Analysis (score: ${driftScore}%)\n`); + + if (d.splitCandidates.length > 0) { + console.log(' Split candidates (directories spanning multiple communities):'); + for (const s of d.splitCandidates.slice(0, 10)) { + console.log(` - ${s.directory} → ${s.communityCount} communities`); + } + } + + if (d.mergeCandidates.length > 0) { + console.log(' Merge candidates (communities spanning multiple directories):'); + for (const m of d.mergeCandidates.slice(0, 10)) { + console.log( + ` - Community ${m.communityId} (${m.size} members) → ${m.directoryCount} dirs: ${m.directories.join(', ')}`, + ); + } + } +} + export function communities(customDbPath: string | undefined, opts: CommunitiesCliOpts = {}): void { const data = communitiesData(customDbPath, opts) as unknown as CommunitiesResult; @@ -64,46 +106,9 @@ export function communities(customDbPath: string | undefined, opts: CommunitiesC ); if (!opts.drift) { - for (const c of data.communities) { - const dirs = Object.entries(c.directories) - .sort((a, b) => b[1] - a[1]) - .map(([d, n]) => `${d} (${n})`) - .join(', '); - console.log(` Community ${c.id} (${c.size} members): ${dirs}`); - if (c.members) { - const shown = c.members.slice(0, 8); - for (const m of shown) { - const kind = m.kind ? 
` [${m.kind}]` : ''; - console.log(` - ${m.name}${kind} ${m.file}`); - } - if (c.members.length > 8) { - console.log(` ... and ${c.members.length - 8} more`); - } - } - } - } - - // Drift analysis - const d = data.drift; - if (d.splitCandidates.length > 0 || d.mergeCandidates.length > 0) { - console.log(`\n# Drift Analysis (score: ${data.summary.driftScore}%)\n`); - - if (d.splitCandidates.length > 0) { - console.log(' Split candidates (directories spanning multiple communities):'); - for (const s of d.splitCandidates.slice(0, 10)) { - console.log(` - ${s.directory} → ${s.communityCount} communities`); - } - } - - if (d.mergeCandidates.length > 0) { - console.log(' Merge candidates (communities spanning multiple directories):'); - for (const m of d.mergeCandidates.slice(0, 10)) { - console.log( - ` - Community ${m.communityId} (${m.size} members) → ${m.directoryCount} dirs: ${m.directories.join(', ')}`, - ); - } - } + renderCommunityList(data.communities); } + renderDriftAnalysis(data.drift, data.summary.driftScore); console.log(); } diff --git a/src/presentation/manifesto.ts b/src/presentation/manifesto.ts index 981c6f8a..521f09f0 100644 --- a/src/presentation/manifesto.ts +++ b/src/presentation/manifesto.ts @@ -22,17 +22,9 @@ interface ManifestoViolationRow { line?: number; } -export function manifesto(customDbPath: string | undefined, opts: ManifestoOpts = {}): void { - const data = manifestoData(customDbPath, opts as any) as any; - - if (outputResult(data, 'violations', opts)) { - if (!data.passed) process.exitCode = 1; - return; - } - +function renderRulesTable(data: any): void { console.log('\n# Manifesto Rules\n'); - // Rules table console.log( ` ${'Rule'.padEnd(20)} ${'Level'.padEnd(10)} ${'Status'.padEnd(8)} ${'Warn'.padStart(6)} ${'Fail'.padStart(6)} ${'Violations'.padStart(11)}`, ); @@ -49,44 +41,49 @@ export function manifesto(customDbPath: string | undefined, opts: ManifestoOpts ); } - // Summary const s = data.summary; console.log( `\n ${s.total} 
rules | ${s.passed} passed | ${s.warned} warned | ${s.failed} failed | ${s.violationCount} violations`, ); +} - // Violations detail - if (data.violations.length > 0) { - const failViolations = data.violations.filter((v: ManifestoViolationRow) => v.level === 'fail'); - const warnViolations = data.violations.filter((v: ManifestoViolationRow) => v.level === 'warn'); +function renderViolationList( + label: string, + violations: ManifestoViolationRow[], + maxShown = 20, +): void { + if (violations.length === 0) return; + console.log(`\n## ${label} (${violations.length})\n`); + for (const v of violations.slice(0, maxShown)) { + const loc = v.line ? `${v.file}:${v.line}` : v.file; + const tag = label === 'Failures' ? 'FAIL' : 'WARN'; + console.log( + ` [${tag}] ${v.rule}: ${v.name} (${v.value}) at ${loc} — threshold ${v.threshold}`, + ); + } + if (violations.length > maxShown) { + console.log(` ... and ${violations.length - maxShown} more`); + } +} + +function renderViolations(violations: ManifestoViolationRow[]): void { + if (violations.length === 0) return; + const failViolations = violations.filter((v) => v.level === 'fail'); + const warnViolations = violations.filter((v) => v.level === 'warn'); + renderViolationList('Failures', failViolations); + renderViolationList('Warnings', warnViolations); +} - if (failViolations.length > 0) { - console.log(`\n## Failures (${failViolations.length})\n`); - for (const v of failViolations.slice(0, 20)) { - const loc = v.line ? `${v.file}:${v.line}` : v.file; - console.log( - ` [FAIL] ${v.rule}: ${v.name} (${v.value}) at ${loc} — threshold ${v.threshold}`, - ); - } - if (failViolations.length > 20) { - console.log(` ... 
and ${failViolations.length - 20} more`); - } - } +export function manifesto(customDbPath: string | undefined, opts: ManifestoOpts = {}): void { + const data = manifestoData(customDbPath, opts as any) as any; - if (warnViolations.length > 0) { - console.log(`\n## Warnings (${warnViolations.length})\n`); - for (const v of warnViolations.slice(0, 20)) { - const loc = v.line ? `${v.file}:${v.line}` : v.file; - console.log( - ` [WARN] ${v.rule}: ${v.name} (${v.value}) at ${loc} — threshold ${v.threshold}`, - ); - } - if (warnViolations.length > 20) { - console.log(` ... and ${warnViolations.length - 20} more`); - } - } + if (outputResult(data, 'violations', opts)) { + if (!data.passed) process.exitCode = 1; + return; } + renderRulesTable(data); + renderViolations(data.violations); console.log(); if (!data.passed) { diff --git a/src/presentation/queries-cli/inspect.ts b/src/presentation/queries-cli/inspect.ts index 1b407134..e900289d 100644 --- a/src/presentation/queries-cli/inspect.ts +++ b/src/presentation/queries-cli/inspect.ts @@ -182,6 +182,39 @@ interface InterfacesData { results: InterfacesResult[]; } +function renderWhereSymbolResults(results: WhereSymbolResult[]): void { + for (const r of results) { + const roleTag = r.role ? ` [${r.role}]` : ''; + const tag = r.exported ? 
' (exported)' : ''; + console.log(`\n${kindIcon(r.kind)} ${r.name}${roleTag} ${r.file}:${r.line}${tag}`); + if (r.uses.length > 0) { + const useStrs = r.uses.map((u) => `${u.file}:${u.line}`); + console.log(` Used in: ${useStrs.join(', ')}`); + } else { + console.log(' No uses found'); + } + } +} + +function renderWhereFileResults(results: WhereFileResult[]): void { + for (const r of results) { + console.log(`\n# ${r.file}`); + if (r.symbols.length > 0) { + const symStrs = r.symbols.map((s) => `${s.name}:${s.line}`); + console.log(` Symbols: ${symStrs.join(', ')}`); + } + if (r.imports.length > 0) { + console.log(` Imports: ${r.imports.join(', ')}`); + } + if (r.importedBy.length > 0) { + console.log(` Imported by: ${r.importedBy.join(', ')}`); + } + if (r.exported.length > 0) { + console.log(` Exported: ${r.exported.join(', ')}`); + } + } +} + export function where(target: string, customDbPath: string, opts: OutputOpts = {}): void { const data = whereData(target, customDbPath, opts as Record) as WhereData; if (outputResult(data as unknown as Record, 'results', opts)) return; @@ -196,34 +229,9 @@ export function where(target: string, customDbPath: string, opts: OutputOpts = { } if (data.mode === 'symbol') { - for (const r of data.results as WhereSymbolResult[]) { - const roleTag = r.role ? ` [${r.role}]` : ''; - const tag = r.exported ? 
' (exported)' : ''; - console.log(`\n${kindIcon(r.kind)} ${r.name}${roleTag} ${r.file}:${r.line}${tag}`); - if (r.uses.length > 0) { - const useStrs = r.uses.map((u) => `${u.file}:${u.line}`); - console.log(` Used in: ${useStrs.join(', ')}`); - } else { - console.log(' No uses found'); - } - } + renderWhereSymbolResults(data.results as WhereSymbolResult[]); } else { - for (const r of data.results as WhereFileResult[]) { - console.log(`\n# ${r.file}`); - if (r.symbols.length > 0) { - const symStrs = r.symbols.map((s) => `${s.name}:${s.line}`); - console.log(` Symbols: ${symStrs.join(', ')}`); - } - if (r.imports.length > 0) { - console.log(` Imports: ${r.imports.join(', ')}`); - } - if (r.importedBy.length > 0) { - console.log(` Imported by: ${r.importedBy.join(', ')}`); - } - if (r.exported.length > 0) { - console.log(` Exported: ${r.exported.join(', ')}`); - } - } + renderWhereFileResults(data.results as WhereFileResult[]); } console.log(); } @@ -402,6 +410,17 @@ function renderContextResult(r: ContextResult): void { } } +function renderExplainSymbolList(label: string, symbols: ExplainSymbol[]): void { + if (symbols.length === 0) return; + console.log(`\n## ${label}`); + for (const s of symbols) { + const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; + const roleTag = s.role ? ` [${s.role}]` : ''; + const summary = s.summary ? ` -- ${s.summary}` : ''; + console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); + } +} + function renderFileExplain(r: FileExplainResult): void { const publicCount = r.publicApi.length; const internalCount = r.internal.length; @@ -418,25 +437,8 @@ function renderFileExplain(r: FileExplainResult): void { console.log(` Imported by: ${r.importedBy.map((i) => i.file).join(', ')}`); } - if (r.publicApi.length > 0) { - console.log(`\n## Exported`); - for (const s of r.publicApi) { - const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; - const roleTag = s.role ? 
` [${s.role}]` : ''; - const summary = s.summary ? ` -- ${s.summary}` : ''; - console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); - } - } - - if (r.internal.length > 0) { - console.log(`\n## Internal`); - for (const s of r.internal) { - const sig = s.signature?.params != null ? `(${s.signature.params})` : ''; - const roleTag = s.role ? ` [${s.role}]` : ''; - const summary = s.summary ? ` -- ${s.summary}` : ''; - console.log(` ${kindIcon(s.kind)} ${s.name}${sig}${roleTag} :${s.line}${summary}`); - } - } + renderExplainSymbolList('Exported', r.publicApi); + renderExplainSymbolList('Internal', r.internal); if (r.dataFlow.length > 0) { console.log(`\n## Data Flow`); diff --git a/src/shared/file-utils.ts b/src/shared/file-utils.ts index 6d8e5d68..6879e915 100644 --- a/src/shared/file-utils.ts +++ b/src/shared/file-utils.ts @@ -45,56 +45,97 @@ interface ExtractSummaryOpts { summaryMaxChars?: number; } -export function extractSummary( - fileLines: string[] | null, - line: number | undefined, - opts: ExtractSummaryOpts = {}, +/** Truncate text to maxChars, appending "..." if truncated. */ +function truncate(text: string, maxChars: number): string { + return text.length > maxChars ? `${text.slice(0, maxChars)}...` : text; +} + +/** Try to extract a single-line comment (// or #) above the definition. */ +function extractSingleLineComment( + fileLines: string[], + idx: number, + scanLines: number, + maxChars: number, ): string | null { - if (!fileLines || !line || line <= 1) return null; - const idx = line - 2; // line above the definition (0-indexed) - const jsdocEndScanLines = opts.jsdocEndScanLines ?? 10; - const jsdocOpenScanLines = opts.jsdocOpenScanLines ?? 20; - const summaryMaxChars = opts.summaryMaxChars ?? 
100; - // Scan up for JSDoc or comment - let jsdocEnd = -1; - for (let i = idx; i >= Math.max(0, idx - jsdocEndScanLines); i--) { + for (let i = idx; i >= Math.max(0, idx - scanLines); i--) { const trimmed = fileLines[i]!.trim(); - if (trimmed.endsWith('*/')) { - jsdocEnd = i; - break; - } + if (trimmed.endsWith('*/')) return null; // hit a block comment — defer to JSDoc extractor if (trimmed.startsWith('//') || trimmed.startsWith('#')) { - // Single-line comment immediately above const text = trimmed .replace(/^\/\/\s*/, '') .replace(/^#\s*/, '') .trim(); - return text.length > summaryMaxChars ? `${text.slice(0, summaryMaxChars)}...` : text; + return truncate(text, maxChars); } - if (trimmed !== '' && !trimmed.startsWith('*') && !trimmed.startsWith('/*')) break; + if (trimmed !== '' && !trimmed.startsWith('*') && !trimmed.startsWith('/*')) return null; } - if (jsdocEnd >= 0) { - // Find opening /** - for (let i = jsdocEnd; i >= Math.max(0, jsdocEnd - jsdocOpenScanLines); i--) { - if (fileLines[i]!.trim().startsWith('/**')) { - // Extract first non-tag, non-empty line - for (let j = i + 1; j <= jsdocEnd; j++) { - const docLine = fileLines[j]!.trim() - .replace(/^\*\s?/, '') - .trim(); - if (docLine && !docLine.startsWith('@') && docLine !== '/' && docLine !== '*/') { - return docLine.length > summaryMaxChars - ? `${docLine.slice(0, summaryMaxChars)}...` - : docLine; - } - } - break; + return null; +} + +/** Find the line index where a block comment (*​/) ends, scanning upward from idx. 
*/ +function findJsdocEndLine(fileLines: string[], idx: number, scanLines: number): number { + for (let i = idx; i >= Math.max(0, idx - scanLines); i--) { + const trimmed = fileLines[i]!.trim(); + if (trimmed.endsWith('*/')) return i; + if ( + trimmed !== '' && + !trimmed.startsWith('*') && + !trimmed.startsWith('/*') && + !trimmed.startsWith('//') && + !trimmed.startsWith('#') + ) { + break; + } + } + return -1; +} + +/** Extract the first description line from a JSDoc block ending at jsdocEnd. */ +function extractJsdocDescription( + fileLines: string[], + jsdocEnd: number, + openScanLines: number, + maxChars: number, +): string | null { + for (let i = jsdocEnd; i >= Math.max(0, jsdocEnd - openScanLines); i--) { + if (!fileLines[i]!.trim().startsWith('/**')) continue; + for (let j = i + 1; j <= jsdocEnd; j++) { + const docLine = fileLines[j]!.trim() + .replace(/^\*\s?/, '') + .trim(); + if (docLine && !docLine.startsWith('@') && docLine !== '/' && docLine !== '*/') { + return truncate(docLine, maxChars); } } + break; } return null; } +export function extractSummary( + fileLines: string[] | null, + line: number | undefined, + opts: ExtractSummaryOpts = {}, +): string | null { + if (!fileLines || !line || line <= 1) return null; + const idx = line - 2; // line above the definition (0-indexed) + const jsdocEndScanLines = opts.jsdocEndScanLines ?? 10; + const jsdocOpenScanLines = opts.jsdocOpenScanLines ?? 20; + const summaryMaxChars = opts.summaryMaxChars ?? 
100; + + // Try single-line comment first + const singleLine = extractSingleLineComment(fileLines, idx, jsdocEndScanLines, summaryMaxChars); + if (singleLine) return singleLine; + + // Try JSDoc block comment + const jsdocEnd = findJsdocEndLine(fileLines, idx, jsdocEndScanLines); + if (jsdocEnd >= 0) { + return extractJsdocDescription(fileLines, jsdocEnd, jsdocOpenScanLines, summaryMaxChars); + } + + return null; +} + interface ExtractSignatureOpts { signatureGatherLines?: number; } @@ -104,6 +145,38 @@ export interface Signature { returnType: string | null; } +/** Per-language signature patterns. Each entry has a regex and an extractor for return type. */ +const SIGNATURE_PATTERNS: Array<{ + regex: RegExp; + returnType: (m: RegExpMatchArray) => string | null; +}> = [ + // JS/TS: function name(params) or async function + { + regex: /(?:export\s+)?(?:async\s+)?function\s*\*?\s*\w*\s*\(([^)]*)\)\s*(?::\s*([^\n{]+))?/, + returnType: (m) => (m[2] ? m[2].trim().replace(/\s*\{$/, '') : null), + }, + // Arrow: const name = (params) => or (params):ReturnType => + { + regex: /=\s*(?:async\s+)?\(([^)]*)\)\s*(?::\s*([^=>\n{]+))?\s*=>/, + returnType: (m) => (m[2] ? m[2].trim() : null), + }, + // Python: def name(params) -> return: + { + regex: /def\s+\w+\s*\(([^)]*)\)\s*(?:->\s*([^:\n]+))?/, + returnType: (m) => (m[2] ? m[2].trim() : null), + }, + // Go: func (recv) name(params) (returns) + { + regex: /func\s+(?:\([^)]*\)\s+)?\w+\s*\(([^)]*)\)\s*(?:\(([^)]+)\)|(\w[^\n{]*))?/, + returnType: (m) => (m[2] || m[3] || '').trim() || null, + }, + // Rust: fn name(params) -> ReturnType + { + regex: /fn\s+\w+\s*\(([^)]*)\)\s*(?:->\s*([^\n{]+))?/, + returnType: (m) => (m[2] ? m[2].trim() : null), + }, +]; + export function extractSignature( fileLines: string[] | null, line: number | undefined, @@ -112,52 +185,18 @@ export function extractSignature( if (!fileLines || !line) return null; const idx = line - 1; const signatureGatherLines = opts.signatureGatherLines ?? 
5; - // Gather lines to handle multi-line params const chunk = fileLines .slice(idx, Math.min(fileLines.length, idx + signatureGatherLines)) .join('\n'); - // JS/TS: function name(params) or (params) => or async function - let m = chunk.match( - /(?:export\s+)?(?:async\s+)?function\s*\*?\s*\w*\s*\(([^)]*)\)\s*(?::\s*([^\n{]+))?/, - ); - if (m) { - return { - params: m[1]!.trim() || null, - returnType: m[2] ? m[2].trim().replace(/\s*\{$/, '') : null, - }; - } - // Arrow: const name = (params) => or (params):ReturnType => - m = chunk.match(/=\s*(?:async\s+)?\(([^)]*)\)\s*(?::\s*([^=>\n{]+))?\s*=>/); - if (m) { - return { - params: m[1]!.trim() || null, - returnType: m[2] ? m[2].trim() : null, - }; - } - // Python: def name(params) -> return: - m = chunk.match(/def\s+\w+\s*\(([^)]*)\)\s*(?:->\s*([^:\n]+))?/); - if (m) { - return { - params: m[1]!.trim() || null, - returnType: m[2] ? m[2].trim() : null, - }; - } - // Go: func (recv) name(params) (returns) - m = chunk.match(/func\s+(?:\([^)]*\)\s+)?\w+\s*\(([^)]*)\)\s*(?:\(([^)]+)\)|(\w[^\n{]*))?/); - if (m) { - return { - params: m[1]!.trim() || null, - returnType: (m[2] || m[3] || '').trim() || null, - }; - } - // Rust: fn name(params) -> ReturnType - m = chunk.match(/fn\s+\w+\s*\(([^)]*)\)\s*(?:->\s*([^\n{]+))?/); - if (m) { - return { - params: m[1]!.trim() || null, - returnType: m[2] ? m[2].trim() : null, - }; + for (const pattern of SIGNATURE_PATTERNS) { + const m = chunk.match(pattern.regex); + if (m) { + return { + params: m[1]!.trim() || null, + returnType: pattern.returnType(m), + }; + } } return null; }