From 2aacecc408b7646fafea02370cdf67ba35bb7270 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 1 Jun 2026 20:27:58 -0600 Subject: [PATCH 01/14] feat(native): implement return-type + call-assignment extraction in Rust engine (closes #1280) Ports Phase 8.2 to the native engine so returnTypeMap and callAssignments are populated for JS/TS/TSX files regardless of which engine is active. - Rust: new NativeCallAssignment NAPI struct; FileSymbols gains returnTypeMap and callAssignments fields; match_js_return_type_map and match_js_call_assignments walk passes mirror the JS extractReturnTypeMapWalk and recordCallAssignment logic exactly - JS: patchReturnTypeMap() in parser.ts converts the native array to a Map (same pattern as patchTypeMap) so propagateReturnTypesAcrossFiles sees a unified ExtractorOutput regardless of engine --- Cargo.lock | 2 +- .../src/extractors/javascript.rs | 134 ++++++++++++++++++ crates/codegraph-core/src/types.rs | 17 +++ src/domain/parser.ts | 19 +++ 4 files changed, 171 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 691901bf..629505d1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -66,7 +66,7 @@ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "codegraph-core" -version = "3.11.1" +version = "3.11.2" dependencies = [ "globset", "ignore", diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index 7fb8f215..2ec67b50 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -13,6 +13,9 @@ impl SymbolExtractor for JsExtractor { walk_tree(&tree.root_node(), source, &mut symbols, match_js_node); walk_ast_nodes(&tree.root_node(), source, &mut symbols.ast_nodes); walk_tree(&tree.root_node(), source, &mut symbols, match_js_type_map); + walk_tree(&tree.root_node(), source, &mut symbols, match_js_return_type_map); + // call_assignments runs after type_map is 
populated (needs receiver types) + walk_tree(&tree.root_node(), source, &mut symbols, match_js_call_assignments); symbols } } @@ -101,6 +104,137 @@ fn match_js_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _dep } } +// ── Return-type map extraction (Phase 8.2 parity) ─────────────────────────── + +/// Walk the AST collecting function/method return types into `symbols.return_type_map`. +/// Mirrors `extractReturnTypeMapWalk` in src/extractors/javascript.ts. +fn match_js_return_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { + match node.kind() { + "function_declaration" => { + let Some(name_n) = node.child_by_field_name("name") else { return }; + let fn_name = node_text(&name_n, source); + if fn_name == "constructor" { return; } + let key = match find_parent_class(node, source) { + Some(cls) => format!("{}.{}", cls, fn_name), + None => fn_name.to_string(), + }; + store_return_type(node, &key, source, symbols); + } + "method_definition" => { + let Some(name_n) = node.child_by_field_name("name") else { return }; + let method_name = node_text(&name_n, source); + if method_name == "constructor" { return; } + let key = match find_parent_class(node, source) { + Some(cls) => format!("{}.{}", cls, method_name), + None => method_name.to_string(), + }; + store_return_type(node, &key, source, symbols); + } + "variable_declarator" => { + let Some(name_n) = node.child_by_field_name("name") else { return }; + if name_n.kind() != "identifier" { return; } + let Some(value_n) = node.child_by_field_name("value") else { return }; + if !matches!(value_n.kind(), "arrow_function" | "function_expression" | "function") { + return; + } + let var_name = node_text(&name_n, source); + let key = match find_parent_class(node, source) { + Some(cls) => format!("{}.{}", cls, var_name), + None => var_name.to_string(), + }; + store_return_type(&value_n, &key, source, symbols); + } + _ => {} + } +} + +/// Extract the return type of `fn_node` and push 
it into `symbols.return_type_map`. +/// Prefers explicit return type annotation (confidence 1.0) over inferred `return new X()` +/// (confidence 0.85). Higher confidence wins on conflict. +fn store_return_type(fn_node: &Node, fn_name: &str, source: &[u8], symbols: &mut FileSymbols) { + // Explicit return type annotation + if let Some(ret_type_node) = fn_node.child_by_field_name("return_type") { + if let Some(type_name) = extract_simple_type_name(&ret_type_node, source) { + push_return_type_entry(symbols, fn_name, type_name, 1.0); + return; + } + } + // Infer from first `return new Constructor()` in body + if let Some(body) = fn_node.child_by_field_name("body") { + if let Some(type_name) = find_return_new_expr_type(&body, source) { + push_return_type_entry(symbols, fn_name, type_name, 0.85); + } + } +} + +/// Scan direct children of `body` for the first `return new X()` and return the constructor name. +fn find_return_new_expr_type<'a>(body: &Node<'a>, source: &'a [u8]) -> Option<&'a str> { + for i in 0..body.child_count() { + let Some(child) = body.child(i) else { continue }; + if child.kind() != "return_statement" { continue; } + for j in 0..child.child_count() { + let Some(expr) = child.child(j) else { continue }; + if expr.kind() == "new_expression" { + return extract_new_expr_type_name(&expr, source); + } + } + } + None +} + +/// Insert `(fn_name → type_name)` into `return_type_map`, keeping the highest-confidence entry. 
+fn push_return_type_entry(symbols: &mut FileSymbols, fn_name: &str, type_name: &str, confidence: f64) { + if let Some(pos) = symbols.return_type_map.iter().position(|e| e.name == fn_name) { + if symbols.return_type_map[pos].confidence >= confidence { return; } + symbols.return_type_map.swap_remove(pos); + } + symbols.return_type_map.push(TypeMapEntry { + name: fn_name.to_string(), + type_name: type_name.to_string(), + confidence, + }); +} + +// ── Call-assignment extraction (Phase 8.2 parity) ─────────────────────────── + +/// Walk the AST recording variable assignments from call expressions into +/// `symbols.call_assignments` for cross-file return-type propagation. +/// Mirrors `recordCallAssignment` in src/extractors/javascript.ts. +fn match_js_call_assignments(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { + if node.kind() != "variable_declarator" { return; } + let Some(name_n) = node.child_by_field_name("name") else { return }; + if name_n.kind() != "identifier" { return; } + let Some(value_n) = node.child_by_field_name("value") else { return }; + if value_n.kind() != "call_expression" { return; } + + let var_name = node_text(&name_n, source).to_string(); + let Some(fn_node) = value_n.child_by_field_name("function") else { return }; + + match fn_node.kind() { + "identifier" => { + symbols.call_assignments.push(NativeCallAssignment { + var_name, + callee_name: node_text(&fn_node, source).to_string(), + receiver_type_name: None, + }); + } + "member_expression" => { + let Some(obj) = fn_node.child_by_field_name("object") else { return }; + let Some(prop) = fn_node.child_by_field_name("property") else { return }; + if obj.kind() != "identifier" { return; } + let receiver_type = symbols.type_map.iter() + .find(|e| e.name == node_text(&obj, source)) + .map(|e| e.type_name.clone()); + symbols.call_assignments.push(NativeCallAssignment { + var_name, + callee_name: node_text(&prop, source).to_string(), + receiver_type_name: receiver_type, + 
}); + } + _ => {} + } +} + fn match_js_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { match node.kind() { "function_declaration" => handle_function_decl(node, source, symbols), diff --git a/crates/codegraph-core/src/types.rs b/crates/codegraph-core/src/types.rs index f61f9fb8..1936f535 100644 --- a/crates/codegraph-core/src/types.rs +++ b/crates/codegraph-core/src/types.rs @@ -295,6 +295,17 @@ pub struct TypeMapEntry { pub confidence: f64, } +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct NativeCallAssignment { + #[napi(js_name = "varName")] + pub var_name: String, + #[napi(js_name = "calleeName")] + pub callee_name: String, + #[napi(js_name = "receiverTypeName")] + pub receiver_type_name: Option, +} + #[napi(object)] #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FileSymbols { @@ -311,6 +322,10 @@ pub struct FileSymbols { pub line_count: Option, #[napi(js_name = "typeMap")] pub type_map: Vec, + #[napi(js_name = "returnTypeMap")] + pub return_type_map: Vec, + #[napi(js_name = "callAssignments")] + pub call_assignments: Vec, } impl FileSymbols { @@ -326,6 +341,8 @@ impl FileSymbols { dataflow: None, line_count: None, type_map: Vec::new(), + return_type_map: Vec::new(), + call_assignments: Vec::new(), } } } diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 38ebc035..013e531b 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -684,6 +684,24 @@ function patchTypeMap(r: any): void { } } +/** Normalize native returnTypeMap array to a Map instance, keeping highest-confidence entry per key. */ +function patchReturnTypeMap(r: any): void { + if (!r.returnTypeMap || r.returnTypeMap instanceof Map) return; + const map = new Map(); + for (const e of r.returnTypeMap as Array<{ + name: string; + typeName: string; + confidence?: number; + }>) { + const conf = e.confidence ?? 
1.0; + const existing = map.get(e.name); + if (!existing || conf > existing.confidence) { + map.set(e.name, { type: e.typeName, confidence: conf }); + } + } + r.returnTypeMap = map.size > 0 ? map : undefined; +} + /** Wrap bindingType into binding object for dataflow argFlows and mutations. */ function patchDataflow(dataflow: any): void { if (dataflow.argFlows) { @@ -706,6 +724,7 @@ function patchNativeResult(r: any): ExtractorOutput { if (r.definitions) patchDefinitions(r.definitions); if (r.imports) patchImports(r.imports); patchTypeMap(r); + patchReturnTypeMap(r); if (r.dataflow) patchDataflow(r.dataflow); return r; From 2f11a0ff028acd65b37ccb2888d20e2fa8977169 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 1 Jun 2026 21:45:03 -0600 Subject: [PATCH 02/14] fix: add missing return_type_map and call_assignments in test FileSymbols literals Two test-only FileSymbols struct literals in import_edges.rs and structure.rs were not updated when the new fields were added to the struct, causing cargo test --release to fail to compile on all platforms (E0063). 
--- crates/codegraph-core/src/import_edges.rs | 2 ++ crates/codegraph-core/src/structure.rs | 2 ++ 2 files changed, 4 insertions(+) diff --git a/crates/codegraph-core/src/import_edges.rs b/crates/codegraph-core/src/import_edges.rs index f000a808..dd378983 100644 --- a/crates/codegraph-core/src/import_edges.rs +++ b/crates/codegraph-core/src/import_edges.rs @@ -560,6 +560,8 @@ mod tests { classes: vec![], exports: vec![], type_map: vec![], + return_type_map: vec![], + call_assignments: vec![], ast_nodes: vec![], dataflow: None, line_count: None, diff --git a/crates/codegraph-core/src/structure.rs b/crates/codegraph-core/src/structure.rs index b34307a8..6657f623 100644 --- a/crates/codegraph-core/src/structure.rs +++ b/crates/codegraph-core/src/structure.rs @@ -929,6 +929,8 @@ mod tests { classes: vec![], exports: vec![], type_map: vec![], + return_type_map: vec![], + call_assignments: vec![], ast_nodes: vec![], dataflow: None, line_count: Some(42), From 1a263f81bd8fdf69555772d807cd4c9fb2fe0808 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 1 Jun 2026 21:45:13 -0600 Subject: [PATCH 03/14] fix(native): align return-type extraction with WASM reference Three parity issues identified in review: 1. find_parent_class climbed through function scope boundaries, causing nested function declarations inside class methods to be incorrectly attributed to the enclosing class (e.g. UserService.buildQuery instead of buildQuery). Added find_parent_class_no_fn_boundary which stops the ancestor walk when it crosses a function_declaration, function_expression, arrow_function, or method_definition node, matching the WASM extractReturnTypeMapWalk which resets currentClass to null before recursing into any function body. 2. The variable_declarator branch included 'function' as a valid value kind, which has no counterpart in the TS reference and is not a valid tree-sitter value-expression node kind.
Removed it, leaving only 'arrow_function' and 'function_expression' to match the WASM reference exactly. 3. patchReturnTypeMap returned undefined for files with no annotated functions, while patchTypeMap always returns a (possibly empty) Map. Changed to always assign new Map() to keep both native and WASM paths consistent and safe for future callers that iterate without optional-chaining. --- .../src/extractors/javascript.rs | 45 +++++++++++++++++-- src/domain/parser.ts | 2 +- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index 2ec67b50..ced07324 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -114,7 +114,9 @@ fn match_js_return_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbol let Some(name_n) = node.child_by_field_name("name") else { return }; let fn_name = node_text(&name_n, source); if fn_name == "constructor" { return; } - let key = match find_parent_class(node, source) { + // Use the boundary-aware variant: nested function declarations inside + // method bodies must not inherit the class prefix (matches WASM behaviour). + let key = match find_parent_class_no_fn_boundary(node, source) { Some(cls) => format!("{}.{}", cls, fn_name), None => fn_name.to_string(), }; @@ -124,6 +126,8 @@ fn match_js_return_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbol let Some(name_n) = node.child_by_field_name("name") else { return }; let method_name = node_text(&name_n, source); if method_name == "constructor" { return; } + // method_definition is always a direct child of class_body — plain + // find_parent_class is correct here. 
let key = match find_parent_class(node, source) { Some(cls) => format!("{}.{}", cls, method_name), None => method_name.to_string(), @@ -134,11 +138,14 @@ fn match_js_return_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbol let Some(name_n) = node.child_by_field_name("name") else { return }; if name_n.kind() != "identifier" { return; } let Some(value_n) = node.child_by_field_name("value") else { return }; - if !matches!(value_n.kind(), "arrow_function" | "function_expression" | "function") { + // Only arrow_function and function_expression match the TS reference; + // "function" is not a valid tree-sitter value-expression kind here. + if !matches!(value_n.kind(), "arrow_function" | "function_expression") { return; } let var_name = node_text(&name_n, source); - let key = match find_parent_class(node, source) { + // Use the boundary-aware variant for the same reason as function_declaration. + let key = match find_parent_class_no_fn_boundary(node, source) { Some(cls) => format!("{}.{}", cls, var_name), None => var_name.to_string(), }; @@ -1402,6 +1409,38 @@ fn find_parent_class(node: &Node, source: &[u8]) -> Option { find_enclosing_type_name(node, JS_CLASS_KINDS, source) } +/// Like `find_parent_class` but stops at function scope boundaries. +/// +/// The WASM `extractReturnTypeMapWalk` resets `currentClass` to `null` before +/// recursing into any function or method body. This means nested function +/// declarations and arrow-function variable declarators inside a method body +/// are never attributed to the enclosing class. This function replicates that +/// behavior by halting the ancestor walk when a function/method node is found +/// before reaching a class. 
+const JS_FN_SCOPE_KINDS: &[&str] = &[ + "function_declaration", + "function_expression", + "arrow_function", + "method_definition", +]; + +fn find_parent_class_no_fn_boundary(node: &Node, source: &[u8]) -> Option { + let mut current = node.parent(); + while let Some(parent) = current { + let kind = parent.kind(); + if JS_FN_SCOPE_KINDS.contains(&kind) { + // Crossed a function scope boundary — stop, as WASM does. + return None; + } + if JS_CLASS_KINDS.contains(&kind) { + return named_child_text(&parent, "name", source) + .map(|s| s.to_string()); + } + current = parent.parent(); + } + None +} + /// Extract named bindings from a dynamic `import()` call expression. /// Handles: `const { a, b } = await import(...)` and `const mod = await import(...)` fn extract_dynamic_import_names(call_node: &Node, source: &[u8]) -> Vec { diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 013e531b..050398c0 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -699,7 +699,7 @@ function patchReturnTypeMap(r: any): void { map.set(e.name, { type: e.typeName, confidence: conf }); } } - r.returnTypeMap = map.size > 0 ? map : undefined; + r.returnTypeMap = map.size > 0 ? map : new Map(); } /** Wrap bindingType into binding object for dataflow argFlows and mutations. 
*/ From be22ab74524571058c2904c4a5723ebb08d23edf Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 1 Jun 2026 23:09:27 -0600 Subject: [PATCH 04/14] fix: include ts-resolver files missed in merge commit --- src/domain/graph/resolver/ts-resolver.ts | 519 +++++++++++++++++++++++ tests/unit/ts-resolver.test.ts | 261 ++++++++++++ 2 files changed, 780 insertions(+) create mode 100644 src/domain/graph/resolver/ts-resolver.ts create mode 100644 tests/unit/ts-resolver.test.ts diff --git a/src/domain/graph/resolver/ts-resolver.ts b/src/domain/graph/resolver/ts-resolver.ts new file mode 100644 index 00000000..f2b7a6f2 --- /dev/null +++ b/src/domain/graph/resolver/ts-resolver.ts @@ -0,0 +1,519 @@ +/** + * TypeScript-native type resolver (Phase 8.1). + * + * Runs as a build-time enrichment pass after tree-sitter parsing. Uses the + * TypeScript compiler API to resolve the actual runtime type of every variable + * and parameter in .ts/.tsx files, replacing heuristic typeMap entries (0.7–0.9 + * confidence) with compiler-verified ones (1.0). + * + * Tree-sitter parses fast; this pass resolves accurately. Together they give + * codegraph both speed and precision on its primary use case. + * + * The `typescript` package is a peer/optional dependency — it is present on any + * machine that compiles TypeScript but is not bundled with codegraph itself. This + * module lazy-imports it at runtime; if the import fails the pass is silently + * skipped so JS-only projects and environments without `typescript` installed are + * unaffected. + */ +import fs from 'node:fs'; +import path from 'node:path'; +import { debug } from '../../../infrastructure/logger.js'; +import type { CallAssignment, ExtractorOutput, TypeMapEntry } from '../../../types.js'; + +// typescript is not a hard dependency — lazy-load it so JS-only projects +// and environments without typescript installed work without error. 
+type TsModule = typeof import('typescript'); +let _ts: TsModule | null | undefined; // undefined = not yet tried; null = unavailable + +async function loadTs(): Promise { + if (_ts !== undefined) return _ts; + try { + // TypeScript 6+ ships dual CJS/ESM exports; `.default` is the CJS interop + // namespace and is present and non-null in both TS 5.x and TS 6.x. + _ts = (await import('typescript')).default as TsModule; + } catch { + _ts = null; + debug('ts-resolver: typescript package not available — skipping TSC type enrichment'); + } + return _ts; +} + +const TS_EXTENSIONS = new Set(['.ts', '.tsx', '.mts', '.cts']); + +function isTsFile(relPath: string): boolean { + // Exclude .d.ts declaration files — path.extname('.d.ts') returns '.ts', + // so we must check the full suffix explicitly. + return TS_EXTENSIONS.has(path.extname(relPath)) && !relPath.endsWith('.d.ts'); +} + +// Primitive and built-in type names that don't help call resolution. +const SKIP_TYPE_NAMES = new Set([ + 'string', + 'number', + 'boolean', + 'any', + 'unknown', + 'never', + 'void', + 'null', + 'undefined', + 'object', + 'symbol', + 'bigint', + 'String', + 'Number', + 'Boolean', + 'Object', + 'Array', + 'Promise', + 'Map', + 'Set', + 'WeakMap', + 'WeakSet', + 'Error', + 'Function', + 'RegExp', + 'Date', +]); + +/** + * Enrich the typeMap for every .ts/.tsx file using the TypeScript compiler API. + * + * Called from buildEdges before call-edge construction. Only overwrites entries + * with lower confidence than 1.0 (constructor calls are already exact). 
+ */ +export async function enrichTypeMapWithTsc( + rootDir: string, + fileSymbols: Map, +): Promise { + const tsRelPaths = [...fileSymbols.keys()].filter(isTsFile); + if (tsRelPaths.length === 0) return; + + const ts = await loadTs(); + if (!ts) return; + + const tsconfigPath = findTsconfig(rootDir); + if (!tsconfigPath) { + debug('ts-resolver: no tsconfig.json found — skipping TypeScript type enrichment'); + return; + } + + const t0 = Date.now(); + const program = createProgram(ts, tsconfigPath); + if (!program) return; + + const checker = program.getTypeChecker(); + let enrichedFiles = 0; + let enrichedEntries = 0; + let backfilledFiles = 0; + + for (const relPath of tsRelPaths) { + const symbols = fileSymbols.get(relPath)!; + const absPath = path.resolve(rootDir, relPath); + const sourceFile = program.getSourceFile(absPath); + if (!sourceFile) continue; + + const before = symbols.typeMap.size; + const countBefore = countLowConfidence(symbols.typeMap); + enrichSourceFile(ts, sourceFile, checker, symbols.typeMap); + const countAfter = countLowConfidence(symbols.typeMap); + const gained = countBefore - countAfter + (symbols.typeMap.size - before); + if (gained > 0) { + enrichedEntries += gained; + enrichedFiles++; + } + + // Phase 8.2 parity: backfill returnTypeMap and callAssignments for engines + // (native Rust) that don't populate them during extraction. The JS extractor + // sets these fields; native leaves them undefined. + // Guards are intentionally independent so a future extractor that sets one + // but not the other is handled correctly without silently skipping either. 
+ let didBackfill = false; + if (symbols.returnTypeMap === undefined) { + symbols.returnTypeMap = new Map(); + enrichReturnTypeMap(ts, sourceFile, checker, symbols.returnTypeMap); + if (symbols.returnTypeMap.size > 0) didBackfill = true; + } + if (symbols.callAssignments === undefined) { + symbols.callAssignments = []; + enrichCallAssignments(ts, sourceFile, symbols.typeMap, symbols.callAssignments); + if (symbols.callAssignments.length > 0) didBackfill = true; + } + if (didBackfill) backfilledFiles++; + } + + debug( + `ts-resolver: enriched ${enrichedEntries} typeMap entries across ${enrichedFiles} files` + + (backfilledFiles > 0 + ? `, backfilled returnTypeMap/callAssignments in ${backfilledFiles} files` + : '') + + ` in ${Date.now() - t0}ms`, + ); +} + +function countLowConfidence(typeMap: Map): number { + let count = 0; + for (const entry of typeMap.values()) { + if (entry.confidence < 1.0) count++; + } + return count; +} + +/** + * Walk up from rootDir looking for tsconfig.json (up to 4 levels). + * Handles monorepo setups where rootDir is a package subdirectory but + * the tsconfig lives at the repository root. 
+ */ +function findTsconfig(rootDir: string): string | null { + let dir = rootDir; + for (let i = 0; i < 4; i++) { + const candidate = path.join(dir, 'tsconfig.json'); + if (fs.existsSync(candidate)) return candidate; + const parent = path.dirname(dir); + if (parent === dir) break; // reached filesystem root + dir = parent; + } + return null; +} + +function createProgram(ts: TsModule, tsconfigPath: string): import('typescript').Program | null { + try { + const configFile = ts.readConfigFile(tsconfigPath, ts.sys.readFile); + if (configFile.error) { + debug( + `ts-resolver: tsconfig error — ${ts.flattenDiagnosticMessageText(configFile.error.messageText, '\n')}`, + ); + return null; + } + + const parsed = ts.parseJsonConfigFileContent( + configFile.config, + ts.sys, + path.dirname(tsconfigPath), + ); + + if (parsed.errors.length > 0) { + for (const err of parsed.errors) { + debug( + `ts-resolver: tsconfig parse warning — ${ts.flattenDiagnosticMessageText(err.messageText, '\n')}`, + ); + } + } + + if (parsed.fileNames.length === 0) { + // Empty fileNames usually means a solution-style tsconfig that only has + // `references:[]` and no `files`/`include`. In this case ts.createProgram + // would receive [tsconfigPath] as source — a JSON file — and every + // subsequent getSourceFile() call for real .ts files returns undefined, + // producing zero enrichment silently. Warn instead of wasting time. + debug( + 'ts-resolver: tsconfig resolved no source files (solution-style tsconfig?) 
— skipping enrichment', + ); + return null; + } + + return ts.createProgram({ + rootNames: parsed.fileNames, + options: { + ...parsed.options, + noEmit: true, + skipLibCheck: true, + }, + }); + } catch (err) { + debug(`ts-resolver: failed to create TS program — ${err}`); + return null; + } +} + +/** + * Walk a single SourceFile and update typeMap entries for: + * - Variable declarations: const/let/var names with inferred or annotated types + * - Function/method parameters with type annotations + * + * Keys are scoped as `<function>::<name>` to avoid collisions across functions + * that share parameter names (e.g., two functions both taking `service`). The + * call-edge resolver looks up by bare name, so we only write bare-name entries when + * every declaration of that name in this file resolves to the same qualified type. + * + * Entries already at confidence 1.0 (e.g., `new Foo()` from tree-sitter) are + * left unchanged. New entries from the compiler are added at confidence 1.0. + */ +function enrichSourceFile( + ts: TsModule, + sourceFile: import('typescript').SourceFile, + checker: import('typescript').TypeChecker, + typeMap: Map, +): void { + // First pass: collect resolved types keyed by bare identifier name. + // Track both the short name (for typeMap writes) and the fully-qualified name + // (module-path-prefixed) for ambiguity detection. Two classes may share the + // same short name (e.g., `OrderService` from two different modules), and + // symbol.getName() returns the declared name — not the local alias — so + // deduplication on short names alone would incorrectly collapse them.
+ const nameToEntries = new Map(); + + function visit(node: import('typescript').Node): void { + let identName: string | null = null; + let nameNode: import('typescript').Identifier | null = null; + + if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) { + identName = node.name.text; + nameNode = node.name; + } else if (ts.isParameter(node) && ts.isIdentifier(node.name)) { + identName = node.name.text; + nameNode = node.name; + } + + if (identName && nameNode) { + const resolved = resolveTypeName(ts, nameNode, checker); + if (resolved) { + const existing = nameToEntries.get(identName); + if (existing) { + existing.push(resolved); + } else { + nameToEntries.set(identName, [resolved]); + } + } + } + + ts.forEachChild(node, visit); + } + ts.forEachChild(sourceFile, visit); + + // Second pass: only write unambiguous entries (single unique qualified type for a name) + for (const [name, entries] of nameToEntries) { + const uniqueQualified = [...new Set(entries.map((e) => e.qualifiedName))]; + if (uniqueQualified.length !== 1) continue; // ambiguous across modules — skip + // entries is non-empty because we only set() on first occurrence and push() after — + // TypeScript's noUncheckedIndexedAccess can flag [0] access, so assert the type. + const first = entries[0]; + if (!first) continue; + const shortName = first.shortName; + const existing = typeMap.get(name); + if (!existing || existing.confidence < 1.0) { + typeMap.set(name, { type: shortName, confidence: 1.0 }); + } + } +} + +/** + * Walk a SourceFile and populate returnTypeMap with compiler-verified return types. + * Handles function declarations, method declarations, and arrow/function-expression + * variable initialisers at module scope. Methods are stored as `ClassName.methodName`. 
+ * + * Only captures declarations at module scope or directly inside a class body — + * local functions nested inside method bodies are excluded to avoid spurious + * cross-file type matches (same guard as enrichSourceFile's "unambiguous names only" + * heuristic). Recursion stops at function/method body boundaries. + * + * Async functions returning Promise<T> are unwrapped: the inner type argument T is + * used so that async methods receive a returnTypeMap entry just like sync ones. + */ +function enrichReturnTypeMap( + ts: TsModule, + sourceFile: import('typescript').SourceFile, + checker: import('typescript').TypeChecker, + returnTypeMap: Map, +): void { + let currentClass: string | null = null; + + /** + * Resolve the concrete return type name for a signature, unwrapping + * Promise<T> so async functions contribute their inner type. + */ + function resolveReturnTypeName(sig: import('typescript').Signature | undefined): string | null { + if (!sig) return null; + try { + let retType = checker.getReturnTypeOfSignature(sig); + + // Unwrap Promise<T> → T so async functions get a useful returnTypeMap entry. + const outerSym = retType.getSymbol() ?? retType.aliasSymbol; + if (outerSym?.getName() === 'Promise') { + const args = checker.getTypeArguments(retType as import('typescript').TypeReference); + if (args.length > 0) retType = args[0]!; + } + + const sym = retType.getSymbol() ??
retType.aliasSymbol; + if (!sym) return null; + const name = sym.getName(); + if (!name || name === '__type' || name === '__object' || SKIP_TYPE_NAMES.has(name)) + return null; + return name; + } catch { + return null; + } + } + + function writeEntry(fnName: string, sigNode: import('typescript').SignatureDeclaration): void { + const typeName = resolveReturnTypeName(checker.getSignatureFromDeclaration(sigNode)); + if (typeName) { + const existing = returnTypeMap.get(fnName); + if (!existing || existing.confidence < 1.0) + returnTypeMap.set(fnName, { type: typeName, confidence: 1.0 }); + } + } + + /** + * Visit nodes at the current lexical scope (module level or class body). + * Does NOT recurse into function/method bodies to avoid capturing local + * helper functions under bare names. + */ + function visit(node: import('typescript').Node): void { + if (ts.isClassDeclaration(node) || ts.isClassExpression(node)) { + // Enter class scope: visit direct children (method/property declarations). + const saved = currentClass; + currentClass = + (node as import('typescript').ClassDeclaration | import('typescript').ClassExpression).name + ?.text ?? null; + ts.forEachChild(node, visit); + currentClass = saved; + return; // class body fully handled — stop here + } + + if (ts.isFunctionDeclaration(node) && node.name) { + // Module-level function declaration: record and stop (no body descent). + writeEntry(node.name.text, node); + return; + } + + if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) { + // Class method: record as ClassName.methodName and stop. + const fnName = currentClass ? `${currentClass}.${node.name.text}` : node.name.text; + writeEntry(fnName, node); + return; + } + + if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name) && node.initializer) { + // Arrow/function-expression assigned to a variable at the current scope. 
+ // Because we never recurse into function bodies, any VariableDeclaration + // we see here is guaranteed to be at module scope or inside a class body + // (not inside a method body), making the bare name safe for cross-file use. + const init = node.initializer; + if (ts.isArrowFunction(init) || ts.isFunctionExpression(init)) { + writeEntry(node.name.text, init); + } + return; // variable declaration fully handled — stop here + } + + // For all other node kinds (VariableStatement, VariableDeclarationList, + // ExportDeclaration, etc.) recurse to reach nested function/class/var nodes. + ts.forEachChild(node, visit); + } + + ts.forEachChild(sourceFile, visit); +} + +/** + * Walk a SourceFile and push call assignments (`const x = fn()`) whose variable + * is not yet in typeMap into callAssignments for cross-file propagation. + * Phase 8.1 already resolved the common case into typeMap; this captures the rest. + * + * Uses the same two-pass "unambiguous names only" strategy as `enrichSourceFile`: + * collect all candidates first, then only push entries where a given `varName` + * maps to exactly one distinct `calleeName`. This prevents multiple methods in the + * same file that each bind a different imported function to a common local name + * (e.g., `const result = getA()` in one method, `const result = getB()` in + * another) from both landing in `callAssignments`, which would cause + * `propagateReturnTypesAcrossFiles` to silently resolve one arbitrarily. + */ +function enrichCallAssignments( + ts: TsModule, + sourceFile: import('typescript').SourceFile, + typeMap: Map, + callAssignments: CallAssignment[], +): void { + // First pass: collect all candidates keyed by varName. 
+ const candidates = new Map(); + + function visit(node: import('typescript').Node): void { + if ( + ts.isVariableDeclaration(node) && + ts.isIdentifier(node.name) && + node.initializer && + ts.isCallExpression(node.initializer) + ) { + const varName = node.name.text; + if (!typeMap.has(varName)) { + const call = node.initializer; + let calleeName: string | null = null; + let receiverTypeName: string | undefined; + + if (ts.isIdentifier(call.expression)) { + calleeName = call.expression.text; + } else if (ts.isPropertyAccessExpression(call.expression)) { + calleeName = call.expression.name.text; + const obj = call.expression.expression; + if (ts.isIdentifier(obj)) { + const entry = typeMap.get(obj.text); + if (entry && typeof entry === 'object') receiverTypeName = entry.type; + } + } + + if (calleeName) { + const ca: CallAssignment = { varName, calleeName, receiverTypeName }; + const existing = candidates.get(varName); + if (existing) { + existing.push(ca); + } else { + candidates.set(varName, [ca]); + } + } + } + } + + ts.forEachChild(node, visit); + } + + ts.forEachChild(sourceFile, visit); + + // Second pass: only push entries where varName maps to exactly one distinct + // calleeName. Ambiguous varNames (same name, different callees across scopes) + // are excluded to avoid silently resolving the wrong type cross-file. + for (const entries of candidates.values()) { + const uniqueCallees = new Set(entries.map((e) => e.calleeName)); + if (uniqueCallees.size === 1) { + callAssignments.push(entries[0] as CallAssignment); + } + } +} + +/** + * Ask the type checker for the type of a name node and return both the short + * declared name and the fully-qualified module-prefixed name. Returns null when + * the type is a primitive, anonymous, or otherwise not useful for resolution. 
+ * + * The fully-qualified name (e.g., `"./legacy/service".OrderService`) is used for + * ambiguity detection — it distinguishes two classes that share the same short + * declaration name but come from different modules. The short name is what the + * call-edge resolver looks up in the typeMap. + */ +function resolveTypeName( + ts: TsModule, + nameNode: import('typescript').Identifier, + checker: import('typescript').TypeChecker, +): { shortName: string; qualifiedName: string } | null { + try { + const type = checker.getTypeAtLocation(nameNode); + const symbol = type.getSymbol() ?? type.aliasSymbol; + if (!symbol) return null; + const shortName = symbol.getName(); + if ( + !shortName || + shortName === '__type' || + shortName === '__object' || + SKIP_TYPE_NAMES.has(shortName) || + // Skip generic type-parameter symbols (T, E, K, etc.) and type aliases — + // neither corresponds to a real class, and recording them would overwrite + // useful lower-confidence heuristic entries, silently dropping call edges. + !!(symbol.flags & (ts.SymbolFlags.TypeParameter | ts.SymbolFlags.TypeAlias)) + ) + return null; + // getFullyQualifiedName returns e.g. `"./path/to/module".ClassName` for + // imported symbols — unique across modules even when short names collide. + const qualifiedName = checker.getFullyQualifiedName(symbol); + return { shortName, qualifiedName }; + } catch { + return null; + } +} diff --git a/tests/unit/ts-resolver.test.ts b/tests/unit/ts-resolver.test.ts new file mode 100644 index 00000000..f60e2b36 --- /dev/null +++ b/tests/unit/ts-resolver.test.ts @@ -0,0 +1,261 @@ +/** + * Unit tests for ts-resolver Phase 8.2 parity backfill. + * + * Verifies that enrichTypeMapWithTsc populates returnTypeMap and callAssignments + * when they are undefined (simulating the native engine path that doesn't run + * the JS extractor). Also verifies that existing returnTypeMap data (WASM path) + * is not overwritten.
+ */ +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { enrichTypeMapWithTsc } from '../../src/domain/graph/resolver/ts-resolver.js'; +import type { ExtractorOutput } from '../../src/types.js'; + +function makeTmpDir(): string { + return fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-ts-resolver-test-')); +} + +function makeFileSymbols( + relPath: string, + returnTypeMap?: ExtractorOutput['returnTypeMap'], +): Map { + const fileSymbols = new Map(); + fileSymbols.set(relPath, { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + typeMap: new Map(), + returnTypeMap, + callAssignments: returnTypeMap !== undefined ? [] : undefined, + }); + return fileSymbols; +} + +describe('enrichTypeMapWithTsc Phase 8.2 backfill', () => { + let tmpDir: string; + + beforeEach(() => { + tmpDir = makeTmpDir(); + // Minimal tsconfig that includes all .ts files in the directory + fs.writeFileSync( + path.join(tmpDir, 'tsconfig.json'), + JSON.stringify({ compilerOptions: { strict: false }, include: ['./**/*.ts'] }), + ); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('backfills returnTypeMap for function declarations when undefined (native engine path)', async () => { + const srcFile = 'service.ts'; + fs.writeFileSync( + path.join(tmpDir, srcFile), + ` +class User { name: string = ''; } +function createUser(): User { return new User(); } +`, + ); + + const fileSymbols = makeFileSymbols(srcFile); // returnTypeMap = undefined + await enrichTypeMapWithTsc(tmpDir, fileSymbols); + + const symbols = fileSymbols.get(srcFile)!; + expect(symbols.returnTypeMap).toBeInstanceOf(Map); + expect(symbols.returnTypeMap!.get('createUser')).toEqual({ type: 'User', confidence: 1.0 }); + }); + + it('backfills returnTypeMap for qualified method names', async () => { + const srcFile = 'repo.ts'; + fs.writeFileSync( + 
path.join(tmpDir, srcFile), + ` +class Profile {} +class UserService { + getProfile(): Profile { return new Profile(); } +} +`, + ); + + const fileSymbols = makeFileSymbols(srcFile); + await enrichTypeMapWithTsc(tmpDir, fileSymbols); + + const symbols = fileSymbols.get(srcFile)!; + expect(symbols.returnTypeMap!.get('UserService.getProfile')).toEqual({ + type: 'Profile', + confidence: 1.0, + }); + }); + + it('backfills returnTypeMap for arrow function variable initialisers', async () => { + const srcFile = 'factory.ts'; + fs.writeFileSync( + path.join(tmpDir, srcFile), + ` +class Widget {} +const makeWidget = (): Widget => new Widget(); +`, + ); + + const fileSymbols = makeFileSymbols(srcFile); + await enrichTypeMapWithTsc(tmpDir, fileSymbols); + + const symbols = fileSymbols.get(srcFile)!; + expect(symbols.returnTypeMap!.get('makeWidget')).toEqual({ type: 'Widget', confidence: 1.0 }); + }); + + it('backfills callAssignments for call-expression variable assignments', async () => { + const srcFile = 'consumer.ts'; + fs.writeFileSync( + path.join(tmpDir, srcFile), + ` +declare function getRepo(): any; +const repo = getRepo(); +`, + ); + + const fileSymbols = makeFileSymbols(srcFile); + await enrichTypeMapWithTsc(tmpDir, fileSymbols); + + const symbols = fileSymbols.get(srcFile)!; + expect(symbols.callAssignments).toBeDefined(); + const ca = symbols.callAssignments!.find((c) => c.varName === 'repo'); + expect(ca).toBeDefined(); + expect(ca!.calleeName).toBe('getRepo'); + }); + + it('excludes ambiguous callAssignments where same varName maps to different callees', async () => { + const srcFile = 'ambiguous.ts'; + fs.writeFileSync( + path.join(tmpDir, srcFile), + ` +declare function getA(): any; +declare function getB(): any; +declare function getUnique(): any; + +class MyService { + methodOne() { + const result = getA(); + return result; + } + methodTwo() { + // Same varName 'result' but different callee — should be excluded as ambiguous + const result = getB(); + 
return result; + } +} + +// Unambiguous: only one binding across the file +const unique = getUnique(); +`, + ); + + const fileSymbols = makeFileSymbols(srcFile); + await enrichTypeMapWithTsc(tmpDir, fileSymbols); + + const symbols = fileSymbols.get(srcFile)!; + expect(symbols.callAssignments).toBeDefined(); + // 'result' is ambiguous (getA in one method, getB in another) — must be excluded + const resultEntries = symbols.callAssignments!.filter((c) => c.varName === 'result'); + expect(resultEntries).toHaveLength(0); + // 'unique' is unambiguous — must be included + const uniqueEntry = symbols.callAssignments!.find((c) => c.varName === 'unique'); + expect(uniqueEntry).toBeDefined(); + expect(uniqueEntry!.calleeName).toBe('getUnique'); + }); + + it('does NOT overwrite returnTypeMap when already set (JS/WASM engine path)', async () => { + const srcFile = 'existing.ts'; + fs.writeFileSync( + path.join(tmpDir, srcFile), + ` +class Foo {} +function makeFoo(): Foo { return new Foo(); } +`, + ); + + const preExisting = new Map([['makeFoo', { type: 'OriginalType', confidence: 0.85 }]]); + const fileSymbols = makeFileSymbols(srcFile, preExisting); // returnTypeMap already set + await enrichTypeMapWithTsc(tmpDir, fileSymbols); + + const symbols = fileSymbols.get(srcFile)!; + // returnTypeMap should be the same object (not replaced) + expect(symbols.returnTypeMap).toBe(preExisting); + expect(symbols.returnTypeMap!.get('makeFoo')).toEqual({ + type: 'OriginalType', + confidence: 0.85, + }); + }); + + it('backfills returnTypeMap for async functions by unwrapping Promise', async () => { + const srcFile = 'async-service.ts'; + fs.writeFileSync( + path.join(tmpDir, srcFile), + ` +class Order {} +async function fetchOrder(): Promise<Order> { return new Order(); } +class OrderService { + async loadOrder(): Promise<Order> { return new Order(); } +} +`, + ); + + const fileSymbols = makeFileSymbols(srcFile); + await enrichTypeMapWithTsc(tmpDir, fileSymbols); + + const symbols =
fileSymbols.get(srcFile)!; + // Async functions must be unwrapped — Promise itself is in SKIP_TYPE_NAMES + expect(symbols.returnTypeMap!.get('fetchOrder')).toEqual({ type: 'Order', confidence: 1.0 }); + expect(symbols.returnTypeMap!.get('OrderService.loadOrder')).toEqual({ + type: 'Order', + confidence: 1.0, + }); + }); + + it('does NOT capture local (method-body-scoped) helper functions in returnTypeMap', async () => { + const srcFile = 'nested.ts'; + fs.writeFileSync( + path.join(tmpDir, srcFile), + ` +class InnerResult {} +class MyService { + doWork(): void { + // This local helper must NOT appear in returnTypeMap under the bare name 'helper' + const helper = (): InnerResult => new InnerResult(); + helper(); + } +} +`, + ); + + const fileSymbols = makeFileSymbols(srcFile); + await enrichTypeMapWithTsc(tmpDir, fileSymbols); + + const symbols = fileSymbols.get(srcFile)!; + // 'helper' is local to MyService.doWork — must not pollute returnTypeMap + expect(symbols.returnTypeMap!.has('helper')).toBe(false); + }); + + it('skips non-TS files even when returnTypeMap is undefined', async () => { + const fileSymbols = new Map(); + fileSymbols.set('index.js', { + definitions: [], + calls: [], + imports: [], + classes: [], + exports: [], + typeMap: new Map(), + returnTypeMap: undefined, + }); + + await enrichTypeMapWithTsc(tmpDir, fileSymbols); + + // JS files are not processed — returnTypeMap stays undefined + const symbols = fileSymbols.get('index.js')!; + expect(symbols.returnTypeMap).toBeUndefined(); + }); +}); From 11a1126fd91bf69f38aef66fa6f9cbce62b49244 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Tue, 2 Jun 2026 01:08:20 -0600 Subject: [PATCH 05/14] feat(resolver): field-based points-to analysis for higher-order calls (phase 8.3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements a per-file Andersen-style constraint solver that resolves higher-order function calls missed by the existing callback heuristic. 
The extractor now records FnRefBinding entries (const fn = handler, const fn = obj.method) via handleVarDeclaratorTypeMap. In the JS edge- building path, buildPointsToMapForFile seeds the pts map from local definitions and imported names, then propagates alias constraints to fixed-point (≤50 iterations). When resolveCallTargets returns empty for a dynamic, receiver-less call, resolveViaPointsTo is tried as a fallback and the resolved alias goes back through the normal resolution chain — so cross-module flows work transparently via importedNames. Confidence on pts-resolved edges is penalised by PROPAGATION_HOP_PENALTY (0.1) to reflect the indirection. The fallback is strictly additive and conservative: it only fires when primary resolution already failed, so no existing edges are affected. Expected impact: +5–10pp on caller coverage for callback-heavy code (Express/Koa middleware, Node.js EventEmitter patterns, strategy objects). Impact: 14 functions changed, 12 affected --- .../graph/builder/stages/build-edges.ts | 49 ++++++++ src/domain/graph/resolver/points-to.ts | 105 ++++++++++++++++++ src/extractors/javascript.ts | 34 +++++- src/types.ts | 20 ++++ 4 files changed, 205 insertions(+), 3 deletions(-) create mode 100644 src/domain/graph/resolver/points-to.ts diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index ed895daf..18506d1b 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -22,6 +22,8 @@ import type { TypeMapEntry, } from '../../../../types.js'; import { computeConfidence } from '../../resolve.js'; +import type { PointsToMap } from '../../resolver/points-to.js'; +import { buildPointsToMap, resolveViaPointsTo } from '../../resolver/points-to.js'; import { enrichTypeMapWithTsc } from '../../resolver/ts-resolver.js'; import { type CallNodeLookup, @@ -562,6 +564,7 @@ function buildCallEdgesJS( const importedNames = 
buildImportedNamesMap(ctx, relPath, symbols, rootDir); const typeMap: Map = symbols.typeMap || new Map(); const seenCallEdges = new Set(); + const ptsMap = buildPointsToMapForFile(symbols, importedNames); buildFileCallEdges( relPath, @@ -572,6 +575,7 @@ function buildCallEdgesJS( lookup, allEdgeRows, typeMap, + ptsMap, ); buildClassHierarchyEdges(ctx, relPath, symbols, allEdgeRows); } @@ -616,6 +620,23 @@ function makeContextLookup(ctx: PipelineContext, getNodeIdStmt: NodeIdStmt): Cal }; } +/** + * Build a per-file points-to map for Phase 8.3 alias resolution. + * Returns null fast when the file has no function-reference bindings. + */ +function buildPointsToMapForFile( + symbols: ExtractorOutput, + importedNames: Map, +): PointsToMap | null { + if (!symbols.fnRefBindings?.length) return null; + const defNames = new Set( + symbols.definitions + .filter((d) => d.kind === 'function' || d.kind === 'method') + .map((d) => d.name), + ); + return buildPointsToMap(symbols.fnRefBindings, defNames, importedNames); +} + function buildFileCallEdges( relPath: string, symbols: ExtractorOutput, @@ -625,6 +646,7 @@ function buildFileCallEdges( lookup: CallNodeLookup, allEdgeRows: EdgeRowTuple[], typeMap: Map, + ptsMap?: PointsToMap | null, ): void { for (const call of symbols.calls) { if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue; @@ -648,6 +670,33 @@ function buildFileCallEdges( } } + // Phase 8.3: points-to fallback for unresolved dynamic identifier calls. + // When primary resolution finds nothing and the call is flagged dynamic (i.e. + // it was emitted by extractCallbackReferenceCalls as a named function reference), + // check whether the call name is an alias in the pts map and retry resolution + // with each concrete target. Confidence is penalised by one hop to reflect the + // extra indirection. 
+ if (targets.length === 0 && call.dynamic && !call.receiver && ptsMap) { + for (const alias of resolveViaPointsTo(call.name, ptsMap)) { + const { targets: aliasTargets, importedFrom: aliasFrom } = resolveCallTargets( + lookup, + { name: alias }, + relPath, + importedNames, + typeMap as Map, + ); + for (const t of aliasTargets) { + const edgeKey = `${caller.id}|${t.id}`; + if (t.id !== caller.id && !seenCallEdges.has(edgeKey)) { + seenCallEdges.add(edgeKey); + const conf = + computeConfidence(relPath, t.file, aliasFrom ?? null) - PROPAGATION_HOP_PENALTY; + if (conf > 0) allEdgeRows.push([caller.id, t.id, 'calls', conf, isDynamic]); + } + } + } + } + if ( call.receiver && !BUILTIN_RECEIVERS.has(call.receiver) && diff --git a/src/domain/graph/resolver/points-to.ts b/src/domain/graph/resolver/points-to.ts new file mode 100644 index 00000000..fef570c2 --- /dev/null +++ b/src/domain/graph/resolver/points-to.ts @@ -0,0 +1,105 @@ +/** + * Phase 8.3 — Lightweight field-based points-to analysis for JS/TS. + * + * Resolves higher-order function calls where a named variable is an alias for + * a function that the syntactic extractor cannot connect directly. Common + * patterns resolved: + * + * const fn = handler; arr.map(fn) → edge to handler + * const fn = obj.method; router.use(fn) → edge to obj.method + * const m = authMiddleware; app.use(m) → edge to authMiddleware + * + * Algorithm: Andersen-style inclusion-based analysis with allocation-site + * abstraction and fixed-point constraint propagation. + * + * Field-based (not field-sensitive): all instances of obj.field are treated as + * a single abstract location, matching ACG's sweet spot of 99% precision. + * + * Scope: intra-module + cross-module via importedNames (the importedNames map + * that build-edges.ts already builds per file is the cross-module link — if + * a variable aliases an imported name, resolveCallTargets follows it). 
+ */ +import type { FnRefBinding } from '../../../types.js'; + +export type PointsToMap = Map>; + +/** Maximum fixed-point iterations before bailing out (prevents divergence). */ +const MAX_SOLVER_ITERATIONS = 50; + +/** + * Build a points-to map for one file. + * + * Seeds concrete function names (locally-defined functions + imported names), + * then propagates assignments through fixed-point iteration until stable. + * + * Each "concrete target" in a pts set is a name that `resolveCallTargets` can + * look up — either a locally-defined function name (found via byNameAndFile) or + * an imported name (found via importedNames → byNameAndFile in the source file). + * + * @param fnRefBindings - identifier/member-expr bindings from the extractor + * @param definitionNames - locally-defined callable names in this file + * @param importedNames - names imported into this file (name → resolved file) + */ +export function buildPointsToMap( + fnRefBindings: readonly FnRefBinding[], + definitionNames: ReadonlySet, + importedNames: ReadonlyMap, +): PointsToMap { + const pts: PointsToMap = new Map(); + + // Seed: each locally-defined function points to itself. + for (const name of definitionNames) { + pts.set(name, new Set([name])); + } + + // Seed: each imported name points to itself (importedNames resolves it to + // the source file when resolveCallTargets is called with that name). + for (const name of importedNames.keys()) { + if (!pts.has(name)) pts.set(name, new Set([name])); + } + + if (fnRefBindings.length === 0) return pts; + + // Build constraint list: pts(lhs) ⊇ pts(rhsKey). + // For member expressions (const fn = obj.method), key is "obj.method". + // These composite keys won't be in pts unless a prior statement seeded them + // (e.g. handlers.auth = authMiddleware); they produce no flow otherwise, + // which is safe — no false edges. + const constraints: Array<{ lhs: string; rhsKey: string }> = fnRefBindings.map((b) => ({ + lhs: b.lhs, + rhsKey: b.rhsReceiver ? 
`${b.rhsReceiver}.${b.rhs}` : b.rhs, + })); + + // Fixed-point iteration: propagate pts sets until no new information flows. + for (let iter = 0; iter < MAX_SOLVER_ITERATIONS; iter++) { + let changed = false; + for (const { lhs, rhsKey } of constraints) { + const rhsPts = pts.get(rhsKey); + if (!rhsPts || rhsPts.size === 0) continue; + let lhsPts = pts.get(lhs); + if (!lhsPts) { + lhsPts = new Set(); + pts.set(lhs, lhsPts); + } + const before = lhsPts.size; + for (const target of rhsPts) lhsPts.add(target); + if (lhsPts.size !== before) changed = true; + } + if (!changed) break; + } + + return pts; +} + +/** + * Return the concrete function names that `callName` flows to, excluding + * itself to prevent circular self-reference edges. + * + * Returns an empty array when callName is not in the pts map (i.e., it is + * not an alias — the caller should fall back to normal resolution failure). + */ +export function resolveViaPointsTo(callName: string, pts: PointsToMap): string[] { + const targets = pts.get(callName); + if (!targets) return []; + return [...targets].filter((t) => t !== callName); +} diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts index 59dbcb40..8b1e0265 100644 --- a/src/extractors/javascript.ts +++ b/src/extractors/javascript.ts @@ -6,6 +6,7 @@ import type { Definition, Export, ExtractorOutput, + FnRefBinding, Import, SubDeclaration, TreeSitterNode, @@ -314,6 +315,7 @@ function extractSymbolsQuery(tree: TreeSitterTree, query: TreeSitterQuery): Extr const typeMap: Map = new Map(); const returnTypeMap: Map = new Map(); const callAssignments: CallAssignment[] = []; + const fnRefBindings: FnRefBinding[] = []; const matches = query.matches(tree.rootNode); @@ -334,7 +336,7 @@ function extractSymbolsQuery(tree: TreeSitterTree, query: TreeSitterQuery): Extr extractReturnTypeMapWalk(tree.rootNode, returnTypeMap); // Extract typeMap with intra-file return-type propagation - extractTypeMapWalk(tree.rootNode, typeMap, returnTypeMap, 
callAssignments); + extractTypeMapWalk(tree.rootNode, typeMap, returnTypeMap, callAssignments, fnRefBindings); // Extract definitions from destructured bindings (query patterns don't match object_pattern) extractDestructuredBindingsWalk(tree.rootNode, definitions); @@ -348,6 +350,7 @@ function extractSymbolsQuery(tree: TreeSitterTree, query: TreeSitterQuery): Extr typeMap, returnTypeMap, callAssignments, + fnRefBindings, }; } @@ -578,13 +581,20 @@ function extractSymbolsWalk(tree: TreeSitterTree): ExtractorOutput { typeMap: new Map(), returnTypeMap: new Map(), callAssignments: [], + fnRefBindings: [], }; walkJavaScriptNode(tree.rootNode, ctx); // Phase 8.2: Extract function return types first so propagation can use them extractReturnTypeMapWalk(tree.rootNode, ctx.returnTypeMap!); // Populate typeMap with type annotations and intra-file return-type propagation - extractTypeMapWalk(tree.rootNode, ctx.typeMap!, ctx.returnTypeMap, ctx.callAssignments); + extractTypeMapWalk( + tree.rootNode, + ctx.typeMap!, + ctx.returnTypeMap, + ctx.callAssignments, + ctx.fnRefBindings, + ); return ctx; } @@ -1359,12 +1369,13 @@ function extractTypeMapWalk( typeMap: Map, returnTypeMap?: Map, callAssignments?: CallAssignment[], + fnRefBindings?: FnRefBinding[], ): void { function walk(node: TreeSitterNode, depth: number): void { if (depth >= MAX_WALK_DEPTH) return; const t = node.type; if (t === 'variable_declarator') { - handleVarDeclaratorTypeMap(node, typeMap, returnTypeMap, callAssignments); + handleVarDeclaratorTypeMap(node, typeMap, returnTypeMap, callAssignments, fnRefBindings); } else if (t === 'required_parameter' || t === 'optional_parameter') { handleParamTypeMap(node, typeMap); } @@ -1381,6 +1392,7 @@ function handleVarDeclaratorTypeMap( typeMap: Map, returnTypeMap?: Map, callAssignments?: CallAssignment[], + fnRefBindings?: FnRefBinding[], ): void { const nameN = node.childForFieldName('name'); if (!nameN || nameN.type !== 'identifier') return; @@ -1388,6 +1400,22 @@ 
function handleVarDeclaratorTypeMap( const typeAnno = findChild(node, 'type_annotation'); const valueN = node.childForFieldName('value'); + // Phase 8.3: record function-reference bindings before any type-analysis early returns. + // Captures `const fn = handler` (identifier) and `const fn = obj.method` (member_expression). + // call_expression and new_expression are intentionally excluded — those are handled by + // Phase 8.2 callAssignments and the constructor type-map respectively. + if (fnRefBindings && valueN) { + if (valueN.type === 'identifier' && !BUILTIN_GLOBALS.has(valueN.text)) { + fnRefBindings.push({ lhs: nameN.text, rhs: valueN.text }); + } else if (valueN.type === 'member_expression') { + const prop = valueN.childForFieldName('property'); + const obj = valueN.childForFieldName('object'); + if (prop && obj?.type === 'identifier' && !BUILTIN_GLOBALS.has(obj.text)) { + fnRefBindings.push({ lhs: nameN.text, rhs: prop.text, rhsReceiver: obj.text }); + } + } + } + // Constructor on the same declaration wins over annotation: the runtime type is // what matters for call resolution (e.g. `const x: Base = new Derived()` should // resolve `x.render()` to `Derived.render`, not `Base.render`). diff --git a/src/types.ts b/src/types.ts index edc52ffb..5ad43afd 100644 --- a/src/types.ts +++ b/src/types.ts @@ -536,6 +536,20 @@ export interface CallAssignment { receiverTypeName?: string; } +/** + * A function-reference binding recorded during extraction for points-to analysis (Phase 8.3). + * Captures `const fn = handler` or `const fn = obj.method` patterns where the right-hand + * side is a named function reference (not a call expression or literal). + */ +export interface FnRefBinding { + /** Variable being assigned (the left-hand side identifier). */ + lhs: string; + /** Named function/property on the right-hand side. */ + rhs: string; + /** If rhs is a member expression (obj.method), the receiver object name. 
*/ + rhsReceiver?: string; +} + /** The normalized output shape returned by every language extractor. */ export interface ExtractorOutput { definitions: Definition[]; @@ -555,6 +569,12 @@ export interface ExtractorOutput { * per-file returnTypeMap. Consumed by build-edges.ts to propagate cross-file return types. */ callAssignments?: CallAssignment[]; + /** + * Function-reference bindings for points-to analysis (Phase 8.3). + * Records `const fn = handler` and `const fn = obj.method` patterns so the + * edge builder can follow aliases when a call target has no direct definition. + */ + fnRefBindings?: FnRefBinding[]; /** WASM tree retained for downstream analysis (complexity, CFG, dataflow). */ _tree?: TreeSitterTree; /** Language identifier. */ From a37c55a570a7f5c150cbb9d25331982464d6d267 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Tue, 2 Jun 2026 01:14:54 -0600 Subject: [PATCH 06/14] fix(resolver): address review feedback on Phase 8.3 points-to analysis (#1289) - Guard member_expression property binding to static property access only (property_identifier | identifier), preventing computed subscript expressions like obj[expr] from producing pts keys that can never match - Add explanatory comment to the partial Call object passed to resolveCallTargets in the pts alias fallback path (only name is needed for alias resolution) - Add DEFAULTS.analysis.pointsToMaxIterations (50) and document the TODO to wire MAX_SOLVER_ITERATIONS through config alongside typePropagationDepth - Document that callable-only seeding in buildPointsToMapForFile is intentional: class aliases are handled by Phase 8.2 call-assignment propagation - Update config unit test to include the new pointsToMaxIterations field - File issue #1293 to track native Rust engine parity for fn-ref bindings --- src/domain/graph/builder/stages/build-edges.ts | 11 ++++++++++- src/domain/graph/resolver/points-to.ts | 7 ++++++- src/extractors/javascript.ts | 10 +++++++++- src/infrastructure/config.ts | 10 
+++++++--- src/types.ts | 2 ++ tests/unit/config.test.ts | 1 + 6 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index 18506d1b..d442faef 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -623,6 +623,12 @@ function makeContextLookup(ctx: PipelineContext, getNodeIdStmt: NodeIdStmt): Cal /** * Build a per-file points-to map for Phase 8.3 alias resolution. * Returns null fast when the file has no function-reference bindings. + * + * Only callable definitions (function/method) are seeded as concrete targets. + * Class and interface names are intentionally excluded — aliasing a constructor + * (`const Svc = MyService`) is an uncommon pattern that would require tracking + * `new`-expression flows separately from the alias chain. That is left to Phase + * 8.2 call-assignment propagation, which already handles constructor assignments. */ function buildPointsToMapForFile( symbols: ExtractorOutput, @@ -678,9 +684,12 @@ function buildFileCallEdges( // extra indirection. if (targets.length === 0 && call.dynamic && !call.receiver && ptsMap) { for (const alias of resolveViaPointsTo(call.name, ptsMap)) { + // Resolve the concrete alias target. Only `name` is needed here — receiver + // and line are not relevant for alias resolution (we are looking up the + // aliased function by name, not dispatching a method call). 
const { targets: aliasTargets, importedFrom: aliasFrom } = resolveCallTargets( lookup, - { name: alias }, + { name: alias, dynamic: false }, relPath, importedNames, typeMap as Map, diff --git a/src/domain/graph/resolver/points-to.ts b/src/domain/graph/resolver/points-to.ts index fef570c2..2e688fda 100644 --- a/src/domain/graph/resolver/points-to.ts +++ b/src/domain/graph/resolver/points-to.ts @@ -23,7 +23,12 @@ import type { FnRefBinding } from '../../../types.js'; export type PointsToMap = Map>; -/** Maximum fixed-point iterations before bailing out (prevents divergence). */ +/** + * Maximum fixed-point iterations before bailing out (prevents divergence). + * Mirrors `DEFAULTS.analysis.pointsToMaxIterations` in config.ts. + * TODO(Phase 8.3): thread config through buildPointsToMap so this can be tuned + * per-repo via `.codegraphrc.json` (tracked alongside typePropagationDepth). + */ const MAX_SOLVER_ITERATIONS = 50; /** diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts index 8b1e0265..7c3cbe4c 100644 --- a/src/extractors/javascript.ts +++ b/src/extractors/javascript.ts @@ -1410,7 +1410,15 @@ function handleVarDeclaratorTypeMap( } else if (valueN.type === 'member_expression') { const prop = valueN.childForFieldName('property'); const obj = valueN.childForFieldName('object'); - if (prop && obj?.type === 'identifier' && !BUILTIN_GLOBALS.has(obj.text)) { + // Guard: only static property access (property_identifier or identifier), not + // computed subscript expressions like obj[expr] where prop.text would be the + // full expression rather than a simple name — those can never match pts keys. 
+ if ( + prop && + (prop.type === 'property_identifier' || prop.type === 'identifier') && + obj?.type === 'identifier' && + !BUILTIN_GLOBALS.has(obj.text) + ) { fnRefBindings.push({ lhs: nameN.text, rhs: prop.text, rhsReceiver: obj.text }); } } diff --git a/src/infrastructure/config.ts b/src/infrastructure/config.ts index 2b482893..645fdd5a 100644 --- a/src/infrastructure/config.ts +++ b/src/infrastructure/config.ts @@ -80,10 +80,14 @@ export const DEFAULTS = { briefImporterDepth: 5, briefHighRiskCallers: 10, briefMediumRiskCallers: 3, - // TODO(Phase 8.3): wire this into PROPAGATION_HOP_PENALTY / MAX_PROPAGATION_DEPTH once - // config is threaded through to extractSymbols. Currently the depth is controlled by - // the hardcoded MAX_PROPAGATION_DEPTH constant in src/extractors/javascript.ts. + // TODO(Phase 8.3): wire these into the points-to solver and type-propagation path + // once config is threaded through to extractSymbols / buildPointsToMap. Currently + // controlled by hardcoded constants in src/extractors/javascript.ts + // (MAX_PROPAGATION_DEPTH, PROPAGATION_HOP_PENALTY) and in + // src/domain/graph/resolver/points-to.ts (MAX_SOLVER_ITERATIONS). typePropagationDepth: 3, + /** Maximum fixed-point iterations for the Phase 8.3 points-to solver. */ + pointsToMaxIterations: 50, }, community: { resolution: 1.0, diff --git a/src/types.ts b/src/types.ts index 5ad43afd..3d3f3339 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1243,6 +1243,8 @@ export interface CodegraphConfig { briefMediumRiskCallers: number; /** Maximum chain depth for inter-procedural return-type propagation (Phase 8.2). */ typePropagationDepth: number; + /** Maximum fixed-point iterations for the Phase 8.3 points-to solver. 
*/ + pointsToMaxIterations: number; }; community: { diff --git a/tests/unit/config.test.ts b/tests/unit/config.test.ts index 91f62c7e..4e9052c1 100644 --- a/tests/unit/config.test.ts +++ b/tests/unit/config.test.ts @@ -96,6 +96,7 @@ describe('DEFAULTS', () => { briefHighRiskCallers: 10, briefMediumRiskCallers: 3, typePropagationDepth: 3, + pointsToMaxIterations: 50, }); }); From 7071a9596231f5fed079b00d8dd5ab1a25dcc603 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Tue, 2 Jun 2026 17:56:57 -0600 Subject: [PATCH 07/14] fix(resolver): remove unknown 'dynamic' field from resolveCallTargets call object (#1289) --- src/domain/graph/builder/stages/build-edges.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index d442faef..58734b55 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -689,7 +689,7 @@ function buildFileCallEdges( // aliased function by name, not dispatching a method call). const { targets: aliasTargets, importedFrom: aliasFrom } = resolveCallTargets( lookup, - { name: alias, dynamic: false }, + { name: alias }, relPath, importedNames, typeMap as Map, From d794d47271a2deb11463e5a3bcab5322c28dec21 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Tue, 2 Jun 2026 22:17:36 -0600 Subject: [PATCH 08/14] fix(resolver): prevent pts-resolved edge from preempting higher-confidence direct-call edge (#1289) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A pts fallback edge for caller→target (confidence C-0.1) was added to seenCallEdges before the primary loop could process a later direct call to the same target. The direct call was silently dropped by the dedup guard, leaving the graph with a penalised confidence instead of the correct full-confidence edge. 
Fix: pts edges are tracked in a separate ptsEdgeRows map (edgeKey → allEdgeRows index) instead of seenCallEdges. When the primary loop later resolves the same caller→target pair directly, it upgrades the existing row's confidence in-place and promotes the key to seenCallEdges. --- .../graph/builder/stages/build-edges.ts | 36 +++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index 58734b55..2169aa70 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -654,6 +654,13 @@ function buildFileCallEdges( typeMap: Map, ptsMap?: PointsToMap | null, ): void { + // Tracks edges that were inserted by the pts fallback (edgeKey → allEdgeRows index). + // Kept separate from seenCallEdges so that a subsequent direct-call edge for the same + // caller→target pair can upgrade the confidence in-place rather than being silently + // dropped by the dedup guard. Once upgraded, the key moves to seenCallEdges and is + // no longer tracked here. + const ptsEdgeRows = new Map(); + for (const call of symbols.calls) { if (call.receiver && BUILTIN_RECEIVERS.has(call.receiver)) continue; @@ -669,10 +676,21 @@ function buildFileCallEdges( for (const t of targets) { const edgeKey = `${caller.id}|${t.id}`; - if (t.id !== caller.id && !seenCallEdges.has(edgeKey)) { - seenCallEdges.add(edgeKey); + if (t.id !== caller.id) { const confidence = computeConfidence(relPath, t.file, importedFrom ?? null); - allEdgeRows.push([caller.id, t.id, 'calls', confidence, isDynamic]); + if (seenCallEdges.has(edgeKey)) continue; + const ptsIdx = ptsEdgeRows.get(edgeKey); + if (ptsIdx !== undefined) { + // A pts-resolved edge already exists for this caller→target pair with a + // penalised confidence. Upgrade it to the direct-call confidence in-place, + // then promote to seenCallEdges so no further processing is needed. 
+ allEdgeRows[ptsIdx][3] = confidence; + ptsEdgeRows.delete(edgeKey); + seenCallEdges.add(edgeKey); + } else { + seenCallEdges.add(edgeKey); + allEdgeRows.push([caller.id, t.id, 'calls', confidence, isDynamic]); + } } } @@ -682,6 +700,10 @@ function buildFileCallEdges( // check whether the call name is an alias in the pts map and retry resolution // with each concrete target. Confidence is penalised by one hop to reflect the // extra indirection. + // + // Note: pts edges are added to ptsEdgeRows (not seenCallEdges) so that a later + // direct call to the same target in the same function body can upgrade confidence + // rather than being silently dropped by the dedup guard. if (targets.length === 0 && call.dynamic && !call.receiver && ptsMap) { for (const alias of resolveViaPointsTo(call.name, ptsMap)) { // Resolve the concrete alias target. Only `name` is needed here — receiver @@ -696,11 +718,13 @@ function buildFileCallEdges( ); for (const t of aliasTargets) { const edgeKey = `${caller.id}|${t.id}`; - if (t.id !== caller.id && !seenCallEdges.has(edgeKey)) { - seenCallEdges.add(edgeKey); + if (t.id !== caller.id && !seenCallEdges.has(edgeKey) && !ptsEdgeRows.has(edgeKey)) { const conf = computeConfidence(relPath, t.file, aliasFrom ?? null) - PROPAGATION_HOP_PENALTY; - if (conf > 0) allEdgeRows.push([caller.id, t.id, 'calls', conf, isDynamic]); + if (conf > 0) { + ptsEdgeRows.set(edgeKey, allEdgeRows.length); + allEdgeRows.push([caller.id, t.id, 'calls', conf, isDynamic]); + } } } } From 8019fe485cbd962ef708408464dd22216026f841 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 3 Jun 2026 00:00:21 -0600 Subject: [PATCH 09/14] fix(resolver): fix TS2532 and upgrade is_dynamic flag when promoting pts edge (#1289) Two bugs in the pts-edge upgrade path introduced by d794d47: 1. 
allEdgeRows[ptsIdx][3] = confidence produced TS2532 (Object is possibly undefined) because TypeScript cannot prove that ptsIdx is a valid allEdgeRows index, even though we set the index ourselves just before pushing the row. Fix: assign to a local variable with an explicit undefined guard before writing. 2. When a direct call upgrades a pts-fallback edge, only the confidence (index 3) was written back; the isDynamic flag (index 4) kept the value 1 from the original alias call. Downstream consumers (diff-impact, risk classifiers) would then misclassify a confirmed direct call as a dynamic dispatch. Fix: write isDynamic to index 4 alongside confidence. --- src/domain/graph/builder/stages/build-edges.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index 2169aa70..3699adeb 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -684,7 +684,11 @@ function buildFileCallEdges( // A pts-resolved edge already exists for this caller→target pair with a // penalised confidence. Upgrade it to the direct-call confidence in-place, // then promote to seenCallEdges so no further processing is needed. 
- allEdgeRows[ptsIdx][3] = confidence; + const ptsRow = allEdgeRows[ptsIdx]; + if (ptsRow) { + ptsRow[3] = confidence; + ptsRow[4] = isDynamic; // upgrade is_dynamic: direct call overrides the pts-alias dynamic flag + } ptsEdgeRows.delete(edgeKey); seenCallEdges.add(edgeKey); } else { From 40541c17cdd2fcefb3e9b4ca88be6d824f718fd3 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 3 Jun 2026 01:41:32 -0600 Subject: [PATCH 10/14] feat(resolver): extend pts analysis to native Rust path (#1290) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 8.3b: closes the parity gap introduced by #1289 where field-based points-to analysis only ran in the WASM/JS call-edge builder. **Rust changes (crates/codegraph-core)** - `types.rs`: add `FnRefBinding` napi struct; add `fn_ref_bindings` field to `FileSymbols` so the all-Rust orchestrator path can carry bindings through. - `extractors/javascript.rs`: extend `handle_var_decl` to detect `const alias = identifier` and `const alias = obj.method` patterns and push to `fn_ref_bindings`, mirroring the WASM extractor's `handleVarDeclaratorTypeMap`. - `edge_builder.rs`: add `build_points_to_map` / `resolve_via_points_to` (Andersen-style ≤50-iteration fixed-point solver, mirrors `points-to.ts`); add pts fallback in `process_file` for dynamic receiver-less calls with no primary targets; wire `fn_ref_bindings` into `FileEdgeInput`. - `build_pipeline.rs`: forward `FileSymbols.fn_ref_bindings` into `FileEdgeInput` for the all-Rust build pipeline path. **TypeScript changes** - `build-edges.ts`: add `fnRefBindings` to `NativeFileEntry` and serialize `symbols.fnRefBindings` when building the JS→Rust FFI entries. - `wasm-worker-entry.ts` / `wasm-worker-pool.ts` / `wasm-worker-protocol.ts`: propagate `fnRefBindings` through the WASM worker serialization boundary so the WASM pts path also works end-to-end (was silently dropped before). 
**Test** - `tests/integration/pts-parity.test.ts`: assert both engines emit a `processItems → handler` call edge for the `const alias = handler; arr.map(alias)` pattern. --- crates/codegraph-core/src/build_pipeline.rs | 5 + crates/codegraph-core/src/edge_builder.rs | 122 ++++++++++++++++++ .../src/extractors/javascript.rs | 19 +++ crates/codegraph-core/src/types.rs | 16 +++ .../graph/builder/stages/build-edges.ts | 3 + src/domain/wasm-worker-entry.ts | 1 + src/domain/wasm-worker-pool.ts | 1 + src/domain/wasm-worker-protocol.ts | 1 + tests/integration/pts-parity.test.ts | 105 +++++++++++++++ 9 files changed, 273 insertions(+) create mode 100644 tests/integration/pts-parity.test.ts diff --git a/crates/codegraph-core/src/build_pipeline.rs b/crates/codegraph-core/src/build_pipeline.rs index 0e5df13e..9c7ffc16 100644 --- a/crates/codegraph-core/src/build_pipeline.rs +++ b/crates/codegraph-core/src/build_pipeline.rs @@ -1382,6 +1382,11 @@ fn build_and_insert_call_edges( }) .collect(), type_map, + fn_ref_bindings: if symbols.fn_ref_bindings.is_empty() { + None + } else { + Some(symbols.fn_ref_bindings.clone()) + }, }); } diff --git a/crates/codegraph-core/src/edge_builder.rs b/crates/codegraph-core/src/edge_builder.rs index 863ddfbb..f696c0e7 100644 --- a/crates/codegraph-core/src/edge_builder.rs +++ b/crates/codegraph-core/src/edge_builder.rs @@ -4,6 +4,7 @@ use napi_derive::napi; use crate::barrel_resolution::{self, BarrelContext, ReexportRef}; use crate::import_resolution; +use crate::types::FnRefBinding; /// Kind sets for hierarchy edge resolution -- mirrors the JS constants in /// `build-edges.js` (`HIERARCHY_SOURCE_KINDS`, `EXTENDS_TARGET_KINDS`, @@ -13,6 +14,10 @@ const HIERARCHY_SOURCE_KINDS: &[&str] = &["class", "struct", "record", "enum"]; const EXTENDS_TARGET_KINDS: &[&str] = &["class", "struct", "trait", "record"]; const IMPLEMENTS_TARGET_KINDS: &[&str] = &["interface", "trait", "class"]; +/// Confidence penalty per alias hop — mirrors 
`PROPAGATION_HOP_PENALTY` in +/// `src/extractors/javascript.ts`. +const PROPAGATION_HOP_PENALTY: f64 = 0.1; + #[napi(object)] pub struct NodeInfo { pub id: u32, @@ -73,6 +78,9 @@ pub struct FileEdgeInput { pub classes: Vec, #[napi(js_name = "typeMap")] pub type_map: Vec, + /// Function-reference bindings for Phase 8.3 pts analysis (optional). + #[napi(js_name = "fnRefBindings")] + pub fn_ref_bindings: Option>, } #[napi(object)] @@ -120,6 +128,68 @@ impl<'a> EdgeContext<'a> { } } +// ── Phase 8.3: points-to analysis ───────────────────────────────────────── + +/// Build a per-file points-to map. Mirrors `buildPointsToMap` in +/// `src/domain/graph/resolver/points-to.ts`. +/// +/// Seeds every locally-defined callable and every imported name as +/// pointing to itself, then propagates assignments (`pts(lhs) ⊇ pts(rhs)`) +/// via fixed-point iteration. +fn build_points_to_map( + fn_ref_bindings: &[FnRefBinding], + def_names: &HashSet<&str>, + imported_names: &HashMap<&str, &str>, +) -> HashMap> { + let mut pts: HashMap> = HashMap::new(); + for name in def_names { + pts.entry(name.to_string()).or_default().insert(name.to_string()); + } + for name in imported_names.keys() { + pts.entry(name.to_string()).or_default().insert(name.to_string()); + } + if fn_ref_bindings.is_empty() { + return pts; + } + let constraints: Vec<(String, String)> = fn_ref_bindings.iter().map(|b| { + let rhs_key = match &b.rhs_receiver { + Some(recv) => format!("{}.{}", recv, b.rhs), + None => b.rhs.clone(), + }; + (b.lhs.clone(), rhs_key) + }).collect(); + for _ in 0..50 { + let mut changed = false; + for (lhs, rhs_key) in &constraints { + let rhs_pts: Option> = pts.get(rhs_key.as_str()) + .map(|s| s.iter().cloned().collect()); + if let Some(targets) = rhs_pts { + let entry = pts.entry(lhs.clone()).or_default(); + for t in targets { + if entry.insert(t) { changed = true; } + } + } + } + if !changed { break; } + } + pts +} + +/// Return the concrete targets `call_name` flows to, excluding 
self-references. +/// Mirrors `resolveViaPointsTo` in `src/domain/graph/resolver/points-to.ts`. +fn resolve_via_points_to<'a>( + call_name: &str, + pts: &'a HashMap>, +) -> Vec<&'a str> { + match pts.get(call_name) { + None => vec![], + Some(targets) => targets.iter() + .filter(|t| t.as_str() != call_name) + .map(|t| t.as_str()) + .collect(), + } +} + /// Build call, receiver, extends, and implements edges in Rust. /// /// Mirrors the algorithm in builder.js `buildEdges` transaction (call edges @@ -180,6 +250,17 @@ fn process_file<'a>( DefWithId { _name: &d.name, line: d.line, end_line: d.end_line.unwrap_or(u32::MAX), node_id } }).collect(); + // Phase 8.3: build pts map for alias resolution. + // Only callable (function/method) defs are seeded — mirrors JS buildPointsToMapForFile. + let pts_map: Option>> = + file_input.fn_ref_bindings.as_deref().filter(|b| !b.is_empty()).map(|bindings| { + let def_names: HashSet<&str> = file_input.definitions.iter() + .filter(|d| d.kind == "function" || d.kind == "method") + .map(|d| d.name.as_str()) + .collect(); + build_points_to_map(bindings, &def_names, &imported_names) + }); + let mut seen_edges: HashSet = HashSet::new(); for call in &file_input.calls { @@ -194,6 +275,47 @@ fn process_file<'a>( let mut targets = resolve_call_targets(ctx, call, rel_path, imported_from, &type_map); sort_targets_by_confidence(&mut targets, rel_path, imported_from); emit_call_edges(&targets, caller_id, is_dynamic, rel_path, imported_from, &mut seen_edges, edges); + + // Phase 8.3: pts fallback for unresolved dynamic identifier calls. + // When primary resolution finds nothing and the call is dynamic with no receiver, + // look up the call name in the pts map and retry resolution for each alias target. + // Confidence is penalised by one hop to reflect the extra indirection. 
+ if targets.is_empty() && call.dynamic.unwrap_or(false) && call.receiver.is_none() { + if let Some(ref pts) = pts_map { + for alias in resolve_via_points_to(call.name.as_str(), pts) { + let alias_imported_from = imported_names.get(alias).copied(); + let alias_call = CallInfo { + name: alias.to_string(), + line: call.line, + dynamic: Some(true), + receiver: None, + }; + let mut alias_targets = resolve_call_targets( + ctx, &alias_call, rel_path, alias_imported_from, &type_map, + ); + sort_targets_by_confidence(&mut alias_targets, rel_path, alias_imported_from); + for t in &alias_targets { + let edge_key = ((caller_id as u64) << 32) | (t.id as u64); + if t.id != caller_id && !seen_edges.contains(&edge_key) { + seen_edges.insert(edge_key); + let conf = import_resolution::compute_confidence( + rel_path, &t.file, alias_imported_from, + ) - PROPAGATION_HOP_PENALTY; + if conf > 0.0 { + edges.push(ComputedEdge { + source_id: caller_id, + target_id: t.id, + kind: "calls".to_string(), + confidence: conf, + dynamic: is_dynamic, + }); + } + } + } + } + } + } + emit_receiver_edge(ctx, call, caller_id, rel_path, &type_map, &mut seen_edges, edges); } diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index ced07324..515b6eef 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -445,6 +445,25 @@ fn handle_var_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { cfg: None, children: None, }); + } else if name_n.kind() == "identifier" && value_n.kind() == "identifier" { + // Phase 8.3: `const alias = handler` — record for pts analysis. 
+ symbols.fn_ref_bindings.push(FnRefBinding { + lhs: node_text(&name_n, source).to_string(), + rhs: node_text(&value_n, source).to_string(), + rhs_receiver: None, + }); + } else if name_n.kind() == "identifier" && value_n.kind() == "member_expression" { + // Phase 8.3: `const alias = obj.method` — record for pts analysis. + if let (Some(obj), Some(prop)) = ( + value_n.child_by_field_name("object"), + value_n.child_by_field_name("property"), + ) { + symbols.fn_ref_bindings.push(FnRefBinding { + lhs: node_text(&name_n, source).to_string(), + rhs: node_text(&prop, source).to_string(), + rhs_receiver: Some(node_text(&obj, source).to_string()), + }); + } } } } diff --git a/crates/codegraph-core/src/types.rs b/crates/codegraph-core/src/types.rs index 1936f535..ae40af75 100644 --- a/crates/codegraph-core/src/types.rs +++ b/crates/codegraph-core/src/types.rs @@ -306,6 +306,18 @@ pub struct NativeCallAssignment { pub receiver_type_name: Option, } +/// Function-reference binding for Phase 8.3 points-to analysis. +/// Records `const alias = fn` and `const alias = obj.method` patterns. +/// Mirrors the `FnRefBinding` interface in `src/types.ts`. +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FnRefBinding { + pub lhs: String, + pub rhs: String, + #[napi(js_name = "rhsReceiver")] + pub rhs_receiver: Option, +} + #[napi(object)] #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FileSymbols { @@ -326,6 +338,9 @@ pub struct FileSymbols { pub return_type_map: Vec, #[napi(js_name = "callAssignments")] pub call_assignments: Vec, + /// Phase 8.3: function-reference bindings for points-to analysis. 
+ #[napi(js_name = "fnRefBindings")] + pub fn_ref_bindings: Vec, } impl FileSymbols { @@ -343,6 +358,7 @@ impl FileSymbols { type_map: Vec::new(), return_type_map: Vec::new(), call_assignments: Vec::new(), + fn_ref_bindings: Vec::new(), } } } diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index 3699adeb..55d9537e 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -61,6 +61,8 @@ interface NativeFileEntry { importedNames: Array<{ name: string; file: string }>; classes: ClassRelation[]; typeMap: Array<{ name: string; typeName: string; confidence: number }>; + /** Phase 8.3: function-reference bindings for pts analysis. */ + fnRefBindings?: Array<{ lhs: string; rhs: string; rhsReceiver?: string }>; } /** Shape returned by native buildCallEdges. */ @@ -504,6 +506,7 @@ function buildCallEdgesNative( importedNames, classes: symbols.classes, typeMap, + fnRefBindings: symbols.fnRefBindings?.length ? symbols.fnRefBindings : undefined, }); } diff --git a/src/domain/wasm-worker-entry.ts b/src/domain/wasm-worker-entry.ts index 59af5c1b..62c7056e 100644 --- a/src/domain/wasm-worker-entry.ts +++ b/src/domain/wasm-worker-entry.ts @@ -801,6 +801,7 @@ function serializeExtractorOutput( _lineCount: code.split('\n').length, dataflow: symbols.dataflow, astNodes, + ...(symbols.fnRefBindings?.length ? { fnRefBindings: symbols.fnRefBindings } : {}), }; } diff --git a/src/domain/wasm-worker-pool.ts b/src/domain/wasm-worker-pool.ts index 37c69572..361a948c 100644 --- a/src/domain/wasm-worker-pool.ts +++ b/src/domain/wasm-worker-pool.ts @@ -106,6 +106,7 @@ function deserializeResult(ser: SerializedExtractorOutput | null): ExtractorOutp // {line, kind, name, text?, receiver?} shape — see engine.ts:822 where the // visitor output is cast the same way. 
if (ser.astNodes !== undefined) out.astNodes = ser.astNodes as unknown as ASTNodeRow[]; + if (ser.fnRefBindings?.length) out.fnRefBindings = ser.fnRefBindings; return out; } diff --git a/src/domain/wasm-worker-protocol.ts b/src/domain/wasm-worker-protocol.ts index 959607fa..7235cc50 100644 --- a/src/domain/wasm-worker-protocol.ts +++ b/src/domain/wasm-worker-protocol.ts @@ -62,6 +62,7 @@ export interface SerializedExtractorOutput { text?: string; receiver?: string; }>; + fnRefBindings?: import('../types.js').FnRefBinding[]; } export interface WorkerParseResponseOk { diff --git a/tests/integration/pts-parity.test.ts b/tests/integration/pts-parity.test.ts new file mode 100644 index 00000000..d0e6888c --- /dev/null +++ b/tests/integration/pts-parity.test.ts @@ -0,0 +1,105 @@ +/** + * Phase 8.3b parity test — points-to analysis: native vs WASM. + * + * Verifies that when a function reference is aliased and passed as a + * higher-order argument (`const fn = handler; arr.map(fn)`), both engines + * emit a call edge from the containing function to the aliased target. + * + * This test guards the Phase 8.3b native pts implementation introduced in + * issue #1290. Both engines must produce the same set of call edges. + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; +import { buildGraph } from '../../src/domain/graph/builder.js'; +import { isNativeAvailable } from '../../src/infrastructure/native.js'; + +const hasNative = isNativeAvailable(); +const requireParity = !!process.env.CODEGRAPH_PARITY; +const describeOrSkip = requireParity || hasNative ? 
describe : describe.skip; + +// ── Fixture source ──────────────────────────────────────────────────────── + +const HANDLER_JS = ` +export function handler(item) { + return item * 2; +} +`.trimStart(); + +const CONSUMER_JS = ` +import { handler } from './handler.js'; + +export function processItems(items) { + const alias = handler; + return items.map(alias); +} +`.trimStart(); + +// ── Helpers ─────────────────────────────────────────────────────────────── + +function writeFixture(dir: string): void { + fs.mkdirSync(dir, { recursive: true }); + fs.writeFileSync(path.join(dir, 'handler.js'), HANDLER_JS); + fs.writeFileSync(path.join(dir, 'consumer.js'), CONSUMER_JS); +} + +function readCallEdges(dbPath: string): Array<{ source: string; target: string }> { + const db = new Database(dbPath, { readonly: true }); + const rows = db + .prepare(` + SELECT n1.name AS source, n2.name AS target + FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + WHERE e.kind = 'calls' + ORDER BY n1.name, n2.name + `) + .all() as Array<{ source: string; target: string }>; + db.close(); + return rows; +} + +// ── Test ────────────────────────────────────────────────────────────────── + +describeOrSkip('Phase 8.3 pts parity: native vs WASM', () => { + let wasmDir: string; + let nativeDir: string; + + beforeAll(async () => { + const tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-pts-parity-')); + wasmDir = path.join(tmpBase, 'wasm'); + nativeDir = path.join(tmpBase, 'native'); + writeFixture(wasmDir); + writeFixture(nativeDir); + + await buildGraph(wasmDir, { engine: 'wasm', incremental: false, skipRegistry: true }); + await buildGraph(nativeDir, { engine: 'native', incremental: false, skipRegistry: true }); + }, 60_000); + + afterAll(() => { + try { + if (wasmDir) fs.rmSync(path.dirname(wasmDir), { recursive: true, force: true }); + } catch { + /* ignore */ + } + }); + + it('WASM engine resolves processItems → handler via pts alias', () => { 
+ const edges = readCallEdges(path.join(wasmDir, '.codegraph', 'graph.db')); + expect(edges).toContainEqual({ source: 'processItems', target: 'handler' }); + }); + + it('native engine resolves processItems → handler via pts alias', () => { + const edges = readCallEdges(path.join(nativeDir, '.codegraph', 'graph.db')); + expect(edges).toContainEqual({ source: 'processItems', target: 'handler' }); + }); + + it('both engines emit identical call edges', () => { + const wasmEdges = readCallEdges(path.join(wasmDir, '.codegraph', 'graph.db')); + const nativeEdges = readCallEdges(path.join(nativeDir, '.codegraph', 'graph.db')); + expect(nativeEdges).toEqual(wasmEdges); + }); +}); From cf3bd2cc6325b99cdec221943f4fb333e213bc4d Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 3 Jun 2026 02:46:38 -0600 Subject: [PATCH 11/14] fix(rust): add missing fn_ref_bindings field to test struct initializers (#1296) Three test helpers in edge_builder.rs, import_edges.rs and structure.rs constructed FileEdgeInput / FileSymbols literals without the fn_ref_bindings field added in Phase 8.3, causing the native host build to fail to compile on all three CI platforms. --- crates/codegraph-core/src/edge_builder.rs | 9 ++++++++- crates/codegraph-core/src/import_edges.rs | 1 + crates/codegraph-core/src/structure.rs | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/crates/codegraph-core/src/edge_builder.rs b/crates/codegraph-core/src/edge_builder.rs index f696c0e7..43a22ab9 100644 --- a/crates/codegraph-core/src/edge_builder.rs +++ b/crates/codegraph-core/src/edge_builder.rs @@ -130,6 +130,12 @@ impl<'a> EdgeContext<'a> { // ── Phase 8.3: points-to analysis ───────────────────────────────────────── +/// Maximum fixed-point iterations for the pts solver. +/// Mirrors `MAX_SOLVER_ITERATIONS` in `src/domain/graph/resolver/points-to.ts`. 
+/// TODO: wire through `CodegraphConfig.analysis.pointsToMaxIterations` once +/// config plumbing is in place (same pattern as `typePropagationDepth`). +const MAX_SOLVER_ITERATIONS: usize = 50; + /// Build a per-file points-to map. Mirrors `buildPointsToMap` in /// `src/domain/graph/resolver/points-to.ts`. /// @@ -158,7 +164,7 @@ fn build_points_to_map( }; (b.lhs.clone(), rhs_key) }).collect(); - for _ in 0..50 { + for _ in 0..MAX_SOLVER_ITERATIONS { let mut changed = false; for (lhs, rhs_key) in &constraints { let rhs_pts: Option> = pts.get(rhs_key.as_str()) @@ -1170,6 +1176,7 @@ mod call_edge_tests { imported_names: vec![], classes, type_map, + fn_ref_bindings: None, } } diff --git a/crates/codegraph-core/src/import_edges.rs b/crates/codegraph-core/src/import_edges.rs index dd378983..1df73c9e 100644 --- a/crates/codegraph-core/src/import_edges.rs +++ b/crates/codegraph-core/src/import_edges.rs @@ -565,6 +565,7 @@ mod tests { ast_nodes: vec![], dataflow: None, line_count: None, + fn_ref_bindings: vec![], } } diff --git a/crates/codegraph-core/src/structure.rs b/crates/codegraph-core/src/structure.rs index 6657f623..8343881e 100644 --- a/crates/codegraph-core/src/structure.rs +++ b/crates/codegraph-core/src/structure.rs @@ -934,6 +934,7 @@ mod tests { ast_nodes: vec![], dataflow: None, line_count: Some(42), + fn_ref_bindings: vec![], }; file_symbols.insert("src/a.ts".to_string(), sym.clone()); From 65ed277b72c21e217ab83dbec01cbd37709f9a5e Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 3 Jun 2026 02:46:46 -0600 Subject: [PATCH 12/14] fix(rust): add BUILTIN_GLOBALS guard and named MAX_SOLVER_ITERATIONS constant (#1296) The Rust extractor was recording fn_ref_bindings for patterns like `const fn = Array` without the guard present in the JS extractor. 
This created a parity gap: if a file defines a local function whose name matches a builtin, the native engine would seed it in the pts map and produce a spurious call edge while the WASM engine would silently drop the binding. - Add JS_BUILTIN_GLOBALS constant mirroring the TS BUILTIN_GLOBALS set - Guard both the identifier and member_expression paths in handle_var_decl - Extract MAX_SOLVER_ITERATIONS named constant (was hardcoded 0..50) with a TODO noting it should be wired through config alongside typePropagationDepth --- .../src/extractors/javascript.rs | 45 ++++++++++++++----- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index 515b6eef..449f1e24 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -5,6 +5,21 @@ use crate::complexity::compute_all_metrics; use crate::types::*; use tree_sitter::{Node, Tree}; +/// Well-known JS globals that must not be recorded as pts targets. +/// Mirrors the `BUILTIN_GLOBALS` set in `src/extractors/javascript.ts`.
+const JS_BUILTIN_GLOBALS: &[&str] = &[ + "Math", "JSON", "Promise", "Array", "Object", "Date", "Error", + "Symbol", "Map", "Set", "RegExp", "Number", "String", "Boolean", + "WeakMap", "WeakSet", "WeakRef", "Proxy", "Reflect", "Intl", + "ArrayBuffer", "SharedArrayBuffer", "DataView", "Atomics", "BigInt", + "Float32Array", "Float64Array", "Int8Array", "Int16Array", "Int32Array", + "Uint8Array", "Uint16Array", "Uint32Array", "Uint8ClampedArray", + "URL", "URLSearchParams", "TextEncoder", "TextDecoder", + "AbortController", "AbortSignal", "Headers", "Request", "Response", + "FormData", "Blob", "File", "ReadableStream", "WritableStream", + "TransformStream", "console", "Buffer", "EventEmitter", "Stream", +]; + pub struct JsExtractor; impl SymbolExtractor for JsExtractor { @@ -447,22 +462,32 @@ fn handle_var_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { }); } else if name_n.kind() == "identifier" && value_n.kind() == "identifier" { // Phase 8.3: `const alias = handler` — record for pts analysis. - symbols.fn_ref_bindings.push(FnRefBinding { - lhs: node_text(&name_n, source).to_string(), - rhs: node_text(&value_n, source).to_string(), - rhs_receiver: None, - }); + // Mirror the JS BUILTIN_GLOBALS guard: skip well-known JS globals so + // they are never seeded as pts targets (e.g. `const a = Array`). + let rhs_text = node_text(&value_n, source); + if !JS_BUILTIN_GLOBALS.contains(&rhs_text) { + symbols.fn_ref_bindings.push(FnRefBinding { + lhs: node_text(&name_n, source).to_string(), + rhs: rhs_text.to_string(), + rhs_receiver: None, + }); + } } else if name_n.kind() == "identifier" && value_n.kind() == "member_expression" { // Phase 8.3: `const alias = obj.method` — record for pts analysis. + // Mirror the JS BUILTIN_GLOBALS guard: skip bindings where the + // receiver object is a well-known JS global (e.g. `const fn = Math.random`). 
if let (Some(obj), Some(prop)) = ( value_n.child_by_field_name("object"), value_n.child_by_field_name("property"), ) { - symbols.fn_ref_bindings.push(FnRefBinding { - lhs: node_text(&name_n, source).to_string(), - rhs: node_text(&prop, source).to_string(), - rhs_receiver: Some(node_text(&obj, source).to_string()), - }); + let obj_text = node_text(&obj, source); + if !JS_BUILTIN_GLOBALS.contains(&obj_text) { + symbols.fn_ref_bindings.push(FnRefBinding { + lhs: node_text(&name_n, source).to_string(), + rhs: node_text(&prop, source).to_string(), + rhs_receiver: Some(obj_text.to_string()), + }); + } } } } From d5ee803c488cbee2c1816802cc5ad7ed454d6086 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 3 Jun 2026 02:46:53 -0600 Subject: [PATCH 13/14] fix(test): add always-on WASM pts test and clarify reserved config fields (#1296) The parity test suite only ran when native was available or CODEGRAPH_PARITY was set, leaving the primary bug (fnRefBindings dropped at WASM worker boundary) unguarded in WASM-only CI. Add a dedicated describe block that unconditionally exercises the WASM engine. Also document that pointsToMaxIterations is reserved and not yet wired to either solver in config.ts and types.ts, and remove a redundant double-negation in ts-resolver.ts. --- src/domain/graph/resolver/ts-resolver.ts | 2 +- src/infrastructure/config.ts | 8 ++++++- src/types.ts | 6 ++++- tests/integration/pts-parity.test.ts | 30 ++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/src/domain/graph/resolver/ts-resolver.ts b/src/domain/graph/resolver/ts-resolver.ts index f2b7a6f2..6ed86b0a 100644 --- a/src/domain/graph/resolver/ts-resolver.ts +++ b/src/domain/graph/resolver/ts-resolver.ts @@ -506,7 +506,7 @@ function resolveTypeName( // Skip generic type-parameter symbols (T, E, K, etc.) — they do not // correspond to any real class and would overwrite useful lower-confidence // heuristic entries, causing call edges to be silently dropped. 
- !!(symbol.flags & (ts.SymbolFlags.TypeParameter | ts.SymbolFlags.TypeAlias)) + symbol.flags & (ts.SymbolFlags.TypeParameter | ts.SymbolFlags.TypeAlias) ) return null; // getFullyQualifiedName returns e.g. `"./path/to/module".ClassName` for diff --git a/src/infrastructure/config.ts b/src/infrastructure/config.ts index 645fdd5a..bb537bfe 100644 --- a/src/infrastructure/config.ts +++ b/src/infrastructure/config.ts @@ -86,7 +86,13 @@ export const DEFAULTS = { // (MAX_PROPAGATION_DEPTH, PROPAGATION_HOP_PENALTY) and in // src/domain/graph/resolver/points-to.ts (MAX_SOLVER_ITERATIONS). typePropagationDepth: 3, - /** Maximum fixed-point iterations for the Phase 8.3 points-to solver. */ + /** + * Maximum fixed-point iterations for the Phase 8.3 points-to solver. + * @reserved — currently not wired to either the WASM solver + * (`MAX_SOLVER_ITERATIONS` in `points-to.ts`) or the native Rust solver + * (`MAX_SOLVER_ITERATIONS` in `edge_builder.rs`), both of which use the + * same hardcoded value of 50. See the TODO comment above. + */ pointsToMaxIterations: 50, }, community: { diff --git a/src/types.ts b/src/types.ts index 3d3f3339..797334bc 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1243,7 +1243,11 @@ export interface CodegraphConfig { briefMediumRiskCallers: number; /** Maximum chain depth for inter-procedural return-type propagation (Phase 8.2). */ typePropagationDepth: number; - /** Maximum fixed-point iterations for the Phase 8.3 points-to solver. */ + /** + * Maximum fixed-point iterations for the Phase 8.3 points-to solver. + * @reserved — currently not wired to either solver; both use a hardcoded + * constant of 50. See TODO in `src/infrastructure/config.ts`. 
+ */ pointsToMaxIterations: number; }; diff --git a/tests/integration/pts-parity.test.ts b/tests/integration/pts-parity.test.ts index d0e6888c..ba589ac8 100644 --- a/tests/integration/pts-parity.test.ts +++ b/tests/integration/pts-parity.test.ts @@ -62,6 +62,36 @@ function readCallEdges(dbPath: string): Array<{ source: string; target: string } return rows; } +// ── WASM-only test (always runs, guards the serialization fix) ──────────── +// +// The bug fixed in Phase 8.3 was that `fnRefBindings` was silently dropped at +// the WASM worker boundary. This suite does NOT require native — it always +// runs and validates that the WASM engine alone resolves alias call edges. + +describe('Phase 8.3 WASM pts: fnRefBindings serialization fix', () => { + let wasmOnlyDir: string; + + beforeAll(async () => { + const tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-pts-wasm-')); + wasmOnlyDir = path.join(tmpBase, 'wasm'); + writeFixture(wasmOnlyDir); + await buildGraph(wasmOnlyDir, { engine: 'wasm', incremental: false, skipRegistry: true }); + }, 60_000); + + afterAll(() => { + try { + if (wasmOnlyDir) fs.rmSync(path.dirname(wasmOnlyDir), { recursive: true, force: true }); + } catch { + /* ignore */ + } + }); + + it('WASM engine records fnRefBindings and resolves processItems → handler via pts alias', () => { + const edges = readCallEdges(path.join(wasmOnlyDir, '.codegraph', 'graph.db')); + expect(edges).toContainEqual({ source: 'processItems', target: 'handler' }); + }); +}); + // ── Test ────────────────────────────────────────────────────────────────── describeOrSkip('Phase 8.3 pts parity: native vs WASM', () => { From 65e916d29e7a631c7b0c278df4a0b88c7e02847c Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 3 Jun 2026 14:27:29 -0600 Subject: [PATCH 14/14] fix(rust): add pts_edge_map confidence-upgrade path to match JS ptsEdgeRows (#1296) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a file both aliases and 
directly calls the same target (e.g. `arr.map(alias)` then `handler(x)`), the Rust pts fallback was inserting the edge directly into `seen_edges`, locking in the penalised confidence and silently dropping the subsequent direct call. The JS/WASM path avoids this via `ptsEdgeRows` — pts edges go into a staging map, and `emit_call_edges` upgrades confidence in-place when the direct call is found. Mirrors the same mechanism in `build-edges.ts` lines 660–700: - `pts_edge_map: HashMap<u64, usize>` (key = edge_key, value = index into `edges`) replaces the direct `seen_edges.insert` for pts-resolved edges. - `emit_call_edges` now checks `pts_edge_map` before inserting a new direct-call edge; if a pts edge exists for the same pair, it upgrades confidence and is_dynamic in-place and promotes the key to `seen_edges`. - Adds a confidence-upgrade fixture and two new test suites (WASM-only + parity) that assert the edge confidence is >= 0.9 (direct-call level) rather than the penalised 0.9 - 0.1 = 0.8 value. --- crates/codegraph-core/src/edge_builder.rs | 41 +++++-- tests/integration/pts-parity.test.ts | 134 ++++++++++++++++++++++ 2 files changed, 166 insertions(+), 9 deletions(-) diff --git a/crates/codegraph-core/src/edge_builder.rs b/crates/codegraph-core/src/edge_builder.rs index 43a22ab9..d292ba46 100644 --- a/crates/codegraph-core/src/edge_builder.rs +++ b/crates/codegraph-core/src/edge_builder.rs @@ -268,6 +268,12 @@ fn process_file<'a>( }); let mut seen_edges: HashSet<u64> = HashSet::new(); + // Phase 8.3: tracks pts-resolved edges separately from seen_edges so that a + // subsequent direct call to the same caller→target pair can upgrade confidence + // in-place rather than being silently dropped by the dedup guard. + // Mirrors `ptsEdgeRows` in `src/domain/graph/builder/stages/build-edges.ts`. + // Key: edge_key (same as seen_edges). Value: index into `edges` vec.
+ let mut pts_edge_map: HashMap<u64, usize> = HashMap::new(); for call in &file_input.calls { if let Some(ref receiver) = call.receiver { @@ -280,12 +286,16 @@ fn process_file<'a>( let mut targets = resolve_call_targets(ctx, call, rel_path, imported_from, &type_map); sort_targets_by_confidence(&mut targets, rel_path, imported_from); - emit_call_edges(&targets, caller_id, is_dynamic, rel_path, imported_from, &mut seen_edges, edges); + emit_call_edges(&targets, caller_id, is_dynamic, rel_path, imported_from, &mut seen_edges, &mut pts_edge_map, edges); // Phase 8.3: pts fallback for unresolved dynamic identifier calls. // When primary resolution finds nothing and the call is dynamic with no receiver, // look up the call name in the pts map and retry resolution for each alias target. // Confidence is penalised by one hop to reflect the extra indirection. + // + // Pts edges go into pts_edge_map (not seen_edges) so a later direct call to the + // same target in the same function body can upgrade confidence in-place — mirroring + // the ptsEdgeRows mechanism on the JS/WASM path.
if targets.is_empty() && call.dynamic.unwrap_or(false) && call.receiver.is_none() { if let Some(ref pts) = pts_map { for alias in resolve_via_points_to(call.name.as_str(), pts) { @@ -302,12 +312,12 @@ fn process_file<'a>( sort_targets_by_confidence(&mut alias_targets, rel_path, alias_imported_from); for t in &alias_targets { let edge_key = ((caller_id as u64) << 32) | (t.id as u64); - if t.id != caller_id && !seen_edges.contains(&edge_key) { - seen_edges.insert(edge_key); + if t.id != caller_id && !seen_edges.contains(&edge_key) && !pts_edge_map.contains_key(&edge_key) { let conf = import_resolution::compute_confidence( rel_path, &t.file, alias_imported_from, ) - PROPAGATION_HOP_PENALTY; if conf > 0.0 { + pts_edge_map.insert(edge_key, edges.len()); edges.push(ComputedEdge { source_id: caller_id, target_id: t.id, @@ -420,17 +430,30 @@ fn sort_targets_by_confidence(targets: &mut Vec<&NodeInfo>, rel_path: &str, impo fn emit_call_edges( targets: &[&NodeInfo], caller_id: u32, is_dynamic: u32, rel_path: &str, imported_from: Option<&str>, - seen_edges: &mut HashSet<u64>, edges: &mut Vec<ComputedEdge>, + seen_edges: &mut HashSet<u64>, pts_edge_map: &mut HashMap<u64, usize>, edges: &mut Vec<ComputedEdge>, ) { for t in targets { let edge_key = ((caller_id as u64) << 32) | (t.id as u64); if t.id != caller_id && !seen_edges.contains(&edge_key) { - seen_edges.insert(edge_key); let confidence = import_resolution::compute_confidence(rel_path, &t.file, imported_from); - edges.push(ComputedEdge { - source_id: caller_id, target_id: t.id, - kind: "calls".to_string(), confidence, dynamic: is_dynamic, - }); + if let Some(&pts_idx) = pts_edge_map.get(&edge_key) { + // A pts-resolved edge already exists for this caller→target pair with a + // penalised confidence. Upgrade it to the direct-call confidence in-place, + // then promote to seen_edges so no further processing is needed. + // Mirrors the ptsEdgeRows upgrade path in build-edges.ts.
+ if let Some(pts_row) = edges.get_mut(pts_idx) { + pts_row.confidence = confidence; + pts_row.dynamic = is_dynamic; // direct call overrides alias dynamic flag + } + pts_edge_map.remove(&edge_key); + seen_edges.insert(edge_key); + } else { + seen_edges.insert(edge_key); + edges.push(ComputedEdge { + source_id: caller_id, target_id: t.id, + kind: "calls".to_string(), confidence, dynamic: is_dynamic, + }); + } } } } diff --git a/tests/integration/pts-parity.test.ts b/tests/integration/pts-parity.test.ts index ba589ac8..290370f1 100644 --- a/tests/integration/pts-parity.test.ts +++ b/tests/integration/pts-parity.test.ts @@ -38,6 +38,20 @@ export function processItems(items) { } `.trimStart(); +// Fixture for the confidence-upgrade scenario: the caller both aliases and +// directly calls the same target in the same function body. The pts-resolved +// edge (lower confidence) must be upgraded to direct-call confidence when the +// direct call is encountered — mirroring the ptsEdgeRows upgrade on the JS path. 
+const CONSUMER_UPGRADE_JS = ` +import { handler } from './handler.js'; + +export function processItemsDirect(items) { + const alias = handler; + items.map(alias); // pts-resolved edge (penalised confidence) + handler(items[0]); // direct call — must upgrade confidence in-place +} +`.trimStart(); + // ── Helpers ─────────────────────────────────────────────────────────────── function writeFixture(dir: string): void { @@ -46,6 +60,30 @@ function writeFixture(dir: string): void { fs.writeFileSync(path.join(dir, 'consumer.js'), CONSUMER_JS); } +function writeUpgradeFixture(dir: string): void { + fs.mkdirSync(dir, { recursive: true }); + fs.writeFileSync(path.join(dir, 'handler.js'), HANDLER_JS); + fs.writeFileSync(path.join(dir, 'consumer.js'), CONSUMER_UPGRADE_JS); +} + +function readCallEdgesWithConfidence( + dbPath: string, +): Array<{ source: string; target: string; confidence: number }> { + const db = new Database(dbPath, { readonly: true }); + const rows = db + .prepare(` + SELECT n1.name AS source, n2.name AS target, e.confidence + FROM edges e + JOIN nodes n1 ON e.source_id = n1.id + JOIN nodes n2 ON e.target_id = n2.id + WHERE e.kind = 'calls' + ORDER BY n1.name, n2.name + `) + .all() as Array<{ source: string; target: string; confidence: number }>; + db.close(); + return rows; +} + function readCallEdges(dbPath: string): Array<{ source: string; target: string }> { const db = new Database(dbPath, { readonly: true }); const rows = db @@ -92,6 +130,102 @@ describe('Phase 8.3 WASM pts: fnRefBindings serialization fix', () => { }); }); +// ── Confidence upgrade test (WASM always runs) ──────────────────────────── +// +// Guards the ptsEdgeRows / pts_edge_map confidence-upgrade path: when a file +// contains both an alias call (pts-resolved, penalised confidence) and a +// subsequent direct call to the same target in the same function body, the edge +// confidence must be upgraded to the direct-call value — not left at the +// penalised pts confidence. 
Both engines must agree. + +describe('Phase 8.3 pts: confidence upgrade when alias + direct call coexist (WASM)', () => { + let upgradeWasmDir: string; + + beforeAll(async () => { + const tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-pts-upgrade-wasm-')); + upgradeWasmDir = path.join(tmpBase, 'wasm'); + writeUpgradeFixture(upgradeWasmDir); + await buildGraph(upgradeWasmDir, { engine: 'wasm', incremental: false, skipRegistry: true }); + }, 60_000); + + afterAll(() => { + try { + if (upgradeWasmDir) fs.rmSync(path.dirname(upgradeWasmDir), { recursive: true, force: true }); + } catch { + /* ignore */ + } + }); + + it('WASM engine emits processItemsDirect → handler with direct-call confidence (not pts-penalised)', () => { + const edges = readCallEdgesWithConfidence(path.join(upgradeWasmDir, '.codegraph', 'graph.db')); + const edge = edges.find((e) => e.source === 'processItemsDirect' && e.target === 'handler'); + expect(edge).toBeDefined(); + // Direct-call confidence (>= 0.9 for same-dir imports) must be higher than + // a pts-penalised confidence (direct - 0.1). Assert it is at least 0.9 to + // confirm the upgrade happened and the penalised value was not kept. 
+ expect(edge!.confidence).toBeGreaterThanOrEqual(0.9); + }); +}); + +describeOrSkip( + 'Phase 8.3 pts: confidence upgrade when alias + direct call coexist (parity)', + () => { + let upgradeWasmDir: string; + let upgradeNativeDir: string; + + beforeAll(async () => { + const tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-pts-upgrade-')); + upgradeWasmDir = path.join(tmpBase, 'wasm'); + upgradeNativeDir = path.join(tmpBase, 'native'); + writeUpgradeFixture(upgradeWasmDir); + writeUpgradeFixture(upgradeNativeDir); + + await buildGraph(upgradeWasmDir, { engine: 'wasm', incremental: false, skipRegistry: true }); + await buildGraph(upgradeNativeDir, { + engine: 'native', + incremental: false, + skipRegistry: true, + }); + }, 60_000); + + afterAll(() => { + try { + if (upgradeWasmDir) + fs.rmSync(path.dirname(upgradeWasmDir), { recursive: true, force: true }); + } catch { + /* ignore */ + } + }); + + it('native engine emits processItemsDirect → handler with direct-call confidence (not pts-penalised)', () => { + const edges = readCallEdgesWithConfidence( + path.join(upgradeNativeDir, '.codegraph', 'graph.db'), + ); + const edge = edges.find((e) => e.source === 'processItemsDirect' && e.target === 'handler'); + expect(edge).toBeDefined(); + expect(edge!.confidence).toBeGreaterThanOrEqual(0.9); + }); + + it('both engines emit identical confidence for the processItemsDirect → handler edge', () => { + const wasmEdges = readCallEdgesWithConfidence( + path.join(upgradeWasmDir, '.codegraph', 'graph.db'), + ); + const nativeEdges = readCallEdgesWithConfidence( + path.join(upgradeNativeDir, '.codegraph', 'graph.db'), + ); + const wasmEdge = wasmEdges.find( + (e) => e.source === 'processItemsDirect' && e.target === 'handler', + ); + const nativeEdge = nativeEdges.find( + (e) => e.source === 'processItemsDirect' && e.target === 'handler', + ); + expect(nativeEdge).toBeDefined(); + expect(wasmEdge).toBeDefined(); + 
expect(nativeEdge!.confidence).toBeCloseTo(wasmEdge!.confidence, 5); + }); + }, +); + // ── Test ────────────────────────────────────────────────────────────────── describeOrSkip('Phase 8.3 pts parity: native vs WASM', () => {