Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
4baaa2d
fix(mcp): resolve trace_call_path through Class DEFINES_METHOD edges
Mar 27, 2026
a9cc7b6
fix(mcp): use strcpy variants in detect_changes to prevent use-after-…
Mar 27, 2026
c3e008d
fix(pipeline): reject invalid route paths from vendored JS files
Mar 27, 2026
217973d
fix(extraction): add C# base_list handling for class inheritance
Mar 27, 2026
02fee9e
fix(mcp): wire get_architecture to full store analysis
Mar 27, 2026
27d0785
feat(store): wire Louvain clustering into get_architecture
Mar 27, 2026
58fff9e
feat(pipeline): add Hapi.js route extraction for object-literal patterns
Mar 27, 2026
358de42
feat(store): add BM25 full-text search via SQLite FTS5
Mar 27, 2026
8373e3f
feat(pipeline): auto-detect execution flows from entry points via BFS…
Mar 27, 2026
0d05b0a
feat(pipeline): add Socket.IO and EventEmitter channel detection
Mar 27, 2026
e0d6cca
feat(cypher): resolve unknown properties from JSON properties_json
Mar 27, 2026
4416642
feat(quality): 3 output quality improvements for investigation-grade …
Mar 27, 2026
57b89e0
feat(quality): semantic cluster labels + process participation in trace
Mar 27, 2026
a7b60cb
feat(mcp): investigation-grade trace output + impact analysis + proce…
Mar 27, 2026
93041d2
fix(mcp): crash on 0-edge nodes + fuzzy name fallback in trace
Mar 27, 2026
9e1dc6d
feat(extraction): C# delegate/event handler call resolution
Mar 27, 2026
6a45196
fix(mcp+extraction): C# class has_method + C# channel detection
Mar 27, 2026
689050f
fix(mcp): get_impact resolves Class over Constructor for accurate bla…
Mar 29, 2026
b7ba394
feat(extraction): entry point detection for C#/Java class methods
Mar 29, 2026
8021d94
feat(store+cypher): channel dedup, count(DISTINCT), SQL injection fix
Mar 29, 2026
0aa615a
feat(cypher): NOT EXISTS subquery with optimized edge lookup
Mar 29, 2026
d7cc2f7
feat(mcp): cross-repo channel query + has_property in trace output
Mar 29, 2026
4ece8db
feat(extraction): C# property extraction with HAS_PROPERTY edges
Mar 29, 2026
db9a15e
fix(extraction): C/C++ CALLS edge attribution to enclosing function s…
Mar 29, 2026
975329a
feat(extraction): C++ entry point heuristics for Windows and GTest
Mar 29, 2026
309780d
feat(store): include HANDLES/HTTP_CALLS in process detection BFS
Mar 29, 2026
949d663
feat(store): Route→Function resolution + relaxed process detection
Mar 29, 2026
d98f3a0
feat(pipeline): resolve relative import paths for IMPORTS edge creation
Mar 29, 2026
00b096d
feat(extraction): CommonJS require() import extraction for JS/TS
Mar 29, 2026
80772e3
feat(mcp): process participation in search_graph results
Mar 30, 2026
22408ed
feat(pipeline): JS/TS constant resolution for Socket.IO channel detec…
Mar 30, 2026
ea65a25
feat(mcp): expose BM25 query and sort_by params in search_graph schema
Mar 30, 2026
2997cdb
fix(search): pure BM25 relevance ranking + camelCase token splitting
Mar 30, 2026
ebdee0e
fix(store): deduplicate entry points + add [module] prefix to process…
Mar 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile.cbm
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,8 @@ MIMALLOC_CFLAGS_TEST = -std=c11 -g -O1 -w \

# sqlite3 (vendored amalgamation — compiled ourselves for ASan instrumentation)
SQLITE3_SRC = vendored/sqlite3/sqlite3.c
SQLITE3_CFLAGS = -std=c11 -O2 -w -DSQLITE_DQS=0 -DSQLITE_THREADSAFE=1
SQLITE3_CFLAGS_TEST = -std=c11 -g -O1 -w -DSQLITE_DQS=0 -DSQLITE_THREADSAFE=1
SQLITE3_CFLAGS = -std=c11 -O2 -w -DSQLITE_DQS=0 -DSQLITE_THREADSAFE=1 -DSQLITE_ENABLE_FTS5
SQLITE3_CFLAGS_TEST = -std=c11 -g -O1 -w -DSQLITE_DQS=0 -DSQLITE_THREADSAFE=1 -DSQLITE_ENABLE_FTS5

# TRE regex (vendored, Windows only — POSIX uses system <regex.h>)
TRE_SRC = vendored/tre/tre_all.c
Expand Down
97 changes: 97 additions & 0 deletions internal/cbm/extract_calls.c
Original file line number Diff line number Diff line change
Expand Up @@ -344,4 +344,101 @@ void handle_calls(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec, Walk
}
}
}

// C# delegate/event patterns
if (ctx->language == CBM_LANG_CSHARP) {
// Fix 1: event += MethodName (bare method reference subscription)
// Creates a CALLS edge from the subscribing method to the handler method.
// e.g. _socket.OnConnected += SocketOnConnected;
if (strcmp(kind, "assignment_expression") == 0) {
TSNode op = ts_node_child_by_field_name(node, "operator", 8);
if (!ts_node_is_null(op)) {
char *op_text = cbm_node_text(ctx->arena, op, ctx->source);
if (op_text && strcmp(op_text, "+=") == 0) {
TSNode right = ts_node_child_by_field_name(node, "right", 5);
if (!ts_node_is_null(right)) {
const char *rk = ts_node_type(right);
if (strcmp(rk, "identifier") == 0 ||
strcmp(rk, "member_access_expression") == 0) {
char *callee = cbm_node_text(ctx->arena, right, ctx->source);
if (callee && callee[0] && !cbm_is_keyword(callee, ctx->language)) {
CBMCall call;
call.callee_name = callee;
call.enclosing_func_qn = state->enclosing_func_qn;
cbm_calls_push(&ctx->result->calls, ctx->arena, call);
}
}
}
}
}
}

// Fix 2: delegate?.Invoke() → resolve to receiver (delegate) name.
// C# delegates are invoked via .Invoke() or ?.Invoke() — the callee name
// "Invoke" resolves to nothing. Instead, extract the receiver (delegate property)
// name, which is more likely to match a registered symbol.
// e.g. OnConnected?.Invoke(this, e) → creates CALLS edge to "OnConnected"
//
// C# tree-sitter AST for "OnConnected?.Invoke(this, e)":
// invocation_expression
// function: conditional_access_expression
// expression: identifier "OnConnected" ← receiver
// member_binding_expression
// name: identifier "Invoke" ← method
// arguments: argument_list
if (cbm_kind_in_set(node, spec->call_node_types)) {
TSNode func_node2 = ts_node_child_by_field_name(node, "function", 8);
if (!ts_node_is_null(func_node2)) {
const char *fk2 = ts_node_type(func_node2);
bool is_invoke = false;
TSNode receiver2 = {0}; // NOLINT

if (strcmp(fk2, "conditional_access_expression") == 0) {
// ?. access: look for member_binding_expression child
uint32_t ncc = ts_node_named_child_count(func_node2);
for (uint32_t ci = 0; ci < ncc; ci++) {
TSNode child = ts_node_named_child(func_node2, ci);
const char *ck = ts_node_type(child);
if (strcmp(ck, "member_binding_expression") == 0) {
TSNode name_n = ts_node_child_by_field_name(child, "name", 4);
if (!ts_node_is_null(name_n)) {
char *nm = cbm_node_text(ctx->arena, name_n, ctx->source);
if (nm && strcmp(nm, "Invoke") == 0) {
is_invoke = true;
}
}
}
if (strcmp(ck, "identifier") == 0 ||
strcmp(ck, "member_access_expression") == 0) {
receiver2 = child;
}
}
} else if (strcmp(fk2, "member_access_expression") == 0) {
// Dot access: obj.Invoke(...)
TSNode name_n = ts_node_child_by_field_name(func_node2, "name", 4);
if (!ts_node_is_null(name_n)) {
char *nm = cbm_node_text(ctx->arena, name_n, ctx->source);
if (nm && strcmp(nm, "Invoke") == 0) {
is_invoke = true;
TSNode expr = ts_node_child_by_field_name(func_node2,
"expression", 10);
if (!ts_node_is_null(expr)) {
receiver2 = expr;
}
}
}
}

if (is_invoke && !ts_node_is_null(receiver2)) {
char *recv = cbm_node_text(ctx->arena, receiver2, ctx->source);
if (recv && recv[0] && !cbm_is_keyword(recv, ctx->language)) {
CBMCall call;
call.callee_name = recv;
call.enclosing_func_qn = state->enclosing_func_qn;
cbm_calls_push(&ctx->result->calls, ctx->arena, call);
}
}
}
}
}
}
243 changes: 240 additions & 3 deletions internal/cbm/extract_defs.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "tree_sitter/api.h" // TSNode, ts_node_*
#include <stdint.h> // uint32_t
#include <string.h>
#include <strings.h> /* strcasecmp */
#include <ctype.h>

// Field name lengths for ts_node_child_by_field_name() calls.
Expand Down Expand Up @@ -565,10 +566,58 @@ static const char **extract_base_classes(CBMArena *a, TSNode node, const char *s
}
}
}
// C/C++ specific: handle base_class_clause (contains access specifiers + type names)
// C# specific: handle base_list node (contains base types separated by commas)
{
uint32_t count = ts_node_child_count(node);
for (uint32_t i = 0; i < count; i++) {
TSNode child = ts_node_child(node, i);
if (strcmp(ts_node_type(child), "base_list") == 0) {
const char *bases[16];
int base_count = 0;
uint32_t bnc = ts_node_named_child_count(child);
for (uint32_t bi = 0; bi < bnc && base_count < MAX_BASES_MINUS_1; bi++) {
TSNode bc = ts_node_named_child(child, bi);
const char *bk = ts_node_type(bc);
// C# base types can be: identifier, generic_name, qualified_name,
// or wrapped in a simple_base_type / primary_constructor_base_type
char *text = NULL;
if (strcmp(bk, "identifier") == 0 || strcmp(bk, "generic_name") == 0 ||
strcmp(bk, "qualified_name") == 0) {
text = cbm_node_text(a, bc, source);
} else {
// For wrapper nodes (simple_base_type etc.), extract the first
// named child which should be the type identifier
TSNode inner = ts_node_named_child(bc, 0);
if (!ts_node_is_null(inner)) {
text = cbm_node_text(a, inner, source);
}
}
if (text && text[0]) {
// Strip generic args for resolution: "List<int>" → "List"
char *angle = strchr(text, '<');
if (angle) *angle = '\0';
bases[base_count++] = text;
}
}
if (base_count > 0) {
const char **result =
(const char **)cbm_arena_alloc(a, (base_count + 1) * sizeof(const char *));
if (result) {
for (int j = 0; j < base_count; j++) {
result[j] = bases[j];
}
result[base_count] = NULL;
return result;
}
}
}
}
}

// C/C++ specific: handle base_class_clause (contains access specifiers + type names)
{
uint32_t count2 = ts_node_child_count(node);
for (uint32_t i = 0; i < count2; i++) {
TSNode child = ts_node_child(node, i);
if (strcmp(ts_node_type(child), "base_class_clause") == 0) {
// Extract type identifiers from base_class_clause, skipping access specifiers
Expand Down Expand Up @@ -1136,11 +1185,82 @@ static void extract_func_def(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec
}
}

// main is always an entry point
if (strcmp(name, "main") == 0) {
// main is always an entry point; the match is case-insensitive for every language, so C#'s Main is covered too
if (strcasecmp(name, "main") == 0) {
def.is_entry_point = true;
}

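// C/C++ entry point detection by name: Windows entry points (WinMain, wWinMain,
// DllMain, wmain, _tmain) and MFC hooks (InitInstance, OnInitDialog).
// No GTest names are matched by this list.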
if ((ctx->language == CBM_LANG_C || ctx->language == CBM_LANG_CPP) && !def.is_entry_point) {
if (strcmp(name, "WinMain") == 0 || strcmp(name, "wWinMain") == 0 ||
strcmp(name, "DllMain") == 0 || strcmp(name, "wmain") == 0 ||
strcmp(name, "_tmain") == 0 || strcmp(name, "InitInstance") == 0 ||
strcmp(name, "OnInitDialog") == 0) {
def.is_entry_point = true;
}
}

// C# entry point detection: Windows Service lifecycle, ASP.NET controllers
if (ctx->language == CBM_LANG_CSHARP && !def.is_entry_point) {
// Windows Service lifecycle entry points
if (strcmp(name, "OnStart") == 0 || strcmp(name, "OnStartImpl") == 0 ||
strcmp(name, "OnStop") == 0 || strcmp(name, "OnStopImpl") == 0 ||
strcmp(name, "Run") == 0 || strcmp(name, "Execute") == 0 ||
strcmp(name, "Configure") == 0 || strcmp(name, "ConfigureServices") == 0) {
def.is_entry_point = true;
}
// ASP.NET controller decorators: [HttpGet], [HttpPost], [Route], etc.
if (!def.is_entry_point && def.decorators) {
for (const char **d = def.decorators; *d; d++) {
if (strstr(*d, "HttpGet") || strstr(*d, "HttpPost") ||
strstr(*d, "HttpPut") || strstr(*d, "HttpDelete") ||
strstr(*d, "HttpPatch") || strstr(*d, "Route") ||
strstr(*d, "ApiController") || strstr(*d, "Authorize")) {
def.is_entry_point = true;
break;
}
}
}
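// Test entry points: [TestMethod], [Fact], [Test], [SetUp], [TestInitialize]
// (substring match, so any attribute containing "Test" or "Fact" also qualifies)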
if (!def.is_entry_point && def.decorators) {
for (const char **d = def.decorators; *d; d++) {
if (strstr(*d, "TestMethod") || strstr(*d, "Fact") ||
strstr(*d, "Test") || strstr(*d, "SetUp") ||
strstr(*d, "TestInitialize")) {
def.is_entry_point = true;
break;
}
}
}
}

// Java entry point detection: Spring Boot, Vert.x, JAX-RS, JUnit
if (ctx->language == CBM_LANG_JAVA && !def.is_entry_point) {
// Vert.x lifecycle and common server patterns
if (strcmp(name, "start") == 0 || strcmp(name, "configure") == 0 ||
strcmp(name, "init") == 0 || strcmp(name, "run") == 0 ||
strcmp(name, "handle") == 0) {
def.is_entry_point = true;
}
// Spring/JAX-RS/JUnit decorators
if (!def.is_entry_point && def.decorators) {
for (const char **d = def.decorators; *d; d++) {
if (strstr(*d, "RequestMapping") || strstr(*d, "GetMapping") ||
strstr(*d, "PostMapping") || strstr(*d, "PutMapping") ||
strstr(*d, "DeleteMapping") || strstr(*d, "PatchMapping") ||
strstr(*d, "Endpoint") || strstr(*d, "EventHandler") ||
strstr(*d, "Scheduled") || strstr(*d, "Bean") ||
strstr(*d, "Override") || strstr(*d, "Test") ||
strstr(*d, "GET") || strstr(*d, "POST") ||
strstr(*d, "PUT") || strstr(*d, "DELETE") ||
strstr(*d, "Path") || strstr(*d, "Consumes")) {
def.is_entry_point = true;
break;
}
}
}
}

cbm_defs_push(&ctx->result->defs, a, def);
}

Expand Down Expand Up @@ -1610,6 +1730,68 @@ static void push_method_def(CBMExtractCtx *ctx, TSNode child, const char *class_
def.complexity = cbm_count_branching(child, spec->branching_node_types);
}

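// Entry point detection for class methods. Mirrors the name and decorator rules in
// extract_func_def, except that the C# test-attribute check ([TestMethod], [Fact], ...)
// is not repeated here.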
// Case-insensitive "main" check
if (strcasecmp(name, "main") == 0) {
def.is_entry_point = true;
}

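// C/C++ entry point detection by name: Windows entry points (WinMain, wWinMain,
// DllMain, wmain, _tmain) and MFC hooks (InitInstance, OnInitDialog).
// No GTest names are matched by this list.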
if ((ctx->language == CBM_LANG_C || ctx->language == CBM_LANG_CPP) && !def.is_entry_point) {
if (strcmp(name, "WinMain") == 0 || strcmp(name, "wWinMain") == 0 ||
strcmp(name, "DllMain") == 0 || strcmp(name, "wmain") == 0 ||
strcmp(name, "_tmain") == 0 || strcmp(name, "InitInstance") == 0 ||
strcmp(name, "OnInitDialog") == 0) {
def.is_entry_point = true;
}
}

// C# entry point detection: Windows Service lifecycle, ASP.NET controllers
if (ctx->language == CBM_LANG_CSHARP && !def.is_entry_point) {
if (strcmp(name, "OnStart") == 0 || strcmp(name, "OnStartImpl") == 0 ||
strcmp(name, "OnStop") == 0 || strcmp(name, "OnStopImpl") == 0 ||
strcmp(name, "Run") == 0 || strcmp(name, "Execute") == 0 ||
strcmp(name, "Configure") == 0 || strcmp(name, "ConfigureServices") == 0) {
def.is_entry_point = true;
}
if (!def.is_entry_point && def.decorators) {
for (const char **d = def.decorators; *d; d++) {
if (strstr(*d, "HttpGet") || strstr(*d, "HttpPost") ||
strstr(*d, "HttpPut") || strstr(*d, "HttpDelete") ||
strstr(*d, "HttpPatch") || strstr(*d, "Route") ||
strstr(*d, "ApiController") || strstr(*d, "Authorize")) {
def.is_entry_point = true;
break;
}
}
}
}

// Java entry point detection
if (ctx->language == CBM_LANG_JAVA && !def.is_entry_point) {
if (strcmp(name, "start") == 0 || strcmp(name, "configure") == 0 ||
strcmp(name, "init") == 0 || strcmp(name, "run") == 0 ||
strcmp(name, "handle") == 0) {
def.is_entry_point = true;
}
if (!def.is_entry_point && def.decorators) {
for (const char **d = def.decorators; *d; d++) {
if (strstr(*d, "RequestMapping") || strstr(*d, "GetMapping") ||
strstr(*d, "PostMapping") || strstr(*d, "PutMapping") ||
strstr(*d, "DeleteMapping") || strstr(*d, "PatchMapping") ||
strstr(*d, "Endpoint") || strstr(*d, "EventHandler") ||
strstr(*d, "Scheduled") || strstr(*d, "Bean") ||
strstr(*d, "Override") || strstr(*d, "Test") ||
strstr(*d, "GET") || strstr(*d, "POST") ||
strstr(*d, "PUT") || strstr(*d, "DELETE") ||
strstr(*d, "Path") || strstr(*d, "Consumes")) {
def.is_entry_point = true;
break;
}
}
}
}

cbm_defs_push(&ctx->result->defs, a, def);
}

Expand Down Expand Up @@ -1648,6 +1830,61 @@ static void extract_class_methods(CBMExtractCtx *ctx, TSNode class_node, const c
continue;
}

/* C#/Java property extraction: property_declaration, indexer_declaration,
* event_declaration, event_field_declaration.
* Creates a "Property" node with parent_class set for DEFINES_METHOD edge. */
const char *child_type = ts_node_type(child);
if (child_type &&
(strcmp(child_type, "property_declaration") == 0 ||
strcmp(child_type, "indexer_declaration") == 0 ||
strcmp(child_type, "event_declaration") == 0 ||
strcmp(child_type, "event_field_declaration") == 0)) {
TSNode name_node = ts_node_child_by_field_name(child, "name", 4);
if (ts_node_is_null(name_node)) {
/* indexer_declaration doesn't have a 'name' field, use "this[]" as the name */
if (strcmp(child_type, "indexer_declaration") == 0) {
CBMDefinition pdef;
memset(&pdef, 0, sizeof(pdef));
pdef.name = cbm_arena_strdup(ctx->arena, "this[]");
pdef.qualified_name = cbm_arena_sprintf(ctx->arena, "%s.this[]", class_qn);
pdef.label = "Property";
pdef.file_path = ctx->rel_path;
pdef.parent_class = class_qn;
pdef.start_line = ts_node_start_point(child).row + 1;
pdef.end_line = ts_node_end_point(child).row + 1;
pdef.lines = (int)(pdef.end_line - pdef.start_line + 1);
TSNode type_node = ts_node_child_by_field_name(child, "type", 4);
if (!ts_node_is_null(type_node)) {
pdef.return_type = cbm_node_text(ctx->arena, type_node, ctx->source);
}
cbm_defs_push(&ctx->result->defs, ctx->arena, pdef);
}
continue;
}
char *pname = cbm_node_text(ctx->arena, name_node, ctx->source);
if (pname && pname[0]) {
CBMDefinition pdef;
memset(&pdef, 0, sizeof(pdef));
pdef.name = pname;
pdef.qualified_name = cbm_arena_sprintf(ctx->arena, "%s.%s", class_qn, pname);
pdef.label = "Property";
pdef.file_path = ctx->rel_path;
pdef.parent_class = class_qn;
pdef.start_line = ts_node_start_point(child).row + 1;
pdef.end_line = ts_node_end_point(child).row + 1;
pdef.lines = (int)(pdef.end_line - pdef.start_line + 1);
pdef.is_exported = cbm_is_exported(pname, ctx->language);
/* Extract type */
TSNode type_node = ts_node_child_by_field_name(child, "type", 4);
if (!ts_node_is_null(type_node)) {
pdef.return_type = cbm_node_text(ctx->arena, type_node, ctx->source);
}
pdef.decorators = extract_decorators(ctx->arena, child, ctx->source,
ctx->language, spec);
cbm_defs_push(&ctx->result->defs, ctx->arena, pdef);
}
continue;
}

if (!cbm_kind_in_set(child, spec->function_node_types)) {
continue;
}
Expand Down
Loading