ripvec-core 4.1.15

Semantic code + document search engine. Cacheless static-embedding + cross-encoder rerank by default; optional ModernBERT/BGE transformer engines with GPU backends. Tree-sitter chunking, hybrid BM25 + PageRank, composable ranking layers.
Documentation
//! Regression tests for I#55 — fn-pointer struct-literal initializer edges.
//!
//! C/C++ codebases routinely dispatch via tables of function pointers built
//! as struct or array initializer literals:
//!
//! ```c
//! // Designated initializers — Linux drivers ubiquitously:
//! static const struct file_operations my_fops = {
//!     .read = my_read,
//!     .write = my_write,
//! };
//!
//! // Positional initializers — redis command tables, libuv, etc:
//! struct redisCommand cmds[] = {
//!     {"get", getCommand, 2},
//!     {"set", setCommand, -3},
//! };
//! ```
//!
//! Pre-I#55 the call-graph extractor only matched `(call_expression
//! function: (identifier))` and `(field_expression field:
//! (field_identifier))`, so the functions referenced exclusively via these
//! tables appeared dead. On redis (Wave 5/sonnet) every command
//! implementation collapsed into the largest dead cluster; on linux this is
//! the kernel-mega-cluster pathology (Part XI §XI.4).
//!
//! The fix emits synthetic call-graph edges from the enclosing
//! `declaration` def (e.g. `my_fops`, `cmds`) to each function-identifier
//! found inside an `initializer_list` — both designated (`.read = my_read`)
//! and positional (`{"get", getCommand}`) forms, at arbitrary nesting
//! depth. Edges are emitted only when the identifier resolves to a function
//! definition in the same translation unit — string-literal table keys,
//! integer/character constants, and nested initializer braces are skipped
//! by tree-sitter's node-kind discrimination (we only consume bare
//! `identifier` children of `initializer_pair` and `initializer_list`).

use ripvec_core::languages;
use ripvec_core::repo_map::{self, Definition};

/// Run the C definition + call extraction pipeline over an in-memory source.
///
/// Looks up the language config and the call-query config registered for the
/// `"c"` extension, extracts the definitions, then lets `extract_calls_pub`
/// fill in the call edges on those definitions in place. No file I/O is
/// involved; the populated definition list is returned to the caller.
///
/// # Panics
///
/// Panics if either the language config or the call config for `"c"` is
/// unavailable.
fn parse_c(source: &str) -> Vec<Definition> {
    const C_EXT: &str = "c";

    // Resolve both configs up front; the language config is looked up first
    // so a missing one is reported before a missing call config.
    let definitions_cfg = languages::config_for_extension(C_EXT).expect("c lang config");
    let calls_cfg = languages::call_query_for_extension(C_EXT).expect("c call config");

    let mut parsed = repo_map::extract_definitions_pub(source, &definitions_cfg);
    repo_map::extract_calls_pub(source, &calls_cfg, &mut parsed);
    parsed
}

/// Assert that the definition named `def_name` records a call to `target`.
///
/// Both names are compared as bare names against `Definition::name` and the
/// `name` of each recorded call.
///
/// # Panics
///
/// Panics with the list of all definition names when `def_name` is not found,
/// and with the list of the definition's recorded call names when `target` is
/// not among them.
fn assert_call_edge(defs: &[Definition], def_name: &str, target: &str) {
    let owner = match defs.iter().find(|d| d.name.as_str() == def_name) {
        Some(found) => found,
        None => panic!("def {def_name} missing; defs: {:?}", names(defs)),
    };

    // Collected once: used both for the membership check and the failure dump.
    let call_names: Vec<&str> = owner.calls.iter().map(|c| c.name.as_str()).collect();
    let has_edge = call_names.contains(&target);
    assert!(
        has_edge,
        "expected def {def_name} to call {target}; actual call edges = {call_names:?}",
    );
}

/// Collect the name of every definition, in slice order, as borrowed strings.
fn names(defs: &[Definition]) -> Vec<&str> {
    let mut collected = Vec::with_capacity(defs.len());
    for def in defs {
        collected.push(def.name.as_str());
    }
    collected
}

#[test]
fn c_struct_init_designated_call_edge() {
    // Smallest Linux-style fops fixture: a static const struct whose fields
    // are filled with `.field = funcname` designated initializers. The
    // identifier on the right-hand side of each pair must show up as an edge
    // from the enclosing declaration (`my_fops`) to that function.
    let c_src = r#"
static int my_read(void) { return 1; }
static int my_write(void) { return 2; }

static const struct file_operations my_fops = {
    .read = my_read,
    .write = my_write,
};
"#;
    let parsed = parse_c(c_src);
    const HANDLERS: &[&str] = &["my_read", "my_write"];
    for &handler in HANDLERS {
        assert_call_edge(&parsed, "my_fops", handler);
    }
}

#[test]
fn c_struct_init_positional_call_edge() {
    // Smallest redis-style command table: an array of struct literals using
    // positional initializers. In every {name, fnptr, arity} entry the second
    // slot is a bare function identifier that must become an edge from `cmds`.
    let c_src = r#"
int getCommand(int x) { return x; }
int setCommand(int x) { return x; }
int delCommand(int x) { return x; }

struct redisCommand cmds[] = {
    {"get", getCommand, 2},
    {"set", setCommand, -3},
    {"del", delCommand, -2},
};
"#;
    let parsed = parse_c(c_src);
    const HANDLERS: &[&str] = &["getCommand", "setCommand", "delCommand"];
    for &handler in HANDLERS {
        assert_call_edge(&parsed, "cmds", handler);
    }
}

#[test]
fn c_struct_init_nested_call_edge() {
    // Array of structs whose entries mix the two syntactic forms: the first
    // two use designated initializers, the last two positional ones. The
    // outer declaration (`table`) must gain an edge to every inner function
    // identifier no matter which form introduced it.
    let c_src = r#"
int op_alpha(int x) { return x + 1; }
int op_beta(int x) { return x + 2; }
int op_gamma(int x) { return x + 3; }
int op_delta(int x) { return x + 4; }

struct dispatch_entry table[] = {
    { .name = "alpha", .fn = op_alpha },
    { .name = "beta",  .fn = op_beta },
    { "gamma", op_gamma, 0 },
    { "delta", op_delta, 0 },
};
"#;
    let parsed = parse_c(c_src);
    const OPS: &[&str] = &["op_alpha", "op_beta", "op_gamma", "op_delta"];
    OPS.iter()
        .for_each(|&op| assert_call_edge(&parsed, "table", op));
}

#[test]
fn c_struct_init_real_world_redis() {
    // Reduced form of the redisCommandTable from redis src/commands.c: an
    // array of struct literals in which the second positional slot holds the
    // command implementation function pointer.
    //
    // Before I#55 the *Command functions below had no inbound edge from any
    // entry point and ended up in the largest dead cluster.
    let c_src = r#"
typedef int redisCommandProc(int *c);

struct redisCommand {
    const char *name;
    redisCommandProc *proc;
    int arity;
    const char *sflags;
};

int getCommand(int *c) { return 0; }
int setCommand(int *c) { return 0; }
int delCommand(int *c) { return 0; }
int existsCommand(int *c) { return 0; }
int incrCommand(int *c) { return 0; }

struct redisCommand redisCommandTable[] = {
    {"get",    getCommand,    2, "rF"},
    {"set",    setCommand,   -3, "wm"},
    {"del",    delCommand,   -2, "w"},
    {"exists", existsCommand,-2, "rF"},
    {"incr",   incrCommand,   2, "wmF"},
};
"#;
    // Every command implementation referenced from the table, in table order.
    const COMMAND_IMPLS: &[&str] = &[
        "getCommand",
        "setCommand",
        "delCommand",
        "existsCommand",
        "incrCommand",
    ];

    let parsed = parse_c(c_src);
    for &command in COMMAND_IMPLS {
        assert_call_edge(&parsed, "redisCommandTable", command);
    }
}

#[test]
fn c_struct_init_real_world_linux_fops() {
    // Reduced Linux file_operations fixture, the driver dispatch pattern.
    // Every `.field = funcname` designated initializer must yield a
    // call-graph edge from `my_fops` to the named function.
    //
    // Before I#55, function pointers referenced only through tables like
    // this one were unreachable from any entry point (the
    // kernel-mega-cluster pathology).
    let c_src = r#"
struct file_operations {
    int (*open)(struct inode *, struct file *);
    int (*release)(struct inode *, struct file *);
    long (*read)(struct file *, char *, long);
    long (*write)(struct file *, const char *, long);
    long (*ioctl)(struct file *, unsigned int, unsigned long);
};

static int my_open(struct inode *i, struct file *f) { return 0; }
static int my_release(struct inode *i, struct file *f) { return 0; }
static long my_read(struct file *f, char *b, long n) { return n; }
static long my_write(struct file *f, const char *b, long n) { return n; }
static long my_ioctl(struct file *f, unsigned int c, unsigned long a) { return 0; }

static const struct file_operations my_fops = {
    .open    = my_open,
    .release = my_release,
    .read    = my_read,
    .write   = my_write,
    .ioctl   = my_ioctl,
};
"#;
    // Handlers wired into `my_fops`, in initializer order.
    const FOPS_HANDLERS: &[&str] = &["my_open", "my_release", "my_read", "my_write", "my_ioctl"];

    let parsed = parse_c(c_src);
    for &handler in FOPS_HANDLERS {
        assert_call_edge(&parsed, "my_fops", handler);
    }
}