use crate::reference_miner::CodeReference;
use car_ast::parse_file;
use car_memgine::{
ContentType, FactMetadata, MemKind, MemNode, MemoryGraph, NodeIndex,
};
#[cfg(test)]
use car_memgine::Partition;
use chrono::Utc;
/// Summary of one `Indexer::ingest` call.
#[derive(Debug, Default, Clone)]
pub struct IndexReport {
/// Number of references handed to `ingest` (the length of the input slice).
pub references_seen: usize,
/// Number of references for which `parse_file` returned a parse result.
pub references_parsed: usize,
/// Number of nodes passed to `insert_foreign`: one per extracted symbol, plus
/// one raw-snippet node for each reference that could not be parsed.
pub symbols_inserted: usize,
/// Node indices returned by `insert_foreign`, in insertion order.
pub node_ids: Vec<NodeIndex>,
/// `(path, reason)` pairs for references that could not be parsed.
pub failures: Vec<(String, String)>,
}
/// Stateless entry point that ingests mined code references into a memory graph.
///
/// The type carries no data; `Debug` and `Clone` are derived so values can be
/// logged and embedded in other derivable types.
#[derive(Debug, Clone)]
pub struct Indexer;
impl Indexer {
pub fn new() -> Self {
Self
}
pub fn ingest(
&self,
graph: &mut MemoryGraph,
references: &[CodeReference],
) -> IndexReport {
let mut report = IndexReport::default();
report.references_seen = references.len();
for r in references {
let parsed = match parse_file(&r.snippet, &r.path) {
Some(p) => p,
None => {
let nix = graph.insert_foreign(
r.repo.clone(),
r.commit.clone(),
snippet_node(r),
);
report.node_ids.push(nix);
report.symbols_inserted += 1;
report.failures.push((r.path.clone(), "unsupported language".into()));
continue;
}
};
report.references_parsed += 1;
for sym in parsed.all_symbols() {
let node = MemNode {
kind: MemKind::CodeSymbol,
layer: 2,
key: format!("{}::{}", r.path, sym.name),
value: if sym.signature.is_empty() { sym.name.clone() } else { sym.signature.clone() },
fact_id: Some(format!(
"foreign::{}::{}::{}::{}",
r.repo, r.commit, r.path, sym.name
)),
scope: foreign_scope(&r.repo, &r.commit),
authority: license_authority(&r.license),
is_constraint: false,
created_at: Utc::now(),
expires_at: None,
content_type: ContentType::Code(car_memgine::CodeLanguage::Unknown),
metadata: FactMetadata::default(),
};
let nix = graph.insert_foreign(r.repo.clone(), r.commit.clone(), node);
report.node_ids.push(nix);
report.symbols_inserted += 1;
}
}
report
}
}
impl Default for Indexer {
fn default() -> Self {
Self::new()
}
}
/// Builds the `MemKind::Fact` node that stores a reference's raw snippet.
///
/// The node is keyed by the reference's path, carries the snippet text as its
/// value, and is identified by `foreign::<repo>::<commit>::<path>`.
fn snippet_node(r: &CodeReference) -> MemNode {
    let fact_id = format!("foreign::{}::{}::{}", r.repo, r.commit, r.path);
    let scope = foreign_scope(&r.repo, &r.commit);
    let authority = license_authority(&r.license);

    MemNode {
        kind: MemKind::Fact,
        layer: 2,
        key: r.path.clone(),
        value: r.snippet.clone(),
        fact_id: Some(fact_id),
        scope,
        authority,
        is_constraint: false,
        created_at: Utc::now(),
        expires_at: None,
        content_type: ContentType::Code(car_memgine::CodeLanguage::Unknown),
        metadata: FactMetadata::default(),
    }
}
/// Returns the scope label for a repository pinned at a commit, in the form
/// `foreign:<repo>@<commit>`.
fn foreign_scope(repo: &str, commit: &str) -> String {
    ["foreign:", repo, "@", commit].concat()
}
/// Returns the authority label for a reference's license: `external:<spdx>`
/// when a license string is present, `external:unknown-license` otherwise.
fn license_authority(license: &Option<String>) -> String {
    license.as_deref().map_or_else(
        || String::from("external:unknown-license"),
        |spdx| format!("external:{spdx}"),
    )
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::reference_miner::CodeReference;

    /// A reference to a small Rust snippet declaring one function (`hello`)
    /// and one struct (`Greeter`).
    fn rust_ref() -> CodeReference {
        CodeReference {
            repo: "github.com/acme/widget".into(),
            commit: "abc123".into(),
            path: "src/lib.rs".into(),
            snippet: "pub fn hello() -> &'static str { \"hi\" }\npub struct Greeter;"
                .into(),
            score: 0.9,
            license: Some("MIT".into()),
            why_relevant: "matches greeting query".into(),
        }
    }

    /// Every node produced for a parsed reference must land in the Foreign
    /// partition of the originating repository.
    #[test]
    fn ingests_symbols_into_foreign_partition() {
        let mut g = MemoryGraph::new();
        let report = Indexer::new().ingest(&mut g, &[rust_ref()]);
        assert_eq!(report.references_seen, 1);
        assert_eq!(report.references_parsed, 1);
        assert!(
            report.failures.is_empty(),
            "a parsed reference must not be reported as a failure: {:?}",
            report.failures
        );
        // `ingest` inserts one node per extracted symbol; the snippet declares two.
        assert!(
            report.symbols_inserted >= 2,
            "expected at least the two declared symbols"
        );
        assert_eq!(report.node_ids.len(), report.symbols_inserted);
        for nix in &report.node_ids {
            match g.partition_of(*nix) {
                Partition::Foreign { source_repo, .. } => {
                    assert_eq!(source_repo, "github.com/acme/widget");
                }
                _ => panic!("indexer wrote outside Foreign partition"),
            }
        }
    }

    /// Ingesting the same reference twice must leave the graph size unchanged
    /// while still reporting the same number of nodes.
    #[test]
    fn ingestion_is_idempotent() {
        let mut g = MemoryGraph::new();
        let r = rust_ref();
        let first = Indexer::new().ingest(&mut g, &[r.clone()]);
        let node_count_after_first = g.inner.node_count();
        let second = Indexer::new().ingest(&mut g, &[r]);
        assert_eq!(g.inner.node_count(), node_count_after_first,
            "re-ingesting the same reference must not grow the graph");
        assert_eq!(first.node_ids.len(), second.node_ids.len());
        assert_eq!(first.symbols_inserted, second.symbols_inserted);
    }

    /// A reference the parser rejects is stored as a single raw-snippet node
    /// and recorded as a failure.
    #[test]
    fn unsupported_language_stores_raw_snippet() {
        let mut g = MemoryGraph::new();
        let r = CodeReference {
            path: "doc/notes.xyz".into(),
            snippet: "some notes in an unknown format".into(),
            ..rust_ref()
        };
        let report = Indexer::new().ingest(&mut g, &[r]);
        assert_eq!(report.references_seen, 1);
        assert_eq!(report.references_parsed, 0);
        assert_eq!(report.symbols_inserted, 1);
        assert_eq!(report.node_ids.len(), 1);
        assert_eq!(report.failures.len(), 1);
        assert_eq!(
            report.failures[0],
            ("doc/notes.xyz".to_string(), "unsupported language".to_string())
        );
    }
}