use std::collections::HashMap;
use std::path::Path;
use ucm_graph_core::edge::*;
use ucm_graph_core::entity::*;
use ucm_graph_core::event::*;
/// Maps a crate name as it appears as the first segment of a `use` path
/// (e.g. `ucm_graph_core`) to the source root directory of that crate
/// (e.g. `ucm-core/src`), so imports from sibling crates can be resolved to files.
pub type RustCrateMap = HashMap<String, String>;
/// Parses a single source file into graph events without any sibling-crate
/// information.
///
/// This is a convenience wrapper around [`parse_source_code_with_context`] that
/// passes an empty crate map, so Rust `use` paths that do not start with
/// `crate::`, `super::` or `self::` are not resolved.
pub fn parse_source_code(file_path: &str, source: &str, language: &str) -> Vec<UcmEvent> {
    let no_sibling_crates: HashMap<String, String> = HashMap::new();
    parse_source_code_with_context(file_path, source, language, &no_sibling_crates)
}
/// Parses one source file into graph events, using `crate_map` to resolve Rust
/// imports that come from sibling crates.
///
/// Events are pushed in this order: the module entity for the file; then, for
/// each function, each data model and (TypeScript/JavaScript only) each route,
/// an `EntityDiscovered` immediately followed by a `DependsOn` edge pointing at
/// the module; and finally one `Imports` edge per imported symbol.
///
/// `language` selects the extractors: `"rust"`/`"rs"` and `"python"`/`"py"` have
/// their own; every other value falls through to the TypeScript extractors.
pub fn parse_source_code_with_context(
file_path: &str,
source: &str,
language: &str,
crate_map: &RustCrateMap,
) -> Vec<UcmEvent> {
let mut events = Vec::new();
// The file itself is modelled as a module entity with the fixed local name "module".
let module_id = EntityId::local(file_path, "module");
// Emitted first so that every edge pushed below refers to an already-discovered module.
events.push(UcmEvent::new(EventPayload::EntityDiscovered {
entity_id: module_id.clone(),
kind: EntityKind::Module {
language: language.to_string(),
exports: vec![],
},
name: file_name_of(file_path),
file_path: file_path.to_string(),
language: language.to_string(),
source: DiscoverySource::StaticAnalysis,
line_range: None,
}));
// Functions: (name, is_async, start line, estimated end line).
let functions = match language {
"rust" | "rs" => extract_functions_rust(source),
"python" | "py" => extract_functions_python(source),
_ => extract_functions_ts(source),
};
for (name, is_async, line_start, line_end) in functions {
let fn_id = EntityId::local(file_path, &name);
events.push(UcmEvent::new(EventPayload::EntityDiscovered {
entity_id: fn_id.clone(),
kind: EntityKind::Function {
is_async,
// The extractors do not report parameters or return types; these are fixed placeholders.
parameter_count: 0,
return_type: None,
},
name: name.clone(),
file_path: file_path.to_string(),
language: language.to_string(),
source: DiscoverySource::StaticAnalysis,
line_range: Some((line_start, line_end)),
}));
// Link the function to the module it was found in.
events.push(UcmEvent::new(EventPayload::DependencyLinked {
source_entity: fn_id,
target_entity: module_id.clone(),
relation_type: RelationType::DependsOn,
confidence: 0.99,
source: DiscoverySource::StaticAnalysis,
description: format!("{name} is defined in {file_path}"),
}));
}
// Data models: Rust structs/enums/traits, or Python/TypeScript classes, as (name, line).
let structs = match language {
"rust" | "rs" => extract_structs_rust(source),
"python" | "py" => extract_classes_python(source),
_ => extract_classes_ts(source),
};
for (name, line_num) in structs {
let struct_id = EntityId::local(file_path, &name);
events.push(UcmEvent::new(EventPayload::EntityDiscovered {
entity_id: struct_id.clone(),
kind: EntityKind::DataModel { fields: vec![] },
name: name.clone(),
file_path: file_path.to_string(),
language: language.to_string(),
source: DiscoverySource::StaticAnalysis,
// Only the declaration line is known; the end line is a fixed +5 guess.
line_range: Some((line_num, line_num + 5)),
}));
events.push(UcmEvent::new(EventPayload::DependencyLinked {
source_entity: struct_id,
target_entity: module_id.clone(),
relation_type: RelationType::DependsOn,
confidence: 0.99,
source: DiscoverySource::StaticAnalysis,
description: format!("{name} is defined in {file_path}"),
}));
}
// Route detection only runs for TypeScript/JavaScript language tags.
if matches!(language, "typescript" | "javascript" | "ts" | "js") {
for (method, route, _handler, line_num) in extract_routes_ts(source) {
// Routes are identified by "<METHOD>:<route>" within the file.
let route_id = EntityId::local(file_path, &format!("{method}:{route}"));
events.push(UcmEvent::new(EventPayload::EntityDiscovered {
entity_id: route_id.clone(),
kind: EntityKind::ApiEndpoint {
method: method.clone(),
route: route.clone(),
// The extracted handler value is ignored; the endpoint is recorded with an empty handler.
handler: String::new(),
},
name: format!("{method} {route}"),
file_path: file_path.to_string(),
language: language.to_string(),
source: DiscoverySource::StaticAnalysis,
line_range: Some((line_num, line_num)),
}));
events.push(UcmEvent::new(EventPayload::DependencyLinked {
source_entity: route_id,
target_entity: module_id.clone(),
relation_type: RelationType::DependsOn,
confidence: 0.99,
source: DiscoverySource::StaticAnalysis,
description: format!("{method} {route} is defined in {file_path}"),
}));
}
}
// Imports: (imported symbols, resolved target file path, line number).
let imports = match language {
"rust" | "rs" => extract_imports_rust(source, file_path, crate_map),
"python" | "py" => extract_imports_python(source, file_path),
_ => extract_imports_ts(source, file_path),
};
for (symbols, from_path, line_num) in imports {
for symbol in &symbols {
// One edge per symbol, from this module to the symbol's id in the resolved target file.
events.push(UcmEvent::new(EventPayload::DependencyLinked {
source_entity: module_id.clone(),
target_entity: EntityId::local(&from_path, symbol),
relation_type: RelationType::Imports,
confidence: 0.95,
source: DiscoverySource::StaticAnalysis,
description: format!("import {symbol} from '{from_path}' at line {line_num}"),
}));
}
}
events
}
/// Scans TypeScript/JavaScript source line by line for function declarations.
///
/// Returns `(name, is_async, start_line, end_line)` per match. Lines are
/// 1-based; the end line is a fixed estimate (`index + 20`). A function is
/// flagged async when its trimmed line contains the substring `async`.
fn extract_functions_ts(source: &str) -> Vec<(String, bool, usize, usize)> {
    source
        .lines()
        .enumerate()
        .filter_map(|(idx, raw)| {
            let trimmed = raw.trim();
            ts_function_name(trimmed)
                .map(|name| (name, trimmed.contains("async"), idx + 1, idx + 20))
        })
        .collect()
}
/// Extracts a function name from a single (already trimmed) TS/JS line.
///
/// Two shapes are recognised:
/// * `function NAME` / `async function NAME` anywhere in the line;
/// * a line starting with `const NAME =` or `export const NAME =` whose
///   right-hand side contains `(` or `=>`.
///
/// Returns `None` when neither shape yields a non-empty identifier.
fn ts_function_name(line: &str) -> Option<String> {
    // Longest leading run of identifier characters (alphanumeric or `_`).
    let leading_ident = |text: &str| -> String {
        text.chars()
            .take_while(|ch| ch.is_alphanumeric() || *ch == '_')
            .collect()
    };

    // Keyword form: the first keyword whose first occurrence is followed by a name wins.
    let from_keyword = ["function ", "async function "].iter().find_map(|kw| {
        let start = line.find(kw)? + kw.len();
        let ident = leading_ident(&line[start..]);
        if ident.is_empty() {
            None
        } else {
            Some(ident)
        }
    });
    if from_keyword.is_some() {
        return from_keyword;
    }

    // Binding form: `const NAME = <something that looks callable>`.
    let binding = line
        .strip_prefix("export const ")
        .or_else(|| line.strip_prefix("const "))?;
    let eq_at = binding.find('=')?;
    // `rhs` deliberately keeps the `=` so that a bare `=>` is still seen.
    let (lhs, rhs) = binding.split_at(eq_at);
    let ident = leading_ident(lhs.trim());
    let looks_callable = rhs.contains('(') || rhs.contains("=>");
    if !ident.is_empty() && looks_callable {
        Some(ident)
    } else {
        None
    }
}
/// Finds TypeScript/JavaScript class declarations.
///
/// A line counts when it contains both `class ` and `{`; the name is the
/// identifier that follows the first `class `. Returns `(name, line)` with
/// 1-based line numbers.
fn extract_classes_ts(source: &str) -> Vec<(String, usize)> {
    source
        .lines()
        .enumerate()
        .filter_map(|(idx, raw)| {
            let trimmed = raw.trim();
            if !trimmed.contains('{') {
                return None;
            }
            // `nth(1)` is `Some` exactly when the line contains `class `.
            let tail = trimmed.split("class ").nth(1)?;
            let ident: String = tail
                .chars()
                .take_while(|ch| ch.is_alphanumeric() || *ch == '_')
                .collect();
            if ident.is_empty() {
                None
            } else {
                Some((ident, idx + 1))
            }
        })
        .collect()
}
/// Finds Express-style route registrations such as `app.get('/x', h)` or
/// `router.post("/y", h)`.
///
/// Returns `(METHOD, route, handler, line)` with the method upper-cased and a
/// 1-based line number. The handler slot is always an empty string.
fn extract_routes_ts(source: &str) -> Vec<(String, String, String, usize)> {
    const HTTP_VERBS: [&str; 5] = ["get", "post", "put", "delete", "patch"];

    // (upper-cased verb, call text to look for), in verb order, `app` before `router`.
    let call_patterns: Vec<(String, String)> = HTTP_VERBS
        .iter()
        .flat_map(|verb| {
            let upper = verb.to_uppercase();
            vec![
                (upper.clone(), format!("app.{verb}(")),
                (upper, format!("router.{verb}(")),
            ]
        })
        .collect();

    let mut routes = Vec::new();
    for (idx, raw) in source.lines().enumerate() {
        let trimmed = raw.trim();
        for (verb, needle) in &call_patterns {
            if !trimmed.contains(needle.as_str()) {
                continue;
            }
            if let Some(path) = ts_route_path(trimmed) {
                routes.push((verb.clone(), path, String::new(), idx + 1));
            }
        }
    }
    routes
}
/// Pulls the first quoted string out of the text between the first and
/// second `(` of `line`.
///
/// Single quotes are used as the delimiter if that text contains any,
/// otherwise double quotes. Returns `None` when there is no `(` or no
/// delimiter.
fn ts_route_path(line: &str) -> Option<String> {
    let args = line.split('(').nth(1)?;
    let quote = match args.find('\'') {
        Some(_) => '\'',
        None => '"',
    };
    // The second piece exists only if at least one delimiter was present.
    args.split(quote).nth(1).map(str::to_string)
}
/// Collects relative ES-module imports (`import … from './x'` or `'../x'`).
///
/// Returns `(symbols, resolved_path, line)` per import statement, with the
/// path resolved against the directory of `current_file` and a 1-based line
/// number. Imports that are not relative, or that bind no symbols, are
/// skipped.
fn extract_imports_ts(source: &str, current_file: &str) -> Vec<(Vec<String>, String, usize)> {
    let base_dir = parent_dir(current_file);
    source
        .lines()
        .enumerate()
        .filter_map(|(idx, raw)| {
            let stmt = raw.trim();
            if !(stmt.starts_with("import ") && stmt.contains("from ")) {
                return None;
            }
            let names = ts_import_symbols(stmt);
            let specifier = ts_import_path(stmt)?;
            let is_relative = specifier.starts_with("./") || specifier.starts_with("../");
            if !is_relative || names.is_empty() {
                return None;
            }
            let target = resolve_path(&base_dir, &specifier, &["ts", "tsx", "js"]);
            Some((names, target, idx + 1))
        })
        .collect()
}
/// Returns the symbols bound by a TS/JS `import` line.
///
/// * Named imports (`import { A, B as C } from …`) yield the original names
///   (`A`, `B`); aliases after ` as ` are dropped.
/// * Otherwise the identifier right after `import ` is treated as a default
///   import, except the keyword `type`.
/// * Anything else (e.g. `import * as ns …`) yields an empty list.
///
/// The closing brace is searched for only after the opening brace, so a stray
/// `}` earlier in the line (for instance inside the module specifier) can no
/// longer produce an inverted slice range and panic.
fn ts_import_symbols(line: &str) -> Vec<String> {
    if let Some(open) = line.find('{') {
        let after_open = &line[open + 1..];
        if let Some(close) = after_open.find('}') {
            return after_open[..close]
                .split(',')
                .map(|item| {
                    item.trim()
                        .split(" as ")
                        .next()
                        .unwrap_or("")
                        .trim()
                        .to_string()
                })
                .filter(|name| !name.is_empty())
                .collect();
        }
    }

    // No well-formed brace list: fall back to a default import name.
    let after_keyword = line.strip_prefix("import ").unwrap_or("");
    let name: String = after_keyword
        .chars()
        .take_while(|c| c.is_alphanumeric() || *c == '_')
        .collect();
    if !name.is_empty() && name != "type" {
        vec![name]
    } else {
        vec![]
    }
}
/// Extracts the quoted module specifier that follows `from ` in an import
/// line.
///
/// Only the text between the first and second `from ` is inspected. Single
/// quotes are used as the delimiter if that text contains any, otherwise
/// double quotes. Returns `None` when `from ` or the delimiter is missing.
fn ts_import_path(line: &str) -> Option<String> {
    let tail = line.split("from ").nth(1)?;
    let quote = match tail.find('\'') {
        Some(_) => '\'',
        None => '"',
    };
    tail.split(quote).nth(1).map(str::to_string)
}
/// Scans Rust source line by line for `fn` declarations.
///
/// Lines starting with `//` or `#[test` are ignored. Returns
/// `(name, is_async, start_line, end_line)`; lines are 1-based and the end
/// line is a fixed estimate (`index + 30`). A function is flagged async when
/// its trimmed line contains `async `.
fn extract_functions_rust(source: &str) -> Vec<(String, bool, usize, usize)> {
    source
        .lines()
        .enumerate()
        .filter_map(|(idx, raw)| {
            let trimmed = raw.trim();
            if trimmed.starts_with("//") || trimmed.starts_with("#[test") {
                return None;
            }
            rust_fn_name(trimmed)
                .map(|name| (name, trimmed.contains("async "), idx + 1, idx + 30))
        })
        .collect()
}
/// Extracts the function name from a single (already trimmed) Rust line.
///
/// Leading visibility (`pub`, `pub(crate)`, `pub(super)`) and the qualifiers
/// `const`, `async`, `unsafe` and `extern "C"` are stripped, in that order,
/// before looking for `fn NAME`. Returns `None` if the line is not a function
/// declaration or the name is empty.
fn rust_fn_name(line: &str) -> Option<String> {
    let stripped = line
        .trim_start_matches("pub(crate) ")
        .trim_start_matches("pub(super) ")
        .trim_start_matches("pub ")
        // `const fn` is common (constructors, helpers) and precedes `async`/`unsafe`.
        .trim_start_matches("const ")
        .trim_start_matches("async ")
        .trim_start_matches("unsafe ")
        .trim_start_matches("extern \"C\" ");
    // Requiring the `fn ` keyword keeps plain `const NAME: …` items out.
    let rest = stripped.strip_prefix("fn ")?;
    let name: String = rest
        .chars()
        .take_while(|c| c.is_alphanumeric() || *c == '_')
        .collect();
    if name.is_empty() {
        None
    } else {
        Some(name)
    }
}
/// Finds Rust type declarations: `struct`, `enum` and `trait`.
///
/// Leading visibility (`pub`, `pub(crate)`, `pub(super)`) is stripped first,
/// matching what `rust_fn_name` accepts, so `pub(super)` items are no longer
/// missed. Returns `(name, line)` with 1-based line numbers.
fn extract_structs_rust(source: &str) -> Vec<(String, usize)> {
    // All three keywords are handled identically.
    const TYPE_KEYWORDS: [&str; 3] = ["struct ", "enum ", "trait "];

    source
        .lines()
        .enumerate()
        .filter_map(|(idx, raw)| {
            let decl = raw
                .trim()
                .trim_start_matches("pub(crate) ")
                .trim_start_matches("pub(super) ")
                .trim_start_matches("pub ");
            let rest = TYPE_KEYWORDS
                .iter()
                .find_map(|kw| decl.strip_prefix(*kw))?;
            let name: String = rest
                .chars()
                .take_while(|c| c.is_alphanumeric() || *c == '_')
                .collect();
            if name.is_empty() {
                None
            } else {
                Some((name, idx + 1))
            }
        })
        .collect()
}
/// Resolves Rust `use` declarations to the files they most likely refer to.
///
/// Only lines that start with `use ` are considered. The first path segment
/// picks the root directory:
/// * `crate::` — the crate source root inferred from `current_file`;
/// * `super::` / `self::` — the parent / current module directory;
/// * any other first segment — looked up in `crate_map`; paths whose first
///   segment is not in the map (std, third-party crates) are skipped.
///
/// The rest of the path is split into a module part and symbols:
/// * `a::b::{X, Y as Z}` gives module `a::b` and symbols `X`, `Y`;
/// * `a::b::X` (optionally `as Alias`) gives module `a::b` and symbol `X`;
/// * a single segment `a` is recorded as the symbol `"module"` in `a.rs`;
/// * glob imports (`*`) are skipped.
///
/// Aliases are dropped in both forms, so the symbol is always the name the
/// item has in its defining file.
///
/// Returns `(symbols, file_path, line)` with 1-based line numbers. The module
/// part maps to `<root>/<a/b>.rs`, or `<root>/lib.rs` when it is empty.
fn extract_imports_rust(
    source: &str,
    current_file: &str,
    crate_map: &RustCrateMap,
) -> Vec<(Vec<String>, String, usize)> {
    let mut out = Vec::new();
    let crate_src_root = infer_crate_src_root(current_file);
    let file_in_crate = current_file
        .strip_prefix(&format!("{crate_src_root}/"))
        .unwrap_or(current_file);

    for (i, line) in source.lines().enumerate() {
        let rest = match line.trim().strip_prefix("use ") {
            Some(rest) => rest,
            None => continue,
        };

        // Decide which directory the remainder of the path is relative to.
        let (target_src_root, rest_after_prefix) = if let Some(r) = rest.strip_prefix("crate::") {
            (crate_src_root.clone(), r)
        } else if let Some(r) = rest.strip_prefix("super::") {
            (rust_parent_module_dir(&crate_src_root, file_in_crate), r)
        } else if let Some(r) = rest.strip_prefix("self::") {
            (rust_current_module_dir(&crate_src_root, file_in_crate), r)
        } else {
            let first_segment = rest.split("::").next().unwrap_or("");
            match crate_map.get(first_segment) {
                Some(sibling_root) => {
                    let after = rest
                        .strip_prefix(first_segment)
                        .and_then(|s| s.strip_prefix("::"))
                        .unwrap_or("");
                    (sibling_root.clone(), after)
                }
                // Not a known sibling crate: external dependency, ignore.
                None => continue,
            }
        };

        let cleaned = rest_after_prefix.trim_end_matches(';');
        let (module_segments, symbols) = if let Some(brace_start) = cleaned.find('{') {
            // Grouped import: everything before `{` is the module path.
            let prefix = cleaned[..brace_start].trim_end_matches("::");
            let inner = cleaned[brace_start + 1..].split('}').next().unwrap_or("");
            let syms: Vec<String> = inner
                .split(',')
                .map(|s| {
                    s.trim()
                        .split(" as ")
                        .next()
                        .unwrap_or("")
                        .trim()
                        .to_string()
                })
                .filter(|s| !s.is_empty() && s != "*")
                .collect();
            (prefix.to_string(), syms)
        } else {
            // Drop a trailing `as Alias` so the symbol (or module file name) is the
            // original item name rather than e.g. "EntityId as Id".
            let path = cleaned.split(" as ").next().unwrap_or(cleaned).trim_end();
            match path.rsplit_once("::") {
                None => {
                    // Single segment: an import of a whole module file.
                    if !path.is_empty() && path != "*" {
                        let file_path = format!("{target_src_root}/{path}.rs");
                        out.push((vec!["module".to_string()], file_path, i + 1));
                    }
                    continue;
                }
                Some((_, "*")) => continue,
                Some((module, symbol)) => (module.to_string(), vec![symbol.to_string()]),
            }
        };

        if symbols.is_empty() {
            continue;
        }
        let module_file_path = if module_segments.is_empty() {
            format!("{target_src_root}/lib.rs")
        } else {
            format!(
                "{target_src_root}/{}.rs",
                module_segments.replace("::", "/")
            )
        };
        out.push((symbols, module_file_path, i + 1));
    }
    out
}
/// Guesses the crate's source root directory from a file path.
///
/// * If the path contains `/src/`, everything up to and including the last
///   `/src` is returned.
/// * If the path starts with `src/`, the result is `src`.
/// * Otherwise the file's parent directory is used.
fn infer_crate_src_root(file_path: &str) -> String {
    match file_path.rfind("/src/") {
        // `+ 4` keeps the `/src` part and drops the trailing slash.
        Some(idx) => file_path[..idx + 4].to_string(),
        None if file_path.starts_with("src/") => "src".to_string(),
        None => parent_dir(file_path),
    }
}
/// Directory used to resolve `super::` paths for a file inside a crate.
///
/// Takes the directory that contains `file_in_crate`, goes one level further
/// up, and joins that onto `crate_src_root`. If either step leaves nothing,
/// the crate source root itself is returned.
fn rust_parent_module_dir(crate_src_root: &str, file_in_crate: &str) -> String {
    let module_dir = parent_dir(file_in_crate);
    let grandparent = if module_dir.is_empty() {
        String::new()
    } else {
        parent_dir(&module_dir)
    };
    if grandparent.is_empty() {
        crate_src_root.to_string()
    } else {
        format!("{crate_src_root}/{grandparent}")
    }
}
/// Directory used to resolve `self::` paths for a file inside a crate.
///
/// This is the directory containing `file_in_crate`, joined onto
/// `crate_src_root`, or the crate source root itself when the file sits
/// directly in it.
fn rust_current_module_dir(crate_src_root: &str, file_in_crate: &str) -> String {
    match parent_dir(file_in_crate).as_str() {
        "" => crate_src_root.to_string(),
        dir => format!("{crate_src_root}/{dir}"),
    }
}
/// Scans Python source line by line for `def` / `async def` declarations.
///
/// Indentation is ignored, so methods and nested functions are included.
/// Returns `(name, is_async, start_line, end_line)`; lines are 1-based and
/// the end line is a fixed estimate (`index + 20`).
fn extract_functions_python(source: &str) -> Vec<(String, bool, usize, usize)> {
    source
        .lines()
        .enumerate()
        .filter_map(|(idx, raw)| {
            let trimmed = raw.trim();
            let (is_async, tail) = match trimmed.strip_prefix("async def ") {
                Some(tail) => (true, tail),
                None => (false, trimmed.strip_prefix("def ")?),
            };
            let ident: String = tail
                .chars()
                .take_while(|ch| ch.is_alphanumeric() || *ch == '_')
                .collect();
            if ident.is_empty() {
                None
            } else {
                Some((ident, is_async, idx + 1, idx + 20))
            }
        })
        .collect()
}
/// Finds Python `class` declarations, ignoring indentation.
///
/// Returns `(name, line)` with 1-based line numbers.
fn extract_classes_python(source: &str) -> Vec<(String, usize)> {
    source
        .lines()
        .enumerate()
        .filter_map(|(idx, raw)| {
            let tail = raw.trim().strip_prefix("class ")?;
            let ident: String = tail
                .chars()
                .take_while(|ch| ch.is_alphanumeric() || *ch == '_')
                .collect();
            if ident.is_empty() {
                None
            } else {
                Some((ident, idx + 1))
            }
        })
        .collect()
}
/// Collects Python relative imports (`from .mod import a, b`).
///
/// Returns `(symbols, file_path, line)` with 1-based line numbers. Absolute
/// imports and wildcard-only imports are skipped.
///
/// Path resolution follows Python's relative-import rules:
/// * one leading dot refers to the directory of `current_file`; each extra
///   dot goes one directory up;
/// * a dotted module (`.a.b`) maps to nested directories (`a/b.py`);
/// * `from . import x` has no module part and maps to the package's
///   `__init__.py`;
/// * when the base directory is empty the path has no leading `/`.
///
/// Symbol names have `as` aliases, surrounding parentheses and trailing `#`
/// comments removed.
fn extract_imports_python(source: &str, current_file: &str) -> Vec<(Vec<String>, String, usize)> {
    let package_dir = Path::new(current_file)
        .parent()
        .unwrap_or_else(|| Path::new(""));

    source
        .lines()
        .enumerate()
        .filter_map(|(idx, raw)| {
            // `rest` starts right after the first dot of the relative import.
            let rest = raw.trim().strip_prefix("from .")?;
            let import_at = rest.find(" import ")?;
            let module_spec = &rest[..import_at];
            let names_part = rest[import_at + " import ".len()..]
                .split('#')
                .next()
                .unwrap_or("");

            let symbols: Vec<String> = names_part
                .split(',')
                .map(|item| {
                    item.trim()
                        .trim_matches(|c: char| c == '(' || c == ')')
                        .split(" as ")
                        .next()
                        .unwrap_or("")
                        .trim()
                        .to_string()
                })
                .filter(|name| !name.is_empty() && name != "*")
                .collect();
            if symbols.is_empty() {
                return None;
            }

            // Dots beyond the first each climb one directory.
            let extra_levels = module_spec.chars().take_while(|c| *c == '.').count();
            let mut base = package_dir;
            for _ in 0..extra_levels {
                base = base.parent().unwrap_or_else(|| Path::new(""));
            }

            let module = module_spec.trim_start_matches('.').trim();
            let relative = if module.is_empty() {
                "__init__".to_string()
            } else {
                module.replace('.', "/")
            };
            let base = base.to_string_lossy();
            let path = if base.is_empty() {
                format!("{relative}.py")
            } else {
                format!("{base}/{relative}.py")
            };
            Some((symbols, path, idx + 1))
        })
        .collect()
}
/// Returns the directory part of `file_path` as a string, or an empty string
/// when the path has no parent.
fn parent_dir(file_path: &str) -> String {
    match Path::new(file_path).parent() {
        Some(dir) => dir.to_string_lossy().into_owned(),
        None => String::new(),
    }
}
/// Returns the final component of `file_path`, or the whole input unchanged
/// when the path has no file name (for example an empty string or `..`).
fn file_name_of(file_path: &str) -> String {
    match Path::new(file_path).file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => file_path.to_string(),
    }
}
/// Joins a relative import specifier onto `dir` and normalises the result.
///
/// `.` components are dropped and each `..` removes the preceding component
/// (a `..` with nothing before it is ignored). Root components are dropped
/// as well, so the result is always a relative path.
///
/// If the normalised path has no extension, the first entry of `extensions`
/// is appended (a leading `.` on that entry is tolerated); `ts` is used when
/// the slice is empty. No file-system lookup is performed.
fn resolve_path(dir: &str, raw: &str, extensions: &[&str]) -> String {
    use std::path::{Component, PathBuf};

    let base = if dir.is_empty() {
        Path::new(".")
    } else {
        Path::new(dir)
    };
    let joined = base.join(raw);

    let mut parts: Vec<std::ffi::OsString> = Vec::new();
    for comp in joined.components() {
        match comp {
            Component::ParentDir => {
                parts.pop();
            }
            Component::CurDir | Component::RootDir => {}
            other => parts.push(other.as_os_str().to_owned()),
        }
    }

    let normalized: PathBuf = parts.iter().collect();
    let resolved = normalized.to_string_lossy();
    if normalized.extension().is_some() {
        resolved.into_owned()
    } else {
        let default_ext = extensions
            .first()
            .map(|ext| ext.trim_start_matches('.'))
            .filter(|ext| !ext.is_empty())
            .unwrap_or("ts");
        format!("{resolved}.{default_ext}")
    }
}
// Unit tests for the line-based extractors and the event stream they produce.
#[cfg(test)]
mod tests {
use super::*;
// A TypeScript file with one relative import and one function should yield at
// least two entities and at least two dependency edges.
#[test]
fn test_parse_typescript_emits_module_entity() {
let source = r#"
import { DatabaseClient } from './db/client';
export async function validateToken(token: string): Promise<boolean> {
return true;
}
"#;
let events = parse_source_code("src/auth/service.ts", source, "typescript");
let entity_events: Vec<_> = events
.iter()
.filter(|e| matches!(&e.payload, EventPayload::EntityDiscovered { .. }))
.collect();
assert!(
entity_events.len() >= 2,
"Expected module + function entities"
);
let dep_events: Vec<_> = events
.iter()
.filter(|e| matches!(&e.payload, EventPayload::DependencyLinked { .. }))
.collect();
assert!(
dep_events.len() >= 2,
"Expected function→module + module→import edges"
);
}
// The first EntityDiscovered event must appear before the first DependencyLinked event.
#[test]
fn test_module_entity_is_discovered_before_import_edges() {
let source = "import { Foo } from './foo';\nfunction bar() {}";
let events = parse_source_code("src/main.ts", source, "typescript");
let first_entity = events
.iter()
.position(|e| matches!(&e.payload, EventPayload::EntityDiscovered { .. }));
let first_dep = events
.iter()
.position(|e| matches!(&e.payload, EventPayload::DependencyLinked { .. }));
// Compares the two `Option<usize>` positions directly.
assert!(
first_entity < first_dep,
"EntityDiscovered must precede DependencyLinked"
);
}
// Rust input: methods inside an `impl` block count as functions, and the
// struct is reported as a data model.
#[test]
fn test_parse_rust_functions_and_structs() {
let source = r#"
use crate::graph::UcmGraph;
pub struct GraphProjection;
impl GraphProjection {
pub fn replay_all(events: &[UcmEvent]) -> UcmGraph {
UcmGraph::new()
}
pub async fn apply_event(graph: &mut UcmGraph, event: &UcmEvent) {}
}
"#;
let events = parse_source_code("src/projection.rs", source, "rust");
let entities: Vec<_> = events
.iter()
.filter(|e| {
matches!(
&e.payload,
EventPayload::EntityDiscovered {
kind: EntityKind::Function { .. },
..
}
)
})
.collect();
assert!(
entities.len() >= 2,
"Should find replay_all and apply_event"
);
let structs: Vec<_> = events
.iter()
.filter(|e| {
matches!(
&e.payload,
EventPayload::EntityDiscovered {
kind: EntityKind::DataModel { .. },
..
}
)
})
.collect();
assert!(!structs.is_empty(), "Should find GraphProjection struct");
}
// `crate::` imports resolve under the crate's `src` directory; with an empty
// crate map the `std` import is skipped.
#[test]
fn test_parse_rust_imports() {
let source = r#"
use crate::entity::EntityId;
use crate::graph::UcmGraph;
use std::collections::HashMap;
"#;
let empty_map = RustCrateMap::new();
let imports = extract_imports_rust(source, "ucm-core/src/main.rs", &empty_map);
assert_eq!(imports.len(), 2, "Should find 2 crate imports, skip std");
assert!(imports
.iter()
.any(|(syms, _, _)| syms.contains(&"EntityId".to_string())));
assert!(imports
.iter()
.any(|(syms, _, _)| syms.contains(&"UcmGraph".to_string())));
let entity_import = imports
.iter()
.find(|(s, _, _)| s.contains(&"EntityId".to_string()))
.unwrap();
assert_eq!(
entity_import.1, "ucm-core/src/entity.rs",
"crate::entity::EntityId should resolve to ucm-core/src/entity.rs"
);
let graph_import = imports
.iter()
.find(|(s, _, _)| s.contains(&"UcmGraph".to_string()))
.unwrap();
assert_eq!(
graph_import.1, "ucm-core/src/graph.rs",
"crate::graph::UcmGraph should resolve to ucm-core/src/graph.rs"
);
}
// Imports whose first segment is in the crate map resolve into that crate's
// source root; `serde` is not in the map and is skipped.
#[test]
fn test_rust_cross_crate_imports() {
let source = r#"
use ucm_graph_core::graph::UcmGraph;
use ucm_graph_core::entity::{EntityId, EntityKind};
use ucm_ingest::code_parser;
use serde::Serialize;
"#;
let mut crate_map = RustCrateMap::new();
crate_map.insert("ucm_graph_core".to_string(), "ucm-core/src".to_string());
crate_map.insert("ucm_ingest".to_string(), "ucm-ingest/src".to_string());
let imports = extract_imports_rust(source, "ucm-api/src/main.rs", &crate_map);
assert_eq!(
imports.len(),
3,
"Should find 3 sibling crate imports, skip serde: got {imports:?}"
);
let graph_import = imports
.iter()
.find(|(s, _, _)| s.contains(&"UcmGraph".to_string()))
.unwrap();
assert_eq!(graph_import.1, "ucm-core/src/graph.rs");
let entity_import = imports
.iter()
.find(|(s, _, _)| s.contains(&"EntityId".to_string()))
.unwrap();
assert_eq!(entity_import.1, "ucm-core/src/entity.rs");
assert!(
entity_import.0.contains(&"EntityKind".to_string()),
"Should import both EntityId and EntityKind"
);
// A single-segment import after the crate name refers to a module file.
let parser_import = imports
.iter()
.find(|(_, path, _)| path.contains("ucm-ingest"))
.unwrap();
assert_eq!(parser_import.1, "ucm-ingest/src/code_parser.rs");
}
// Each `app.<verb>(...)` call becomes one ApiEndpoint entity.
#[test]
fn test_parse_api_routes() {
let source = r#"
app.get('/api/v1/users', getUsers);
app.post('/api/v1/auth/login', handleLogin);
"#;
let events = parse_source_code("src/routes.ts", source, "typescript");
let routes: Vec<_> = events
.iter()
.filter(|e| {
matches!(
&e.payload,
EventPayload::EntityDiscovered {
kind: EntityKind::ApiEndpoint { .. },
..
}
)
})
.collect();
assert_eq!(routes.len(), 2);
}
// `..` and `.` components are normalised away, an empty base directory is
// accepted, and `.ts` is appended when the path has no extension.
#[test]
fn test_resolve_path_parent_traversal() {
let result = resolve_path("fraud", "../pipeline/rag-pipeline", &["ts"]);
assert_eq!(result, "pipeline/rag-pipeline.ts");
let result2 = resolve_path("src/fraud", "../pipeline/rag", &["ts"]);
assert_eq!(result2, "src/pipeline/rag.ts");
let result3 = resolve_path("fraud", "./compliance-checker", &["ts"]);
assert_eq!(result3, "fraud/compliance-checker.ts");
let result4 = resolve_path("", "./embedding-service", &["ts"]);
assert_eq!(result4, "embedding-service.ts");
}
// Applies the events of two files, one importing from the other, to a graph
// and checks that the graph reports entities and at least one edge.
#[test]
fn test_full_graph_has_edges() {
let auth_src = "export async function validateToken() {}";
let mid_src =
"import { validateToken } from './auth';\nexport function authMiddleware() {}";
use ucm_graph_core::graph::UcmGraph;
use ucm_events::projection::GraphProjection;
let mut graph = UcmGraph::new();
for ev in parse_source_code("src/auth.ts", auth_src, "typescript") {
GraphProjection::apply_event(&mut graph, &ev);
}
for ev in parse_source_code("src/middleware.ts", mid_src, "typescript") {
GraphProjection::apply_event(&mut graph, &ev);
}
let stats = graph.stats();
assert!(stats.entity_count >= 2, "Should have entities");
assert!(
stats.edge_count >= 1,
"Should have at least one edge — this was the core bug"
);
}
}