1use std::collections::HashMap;
19use std::path::Path;
20use ucm_graph_core::edge::*;
21use ucm_graph_core::entity::*;
22use ucm_graph_core::event::*;
23
/// Maps the leading path segment of a Rust `use` declaration (a crate name such as
/// `ucm_graph_core`) to the source-root directory of that crate (such as `ucm-core/src`).
///
/// The Rust import extractor consults this map to resolve imports from sibling crates;
/// `use` paths whose first segment is not a key (and is not `crate`/`super`/`self`) are skipped.
pub type RustCrateMap = HashMap<String, String>;
28
/// Optional name of the top-level Python package being scanned (for example `Some("marimo")`).
///
/// When present, absolute imports whose module path starts with this name are resolved to
/// file paths relative to the package; when `None`, only relative imports are extracted.
pub type PythonPackageRoot = Option<String>;
33
34pub fn parse_source_code(file_path: &str, source: &str, language: &str) -> Vec<UcmEvent> {
44 parse_source_code_with_context(file_path, source, language, &HashMap::new())
45}
46
47pub fn parse_source_code_with_context(
52 file_path: &str,
53 source: &str,
54 language: &str,
55 crate_map: &RustCrateMap,
56) -> Vec<UcmEvent> {
57 parse_source_code_full(file_path, source, language, crate_map, &None)
58}
59
/// Parses one source file with line-based heuristics and returns the resulting event stream.
///
/// The events are produced in a fixed order:
/// 1. one `EntityDiscovered` for the file itself (the module entity),
/// 2. entities for functions / classes / structs, each followed by a `DependsOn` edge to
///    the module (Python methods additionally get a `Contains` edge from their class, and
///    Python classes get `Extends` edges to their bases),
/// 3. for TypeScript/JavaScript only, API route entities,
/// 4. `Imports` edges from the module to every imported symbol.
///
/// * `file_path` – path used to build entity ids and reported on every entity.
/// * `source` – full text of the file.
/// * `language` – language tag; `"python"`/`"py"` and `"rust"`/`"rs"` select their own
///   extractors, every other value is handled by the TypeScript extractors.
/// * `crate_map` – sibling-crate lookup used when resolving Rust `use` paths.
/// * `python_package_root` – package name used when resolving absolute Python imports.
pub fn parse_source_code_full(
    file_path: &str,
    source: &str,
    language: &str,
    crate_map: &RustCrateMap,
    python_package_root: &PythonPackageRoot,
) -> Vec<UcmEvent> {
    let mut events = Vec::new();

    // The module entity is emitted first so that every later edge can refer to it.
    let module_id = EntityId::local(file_path, "module");
    events.push(UcmEvent::new(EventPayload::EntityDiscovered {
        entity_id: module_id.clone(),
        kind: EntityKind::Module {
            language: language.to_string(),
            exports: vec![],
        },
        name: file_name_of(file_path),
        file_path: file_path.to_string(),
        language: language.to_string(),
        source: DiscoverySource::StaticAnalysis,
        line_range: None,
    }));

    if matches!(language, "python" | "py") {
        // Python: functions and classes come from a single indentation-aware pass.
        let py_entities = extract_python_entities(source);
        for ent in &py_entities {
            match ent {
                PythonEntity::Function {
                    name,
                    is_async,
                    line_start,
                    line_end,
                    class_name,
                } => {
                    // Methods are named `Class.method`; free functions keep their bare name.
                    let display_name = if let Some(cls) = class_name {
                        format!("{cls}.{name}")
                    } else {
                        name.clone()
                    };
                    let fn_id = EntityId::local(file_path, &display_name);
                    events.push(UcmEvent::new(EventPayload::EntityDiscovered {
                        entity_id: fn_id.clone(),
                        kind: EntityKind::Function {
                            is_async: *is_async,
                            // Signatures are not parsed, so these are placeholders.
                            parameter_count: 0,
                            return_type: None,
                        },
                        name: display_name.clone(),
                        file_path: file_path.to_string(),
                        language: language.to_string(),
                        source: DiscoverySource::StaticAnalysis,
                        line_range: Some((*line_start, *line_end)),
                    }));
                    events.push(UcmEvent::new(EventPayload::DependencyLinked {
                        source_entity: fn_id.clone(),
                        target_entity: module_id.clone(),
                        relation_type: RelationType::DependsOn,
                        confidence: 0.99,
                        source: DiscoverySource::StaticAnalysis,
                        description: format!("{display_name} is defined in {file_path}"),
                    }));
                    if let Some(cls) = class_name {
                        // Link the owning class to its method.
                        let class_id = EntityId::local(file_path, cls);
                        events.push(UcmEvent::new(EventPayload::DependencyLinked {
                            source_entity: class_id,
                            target_entity: fn_id,
                            relation_type: RelationType::Contains,
                            confidence: 0.99,
                            source: DiscoverySource::StaticAnalysis,
                            description: format!("{cls} contains method {name}"),
                        }));
                    }
                }
                PythonEntity::Class {
                    name,
                    line_num,
                    bases,
                } => {
                    let class_id = EntityId::local(file_path, name);
                    events.push(UcmEvent::new(EventPayload::EntityDiscovered {
                        entity_id: class_id.clone(),
                        kind: EntityKind::DataModel { fields: vec![] },
                        name: name.clone(),
                        file_path: file_path.to_string(),
                        language: language.to_string(),
                        source: DiscoverySource::StaticAnalysis,
                        // The end line is not computed; it is set to five lines past the declaration.
                        line_range: Some((*line_num, line_num + 5)),
                    }));
                    events.push(UcmEvent::new(EventPayload::DependencyLinked {
                        source_entity: class_id.clone(),
                        target_entity: module_id.clone(),
                        relation_type: RelationType::DependsOn,
                        confidence: 0.99,
                        source: DiscoverySource::StaticAnalysis,
                        description: format!("{name} is defined in {file_path}"),
                    }));
                    for base in bases {
                        // The base id is built against this same file; the base class is not
                        // looked up in other files here.
                        let base_id = EntityId::local(file_path, base);
                        events.push(UcmEvent::new(EventPayload::DependencyLinked {
                            source_entity: class_id.clone(),
                            target_entity: base_id,
                            relation_type: RelationType::Extends,
                            confidence: 0.90,
                            source: DiscoverySource::StaticAnalysis,
                            description: format!("{name} extends {base}"),
                        }));
                    }
                }
            }
        }
    } else {
        // Rust and TypeScript-like languages: functions first, then data types.
        let functions = match language {
            "rust" | "rs" => extract_functions_rust(source),
            _ => extract_functions_ts(source),
        };

        for (name, is_async, line_start, line_end) in functions {
            let fn_id = EntityId::local(file_path, &name);
            events.push(UcmEvent::new(EventPayload::EntityDiscovered {
                entity_id: fn_id.clone(),
                kind: EntityKind::Function {
                    is_async,
                    // Signatures are not parsed, so these are placeholders.
                    parameter_count: 0,
                    return_type: None,
                },
                name: name.clone(),
                file_path: file_path.to_string(),
                language: language.to_string(),
                source: DiscoverySource::StaticAnalysis,
                line_range: Some((line_start, line_end)),
            }));
            events.push(UcmEvent::new(EventPayload::DependencyLinked {
                source_entity: fn_id,
                target_entity: module_id.clone(),
                relation_type: RelationType::DependsOn,
                confidence: 0.99,
                source: DiscoverySource::StaticAnalysis,
                description: format!("{name} is defined in {file_path}"),
            }));
        }

        // For Rust this list also contains enums and traits; for the rest, classes.
        let structs = match language {
            "rust" | "rs" => extract_structs_rust(source),
            _ => extract_classes_ts(source),
        };

        for (name, line_num) in structs {
            let struct_id = EntityId::local(file_path, &name);
            events.push(UcmEvent::new(EventPayload::EntityDiscovered {
                entity_id: struct_id.clone(),
                kind: EntityKind::DataModel { fields: vec![] },
                name: name.clone(),
                file_path: file_path.to_string(),
                language: language.to_string(),
                source: DiscoverySource::StaticAnalysis,
                // The end line is not computed; it is set to five lines past the declaration.
                line_range: Some((line_num, line_num + 5)),
            }));
            events.push(UcmEvent::new(EventPayload::DependencyLinked {
                source_entity: struct_id,
                target_entity: module_id.clone(),
                relation_type: RelationType::DependsOn,
                confidence: 0.99,
                source: DiscoverySource::StaticAnalysis,
                description: format!("{name} is defined in {file_path}"),
            }));
        }
    }

    if matches!(language, "typescript" | "javascript" | "ts" | "js") {
        // `app.<verb>(...)` / `router.<verb>(...)` registrations become API endpoint entities.
        for (method, route, _handler, line_num) in extract_routes_ts(source) {
            let route_id = EntityId::local(file_path, &format!("{method}:{route}"));
            events.push(UcmEvent::new(EventPayload::EntityDiscovered {
                entity_id: route_id.clone(),
                kind: EntityKind::ApiEndpoint {
                    method: method.clone(),
                    route: route.clone(),
                    // The extracted handler value is ignored; the field is left empty.
                    handler: String::new(),
                },
                name: format!("{method} {route}"),
                file_path: file_path.to_string(),
                language: language.to_string(),
                source: DiscoverySource::StaticAnalysis,
                line_range: Some((line_num, line_num)),
            }));
            events.push(UcmEvent::new(EventPayload::DependencyLinked {
                source_entity: route_id,
                target_entity: module_id.clone(),
                relation_type: RelationType::DependsOn,
                confidence: 0.99,
                source: DiscoverySource::StaticAnalysis,
                description: format!("{method} {route} is defined in {file_path}"),
            }));
        }
    }

    // Imports are resolved to (symbols, target file path, line) triples per language.
    let imports = match language {
        "rust" | "rs" => extract_imports_rust(source, file_path, crate_map),
        "python" | "py" => extract_imports_python(source, file_path, python_package_root),
        _ => extract_imports_ts(source, file_path),
    };

    for (symbols, from_path, line_num) in imports {
        for symbol in &symbols {
            // One edge per imported symbol, pointing at the symbol's id in the target file.
            events.push(UcmEvent::new(EventPayload::DependencyLinked {
                source_entity: module_id.clone(),
                target_entity: EntityId::local(&from_path, symbol),
                relation_type: RelationType::Imports,
                confidence: 0.95,
                source: DiscoverySource::StaticAnalysis,
                description: format!("import {symbol} from '{from_path}' at line {line_num}"),
            }));
        }
    }

    events
}
289
290fn extract_functions_ts(source: &str) -> Vec<(String, bool, usize, usize)> {
293 let mut out = Vec::new();
294 for (i, line) in source.lines().enumerate() {
295 let t = line.trim();
296 let is_async = t.contains("async");
297 if let Some(name) = ts_function_name(t) {
298 out.push((name, is_async, i + 1, i + 20));
299 }
300 }
301 out
302}
303
/// Extracts the name of a function defined on a single TypeScript/JavaScript line.
///
/// Two shapes are recognised:
/// * `function name(...)` declarations (optionally prefixed by `export`, `async`, ...),
/// * `const name = ...` / `export const name = ...` whose initializer contains `(` or `=>`.
///
/// Comment lines (`//`, `/*`, or a leading `*` as in JSDoc bodies) never yield a name, so
/// prose such as "this function returns" is not mistaken for a declaration. Identifiers
/// may contain `$` in addition to letters, digits and `_`.
///
/// Returns `None` when the line does not look like a function definition.
fn ts_function_name(line: &str) -> Option<String> {
    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_' || c == '$'
    }

    const KEYWORD: &str = "function ";

    let line = line.trim_start();
    if line.starts_with("//") || line.starts_with("/*") || line.starts_with('*') {
        return None;
    }

    // First `function ` that is a keyword of its own, not the tail of a longer identifier.
    let declaration = line.match_indices(KEYWORD).find(|&(pos, _)| {
        !line[..pos]
            .chars()
            .next_back()
            .map_or(false, is_ident_char)
    });
    if let Some((pos, _)) = declaration {
        let name: String = line[pos + KEYWORD.len()..]
            .chars()
            .take_while(|c| is_ident_char(*c))
            .collect();
        if !name.is_empty() {
            return Some(name);
        }
        // Anonymous `function (...)`: fall through so `const f = function (...)` is named `f`.
    }

    let rest = line
        .strip_prefix("export const ")
        .or_else(|| line.strip_prefix("const "))?;
    let eq = rest.find('=')?;
    let name: String = rest[..eq]
        .trim()
        .chars()
        .take_while(|c| is_ident_char(*c))
        .collect();
    let initializer = &rest[eq..];
    if !name.is_empty() && (initializer.contains('(') || initializer.contains("=>")) {
        Some(name)
    } else {
        None
    }
}
336
/// Finds class declarations in TypeScript/JavaScript source.
///
/// A line counts when, after trimming, it contains both `class ` and an opening `{`.
/// The class name is the run of alphanumeric/underscore characters that follows the
/// first `class `. Returns `(name, line)` pairs with 1-based line numbers.
fn extract_classes_ts(source: &str) -> Vec<(String, usize)> {
    source
        .lines()
        .enumerate()
        .filter_map(|(idx, raw)| {
            let text = raw.trim();
            if !text.contains('{') {
                return None;
            }
            let (_, tail) = text.split_once("class ")?;
            // Only the text up to the next `class ` occurrence is considered for the name.
            let segment = tail.split("class ").next().unwrap_or("");
            let ident: String = segment
                .chars()
                .take_while(|c| c.is_alphanumeric() || *c == '_')
                .collect();
            if ident.is_empty() {
                None
            } else {
                Some((ident, idx + 1))
            }
        })
        .collect()
}
355
356fn extract_routes_ts(source: &str) -> Vec<(String, String, String, usize)> {
357 let mut out = Vec::new();
358 for (i, line) in source.lines().enumerate() {
359 let t = line.trim();
360 for method in &["get", "post", "put", "delete", "patch"] {
361 for prefix in &[format!("app.{method}("), format!("router.{method}(")] {
362 if t.contains(prefix.as_str()) {
363 if let Some(route) = ts_route_path(t) {
364 out.push((method.to_uppercase(), route, String::new(), i + 1));
365 }
366 }
367 }
368 }
369 }
370 out
371}
372
/// Extracts the route string from a route-registration line.
///
/// Looks at the text between the first `(` on the line and the next `(`, and returns the
/// contents of the first string literal found there. The delimiter is whichever quote
/// character (`'`, `"` or a backtick) appears first, so an apostrophe later on the line
/// (for example in a trailing comment) cannot hijack the match. An unterminated literal
/// yields everything after its opening quote.
///
/// Returns `None` when the line has no `(` or no quote after it.
fn ts_route_path(line: &str) -> Option<String> {
    let after = line.split('(').nth(1)?;
    let (open, quote) = after
        .char_indices()
        .find(|&(_, c)| matches!(c, '\'' | '"' | '`'))?;
    let literal = &after[open + quote.len_utf8()..];
    let end = literal.find(quote).unwrap_or(literal.len());
    Some(literal[..end].to_string())
}
383
384fn extract_imports_ts(source: &str, current_file: &str) -> Vec<(Vec<String>, String, usize)> {
386 let mut out = Vec::new();
387 let dir = parent_dir(current_file);
388 for (i, line) in source.lines().enumerate() {
389 let t = line.trim();
390 if t.starts_with("import ") && t.contains("from ") {
391 let symbols = ts_import_symbols(t);
392 if let Some(raw_path) = ts_import_path(t) {
393 if raw_path.starts_with("./") || raw_path.starts_with("../") {
395 let resolved = resolve_path(&dir, &raw_path, &["ts", "tsx", "js"]);
396 if !symbols.is_empty() {
397 out.push((symbols, resolved, i + 1));
398 }
399 }
400 }
401 }
402 }
403 out
404}
405
/// Lists the symbols bound by a single `import ...` line.
///
/// * Named imports (`import { a as b, c } from ...`) yield the exported names (`a`, `c`);
///   an inline `type` modifier (`{ type Foo }`) is stripped.
/// * Otherwise the identifier right after `import` (a default import) is returned;
///   `import type Foo from ...` yields `Foo`.
/// * Namespace imports (`import * as ns`) and anything unrecognised yield an empty list.
///
/// The closing `}` is searched for only after the opening `{`, so malformed lines fall
/// through to the default-import handling instead of panicking on an inverted slice.
fn ts_import_symbols(line: &str) -> Vec<String> {
    if let Some(open) = line.find('{') {
        let after_open = &line[open + 1..];
        if let Some(close) = after_open.find('}') {
            return after_open[..close]
                .split(',')
                .map(|entry| {
                    let exported = entry.trim().split(" as ").next().unwrap_or("").trim();
                    // `type Foo` → `Foo`; a symbol literally named `type` has no trailing space.
                    exported
                        .strip_prefix("type ")
                        .map_or(exported, |name| name.trim_start())
                        .to_string()
                })
                .filter(|name| !name.is_empty())
                .collect();
        }
    }

    let after = line.strip_prefix("import ").unwrap_or("");
    let (after, type_only) = match after.strip_prefix("type ") {
        Some(tail) => (tail.trim_start(), true),
        None => (after, false),
    };
    let name: String = after
        .chars()
        .take_while(|c| c.is_alphanumeric() || *c == '_')
        .collect();
    // `import type from '...'` binds a default import called `type`; it was never reported.
    if name.is_empty() || name == "type" || (type_only && name == "from") {
        vec![]
    } else {
        vec![name]
    }
}
433
/// Extracts the module specifier from an `import ... from '...'` line.
///
/// Looks at the text after the first `from ` (up to the next `from `, if any) and returns
/// the contents of the first string literal there. The delimiter is whichever of `'` or
/// `"` appears first, so an apostrophe later on the line (for example in a trailing
/// comment) cannot hijack the match. An unterminated literal yields everything after its
/// opening quote.
///
/// Returns `None` when there is no `from ` or no quote after it.
fn ts_import_path(line: &str) -> Option<String> {
    let after = line.split("from ").nth(1)?;
    let (open, quote) = after
        .char_indices()
        .find(|&(_, c)| c == '\'' || c == '"')?;
    let literal = &after[open + quote.len_utf8()..];
    let end = literal.find(quote).unwrap_or(literal.len());
    Some(literal[..end].to_string())
}
444
445fn extract_functions_rust(source: &str) -> Vec<(String, bool, usize, usize)> {
448 let mut out = Vec::new();
449 for (i, line) in source.lines().enumerate() {
450 let t = line.trim();
451 if t.starts_with("//") || t.starts_with("#[test") {
453 continue;
454 }
455 if let Some(name) = rust_fn_name(t) {
456 let is_async = t.contains("async ");
457 out.push((name, is_async, i + 1, i + 30));
458 }
459 }
460 out
461}
462
/// Extracts the function name from a line that starts a Rust `fn` item.
///
/// Leading visibility (`pub`, `pub(crate)`, `pub(super)`, `pub(in path)`, ...) and any
/// sequence of the qualifiers `default`, `const`, `async`, `unsafe` and
/// `extern ["ABI"]` are skipped before the `fn` keyword is expected, so `pub const fn new`
/// and `unsafe extern "C" fn ffi` are recognised as well as plain `fn name`.
///
/// Returns `None` when the line is not a function header or the name is empty.
fn rust_fn_name(line: &str) -> Option<String> {
    let mut rest = line.trim_start();

    // Visibility: bare `pub` or a restricted `pub(...)`.
    if let Some(after_pub) = rest.strip_prefix("pub") {
        if let Some(restriction) = after_pub.strip_prefix('(') {
            let close = restriction.find(')')?;
            rest = restriction[close + 1..].trim_start();
        } else if after_pub.starts_with(char::is_whitespace) {
            rest = after_pub.trim_start();
        }
    }

    // Qualifiers may appear in several combinations; strip until nothing changes.
    loop {
        let before = rest.len();
        for qualifier in &["default ", "const ", "async ", "unsafe "] {
            if let Some(tail) = rest.strip_prefix(*qualifier) {
                rest = tail.trim_start();
            }
        }
        if let Some(tail) = rest.strip_prefix("extern ") {
            let tail = tail.trim_start();
            rest = match tail.strip_prefix('"') {
                // Skip the quoted ABI string, e.g. `"C"`.
                Some(abi) => {
                    let end = abi.find('"')?;
                    abi[end + 1..].trim_start()
                }
                None => tail,
            };
        }
        if rest.len() == before {
            break;
        }
    }

    let name: String = rest
        .strip_prefix("fn ")?
        .chars()
        .take_while(|c| c.is_alphanumeric() || *c == '_')
        .collect();
    if name.is_empty() {
        None
    } else {
        Some(name)
    }
}
483
/// Finds `struct`, `enum` and `trait` declarations in Rust source.
///
/// Each line is trimmed, any visibility (`pub`, `pub(crate)`, `pub(super)`,
/// `pub(in path)`, ...) and an optional `unsafe` qualifier are skipped, and the
/// identifier following the item keyword is taken as the name. Returns `(name, line)`
/// pairs with 1-based line numbers, in source order.
fn extract_structs_rust(source: &str) -> Vec<(String, usize)> {
    const ITEM_KEYWORDS: [&str; 3] = ["struct ", "enum ", "trait "];

    let mut out = Vec::new();
    for (i, line) in source.lines().enumerate() {
        let mut rest = line.trim();

        // Visibility: bare `pub` or a restricted `pub(...)`.
        if let Some(after_pub) = rest.strip_prefix("pub") {
            if let Some(restriction) = after_pub.strip_prefix('(') {
                match restriction.find(')') {
                    Some(close) => rest = restriction[close + 1..].trim_start(),
                    None => continue,
                }
            } else if after_pub.starts_with(char::is_whitespace) {
                rest = after_pub.trim_start();
            }
        }
        // `unsafe trait Foo`
        if let Some(tail) = rest.strip_prefix("unsafe ") {
            rest = tail.trim_start();
        }

        let after_keyword = ITEM_KEYWORDS
            .iter()
            .find_map(|keyword| rest.strip_prefix(*keyword));
        if let Some(after_keyword) = after_keyword {
            let name: String = after_keyword
                .chars()
                .take_while(|c| c.is_alphanumeric() || *c == '_')
                .collect();
            if !name.is_empty() {
                out.push((name, i + 1));
            }
        }
    }
    out
}
519
520fn extract_imports_rust(
529 source: &str,
530 current_file: &str,
531 crate_map: &RustCrateMap,
532) -> Vec<(Vec<String>, String, usize)> {
533 let mut out = Vec::new();
534
535 let crate_src_root = infer_crate_src_root(current_file);
539 let file_in_crate = current_file
541 .strip_prefix(&format!("{crate_src_root}/"))
542 .unwrap_or(current_file);
543
544 for (i, line) in source.lines().enumerate() {
545 let t = line.trim();
546 if !t.starts_with("use ") {
547 continue;
548 }
549 let rest = &t[4..]; let (target_src_root, rest_after_prefix) = if let Some(r) = rest.strip_prefix("crate::") {
553 (crate_src_root.clone(), r)
555 } else if let Some(r) = rest.strip_prefix("super::") {
556 let parent = rust_parent_module_dir(&crate_src_root, file_in_crate);
558 (parent, r)
559 } else if let Some(r) = rest.strip_prefix("self::") {
560 let current_dir = rust_current_module_dir(&crate_src_root, file_in_crate);
562 (current_dir, r)
563 } else {
564 let first_segment = rest.split("::").next().unwrap_or("");
567 if let Some(sibling_root) = crate_map.get(first_segment) {
568 let after = rest
569 .strip_prefix(first_segment)
570 .and_then(|s| s.strip_prefix("::"))
571 .unwrap_or("");
572 (sibling_root.clone(), after)
573 } else {
574 continue;
576 }
577 };
578
579 let cleaned = rest_after_prefix.trim_end_matches(';');
581 let (module_segments, symbols) = if cleaned.contains('{') {
582 let brace_start = cleaned.find('{').unwrap_or(cleaned.len());
584 let prefix = cleaned[..brace_start].trim_end_matches("::");
585 let inner = cleaned
586 .get(brace_start + 1..)
587 .and_then(|s| s.split('}').next())
588 .unwrap_or("");
589 let syms: Vec<String> = inner
590 .split(',')
591 .map(|s| {
592 s.trim()
593 .split(" as ")
594 .next()
595 .unwrap_or("")
596 .trim()
597 .to_string()
598 })
599 .filter(|s| !s.is_empty() && s != "*")
600 .collect();
601 (prefix.to_string(), syms)
602 } else {
603 let parts: Vec<&str> = cleaned.split("::").collect();
605 if parts.len() < 2 {
606 if parts.len() == 1 && !parts[0].is_empty() && parts[0] != "*" {
608 let mod_name = parts[0].to_string();
610 let file_path = format!("{target_src_root}/{mod_name}.rs");
611 out.push((vec!["module".to_string()], file_path, i + 1));
612 }
613 continue;
614 }
615 let symbol = parts.last().unwrap().to_string();
616 if symbol == "*" {
617 continue;
618 }
619 let mod_parts = &parts[..parts.len() - 1];
620 (mod_parts.join("::"), vec![symbol])
621 };
622
623 if symbols.is_empty() {
624 continue;
625 }
626
627 let module_file_path = if module_segments.is_empty() {
630 format!("{target_src_root}/lib.rs")
633 } else {
634 format!(
635 "{target_src_root}/{}.rs",
636 module_segments.replace("::", "/")
637 )
638 };
639
640 out.push((symbols, module_file_path, i + 1));
641 }
642 out
643}
644
645fn infer_crate_src_root(file_path: &str) -> String {
650 if let Some(pos) = file_path.rfind("/src/") {
652 file_path[..pos + 4].to_string()
653 } else if file_path.starts_with("src/") {
654 "src".to_string()
655 } else {
656 parent_dir(file_path)
658 }
659}
660
661fn rust_parent_module_dir(crate_src_root: &str, file_in_crate: &str) -> String {
665 let dir = parent_dir(file_in_crate);
666 if dir.is_empty() {
667 crate_src_root.to_string()
669 } else {
670 let parent = parent_dir(&dir);
672 if parent.is_empty() {
673 crate_src_root.to_string()
674 } else {
675 format!("{crate_src_root}/{parent}")
676 }
677 }
678}
679
680fn rust_current_module_dir(crate_src_root: &str, file_in_crate: &str) -> String {
682 let dir = parent_dir(file_in_crate);
683 if dir.is_empty() {
684 crate_src_root.to_string()
685 } else {
686 format!("{crate_src_root}/{dir}")
687 }
688}
689
/// A Python definition found by the line-based scanner `extract_python_entities`.
#[derive(Debug)]
enum PythonEntity {
    /// A `def` or `async def`.
    Function {
        /// Bare function name, without any class qualifier.
        name: String,
        /// True when the definition line starts with `async `.
        is_async: bool,
        /// 1-based line of the `def`.
        line_start: usize,
        /// Estimated end line; the scanner sets it to a fixed offset past `line_start`.
        line_end: usize,
        /// Name of the enclosing class when the function was found inside one.
        class_name: Option<String>,
    },
    /// A `class` statement.
    Class {
        /// Class name.
        name: String,
        /// 1-based line of the `class` statement.
        line_num: usize,
        /// Base-class names as returned by `parse_python_bases`.
        bases: Vec<String>,
    },
}
710
711fn extract_python_entities(source: &str) -> Vec<PythonEntity> {
713 let mut out = Vec::new();
714 let mut current_class: Option<(String, usize)> = None; for (i, line) in source.lines().enumerate() {
717 let trimmed = line.trim();
718 if trimmed.is_empty() || trimmed.starts_with('#') {
719 continue;
720 }
721
722 let indent = line.len() - line.trim_start().len();
723
724 if let Some((_, class_indent)) = ¤t_class {
727 if indent <= *class_indent && !trimmed.is_empty() {
728 current_class = None;
729 }
730 }
731
732 if let Some(rest) = trimmed.strip_prefix("class ") {
734 let name: String = rest
735 .chars()
736 .take_while(|c| c.is_alphanumeric() || *c == '_')
737 .collect();
738 if !name.is_empty() {
739 let bases = parse_python_bases(rest);
740 current_class = Some((name.clone(), indent));
741 out.push(PythonEntity::Class {
742 name,
743 line_num: i + 1,
744 bases,
745 });
746 continue;
747 }
748 }
749
750 if let Some(rest) = trimmed
752 .strip_prefix("async def ")
753 .or_else(|| trimmed.strip_prefix("def "))
754 {
755 let is_async = trimmed.starts_with("async ");
756 let name: String = rest
757 .chars()
758 .take_while(|c| c.is_alphanumeric() || *c == '_')
759 .collect();
760 if !name.is_empty() {
761 let class_name = current_class
762 .as_ref()
763 .filter(|(_, ci)| indent > *ci)
764 .map(|(cn, _)| cn.clone());
765 out.push(PythonEntity::Function {
766 name,
767 is_async,
768 line_start: i + 1,
769 line_end: i + 20,
770 class_name,
771 });
772 }
773 }
774 }
775 out
776}
777
/// Extracts base-class names from the text that follows the `class ` keyword
/// (for example `Foo(Base, pkg.Mixin, metaclass=Meta):`).
///
/// * Anything after a `#` is treated as a comment and ignored.
/// * The argument list runs from the first `(` to its matching `)`; nested brackets are
///   tracked so `Generic[K, V]` is one argument. An unterminated list yields no bases.
/// * Keyword arguments (containing `=` before any bracket) are skipped.
/// * For each remaining argument the subscript/call suffix is dropped and only the last
///   dotted segment is kept: `typing.Generic[T]` becomes `Generic`.
fn parse_python_bases(after_class_name: &str) -> Vec<String> {
    /// Reduces one argument of the base list to a bare class name, if it is a base.
    fn base_name(argument: &str) -> Option<String> {
        let argument = argument.trim();
        let head_end = argument
            .find(|c| c == '[' || c == '(')
            .unwrap_or(argument.len());
        let head = &argument[..head_end];
        if head.contains('=') {
            return None;
        }
        let name = head.rsplit('.').next().unwrap_or("").trim();
        if name.is_empty() {
            None
        } else {
            Some(name.to_string())
        }
    }

    let code = after_class_name.split('#').next().unwrap_or("");
    let open = match code.find('(') {
        Some(pos) => pos,
        None => return vec![],
    };
    let tail = &code[open + 1..];

    let mut bases = Vec::new();
    let mut depth = 0usize;
    let mut arg_start = 0usize;
    for (pos, ch) in tail.char_indices() {
        match ch {
            '(' | '[' | '{' => depth += 1,
            ')' if depth == 0 => {
                // Matching close of the base list: flush the last argument and finish.
                bases.extend(base_name(&tail[arg_start..pos]));
                return bases;
            }
            ')' | ']' | '}' => depth = depth.saturating_sub(1),
            ',' if depth == 0 => {
                bases.extend(base_name(&tail[arg_start..pos]));
                arg_start = pos + 1;
            }
            _ => {}
        }
    }

    // No closing parenthesis on this line (e.g. a multi-line base list).
    vec![]
}
801
802fn extract_imports_python(
804 source: &str,
805 current_file: &str,
806 package_root: &PythonPackageRoot,
807) -> Vec<(Vec<String>, String, usize)> {
808 let mut out = Vec::new();
809 let dir = parent_dir(current_file);
810
811 for (i, line) in source.lines().enumerate() {
812 let t = line.trim();
813
814 if let Some(rest) = t.strip_prefix("from .") {
816 if let Some(imp_pos) = rest.find(" import ") {
817 let mod_part = &rest[..imp_pos];
818 let imp_part = &rest[imp_pos + 8..];
819 let symbols = parse_python_import_symbols(imp_part);
820 let dots = mod_part.chars().take_while(|c| *c == '.').count();
822 let module_name = mod_part.trim_start_matches('.');
823 let mut base = dir.clone();
824 for _ in 0..dots {
825 base = parent_dir(&base);
826 }
827 let path = if module_name.is_empty() {
828 format!("{base}/__init__.py")
829 } else {
830 format!("{base}/{}.py", module_name.replace('.', "/"))
831 };
832 if !symbols.is_empty() {
833 out.push((symbols, path, i + 1));
834 }
835 }
836 continue;
837 }
838
839 if let Some(rest) = t.strip_prefix("from ") {
841 if let Some(imp_pos) = rest.find(" import ") {
842 let mod_part = &rest[..imp_pos];
843 let imp_part = &rest[imp_pos + 8..];
844
845 if let Some(pkg) = package_root {
847 if mod_part.starts_with(pkg.as_str()) {
848 let symbols = parse_python_import_symbols(imp_part);
849 let after_pkg = mod_part
853 .strip_prefix(pkg.as_str())
854 .unwrap_or(mod_part)
855 .trim_start_matches('.');
856 let path = if after_pkg.is_empty() {
857 "__init__.py".to_string()
858 } else {
859 format!("{}.py", after_pkg.replace('.', "/"))
860 };
861 if !symbols.is_empty() {
862 out.push((symbols, path, i + 1));
863 }
864 }
865 }
866 }
867 continue;
868 }
869
870 if let Some(rest) = t.strip_prefix("import ") {
872 if let Some(pkg) = package_root {
873 for mod_path in rest.split(',') {
875 let mod_path = mod_path.trim().split(" as ").next().unwrap_or("").trim();
876 if mod_path.starts_with(pkg.as_str()) {
877 let after_pkg = mod_path
878 .strip_prefix(pkg.as_str())
879 .unwrap_or(mod_path)
880 .trim_start_matches('.');
881 let path = if after_pkg.is_empty() {
882 "__init__.py".to_string()
883 } else {
884 format!("{}.py", after_pkg.replace('.', "/"))
885 };
886 out.push((vec!["module".to_string()], path, i + 1));
887 }
888 }
889 }
890 }
891 }
892 out
893}
894
/// Splits the part of a Python `from ... import <this>` statement into symbol names.
///
/// Surrounding parentheses, a trailing `#` comment and a trailing line-continuation
/// backslash are removed first. For `name as alias` entries the original `name` is kept.
/// Empty entries and the wildcard `*` are dropped.
fn parse_python_import_symbols(imp_part: &str) -> Vec<String> {
    let code = imp_part.split('#').next().unwrap_or("");
    let cleaned = code
        .trim()
        .trim_end_matches('\\')
        .trim()
        .trim_start_matches('(')
        .trim_end_matches(')');
    cleaned
        .split(',')
        .map(|entry| {
            entry
                .trim()
                .split(" as ")
                .next()
                .unwrap_or("")
                .trim()
                .to_string()
        })
        .filter(|name| !name.is_empty() && name != "*")
        .collect()
}
912
/// Returns the directory part of `file_path` as a string, or an empty string when the
/// path has no parent (a bare file name yields `""`).
fn parent_dir(file_path: &str) -> String {
    match Path::new(file_path).parent() {
        Some(dir) => dir.to_string_lossy().into_owned(),
        None => String::new(),
    }
}
921
/// Returns the final component of `file_path`; when the path has no file name
/// (for example `..`), the input is returned unchanged.
fn file_name_of(file_path: &str) -> String {
    match Path::new(file_path).file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => file_path.to_owned(),
    }
}
928
/// Joins a relative import specifier onto `dir` and normalises the result lexically.
///
/// `.` components are dropped, each `..` removes the component before it (and is ignored
/// when nothing is left to remove), and a leading root is discarded, so the result is
/// always a relative path. When the normalised path has no extension, `.ts` is appended.
/// The extension list is currently unused.
fn resolve_path(dir: &str, raw: &str, _extensions: &[&str]) -> String {
    use std::path::{Component, PathBuf};

    // An empty directory means "the current directory".
    let start = if dir.is_empty() { "." } else { dir };
    let combined = PathBuf::from(start).join(raw);

    let mut kept: Vec<&std::ffi::OsStr> = Vec::new();
    for piece in combined.components() {
        match piece {
            Component::CurDir | Component::RootDir => {}
            Component::ParentDir => {
                kept.pop();
            }
            other => kept.push(other.as_os_str()),
        }
    }

    let normalized: PathBuf = kept.into_iter().collect();
    let text = normalized.to_string_lossy();

    match Path::new(text.as_ref()).extension() {
        None => format!("{text}.ts"),
        Some(_) => text.to_string(),
    }
}
970
971#[cfg(test)]
972mod tests {
973 use super::*;
974
975 #[test]
976 fn test_parse_typescript_emits_module_entity() {
977 let source = r#"
978import { DatabaseClient } from './db/client';
979export async function validateToken(token: string): Promise<boolean> {
980 return true;
981}
982"#;
983 let events = parse_source_code("src/auth/service.ts", source, "typescript");
984
985 let entity_events: Vec<_> = events
986 .iter()
987 .filter(|e| matches!(&e.payload, EventPayload::EntityDiscovered { .. }))
988 .collect();
989 assert!(
991 entity_events.len() >= 2,
992 "Expected module + function entities"
993 );
994
995 let dep_events: Vec<_> = events
996 .iter()
997 .filter(|e| matches!(&e.payload, EventPayload::DependencyLinked { .. }))
998 .collect();
999 assert!(
1001 dep_events.len() >= 2,
1002 "Expected function→module + module→import edges"
1003 );
1004 }
1005
1006 #[test]
1007 fn test_module_entity_is_discovered_before_import_edges() {
1008 let source = "import { Foo } from './foo';\nfunction bar() {}";
1009 let events = parse_source_code("src/main.ts", source, "typescript");
1010
1011 let first_entity = events
1013 .iter()
1014 .position(|e| matches!(&e.payload, EventPayload::EntityDiscovered { .. }));
1015 let first_dep = events
1016 .iter()
1017 .position(|e| matches!(&e.payload, EventPayload::DependencyLinked { .. }));
1018 assert!(
1019 first_entity < first_dep,
1020 "EntityDiscovered must precede DependencyLinked"
1021 );
1022 }
1023
1024 #[test]
1025 fn test_parse_rust_functions_and_structs() {
1026 let source = r#"
1027use crate::graph::UcmGraph;
1028
1029pub struct GraphProjection;
1030
1031impl GraphProjection {
1032 pub fn replay_all(events: &[UcmEvent]) -> UcmGraph {
1033 UcmGraph::new()
1034 }
1035
1036 pub async fn apply_event(graph: &mut UcmGraph, event: &UcmEvent) {}
1037}
1038"#;
1039 let events = parse_source_code("src/projection.rs", source, "rust");
1040
1041 let entities: Vec<_> = events
1042 .iter()
1043 .filter(|e| {
1044 matches!(
1045 &e.payload,
1046 EventPayload::EntityDiscovered {
1047 kind: EntityKind::Function { .. },
1048 ..
1049 }
1050 )
1051 })
1052 .collect();
1053 assert!(
1054 entities.len() >= 2,
1055 "Should find replay_all and apply_event"
1056 );
1057
1058 let structs: Vec<_> = events
1059 .iter()
1060 .filter(|e| {
1061 matches!(
1062 &e.payload,
1063 EventPayload::EntityDiscovered {
1064 kind: EntityKind::DataModel { .. },
1065 ..
1066 }
1067 )
1068 })
1069 .collect();
1070 assert!(!structs.is_empty(), "Should find GraphProjection struct");
1071 }
1072
1073 #[test]
1074 fn test_parse_rust_imports() {
1075 let source = r#"
1076use crate::entity::EntityId;
1077use crate::graph::UcmGraph;
1078use std::collections::HashMap;
1079"#;
1080 let empty_map = RustCrateMap::new();
1081 let imports = extract_imports_rust(source, "ucm-core/src/main.rs", &empty_map);
1082 assert_eq!(imports.len(), 2, "Should find 2 crate imports, skip std");
1084 assert!(imports
1085 .iter()
1086 .any(|(syms, _, _)| syms.contains(&"EntityId".to_string())));
1087 assert!(imports
1088 .iter()
1089 .any(|(syms, _, _)| syms.contains(&"UcmGraph".to_string())));
1090
1091 let entity_import = imports
1093 .iter()
1094 .find(|(s, _, _)| s.contains(&"EntityId".to_string()))
1095 .unwrap();
1096 assert_eq!(
1097 entity_import.1, "ucm-core/src/entity.rs",
1098 "crate::entity::EntityId should resolve to ucm-core/src/entity.rs"
1099 );
1100
1101 let graph_import = imports
1102 .iter()
1103 .find(|(s, _, _)| s.contains(&"UcmGraph".to_string()))
1104 .unwrap();
1105 assert_eq!(
1106 graph_import.1, "ucm-core/src/graph.rs",
1107 "crate::graph::UcmGraph should resolve to ucm-core/src/graph.rs"
1108 );
1109 }
1110
1111 #[test]
1112 fn test_rust_cross_crate_imports() {
1113 let source = r#"
1114use ucm_graph_core::graph::UcmGraph;
1115use ucm_graph_core::entity::{EntityId, EntityKind};
1116use ucm_ingest::code_parser;
1117use serde::Serialize;
1118"#;
1119 let mut crate_map = RustCrateMap::new();
1120 crate_map.insert("ucm_graph_core".to_string(), "ucm-core/src".to_string());
1121 crate_map.insert("ucm_ingest".to_string(), "ucm-ingest/src".to_string());
1122
1123 let imports = extract_imports_rust(source, "ucm-api/src/main.rs", &crate_map);
1124
1125 assert_eq!(
1128 imports.len(),
1129 3,
1130 "Should find 3 sibling crate imports, skip serde: got {imports:?}"
1131 );
1132
1133 let graph_import = imports
1135 .iter()
1136 .find(|(s, _, _)| s.contains(&"UcmGraph".to_string()))
1137 .unwrap();
1138 assert_eq!(graph_import.1, "ucm-core/src/graph.rs");
1139
1140 let entity_import = imports
1141 .iter()
1142 .find(|(s, _, _)| s.contains(&"EntityId".to_string()))
1143 .unwrap();
1144 assert_eq!(entity_import.1, "ucm-core/src/entity.rs");
1145 assert!(
1146 entity_import.0.contains(&"EntityKind".to_string()),
1147 "Should import both EntityId and EntityKind"
1148 );
1149
1150 let parser_import = imports
1152 .iter()
1153 .find(|(_, path, _)| path.contains("ucm-ingest"))
1154 .unwrap();
1155 assert_eq!(parser_import.1, "ucm-ingest/src/code_parser.rs");
1156 }
1157
1158 #[test]
1159 fn test_parse_api_routes() {
1160 let source = r#"
1161app.get('/api/v1/users', getUsers);
1162app.post('/api/v1/auth/login', handleLogin);
1163"#;
1164 let events = parse_source_code("src/routes.ts", source, "typescript");
1165 let routes: Vec<_> = events
1166 .iter()
1167 .filter(|e| {
1168 matches!(
1169 &e.payload,
1170 EventPayload::EntityDiscovered {
1171 kind: EntityKind::ApiEndpoint { .. },
1172 ..
1173 }
1174 )
1175 })
1176 .collect();
1177 assert_eq!(routes.len(), 2);
1178 }
1179
1180 #[test]
1181 fn test_resolve_path_parent_traversal() {
1182 let result = resolve_path("fraud", "../pipeline/rag-pipeline", &["ts"]);
1186 assert_eq!(result, "pipeline/rag-pipeline.ts");
1187
1188 let result2 = resolve_path("src/fraud", "../pipeline/rag", &["ts"]);
1192 assert_eq!(result2, "src/pipeline/rag.ts");
1193
1194 let result3 = resolve_path("fraud", "./compliance-checker", &["ts"]);
1196 assert_eq!(result3, "fraud/compliance-checker.ts");
1197
1198 let result4 = resolve_path("", "./embedding-service", &["ts"]);
1200 assert_eq!(result4, "embedding-service.ts");
1201 }
1202
1203 #[test]
1204 fn test_full_graph_has_edges() {
1205 let auth_src = "export async function validateToken() {}";
1207 let mid_src =
1208 "import { validateToken } from './auth';\nexport function authMiddleware() {}";
1209
1210 use ucm_events::projection::GraphProjection;
1211 use ucm_graph_core::graph::UcmGraph;
1212 let mut graph = UcmGraph::new();
1213 for ev in parse_source_code("src/auth.ts", auth_src, "typescript") {
1214 GraphProjection::apply_event(&mut graph, &ev);
1215 }
1216 for ev in parse_source_code("src/middleware.ts", mid_src, "typescript") {
1217 GraphProjection::apply_event(&mut graph, &ev);
1218 }
1219
1220 let stats = graph.stats();
1221 assert!(stats.entity_count >= 2, "Should have entities");
1222 assert!(
1223 stats.edge_count >= 1,
1224 "Should have at least one edge — this was the core bug"
1225 );
1226 }
1227
1228 #[test]
1231 fn test_python_absolute_imports() {
1232 let source = r#"
1233from marimo._runtime.dataflow import DirectedGraph
1234from marimo._ast.visitor import parse_cell
1235import os
1236import marimo._plugins.ui as ui
1237"#;
1238 let pkg_root = Some("marimo".to_string());
1239 let imports = extract_imports_python(source, "_runtime/runtime.py", &pkg_root);
1240
1241 assert_eq!(
1243 imports.len(),
1244 3,
1245 "Expected 3 marimo imports, got {imports:?}"
1246 );
1247
1248 let dg_import = imports
1250 .iter()
1251 .find(|(s, _, _)| s.contains(&"DirectedGraph".to_string()))
1252 .expect("Should find DirectedGraph import");
1253 assert_eq!(dg_import.1, "_runtime/dataflow.py");
1254
1255 let visitor_import = imports
1257 .iter()
1258 .find(|(s, _, _)| s.contains(&"parse_cell".to_string()))
1259 .expect("Should find parse_cell import");
1260 assert_eq!(visitor_import.1, "_ast/visitor.py");
1261
1262 let bare_import = imports
1264 .iter()
1265 .find(|(_, path, _)| path.contains("_plugins"))
1266 .expect("Should find bare marimo._plugins import");
1267 assert_eq!(bare_import.1, "_plugins/ui.py");
1268 }
1269
1270 #[test]
1271 fn test_python_absolute_imports_skip_external() {
1272 let source = r#"
1273from marimo._runtime.dataflow import DirectedGraph
1274from typing import Optional
1275import json
1276from dataclasses import dataclass
1277"#;
1278 let pkg_root = Some("marimo".to_string());
1279 let imports = extract_imports_python(source, "marimo/test.py", &pkg_root);
1280
1281 assert_eq!(
1283 imports.len(),
1284 1,
1285 "Should skip external imports, got {imports:?}"
1286 );
1287 }
1288
1289 #[test]
1290 fn test_python_relative_imports_still_work() {
1291 let source = r#"
1292from .dataflow import DirectedGraph
1293from ..utils import serialize
1294"#;
1295 let pkg_root = Some("marimo".to_string());
1296 let imports = extract_imports_python(source, "marimo/_runtime/runtime.py", &pkg_root);
1297
1298 assert_eq!(imports.len(), 2, "Should find 2 relative imports");
1299 let dg = imports
1300 .iter()
1301 .find(|(s, _, _)| s.contains(&"DirectedGraph".to_string()))
1302 .unwrap();
1303 assert_eq!(dg.1, "marimo/_runtime/dataflow.py");
1304
1305 let util = imports
1306 .iter()
1307 .find(|(s, _, _)| s.contains(&"serialize".to_string()))
1308 .unwrap();
1309 assert_eq!(util.1, "marimo/utils.py");
1310 }
1311
1312 #[test]
1313 fn test_python_class_method_association() {
1314 let source = r#"
1315class DirectedGraph:
1316 def __init__(self):
1317 pass
1318
1319 def add_edge(self, src, dst):
1320 pass
1321
1322 async def traverse(self):
1323 pass
1324
1325def standalone_function():
1326 pass
1327"#;
1328 let entities = extract_python_entities(source);
1329
1330 let classes: Vec<_> = entities
1332 .iter()
1333 .filter(|e| matches!(e, PythonEntity::Class { .. }))
1334 .collect();
1335 assert_eq!(classes.len(), 1, "Should find 1 class");
1336
1337 let methods: Vec<_> = entities
1338 .iter()
1339 .filter(|e| {
1340 matches!(
1341 e,
1342 PythonEntity::Function {
1343 class_name: Some(_),
1344 ..
1345 }
1346 )
1347 })
1348 .collect();
1349 assert_eq!(methods.len(), 3, "Should find 3 methods in DirectedGraph");
1350
1351 let standalone: Vec<_> = entities
1352 .iter()
1353 .filter(|e| {
1354 matches!(
1355 e,
1356 PythonEntity::Function {
1357 class_name: None,
1358 ..
1359 }
1360 )
1361 })
1362 .collect();
1363 assert_eq!(standalone.len(), 1, "Should find 1 standalone function");
1364
1365 for m in &methods {
1367 if let PythonEntity::Function { class_name, .. } = m {
1368 assert_eq!(
1369 class_name.as_deref(),
1370 Some("DirectedGraph"),
1371 "Method should belong to DirectedGraph"
1372 );
1373 }
1374 }
1375 }
1376
1377 #[test]
1378 fn test_python_class_method_events() {
1379 let source = r#"
1380class MyClass:
1381 def my_method(self):
1382 pass
1383"#;
1384 let events = parse_source_code("test.py", source, "python");
1385
1386 let contains_edges: Vec<_> = events
1388 .iter()
1389 .filter(|e| {
1390 matches!(
1391 &e.payload,
1392 EventPayload::DependencyLinked {
1393 relation_type: RelationType::Contains,
1394 ..
1395 }
1396 )
1397 })
1398 .collect();
1399 assert_eq!(
1400 contains_edges.len(),
1401 1,
1402 "Should have 1 Contains edge for class→method"
1403 );
1404
1405 let method_entities: Vec<_> = events
1407 .iter()
1408 .filter_map(|e| {
1409 if let EventPayload::EntityDiscovered {
1410 kind: EntityKind::Function { .. },
1411 name,
1412 ..
1413 } = &e.payload
1414 {
1415 Some(name.clone())
1416 } else {
1417 None
1418 }
1419 })
1420 .collect();
1421 assert!(
1422 method_entities.contains(&"MyClass.my_method".to_string()),
1423 "Method should be named MyClass.my_method, got {method_entities:?}"
1424 );
1425 }
1426
1427 #[test]
1428 fn test_python_inheritance_edges() {
1429 let source = r#"
1430class Animal:
1431 pass
1432
1433class Dog(Animal):
1434 pass
1435
1436class GuideDog(Dog, Trainable):
1437 pass
1438"#;
1439 let entities = extract_python_entities(source);
1440
1441 let classes: Vec<_> = entities
1442 .iter()
1443 .filter_map(|e| {
1444 if let PythonEntity::Class { name, bases, .. } = e {
1445 Some((name.clone(), bases.clone()))
1446 } else {
1447 None
1448 }
1449 })
1450 .collect();
1451
1452 assert_eq!(classes.len(), 3);
1453
1454 let animal = classes.iter().find(|(n, _)| n == "Animal").unwrap();
1455 assert!(animal.1.is_empty(), "Animal has no bases");
1456
1457 let dog = classes.iter().find(|(n, _)| n == "Dog").unwrap();
1458 assert_eq!(dog.1, vec!["Animal"]);
1459
1460 let guide = classes.iter().find(|(n, _)| n == "GuideDog").unwrap();
1461 assert_eq!(guide.1, vec!["Dog", "Trainable"]);
1462 }
1463
1464 #[test]
1465 fn test_python_inheritance_events() {
1466 let source = r#"
1467class Base:
1468 pass
1469
1470class Child(Base):
1471 pass
1472"#;
1473 let events = parse_source_code("test.py", source, "python");
1474
1475 let extends_edges: Vec<_> = events
1476 .iter()
1477 .filter(|e| {
1478 matches!(
1479 &e.payload,
1480 EventPayload::DependencyLinked {
1481 relation_type: RelationType::Extends,
1482 ..
1483 }
1484 )
1485 })
1486 .collect();
1487 assert_eq!(
1488 extends_edges.len(),
1489 1,
1490 "Should have 1 Extends edge (Child → Base)"
1491 );
1492 }
1493
1494 #[test]
1495 fn test_python_no_package_root_skips_absolute() {
1496 let source = "from marimo._runtime import foo\nimport json\n";
1497 let no_pkg: PythonPackageRoot = None;
1498 let imports = extract_imports_python(source, "test.py", &no_pkg);
1499 assert!(
1500 imports.is_empty(),
1501 "Without package root, absolute imports should be skipped"
1502 );
1503 }
1504
1505 #[test]
1506 fn test_python_metaclass_skipped_in_bases() {
1507 let source = "class Foo(Bar, metaclass=ABCMeta):\n pass\n";
1508 let entities = extract_python_entities(source);
1509 if let Some(PythonEntity::Class { bases, .. }) = entities.first() {
1510 assert_eq!(bases, &["Bar"], "metaclass= arg should be skipped");
1511 } else {
1512 panic!("Expected a class entity");
1513 }
1514 }
1515}