Skip to main content

depyler_graph/
lib.rs

//! DEPYLER-1300: Dependency Graph Analysis for Error Reasoning
//!
//! This crate transforms the transpiler from a "compiler" into a "reasoning engine"
//! by building dependency graphs that reveal error topology and enable ML-based fixes.
//!
//! # Architecture
//!
//! ```text
//! Python Source
//!     │
//!     ▼
//! ┌─────────────────┐
//! │  AST Parser     │
//! └────────┬────────┘
//!          │
//!          ▼
//! ┌─────────────────┐     ┌─────────────────┐
//! │ Dependency      │────►│ Error Overlay   │
//! │ Graph Builder   │     │ (Rust Errors)   │
//! └────────┬────────┘     └────────┬────────┘
//!          │                       │
//!          ▼                       ▼
//! ┌─────────────────┐     ┌─────────────────┐
//! │ Impact Scorer   │◄────│ Vectorized      │
//! │ (PageRank)      │     │ Failures (JSON) │
//! └────────┬────────┘     └─────────────────┘
//!          │
//!          ▼
//! ┌─────────────────┐
//! │ Patient Zero    │
//! │ Identification  │
//! └─────────────────┘
//! ```
34
35mod builder;
36mod error_overlay;
37mod impact;
38mod vectorize;
39
40pub use builder::{DependencyGraph, EdgeKind, GraphBuilder, NodeKind};
41pub use error_overlay::{ErrorOverlay, OverlaidError};
42pub use impact::{ImpactScore, ImpactScorer, PatientZero};
43pub use vectorize::{
44    serialize_to_json, serialize_to_ndjson, vectorize_failures, AstContext, FailureContext,
45    FailureLabels, GraphContext, VectorizedFailure,
46};
47
48use serde::{Deserialize, Serialize};
49use std::collections::HashMap;
50use std::path::PathBuf;
51
52/// A node in the dependency graph representing a Python entity
53#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct GraphNode {
55    /// Unique identifier
56    pub id: String,
57    /// Node kind (function, class, module)
58    pub kind: NodeKind,
59    /// Source file
60    pub file: PathBuf,
61    /// Line number
62    pub line: usize,
63    /// Column number
64    pub column: usize,
65    /// Number of errors rooted at this node
66    pub error_count: usize,
67    /// Impact score (PageRank-style)
68    pub impact_score: f64,
69}
70
71/// An edge in the dependency graph
72#[derive(Debug, Clone, Serialize, Deserialize)]
73pub struct GraphEdge {
74    /// Edge kind (calls, imports, inherits)
75    pub kind: EdgeKind,
76    /// Weight for impact calculation
77    pub weight: f64,
78}
79
80/// Complete analysis result
81#[derive(Debug, Clone, Serialize, Deserialize)]
82pub struct GraphAnalysis {
83    /// The dependency graph
84    pub node_count: usize,
85    /// Edge count
86    pub edge_count: usize,
87    /// Patient Zero nodes (highest impact)
88    pub patient_zeros: Vec<PatientZero>,
89    /// Vectorized failures for ML training
90    pub vectorized_failures: Vec<VectorizedFailure>,
91    /// Error distribution by node
92    pub error_distribution: HashMap<String, usize>,
93    /// Total errors analyzed
94    pub total_errors: usize,
95}
96
97/// Main entry point for graph-based error analysis
98pub fn analyze_with_graph(
99    python_source: &str,
100    rust_errors: &[(String, String, usize)], // (code, message, line)
101) -> Result<GraphAnalysis, GraphError> {
102    // Build dependency graph from Python source
103    let mut builder = GraphBuilder::new();
104    let graph = builder.build_from_source(python_source)?;
105
106    // Overlay errors onto graph
107    let overlay = ErrorOverlay::new(&graph);
108    let overlaid_errors = overlay.overlay_errors(rust_errors);
109
110    // Calculate impact scores
111    let scorer = ImpactScorer::new(&graph, &overlaid_errors);
112    let scores = scorer.calculate_impact();
113
114    // Identify patient zeros
115    let patient_zeros = scorer.identify_patient_zeros(&scores, 5);
116
117    // Vectorize failures for ML
118    let vectorized = vectorize::vectorize_failures(&graph, &overlaid_errors, python_source);
119
120    // Build error distribution
121    let mut error_distribution = HashMap::new();
122    for error in &overlaid_errors {
123        if let Some(node_id) = &error.node_id {
124            *error_distribution.entry(node_id.clone()).or_insert(0) += 1;
125        }
126    }
127
128    Ok(GraphAnalysis {
129        node_count: graph.node_count(),
130        edge_count: graph.edge_count(),
131        patient_zeros,
132        vectorized_failures: vectorized,
133        error_distribution,
134        total_errors: rust_errors.len(),
135    })
136}
137
138/// Errors that can occur during graph analysis
139#[derive(Debug, thiserror::Error)]
140pub enum GraphError {
141    #[error("Failed to parse Python source: {0}")]
142    ParseError(String),
143
144    #[error("Graph construction failed: {0}")]
145    BuildError(String),
146
147    #[error("Error overlay failed: {0}")]
148    OverlayError(String),
149}
150
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the `(code, message, line)` triples `analyze_with_graph` consumes.
    type RustError = (String, String, usize);

    /// Empty error list shared by the tests that only exercise graph construction.
    const NO_ERRORS: &[RustError] = &[];

    /// Builds one `(code, message, line)` triple without the `to_string()` noise.
    fn rust_error(code: &str, message: &str, line: usize) -> RustError {
        (code.to_string(), message.to_string(), line)
    }

    /// Runs the analysis and fails the calling test if it returns an error.
    #[track_caller]
    fn analyze_ok(python: &str, errors: &[RustError]) -> GraphAnalysis {
        analyze_with_graph(python, errors).expect("analysis should succeed for this input")
    }

    #[test]
    fn test_analyze_simple_function() {
        let python = r#"
def foo():
    return 42

def bar():
    return foo() + 1
"#;
        let errors = [rust_error("E0308", "mismatched types", 5)];

        let analysis = analyze_ok(python, &errors);
        assert!(analysis.node_count >= 2); // foo and bar
        assert!(analysis.edge_count >= 1); // bar calls foo
    }

    #[test]
    fn test_analyze_class_hierarchy() {
        let python = r#"
class Base:
    def method(self):
        pass

class Derived(Base):
    def method(self):
        super().method()
"#;
        let errors = [rust_error("E0599", "no method found", 7)];

        let analysis = analyze_ok(python, &errors);
        assert!(analysis.node_count >= 2); // Base and Derived
    }

    #[test]
    fn test_patient_zero_identification() {
        let python = r#"
def problematic():
    return "bug"

def caller1():
    return problematic()

def caller2():
    return problematic()

def caller3():
    return problematic()
"#;
        // Errors in all callers point back to problematic()
        let errors = [
            rust_error("E0308", "type mismatch", 5),
            rust_error("E0308", "type mismatch", 8),
            rust_error("E0308", "type mismatch", 11),
        ];

        let analysis = analyze_ok(python, &errors);
        // Only checked when a patient zero is reported: the top one must carry
        // a positive impact score.
        if let Some(top) = analysis.patient_zeros.first() {
            assert!(top.impact_score > 0.0);
        }
    }

    #[test]
    fn test_vectorized_failure_output() {
        let python = r#"
def foo(x: int) -> str:
    return x  # E0308: expected str, found int
"#;
        let errors = [rust_error("E0308", "expected str, found int", 3)];

        let analysis = analyze_ok(python, &errors);

        // The first vectorized failure must carry the original error code.
        let failure = analysis
            .vectorized_failures
            .first()
            .expect("at least one vectorized failure");
        assert_eq!(failure.error_code, "E0308");
    }

    #[test]
    fn test_analyze_with_no_errors() {
        let analysis = analyze_ok("def foo():\n    return 42\n", NO_ERRORS);

        assert_eq!(analysis.total_errors, 0);
        assert!(analysis.vectorized_failures.is_empty());
        assert!(analysis.error_distribution.is_empty());
        // Patient zeros may still be identified via pagerank even without errors
        // but none should have direct errors
        for pz in &analysis.patient_zeros {
            assert_eq!(pz.direct_errors, 0);
        }
    }

    #[test]
    fn test_analyze_invalid_python() {
        assert!(analyze_with_graph("def broken(:\n", NO_ERRORS).is_err());
    }

    #[test]
    fn test_analyze_empty_source() {
        let analysis = analyze_ok("", NO_ERRORS);

        assert_eq!(analysis.node_count, 0);
        assert_eq!(analysis.edge_count, 0);
    }

    #[test]
    fn test_analyze_error_distribution() {
        let python = r#"
def foo():
    return 42

def bar():
    return foo()
"#;
        let errors = [
            rust_error("E0308", "err1", 10),
            rust_error("E0599", "err2", 10),
        ];

        let analysis = analyze_ok(python, &errors);
        assert_eq!(analysis.total_errors, 2);
        // The distribution can never account for more errors than were submitted.
        let total_dist: usize = analysis.error_distribution.values().sum();
        assert!(total_dist <= 2);
    }

    #[test]
    fn test_graph_error_display_parse() {
        let msg = GraphError::ParseError("unexpected token".to_string()).to_string();
        assert!(msg.contains("parse"));
        assert!(msg.contains("unexpected token"));
    }

    #[test]
    fn test_graph_error_display_build() {
        let msg = GraphError::BuildError("node conflict".to_string()).to_string();
        assert!(msg.contains("construction"));
        assert!(msg.contains("node conflict"));
    }

    #[test]
    fn test_graph_error_display_overlay() {
        let msg = GraphError::OverlayError("mapping failed".to_string()).to_string();
        assert!(msg.contains("overlay"));
        assert!(msg.contains("mapping failed"));
    }

    #[test]
    fn test_graph_node_serde_roundtrip() {
        let node = GraphNode {
            id: "my_func".to_string(),
            kind: NodeKind::Function,
            file: PathBuf::from("test.py"),
            line: 10,
            column: 4,
            error_count: 2,
            // Deliberately not a pi-like literal: `3.14` trips clippy's
            // deny-by-default `approx_constant` lint.
            impact_score: 0.75,
        };

        let json = serde_json::to_string(&node).unwrap();
        let deserialized: GraphNode = serde_json::from_str(&json).unwrap();

        assert_eq!(deserialized.id, "my_func");
        assert_eq!(deserialized.kind, NodeKind::Function);
        assert_eq!(deserialized.file, PathBuf::from("test.py"));
        assert_eq!(deserialized.line, 10);
        assert_eq!(deserialized.column, 4);
        assert_eq!(deserialized.error_count, 2);
        assert!((deserialized.impact_score - 0.75).abs() < f64::EPSILON);
    }

    #[test]
    fn test_graph_edge_serde_roundtrip() {
        let edge = GraphEdge {
            kind: EdgeKind::Calls,
            weight: 2.5,
        };

        let json = serde_json::to_string(&edge).unwrap();
        let deserialized: GraphEdge = serde_json::from_str(&json).unwrap();

        assert_eq!(deserialized.kind, EdgeKind::Calls);
        assert!((deserialized.weight - 2.5).abs() < f64::EPSILON);
    }

    #[test]
    fn test_graph_analysis_serde_roundtrip() {
        let analysis = GraphAnalysis {
            node_count: 5,
            edge_count: 3,
            patient_zeros: vec![],
            vectorized_failures: vec![],
            error_distribution: HashMap::new(),
            total_errors: 0,
        };

        let json = serde_json::to_string(&analysis).unwrap();
        let deserialized: GraphAnalysis = serde_json::from_str(&json).unwrap();

        assert_eq!(deserialized.node_count, 5);
        assert_eq!(deserialized.edge_count, 3);
        assert_eq!(deserialized.total_errors, 0);
    }

    #[test]
    fn test_analyze_complex_program() {
        let python = r#"
import math

class Shape:
    def area(self):
        pass

class Circle(Shape):
    def area(self):
        return 3.14

def compute(shape):
    return shape.area()

def main():
    c = Circle()
    return compute(c)
"#;
        let errors = [
            rust_error("E0599", "no method area", 30),
            rust_error("E0308", "type mismatch", 50),
        ];

        let analysis = analyze_ok(python, &errors);
        // Should have import, classes, methods, and functions
        assert!(analysis.node_count >= 5);
        assert_eq!(analysis.total_errors, 2);
    }

    // ========================================================================
    // S9B7: Coverage tests for graph lib
    // ========================================================================

    #[test]
    fn test_s9b7_graph_node_debug_clone() {
        let node = GraphNode {
            id: "n".to_string(),
            kind: NodeKind::Function,
            file: PathBuf::from("test.py"),
            line: 1,
            column: 1,
            error_count: 0,
            impact_score: 0.0,
        };
        assert!(format!("{node:?}").contains("GraphNode"));
        let cloned = node.clone();
        assert_eq!(cloned.id, "n");
    }

    #[test]
    fn test_s9b7_graph_edge_debug_clone() {
        let edge = GraphEdge {
            kind: EdgeKind::Imports,
            weight: 1.0,
        };
        assert!(format!("{edge:?}").contains("GraphEdge"));
        let cloned = edge.clone();
        assert_eq!(cloned.kind, EdgeKind::Imports);
    }

    #[test]
    fn test_s9b7_graph_analysis_debug_clone() {
        let analysis = GraphAnalysis {
            node_count: 0,
            edge_count: 0,
            patient_zeros: vec![],
            vectorized_failures: vec![],
            error_distribution: HashMap::new(),
            total_errors: 0,
        };
        assert!(format!("{analysis:?}").contains("GraphAnalysis"));
        let cloned = analysis.clone();
        assert_eq!(cloned.node_count, 0);
    }

    #[test]
    fn test_s9b7_analyze_with_graph_multiple_errors_same_line() {
        let errors = [
            rust_error("E0308", "e1", 5),
            rust_error("E0308", "e2", 5),
        ];
        let analysis = analyze_ok("def foo():\n    return 42\n", &errors);
        assert_eq!(analysis.total_errors, 2);
    }

    #[test]
    fn test_s9b7_graph_error_debug() {
        let parse = GraphError::ParseError("bad".to_string());
        assert!(format!("{parse:?}").contains("ParseError"));

        let build = GraphError::BuildError("err".to_string());
        assert!(format!("{build:?}").contains("BuildError"));

        let overlay = GraphError::OverlayError("fail".to_string());
        assert!(format!("{overlay:?}").contains("OverlayError"));
    }

    #[test]
    fn test_s9b7_analyze_with_graph_only_class() {
        let python = r#"
class Standalone:
    def method(self):
        pass
"#;
        let analysis = analyze_ok(python, NO_ERRORS);
        assert!(analysis.node_count >= 2);
        assert_eq!(analysis.total_errors, 0);
    }

    #[test]
    fn test_s9b7_error_distribution_aggregation() {
        let errors = [
            rust_error("E0308", "e1", 10),
            rust_error("E0599", "e2", 10),
            rust_error("E0425", "e3", 10),
        ];
        let analysis = analyze_ok("def foo():\n    return 42\n", &errors);
        assert_eq!(analysis.total_errors, 3);
        // All errors may map to the same node or not
        let total_dist: usize = analysis.error_distribution.values().sum();
        assert!(total_dist <= 3);
    }

    #[test]
    fn test_graph_error_is_send_sync() {
        fn assert_send<T: Send>() {}
        fn assert_sync<T: Sync>() {}
        assert_send::<GraphError>();
        assert_sync::<GraphError>();
    }

    // ========================================================================
    // S12: Deep coverage tests for graph lib
    // ========================================================================

    #[test]
    fn test_s12_analyze_only_imports() {
        let analysis = analyze_ok("import os\nimport sys\n", NO_ERRORS);
        assert_eq!(analysis.node_count, 2);
        assert_eq!(analysis.total_errors, 0);
    }

    #[test]
    fn test_s12_analyze_large_error_count() {
        let errors: Vec<RustError> = (0..50)
            .map(|i| rust_error("E0308", &format!("error {i}"), i * 10))
            .collect();
        let analysis = analyze_ok("def foo():\n    return 42\n", &errors);
        assert_eq!(analysis.total_errors, 50);
    }

    #[test]
    fn test_s12_analyze_with_inheritance_chain() {
        let python = r#"
class A:
    def m(self):
        pass
class B(A):
    def m(self):
        pass
class C(B):
    def m(self):
        pass
"#;
        let errors = [rust_error("E0308", "err", 20)];
        let analysis = analyze_ok(python, &errors);
        // 3 classes + 3 methods = 6 nodes minimum
        assert!(analysis.node_count >= 6);
    }

    #[test]
    fn test_s12_graph_analysis_full_serde() {
        let python = r#"
def a():
    return b()
def b():
    return 1
"#;
        let errors = [rust_error("E0308", "mismatch", 10)];
        let analysis = analyze_ok(python, &errors);
        let json = serde_json::to_string(&analysis).unwrap();
        let back: GraphAnalysis = serde_json::from_str(&json).unwrap();
        assert_eq!(back.node_count, analysis.node_count);
        assert_eq!(back.total_errors, 1);
    }

    #[test]
    fn test_s12_error_distribution_multiple_nodes() {
        let python = r#"
def foo():
    return 1
def bar():
    return 2
"#;
        // Two errors on widely separated Rust lines, intended to land near
        // `foo` and `bar` respectively. NOTE(review): the Rust-line to
        // Python-line mapping lives in the overlay module - confirm there.
        let errors = [
            rust_error("E0308", "e1", 10),
            rust_error("E0308", "e2", 50),
        ];
        let analysis = analyze_ok(python, &errors);
        assert_eq!(analysis.total_errors, 2);
        // At least some errors should be distributed
        let total_dist: usize = analysis.error_distribution.values().sum();
        assert!(total_dist > 0);
    }

    #[test]
    fn test_s12_graph_error_source_trait() {
        // GraphError implements std::error::Error (via thiserror)
        let err = GraphError::ParseError("test".to_string());
        let e: &dyn std::error::Error = &err;
        assert!(e.source().is_none()); // Simple string errors have no source
    }
}