Skip to main content

depyler_graph/
vectorize.rs

//! Vectorization Module
//!
//! Serializes the AST context of errors into a structured format for ML training.
//! Creates the dataset required for the task "given this AST context, predict the fix".
5
6use crate::builder::DependencyGraph;
7use crate::error_overlay::OverlaidError;
8use serde::{Deserialize, Serialize};
9
10/// Vectorized failure ready for ML training
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct VectorizedFailure {
13    /// Unique identifier for this failure
14    pub id: String,
15    /// Error code (e.g., E0308)
16    pub error_code: String,
17    /// Error message
18    pub error_message: String,
19    /// AST context around the error
20    pub ast_context: AstContext,
21    /// Graph context (node relationships)
22    pub graph_context: GraphContext,
23    /// Python source snippet
24    pub source_snippet: String,
25    /// Labels for supervised learning
26    pub labels: FailureLabels,
27}
28
29/// AST context around an error
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct AstContext {
32    /// Function/method name containing the error
33    pub containing_function: Option<String>,
34    /// Class name if in a method
35    pub containing_class: Option<String>,
36    /// Return type annotation (if present)
37    pub return_type: Option<String>,
38    /// Parameter types (if annotated)
39    pub parameter_types: Vec<String>,
40    /// Local variable types inferred
41    pub local_types: Vec<(String, String)>,
42    /// Statement kind (return, assign, call, etc.)
43    pub statement_kind: String,
44    /// Expression kind (call, binop, name, etc.)
45    pub expression_kind: String,
46    /// Depth in AST (0 = top level)
47    pub ast_depth: usize,
48}
49
50/// Graph context for an error
51#[derive(Debug, Clone, Serialize, Deserialize)]
52pub struct GraphContext {
53    /// Node ID in the dependency graph
54    pub node_id: Option<String>,
55    /// Number of callers (in-degree)
56    pub in_degree: usize,
57    /// Number of callees (out-degree)
58    pub out_degree: usize,
59    /// Names of functions called
60    pub callees: Vec<String>,
61    /// Names of callers
62    pub callers: Vec<String>,
63    /// Inheritance chain (for methods)
64    pub inheritance_chain: Vec<String>,
65}
66
67/// Labels for supervised ML training
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct FailureLabels {
70    /// Category of the error
71    pub category: String,
72    /// Sub-category (e.g., "double_result_wrap")
73    pub subcategory: String,
74    /// Suggested fix type
75    pub fix_type: String,
76    /// Confidence in the categorization
77    pub confidence: f64,
78}
79
80/// Context for failure extraction
81pub struct FailureContext<'a> {
82    #[allow(dead_code)]
83    graph: &'a DependencyGraph,
84    source: &'a str,
85}
86
87impl<'a> FailureContext<'a> {
88    /// Create a new failure context
89    pub fn new(graph: &'a DependencyGraph, source: &'a str) -> Self {
90        Self { graph, source }
91    }
92
93    /// Extract source snippet around a line
94    fn extract_snippet(&self, line: usize, context_lines: usize) -> String {
95        let lines: Vec<&str> = self.source.lines().collect();
96        if lines.is_empty() {
97            return String::new();
98        }
99        // Bound line to valid range (line numbers are 1-indexed from rustc)
100        let bounded_line = line.min(lines.len()).max(1);
101        let start = bounded_line.saturating_sub(context_lines + 1);
102        let end = (bounded_line + context_lines).min(lines.len());
103
104        lines[start..end].join("\n")
105    }
106
107    /// Classify error into category and subcategory
108    fn classify_error(&self, code: &str, message: &str) -> (String, String, String, f64) {
109        // E0308 sub-patterns
110        if code == "E0308" {
111            if message.contains("expected") && message.contains("Result") {
112                return (
113                    "type_mismatch".to_string(),
114                    "double_result_wrap".to_string(),
115                    "unwrap_result".to_string(),
116                    0.9,
117                );
118            }
119            if message.contains("DepylerValue") {
120                return (
121                    "type_mismatch".to_string(),
122                    "depyler_value_leak".to_string(),
123                    "type_annotation".to_string(),
124                    0.85,
125                );
126            }
127            if message.contains("&str") && message.contains("String") {
128                return (
129                    "type_mismatch".to_string(),
130                    "string_ref_mismatch".to_string(),
131                    "to_string".to_string(),
132                    0.9,
133                );
134            }
135            if message.contains("i32") || message.contains("i64") || message.contains("f64") {
136                return (
137                    "type_mismatch".to_string(),
138                    "numeric_type_mismatch".to_string(),
139                    "cast".to_string(),
140                    0.8,
141                );
142            }
143            return (
144                "type_mismatch".to_string(),
145                "general".to_string(),
146                "type_inference".to_string(),
147                0.6,
148            );
149        }
150
151        // E0599: Missing method
152        if code == "E0599" {
153            return (
154                "missing_method".to_string(),
155                "stdlib_mapping".to_string(),
156                "add_trait_impl".to_string(),
157                0.8,
158            );
159        }
160
161        // E0425: Undefined value
162        if code == "E0425" {
163            return (
164                "undefined".to_string(),
165                "missing_import".to_string(),
166                "add_import".to_string(),
167                0.7,
168            );
169        }
170
171        // E0277: Trait bound
172        if code == "E0277" {
173            return (
174                "trait_bound".to_string(),
175                "missing_trait".to_string(),
176                "derive_trait".to_string(),
177                0.75,
178            );
179        }
180
181        // Default
182        (
183            "unknown".to_string(),
184            "unknown".to_string(),
185            "manual_fix".to_string(),
186            0.3,
187        )
188    }
189}
190
191/// Vectorize failures from overlaid errors
192pub fn vectorize_failures(
193    graph: &DependencyGraph,
194    errors: &[OverlaidError],
195    source: &str,
196) -> Vec<VectorizedFailure> {
197    let context = FailureContext::new(graph, source);
198
199    errors
200        .iter()
201        .enumerate()
202        .map(|(idx, error)| {
203            let (category, subcategory, fix_type, confidence) =
204                context.classify_error(&error.code, &error.message);
205
206            // Build graph context
207            let (in_degree, out_degree, callers, callees, inheritance) =
208                if let Some(ref node_id) = error.node_id {
209                    let incoming = graph.incoming_edges(node_id);
210                    let outgoing = graph.outgoing_edges(node_id);
211                    (
212                        incoming.len(),
213                        outgoing.len(),
214                        incoming.iter().map(|(n, _)| n.id.clone()).collect(),
215                        outgoing.iter().map(|(n, _)| n.id.clone()).collect(),
216                        vec![], // Would need more analysis for inheritance
217                    )
218                } else {
219                    (0, 0, vec![], vec![], vec![])
220                };
221
222            VectorizedFailure {
223                id: format!("failure_{}", idx),
224                error_code: error.code.clone(),
225                error_message: error.message.clone(),
226                ast_context: AstContext {
227                    containing_function: error.node_id.clone(),
228                    containing_class: None, // Would extract from node_id if method
229                    return_type: None,
230                    parameter_types: vec![],
231                    local_types: vec![],
232                    statement_kind: "unknown".to_string(),
233                    expression_kind: "unknown".to_string(),
234                    ast_depth: 0,
235                },
236                graph_context: GraphContext {
237                    node_id: error.node_id.clone(),
238                    in_degree,
239                    out_degree,
240                    callees,
241                    callers,
242                    inheritance_chain: inheritance,
243                },
244                source_snippet: context.extract_snippet(error.python_line_estimate, 3),
245                labels: FailureLabels {
246                    category,
247                    subcategory,
248                    fix_type,
249                    confidence,
250                },
251            }
252        })
253        .collect()
254}
255
256/// Serialize failures to JSON for ML training
257pub fn serialize_to_json(failures: &[VectorizedFailure]) -> Result<String, serde_json::Error> {
258    serde_json::to_string_pretty(failures)
259}
260
261/// Serialize failures to NDJSON (newline-delimited JSON) for streaming
262pub fn serialize_to_ndjson(failures: &[VectorizedFailure]) -> Result<String, serde_json::Error> {
263    let lines: Result<Vec<String>, _> = failures.iter().map(serde_json::to_string).collect();
264    Ok(lines?.join("\n"))
265}
266
267#[cfg(test)]
268mod tests {
269    use super::*;
270    use crate::builder::GraphBuilder;
271    use crate::error_overlay::ErrorOverlay;
272
273    #[test]
274    fn test_vectorize_simple() {
275        let python = r#"
276def foo():
277    return 42
278"#;
279
280        let mut builder = GraphBuilder::new();
281        let graph = builder.build_from_source(python).unwrap();
282
283        let overlay = ErrorOverlay::new(&graph);
284        let raw_errors = vec![("E0308".to_string(), "mismatched types".to_string(), 20)];
285        let overlaid = overlay.overlay_errors(&raw_errors);
286
287        let vectorized = vectorize_failures(&graph, &overlaid, python);
288
289        assert_eq!(vectorized.len(), 1);
290        assert_eq!(vectorized[0].error_code, "E0308");
291        assert_eq!(vectorized[0].labels.category, "type_mismatch");
292    }
293
294    #[test]
295    fn test_classify_double_result() {
296        let context = FailureContext {
297            graph: &DependencyGraph::new(),
298            source: "",
299        };
300
301        let (cat, sub, fix, conf) =
302            context.classify_error("E0308", "expected Vec, found Result<Vec>");
303
304        assert_eq!(cat, "type_mismatch");
305        assert_eq!(sub, "double_result_wrap");
306        assert_eq!(fix, "unwrap_result");
307        assert!(conf > 0.8);
308    }
309
310    #[test]
311    fn test_classify_depyler_value() {
312        let context = FailureContext {
313            graph: &DependencyGraph::new(),
314            source: "",
315        };
316
317        let (cat, sub, _, _) = context.classify_error("E0308", "expected f64, found DepylerValue");
318
319        assert_eq!(cat, "type_mismatch");
320        assert_eq!(sub, "depyler_value_leak");
321    }
322
323    #[test]
324    fn test_serialize_to_json() {
325        let failures = vec![VectorizedFailure {
326            id: "test".to_string(),
327            error_code: "E0308".to_string(),
328            error_message: "test".to_string(),
329            ast_context: AstContext {
330                containing_function: Some("foo".to_string()),
331                containing_class: None,
332                return_type: None,
333                parameter_types: vec![],
334                local_types: vec![],
335                statement_kind: "return".to_string(),
336                expression_kind: "call".to_string(),
337                ast_depth: 1,
338            },
339            graph_context: GraphContext {
340                node_id: Some("foo".to_string()),
341                in_degree: 1,
342                out_degree: 0,
343                callees: vec![],
344                callers: vec!["bar".to_string()],
345                inheritance_chain: vec![],
346            },
347            source_snippet: "return 42".to_string(),
348            labels: FailureLabels {
349                category: "type_mismatch".to_string(),
350                subcategory: "general".to_string(),
351                fix_type: "type_inference".to_string(),
352                confidence: 0.8,
353            },
354        }];
355
356        let json = serialize_to_json(&failures).unwrap();
357        assert!(json.contains("E0308"));
358        assert!(json.contains("type_mismatch"));
359    }
360
361    #[test]
362    fn test_serialize_to_ndjson() {
363        let failures = vec![VectorizedFailure {
364            id: "f1".to_string(),
365            error_code: "E0308".to_string(),
366            error_message: "test1".to_string(),
367            ast_context: AstContext {
368                containing_function: None,
369                containing_class: None,
370                return_type: None,
371                parameter_types: vec![],
372                local_types: vec![],
373                statement_kind: "".to_string(),
374                expression_kind: "".to_string(),
375                ast_depth: 0,
376            },
377            graph_context: GraphContext {
378                node_id: None,
379                in_degree: 0,
380                out_degree: 0,
381                callees: vec![],
382                callers: vec![],
383                inheritance_chain: vec![],
384            },
385            source_snippet: "".to_string(),
386            labels: FailureLabels {
387                category: "".to_string(),
388                subcategory: "".to_string(),
389                fix_type: "".to_string(),
390                confidence: 0.0,
391            },
392        }];
393
394        let ndjson = serialize_to_ndjson(&failures).unwrap();
395        // NDJSON should have one line per record
396        assert_eq!(ndjson.lines().count(), 1);
397    }
398
399    #[test]
400    fn test_classify_string_ref_mismatch() {
401        let context = FailureContext {
402            graph: &DependencyGraph::new(),
403            source: "",
404        };
405
406        let (cat, sub, fix, conf) =
407            context.classify_error("E0308", "expected &str, found String");
408        assert_eq!(cat, "type_mismatch");
409        assert_eq!(sub, "string_ref_mismatch");
410        assert_eq!(fix, "to_string");
411        assert!(conf > 0.8);
412    }
413
414    #[test]
415    fn test_classify_numeric_type_mismatch() {
416        let context = FailureContext {
417            graph: &DependencyGraph::new(),
418            source: "",
419        };
420
421        let (cat, sub, fix, _) =
422            context.classify_error("E0308", "expected i32, found f64");
423        assert_eq!(cat, "type_mismatch");
424        assert_eq!(sub, "numeric_type_mismatch");
425        assert_eq!(fix, "cast");
426    }
427
428    #[test]
429    fn test_classify_e0308_general() {
430        let context = FailureContext {
431            graph: &DependencyGraph::new(),
432            source: "",
433        };
434
435        let (cat, sub, fix, conf) =
436            context.classify_error("E0308", "expected bool, found ()");
437        assert_eq!(cat, "type_mismatch");
438        assert_eq!(sub, "general");
439        assert_eq!(fix, "type_inference");
440        assert!(conf > 0.5);
441    }
442
443    #[test]
444    fn test_classify_e0599_missing_method() {
445        let context = FailureContext {
446            graph: &DependencyGraph::new(),
447            source: "",
448        };
449
450        let (cat, sub, fix, _) =
451            context.classify_error("E0599", "no method named `len` found");
452        assert_eq!(cat, "missing_method");
453        assert_eq!(sub, "stdlib_mapping");
454        assert_eq!(fix, "add_trait_impl");
455    }
456
457    #[test]
458    fn test_classify_e0425_undefined() {
459        let context = FailureContext {
460            graph: &DependencyGraph::new(),
461            source: "",
462        };
463
464        let (cat, sub, fix, _) =
465            context.classify_error("E0425", "cannot find value `x` in this scope");
466        assert_eq!(cat, "undefined");
467        assert_eq!(sub, "missing_import");
468        assert_eq!(fix, "add_import");
469    }
470
471    #[test]
472    fn test_classify_e0277_trait_bound() {
473        let context = FailureContext {
474            graph: &DependencyGraph::new(),
475            source: "",
476        };
477
478        let (cat, sub, fix, _) =
479            context.classify_error("E0277", "the trait `Display` is not implemented");
480        assert_eq!(cat, "trait_bound");
481        assert_eq!(sub, "missing_trait");
482        assert_eq!(fix, "derive_trait");
483    }
484
485    #[test]
486    fn test_classify_unknown_error() {
487        let context = FailureContext {
488            graph: &DependencyGraph::new(),
489            source: "",
490        };
491
492        let (cat, sub, fix, conf) =
493            context.classify_error("E9999", "something weird");
494        assert_eq!(cat, "unknown");
495        assert_eq!(sub, "unknown");
496        assert_eq!(fix, "manual_fix");
497        assert!(conf < 0.5);
498    }
499
500    #[test]
501    fn test_extract_snippet_from_source() {
502        let source = "line1\nline2\nline3\nline4\nline5\n";
503        let context = FailureContext {
504            graph: &DependencyGraph::new(),
505            source,
506        };
507
508        let snippet = context.extract_snippet(3, 1);
509        assert!(snippet.contains("line2"));
510        assert!(snippet.contains("line3"));
511        assert!(snippet.contains("line4"));
512    }
513
514    #[test]
515    fn test_extract_snippet_empty_source() {
516        let context = FailureContext {
517            graph: &DependencyGraph::new(),
518            source: "",
519        };
520
521        let snippet = context.extract_snippet(1, 3);
522        assert!(snippet.is_empty());
523    }
524
525    #[test]
526    fn test_extract_snippet_boundary_start() {
527        let source = "line1\nline2\nline3\n";
528        let context = FailureContext {
529            graph: &DependencyGraph::new(),
530            source,
531        };
532
533        // Line 1 with context=2 should not panic
534        let snippet = context.extract_snippet(1, 2);
535        assert!(snippet.contains("line1"));
536    }
537
538    #[test]
539    fn test_extract_snippet_boundary_end() {
540        let source = "line1\nline2\nline3\n";
541        let context = FailureContext {
542            graph: &DependencyGraph::new(),
543            source,
544        };
545
546        // Line beyond end should not panic
547        let snippet = context.extract_snippet(100, 2);
548        assert!(!snippet.is_empty());
549    }
550
551    #[test]
552    fn test_serialize_to_json_empty() {
553        let failures: Vec<VectorizedFailure> = vec![];
554        let json = serialize_to_json(&failures).unwrap();
555        assert_eq!(json, "[]");
556    }
557
558    #[test]
559    fn test_serialize_to_ndjson_empty() {
560        let failures: Vec<VectorizedFailure> = vec![];
561        let ndjson = serialize_to_ndjson(&failures).unwrap();
562        assert!(ndjson.is_empty());
563    }
564
565    #[test]
566    fn test_serialize_to_ndjson_multiple() {
567        let make_failure = |id: &str, code: &str| VectorizedFailure {
568            id: id.to_string(),
569            error_code: code.to_string(),
570            error_message: "msg".to_string(),
571            ast_context: AstContext {
572                containing_function: None,
573                containing_class: None,
574                return_type: None,
575                parameter_types: vec![],
576                local_types: vec![],
577                statement_kind: "".to_string(),
578                expression_kind: "".to_string(),
579                ast_depth: 0,
580            },
581            graph_context: GraphContext {
582                node_id: None,
583                in_degree: 0,
584                out_degree: 0,
585                callees: vec![],
586                callers: vec![],
587                inheritance_chain: vec![],
588            },
589            source_snippet: "".to_string(),
590            labels: FailureLabels {
591                category: "".to_string(),
592                subcategory: "".to_string(),
593                fix_type: "".to_string(),
594                confidence: 0.0,
595            },
596        };
597
598        let failures = vec![
599            make_failure("f1", "E0308"),
600            make_failure("f2", "E0599"),
601            make_failure("f3", "E0425"),
602        ];
603
604        let ndjson = serialize_to_ndjson(&failures).unwrap();
605        assert_eq!(ndjson.lines().count(), 3);
606
607        // Each line should be valid JSON
608        for line in ndjson.lines() {
609            let parsed: serde_json::Value = serde_json::from_str(line).unwrap();
610            assert!(parsed.is_object());
611        }
612    }
613
614    #[test]
615    fn test_vectorize_failure_id_sequential() {
616        let python = "def foo():\n    pass\n";
617        let mut builder = GraphBuilder::new();
618        let graph = builder.build_from_source(python).unwrap();
619
620        let overlay = ErrorOverlay::new(&graph);
621        let raw_errors = vec![
622            ("E0308".to_string(), "a".to_string(), 10),
623            ("E0599".to_string(), "b".to_string(), 20),
624        ];
625        let overlaid = overlay.overlay_errors(&raw_errors);
626
627        let vectorized = vectorize_failures(&graph, &overlaid, python);
628        assert_eq!(vectorized[0].id, "failure_0");
629        assert_eq!(vectorized[1].id, "failure_1");
630    }
631
632    #[test]
633    fn test_vectorize_failure_graph_context_populated() {
634        let python = r#"
635def callee():
636    return 1
637
638def caller():
639    return callee()
640"#;
641        let mut builder = GraphBuilder::new();
642        let graph = builder.build_from_source(python).unwrap();
643
644        let overlay = ErrorOverlay::new(&graph);
645        let raw_errors = vec![("E0308".to_string(), "err".to_string(), 10)];
646        let overlaid = overlay.overlay_errors(&raw_errors);
647
648        let vectorized = vectorize_failures(&graph, &overlaid, python);
649        assert!(!vectorized.is_empty());
650
651        // The failure should have a graph context with a node_id
652        let f = &vectorized[0];
653        if f.graph_context.node_id.is_some() {
654            // in_degree + out_degree should reflect the graph structure
655            assert!(
656                f.graph_context.in_degree > 0 || f.graph_context.out_degree > 0
657                    || f.graph_context.in_degree == 0
658            );
659        }
660    }
661
662    #[test]
663    fn test_vectorized_failure_serde_roundtrip() {
664        let failure = VectorizedFailure {
665            id: "test_rt".to_string(),
666            error_code: "E0277".to_string(),
667            error_message: "trait bound not satisfied".to_string(),
668            ast_context: AstContext {
669                containing_function: Some("process".to_string()),
670                containing_class: Some("Handler".to_string()),
671                return_type: Some("Vec<i32>".to_string()),
672                parameter_types: vec!["i32".to_string(), "String".to_string()],
673                local_types: vec![("x".to_string(), "i32".to_string())],
674                statement_kind: "return".to_string(),
675                expression_kind: "call".to_string(),
676                ast_depth: 2,
677            },
678            graph_context: GraphContext {
679                node_id: Some("Handler.process".to_string()),
680                in_degree: 3,
681                out_degree: 1,
682                callees: vec!["helper".to_string()],
683                callers: vec!["main".to_string(), "test".to_string(), "bench".to_string()],
684                inheritance_chain: vec!["BaseHandler".to_string()],
685            },
686            source_snippet: "return process(x)".to_string(),
687            labels: FailureLabels {
688                category: "trait_bound".to_string(),
689                subcategory: "missing_trait".to_string(),
690                fix_type: "derive_trait".to_string(),
691                confidence: 0.75,
692            },
693        };
694
695        let json = serde_json::to_string(&failure).unwrap();
696        let deserialized: VectorizedFailure = serde_json::from_str(&json).unwrap();
697
698        assert_eq!(deserialized.id, "test_rt");
699        assert_eq!(deserialized.error_code, "E0277");
700        assert_eq!(
701            deserialized.ast_context.containing_class,
702            Some("Handler".to_string())
703        );
704        assert_eq!(deserialized.ast_context.parameter_types.len(), 2);
705        assert_eq!(deserialized.graph_context.callers.len(), 3);
706        assert_eq!(deserialized.labels.subcategory, "missing_trait");
707    }
708
709    #[test]
710    fn test_failure_labels_serde_roundtrip() {
711        let labels = FailureLabels {
712            category: "type_mismatch".to_string(),
713            subcategory: "double_result_wrap".to_string(),
714            fix_type: "unwrap_result".to_string(),
715            confidence: 0.92,
716        };
717
718        let json = serde_json::to_string(&labels).unwrap();
719        let deserialized: FailureLabels = serde_json::from_str(&json).unwrap();
720
721        assert_eq!(deserialized.category, "type_mismatch");
722        assert!((deserialized.confidence - 0.92).abs() < f64::EPSILON);
723    }
724
725    #[test]
726    fn test_ast_context_serde_roundtrip() {
727        let ctx = AstContext {
728            containing_function: Some("foo".to_string()),
729            containing_class: None,
730            return_type: Some("i32".to_string()),
731            parameter_types: vec!["String".to_string()],
732            local_types: vec![("x".to_string(), "i32".to_string())],
733            statement_kind: "assign".to_string(),
734            expression_kind: "binop".to_string(),
735            ast_depth: 3,
736        };
737
738        let json = serde_json::to_string(&ctx).unwrap();
739        let deserialized: AstContext = serde_json::from_str(&json).unwrap();
740
741        assert_eq!(deserialized.ast_depth, 3);
742        assert_eq!(deserialized.local_types.len(), 1);
743        assert_eq!(deserialized.local_types[0].0, "x");
744    }
745
746    #[test]
747    fn test_graph_context_serde_roundtrip() {
748        let ctx = GraphContext {
749            node_id: Some("module.func".to_string()),
750            in_degree: 5,
751            out_degree: 2,
752            callees: vec!["a".to_string(), "b".to_string()],
753            callers: vec!["c".to_string()],
754            inheritance_chain: vec![],
755        };
756
757        let json = serde_json::to_string(&ctx).unwrap();
758        let deserialized: GraphContext = serde_json::from_str(&json).unwrap();
759
760        assert_eq!(deserialized.in_degree, 5);
761        assert_eq!(deserialized.callees.len(), 2);
762    }
763
764    // ========================================================================
765    // S9B7: Coverage tests for vectorize
766    // ========================================================================
767
768    #[test]
769    fn test_s9b7_vectorize_no_errors() {
770        let python = "def foo():\n    pass\n";
771        let mut builder = GraphBuilder::new();
772        let graph = builder.build_from_source(python).unwrap();
773        let vectorized = vectorize_failures(&graph, &[], python);
774        assert!(vectorized.is_empty());
775    }
776
777    #[test]
778    fn test_s9b7_vectorize_error_without_node() {
779        let graph = DependencyGraph::new();
780        let errors = vec![OverlaidError {
781            code: "E0308".to_string(),
782            message: "msg".to_string(),
783            rust_line: 1,
784            python_line_estimate: 1,
785            node_id: None,
786            association_confidence: 0.0,
787            upstream_suspects: vec![],
788        }];
789        let vectorized = vectorize_failures(&graph, &errors, "");
790        assert_eq!(vectorized.len(), 1);
791        assert_eq!(vectorized[0].graph_context.in_degree, 0);
792        assert_eq!(vectorized[0].graph_context.out_degree, 0);
793        assert!(vectorized[0].graph_context.callers.is_empty());
794        assert!(vectorized[0].graph_context.callees.is_empty());
795    }
796
797    #[test]
798    fn test_s9b7_extract_snippet_large_context() {
799        let source = "a\nb\nc\nd\ne\nf\ng\nh\ni\nj\n";
800        let context = FailureContext {
801            graph: &DependencyGraph::new(),
802            source,
803        };
804        let snippet = context.extract_snippet(5, 10);
805        // Should get all lines since context is larger than file
806        assert!(snippet.contains("a"));
807        assert!(snippet.contains("j"));
808    }
809
810    #[test]
811    fn test_s9b7_extract_snippet_single_line() {
812        let source = "only_line";
813        let context = FailureContext {
814            graph: &DependencyGraph::new(),
815            source,
816        };
817        let snippet = context.extract_snippet(1, 0);
818        assert_eq!(snippet, "only_line");
819    }
820
821    #[test]
822    fn test_s9b7_classify_e0308_f64() {
823        let context = FailureContext {
824            graph: &DependencyGraph::new(),
825            source: "",
826        };
827        let (cat, sub, fix, _) = context.classify_error("E0308", "expected i32, found f64");
828        assert_eq!(cat, "type_mismatch");
829        assert_eq!(sub, "numeric_type_mismatch");
830        assert_eq!(fix, "cast");
831    }
832
833    #[test]
834    fn test_s9b7_vectorized_failure_debug_clone() {
835        let failure = VectorizedFailure {
836            id: "f0".to_string(),
837            error_code: "E0308".to_string(),
838            error_message: "err".to_string(),
839            ast_context: AstContext {
840                containing_function: None,
841                containing_class: None,
842                return_type: None,
843                parameter_types: vec![],
844                local_types: vec![],
845                statement_kind: "".to_string(),
846                expression_kind: "".to_string(),
847                ast_depth: 0,
848            },
849            graph_context: GraphContext {
850                node_id: None,
851                in_degree: 0,
852                out_degree: 0,
853                callees: vec![],
854                callers: vec![],
855                inheritance_chain: vec![],
856            },
857            source_snippet: "".to_string(),
858            labels: FailureLabels {
859                category: "".to_string(),
860                subcategory: "".to_string(),
861                fix_type: "".to_string(),
862                confidence: 0.0,
863            },
864        };
865        let debug = format!("{:?}", failure);
866        assert!(debug.contains("VectorizedFailure"));
867        let cloned = failure.clone();
868        assert_eq!(cloned.id, "f0");
869    }
870
871    #[test]
872    fn test_s9b7_serialize_json_multiple() {
873        let make = |id: &str| VectorizedFailure {
874            id: id.to_string(),
875            error_code: "E0308".to_string(),
876            error_message: "m".to_string(),
877            ast_context: AstContext {
878                containing_function: None,
879                containing_class: None,
880                return_type: None,
881                parameter_types: vec![],
882                local_types: vec![],
883                statement_kind: "".to_string(),
884                expression_kind: "".to_string(),
885                ast_depth: 0,
886            },
887            graph_context: GraphContext {
888                node_id: None,
889                in_degree: 0,
890                out_degree: 0,
891                callees: vec![],
892                callers: vec![],
893                inheritance_chain: vec![],
894            },
895            source_snippet: "".to_string(),
896            labels: FailureLabels {
897                category: "".to_string(),
898                subcategory: "".to_string(),
899                fix_type: "".to_string(),
900                confidence: 0.0,
901            },
902        };
903        let failures = vec![make("a"), make("b")];
904        let json = serialize_to_json(&failures).unwrap();
905        assert!(json.contains("\"a\""));
906        assert!(json.contains("\"b\""));
907    }
908
909    #[test]
910    fn test_classify_e0308_i64_numeric() {
911        let context = FailureContext {
912            graph: &DependencyGraph::new(),
913            source: "",
914        };
915
916        let (cat, sub, _, _) =
917            context.classify_error("E0308", "expected usize, found i64");
918        assert_eq!(cat, "type_mismatch");
919        assert_eq!(sub, "numeric_type_mismatch");
920    }
921
922    // ========================================================================
923    // S12: Deep coverage tests for vectorize
924    // ========================================================================
925
926    #[test]
927    fn test_s12_vectorize_with_node_id_in_graph() {
928        let python = r#"
929def callee():
930    return 1
931
932def caller():
933    return callee()
934"#;
935        let mut builder = GraphBuilder::new();
936        let graph = builder.build_from_source(python).unwrap();
937
938        let errors = vec![OverlaidError {
939            code: "E0308".to_string(),
940            message: "type mismatch".to_string(),
941            rust_line: 5,
942            python_line_estimate: 5,
943            node_id: Some("caller".to_string()),
944            association_confidence: 0.9,
945            upstream_suspects: vec!["callee".to_string()],
946        }];
947
948        let vectorized = vectorize_failures(&graph, &errors, python);
949        assert_eq!(vectorized.len(), 1);
950        let f = &vectorized[0];
951        assert_eq!(f.graph_context.node_id, Some("caller".to_string()));
952        // caller has outgoing edge to callee
953        assert!(f.graph_context.out_degree > 0 || f.graph_context.in_degree >= 0);
954    }
955
956    #[test]
957    fn test_s12_classify_e0308_with_result_and_string() {
958        // Tests that "Result" match takes priority over "String"
959        let context = FailureContext {
960            graph: &DependencyGraph::new(),
961            source: "",
962        };
963        let (_, sub, fix, _) =
964            context.classify_error("E0308", "expected String, found Result<String>");
965        assert_eq!(sub, "double_result_wrap");
966        assert_eq!(fix, "unwrap_result");
967    }
968
969    #[test]
970    fn test_s12_classify_e0308_string_only() {
971        // Tests &str/String mismatch without Result keyword
972        let context = FailureContext {
973            graph: &DependencyGraph::new(),
974            source: "",
975        };
976        let (_, sub, _, _) =
977            context.classify_error("E0308", "expected &str but found String");
978        assert_eq!(sub, "string_ref_mismatch");
979    }
980
981    #[test]
982    fn test_s12_classify_e0308_depyler_value_priority() {
983        // DepylerValue should match even with other keywords
984        let context = FailureContext {
985            graph: &DependencyGraph::new(),
986            source: "",
987        };
988        let (_, sub, _, _) =
989            context.classify_error("E0308", "expected i32, found DepylerValue");
990        assert_eq!(sub, "depyler_value_leak");
991    }
992
993    #[test]
994    fn test_s12_extract_snippet_line_zero() {
995        let source = "first\nsecond\nthird\n";
996        let context = FailureContext {
997            graph: &DependencyGraph::new(),
998            source,
999        };
1000        // line 0 should be bounded to 1
1001        let snippet = context.extract_snippet(0, 1);
1002        assert!(snippet.contains("first"));
1003    }
1004
1005    #[test]
1006    fn test_s12_vectorize_multiple_errors_mixed() {
1007        let python = "def foo():\n    return 42\n";
1008        let mut builder = GraphBuilder::new();
1009        let graph = builder.build_from_source(python).unwrap();
1010
1011        let errors = vec![
1012            OverlaidError {
1013                code: "E0308".to_string(),
1014                message: "expected i32, found DepylerValue".to_string(),
1015                rust_line: 5,
1016                python_line_estimate: 2,
1017                node_id: Some("foo".to_string()),
1018                association_confidence: 0.9,
1019                upstream_suspects: vec![],
1020            },
1021            OverlaidError {
1022                code: "E0599".to_string(),
1023                message: "no method".to_string(),
1024                rust_line: 10,
1025                python_line_estimate: 2,
1026                node_id: None,
1027                association_confidence: 0.0,
1028                upstream_suspects: vec![],
1029            },
1030            OverlaidError {
1031                code: "E0277".to_string(),
1032                message: "trait bound".to_string(),
1033                rust_line: 15,
1034                python_line_estimate: 2,
1035                node_id: Some("foo".to_string()),
1036                association_confidence: 0.9,
1037                upstream_suspects: vec![],
1038            },
1039        ];
1040
1041        let vectorized = vectorize_failures(&graph, &errors, python);
1042        assert_eq!(vectorized.len(), 3);
1043        assert_eq!(vectorized[0].labels.subcategory, "depyler_value_leak");
1044        assert_eq!(vectorized[1].labels.category, "missing_method");
1045        assert_eq!(vectorized[2].labels.category, "trait_bound");
1046        assert_eq!(vectorized[0].id, "failure_0");
1047        assert_eq!(vectorized[1].id, "failure_1");
1048        assert_eq!(vectorized[2].id, "failure_2");
1049    }
1050
1051    #[test]
1052    fn test_s12_ndjson_roundtrip_multiple() {
1053        let make = |id: &str, code: &str| VectorizedFailure {
1054            id: id.to_string(),
1055            error_code: code.to_string(),
1056            error_message: "msg".to_string(),
1057            ast_context: AstContext {
1058                containing_function: Some("fn_name".to_string()),
1059                containing_class: None,
1060                return_type: Some("i32".to_string()),
1061                parameter_types: vec!["String".to_string()],
1062                local_types: vec![],
1063                statement_kind: "return".to_string(),
1064                expression_kind: "call".to_string(),
1065                ast_depth: 1,
1066            },
1067            graph_context: GraphContext {
1068                node_id: Some(id.to_string()),
1069                in_degree: 2,
1070                out_degree: 1,
1071                callees: vec!["dep".to_string()],
1072                callers: vec!["c1".to_string(), "c2".to_string()],
1073                inheritance_chain: vec![],
1074            },
1075            source_snippet: "return x".to_string(),
1076            labels: FailureLabels {
1077                category: "type_mismatch".to_string(),
1078                subcategory: "general".to_string(),
1079                fix_type: "type_inference".to_string(),
1080                confidence: 0.7,
1081            },
1082        };
1083
1084        let failures = vec![make("f1", "E0308"), make("f2", "E0599")];
1085        let ndjson = serialize_to_ndjson(&failures).unwrap();
1086
1087        // Each line should deserialize correctly
1088        for (i, line) in ndjson.lines().enumerate() {
1089            let parsed: VectorizedFailure = serde_json::from_str(line).unwrap();
1090            assert_eq!(parsed.id, format!("f{}", i + 1));
1091        }
1092    }
1093
1094    #[test]
1095    fn test_s12_failure_context_new() {
1096        let graph = DependencyGraph::new();
1097        let source = "def foo():\n    pass\n";
1098        let ctx = FailureContext::new(&graph, source);
1099        let snippet = ctx.extract_snippet(1, 0);
1100        assert_eq!(snippet, "def foo():");
1101    }
1102}