Skip to main content

depyler_tooling/
doctest_extractor.rs

1//! # Doctest Extractor for CITL Training Pipeline
2//!
3//! GH-173: Extracts Python `>>>` doctest examples and converts them to
4//! structured format for CITL training and Rust doc test generation.
5//!
6//! ## Overview
7//!
8//! This module implements Phase 1 of the doctest transpilation spec:
9//! - Parse Python docstrings to extract `>>>` blocks
10//! - Extract input expression and expected output
11//! - Handle multi-line continuations (`...`)
12//! - Output structured format: `{function, input, expected, line}`
13//!
14//! ## Example
15//!
16//! ```rust
17//! use depyler_core::doctest_extractor::{DoctestExtractor, Doctest};
18//!
19//! let source = r#"
20//! def square(x: int) -> int:
21//!     """Compute square.
22//!
23//!     >>> square(4)
24//!     16
25//!     >>> square(-3)
26//!     9
27//!     """
28//!     return x * x
29//! "#;
30//!
31//! let extractor = DoctestExtractor::new();
32//! let doctests = extractor.extract(source).unwrap();
33//!
34//! assert_eq!(doctests.len(), 2);
35//! assert_eq!(doctests[0].input, "square(4)");
36//! assert_eq!(doctests[0].expected, "16");
37//! ```
38
39use anyhow::Result;
40use serde::{Deserialize, Serialize};
41
/// A single extracted doctest example: one `>>>` prompt together with the
/// output that follows it.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Doctest {
    /// The function this doctest belongs to, i.e. the most recent `def` seen
    /// before the example; `"<module>"` when no `def` has been seen yet.
    pub function: String,
    /// The input expression (after `>>>`). `...` continuation lines are
    /// appended to it, separated by `\n`.
    pub input: String,
    /// The expected output; several output lines are joined with `\n`.
    pub expected: String,
    /// Line number of the `>>>` line in the source text (1-indexed).
    pub line: usize,
}
54
/// Result of extracting doctests from a module, in the shape that is
/// serialized for downstream consumers.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DoctestResult {
    /// Source file or module name.
    ///
    /// `DoctestExtractor::extract_to_result` fills this with the same `module`
    /// string it stores in [`DoctestResult::module`].
    pub source: String,
    /// Module path (e.g., "os.path")
    pub module: String,
    /// Extracted doctests grouped by function (one entry per function name)
    pub doctests: Vec<FunctionDoctests>,
}
65
/// Doctests for a single function: every example that shares one function name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionDoctests {
    /// Function name (`"<module>"` for examples outside any function)
    pub function: String,
    /// Function signature if available.
    ///
    /// `DoctestExtractor::extract_to_result` always leaves this as `None`.
    pub signature: Option<String>,
    /// Docstring text.
    ///
    /// `DoctestExtractor::extract_to_result` always leaves this as `None`.
    pub docstring: Option<String>,
    /// Extracted doctest examples
    pub examples: Vec<Doctest>,
}
78
/// Extracts doctest examples from Python source code.
///
/// The derived `Default` leaves both flags `false`, whereas
/// `DoctestExtractor::new` sets both to `true`.
///
/// NOTE(review): none of the methods on this type read these flags; they are
/// only stored and returned by the builder methods.
#[derive(Debug, Clone, Default)]
pub struct DoctestExtractor {
    /// Whether to include module-level doctests
    pub include_module_doctests: bool,
    /// Whether to include class method doctests
    pub include_class_methods: bool,
}
87
88impl DoctestExtractor {
89    /// Creates a new DoctestExtractor with default settings
90    pub fn new() -> Self {
91        Self {
92            include_module_doctests: true,
93            include_class_methods: true,
94        }
95    }
96
97    /// Configure whether to include module-level doctests
98    pub fn with_module_doctests(mut self, include: bool) -> Self {
99        self.include_module_doctests = include;
100        self
101    }
102
103    /// Configure whether to include class method doctests
104    pub fn with_class_methods(mut self, include: bool) -> Self {
105        self.include_class_methods = include;
106        self
107    }
108
109    /// Extract all doctests from Python source code
110    pub fn extract(&self, source: &str) -> Result<Vec<Doctest>> {
111        let mut doctests = Vec::new();
112        let lines: Vec<&str> = source.lines().collect();
113
114        let mut current_function: Option<String> = None;
115        let mut in_docstring = false;
116        let mut docstring_delim: Option<&str> = None;
117        let mut i = 0;
118
119        while i < lines.len() {
120            let line = lines[i];
121            let trimmed = line.trim();
122
123            // Track function definitions
124            if trimmed.starts_with("def ") {
125                if let Some(name) = Self::extract_function_name(trimmed) {
126                    current_function = Some(name);
127                }
128            }
129
130            // Track docstring boundaries
131            if !in_docstring {
132                if trimmed.starts_with("\"\"\"") || trimmed.starts_with("'''") {
133                    in_docstring = true;
134                    docstring_delim = Some(if trimmed.starts_with("\"\"\"") {
135                        "\"\"\""
136                    } else {
137                        "'''"
138                    });
139                    // Check if docstring ends on same line
140                    let rest = &trimmed[3..];
141                    if rest.contains(docstring_delim.unwrap()) {
142                        in_docstring = false;
143                        docstring_delim = None;
144                    }
145                }
146            } else {
147                // Inside docstring - look for >>> lines
148                if trimmed.starts_with(">>>") {
149                    let (doctest, consumed) =
150                        self.parse_doctest(&lines, i, current_function.as_deref())?;
151                    if let Some(dt) = doctest {
152                        doctests.push(dt);
153                    }
154                    i += consumed.saturating_sub(1);
155                }
156
157                // Check for docstring end
158                if let Some(delim) = docstring_delim {
159                    if trimmed.ends_with(delim) && trimmed.len() >= 3 {
160                        in_docstring = false;
161                        docstring_delim = None;
162                    }
163                }
164            }
165
166            i += 1;
167        }
168
169        Ok(doctests)
170    }
171
172    /// Extract function name from a def line
173    fn extract_function_name(line: &str) -> Option<String> {
174        // "def function_name(args):" -> "function_name"
175        let after_def = line.strip_prefix("def ")?.trim();
176        let paren_idx = after_def.find('(')?;
177        Some(after_def[..paren_idx].to_string())
178    }
179
180    /// Parse a single doctest starting at the given line
181    fn parse_doctest(
182        &self,
183        lines: &[&str],
184        start_line: usize,
185        function: Option<&str>,
186    ) -> Result<(Option<Doctest>, usize)> {
187        let first_line = lines
188            .get(start_line)
189            .ok_or_else(|| anyhow::anyhow!("Invalid line index: {}", start_line))?;
190
191        let trimmed = first_line.trim();
192        if !trimmed.starts_with(">>>") {
193            return Ok((None, 1));
194        }
195
196        // Extract input expression (may span multiple lines with ...)
197        let mut input = trimmed
198            .strip_prefix(">>> ")
199            .unwrap_or(&trimmed[3..])
200            .to_string();
201        let mut consumed = 1;
202        let mut next_idx = start_line + 1;
203
204        // Handle multi-line input with ... continuation
205        while next_idx < lines.len() {
206            let next_line = lines[next_idx].trim();
207            if let Some(stripped) = next_line.strip_prefix("...") {
208                let continuation = stripped.strip_prefix(' ').unwrap_or(stripped);
209                input.push('\n');
210                input.push_str(continuation);
211                consumed += 1;
212                next_idx += 1;
213            } else {
214                break;
215            }
216        }
217
218        // Extract expected output (all lines until next >>> or end of docstring)
219        let mut expected_lines = Vec::new();
220        while next_idx < lines.len() {
221            let next_line = lines[next_idx].trim();
222
223            // Stop conditions
224            if next_line.starts_with(">>>")
225                || next_line.starts_with("\"\"\"")
226                || next_line.starts_with("'''")
227                || next_line.is_empty()
228                    && next_idx + 1 < lines.len()
229                    && (lines[next_idx + 1].trim().starts_with(">>>")
230                        || lines[next_idx + 1].trim().starts_with("\"\"\"")
231                        || lines[next_idx + 1].trim().starts_with("'''"))
232            {
233                break;
234            }
235
236            // Skip empty lines at the start of expected output
237            if expected_lines.is_empty() && next_line.is_empty() {
238                consumed += 1;
239                next_idx += 1;
240                continue;
241            }
242
243            expected_lines.push(next_line);
244            consumed += 1;
245            next_idx += 1;
246        }
247
248        let expected = expected_lines.join("\n");
249
250        // Skip doctests with no expected output (statements like assignments)
251        if expected.is_empty() {
252            return Ok((None, consumed));
253        }
254
255        Ok((
256            Some(Doctest {
257                function: function.unwrap_or("<module>").to_string(),
258                input,
259                expected,
260                line: start_line + 1, // 1-indexed
261            }),
262            consumed,
263        ))
264    }
265
266    /// Extract doctests to the JSON format specified in the spec
267    pub fn extract_to_result(&self, source: &str, module: &str) -> Result<DoctestResult> {
268        let doctests = self.extract(source)?;
269
270        // Group by function
271        let mut by_function: std::collections::HashMap<String, Vec<Doctest>> =
272            std::collections::HashMap::new();
273
274        for dt in doctests {
275            by_function.entry(dt.function.clone()).or_default().push(dt);
276        }
277
278        let function_doctests: Vec<FunctionDoctests> = by_function
279            .into_iter()
280            .map(|(function, examples)| FunctionDoctests {
281                function,
282                signature: None,
283                docstring: None,
284                examples,
285            })
286            .collect();
287
288        Ok(DoctestResult {
289            source: module.to_string(),
290            module: module.to_string(),
291            doctests: function_doctests,
292        })
293    }
294}
295
296/// Convert a doctest to a Rust doc test assertion
297pub fn doctest_to_rust_assertion(doctest: &Doctest) -> String {
298    // Simple conversion: >>> f(x) + expected -> assert_eq!(f(x), expected);
299    format!("assert_eq!({}, {});", doctest.input, doctest.expected)
300}
301
302/// Generate Rust doc comment with doc tests
303pub fn generate_rust_doc_tests(doctests: &[Doctest]) -> String {
304    if doctests.is_empty() {
305        return String::new();
306    }
307
308    let mut lines = vec!["/// ```".to_string()];
309    for dt in doctests {
310        lines.push(format!("/// {}", doctest_to_rust_assertion(dt)));
311    }
312    lines.push("/// ```".to_string());
313    lines.join("\n")
314}
315
/// Unit tests for the doctest extractor: extraction from Python source,
/// conversion to Rust assertions, and the derived traits / serde round-trips
/// of the public data types.
#[cfg(test)]
mod tests {
    use super::*;

    // =========================================================================
    // RED TESTS - These define the expected behavior (GH-173)
    // =========================================================================

    #[test]
    fn test_extract_simple_doctest() {
        let source = r#"
def square(x: int) -> int:
    """Compute square.

    >>> square(4)
    16
    """
    return x * x
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();

        assert_eq!(doctests.len(), 1);
        assert_eq!(doctests[0].function, "square");
        assert_eq!(doctests[0].input, "square(4)");
        assert_eq!(doctests[0].expected, "16");
    }

    #[test]
    fn test_extract_multiple_doctests() {
        let source = r#"
def square(x: int) -> int:
    """Compute square.

    >>> square(4)
    16
    >>> square(-3)
    9
    """
    return x * x
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();

        assert_eq!(doctests.len(), 2);
        assert_eq!(doctests[0].input, "square(4)");
        assert_eq!(doctests[0].expected, "16");
        assert_eq!(doctests[1].input, "square(-3)");
        assert_eq!(doctests[1].expected, "9");
    }

    #[test]
    fn test_extract_multiline_continuation() {
        let source = r#"
def add_all(a, b, c, d):
    """Add numbers.

    >>> add_all(1,
    ...         2,
    ...         3,
    ...         4)
    10
    """
    return a + b + c + d
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();

        // The `...` lines are folded into a single input, so only one doctest
        // results and the output line after them is its expected value.
        assert_eq!(doctests.len(), 1);
        assert!(doctests[0].input.contains("add_all(1,"));
        assert!(doctests[0].input.contains("2,"));
        assert_eq!(doctests[0].expected, "10");
    }

    #[test]
    fn test_extract_string_output() {
        let source = r#"
def greet(name: str) -> str:
    """Greet someone.

    >>> greet("World")
    'Hello, World!'
    """
    return f"Hello, {name}!"
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();

        // The expected text is kept verbatim, Python quotes included.
        assert_eq!(doctests.len(), 1);
        assert_eq!(doctests[0].input, "greet(\"World\")");
        assert_eq!(doctests[0].expected, "'Hello, World!'");
    }

    #[test]
    fn test_extract_multiple_functions() {
        let source = r#"
def add(a: int, b: int) -> int:
    """Add two numbers.

    >>> add(1, 2)
    3
    """
    return a + b

def multiply(a: int, b: int) -> int:
    """Multiply two numbers.

    >>> multiply(3, 4)
    12
    """
    return a * b
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();

        // Each example is attributed to the `def` that precedes it.
        assert_eq!(doctests.len(), 2);
        assert_eq!(doctests[0].function, "add");
        assert_eq!(doctests[0].input, "add(1, 2)");
        assert_eq!(doctests[1].function, "multiply");
        assert_eq!(doctests[1].input, "multiply(3, 4)");
    }

    #[test]
    fn test_extract_list_output() {
        let source = r#"
def range_list(n: int) -> list:
    """Create range list.

    >>> range_list(3)
    [0, 1, 2]
    """
    return list(range(n))
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();

        assert_eq!(doctests.len(), 1);
        assert_eq!(doctests[0].expected, "[0, 1, 2]");
    }

    #[test]
    fn test_extract_dict_output() {
        let source = r#"
def make_dict(key, value):
    """Create dict.

    >>> make_dict('a', 1)
    {'a': 1}
    """
    return {key: value}
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();

        assert_eq!(doctests.len(), 1);
        assert_eq!(doctests[0].expected, "{'a': 1}");
    }

    #[test]
    fn test_extract_multiline_output() {
        let source = r#"
def describe(x):
    """Describe value.

    >>> describe(42)
    Value: 42
    Type: int
    """
    print(f"Value: {x}")
    print(f"Type: {type(x).__name__}")
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();

        // Consecutive output lines belong to the same example.
        assert_eq!(doctests.len(), 1);
        assert!(doctests[0].expected.contains("Value: 42"));
        assert!(doctests[0].expected.contains("Type: int"));
    }

    #[test]
    fn test_skip_doctests_without_output() {
        let source = r#"
def side_effect():
    """Do something.

    >>> x = side_effect()
    >>> print(x)
    42
    """
    return 42
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();

        // Should only capture the print(x) -> 42 doctest
        assert_eq!(doctests.len(), 1);
        assert_eq!(doctests[0].input, "print(x)");
        assert_eq!(doctests[0].expected, "42");
    }

    #[test]
    fn test_single_quote_docstring() {
        let source = r#"
def foo():
    '''Single quote docstring.

    >>> foo()
    'bar'
    '''
    return 'bar'
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();

        // `'bar'` starts with one quote, not three, so it is output rather
        // than the closing delimiter.
        assert_eq!(doctests.len(), 1);
        assert_eq!(doctests[0].expected, "'bar'");
    }

    #[test]
    fn test_extract_to_result_json_format() {
        let source = r#"
def square(x: int) -> int:
    """Compute square.

    >>> square(4)
    16
    """
    return x * x
"#;

        let extractor = DoctestExtractor::new();
        let result = extractor.extract_to_result(source, "math_utils").unwrap();

        assert_eq!(result.module, "math_utils");
        assert_eq!(result.doctests.len(), 1);
        assert_eq!(result.doctests[0].function, "square");
        assert_eq!(result.doctests[0].examples.len(), 1);
    }

    #[test]
    fn test_doctest_to_rust_assertion() {
        let dt = Doctest {
            function: "square".to_string(),
            input: "square(4)".to_string(),
            expected: "16".to_string(),
            line: 5,
        };

        let rust = doctest_to_rust_assertion(&dt);
        assert_eq!(rust, "assert_eq!(square(4), 16);");
    }

    #[test]
    fn test_generate_rust_doc_tests() {
        let doctests = vec![
            Doctest {
                function: "square".to_string(),
                input: "square(4)".to_string(),
                expected: "16".to_string(),
                line: 5,
            },
            Doctest {
                function: "square".to_string(),
                input: "square(-3)".to_string(),
                expected: "9".to_string(),
                line: 7,
            },
        ];

        let rust_doc = generate_rust_doc_tests(&doctests);
        assert!(rust_doc.contains("/// ```"));
        assert!(rust_doc.contains("assert_eq!(square(4), 16);"));
        assert!(rust_doc.contains("assert_eq!(square(-3), 9);"));
    }

    #[test]
    fn test_line_numbers_are_correct() {
        let source = r#"
def foo():
    """Test.

    >>> foo()
    42
    """
    return 42
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();

        // The raw string starts with a newline, so line 1 is empty and
        // >>> foo() is on line 5 (1-indexed)
        assert_eq!(doctests[0].line, 5);
    }

    #[test]
    fn test_empty_source() {
        let source = "";
        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();
        assert!(doctests.is_empty());
    }

    #[test]
    fn test_no_doctests() {
        let source = r#"
def foo():
    """No doctests here."""
    return 42
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();
        assert!(doctests.is_empty());
    }

    #[test]
    fn test_real_stdlib_example_len() {
        // Simulated stdlib-style doctest from str.len; the last example also
        // exercises non-ASCII text in the input.
        let source = r#"
def length(s: str) -> int:
    """Return the length of s.

    >>> length("hello")
    5
    >>> length("")
    0
    >>> length("日本語")
    3
    """
    return len(s)
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();

        assert_eq!(doctests.len(), 3);
        assert_eq!(doctests[0].expected, "5");
        assert_eq!(doctests[1].expected, "0");
        assert_eq!(doctests[2].expected, "3");
    }

    #[test]
    fn test_boolean_output() {
        let source = r#"
def is_even(n: int) -> bool:
    """Check if n is even.

    >>> is_even(4)
    True
    >>> is_even(3)
    False
    """
    return n % 2 == 0
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();

        assert_eq!(doctests.len(), 2);
        assert_eq!(doctests[0].expected, "True");
        assert_eq!(doctests[1].expected, "False");
    }

    #[test]
    fn test_none_output() {
        let source = r#"
def returns_none():
    """Return None.

    >>> returns_none()

    >>> returns_none() is None
    True
    """
    return None
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();

        // First doctest has no output (None doesn't print), second has True
        assert!(!doctests.is_empty());
        assert!(doctests.iter().any(|dt| dt.expected == "True"));
    }

    #[test]
    fn test_float_output() {
        let source = r#"
def divide(a: float, b: float) -> float:
    """Divide a by b.

    >>> divide(10.0, 4.0)
    2.5
    """
    return a / b
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();

        assert_eq!(doctests.len(), 1);
        assert_eq!(doctests[0].expected, "2.5");
    }

    // DEPYLER-COVERAGE-95: Additional tests for untested components

    #[test]
    fn test_doctest_struct_debug() {
        let dt = Doctest {
            function: "test_func".to_string(),
            input: "test_func(1)".to_string(),
            expected: "42".to_string(),
            line: 10,
        };

        let debug = format!("{:?}", dt);
        assert!(debug.contains("Doctest"));
        assert!(debug.contains("test_func"));
        assert!(debug.contains("42"));
        assert!(debug.contains("10"));
    }

    #[test]
    fn test_doctest_struct_clone() {
        let dt = Doctest {
            function: "original".to_string(),
            input: "original()".to_string(),
            expected: "1".to_string(),
            line: 5,
        };

        let cloned = dt.clone();
        assert_eq!(cloned.function, "original");
        assert_eq!(cloned.input, "original()");
        assert_eq!(cloned.expected, "1");
        assert_eq!(cloned.line, 5);
    }

    #[test]
    fn test_doctest_struct_partial_eq() {
        let dt1 = Doctest {
            function: "f".to_string(),
            input: "f()".to_string(),
            expected: "1".to_string(),
            line: 1,
        };

        let dt2 = Doctest {
            function: "f".to_string(),
            input: "f()".to_string(),
            expected: "1".to_string(),
            line: 1,
        };

        let dt3 = Doctest {
            function: "g".to_string(),
            input: "g()".to_string(),
            expected: "2".to_string(),
            line: 2,
        };

        assert_eq!(dt1, dt2);
        assert_ne!(dt1, dt3);
    }

    #[test]
    fn test_doctest_result_default() {
        let result = DoctestResult::default();
        assert!(result.source.is_empty());
        assert!(result.module.is_empty());
        assert!(result.doctests.is_empty());
    }

    #[test]
    fn test_doctest_result_debug() {
        let result = DoctestResult {
            source: "test.py".to_string(),
            module: "test_module".to_string(),
            doctests: vec![],
        };

        let debug = format!("{:?}", result);
        assert!(debug.contains("DoctestResult"));
        assert!(debug.contains("test.py"));
        assert!(debug.contains("test_module"));
    }

    #[test]
    fn test_doctest_result_clone() {
        let result = DoctestResult {
            source: "source.py".to_string(),
            module: "module".to_string(),
            doctests: vec![FunctionDoctests {
                function: "func".to_string(),
                signature: Some("func(x: int) -> int".to_string()),
                docstring: Some("Doc".to_string()),
                examples: vec![],
            }],
        };

        let cloned = result.clone();
        assert_eq!(cloned.source, "source.py");
        assert_eq!(cloned.module, "module");
        assert_eq!(cloned.doctests.len(), 1);
    }

    #[test]
    fn test_function_doctests_debug() {
        let fd = FunctionDoctests {
            function: "my_func".to_string(),
            signature: Some("my_func() -> None".to_string()),
            docstring: Some("Docstring text".to_string()),
            examples: vec![],
        };

        let debug = format!("{:?}", fd);
        assert!(debug.contains("FunctionDoctests"));
        assert!(debug.contains("my_func"));
    }

    #[test]
    fn test_function_doctests_clone() {
        let fd = FunctionDoctests {
            function: "func".to_string(),
            signature: None,
            docstring: None,
            examples: vec![Doctest {
                function: "func".to_string(),
                input: "func()".to_string(),
                expected: "42".to_string(),
                line: 1,
            }],
        };

        let cloned = fd.clone();
        assert_eq!(cloned.function, "func");
        assert!(cloned.signature.is_none());
        assert!(cloned.docstring.is_none());
        assert_eq!(cloned.examples.len(), 1);
    }

    #[test]
    fn test_doctest_extractor_default() {
        // Default derive uses false for bools, new() uses true
        let extractor: DoctestExtractor = Default::default();
        assert!(!extractor.include_module_doctests); // Default is false
        assert!(!extractor.include_class_methods); // Default is false

        // new() sets them to true
        let extractor_new = DoctestExtractor::new();
        assert!(extractor_new.include_module_doctests);
        assert!(extractor_new.include_class_methods);
    }

    #[test]
    fn test_doctest_extractor_debug() {
        let extractor = DoctestExtractor::new();
        let debug = format!("{:?}", extractor);
        assert!(debug.contains("DoctestExtractor"));
        assert!(debug.contains("include_module_doctests"));
        assert!(debug.contains("include_class_methods"));
    }

    #[test]
    fn test_doctest_extractor_clone() {
        let extractor = DoctestExtractor::new()
            .with_module_doctests(false)
            .with_class_methods(false);

        let cloned = extractor.clone();
        assert!(!cloned.include_module_doctests);
        assert!(!cloned.include_class_methods);
    }

    #[test]
    fn test_with_module_doctests_builder() {
        let extractor = DoctestExtractor::new().with_module_doctests(false);
        assert!(!extractor.include_module_doctests);
        assert!(extractor.include_class_methods); // Unchanged

        let extractor2 = DoctestExtractor::new().with_module_doctests(true);
        assert!(extractor2.include_module_doctests);
    }

    #[test]
    fn test_with_class_methods_builder() {
        let extractor = DoctestExtractor::new().with_class_methods(false);
        assert!(extractor.include_module_doctests); // Unchanged
        assert!(!extractor.include_class_methods);

        let extractor2 = DoctestExtractor::new().with_class_methods(true);
        assert!(extractor2.include_class_methods);
    }

    #[test]
    fn test_builder_chaining() {
        let extractor = DoctestExtractor::new()
            .with_module_doctests(false)
            .with_class_methods(false);

        assert!(!extractor.include_module_doctests);
        assert!(!extractor.include_class_methods);
    }

    #[test]
    fn test_generate_rust_doc_tests_empty() {
        let doctests: Vec<Doctest> = vec![];
        let result = generate_rust_doc_tests(&doctests);
        assert!(result.is_empty());
    }

    #[test]
    fn test_generate_rust_doc_tests_single() {
        let doctests = vec![Doctest {
            function: "f".to_string(),
            input: "f(1)".to_string(),
            expected: "2".to_string(),
            line: 1,
        }];

        let result = generate_rust_doc_tests(&doctests);
        assert!(result.contains("/// ```"));
        assert!(result.contains("assert_eq!(f(1), 2);"));
    }

    #[test]
    fn test_extract_function_name_simple() {
        let result = DoctestExtractor::extract_function_name("def foo():");
        assert_eq!(result, Some("foo".to_string()));
    }

    #[test]
    fn test_extract_function_name_with_args() {
        let result = DoctestExtractor::extract_function_name("def bar(x: int, y: str) -> bool:");
        assert_eq!(result, Some("bar".to_string()));
    }

    #[test]
    fn test_extract_function_name_underscore() {
        let result = DoctestExtractor::extract_function_name("def _private_func(arg):");
        assert_eq!(result, Some("_private_func".to_string()));
    }

    #[test]
    fn test_extract_function_name_invalid() {
        // Lines that are not `def` statements yield no name.
        let result = DoctestExtractor::extract_function_name("class Foo:");
        assert!(result.is_none());

        let result2 = DoctestExtractor::extract_function_name("x = 1");
        assert!(result2.is_none());
    }

    #[test]
    fn test_doctest_serialization() {
        let dt = Doctest {
            function: "test".to_string(),
            input: "test()".to_string(),
            expected: "42".to_string(),
            line: 5,
        };

        let json = serde_json::to_string(&dt).unwrap();
        assert!(json.contains("\"function\":\"test\""));
        assert!(json.contains("\"input\":\"test()\""));
        assert!(json.contains("\"expected\":\"42\""));
        assert!(json.contains("\"line\":5"));
    }

    #[test]
    fn test_doctest_deserialization() {
        let json = r#"{"function":"f","input":"f()","expected":"1","line":10}"#;
        let dt: Doctest = serde_json::from_str(json).unwrap();

        assert_eq!(dt.function, "f");
        assert_eq!(dt.input, "f()");
        assert_eq!(dt.expected, "1");
        assert_eq!(dt.line, 10);
    }

    #[test]
    fn test_doctest_result_serialization() {
        let result = DoctestResult {
            source: "test.py".to_string(),
            module: "test".to_string(),
            doctests: vec![],
        };

        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("\"source\":\"test.py\""));
        assert!(json.contains("\"module\":\"test\""));
    }

    #[test]
    fn test_extract_to_result_grouping() {
        let source = r#"
def foo():
    """Foo.

    >>> foo()
    1
    """
    return 1

def bar():
    """Bar.

    >>> bar()
    2
    """
    return 2
"#;

        let extractor = DoctestExtractor::new();
        let result = extractor.extract_to_result(source, "test_mod").unwrap();

        // `source` mirrors the module name passed in; only the number of
        // groups is checked, not their order.
        assert_eq!(result.module, "test_mod");
        assert_eq!(result.source, "test_mod");
        assert_eq!(result.doctests.len(), 2);
    }

    #[test]
    fn test_module_level_doctest() {
        let source = r#"
"""Module docstring.

>>> 1 + 1
2
"""

def foo():
    pass
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();

        // Module-level doctest should be captured with <module> function name
        assert!(!doctests.is_empty());
        assert!(doctests.iter().any(|dt| dt.function == "<module>"));
    }

    #[test]
    fn test_doctest_with_whitespace() {
        let source = r#"
def foo():
    """Test with whitespace.

    >>>    foo()
    42
    """
    return 42
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();

        assert_eq!(doctests.len(), 1);
        // Only one space after `>>>` is removed by the extractor, so the
        // remaining leading spaces are trimmed here before comparing.
        assert_eq!(doctests[0].input.trim(), "foo()");
    }

    #[test]
    fn test_inline_docstring() {
        let source = r#"
def foo():
    """Inline docstring. >>> foo() should not be parsed here."""
    return 42
"#;

        let extractor = DoctestExtractor::new();
        let doctests = extractor.extract(source).unwrap();

        // The docstring opens and closes on one line and the `>>>` is not at
        // the start of a line, so nothing should be parsed as a doctest.
        assert!(doctests.is_empty());
    }
}