Skip to main content

lean_agent_core/
mine.rs

1//! Mining replayable proof tasks out of a Lean project.
2//!
3//! A task points at one precise span the agent is allowed to rewrite: a `sorry`
4//! or `admit` placeholder, or the declaration around a compiler error. Each
5//! [`MineTask`] carries enough context (imports, enclosing declaration, the goal
6//! state when available) plus an exact `target_span` and `allowed_edit` so a
7//! later step can splice a candidate proof back in safely.
8//!
9//! Placeholder mining (`sorry`, `admit`) is a pure text scan that skips comments
10//! and string literals, so it needs no Lean toolchain. Error mining runs the
11//! file through the tracer and is therefore backed by real diagnostics.
12
13use crate::writer::JsonlWriter;
14use crate::{
15    Declaration, Diagnostic, DiagnosticSeverity, GoalState, LeanFile, Provenance, Result,
16    TraceConfig, capture_provenance, collect_imports, detect_declaration, discover_lean_files,
17    run_lean_file,
18};
19use camino::{Utf8Component, Utf8Path, Utf8PathBuf};
20use serde::{Deserialize, Serialize};
21use std::collections::HashSet;
22use std::time::Duration;
23use tracing::{info, warn};
24
25/// Standing instruction attached to every mined task.
26const INSTRUCTIONS: &str = "Replace only the target span with a Lean proof that compiles.";
27
28/// Default discovery skip pattern so build output is never mined.
29const DEFAULT_EXCLUDE: &str = ".lake/";
30
31/// What a mine run looks for.
32#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
33#[serde(rename_all = "snake_case")]
34pub enum MineKind {
35    /// The `sorry` placeholder term/tactic.
36    Sorry,
37    /// The `admit` placeholder tactic.
38    Admit,
39    /// A compiler error, backed by a tracer diagnostic.
40    Error,
41}
42
43impl MineKind {
44    /// Placeholder keyword for the text-scan kinds, or `None` for [`MineKind::Error`].
45    #[must_use]
46    pub const fn placeholder_keyword(self) -> Option<&'static str> {
47        match self {
48            Self::Sorry => Some("sorry"),
49            Self::Admit => Some("admit"),
50            Self::Error => None,
51        }
52    }
53}
54
55/// The exact span a candidate proof must replace.
56///
57/// Lines are one-based; columns are zero-based codepoint offsets within a line,
58/// with `end_column` exclusive. `text` is the verbatim current content of the
59/// span, so `source_before + text + source_after` reproduces the file byte for
60/// byte.
61#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
62pub struct TargetSpan {
63    /// One-based first line of the span.
64    pub start_line: u32,
65    /// Zero-based first column of the span.
66    pub start_column: u32,
67    /// One-based last line of the span.
68    pub end_line: u32,
69    /// Zero-based exclusive end column on the last line.
70    pub end_column: u32,
71    /// Verbatim current text of the span.
72    pub text: String,
73}
74
75/// The single contiguous line range a replay step is permitted to edit.
76#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
77pub struct AllowedEdit {
78    /// File the edit applies to.
79    pub file: Utf8PathBuf,
80    /// One-based first editable line.
81    pub start_line: u32,
82    /// One-based last editable line.
83    pub end_line: u32,
84}
85
86/// One mined, replayable proof task.
87#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
88pub struct MineTask {
89    /// Stable identifier, shaped `Module.decl:line` (or `Module:line` when no
90    /// declaration name is detected), with a `:column` suffix on collision.
91    pub task_id: String,
92    /// Project the task was mined from.
93    pub project: String,
94    /// Source file holding the span.
95    pub file: LeanFile,
96    /// Enclosing declaration, when one is detectable.
97    #[serde(skip_serializing_if = "Option::is_none")]
98    pub declaration: Option<Declaration>,
99    /// What this task targets.
100    pub kind: MineKind,
101    /// One-based line of the placeholder token or error site.
102    pub line: u32,
103    /// Zero-based column of the placeholder token or error site.
104    pub column: u32,
105    /// Import lines in scope, in source order.
106    pub imports: Vec<String>,
107    /// File text before the target span.
108    pub source_before: String,
109    /// The exact span to replace.
110    pub target_span: TargetSpan,
111    /// File text after the target span.
112    pub source_after: String,
113    /// Backing diagnostic for error tasks; absent for placeholder tasks.
114    #[serde(skip_serializing_if = "Option::is_none")]
115    pub diagnostic: Option<Diagnostic>,
116    /// Goal state when the backing diagnostic exposed one.
117    #[serde(skip_serializing_if = "Option::is_none")]
118    pub goal_state: Option<GoalState>,
119    /// The single span the replay step may edit.
120    pub allowed_edit: AllowedEdit,
121    /// Standing instruction for the agent.
122    pub instructions: String,
123}
124
125/// Runtime options for a mine run.
126#[derive(Clone, Debug, Eq, PartialEq)]
127pub struct MineOptions {
128    /// What to mine.
129    pub kind: MineKind,
130    /// Project name stamped onto every task.
131    pub project: String,
132    /// Lake workspace root, also used to derive module names.
133    pub lake_root: Utf8PathBuf,
134    /// Search directories recursively.
135    pub recursive: bool,
136    /// Per-file Lean timeout for error mining.
137    pub timeout: Duration,
138    /// Path substrings to skip during discovery.
139    pub exclude: Vec<String>,
140}
141
142impl MineOptions {
143    /// Build options for `kind` rooted at `lake_root`, with the build directory
144    /// excluded and a sixty-second per-file timeout.
145    #[must_use]
146    pub fn new(kind: MineKind, project: String, lake_root: Utf8PathBuf) -> Self {
147        Self {
148            kind,
149            project,
150            lake_root,
151            recursive: false,
152            timeout: Duration::from_secs(60),
153            exclude: vec![DEFAULT_EXCLUDE.to_owned()],
154        }
155    }
156}
157
158/// Outcome counts from a single mine run.
159#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
160pub struct MineSummary {
161    /// Lean files read and scanned.
162    pub files_scanned: usize,
163    /// Task records written.
164    pub tasks_written: usize,
165}
166
167/// Discover files under `roots`, mine tasks of the requested kind, and stream
168/// them to `writer`.
169pub async fn run_mine(
170    options: &MineOptions,
171    roots: &[Utf8PathBuf],
172    writer: &mut JsonlWriter,
173) -> Result<MineSummary> {
174    let files = collect_files(options, roots)?;
175    info!(count = files.len(), kind = ?options.kind, "discovered Lean files");
176
177    let provenance = match options.kind {
178        MineKind::Error => capture_provenance(options.lake_root.as_path()).await,
179        MineKind::Sorry | MineKind::Admit => Provenance::default(),
180    };
181
182    let mut summary = MineSummary::default();
183    for file in files {
184        let source = match std::fs::read_to_string(file.as_path()) {
185            Ok(source) => source,
186            Err(err) => {
187                warn!(%file, error = %err, "skipping unreadable file");
188                continue;
189            }
190        };
191        summary.files_scanned += 1;
192
193        let tasks = match options.kind {
194            MineKind::Sorry | MineKind::Admit => mine_placeholders(
195                &file,
196                &source,
197                options.kind,
198                &options.project,
199                options.lake_root.as_path(),
200            ),
201            MineKind::Error => {
202                let mut config =
203                    TraceConfig::new(options.lake_root.clone()).timeout(options.timeout);
204                config.include_warnings = false;
205                let trace = run_lean_file(&config, &provenance, file.clone()).await;
206                mine_errors(
207                    &file,
208                    &source,
209                    &trace.diagnostics,
210                    &options.project,
211                    options.lake_root.as_path(),
212                )
213            }
214        };
215
216        for task in &tasks {
217            writer.write_record(task)?;
218            summary.tasks_written += 1;
219        }
220    }
221
222    writer.flush()?;
223    Ok(summary)
224}
225
226/// Mine `sorry`/`admit` placeholder tasks from one file's source.
227///
228/// This is a pure text scan: no filesystem access beyond the passed source and
229/// no Lean process. Returns an empty vector for [`MineKind::Error`].
230#[must_use]
231pub fn mine_placeholders(
232    file: &LeanFile,
233    source: &str,
234    kind: MineKind,
235    project: &str,
236    root: &Utf8Path,
237) -> Vec<MineTask> {
238    let Some(keyword) = kind.placeholder_keyword() else {
239        return Vec::new();
240    };
241    let lines: Vec<&str> = source.lines().collect();
242    let imports = collect_imports(&lines);
243    let module = module_name(file.as_path(), root);
244    let mut seen_ids: HashSet<String> = HashSet::new();
245    let mut tasks = Vec::new();
246
247    for hit in scan_placeholders(source, keyword) {
248        let target_idx = (hit.line.saturating_sub(1)) as usize;
249        let declaration = detect_declaration(&lines, target_idx);
250        let span = SpanBytes {
251            start_byte: hit.byte_start,
252            end_byte: hit.byte_end,
253            start_line: hit.line,
254            start_column: hit.column,
255            end_line: hit.line,
256            end_column: hit.end_column,
257        };
258        tasks.push(build_task(
259            file,
260            source,
261            project,
262            &module,
263            &imports,
264            declaration,
265            kind,
266            hit.line,
267            hit.column,
268            &span,
269            None,
270            None,
271            &mut seen_ids,
272        ));
273    }
274    tasks
275}
276
277/// Mine error tasks from one file's source plus its tracer diagnostics.
278///
279/// Each error diagnostic maps to the enclosing declaration (or the error line
280/// when no declaration is found). Diagnostics that share a span are de-duplicated
281/// so one broken declaration yields one task.
282#[must_use]
283pub fn mine_errors(
284    file: &LeanFile,
285    source: &str,
286    diagnostics: &[Diagnostic],
287    project: &str,
288    root: &Utf8Path,
289) -> Vec<MineTask> {
290    let lines: Vec<&str> = source.lines().collect();
291    let spans = line_content_spans(source);
292    let imports = collect_imports(&lines);
293    let module = module_name(file.as_path(), root);
294    let mut seen_ids: HashSet<String> = HashSet::new();
295    let mut seen_spans: HashSet<(usize, usize)> = HashSet::new();
296    let mut tasks = Vec::new();
297
298    for diag in diagnostics
299        .iter()
300        .filter(|d| d.severity == DiagnosticSeverity::Error)
301    {
302        if let Some(diag_file) = &diag.file {
303            if diag_file.file_name() != file.as_path().file_name() {
304                continue;
305            }
306        }
307        let Some(line) = diag.line else { continue };
308        let target_idx = (line.saturating_sub(1)) as usize;
309        if target_idx >= spans.len() {
310            continue;
311        }
312
313        let declaration = detect_declaration(&lines, target_idx);
314        let span = match declaration_span(declaration.as_ref(), line, source, &spans) {
315            Some(span) => span,
316            None => continue,
317        };
318        if !seen_spans.insert((span.start_byte, span.end_byte)) {
319            continue;
320        }
321
322        let column = diag.column.unwrap_or(0);
323        tasks.push(build_task(
324            file,
325            source,
326            project,
327            &module,
328            &imports,
329            declaration,
330            MineKind::Error,
331            line,
332            column,
333            &span,
334            Some(diag.clone()),
335            diag.goal_state.clone(),
336            &mut seen_ids,
337        ));
338    }
339    tasks
340}
341
342/// Byte and line/column coordinates of one span within a source string.
343struct SpanBytes {
344    start_byte: usize,
345    end_byte: usize,
346    start_line: u32,
347    start_column: u32,
348    end_line: u32,
349    end_column: u32,
350}
351
352/// Resolve the span to replace for an error: the enclosing declaration when one
353/// is found, otherwise the single error line.
354fn declaration_span(
355    declaration: Option<&Declaration>,
356    error_line: u32,
357    source: &str,
358    spans: &[(usize, usize)],
359) -> Option<SpanBytes> {
360    let (start_line, end_line) = match declaration {
361        Some(decl) => (decl.start_line, decl.end_line),
362        None => (error_line, error_line),
363    };
364    let start = spans.get((start_line.saturating_sub(1)) as usize)?;
365    let end = spans.get((end_line.saturating_sub(1)) as usize)?;
366    let end_column = codepoint_len(source.get(end.0..end.1).unwrap_or(""));
367    Some(SpanBytes {
368        start_byte: start.0,
369        end_byte: end.1,
370        start_line,
371        start_column: 0,
372        end_line,
373        end_column,
374    })
375}
376
377/// Assemble one task record from a resolved span.
378#[allow(clippy::too_many_arguments)]
379fn build_task(
380    file: &LeanFile,
381    source: &str,
382    project: &str,
383    module: &str,
384    imports: &[String],
385    declaration: Option<Declaration>,
386    kind: MineKind,
387    line: u32,
388    column: u32,
389    span: &SpanBytes,
390    diagnostic: Option<Diagnostic>,
391    goal_state: Option<GoalState>,
392    seen_ids: &mut HashSet<String>,
393) -> MineTask {
394    let text = source
395        .get(span.start_byte..span.end_byte)
396        .unwrap_or("")
397        .to_owned();
398    let source_before = source.get(..span.start_byte).unwrap_or("").to_owned();
399    let source_after = source.get(span.end_byte..).unwrap_or("").to_owned();
400
401    let decl_name = declaration.as_ref().and_then(|decl| decl.name.clone());
402    let base_id = match &decl_name {
403        Some(name) => format!("{module}.{name}:{line}"),
404        None => format!("{module}:{line}"),
405    };
406    let task_id = if seen_ids.contains(&base_id) {
407        format!("{base_id}:{column}")
408    } else {
409        base_id
410    };
411    seen_ids.insert(task_id.clone());
412
413    MineTask {
414        task_id,
415        project: project.to_owned(),
416        file: file.clone(),
417        declaration,
418        kind,
419        line,
420        column,
421        imports: imports.to_vec(),
422        source_before,
423        target_span: TargetSpan {
424            start_line: span.start_line,
425            start_column: span.start_column,
426            end_line: span.end_line,
427            end_column: span.end_column,
428            text,
429        },
430        source_after,
431        diagnostic,
432        goal_state,
433        allowed_edit: AllowedEdit {
434            file: file.as_path().clone(),
435            start_line: span.start_line,
436            end_line: span.end_line,
437        },
438        instructions: INSTRUCTIONS.to_owned(),
439    }
440}
441
442/// Discover, sort, de-duplicate, and exclude-filter files under `roots`.
443fn collect_files(options: &MineOptions, roots: &[Utf8PathBuf]) -> Result<Vec<LeanFile>> {
444    let mut files = Vec::new();
445    for root in roots {
446        files.extend(discover_lean_files(root, options.recursive)?);
447    }
448    files.sort();
449    files.dedup();
450    files.retain(|file| {
451        let path = file.as_path().as_str();
452        !options
453            .exclude
454            .iter()
455            .any(|pattern| path.contains(pattern.as_str()))
456    });
457    Ok(files)
458}
459
460/// Derive a dotted module name from a file path relative to the project root.
461pub(crate) fn module_name(file: &Utf8Path, root: &Utf8Path) -> String {
462    let relative = file.strip_prefix(root).unwrap_or(file);
463    let no_extension = relative.with_extension("");
464    let parts: Vec<&str> = no_extension
465        .components()
466        .filter_map(|component| match component {
467            Utf8Component::Normal(part) => Some(part),
468            _ => None,
469        })
470        .collect();
471    if parts.is_empty() {
472        no_extension.as_str().to_owned()
473    } else {
474        parts.join(".")
475    }
476}
477
478/// Codepoint length of a string slice.
479fn codepoint_len(text: &str) -> u32 {
480    text.chars().count() as u32
481}
482
483/// Per-line `(content_start_byte, content_end_byte)` pairs, newline excluded.
484///
485/// Line indexing matches `str::lines`, so index `k` is one-based line `k + 1`.
486fn line_content_spans(source: &str) -> Vec<(usize, usize)> {
487    let bytes = source.as_bytes();
488    let mut spans = Vec::new();
489    let mut start = 0usize;
490    let mut i = 0usize;
491    while i < bytes.len() {
492        if bytes[i] == b'\n' {
493            let mut end = i;
494            if end > start && bytes[end - 1] == b'\r' {
495                end -= 1;
496            }
497            spans.push((start, end));
498            start = i + 1;
499        }
500        i += 1;
501    }
502    if start < bytes.len() {
503        let mut end = bytes.len();
504        if end > start && bytes[end - 1] == b'\r' {
505            end -= 1;
506        }
507        spans.push((start, end));
508    }
509    spans
510}
511
512/// A placeholder occurrence found by the scanner.
513struct Hit {
514    byte_start: usize,
515    byte_end: usize,
516    line: u32,
517    column: u32,
518    end_column: u32,
519}
520
521/// Lexer state for the placeholder scan.
522enum ScanState {
523    Normal,
524    LineComment,
525    BlockComment(u32),
526    Str,
527}
528
529/// True for characters that continue a Lean identifier-like word.
530fn is_word_char(c: char) -> bool {
531    c.is_alphanumeric() || c == '_' || c == '\''
532}
533
534/// Character at `idx`, if any.
535fn peek(chars: &[(usize, char)], idx: usize) -> Option<char> {
536    chars.get(idx).map(|&(_, c)| c)
537}
538
539/// True when `chars[i..j]` equals `keyword` codepoint for codepoint.
540fn word_matches(chars: &[(usize, char)], i: usize, j: usize, keyword: &str) -> bool {
541    let expected: Vec<char> = keyword.chars().collect();
542    if j - i != expected.len() {
543        return false;
544    }
545    chars[i..j]
546        .iter()
547        .map(|&(_, c)| c)
548        .eq(expected.iter().copied())
549}
550
551/// Scan `source` for whole-word `keyword` occurrences outside comments and
552/// string literals, skipping qualified uses preceded by `.`.
553fn scan_placeholders(source: &str, keyword: &str) -> Vec<Hit> {
554    let chars: Vec<(usize, char)> = source.char_indices().collect();
555    let n = chars.len();
556    let mut hits = Vec::new();
557    let mut i = 0usize;
558    let mut line = 1u32;
559    let mut col = 0u32;
560    let mut state = ScanState::Normal;
561
562    while i < n {
563        let (byte, c) = chars[i];
564        match state {
565            ScanState::LineComment => {
566                if c == '\n' {
567                    state = ScanState::Normal;
568                    i += 1;
569                    line += 1;
570                    col = 0;
571                } else {
572                    i += 1;
573                    col += 1;
574                }
575            }
576            ScanState::BlockComment(depth) => {
577                if c == '/' && peek(&chars, i + 1) == Some('-') {
578                    state = ScanState::BlockComment(depth + 1);
579                    i += 2;
580                    col += 2;
581                } else if c == '-' && peek(&chars, i + 1) == Some('/') {
582                    let next = depth - 1;
583                    state = if next == 0 {
584                        ScanState::Normal
585                    } else {
586                        ScanState::BlockComment(next)
587                    };
588                    i += 2;
589                    col += 2;
590                } else if c == '\n' {
591                    i += 1;
592                    line += 1;
593                    col = 0;
594                } else {
595                    i += 1;
596                    col += 1;
597                }
598            }
599            ScanState::Str => {
600                if c == '\\' {
601                    i += 1;
602                    col += 1;
603                    if let Some(escaped) = peek(&chars, i) {
604                        if escaped == '\n' {
605                            line += 1;
606                            col = 0;
607                        } else {
608                            col += 1;
609                        }
610                        i += 1;
611                    }
612                } else if c == '"' {
613                    state = ScanState::Normal;
614                    i += 1;
615                    col += 1;
616                } else if c == '\n' {
617                    i += 1;
618                    line += 1;
619                    col = 0;
620                } else {
621                    i += 1;
622                    col += 1;
623                }
624            }
625            ScanState::Normal => {
626                if c == '-' && peek(&chars, i + 1) == Some('-') {
627                    state = ScanState::LineComment;
628                    i += 2;
629                    col += 2;
630                } else if c == '/' && peek(&chars, i + 1) == Some('-') {
631                    state = ScanState::BlockComment(1);
632                    i += 2;
633                    col += 2;
634                } else if c == '"' {
635                    state = ScanState::Str;
636                    i += 1;
637                    col += 1;
638                } else if is_word_char(c) {
639                    let start_line = line;
640                    let start_col = col;
641                    let mut j = i;
642                    while j < n && is_word_char(chars[j].1) {
643                        j += 1;
644                    }
645                    let word_len = (j - i) as u32;
646                    let end_byte = if j < n { chars[j].0 } else { source.len() };
647                    let preceded_by_dot = i > 0 && chars[i - 1].1 == '.';
648                    if !preceded_by_dot && word_matches(&chars, i, j, keyword) {
649                        hits.push(Hit {
650                            byte_start: byte,
651                            byte_end: end_byte,
652                            line: start_line,
653                            column: start_col,
654                            end_column: start_col + word_len,
655                        });
656                    }
657                    i = j;
658                    col += word_len;
659                } else if c == '\n' {
660                    i += 1;
661                    line += 1;
662                    col = 0;
663                } else {
664                    i += 1;
665                    col += 1;
666                }
667            }
668        }
669    }
670    hits
671}
672
673#[cfg(test)]
674mod tests {
675    use super::*;
676
677    const SAMPLE: &str = "import Init\n\nnamespace Demo\n\ntheorem foo (n : Nat) : n = n := by\n  sorry\n\ndef bar : Nat := by admit\n\nend Demo\n";
678
679    fn lean(path: &str) -> LeanFile {
680        LeanFile(Utf8PathBuf::from(path))
681    }
682
683    fn root() -> Utf8PathBuf {
684        Utf8PathBuf::from(".")
685    }
686
687    #[test]
688    fn reconstructs_file_from_before_span_after() {
689        let file = lean("Demo.lean");
690        let tasks = mine_placeholders(&file, SAMPLE, MineKind::Sorry, "demo", root().as_path());
691        assert_eq!(tasks.len(), 1);
692        let task = &tasks[0];
693        let rebuilt = format!(
694            "{}{}{}",
695            task.source_before, task.target_span.text, task.source_after
696        );
697        assert_eq!(rebuilt, SAMPLE);
698    }
699
700    #[test]
701    fn sorry_task_has_precise_single_span() {
702        let file = lean("Demo.lean");
703        let tasks = mine_placeholders(&file, SAMPLE, MineKind::Sorry, "demo", root().as_path());
704        let task = &tasks[0];
705        assert_eq!(task.kind, MineKind::Sorry);
706        assert_eq!(task.target_span.text, "sorry");
707        assert_eq!(task.line, 6);
708        assert_eq!(task.column, 2);
709        assert_eq!(task.target_span.start_line, task.target_span.end_line);
710        assert_eq!(task.allowed_edit.start_line, 6);
711        assert_eq!(task.allowed_edit.end_line, 6);
712        assert_eq!(task.target_span.end_column, 7);
713        assert_eq!(task.instructions, INSTRUCTIONS);
714    }
715
716    #[test]
717    fn task_id_uses_module_and_declaration() {
718        let file = lean("Demo/Basic.lean");
719        let tasks = mine_placeholders(&file, SAMPLE, MineKind::Sorry, "demo", Utf8Path::new("."));
720        assert_eq!(tasks[0].task_id, "Demo.Basic.foo:6");
721        assert_eq!(tasks[0].imports, vec!["import Init"]);
722        let decl = tasks[0].declaration.as_ref().and_then(|d| d.name.clone());
723        assert_eq!(decl.as_deref(), Some("foo"));
724    }
725
726    #[test]
727    fn admit_is_mined_only_for_admit_kind() {
728        let file = lean("Demo.lean");
729        let sorry_tasks =
730            mine_placeholders(&file, SAMPLE, MineKind::Sorry, "demo", root().as_path());
731        assert!(sorry_tasks.iter().all(|t| t.target_span.text == "sorry"));
732        let admit_tasks =
733            mine_placeholders(&file, SAMPLE, MineKind::Admit, "demo", root().as_path());
734        assert_eq!(admit_tasks.len(), 1);
735        assert_eq!(admit_tasks[0].target_span.text, "admit");
736        assert_eq!(admit_tasks[0].line, 8);
737    }
738
739    #[test]
740    fn skips_placeholders_in_comments_and_strings() {
741        let source = "-- a stray sorry here\n/- block sorry -/\ndef s : String := \"sorry\"\ntheorem real : True := by\n  sorry\n";
742        let file = lean("S.lean");
743        let tasks = mine_placeholders(&file, source, MineKind::Sorry, "demo", root().as_path());
744        assert_eq!(tasks.len(), 1);
745        assert_eq!(tasks[0].line, 5);
746    }
747
748    #[test]
749    fn skips_qualified_and_partial_words() {
750        let source = "def a := Foo.sorry\ndef b := sorryAx\ndef c := mysorry\n";
751        let file = lean("Q.lean");
752        let tasks = mine_placeholders(&file, source, MineKind::Sorry, "demo", root().as_path());
753        assert!(tasks.is_empty());
754    }
755
756    #[test]
757    fn error_task_targets_enclosing_declaration() {
758        let source = "import Init\n\ntheorem broken : 1 = 2 := by\n  rfl\n";
759        let file = lean("Broken.lean");
760        let diag = Diagnostic {
761            file: Some(Utf8PathBuf::from("Broken.lean")),
762            line: Some(4),
763            column: Some(2),
764            severity: DiagnosticSeverity::Error,
765            message: "error: unsolved goals\n⊢ 1 = 2".to_owned(),
766            goal_state: Some(GoalState("⊢ 1 = 2".to_owned())),
767        };
768        let tasks = mine_errors(&file, source, &[diag], "demo", root().as_path());
769        assert_eq!(tasks.len(), 1);
770        let task = &tasks[0];
771        assert_eq!(task.kind, MineKind::Error);
772        assert_eq!(task.target_span.start_line, 3);
773        assert_eq!(task.target_span.end_line, 4);
774        assert!(task.target_span.text.contains("theorem broken"));
775        assert!(task.target_span.text.contains("rfl"));
776        assert_eq!(task.allowed_edit.start_line, 3);
777        assert_eq!(task.allowed_edit.end_line, 4);
778        assert!(task.goal_state.is_some());
779        assert!(task.diagnostic.is_some());
780        let rebuilt = format!(
781            "{}{}{}",
782            task.source_before, task.target_span.text, task.source_after
783        );
784        assert_eq!(rebuilt, source);
785    }
786
787    #[test]
788    fn error_tasks_dedup_by_span() {
789        let source = "theorem broken : 1 = 2 := by\n  rfl\n";
790        let file = lean("Broken.lean");
791        let make = |line: u32| Diagnostic {
792            file: Some(Utf8PathBuf::from("Broken.lean")),
793            line: Some(line),
794            column: Some(0),
795            severity: DiagnosticSeverity::Error,
796            message: "error: something".to_owned(),
797            goal_state: None,
798        };
799        let tasks = mine_errors(&file, source, &[make(1), make(2)], "demo", root().as_path());
800        assert_eq!(tasks.len(), 1);
801    }
802
803    #[test]
804    fn warnings_are_ignored_for_error_mining() {
805        let source = "theorem t : True := trivial\n";
806        let file = lean("T.lean");
807        let warning = Diagnostic {
808            file: Some(Utf8PathBuf::from("T.lean")),
809            line: Some(1),
810            column: Some(0),
811            severity: DiagnosticSeverity::Warning,
812            message: "warning: declaration uses 'sorry'".to_owned(),
813            goal_state: None,
814        };
815        let tasks = mine_errors(&file, source, &[warning], "demo", root().as_path());
816        assert!(tasks.is_empty());
817    }
818
819    #[test]
820    fn module_name_drops_root_and_extension() {
821        assert_eq!(
822            module_name(Utf8Path::new("./Demo/Basic.lean"), Utf8Path::new(".")),
823            "Demo.Basic"
824        );
825        assert_eq!(
826            module_name(
827                Utf8Path::new("/tmp/proj/Demo.lean"),
828                Utf8Path::new("/tmp/proj")
829            ),
830            "Demo"
831        );
832    }
833
834    #[test]
835    fn line_content_spans_match_str_lines() {
836        let source = "abc\n\ndef\n";
837        let spans = line_content_spans(source);
838        let rebuilt: Vec<&str> = spans.iter().map(|&(a, b)| &source[a..b]).collect();
839        let expected: Vec<&str> = source.lines().collect();
840        assert_eq!(rebuilt, expected);
841    }
842}