seqc/
lint.rs

1//! Lint Engine for Seq
2//!
3//! A clippy-inspired lint tool that detects common patterns and suggests improvements.
4//! Phase 1: Syntactic pattern matching on word sequences.
5//!
6//! # Architecture
7//!
8//! - `LintConfig` - Parsed lint rules from TOML
9//! - `Pattern` - Compiled pattern for matching
10//! - `Linter` - Walks AST and finds matches
11//! - `LintDiagnostic` - Output format compatible with LSP
12//!
13//! # Known Limitations (Phase 1)
14//!
15//! - **No quotation boundary awareness**: Patterns match across statement boundaries
16//!   within a word body. Patterns like `[ drop` would incorrectly match `[` followed
17//!   by `drop` anywhere, not just at quotation start. Such patterns should be avoided
18//!   until Phase 2 adds quotation-aware matching.
19
20use crate::ast::{Program, Span, Statement, WordDef};
21use serde::Deserialize;
22use std::path::{Path, PathBuf};
23
24/// Embedded default lint rules
25pub static DEFAULT_LINTS: &str = include_str!("lints.toml");
26
27/// Severity level for lint diagnostics
28#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
29#[serde(rename_all = "lowercase")]
30pub enum Severity {
31    Error,
32    Warning,
33    Hint,
34}
35
36impl Severity {
37    /// Convert to LSP DiagnosticSeverity number
38    pub fn to_lsp_severity(&self) -> u32 {
39        match self {
40            Severity::Error => 1,
41            Severity::Warning => 2,
42            Severity::Hint => 4,
43        }
44    }
45}
46
47/// A single lint rule from configuration
48#[derive(Debug, Clone, Deserialize)]
49pub struct LintRule {
50    /// Unique identifier for the lint
51    pub id: String,
52    /// Pattern to match (space-separated words, $X for wildcards)
53    pub pattern: String,
54    /// Suggested replacement (empty string means "remove")
55    #[serde(default)]
56    pub replacement: String,
57    /// Human-readable message
58    pub message: String,
59    /// Severity level
60    #[serde(default = "default_severity")]
61    pub severity: Severity,
62}
63
64fn default_severity() -> Severity {
65    Severity::Warning
66}
67
68/// Lint configuration containing all rules
69#[derive(Debug, Clone, Deserialize)]
70pub struct LintConfig {
71    #[serde(rename = "lint")]
72    pub rules: Vec<LintRule>,
73}
74
75impl LintConfig {
76    /// Parse lint configuration from TOML string
77    pub fn from_toml(toml_str: &str) -> Result<Self, String> {
78        toml::from_str(toml_str).map_err(|e| format!("Failed to parse lint config: {}", e))
79    }
80
81    /// Load default embedded lint configuration
82    pub fn default_config() -> Result<Self, String> {
83        Self::from_toml(DEFAULT_LINTS)
84    }
85
86    /// Merge another config into this one (user overrides)
87    pub fn merge(&mut self, other: LintConfig) {
88        // User rules override defaults with same id
89        for rule in other.rules {
90            if let Some(existing) = self.rules.iter_mut().find(|r| r.id == rule.id) {
91                *existing = rule;
92            } else {
93                self.rules.push(rule);
94            }
95        }
96    }
97}
98
99/// A compiled pattern for efficient matching
100#[derive(Debug, Clone)]
101pub struct CompiledPattern {
102    /// The original rule
103    pub rule: LintRule,
104    /// Pattern elements (words or wildcards)
105    pub elements: Vec<PatternElement>,
106}
107
108/// Element in a compiled pattern
109#[derive(Debug, Clone, PartialEq)]
110pub enum PatternElement {
111    /// Exact word match
112    Word(String),
113    /// Single-word wildcard ($X, $Y, etc.)
114    SingleWildcard(String),
115    /// Multi-word wildcard ($...)
116    MultiWildcard,
117}
118
119impl CompiledPattern {
120    /// Compile a pattern string into elements
121    pub fn compile(rule: LintRule) -> Result<Self, String> {
122        let mut elements = Vec::new();
123        let mut multi_wildcard_count = 0;
124
125        for token in rule.pattern.split_whitespace() {
126            if token == "$..." {
127                multi_wildcard_count += 1;
128                elements.push(PatternElement::MultiWildcard);
129            } else if token.starts_with('$') {
130                elements.push(PatternElement::SingleWildcard(token.to_string()));
131            } else {
132                elements.push(PatternElement::Word(token.to_string()));
133            }
134        }
135
136        if elements.is_empty() {
137            return Err(format!("Empty pattern in lint rule '{}'", rule.id));
138        }
139
140        // Validate: at most one multi-wildcard per pattern to avoid
141        // exponential backtracking complexity
142        if multi_wildcard_count > 1 {
143            return Err(format!(
144                "Pattern in lint rule '{}' has {} multi-wildcards ($...), but at most 1 is allowed",
145                rule.id, multi_wildcard_count
146            ));
147        }
148
149        Ok(CompiledPattern { rule, elements })
150    }
151}
152
153/// A lint diagnostic (match found)
154#[derive(Debug, Clone)]
155pub struct LintDiagnostic {
156    /// Lint rule ID
157    pub id: String,
158    /// Human-readable message
159    pub message: String,
160    /// Severity level
161    pub severity: Severity,
162    /// Suggested replacement
163    pub replacement: String,
164    /// File where the match was found
165    pub file: PathBuf,
166    /// Start line number (0-indexed)
167    pub line: usize,
168    /// End line number (0-indexed), for multi-line matches
169    pub end_line: Option<usize>,
170    /// Start column (0-indexed), if available from source spans
171    pub start_column: Option<usize>,
172    /// End column (0-indexed, exclusive), if available from source spans
173    pub end_column: Option<usize>,
174    /// Word name where the match was found
175    pub word_name: String,
176    /// Start index in the word body
177    pub start_index: usize,
178    /// End index in the word body (exclusive)
179    pub end_index: usize,
180}
181
182/// Word call info extracted from a statement, including optional span
183#[derive(Debug, Clone)]
184struct WordInfo<'a> {
185    name: &'a str,
186    span: Option<&'a Span>,
187}
188
189/// The linter engine
190pub struct Linter {
191    patterns: Vec<CompiledPattern>,
192}
193
194impl Linter {
195    /// Create a new linter with the given configuration
196    pub fn new(config: &LintConfig) -> Result<Self, String> {
197        let mut patterns = Vec::new();
198        for rule in &config.rules {
199            patterns.push(CompiledPattern::compile(rule.clone())?);
200        }
201        Ok(Linter { patterns })
202    }
203
204    /// Create a linter with default configuration
205    pub fn with_defaults() -> Result<Self, String> {
206        let config = LintConfig::default_config()?;
207        Self::new(&config)
208    }
209
210    /// Lint a program and return all diagnostics
211    pub fn lint_program(&self, program: &Program, file: &Path) -> Vec<LintDiagnostic> {
212        let mut diagnostics = Vec::new();
213
214        for word in &program.words {
215            self.lint_word(word, file, &mut diagnostics);
216        }
217
218        diagnostics
219    }
220
221    /// Lint a single word definition
222    fn lint_word(&self, word: &WordDef, file: &Path, diagnostics: &mut Vec<LintDiagnostic>) {
223        let fallback_line = word.source.as_ref().map(|s| s.start_line).unwrap_or(0);
224
225        // Extract word sequence from the body (with span info)
226        let word_infos = self.extract_word_sequence(&word.body);
227
228        // Try each pattern
229        for pattern in &self.patterns {
230            self.find_matches(&word_infos, pattern, word, file, fallback_line, diagnostics);
231        }
232
233        // Recursively lint nested structures (quotations, if branches)
234        self.lint_nested(&word.body, word, file, diagnostics);
235    }
236
237    /// Extract a flat sequence of word names with spans from statements.
238    /// Non-WordCall statements (literals, quotations, etc.) are represented as
239    /// a special marker `<non-word>` to prevent false pattern matches across
240    /// non-consecutive word calls.
241    fn extract_word_sequence<'a>(&self, statements: &'a [Statement]) -> Vec<WordInfo<'a>> {
242        let mut words = Vec::new();
243        for stmt in statements {
244            if let Statement::WordCall { name, span } = stmt {
245                words.push(WordInfo {
246                    name: name.as_str(),
247                    span: span.as_ref(),
248                });
249            } else {
250                // Insert a marker for non-word statements to break up patterns.
251                // This prevents false positives like matching "swap swap" when
252                // there's a literal between them: "swap 0 swap"
253                words.push(WordInfo {
254                    name: "<non-word>",
255                    span: None,
256                });
257            }
258        }
259        words
260    }
261
262    /// Find all matches of a pattern in a word sequence
263    fn find_matches(
264        &self,
265        word_infos: &[WordInfo],
266        pattern: &CompiledPattern,
267        word: &WordDef,
268        file: &Path,
269        fallback_line: usize,
270        diagnostics: &mut Vec<LintDiagnostic>,
271    ) {
272        if word_infos.is_empty() || pattern.elements.is_empty() {
273            return;
274        }
275
276        // Sliding window match
277        let mut i = 0;
278        while i < word_infos.len() {
279            if let Some(match_len) = Self::try_match_at(word_infos, i, &pattern.elements) {
280                // Extract position info from spans if available
281                let first_span = word_infos[i].span;
282                let last_span = word_infos[i + match_len - 1].span;
283
284                // Use span line if available, otherwise fall back to word definition line
285                let line = first_span.map(|s| s.line).unwrap_or(fallback_line);
286
287                // Calculate end line and column range
288                let (end_line, start_column, end_column) =
289                    if let (Some(first), Some(last)) = (first_span, last_span) {
290                        if first.line == last.line {
291                            // Same line: column range spans from first word's start to last word's end
292                            (None, Some(first.column), Some(last.column + last.length))
293                        } else {
294                            // Multi-line match: track end line and end column
295                            (
296                                Some(last.line),
297                                Some(first.column),
298                                Some(last.column + last.length),
299                            )
300                        }
301                    } else {
302                        (None, None, None)
303                    };
304
305                diagnostics.push(LintDiagnostic {
306                    id: pattern.rule.id.clone(),
307                    message: pattern.rule.message.clone(),
308                    severity: pattern.rule.severity,
309                    replacement: pattern.rule.replacement.clone(),
310                    file: file.to_path_buf(),
311                    line,
312                    end_line,
313                    start_column,
314                    end_column,
315                    word_name: word.name.clone(),
316                    start_index: i,
317                    end_index: i + match_len,
318                });
319                // Skip past the match to avoid overlapping matches
320                i += match_len;
321            } else {
322                i += 1;
323            }
324        }
325    }
326
327    /// Try to match pattern at position, returning match length if successful
328    fn try_match_at(
329        word_infos: &[WordInfo],
330        start: usize,
331        elements: &[PatternElement],
332    ) -> Option<usize> {
333        let mut word_idx = start;
334        let mut elem_idx = 0;
335
336        while elem_idx < elements.len() {
337            match &elements[elem_idx] {
338                PatternElement::Word(expected) => {
339                    if word_idx >= word_infos.len() || word_infos[word_idx].name != expected {
340                        return None;
341                    }
342                    word_idx += 1;
343                    elem_idx += 1;
344                }
345                PatternElement::SingleWildcard(_) => {
346                    if word_idx >= word_infos.len() {
347                        return None;
348                    }
349                    word_idx += 1;
350                    elem_idx += 1;
351                }
352                PatternElement::MultiWildcard => {
353                    // Multi-wildcard: try all possible lengths
354                    elem_idx += 1;
355                    if elem_idx >= elements.len() {
356                        // Wildcard at end matches rest
357                        return Some(word_infos.len() - start);
358                    }
359                    // Try matching remaining pattern at each position
360                    for try_idx in word_idx..=word_infos.len() {
361                        if let Some(rest_len) =
362                            Self::try_match_at(word_infos, try_idx, &elements[elem_idx..])
363                        {
364                            return Some(try_idx - start + rest_len);
365                        }
366                    }
367                    return None;
368                }
369            }
370        }
371
372        Some(word_idx - start)
373    }
374
375    /// Recursively lint nested structures
376    fn lint_nested(
377        &self,
378        statements: &[Statement],
379        word: &WordDef,
380        file: &Path,
381        diagnostics: &mut Vec<LintDiagnostic>,
382    ) {
383        let fallback_line = word.source.as_ref().map(|s| s.start_line).unwrap_or(0);
384
385        for stmt in statements {
386            match stmt {
387                Statement::Quotation { body, .. } => {
388                    // Lint the quotation body
389                    let word_infos = self.extract_word_sequence(body);
390                    for pattern in &self.patterns {
391                        self.find_matches(
392                            &word_infos,
393                            pattern,
394                            word,
395                            file,
396                            fallback_line,
397                            diagnostics,
398                        );
399                    }
400                    // Recurse into nested quotations
401                    self.lint_nested(body, word, file, diagnostics);
402                }
403                Statement::If {
404                    then_branch,
405                    else_branch,
406                } => {
407                    // Lint both branches
408                    let word_infos = self.extract_word_sequence(then_branch);
409                    for pattern in &self.patterns {
410                        self.find_matches(
411                            &word_infos,
412                            pattern,
413                            word,
414                            file,
415                            fallback_line,
416                            diagnostics,
417                        );
418                    }
419                    self.lint_nested(then_branch, word, file, diagnostics);
420
421                    if let Some(else_stmts) = else_branch {
422                        let word_infos = self.extract_word_sequence(else_stmts);
423                        for pattern in &self.patterns {
424                            self.find_matches(
425                                &word_infos,
426                                pattern,
427                                word,
428                                file,
429                                fallback_line,
430                                diagnostics,
431                            );
432                        }
433                        self.lint_nested(else_stmts, word, file, diagnostics);
434                    }
435                }
436                Statement::Match { arms } => {
437                    for arm in arms {
438                        let word_infos = self.extract_word_sequence(&arm.body);
439                        for pattern in &self.patterns {
440                            self.find_matches(
441                                &word_infos,
442                                pattern,
443                                word,
444                                file,
445                                fallback_line,
446                                diagnostics,
447                            );
448                        }
449                        self.lint_nested(&arm.body, word, file, diagnostics);
450                    }
451                }
452                _ => {}
453            }
454        }
455    }
456}
457
458/// Format diagnostics for CLI output
459pub fn format_diagnostics(diagnostics: &[LintDiagnostic]) -> String {
460    let mut output = String::new();
461    for d in diagnostics {
462        let severity_str = match d.severity {
463            Severity::Error => "error",
464            Severity::Warning => "warning",
465            Severity::Hint => "hint",
466        };
467        // Include column info in output if available
468        let location = match d.start_column {
469            Some(col) => format!("{}:{}:{}", d.file.display(), d.line + 1, col + 1),
470            None => format!("{}:{}", d.file.display(), d.line + 1),
471        };
472        output.push_str(&format!(
473            "{}: {} [{}]: {}\n",
474            location, severity_str, d.id, d.message
475        ));
476        if !d.replacement.is_empty() {
477            output.push_str(&format!("  suggestion: replace with `{}`\n", d.replacement));
478        } else if d.replacement.is_empty() && d.message.contains("no effect") {
479            output.push_str("  suggestion: remove this code\n");
480        }
481    }
482    output
483}
484
485#[cfg(test)]
486mod tests {
487    use super::*;
488
489    fn test_config() -> LintConfig {
490        LintConfig::from_toml(
491            r#"
492[[lint]]
493id = "redundant-dup-drop"
494pattern = "dup drop"
495replacement = ""
496message = "`dup drop` has no effect"
497severity = "warning"
498
499[[lint]]
500id = "prefer-nip"
501pattern = "swap drop"
502replacement = "nip"
503message = "prefer `nip` over `swap drop`"
504severity = "hint"
505
506[[lint]]
507id = "redundant-swap-swap"
508pattern = "swap swap"
509replacement = ""
510message = "consecutive swaps cancel out"
511severity = "warning"
512"#,
513        )
514        .unwrap()
515    }
516
517    #[test]
518    fn test_parse_config() {
519        let config = test_config();
520        assert_eq!(config.rules.len(), 3);
521        assert_eq!(config.rules[0].id, "redundant-dup-drop");
522        assert_eq!(config.rules[1].severity, Severity::Hint);
523    }
524
525    #[test]
526    fn test_compile_pattern() {
527        let rule = LintRule {
528            id: "test".to_string(),
529            pattern: "swap drop".to_string(),
530            replacement: "nip".to_string(),
531            message: "test".to_string(),
532            severity: Severity::Warning,
533        };
534        let compiled = CompiledPattern::compile(rule).unwrap();
535        assert_eq!(compiled.elements.len(), 2);
536        assert_eq!(
537            compiled.elements[0],
538            PatternElement::Word("swap".to_string())
539        );
540        assert_eq!(
541            compiled.elements[1],
542            PatternElement::Word("drop".to_string())
543        );
544    }
545
546    #[test]
547    fn test_compile_pattern_with_wildcards() {
548        let rule = LintRule {
549            id: "test".to_string(),
550            pattern: "dup $X drop".to_string(),
551            replacement: "".to_string(),
552            message: "test".to_string(),
553            severity: Severity::Warning,
554        };
555        let compiled = CompiledPattern::compile(rule).unwrap();
556        assert_eq!(compiled.elements.len(), 3);
557        assert_eq!(
558            compiled.elements[1],
559            PatternElement::SingleWildcard("$X".to_string())
560        );
561    }
562
563    #[test]
564    fn test_simple_match() {
565        let config = test_config();
566        let linter = Linter::new(&config).unwrap();
567
568        // Create a simple program with "swap drop"
569        let program = Program {
570            includes: vec![],
571            unions: vec![],
572            words: vec![WordDef {
573                name: "test".to_string(),
574                effect: None,
575                body: vec![
576                    Statement::IntLiteral(1),
577                    Statement::IntLiteral(2),
578                    Statement::WordCall {
579                        name: "swap".to_string(),
580                        span: None,
581                    },
582                    Statement::WordCall {
583                        name: "drop".to_string(),
584                        span: None,
585                    },
586                ],
587                source: None,
588            }],
589        };
590
591        let diagnostics = linter.lint_program(&program, &PathBuf::from("test.seq"));
592        assert_eq!(diagnostics.len(), 1);
593        assert_eq!(diagnostics[0].id, "prefer-nip");
594        assert_eq!(diagnostics[0].replacement, "nip");
595    }
596
597    #[test]
598    fn test_no_false_positives() {
599        let config = test_config();
600        let linter = Linter::new(&config).unwrap();
601
602        // "swap" followed by something other than "drop"
603        let program = Program {
604            includes: vec![],
605            unions: vec![],
606            words: vec![WordDef {
607                name: "test".to_string(),
608                effect: None,
609                body: vec![
610                    Statement::WordCall {
611                        name: "swap".to_string(),
612                        span: None,
613                    },
614                    Statement::WordCall {
615                        name: "dup".to_string(),
616                        span: None,
617                    },
618                ],
619                source: None,
620            }],
621        };
622
623        let diagnostics = linter.lint_program(&program, &PathBuf::from("test.seq"));
624        assert!(diagnostics.is_empty());
625    }
626
627    #[test]
628    fn test_multiple_matches() {
629        let config = test_config();
630        let linter = Linter::new(&config).unwrap();
631
632        // Two instances of "swap drop"
633        let program = Program {
634            includes: vec![],
635            unions: vec![],
636            words: vec![WordDef {
637                name: "test".to_string(),
638                effect: None,
639                body: vec![
640                    Statement::WordCall {
641                        name: "swap".to_string(),
642                        span: None,
643                    },
644                    Statement::WordCall {
645                        name: "drop".to_string(),
646                        span: None,
647                    },
648                    Statement::WordCall {
649                        name: "dup".to_string(),
650                        span: None,
651                    },
652                    Statement::WordCall {
653                        name: "swap".to_string(),
654                        span: None,
655                    },
656                    Statement::WordCall {
657                        name: "drop".to_string(),
658                        span: None,
659                    },
660                ],
661                source: None,
662            }],
663        };
664
665        let diagnostics = linter.lint_program(&program, &PathBuf::from("test.seq"));
666        assert_eq!(diagnostics.len(), 2);
667    }
668
669    #[test]
670    fn test_multi_wildcard_validation() {
671        // Pattern with two multi-wildcards should be rejected
672        let rule = LintRule {
673            id: "bad-pattern".to_string(),
674            pattern: "$... foo $...".to_string(),
675            replacement: "".to_string(),
676            message: "test".to_string(),
677            severity: Severity::Warning,
678        };
679        let result = CompiledPattern::compile(rule);
680        assert!(result.is_err());
681        assert!(result.unwrap_err().contains("multi-wildcards"));
682    }
683
684    #[test]
685    fn test_single_multi_wildcard_allowed() {
686        // Pattern with one multi-wildcard should be accepted
687        let rule = LintRule {
688            id: "ok-pattern".to_string(),
689            pattern: "$... foo".to_string(),
690            replacement: "".to_string(),
691            message: "test".to_string(),
692            severity: Severity::Warning,
693        };
694        let result = CompiledPattern::compile(rule);
695        assert!(result.is_ok());
696    }
697
698    #[test]
699    fn test_literal_breaks_pattern() {
700        // "swap 0 swap" should NOT match "swap swap" because the literal breaks the pattern
701        let config = test_config();
702        let linter = Linter::new(&config).unwrap();
703
704        let program = Program {
705            includes: vec![],
706            unions: vec![],
707            words: vec![WordDef {
708                name: "test".to_string(),
709                effect: None,
710                body: vec![
711                    Statement::WordCall {
712                        name: "swap".to_string(),
713                        span: None,
714                    },
715                    Statement::IntLiteral(0), // This should break the pattern
716                    Statement::WordCall {
717                        name: "swap".to_string(),
718                        span: None,
719                    },
720                ],
721                source: None,
722            }],
723        };
724
725        let diagnostics = linter.lint_program(&program, &PathBuf::from("test.seq"));
726        // Should NOT find "swap swap" because there's a literal in between
727        assert!(
728            diagnostics.is_empty(),
729            "Expected no matches, but got: {:?}",
730            diagnostics
731        );
732    }
733
734    #[test]
735    fn test_consecutive_swap_swap_still_matches() {
736        // Actual consecutive "swap swap" should still be detected
737        let config = test_config();
738        let linter = Linter::new(&config).unwrap();
739
740        let program = Program {
741            includes: vec![],
742            unions: vec![],
743            words: vec![WordDef {
744                name: "test".to_string(),
745                effect: None,
746                body: vec![
747                    Statement::WordCall {
748                        name: "swap".to_string(),
749                        span: None,
750                    },
751                    Statement::WordCall {
752                        name: "swap".to_string(),
753                        span: None,
754                    },
755                ],
756                source: None,
757            }],
758        };
759
760        let diagnostics = linter.lint_program(&program, &PathBuf::from("test.seq"));
761        assert_eq!(diagnostics.len(), 1);
762        assert_eq!(diagnostics[0].id, "redundant-swap-swap");
763    }
764}