Skip to main content

coding_tools/
outline.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 Jonathan Shook
3
4//! `ct-outline`'s heuristic declaration detection.
5//!
6//! A rule pack per language turns a file's text into ordered [`Entry`] rows —
7//! kind, name, 1-based `start:end` span, and nesting depth. Detection is
8//! line-pattern matching plus a block heuristic per language family: brace
9//! matching for Rust (over comment/string-stripped text), indentation for
10//! Python, heading levels for Markdown (fenced code blocks ignored). It is a
11//! comprehension aid, not a parser: start lines are exact; an end the
12//! heuristic cannot derive is `None` (rendered `start:?`), never a guess.
13
14/// One detected declaration.
15#[derive(Debug, Clone, PartialEq, Eq)]
16pub struct Entry {
17    /// The source language's own keyword for the declaration (`fn`, `class`,
18    /// `h2`, …) — there is no cross-language kind abstraction.
19    pub kind: String,
20    /// The declared name (for `impl`, the trait-for-type text; for headings,
21    /// the title).
22    pub name: String,
23    /// Exact 1-based line the declaration starts on.
24    pub start: usize,
25    /// Best-effort 1-based inclusive end line; `None` when the block
26    /// heuristic could not derive one.
27    pub end: Option<usize>,
28    /// 1-based nesting depth (`1` = top level).
29    pub depth: usize,
30}
31
32/// A supported language rule pack, keyed by file extension via
33/// [`language_for`].
34#[derive(Debug, Clone, Copy, PartialEq, Eq)]
35pub enum Lang {
36    /// `.rs` — brace blocks over comment/string-stripped text.
37    Rust,
38    /// `.py` — indentation blocks.
39    Python,
40    /// `.md` — ATX headings as the outline; fenced code blocks ignored.
41    Markdown,
42}
43
44impl Lang {
45    /// The display name used in messages.
46    pub fn label(self) -> &'static str {
47        match self {
48            Lang::Rust => "rust",
49            Lang::Python => "python",
50            Lang::Markdown => "markdown",
51        }
52    }
53}
54
55/// The rule pack for a file extension, or `None` when the language is not
56/// (yet) supported.
57///
58/// # Examples
59///
60/// ```
61/// use coding_tools::outline::{language_for, Lang};
62///
63/// assert_eq!(language_for("rs"), Some(Lang::Rust));
64/// assert_eq!(language_for("py"), Some(Lang::Python));
65/// assert_eq!(language_for("md"), Some(Lang::Markdown));
66/// assert_eq!(language_for("zig"), None); // skipped in walks
67/// ```
68pub fn language_for(ext: &str) -> Option<Lang> {
69    match ext.to_ascii_lowercase().as_str() {
70        "rs" => Some(Lang::Rust),
71        "py" => Some(Lang::Python),
72        "md" => Some(Lang::Markdown),
73        _ => None,
74    }
75}
76
77/// Outline `text` with the `lang` rule pack, in source order.
78///
79/// # Examples
80///
81/// ```
82/// use coding_tools::outline::{outline, Lang};
83///
84/// let src = "pub struct Point { x: i32 }\n\nimpl Point {\n    fn norm(&self) -> i32 { 0 }\n}\n";
85/// let entries = outline(Lang::Rust, src);
86/// let rows: Vec<String> = entries
87///     .iter()
88///     .map(|e| format!("{}:{} {} {} d{}", e.start, e.end.map_or("?".into(), |n| n.to_string()), e.kind, e.name, e.depth))
89///     .collect();
90/// assert_eq!(rows, [
91///     "1:1 struct Point d1",
92///     "3:5 impl Point d1",
93///     "4:4 fn norm d2",
94/// ]);
95/// ```
96pub fn outline(lang: Lang, text: &str) -> Vec<Entry> {
97    match lang {
98        Lang::Rust => rust_outline(text),
99        Lang::Python => python_outline(text),
100        Lang::Markdown => markdown_outline(text),
101    }
102}
103
104// ----- Rust ---------------------------------------------------------------------
105
106/// Replace comments and string/char-literal contents with spaces, preserving
107/// line structure, so brace counting and keyword matching see only code.
108/// Heuristic: raw strings, nested block comments, and lifetimes are handled;
109/// pathological token sequences may still leak.
110fn strip_rust(text: &str) -> String {
111    let chars: Vec<char> = text.chars().collect();
112    let mut out = String::with_capacity(text.len());
113    let mut i = 0;
114    let n = chars.len();
115
116    // Emit ch verbatim for newline (keep line structure), else a space.
117    let blank = |c: char| if c == '\n' { '\n' } else { ' ' };
118
119    while i < n {
120        let c = chars[i];
121        match c {
122            '/' if i + 1 < n && chars[i + 1] == '/' => {
123                while i < n && chars[i] != '\n' {
124                    out.push(' ');
125                    i += 1;
126                }
127            }
128            '/' if i + 1 < n && chars[i + 1] == '*' => {
129                let mut depth = 1;
130                out.push(' ');
131                out.push(' ');
132                i += 2;
133                while i < n && depth > 0 {
134                    if chars[i] == '/' && i + 1 < n && chars[i + 1] == '*' {
135                        depth += 1;
136                        out.push(blank(chars[i]));
137                        out.push(blank(chars[i + 1]));
138                        i += 2;
139                    } else if chars[i] == '*' && i + 1 < n && chars[i + 1] == '/' {
140                        depth -= 1;
141                        out.push(blank(chars[i]));
142                        out.push(blank(chars[i + 1]));
143                        i += 2;
144                    } else {
145                        out.push(blank(chars[i]));
146                        i += 1;
147                    }
148                }
149            }
150            'r' | 'b'
151                if {
152                    // Raw / byte-string starts: r", r#", br", b".
153                    let mut j = i;
154                    if chars[j] == 'b' && j + 1 < n && chars[j + 1] == 'r' {
155                        j += 1;
156                    }
157                    if chars[j] == 'r' || chars[j] == 'b' {
158                        let mut k = j + 1;
159                        if chars[j] == 'r' {
160                            while k < n && chars[k] == '#' {
161                                k += 1;
162                            }
163                        }
164                        k < n && chars[k] == '"'
165                    } else {
166                        false
167                    }
168                } =>
169            {
170                // Copy the prefix, then blank the quoted body.
171                let mut hashes = 0;
172                out.push(chars[i]);
173                i += 1;
174                if i < n && chars[i] == 'r' {
175                    out.push('r');
176                    i += 1;
177                }
178                while i < n && chars[i] == '#' {
179                    hashes += 1;
180                    out.push('#');
181                    i += 1;
182                }
183                out.push('"');
184                i += 1; // opening quote
185                'raw: while i < n {
186                    if chars[i] == '"' {
187                        let mut k = i + 1;
188                        let mut seen = 0;
189                        while k < n && chars[k] == '#' && seen < hashes {
190                            k += 1;
191                            seen += 1;
192                        }
193                        if seen == hashes {
194                            out.push('"');
195                            for _ in 0..hashes {
196                                out.push('#');
197                            }
198                            i = k;
199                            break 'raw;
200                        }
201                    }
202                    out.push(blank(chars[i]));
203                    i += 1;
204                }
205            }
206            '"' => {
207                out.push('"');
208                i += 1;
209                while i < n {
210                    if chars[i] == '\\' && i + 1 < n {
211                        out.push(' ');
212                        out.push(' ');
213                        i += 2;
214                        continue;
215                    }
216                    if chars[i] == '"' {
217                        out.push('"');
218                        i += 1;
219                        break;
220                    }
221                    out.push(blank(chars[i]));
222                    i += 1;
223                }
224            }
225            '\'' => {
226                // Char literal vs lifetime: 'x' / '\n' are literals; 'a in a
227                // generic position is a lifetime and passes through.
228                if i + 2 < n && chars[i + 1] == '\\' {
229                    // Escaped char literal: blank the body. (Column alignment
230                    // is irrelevant; only line structure matters.)
231                    out.push('\'');
232                    i += 2;
233                    while i < n && chars[i] != '\'' {
234                        out.push(' ');
235                        i += 1;
236                    }
237                    if i < n {
238                        out.push('\'');
239                        i += 1;
240                    }
241                } else if i + 2 < n && chars[i + 2] == '\'' {
242                    out.push('\'');
243                    out.push(' ');
244                    out.push('\'');
245                    i += 3;
246                } else {
247                    out.push('\'');
248                    i += 1;
249                }
250            }
251            _ => {
252                out.push(c);
253                i += 1;
254            }
255        }
256    }
257    out
258}
259
260/// The leading identifier of `s` (`[A-Za-z0-9_]+`), if any.
261fn ident(s: &str) -> Option<&str> {
262    let end = s
263        .char_indices()
264        .find(|(_, c)| !(c.is_ascii_alphanumeric() || *c == '_'))
265        .map(|(i, _)| i)
266        .unwrap_or(s.len());
267    if end == 0 { None } else { Some(&s[..end]) }
268}
269
270/// Strip one `pub` / `pub(...)` visibility prefix.
271fn strip_vis(s: &str) -> &str {
272    let Some(rest) = s.strip_prefix("pub") else {
273        return s;
274    };
275    let rest = rest.trim_start();
276    if let Some(after) = rest.strip_prefix('(') {
277        if let Some(close) = after.find(')') {
278            return after[close + 1..].trim_start();
279        }
280        return s;
281    }
282    if rest.len() < s.len() { rest } else { s }
283}
284
285/// Detect a Rust declaration on a (comment/string-stripped, trimmed) line.
286/// `parent_kind` is the enclosing entry's kind, used to suppress items that
287/// are local detail rather than structure (e.g. `const` inside a `fn`).
288fn rust_decl(trimmed: &str, parent_kind: Option<&str>) -> Option<(String, String)> {
289    let s = strip_vis(trimmed);
290    let inside_fn = parent_kind == Some("fn");
291
292    // fn, with any qualifier prefix.
293    {
294        let mut t = s;
295        loop {
296            let next = ["default ", "const ", "async ", "unsafe "]
297                .iter()
298                .find_map(|q| t.strip_prefix(q));
299            if let Some(rest) = next {
300                t = rest.trim_start();
301                continue;
302            }
303            if let Some(rest) = t.strip_prefix("extern") {
304                let rest = rest.trim_start();
305                if let Some(r) = rest.strip_prefix('"')
306                    && let Some(close) = r.find('"')
307                {
308                    t = r[close + 1..].trim_start();
309                    continue;
310                }
311            }
312            break;
313        }
314        if let Some(rest) = t.strip_prefix("fn ")
315            && let Some(name) = ident(rest.trim_start())
316        {
317            return Some(("fn".into(), name.into()));
318        }
319    }
320
321    for (kw, kind) in [
322        ("mod ", "mod"),
323        ("struct ", "struct"),
324        ("enum ", "enum"),
325        ("trait ", "trait"),
326        ("macro_rules! ", "macro"),
327    ] {
328        if let Some(rest) = s.strip_prefix(kw)
329            && let Some(name) = ident(rest.trim_start())
330        {
331            return Some((kind.into(), name.into()));
332        }
333    }
334    if let Some(rest) = s.strip_prefix("unsafe ")
335        && let Some(rest) = rest.trim_start().strip_prefix("trait ")
336        && let Some(name) = ident(rest.trim_start())
337    {
338        return Some(("trait".into(), name.into()));
339    }
340
341    if s == "impl" || s.starts_with("impl ") || s.starts_with("impl<") {
342        let mut rest = &s[4..];
343        if let Some(r) = rest.strip_prefix('<') {
344            // Skip the generic parameter list to the matching '>'.
345            let mut depth = 1usize;
346            let mut idx = None;
347            for (i, c) in r.char_indices() {
348                match c {
349                    '<' => depth += 1,
350                    '>' => {
351                        depth -= 1;
352                        if depth == 0 {
353                            idx = Some(i);
354                            break;
355                        }
356                    }
357                    _ => {}
358                }
359            }
360            rest = idx.map(|i| &r[i + 1..]).unwrap_or("");
361        }
362        let mut name = rest.trim();
363        if let Some(cut) = name.find('{') {
364            name = name[..cut].trim();
365        }
366        if let Some(cut) = name.find(" where") {
367            name = name[..cut].trim();
368        }
369        if !name.is_empty() {
370            return Some(("impl".into(), name.into()));
371        }
372    }
373
374    if !inside_fn {
375        if let Some(rest) = s.strip_prefix("type ")
376            && let Some(name) = ident(rest.trim_start())
377        {
378            return Some(("type".into(), name.into()));
379        }
380        if let Some(rest) = s.strip_prefix("const ")
381            && let Some(name) = ident(rest.trim_start())
382        {
383            return Some(("const".into(), name.into()));
384        }
385        if let Some(rest) = s.strip_prefix("static ") {
386            let rest = rest.trim_start();
387            let rest = rest.strip_prefix("mut ").unwrap_or(rest).trim_start();
388            if let Some(name) = ident(rest) {
389                return Some(("static".into(), name.into()));
390            }
391        }
392    }
393    None
394}
395
396/// From the declaration's line, derive the block end: the line where the
397/// first `{` after the declaration is balanced, or `start` itself when a `;`
398/// terminates first. `None` when neither resolves within sight (20 lines of
399/// lookahead for the opener) or the block never closes.
400fn brace_block_end(lines: &[&str], start_idx: usize) -> Option<usize> {
401    let mut depth = 0usize;
402    let mut opened = false;
403    for (j, line) in lines.iter().enumerate().skip(start_idx) {
404        for c in line.chars() {
405            match c {
406                '{' => {
407                    depth += 1;
408                    opened = true;
409                }
410                '}' => {
411                    depth = depth.saturating_sub(1);
412                    if opened && depth == 0 {
413                        return Some(j + 1);
414                    }
415                }
416                ';' if !opened => return Some(start_idx + 1),
417                _ => {}
418            }
419        }
420        if !opened && j >= start_idx + 20 {
421            return None;
422        }
423    }
424    None
425}
426
427fn rust_outline(text: &str) -> Vec<Entry> {
428    let stripped = strip_rust(text);
429    let lines: Vec<&str> = stripped.lines().collect();
430    let mut entries: Vec<Entry> = Vec::new();
431    // Stack of (kind, end_line) for open enclosing entries.
432    let mut stack: Vec<(String, Option<usize>)> = Vec::new();
433
434    for (i, raw) in lines.iter().enumerate() {
435        let line_no = i + 1;
436        while let Some((_, Some(end))) = stack.last() {
437            if *end < line_no {
438                stack.pop();
439            } else {
440                break;
441            }
442        }
443        let trimmed = raw.trim_start();
444        if trimmed.is_empty() || trimmed.starts_with('#') {
445            continue; // attributes / blanks
446        }
447        let parent_kind = stack.last().map(|(k, _)| k.as_str());
448        if let Some((kind, name)) = rust_decl(trimmed, parent_kind) {
449            let end = brace_block_end(&lines, i);
450            entries.push(Entry {
451                kind: kind.clone(),
452                name,
453                start: line_no,
454                end,
455                depth: stack.len() + 1,
456            });
457            // Only block-owners enclose later lines.
458            if end.is_some_and(|e| e > line_no) {
459                stack.push((kind, end));
460            }
461        }
462    }
463    entries
464}
465
466// ----- Python -------------------------------------------------------------------
467
468/// Leading indentation in columns (tab = 4).
469fn indent_cols(line: &str) -> usize {
470    line.chars()
471        .take_while(|c| *c == ' ' || *c == '\t')
472        .map(|c| if c == '\t' { 4 } else { 1 })
473        .sum()
474}
475
476fn python_outline(text: &str) -> Vec<Entry> {
477    let lines: Vec<&str> = text.lines().collect();
478    let mut entries: Vec<Entry> = Vec::new();
479    let mut stack: Vec<(usize, usize)> = Vec::new(); // (indent, entry index)
480    let mut in_triple: Option<&str> = None;
481
482    for (i, line) in lines.iter().enumerate() {
483        let content = line.trim();
484        // Rough triple-quoted string tracking so docstring text cannot
485        // masquerade as declarations.
486        if let Some(q) = in_triple {
487            if content.contains(q) {
488                in_triple = None;
489            }
490            continue;
491        }
492        for q in ["\"\"\"", "'''"] {
493            if content.starts_with(q) && !content[q.len()..].contains(q) {
494                in_triple = Some(q);
495            }
496        }
497        if in_triple.is_some() || content.is_empty() || content.starts_with('#') {
498            continue;
499        }
500
501        let decl = content
502            .strip_prefix("async def ")
503            .or_else(|| content.strip_prefix("def "))
504            .map(|rest| ("def", rest))
505            .or_else(|| content.strip_prefix("class ").map(|rest| ("class", rest)));
506        let Some((kind, rest)) = decl else { continue };
507        let Some(name) = ident(rest.trim_start()) else {
508            continue;
509        };
510
511        let indent = indent_cols(line);
512        while let Some((top_indent, _)) = stack.last() {
513            if *top_indent >= indent {
514                stack.pop();
515            } else {
516                break;
517            }
518        }
519
520        // End: the last content line indented deeper than the declaration.
521        let mut end = i + 1;
522        for (j, later) in lines.iter().enumerate().skip(i + 1) {
523            let t = later.trim();
524            if t.is_empty() {
525                continue;
526            }
527            if indent_cols(later) <= indent {
528                break;
529            }
530            end = j + 1;
531        }
532
533        entries.push(Entry {
534            kind: kind.into(),
535            name: name.into(),
536            start: i + 1,
537            end: Some(end),
538            depth: stack.len() + 1,
539        });
540        stack.push((indent, entries.len() - 1));
541    }
542    entries
543}
544
545// ----- Markdown -----------------------------------------------------------------
546
547fn markdown_outline(text: &str) -> Vec<Entry> {
548    let lines: Vec<&str> = text.lines().collect();
549    let mut headings: Vec<(usize, usize, String)> = Vec::new(); // (level, line, title)
550    let mut in_fence = false;
551
552    for (i, line) in lines.iter().enumerate() {
553        let t = line.trim_start();
554        if t.starts_with("```") || t.starts_with("~~~") {
555            in_fence = !in_fence;
556            continue;
557        }
558        if in_fence {
559            continue;
560        }
561        let level = t.chars().take_while(|c| *c == '#').count();
562        if (1..=6).contains(&level)
563            && let Some(rest) = t.get(level..)
564            && rest.starts_with(' ')
565        {
566            let title = rest.trim().trim_end_matches('#').trim().to_string();
567            headings.push((level, i + 1, title));
568        }
569    }
570
571    let total = lines.len();
572    headings
573        .iter()
574        .enumerate()
575        .map(|(idx, (level, start, title))| {
576            // The section runs to the line before the next heading of the
577            // same or a shallower level.
578            let end = headings[idx + 1..]
579                .iter()
580                .find(|(l, ..)| l <= level)
581                .map(|(_, s, _)| s - 1)
582                .unwrap_or(total);
583            Entry {
584                kind: format!("h{level}"),
585                name: title.clone(),
586                start: *start,
587                end: Some(end.max(*start)),
588                depth: *level,
589            }
590        })
591        .collect()
592}
593
594#[cfg(test)]
595mod tests {
596    use super::*;
597
598    fn row(e: &Entry) -> String {
599        format!(
600            "{}:{} {} {} d{}",
601            e.start,
602            e.end.map_or("?".to_string(), |n| n.to_string()),
603            e.kind,
604            e.name,
605            e.depth
606        )
607    }
608
609    #[test]
610    fn rust_detects_kinds_spans_and_nesting() {
611        let src = r#"
612pub mod inner;
613
614/// Doc with a brace { that must not count.
615pub struct Point {
616    x: i32,
617}
618
619impl Display for Point {
620    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
621        let brace = "{"; // string brace must not count
622        write!(f, "{}", self.x)
623    }
624}
625
626pub(crate) async fn fetch() {}
627
628macro_rules! declare {
629    () => {};
630}
631"#;
632        let rows: Vec<String> = rust_outline(src).iter().map(row).collect();
633        assert_eq!(
634            rows,
635            [
636                "2:2 mod inner d1",
637                "5:7 struct Point d1",
638                "9:14 impl Display for Point d1",
639                "10:13 fn fmt d2",
640                "16:16 fn fetch d1",
641                "18:20 macro declare d1",
642            ]
643        );
644    }
645
646    #[test]
647    fn rust_suppresses_locals_inside_fn_but_keeps_assoc_items() {
648        let src = "impl Foo {\n    const MAX: u8 = 1;\n    fn go() {\n        const LOCAL: u8 = 2;\n    }\n}\n";
649        let rows: Vec<String> = rust_outline(src).iter().map(row).collect();
650        assert_eq!(
651            rows,
652            ["1:6 impl Foo d1", "2:2 const MAX d2", "3:5 fn go d2"]
653        );
654    }
655
656    #[test]
657    fn rust_unclosed_block_yields_unknown_end() {
658        let src = "fn broken() {\n    let x = 1;\n";
659        let e = &rust_outline(src)[0];
660        assert_eq!((e.start, e.end), (1, None));
661    }
662
663    #[test]
664    fn python_indentation_nesting_and_spans() {
665        let src = "class A:\n    def m(self):\n        pass\n\n    async def n(self):\n        pass\n\ndef top():\n    pass\n";
666        let rows: Vec<String> = python_outline(src).iter().map(row).collect();
667        assert_eq!(
668            rows,
669            [
670                "1:6 class A d1",
671                "2:3 def m d2",
672                "5:6 def n d2",
673                "8:9 def top d1",
674            ]
675        );
676    }
677
678    #[test]
679    fn python_docstring_text_is_not_a_declaration() {
680        let src = "def real():\n    \"\"\"\n    def fake():\n    \"\"\"\n    pass\n";
681        let rows: Vec<String> = python_outline(src).iter().map(row).collect();
682        assert_eq!(rows, ["1:5 def real d1"]);
683    }
684
685    #[test]
686    fn markdown_headings_with_fenced_code_ignored() {
687        let src = "# Title\n\nintro\n\n```sh\n# not a heading\n```\n\n## Section A\n\nbody\n\n## Section B\n";
688        let rows: Vec<String> = markdown_outline(src).iter().map(row).collect();
689        assert_eq!(
690            rows,
691            ["1:13 h1 Title d1", "9:12 h2 Section A d2", "13:13 h2 Section B d2"]
692        );
693    }
694
695    #[test]
696    fn language_keying_is_extension_based() {
697        assert_eq!(language_for("RS"), Some(Lang::Rust));
698        assert_eq!(language_for("txt"), None);
699    }
700}