use anyhow::Result;
use clap::{Parser, Subcommand, ValueEnum};
use serde::Serialize;
use std::cmp::Reverse;

use std::collections::{BTreeSet, HashMap, HashSet};
use std::io::{self, BufRead};
use std::path::Path;

use crate::errors;
use crate::model::Section;
use crate::pack::{pack_by_ids, PackSearchOptions};
use crate::parse::{load_markdown, parse_markdown};
use crate::render::{
    render_pack, render_read, render_search, render_sections, render_stats, render_tree,
    FileSectionsMap, PackIncluded, SectionsEntry, StatsEntry,
};
use crate::search::{discover_markdown_files, get_doc_section_summaries, search_files};
use crate::tokens::{estimate_tokens, truncate_to_tokens};

const TRUNCATION_NOTICE: &str = "\n\n<!-- mdlens: truncated at token budget -->";

#[derive(Parser)]
#[command(name = "mdlens")]
#[command(about = "Token-efficient Markdown structure CLI for AI agents")]
#[command(
    long_about = "mdlens parses Markdown files into a hierarchical section tree with\ndotted IDs, token estimates, and bounded-context packing.\n\nDesigned for AI agents that need to navigate, search, and pack\nMarkdown documentation into context windows efficiently.\n\nAgent quickstart:\n 1. For question answering over a Markdown directory, start with:\n mdlens scout <dir> \"<question>\" --max-tokens 1400\n 2. Answer from scout when [highlights] and [evidence] are sufficient.\n 3. If one detail is missing, use a listed section id:\n mdlens read <file> --id <N.N> --max-tokens 1200\n 4. Use search/tree/sections only when scout points at the wrong file or you\n need broader navigation.\n\nScout is the recommended first command for arbitrary messy English markdown.\nIt returns query expansion, a compact file map, ranked highlights, and bounded\nevidence sections with parent heading/status context.\n\nAnswering from scout:\n - Read [highlights] first, then [evidence].\n - Preserve distinctive evidence terms: flags, IDs, metrics, option names,\n labels, row values, and short policy/risk phrases.\n - Copy short source phrases exactly when they are likely answer terms; avoid\n changing singular/plural or rewriting concise labels into paraphrases.\n - If scout already names the answer plus its rule, risk, command, or policy,\n answer directly instead of continuing broad retrieval.\n - For current-vs-stale questions, prefer current/current loader sections and\n treat Do Not Use, copied tables, stale notes, and old runbooks as\n distractors.\n - For table questions, keep the table header with the selected row; do not\n average unrelated rows unless the document says to.\n - For why, policy, safety, privacy, negative, or tradeoff questions, include\n the compact rule/risk/rationale bullets, not only the command or metric.\n - For multi-file comparisons, answer each named entity separately, then\n summarize the shared pattern.\n - If evidence is missing, say the corpus does not specify the fact.\n\nRun `mdlens scout --help` for detailed scout-specific guidance."
)]
struct Cli {
    #[command(subcommand)]
    command: Commands,
}

#[derive(Subcommand)]
enum Commands {
    Tree(TreeArgs),
    Read(ReadArgs),
    Search(SearchArgs),
    Scout(ScoutArgs),
    Pack(PackArgs),
    Stats(StatsArgs),
    Sections(SectionsArgs),
}

#[derive(clap::Args)]
struct TreeArgs {
    path: String,
    #[arg(long)]
    json: bool,
    #[arg(long)]
    max_depth: Option<usize>,
    #[arg(long)]
    include_preamble: bool,
    #[arg(long)]
    files: bool,
}

#[derive(clap::Args)]
struct ReadArgs {
    file: String,
    #[arg(long, conflicts_with_all = ["heading_path", "lines"])]
    id: Option<String>,
    #[arg(long, conflicts_with_all = ["id", "lines"])]
    heading_path: Option<String>,
    #[arg(long, conflicts_with_all = ["id", "heading_path"])]
    lines: Option<String>,
    #[arg(long)]
    parents: bool,
    #[arg(long, conflicts_with = "no_children")]
    children: bool,
    #[arg(long, conflicts_with = "children")]
    no_children: bool,
    #[arg(long)]
    max_tokens: Option<usize>,
    #[arg(long)]
    json: bool,
}

#[derive(clap::Args)]
struct SearchArgs {
    path: String,
    query: String,
    #[arg(long)]
    json: bool,
    #[arg(long)]
    regex: bool,
    #[arg(long)]
    case_sensitive: bool,
    #[arg(long, default_value_t = 20)]
    max_results: usize,
    #[arg(long, default_value_t = 2)]
    context_lines: usize,
    #[arg(long)]
    content: bool,
    #[arg(long)]
    preview: Option<usize>,
    #[arg(long)]
    max_tokens: Option<usize>,
}

#[derive(clap::Args)]
#[command(
    long_about = "One-shot agent evidence pack for answering a natural-language question over Markdown.\n\n`scout` is optimized for agent workflows: fewer shell calls, bounded output,\nand enough section context to answer without dumping whole files. It searches\nsection text, headings, paths, parent context, and table rows; ranks likely\nevidence; then emits a compact pack."
)]
#[command(
    after_help = "Agent workflow:\n - Use scout as the first retrieval call for QA over a directory:\n mdlens scout docs/ \"What policy changed between the old and current loader?\" --max-tokens 1400\n - Use --json when a harness wants structured metadata plus the same rendered evidence pack.\n - Read [highlights] first. They are globally ranked compact evidence lines.\n - Then read [evidence]. Each block names file, section id, heading path, line\n span, token estimate, and ranking reason.\n - If the answer is present, stop and answer directly. Preserve distinctive\n terms: flags, IDs, metrics, option names, row values, labels, and short\n policy phrases.\n - Copy short source phrases exactly when they are likely answer terms; avoid\n changing singular/plural or rewriting concise labels into paraphrases.\n - If exactly one fact is missing, use the section map from [files] and read\n one section:\n mdlens read <file> --id <section-id> --max-tokens 1200\n - Use `mdlens search` only when scout clearly found the wrong file or when\n you need a second independent query.\n\nHow to interpret scout output:\n [queries] Search expansions derived from the question.\n [files] Candidate files, picked section ids, and nearby unread sections.\n [focus] Dominant file when the question appears single-file.\n [highlights] Globally ranked lines/table rows likely to answer the question.\n [evidence] Bounded excerpts from the selected sections.\n\nQuestion-shape guidance:\n - Current-vs-stale questions: prefer sections marked current/current loader;\n treat Do Not Use, stale notes, copied tables, and old runbooks as distractors.\n - Table questions: keep the table header with the selected row; do not average\n unrelated rows unless the document says to.\n - Why, policy, safety, privacy, negative, or tradeoff questions: include the\n compact rule/risk/rationale bullets, not only the command or metric.\n - Multi-file comparison: answer each named entity separately, then summarize\n the shared pattern.\n - Missing evidence: say the corpus does not specify the fact rather than\n guessing from file names.\n\nUseful defaults:\n --max-tokens 1400 keeps scout cheap for most agent turns.\n --max-sections 12 gives enough diversity before packing.\n --max-files 4 keeps the file map readable."
)]
struct ScoutArgs {
    path: String,
    question: String,
    #[arg(long)]
    json: bool,
    #[arg(long, default_value_t = 1400)]
    max_tokens: usize,
    #[arg(long, default_value_t = 12)]
    max_sections: usize,
    #[arg(long, default_value_t = 4)]
    max_files: usize,
}

#[derive(clap::Args)]
struct PackArgs {
    path: String,
    #[arg(long, conflicts_with_all = ["paths", "search"])]
    ids: Option<String>,
    #[arg(long, conflicts_with_all = ["ids", "search"])]
    paths: Option<String>,
    #[arg(long, conflicts_with_all = ["ids", "paths"])]
    search: Option<String>,
    #[arg(long)]
    max_tokens: usize,
    #[arg(long)]
    parents: bool,
    #[arg(long, conflicts_with = "no_dedupe")]
    dedupe: bool,
    #[arg(long, conflicts_with = "dedupe")]
    no_dedupe: bool,
    #[arg(long)]
    regex: bool,
    #[arg(long)]
    case_sensitive: bool,
    #[arg(long, default_value_t = 20)]
    max_results: usize,
    #[arg(long, default_value_t = 2)]
    context_lines: usize,
    #[arg(long)]
    json: bool,
}

#[derive(Clone, ValueEnum)]
enum StatsSort {
    Path,
    Tokens,
    Lines,
}

#[derive(clap::Args)]
struct StatsArgs {
    path: String,
    #[arg(long)]
    json: bool,
    #[arg(long, value_enum, default_value_t = StatsSort::Path)]
    sort: StatsSort,
    #[arg(long)]
    top: Option<usize>,
}

#[derive(clap::Args)]
struct SectionsArgs {
    #[arg(value_name = "FILE")]
    files: Vec<String>,
    #[arg(long)]
    content: bool,
    #[arg(long)]
    children: bool,
    #[arg(long)]
    preview: Option<usize>,
    #[arg(long)]
    max_depth: Option<usize>,
    #[arg(long)]
    max_tokens: Option<usize>,
    #[arg(long)]
    max_sections: Option<usize>,
    #[arg(long)]
    max_files: Option<usize>,
    #[arg(long)]
    json: bool,
    #[arg(long)]
    heading_paths: bool,
    #[arg(long)]
    lines: bool,
    #[arg(long, default_value_t = true)]
    dedupe: bool,
    #[arg(long, conflicts_with = "dedupe")]
    no_dedupe: bool,
}

#[derive(Clone)]
struct SectionHit {
    path: String,
    line: usize,
}

enum SectionInput {
    File(String),
    Hit(SectionHit),
}

pub fn run() -> Result<()> {
    let cli = Cli::parse();

    match cli.command {
        Commands::Tree(args) => cmd_tree(args),
        Commands::Read(args) => cmd_read(args),
        Commands::Search(args) => cmd_search(args),
        Commands::Scout(args) => cmd_scout(args),
        Commands::Pack(args) => cmd_pack(args),
        Commands::Stats(args) => cmd_stats(args),
        Commands::Sections(args) => cmd_sections(args),
    }
}

fn cmd_tree(args: TreeArgs) -> Result<()> {
    let files = crate::search::discover_markdown_files(&args.path)?;

    if files.len() == 1 {
        let doc = parse_markdown(&files[0])?;
        if args.json {
            let output = TreeJsonOutput {
                schema_version: 1,
                path: doc.path.clone(),
                line_count: doc.line_count,
                byte_count: doc.byte_count,
                char_count: doc.char_count,
                word_count: doc.word_count,
                token_estimate: doc.token_estimate,
                sections: serialize_sections(
                    &doc.sections,
                    args.max_depth,
                    args.include_preamble,
                    0,
                ),
            };
            println!("{}", serde_json::to_string_pretty(&output)?);
        } else {
            println!(
                "{}",
                render_tree(&doc, args.max_depth, args.include_preamble)
            );
        }
    } else {
        let depth_capped = args.max_depth.is_none();
        let effective_depth = args.max_depth.or(Some(1));

        if args.json {
            let mut file_outputs = Vec::new();
            for file in &files {
                let doc = parse_markdown(file)?;
                file_outputs.push(TreeFileJsonOutput {
                    path: doc.path.clone(),
                    line_count: doc.line_count,
                    byte_count: doc.byte_count,
                    char_count: doc.char_count,
                    word_count: doc.word_count,
                    token_estimate: doc.token_estimate,
                    sections: serialize_sections(
                        &doc.sections,
                        effective_depth,
                        args.include_preamble,
                        0,
                    ),
                });
            }
            let output = TreeMultiJsonOutput {
                schema_version: 1,
                files: file_outputs,
            };
            println!("{}", serde_json::to_string_pretty(&output)?);
        } else {
            for file in &files {
                let doc = parse_markdown(file)?;
                println!(
                    "\n{}",
                    render_tree(&doc, effective_depth, args.include_preamble)
                );
            }
            if depth_capped {
                eprintln!("[tree] directory mode: showing depth ≤1 by default; use --max-depth N for more");
            }
        }
    }

    Ok(())
}

fn cmd_read(args: ReadArgs) -> Result<()> {
    let parsed = load_markdown(&args.file)?;
    let doc = &parsed.doc;
    let lines = &parsed.lines;
    let include_children = !args.no_children || args.children;

    let (section_text, section_meta, selector_type, selector_value, section_ref) =
        if let Some(ref id) = args.id {
            let section = doc
                .find_section_by_id(id)
                .ok_or_else(|| anyhow::anyhow!("section id not found: {id}"))?;
            let content = if include_children {
                section.extract_content(lines)
            } else {
                section.extract_direct_content(lines)
            }
            .join("\n");
            (
                content,
                SectionMeta::from(section),
                "id",
                id.clone(),
                Some(section),
            )
        } else if let Some(ref path_str) = args.heading_path {
            let section = find_unique_section_by_path(doc, path_str)?;
            let content = if include_children {
                section.extract_content(lines)
            } else {
                section.extract_direct_content(lines)
            }
            .join("\n");
            (
                content,
                SectionMeta::from(section),
                "path",
                path_str.clone(),
                Some(section),
            )
        } else if let Some(ref lines_str) = args.lines {
            let parts: Vec<&str> = lines_str.split(':').collect();
            if parts.len() != 2 {
                return Err(anyhow::anyhow!(
                    "invalid line range: {}; expected format START:END",
                    lines_str
                ));
            }
            let start: usize = parts[0].trim().parse()?;
            let end: usize = parts[1].trim().parse()?;
            if start > end {
                return Err(errors::invalid_line_range(start, end));
            }
            if start < 1 || end > lines.len() {
                return Err(anyhow::anyhow!(
                    "line range {}:{} out of bounds (file has {} lines)",
                    start,
                    end,
                    lines.len()
                ));
            }
            let content = lines[(start - 1)..end].join("\n");
            let token_est = estimate_tokens(&content);
            (
                content,
                SectionMeta {
                    id: format!("lines:{}:{}", start, end),
                    title: format!("Lines {}-{}", start, end),
                    level: 0,
                    path: vec![format!("Lines {}-{}", start, end)],
                    line_start: start,
                    line_end: end,
                    token_estimate: token_est,
                },
                "lines",
                format!("{}:{}", start, end),
                None,
            )
        } else {
            return Err(anyhow::anyhow!(
                "exactly one of --id, --heading-path, or --lines is required"
            ));
        };

    let mut full_content = String::new();

    if args.parents {
        if let Some(sec) = section_ref {
            let parents = find_parent_headings(doc, sec);
            for line_idx in parents {
                if !full_content.is_empty() {
                    full_content.push_str("\n\n");
                }
                full_content.push_str(&lines[line_idx - 1]);
            }
        }
    }

    if !full_content.is_empty() && !section_text.is_empty() {
        full_content.push_str("\n\n");
    }
    full_content.push_str(&section_text);

    let truncated = if let Some(max_tokens) = args.max_tokens {
        if estimate_tokens(&full_content) > max_tokens {
            full_content = truncate_content_to_tokens(&full_content, max_tokens);
            true
        } else {
            false
        }
    } else {
        false
    };

    if args.json {
        let output = ReadJsonOutput {
            schema_version: 1,
            path: doc.path.clone(),
            selector: ReadSelector {
                r#type: selector_type.to_string(),
                value: selector_value.to_string(),
            },
            section: SectionJsonOutput {
                id: section_meta.id.clone(),
                title: section_meta.title.clone(),
                level: section_meta.level,
                path: section_meta.path.clone(),
                line_start: section_meta.line_start,
                line_end: section_meta.line_end,
                token_estimate: section_meta.token_estimate,
                children: Vec::new(),
            },
            content: full_content,
            truncated,
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
    } else {
        let section = Section {
            id: section_meta.id.clone(),
            slug: Section::slugify(&section_meta.title),
            title: section_meta.title.clone(),
            level: section_meta.level,
            path: section_meta.path.clone(),
            line_start: section_meta.line_start,
            line_end: section_meta.line_end,
            content_line_start: section_meta.line_start,
            byte_start: 0,
            byte_end: 0,
            char_count: 0,
            word_count: 0,
            token_estimate: section_meta.token_estimate,
            children: Vec::new(),
        };
        println!("{}", render_read(&section, &full_content, truncated));
    }

    Ok(())
}

struct SectionMeta {
    id: String,
    title: String,
    level: u8,
    path: Vec<String>,
    line_start: usize,
    line_end: usize,
    token_estimate: usize,
}

impl From<&Section> for SectionMeta {
    fn from(s: &Section) -> Self {
        SectionMeta {
            id: s.id.clone(),
            title: s.title.clone(),
            level: s.level,
            path: s.path.clone(),
            line_start: s.line_start,
            line_end: s.line_end,
            token_estimate: s.token_estimate,
        }
    }
}

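// Collects the heading line numbers of `section`'s ancestors, ordered
// outermost-first, by walking up a freshly built child-to-parent id map.
// Callers use this to prepend parent headings as context for a section.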
fn find_parent_headings(doc: &crate::model::Document, section: &Section) -> Vec<usize> {
    let mut parent_map: std::collections::HashMap<String, Option<String>> =
        std::collections::HashMap::new();
    build_parent_map(&doc.sections, None, &mut parent_map);
    let mut chain = Vec::new();
    let mut current_id = section.id.clone();
    while let Some(Some(pid)) = parent_map.get(&current_id) {
        if let Some(parent_sec) = doc.find_section_by_id(pid) {
            chain.push(parent_sec.line_start);
        }
        current_id = pid.clone();
    }
    chain.reverse();
    chain
}

fn find_unique_section_by_path<'a>(
    doc: &'a crate::model::Document,
    path_str: &str,
) -> Result<&'a Section> {
    let path = parse_heading_path(path_str);
    let matches = doc.find_sections_by_path(&path);
    match matches.len() {
        0 => Err(anyhow::anyhow!("path not found: {path_str}")),
        1 => Ok(matches[0]),
        _ => Err(errors::ambiguous_path(path_str, &matches)),
    }
}

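// Splits a `>`-delimited heading path into trimmed segments, honoring
// backslash escapes so a literal `>` can appear inside a heading title,
// e.g. `"Usage > Basic \> Advanced"` yields ["Usage", "Basic > Advanced"].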
fn parse_heading_path(path: &str) -> Vec<String> {
    let mut parts = Vec::new();
    let mut current = String::new();
    let mut escaped = false;

    for ch in path.chars() {
        if escaped {
            current.push(ch);
            escaped = false;
            continue;
        }

        match ch {
            '\\' => escaped = true,
            '>' => {
                let part = current.trim();
                if !part.is_empty() {
                    parts.push(part.to_string());
                }
                current.clear();
            }
            _ => current.push(ch),
        }
    }

    let part = current.trim();
    if !part.is_empty() {
        parts.push(part.to_string());
    }

    parts
}

fn build_parent_map(
    sections: &[Section],
    parent_id: Option<String>,
    map: &mut std::collections::HashMap<String, Option<String>>,
) {
    for section in sections {
        map.insert(section.id.clone(), parent_id.clone());
        build_parent_map(&section.children, Some(section.id.clone()), map);
    }
}

fn cmd_search(args: SearchArgs) -> Result<()> {
    let mut results = search_files(
        &args.path,
        &args.query,
        args.case_sensitive,
        args.regex,
        args.max_results,
        args.context_lines,
    )?;

    if args.content || args.preview.is_some() || args.max_tokens.is_some() {
        enrich_search_results(&mut results, args.content, args.preview)?;
    }

    if let Some(max_tokens) = args.max_tokens {
        let mut kept = Vec::new();
        let mut total_tokens = 0usize;
        for result in results {
            let item_tokens = if args.content {
                result
                    .body
                    .as_ref()
                    .map(|body| estimate_tokens(body))
                    .unwrap_or(result.token_estimate)
            } else if let Some(preview) = &result.preview {
                estimate_tokens(preview)
            } else {
                result.token_estimate
            };
            if total_tokens + item_tokens > max_tokens {
                break;
            }
            total_tokens += item_tokens;
            kept.push(result);
        }
        results = kept;
    }

    if args.json {
        let output = SearchJsonOutput {
            schema_version: 1,
            query: args.query,
            root: args.path,
            results: results
                .iter()
                .map(|r| SearchJsonResult {
                    path: r.path.clone(),
                    section_id: r.section_id.clone(),
                    section_title: r.section_title.clone(),
                    section_path: r.section_path.clone(),
                    line_start: r.line_start,
                    line_end: r.line_end,
                    token_estimate: r.token_estimate,
                    match_count: r.match_count,
                    body: r.body.clone(),
                    preview: r.preview.clone(),
                    snippets: r
                        .snippets
                        .iter()
                        .map(|s| SearchJsonSnippet {
                            line_start: s.line_start,
                            line_end: s.line_end,
                            text: s.text.clone(),
                        })
                        .collect(),
                })
                .collect(),
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
    } else {
        let file_sections = build_file_sections_map(&results);
        println!("{}", render_search(&results, args.content, &file_sections));
    }

    Ok(())
}

fn build_file_sections_map(results: &[crate::render::SearchResult]) -> FileSectionsMap {
    let unique_files: std::collections::HashSet<&str> =
        results.iter().map(|r| r.path.as_str()).collect();
    let mut map = FileSectionsMap::new();
    for path in unique_files {
        if let Ok(summaries) = get_doc_section_summaries(path) {
            map.insert(path.to_string(), summaries);
        }
    }
    map
}

#[derive(Clone, Serialize)]
struct ScoutCandidate {
    path: String,
    section_id: String,
    score: i32,
    reason: String,
}

struct ScoutHighlight {
    score: i32,
    path: String,
    section_id: String,
    line_no: usize,
    line: String,
}

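// Scout pipeline: expand the question into queries, gather candidate sections
// from content search, BM25-style lexical ranking, path/name matches, named
// targets, and section neighbors; then rank, dedupe, prune covered parents,
// diversify across files, and render a bounded evidence pack.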
fn cmd_scout(args: ScoutArgs) -> Result<()> {
    let queries = scout_queries(&args.question);
    let mut candidates: Vec<ScoutCandidate> = Vec::new();
    let per_query_results = (args.max_sections * 3).max(args.max_sections).min(60);

    for query in &queries {
        let results = search_files(&args.path, query, false, false, per_query_results, 2)?;
        for result in results {
            let query_tokens = signal_tokens(query);
            let normalized_path = normalize_for_match(&result.path);
            let path_quality_score = scout_path_quality_score(&result.path);
            let path_hits = query_tokens
                .iter()
                .filter(|token| normalized_path.contains(&normalize_for_match(token)))
                .count() as i32;
            let path_boost = if path_hits > 0 {
                180 + path_hits * 45
            } else {
                0
            };
            let broad_penalty = if path_hits == 0 && query_tokens.len() <= 1 {
                60
            } else {
                0
            };
            candidates.push(ScoutCandidate {
                path: result.path,
                section_id: result.section_id,
                score: 100
                    + path_boost
                    + path_quality_score
                    + result.match_count as i32 * 5
                    + scout_heading_score(
                        &result.section_path,
                        &result.section_title,
                        &args.question,
                    )
                    - result.token_estimate as i32 / 250
                    - broad_penalty,
                reason: format!("content match: {query}"),
            });
        }
    }

    add_lexical_scout_candidates(
        &args.path,
        &args.question,
        &mut candidates,
        args.max_sections * 4,
    )?;
    add_path_match_candidates(&args.path, &args.question, &mut candidates)?;
    add_named_target_candidates(&args.path, &args.question, &mut candidates)?;
    add_neighbor_candidates(&mut candidates)?;

    candidates.sort_by(|lhs, rhs| {
        rhs.score
            .cmp(&lhs.score)
            .then(lhs.path.cmp(&rhs.path))
            .then(lhs.section_id.cmp(&rhs.section_id))
    });
    dedupe_scout_candidates(&mut candidates);
    prune_parent_scout_candidates(&mut candidates);
    let candidate_pool = candidates.clone();
    diversify_scout_candidates(&mut candidates, args.max_sections, &args.question);
    ensure_named_target_coverage(
        &mut candidates,
        &candidate_pool,
        args.max_sections,
        &args.question,
    )?;
    candidates.truncate(args.max_sections);

    let mut out = String::new();
    out.push_str(&format!(
        "[scout] question=\"{}\" budget=~{}t candidates={}\n",
        args.question,
        args.max_tokens,
        candidates.len()
    ));
    if !queries.is_empty() {
        out.push_str(&format!("[queries] {}\n", queries.join(" | ")));
    }
    out.push('\n');
    let evidence_candidates = order_scout_evidence(
        focused_scout_candidates(&candidates, &args.question),
        &args.question,
    )?;
    let map_candidates = if evidence_candidates.len() < candidates.len() {
        &evidence_candidates
    } else {
        &candidates
    };
    render_scout_file_maps(&mut out, map_candidates, args.max_files)?;
    if !evidence_candidates.is_empty() && evidence_candidates.len() < candidates.len() {
        out.push_str(&format!("\n[focus] {}\n", evidence_candidates[0].path));
    }
    out.push_str("\n[highlights]\n");
    render_scout_highlights(&mut out, &evidence_candidates, &args.question, 10)?;
    out.push_str("\n[evidence]\n");
    render_scout_evidence(
        &mut out,
        &evidence_candidates,
        &args.question,
        args.max_tokens,
    )?;

    if args.json {
        let output = ScoutJsonOutput {
            schema_version: 1,
            root: args.path,
            question: args.question,
            token_budget: args.max_tokens,
            candidate_count: candidates.len(),
            queries,
            candidates: evidence_candidates,
            rendered_text: out,
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
    } else {
        print!("{out}");
    }
    Ok(())
}

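// Derives up to 12 search queries from the question: capitalized/named
// phrases (with a hyphen-split variant), question-shape semantic expansions,
// and long or distinctive tokens (hyphenated, underscored, or digit-bearing).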
fn scout_queries(question: &str) -> Vec<String> {
    let mut queries = Vec::new();
    let phrases = extract_capitalized_phrases(question);
    for phrase in phrases {
        let cleaned = clean_query_phrase(&phrase);
        push_unique_query(&mut queries, cleaned.clone());
        if cleaned.contains('-') {
            push_unique_query(&mut queries, cleaned.replace('-', " "));
        }
    }

    for phrase in scout_semantic_queries(question) {
        push_unique_query(&mut queries, phrase);
    }

    let signal_tokens = signal_tokens(question);
    for token in signal_tokens.into_iter().take(8) {
        if token.len() >= 8
            || token.contains('-')
            || token.contains('_')
            || token.chars().any(|c| c.is_ascii_digit())
        {
            push_unique_query(&mut queries, token);
        }
    }

    if queries.is_empty() {
        push_unique_query(&mut queries, question.to_string());
    }
    queries.truncate(12);
    queries
}

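// Ranks every section in the corpus with a BM25-style lexical score plus
// title/path boosts and structural priors:
//   score(term) = idf * tf * (k1 + 1) / (tf + k1 * (1 - b + b * len / avg_len))
//   idf = ln((N - df + 0.5) / (df + 0.5) + 1), with k1 = 1.2 and b = 0.75.
// The per-section total is scaled by query-term coverage, then adjusted by
// heading, path-quality, and source-authority priors and a compactness bonus.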
fn add_lexical_scout_candidates(
    root: &str,
    question: &str,
    candidates: &mut Vec<ScoutCandidate>,
    limit: usize,
) -> Result<()> {
    let query_terms = lexical_query_terms(question);
    if query_terms.is_empty() {
        return Ok(());
    }

    struct LexicalSection {
        path: String,
        section_id: String,
        section_path: Vec<String>,
        section_title: String,
        token_estimate: usize,
        len: usize,
        terms: HashMap<String, usize>,
        title_terms: HashSet<String>,
        path_terms: HashSet<String>,
    }

    let files = discover_markdown_files(root)?;
    let mut sections = Vec::new();
    let mut df: HashMap<String, usize> = HashMap::new();
    let mut total_len = 0usize;

    for file in files {
        let parsed = load_markdown(&file)?;
        let path_terms = lexical_terms(&file).into_iter().collect::<HashSet<_>>();
        for section in flatten_doc_sections(&parsed.doc.sections) {
            if section.title == "<preamble>" {
                continue;
            }
            let content = section.extract_content(&parsed.lines).join("\n");
            let title_text = section.path.join(" ");
            let mut terms = lexical_terms(&format!("{title_text}\n{content}"));
            if terms.is_empty() {
                continue;
            }
            let title_terms = lexical_terms(&title_text)
                .into_iter()
                .collect::<HashSet<_>>();
            let mut tf = HashMap::new();
            let mut unique = HashSet::new();
            for term in terms.drain(..) {
                *tf.entry(term.clone()).or_insert(0) += 1;
                unique.insert(term);
            }
            for term in unique {
                *df.entry(term).or_insert(0) += 1;
            }
            let len = tf.values().sum::<usize>().max(1);
            total_len += len;
            sections.push(LexicalSection {
                path: file.clone(),
                section_id: section.id.clone(),
                section_path: section.path.clone(),
                section_title: section.title.clone(),
                token_estimate: section.token_estimate,
                len,
                terms: tf,
                title_terms,
                path_terms: path_terms.clone(),
            });
        }
    }

    let n = sections.len();
    if n == 0 {
        return Ok(());
    }
    let avg_len = total_len as f64 / n as f64;
    let unique_query_terms = query_terms.into_iter().collect::<BTreeSet<_>>();
    let mut scored = Vec::new();

    for section in sections {
        let mut score = 0.0f64;
        let mut matched = 0usize;
        for term in &unique_query_terms {
            let tf = section.terms.get(term).copied().unwrap_or(0) as f64;
            let title_hit = section.title_terms.contains(term);
            let path_hit = section.path_terms.contains(term);
            if tf == 0.0 && !title_hit && !path_hit {
                continue;
            }
            matched += 1;
            let doc_freq = df.get(term).copied().unwrap_or(1) as f64;
            let idf = ((n as f64 - doc_freq + 0.5) / (doc_freq + 0.5) + 1.0).ln();
            let k1 = 1.2;
            let b = 0.75;
            let bm25 = if tf > 0.0 {
                idf * (tf * (k1 + 1.0)) / (tf + k1 * (1.0 - b + b * section.len as f64 / avg_len))
            } else {
                0.0
            };
            score += bm25;
            if title_hit {
                score += idf * 1.8;
            }
            if path_hit {
                score += idf * 1.1;
            }
        }
        if matched == 0 {
            continue;
        }
        let coverage = matched as f64 / unique_query_terms.len().max(1) as f64;
        let structural_prior =
            scout_heading_score(&section.section_path, &section.section_title, question) as f64
                / 25.0;
        let path_prior = scout_path_quality_score(&section.path) as f64 / 20.0;
        let authority_prior =
            scout_source_authority_score(&section.path, &section.section_path, "", question) as f64
                / 15.0;
        let compactness = -(section.token_estimate as f64 / 900.0);
        let final_score = (score * (0.75 + coverage)
            + structural_prior
            + path_prior
            + authority_prior
            + compactness)
            * 100.0;
        scored.push((
            final_score.round() as i32,
            section.path,
            section.section_id,
            matched,
        ));
    }

    scored.sort_by(|lhs, rhs| {
        rhs.0
            .cmp(&lhs.0)
            .then(rhs.3.cmp(&lhs.3))
            .then(lhs.1.cmp(&rhs.1))
            .then(lhs.2.cmp(&rhs.2))
    });
    for (score, path, section_id, matched) in scored.into_iter().take(limit.max(1)) {
        candidates.push(ScoutCandidate {
            path,
            section_id,
            score,
            reason: format!("lexical relevance: {matched} query terms"),
        });
    }
    Ok(())
}

fn lexical_query_terms(text: &str) -> Vec<String> {
    let mut out = Vec::new();
    for token in lexical_terms(text) {
        if token.len() >= 3
            && !matches!(
                token.as_str(),
                "answer" | "doc" | "docs" | "file" | "markdown" | "readme" | "section"
            )
            && !out.contains(&token)
        {
            out.push(token);
        }
    }
    out
}

fn lexical_terms(text: &str) -> Vec<String> {
    text.split(|c: char| !c.is_ascii_alphanumeric() && c != '_' && c != '-')
        .filter_map(normalize_lexical_term)
        .collect()
}

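// Normalizes a raw token for lexical matching: lowercases, drops short
// tokens and stopwords, keeps pure numbers as-is, and applies a light
// suffix stemmer so e.g. "loaders" and "loader" compare equal.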
fn normalize_lexical_term(raw: &str) -> Option<String> {
    let mut token = raw.trim().trim_matches('-').to_ascii_lowercase();
    if token.len() < 3 || is_stopword(&token) {
        return None;
    }
    if token.chars().all(|c| c.is_ascii_digit()) {
        return Some(token);
    }
    for suffix in ["ing", "edly", "ed", "es", "s"] {
        if token.len() > suffix.len() + 3 && token.ends_with(suffix) {
            token.truncate(token.len() - suffix.len());
            break;
        }
    }
    Some(token)
}

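// Heuristic heading prior: rewards overlap between question tokens and the
// heading path, plus curated question-shape/heading pairings (e.g. "install"
// questions boost install headings), and penalizes boilerplate headings such
// as license or citation when the question does not ask about them.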
fn scout_heading_score(section_path: &[String], section_title: &str, question: &str) -> i32 {
    let question_l = question.to_ascii_lowercase();
    let heading_l = format!("{} {}", section_path.join(" "), section_title).to_ascii_lowercase();
    let mut score = 0;

    for token in signal_tokens(question).iter().take(8) {
        if heading_l.contains(&token.to_ascii_lowercase()) {
            score += 20;
        }
    }
    for (needle, heading, weight) in [
        ("install", "install", 90),
        ("command", "install", 45),
        ("usage", "usage", 70),
        ("example", "example", 55),
        ("configure", "configuration", 70),
        ("config", "configuration", 70),
        ("option", "option", 65),
        ("hyperparameter", "hyperparameter", 75),
        ("limitation", "limitation", 90),
        ("caveat", "caveat", 90),
        ("good fit", "for you", 130),
        ("compared", "for you", 90),
        ("yourself", "for you", 90),
        ("proxy", "proxy", 120),
        ("external", "external", 45),
        ("caveat", "finding", 50),
        ("caveat", "bottom line", 35),
        ("caveat", "unambiguous", 55),
        ("uniformly", "unambiguous", 55),
        ("conclude", "conclude", 70),
        ("conclude", "bottom line", 65),
        ("why", "finding", 35),
        ("why", "conclude", 35),
        ("analysis", "analysis", 45),
        ("failure", "failure", 55),
        ("recommend", "recommendation", 95),
        ("policy", "recommendation", 65),
        ("policy", "policy", 95),
        ("privacy", "privacy", 95),
        ("mask", "privacy", 75),
        ("masking", "privacy", 75),
        ("rule", "rule", 90),
        ("rules", "rule", 90),
        ("counting", "counting", 100),
        ("safety", "safety", 100),
        ("hazard", "safety", 75),
        ("hazard", "hazard", 85),
        ("risk", "risk", 80),
        ("why", "policy", 70),
        ("why", "rule", 70),
        ("why", "risk", 65),
        ("treat", "policy", 70),
        ("treat", "rule", 70),
        ("treat", "risk", 65),
        ("reflected", "policy", 65),
        ("reflection", "policy", 65),
        ("glare", "risk", 65),
        ("corrupted", "risk", 55),
        ("current", "current loader", 90),
        ("loader", "current loader", 90),
        ("flag", "current loader", 85),
        ("flag", "do not use", 75),
        ("stale", "do not use", 95),
        ("still", "do not use", 70),
        ("recommended", "current loader", 85),
        ("direction", "recommendation", 45),
    ] {
        if question_l.contains(needle) && heading_l.contains(heading) {
            score += weight;
        }
    }
    if (question_l.contains("hard") || question_l.contains("remains"))
        && heading_l.contains("ambiguity")
    {
        score += 80;
    }
    for (low_value, penalty) in [
        ("license", 70),
        ("citation", 80),
        ("cite", 80),
        ("contact", 55),
        ("contribute", 55),
        ("acknowledg", 55),
    ] {
        if heading_l.contains(low_value) && !question_l.contains(low_value) {
            score -= penalty;
        }
    }
    score
}

fn scout_path_quality_score(path: &str) -> i32 {
    let stem = Path::new(path)
        .file_stem()
        .and_then(|name| name.to_str())
        .unwrap_or(path)
        .to_ascii_lowercase();
    let mut score = 0;
    for marker in [
        "policy",
        "runbook",
        "guide",
        "manual",
        "spec",
        "reference",
        "card",
        "schema",
        "protocol",
    ] {
        if stem.contains(marker) {
            score += 45;
        }
    }
    for marker in [
        "scratch",
        "tmp",
        "temp",
        "draft",
        "random",
        "copied",
        "copy",
        "chat",
        "conversation",
    ] {
        if stem.contains(marker) {
            score -= 180;
        }
    }
    score
}

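// Source-authority prior: boosts sections whose path, headings, or content
// carry authoritative markers ("source of truth", "current loader", ...) and
// penalizes explicit low-authority markers ("not authoritative", "maybe
// stale", ...). The penalty is doubled unless the question itself asks for
// informal or stale sources.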
fn scout_source_authority_score(
    path: &str,
    section_path: &[String],
    content: &str,
    question: &str,
) -> i32 {
    let mut score = scout_path_quality_score(path);
    let question_l = question.to_ascii_lowercase();
    let heading_l = section_path.join(" ").to_ascii_lowercase();
    let content_l = content.to_ascii_lowercase();
    let combined = format!("{heading_l}\n{content_l}");

    for marker in [
        "source of truth",
        "current",
        "locked",
        "policy",
        "rule",
        "spec",
        "reference",
        "runbook",
        "known risk",
        "export notes",
        "current loader",
        "annotation policy",
    ] {
        if combined.contains(marker) {
            score += 28;
        }
    }

    let asks_for_informal = [
        "scratch",
        "draft",
        "old note",
        "old notes",
        "stale",
        "historical",
        "outdated",
        "do not use",
    ]
    .iter()
    .any(|needle| question_l.contains(needle));
    let low_authority_multiplier = if asks_for_informal { 1 } else { 2 };
    for (marker, penalty) in [
        ("not authoritative", 180),
        ("maybe stale", 140),
        ("random copied", 120),
        ("todo maybe", 110),
        ("scratch note", 100),
        ("copied wrong", 80),
        ("old notes disagree", 75),
    ] {
        if combined.contains(marker) {
            score -= penalty * low_authority_multiplier;
        }
    }

    score
}

fn wants_multi_file_evidence(question: &str) -> bool {
    let question_l = question.to_ascii_lowercase();
    [
        " across ",
        " between ",
        " compare ",
        " compares ",
        " comparing ",
        " contrast ",
        " both ",
        " each ",
        " multiple ",
        " multi-file ",
    ]
    .iter()
    .any(|needle| format!(" {question_l} ").contains(needle))
}

fn scout_semantic_queries(question: &str) -> Vec<String> {
    let question_l = question.to_ascii_lowercase();
    let mut queries = Vec::new();

    if question_l.contains("external") {
        queries.push("external".to_string());
        if question_l.contains("proxy") {
            queries.push("proxy".to_string());
        }
        if question_l.contains("panel") {
            queries.push("panel".to_string());
            queries.push("agreement".to_string());
        }
    }
    if question_l.contains("caveat")
        || question_l.contains("not specify")
        || question_l.contains("does not specify")
        || question_l.contains("uniformly")
    {
        queries.push("caveat".to_string());
        queries.push("not uniformly".to_string());
        queries.push("not specified".to_string());
    }
    if question_l.contains("compare")
        || question_l.contains("compared")
        || question_l.contains("difference")
        || question_l.contains("changed")
    {
        queries.push("compared".to_string());
        queries.push("difference".to_string());
    }
    if question_l.contains("best") && question_l.contains("candidate") {
        queries.push("best candidate".to_string());
    }
    if question_l.contains("failure") && question_l.contains("analysis") {
        queries.push("failure analysis".to_string());
    }
    if question_l.contains("recommend") || question_l.contains("policy direction") {
        queries.push("recommendation".to_string());
    }
    if question_l.contains("why")
        || question_l.contains("rule")
        || question_l.contains("policy")
        || question_l.contains("privacy")
        || question_l.contains("safety")
        || question_l.contains("hazard")
        || question_l.contains("counting")
        || question_l.contains("treat")
        || question_l.contains("reflected")
        || question_l.contains("reflection")
        || question_l.contains("glare")
        || question_l.contains("corrupted")
    {
        queries.push("policy".to_string());
        queries.push("rule".to_string());
        queries.push("known risk".to_string());
    }
    if question_l.contains("stale")
        || question_l.contains("current")
        || question_l.contains("recommended")
        || question_l.contains("still")
        || question_l.contains("flag")
        || question_l.contains("loader")
    {
        queries.push("current loader".to_string());
        queries.push("do not use".to_string());
        queries.push("stale flag".to_string());
    }

    queries
}

fn push_unique_query(queries: &mut Vec<String>, query: String) {
    let query = query
        .trim()
        .trim_matches(|c: char| !c.is_alphanumeric())
        .to_string();
    if query.len() < 3 {
        return;
    }
    if is_stopword(&query) {
        return;
    }
    if !queries
        .iter()
        .any(|existing| existing.eq_ignore_ascii_case(&query))
    {
        queries.push(query);
    }
}

fn clean_query_phrase(phrase: &str) -> String {
    phrase
        .split_whitespace()
        .filter_map(|token| {
            let cleaned =
                token.trim_matches(|c: char| !c.is_alphanumeric() && c != '-' && c != '/');
            if cleaned.eq_ignore_ascii_case("readme") || is_stopword(cleaned) {
                None
            } else {
                Some(cleaned.to_string())
            }
        })
        .collect::<Vec<_>>()
        .join(" ")
}

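// Collects runs of "signal" words (capitalized, digit-bearing, hyphenated,
// or slash-containing) as likely named-entity phrases, flushing the current
// run at commas, semicolons, and plain lowercase words; single-word runs
// must be at least 5 characters to count.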
fn extract_capitalized_phrases(text: &str) -> Vec<String> {
    let mut phrases = Vec::new();
    let mut current: Vec<String> = Vec::new();
    for raw in text.split_whitespace() {
        let word = raw.trim_matches(|c: char| !c.is_alphanumeric() && c != '-' && c != '/');
        let is_signal = word
            .chars()
            .next()
            .is_some_and(|c| c.is_ascii_uppercase() || c.is_ascii_digit())
            || word.chars().any(|c| c.is_ascii_digit())
            || word.contains('-')
            || word.contains('/');
        if is_signal && word.len() > 1 {
            current.push(word.to_string());
            if raw.ends_with(',') || raw.ends_with(';') {
                if current.len() >= 2 || current[0].len() >= 5 {
                    phrases.push(current.join(" "));
                }
                current.clear();
            }
        } else if !current.is_empty() {
            if current.len() >= 2 || current[0].len() >= 5 {
                phrases.push(current.join(" "));
            }
            current.clear();
        }
    }
    if !current.is_empty() && (current.len() >= 2 || current[0].len() >= 5) {
        phrases.push(current.join(" "));
    }
    phrases
}

fn signal_tokens(text: &str) -> Vec<String> {
    let mut out = Vec::new();
    for raw in text.split(|c: char| !c.is_ascii_alphanumeric() && c != '_' && c != '-') {
        let token = raw.trim().trim_matches('-');
        if token.len() < 3 {
            continue;
        }
        if is_stopword(token) {
            continue;
        }
        if !out
            .iter()
            .any(|existing: &String| existing.eq_ignore_ascii_case(token))
        {
            out.push(token.to_string());
        }
    }
    out
}

fn is_stopword(token: &str) -> bool {
    matches!(
        token.to_ascii_lowercase().as_str(),
        "about"
            | "according"
            | "added"
            | "after"
            | "against"
            | "answer"
            | "are"
            | "across"
            | "before"
            | "between"
            | "can"
            | "compared"
            | "complete"
            | "does"
            | "during"
            | "explain"
            | "fit"
            | "for"
            | "from"
            | "given"
            | "good"
            | "has"
            | "have"
            | "how"
            | "in"
            | "instead"
            | "into"
            | "its"
            | "list"
            | "provide"
            | "readme"
            | "row"
            | "run"
            | "should"
            | "than"
            | "that"
            | "the"
            | "their"
            | "there"
            | "these"
            | "they"
            | "this"
            | "toolbox"
            | "using"
            | "user"
            | "wants"
            | "what"
            | "when"
            | "where"
            | "which"
            | "while"
            | "with"
            | "without"
            | "would"
            | "yourself"
            | "and"
    )
}

fn add_path_match_candidates(
    root: &str,
    question: &str,
    candidates: &mut Vec<ScoutCandidate>,
) -> Result<()> {
    let files = discover_markdown_files(root)?;
    let question_tokens = signal_tokens(question);
    if question_tokens.is_empty() {
        return Ok(());
    }
    for path in files {
        let normalized = normalize_for_match(&path);
        let mut hits = 0;
        for token in &question_tokens {
            if normalized.contains(&normalize_for_match(token)) {
                hits += 1;
            }
        }
        let source_like_path = scout_path_quality_score(&path) > 0;
        let policy_or_multi_question = wants_multi_file_evidence(question)
            || question.to_ascii_lowercase().contains("why")
            || question.to_ascii_lowercase().contains("rule")
            || question.to_ascii_lowercase().contains("policy")
            || question.to_ascii_lowercase().contains("safety")
            || question.to_ascii_lowercase().contains("privacy");
        let required_hits = if source_like_path && policy_or_multi_question {
            1
        } else {
            2
        };
        if hits < required_hits {
            continue;
        }
        let parsed = load_markdown(&path)?;
        for section in parsed.doc.sections.iter().take(2) {
            candidates.push(ScoutCandidate {
                path: path.clone(),
                section_id: section.id.clone(),
                score: 240 + hits * 30,
                reason: "path/name match".to_string(),
            });
        }
        if let Some(best) = best_named_section(&parsed.doc.sections, question) {
            candidates.push(ScoutCandidate {
                path: path.clone(),
                section_id: best.id.clone(),
                score: 300
                    + hits * 45
                    + scout_path_quality_score(&path)
                    + scout_heading_score(&best.path, &best.title, question),
                reason: "path/name match + relevant heading".to_string(),
            });
        }
    }
    Ok(())
}

fn add_named_target_candidates(
    root: &str,
    question: &str,
    candidates: &mut Vec<ScoutCandidate>,
) -> Result<()> {
    let targets = target_phrases_from_question(question);
    if targets.len() < 2 {
        return Ok(());
    }

    for target in targets {
        let results = search_files(root, &target, false, false, 12, 2)?;
        let mut seen_files = HashSet::new();
        for result in results.into_iter().take(8) {
            let content_authority =
                scout_source_authority_score(&result.path, &result.section_path, "", question);
            candidates.push(ScoutCandidate {
                path: result.path.clone(),
                section_id: result.section_id.clone(),
                score: 620
                    + content_authority
                    + result.match_count as i32 * 20
                    + scout_heading_score(&result.section_path, &result.section_title, question),
                reason: format!("named target: {target}"),
            });

            if seen_files.insert(result.path.clone()) {
                let parsed = load_markdown(&result.path)?;
                if let Some(best) = best_named_section(&parsed.doc.sections, question) {
                    candidates.push(ScoutCandidate {
                        path: result.path.clone(),
                        section_id: best.id.clone(),
                        score: 760
                            + scout_source_authority_score(&result.path, &best.path, "", question)
                            + scout_heading_score(&best.path, &best.title, question),
                        reason: format!("named target + relevant heading: {target}"),
                    });
                }
            }
        }
    }
    Ok(())
}

fn normalize_for_match(text: &str) -> String {
    text.chars()
        .map(|c| {
            if c.is_ascii_alphanumeric() {
                c.to_ascii_lowercase()
            } else {
                ' '
            }
        })
        .collect::<String>()
}

fn best_named_section<'a>(sections: &'a [Section], question: &str) -> Option<&'a Section> {
    let mut best: Option<(&Section, i32)> = None;
    score_named_sections(sections, question, &mut best);
    best.map(|(section, _)| section)
}

fn score_named_sections<'a>(
    sections: &'a [Section],
    question: &str,
    best: &mut Option<(&'a Section, i32)>,
) {
    for section in sections {
        let title = section.title.to_ascii_lowercase();
        let mut score = 0;
        for (needle, weight) in [
            ("usage", 30),
            ("install", 30),
            ("quick", 20),
            ("example", 20),
            ("configuration", 20),
            ("training", 20),
            ("preprocess", 20),
            ("limitation", 25),
            ("caveat", 25),
            ("documentation", 10),
            ("overview", 10),
            ("policy", 120),
            ("privacy", 110),
            ("rule", 115),
            ("counting", 110),
            ("safety", 115),
            ("risk", 90),
            ("current", 75),
            ("loader", 75),
            ("stale", 75),
            ("do not use", 90),
        ] {
            if title.contains(needle) {
                score += weight;
            }
        }
        for token in signal_tokens(question).iter().take(8) {
            if title.contains(&token.to_ascii_lowercase()) {
                score += 25;
            }
        }
        if score > 0 && best.is_none_or(|(_, best_score)| score > best_score) {
            *best = Some((section, score));
        }
        score_named_sections(&section.children, question, best);
    }
}

fn add_neighbor_candidates(candidates: &mut Vec<ScoutCandidate>) -> Result<()> {
    let originals = candidates.to_vec();
    let mut by_file: HashMap<String, HashSet<String>> = HashMap::new();
    for candidate in &originals {
        by_file
            .entry(candidate.path.clone())
            .or_default()
            .insert(candidate.section_id.clone());
    }
    for (path, ids) in by_file {
        let parsed = load_markdown(&path)?;
        let flat = flatten_doc_sections(&parsed.doc.sections);
        for (idx, section) in flat.iter().enumerate() {
            if !ids.contains(&section.id) {
                continue;
            }
            let start = idx.saturating_sub(1);
            let end = (idx + 1).min(flat.len().saturating_sub(1));
            for neighbor in flat.iter().take(end + 1).skip(start) {
                if neighbor.id == section.id {
                    continue;
                }
                candidates.push(ScoutCandidate {
                    path: path.clone(),
                    section_id: neighbor.id.clone(),
                    score: 70,
                    reason: format!("neighbor of §{}", section.id),
                });
            }
        }
    }
    Ok(())
}

fn flatten_doc_sections(sections: &[Section]) -> Vec<&Section> {
    let mut out = Vec::new();
    collect_flat_sections(sections, &mut out);
    out.sort_by_key(|section| section.line_start);
    out
}

fn collect_flat_sections<'a>(sections: &'a [Section], out: &mut Vec<&'a Section>) {
    for section in sections {
        out.push(section);
        collect_flat_sections(&section.children, out);
    }
}

fn dedupe_scout_candidates(candidates: &mut Vec<ScoutCandidate>) {
    let mut seen = HashSet::new();
    candidates
        .retain(|candidate| seen.insert(format!("{}::{}", candidate.path, candidate.section_id)));
}

fn prune_parent_scout_candidates(candidates: &mut Vec<ScoutCandidate>) {
    let ids_by_file: HashMap<String, Vec<String>> =
        candidates
            .iter()
            .fold(HashMap::new(), |mut by_file, candidate| {
                by_file
                    .entry(candidate.path.clone())
                    .or_default()
                    .push(candidate.section_id.clone());
                by_file
            });

    candidates.retain(|candidate| {
        !ids_by_file.get(&candidate.path).is_some_and(|ids| {
            ids.iter()
                .any(|id| is_child_section_id(&candidate.section_id, id))
        })
    });
}

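// For multi-file comparison questions, re-selects candidates so evidence is
// spread across files: first try greedy named-target coverage, then fall
// back to capping each file at two sections before refilling by rank.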
fn diversify_scout_candidates(
    candidates: &mut Vec<ScoutCandidate>,
    max_sections: usize,
    question: &str,
) {
    if !wants_multi_file_evidence(question) || candidates.len() <= max_sections {
        return;
    }

    let mut targets = target_phrases_from_question(question);
    if targets.len() < 2 {
        targets = target_tokens_from_question(question);
    }
    if let Some(selected) =
        target_coverage_scout_candidates(candidates, max_sections, &targets, question)
    {
        *candidates = selected;
        return;
    }

    let mut selected = Vec::new();
    let mut selected_keys = HashSet::new();
    let mut per_file_count: HashMap<String, usize> = HashMap::new();

    for candidate in candidates.iter() {
        if selected.len() >= max_sections {
            break;
        }
        let count = per_file_count.get(&candidate.path).copied().unwrap_or(0);
        if count >= 2 {
            continue;
        }
        let key = format!("{}::{}", candidate.path, candidate.section_id);
        if selected_keys.insert(key) {
            selected.push(candidate.clone());
            *per_file_count.entry(candidate.path.clone()).or_default() += 1;
        }
    }

    for candidate in candidates.iter() {
        if selected.len() >= max_sections {
            break;
        }
        let key = format!("{}::{}", candidate.path, candidate.section_id);
        if selected_keys.insert(key) {
            selected.push(candidate.clone());
        }
    }

    if selected.len() >= 2 {
        *candidates = selected;
    }
}

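// Greedy max-coverage selection over named targets: each round picks the
// candidate that covers the most not-yet-covered targets (weighted by rank,
// authority, and a per-file repetition penalty), then tops up remaining
// slots by rank while skipping strongly low-authority sections.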
fn target_coverage_scout_candidates(
    candidates: &[ScoutCandidate],
    max_sections: usize,
    targets: &[String],
    question: &str,
) -> Option<Vec<ScoutCandidate>> {
    if targets.len() < 2 || max_sections == 0 {
        return None;
    }

    let mut cache: HashMap<String, crate::parse::ParsedMarkdown> = HashMap::new();
    let mut selected = Vec::new();
    let mut selected_keys = HashSet::new();
    let mut covered_targets: HashSet<String> = HashSet::new();
    let mut per_file_count: HashMap<String, usize> = HashMap::new();

    while selected.len() < max_sections {
        let mut best_idx = None;
        let mut best_score = i32::MIN;
        let mut best_new_targets = HashSet::new();

        for (idx, candidate) in candidates.iter().enumerate() {
            let key = format!("{}::{}", candidate.path, candidate.section_id);
            if selected_keys.contains(&key) {
                continue;
            }
            let Ok((target_hits, authority)) =
                scout_candidate_target_hits(candidate, targets, question, &mut cache)
            else {
                continue;
            };
            let new_targets = target_hits
                .difference(&covered_targets)
                .cloned()
                .collect::<HashSet<_>>();
            if new_targets.is_empty() && covered_targets.len() < targets.len() {
                continue;
            }
            let same_file_penalty =
                per_file_count.get(&candidate.path).copied().unwrap_or(0) as i32 * 160;
            let coverage_gain = new_targets.len() as i32 * 420 + target_hits.len() as i32 * 35;
            let score = candidate.score + authority + coverage_gain - same_file_penalty;
            if score > best_score {
                best_score = score;
                best_idx = Some(idx);
                best_new_targets = new_targets;
            }
        }

        let Some(idx) = best_idx else {
            break;
        };
        let candidate = candidates[idx].clone();
        let key = format!("{}::{}", candidate.path, candidate.section_id);
        selected_keys.insert(key);
        for target in best_new_targets {
            covered_targets.insert(target);
        }
        *per_file_count.entry(candidate.path.clone()).or_default() += 1;
        selected.push(candidate);

        if covered_targets.len() >= targets.len() {
            break;
        }
    }

    if selected.len() < 2 {
        return None;
    }

    for candidate in candidates {
        if selected.len() >= max_sections {
            break;
        }
        let key = format!("{}::{}", candidate.path, candidate.section_id);
        if selected_keys.contains(&key) {
            continue;
        }
        let Ok((_, authority)) =
            scout_candidate_target_hits(candidate, targets, question, &mut cache)
        else {
            continue;
        };
        if authority < -250 && selected.len() >= 2 {
            continue;
        }
        selected_keys.insert(key);
        selected.push(candidate.clone());
    }

    Some(selected)
}

fn ensure_named_target_coverage(
    selected: &mut Vec<ScoutCandidate>,
    pool: &[ScoutCandidate],
    max_sections: usize,
    question: &str,
) -> Result<()> {
    let targets = target_phrases_from_question(question);
    if targets.len() < 2 || max_sections == 0 {
        return Ok(());
    }

    let mut cache: HashMap<String, crate::parse::ParsedMarkdown> = HashMap::new();
    let mut selected_keys = selected
        .iter()
        .map(|candidate| format!("{}::{}", candidate.path, candidate.section_id))
        .collect::<HashSet<_>>();
    let mut covered = HashSet::new();
    for candidate in selected.iter() {
        let (hits, _) = scout_candidate_target_hits(candidate, &targets, question, &mut cache)?;
        covered.extend(hits);
    }

    for target in targets {
        if covered.contains(&target) {
            continue;
        }

        let mut best: Option<(ScoutCandidate, i32)> = None;
        for candidate in pool {
            let key = format!("{}::{}", candidate.path, candidate.section_id);
            if selected_keys.contains(&key) {
                continue;
            }
            let (hits, authority) = scout_candidate_target_hits(
                candidate,
                std::slice::from_ref(&target),
                question,
                &mut cache,
            )?;
            if hits.is_empty() {
                continue;
            }
            let score = candidate.score + authority;
            if best
                .as_ref()
                .is_none_or(|(_, best_score)| score > *best_score)
            {
                best = Some((candidate.clone(), score));
            }
        }

        let Some((candidate, _)) = best else {
            continue;
        };
        let key = format!("{}::{}", candidate.path, candidate.section_id);
        if selected.len() >= max_sections {
            selected.pop();
        }
        selected_keys.insert(key);
        covered.insert(target);
        selected.push(candidate);
    }

    Ok(())
}

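// Returns which target phrases a candidate section mentions, plus an
// authority score. Targets found in the file path or heading ("source"
// hits) are weighted far above content-only hits.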
fn scout_candidate_target_hits(
    candidate: &ScoutCandidate,
    targets: &[String],
    question: &str,
    cache: &mut HashMap<String, crate::parse::ParsedMarkdown>,
) -> Result<(HashSet<String>, i32)> {
    if !cache.contains_key(&candidate.path) {
        cache.insert(candidate.path.clone(), load_markdown(&candidate.path)?);
    }
    let parsed = cache.get(&candidate.path).expect("cached parsed markdown");
    let Some(section) = parsed.doc.find_section_by_id(&candidate.section_id) else {
        return Ok((HashSet::new(), scout_path_quality_score(&candidate.path)));
    };
    let content = section.extract_content(&parsed.lines).join("\n");
    let source_haystack =
        normalize_compact(&format!("{}\n{}", candidate.path, section.path.join(" ")));
    let haystack = normalize_compact(&format!(
        "{}\n{}\n{}",
        candidate.path,
        section.path.join(" "),
        content
    ));
    let hits = targets
        .iter()
        .filter(|target| haystack.contains(&normalize_compact(target)))
        .cloned()
        .collect::<HashSet<_>>();
    let source_hit_count = targets
        .iter()
        .filter(|target| source_haystack.contains(&normalize_compact(target)))
        .count() as i32;
    let mut authority =
        scout_source_authority_score(&candidate.path, &section.path, &content, question);
    authority += source_hit_count * 360;
    if source_hit_count == 0 && !hits.is_empty() {
        authority -= 120;
    }
    Ok((hits, authority))
}

/// Returns true when `child` is a dotted-ID descendant of `parent`
/// (e.g. "2.1" under "2"), not merely a longer sibling like "21".
fn is_child_section_id(parent: &str, child: &str) -> bool {
    child.len() > parent.len()
        && child.starts_with(parent)
        && child[parent.len()..].starts_with('.')
}

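// A small sanity-test sketch for the dotted-ID hierarchy; the cases below
// are illustrative examples, not taken from the project's original suite.
#[cfg(test)]
mod section_id_tests {
    use super::is_child_section_id;

    #[test]
    fn child_ids_require_a_dot_boundary() {
        assert!(is_child_section_id("2", "2.1"));
        assert!(is_child_section_id("2.1", "2.1.3"));
        // "21" shares the "2" prefix but is a sibling, not a child.
        assert!(!is_child_section_id("2", "21"));
        assert!(!is_child_section_id("2.1", "2.1"));
    }
}
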
/// Narrows candidates after ranking: multi-file questions keep files that
/// match named targets, while single-file questions let a distinctive or
/// clearly dominant top file crowd out the rest.
fn focused_scout_candidates(candidates: &[ScoutCandidate], question: &str) -> Vec<ScoutCandidate> {
    let Some(top) = candidates.first() else {
        return Vec::new();
    };
    if wants_multi_file_evidence(question) {
        let targets = target_tokens_from_question(question);
        if !targets.is_empty() {
            let focused = candidates
                .iter()
                .filter(|candidate| path_matches_any_target(&candidate.path, &targets))
                .cloned()
                .collect::<Vec<_>>();
            if focused.len() >= 2 {
                return focused;
            }
        }
        return candidates.to_vec();
    }
    let top_path_tokens = distinctive_path_tokens(&top.path);
    if scout_path_quality_score(&top.path) > 0 && !top_path_tokens.is_empty() {
        let focused = candidates
            .iter()
            .filter(|candidate| {
                candidate.path == top.path
                    || distinctive_path_tokens(&candidate.path)
                        .iter()
                        .any(|token| top_path_tokens.contains(token))
            })
            .cloned()
            .collect::<Vec<_>>();
        if focused.len() >= 2 {
            return focused;
        }
    }
    let best_other_score = candidates
        .iter()
        .find(|candidate| candidate.path != top.path)
        .map(|candidate| candidate.score);
    let dominant_file =
        top.score >= 280 && best_other_score.is_none_or(|score| top.score - score >= 80);
    if dominant_file {
        candidates
            .iter()
            .filter(|candidate| candidate.path == top.path)
            .cloned()
            .collect()
    } else {
        candidates.to_vec()
    }
}

/// Reorders evidence so rationale/policy sections come first when the
/// question asks for reasons rather than raw facts; order is stable otherwise.
fn order_scout_evidence(
    mut candidates: Vec<ScoutCandidate>,
    question: &str,
) -> Result<Vec<ScoutCandidate>> {
    let question_l = question.to_ascii_lowercase();
    if !wants_rationale_or_policy_evidence(&question_l) {
        return Ok(candidates);
    }

    let mut cache: HashMap<String, crate::parse::ParsedMarkdown> = HashMap::new();
    let mut scored = Vec::new();
    for (idx, candidate) in candidates.drain(..).enumerate() {
        if !cache.contains_key(&candidate.path) {
            cache.insert(candidate.path.clone(), load_markdown(&candidate.path)?);
        }
        let parsed = cache.get(&candidate.path).expect("cached parsed markdown");
        let score = parsed
            .doc
            .find_section_by_id(&candidate.section_id)
            .map(|section| {
                let content = section.extract_content(&parsed.lines).join("\n");
                candidate.score
                    + scout_rationale_evidence_score(&section.path, &content, &question_l)
            })
            .unwrap_or(candidate.score);
        scored.push((score, idx, candidate));
    }
    scored.sort_by(|lhs, rhs| rhs.0.cmp(&lhs.0).then(lhs.1.cmp(&rhs.1)));
    Ok(scored
        .into_iter()
        .map(|(_, _, candidate)| candidate)
        .collect())
}

fn wants_rationale_or_policy_evidence(question_l: &str) -> bool {
    [
        "why",
        "what makes",
        "rather than",
        "policy",
        "privacy",
        "safety",
        "allow",
        "allows",
        "exporting",
        "mask",
        "masking",
        "rationale",
        "reason",
    ]
    .iter()
    .any(|needle| question_l.contains(needle))
}

fn asks_for_metric_or_table(question_l: &str) -> bool {
    [
        "metric",
        "score",
        "baseline",
        "table",
        "row",
        "0.",
        "current score",
    ]
    .iter()
    .any(|needle| question_l.contains(needle))
}

fn scout_rationale_evidence_score(section_path: &[String], content: &str, question_l: &str) -> i32 {
    let text = format!("{}\n{}", section_path.join(" "), content).to_ascii_lowercase();
    let mut score = 0;
    score += scout_rationale_marker_score(&text);
    score += scout_question_token_overlap_score(&text, question_l, 28, 220);
    if !asks_for_metric_or_table(question_l) {
        for needle in [
            "metric | score",
            "| score |",
            "baseline",
            "current metric",
            "benchmark",
            "leaderboard",
        ] {
            if text.contains(needle) {
                score -= 220;
            }
        }
    }
    score
}

/// Extracts lowercase file-stem tokens that distinguish a path, dropping
/// short tokens and generic stems like "readme" or "notes".
fn distinctive_path_tokens(path: &str) -> HashSet<String> {
    let stem = Path::new(path)
        .file_stem()
        .and_then(|name| name.to_str())
        .unwrap_or(path);
    stem.split(|c: char| !c.is_ascii_alphanumeric())
        .map(str::to_ascii_lowercase)
        .filter(|token| {
            token.len() >= 4
                && !matches!(
                    token.as_str(),
                    "readme"
                        | "index"
                        | "docs"
                        | "doc"
                        | "notes"
                        | "note"
                        | "eval"
                        | "scene"
                        | "card"
                        | "annotation"
                        | "policy"
                        | "scratch"
                        | "draft"
                        | "copy"
                        | "copied"
                        | "tmp"
                        | "temp"
                        | "anchor"
                )
        })
        .collect()
}

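// Illustrative sketch test (hypothetical path, not from the original suite):
// short tokens and generic stems are dropped, distinctive stems survive.
#[cfg(test)]
mod path_token_tests {
    use super::distinctive_path_tokens;

    #[test]
    fn generic_stem_tokens_are_filtered() {
        let tokens = distinctive_path_tokens("docs/harbor-17-notes.md");
        assert!(tokens.contains("harbor"), "{tokens:?}");
        // "17" is shorter than four characters; "notes" is a generic stem.
        assert!(!tokens.contains("17"), "{tokens:?}");
        assert!(!tokens.contains("notes"), "{tokens:?}");
    }
}
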
/// Splits capitalized question phrases into individual lowercase tokens
/// suitable for matching against file paths.
fn target_tokens_from_question(question: &str) -> Vec<String> {
    let mut out = Vec::new();
    for phrase in extract_capitalized_phrases(question) {
        for token in signal_tokens(&phrase) {
            for part in token.split('-') {
                let part = part.to_ascii_lowercase();
                if part.len() >= 4 && !is_stopword(&part) && !out.contains(&part) {
                    out.push(part);
                }
            }
        }
    }
    out
}

/// Extracts named-entity phrases (those containing an uppercase letter or a
/// digit) from the question, stripping comparison filler words.
fn target_phrases_from_question(question: &str) -> Vec<String> {
    let mut out = Vec::new();
    for phrase in extract_capitalized_phrases(question) {
        if !phrase
            .chars()
            .any(|ch| ch.is_ascii_uppercase() || ch.is_ascii_digit())
        {
            continue;
        }
        let tokens = signal_tokens(&phrase)
            .into_iter()
            .filter(|token| {
                !matches!(
                    token.to_ascii_lowercase().as_str(),
                    "compare" | "contrast" | "across" | "between" | "which"
                )
            })
            .collect::<Vec<_>>();
        if tokens.is_empty() {
            continue;
        }
        let phrase = tokens.join(" ");
        if phrase.len() >= 4 && !out.iter().any(|existing| existing == &phrase) {
            out.push(phrase);
        }
    }
    out
}

#[cfg(test)]
mod scout_tests {
    use super::target_phrases_from_question;

    #[test]
    fn target_phrases_keep_hyphenated_entities() {
        let targets = target_phrases_from_question(
            "Across Harbor-17, Rainy Rail Depot, and Night Bus Stop, how do the docs treat reflected or glare-corrupted text?",
        );
        assert!(targets.contains(&"Harbor-17".to_string()), "{targets:?}");
        assert!(
            targets.contains(&"Rainy Rail Depot".to_string()),
            "{targets:?}"
        );
        assert!(
            targets.contains(&"Night Bus Stop".to_string()),
            "{targets:?}"
        );
    }
}

fn path_matches_any_target(path: &str, targets: &[String]) -> bool {
    let path_l = normalize_compact(path);
    targets
        .iter()
        .any(|target| path_l.contains(&normalize_compact(target)))
}

fn normalize_compact(text: &str) -> String {
    text.chars()
        .filter(|c| c.is_ascii_alphanumeric())
        .map(|c| c.to_ascii_lowercase())
        .collect()
}

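// Sketch test: normalization keeps only ASCII alphanumerics, lowercased, so
// "Harbor-17" and "harbor 17" compare equal. The cases are illustrative.
#[cfg(test)]
mod normalize_tests {
    use super::normalize_compact;

    #[test]
    fn punctuation_case_and_spaces_are_ignored() {
        assert_eq!(normalize_compact("Harbor-17"), "harbor17");
        assert_eq!(normalize_compact("harbor 17"), "harbor17");
        assert_eq!(normalize_compact("docs/Harbor_17.md"), "docsharbor17md");
    }
}
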
/// Renders the [files] block: one entry per candidate file with up to six
/// picked section ids and up to six other sections for navigation.
fn render_scout_file_maps(
    out: &mut String,
    candidates: &[ScoutCandidate],
    max_files: usize,
) -> Result<()> {
    let mut files = Vec::new();
    let mut seen = HashSet::new();
    for candidate in candidates {
        if seen.insert(candidate.path.clone()) {
            files.push(candidate.path.clone());
        }
        if files.len() >= max_files {
            break;
        }
    }
    out.push_str("[files]\n");
    for path in files {
        let summaries = get_doc_section_summaries(&path)?;
        let picked: HashSet<&str> = candidates
            .iter()
            .filter(|c| c.path == path)
            .map(|c| c.section_id.as_str())
            .collect();
        let sections = summaries
            .iter()
            .filter(|(id, title)| title != "<preamble>" && picked.contains(id.as_str()))
            .map(|(id, title)| format!("§{} {}", id, title))
            .take(6)
            .collect::<Vec<_>>();
        let also = summaries
            .iter()
            .filter(|(id, title)| title != "<preamble>" && !picked.contains(id.as_str()))
            .take(6)
            .map(|(id, title)| format!("§{} {}", id, title))
            .collect::<Vec<_>>();
        out.push_str(&format!("- {}\n", path));
        if !sections.is_empty() {
            out.push_str(&format!(" picked: {}\n", sections.join(" · ")));
        }
        if !also.is_empty() {
            out.push_str(&format!(" also: {}\n", also.join(" · ")));
        }
    }
    Ok(())
}

/// Renders the [highlights] block: scores individual candidate lines by
/// question-token overlap, flag/table signals, and rationale markers, then
/// emits the best-scoring lines up to `max_lines`.
fn render_scout_highlights(
    out: &mut String,
    candidates: &[ScoutCandidate],
    question: &str,
    max_lines: usize,
) -> Result<()> {
    let tokens: Vec<String> = signal_tokens(question)
        .into_iter()
        .map(|token| token.to_ascii_lowercase())
        .collect();
    let question_l = question.to_ascii_lowercase();
    let wants_code = ["cli", "command", "install", "invoke"]
        .iter()
        .any(|needle| question_l.contains(needle));
    let mut emitted = 0usize;
    let mut seen = HashSet::new();
    let mut highlights = Vec::new();
    let mut cache: HashMap<String, crate::parse::ParsedMarkdown> = HashMap::new();

    for candidate in candidates {
        if !cache.contains_key(&candidate.path) {
            cache.insert(candidate.path.clone(), load_markdown(&candidate.path)?);
        }
        let parsed = cache.get(&candidate.path).expect("cached parsed markdown");
        let Some(section) = parsed.doc.find_section_by_id(&candidate.section_id) else {
            continue;
        };
        if is_low_value_section_for_question(section, &question_l) {
            continue;
        }
        let lines = section.extract_content(&parsed.lines);
        for (idx, line) in lines.iter().enumerate() {
            if emitted >= max_lines {
                break;
            }
            let trimmed = line.trim();
            let lower = trimmed.to_ascii_lowercase();
            if is_noisy_highlight_line(trimmed) && !is_relevant_table_line(trimmed, &tokens) {
                continue;
            }
            let token_hits = tokens.iter().filter(|token| lower.contains(*token)).count();
            let useful_code_line = trimmed.contains("--")
                || (wants_code
                    && (trimmed.contains('`')
                        || trimmed.starts_with("pip ")
                        || trimmed.starts_with("conda ")
                        || trimmed.starts_with("python ")
                        || trimmed.starts_with("git ")
                        || trimmed.starts_with("cmake ")
                        || trimmed.starts_with("make ")));
            let useful_table_line = is_relevant_table_line(trimmed, &tokens);
            if token_hits == 0 && !useful_code_line && !useful_table_line {
                continue;
            }
            let mut score = token_hits as i32 * 20;
            if useful_table_line {
                score += 80;
            }
            if wants_rationale_or_policy_evidence(&question_l) {
                score += scout_rationale_highlight_score(&lower, &question_l);
            }
            for (needle, weight) in [
                ("--", 70),
                ("cpu", 45),
                ("gpu", 45),
                ("warning", 45),
                ("disable", 45),
                ("configuration", 30),
                ("header", 30),
                ("human-readable", 30),
                ("supported formats", 30),
                ("convert", 30),
            ] {
                if lower.contains(needle) {
                    score += weight;
                }
            }
            highlights.push(ScoutHighlight {
                score,
                path: candidate.path.clone(),
                section_id: section.id.clone(),
                line_no: section.line_start + idx,
                line: if useful_table_line {
                    scout_table_context(lines, idx)
                } else {
                    scout_highlight_context(lines, idx, &lower)
                },
            });
        }
    }

    highlights.sort_by(|lhs, rhs| {
        rhs.score
            .cmp(&lhs.score)
            .then(lhs.path.cmp(&rhs.path))
            .then(lhs.line_no.cmp(&rhs.line_no))
    });
    for highlight in highlights {
        if emitted >= max_lines {
            break;
        }
        emit_scout_highlight(out, &mut seen, &mut emitted, &highlight);
    }

    if emitted == 0 {
        out.push_str("- no compact highlights; read evidence sections below\n");
    }
    Ok(())
}

fn scout_rationale_highlight_score(lower: &str, question_l: &str) -> i32 {
    let mut score = 0;
    score += scout_rationale_marker_score(lower) / 2;
    score += scout_question_token_overlap_score(lower, question_l, 18, 120);
    if !asks_for_metric_or_table(question_l) {
        for needle in ["| score |", "baseline", "current metric", "benchmark", "0."] {
            if lower.contains(needle) {
                score -= 140;
            }
        }
    }
    score
}

fn scout_rationale_marker_score(lower: &str) -> i32 {
    let mut score = 0;
    for (needles, weight) in [
        (
            &["rule:", "rule ", "policy", "guideline", "standard"][..],
            180,
        ),
        (
            &[
                "known risk",
                "risk",
                "unsafe",
                "wrong answer",
                "misread",
                "confus",
                "ambiguous",
            ][..],
            160,
        ),
        (
            &[
                "privacy",
                "personal data",
                "identifiable",
                "redact",
                "mask",
                "export",
                "leak",
            ][..],
            150,
        ),
        (
            &[
                "must",
                "should",
                "requires",
                "allow",
                "not enough",
                "do not",
                "rather than",
            ][..],
            100,
        ),
        (
            &["because", "reason", "rationale", "therefore", "so that"][..],
            80,
        ),
    ] {
        if needles.iter().any(|needle| lower.contains(needle)) {
            score += weight;
        }
    }
    score
}

fn scout_question_token_overlap_score(
    lower: &str,
    question_l: &str,
    per_token: i32,
    cap: i32,
) -> i32 {
    let hits = signal_tokens(question_l)
        .into_iter()
        .map(|token| token.to_ascii_lowercase())
        .filter(|token| lower.contains(token))
        .count() as i32;
    (hits * per_token).min(cap)
}

fn is_noisy_highlight_line(line: &str) -> bool {
    line.is_empty()
        || line.starts_with('|')
        || line == "```"
        || line == "```shell"
        || line.trim_matches('~') == "```"
        || line.trim_matches('~') == "```shell"
        || line.starts_with("<!--")
        || line.starts_with("[!")
        || line.starts_with("![")
        || line.starts_with("[![")
        || line.starts_with("@article")
        || line.starts_with("@inproceedings")
        || (line.starts_with('[') && line.contains("]: "))
        || line.len() > 1000
}

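// Sketch test for the noise filter; the sample lines are illustrative.
#[cfg(test)]
mod noisy_line_tests {
    use super::is_noisy_highlight_line;

    #[test]
    fn fences_comments_and_images_are_noisy() {
        assert!(is_noisy_highlight_line("```"));
        assert!(is_noisy_highlight_line("<!-- mdlens -->"));
        assert!(is_noisy_highlight_line("![diagram](img.png)"));
        assert!(!is_noisy_highlight_line("Use --threads to set parallelism."));
    }
}
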
fn is_relevant_table_line(line: &str, tokens: &[String]) -> bool {
    line.starts_with('|')
        && line.matches('|').count() >= 3
        && !is_table_separator_line(line)
        && tokens
            .iter()
            .any(|token| line.to_ascii_lowercase().contains(token))
}

fn is_table_separator_line(line: &str) -> bool {
    line.chars()
        .all(|ch| ch == '|' || ch == '-' || ch == ':' || ch.is_whitespace())
}

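// Sketch test: a data row counts as relevant only when it carries a query
// token, and separator rows never do. The rows below are illustrative.
#[cfg(test)]
mod table_line_tests {
    use super::{is_relevant_table_line, is_table_separator_line};

    #[test]
    fn separator_rows_are_never_relevant() {
        let tokens = vec!["latency".to_string()];
        assert!(is_relevant_table_line("| latency | 12ms | ok |", &tokens));
        assert!(is_table_separator_line("| --- | :---: | --- |"));
        assert!(!is_relevant_table_line("| --- | :---: | --- |", &tokens));
    }
}
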
fn scout_table_context(lines: &[String], idx: usize) -> String {
    let row = lines[idx].trim();
    let header = (1..idx).rev().find_map(|candidate_idx| {
        let separator = lines[candidate_idx].trim();
        if !separator.starts_with('|') || !is_table_separator_line(separator) {
            return None;
        }
        let header = lines[candidate_idx - 1].trim();
        header.starts_with('|').then_some(header)
    });

    match header {
        Some(header) if header != row => format!("{header} => {row}"),
        _ => row.to_string(),
    }
}

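// Sketch test: a matched table row is prefixed with its header row so the
// highlight stays interpretable. The table content is illustrative.
#[cfg(test)]
mod table_context_tests {
    use super::scout_table_context;

    #[test]
    fn rows_are_paired_with_their_header() {
        let lines: Vec<String> = ["| metric | score |", "| --- | --- |", "| accuracy | 0.91 |"]
            .iter()
            .map(|s| s.to_string())
            .collect();
        assert_eq!(
            scout_table_context(&lines, 2),
            "| metric | score | => | accuracy | 0.91 |"
        );
    }
}
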
fn scout_highlight_context(lines: &[String], idx: usize, lower: &str) -> String {
    let radius = if lower.contains("disable") || lower.contains("warning") {
        5
    } else if lines[idx].trim().len() < 300 {
        2
    } else {
        0
    };
    let start = idx.saturating_sub(radius);
    let end = (idx + radius).min(lines.len().saturating_sub(1));
    let mut parts = Vec::new();
    for line in &lines[start..=end] {
        let trimmed = line.trim();
        if is_noisy_highlight_line(trimmed) && !trimmed.starts_with('|') {
            continue;
        }
        parts.push(trimmed);
    }
    let mut joined = parts.join(" ");
    if joined.len() > 900 {
        joined.truncate(900);
        joined.push_str("...");
    }
    joined
}

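// Sketch test: short matched lines pull two lines of context on each side,
// joined with spaces. The lines are illustrative.
#[cfg(test)]
mod highlight_context_tests {
    use super::scout_highlight_context;

    #[test]
    fn short_lines_pull_nearby_context() {
        let lines: Vec<String> = ["alpha", "beta", "gamma", "delta"]
            .iter()
            .map(|s| s.to_string())
            .collect();
        assert_eq!(
            scout_highlight_context(&lines, 1, "beta"),
            "alpha beta gamma delta"
        );
    }
}
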
/// Skips citation/reference sections unless the question actually asks
/// about citations, DOIs, or papers.
fn is_low_value_section_for_question(section: &Section, question_l: &str) -> bool {
    let section_path = section.path.join(" ").to_ascii_lowercase();
    let citation_section = section_path.contains("citation")
        || section_path.contains("cite")
        || section_path.contains("references");
    citation_section
        && !["citation", "cite", "doi", "reference", "paper"]
            .iter()
            .any(|needle| question_l.contains(needle))
}

fn emit_scout_highlight(
    out: &mut String,
    seen: &mut HashSet<String>,
    emitted: &mut usize,
    highlight: &ScoutHighlight,
) {
    let key = format!(
        "{}:{}:{}",
        highlight.path, highlight.line_no, highlight.line
    );
    if !seen.insert(key) {
        return;
    }
    out.push_str(&format!(
        "- {} §{} l{}: {}\n",
        highlight.path, highlight.section_id, highlight.line_no, highlight.line
    ));
    *emitted += 1;
}

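// Sketch test for highlight dedupe, assuming ScoutHighlight has exactly the
// fields constructed in render_scout_highlights above.
#[cfg(test)]
mod emit_highlight_tests {
    use super::{emit_scout_highlight, ScoutHighlight};
    use std::collections::HashSet;

    #[test]
    fn duplicate_highlights_are_emitted_once() {
        let highlight = ScoutHighlight {
            score: 10,
            path: "a.md".to_string(),
            section_id: "1".to_string(),
            line_no: 3,
            line: "hit".to_string(),
        };
        let mut out = String::new();
        let mut seen = HashSet::new();
        let mut emitted = 0usize;
        emit_scout_highlight(&mut out, &mut seen, &mut emitted, &highlight);
        emit_scout_highlight(&mut out, &mut seen, &mut emitted, &highlight);
        assert_eq!(emitted, 1);
        assert_eq!(out, "- a.md §1 l3: hit\n");
    }
}
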
/// Renders the [evidence] block: bounded section bodies with parent-heading
/// context, skipping sections that strictly contain a range already emitted
/// for the same file.
fn render_scout_evidence(
    out: &mut String,
    candidates: &[ScoutCandidate],
    question: &str,
    max_tokens: usize,
) -> Result<()> {
    let mut total_tokens = 0usize;
    let mut cache: HashMap<String, crate::parse::ParsedMarkdown> = HashMap::new();
    let mut emitted_ranges: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
    let question_l = question.to_ascii_lowercase();
    for candidate in candidates {
        if total_tokens >= max_tokens {
            out.push_str("\n<!-- mdlens: scout budget exhausted -->\n");
            break;
        }
        if !cache.contains_key(&candidate.path) {
            cache.insert(candidate.path.clone(), load_markdown(&candidate.path)?);
        }
        let parsed = cache.get(&candidate.path).expect("cached parsed markdown");
        let Some(section) = parsed.doc.find_section_by_id(&candidate.section_id) else {
            continue;
        };
        if is_low_value_section_for_question(section, &question_l) {
            continue;
        }
        let ranges = emitted_ranges.entry(candidate.path.clone()).or_default();
        if ranges.iter().any(|(start, end)| {
            section.line_start <= *start
                && section.line_end >= *end
                && (section.line_end - section.line_start) > (*end - *start)
        }) {
            continue;
        }
        let remaining = max_tokens.saturating_sub(total_tokens);
        let section_budget = remaining.min(650);
        let ancestors = section_ancestors(&parsed.doc.sections, &section.id);
        let (content, truncated) =
            scout_section_content(section, &ancestors, &parsed.lines, question, section_budget);
        let emitted_tokens = estimate_tokens(&content);
        if emitted_tokens == 0 {
            continue;
        }
        out.push_str(&format!(
            "\n--- {} §{} {} l{}-{} ~{}t reason={} ---\n",
            candidate.path,
            section.id,
            section.path.join(" > "),
            section.line_start,
            section.line_end,
            section.token_estimate,
            candidate.reason
        ));
        out.push_str(&content);
        if !content.ends_with('\n') {
            out.push('\n');
        }
        ranges.push((section.line_start, section.line_end));
        total_tokens += emitted_tokens;
        if truncated {
            continue;
        }
    }
    Ok(())
}

/// Builds a section's evidence text: parent heading context plus the full
/// body if it fits, else a question-focused excerpt, else a hard truncation.
fn scout_section_content(
    section: &Section,
    ancestors: &[&Section],
    lines: &[String],
    question: &str,
    max_tokens: usize,
) -> (String, bool) {
    let parent_context = scout_parent_context(ancestors, lines, max_tokens.min(220));
    let content_lines = section.extract_content(lines);
    let full = content_lines.join("\n");
    let full_with_context = if parent_context.trim().is_empty() {
        full.clone()
    } else {
        format!("{parent_context}\n...\n{full}")
    };
    let full_tokens = estimate_tokens(&full_with_context);
    if full_tokens <= max_tokens {
        return (full_with_context, false);
    }

    let focused_budget = max_tokens
        .saturating_sub(estimate_tokens(&parent_context))
        .max(max_tokens / 2);
    let focused = scout_focused_excerpt(content_lines, question, focused_budget);
    if !focused.trim().is_empty() {
        if parent_context.trim().is_empty() {
            return (focused, true);
        }
        return (format!("{parent_context}\n...\n{focused}"), true);
    }

    (
        truncate_to_tokens(&full_with_context, max_tokens, TRUNCATION_NOTICE),
        true,
    )
}

/// Collects the chain of ancestor sections leading to `target_id`.
fn section_ancestors<'a>(sections: &'a [Section], target_id: &str) -> Vec<&'a Section> {
    let mut path = Vec::new();
    collect_section_ancestors(sections, target_id, &mut path);
    path
}

fn collect_section_ancestors<'a>(
    sections: &'a [Section],
    target_id: &str,
    path: &mut Vec<&'a Section>,
) -> bool {
    for section in sections {
        if section.id == target_id {
            return true;
        }
        path.push(section);
        if collect_section_ancestors(&section.children, target_id, path) {
            return true;
        }
        path.pop();
    }
    false
}

/// Joins the direct (non-child) content of each ancestor into a compact
/// context block, truncated to the given token budget.
fn scout_parent_context(ancestors: &[&Section], lines: &[String], max_tokens: usize) -> String {
    if ancestors.is_empty() || max_tokens == 0 {
        return String::new();
    }

    let mut parts = Vec::new();
    for ancestor in ancestors {
        let direct = ancestor.extract_direct_content(lines);
        let cleaned = direct
            .iter()
            .map(|line| line.trim_end())
            .filter(|line| !line.trim().is_empty() && !is_noisy_highlight_line(line.trim()))
            .collect::<Vec<_>>()
            .join("\n");
        if cleaned.trim().is_empty() {
            continue;
        }
        parts.push(cleaned);
    }

    let joined = parts.join("\n");
    if estimate_tokens(&joined) <= max_tokens {
        joined
    } else {
        truncate_to_tokens(&joined, max_tokens, TRUNCATION_NOTICE)
    }
}

/// Selects question-relevant lines (plus a small context radius) from a
/// section body, joining gaps with "..." and stopping at the token budget.
fn scout_focused_excerpt(lines: &[String], question: &str, max_tokens: usize) -> String {
    let tokens: Vec<String> = signal_tokens(question)
        .into_iter()
        .map(|token| token.to_ascii_lowercase())
        .collect();
    let question_l = question.to_ascii_lowercase();
    let wants_code = ["cli", "command", "install", "invoke"]
        .iter()
        .any(|needle| question_l.contains(needle));

    let mut selected = BTreeSet::new();
    for (idx, line) in lines.iter().enumerate() {
        let trimmed = line.trim();
        let lower = trimmed.to_ascii_lowercase();
        if is_noisy_highlight_line(trimmed) && !is_relevant_table_line(trimmed, &tokens) {
            continue;
        }
        let token_hits = tokens.iter().filter(|token| lower.contains(*token)).count();
        let code_hit = trimmed.contains("--")
            || (wants_code
                && (trimmed.contains('`')
                    || trimmed.starts_with("pip ")
                    || trimmed.starts_with("conda ")
                    || trimmed.starts_with("python ")
                    || trimmed.starts_with("git ")
                    || trimmed.starts_with("cmake ")
                    || trimmed.starts_with("make ")));
        let table_hit = is_relevant_table_line(trimmed, &tokens);
        if token_hits == 0 && !code_hit && !table_hit {
            continue;
        }
        let radius = if table_hit {
            2
        } else if lower.contains("disable") || lower.contains("warning") || code_hit {
            5
        } else if token_hits >= 2 {
            2
        } else {
            1
        };
        for context_idx in idx.saturating_sub(radius)..=(idx + radius).min(lines.len() - 1) {
            selected.insert(context_idx);
        }
    }

    let mut out = String::new();
    let mut last_idx = None;
    for idx in selected {
        let line = lines[idx].trim_end();
        if line.trim().is_empty() {
            continue;
        }
        if let Some(last) = last_idx {
            if idx > last + 1 && !out.ends_with("\n...\n") {
                out.push_str("...\n");
            }
        }
        let candidate = format!("{out}{line}\n");
        if estimate_tokens(&candidate) > max_tokens {
            out.push_str(TRUNCATION_NOTICE);
            break;
        }
        out = candidate;
        last_idx = Some(idx);
    }

    out
}

/// Entry point for `mdlens pack`: resolves sections by ids, heading paths,
/// or a search query, then renders the packed result as text or JSON.
fn cmd_pack(args: PackArgs) -> Result<()> {
    let dedupe = args.dedupe && !args.no_dedupe;
    let result = if let Some(ref ids_str) = args.ids {
        let ids: Vec<String> = ids_str.split(',').map(|s| s.trim().to_string()).collect();
        pack_by_ids(&args.path, &ids, args.max_tokens, args.parents, dedupe)?
    } else if let Some(ref paths_str) = args.paths {
        let doc = parse_markdown(&args.path)?;
        let path_list: Vec<&str> = paths_str.split(';').collect();
        let mut ids = Vec::new();
        for p in path_list {
            ids.push(find_unique_section_by_path(&doc, p)?.id.clone());
        }
        pack_by_ids(&args.path, &ids, args.max_tokens, args.parents, dedupe)?
    } else if let Some(ref query) = args.search {
        crate::pack::pack_by_search(
            &args.path,
            query,
            args.max_tokens,
            PackSearchOptions {
                include_parents: args.parents,
                dedupe,
                case_sensitive: args.case_sensitive,
                use_regex: args.regex,
                max_results: args.max_results,
                context_lines: args.context_lines,
            },
        )?
    } else {
        return Err(anyhow::anyhow!(
            "exactly one of --ids, --paths, or --search is required"
        ));
    };

    if args.json {
        let output = PackJsonOutput {
            schema_version: 1,
            token_budget: result.token_budget,
            token_estimate: result.token_estimate,
            truncated: result.truncated,
            included: result
                .included
                .iter()
                .map(|inc| PackJsonIncluded {
                    path: inc.path.clone(),
                    section_id: inc.section_id.clone(),
                    section_path: inc.section_path.clone(),
                    line_start: inc.line_start,
                    line_end: inc.line_end,
                    token_estimate: inc.token_estimate,
                    truncated: inc.truncated,
                })
                .collect(),
            content: result.content.clone(),
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
    } else {
        let included_render: Vec<PackIncluded> = result
            .included
            .iter()
            .map(|inc| PackIncluded {
                section_id: inc.section_id.clone(),
                section_title: inc.section_path.last().cloned().unwrap_or_default(),
                line_range: format!("{}-{}", inc.line_start, inc.line_end),
                token_estimate: inc.token_estimate,
            })
            .collect();
        println!(
            "{}",
            render_pack(
                &args.path,
                result.token_budget,
                &included_render,
                &result.content,
                result.truncated
            )
        );
    }

    Ok(())
}

/// Entry point for `mdlens stats`: per-file line/word/token counts, sorted
/// and optionally limited to the top N entries.
fn cmd_stats(args: StatsArgs) -> Result<()> {
    let files = crate::search::discover_markdown_files(&args.path)?;
    let mut entries = Vec::new();

    for file in &files {
        let doc = parse_markdown(file)?;
        entries.push(StatsEntry {
            path: doc.path,
            lines: doc.line_count,
            words: doc.word_count,
            tokens: doc.token_estimate,
        });
    }

    match args.sort {
        StatsSort::Tokens => entries.sort_by_key(|entry| Reverse(entry.tokens)),
        StatsSort::Lines => entries.sort_by_key(|entry| Reverse(entry.lines)),
        StatsSort::Path => entries.sort_by(|lhs, rhs| lhs.path.cmp(&rhs.path)),
    }

    let entries = if let Some(top) = args.top {
        &entries[..std::cmp::min(top, entries.len())]
    } else {
        &entries
    };

    if args.json {
        let output = StatsJsonOutput {
            schema_version: 1,
            entries: entries
                .iter()
                .map(|e| StatsJsonEntry {
                    path: e.path.clone(),
                    lines: e.lines,
                    words: e.words,
                    tokens: e.tokens,
                })
                .collect(),
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
    } else {
        println!("{}", render_stats(entries));
    }

    Ok(())
}

/// Entry point for `mdlens sections`: reads file paths or grep-style hits
/// (from args or stdin) and emits the matching sections within the limits.
fn cmd_sections(args: SectionsArgs) -> Result<()> {
    let stdin = io::stdin();
    let mut inputs: Vec<SectionInput> = Vec::new();

    // Explicit file arguments win; otherwise read paths/hits from stdin.
    if !args.files.is_empty() {
        for f in &args.files {
            let trimmed = f.trim().to_string();
            if !trimmed.is_empty() {
                inputs.push(SectionInput::File(trimmed));
            }
        }
    } else {
        for line in stdin.lock().lines() {
            let line = line?;
            if let Some(input) = parse_sections_input_line(&line) {
                inputs.push(input);
            }
        }
    }

    if inputs.is_empty() {
        return Ok(());
    }

    let dedupe = args.dedupe && !args.no_dedupe;
    let has_hit_input = inputs
        .iter()
        .any(|input| matches!(input, SectionInput::Hit(_)));

    if !has_hit_input {
        let mut paths: Vec<String> = inputs
            .into_iter()
            .filter_map(|input| match input {
                SectionInput::File(path) => Some(path),
                SectionInput::Hit(_) => None,
            })
            .collect();

        if dedupe {
            let mut seen = HashSet::new();
            paths.retain(|p| seen.insert(p.clone()));
        }

        return render_sections_from_paths(args, paths);
    }

    let mut file_order: Vec<String> = Vec::new();
    let mut file_hits: HashMap<String, Vec<usize>> = HashMap::new();

    for input in inputs {
        match input {
            SectionInput::File(path) => {
                if !file_order.iter().any(|existing| existing == &path) {
                    file_order.push(path.clone());
                }
                file_hits.entry(path).or_default();
            }
            SectionInput::Hit(hit) => {
                let entry = file_hits.entry(hit.path.clone()).or_default();
                if !dedupe || !entry.contains(&hit.line) {
                    entry.push(hit.line);
                }
                if !file_order.iter().any(|existing| existing == &hit.path) {
                    file_order.push(hit.path);
                }
            }
        }
    }

    if let Some(max_files) = args.max_files {
        if file_order.len() > max_files {
            anyhow::bail!(
                "[error] {} files exceed --max-files {}; narrow with a more specific grep or raise the limit",
                file_order.len(),
                max_files
            );
        }
    } else if args.max_tokens.is_none() && file_order.len() > 8 {
        eprintln!(
            "[warn] {} files piped without --max-tokens or --max-files; output may be large",
            file_order.len()
        );
    }

    let mut file_outputs: Vec<SectionsFileOutput> = Vec::new();
    let mut total_tokens: usize = 0;
    let mut omitted: usize = 0;

    for path in &file_order {
        let parsed = match load_markdown(path) {
            Ok(p) => p,
            Err(e) => {
                eprintln!("Warning: could not read {}: {}", path, e);
                continue;
            }
        };

        let doc = &parsed.doc;
        let lines = &parsed.lines;

        let mut sections: Vec<SectionsSectionOutput> =
            if let Some(hit_lines) = file_hits.get(path).filter(|lines| !lines.is_empty()) {
                collect_hit_sections(
                    &doc.sections,
                    lines,
                    hit_lines,
                    args.children,
                    args.preview,
                    dedupe,
                )
            } else {
                let mut collected = Vec::new();
                collect_all_sections(
                    &doc.sections,
                    lines,
                    args.children,
                    args.preview,
                    args.max_depth,
                    0,
                    &mut collected,
                );
                collected
            };

        if sections.is_empty() {
            continue;
        }

        if let Some(max_sections) = args.max_sections {
            if sections.len() > max_sections {
                omitted += sections.len() - max_sections;
                sections.truncate(max_sections);
            }
        }

        // Enforce the shared token budget across all files.
        if let Some(max_tokens) = args.max_tokens {
            let mut kept: Vec<SectionsSectionOutput> = Vec::new();
            for sec in sections {
                if total_tokens + sec.token_estimate > max_tokens {
                    omitted += 1;
                } else {
                    total_tokens += sec.token_estimate;
                    kept.push(sec);
                }
            }
            sections = kept;
        }

        if !sections.is_empty() {
            file_outputs.push(SectionsFileOutput {
                path: path.clone(),
                sections,
            });
        }
    }

    emit_sections_output(&args, file_outputs, omitted)
}

/// Whole-file mode for `mdlens sections`: emits sections for each path,
/// capping depth at 2 by default; an explicit --max-depth or full --content
/// output lifts the cap.
fn render_sections_from_paths(args: SectionsArgs, paths: Vec<String>) -> Result<()> {
    if paths.is_empty() {
        return Ok(());
    }

    let depth_capped = args.max_depth.is_none() && (!args.content || args.preview.is_some());
    let effective_depth = if depth_capped {
        Some(2)
    } else {
        args.max_depth
    };

    if let Some(max_files) = args.max_files {
        if paths.len() > max_files {
            anyhow::bail!(
                "[error] {} files exceed --max-files {}; narrow with a more specific grep or raise the limit",
                paths.len(),
                max_files
            );
        }
    } else if args.max_tokens.is_none() && paths.len() > 8 {
        eprintln!(
            "[warn] {} files piped without --max-tokens or --max-files; output may be large",
            paths.len()
        );
    }

    let mut file_outputs: Vec<SectionsFileOutput> = Vec::new();
    let mut total_tokens: usize = 0;
    let mut omitted: usize = 0;

    for path in &paths {
        let parsed = match load_markdown(path) {
            Ok(p) => p,
            Err(e) => {
                eprintln!("Warning: could not read {}: {}", path, e);
                continue;
            }
        };

        let doc = &parsed.doc;
        let lines = &parsed.lines;
        let mut sections: Vec<SectionsSectionOutput> = Vec::new();
        collect_all_sections(
            &doc.sections,
            lines,
            args.children,
            args.preview,
            effective_depth,
            0,
            &mut sections,
        );

        if sections.is_empty() {
            continue;
        }

        if let Some(max_sections) = args.max_sections {
            if sections.len() > max_sections {
                omitted += sections.len() - max_sections;
                sections.truncate(max_sections);
            }
        }

        if let Some(max_tokens) = args.max_tokens {
            let mut kept: Vec<SectionsSectionOutput> = Vec::new();
            for sec in sections {
                if total_tokens + sec.token_estimate > max_tokens {
                    omitted += 1;
                } else {
                    total_tokens += sec.token_estimate;
                    kept.push(sec);
                }
            }
            sections = kept;
        }

        if !sections.is_empty() {
            file_outputs.push(SectionsFileOutput {
                path: path.clone(),
                sections,
            });
        }
    }

    if depth_capped {
        eprintln!(
            "[sections] whole-file mode: showing depth ≤2 by default; use --max-depth N for more"
        );
    }

    emit_sections_output(&args, file_outputs, omitted)
}

/// Final output stage for `mdlens sections`: warns about omissions, then
/// prints either JSON or rendered text entries.
fn emit_sections_output(
    args: &SectionsArgs,
    file_outputs: Vec<SectionsFileOutput>,
    omitted: usize,
) -> Result<()> {
    if omitted > 0 {
        if let Some(max_tokens) = args.max_tokens {
            eprintln!(
                "[warn] {} sections omitted by limits (budget ~{}t)",
                omitted, max_tokens
            );
        } else {
            eprintln!("[warn] {} sections omitted by limits", omitted);
        }
    }

    if file_outputs.is_empty() {
        return Ok(());
    }

    if args.json {
        let output = SectionsJsonOutput {
            schema_version: 1,
            files: file_outputs
                .iter()
                .map(|fo| SectionsJsonFile {
                    path: fo.path.clone(),
                    sections: fo
                        .sections
                        .iter()
                        .map(|s| SectionsJsonSection {
                            id: s.id.clone(),
                            title: s.title.clone(),
                            heading_path: if args.heading_paths {
                                Some(s.heading_path.clone())
                            } else {
                                None
                            },
                            line_start: if args.lines { Some(s.line_start) } else { None },
                            line_end: if args.lines { Some(s.line_end) } else { None },
                            token_estimate: s.token_estimate,
                            body: if args.content {
                                Some(s.body.clone())
                            } else {
                                None
                            },
                            preview: s.preview.clone(),
                        })
                        .collect(),
                })
                .collect(),
        };
        println!("{}", serde_json::to_string_pretty(&output)?);
    } else {
        let entries: Vec<SectionsEntry> = file_outputs
            .iter()
            .flat_map(|fo| {
                fo.sections.iter().map(|s| SectionsEntry {
                    file_path: fo.path.clone(),
                    id: s.id.clone(),
                    title: s.title.clone(),
                    heading_path: if args.heading_paths {
                        Some(s.heading_path.clone())
                    } else {
                        None
                    },
                    line_start: if args.lines { Some(s.line_start) } else { None },
                    line_end: if args.lines { Some(s.line_end) } else { None },
                    token_estimate: s.token_estimate,
                    body: if args.content {
                        Some(s.body.clone())
                    } else {
                        None
                    },
                    preview: s.preview.clone(),
                })
            })
            .collect();
        println!("{}", render_sections(&entries, args.content));
    }

    Ok(())
}

struct SectionsSectionOutput {
    id: String,
    title: String,
    heading_path: Vec<String>,
    line_start: usize,
    line_end: usize,
    token_estimate: usize,
    body: String,
    preview: Option<String>,
}

struct SectionsFileOutput {
    path: String,
    sections: Vec<SectionsSectionOutput>,
}

#[derive(Clone)]
struct HitSectionAggregate<'a> {
    section: &'a Section,
    hit_count: usize,
    first_line: usize,
}

/// Classifies one input line as a grep-style hit ("path:line:...") or a
/// bare file path.
fn parse_sections_input_line(line: &str) -> Option<SectionInput> {
    let trimmed = line.trim();
    if trimmed.is_empty() {
        return None;
    }

    if let Some((path, line_num)) = parse_grep_hit(trimmed) {
        return Some(SectionInput::Hit(SectionHit {
            path: path.to_string(),
            line: line_num,
        }));
    }

    Some(SectionInput::File(trimmed.to_string()))
}

fn parse_grep_hit(line: &str) -> Option<(&str, usize)> {
    let first = line.find(':')?;
    let rest = &line[(first + 1)..];
    let second = rest.find(':')?;
    let path = &line[..first];
    let line_num = rest[..second].parse().ok()?;
    Some((path, line_num))
}

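// Sketch test for grep-prefix parsing; the inputs are illustrative.
#[cfg(test)]
mod grep_hit_tests {
    use super::parse_grep_hit;

    #[test]
    fn grep_prefixes_parse_to_path_and_line() {
        assert_eq!(
            parse_grep_hit("docs/a.md:42:some match"),
            Some(("docs/a.md", 42))
        );
        assert_eq!(parse_grep_hit("docs/a.md"), None);
        assert_eq!(parse_grep_hit("docs/a.md:not-a-number:x"), None);
    }
}
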
/// Maps grep hit lines to their deepest enclosing sections. With dedupe,
/// sections are ranked by hit count; otherwise hits keep input order.
fn collect_hit_sections(
    sections: &[Section],
    lines: &[String],
    hit_lines: &[usize],
    include_children: bool,
    preview_lines: Option<usize>,
    dedupe: bool,
) -> Vec<SectionsSectionOutput> {
    let mut by_section: HashMap<String, HitSectionAggregate<'_>> = HashMap::new();
    let mut ordered_hits: Vec<(usize, &Section)> = Vec::new();

    for line_num in hit_lines {
        let Some(section) = find_deepest_section_for_line(sections, *line_num) else {
            continue;
        };
        if dedupe {
            by_section
                .entry(section.id.clone())
                .and_modify(|entry| entry.hit_count += 1)
                .or_insert(HitSectionAggregate {
                    section,
                    hit_count: 1,
                    first_line: *line_num,
                });
        } else {
            ordered_hits.push((*line_num, section));
        }
    }

    let aggregates: Vec<HitSectionAggregate<'_>> = if dedupe {
        let mut ranked: Vec<HitSectionAggregate<'_>> = by_section.into_values().collect();
        ranked.sort_by(|lhs, rhs| {
            rhs.hit_count
                .cmp(&lhs.hit_count)
                .then(lhs.section.token_estimate.cmp(&rhs.section.token_estimate))
                .then(lhs.first_line.cmp(&rhs.first_line))
                .then(lhs.section.line_start.cmp(&rhs.section.line_start))
        });
        ranked
    } else {
        ordered_hits.sort_by(|lhs, rhs| {
            lhs.0
                .cmp(&rhs.0)
                .then(lhs.1.line_start.cmp(&rhs.1.line_start))
                .then(lhs.1.id.cmp(&rhs.1.id))
        });
        ordered_hits
            .into_iter()
            .map(|(first_line, section)| HitSectionAggregate {
                section,
                hit_count: 1,
                first_line,
            })
            .collect()
    };

    let mut collected = Vec::new();
    for aggregate in aggregates {
        let section = aggregate.section;
        let body_lines = if include_children {
            section.extract_content(lines)
        } else {
            section.extract_direct_content(lines)
        };
        let body = body_lines.join("\n");
        let preview = preview_lines.map(|n| {
            body_lines
                .iter()
                .filter(|l| !l.trim().is_empty())
                .take(n)
                .cloned()
                .collect::<Vec<_>>()
                .join("\n")
        });

        collected.push(SectionsSectionOutput {
            id: section.id.clone(),
            title: section.title.clone(),
            heading_path: section.path.clone(),
            line_start: section.line_start,
            line_end: section.line_end,
            token_estimate: estimate_tokens(&body),
            body,
            preview,
        });
    }

    collected
}

/// Walks the section tree depth-first, collecting every section (except the
/// synthetic preamble) down to `max_depth`.
fn collect_all_sections(
    sections: &[Section],
    lines: &[String],
    include_children: bool,
    preview_lines: Option<usize>,
    max_depth: Option<usize>,
    current_depth: usize,
    result: &mut Vec<SectionsSectionOutput>,
) {
    for section in sections {
        if section.title == "<preamble>" {
            continue;
        }
        if let Some(max) = max_depth {
            if current_depth >= max {
                continue;
            }
        }
        let body_lines = if include_children {
            section.extract_content(lines)
        } else {
            section.extract_direct_content(lines)
        };
        let body = body_lines.join("\n");
        let preview = preview_lines.map(|n| {
            body_lines
                .iter()
                .filter(|l| !l.trim().is_empty())
                .take(n)
                .cloned()
                .collect::<Vec<_>>()
                .join("\n")
        });
        result.push(SectionsSectionOutput {
            id: section.id.clone(),
            title: section.title.clone(),
            heading_path: section.path.clone(),
            line_start: section.line_start,
            line_end: section.line_end,
            token_estimate: estimate_tokens(&body),
            body,
            preview,
        });
        collect_all_sections(
            &section.children,
            lines,
            include_children,
            preview_lines,
            max_depth,
            current_depth + 1,
            result,
        );
    }
}

/// Attaches section bodies and/or previews to search results, parsing each
/// file at most once.
fn enrich_search_results(
    results: &mut [crate::render::SearchResult],
    with_content: bool,
    preview_lines: Option<usize>,
) -> Result<()> {
    let mut docs: HashMap<String, crate::parse::ParsedMarkdown> = HashMap::new();

    for result in results.iter_mut() {
        let parsed = if let Some(parsed) = docs.get(&result.path) {
            parsed
        } else {
            let loaded = load_markdown(&result.path)?;
            docs.insert(result.path.clone(), loaded);
            docs.get(&result.path).expect("inserted parsed markdown")
        };

        let Some(section) = parsed.doc.find_section_by_id(&result.section_id) else {
            continue;
        };
        let body_lines = section.extract_direct_content(&parsed.lines);
        if with_content {
            result.body = Some(body_lines.join("\n"));
        }
        if let Some(n) = preview_lines {
            result.preview = Some(
                body_lines
                    .iter()
                    .filter(|line| !line.trim().is_empty())
                    .take(n)
                    .cloned()
                    .collect::<Vec<_>>()
                    .join("\n"),
            );
        }
    }

    Ok(())
}

/// Finds the innermost section whose line range contains `line_num`.
fn find_deepest_section_for_line(sections: &[Section], line_num: usize) -> Option<&Section> {
    for section in sections {
        if line_num < section.line_start || line_num > section.line_end {
            continue;
        }
        if let Some(child) = find_deepest_section_for_line(&section.children, line_num) {
            return Some(child);
        }
        return Some(section);
    }
    None
}

#[derive(Serialize)]
struct TreeJsonOutput {
    schema_version: u32,
    path: String,
    line_count: usize,
    byte_count: usize,
    char_count: usize,
    word_count: usize,
    token_estimate: usize,
    sections: Vec<SectionJsonOutput>,
}

#[derive(Serialize)]
struct TreeFileJsonOutput {
    path: String,
    line_count: usize,
    byte_count: usize,
    char_count: usize,
    word_count: usize,
    token_estimate: usize,
    sections: Vec<SectionJsonOutput>,
}

#[derive(Serialize)]
struct TreeMultiJsonOutput {
    schema_version: u32,
    files: Vec<TreeFileJsonOutput>,
}

#[derive(Serialize)]
struct SectionJsonOutput {
    id: String,
    title: String,
    level: u8,
    path: Vec<String>,
    line_start: usize,
    line_end: usize,
    token_estimate: usize,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    children: Vec<SectionJsonOutput>,
}

#[derive(Serialize)]
struct ReadJsonOutput {
    schema_version: u32,
    path: String,
    selector: ReadSelector,
    section: SectionJsonOutput,
    content: String,
    truncated: bool,
}

#[derive(Serialize)]
struct ReadSelector {
    #[serde(rename = "type")]
    r#type: String,
    value: String,
}

#[derive(Serialize)]
struct SearchJsonOutput {
    schema_version: u32,
    query: String,
    root: String,
    results: Vec<SearchJsonResult>,
}

#[derive(Serialize)]
struct SearchJsonResult {
    path: String,
    section_id: String,
    section_title: String,
    section_path: Vec<String>,
    line_start: usize,
    line_end: usize,
    token_estimate: usize,
    match_count: usize,
    body: Option<String>,
    preview: Option<String>,
    snippets: Vec<SearchJsonSnippet>,
}

#[derive(Serialize)]
struct SearchJsonSnippet {
    line_start: usize,
    line_end: usize,
    text: String,
}

#[derive(Serialize)]
struct ScoutJsonOutput {
    schema_version: u32,
    root: String,
    question: String,
    token_budget: usize,
    candidate_count: usize,
    queries: Vec<String>,
    candidates: Vec<ScoutCandidate>,
    rendered_text: String,
}

#[derive(Serialize)]
struct PackJsonOutput {
    schema_version: u32,
    token_budget: usize,
    token_estimate: usize,
    truncated: bool,
    included: Vec<PackJsonIncluded>,
    content: String,
}

#[derive(Serialize)]
struct PackJsonIncluded {
    path: String,
    section_id: String,
    section_path: Vec<String>,
    line_start: usize,
    line_end: usize,
    token_estimate: usize,
    truncated: bool,
}

#[derive(Serialize)]
struct StatsJsonOutput {
    schema_version: u32,
    entries: Vec<StatsJsonEntry>,
}

#[derive(Serialize)]
struct StatsJsonEntry {
    path: String,
    lines: usize,
    words: usize,
    tokens: usize,
}

#[derive(Serialize)]
struct SectionsJsonOutput {
    schema_version: u32,
    files: Vec<SectionsJsonFile>,
}

#[derive(Serialize)]
struct SectionsJsonFile {
    path: String,
    sections: Vec<SectionsJsonSection>,
}

#[derive(Serialize)]
struct SectionsJsonSection {
    id: String,
    title: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    heading_path: Option<Vec<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    line_start: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    line_end: Option<usize>,
    token_estimate: usize,
    #[serde(skip_serializing_if = "Option::is_none")]
    body: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    preview: Option<String>,
}

/// Converts the section tree into its JSON form, honoring `max_depth` and
/// the preamble flag.
fn serialize_sections(
    sections: &[Section],
    max_depth: Option<usize>,
    include_preamble: bool,
    current_depth: usize,
) -> Vec<SectionJsonOutput> {
    let mut result = Vec::new();
    for section in sections {
        if section.title == "<preamble>" && !include_preamble {
            continue;
        }
        let children = if let Some(max) = max_depth {
            if current_depth + 1 < max {
                serialize_sections(
                    &section.children,
                    max_depth,
                    include_preamble,
                    current_depth + 1,
                )
            } else {
                Vec::new()
            }
        } else {
            serialize_sections(
                &section.children,
                max_depth,
                include_preamble,
                current_depth + 1,
            )
        };

        result.push(SectionJsonOutput {
            id: section.id.clone(),
            title: section.title.clone(),
            level: section.level,
            path: section.path.clone(),
            line_start: section.line_start,
            line_end: section.line_end,
            token_estimate: section.token_estimate,
            children,
        });
    }
    result
}

fn truncate_content_to_tokens(content: &str, max_tokens: usize) -> String {
    truncate_to_tokens(content, max_tokens, TRUNCATION_NOTICE)
}