Skip to main content

aptu_coder_core/
analyze.rs

1// SPDX-FileCopyrightText: 2026 aptu-coder contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Main analysis engine for extracting code structure from files and directories.
4//!
5//! Implements the five MCP tools: `analyze_directory` (Overview), `analyze_file` (`FileDetails`),
6//! `analyze_symbol` (call graph), `analyze_module` (lightweight index), and `analyze_raw` (raw file read). Handles parallel processing and cancellation.
7
8use crate::formatter::{
9    format_file_details, format_focused_internal, format_focused_summary_internal, format_structure,
10};
11use crate::graph::{CallGraph, InternalCallChain};
12use crate::lang::{language_for_extension, supported_languages};
13use crate::parser::{ElementExtractor, SemanticExtractor};
14use crate::test_detection::is_test_file;
15use crate::traversal::{WalkEntry, walk_directory};
16use crate::types::{
17    AnalysisMode, FileInfo, ImplTraitInfo, ImportInfo, SemanticAnalysis, SymbolMatchMode,
18};
19use rayon::prelude::*;
20#[cfg(feature = "schemars")]
21use schemars::JsonSchema;
22use serde::{Deserialize, Serialize};
23use std::path::{Path, PathBuf};
24use std::sync::Arc;
25use std::sync::atomic::{AtomicUsize, Ordering};
26use std::time::Instant;
27use thiserror::Error;
28use tokio_util::sync::CancellationToken;
29use tracing::instrument;
30
/// Upper bound, in bytes, on the size of a file this module will read.
///
/// Directory-level analysis skips files larger than this, while `analyze_file`
/// rejects them with a parser error.
31pub const MAX_FILE_SIZE_BYTES: u64 = 10_000_000;
32
/// Errors returned by the analysis entry points in this module.
///
/// Variants carrying `#[from]` wrap an error type from another module so it can be
/// propagated with `?`.
33#[derive(Debug, Error)]
34#[non_exhaustive]
35pub enum AnalyzeError {
    /// Error reported by the directory traversal module.
36    #[error("Traversal error: {0}")]
37    Traversal(#[from] crate::traversal::TraversalError),
    /// Error reported by the parser module. `analyze_file` also uses this variant for
    /// oversized and unreadable files.
38    #[error("Parser error: {0}")]
39    Parser(#[from] crate::parser::ParserError),
    /// Error reported by the call-graph module (for example, a symbol that cannot be found).
40    #[error("Graph error: {0}")]
41    Graph(#[from] crate::graph::GraphError),
    /// Error reported by the formatter module.
42    #[error("Formatter error: {0}")]
43    Formatter(#[from] crate::formatter::FormatterError),
    /// The cancellation token was triggered before or during the analysis.
44    #[error("Analysis cancelled")]
45    Cancelled,
    /// The supplied language identifier could not be resolved; carries the identifier
    /// exactly as the caller passed it.
46    #[error("unsupported language: {0}")]
47    UnsupportedLanguage(String),
    /// Wrapped standard-library I/O error.
48    #[error("I/O error: {0}")]
49    Io(#[from] std::io::Error),
    /// A line range whose start is greater than its end; `total` is the line count of
    /// the file, as reported in the message.
50    #[error("invalid range: start ({start}) > end ({end}); file has {total} lines")]
51    InvalidRange {
52        start: usize,
53        end: usize,
54        total: usize,
55    },
    /// The given path refers to a directory where a file was required.
56    #[error("path is a directory, not a file: {0}")]
57    NotAFile(PathBuf),
    /// A file of `total_lines` lines was requested without a line range; the message
    /// tells the caller how to supply one.
58    #[error(
59        "file has {total_lines} lines; provide start_line and end_line, or call analyze_module first to locate the range"
60    )]
61    RangelessLargeFile { total_lines: usize },
62}
63
64/// Result of directory analysis containing both formatted output and file data.
///
/// Only `formatted`, `files` and (when present) `next_cursor` are serialized; the
/// remaining fields are skipped by serde.
65#[derive(Debug, Clone, Serialize)]
66#[cfg_attr(feature = "schemars", derive(JsonSchema))]
67#[non_exhaustive]
68pub struct AnalysisOutput {
    /// Formatted text representation of the analysis.
69    #[cfg_attr(
70        feature = "schemars",
71        schemars(description = "Formatted text representation of the analysis")
72    )]
73    pub formatted: String,
    /// Per-file results for the files that were analyzed.
74    #[cfg_attr(
75        feature = "schemars",
76        schemars(description = "List of files analyzed in the directory")
77    )]
78    pub files: Vec<FileInfo>,
79    /// Walk entries used internally for summary generation; not serialized.
80    #[serde(skip)]
81    #[cfg_attr(feature = "schemars", schemars(skip))]
82    pub entries: Vec<WalkEntry>,
83    /// Subtree file counts computed from an unbounded walk; used by `format_summary`; not serialized.
84    #[serde(skip)]
85    #[cfg_attr(feature = "schemars", schemars(skip))]
86    pub subtree_counts: Option<Vec<(std::path::PathBuf, usize)>>,
    /// Opaque cursor for the next page of results; omitted from the serialized form
    /// when `None`.
87    #[serde(skip_serializing_if = "Option::is_none")]
88    #[cfg_attr(
89        feature = "schemars",
90        schemars(
91            description = "Opaque cursor token for the next page of results (absent when no more results)"
92        )
93    )]
94    pub next_cursor: Option<String>,
95}
96
97/// Result of file-level semantic analysis.
///
/// Produced by `analyze_file` (from a path) and `analyze_str` (from an in-memory buffer).
98#[derive(Debug, Clone, Serialize)]
99#[cfg_attr(feature = "schemars", derive(JsonSchema))]
100#[non_exhaustive]
101pub struct FileAnalysisOutput {
    /// Formatted text representation of the analysis.
102    #[cfg_attr(
103        feature = "schemars",
104        schemars(description = "Formatted text representation of the analysis")
105    )]
106    pub formatted: String,
    /// Structured semantic data extracted from the source.
107    #[cfg_attr(
108        feature = "schemars",
109        schemars(description = "Semantic analysis data including functions, classes, and imports")
110    )]
111    pub semantic: SemanticAnalysis,
    /// Number of lines in the analyzed source, as counted by `str::lines`.
112    #[cfg_attr(
113        feature = "schemars",
114        schemars(description = "Total line count of the analyzed file")
115    )]
116    #[cfg_attr(
117        feature = "schemars",
118        schemars(schema_with = "crate::schema_helpers::integer_schema")
119    )]
120    pub line_count: usize,
    /// Opaque cursor for the next page of results; omitted from the serialized form
    /// when `None`.
121    #[serde(skip_serializing_if = "Option::is_none")]
122    #[cfg_attr(
123        feature = "schemars",
124        schemars(
125            description = "Opaque cursor token for the next page of results (absent when no more results)"
126        )
127    )]
128    pub next_cursor: Option<String>,
129}
130
131impl FileAnalysisOutput {
132    /// Create a new `FileAnalysisOutput`.
    ///
    /// The struct is `#[non_exhaustive]`, so code outside this crate cannot build it
    /// with struct-literal syntax; this constructor is the way to assemble one from
    /// its four components. Each argument is stored unchanged in the field of the
    /// same name.
133    #[must_use]
134    pub fn new(
135        formatted: String,
136        semantic: SemanticAnalysis,
137        line_count: usize,
138        next_cursor: Option<String>,
139    ) -> Self {
140        Self {
141            formatted,
142            semantic,
143            line_count,
144            next_cursor,
145        }
146    }
147}
148#[instrument(skip_all, fields(path = %root.display()))]
149// public API; callers expect owned semantics
150#[allow(clippy::needless_pass_by_value)]
151pub fn analyze_directory_with_progress(
152    root: &Path,
153    entries: Vec<WalkEntry>,
154    progress: Arc<AtomicUsize>,
155    ct: CancellationToken,
156) -> Result<AnalysisOutput, AnalyzeError> {
157    // Check if already cancelled
158    if ct.is_cancelled() {
159        return Err(AnalyzeError::Cancelled);
160    }
161
162    // Detect language from file extension
163    let file_entries: Vec<&WalkEntry> = entries
164        .iter()
165        .filter(|e| !e.is_dir && !e.is_symlink)
166        .collect();
167
168    let start = Instant::now();
169    tracing::debug!(file_count = file_entries.len(), root = %root.display(), "analysis start");
170
171    // Parallel analysis of files
172    let analysis_results: Vec<FileInfo> = file_entries
173        .par_iter()
174        .filter_map(|entry| {
175            // Check cancellation per file
176            if ct.is_cancelled() {
177                return None;
178            }
179
180            let path_str = entry.path.display().to_string();
181
182            // Detect language from extension
183            let ext = entry.path.extension().and_then(|e| e.to_str());
184
185            // Check file size before reading
186            if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
187                tracing::debug!("skipping large file: {}", entry.path.display());
188                progress.fetch_add(1, Ordering::Relaxed);
189                return None;
190            }
191
192            // Try to read file content; skip binary or unreadable files
193            let Ok(source) = std::fs::read_to_string(&entry.path) else {
194                progress.fetch_add(1, Ordering::Relaxed);
195                return None;
196            };
197
198            // Count lines
199            let line_count = source.lines().count();
200
201            // Detect language and extract counts
202            let (language, function_count, class_count) = if let Some(ext_str) = ext {
203                if let Some(lang) = language_for_extension(ext_str) {
204                    let lang_str = lang.to_string();
205                    match ElementExtractor::extract_with_depth(&source, &lang_str) {
206                        Ok((func_count, class_count)) => (lang_str, func_count, class_count),
207                        Err(_) => (lang_str, 0, 0),
208                    }
209                } else {
210                    ("unknown".to_string(), 0, 0)
211                }
212            } else {
213                ("unknown".to_string(), 0, 0)
214            };
215
216            progress.fetch_add(1, Ordering::Relaxed);
217
218            let is_test = is_test_file(&entry.path);
219
220            Some(FileInfo {
221                path: path_str,
222                line_count,
223                function_count,
224                class_count,
225                language,
226                is_test,
227            })
228        })
229        .collect();
230
231    // Check if cancelled after parallel processing
232    if ct.is_cancelled() {
233        return Err(AnalyzeError::Cancelled);
234    }
235
236    tracing::debug!(
237        file_count = file_entries.len(),
238        duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX),
239        "analysis complete"
240    );
241
242    // Format output
243    let formatted = format_structure(&entries, &analysis_results, None);
244
245    Ok(AnalysisOutput {
246        formatted,
247        files: analysis_results,
248        entries,
249        next_cursor: None,
250        subtree_counts: None,
251    })
252}
253
254/// Analyze a directory structure and return formatted output and file data.
255#[instrument(skip_all, fields(path = %root.display()))]
256pub fn analyze_directory(
257    root: &Path,
258    max_depth: Option<u32>,
259) -> Result<AnalysisOutput, AnalyzeError> {
260    let entries = walk_directory(root, max_depth)?;
261    let counter = Arc::new(AtomicUsize::new(0));
262    let ct = CancellationToken::new();
263    analyze_directory_with_progress(root, entries, counter, ct)
264}
265
266/// Determine analysis mode based on parameters and path.
267#[must_use]
268pub fn determine_mode(path: &str, focus: Option<&str>) -> AnalysisMode {
269    if focus.is_some() {
270        return AnalysisMode::SymbolFocus;
271    }
272
273    let path_obj = Path::new(path);
274    if path_obj.is_dir() {
275        AnalysisMode::Overview
276    } else {
277        AnalysisMode::FileDetails
278    }
279}
280
281/// Analyze a single file and return semantic analysis with formatted output.
282#[instrument(skip_all, fields(path))]
283pub fn analyze_file(
284    path: &str,
285    ast_recursion_limit: Option<usize>,
286) -> Result<FileAnalysisOutput, AnalyzeError> {
287    let start = Instant::now();
288
289    // Check file size before reading
290    if Path::new(path).metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
291        tracing::debug!("skipping large file: {}", path);
292        return Err(AnalyzeError::Parser(
293            crate::parser::ParserError::ParseError("file too large".to_string()),
294        ));
295    }
296
297    let source = std::fs::read_to_string(path)
298        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
299
300    let line_count = source.lines().count();
301
302    // Detect language from extension
303    let ext = Path::new(path)
304        .extension()
305        .and_then(|e| e.to_str())
306        .and_then(language_for_extension)
307        .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string);
308
309    // Extract semantic information
310    let mut semantic = SemanticExtractor::extract(&source, &ext, ast_recursion_limit)?;
311
312    // Populate the file path on references now that the path is known
313    for r in &mut semantic.references {
314        r.location = path.to_string();
315    }
316
317    // Resolve Python wildcard imports
318    if ext == "python" {
319        resolve_wildcard_imports(Path::new(path), &mut semantic.imports);
320    }
321
322    // Detect if this is a test file
323    let is_test = is_test_file(Path::new(path));
324
325    // Extract parent directory for relative path display
326    let parent_dir = Path::new(path).parent();
327
328    // Format output
329    let formatted = format_file_details(path, &semantic, line_count, is_test, parent_dir);
330
331    tracing::debug!(path = %path, language = %ext, functions = semantic.functions.len(), classes = semantic.classes.len(), imports = semantic.imports.len(), duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX), "file analysis complete");
332
333    Ok(FileAnalysisOutput::new(
334        formatted, semantic, line_count, None,
335    ))
336}
337
338/// Analyze source code from a string buffer without filesystem access.
339///
340/// This function analyzes in-memory source code by language identifier. The `language`
341/// parameter can be either a language name (e.g., `"rust"`, `"python"`, `"go"`) or a file
342/// extension (e.g., `"rs"`, `"py"`).
343///
344/// Accepted language identifiers depend on compiled features. Use [`supported_languages()`] to
345/// discover the available language names at runtime, and [`language_for_extension()`] to resolve
346/// a file extension to its supported language identifier.
347///
348/// # Arguments
349///
350/// * `source` - The source code to analyze
351/// * `language` - The language identifier (language name or extension)
352/// * `ast_recursion_limit` - Optional limit for AST traversal depth
353///
354/// # Returns
355///
356/// - `Ok(FileAnalysisOutput)` on success
357/// - `Err(AnalyzeError::UnsupportedLanguage)` if the language is not recognized
358/// - `Err(AnalyzeError::Parser)` if parsing fails
359///
360/// # Notes
361///
362/// - Python wildcard import resolution is skipped for in-memory analysis (no filesystem path available)
363/// - The formatted output uses the standard file-details formatter, so it includes a `FILE:` header with an empty path
364#[inline]
365pub fn analyze_str(
366    source: &str,
367    language: &str,
368    ast_recursion_limit: Option<usize>,
369) -> Result<FileAnalysisOutput, AnalyzeError> {
370    // Resolve language: first try as a file extension, then as a language name
371    // (case-insensitive match against supported_languages()).
372    let lang = language_for_extension(language).or_else(|| {
373        let lower = language.to_ascii_lowercase();
374        supported_languages()
375            .iter()
376            .find(|&&name| name == lower)
377            .copied()
378    });
379    let lang = lang.ok_or_else(|| AnalyzeError::UnsupportedLanguage(language.to_string()))?;
380
381    // Extract semantic information
382    let mut semantic = SemanticExtractor::extract(source, lang, ast_recursion_limit)?;
383
384    // Populate a stable in-memory sentinel on all reference locations
385    for r in &mut semantic.references {
386        r.location = "<memory>".to_string();
387    }
388
389    // Count lines in the source
390    let line_count = source.lines().count();
391
392    // Format output with empty path (no filesystem access)
393    let formatted = format_file_details("", &semantic, line_count, false, None);
394
395    Ok(FileAnalysisOutput::new(
396        formatted, semantic, line_count, None,
397    ))
398}
399
400/// Single entry in a call chain (depth-1 direct caller or callee).
///
/// Built by `chains_to_entries` from the first element of an internal call chain.
401#[non_exhaustive]
402#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
403#[cfg_attr(feature = "schemars", derive(JsonSchema))]
404pub struct CallChainEntry {
    /// Symbol name of the caller or callee.
405    #[cfg_attr(
406        feature = "schemars",
407        schemars(description = "Symbol name of the caller or callee")
408    )]
409    pub symbol: String,
    /// File containing the symbol. `chains_to_entries` strips the analysis root from
    /// the path when a root is supplied and the path lies beneath it.
410    #[cfg_attr(
411        feature = "schemars",
412        schemars(description = "File path relative to the repository root")
413    )]
414    pub file: String,
    /// Line number of the definition or call site (1-indexed per the schema description).
415    #[cfg_attr(
416        feature = "schemars",
417        schemars(
418            description = "Line number of the definition or call site (1-indexed)",
419            schema_with = "crate::schema_helpers::integer_schema"
420        )
421    )]
422    pub line: usize,
423}
424
425/// Result of focused symbol analysis.
///
/// Fields marked `#[serde(skip)]` are working data for the caller (pagination and
/// headers) and never appear in the serialized form.
426#[derive(Debug, Serialize)]
427#[cfg_attr(feature = "schemars", derive(JsonSchema))]
428#[non_exhaustive]
429pub struct FocusedAnalysisOutput {
    /// Formatted text representation of the call graph analysis.
430    #[cfg_attr(
431        feature = "schemars",
432        schemars(description = "Formatted text representation of the call graph analysis")
433    )]
434    pub formatted: String,
    /// Opaque cursor for the next page of results; omitted from the serialized form
    /// when `None`.
435    #[serde(skip_serializing_if = "Option::is_none")]
436    #[cfg_attr(
437        feature = "schemars",
438        schemars(
439            description = "Opaque cursor token for the next page of results (absent when no more results)"
440        )
441    )]
442    pub next_cursor: Option<String>,
443    /// Production caller chains (partitioned from incoming chains, excluding test callers).
444    /// Not serialized; used for pagination in lib.rs.
445    #[serde(skip)]
446    #[cfg_attr(feature = "schemars", schemars(skip))]
447    pub prod_chains: Vec<InternalCallChain>,
448    /// Test caller chains. Not serialized; used for pagination summary in lib.rs.
449    #[serde(skip)]
450    #[cfg_attr(feature = "schemars", schemars(skip))]
451    pub test_chains: Vec<InternalCallChain>,
452    /// Outgoing (callee) chains. Not serialized; used for pagination in lib.rs.
453    #[serde(skip)]
454    #[cfg_attr(feature = "schemars", schemars(skip))]
455    pub outgoing_chains: Vec<InternalCallChain>,
456    /// Number of definitions for the symbol. Not serialized; used for pagination headers.
457    #[serde(skip)]
458    #[cfg_attr(feature = "schemars", schemars(skip))]
459    pub def_count: usize,
460    /// Total unique callers before `impl_only` filter. Not serialized; used for FILTER header.
461    #[serde(skip)]
462    #[cfg_attr(feature = "schemars", schemars(skip))]
463    pub unfiltered_caller_count: usize,
464    /// Unique callers after `impl_only` filter. Not serialized; used for FILTER header.
465    #[serde(skip)]
466    #[cfg_attr(feature = "schemars", schemars(skip))]
467    pub impl_trait_caller_count: usize,
468    /// Direct (depth-1) production callers. `follow_depth` does not affect this field.
469    #[serde(skip_serializing_if = "Option::is_none")]
470    pub callers: Option<Vec<CallChainEntry>>,
471    /// Direct (depth-1) test callers. `follow_depth` does not affect this field.
472    #[serde(skip_serializing_if = "Option::is_none")]
473    pub test_callers: Option<Vec<CallChainEntry>>,
474    /// Direct (depth-1) callees. `follow_depth` does not affect this field.
475    #[serde(skip_serializing_if = "Option::is_none")]
476    pub callees: Option<Vec<CallChainEntry>>,
477    /// Definition and use sites for the symbol.
    // NOTE(review): `#[serde(default)]` only influences deserialization, and this struct
    // derives `Serialize` alone, so the attribute looks inert here — confirm intent.
478    #[serde(default)]
479    pub def_use_sites: Vec<crate::types::DefUseSite>,
480}
481
482/// Parameters for focused symbol analysis. Groups high-arity parameters to keep
483/// function signatures under clippy's default 7-argument threshold.
484#[derive(Clone)]
485pub struct FocusedAnalysisConfig {
    /// Name of the symbol to look up in the call graph.
486    pub focus: String,
    /// How `focus` is matched against known symbols. `Exact` requires the name to be
    /// present as-is; other modes go through the graph's indexed resolution.
487    pub match_mode: SymbolMatchMode,
    /// Depth passed to the incoming and outgoing chain searches.
488    pub follow_depth: u32,
    /// Optional depth limit passed to the directory walk.
489    pub max_depth: Option<u32>,
    /// Optional limit forwarded to the semantic extractor for each file.
490    pub ast_recursion_limit: Option<usize>,
    /// Selects the summary formatter instead of the full focused formatter.
491    pub use_summary: bool,
    /// When `Some(true)`, caller edges are restricted to impl-trait edges and a
    /// `FILTER:` header is prepended to the output. `None` behaves like `Some(false)`.
492    pub impl_only: Option<bool>,
    /// When true and the symbol is not found in the call graph, analysis falls back
    /// to collecting definition/use sites instead of failing immediately.
493    pub def_use: bool,
494}
495
496/// Internal parameters for focused analysis phases.
///
/// Holds the same values as `FocusedAnalysisConfig` except `max_depth`, which is
/// consumed by the directory walk before the phases run.
497#[derive(Clone)]
498struct InternalFocusedParams {
    // Symbol name to resolve.
499    focus: String,
    // Matching strategy used when resolving `focus`.
500    match_mode: SymbolMatchMode,
    // Depth for incoming/outgoing chain searches.
501    follow_depth: u32,
    // Forwarded to the semantic extractor.
502    ast_recursion_limit: Option<usize>,
    // Chooses the summary formatter over the full one.
503    use_summary: bool,
    // `Some(true)` restricts caller edges to impl-trait edges.
504    impl_only: Option<bool>,
    // Enables the def-use fallback when the symbol is absent from the graph.
505    def_use: bool,
506}
507
508/// Type alias for analysis results: (`file_path`, `semantic_analysis`) pairs and impl-trait info.
///
/// The second element is the flattened list of every `impl_traits` entry found in
/// the first element's analyses.
509type FileAnalysisBatch = (Vec<(PathBuf, SemanticAnalysis)>, Vec<ImplTraitInfo>);
510
511/// Phase 1: Collect semantic analysis for all files in parallel.
512fn collect_file_analysis(
513    entries: &[WalkEntry],
514    progress: &Arc<AtomicUsize>,
515    ct: &CancellationToken,
516    ast_recursion_limit: Option<usize>,
517) -> Result<FileAnalysisBatch, AnalyzeError> {
518    // Check if already cancelled
519    if ct.is_cancelled() {
520        return Err(AnalyzeError::Cancelled);
521    }
522
523    // Use pre-walked entries (passed by caller)
524    // Collect semantic analysis for all files in parallel
525    let file_entries: Vec<&WalkEntry> = entries
526        .iter()
527        .filter(|e| !e.is_dir && !e.is_symlink)
528        .collect();
529
530    let analysis_results: Vec<(PathBuf, SemanticAnalysis)> = file_entries
531        .par_iter()
532        .filter_map(|entry| {
533            // Check cancellation per file
534            if ct.is_cancelled() {
535                return None;
536            }
537
538            let ext = entry.path.extension().and_then(|e| e.to_str());
539
540            // Check file size before reading
541            if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
542                tracing::debug!("skipping large file: {}", entry.path.display());
543                progress.fetch_add(1, Ordering::Relaxed);
544                return None;
545            }
546
547            // Try to read file content
548            let Ok(source) = std::fs::read_to_string(&entry.path) else {
549                progress.fetch_add(1, Ordering::Relaxed);
550                return None;
551            };
552
553            // Detect language and extract semantic information
554            let language = if let Some(ext_str) = ext {
555                language_for_extension(ext_str)
556                    .map_or_else(|| "unknown".to_string(), std::string::ToString::to_string)
557            } else {
558                "unknown".to_string()
559            };
560
561            if let Ok(mut semantic) =
562                SemanticExtractor::extract(&source, &language, ast_recursion_limit)
563            {
564                // Populate file path on references
565                for r in &mut semantic.references {
566                    r.location = entry.path.display().to_string();
567                }
568                // Populate file path on impl_traits (already extracted during SemanticExtractor::extract)
569                for trait_info in &mut semantic.impl_traits {
570                    trait_info.path.clone_from(&entry.path);
571                }
572                progress.fetch_add(1, Ordering::Relaxed);
573                Some((entry.path.clone(), semantic))
574            } else {
575                progress.fetch_add(1, Ordering::Relaxed);
576                None
577            }
578        })
579        .collect();
580
581    // Check if cancelled after parallel processing
582    if ct.is_cancelled() {
583        return Err(AnalyzeError::Cancelled);
584    }
585
586    // Collect all impl-trait info from analysis results
587    let all_impl_traits: Vec<ImplTraitInfo> = analysis_results
588        .iter()
589        .flat_map(|(_, sem)| sem.impl_traits.iter().cloned())
590        .collect();
591
592    Ok((analysis_results, all_impl_traits))
593}
594
595/// Phase 2: Build call graph from analysis results.
596fn build_call_graph(
597    analysis_results: Vec<(PathBuf, SemanticAnalysis)>,
598    all_impl_traits: &[ImplTraitInfo],
599) -> Result<CallGraph, AnalyzeError> {
600    // Build call graph. Always build without impl_only filter first so we can
601    // record the unfiltered caller count before discarding those edges.
602    CallGraph::build_from_results(
603        analysis_results,
604        all_impl_traits,
605        false, // filter applied below after counting
606    )
607    .map_err(std::convert::Into::into)
608}
609
610/// Phase 3: Resolve symbol and apply `impl_only` filter.
611/// Returns (`resolved_focus`, `unfiltered_caller_count`, `impl_trait_caller_count`).
612/// CRITICAL: Must capture `unfiltered_caller_count` BEFORE `retain()`, then apply `retain()`,
613/// then compute `impl_trait_caller_count`.
614fn resolve_symbol(
615    graph: &mut CallGraph,
616    params: &InternalFocusedParams,
617) -> Result<(String, usize, usize), AnalyzeError> {
618    // Resolve symbol name using the requested match mode.
619    let resolved_focus = if params.match_mode == SymbolMatchMode::Exact {
620        let exists = graph.definitions.contains_key(&params.focus)
621            || graph.callers.contains_key(&params.focus)
622            || graph.callees.contains_key(&params.focus);
623        if exists {
624            params.focus.clone()
625        } else {
626            return Err(crate::graph::GraphError::SymbolNotFound {
627                symbol: params.focus.clone(),
628                hint: "Try match_mode=insensitive for a case-insensitive search, or match_mode=prefix to list symbols starting with this name.".to_string(),
629            }
630            .into());
631        }
632    } else {
633        graph.resolve_symbol_indexed(&params.focus, &params.match_mode)?
634    };
635
636    // Count unique callers for the focus symbol before applying impl_only filter.
637    let unfiltered_caller_count = graph.callers.get(&resolved_focus).map_or(0, |edges| {
638        edges
639            .iter()
640            .map(|e| &e.neighbor_name)
641            .collect::<std::collections::HashSet<_>>()
642            .len()
643    });
644
645    // Apply impl_only filter now if requested, then count filtered callers.
646    // Filter all caller adjacency lists so traversal and formatting are consistently
647    // restricted to impl-trait edges regardless of follow_depth.
648    let impl_trait_caller_count = if params.impl_only.unwrap_or(false) {
649        for edges in graph.callers.values_mut() {
650            edges.retain(|e| e.is_impl_trait);
651        }
652        graph.callers.get(&resolved_focus).map_or(0, |edges| {
653            edges
654                .iter()
655                .map(|e| &e.neighbor_name)
656                .collect::<std::collections::HashSet<_>>()
657                .len()
658        })
659    } else {
660        unfiltered_caller_count
661    };
662
663    Ok((
664        resolved_focus,
665        unfiltered_caller_count,
666        impl_trait_caller_count,
667    ))
668}
669
670/// Type alias for `compute_chains` return type: (`formatted_output`, `prod_chains`, `test_chains`, `outgoing_chains`, `def_count`).
///
/// `prod_chains` and `test_chains` together are the incoming chains, split by whether
/// the first chain element looks like test code.
671type ChainComputeResult = (
672    String,
673    Vec<InternalCallChain>,
674    Vec<InternalCallChain>,
675    Vec<InternalCallChain>,
676    usize,
677);
678
679/// Helper function to convert InternalCallChain data to CallChainEntry vec.
680/// Takes the first (depth-1) element of each chain and converts it to a CallChainEntry.
681/// Returns None if chains is empty, otherwise returns a vec of up to 10 entries.
682fn chains_to_entries(
683    chains: &[InternalCallChain],
684    root: Option<&std::path::Path>,
685) -> Option<Vec<CallChainEntry>> {
686    if chains.is_empty() {
687        return None;
688    }
689    let entries: Vec<CallChainEntry> = chains
690        .iter()
691        .take(10)
692        .filter_map(|chain| {
693            let (symbol, path, line) = chain.chain.first()?;
694            let file = match root {
695                Some(root) => path
696                    .strip_prefix(root)
697                    .unwrap_or(path.as_path())
698                    .to_string_lossy()
699                    .into_owned(),
700                None => path.to_string_lossy().into_owned(),
701            };
702            Some(CallChainEntry {
703                symbol: symbol.clone(),
704                file,
705                line: *line,
706            })
707        })
708        .collect();
709    if entries.is_empty() {
710        None
711    } else {
712        Some(entries)
713    }
714}
715
716/// Phase 4: Compute chains and format output.
717fn compute_chains(
718    graph: &CallGraph,
719    resolved_focus: &str,
720    root: &Path,
721    params: &InternalFocusedParams,
722    unfiltered_caller_count: usize,
723    impl_trait_caller_count: usize,
724    def_use_sites: &[crate::types::DefUseSite],
725) -> Result<ChainComputeResult, AnalyzeError> {
726    // Compute chain data for pagination (always, regardless of summary mode)
727    let def_count = graph.definitions.get(resolved_focus).map_or(0, Vec::len);
728    let incoming_chains = graph.find_incoming_chains(resolved_focus, params.follow_depth)?;
729    let outgoing_chains = graph.find_outgoing_chains(resolved_focus, params.follow_depth)?;
730
731    let (prod_chains, test_chains): (Vec<_>, Vec<_>) =
732        incoming_chains.iter().cloned().partition(|chain| {
733            chain
734                .chain
735                .first()
736                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
737        });
738
739    // Format output with pre-computed chains
740    let mut formatted = if params.use_summary {
741        format_focused_summary_internal(
742            graph,
743            resolved_focus,
744            params.follow_depth,
745            Some(root),
746            Some(&incoming_chains),
747            Some(&outgoing_chains),
748            def_use_sites,
749        )?
750    } else {
751        format_focused_internal(
752            graph,
753            resolved_focus,
754            params.follow_depth,
755            Some(root),
756            Some(&incoming_chains),
757            Some(&outgoing_chains),
758            def_use_sites,
759        )?
760    };
761
762    // Add FILTER header if impl_only filter was applied
763    if params.impl_only.unwrap_or(false) {
764        let filter_header = format!(
765            "FILTER: impl_only=true ({impl_trait_caller_count} of {unfiltered_caller_count} callers shown)\n",
766        );
767        formatted = format!("{filter_header}{formatted}");
768    }
769
770    Ok((
771        formatted,
772        prod_chains,
773        test_chains,
774        outgoing_chains,
775        def_count,
776    ))
777}
778
779/// Analyze a symbol's call graph across a directory with progress tracking.
780// public API; callers expect owned semantics
781#[allow(clippy::needless_pass_by_value)]
782pub fn analyze_focused_with_progress(
783    root: &Path,
784    params: &FocusedAnalysisConfig,
785    progress: Arc<AtomicUsize>,
786    ct: CancellationToken,
787) -> Result<FocusedAnalysisOutput, AnalyzeError> {
788    let entries = walk_directory(root, params.max_depth)?;
789    let internal_params = InternalFocusedParams {
790        focus: params.focus.clone(),
791        match_mode: params.match_mode.clone(),
792        follow_depth: params.follow_depth,
793        ast_recursion_limit: params.ast_recursion_limit,
794        use_summary: params.use_summary,
795        impl_only: params.impl_only,
796        def_use: params.def_use,
797    };
798    analyze_focused_with_progress_with_entries_internal(
799        root,
800        params.max_depth,
801        &progress,
802        &ct,
803        &internal_params,
804        &entries,
805    )
806}
807
808/// Internal implementation of focused analysis using pre-walked entries and params struct.
809#[instrument(skip_all, fields(path = %root.display(), symbol = %params.focus))]
810fn analyze_focused_with_progress_with_entries_internal(
811    root: &Path,
812    _max_depth: Option<u32>,
813    progress: &Arc<AtomicUsize>,
814    ct: &CancellationToken,
815    params: &InternalFocusedParams,
816    entries: &[WalkEntry],
817) -> Result<FocusedAnalysisOutput, AnalyzeError> {
818    // Check if already cancelled
819    if ct.is_cancelled() {
820        return Err(AnalyzeError::Cancelled);
821    }
822
823    // Check if path is a file (hint to use directory)
824    if root.is_file() {
825        let formatted =
826            "Single-file focus not supported. Please provide a directory path for cross-file call graph analysis.\n"
827                .to_string();
828        return Ok(FocusedAnalysisOutput {
829            formatted,
830            next_cursor: None,
831            prod_chains: vec![],
832            test_chains: vec![],
833            outgoing_chains: vec![],
834            def_count: 0,
835            unfiltered_caller_count: 0,
836            impl_trait_caller_count: 0,
837            callers: None,
838            test_callers: None,
839            callees: None,
840            def_use_sites: vec![],
841        });
842    }
843
844    // Phase 1: Collect file analysis
845    let (analysis_results, all_impl_traits) =
846        collect_file_analysis(entries, progress, ct, params.ast_recursion_limit)?;
847
848    // Check for cancellation before building the call graph (phase 2)
849    if ct.is_cancelled() {
850        return Err(AnalyzeError::Cancelled);
851    }
852
853    // Phase 2: Build call graph
854    let mut graph = build_call_graph(analysis_results, &all_impl_traits)?;
855
856    // Check for cancellation before resolving the symbol (phase 3)
857    if ct.is_cancelled() {
858        return Err(AnalyzeError::Cancelled);
859    }
860
861    // Phase 3: Resolve symbol and apply impl_only filter.
862    // When def_use=true and the symbol is not in the call graph (e.g. a variable),
863    // fall through to def-use extraction instead of returning SymbolNotFound.
864    let resolve_result = resolve_symbol(&mut graph, params);
865    if let Err(AnalyzeError::Graph(crate::graph::GraphError::SymbolNotFound { .. })) =
866        &resolve_result
867    {
868        // Deliberately not collapsed: resolve_result must stay alive past this block
869        // so that the `?` below can propagate non-SymbolNotFound errors.
870        if params.def_use {
871            let def_use_sites =
872                collect_def_use_sites(entries, &params.focus, params.ast_recursion_limit, root, ct);
873            if def_use_sites.is_empty() {
874                // Symbol not found anywhere (neither in call graph nor as def/use site).
875                // Propagate the original SymbolNotFound error instead of returning an
876                // empty success response.
877                return Err(resolve_result.unwrap_err());
878            }
879            use std::fmt::Write as _;
880            let mut formatted = String::new();
881            let _ = writeln!(
882                formatted,
883                "FOCUS: {} (0 defs, 0 callers, 0 callees)",
884                params.focus
885            );
886            {
887                let writes = def_use_sites
888                    .iter()
889                    .filter(|s| {
890                        matches!(
891                            s.kind,
892                            crate::types::DefUseKind::Write | crate::types::DefUseKind::WriteRead
893                        )
894                    })
895                    .count();
896                let reads = def_use_sites
897                    .iter()
898                    .filter(|s| s.kind == crate::types::DefUseKind::Read)
899                    .count();
900                let _ = writeln!(
901                    formatted,
902                    "DEF-USE SITES  {}  ({} total: {} writes, {} reads)",
903                    params.focus,
904                    def_use_sites.len(),
905                    writes,
906                    reads
907                );
908            }
909            return Ok(FocusedAnalysisOutput {
910                formatted,
911                next_cursor: None,
912                callers: None,
913                test_callers: None,
914                callees: None,
915                prod_chains: vec![],
916                test_chains: vec![],
917                outgoing_chains: vec![],
918                def_count: 0,
919                unfiltered_caller_count: 0,
920                impl_trait_caller_count: 0,
921                def_use_sites,
922            });
923        }
924    }
925    let (resolved_focus, unfiltered_caller_count, impl_trait_caller_count) = resolve_result?;
926
927    // Check for cancellation before computing chains (phase 4)
928    if ct.is_cancelled() {
929        return Err(AnalyzeError::Cancelled);
930    }
931
932    // Phase 5 (optional, before formatting): Def-use site extraction.
933    // Use params.focus (the raw user-supplied string) rather than resolved_focus
934    // so that variable/field names that are not in the call graph still work.
935    let def_use_sites = if params.def_use {
936        collect_def_use_sites(entries, &params.focus, params.ast_recursion_limit, root, ct)
937    } else {
938        Vec::new()
939    };
940
941    // Phase 4: Compute chains and format output (includes def_use_sites in one pass)
942    let (formatted, prod_chains, test_chains, outgoing_chains, def_count) = compute_chains(
943        &graph,
944        &resolved_focus,
945        root,
946        params,
947        unfiltered_caller_count,
948        impl_trait_caller_count,
949        &def_use_sites,
950    )?;
951
952    // Compute depth-1 chains for structured output fields (always direct relationships only,
953    // regardless of `follow_depth` used for the text-formatted output).
954    let (depth1_callers, depth1_test_callers, depth1_callees) = if params.follow_depth <= 1 {
955        // Chains already at depth 1; reuse the partitioned vecs.
956        let callers = chains_to_entries(&prod_chains, Some(root));
957        let test_callers = chains_to_entries(&test_chains, Some(root));
958        let callees = chains_to_entries(&outgoing_chains, Some(root));
959        (callers, test_callers, callees)
960    } else {
961        // follow_depth > 1: re-query at depth 1 to get only direct edges.
962        let incoming1 = graph
963            .find_incoming_chains(&resolved_focus, 1)
964            .unwrap_or_default();
965        let outgoing1 = graph
966            .find_outgoing_chains(&resolved_focus, 1)
967            .unwrap_or_default();
968        let (prod1, test1): (Vec<_>, Vec<_>) = incoming1.into_iter().partition(|chain| {
969            chain
970                .chain
971                .first()
972                .is_none_or(|(name, path, _)| !is_test_file(path) && !name.starts_with("test_"))
973        });
974        let callers = chains_to_entries(&prod1, Some(root));
975        let test_callers = chains_to_entries(&test1, Some(root));
976        let callees = chains_to_entries(&outgoing1, Some(root));
977        (callers, test_callers, callees)
978    };
979
980    Ok(FocusedAnalysisOutput {
981        formatted,
982        next_cursor: None,
983        callers: depth1_callers,
984        test_callers: depth1_test_callers,
985        callees: depth1_callees,
986        prod_chains,
987        test_chains,
988        outgoing_chains,
989        def_count,
990        unfiltered_caller_count,
991        impl_trait_caller_count,
992        def_use_sites,
993    })
994}
995
996/// Phase 5: Extract def-use sites for `symbol` across all entries.
997/// Writes go before reads; within each kind ordered by file, line, then column.
998fn collect_def_use_sites(
999    entries: &[WalkEntry],
1000    symbol: &str,
1001    ast_recursion_limit: Option<usize>,
1002    root: &std::path::Path,
1003    ct: &CancellationToken,
1004) -> Vec<crate::types::DefUseSite> {
1005    use crate::parser::SemanticExtractor;
1006
1007    let file_entries: Vec<&WalkEntry> = entries
1008        .iter()
1009        .filter(|e| !e.is_dir && !e.is_symlink)
1010        .collect();
1011
1012    let mut sites: Vec<crate::types::DefUseSite> = file_entries
1013        .par_iter()
1014        .filter_map(|entry| {
1015            if ct.is_cancelled() {
1016                return None;
1017            }
1018
1019            // Check file size before reading
1020            if entry.path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1021                tracing::debug!("skipping large file: {}", entry.path.display());
1022                return None;
1023            }
1024
1025            let Ok(source) = std::fs::read_to_string(&entry.path) else {
1026                return None;
1027            };
1028            let ext = entry
1029                .path
1030                .extension()
1031                .and_then(|e| e.to_str())
1032                .unwrap_or("");
1033            let lang = crate::lang::language_for_extension(ext)?;
1034            let file_path = entry
1035                .path
1036                .strip_prefix(root)
1037                .unwrap_or(&entry.path)
1038                .display()
1039                .to_string();
1040            let sites = SemanticExtractor::extract_def_use_for_file(
1041                &source,
1042                lang,
1043                symbol,
1044                &file_path,
1045                ast_recursion_limit,
1046            );
1047            if sites.is_empty() { None } else { Some(sites) }
1048        })
1049        .flatten()
1050        .collect();
1051
1052    // Writes before reads; within each kind: file, line, then column for deterministic order
1053    sites.sort_by(|a, b| {
1054        use crate::types::DefUseKind;
1055        let kind_ord = |k: &DefUseKind| match k {
1056            DefUseKind::Write | DefUseKind::WriteRead => 0,
1057            DefUseKind::Read => 1,
1058        };
1059        kind_ord(&a.kind)
1060            .cmp(&kind_ord(&b.kind))
1061            .then_with(|| a.file.cmp(&b.file))
1062            .then_with(|| a.line.cmp(&b.line))
1063            .then_with(|| a.column.cmp(&b.column))
1064    });
1065
1066    sites
1067}
1068
1069/// Analyze a symbol's call graph using pre-walked directory entries.
1070pub fn analyze_focused_with_progress_with_entries(
1071    root: &Path,
1072    params: &FocusedAnalysisConfig,
1073    progress: &Arc<AtomicUsize>,
1074    ct: &CancellationToken,
1075    entries: &[WalkEntry],
1076) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1077    let internal_params = InternalFocusedParams {
1078        focus: params.focus.clone(),
1079        match_mode: params.match_mode.clone(),
1080        follow_depth: params.follow_depth,
1081        ast_recursion_limit: params.ast_recursion_limit,
1082        use_summary: params.use_summary,
1083        impl_only: params.impl_only,
1084        def_use: params.def_use,
1085    };
1086    analyze_focused_with_progress_with_entries_internal(
1087        root,
1088        params.max_depth,
1089        progress,
1090        ct,
1091        &internal_params,
1092        entries,
1093    )
1094}
1095
1096#[instrument(skip_all, fields(path = %root.display(), symbol = %focus))]
1097pub fn analyze_focused(
1098    root: &Path,
1099    focus: &str,
1100    follow_depth: u32,
1101    max_depth: Option<u32>,
1102    ast_recursion_limit: Option<usize>,
1103) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1104    let entries = walk_directory(root, max_depth)?;
1105    let counter = Arc::new(AtomicUsize::new(0));
1106    let ct = CancellationToken::new();
1107    let params = FocusedAnalysisConfig {
1108        focus: focus.to_string(),
1109        match_mode: SymbolMatchMode::Exact,
1110        follow_depth,
1111        max_depth,
1112        ast_recursion_limit,
1113        use_summary: false,
1114        impl_only: None,
1115        def_use: false,
1116    };
1117    analyze_focused_with_progress_with_entries(root, &params, &counter, &ct, &entries)
1118}
1119
1120/// Analyze a single file and return a minimal fixed schema (name, line count, language,
1121/// functions, imports) for lightweight code understanding.
1122#[instrument(skip_all, fields(path))]
1123pub fn analyze_module_file(path: &str) -> Result<crate::types::ModuleInfo, AnalyzeError> {
1124    // Check file size before reading
1125    if Path::new(path).metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1126        tracing::debug!("skipping large file: {}", path);
1127        return Err(AnalyzeError::Parser(
1128            crate::parser::ParserError::ParseError("file too large".to_string()),
1129        ));
1130    }
1131
1132    let source = std::fs::read_to_string(path)
1133        .map_err(|e| AnalyzeError::Parser(crate::parser::ParserError::ParseError(e.to_string())))?;
1134
1135    let file_path = Path::new(path);
1136    let name = file_path
1137        .file_name()
1138        .and_then(|s| s.to_str())
1139        .unwrap_or("unknown")
1140        .to_string();
1141
1142    let line_count = source.lines().count();
1143
1144    let language = file_path
1145        .extension()
1146        .and_then(|e| e.to_str())
1147        .and_then(language_for_extension)
1148        .ok_or_else(|| {
1149            AnalyzeError::Parser(crate::parser::ParserError::ParseError(
1150                "unsupported or missing file extension".to_string(),
1151            ))
1152        })?;
1153
1154    let semantic = SemanticExtractor::extract(&source, language, None)?;
1155
1156    let functions = semantic
1157        .functions
1158        .into_iter()
1159        .map(|f| crate::types::ModuleFunctionInfo {
1160            name: f.name,
1161            line: f.line,
1162        })
1163        .collect();
1164
1165    let imports = semantic
1166        .imports
1167        .into_iter()
1168        .map(|i| crate::types::ModuleImportInfo {
1169            module: i.module,
1170            items: i.items,
1171        })
1172        .collect();
1173
1174    Ok(crate::types::ModuleInfo {
1175        name,
1176        line_count,
1177        language: language.to_string(),
1178        functions,
1179        imports,
1180    })
1181}
1182
1183/// Scan a directory for files that import a given module path.
1184///
1185/// For each non-directory walk entry, extracts imports via [`SemanticExtractor`] and
1186/// checks whether `module` matches `ImportInfo.module` or appears in `ImportInfo.items`.
1187/// Returns a [`FocusedAnalysisOutput`] whose `formatted` field lists matching files.
1188pub fn analyze_import_lookup(
1189    root: &Path,
1190    module: &str,
1191    entries: &[WalkEntry],
1192    ast_recursion_limit: Option<usize>,
1193) -> Result<FocusedAnalysisOutput, AnalyzeError> {
1194    let matches: Vec<(PathBuf, usize)> = entries
1195        .par_iter()
1196        .filter_map(|entry| {
1197            if entry.is_dir || entry.is_symlink {
1198                tracing::debug!("skipping symlink: {}", entry.path.display());
1199                return None;
1200            }
1201            let ext = entry
1202                .path
1203                .extension()
1204                .and_then(|e| e.to_str())
1205                .and_then(crate::lang::language_for_extension)?;
1206            let source = std::fs::read_to_string(&entry.path).ok()?;
1207            let semantic = SemanticExtractor::extract(&source, ext, ast_recursion_limit).ok()?;
1208            for import in &semantic.imports {
1209                if import.module == module || import.items.iter().any(|item| item == module) {
1210                    return Some((entry.path.clone(), import.line));
1211                }
1212            }
1213            None
1214        })
1215        .collect();
1216
1217    let mut text = format!("IMPORT_LOOKUP: {module}\n");
1218    text.push_str(&format!("ROOT: {}\n", root.display()));
1219    text.push_str(&format!("MATCHES: {}\n", matches.len()));
1220    for (path, line) in &matches {
1221        let rel = path.strip_prefix(root).unwrap_or(path);
1222        text.push_str(&format!("  {}:{line}\n", rel.display()));
1223    }
1224
1225    Ok(FocusedAnalysisOutput {
1226        formatted: text,
1227        next_cursor: None,
1228        prod_chains: vec![],
1229        test_chains: vec![],
1230        outgoing_chains: vec![],
1231        def_count: 0,
1232        unfiltered_caller_count: 0,
1233        impl_trait_caller_count: 0,
1234        callers: None,
1235        test_callers: None,
1236        callees: None,
1237        def_use_sites: vec![],
1238    })
1239}
1240
1241/// Resolve Python wildcard imports to actual symbol names.
1242///
1243/// For each import with items=`["*"]`, this function:
1244/// 1. Parses the relative dots (if any) and climbs the directory tree
1245/// 2. Finds the target .py file or __init__.py
1246/// 3. Extracts symbols (functions and classes) from the target
1247/// 4. Honors __all__ if defined, otherwise uses function+class names
1248///
1249/// All resolution failures are non-fatal: debug-logged and the wildcard is preserved.
1250fn resolve_wildcard_imports(file_path: &Path, imports: &mut [ImportInfo]) {
1251    use std::collections::HashMap;
1252
1253    let mut resolved_cache: HashMap<PathBuf, Vec<String>> = HashMap::new();
1254    let Ok(file_path_canonical) = file_path.canonicalize() else {
1255        tracing::debug!(file = ?file_path, "unable to canonicalize current file path");
1256        return;
1257    };
1258
1259    for import in imports.iter_mut() {
1260        if import.items != ["*"] {
1261            continue;
1262        }
1263        resolve_single_wildcard(import, file_path, &file_path_canonical, &mut resolved_cache);
1264    }
1265}
1266
1267/// Resolve one wildcard import in place. On any failure the import is left unchanged.
1268fn resolve_single_wildcard(
1269    import: &mut ImportInfo,
1270    file_path: &Path,
1271    file_path_canonical: &Path,
1272    resolved_cache: &mut std::collections::HashMap<PathBuf, Vec<String>>,
1273) {
1274    let module = import.module.clone();
1275    let dot_count = module.chars().take_while(|c| *c == '.').count();
1276    if dot_count == 0 {
1277        return;
1278    }
1279    let module_path = module.trim_start_matches('.');
1280
1281    let Some(target_to_read) = locate_target_file(file_path, dot_count, module_path, &module)
1282    else {
1283        return;
1284    };
1285
1286    let Ok(canonical) = target_to_read.canonicalize() else {
1287        tracing::debug!(target = ?target_to_read, import = %module, "unable to canonicalize path");
1288        return;
1289    };
1290
1291    if canonical == file_path_canonical {
1292        tracing::debug!(target = ?canonical, import = %module, "cannot import from self");
1293        return;
1294    }
1295
1296    if let Some(cached) = resolved_cache.get(&canonical) {
1297        tracing::debug!(import = %module, symbols_count = cached.len(), "using cached symbols");
1298        import.items.clone_from(cached);
1299        return;
1300    }
1301
1302    if let Some(symbols) = parse_target_symbols(&target_to_read, &module) {
1303        tracing::debug!(import = %module, resolved_count = symbols.len(), "wildcard import resolved");
1304        import.items.clone_from(&symbols);
1305        resolved_cache.insert(canonical, symbols);
1306    }
1307}
1308
1309/// Locate the .py file that a wildcard import refers to. Returns None if not found.
1310fn locate_target_file(
1311    file_path: &Path,
1312    dot_count: usize,
1313    module_path: &str,
1314    module: &str,
1315) -> Option<PathBuf> {
1316    let mut target_dir = file_path.parent()?.to_path_buf();
1317
1318    for _ in 1..dot_count {
1319        if !target_dir.pop() {
1320            tracing::debug!(import = %module, "unable to climb {} levels", dot_count.saturating_sub(1));
1321            return None;
1322        }
1323    }
1324
1325    let target_file = if module_path.is_empty() {
1326        target_dir.join("__init__.py")
1327    } else {
1328        let rel_path = module_path.replace('.', "/");
1329        target_dir.join(format!("{rel_path}.py"))
1330    };
1331
1332    if target_file.exists() {
1333        Some(target_file)
1334    } else if target_file.with_extension("").is_dir() {
1335        let init = target_file.with_extension("").join("__init__.py");
1336        if init.exists() { Some(init) } else { None }
1337    } else {
1338        tracing::debug!(target = ?target_file, import = %module, "target file not found");
1339        None
1340    }
1341}
1342
1343/// Read and parse a target .py file, returning its exported symbols.
1344fn parse_target_symbols(target_path: &Path, module: &str) -> Option<Vec<String>> {
1345    use tree_sitter::Parser;
1346
1347    // Check file size before reading
1348    if target_path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1349        tracing::debug!("skipping large file: {}", target_path.display());
1350        return None;
1351    }
1352
1353    let source = match std::fs::read_to_string(target_path) {
1354        Ok(s) => s,
1355        Err(e) => {
1356            tracing::debug!(target = ?target_path, import = %module, error = %e, "unable to read target file");
1357            return None;
1358        }
1359    };
1360
1361    // Parse once with tree-sitter
1362    let lang_info = crate::languages::get_language_info("python")?;
1363    let mut parser = Parser::new();
1364    if parser.set_language(&lang_info.language).is_err() {
1365        return None;
1366    }
1367    let tree = parser.parse(&source, None)?;
1368
1369    // First, try to extract __all__ from the same tree
1370    let mut symbols = Vec::new();
1371    extract_all_from_tree(&tree, &source, &mut symbols);
1372    if !symbols.is_empty() {
1373        tracing::debug!(import = %module, symbols = ?symbols, "using __all__ symbols");
1374        return Some(symbols);
1375    }
1376
1377    // Fallback: extract functions/classes from the tree
1378    let root = tree.root_node();
1379    let mut cursor = root.walk();
1380    for child in root.children(&mut cursor) {
1381        if matches!(child.kind(), "function_definition" | "class_definition")
1382            && let Some(name_node) = child.child_by_field_name("name")
1383        {
1384            let name = source[name_node.start_byte()..name_node.end_byte()].to_string();
1385            if !name.starts_with('_') {
1386                symbols.push(name);
1387            }
1388        }
1389    }
1390    tracing::debug!(import = %module, fallback_symbols = ?symbols, "using fallback function/class names");
1391    Some(symbols)
1392}
1393
1394/// Extract __all__ from a tree-sitter tree.
1395fn extract_all_from_tree(tree: &tree_sitter::Tree, source: &str, result: &mut Vec<String>) {
1396    let root = tree.root_node();
1397    let mut cursor = root.walk();
1398    for child in root.children(&mut cursor) {
1399        if child.kind() == "simple_statement" {
1400            // simple_statement contains assignment and other statement types
1401            let mut simple_cursor = child.walk();
1402            for simple_child in child.children(&mut simple_cursor) {
1403                if simple_child.kind() == "assignment"
1404                    && let Some(left) = simple_child.child_by_field_name("left")
1405                {
1406                    let target_text = source[left.start_byte()..left.end_byte()].trim();
1407                    if target_text == "__all__"
1408                        && let Some(right) = simple_child.child_by_field_name("right")
1409                    {
1410                        extract_string_list_from_list_node(&right, source, result);
1411                    }
1412                }
1413            }
1414        } else if child.kind() == "expression_statement" {
1415            // Fallback for older Python AST structures
1416            let mut stmt_cursor = child.walk();
1417            for stmt_child in child.children(&mut stmt_cursor) {
1418                if stmt_child.kind() == "assignment"
1419                    && let Some(left) = stmt_child.child_by_field_name("left")
1420                {
1421                    let target_text = source[left.start_byte()..left.end_byte()].trim();
1422                    if target_text == "__all__"
1423                        && let Some(right) = stmt_child.child_by_field_name("right")
1424                    {
1425                        extract_string_list_from_list_node(&right, source, result);
1426                    }
1427                }
1428            }
1429        }
1430    }
1431}
1432
1433/// Extract string literals from a Python list node.
1434fn extract_string_list_from_list_node(
1435    list_node: &tree_sitter::Node,
1436    source: &str,
1437    result: &mut Vec<String>,
1438) {
1439    let mut cursor = list_node.walk();
1440    for child in list_node.named_children(&mut cursor) {
1441        if child.kind() == "string" {
1442            let raw = source[child.start_byte()..child.end_byte()].trim();
1443            // Strip quotes: "name" -> name
1444            let unquoted = raw.trim_matches('"').trim_matches('\'').to_string();
1445            if !unquoted.is_empty() {
1446                result.push(unquoted);
1447            }
1448        }
1449    }
1450}
1451
1452/// Read a file and return its raw content with line numbers for a specified range.
1453///
1454/// # Arguments
1455/// - `path`: File path to read
1456/// - `start_line`: Starting line (1-indexed, optional; defaults to 1)
1457/// - `end_line`: Ending line (1-indexed, optional; defaults to total lines)
1458///
1459/// # Returns
1460/// - `Ok(AnalyzeRawOutput)` with formatted content and metadata
1461/// - `Err(AnalyzeError::NotAFile)` if path is a directory
1462/// - `Err(AnalyzeError::InvalidRange)` if start > end
1463/// - `Err(AnalyzeError::Io)` for file I/O errors
1464pub fn analyze_raw_range(
1465    path: &Path,
1466    start_line: Option<usize>,
1467    end_line: Option<usize>,
1468) -> Result<crate::types::AnalyzeRawOutput, AnalyzeError> {
1469    if path.is_dir() {
1470        return Err(AnalyzeError::NotAFile(path.to_path_buf()));
1471    }
1472
1473    // Check file size before reading
1474    if path.metadata().map(|m| m.len()).unwrap_or(0) > MAX_FILE_SIZE_BYTES {
1475        tracing::debug!("skipping large file: {}", path.display());
1476        return Err(AnalyzeError::Parser(
1477            crate::parser::ParserError::ParseError("file too large".to_string()),
1478        ));
1479    }
1480
1481    let raw = std::fs::read_to_string(path)?;
1482    let lines: Vec<&str> = raw.lines().collect();
1483    let total = lines.len();
1484    if total == 0 {
1485        return Ok(crate::types::AnalyzeRawOutput {
1486            path: path.display().to_string(),
1487            total_lines: 0,
1488            start_line: 0,
1489            end_line: 0,
1490            content: String::new(),
1491            next_start_line: None,
1492        });
1493    }
1494    /// Files above this line count require explicit start_line/end_line on rangeless calls.
1495    const MAX_RANGELESS_LINES: usize = 100;
1496    let ext = path.extension().and_then(|e| e.to_str());
1497    if ext.and_then(language_for_extension).is_some()
1498        && start_line.is_none()
1499        && end_line.is_none()
1500        && total > MAX_RANGELESS_LINES
1501    {
1502        return Err(AnalyzeError::RangelessLargeFile { total_lines: total });
1503    }
1504    let start = start_line.unwrap_or(1).max(1).min(total.max(1));
1505    let end = end_line.unwrap_or(total).min(total).max(1);
1506    if start > end {
1507        return Err(AnalyzeError::InvalidRange { start, end, total });
1508    }
1509    let width = end.to_string().len();
1510    let content = lines[start - 1..end]
1511        .iter()
1512        .enumerate()
1513        .map(|(i, line)| format!("{:>width$}: {}", start + i, line, width = width))
1514        .collect::<Vec<_>>()
1515        .join("\n");
1516    Ok(crate::types::AnalyzeRawOutput {
1517        path: path.display().to_string(),
1518        total_lines: total,
1519        start_line: start,
1520        end_line: end,
1521        content,
1522        next_start_line: if end == total { None } else { Some(end + 1) },
1523    })
1524}
1525
1526#[cfg(test)]
1527mod tests {
1528    use super::*;
1529    use crate::formatter::format_focused_paginated;
1530    use crate::graph::InternalCallChain;
1531    use crate::pagination::{PaginationMode, decode_cursor, paginate_slice};
1532    use std::fs;
1533    use std::path::PathBuf;
1534    use tempfile::TempDir;
1535
1536    #[cfg(feature = "lang-rust")]
1537    #[test]
1538    fn analyze_str_rust_happy_path() {
1539        let source = "fn hello() -> i32 { 42 }";
1540        let result = analyze_str(source, "rs", None);
1541        assert!(result.is_ok());
1542    }
1543
1544    #[cfg(feature = "lang-python")]
1545    #[test]
1546    fn analyze_str_python_happy_path() {
1547        let source = "def greet(name):\n    return f'Hello {name}'";
1548        let result = analyze_str(source, "py", None);
1549        assert!(result.is_ok());
1550    }
1551
1552    #[cfg(feature = "lang-rust")]
1553    #[test]
1554    fn analyze_str_rust_by_language_name() {
1555        let source = "fn hello() -> i32 { 42 }";
1556        let result = analyze_str(source, "rust", None);
1557        assert!(result.is_ok());
1558    }
1559
1560    #[cfg(feature = "lang-python")]
1561    #[test]
1562    fn analyze_str_python_by_language_name() {
1563        let source = "def greet(name):\n    return f'Hello {name}'";
1564        let result = analyze_str(source, "python", None);
1565        assert!(result.is_ok());
1566    }
1567
1568    #[cfg(feature = "lang-rust")]
1569    #[test]
1570    fn analyze_str_rust_mixed_case() {
1571        let source = "fn hello() -> i32 { 42 }";
1572        let result = analyze_str(source, "RuSt", None);
1573        assert!(result.is_ok());
1574    }
1575
1576    #[cfg(feature = "lang-python")]
1577    #[test]
1578    fn analyze_str_python_mixed_case() {
1579        let source = "def greet(name):\n    return f'Hello {name}'";
1580        let result = analyze_str(source, "PyThOn", None);
1581        assert!(result.is_ok());
1582    }
1583
1584    #[test]
1585    fn analyze_str_unsupported_language() {
1586        let result = analyze_str("code", "brainfuck", None);
1587        assert!(
1588            matches!(result, Err(AnalyzeError::UnsupportedLanguage(lang)) if lang == "brainfuck")
1589        );
1590    }
1591
1592    #[cfg(feature = "lang-rust")]
1593    #[test]
1594    fn test_symbol_focus_callers_pagination_first_page() {
1595        let temp_dir = TempDir::new().unwrap();
1596
1597        // Create a file with many callers of `target`
1598        let mut code = String::from("fn target() {}\n");
1599        for i in 0..15 {
1600            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
1601        }
1602        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1603
1604        // Act
1605        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1606
1607        // Paginate prod callers with page_size=5
1608        let paginated = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
1609            .expect("paginate failed");
1610        assert!(
1611            paginated.total >= 5,
1612            "should have enough callers to paginate"
1613        );
1614        assert!(
1615            paginated.next_cursor.is_some(),
1616            "should have next_cursor for page 1"
1617        );
1618
1619        // Verify cursor encodes callers mode
1620        assert_eq!(paginated.items.len(), 5);
1621    }
1622
1623    #[test]
1624    fn test_symbol_focus_callers_pagination_second_page() {
1625        let temp_dir = TempDir::new().unwrap();
1626
1627        let mut code = String::from("fn target() {}\n");
1628        for i in 0..12 {
1629            code.push_str(&format!("fn caller_{:02}() {{ target(); }}\n", i));
1630        }
1631        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1632
1633        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1634        let total_prod = output.prod_chains.len();
1635
1636        if total_prod > 5 {
1637            // Get page 1 cursor
1638            let p1 = paginate_slice(&output.prod_chains, 0, 5, PaginationMode::Callers)
1639                .expect("paginate failed");
1640            assert!(p1.next_cursor.is_some());
1641
1642            let cursor_str = p1.next_cursor.unwrap();
1643            let cursor_data = decode_cursor(&cursor_str).expect("decode failed");
1644
1645            // Get page 2
1646            let p2 = paginate_slice(
1647                &output.prod_chains,
1648                cursor_data.offset,
1649                5,
1650                PaginationMode::Callers,
1651            )
1652            .expect("paginate failed");
1653
1654            // Format paginated output
1655            let formatted = format_focused_paginated(
1656                &p2.items,
1657                total_prod,
1658                PaginationMode::Callers,
1659                "target",
1660                &output.prod_chains,
1661                &output.test_chains,
1662                &output.outgoing_chains,
1663                output.def_count,
1664                cursor_data.offset,
1665                Some(temp_dir.path()),
1666                true,
1667            );
1668
1669            // Assert: header shows correct range for page 2
1670            let expected_start = cursor_data.offset + 1;
1671            assert!(
1672                formatted.contains(&format!("CALLERS ({}", expected_start)),
1673                "header should show page 2 range, got: {}",
1674                formatted
1675            );
1676        }
1677    }
1678
1679    #[test]
1680    fn test_chains_to_entries_empty_returns_none() {
1681        // Arrange
1682        let chains: Vec<InternalCallChain> = vec![];
1683
1684        // Act
1685        let result = chains_to_entries(&chains, None);
1686
1687        // Assert
1688        assert!(result.is_none());
1689    }
1690
1691    #[test]
1692    fn test_chains_to_entries_with_data_returns_entries() {
1693        // Arrange
1694        let chains = vec![
1695            InternalCallChain {
1696                chain: vec![("caller1".to_string(), PathBuf::from("/root/lib.rs"), 10)],
1697            },
1698            InternalCallChain {
1699                chain: vec![("caller2".to_string(), PathBuf::from("/root/other.rs"), 20)],
1700            },
1701        ];
1702        let root = PathBuf::from("/root");
1703
1704        // Act
1705        let result = chains_to_entries(&chains, Some(root.as_path()));
1706
1707        // Assert
1708        assert!(result.is_some());
1709        let entries = result.unwrap();
1710        assert_eq!(entries.len(), 2);
1711        assert_eq!(entries[0].symbol, "caller1");
1712        assert_eq!(entries[0].file, "lib.rs");
1713        assert_eq!(entries[0].line, 10);
1714        assert_eq!(entries[1].symbol, "caller2");
1715        assert_eq!(entries[1].file, "other.rs");
1716        assert_eq!(entries[1].line, 20);
1717    }
1718
1719    #[test]
1720    fn test_symbol_focus_callees_pagination() {
1721        let temp_dir = TempDir::new().unwrap();
1722
1723        // target calls many functions
1724        let mut code = String::from("fn target() {\n");
1725        for i in 0..10 {
1726            code.push_str(&format!("    callee_{:02}();\n", i));
1727        }
1728        code.push_str("}\n");
1729        for i in 0..10 {
1730            code.push_str(&format!("fn callee_{:02}() {{}}\n", i));
1731        }
1732        fs::write(temp_dir.path().join("lib.rs"), &code).unwrap();
1733
1734        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1735        let total_callees = output.outgoing_chains.len();
1736
1737        if total_callees > 3 {
1738            let paginated = paginate_slice(&output.outgoing_chains, 0, 3, PaginationMode::Callees)
1739                .expect("paginate failed");
1740
1741            let formatted = format_focused_paginated(
1742                &paginated.items,
1743                total_callees,
1744                PaginationMode::Callees,
1745                "target",
1746                &output.prod_chains,
1747                &output.test_chains,
1748                &output.outgoing_chains,
1749                output.def_count,
1750                0,
1751                Some(temp_dir.path()),
1752                true,
1753            );
1754
1755            assert!(
1756                formatted.contains(&format!(
1757                    "CALLEES (1-{} of {})",
1758                    paginated.items.len(),
1759                    total_callees
1760                )),
1761                "header should show callees range, got: {}",
1762                formatted
1763            );
1764        }
1765    }
1766
1767    #[test]
1768    fn test_symbol_focus_empty_prod_callers() {
1769        let temp_dir = TempDir::new().unwrap();
1770
1771        // target is only called from test functions
1772        let code = r#"
1773fn target() {}
1774
1775#[cfg(test)]
1776mod tests {
1777    use super::*;
1778    #[test]
1779    fn test_something() { target(); }
1780}
1781"#;
1782        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1783
1784        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1785
1786        // prod_chains may be empty; pagination should handle it gracefully
1787        let paginated = paginate_slice(&output.prod_chains, 0, 100, PaginationMode::Callers)
1788            .expect("paginate failed");
1789        assert_eq!(paginated.items.len(), output.prod_chains.len());
1790        assert!(
1791            paginated.next_cursor.is_none(),
1792            "no next_cursor for empty or single-page prod_chains"
1793        );
1794    }
1795
1796    #[test]
1797    fn test_impl_only_filter_header_correct_counts() {
1798        let temp_dir = TempDir::new().unwrap();
1799
1800        // Create a Rust fixture with:
1801        // - A trait definition
1802        // - An impl Trait for SomeType block that calls the focus symbol
1803        // - A regular (non-trait-impl) function that also calls the focus symbol
1804        let code = r#"
1805trait MyTrait {
1806    fn focus_symbol();
1807}
1808
1809struct SomeType;
1810
1811impl MyTrait for SomeType {
1812    fn focus_symbol() {}
1813}
1814
1815fn impl_caller() {
1816    SomeType::focus_symbol();
1817}
1818
1819fn regular_caller() {
1820    SomeType::focus_symbol();
1821}
1822"#;
1823        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1824
1825        // Call analyze_focused with impl_only=Some(true)
1826        let params = FocusedAnalysisConfig {
1827            focus: "focus_symbol".to_string(),
1828            match_mode: SymbolMatchMode::Insensitive,
1829            follow_depth: 1,
1830            max_depth: None,
1831            ast_recursion_limit: None,
1832            use_summary: false,
1833            impl_only: Some(true),
1834            def_use: false,
1835        };
1836        let output = analyze_focused_with_progress(
1837            temp_dir.path(),
1838            &params,
1839            Arc::new(AtomicUsize::new(0)),
1840            CancellationToken::new(),
1841        )
1842        .unwrap();
1843
1844        // Assert the result contains "FILTER: impl_only=true"
1845        assert!(
1846            output.formatted.contains("FILTER: impl_only=true"),
1847            "formatted output should contain FILTER header for impl_only=true, got: {}",
1848            output.formatted
1849        );
1850
1851        // Assert the retained count N < total count M
1852        assert!(
1853            output.impl_trait_caller_count < output.unfiltered_caller_count,
1854            "impl_trait_caller_count ({}) should be less than unfiltered_caller_count ({})",
1855            output.impl_trait_caller_count,
1856            output.unfiltered_caller_count
1857        );
1858
1859        // Assert format is "FILTER: impl_only=true (N of M callers shown)"
1860        let filter_line = output
1861            .formatted
1862            .lines()
1863            .find(|line| line.contains("FILTER: impl_only=true"))
1864            .expect("should find FILTER line");
1865        assert!(
1866            filter_line.contains(&format!(
1867                "({} of {} callers shown)",
1868                output.impl_trait_caller_count, output.unfiltered_caller_count
1869            )),
1870            "FILTER line should show correct N of M counts, got: {}",
1871            filter_line
1872        );
1873    }
1874
1875    #[test]
1876    fn test_callers_count_matches_formatted_output() {
1877        let temp_dir = TempDir::new().unwrap();
1878
1879        // Create a file with multiple callers of `target`
1880        let code = r#"
1881fn target() {}
1882fn caller_a() { target(); }
1883fn caller_b() { target(); }
1884fn caller_c() { target(); }
1885"#;
1886        fs::write(temp_dir.path().join("lib.rs"), code).unwrap();
1887
1888        // Analyze the symbol
1889        let output = analyze_focused(temp_dir.path(), "target", 1, None, None).unwrap();
1890
1891        // Extract CALLERS count from formatted output
1892        let formatted = &output.formatted;
1893        let callers_count_from_output = formatted
1894            .lines()
1895            .find(|line| line.contains("FOCUS:"))
1896            .and_then(|line| {
1897                line.split(',')
1898                    .find(|part| part.contains("callers"))
1899                    .and_then(|part| {
1900                        part.trim()
1901                            .split_whitespace()
1902                            .next()
1903                            .and_then(|s| s.parse::<usize>().ok())
1904                    })
1905            })
1906            .expect("should find CALLERS count in formatted output");
1907
1908        // Compute expected count from prod_chains (unique first-caller names)
1909        let expected_callers_count = output
1910            .prod_chains
1911            .iter()
1912            .filter_map(|chain| chain.chain.first().map(|(name, _, _)| name))
1913            .collect::<std::collections::HashSet<_>>()
1914            .len();
1915
1916        assert_eq!(
1917            callers_count_from_output, expected_callers_count,
1918            "CALLERS count in formatted output should match unique-first-caller count in prod_chains"
1919        );
1920    }
1921
1922    #[cfg(feature = "lang-rust")]
1923    #[test]
1924    fn test_def_use_focused_analysis() {
1925        let temp_dir = TempDir::new().unwrap();
1926        fs::write(
1927            temp_dir.path().join("lib.rs"),
1928            "fn example() {\n    let x = 10;\n    x += 1;\n    println!(\"{}\", x);\n    let y = x + 1;\n}\n",
1929        )
1930        .unwrap();
1931
1932        let entries = walk_directory(temp_dir.path(), None).unwrap();
1933        let counter = Arc::new(AtomicUsize::new(0));
1934        let ct = CancellationToken::new();
1935        let params = FocusedAnalysisConfig {
1936            focus: "x".to_string(),
1937            match_mode: SymbolMatchMode::Exact,
1938            follow_depth: 1,
1939            max_depth: None,
1940            ast_recursion_limit: None,
1941            use_summary: false,
1942            impl_only: None,
1943            def_use: true,
1944        };
1945
1946        let output = analyze_focused_with_progress_with_entries(
1947            temp_dir.path(),
1948            &params,
1949            &counter,
1950            &ct,
1951            &entries,
1952        )
1953        .expect("def_use analysis should succeed");
1954
1955        assert!(
1956            !output.def_use_sites.is_empty(),
1957            "should find def-use sites for x"
1958        );
1959        assert!(
1960            output
1961                .def_use_sites
1962                .iter()
1963                .any(|s| s.kind == crate::types::DefUseKind::Write),
1964            "should have at least one Write site",
1965        );
1966        // No location appears as both write and read
1967        let write_locs: std::collections::HashSet<_> = output
1968            .def_use_sites
1969            .iter()
1970            .filter(|s| {
1971                matches!(
1972                    s.kind,
1973                    crate::types::DefUseKind::Write | crate::types::DefUseKind::WriteRead
1974                )
1975            })
1976            .map(|s| (&s.file, s.line, s.column))
1977            .collect();
1978        assert!(
1979            output
1980                .def_use_sites
1981                .iter()
1982                .filter(|s| s.kind == crate::types::DefUseKind::Read)
1983                .all(|s| !write_locs.contains(&(&s.file, s.line, s.column))),
1984            "no location should appear as both write and read",
1985        );
1986        assert!(
1987            output.formatted.contains("DEF-USE SITES"),
1988            "formatted output should contain DEF-USE SITES"
1989        );
1990    }
1991
1992    fn make_temp_file(content: &str) -> tempfile::NamedTempFile {
1993        let mut f = tempfile::NamedTempFile::new().unwrap();
1994        use std::io::Write;
1995        f.write_all(content.as_bytes()).unwrap();
1996        f.flush().unwrap();
1997        f
1998    }
1999
2000    #[test]
2001    fn test_analyze_raw_full_file() {
2002        let f = make_temp_file("line1\nline2\nline3\n");
2003        let out = analyze_raw_range(f.path(), None, None).unwrap();
2004        assert_eq!(out.total_lines, 3);
2005        assert_eq!(out.start_line, 1);
2006        assert_eq!(out.end_line, 3);
2007        assert_eq!(out.next_start_line, None);
2008        assert!(out.content.contains("line1"));
2009        assert!(out.content.contains("line3"));
2010    }
2011
2012    #[test]
2013    fn test_analyze_raw_partial_range() {
2014        let f = make_temp_file("a\nb\nc\nd\ne\n");
2015        let out = analyze_raw_range(f.path(), Some(2), Some(4)).unwrap();
2016        assert_eq!(out.start_line, 2);
2017        assert_eq!(out.end_line, 4);
2018        assert_eq!(out.next_start_line, Some(5));
2019        assert!(out.content.contains("b"));
2020        assert!(out.content.contains("d"));
2021        assert!(!out.content.contains("a"));
2022        assert!(!out.content.contains("e"));
2023    }
2024
2025    #[test]
2026    fn test_analyze_raw_invalid_range() {
2027        let f = make_temp_file("a\nb\nc\n");
2028        let err = analyze_raw_range(f.path(), Some(3), Some(1)).unwrap_err();
2029        assert!(matches!(err, AnalyzeError::InvalidRange { .. }));
2030    }
2031
2032    #[test]
2033    fn test_analyze_raw_clamped_range() {
2034        let f = make_temp_file("x\ny\nz\n");
2035        // end_line beyond total should clamp
2036        let out = analyze_raw_range(f.path(), Some(1), Some(999)).unwrap();
2037        assert_eq!(out.end_line, 3);
2038        assert_eq!(out.total_lines, 3);
2039        assert_eq!(out.next_start_line, None);
2040    }
2041
2042    #[test]
2043    fn test_analyze_raw_empty_file() {
2044        let f = make_temp_file("");
2045        let out = analyze_raw_range(f.path(), None, None).unwrap();
2046        assert_eq!(out.total_lines, 0);
2047        assert_eq!(out.content, "");
2048        assert_eq!(out.next_start_line, None);
2049    }
2050
2051    #[test]
2052    fn test_analyze_raw_pagination_loop() {
2053        // Create a temp file with 10 lines
2054        let content = "line1\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10\n";
2055        let f = make_temp_file(content);
2056
2057        let mut all_collected = String::new();
2058        let mut start = 1;
2059        let mut iterations = 0;
2060        let max_iterations = 10; // Safety check
2061
2062        loop {
2063            iterations += 1;
2064            assert!(
2065                iterations <= max_iterations,
2066                "pagination loop exceeded max iterations"
2067            );
2068
2069            let out = analyze_raw_range(f.path(), Some(start), Some(start + 2)).unwrap();
2070            all_collected.push_str(&out.content);
2071            all_collected.push('\n');
2072
2073            match out.next_start_line {
2074                Some(next) => {
2075                    start = next;
2076                }
2077                None => {
2078                    break;
2079                }
2080            }
2081        }
2082
2083        // Verify all 10 lines were collected and loop terminated
2084        assert!(all_collected.contains("line1"));
2085        assert!(all_collected.contains("line10"));
2086        assert!(
2087            iterations <= 5,
2088            "should take at most 5 iterations for 10 lines with page_size=3"
2089        );
2090    }
2091
2092    #[test]
2093    fn test_analyze_raw_rangeless_large_file_rejected() {
2094        let content = "line\n".repeat(101);
2095        let f = tempfile::Builder::new().suffix(".rs").tempfile().unwrap();
2096        use std::io::Write;
2097        let mut f_mut = f;
2098        f_mut.write_all(content.as_bytes()).unwrap();
2099        f_mut.flush().unwrap();
2100        let err = analyze_raw_range(f_mut.path(), None, None).unwrap_err();
2101        assert!(matches!(
2102            err,
2103            AnalyzeError::RangelessLargeFile { total_lines: 101 }
2104        ));
2105    }
2106
2107    #[test]
2108    fn test_analyze_raw_rangeless_small_file_allowed() {
2109        let content = "line\n".repeat(100);
2110        let f = make_temp_file(&content);
2111        let out = analyze_raw_range(f.path(), None, None).unwrap();
2112        assert_eq!(out.total_lines, 100);
2113    }
2114
2115    #[test]
2116    fn test_analyze_raw_rangeless_large_noncode_file_allowed() {
2117        let content = "line\n".repeat(101);
2118        let f = tempfile::Builder::new().suffix(".md").tempfile().unwrap();
2119        use std::io::Write;
2120        let mut f_mut = f;
2121        f_mut.write_all(content.as_bytes()).unwrap();
2122        f_mut.flush().unwrap();
2123        let out = analyze_raw_range(f_mut.path(), None, None).unwrap();
2124        assert_eq!(out.total_lines, 101);
2125    }
2126}