llm_utl/
api.rs

//! # Quick Start API
//!
//! High-level, ergonomic API for common use cases. Start here if you want to get
//! results fast without configuration overhead.
5//!
6//! ## Examples
7//!
8//! ```no_run
9//!
10//! // Simplest usage - scan current directory
11//! use llm_utl::api::{Format, Preset, Scan};
12//!
13//! Scan::current_dir().run()?;
14//!
15//! // Scan specific directory
16//! Scan::dir("./src").run()?;
17//!
18//! // Use a preset for common tasks
19//! Scan::dir("./src")
20//!     .preset(Preset::CodeReview)
21//!     .run()?;
22//!
23//! // Custom configuration
24//! Scan::dir("./project")
25//!     .output("./prompts")
26//!     .max_tokens(200_000)
27//!     .format(Format::Json)
28//!     .keep_tests()
29//!     .keep_comments()
30//!     .run()?;
31//! # Ok::<(), llm_utl::Error>(())
32//! ```
33
34use crate::{Config, FileFilterConfig, FilterConfig, OutputFormat, Pipeline, PipelineStats, PresetKind, Result, TokenizerKind};
35use std::path::{Path, PathBuf};
36
37// ============================================================================
38// Core API
39// ============================================================================
40
41/// Entry point for the API Start API.
42///
43/// Use this to build and execute scans with a fluent, type-safe interface.
44///
45/// # Examples
46///
47/// ```no_run
48/// use llm_utl::api::*;
49///
50/// // Basic usage
51/// Scan::current_dir().run()?;
52///
53/// // With configuration
54/// Scan::dir("./src")
55///     .max_tokens(150_000)
56///     .preset(Preset::CodeReview)
57///     .run()?;
58/// # Ok::<(), llm_utl::Error>(())
59/// ```
60#[derive(Debug, Clone)]
61#[must_use = "call .run() to execute the scan"]
62pub struct Scan {
63    dir: PathBuf,
64    output: PathBuf,
65    format: OutputFormat,
66    max_tokens: usize,
67    overlap: usize,
68    preset: Option<PresetKind>,
69    filters: FilterOptions,
70    allow_files: Vec<String>,
71    excludes: Vec<String>,
72    exclude_files: Vec<String>,
73    template_path: Option<PathBuf>,
74    custom_format_name: Option<String>,
75    custom_extension: Option<String>,
76    custom_data: std::collections::HashMap<String, serde_json::Value>,
77}
78
79/// Filtering options for code processing.
80#[derive(Debug, Clone)]
81struct FilterOptions {
82    tests: FilterMode,
83    comments: FilterMode,
84    doc_comments: FilterMode,
85    debug_prints: FilterMode,
86}
87
/// Two-state switch used by [`FilterOptions`]: strip a category or leave it in.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum FilterMode {
    /// Strip the category from the output.
    Remove,
    /// Leave the category in the output.
    Keep,
}
93
94impl Default for Scan {
95    fn default() -> Self {
96        Self {
97            dir: PathBuf::from("."),
98            output: PathBuf::from("./out"),
99            format: OutputFormat::Markdown,
100            max_tokens: 100_000,
101            overlap: 1_000,
102            preset: None,
103            filters: FilterOptions::default(),
104            excludes: default_excludes(),
105            exclude_files: vec![],
106            allow_files: vec![],
107            template_path: None,
108            custom_format_name: None,
109            custom_extension: None,
110            custom_data: std::collections::HashMap::new(),
111        }
112    }
113}
114
115impl Default for FilterOptions {
116    fn default() -> Self {
117        Self {
118            tests: FilterMode::Remove,
119            comments: FilterMode::Remove,
120            doc_comments: FilterMode::Remove,
121            debug_prints: FilterMode::Remove,
122        }
123    }
124}
125
126impl Scan {
127    /// Start a scan of the current directory.
128    ///
129    /// # Examples
130    ///
131    /// ```no_run
132    /// use llm_utl::api::*;
133    ///
134    /// let stats = Scan::current_dir().run()?;
135    /// println!("Processed {} files", stats.total_files);
136    /// # Ok::<(), llm_utl::Error>(())
137    /// ```
138    pub fn current_dir() -> Self {
139        Self::default()
140    }
141
142    /// Start a scan of the specified directory.
143    ///
144    /// # Examples
145    ///
146    /// ```no_run
147    /// use llm_utl::api::*;
148    ///
149    /// Scan::dir("./src").run()?;
150    /// Scan::dir("./my-project").run()?;
151    /// # Ok::<(), llm_utl::Error>(())
152    /// ```
153    pub fn dir(path: impl Into<PathBuf>) -> Self {
154        Self {
155            dir: path.into(),
156            ..Self::default()
157        }
158    }
159
160    /// Set the output directory for generated files.
161    ///
162    /// Default: `./out`
163    pub fn output(mut self, path: impl Into<PathBuf>) -> Self {
164        self.output = path.into();
165        self
166    }
167
168    /// Set the output format.
169    ///
170    /// Default: `Format::Markdown`
171    ///
172    /// # Examples
173    ///
174    /// ```no_run
175    /// use llm_utl::api::*;
176    ///
177    /// Scan::dir("./src")
178    ///     .format(Format::Json)
179    ///     .run()?;
180    /// # Ok::<(), llm_utl::Error>(())
181    /// ```
182    pub fn format(mut self, format: Format) -> Self {
183        self.format = format.into();
184        self
185    }
186
187    /// Set maximum tokens per output file.
188    ///
189    /// Default: `100_000`
190    pub fn max_tokens(mut self, tokens: usize) -> Self {
191        self.max_tokens = tokens;
192        self
193    }
194
195    /// Set overlap between chunks in tokens.
196    ///
197    /// Default: `1_000`
198    pub fn overlap(mut self, tokens: usize) -> Self {
199        self.overlap = tokens;
200        self
201    }
202
203    /// Use a preset configuration for common tasks.
204    ///
205    /// Presets override filter settings with optimized defaults for specific use cases.
206    ///
207    /// # Examples
208    ///
209    /// ```no_run
210    /// use llm_utl::api::*;
211    ///
212    /// // Optimized for code review
213    /// Scan::dir("./src")
214    ///     .preset(Preset::CodeReview)
215    ///     .run()?;
216    ///
217    /// // Optimized for documentation
218    /// Scan::dir("./project")
219    ///     .preset(Preset::Documentation)
220    ///     .run()?;
221    /// # Ok::<(), llm_utl::Error>(())
222    /// ```
223    pub fn preset(mut self, preset: Preset) -> Self {
224        self.preset = Some(preset.into());
225        self
226    }
227
228    /// Use a custom external Tera template.
229    ///
230    /// The template will override the built-in template for the selected format.
231    /// For custom formats, combine with `.custom_format()`.
232    ///
233    /// # Examples
234    ///
235    /// ```no_run
236    /// use llm_utl::api::*;
237    ///
238    /// // Override built-in markdown template
239    /// Scan::dir("./src")
240    ///     .format(Format::Markdown)
241    ///     .template("./my-markdown.tera")
242    ///     .run()?;
243    ///
244    /// // Use completely custom format
245    /// Scan::dir("./src")
246    ///     .custom_format("my_format", "txt")
247    ///     .template("./custom.tera")
248    ///     .run()?;
249    /// # Ok::<(), llm_utl::Error>(())
250    /// ```
251    pub fn template(mut self, path: impl Into<PathBuf>) -> Self {
252        self.template_path = Some(path.into());
253        self
254    }
255
256    /// Define a custom output format with name and extension.
257    ///
258    /// Automatically sets format to `Format::Custom`. Requires a template
259    /// to be specified via `.template()`.
260    ///
261    /// # Arguments
262    ///
263    /// * `name` - Internal template name
264    /// * `extension` - File extension for output files (without leading dot)
265    ///
266    /// # Examples
267    ///
268    /// ```no_run
269    /// use llm_utl::api::*;
270    ///
271    /// Scan::dir("./src")
272    ///     .custom_format("my_format", "txt")
273    ///     .template("./custom.tera")
274    ///     .run()?;
275    /// # Ok::<(), llm_utl::Error>(())
276    /// ```
277    pub fn custom_format(mut self, name: impl Into<String>, extension: impl Into<String>) -> Self {
278        self.format = OutputFormat::Custom;
279        self.custom_format_name = Some(name.into());
280        self.custom_extension = Some(extension.into());
281        self
282    }
283
284    /// Add custom data to pass to templates.
285    ///
286    /// The data will be available in templates under `ctx.custom.<key>`.
287    /// Can be called multiple times to add more data.
288    ///
289    /// # Examples
290    ///
291    /// ```no_run
292    /// use llm_utl::api::*;
293    /// use serde_json::json;
294    ///
295    /// Scan::dir("./src")
296    ///     .template("./custom.tera")
297    ///     .template_data("version", json!("1.0.0"))
298    ///     .template_data("author", json!("John Doe"))
299    ///     .template_data("project", json!("My Project"))
300    ///     .run()?;
301    /// # Ok::<(), llm_utl::Error>(())
302    /// ```
303    ///
304    /// In template:
305    /// ```tera
306    /// Project: {{ ctx.custom.project }}
307    /// Version: {{ ctx.custom.version }}
308    /// Author: {{ ctx.custom.author }}
309    /// ```
310    pub fn template_data(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
311        self.custom_data.insert(key.into(), value);
312        self
313    }
314
315    /// Include test files in the output.
316    ///
317    /// By default, tests are removed.
318    pub fn keep_tests(mut self) -> Self {
319        self.filters.tests = FilterMode::Keep;
320        self
321    }
322
323    /// Remove test files from the output (default behavior).
324    pub fn remove_tests(mut self) -> Self {
325        self.filters.tests = FilterMode::Remove;
326        self
327    }
328
329    /// Include comments in the output.
330    ///
331    /// By default, comments are removed.
332    pub fn keep_comments(mut self) -> Self {
333        self.filters.comments = FilterMode::Keep;
334        self
335    }
336
337    /// Remove comments from the output (default behavior).
338    pub fn remove_comments(mut self) -> Self {
339        self.filters.comments = FilterMode::Remove;
340        self
341    }
342
343    /// Include documentation comments in the output.
344    ///
345    /// By default, doc comments are removed.
346    pub fn keep_doc_comments(mut self) -> Self {
347        self.filters.doc_comments = FilterMode::Keep;
348        self
349    }
350
351    /// Remove documentation comments from the output (default behavior).
352    pub fn remove_doc_comments(mut self) -> Self {
353        self.filters.doc_comments = FilterMode::Remove;
354        self
355    }
356
357    /// Include debug print statements in the output.
358    ///
359    /// By default, debug prints are removed.
360    pub fn keep_debug_prints(mut self) -> Self {
361        self.filters.debug_prints = FilterMode::Keep;
362        self
363    }
364
365    /// Remove debug print statements from the output (default behavior).
366    pub fn remove_debug_prints(mut self) -> Self {
367        self.filters.debug_prints = FilterMode::Remove;
368        self
369    }
370
371    /// Add directories to exclude from scanning.
372    ///
373    /// Supports glob patterns (e.g., `**/node_modules`, `target/**`).
374    ///
375    /// # Examples
376    ///
377    /// ```no_run
378    /// use llm_utl::api::*;
379    ///
380    /// Scan::dir("./project")
381    ///     .exclude(["**/node_modules", "**/dist"])
382    ///     .run()?;
383    /// # Ok::<(), llm_utl::Error>(())
384    /// ```
385    pub fn exclude<I, S>(mut self, patterns: I) -> Self
386    where
387        I: IntoIterator<Item = S>,
388        S: Into<String>,
389    {
390        self.excludes.extend(patterns.into_iter().map(Into::into));
391        self
392    }
393
394    /// Add files to exclude from scanning.
395    ///
396    /// Supports glob patterns (e.g., `**/*.rs`, `**.md`).
397    /// ```
398    pub fn exclude_files<I, S>(mut self, patterns: I) -> Self
399    where
400        I: IntoIterator<Item = S>,
401        S: Into<String>,
402    {
403        self.excludes.extend(patterns.into_iter().map(Into::into));
404        self
405    }
406
407    /// Add files to allow for scanning.
408    ///
409    /// Supports glob patterns (e.g., `**/*.rs`, `**.md`).
410    /// ```
411    pub fn allow_only<I, S>(mut self, patterns: I) -> Self
412    where
413        I: IntoIterator<Item = S>,
414        S: Into<String>,
415    {
416        self.allow_files.extend(patterns.into_iter().map(Into::into));
417        self
418    }
419
420    /// Execute the scan and return statistics.
421    ///
422    /// This is a terminal operation that consumes the builder.
423    ///
424    /// # Errors
425    ///
426    /// Returns an error if:
427    /// - The directory doesn't exist
428    /// - No processable files are found
429    /// - Configuration is invalid
430    /// - I/O errors occur during processing
431    ///
432    /// # Examples
433    ///
434    /// ```no_run
435    /// use llm_utl::api::*;
436    ///
437    /// let stats = Scan::dir("./src").run()?;
438    ///
439    /// println!("Processed {} files in {:.2}s",
440    ///     stats.total_files,
441    ///     stats.duration.as_secs_f64()
442    /// );
443    /// # Ok::<(), llm_utl::Error>(())
444    /// ```
445    pub fn run(self) -> Result<PipelineStats> {
446        let config = self.build_config()?;
447        Pipeline::new(config)?.run()
448    }
449
450    fn build_config(self) -> Result<Config> {
451        let mut builder = Config::builder()
452            .root_dir(self.dir)
453            .output_dir(self.output)
454            .format(self.format)
455            .max_tokens(self.max_tokens)
456            .overlap_tokens(self.overlap)
457            .tokenizer(TokenizerKind::Enhanced)
458            .filter_config(FilterConfig {
459                remove_tests: matches!(self.filters.tests, FilterMode::Remove),
460                remove_doc_comments: matches!(self.filters.doc_comments, FilterMode::Remove),
461                remove_comments: matches!(self.filters.comments, FilterMode::Remove),
462                remove_blank_lines: true,
463                preserve_headers: true,
464                remove_debug_prints: matches!(self.filters.debug_prints, FilterMode::Remove),
465            })
466            .file_filter_config(FileFilterConfig::default()
467                .allow_only(self.allow_files)
468                .exclude_files(self.exclude_files)
469                .exclude_directories(self.excludes));
470
471        if let Some(preset) = self.preset {
472            builder = builder.preset(preset);
473        }
474
475        // Add template configuration
476        if let Some(template_path) = self.template_path {
477            builder = builder.template_path(template_path);
478        }
479
480        if let Some(format_name) = self.custom_format_name {
481            builder = builder.custom_format_name(format_name);
482        }
483
484        if let Some(extension) = self.custom_extension {
485            builder = builder.custom_extension(extension);
486        }
487
488        if !self.custom_data.is_empty() {
489            builder = builder.custom_data(self.custom_data);
490        }
491
492        builder.build()
493    }
494}
495
496// ============================================================================
497// Type-safe enums for common options
498// ============================================================================
499
/// Output formats selectable through `Scan::format`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Format {
    /// Markdown output (the default).
    Markdown,
    /// XML output.
    Xml,
    /// JSON output.
    Json,
    /// Custom output (use with `.custom_format()`).
    Custom,
}
512
513impl From<Format> for OutputFormat {
514    fn from(format: Format) -> Self {
515        match format {
516            Format::Markdown => Self::Markdown,
517            Format::Xml => Self::Xml,
518            Format::Json => Self::Json,
519            Format::Custom => Self::Custom,
520        }
521    }
522}
523
/// Ready-made configurations for common tasks.
///
/// Pick one with `Scan::preset`; each variant targets one kind of job.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Preset {
    /// Code review: removes tests, comments, and debug prints.
    CodeReview,
    /// Documentation: keeps all comments and docs.
    Documentation,
    /// Refactoring: clean view of structure.
    Refactoring,
    /// Bug analysis: focuses on logic.
    BugAnalysis,
    /// Security audit: includes everything.
    SecurityAudit,
    /// Test generation: keeps tests as examples.
    TestGeneration,
    /// Architecture review: high-level view.
    ArchitectureReview,
    /// Performance analysis: focuses on algorithms.
    PerformanceAnalysis,
    /// Migration planning: comprehensive view.
    MigrationPlan,
    /// API design: focuses on public interfaces.
    ApiDesign,
}
550
551impl From<Preset> for PresetKind {
552    fn from(preset: Preset) -> Self {
553        match preset {
554            Preset::CodeReview => Self::CodeReview,
555            Preset::Documentation => Self::Documentation,
556            Preset::Refactoring => Self::Refactoring,
557            Preset::BugAnalysis => Self::BugAnalysis,
558            Preset::SecurityAudit => Self::SecurityAudit,
559            Preset::TestGeneration => Self::TestGeneration,
560            Preset::ArchitectureReview => Self::ArchitectureReview,
561            Preset::PerformanceAnalysis => Self::PerformanceAnalysis,
562            Preset::MigrationPlan => Self::MigrationPlan,
563            Preset::ApiDesign => Self::ApiDesign,
564        }
565    }
566}
567
568// ============================================================================
569// Preset shortcuts for common tasks
570// ============================================================================
571
572impl Scan {
573    /// API preset: Code review configuration.
574    ///
575    /// Equivalent to `.preset(Preset::CodeReview)`.
576    ///
577    /// # Examples
578    ///
579    /// ```no_run
580    /// use llm_utl::api::*;
581    ///
582    /// Scan::dir("./src")
583    ///     .code_review()
584    ///     .run()?;
585    /// # Ok::<(), llm_utl::Error>(())
586    /// ```
587    pub fn code_review(self) -> Self {
588        self.preset(Preset::CodeReview)
589    }
590
591    /// API preset: Documentation configuration.
592    ///
593    /// Equivalent to `.preset(Preset::Documentation)`.
594    pub fn documentation(self) -> Self {
595        self.preset(Preset::Documentation)
596    }
597
598    /// API preset: Security audit configuration.
599    ///
600    /// Equivalent to `.preset(Preset::SecurityAudit)`.
601    pub fn security_audit(self) -> Self {
602        self.preset(Preset::SecurityAudit)
603    }
604
605    /// API preset: Bug analysis configuration.
606    ///
607    /// Equivalent to `.preset(Preset::BugAnalysis)`.
608    pub fn bug_analysis(self) -> Self {
609        self.preset(Preset::BugAnalysis)
610    }
611
612    /// API preset: Refactoring configuration.
613    ///
614    /// Equivalent to `.preset(Preset::Refactoring)`.
615    pub fn refactoring(self) -> Self {
616        self.preset(Preset::Refactoring)
617    }
618
619    /// API preset: Test generation configuration.
620    ///
621    /// Equivalent to `.preset(Preset::TestGeneration)`.
622    pub fn test_generation(self) -> Self {
623        self.preset(Preset::TestGeneration)
624    }
625}
626
627// ============================================================================
628// Convenience functions
629// ============================================================================
630
631/// Scan the current directory with default settings.
632///
633/// This is the simplest way to use the library.
634///
635/// # Examples
636///
637/// ```no_run
638/// use llm_utl::api::*;
639///
640/// let stats = scan()?;
641/// println!("Created {} files", stats.files_written);
642/// # Ok::<(), llm_utl::Error>(())
643/// ```
644pub fn scan() -> Result<PipelineStats> {
645    Scan::current_dir().run()
646}
647
648/// Scan a specific directory with default settings.
649///
650/// # Examples
651///
652/// ```no_run
653/// use llm_utl::api::*;
654///
655/// let stats = scan_dir("./src")?;
656/// # Ok::<(), llm_utl::Error>(())
657/// ```
658pub fn scan_dir(path: impl AsRef<Path>) -> Result<PipelineStats> {
659    Scan::dir(path.as_ref()).run()
660}
661
662// ============================================================================
663// Utilities
664// ============================================================================
665
/// Directory glob patterns that every new `Scan` starts out excluding.
fn default_excludes() -> Vec<String> {
    const PATTERNS: [&str; 10] = [
        "**/node_modules",
        "**/target",
        "**/out",
        "**/dist",
        "**/build",
        "**/.git",
        "**/templates",
        "**/.idea",
        "**/.vscode",
        "**/vendor",
    ];

    PATTERNS.iter().map(|pattern| String::from(*pattern)).collect()
}
680
681// ============================================================================
682// Tests
683// ============================================================================
684
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// A fresh builder targets `.`, writes to `./out`, and uses the documented
    /// token limit.
    #[test]
    fn scan_builder_has_sensible_defaults() {
        let scan = Scan::current_dir();
        assert_eq!(scan.dir, PathBuf::from("."));
        assert_eq!(scan.output, PathBuf::from("./out"));
        assert_eq!(scan.max_tokens, 100_000);
    }

    /// Chained setters each land in their own field.
    #[test]
    fn scan_builder_is_fluent() {
        let scan = Scan::dir("./test")
            .output("./custom-out")
            .max_tokens(200_000)
            .format(Format::Json)
            .keep_tests()
            .keep_comments();

        assert_eq!(scan.dir, PathBuf::from("./test"));
        assert_eq!(scan.output, PathBuf::from("./custom-out"));
        assert_eq!(scan.max_tokens, 200_000);
        assert_eq!(scan.format, OutputFormat::Json);
        assert_eq!(scan.filters.tests, FilterMode::Keep);
        assert_eq!(scan.filters.comments, FilterMode::Keep);
    }

    /// Preset shortcuts store the matching `PresetKind`.
    #[test]
    fn preset_shortcuts_work() {
        let scan = Scan::dir("./src").code_review();
        assert_eq!(scan.preset, Some(PresetKind::CodeReview));

        let scan = Scan::dir("./src").documentation();
        assert_eq!(scan.preset, Some(PresetKind::Documentation));
    }

    /// Repeated `exclude` calls append rather than replace.
    #[test]
    fn exclude_patterns_are_additive() {
        let scan = Scan::dir("./src")
            .exclude(["**/test1"])
            .exclude(["**/test2", "**/test3"]);

        assert!(scan.excludes.contains(&"**/test1".to_string()));
        assert!(scan.excludes.contains(&"**/test2".to_string()));
        assert!(scan.excludes.contains(&"**/test3".to_string()));
    }

    /// Repeated `allow_only` calls append rather than replace.
    #[test]
    fn allow_only_patterns_are_additive() {
        let scan = Scan::dir("./src")
            .allow_only(["**/*.rs"])
            .allow_only(["**/*.md"]);

        assert_eq!(
            scan.allow_files,
            vec!["**/*.rs".to_string(), "**/*.md".to_string()]
        );
    }

    /// Each `template_data` call adds one entry to the custom data map.
    #[test]
    fn template_data_is_additive() {
        let scan = Scan::dir("./src")
            .template_data("version", json!("1.0.0"))
            .template_data("author", json!("John Doe"));

        assert_eq!(scan.custom_data.len(), 2);
        assert_eq!(scan.custom_data.get("version"), Some(&json!("1.0.0")));
        assert_eq!(scan.custom_data.get("author"), Some(&json!("John Doe")));
    }

    /// `custom_format` switches the format and records name and extension.
    #[test]
    fn custom_format_selects_custom_output() {
        let scan = Scan::dir("./src").custom_format("my_format", "txt");

        assert_eq!(scan.format, OutputFormat::Custom);
        assert_eq!(scan.custom_format_name.as_deref(), Some("my_format"));
        assert_eq!(scan.custom_extension.as_deref(), Some("txt"));
    }
}
734}