llm_utl/api.rs
//! # Quick Start API
2//!
3//! High-level, ergonomic API for common use cases. Start here if you want to get
4//! results fast without configuration overhead.
5//!
6//! ## Examples
7//!
8//! ```no_run
9//!
10//! // Simplest usage - scan current directory
11//! use llm_utl::api::{Format, Preset, Scan};
12//!
13//! Scan::current_dir().run()?;
14//!
15//! // Scan specific directory
16//! Scan::dir("./src").run()?;
17//!
18//! // Use a preset for common tasks
19//! Scan::dir("./src")
20//! .preset(Preset::CodeReview)
21//! .run()?;
22//!
23//! // Custom configuration
24//! Scan::dir("./project")
25//! .output("./prompts")
26//! .max_tokens(200_000)
27//! .format(Format::Json)
28//! .keep_tests()
29//! .keep_comments()
30//! .run()?;
31//! # Ok::<(), llm_utl::Error>(())
32//! ```
33
34use crate::{Config, FileFilterConfig, FilterConfig, OutputFormat, Pipeline, PipelineStats, PresetKind, Result, TokenizerKind};
35use std::path::{Path, PathBuf};
36
37// ============================================================================
38// Core API
39// ============================================================================
40
/// Fluent builder for a single scan; the entry point of the high-level scan API.
///
/// Create one with [`Scan::current_dir`] or [`Scan::dir`], chain the
/// configuration methods you need, then call [`Scan::run`] to execute it.
/// Every configuration method takes the builder by value and returns it.
///
/// # Examples
///
/// ```no_run
/// use llm_utl::api::*;
///
/// // Basic usage
/// Scan::current_dir().run()?;
///
/// // With configuration
/// Scan::dir("./src")
///     .max_tokens(150_000)
///     .preset(Preset::CodeReview)
///     .run()?;
/// # Ok::<(), llm_utl::Error>(())
/// ```
#[derive(Debug, Clone)]
#[must_use = "call .run() to execute the scan"]
pub struct Scan {
    /// Root directory to scan; forwarded to the config builder's `root_dir`.
    dir: PathBuf,
    /// Directory for generated files; forwarded to `output_dir`.
    output: PathBuf,
    /// Output format handed to the config builder.
    format: OutputFormat,
    /// Maximum tokens per output file.
    max_tokens: usize,
    /// Overlap between chunks, in tokens; forwarded to `overlap_tokens`.
    overlap: usize,
    /// Optional preset; only passed to the config builder when set.
    preset: Option<PresetKind>,
    /// Keep/remove switches that become the `FilterConfig` flags.
    filters: FilterOptions,
    /// Patterns forwarded to `FileFilterConfig::allow_only`.
    allow_files: Vec<String>,
    /// Directory patterns forwarded to `FileFilterConfig::exclude_directories`.
    excludes: Vec<String>,
    /// File patterns forwarded to `FileFilterConfig::exclude_files`.
    exclude_files: Vec<String>,
    /// Optional external template path; only passed on when set.
    template_path: Option<PathBuf>,
    /// Name of a custom format; set by `custom_format`.
    custom_format_name: Option<String>,
    /// File extension of a custom format; set by `custom_format`.
    custom_extension: Option<String>,
    /// Extra key/value data for templates; only passed on when non-empty.
    custom_data: std::collections::HashMap<String, serde_json::Value>,
}
78
/// Per-category keep/remove switches for code processing.
///
/// Each field is translated into the matching `remove_*` flag of
/// `FilterConfig` when the scan configuration is built.
#[derive(Debug, Clone)]
struct FilterOptions {
    /// Controls `FilterConfig::remove_tests`.
    tests: FilterMode,
    /// Controls `FilterConfig::remove_comments`.
    comments: FilterMode,
    /// Controls `FilterConfig::remove_doc_comments`.
    doc_comments: FilterMode,
    /// Controls `FilterConfig::remove_debug_prints`.
    debug_prints: FilterMode,
}
87
/// Whether one category of content is removed from or kept in the output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FilterMode {
    /// Sets the corresponding `remove_*` flag of `FilterConfig` to `true`.
    Remove,
    /// Sets the corresponding `remove_*` flag of `FilterConfig` to `false`.
    Keep,
}
93
94impl Default for Scan {
95 fn default() -> Self {
96 Self {
97 dir: PathBuf::from("."),
98 output: PathBuf::from("./out"),
99 format: OutputFormat::Markdown,
100 max_tokens: 100_000,
101 overlap: 1_000,
102 preset: None,
103 filters: FilterOptions::default(),
104 excludes: default_excludes(),
105 exclude_files: vec![],
106 allow_files: vec![],
107 template_path: None,
108 custom_format_name: None,
109 custom_extension: None,
110 custom_data: std::collections::HashMap::new(),
111 }
112 }
113}
114
115impl Default for FilterOptions {
116 fn default() -> Self {
117 Self {
118 tests: FilterMode::Remove,
119 comments: FilterMode::Remove,
120 doc_comments: FilterMode::Remove,
121 debug_prints: FilterMode::Remove,
122 }
123 }
124}
125
126impl Scan {
127 /// Start a scan of the current directory.
128 ///
129 /// # Examples
130 ///
131 /// ```no_run
132 /// use llm_utl::api::*;
133 ///
134 /// let stats = Scan::current_dir().run()?;
135 /// println!("Processed {} files", stats.total_files);
136 /// # Ok::<(), llm_utl::Error>(())
137 /// ```
138 pub fn current_dir() -> Self {
139 Self::default()
140 }
141
142 /// Start a scan of the specified directory.
143 ///
144 /// # Examples
145 ///
146 /// ```no_run
147 /// use llm_utl::api::*;
148 ///
149 /// Scan::dir("./src").run()?;
150 /// Scan::dir("./my-project").run()?;
151 /// # Ok::<(), llm_utl::Error>(())
152 /// ```
153 pub fn dir(path: impl Into<PathBuf>) -> Self {
154 Self {
155 dir: path.into(),
156 ..Self::default()
157 }
158 }
159
160 /// Set the output directory for generated files.
161 ///
162 /// Default: `./out`
163 pub fn output(mut self, path: impl Into<PathBuf>) -> Self {
164 self.output = path.into();
165 self
166 }
167
168 /// Set the output format.
169 ///
170 /// Default: `Format::Markdown`
171 ///
172 /// # Examples
173 ///
174 /// ```no_run
175 /// use llm_utl::api::*;
176 ///
177 /// Scan::dir("./src")
178 /// .format(Format::Json)
179 /// .run()?;
180 /// # Ok::<(), llm_utl::Error>(())
181 /// ```
182 pub fn format(mut self, format: Format) -> Self {
183 self.format = format.into();
184 self
185 }
186
187 /// Set maximum tokens per output file.
188 ///
189 /// Default: `100_000`
190 pub fn max_tokens(mut self, tokens: usize) -> Self {
191 self.max_tokens = tokens;
192 self
193 }
194
195 /// Set overlap between chunks in tokens.
196 ///
197 /// Default: `1_000`
198 pub fn overlap(mut self, tokens: usize) -> Self {
199 self.overlap = tokens;
200 self
201 }
202
203 /// Use a preset configuration for common tasks.
204 ///
205 /// Presets override filter settings with optimized defaults for specific use cases.
206 ///
207 /// # Examples
208 ///
209 /// ```no_run
210 /// use llm_utl::api::*;
211 ///
212 /// // Optimized for code review
213 /// Scan::dir("./src")
214 /// .preset(Preset::CodeReview)
215 /// .run()?;
216 ///
217 /// // Optimized for documentation
218 /// Scan::dir("./project")
219 /// .preset(Preset::Documentation)
220 /// .run()?;
221 /// # Ok::<(), llm_utl::Error>(())
222 /// ```
223 pub fn preset(mut self, preset: Preset) -> Self {
224 self.preset = Some(preset.into());
225 self
226 }
227
228 /// Use a custom external Tera template.
229 ///
230 /// The template will override the built-in template for the selected format.
231 /// For custom formats, combine with `.custom_format()`.
232 ///
233 /// # Examples
234 ///
235 /// ```no_run
236 /// use llm_utl::api::*;
237 ///
238 /// // Override built-in markdown template
239 /// Scan::dir("./src")
240 /// .format(Format::Markdown)
241 /// .template("./my-markdown.tera")
242 /// .run()?;
243 ///
244 /// // Use completely custom format
245 /// Scan::dir("./src")
246 /// .custom_format("my_format", "txt")
247 /// .template("./custom.tera")
248 /// .run()?;
249 /// # Ok::<(), llm_utl::Error>(())
250 /// ```
251 pub fn template(mut self, path: impl Into<PathBuf>) -> Self {
252 self.template_path = Some(path.into());
253 self
254 }
255
256 /// Define a custom output format with name and extension.
257 ///
258 /// Automatically sets format to `Format::Custom`. Requires a template
259 /// to be specified via `.template()`.
260 ///
261 /// # Arguments
262 ///
263 /// * `name` - Internal template name
264 /// * `extension` - File extension for output files (without leading dot)
265 ///
266 /// # Examples
267 ///
268 /// ```no_run
269 /// use llm_utl::api::*;
270 ///
271 /// Scan::dir("./src")
272 /// .custom_format("my_format", "txt")
273 /// .template("./custom.tera")
274 /// .run()?;
275 /// # Ok::<(), llm_utl::Error>(())
276 /// ```
277 pub fn custom_format(mut self, name: impl Into<String>, extension: impl Into<String>) -> Self {
278 self.format = OutputFormat::Custom;
279 self.custom_format_name = Some(name.into());
280 self.custom_extension = Some(extension.into());
281 self
282 }
283
284 /// Add custom data to pass to templates.
285 ///
286 /// The data will be available in templates under `ctx.custom.<key>`.
287 /// Can be called multiple times to add more data.
288 ///
289 /// # Examples
290 ///
291 /// ```no_run
292 /// use llm_utl::api::*;
293 /// use serde_json::json;
294 ///
295 /// Scan::dir("./src")
296 /// .template("./custom.tera")
297 /// .template_data("version", json!("1.0.0"))
298 /// .template_data("author", json!("John Doe"))
299 /// .template_data("project", json!("My Project"))
300 /// .run()?;
301 /// # Ok::<(), llm_utl::Error>(())
302 /// ```
303 ///
304 /// In template:
305 /// ```tera
306 /// Project: {{ ctx.custom.project }}
307 /// Version: {{ ctx.custom.version }}
308 /// Author: {{ ctx.custom.author }}
309 /// ```
310 pub fn template_data(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
311 self.custom_data.insert(key.into(), value);
312 self
313 }
314
315 /// Include test files in the output.
316 ///
317 /// By default, tests are removed.
318 pub fn keep_tests(mut self) -> Self {
319 self.filters.tests = FilterMode::Keep;
320 self
321 }
322
323 /// Remove test files from the output (default behavior).
324 pub fn remove_tests(mut self) -> Self {
325 self.filters.tests = FilterMode::Remove;
326 self
327 }
328
329 /// Include comments in the output.
330 ///
331 /// By default, comments are removed.
332 pub fn keep_comments(mut self) -> Self {
333 self.filters.comments = FilterMode::Keep;
334 self
335 }
336
337 /// Remove comments from the output (default behavior).
338 pub fn remove_comments(mut self) -> Self {
339 self.filters.comments = FilterMode::Remove;
340 self
341 }
342
343 /// Include documentation comments in the output.
344 ///
345 /// By default, doc comments are removed.
346 pub fn keep_doc_comments(mut self) -> Self {
347 self.filters.doc_comments = FilterMode::Keep;
348 self
349 }
350
351 /// Remove documentation comments from the output (default behavior).
352 pub fn remove_doc_comments(mut self) -> Self {
353 self.filters.doc_comments = FilterMode::Remove;
354 self
355 }
356
357 /// Include debug print statements in the output.
358 ///
359 /// By default, debug prints are removed.
360 pub fn keep_debug_prints(mut self) -> Self {
361 self.filters.debug_prints = FilterMode::Keep;
362 self
363 }
364
365 /// Remove debug print statements from the output (default behavior).
366 pub fn remove_debug_prints(mut self) -> Self {
367 self.filters.debug_prints = FilterMode::Remove;
368 self
369 }
370
371 /// Add directories to exclude from scanning.
372 ///
373 /// Supports glob patterns (e.g., `**/node_modules`, `target/**`).
374 ///
375 /// # Examples
376 ///
377 /// ```no_run
378 /// use llm_utl::api::*;
379 ///
380 /// Scan::dir("./project")
381 /// .exclude(["**/node_modules", "**/dist"])
382 /// .run()?;
383 /// # Ok::<(), llm_utl::Error>(())
384 /// ```
385 pub fn exclude<I, S>(mut self, patterns: I) -> Self
386 where
387 I: IntoIterator<Item = S>,
388 S: Into<String>,
389 {
390 self.excludes.extend(patterns.into_iter().map(Into::into));
391 self
392 }
393
394 /// Add files to exclude from scanning.
395 ///
396 /// Supports glob patterns (e.g., `**/*.rs`, `**.md`).
397 /// ```
398 pub fn exclude_files<I, S>(mut self, patterns: I) -> Self
399 where
400 I: IntoIterator<Item = S>,
401 S: Into<String>,
402 {
403 self.excludes.extend(patterns.into_iter().map(Into::into));
404 self
405 }
406
407 /// Add files to allow for scanning.
408 ///
409 /// Supports glob patterns (e.g., `**/*.rs`, `**.md`).
410 /// ```
411 pub fn allow_only<I, S>(mut self, patterns: I) -> Self
412 where
413 I: IntoIterator<Item = S>,
414 S: Into<String>,
415 {
416 self.allow_files.extend(patterns.into_iter().map(Into::into));
417 self
418 }
419
420 /// Execute the scan and return statistics.
421 ///
422 /// This is a terminal operation that consumes the builder.
423 ///
424 /// # Errors
425 ///
426 /// Returns an error if:
427 /// - The directory doesn't exist
428 /// - No processable files are found
429 /// - Configuration is invalid
430 /// - I/O errors occur during processing
431 ///
432 /// # Examples
433 ///
434 /// ```no_run
435 /// use llm_utl::api::*;
436 ///
437 /// let stats = Scan::dir("./src").run()?;
438 ///
439 /// println!("Processed {} files in {:.2}s",
440 /// stats.total_files,
441 /// stats.duration.as_secs_f64()
442 /// );
443 /// # Ok::<(), llm_utl::Error>(())
444 /// ```
445 pub fn run(self) -> Result<PipelineStats> {
446 let config = self.build_config()?;
447 Pipeline::new(config)?.run()
448 }
449
450 fn build_config(self) -> Result<Config> {
451 let mut builder = Config::builder()
452 .root_dir(self.dir)
453 .output_dir(self.output)
454 .format(self.format)
455 .max_tokens(self.max_tokens)
456 .overlap_tokens(self.overlap)
457 .tokenizer(TokenizerKind::Enhanced)
458 .filter_config(FilterConfig {
459 remove_tests: matches!(self.filters.tests, FilterMode::Remove),
460 remove_doc_comments: matches!(self.filters.doc_comments, FilterMode::Remove),
461 remove_comments: matches!(self.filters.comments, FilterMode::Remove),
462 remove_blank_lines: true,
463 preserve_headers: true,
464 remove_debug_prints: matches!(self.filters.debug_prints, FilterMode::Remove),
465 })
466 .file_filter_config(FileFilterConfig::default()
467 .allow_only(self.allow_files)
468 .exclude_files(self.exclude_files)
469 .exclude_directories(self.excludes));
470
471 if let Some(preset) = self.preset {
472 builder = builder.preset(preset);
473 }
474
475 // Add template configuration
476 if let Some(template_path) = self.template_path {
477 builder = builder.template_path(template_path);
478 }
479
480 if let Some(format_name) = self.custom_format_name {
481 builder = builder.custom_format_name(format_name);
482 }
483
484 if let Some(extension) = self.custom_extension {
485 builder = builder.custom_extension(extension);
486 }
487
488 if !self.custom_data.is_empty() {
489 builder = builder.custom_data(self.custom_data);
490 }
491
492 builder.build()
493 }
494}
495
496// ============================================================================
497// Type-safe enums for common options
498// ============================================================================
499
/// Output format for generated files.
///
/// Each variant converts into the crate's `OutputFormat` variant of the same
/// name through the `From<Format>` implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Format {
    /// Markdown format (the builder's default)
    Markdown,
    /// XML format
    Xml,
    /// JSON format
    Json,
    /// Custom format (use with `.custom_format()`)
    Custom,
}
512
513impl From<Format> for OutputFormat {
514 fn from(format: Format) -> Self {
515 match format {
516 Format::Markdown => Self::Markdown,
517 Format::Xml => Self::Xml,
518 Format::Json => Self::Json,
519 Format::Custom => Self::Custom,
520 }
521 }
522}
523
/// Preset configurations for common use cases.
///
/// Each preset optimizes settings for a specific task. Every variant converts
/// into the crate's `PresetKind` variant of the same name through the
/// `From<Preset>` implementation.
///
/// NOTE(review): the per-variant descriptions below summarize behavior that
/// is implemented outside this module; confirm them against `PresetKind`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Preset {
    /// Optimized for code review: removes tests, comments, and debug prints
    CodeReview,
    /// Optimized for documentation: keeps all comments and docs
    Documentation,
    /// Optimized for refactoring: clean view of structure
    Refactoring,
    /// Optimized for bug analysis: focuses on logic
    BugAnalysis,
    /// Optimized for security audit: includes everything
    SecurityAudit,
    /// Optimized for test generation: keeps tests as examples
    TestGeneration,
    /// Optimized for architecture review: high-level view
    ArchitectureReview,
    /// Optimized for performance analysis: focuses on algorithms
    PerformanceAnalysis,
    /// Optimized for migration planning: comprehensive view
    MigrationPlan,
    /// Optimized for API design: focuses on public interfaces
    ApiDesign,
}
550
551impl From<Preset> for PresetKind {
552 fn from(preset: Preset) -> Self {
553 match preset {
554 Preset::CodeReview => Self::CodeReview,
555 Preset::Documentation => Self::Documentation,
556 Preset::Refactoring => Self::Refactoring,
557 Preset::BugAnalysis => Self::BugAnalysis,
558 Preset::SecurityAudit => Self::SecurityAudit,
559 Preset::TestGeneration => Self::TestGeneration,
560 Preset::ArchitectureReview => Self::ArchitectureReview,
561 Preset::PerformanceAnalysis => Self::PerformanceAnalysis,
562 Preset::MigrationPlan => Self::MigrationPlan,
563 Preset::ApiDesign => Self::ApiDesign,
564 }
565 }
566}
567
568// ============================================================================
569// Preset shortcuts for common tasks
570// ============================================================================
571
572impl Scan {
573 /// API preset: Code review configuration.
574 ///
575 /// Equivalent to `.preset(Preset::CodeReview)`.
576 ///
577 /// # Examples
578 ///
579 /// ```no_run
580 /// use llm_utl::api::*;
581 ///
582 /// Scan::dir("./src")
583 /// .code_review()
584 /// .run()?;
585 /// # Ok::<(), llm_utl::Error>(())
586 /// ```
587 pub fn code_review(self) -> Self {
588 self.preset(Preset::CodeReview)
589 }
590
591 /// API preset: Documentation configuration.
592 ///
593 /// Equivalent to `.preset(Preset::Documentation)`.
594 pub fn documentation(self) -> Self {
595 self.preset(Preset::Documentation)
596 }
597
598 /// API preset: Security audit configuration.
599 ///
600 /// Equivalent to `.preset(Preset::SecurityAudit)`.
601 pub fn security_audit(self) -> Self {
602 self.preset(Preset::SecurityAudit)
603 }
604
605 /// API preset: Bug analysis configuration.
606 ///
607 /// Equivalent to `.preset(Preset::BugAnalysis)`.
608 pub fn bug_analysis(self) -> Self {
609 self.preset(Preset::BugAnalysis)
610 }
611
612 /// API preset: Refactoring configuration.
613 ///
614 /// Equivalent to `.preset(Preset::Refactoring)`.
615 pub fn refactoring(self) -> Self {
616 self.preset(Preset::Refactoring)
617 }
618
619 /// API preset: Test generation configuration.
620 ///
621 /// Equivalent to `.preset(Preset::TestGeneration)`.
622 pub fn test_generation(self) -> Self {
623 self.preset(Preset::TestGeneration)
624 }
625}
626
627// ============================================================================
628// Convenience functions
629// ============================================================================
630
631/// Scan the current directory with default settings.
632///
633/// This is the simplest way to use the library.
634///
635/// # Examples
636///
637/// ```no_run
638/// use llm_utl::api::*;
639///
640/// let stats = scan()?;
641/// println!("Created {} files", stats.files_written);
642/// # Ok::<(), llm_utl::Error>(())
643/// ```
644pub fn scan() -> Result<PipelineStats> {
645 Scan::current_dir().run()
646}
647
648/// Scan a specific directory with default settings.
649///
650/// # Examples
651///
652/// ```no_run
653/// use llm_utl::api::*;
654///
655/// let stats = scan_dir("./src")?;
656/// # Ok::<(), llm_utl::Error>(())
657/// ```
658pub fn scan_dir(path: impl AsRef<Path>) -> Result<PipelineStats> {
659 Scan::dir(path.as_ref()).run()
660}
661
662// ============================================================================
663// Utilities
664// ============================================================================
665
/// Directory patterns every scan excludes before any user-supplied ones.
fn default_excludes() -> Vec<String> {
    const DEFAULT_EXCLUDED_DIRS: [&str; 10] = [
        "**/node_modules",
        "**/target",
        "**/out",
        "**/dist",
        "**/build",
        "**/.git",
        "**/templates",
        "**/.idea",
        "**/.vscode",
        "**/vendor",
    ];

    DEFAULT_EXCLUDED_DIRS
        .iter()
        .map(|pattern| String::from(*pattern))
        .collect()
}
680
681// ============================================================================
682// Tests
683// ============================================================================
684
#[cfg(test)]
mod tests {
    use serde_json::json;
    use super::*;

    /// A freshly created builder carries the documented defaults.
    #[test]
    fn scan_builder_has_sensible_defaults() {
        let scan = Scan::current_dir();
        assert_eq!(scan.dir, PathBuf::from("."));
        assert_eq!(scan.output, PathBuf::from("./out"));
        assert_eq!(scan.max_tokens, 100_000);
    }

    /// Chained setters each record their value on the builder.
    #[test]
    fn scan_builder_is_fluent() {
        let scan = Scan::dir("./test")
            .output("./custom-out")
            .max_tokens(200_000)
            .format(Format::Json)
            .keep_tests()
            .keep_comments();

        assert_eq!(scan.dir, PathBuf::from("./test"));
        assert_eq!(scan.output, PathBuf::from("./custom-out"));
        assert_eq!(scan.max_tokens, 200_000);
        assert_eq!(scan.format, OutputFormat::Json);
        assert_eq!(scan.filters.tests, FilterMode::Keep);
        assert_eq!(scan.filters.comments, FilterMode::Keep);
    }

    /// Shortcut methods select the matching preset.
    #[test]
    fn preset_shortcuts_work() {
        let scan = Scan::dir("./src").code_review();
        assert_eq!(scan.preset, Some(PresetKind::CodeReview));

        let scan = Scan::dir("./src").documentation();
        assert_eq!(scan.preset, Some(PresetKind::Documentation));
    }

    /// Repeated `exclude` calls accumulate instead of replacing.
    #[test]
    fn exclude_patterns_are_additive() {
        let scan = Scan::dir("./src")
            .exclude(["**/test1"])
            .exclude(["**/test2", "**/test3"]);

        assert!(scan.excludes.contains(&"**/test1".to_string()));
        assert!(scan.excludes.contains(&"**/test2".to_string()));
        assert!(scan.excludes.contains(&"**/test3".to_string()));
    }

    /// Repeated `allow_only` calls accumulate in call order.
    #[test]
    fn allow_only_patterns_are_additive() {
        let scan = Scan::dir("./src")
            .allow_only(["**/*.rs"])
            .allow_only(["**/*.md"]);

        assert_eq!(scan.allow_files, vec!["**/*.rs", "**/*.md"]);
    }

    /// `custom_format` switches the format and records name and extension.
    #[test]
    fn custom_format_sets_format_name_and_extension() {
        let scan = Scan::dir("./src").custom_format("my_format", "txt");

        assert_eq!(scan.format, OutputFormat::Custom);
        assert_eq!(scan.custom_format_name.as_deref(), Some("my_format"));
        assert_eq!(scan.custom_extension.as_deref(), Some("txt"));
    }

    /// `template_data` accumulates entries and overwrites duplicate keys.
    #[test]
    fn template_data_accumulates_and_overwrites() {
        let scan = Scan::dir("./src")
            .template_data("version", json!("1.0.0"))
            .template_data("author", json!("John Doe"))
            .template_data("version", json!("2.0.0"));

        assert_eq!(scan.custom_data.len(), 2);
        assert_eq!(scan.custom_data.get("version"), Some(&json!("2.0.0")));
        assert_eq!(scan.custom_data.get("author"), Some(&json!("John Doe")));
    }
}