blz_core/lib.rs
1//! # blz-core
2//!
3//! Core functionality for blz - a fast, local search cache for llms.txt documentation.
4//!
5//! This crate provides the foundational components for parsing, storing, and searching
6//! llms.txt documentation files locally. It's designed for speed (sub-10ms search latency),
7//! offline-first usage, and exact line citations.
8//!
9//! ## Architecture
10//!
11//! The crate is organized around several key components (see the module list for the full set):
12//!
13//! - **Configuration**: Global and per-source settings management
14//! - **Parsing**: Tree-sitter based markdown parsing with structured output
15//! - **Types**: Core data structures representing sources, search results, and metadata
16//! - **Error Handling**: Comprehensive error types with categorization and recovery hints
17//!
18//! ## Quick Start
19//!
20//! ```rust
21//! use blz_core::{Config, MarkdownParser, Result};
22//!
23//! // Load global configuration
24//! let config = Config::load()?;
25//!
26//! // Parse markdown content
27//! let mut parser = MarkdownParser::new()?;
28//! let result = parser.parse("# Hello World\n\nThis is content.")?;
29//!
30//! println!("Found {} heading blocks", result.heading_blocks.len());
31//! println!("Generated TOC with {} entries", result.toc.len());
32//! # Ok::<(), blz_core::Error>(())
33//! ```
34//!
35//! ## Performance Characteristics
36//!
37//! - **Parse time**: < 150ms per MB of markdown content
38//! - **Memory usage**: < 2x source document size during parsing
39//! - **Thread safety**: All types are `Send + Sync` where appropriate
40//!
41//! ## Error Handling
42//!
43//! Fallible operations return [`Result<T, Error>`] with structured error information:
44//!
45//! ```rust
46//! use blz_core::{Error, MarkdownParser};
47//!
48//! let mut parser = MarkdownParser::new()?;
49//! match parser.parse("malformed content") {
50//!     Ok(result) => println!("Parsed successfully"),
51//!     Err(Error::Parse(msg)) => eprintln!("Parse error: {}", msg),
52//!     Err(e) if e.is_recoverable() => eprintln!("Recoverable error: {}", e),
53//!     Err(e) => eprintln!("Fatal error: {}", e),
54//! }
55//! # Ok::<(), blz_core::Error>(())
56//! ```
57
58/// Configuration management for global and per-source settings (`Config` and related types)
59pub mod config;
60/// Documentation source discovery (`probe_domain`, `ProbeResult`)
61pub mod discovery;
62/// Error types and result aliases (`Error`, `Result`)
63pub mod error;
64/// HTTP fetching with conditional-request support (`Fetcher`, `FetchResult`)
65pub mod fetcher;
66/// Firecrawl CLI detection and web scraping integration
67pub mod firecrawl;
68/// Generation pipeline for creating llms.txt from web scraping
69pub mod generate;
70/// Heading sanitization and normalization helpers (`path_variants`, `segment_variants`, ...)
71pub mod heading;
72/// Health check types for diagnostics and source health monitoring (`HealthReport`, `SourceHealth`, ...)
73pub mod health;
74/// Search index implementation using Tantivy (`SearchIndex`)
75pub mod index;
76/// JSON builder helpers for llms.json structures (`build_llms_json`)
77pub mod json_builder;
78/// Language filtering for multilingual llms.txt files (`LanguageFilter`, `FilterStats`)
79pub mod language_filter;
80/// Utilities for remapping anchors between versions (`build_anchors_map`, `compute_anchor_mappings`)
81pub mod mapping;
82/// Safe numeric conversion helpers
83pub mod numeric;
84/// Page cache for scraped web content
85pub mod page_cache;
86/// Tree-sitter-based Markdown parser (`MarkdownParser`, `ParseResult`)
87pub mod parser;
88/// Application profile detection helpers
89pub mod profile;
90/// Performance profiling utilities (`PerformanceMetrics`, `ResourceMonitor`)
91pub mod profiling;
92/// Refresh helpers shared across the CLI and MCP
93pub mod refresh;
94/// Built-in registry of known documentation sources (`Registry`)
95pub mod registry;
96/// Local filesystem storage for cached documentation (`Storage`)
97pub mod storage;
98/// Core data types and structures (glob re-exported at the crate root)
99pub mod types;
100/// URL resolver for llms.txt variants
101pub mod url_resolver;
102
103// Re-export commonly used items at the crate root so callers can import them directly from `blz_core`
104pub use config::{
105 Config, DefaultsConfig, FetchConfig, FollowLinks, IndexConfig, PathsConfig, ToolConfig,
106 ToolMeta,
107};
108pub use discovery::{ProbeResult, probe_domain};
109pub use error::{Error, Result};
110pub use fetcher::{FetchResult, Fetcher};
111pub use heading::{
112 HeadingPathVariants, HeadingSegmentVariants, normalize_text_for_search, path_variants,
113 segment_variants,
114};
115pub use health::{
116 CacheInfo, HealthCheck, HealthReport, HealthStatus, SourceHealth, SourceHealthEntry, SourceKind,
117};
118pub use index::SearchIndex;
119pub use json_builder::build_llms_json;
120pub use language_filter::{FilterStats, LanguageFilter};
121pub use mapping::{build_anchors_map, compute_anchor_mappings};
122pub use parser::{MarkdownParser, ParseResult};
123pub use profiling::{PerformanceMetrics, ResourceMonitor};
124pub use registry::Registry;
125pub use storage::Storage;
126pub use types::*; // glob: every public item of `types` becomes part of the crate-root API
126pub use types::*;