Skip to main content

langextract_rust/
logging.rs

1//! Logging and progress reporting system for LangExtract.
2//!
3//! This module provides a unified system for logging and progress reporting
4//! that can be controlled by library users and CLI applications.
5
6use std::sync::Arc;
7
/// Progress events emitted at the different stages of an extraction run.
///
/// Each variant carries the data a [`ProgressHandler`] needs to render or log
/// that stage. `PartialEq`/`Eq` are derived so events can be compared by value,
/// for example with `assert_eq!` when testing a custom handler.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ProgressEvent {
    /// Text processing started
    ProcessingStarted {
        /// Size of the input text (the built-in handlers print it as "chars").
        text_length: usize,
        /// Name of the model being used.
        model: String,
        /// Name of the provider serving the model.
        provider: String,
    },
    /// Text is being chunked
    ChunkingStarted {
        /// Total size of the text being chunked (printed as "chars").
        total_chars: usize,
        /// Number of chunks the text is split into.
        chunk_count: usize,
        /// Name of the chunking strategy.
        strategy: String,
    },
    /// Batch processing progress
    BatchProgress {
        /// Number of the batch this event refers to.
        batch_number: usize,
        /// Total number of batches.
        total_batches: usize,
        /// Count of chunks processed, shown as `chunks_processed/total_chunks`.
        chunks_processed: usize,
        /// Total number of chunks.
        total_chunks: usize,
    },
    /// Language model call in progress
    ModelCall {
        /// Name of the provider being called.
        provider: String,
        /// Name of the model being called.
        model: String,
        /// Size of the input sent to the model (printed as "chars").
        input_length: usize,
    },
    /// Model response received
    ModelResponse {
        /// `true` when a response was received, `false` otherwise.
        success: bool,
        /// Size of the response, if known; the built-in handlers print 0 for `None`.
        output_length: Option<usize>,
    },
    /// Extraction validation and parsing
    ValidationStarted {
        /// Size of the raw model output about to be validated.
        raw_output_length: usize,
    },
    /// Validation completed
    ValidationCompleted {
        /// Number of extractions found.
        extractions_found: usize,
        /// Number of extractions reported as aligned.
        aligned_count: usize,
        /// Number of validation errors.
        errors: usize,
        /// Number of validation warnings.
        warnings: usize,
    },
    /// Results aggregation
    AggregationStarted {
        /// Number of chunks whose results are being merged.
        chunk_count: usize,
    },
    /// Processing completed
    ProcessingCompleted {
        /// Total number of extractions produced.
        total_extractions: usize,
        /// Elapsed processing time in milliseconds.
        processing_time_ms: u64,
    },
    /// Retry attempt
    RetryAttempt {
        /// Name of the operation being retried.
        operation: String,
        /// Current attempt number, shown as `attempt/max_attempts`.
        attempt: usize,
        /// Maximum number of attempts.
        max_attempts: usize,
        /// Delay before the next attempt, in seconds.
        delay_seconds: u64,
    },
    /// Error occurred
    Error {
        /// Name of the operation that failed.
        operation: String,
        /// Human-readable error description.
        error: String,
    },
    /// Debug information
    Debug {
        /// Name of the operation the details belong to.
        operation: String,
        /// Free-form debug text.
        details: String,
    },
}
79
/// Trait for handling progress events.
///
/// Implementors receive every [`ProgressEvent`] reported through this module.
/// The `Send + Sync` supertraits allow a handler to be shared between threads
/// behind an `Arc<dyn ProgressHandler>`.
pub trait ProgressHandler: Send + Sync {
    /// Handle a progress event.
    ///
    /// The event is passed by value, so the handler takes ownership of it.
    fn handle_progress(&self, event: ProgressEvent);
}
85
/// Console progress handler that prints events prefixed with a pipeline stage tag.
///
/// The two flags select which categories of events are printed. The common
/// traits are derived so a configuration can be logged, copied and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ConsoleProgressHandler {
    /// Whether to show progress messages
    pub show_progress: bool,
    /// Whether to show debug information
    pub show_debug: bool,
}
93
94impl ConsoleProgressHandler {
95    /// Create a new console handler with default settings
96    pub fn new() -> Self {
97        Self {
98            show_progress: true,
99            show_debug: false,
100        }
101    }
102
103    /// Create a quiet console handler (only errors)
104    pub fn quiet() -> Self {
105        Self {
106            show_progress: false,
107            show_debug: false,
108        }
109    }
110
111    /// Create a verbose console handler (everything including debug)
112    pub fn verbose() -> Self {
113        Self {
114            show_progress: true,
115            show_debug: true,
116        }
117    }
118
119    /// Create a machine-readable handler (same as default, no emoji)
120    pub fn machine_readable() -> Self {
121        Self {
122            show_progress: true,
123            show_debug: false,
124        }
125    }
126
127    fn format_message(&self, tag: &str, message: &str) -> String {
128        format!("[{}] {}", tag, message)
129    }
130}
131
132impl Default for ConsoleProgressHandler {
133    fn default() -> Self {
134        Self::new()
135    }
136}
137
138impl ProgressHandler for ConsoleProgressHandler {
139    fn handle_progress(&self, event: ProgressEvent) {
140        match event {
141            ProgressEvent::ProcessingStarted { text_length, model, provider } => {
142                if self.show_progress {
143                    println!("{}", self.format_message("inference",
144                        &format!("{}/{} -- {} chars input", provider, model, text_length)));
145                }
146            }
147            ProgressEvent::ChunkingStarted { total_chars, chunk_count, strategy } => {
148                if self.show_progress {
149                    println!("{}", self.format_message("chunking",
150                        &format!("{} chunks ({} strategy, {} chars total)", chunk_count, strategy, total_chars)));
151                }
152            }
153            ProgressEvent::BatchProgress { batch_number: _, total_batches: _, chunks_processed, total_chunks } => {
154                if self.show_progress {
155                    println!("{}", self.format_message("progress",
156                        &format!("{}/{} chunks processed", chunks_processed, total_chunks)));
157                }
158            }
159            ProgressEvent::ModelCall { provider, model: _, input_length } => {
160                if self.show_debug {
161                    println!("{}", self.format_message("inference",
162                        &format!("{} API call -- {} chars", provider, input_length)));
163                }
164            }
165            ProgressEvent::ModelResponse { success, output_length } => {
166                if self.show_debug {
167                    if success {
168                        println!("{}", self.format_message("inference",
169                            &format!("response received -- {} chars", output_length.unwrap_or(0))));
170                    } else {
171                        println!("{}", self.format_message("inference", "no response from model"));
172                    }
173                }
174            }
175            ProgressEvent::AggregationStarted { chunk_count } => {
176                if self.show_progress {
177                    println!("{}", self.format_message("aggregation",
178                        &format!("merging results from {} chunks", chunk_count)));
179                }
180            }
181            ProgressEvent::ProcessingCompleted { total_extractions, processing_time_ms: _ } => {
182                if self.show_progress {
183                    println!("{}", self.format_message("done",
184                        &format!("{} extractions found", total_extractions)));
185                }
186            }
187            ProgressEvent::RetryAttempt { operation, attempt, max_attempts, delay_seconds } => {
188                if self.show_progress {
189                    println!("{}", self.format_message("retry",
190                        &format!("{} failed (attempt {}/{}), retrying in {}s", operation, attempt, max_attempts, delay_seconds)));
191                }
192            }
193            ProgressEvent::Error { operation, error } => {
194                // Always show errors
195                eprintln!("{}", self.format_message("error", &format!("{}: {}", operation, error)));
196            }
197            ProgressEvent::Debug { operation, details } => {
198                if self.show_debug {
199                    println!("{}", self.format_message("debug", &format!("{}: {}", operation, details)));
200                }
201            }
202            ProgressEvent::ValidationStarted { raw_output_length: _ } => {
203                // Internal event, no output needed
204            }
205            ProgressEvent::ValidationCompleted { extractions_found, aligned_count, errors, warnings } => {
206                if self.show_debug {
207                    println!("{}", self.format_message("validation",
208                        &format!("{} extractions ({} aligned), {} errors, {} warnings",
209                            extractions_found, aligned_count, errors, warnings)));
210                }
211            }
212        }
213    }
214}
215
216/// Silent progress handler that does nothing
217pub struct SilentProgressHandler;
218
219impl ProgressHandler for SilentProgressHandler {
220    fn handle_progress(&self, _event: ProgressEvent) {
221        // Do nothing
222    }
223}
224
/// Logger that integrates with the standard `log` crate.
///
/// Events are forwarded to the `log` macros, so output is controlled by
/// whichever logger implementation the application has installed.
#[derive(Debug, Default, Clone)]
pub struct LogProgressHandler;
227
228impl ProgressHandler for LogProgressHandler {
229    fn handle_progress(&self, event: ProgressEvent) {
230        match event {
231            ProgressEvent::ProcessingStarted { text_length, model, provider } => {
232                log::info!("Starting extraction with {} model {} ({} chars)", provider, model, text_length);
233            }
234            ProgressEvent::ChunkingStarted { total_chars, chunk_count, strategy } => {
235                log::info!("Chunking document: {} {} chunks ({} chars)", chunk_count, strategy, total_chars);
236            }
237            ProgressEvent::BatchProgress { batch_number, total_batches: _, chunks_processed, total_chunks } => {
238                log::debug!("Processing batch {}: {}/{} chunks", batch_number, chunks_processed, total_chunks);
239            }
240            ProgressEvent::ModelCall { provider, model, input_length } => {
241                log::debug!("Calling {} model {} with {} chars input", provider, model, input_length);
242            }
243            ProgressEvent::ModelResponse { success, output_length } => {
244                if success {
245                    log::debug!("Received response: {} chars", output_length.unwrap_or(0));
246                } else {
247                    log::warn!("Failed to receive model response");
248                }
249            }
250            ProgressEvent::ValidationCompleted { extractions_found, aligned_count, errors, warnings } => {
251                log::debug!("Validation: {} extractions ({} aligned), {} errors, {} warnings", 
252                    extractions_found, aligned_count, errors, warnings);
253            }
254            ProgressEvent::AggregationStarted { chunk_count } => {
255                log::debug!("Aggregating {} chunks", chunk_count);
256            }
257            ProgressEvent::ProcessingCompleted { total_extractions, processing_time_ms } => {
258                log::info!("Extraction completed: {} extractions in {}ms", total_extractions, processing_time_ms);
259            }
260            ProgressEvent::RetryAttempt { operation, attempt, max_attempts, delay_seconds } => {
261                log::warn!("Retry {}/{} for {}, waiting {}s", attempt, max_attempts, operation, delay_seconds);
262            }
263            ProgressEvent::Error { operation, error } => {
264                log::error!("{}: {}", operation, error);
265            }
266            ProgressEvent::Debug { operation, details } => {
267                log::debug!("{}: {}", operation, details);
268            }
269            ProgressEvent::ValidationStarted { .. } => {
270                log::trace!("Starting validation");
271            }
272        }
273    }
274}
275
/// Global progress handler.
///
/// Held in a `OnceLock`, so it is written at most once for the lifetime of the
/// process; after the first write the stored handler is never replaced.
static PROGRESS_HANDLER: std::sync::OnceLock<Arc<dyn ProgressHandler>> = std::sync::OnceLock::new();
278
279/// Initialize the global progress handler
280pub fn init_progress_handler(handler: Arc<dyn ProgressHandler>) {
281    let _ = PROGRESS_HANDLER.set(handler);
282}
283
284/// Get the current progress handler, or create a default one
285fn get_progress_handler() -> Arc<dyn ProgressHandler> {
286    PROGRESS_HANDLER.get_or_init(|| Arc::new(ConsoleProgressHandler::new())).clone()
287}
288
289/// Report a progress event
290pub fn report_progress(event: ProgressEvent) {
291    let handler = get_progress_handler();
292    handler.handle_progress(event);
293}
294
/// Convenience macro: report a free-form informational message.
///
/// Takes the same arguments as `format!`. The message is delivered as a
/// `ProgressEvent::Debug` whose operation is `"info"`, so handlers treat it
/// like any other debug event.
///
/// The expansion is a single expression with no trailing semicolon, so the
/// macro works in statement position and in expression position (for example
/// as a match arm) without triggering the
/// `semicolon_in_expressions_from_macros` lint.
#[macro_export]
macro_rules! progress_info {
    ($($arg:tt)*) => {
        $crate::logging::report_progress($crate::logging::ProgressEvent::Debug {
            operation: "info".to_string(),
            details: ::std::format!($($arg)*),
        })
    };
}
305
/// Convenience macro: report a debug message for a named operation.
///
/// The first argument is the operation (anything with `to_string()`); the
/// remaining arguments are passed to `format!` to build the details text.
/// The result is delivered as a `ProgressEvent::Debug`.
///
/// The expansion is a single expression with no trailing semicolon, so the
/// macro works in both statement and expression position.
#[macro_export]
macro_rules! progress_debug {
    ($operation:expr, $($arg:tt)*) => {
        $crate::logging::report_progress($crate::logging::ProgressEvent::Debug {
            operation: $operation.to_string(),
            details: ::std::format!($($arg)*),
        })
    };
}
315
/// Convenience macro: report an error for a named operation.
///
/// The first argument is the operation (anything with `to_string()`); the
/// remaining arguments are passed to `format!` to build the error text.
/// The result is delivered as a `ProgressEvent::Error`.
///
/// The expansion is a single expression with no trailing semicolon, so the
/// macro works in both statement and expression position.
#[macro_export]
macro_rules! progress_error {
    ($operation:expr, $($arg:tt)*) => {
        $crate::logging::report_progress($crate::logging::ProgressEvent::Error {
            operation: $operation.to_string(),
            error: ::std::format!($($arg)*),
        })
    };
}
325
#[cfg(test)]
mod tests {
    use super::*;

    /// The formatted line must contain the bracketed tag and the message text,
    /// whichever constructor produced the handler.
    #[test]
    fn test_console_handler_formatting() {
        let cases = [
            (ConsoleProgressHandler::new(), "inference"),
            (ConsoleProgressHandler::machine_readable(), "chunking"),
        ];

        for (handler, tag) in cases {
            let message = handler.format_message(tag, "Test message");
            assert!(message.contains(&format!("[{}]", tag)));
            assert!(message.contains("Test message"));
        }
    }

    /// Smoke test: a quiet handler accepts an event without panicking.
    #[test]
    fn test_progress_events() {
        let event = ProgressEvent::ProcessingStarted {
            text_length: 1000,
            model: "test-model".to_string(),
            provider: "test-provider".to_string(),
        };

        // Should not panic
        ConsoleProgressHandler::quiet().handle_progress(event);
    }
}