halldyll_core/observe/
logs.rs

//! Logs - Structured logs: typed log events emitted through the `tracing` macros.
2
3use serde::Serialize;
4use tracing::{info, warn, error, debug};
5
/// Structured logger that tags every emitted event with the current job ID.
///
/// When no job ID has been set, the logging methods put `"-"` in the
/// `job_id` field instead.
#[derive(Debug, Clone)]
pub struct StructuredLogger {
    /// Current job ID, or `None` when the logger is not bound to a job.
    job_id: Option<String>,
}
11
12impl Default for StructuredLogger {
13    fn default() -> Self {
14        Self::new()
15    }
16}
17
18impl StructuredLogger {
19    /// New logger
20    pub fn new() -> Self {
21        Self { job_id: None }
22    }
23
24    /// With Job ID
25    pub fn with_job_id(job_id: &str) -> Self {
26        Self {
27            job_id: Some(job_id.to_string()),
28        }
29    }
30
31    /// Log a request
32    pub fn log_request(&self, event: &RequestEvent) {
33        info!(
34            job_id = %self.job_id.as_deref().unwrap_or("-"),
35            url = %event.url,
36            method = %event.method,
37            "Request started"
38        );
39    }
40
41    /// Log a response
42    pub fn log_response(&self, event: &ResponseEvent) {
43        if event.status_code >= 400 {
44            warn!(
45                job_id = %self.job_id.as_deref().unwrap_or("-"),
46                url = %event.url,
47                status = event.status_code,
48                bytes = event.bytes,
49                duration_ms = event.duration_ms,
50                "Response error"
51            );
52        } else {
53            info!(
54                job_id = %self.job_id.as_deref().unwrap_or("-"),
55                url = %event.url,
56                status = event.status_code,
57                bytes = event.bytes,
58                duration_ms = event.duration_ms,
59                "Response received"
60            );
61        }
62    }
63
64    /// Log an error
65    pub fn log_error(&self, event: &ErrorEvent) {
66        error!(
67            job_id = %self.job_id.as_deref().unwrap_or("-"),
68            url = %event.url,
69            error_type = %event.error_type,
70            message = %event.message,
71            recoverable = event.recoverable,
72            "Scrape error"
73        );
74    }
75
76    /// Log parsing
77    pub fn log_parse(&self, event: &ParseEvent) {
78        debug!(
79            job_id = %self.job_id.as_deref().unwrap_or("-"),
80            url = %event.url,
81            text_length = event.text_length,
82            links_count = event.links_count,
83            images_count = event.images_count,
84            duration_ms = event.duration_ms,
85            "Parsing completed"
86        );
87    }
88
89    /// Log a retry
90    pub fn log_retry(&self, url: &str, attempt: u32, delay_ms: u64) {
91        warn!(
92            job_id = %self.job_id.as_deref().unwrap_or("-"),
93            url = %url,
94            attempt = attempt,
95            delay_ms = delay_ms,
96            "Retrying request"
97        );
98    }
99
100    /// Log rate limiting
101    pub fn log_rate_limited(&self, url: &str, pause_ms: u64) {
102        warn!(
103            job_id = %self.job_id.as_deref().unwrap_or("-"),
104            url = %url,
105            pause_ms = pause_ms,
106            "Rate limited, pausing"
107        );
108    }
109
110    /// Convenience: Log request from URL
111    pub fn log_request_url(&self, url: &url::Url, method: &str) {
112        self.log_request(&RequestEvent {
113            url: url.to_string(),
114            method: method.to_string(),
115        });
116    }
117
118    /// Convenience: Log response from parts
119    pub fn log_response_parts(&self, url: &url::Url, status_code: u16, bytes: u64, duration_ms: u64) {
120        self.log_response(&ResponseEvent {
121            url: url.to_string(),
122            status_code,
123            bytes,
124            duration_ms,
125        });
126    }
127
128    /// Convenience: Log error from parts
129    pub fn log_error_parts(&self, url: &url::Url, message: &str, recoverable: bool) {
130        self.log_error(&ErrorEvent {
131            url: url.to_string(),
132            error_type: "scrape_error".to_string(),
133            message: message.to_string(),
134            recoverable,
135        });
136    }
137
138    /// Log info message
139    pub fn log_info(&self, message: &str) {
140        info!(
141            job_id = %self.job_id.as_deref().unwrap_or("-"),
142            "{}", message
143        );
144    }
145
146    /// Log warning message
147    pub fn log_warn(&self, message: &str) {
148        warn!(
149            job_id = %self.job_id.as_deref().unwrap_or("-"),
150            "{}", message
151        );
152    }
153
154    /// Log debug message
155    pub fn log_debug(&self, message: &str) {
156        debug!(
157            job_id = %self.job_id.as_deref().unwrap_or("-"),
158            "{}", message
159        );
160    }
161}
162
163/// Request event
164#[derive(Debug, Serialize)]
165pub struct RequestEvent {
166    /// Request URL
167    pub url: String,
168    /// HTTP method
169    pub method: String,
170}
171
172/// Response event
173#[derive(Debug, Serialize)]
174pub struct ResponseEvent {
175    /// Response URL
176    pub url: String,
177    /// HTTP status code
178    pub status_code: u16,
179    /// Response size in bytes
180    pub bytes: u64,
181    /// Request duration in milliseconds
182    pub duration_ms: u64,
183}
184
185/// Error event
186#[derive(Debug, Serialize)]
187pub struct ErrorEvent {
188    /// Error URL context
189    pub url: String,
190    /// Error type classification
191    pub error_type: String,
192    /// Error message
193    pub message: String,
194    /// Whether the error is recoverable
195    pub recoverable: bool,
196}
197
198/// Parse event
199#[derive(Debug, Serialize)]
200pub struct ParseEvent {
201    /// Parsed URL
202    pub url: String,
203    /// Extracted text length in characters
204    pub text_length: usize,
205    /// Number of links found
206    pub links_count: usize,
207    /// Number of images found
208    pub images_count: usize,
209    /// Parse duration in milliseconds
210    pub duration_ms: u64,
211}