// ingest/error.rs
//! Error types produced by the ingest crate.
//!
//! This module defines the error surface for ingest operations. All errors are
//! typed, cloneable, and comparable to enable precise error handling and testing.
//!
//! # Error Philosophy
//!
//! The ingest crate uses typed errors (not generic strings) to allow callers to:
//! - Handle specific error cases differently
//! - Map errors to appropriate HTTP status codes
//! - Display user-friendly error messages
//! - Log structured error information
//!
//! # Error Categories
//!
//! | Error | Category | Description |
//! |-------|----------|-------------|
//! | [`MissingPayload`](IngestError::MissingPayload) | Validation | Source requires payload but none provided |
//! | [`EmptyBinaryPayload`](IngestError::EmptyBinaryPayload) | Validation | Binary payload has zero bytes |
//! | [`InvalidMetadata`](IngestError::InvalidMetadata) | Validation | Metadata policy violation |
//! | [`InvalidUtf8`](IngestError::InvalidUtf8) | Validation | TextBytes not valid UTF-8 |
//! | [`EmptyNormalizedText`](IngestError::EmptyNormalizedText) | Validation | Text empty after normalization |
//! | [`PayloadTooLarge`](IngestError::PayloadTooLarge) | Validation | Size limit exceeded |
//!
//! # HTTP Status Code Mapping
//!
//! ```rust
//! use ingest::IngestError;
//!
//! fn to_http_status(error: &IngestError) -> u16 {
//!     match error {
//!         IngestError::PayloadTooLarge(_) => 413, // Payload Too Large
//!         _ => 400, // Bad Request (all validation errors)
//!     }
//! }
//! ```
//!
//! # Examples
//!
//! ## Basic Error Handling
//!
//! ```rust
//! use ingest::{ingest, IngestConfig, IngestError};
//! use ingest::{RawIngestRecord, IngestMetadata, IngestSource};
//!
//! let config = IngestConfig::default();
//! let record = RawIngestRecord {
//!     id: "test".to_string(),
//!     source: IngestSource::RawText,
//!     metadata: IngestMetadata {
//!         tenant_id: Some("t".to_string()),
//!         doc_id: Some("d".to_string()),
//!         received_at: None,
//!         original_source: None,
//!         attributes: None,
//!     },
//!     payload: None, // Missing required payload
//! };
//!
//! match ingest(record, &config) {
//!     Ok(canonical) => println!("Success: {}", canonical.doc_id),
//!     Err(IngestError::MissingPayload) => {
//!         println!("Error: Payload is required for this source");
//!     }
//!     Err(e) => println!("Error: {}", e),
//! }
//! ```
//!
//! ## Pattern Matching
//!
//! ```rust
//! use ingest::IngestError;
//!
//! fn handle_error(error: IngestError) -> String {
//!     match error {
//!         IngestError::EmptyNormalizedText => {
//!             "Content cannot be empty or whitespace-only".to_string()
//!         }
//!         IngestError::InvalidUtf8(msg) => {
//!             format!("Invalid text encoding: {}", msg)
//!         }
//!         IngestError::PayloadTooLarge(msg) => {
//!             format!("Content too large: {}", msg)
//!         }
//!         _ => error.to_string(),
//!     }
//! }
//! ```
use thiserror::Error;
90
91/// Errors that can occur during ingest normalization and validation.
92///
93/// These errors represent validation failures that prevent content from being
94/// ingested. All variants are:
95///
96/// - **Cloneable**: Can be copied for error propagation
97/// - **Comparable**: Support equality checks for testing
98/// - **Displayable**: Implement `std::fmt::Display` for user messages
99/// - **Debuggable**: Implement `std::fmt::Debug` for development
100///
101/// The enum is marked `#[non_exhaustive]` to allow future additions without
102/// breaking existing code. Callers should always include a catch-all arm when
103/// matching.
104///
105/// # Examples
106///
107/// ```rust
108/// use ingest::IngestError;
109///
110/// // Error messages
111/// let err = IngestError::MissingPayload;
112/// assert_eq!(err.to_string(), "missing payload for source that requires payload");
113///
114/// let err = IngestError::InvalidMetadata("tenant required".to_string());
115/// assert!(err.to_string().contains("tenant required"));
116/// ```
117#[derive(Error, Debug, Clone, PartialEq, Eq)]
118#[non_exhaustive]
119pub enum IngestError {
120 /// Missing payload for source that requires one.
121 ///
122 /// This error occurs when a source type (e.g., [`RawText`](crate::IngestSource::RawText),
123 /// [`File`](crate::IngestSource::File)) requires a payload but `None` was provided.
124 ///
125 /// # Example
126 ///
127 /// ```rust
128 /// use ingest::{ingest, IngestConfig, IngestError};
129 /// use ingest::{RawIngestRecord, IngestMetadata, IngestSource};
130 ///
131 /// let record = RawIngestRecord {
132 /// id: "test".to_string(),
133 /// source: IngestSource::RawText, // Requires payload
134 /// metadata: IngestMetadata {
135 /// tenant_id: Some("t".to_string()),
136 /// doc_id: Some("d".to_string()),
137 /// received_at: None,
138 /// original_source: None,
139 /// attributes: None,
140 /// },
141 /// payload: None, // ERROR: Required but missing
142 /// };
143 ///
144 /// // This will fail with MissingPayload
145 /// // let result = ingest(record, &IngestConfig::default());
146 /// ```
147 #[error("missing payload for source that requires payload")]
148 MissingPayload,
149
150 /// Binary payload is empty (zero bytes).
151 ///
152 /// This error occurs when [`IngestPayload::Binary`](crate::IngestPayload::Binary)
153 /// contains an empty vector. Empty binary payloads are rejected to prevent
154 /// meaningless ingests.
155 ///
156 /// # Example
157 ///
158 /// ```rust
159 /// use ingest::IngestPayload;
160 ///
161 /// // This will be rejected
162 /// let empty_binary = IngestPayload::Binary(vec![]);
163 /// ```
164 #[error("binary payload is empty")]
165 EmptyBinaryPayload,
166
167 /// Invalid metadata or policy violation.
168 ///
169 /// This is a catch-all error for metadata validation failures:
170 /// - Required field missing (per [`MetadataPolicy`](crate::MetadataPolicy))
171 /// - Attributes exceed size limit
172 /// - Future timestamp (when [`reject_future_timestamps`](crate::MetadataPolicy::reject_future_timestamps) is enabled)
173 /// - Empty required field after sanitization
174 /// - Invalid source/payload combination
175 ///
176 /// The message provides details about the specific violation.
177 ///
178 /// # Example
179 ///
180 /// ```rust
181 /// use ingest::IngestError;
182 ///
183 /// let err = IngestError::InvalidMetadata(
184 /// "tenant_id is required by ingest policy".to_string()
185 /// );
186 /// ```
187 #[error("invalid metadata: {0}")]
188 InvalidMetadata(String),
189
190 /// Invalid UTF-8 in TextBytes payload.
191 ///
192 /// This error occurs when [`IngestPayload::TextBytes`](crate::IngestPayload::TextBytes)
193 /// contains bytes that cannot be decoded as valid UTF-8.
194 ///
195 /// # Solutions
196 ///
197 /// - Use [`IngestPayload::Binary`](crate::IngestPayload::Binary) for non-text data
198 /// - Validate encoding before ingest
199 /// - Use encoding detection libraries (e.g., `chardetng`)
200 /// - Use `String::from_utf8_lossy` and convert to `Text`
201 ///
202 /// # Example
203 ///
204 /// ```rust
205 /// use ingest::{IngestPayload, ingest, IngestConfig, IngestError};
206 /// use ingest::{RawIngestRecord, IngestMetadata, IngestSource};
207 ///
208 /// let record = RawIngestRecord {
209 /// id: "test".to_string(),
210 /// source: IngestSource::RawText,
211 /// metadata: IngestMetadata {
212 /// tenant_id: Some("t".to_string()),
213 /// doc_id: Some("d".to_string()),
214 /// received_at: None,
215 /// original_source: None,
216 /// attributes: None,
217 /// },
218 /// payload: Some(IngestPayload::TextBytes(vec![0xFF, 0xFE])), // Invalid UTF-8
219 /// };
220 ///
221 /// // This will fail with InvalidUtf8
222 /// // let result = ingest(record, &IngestConfig::default());
223 /// ```
224 #[error("invalid utf-8 payload: {0}")]
225 InvalidUtf8(String),
226
227 /// Text payload became empty after normalization.
228 ///
229 /// This error occurs when [`normalize_payload()`](crate::normalize_payload) produces
230 /// an empty string, typically because:
231 ///
232 /// - Input was whitespace-only (e.g., `" "`, `"\n\n"`)
233 /// - Input contained only control characters (which were stripped)
234 /// - Input was empty string
235 ///
236 /// # Solutions
237 ///
238 /// - Check input before ingest: `if content.trim().is_empty()`
239 /// - Provide meaningful error messages to users
240 /// - Consider rejecting at API layer before calling ingest
241 ///
242 /// # Example
243 ///
244 /// ```rust
245 /// use ingest::{ingest, IngestConfig, IngestError, IngestPayload};
246 /// use ingest::{RawIngestRecord, IngestMetadata, IngestSource};
247 ///
248 /// let record = RawIngestRecord {
249 /// id: "test".to_string(),
250 /// source: IngestSource::RawText,
251 /// metadata: IngestMetadata {
252 /// tenant_id: Some("t".to_string()),
253 /// doc_id: Some("d".to_string()),
254 /// received_at: None,
255 /// original_source: None,
256 /// attributes: None,
257 /// },
258 /// payload: Some(IngestPayload::Text(" \n\t ".to_string())), // Whitespace only
259 /// };
260 ///
261 /// // This will fail with EmptyNormalizedText
262 /// // let result = ingest(record, &IngestConfig::default());
263 /// ```
264 #[error("text payload empty after normalization")]
265 EmptyNormalizedText,
266
267 /// Payload exceeds configured size limit.
268 ///
269 /// This error occurs when a payload violates:
270 /// - [`IngestConfig::max_payload_bytes`](crate::IngestConfig::max_payload_bytes) (raw size)
271 /// - [`IngestConfig::max_normalized_bytes`](crate::IngestConfig::max_normalized_bytes) (after normalization)
272 ///
273 /// The message contains details about which limit was exceeded and by how much.
274 ///
275 /// # HTTP Status Code
276 ///
277 /// This error should map to **413 Payload Too Large** in HTTP contexts.
278 ///
279 /// # Solutions
280 ///
281 /// - Increase limits if appropriate
282 /// - Reject at API layer before calling ingest
283 /// - Implement chunked processing for large documents
284 ///
285 /// # Example
286 ///
287 /// ```rust
288 /// use ingest::{IngestConfig, IngestError};
289 ///
290 /// let err = IngestError::PayloadTooLarge(
291 /// "raw payload size 15000000 exceeds limit of 10000000".to_string()
292 /// );
293 ///
294 /// // Map to HTTP status
295 /// let status = match err {
296 /// IngestError::PayloadTooLarge(_) => 413,
297 /// _ => 400,
298 /// };
299 /// ```
300 #[error("payload exceeds size limit: {0}")]
301 PayloadTooLarge(String),
302}
303
304impl IngestError {
305 /// Returns true if this error indicates a client-side issue.
306 ///
307 /// All ingest errors are client-side issues (invalid input), so this
308 /// always returns true. It is provided for API consistency with other
309 /// error types that might have server-side variants.
310 ///
311 /// # Example
312 ///
313 /// ```rust
314 /// use ingest::IngestError;
315 ///
316 /// let err = IngestError::MissingPayload;
317 /// assert!(err.is_client_error());
318 /// ```
319 pub fn is_client_error(&self) -> bool {
320 true
321 }
322
323 /// Returns a suggested HTTP status code for this error.
324 ///
325 /// This is a convenience method for HTTP API implementations.
326 ///
327 /// # Status Codes
328 ///
329 /// - `PayloadTooLarge`: 413
330 /// - All others: 400
331 ///
332 /// # Example
333 ///
334 /// ```rust
335 /// use ingest::IngestError;
336 ///
337 /// let err = IngestError::PayloadTooLarge("too big".to_string());
338 /// assert_eq!(err.http_status_code(), 413);
339 ///
340 /// let err = IngestError::MissingPayload;
341 /// assert_eq!(err.http_status_code(), 400);
342 /// ```
343 pub fn http_status_code(&self) -> u16 {
344 match self {
345 IngestError::PayloadTooLarge(_) => 413,
346 _ => 400,
347 }
348 }
349}