Skip to main content

ingest/
error.rs

1//! Error types produced by the ingest crate.
2//!
3//! This module defines the error surface for ingest operations. All errors are
4//! typed, cloneable, and comparable to enable precise error handling and testing.
5//!
6//! # Error Philosophy
7//!
8//! The ingest crate uses typed errors (not generic strings) to allow callers to:
9//! - Handle specific error cases differently
10//! - Map errors to appropriate HTTP status codes
11//! - Display user-friendly error messages
12//! - Log structured error information
13//!
14//! # Error Categories
15//!
16//! | Error | Category | Description |
17//! |-------|----------|-------------|
18//! | [`MissingPayload`](IngestError::MissingPayload) | Validation | Source requires payload but none provided |
19//! | [`EmptyBinaryPayload`](IngestError::EmptyBinaryPayload) | Validation | Binary payload has zero bytes |
20//! | [`InvalidMetadata`](IngestError::InvalidMetadata) | Validation | Metadata policy violation |
21//! | [`InvalidUtf8`](IngestError::InvalidUtf8) | Validation | TextBytes not valid UTF-8 |
22//! | [`EmptyNormalizedText`](IngestError::EmptyNormalizedText) | Validation | Text empty after normalization |
23//! | [`PayloadTooLarge`](IngestError::PayloadTooLarge) | Validation | Size limit exceeded |
24//!
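//! # HTTP Status Code Mapping
//!
//! [`IngestError::http_status_code`] already provides the mapping shown below;
//! the snippet is the equivalent hand-written `match` for callers that want to
//! customize the status codes.
//!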
27//! ```rust
28//! use ingest::IngestError;
29//!
30//! fn to_http_status(error: &IngestError) -> u16 {
31//!     match error {
32//!         IngestError::PayloadTooLarge(_) => 413, // Payload Too Large
//!         _ => 400, // Bad Request (all other validation errors)
34//!     }
35//! }
36//! ```
37//!
38//! # Examples
39//!
40//! ## Basic Error Handling
41//!
42//! ```rust
43//! use ingest::{ingest, IngestConfig, IngestError};
44//! use ingest::{RawIngestRecord, IngestMetadata, IngestSource};
45//!
46//! let config = IngestConfig::default();
47//! let record = RawIngestRecord {
48//!     id: "test".to_string(),
49//!     source: IngestSource::RawText,
50//!     metadata: IngestMetadata {
51//!         tenant_id: Some("t".to_string()),
52//!         doc_id: Some("d".to_string()),
53//!         received_at: None,
54//!         original_source: None,
55//!         attributes: None,
56//!     },
57//!     payload: None, // Missing required payload
58//! };
59//!
60//! match ingest(record, &config) {
61//!     Ok(canonical) => println!("Success: {}", canonical.doc_id),
62//!     Err(IngestError::MissingPayload) => {
63//!         println!("Error: Payload is required for this source");
64//!     }
65//!     Err(e) => println!("Error: {}", e),
66//! }
67//! ```
68//!
69//! ## Pattern Matching
70//!
71//! ```rust
72//! use ingest::IngestError;
73//!
74//! fn handle_error(error: IngestError) -> String {
75//!     match error {
76//!         IngestError::EmptyNormalizedText => {
77//!             "Content cannot be empty or whitespace-only".to_string()
78//!         }
79//!         IngestError::InvalidUtf8(msg) => {
80//!             format!("Invalid text encoding: {}", msg)
81//!         }
82//!         IngestError::PayloadTooLarge(msg) => {
83//!             format!("Content too large: {}", msg)
84//!         }
85//!         _ => error.to_string(),
86//!     }
87//! }
88//! ```
89use thiserror::Error;
90
91/// Errors that can occur during ingest normalization and validation.
92///
93/// These errors represent validation failures that prevent content from being
94/// ingested. All variants are:
95///
96/// - **Cloneable**: Can be copied for error propagation
97/// - **Comparable**: Support equality checks for testing
98/// - **Displayable**: Implement `std::fmt::Display` for user messages
99/// - **Debuggable**: Implement `std::fmt::Debug` for development
100///
101/// The enum is marked `#[non_exhaustive]` to allow future additions without
102/// breaking existing code. Callers should always include a catch-all arm when
103/// matching.
104///
105/// # Examples
106///
107/// ```rust
108/// use ingest::IngestError;
109///
110/// // Error messages
111/// let err = IngestError::MissingPayload;
112/// assert_eq!(err.to_string(), "missing payload for source that requires payload");
113///
114/// let err = IngestError::InvalidMetadata("tenant required".to_string());
115/// assert!(err.to_string().contains("tenant required"));
116/// ```
117#[derive(Error, Debug, Clone, PartialEq, Eq)]
118#[non_exhaustive]
119pub enum IngestError {
120    /// Missing payload for source that requires one.
121    ///
122    /// This error occurs when a source type (e.g., [`RawText`](crate::IngestSource::RawText),
123    /// [`File`](crate::IngestSource::File)) requires a payload but `None` was provided.
124    ///
125    /// # Example
126    ///
127    /// ```rust
128    /// use ingest::{ingest, IngestConfig, IngestError};
129    /// use ingest::{RawIngestRecord, IngestMetadata, IngestSource};
130    ///
131    /// let record = RawIngestRecord {
132    ///     id: "test".to_string(),
133    ///     source: IngestSource::RawText, // Requires payload
134    ///     metadata: IngestMetadata {
135    ///         tenant_id: Some("t".to_string()),
136    ///         doc_id: Some("d".to_string()),
137    ///         received_at: None,
138    ///         original_source: None,
139    ///         attributes: None,
140    ///     },
141    ///     payload: None, // ERROR: Required but missing
142    /// };
143    ///
144    /// // This will fail with MissingPayload
145    /// // let result = ingest(record, &IngestConfig::default());
146    /// ```
147    #[error("missing payload for source that requires payload")]
148    MissingPayload,
149
150    /// Binary payload is empty (zero bytes).
151    ///
152    /// This error occurs when [`IngestPayload::Binary`](crate::IngestPayload::Binary)
153    /// contains an empty vector. Empty binary payloads are rejected to prevent
154    /// meaningless ingests.
155    ///
156    /// # Example
157    ///
158    /// ```rust
159    /// use ingest::IngestPayload;
160    ///
161    /// // This will be rejected
162    /// let empty_binary = IngestPayload::Binary(vec![]);
163    /// ```
164    #[error("binary payload is empty")]
165    EmptyBinaryPayload,
166
167    /// Invalid metadata or policy violation.
168    ///
169    /// This is a catch-all error for metadata validation failures:
170    /// - Required field missing (per [`MetadataPolicy`](crate::MetadataPolicy))
171    /// - Attributes exceed size limit
172    /// - Future timestamp (when [`reject_future_timestamps`](crate::MetadataPolicy::reject_future_timestamps) is enabled)
173    /// - Empty required field after sanitization
174    /// - Invalid source/payload combination
175    ///
176    /// The message provides details about the specific violation.
177    ///
178    /// # Example
179    ///
180    /// ```rust
181    /// use ingest::IngestError;
182    ///
183    /// let err = IngestError::InvalidMetadata(
184    ///     "tenant_id is required by ingest policy".to_string()
185    /// );
186    /// ```
187    #[error("invalid metadata: {0}")]
188    InvalidMetadata(String),
189
190    /// Invalid UTF-8 in TextBytes payload.
191    ///
192    /// This error occurs when [`IngestPayload::TextBytes`](crate::IngestPayload::TextBytes)
193    /// contains bytes that cannot be decoded as valid UTF-8.
194    ///
195    /// # Solutions
196    ///
197    /// - Use [`IngestPayload::Binary`](crate::IngestPayload::Binary) for non-text data
198    /// - Validate encoding before ingest
199    /// - Use encoding detection libraries (e.g., `chardetng`)
200    /// - Use `String::from_utf8_lossy` and convert to `Text`
201    ///
202    /// # Example
203    ///
204    /// ```rust
205    /// use ingest::{IngestPayload, ingest, IngestConfig, IngestError};
206    /// use ingest::{RawIngestRecord, IngestMetadata, IngestSource};
207    ///
208    /// let record = RawIngestRecord {
209    ///     id: "test".to_string(),
210    ///     source: IngestSource::RawText,
211    ///     metadata: IngestMetadata {
212    ///         tenant_id: Some("t".to_string()),
213    ///         doc_id: Some("d".to_string()),
214    ///         received_at: None,
215    ///         original_source: None,
216    ///         attributes: None,
217    ///     },
218    ///     payload: Some(IngestPayload::TextBytes(vec![0xFF, 0xFE])), // Invalid UTF-8
219    /// };
220    ///
221    /// // This will fail with InvalidUtf8
222    /// // let result = ingest(record, &IngestConfig::default());
223    /// ```
224    #[error("invalid utf-8 payload: {0}")]
225    InvalidUtf8(String),
226
227    /// Text payload became empty after normalization.
228    ///
229    /// This error occurs when [`normalize_payload()`](crate::normalize_payload) produces
230    /// an empty string, typically because:
231    ///
232    /// - Input was whitespace-only (e.g., `"   "`, `"\n\n"`)
233    /// - Input contained only control characters (which were stripped)
234    /// - Input was empty string
235    ///
236    /// # Solutions
237    ///
238    /// - Check input before ingest: `if content.trim().is_empty()`
239    /// - Provide meaningful error messages to users
240    /// - Consider rejecting at API layer before calling ingest
241    ///
242    /// # Example
243    ///
244    /// ```rust
245    /// use ingest::{ingest, IngestConfig, IngestError, IngestPayload};
246    /// use ingest::{RawIngestRecord, IngestMetadata, IngestSource};
247    ///
248    /// let record = RawIngestRecord {
249    ///     id: "test".to_string(),
250    ///     source: IngestSource::RawText,
251    ///     metadata: IngestMetadata {
252    ///         tenant_id: Some("t".to_string()),
253    ///         doc_id: Some("d".to_string()),
254    ///         received_at: None,
255    ///         original_source: None,
256    ///         attributes: None,
257    ///     },
258    ///     payload: Some(IngestPayload::Text("   \n\t   ".to_string())), // Whitespace only
259    /// };
260    ///
261    /// // This will fail with EmptyNormalizedText
262    /// // let result = ingest(record, &IngestConfig::default());
263    /// ```
264    #[error("text payload empty after normalization")]
265    EmptyNormalizedText,
266
267    /// Payload exceeds configured size limit.
268    ///
269    /// This error occurs when a payload violates:
270    /// - [`IngestConfig::max_payload_bytes`](crate::IngestConfig::max_payload_bytes) (raw size)
271    /// - [`IngestConfig::max_normalized_bytes`](crate::IngestConfig::max_normalized_bytes) (after normalization)
272    ///
273    /// The message contains details about which limit was exceeded and by how much.
274    ///
275    /// # HTTP Status Code
276    ///
277    /// This error should map to **413 Payload Too Large** in HTTP contexts.
278    ///
279    /// # Solutions
280    ///
281    /// - Increase limits if appropriate
282    /// - Reject at API layer before calling ingest
283    /// - Implement chunked processing for large documents
284    ///
285    /// # Example
286    ///
287    /// ```rust
288    /// use ingest::{IngestConfig, IngestError};
289    ///
290    /// let err = IngestError::PayloadTooLarge(
291    ///     "raw payload size 15000000 exceeds limit of 10000000".to_string()
292    /// );
293    ///
294    /// // Map to HTTP status
295    /// let status = match err {
296    ///     IngestError::PayloadTooLarge(_) => 413,
297    ///     _ => 400,
298    /// };
299    /// ```
300    #[error("payload exceeds size limit: {0}")]
301    PayloadTooLarge(String),
302}
303
304impl IngestError {
305    /// Returns true if this error indicates a client-side issue.
306    ///
307    /// All ingest errors are client-side issues (invalid input), so this
308    /// always returns true. It is provided for API consistency with other
309    /// error types that might have server-side variants.
310    ///
311    /// # Example
312    ///
313    /// ```rust
314    /// use ingest::IngestError;
315    ///
316    /// let err = IngestError::MissingPayload;
317    /// assert!(err.is_client_error());
318    /// ```
319    pub fn is_client_error(&self) -> bool {
320        true
321    }
322
323    /// Returns a suggested HTTP status code for this error.
324    ///
325    /// This is a convenience method for HTTP API implementations.
326    ///
327    /// # Status Codes
328    ///
329    /// - `PayloadTooLarge`: 413
330    /// - All others: 400
331    ///
332    /// # Example
333    ///
334    /// ```rust
335    /// use ingest::IngestError;
336    ///
337    /// let err = IngestError::PayloadTooLarge("too big".to_string());
338    /// assert_eq!(err.http_status_code(), 413);
339    ///
340    /// let err = IngestError::MissingPayload;
341    /// assert_eq!(err.http_status_code(), 400);
342    /// ```
343    pub fn http_status_code(&self) -> u16 {
344        match self {
345            IngestError::PayloadTooLarge(_) => 413,
346            _ => 400,
347        }
348    }
349}