pub struct CanonicalIngestRecord {
pub id: String,
pub tenant_id: String,
pub doc_id: String,
pub received_at: DateTime<Utc>,
pub original_source: Option<String>,
pub source: IngestSource,
pub normalized_payload: Option<CanonicalPayload>,
pub attributes: Option<Value>,
}
Expand description
Normalized record produced by ingest.
CanonicalIngestRecord is the output of the ingest pipeline. It represents
a cleaned, validated, and deterministic version of the input that downstream
stages can rely on.
§Guarantees
- All required fields are present (tenant_id, doc_id, received_at)
- Metadata is sanitized (control characters stripped)
- Payload is normalized (text whitespace collapsed, binary preserved)
- Document ID is stable (derived deterministically if not provided)
§Examples
use ingest::{ingest, IngestConfig, RawIngestRecord, CanonicalPayload};
use ingest::{IngestMetadata, IngestSource, IngestPayload};
let config = IngestConfig::default();
let record = RawIngestRecord {
id: "test-001".to_string(),
source: IngestSource::RawText,
metadata: IngestMetadata {
tenant_id: Some("tenant".to_string()),
doc_id: None, // Will be derived
received_at: None, // Will default to now
original_source: None,
attributes: None,
},
payload: Some(IngestPayload::Text(" Hello world ".to_string())),
};
let canonical = ingest(record, &config).unwrap();
// All fields are guaranteed present
assert!(!canonical.tenant_id.is_empty());
assert!(!canonical.doc_id.is_empty());
// Text is normalized
match &canonical.normalized_payload {
Some(CanonicalPayload::Text(text)) => {
assert_eq!(text, "Hello world");
}
_ => panic!("Expected text payload"),
}
Fields§
§id: String
Unique identifier for this ingest operation (mirrors RawIngestRecord::id).
This is the sanitized version of the original ID (control characters stripped).
tenant_id: String
Tenant identifier for multi-tenant isolation.
This is the effective tenant ID after applying defaults:
- If provided and non-empty: the sanitized provided value
- Otherwise: IngestConfig::default_tenant_id
doc_id: String
Document identifier.
This is the effective document ID after derivation:
- If provided and non-empty: the sanitized provided value
- Otherwise: UUIDv5 derived from tenant + record ID
received_at: DateTime<Utc>
Timestamp when the record was received.
This is the effective timestamp after applying defaults:
- If provided: the sanitized provided value
- Otherwise: current UTC time at ingest
original_source: Option<String>
Original source information if provided.
Sanitized version of IngestMetadata::original_source with control
characters stripped. None if not provided.
source: IngestSource
Source of the content (mirrors RawIngestRecord::source).
normalized_payload: Option<CanonicalPayload>
Normalized payload ready for downstream stages.
- For text: whitespace collapsed, size limits enforced
- For binary: preserved unchanged, non-empty check performed
- None if no payload was provided
attributes: Option<Value>
Attributes JSON preserved for downstream use.
This is the sanitized and size-checked version of
IngestMetadata::attributes. None if not provided.
Implementations§
Source§impl CanonicalIngestRecord
impl CanonicalIngestRecord
Sourcepub fn has_text_payload(&self) -> bool
pub fn has_text_payload(&self) -> bool
Returns true if this record has a text payload.
§Example
use ingest::{CanonicalIngestRecord, CanonicalPayload};
let record = CanonicalIngestRecord {
id: "test".to_string(),
tenant_id: "tenant".to_string(),
doc_id: "doc".to_string(),
received_at: chrono::Utc::now(),
original_source: None,
source: ingest::IngestSource::RawText,
normalized_payload: Some(CanonicalPayload::Text("hello".to_string())),
attributes: None,
};
assert!(record.has_text_payload());
Sourcepub fn has_binary_payload(&self) -> bool
pub fn has_binary_payload(&self) -> bool
Returns true if this record has a binary payload.
§Example
use ingest::{CanonicalIngestRecord, CanonicalPayload};
let record = CanonicalIngestRecord {
id: "test".to_string(),
tenant_id: "tenant".to_string(),
doc_id: "doc".to_string(),
received_at: chrono::Utc::now(),
original_source: None,
source: ingest::IngestSource::File {
filename: "test.bin".to_string(),
content_type: None,
},
normalized_payload: Some(CanonicalPayload::Binary(vec![1, 2, 3])),
attributes: None,
};
assert!(record.has_binary_payload());
Sourcepub fn text_payload(&self) -> Option<&str>
pub fn text_payload(&self) -> Option<&str>
Returns the text payload if present, otherwise None.
§Example
use ingest::{CanonicalIngestRecord, CanonicalPayload};
let record = CanonicalIngestRecord {
id: "test".to_string(),
tenant_id: "tenant".to_string(),
doc_id: "doc".to_string(),
received_at: chrono::Utc::now(),
original_source: None,
source: ingest::IngestSource::RawText,
normalized_payload: Some(CanonicalPayload::Text("hello world".to_string())),
attributes: None,
};
assert_eq!(record.text_payload(), Some("hello world"));
Sourcepub fn binary_payload(&self) -> Option<&[u8]>
pub fn binary_payload(&self) -> Option<&[u8]>
Returns the binary payload if present, otherwise None.
Trait Implementations§
Source§impl Clone for CanonicalIngestRecord
impl Clone for CanonicalIngestRecord
Source§fn clone(&self) -> CanonicalIngestRecord
fn clone(&self) -> CanonicalIngestRecord
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more