pub struct IngestMetadata {
pub tenant_id: Option<String>,
pub doc_id: Option<String>,
pub received_at: Option<DateTime<Utc>>,
pub original_source: Option<String>,
pub attributes: Option<Value>,
}
Metadata associated with an ingest request.
IngestMetadata carries contextual information about the content being ingested.
All fields are optional. Fields omitted by the caller are handled during normalization according to the defaults listed below.
§Field Defaults
| Field | Default Behavior |
|---|---|
| `tenant_id` | Falls back to `IngestConfig::default_tenant_id` |
| `doc_id` | Derived via UUIDv5 if not provided |
| `received_at` | Set to current UTC time |
| `original_source` | Remains `None` if not provided |
| `attributes` | Remains `None` if not provided |
§Examples
§Minimal Metadata
use ingest::IngestMetadata;
let metadata = IngestMetadata {
tenant_id: None,
doc_id: None,
received_at: None,
original_source: None,
attributes: None,
};
// All fields will be defaulted during ingest

§Full Metadata
use ingest::IngestMetadata;
use chrono::Utc;
use serde_json::json;
let metadata = IngestMetadata {
tenant_id: Some("acme-corp".to_string()),
doc_id: Some("report-q4-2024".to_string()),
received_at: Some(Utc::now()),
original_source: Some("https://docs.example.com/reports/q4".to_string()),
attributes: Some(json!({
"department": "Engineering",
"classification": "internal",
"tags": ["quarterly", "2024"]
})),
};

Fields§
§tenant_id: Option<String>
Optional tenant identifier for multi-tenant isolation.
When None or empty after sanitization, falls back to
IngestConfig::default_tenant_id.
§Example
use ingest::IngestMetadata;
let metadata = IngestMetadata {
tenant_id: Some("tenant-123".to_string()),
doc_id: None,
received_at: None,
original_source: None,
attributes: None,
};

§doc_id: Option<String>
Optional document identifier.
When None or empty after sanitization, a deterministic UUIDv5 is generated
using IngestConfig::doc_id_namespace:
UUIDv5(namespace, tenant_id + "\0" + record_id)
§Example
use ingest::IngestMetadata;
let metadata = IngestMetadata {
tenant_id: None,
doc_id: Some("doc-abc-123".to_string()),
received_at: None,
original_source: None,
attributes: None,
};

§received_at: Option<DateTime<Utc>>
Optional timestamp when the content was received.
When None, defaults to the current UTC time at ingest.
When MetadataPolicy::reject_future_timestamps is enabled, the timestamp is
validated and values that lie in the future are rejected.
§Example
use ingest::IngestMetadata;
use chrono::Utc;
let metadata = IngestMetadata {
tenant_id: None,
doc_id: None,
received_at: Some(Utc::now()),
original_source: None,
attributes: None,
};

§original_source: Option<String>
Optional original source identifier (e.g., URL or external ID).
This is a human-readable reference to where the content originated. Control characters are stripped during sanitization.
§Example
use ingest::IngestMetadata;
let metadata = IngestMetadata {
tenant_id: None,
doc_id: None,
received_at: None,
original_source: Some("https://example.com/source".to_string()),
attributes: None,
};

§attributes: Option<Value>
Arbitrary JSON attributes for extensibility.
This field can store any JSON-serializable data for application-specific
use cases. Size is limited by
MetadataPolicy::max_attribute_bytes
when configured.
§Example
use ingest::IngestMetadata;
use serde_json::json;
let metadata = IngestMetadata {
tenant_id: None,
doc_id: None,
received_at: None,
original_source: None,
attributes: Some(json!({
"category": "report",
"priority": "high",
"metadata": {
"author": "Jane Smith",
"department": "Engineering"
}
})),
};

Trait Implementations§
Source§impl Clone for IngestMetadata
Source§fn clone(&self) -> IngestMetadata
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more