// reasoninglayer 0.2.1
//
// Rust client SDK for the Reasoning Layer API
// Documentation
//! Ingestion DTOs (markdown, document/OCR, RDF, session lifecycle).
//!
//! The ingestion surface is broad: many session-management endpoints currently return
//! free-form JSON on the backend itself. We type the request/response shapes that the
//! backend exposes via dto/ingestion.rs and dto/document_ingestion.rs; the rest stay as
//! `serde_json::Value` until the backend's contract solidifies.

use std::collections::BTreeMap;

use serde::{Deserialize, Serialize};

/// Free-form ingestion configuration, passed through as raw JSON.
///
/// This is a plain alias for [`serde_json::Value`], so no field names or value
/// types are checked on the client side. The backend currently uses an open
/// struct for this configuration; keeping it untyped here avoids locking the
/// SDK to a shape that may still change.
pub type IngestionConfigDto = serde_json::Value;

/// Free-form pending-review payload, passed through as raw JSON.
///
/// This is a plain alias for [`serde_json::Value`]; within this module it is the
/// element type of [`IngestMarkdownResponse::pending_review`].
///
/// NOTE(review): intended to mirror `crate::dto::review::PendingReviewDto`; that
/// type is not visible from this module, so confirm the shape before relying on it.
pub type IngestionPendingReviewDto = serde_json::Value;

/// Untyped ingestion statistics.
///
/// The statistics object has no typed fields yet, so every key of the JSON
/// object is captured verbatim in [`extra`](Self::extra).
///
/// `extra` is flattened: keys are read from (and written back to) the
/// statistics object itself rather than a nested `"extra"` member. Without
/// `flatten`, any key other than a literal `"extra"` would be silently dropped
/// during deserialization. An empty map serializes as `{}`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IngestionStatsDto {
    /// All keys of the statistics object, keyed by their JSON name.
    #[serde(flatten)]
    pub extra: BTreeMap<String, serde_json::Value>,
}

/// Request body for ingesting a single Markdown document.
///
/// `content` and `owner_id` are required in both directions; `config` is
/// optional and left out of the serialized JSON entirely when it is `None`.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct IngestMarkdownRequest {
    /// Document content (presumably raw Markdown text).
    pub content: String,
    /// Owner identifier; always serialized.
    pub owner_id: String,
    /// Optional free-form ingestion configuration; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub config: Option<IngestionConfigDto>,
}

/// Response body for a Markdown ingestion call.
///
/// Only `success` must be present when deserializing; every other field falls
/// back to its default value if the JSON omits it.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct IngestMarkdownResponse {
    /// Success flag reported in the response (required).
    pub success: bool,
    /// Session identifier, if one is present; omitted from serialized output
    /// when `None`.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub session_id: Option<String>,
    /// Ingestion statistics; `IngestionStatsDto::default()` when absent.
    #[serde(default)]
    pub stats: IngestionStatsDto,
    /// Free-form pending-review entries; empty when absent.
    #[serde(default)]
    pub pending_review: Vec<IngestionPendingReviewDto>,
}

/// One document entry inside [`IngestMarkdownBatchRequest::documents`].
///
/// Both fields are required when serializing and deserializing.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct MarkdownDocumentDto {
    /// Identifier for this document (presumably caller-chosen — confirm
    /// against the backend contract).
    pub document_id: String,
    /// Document content (presumably raw Markdown text).
    pub content: String,
}

/// Request body for ingesting several Markdown documents in one call.
///
/// `owner_id` and `config` are given once for the whole request rather than
/// per document.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct IngestMarkdownBatchRequest {
    /// Documents to ingest (required; may be empty as far as this type is
    /// concerned).
    pub documents: Vec<MarkdownDocumentDto>,
    /// Owner identifier; always serialized.
    pub owner_id: String,
    /// Optional free-form ingestion configuration; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub config: Option<IngestionConfigDto>,
}

/// Where the document bytes come from: inline base64 data or a URL for the
/// backend to fetch.
///
/// Serialized with serde's default externally-tagged representation and
/// `snake_case` variant names, i.e. `{"base64": {"data": "...", "filename": "..."}}`
/// or `{"url": "..."}`.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum DocumentSource {
    /// Inline document: `data` holds the base64 payload, `filename` its name.
    Base64 {
        data: String,
        filename: String,
    },
    /// Remote document identified by a URL string.
    Url(String),
}

/// Document format tag.
///
/// Each variant is (de)serialized as its lowercased name: `"pdf"`, `"docx"`,
/// `"pptx"`, `"xlsx"`, `"html"`, `"image"`, `"markdown"`, `"unknown"`.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum DocumentType {
    /// Wire value `"pdf"`.
    Pdf,
    /// Wire value `"docx"`.
    Docx,
    /// Wire value `"pptx"`.
    Pptx,
    /// Wire value `"xlsx"`.
    Xlsx,
    /// Wire value `"html"`.
    Html,
    /// Wire value `"image"`.
    Image,
    /// Wire value `"markdown"`.
    Markdown,
    /// Wire value `"unknown"`.
    Unknown,
}

/// Parser selection for document ingestion.
///
/// Variants are (de)serialized as their lowercased names with no separator:
/// `"docling"`, `"dotsocr"`, `"olmocr"`, `"auto"`. [`DocumentParser::Auto`] is
/// the `Default` value.
///
/// NOTE(review): `rename_all = "lowercase"` yields `dotsocr` / `olmocr`, not
/// `dots_ocr` / `olm_ocr` — confirm these match the spellings the backend expects.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum DocumentParser {
    /// Wire value `"docling"`.
    Docling,
    /// Wire value `"dotsocr"`.
    DotsOcr,
    /// Wire value `"olmocr"`.
    OlmOcr,
    /// Wire value `"auto"`; the default variant.
    #[default]
    Auto,
}

#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct OcrConfigDto {
    #[serde(default)]
    pub parser: DocumentParser,
    #[serde(default = "default_true")]
    pub enable_fallback: bool,
    #[serde(default)]
    pub use_llm_enhancement: bool,
    #[serde(default)]
    pub force_ocr: bool,
    #[serde(default)]
    pub languages: Vec<String>,
    #[serde(default)]
    pub extract_images: bool,
    #[serde(default = "default_true")]
    pub extract_tables: bool,
}

fn default_true() -> bool {
    true
}

/// Request body for ingesting a single document (inline base64 or URL).
///
/// `document` and `owner_id` are required. The three optional fields are left
/// out of the serialized JSON entirely when they are `None`.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct IngestDocumentRequest {
    /// Where the document comes from.
    pub document: DocumentSource,
    /// Optional document format; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub document_type: Option<DocumentType>,
    /// Owner identifier; always serialized.
    pub owner_id: String,
    /// Optional OCR / parsing options; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub ocr_config: Option<OcrConfigDto>,
    /// Optional free-form ingestion configuration; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub ingestion_config: Option<IngestionConfigDto>,
}

/// Untyped metadata of a parsed document.
///
/// The metadata object has no typed fields yet, so every key of the JSON
/// object is captured verbatim in [`extra`](Self::extra).
///
/// `extra` is flattened: keys are read from (and written back to) the metadata
/// object itself rather than a nested `"extra"` member. Without `flatten`, any
/// key other than a literal `"extra"` would be silently dropped during
/// deserialization. An empty map serializes as `{}`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ParsedDocumentMetadataDto {
    /// All keys of the metadata object, keyed by their JSON name.
    #[serde(flatten)]
    pub extra: BTreeMap<String, serde_json::Value>,
}

/// Untyped extraction report for an ingested document.
///
/// The report object has no typed fields yet, so every key of the JSON object
/// is captured verbatim in [`extra`](Self::extra).
///
/// `extra` is flattened: keys are read from (and written back to) the report
/// object itself rather than a nested `"extra"` member. Without `flatten`, any
/// key other than a literal `"extra"` would be silently dropped during
/// deserialization. An empty map serializes as `{}`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DocumentExtractionReportDto {
    /// All keys of the report object, keyed by their JSON name.
    #[serde(flatten)]
    pub extra: BTreeMap<String, serde_json::Value>,
}

/// Untyped representation of one table extracted from a document.
///
/// The table object has no typed fields yet, so every key of the JSON object
/// is captured verbatim in [`extra`](Self::extra).
///
/// `extra` is flattened: keys are read from (and written back to) the table
/// object itself rather than a nested `"extra"` member. Without `flatten`, any
/// key other than a literal `"extra"` would be silently dropped during
/// deserialization. An empty map serializes as `{}`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ExtractedTableDto {
    /// All keys of the table object, keyed by their JSON name.
    #[serde(flatten)]
    pub extra: BTreeMap<String, serde_json::Value>,
}

/// Statistics describing one document-parsing run.
///
/// All fields except `warnings` are required when deserializing; a missing
/// `warnings` key yields an empty list.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct DocumentParseStatsDto {
    /// Parse duration in milliseconds (unit taken from the field name).
    pub parse_time_ms: u64,
    /// Name of the parser that was used, as a free-form string.
    pub parser_used: String,
    /// Number of pages processed.
    pub pages_processed: u64,
    /// Number of tables extracted.
    pub tables_extracted: u64,
    /// Number of images extracted.
    pub images_extracted: u64,
    /// Warning messages; empty when absent.
    #[serde(default)]
    pub warnings: Vec<String>,
}

/// Response body for a single-document ingestion call.
///
/// `success`, `parsed_metadata`, `extraction_report` and `parse_stats` carry no
/// `#[serde(default)]`, so deserialization fails if any of them is missing from
/// the JSON. `session_id` and `tables` are optional on the wire.
///
/// NOTE(review): `IngestMarkdownResponse` defaults its `stats` field while the
/// nested objects here are mandatory — confirm the backend always sends them,
/// including on unsuccessful ingestions.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct IngestDocumentResponse {
    /// Success flag reported in the response (required).
    pub success: bool,
    /// Session identifier, if one is present; omitted from serialized output
    /// when `None`.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub session_id: Option<String>,
    /// Metadata of the parsed document (required).
    pub parsed_metadata: ParsedDocumentMetadataDto,
    /// Extraction report (required).
    pub extraction_report: DocumentExtractionReportDto,
    /// Extracted tables; empty when absent.
    #[serde(default)]
    pub tables: Vec<ExtractedTableDto>,
    /// Parsing statistics (required).
    pub parse_stats: DocumentParseStatsDto,
}

/// One document entry inside [`IngestDocumentBatchRequest::documents`].
///
/// `document_id` and `document` are required; `document_type` is optional and
/// left out of the serialized JSON when it is `None`.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct DocumentBatchItem {
    /// Identifier for this document (presumably caller-chosen — confirm
    /// against the backend contract).
    pub document_id: String,
    /// Where the document comes from.
    pub document: DocumentSource,
    /// Optional document format; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub document_type: Option<DocumentType>,
}

/// Request body for ingesting several documents in one call.
///
/// `owner_id`, `ocr_config` and `ingestion_config` are given once for the whole
/// request rather than per document. Optional fields are left out of the
/// serialized JSON when they are `None`.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct IngestDocumentBatchRequest {
    /// Documents to ingest (required; may be empty as far as this type is
    /// concerned).
    pub documents: Vec<DocumentBatchItem>,
    /// Owner identifier; always serialized.
    pub owner_id: String,
    /// Optional OCR / parsing options; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub ocr_config: Option<OcrConfigDto>,
    /// Optional free-form ingestion configuration; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub ingestion_config: Option<IngestionConfigDto>,
}