#![doc = include_str!("../README.md")]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![allow(missing_docs)]
#![warn(clippy::all)]
#![deny(unsafe_code)]
#[cfg(feature = "performance")]
#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
#[cfg(feature = "python")]
pub mod bindings;
pub mod constrained;
pub mod constants;
pub mod engine;
pub mod error;
pub mod evaluation;
pub mod llm;
pub mod ingestion;
pub mod m2;
pub mod mcp;
pub mod optimization;
pub mod orchestration;
pub mod processing;
pub mod telemetry;
pub mod thinktool;
#[cfg(feature = "verification")]
pub mod verification;
pub mod web;
pub mod web_interface;
#[allow(unused_imports, unused_variables, unused_mut, dead_code)]
#[allow(clippy::field_reassign_with_default)]
#[allow(clippy::new_without_default)]
#[allow(clippy::redundant_closure)]
#[cfg(feature = "ml-testing")]
pub mod ml_testing;
pub mod traits;
#[cfg(feature = "aesthetic")]
pub mod aesthetic;
#[cfg(feature = "vibe")]
pub mod vibe;
#[cfg(feature = "code-intelligence")]
pub mod code_intelligence;
#[cfg(any(
feature = "llm-orchestration",
feature = "formal-verification",
feature = "caching",
feature = "otel",
feature = "structured-output",
feature = "streaming",
feature = "performance",
feature = "graph-execution",
feature = "got",
feature = "datalog",
feature = "egraph",
feature = "wasm-sandbox",
feature = "tokenization",
feature = "llm-observability",
feature = "probabilistic",
feature = "sampling",
))]
pub mod integrations;
pub mod memory_interface;
#[cfg(feature = "memory")]
pub use reasonkit_mem;
#[cfg(feature = "memory")]
pub use reasonkit_mem::{
embedding, indexing, raptor, retrieval, storage, Error as MemError, Result as MemResult,
};
#[cfg(feature = "memory")]
pub mod rag;
#[cfg(feature = "arf")]
pub mod arf;
#[cfg(feature = "glm46")]
pub mod glm46;
pub mod synthetic;
pub use error::{Error, Result};
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
pub use orchestration::{
ComponentCoordinator, ErrorRecovery, LongHorizonConfig, LongHorizonOrchestrator,
LongHorizonResult, PerformanceTracker, StateManager, TaskGraph, TaskNode, TaskPriority,
TaskStatus,
};
pub use engine::{
Decision, MemoryContext, Profile as ReasoningProfile, ReasoningConfig, ReasoningError,
ReasoningEvent, ReasoningLoop, ReasoningLoopBuilder, ReasoningSession, ReasoningStep, StepKind,
StreamHandle, ThinkToolResult,
};
#[cfg(feature = "python")]
pub use bindings::{
Profile as PyProfile, Reasoner as PyReasoner, ThinkToolOutput as PyThinkToolOutput,
};
#[cfg(feature = "ml-testing")]
pub use ml_testing::{
AdversarialConfig, AdversarialGenerator, AttackMethod, EdgeCaseConfig, EdgeCaseGenerator,
EdgeCaseType, FeatureConstraint, FeatureType, GenerationConfig, GenerationResult, InputSchema,
SynthesisMethod, SyntheticConfig, SyntheticDataGenerator, TestCase, TestCaseType,
};
pub use optimization::{
Demonstration, FieldType, OptimizationError, OptimizationMetrics, OptimizationResult,
OptimizerConfig, PromptOptimizer, Signature, SignatureField, TrainingExample, TrialResult,
};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
#[cfg(feature = "python")]
mod python_module {
    #[allow(unused_imports)]
    use super::*;
    use pyo3::prelude::*;

    /// Python extension-module entry point (`import reasonkit`).
    ///
    /// Delegates all class/function registration to
    /// `crate::bindings::register_bindings`; nothing else is exported here.
    #[pymodule]
    #[pyo3(name = "reasonkit")]
    fn reasonkit(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
        crate::bindings::register_bindings(m)?;
        Ok(())
    }
}
/// High-level category of an ingested document.
///
/// Serialized in `snake_case` (e.g. `"paper"`, `"transcript"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DocumentType {
    /// Academic paper (e.g. from arXiv).
    Paper,
    /// Project or API documentation.
    Documentation,
    /// Source code.
    Code,
    /// Free-form note.
    Note,
    /// Transcribed audio/video.
    Transcript,
    /// Benchmark/evaluation material.
    Benchmark,
}
/// Where a document was obtained from.
///
/// Serialized in `snake_case` (e.g. `"arxiv"`, `"local"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SourceType {
    /// Fetched from arXiv.
    Arxiv,
    /// Fetched from a GitHub repository.
    Github,
    /// Scraped/downloaded from a website.
    Website,
    /// Read from the local filesystem.
    Local,
    /// Retrieved via some API.
    Api,
}
/// Provenance record for a [`Document`].
///
/// Only the fields relevant to `source_type` are expected to be populated
/// (e.g. `arxiv_id` for [`SourceType::Arxiv`], `path` for
/// [`SourceType::Local`]); the rest stay `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Source {
    /// Kind of origin; serialized under the key `"type"`.
    #[serde(rename = "type")]
    pub source_type: SourceType,
    /// Origin URL, when fetched over the network.
    pub url: Option<String>,
    /// Local filesystem path, when read from disk.
    pub path: Option<String>,
    /// arXiv identifier, for arXiv sources.
    pub arxiv_id: Option<String>,
    /// `owner/repo` slug, for GitHub sources.
    pub github_repo: Option<String>,
    /// When the content was retrieved (UTC).
    pub retrieved_at: DateTime<Utc>,
    /// Source-specific version string, if any.
    pub version: Option<String>,
}
/// Author of a document, as recorded in [`Metadata::authors`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Author {
    /// Author's display name.
    pub name: String,
    /// Institutional affiliation, if known.
    pub affiliation: Option<String>,
    /// Contact email, if known.
    pub email: Option<String>,
}
/// Bibliographic and classification metadata for a [`Document`].
///
/// All fields are optional or default-empty; `Default` yields a fully
/// empty record.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Metadata {
    /// Document title.
    pub title: Option<String>,
    /// Ordered author list.
    pub authors: Vec<Author>,
    /// Abstract text; serialized under the key `"abstract"` (a Rust
    /// keyword, hence the rename).
    #[serde(rename = "abstract")]
    pub abstract_text: Option<String>,
    /// Publication date as a free-form string.
    pub date: Option<String>,
    /// Publication venue (journal, conference, ...).
    pub venue: Option<String>,
    /// Citation count, if known.
    pub citations: Option<i32>,
    /// Free-form tags.
    pub tags: Vec<String>,
    /// Category labels (e.g. arXiv categories — assumption, confirm with ingestion).
    pub categories: Vec<String>,
    /// Keyword list.
    pub keywords: Vec<String>,
    /// DOI identifier, if any.
    pub doi: Option<String>,
    /// License identifier/text, if known.
    pub license: Option<String>,
}
/// Identifiers of the stored embeddings for a [`Chunk`], one per
/// embedding family; `None` means that embedding has not been computed.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EmbeddingIds {
    /// Dense (single-vector) embedding id.
    pub dense: Option<String>,
    /// Sparse (lexical) embedding id.
    pub sparse: Option<String>,
    /// ColBERT (multi-vector) embedding id.
    pub colbert: Option<String>,
}
/// A contiguous slice of a document's text, the unit of embedding and
/// retrieval.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Chunk {
    /// Unique chunk identifier.
    pub id: Uuid,
    /// The chunk's text content.
    pub text: String,
    /// Position of this chunk within the document's chunk sequence.
    pub index: usize,
    /// Start offset in the source text (character offset per the field
    /// name — confirm against the chunker).
    pub start_char: usize,
    /// End offset in the source text (exclusive end assumed — confirm).
    pub end_char: usize,
    /// Token count, if it has been computed.
    pub token_count: Option<usize>,
    /// Section heading the chunk falls under, if known.
    pub section: Option<String>,
    /// Source page number, for paginated formats.
    pub page: Option<usize>,
    /// Ids of any embeddings stored for this chunk.
    pub embedding_ids: EmbeddingIds,
}
/// Lifecycle state of a document in the processing pipeline.
///
/// Serialized in `snake_case`; defaults to [`ProcessingState::Pending`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ProcessingState {
    /// Not yet picked up for processing (the default).
    #[default]
    Pending,
    /// Currently being processed.
    Processing,
    /// All processing finished successfully.
    Completed,
    /// Processing aborted with errors.
    Failed,
}
/// Progress flags and errors accumulated while processing a [`Document`].
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ProcessingStatus {
    /// Overall pipeline state.
    pub status: ProcessingState,
    /// Whether the document has been split into chunks.
    pub chunked: bool,
    /// Whether chunk embeddings have been computed.
    pub embedded: bool,
    /// Whether the chunks have been added to the search index.
    pub indexed: bool,
    /// Whether RAPTOR (hierarchical summarization) has run.
    pub raptor_processed: bool,
    /// Error messages collected along the way.
    pub errors: Vec<String>,
}
/// Markup format of a document's raw content.
///
/// Serialized in `snake_case`; defaults to [`ContentFormat::Text`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ContentFormat {
    /// Plain text (the default).
    #[default]
    Text,
    /// Markdown markup.
    Markdown,
    /// HTML markup.
    Html,
    /// LaTeX markup.
    Latex,
}
/// The text payload of a [`Document`], plus simple derived statistics.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DocumentContent {
    /// The raw document text.
    pub raw: String,
    /// Markup format of `raw`.
    pub format: ContentFormat,
    /// Language code (e.g. `"en"` as set by `Document::with_content`).
    pub language: String,
    /// Whitespace-separated word count of `raw`.
    pub word_count: usize,
    /// Character count of `raw` (note: `Document::with_content` populates
    /// this from the string length).
    pub char_count: usize,
}
/// A fully described document: provenance, content, metadata, processing
/// state, and its chunks. This is the core unit the rest of the crate
/// (and, with the `memory` feature, `reasonkit_mem`) operates on.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Document {
    /// Unique document identifier.
    pub id: Uuid,
    /// Document category; serialized under the key `"type"`.
    #[serde(rename = "type")]
    pub doc_type: DocumentType,
    /// Where the document came from.
    pub source: Source,
    /// Text payload and derived statistics.
    pub content: DocumentContent,
    /// Bibliographic/classification metadata.
    pub metadata: Metadata,
    /// Pipeline progress and errors.
    pub processing: ProcessingStatus,
    /// Chunks produced from `content`.
    pub chunks: Vec<Chunk>,
    /// Creation timestamp (UTC).
    pub created_at: DateTime<Utc>,
    /// Last-update timestamp, if the document was ever modified.
    pub updated_at: Option<DateTime<Utc>>,
}
impl Document {
    /// Creates a new, empty document of the given type from `source`.
    ///
    /// The document receives a fresh random UUID, default (empty) content,
    /// metadata and processing status, no chunks, and `created_at` set to
    /// the current UTC time; `updated_at` starts as `None`.
    pub fn new(doc_type: DocumentType, source: Source) -> Self {
        Self {
            id: Uuid::new_v4(),
            doc_type,
            source,
            content: DocumentContent::default(),
            metadata: Metadata::default(),
            processing: ProcessingStatus::default(),
            chunks: Vec::new(),
            created_at: Utc::now(),
            updated_at: None,
        }
    }

    /// Builder-style setter for the raw text content.
    ///
    /// Derives `word_count` (whitespace-separated tokens) and `char_count`
    /// from the text; the format defaults to [`ContentFormat::Text`] and the
    /// language to `"en"`.
    pub fn with_content(mut self, raw: String) -> Self {
        let word_count = raw.split_whitespace().count();
        // Count Unicode scalar values, not bytes: `String::len()` is the
        // UTF-8 *byte* length, which over-counts for non-ASCII text even
        // though this field is named `char_count`. Identical for ASCII.
        let char_count = raw.chars().count();
        self.content = DocumentContent {
            raw,
            format: ContentFormat::Text,
            language: "en".to_string(),
            word_count,
            char_count,
        };
        self
    }

    /// Builder-style setter replacing the document metadata wholesale.
    pub fn with_metadata(mut self, metadata: Metadata) -> Self {
        self.metadata = metadata;
        self
    }
}
#[cfg(feature = "memory")]
impl From<Document> for reasonkit_mem::Document {
    /// Converts a core [`Document`] into the `reasonkit_mem` representation.
    ///
    /// This is a purely mechanical, field-by-field mapping: each enum is
    /// matched exhaustively onto its `reasonkit_mem` counterpart and every
    /// struct field is moved (not cloned) across. No data is dropped,
    /// transformed, or defaulted; the two document models mirror each other
    /// field for field.
    fn from(doc: Document) -> Self {
        use reasonkit_mem::types::{
            Author as MemAuthor, Chunk as MemChunk, ContentFormat as MemContentFormat,
            DocumentContent as MemDocumentContent, DocumentType as MemDocumentType,
            EmbeddingIds as MemEmbeddingIds, Metadata as MemMetadata,
            ProcessingState as MemProcessingState, ProcessingStatus as MemProcessingStatus,
            Source as MemSource, SourceType as MemSourceType,
        };
        // Exhaustive enum translations — a new variant on either side
        // becomes a compile error here, which is intentional.
        let doc_type = match doc.doc_type {
            DocumentType::Paper => MemDocumentType::Paper,
            DocumentType::Documentation => MemDocumentType::Documentation,
            DocumentType::Code => MemDocumentType::Code,
            DocumentType::Note => MemDocumentType::Note,
            DocumentType::Transcript => MemDocumentType::Transcript,
            DocumentType::Benchmark => MemDocumentType::Benchmark,
        };
        let source_type = match doc.source.source_type {
            SourceType::Arxiv => MemSourceType::Arxiv,
            SourceType::Github => MemSourceType::Github,
            SourceType::Website => MemSourceType::Website,
            SourceType::Local => MemSourceType::Local,
            SourceType::Api => MemSourceType::Api,
        };
        let source = MemSource {
            source_type,
            url: doc.source.url,
            path: doc.source.path,
            arxiv_id: doc.source.arxiv_id,
            github_repo: doc.source.github_repo,
            retrieved_at: doc.source.retrieved_at,
            version: doc.source.version,
        };
        let format = match doc.content.format {
            ContentFormat::Text => MemContentFormat::Text,
            ContentFormat::Markdown => MemContentFormat::Markdown,
            ContentFormat::Html => MemContentFormat::Html,
            ContentFormat::Latex => MemContentFormat::Latex,
        };
        let content = MemDocumentContent {
            raw: doc.content.raw,
            format,
            language: doc.content.language,
            word_count: doc.content.word_count,
            char_count: doc.content.char_count,
        };
        // Authors and chunks are moved element-by-element into the mirror
        // types via `into_iter`, avoiding clones of the owned strings.
        let authors = doc
            .metadata
            .authors
            .into_iter()
            .map(|a| MemAuthor {
                name: a.name,
                affiliation: a.affiliation,
                email: a.email,
            })
            .collect();
        let metadata = MemMetadata {
            title: doc.metadata.title,
            authors,
            abstract_text: doc.metadata.abstract_text,
            date: doc.metadata.date,
            venue: doc.metadata.venue,
            citations: doc.metadata.citations,
            tags: doc.metadata.tags,
            categories: doc.metadata.categories,
            keywords: doc.metadata.keywords,
            doi: doc.metadata.doi,
            license: doc.metadata.license,
        };
        let status = match doc.processing.status {
            ProcessingState::Pending => MemProcessingState::Pending,
            ProcessingState::Processing => MemProcessingState::Processing,
            ProcessingState::Completed => MemProcessingState::Completed,
            ProcessingState::Failed => MemProcessingState::Failed,
        };
        let processing = MemProcessingStatus {
            status,
            chunked: doc.processing.chunked,
            embedded: doc.processing.embedded,
            indexed: doc.processing.indexed,
            raptor_processed: doc.processing.raptor_processed,
            errors: doc.processing.errors,
        };
        let chunks = doc
            .chunks
            .into_iter()
            .map(|c| {
                let embedding_ids = MemEmbeddingIds {
                    dense: c.embedding_ids.dense,
                    sparse: c.embedding_ids.sparse,
                    colbert: c.embedding_ids.colbert,
                };
                MemChunk {
                    id: c.id,
                    text: c.text,
                    index: c.index,
                    start_char: c.start_char,
                    end_char: c.end_char,
                    token_count: c.token_count,
                    section: c.section,
                    page: c.page,
                    embedding_ids,
                }
            })
            .collect();
        reasonkit_mem::Document {
            id: doc.id,
            doc_type,
            source,
            content,
            metadata,
            processing,
            chunks,
            created_at: doc.created_at,
            updated_at: doc.updated_at,
        }
    }
}
/// Which retrieval path produced a [`SearchResult`].
///
/// Serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum MatchSource {
    /// Dense (vector-similarity) retrieval.
    Dense,
    /// Sparse (lexical) retrieval.
    Sparse,
    /// Combined dense + sparse retrieval.
    Hybrid,
    /// RAPTOR (hierarchical summary) retrieval.
    Raptor,
}
/// A single retrieval hit: a chunk, its score, and how it was matched.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Relevance score (higher is better — confirm against the retriever).
    pub score: f32,
    /// Id of the document the chunk belongs to.
    pub document_id: Uuid,
    /// The matched chunk.
    pub chunk: Chunk,
    /// Retrieval path that produced this hit.
    pub match_source: MatchSource,
}
#[cfg(feature = "memory")]
pub use reasonkit_mem::RetrievalConfig;
#[cfg(not(feature = "memory"))]
/// Stand-in retrieval configuration used when the `memory` feature is
/// disabled (with the feature on, `reasonkit_mem::RetrievalConfig` is
/// re-exported instead — keep the two shapes in sync).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RetrievalConfig {
    /// Maximum number of results to return.
    pub top_k: usize,
    /// Minimum score for a result to be kept.
    pub min_score: f32,
    /// Blend weight, presumably dense-vs-sparse for hybrid retrieval —
    /// TODO confirm against `reasonkit_mem`.
    pub alpha: f32,
    /// Whether to consult RAPTOR summary nodes.
    pub use_raptor: bool,
    /// Whether to apply a reranking pass.
    pub rerank: bool,
}
#[cfg(not(feature = "memory"))]
impl Default for RetrievalConfig {
    /// Defaults: top 10 results, no score floor, alpha 0.7, RAPTOR and
    /// reranking off.
    fn default() -> Self {
        Self {
            top_k: 10,
            min_score: 0.0,
            alpha: 0.7,
            use_raptor: false,
            rerank: false,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal local-filesystem `Source` for the tests below.
    fn local_source(path: Option<&str>) -> Source {
        Source {
            source_type: SourceType::Local,
            url: None,
            path: path.map(str::to_string),
            arxiv_id: None,
            github_repo: None,
            retrieved_at: Utc::now(),
            version: None,
        }
    }

    // Smoke test: merely compiling and linking the crate is the assertion.
    #[test]
    fn test_core_compiles() {}

    #[test]
    fn test_document_creation() {
        let doc = Document::new(DocumentType::Note, local_source(Some("/test.txt")));
        assert_eq!(doc.doc_type, DocumentType::Note);
    }

    #[test]
    fn test_document_with_content() {
        let doc = Document::new(DocumentType::Note, local_source(None))
            .with_content("Hello world test".to_string());
        assert_eq!(doc.content.word_count, 3);
        assert_eq!(doc.content.char_count, 16);
    }

    #[test]
    fn test_version_available() {
        assert!(!VERSION.is_empty());
    }
}