// kizzasi_inference/lib.rs

//! # kizzasi-inference
//!
//! Unified autoregressive inference engine for Kizzasi AGSP.
//!
//! This crate provides the core inference loop that combines:
//! - Signal tokenization (via kizzasi-tokenizer)
//! - Model forward pass (via kizzasi-model)
//! - Constraint enforcement (via kizzasi-logic)
//!
//! ## The Inference Pipeline
//!
//! ```text
//! Raw Signal → Tokenize → Model → Constrain → Decode → Output
//!     ↑                     ↓
//!     └──── Hidden State ───┘
//! ```
//!
//! ## Autoregressive Prediction
//!
//! As described in a.md, the AGSP predicts "the next value" based on
//! history, similar to how LLMs predict "the next token". This crate
//! implements:
//!
//! - Single-step prediction: `step(input) -> output`
//! - Multi-step rollout: `rollout(input, steps) -> [outputs]`
//! - Streaming inference: Real-time continuous prediction
//!
//! ## COOLJAPAN Ecosystem
//!
//! This crate follows KIZZASI_POLICY.md and coordinates between
//! all kizzasi-* crates.
// Private implementation modules. The useful types from each are re-exported
// from the crate root further below, so the module layout stays an internal
// detail.
mod batch; // batching/scheduling types (BatchScheduler, Priority, ...)
mod checkpoint; // checkpoint management (Checkpoint, CheckpointManager)
mod compression; // state compression (StateCompressor, CompressedState)
mod context; // inference context (InferenceContext, ContextConfig)
mod engine; // the main engine (InferenceEngine, EngineConfig)
mod ensemble; // multi-model ensembles (ModelEnsemble, EnsembleStrategy)
mod error; // crate error types (InferenceError, InferenceResult)
mod lora; // LoRA adapter types (LoraAdapter, LoraAdapterManager)
mod metrics; // metrics & profiling (InferenceMetrics, InferenceProfiler)
mod multimodal; // multi-modal pipelines (MultiModalPipeline, FusionStrategy)
mod pipeline; // pre/post-processing hooks (Pipeline, PipelineBuilder)
mod pool; // buffer pooling (TensorPool, PooledBuffer)
mod precision; // numeric precision handling (PrecisionConverter, PrecisionMode)
mod registry; // model registry (ModelRegistry, ModelBuilder)
mod sampling; // sampling strategies (Sampler, BeamSearch, ...)
mod speculative; // speculative decoding (SpeculativeDecoder)
pub mod temporal; // temporal constraints (LTL/STL formulas) — public module

// Hot model swapping is only compiled with the `async` feature.
#[cfg(feature = "async")]
mod hotswap;

// Streaming inference support, gated behind the `streaming` feature.
#[cfg(feature = "streaming")]
pub mod streaming;

// Transport adapters; built when any of the transport features is enabled.
#[cfg(any(feature = "websocket", feature = "mqtt", feature = "grpc"))]
pub mod adapters;

// Model version management; always available and kept public as a module.
pub mod versioning;
61
// --- Crate-root re-exports -------------------------------------------------
// Flatten the public types of the private modules above so downstream code
// can write `kizzasi_inference::InferenceEngine` etc. without knowing the
// internal module layout.
pub use batch::{
    BatchConfig, BatchRequest, BatchResponse, BatchScheduler, Priority, SchedulerStats,
};
pub use checkpoint::{Checkpoint, CheckpointManager, CheckpointMetadata};
pub use compression::{CompressedState, CompressionMethod, StateCompressor};
pub use context::{ContextConfig, InferenceContext};
pub use engine::{EngineConfig, InferenceEngine, InferenceMode, ModelInfo};
pub use ensemble::{EnsembleBuilder, EnsembleConfig, EnsembleStrategy, ModelEnsemble};
pub use error::{InferenceError, InferenceResult};

// Hot-swap types are only present with the `async` feature, matching the
// `mod hotswap` gate above.
#[cfg(feature = "async")]
pub use hotswap::{HotSwapManager, ModelInstance, SwapEvent, SwapStrategy};

pub use lora::{
    LoraAdapter, LoraAdapterBuilder, LoraAdapterLoader, LoraAdapterManager, LoraConfig,
};
pub use metrics::{InferenceMetrics, InferenceProfiler, MetricsSummary, ProfileBreakdown, Timer};
pub use multimodal::{
    FusionStrategy, ModalityConfig, ModalityPreprocessor, ModalityType, MultiModalPipeline,
    MultiModalPipelineBuilder,
};
pub use pipeline::{Pipeline, PipelineBuilder, PostprocessHook, PreprocessHook};
pub use pool::{BufferKey, PoolStats, PooledBuffer, TensorPool};
pub use precision::{
    ComputePrecision, PrecisionConfig, PrecisionConverter, PrecisionMode, PrecisionStats,
};
pub use registry::{ModelBuilder, ModelConfig, ModelRegistry};
pub use sampling::{
    AdaptiveRejectionSampler, Beam, BeamSearch, ConstrainedBeamSearch, ConstraintFn,
    CustomSamplingFn, FallbackStrategy, RejectionSampler, Sampler, SamplingConfig,
    SamplingStrategy,
};
pub use speculative::{SpeculativeConfig, SpeculativeDecoder};
pub use temporal::{LTLFormula, STLFormula, TemporalBound, TemporalConstraintEnforcer};
// `versioning::FallbackStrategy` would collide with `sampling::FallbackStrategy`
// (re-exported just above), so it is renamed with a `Version` prefix here;
// `ModelStats` gets the same prefix (presumably for the same reason — confirm).
pub use versioning::{
    FallbackStrategy as VersionFallbackStrategy, HealthCheck, HealthStatus, ModelMetadata,
    ModelStats as VersionModelStats, ModelVersion, ModelVersionManager, VersioningConfig,
};

// Re-export core types from sibling kizzasi crates and scirs2_core so the
// common signatures are reachable from this crate's root.
pub use kizzasi_core::SignalPredictor;
pub use kizzasi_model::{AutoregressiveModel, ModelType};
pub use kizzasi_tokenizer::SignalTokenizer;
pub use scirs2_core::ndarray::{Array1, Array2};
106
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: constructing a `ModelType` variant through the crate-root
    /// re-export proves the `pub use kizzasi_model::ModelType` path resolves.
    #[test]
    fn test_imports() {
        let variant = ModelType::Mamba2;
        let _: ModelType = variant;
    }
}
116}