1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
//! orion-core: Agent harness for local LLM inference.
//!
//! Provides the agent loop, context pipeline, tool execution,
//! and event system for building AI chat interfaces on top of
//! local model backends (llama.cpp, MLX, etc.).
//!
//! # Architecture
//!
//! ```text
//! User prompt
//!   → Agent.prompt()
//!   → Context pipeline (prune pairs + template format)
//!   → LlmBackend.generate() (streaming tokens)
//!   → Tool execution loop (parse calls → run tools → feed results back)
//!   → AgentEvent stream → UI
//! ```
//!
//! The crate is backend-agnostic. Implement [`backend::LlmBackend`]
//! for your inference engine and the agent handles the rest.
//!
//! # Example
//!
//! Implement [`LlmBackend`] for your engine, then drive the agent. The mock
//! backend below streams a canned reply so the whole loop runs end to end (a
//! complete version lives in `examples/mock_backend.rs`):
//!
//! ```
//! use std::sync::Arc;
//! use std::sync::atomic::AtomicBool;
//! use orion_core::{
//!     Agent, AgentConfig, AgentEvent, CoreResult, GenerationResult,
//!     InferenceParams, LlmBackend, TokenCallback,
//! };
//! use tokio::sync::mpsc;
//!
//! struct MockBackend;
//! impl LlmBackend for MockBackend {
//!     fn generate(
//!         &self,
//!         _prompt: &str,
//!         _params: &InferenceParams,
//!         _abort: Arc<AtomicBool>,
//!         mut on_token: TokenCallback,
//!     ) -> CoreResult<GenerationResult> {
//!         on_token("Hi!", 1, 10.0);
//!         Ok(GenerationResult {
//!             text: "Hi!".into(),
//!             tokens_generated: 1,
//!             prompt_tokens: 0,
//!             tokens_per_sec: 10.0,
//!             time_to_first_token_ms: 1.0,
//!             generation_time_ms: 1.0,
//!         })
//!     }
//!     fn tokenize_count(&self, text: &str) -> CoreResult<u32> {
//!         Ok(text.split_whitespace().count() as u32)
//!     }
//!     fn is_ready(&self) -> bool { true }
//! }
//!
//! # fn main() {
//! let rt = tokio::runtime::Runtime::new().unwrap();
//! rt.block_on(async {
//!     let mut agent = Agent::new(AgentConfig::default());
//!     let backend: Arc<dyn LlmBackend> = Arc::new(MockBackend);
//!     let (tx, mut rx) = mpsc::unbounded_channel::<AgentEvent>();
//!
//!     // Consume events concurrently while the agent generates.
//!     let consumer = tokio::spawn(async move {
//!         let mut reply = String::new();
//!         while let Some(event) = rx.recv().await {
//!             if let AgentEvent::MessageDelta { delta, .. } = event {
//!                 reply.push_str(&delta);
//!             }
//!         }
//!         reply
//!     });
//!
//!     agent.prompt("Hello", backend, tx).await.unwrap();
//!     assert_eq!(consumer.await.unwrap(), "Hi!");
//! });
//! # }
//! ```
/// The [`Agent`] orchestrator and its configuration.
/// The [`LlmBackend`] trait and inference parameter/result types.
/// Context-window management: pruning, token budgeting, and prompt formatting.
/// Error and result types for the crate.
/// The [`AgentEvent`] stream emitted while the agent runs.
/// Conversation data types: [`Message`], [`Role`], and tool call/result records.
/// Chat prompt templates for the supported model families.
/// The `Tool` trait (feature `tools`), tool schemas, and tool-call parsing.
pub use ;
pub use ;
pub use ;
pub use ;
pub use AgentEvent;
pub use ;
pub use ;
pub use ;
pub use ;