llm_toolkit/
lib.rs

1//! 'llm-toolkit' - A low-level Rust toolkit for the LLM last mile problem.
2//!
3//! This library provides a set of sharp, reliable, and unopinionated "tools"
4//! for building robust LLM-powered applications in Rust. It focuses on solving
5//! the common and frustrating problems that occur at the boundary between a
6//! strongly-typed Rust application and the unstructured, often unpredictable
7//! string-based responses from LLM APIs.
8
9// Allow the crate to reference itself by name, which is needed for proc macros
10// to work correctly in examples, tests, and bins
11extern crate self as llm_toolkit;
12
13// Re-export tracing for use by generated code from macros
14// Using extern crate to ensure it's accessible via absolute path
15pub extern crate tracing;
16
17// Re-export minijinja for use by ToPrompt derive macro
18pub extern crate minijinja;
19
20/// A derive macro to implement the `ToPrompt` trait for structs.
21///
22/// This macro is available only when the `derive` feature is enabled.
23/// See the [crate-level documentation](index.html#2-structured-prompts-with-derivetoprompt) for usage examples.
24#[cfg(feature = "derive")]
25pub use llm_toolkit_macros::ToPrompt;
26
27/// A derive macro to implement the `ToPromptSet` trait for structs.
28///
29/// This macro is available only when the `derive` feature is enabled.
30#[cfg(feature = "derive")]
31pub use llm_toolkit_macros::ToPromptSet;
32
33/// A derive macro to implement the `ToPromptFor` trait for structs.
34///
35/// This macro is available only when the `derive` feature is enabled.
36#[cfg(feature = "derive")]
37pub use llm_toolkit_macros::ToPromptFor;
38
39/// A macro for creating examples sections in prompts.
40///
41/// This macro is available only when the `derive` feature is enabled.
42#[cfg(feature = "derive")]
43pub use llm_toolkit_macros::examples_section;
44
45/// A procedural attribute macro for defining intent enums with automatic prompt and extractor generation.
46///
47/// This macro is available only when the `derive` feature is enabled.
48#[cfg(feature = "derive")]
49pub use llm_toolkit_macros::define_intent;
50
51/// A derive macro to implement the `Agent` trait for structs.
52///
53/// This macro is available only when the `agent` feature is enabled.
54/// It automatically generates an Agent implementation that uses ClaudeCodeAgent
55/// internally and deserializes responses into a structured output type.
56///
57/// # Example
58///
59/// ```ignore
60/// use llm_toolkit_macros::Agent;
61/// use serde::{Deserialize, Serialize};
62///
63/// #[derive(Serialize, Deserialize)]
64/// struct MyOutput {
65///     result: String,
66/// }
67///
68/// #[derive(Agent)]
69/// #[agent(expertise = "My expertise", output = "MyOutput")]
70/// struct MyAgent;
71/// ```
72#[cfg(feature = "agent")]
73pub use llm_toolkit_macros::Agent;
74
75/// An attribute macro to define agent structs with automatic trait implementations.
76///
77/// This macro is available only when the `agent` feature is enabled.
78#[cfg(feature = "agent")]
79pub use llm_toolkit_macros::agent;
80
81/// A derive macro to implement the `TypeMarker` trait for structs.
82///
83/// This macro is available only when the `agent` feature is enabled.
84/// It automatically generates a TypeMarker implementation that provides
85/// a type identifier string for type-based orchestrator output retrieval.
86///
87/// # Example
88///
89/// ```ignore
90/// use llm_toolkit::TypeMarker;
91/// use serde::{Deserialize, Serialize};
92///
93/// #[derive(Serialize, Deserialize, TypeMarker)]
94/// struct MyResponse {
95///     #[serde(default = "default_type")]
96///     __type: String,
97///     result: String,
98/// }
99///
100/// fn default_type() -> String {
101///     "MyResponse".to_string()
102/// }
103/// ```
104#[cfg(feature = "agent")]
105pub use llm_toolkit_macros::{TypeMarker, type_marker};
106
107pub mod attachment;
108pub mod context;
109pub mod extract;
110pub mod intent;
111pub mod models;
112pub mod multimodal;
113pub mod prompt;
114pub mod retrieval;
115
116#[cfg(feature = "agent")]
117pub mod observability;
118
119#[cfg(feature = "agent")]
120pub mod agent;
121
122#[cfg(feature = "agent")]
123pub mod orchestrator;
124
125pub use attachment::{Attachment, AttachmentSchema, ToAttachments};
126pub use context::{ContextProfile, Priority, TaskHealth};
127pub use extract::{FlexibleExtractor, MarkdownCodeBlockExtractor};
128#[cfg(feature = "agent")]
129pub use intent::expandable::{
130    Expandable, ReActConfig, ReActError, ReActResult, RegistryError, Selectable, SelectionRegistry,
131    react_loop, simple_tag_selector,
132};
133pub use intent::frame::IntentFrame;
134#[allow(deprecated)]
135pub use intent::{IntentError, IntentExtractor, PromptBasedExtractor};
136pub use models::{ClaudeModel, GeminiModel, Model, ModelError, OpenAIModel};
137pub use multimodal::ImageData;
138pub use prompt::{PromptPart, PromptSetError, ToPrompt, ToPromptFor, ToPromptSet};
139pub use retrieval::Document;
140
141#[cfg(feature = "agent")]
142pub use agent::{Agent, AgentError, AnyAgent, ToExpertise};
143
144#[cfg(feature = "agent")]
145pub use agent::persona::{ContextConfig, Persona, PersonaAgent};
146
147#[cfg(feature = "agent")]
148pub use agent::retrieval::RetrievalAwareAgent;
149
150#[cfg(feature = "agent")]
151pub use agent::expertise::{
152    Anchor, ContextualPrompt, Expertise, KnowledgeFragment, RenderContext, WeightedFragment,
153};
154
155#[cfg(feature = "agent")]
156pub use orchestrator::{
157    BlueprintWorkflow, Orchestrator, OrchestratorError, StrategyMap, TypeMarker,
158};
159
160use extract::ParseError;
161
162/// Extracts a JSON string from a raw LLM response string.
163///
164/// This function uses a `FlexibleExtractor` with its standard strategies
165/// to find and extract a JSON object from a string that may contain extraneous
166/// text, such as explanations or Markdown code blocks.
167///
168/// For more advanced control over extraction strategies, see the `extract::FlexibleExtractor` struct.
169///
170/// # Returns
171///
172/// A `Result` containing the extracted JSON `String` on success, or a `ParseError`
173/// if no JSON could be extracted.
174pub fn extract_json(text: &str) -> Result<String, ParseError> {
175    // Try markdown code block first (common LLM output format)
176    if let Ok(content) = extract_markdown_block_with_lang(text, "json") {
177        return Ok(content);
178    }
179
180    // Also try generic markdown block (might contain JSON without language hint)
181    if let Ok(content) = extract_markdown_block(text) {
182        // Verify it's actually JSON by trying to extract JSON from it
183        let extractor = FlexibleExtractor::new();
184        if let Ok(json) = extractor.extract(&content) {
185            return Ok(json);
186        }
187    }
188
189    // Fall back to standard extraction strategies
190    let extractor = FlexibleExtractor::new();
191    extractor.extract(text)
192}
193
194/// Extracts content from any Markdown code block in the text.
195///
196/// This function searches for the first code block (delimited by triple backticks)
197/// and returns its content. The code block can have any language specifier or none at all.
198///
199/// # Returns
200///
201/// A `Result` containing the extracted code block content on success, or a `ParseError`
202/// if no code block is found.
203pub fn extract_markdown_block(text: &str) -> Result<String, ParseError> {
204    let extractor = MarkdownCodeBlockExtractor::new();
205    extractor.extract(text)
206}
207
208/// Extracts content from a Markdown code block with a specific language.
209///
210/// This function searches for a code block with the specified language hint
211/// (e.g., ```rust, ```python) and returns its content.
212///
213/// # Arguments
214///
215/// * `text` - The text containing the markdown code block
216/// * `lang` - The language specifier to match (e.g., "rust", "python")
217///
218/// # Returns
219///
220/// A `Result` containing the extracted code block content on success, or a `ParseError`
221/// if no code block with the specified language is found.
222pub fn extract_markdown_block_with_lang(text: &str, lang: &str) -> Result<String, ParseError> {
223    let extractor = MarkdownCodeBlockExtractor::with_language(lang.to_string());
224    extractor.extract(text)
225}
226
// Unit tests for the crate-level helpers `extract_json`,
// `extract_markdown_block` and `extract_markdown_block_with_lang`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Inline JSON surrounded by plain prose is extracted.
    #[test]
    fn test_json_extraction() {
        let input = "Some text before {\"key\": \"value\"} and after.";
        let json = extract_json(input).expect("inline JSON should be extracted");
        assert_eq!(json, "{\"key\": \"value\"}");
    }

    /// JSON wrapped in an XML-like tag is extracted.
    #[test]
    fn test_standard_extraction_from_tagged_content() {
        let text = "<answer>{\"type\": \"success\"}</answer>";
        let json = extract_json(text).expect("tagged JSON should be extracted");
        assert_eq!(json, "{\"type\": \"success\"}");
    }

    /// Covers the two Markdown block helpers across several input shapes.
    #[test]
    fn test_markdown_extraction() {
        // Simple code block with no language
        let text1 = "Here is some code:\n```\nlet x = 42;\n```\nAnd some text after.";
        let block1 = extract_markdown_block(text1).expect("untagged block should be found");
        assert_eq!(block1, "let x = 42;");

        // Code block with a specific language (rust)
        let text2 = "Here's Rust code:\n```rust\nfn main() {\n    println!(\"Hello\");\n}\n```";
        let block2 =
            extract_markdown_block_with_lang(text2, "rust").expect("rust block should be found");
        assert_eq!(block2, "fn main() {\n    println!(\"Hello\");\n}");

        // Extracting the rust block when a json block is also present.
        // Raw strings do not process escapes, so the leading blank line is
        // written as a real newline rather than `\n`.
        let text3 = r#"
First a JSON block:
```json
{"key": "value"}
```

Then a Rust block:
```rust
let data = vec![1, 2, 3];
```
"#;
        let block3 = extract_markdown_block_with_lang(text3, "rust")
            .expect("rust block should be found after the json block");
        assert_eq!(block3, "let data = vec![1, 2, 3];");

        // No code block at all
        let text4 = "This text has no code blocks at all.";
        assert!(extract_markdown_block(text4).is_err());

        // Messy surrounding text, blank lines and indented fences
        let text5 = r#"
Lots of text before...


   ```python
def hello():
    print("world")
    return True
   ```


And more text after with various spacing.
"#;
        let block5 = extract_markdown_block_with_lang(text5, "python")
            .expect("python block should be found despite messy spacing");
        assert_eq!(
            block5,
            "def hello():\n    print(\"world\")\n    return True"
        );
    }

    /// A ```json block is the highest-priority source for `extract_json`.
    #[test]
    fn test_extract_json_from_json_markdown_block() {
        let text = r#"Here's the response:
```json
{"status": "success", "count": 42}
```
That's the data you requested."#;
        let json = extract_json(text).expect("JSON block should be extracted");
        assert_eq!(json, r#"{"status": "success", "count": 42}"#);
    }

    /// An untagged Markdown block containing JSON is accepted.
    #[test]
    fn test_extract_json_from_generic_markdown_block() {
        let text = r#"The output is:
```
{"result": "ok", "value": 123}
```
End of output."#;
        let json = extract_json(text).expect("JSON in a generic block should be extracted");
        assert_eq!(json, r#"{"result": "ok", "value": 123}"#);
    }

    /// When both a JSON block and inline JSON exist, the block is preferred.
    #[test]
    fn test_extract_json_priority_json_block_over_inline() {
        let text = r#"Some inline {"inline": "data"} here.
```json
{"block": "data"}
```
More text."#;
        let json = extract_json(text).expect("JSON block should be extracted");
        assert_eq!(json, r#"{"block": "data"}"#);
    }

    /// A ```json block is preferred over an earlier generic block.
    #[test]
    fn test_extract_json_priority_json_block_over_generic_block() {
        let text = r#"First a generic block:
```
{"generic": "block"}
```

Then a JSON block:
```json
{"json": "block"}
```"#;
        let json = extract_json(text).expect("JSON block should be extracted");
        assert_eq!(json, r#"{"json": "block"}"#);
    }

    /// A generic block without JSON falls through to inline extraction.
    #[test]
    fn test_extract_json_fallback_from_non_json_markdown_block() {
        let text = r#"Here's some code:
```
This is not JSON at all
```
But this is JSON: {"fallback": "value"}"#;
        let json = extract_json(text).expect("inline JSON should be extracted as a fallback");
        assert_eq!(json, r#"{"fallback": "value"}"#);
    }

    /// With only a non-JSON (rust) block present, inline JSON is used.
    #[test]
    fn test_extract_json_from_rust_block_fallback() {
        let text = r#"```rust
let x = 42;
```
The result is {"data": "inline"}"#;
        let json = extract_json(text).expect("inline JSON should be extracted as a fallback");
        assert_eq!(json, r#"{"data": "inline"}"#);
    }

    /// Multiline JSON inside a ```json block keeps its structure.
    #[test]
    fn test_extract_json_multiline_in_markdown_block() {
        let text = r#"Response:
```json
{
  "name": "test",
  "values": [1, 2, 3],
  "nested": {
    "key": "value"
  }
}
```"#;
        let json = extract_json(text).expect("multiline JSON block should be extracted");
        // Verify it contains the expected structure
        assert!(json.contains("\"name\": \"test\""));
        assert!(json.contains("\"values\": [1, 2, 3]"));
        assert!(json.contains("\"nested\""));
    }
}