llm_toolkit/
lib.rs

1//! 'llm-toolkit' - A low-level Rust toolkit for the LLM last mile problem.
2//!
3//! This library provides a set of sharp, reliable, and unopinionated "tools"
4//! for building robust LLM-powered applications in Rust. It focuses on solving
5//! the common and frustrating problems that occur at the boundary between a
6//! strongly-typed Rust application and the unstructured, often unpredictable
7//! string-based responses from LLM APIs.
8
9// Allow the crate to reference itself by name, which is needed for proc macros
10// to work correctly in examples, tests, and bins
11extern crate self as llm_toolkit;
12
13// Re-export tracing for use by generated code from macros
14// Using extern crate to ensure it's accessible via absolute path
15pub extern crate tracing;
16
17// Re-export minijinja for use by ToPrompt derive macro
18pub extern crate minijinja;
19
20/// A derive macro to implement the `ToPrompt` trait for structs.
21///
22/// This macro is available only when the `derive` feature is enabled.
23/// See the [crate-level documentation](index.html#2-structured-prompts-with-derivetoprompt) for usage examples.
24#[cfg(feature = "derive")]
25pub use llm_toolkit_macros::ToPrompt;
26
27/// A derive macro to implement the `ToPromptSet` trait for structs.
28///
29/// This macro is available only when the `derive` feature is enabled.
30#[cfg(feature = "derive")]
31pub use llm_toolkit_macros::ToPromptSet;
32
33/// A derive macro to implement the `ToPromptFor` trait for structs.
34///
35/// This macro is available only when the `derive` feature is enabled.
36#[cfg(feature = "derive")]
37pub use llm_toolkit_macros::ToPromptFor;
38
39/// A macro for creating examples sections in prompts.
40///
41/// This macro is available only when the `derive` feature is enabled.
42#[cfg(feature = "derive")]
43pub use llm_toolkit_macros::examples_section;
44
45/// A procedural attribute macro for defining intent enums with automatic prompt and extractor generation.
46///
47/// This macro is available only when the `derive` feature is enabled.
48#[cfg(feature = "derive")]
49pub use llm_toolkit_macros::define_intent;
50
51/// A derive macro to implement the `Agent` trait for structs.
52///
53/// This macro is available only when the `agent` feature is enabled.
54/// It automatically generates an Agent implementation that uses ClaudeCodeAgent
55/// internally and deserializes responses into a structured output type.
56///
57/// # Example
58///
59/// ```ignore
/// use llm_toolkit::Agent;
61/// use serde::{Deserialize, Serialize};
62///
63/// #[derive(Serialize, Deserialize)]
64/// struct MyOutput {
65///     result: String,
66/// }
67///
68/// #[derive(Agent)]
69/// #[agent(expertise = "My expertise", output = "MyOutput")]
70/// struct MyAgent;
71/// ```
72#[cfg(feature = "agent")]
73pub use llm_toolkit_macros::Agent;
74
75/// An attribute macro to define agent structs with automatic trait implementations.
76///
77/// This macro is available only when the `agent` feature is enabled.
78#[cfg(feature = "agent")]
79pub use llm_toolkit_macros::agent;
80
81/// A derive macro to implement the `TypeMarker` trait for structs.
82///
83/// This macro is available only when the `agent` feature is enabled.
84/// It automatically generates a TypeMarker implementation that provides
85/// a type identifier string for type-based orchestrator output retrieval.
86///
87/// # Example
88///
89/// ```ignore
90/// use llm_toolkit::TypeMarker;
91/// use serde::{Deserialize, Serialize};
92///
93/// #[derive(Serialize, Deserialize, TypeMarker)]
94/// struct MyResponse {
95///     #[serde(default = "default_type")]
96///     __type: String,
97///     result: String,
98/// }
99///
100/// fn default_type() -> String {
101///     "MyResponse".to_string()
102/// }
103/// ```
104#[cfg(feature = "agent")]
105pub use llm_toolkit_macros::{TypeMarker, type_marker};
106
107pub mod attachment;
108pub mod extract;
109pub mod intent;
110pub mod multimodal;
111pub mod prompt;
112pub mod retrieval;
113
114#[cfg(feature = "agent")]
115pub mod observability;
116
117#[cfg(feature = "agent")]
118pub mod agent;
119
120#[cfg(feature = "agent")]
121pub mod orchestrator;
122
123pub use attachment::{Attachment, AttachmentSchema, ToAttachments};
124pub use extract::{FlexibleExtractor, MarkdownCodeBlockExtractor};
125#[cfg(feature = "agent")]
126pub use intent::expandable::{
127    Expandable, ReActConfig, ReActError, ReActResult, RegistryError, Selectable, SelectionRegistry,
128    react_loop, simple_tag_selector,
129};
130pub use intent::frame::IntentFrame;
131#[allow(deprecated)]
132pub use intent::{IntentError, IntentExtractor, PromptBasedExtractor};
133pub use multimodal::ImageData;
134pub use prompt::{PromptPart, PromptSetError, ToPrompt, ToPromptFor, ToPromptSet};
135pub use retrieval::Document;
136
137#[cfg(feature = "agent")]
138pub use agent::{Agent, AgentError};
139
140#[cfg(feature = "agent")]
141pub use agent::persona::{ContextConfig, Persona, PersonaAgent};
142
143#[cfg(feature = "agent")]
144pub use agent::retrieval::RetrievalAwareAgent;
145
146#[cfg(feature = "agent")]
147pub use orchestrator::{
148    BlueprintWorkflow, Orchestrator, OrchestratorError, StrategyMap, TypeMarker,
149};
150
151use extract::ParseError;
152
153/// Extracts a JSON string from a raw LLM response string.
154///
155/// This function uses a `FlexibleExtractor` with its standard strategies
156/// to find and extract a JSON object from a string that may contain extraneous
157/// text, such as explanations or Markdown code blocks.
158///
159/// For more advanced control over extraction strategies, see the `extract::FlexibleExtractor` struct.
160///
161/// # Returns
162///
163/// A `Result` containing the extracted JSON `String` on success, or a `ParseError`
164/// if no JSON could be extracted.
165pub fn extract_json(text: &str) -> Result<String, ParseError> {
166    // Try markdown code block first (common LLM output format)
167    if let Ok(content) = extract_markdown_block_with_lang(text, "json") {
168        return Ok(content);
169    }
170
171    // Also try generic markdown block (might contain JSON without language hint)
172    if let Ok(content) = extract_markdown_block(text) {
173        // Verify it's actually JSON by trying to extract JSON from it
174        let extractor = FlexibleExtractor::new();
175        if let Ok(json) = extractor.extract(&content) {
176            return Ok(json);
177        }
178    }
179
180    // Fall back to standard extraction strategies
181    let extractor = FlexibleExtractor::new();
182    extractor.extract(text)
183}
184
185/// Extracts content from any Markdown code block in the text.
186///
187/// This function searches for the first code block (delimited by triple backticks)
188/// and returns its content. The code block can have any language specifier or none at all.
189///
190/// # Returns
191///
192/// A `Result` containing the extracted code block content on success, or a `ParseError`
193/// if no code block is found.
194pub fn extract_markdown_block(text: &str) -> Result<String, ParseError> {
195    let extractor = MarkdownCodeBlockExtractor::new();
196    extractor.extract(text)
197}
198
199/// Extracts content from a Markdown code block with a specific language.
200///
201/// This function searches for a code block with the specified language hint
202/// (e.g., ```rust, ```python) and returns its content.
203///
204/// # Arguments
205///
206/// * `text` - The text containing the markdown code block
207/// * `lang` - The language specifier to match (e.g., "rust", "python")
208///
209/// # Returns
210///
211/// A `Result` containing the extracted code block content on success, or a `ParseError`
212/// if no code block with the specified language is found.
213pub fn extract_markdown_block_with_lang(text: &str, lang: &str) -> Result<String, ParseError> {
214    let extractor = MarkdownCodeBlockExtractor::with_language(lang.to_string());
215    extractor.extract(text)
216}
217
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `outcome` is `Ok` and that the extracted text equals `expected`.
    #[track_caller]
    fn assert_extracted<E: std::fmt::Debug>(outcome: Result<String, E>, expected: &str) {
        assert!(outcome.is_ok());
        assert_eq!(outcome.unwrap(), expected);
    }

    #[test]
    fn test_json_extraction() {
        // Inline JSON surrounded by plain prose.
        let raw = "Some text before {\"key\": \"value\"} and after.";
        assert_extracted(extract_json(raw), "{\"key\": \"value\"}");
    }

    #[test]
    fn test_standard_extraction_from_tagged_content() {
        // JSON wrapped in an XML-like tag.
        let raw = "<answer>{\"type\": \"success\"}</answer>";
        assert_extracted(extract_json(raw), "{\"type\": \"success\"}");
    }

    #[test]
    fn test_markdown_extraction() {
        // Case 1: plain code block without a language specifier.
        let untagged = "Here is some code:\n```\nlet x = 42;\n```\nAnd some text after.";
        assert_extracted(extract_markdown_block(untagged), "let x = 42;");

        // Case 2: code block tagged with a specific language (rust).
        let rust_only = "Here's Rust code:\n```rust\nfn main() {
    println!(\"Hello\");
}
```";
        assert_extracted(
            extract_markdown_block_with_lang(rust_only, "rust"),
            "fn main() {\n    println!(\"Hello\");\n}",
        );

        // Case 3: the rust block is selected even though a json block comes first.
        let json_then_rust = r#"\nFirst a JSON block:
```json
{"key": "value"}
```

Then a Rust block:
```rust
let data = vec![1, 2, 3];
```
"#;
        assert_extracted(
            extract_markdown_block_with_lang(json_then_rust, "rust"),
            "let data = vec![1, 2, 3];",
        );

        // Case 4: no code block at all yields an error.
        let no_blocks = "This text has no code blocks at all.";
        assert!(extract_markdown_block(no_blocks).is_err());

        // Case 5: indented fences with messy surrounding text and blank lines.
        let messy = r#"\nLots of text before...


   ```python
def hello():
    print("world")
    return True
   ```


And more text after with various spacing.
"#;
        assert_extracted(
            extract_markdown_block_with_lang(messy, "python"),
            "def hello():\n    print(\"world\")\n    return True",
        );
    }

    #[test]
    fn test_extract_json_from_json_markdown_block() {
        // A ```json block is the highest-priority source.
        let raw = r#"Here's the response:
```json
{"status": "success", "count": 42}
```
That's the data you requested."#;
        assert_extracted(extract_json(raw), r#"{"status": "success", "count": 42}"#);
    }

    #[test]
    fn test_extract_json_from_generic_markdown_block() {
        // An untagged block that happens to contain JSON is accepted.
        let raw = r#"The output is:
```
{"result": "ok", "value": 123}
```
End of output."#;
        assert_extracted(extract_json(raw), r#"{"result": "ok", "value": 123}"#);
    }

    #[test]
    fn test_extract_json_priority_json_block_over_inline() {
        // With both inline JSON and a ```json block present, the block wins.
        let raw = r#"Some inline {"inline": "data"} here.
```json
{"block": "data"}
```
More text."#;
        assert_extracted(extract_json(raw), r#"{"block": "data"}"#);
    }

    #[test]
    fn test_extract_json_priority_json_block_over_generic_block() {
        // A ```json block is preferred over an earlier untagged block.
        let raw = r#"First a generic block:
```
{"generic": "block"}
```

Then a JSON block:
```json
{"json": "block"}
```"#;
        assert_extracted(extract_json(raw), r#"{"json": "block"}"#);
    }

    #[test]
    fn test_extract_json_fallback_from_non_json_markdown_block() {
        // A block without JSON is skipped in favour of inline extraction.
        let raw = r#"Here's some code:
```
This is not JSON at all
```
But this is JSON: {"fallback": "value"}"#;
        assert_extracted(extract_json(raw), r#"{"fallback": "value"}"#);
    }

    #[test]
    fn test_extract_json_from_rust_block_fallback() {
        // Only a non-JSON (rust) block exists, so inline extraction is used.
        let raw = r#"```rust
let x = 42;
```
The result is {"data": "inline"}"#;
        assert_extracted(extract_json(raw), r#"{"data": "inline"}"#);
    }

    #[test]
    fn test_extract_json_multiline_in_markdown_block() {
        // Multiline JSON inside a ```json block.
        let raw = r#"Response:
```json
{
  "name": "test",
  "values": [1, 2, 3],
  "nested": {
    "key": "value"
  }
}
```"#;
        let outcome = extract_json(raw);
        assert!(outcome.is_ok());
        let json = outcome.unwrap();

        // The extracted text must retain the expected structure.
        let expected_fragments = ["\"name\": \"test\"", "\"values\": [1, 2, 3]", "\"nested\""];
        for fragment in expected_fragments.iter() {
            assert!(json.contains(*fragment));
        }
    }
}