llm_toolkit/
lib.rs

1//! 'llm-toolkit' - A low-level Rust toolkit for the LLM last mile problem.
2//!
3//! This library provides a set of sharp, reliable, and unopinionated "tools"
4//! for building robust LLM-powered applications in Rust. It focuses on solving
5//! the common and frustrating problems that occur at the boundary between a
6//! strongly-typed Rust application and the unstructured, often unpredictable
7//! string-based responses from LLM APIs.
8
9// Allow the crate to reference itself by name, which is needed for proc macros
10// to work correctly in examples, tests, and bins
11extern crate self as llm_toolkit;
12
13// Re-export tracing for use by generated code from macros
14// Using extern crate to ensure it's accessible via absolute path
15pub extern crate tracing;
16
17// Re-export minijinja for use by ToPrompt derive macro
18pub extern crate minijinja;
19
20/// A derive macro to implement the `ToPrompt` trait for structs.
21///
22/// This macro is available only when the `derive` feature is enabled.
23/// See the [crate-level documentation](index.html#2-structured-prompts-with-derivetoprompt) for usage examples.
24#[cfg(feature = "derive")]
25pub use llm_toolkit_macros::ToPrompt;
26
27/// A derive macro to implement the `ToPromptSet` trait for structs.
28///
29/// This macro is available only when the `derive` feature is enabled.
30#[cfg(feature = "derive")]
31pub use llm_toolkit_macros::ToPromptSet;
32
33/// A derive macro to implement the `ToPromptFor` trait for structs.
34///
35/// This macro is available only when the `derive` feature is enabled.
36#[cfg(feature = "derive")]
37pub use llm_toolkit_macros::ToPromptFor;
38
39/// A macro for creating examples sections in prompts.
40///
41/// This macro is available only when the `derive` feature is enabled.
42#[cfg(feature = "derive")]
43pub use llm_toolkit_macros::examples_section;
44
45/// A procedural attribute macro for defining intent enums with automatic prompt and extractor generation.
46///
47/// This macro is available only when the `derive` feature is enabled.
48#[cfg(feature = "derive")]
49pub use llm_toolkit_macros::define_intent;
50
51/// A derive macro to implement the `Agent` trait for structs.
52///
53/// This macro is available only when the `agent` feature is enabled.
54/// It automatically generates an Agent implementation that uses ClaudeCodeAgent
55/// internally and deserializes responses into a structured output type.
56///
57/// # Example
58///
59/// ```ignore
60/// use llm_toolkit_macros::Agent;
61/// use serde::{Deserialize, Serialize};
62///
63/// #[derive(Serialize, Deserialize)]
64/// struct MyOutput {
65///     result: String,
66/// }
67///
68/// #[derive(Agent)]
69/// #[agent(expertise = "My expertise", output = "MyOutput")]
70/// struct MyAgent;
71/// ```
72#[cfg(feature = "agent")]
73pub use llm_toolkit_macros::Agent;
74
75/// An attribute macro to define agent structs with automatic trait implementations.
76///
77/// This macro is available only when the `agent` feature is enabled.
78#[cfg(feature = "agent")]
79pub use llm_toolkit_macros::agent;
80
81/// A derive macro to implement the `TypeMarker` trait for structs.
82///
83/// This macro is available only when the `agent` feature is enabled.
84/// It automatically generates a TypeMarker implementation that provides
85/// a type identifier string for type-based orchestrator output retrieval.
86///
87/// # Example
88///
89/// ```ignore
90/// use llm_toolkit::TypeMarker;
91/// use serde::{Deserialize, Serialize};
92///
93/// #[derive(Serialize, Deserialize, TypeMarker)]
94/// struct MyResponse {
95///     #[serde(default = "default_type")]
96///     __type: String,
97///     result: String,
98/// }
99///
100/// fn default_type() -> String {
101///     "MyResponse".to_string()
102/// }
103/// ```
104#[cfg(feature = "agent")]
105pub use llm_toolkit_macros::{TypeMarker, type_marker};
106
107pub mod attachment;
108pub mod context;
109pub mod extract;
110pub mod intent;
111pub mod multimodal;
112pub mod prompt;
113pub mod retrieval;
114
115#[cfg(feature = "agent")]
116pub mod observability;
117
118#[cfg(feature = "agent")]
119pub mod agent;
120
121#[cfg(feature = "agent")]
122pub mod orchestrator;
123
124pub use attachment::{Attachment, AttachmentSchema, ToAttachments};
125pub use context::{ContextProfile, Priority, TaskHealth};
126pub use extract::{FlexibleExtractor, MarkdownCodeBlockExtractor};
127#[cfg(feature = "agent")]
128pub use intent::expandable::{
129    Expandable, ReActConfig, ReActError, ReActResult, RegistryError, Selectable, SelectionRegistry,
130    react_loop, simple_tag_selector,
131};
132pub use intent::frame::IntentFrame;
133#[allow(deprecated)]
134pub use intent::{IntentError, IntentExtractor, PromptBasedExtractor};
135pub use multimodal::ImageData;
136pub use prompt::{PromptPart, PromptSetError, ToPrompt, ToPromptFor, ToPromptSet};
137pub use retrieval::Document;
138
139#[cfg(feature = "agent")]
140pub use agent::{Agent, AgentError, AnyAgent, ToExpertise};
141
142#[cfg(feature = "agent")]
143pub use agent::persona::{ContextConfig, Persona, PersonaAgent};
144
145#[cfg(feature = "agent")]
146pub use agent::retrieval::RetrievalAwareAgent;
147
148#[cfg(feature = "agent")]
149pub use agent::expertise::{
150    Anchor, ContextualPrompt, Expertise, KnowledgeFragment, RenderContext, WeightedFragment,
151};
152
153#[cfg(feature = "agent")]
154pub use orchestrator::{
155    BlueprintWorkflow, Orchestrator, OrchestratorError, StrategyMap, TypeMarker,
156};
157
158use extract::ParseError;
159
160/// Extracts a JSON string from a raw LLM response string.
161///
162/// This function uses a `FlexibleExtractor` with its standard strategies
163/// to find and extract a JSON object from a string that may contain extraneous
164/// text, such as explanations or Markdown code blocks.
165///
166/// For more advanced control over extraction strategies, see the `extract::FlexibleExtractor` struct.
167///
168/// # Returns
169///
170/// A `Result` containing the extracted JSON `String` on success, or a `ParseError`
171/// if no JSON could be extracted.
172pub fn extract_json(text: &str) -> Result<String, ParseError> {
173    // Try markdown code block first (common LLM output format)
174    if let Ok(content) = extract_markdown_block_with_lang(text, "json") {
175        return Ok(content);
176    }
177
178    // Also try generic markdown block (might contain JSON without language hint)
179    if let Ok(content) = extract_markdown_block(text) {
180        // Verify it's actually JSON by trying to extract JSON from it
181        let extractor = FlexibleExtractor::new();
182        if let Ok(json) = extractor.extract(&content) {
183            return Ok(json);
184        }
185    }
186
187    // Fall back to standard extraction strategies
188    let extractor = FlexibleExtractor::new();
189    extractor.extract(text)
190}
191
192/// Extracts content from any Markdown code block in the text.
193///
194/// This function searches for the first code block (delimited by triple backticks)
195/// and returns its content. The code block can have any language specifier or none at all.
196///
197/// # Returns
198///
199/// A `Result` containing the extracted code block content on success, or a `ParseError`
200/// if no code block is found.
201pub fn extract_markdown_block(text: &str) -> Result<String, ParseError> {
202    let extractor = MarkdownCodeBlockExtractor::new();
203    extractor.extract(text)
204}
205
206/// Extracts content from a Markdown code block with a specific language.
207///
208/// This function searches for a code block with the specified language hint
209/// (e.g., ```rust, ```python) and returns its content.
210///
211/// # Arguments
212///
213/// * `text` - The text containing the markdown code block
214/// * `lang` - The language specifier to match (e.g., "rust", "python")
215///
216/// # Returns
217///
218/// A `Result` containing the extracted code block content on success, or a `ParseError`
219/// if no code block with the specified language is found.
220pub fn extract_markdown_block_with_lang(text: &str, lang: &str) -> Result<String, ParseError> {
221    let extractor = MarkdownCodeBlockExtractor::with_language(lang.to_string());
222    extractor.extract(text)
223}
224
#[cfg(test)]
mod tests {
    use super::*;

    // Inline JSON embedded in a sentence is returned without the surrounding prose.
    #[test]
    fn test_json_extraction() {
        let input = "Some text before {\"key\": \"value\"} and after.";
        let json = extract_json(input).expect("inline JSON should be extracted");
        assert_eq!(json, "{\"key\": \"value\"}");
    }

    // JSON wrapped in an XML-like tag is handled by the standard fallback strategies.
    #[test]
    fn test_standard_extraction_from_tagged_content() {
        let text = "<answer>{\"type\": \"success\"}</answer>";
        let json = extract_json(text).expect("tagged JSON should be extracted");
        assert_eq!(json, "{\"type\": \"success\"}");
    }

    // Exercises both Markdown helpers across several block layouts.
    #[test]
    fn test_markdown_extraction() {
        // Case 1: a fence that carries no language tag.
        let untagged = "Here is some code:\n```\nlet x = 42;\n```\nAnd some text after.";
        let plain_code = extract_markdown_block(untagged).expect("untagged block should be found");
        assert_eq!(plain_code, "let x = 42;");

        // Case 2: a fence tagged with the language being requested (rust).
        let rust_only = "Here's Rust code:\n```rust\nfn main() {
    println!(\"Hello\");
}
```";
        let rust_code =
            extract_markdown_block_with_lang(rust_only, "rust").expect("rust block should be found");
        assert_eq!(rust_code, "fn main() {\n    println!(\"Hello\");\n}");

        // Case 3: the rust block must be picked even though a json block comes first.
        // NOTE: this is a raw string, so the leading `\n` is a literal backslash
        // followed by `n`, not a newline.
        let json_then_rust = r#"\nFirst a JSON block:
```json
{"key": "value"}
```

Then a Rust block:
```rust
let data = vec![1, 2, 3];
```
"#;
        let selected = extract_markdown_block_with_lang(json_then_rust, "rust")
            .expect("rust block should be selected over the json block");
        assert_eq!(selected, "let data = vec![1, 2, 3];");

        // Case 4: text without any fence yields an error.
        let no_fence = "This text has no code blocks at all.";
        assert!(extract_markdown_block(no_fence).is_err());

        // Case 5: indented fences surrounded by noisy text and blank lines.
        // NOTE: raw string again, so the leading `\n` is two literal characters.
        let noisy = r#"\nLots of text before...


   ```python
def hello():
    print("world")
    return True
   ```


And more text after with various spacing.
"#;
        let python_code =
            extract_markdown_block_with_lang(noisy, "python").expect("python block should be found");
        assert_eq!(
            python_code,
            "def hello():\n    print(\"world\")\n    return True"
        );
    }

    // A ```json block is the highest-priority source for `extract_json`.
    #[test]
    fn test_extract_json_from_json_markdown_block() {
        let text = r#"Here's the response:
```json
{"status": "success", "count": 42}
```
That's the data you requested."#;
        let json = extract_json(text).expect("json block should be extracted");
        assert_eq!(json, r#"{"status": "success", "count": 42}"#);
    }

    // An untagged block that contains JSON is accepted as well.
    #[test]
    fn test_extract_json_from_generic_markdown_block() {
        let text = r#"The output is:
```
{"result": "ok", "value": 123}
```
End of output."#;
        let json = extract_json(text).expect("JSON in a generic block should be extracted");
        assert_eq!(json, r#"{"result": "ok", "value": 123}"#);
    }

    // With both inline JSON and a ```json block present, the block wins.
    #[test]
    fn test_extract_json_priority_json_block_over_inline() {
        let text = r#"Some inline {"inline": "data"} here.
```json
{"block": "data"}
```
More text."#;
        let json = extract_json(text).expect("json block should win over inline JSON");
        assert_eq!(json, r#"{"block": "data"}"#);
    }

    // A ```json block wins over an earlier untagged block.
    #[test]
    fn test_extract_json_priority_json_block_over_generic_block() {
        let text = r#"First a generic block:
```
{"generic": "block"}
```

Then a JSON block:
```json
{"json": "block"}
```"#;
        let json = extract_json(text).expect("json block should win over the generic block");
        assert_eq!(json, r#"{"json": "block"}"#);
    }

    // If the only block holds non-JSON text, extraction falls back to inline JSON.
    #[test]
    fn test_extract_json_fallback_from_non_json_markdown_block() {
        let text = r#"Here's some code:
```
This is not JSON at all
```
But this is JSON: {"fallback": "value"}"#;
        let json = extract_json(text).expect("inline JSON should be used as the fallback");
        assert_eq!(json, r#"{"fallback": "value"}"#);
    }

    // If the only block is a rust block, extraction falls back to inline JSON.
    #[test]
    fn test_extract_json_from_rust_block_fallback() {
        let text = r#"```rust
let x = 42;
```
The result is {"data": "inline"}"#;
        let json = extract_json(text).expect("inline JSON should be used as the fallback");
        assert_eq!(json, r#"{"data": "inline"}"#);
    }

    // Multi-line JSON inside a ```json block keeps its structure.
    #[test]
    fn test_extract_json_multiline_in_markdown_block() {
        let text = r#"Response:
```json
{
  "name": "test",
  "values": [1, 2, 3],
  "nested": {
    "key": "value"
  }
}
```"#;
        let json = extract_json(text).expect("multiline json block should be extracted");
        // Spot-check that the expected pieces of the structure are present.
        for fragment in ["\"name\": \"test\"", "\"values\": [1, 2, 3]", "\"nested\""] {
            assert!(json.contains(fragment));
        }
    }
}