llm_toolkit/
lib.rs

1//! 'llm-toolkit' - A low-level Rust toolkit for the LLM last mile problem.
2//!
3//! This library provides a set of sharp, reliable, and unopinionated "tools"
4//! for building robust LLM-powered applications in Rust. It focuses on solving
5//! the common and frustrating problems that occur at the boundary between a
6//! strongly-typed Rust application and the unstructured, often unpredictable
7//! string-based responses from LLM APIs.
8
9// Allow the crate to reference itself by name, which is needed for proc macros
10// to work correctly in examples, tests, and bins
11extern crate self as llm_toolkit;
12
13// Re-export tracing for use by generated code from macros
14// Using extern crate to ensure it's accessible via absolute path
15pub extern crate tracing;
16
17// Re-export minijinja for use by ToPrompt derive macro
18pub extern crate minijinja;
19
20/// A derive macro to implement the `ToPrompt` trait for structs.
21///
22/// This macro is available only when the `derive` feature is enabled.
23/// See the [crate-level documentation](index.html#2-structured-prompts-with-derivetoprompt) for usage examples.
24#[cfg(feature = "derive")]
25pub use llm_toolkit_macros::ToPrompt;
26
27/// A derive macro to implement the `ToPromptSet` trait for structs.
28///
29/// This macro is available only when the `derive` feature is enabled.
30#[cfg(feature = "derive")]
31pub use llm_toolkit_macros::ToPromptSet;
32
33/// A derive macro to implement the `ToPromptFor` trait for structs.
34///
35/// This macro is available only when the `derive` feature is enabled.
36#[cfg(feature = "derive")]
37pub use llm_toolkit_macros::ToPromptFor;
38
39/// A macro for creating examples sections in prompts.
40///
41/// This macro is available only when the `derive` feature is enabled.
42#[cfg(feature = "derive")]
43pub use llm_toolkit_macros::examples_section;
44
45/// A procedural attribute macro for defining intent enums with automatic prompt and extractor generation.
46///
47/// This macro is available only when the `derive` feature is enabled.
48#[cfg(feature = "derive")]
49pub use llm_toolkit_macros::define_intent;
50
51/// A derive macro to implement the `Agent` trait for structs.
52///
53/// This macro is available only when the `agent` feature is enabled.
54/// It automatically generates an Agent implementation that uses ClaudeCodeAgent
55/// internally and deserializes responses into a structured output type.
56///
57/// # Example
58///
59/// ```ignore
/// use llm_toolkit::Agent;
61/// use serde::{Deserialize, Serialize};
62///
63/// #[derive(Serialize, Deserialize)]
64/// struct MyOutput {
65///     result: String,
66/// }
67///
68/// #[derive(Agent)]
69/// #[agent(expertise = "My expertise", output = "MyOutput")]
70/// struct MyAgent;
71/// ```
72#[cfg(feature = "agent")]
73pub use llm_toolkit_macros::Agent;
74
75/// An attribute macro to define agent structs with automatic trait implementations.
76///
77/// This macro is available only when the `agent` feature is enabled.
78#[cfg(feature = "agent")]
79pub use llm_toolkit_macros::agent;
80
81/// A derive macro to implement the `TypeMarker` trait for structs.
82///
83/// This macro is available only when the `agent` feature is enabled.
84/// It automatically generates a TypeMarker implementation that provides
85/// a type identifier string for type-based orchestrator output retrieval.
86///
87/// # Example
88///
89/// ```ignore
90/// use llm_toolkit::TypeMarker;
91/// use serde::{Deserialize, Serialize};
92///
93/// #[derive(Serialize, Deserialize, TypeMarker)]
94/// struct MyResponse {
95///     #[serde(default = "default_type")]
96///     __type: String,
97///     result: String,
98/// }
99///
100/// fn default_type() -> String {
101///     "MyResponse".to_string()
102/// }
103/// ```
104#[cfg(feature = "agent")]
105pub use llm_toolkit_macros::{TypeMarker, type_marker};
106
107pub mod attachment;
108pub mod extract;
109pub mod intent;
110pub mod multimodal;
111pub mod prompt;
112pub mod retrieval;
113
114#[cfg(feature = "agent")]
115pub mod observability;
116
117#[cfg(feature = "agent")]
118pub mod agent;
119
120#[cfg(feature = "agent")]
121pub mod orchestrator;
122
123pub use attachment::{Attachment, AttachmentSchema, ToAttachments};
124pub use extract::{FlexibleExtractor, MarkdownCodeBlockExtractor};
125pub use intent::frame::IntentFrame;
126#[allow(deprecated)]
127pub use intent::{IntentError, IntentExtractor, PromptBasedExtractor};
128pub use multimodal::ImageData;
129pub use prompt::{PromptPart, PromptSetError, ToPrompt, ToPromptFor, ToPromptSet};
130pub use retrieval::Document;
131
132#[cfg(feature = "agent")]
133pub use agent::{Agent, AgentError};
134
135#[cfg(feature = "agent")]
136pub use agent::persona::{Persona, PersonaAgent};
137
138#[cfg(feature = "agent")]
139pub use agent::retrieval::RetrievalAwareAgent;
140
141#[cfg(feature = "agent")]
142pub use orchestrator::{
143    BlueprintWorkflow, Orchestrator, OrchestratorError, StrategyMap, TypeMarker,
144};
145
146use extract::ParseError;
147
148/// Extracts a JSON string from a raw LLM response string.
149///
150/// This function uses a `FlexibleExtractor` with its standard strategies
151/// to find and extract a JSON object from a string that may contain extraneous
152/// text, such as explanations or Markdown code blocks.
153///
154/// For more advanced control over extraction strategies, see the `extract::FlexibleExtractor` struct.
155///
156/// # Returns
157///
158/// A `Result` containing the extracted JSON `String` on success, or a `ParseError`
159/// if no JSON could be extracted.
160pub fn extract_json(text: &str) -> Result<String, ParseError> {
161    // Try markdown code block first (common LLM output format)
162    if let Ok(content) = extract_markdown_block_with_lang(text, "json") {
163        return Ok(content);
164    }
165
166    // Also try generic markdown block (might contain JSON without language hint)
167    if let Ok(content) = extract_markdown_block(text) {
168        // Verify it's actually JSON by trying to extract JSON from it
169        let extractor = FlexibleExtractor::new();
170        if let Ok(json) = extractor.extract(&content) {
171            return Ok(json);
172        }
173    }
174
175    // Fall back to standard extraction strategies
176    let extractor = FlexibleExtractor::new();
177    extractor.extract(text)
178}
179
180/// Extracts content from any Markdown code block in the text.
181///
182/// This function searches for the first code block (delimited by triple backticks)
183/// and returns its content. The code block can have any language specifier or none at all.
184///
185/// # Returns
186///
187/// A `Result` containing the extracted code block content on success, or a `ParseError`
188/// if no code block is found.
189pub fn extract_markdown_block(text: &str) -> Result<String, ParseError> {
190    let extractor = MarkdownCodeBlockExtractor::new();
191    extractor.extract(text)
192}
193
194/// Extracts content from a Markdown code block with a specific language.
195///
196/// This function searches for a code block with the specified language hint
197/// (e.g., ```rust, ```python) and returns its content.
198///
199/// # Arguments
200///
201/// * `text` - The text containing the markdown code block
202/// * `lang` - The language specifier to match (e.g., "rust", "python")
203///
204/// # Returns
205///
206/// A `Result` containing the extracted code block content on success, or a `ParseError`
207/// if no code block with the specified language is found.
208pub fn extract_markdown_block_with_lang(text: &str, lang: &str) -> Result<String, ParseError> {
209    let extractor = MarkdownCodeBlockExtractor::with_language(lang.to_string());
210    extractor.extract(text)
211}
212
#[cfg(test)]
mod tests {
    use super::*;

    /// Inline JSON embedded in a sentence is extracted on its own.
    #[test]
    fn test_json_extraction() {
        let raw = "Some text before {\"key\": \"value\"} and after.";
        let extracted = extract_json(raw).unwrap();
        assert_eq!(extracted, "{\"key\": \"value\"}");
    }

    /// JSON wrapped in an `<answer>` tag is handled by the standard strategies.
    #[test]
    fn test_standard_extraction_from_tagged_content() {
        let raw = "<answer>{\"type\": \"success\"}</answer>";
        let extracted = extract_json(raw).expect("tagged content should yield JSON");
        assert_eq!(extracted, "{\"type\": \"success\"}");
    }

    /// Exercises the Markdown helpers across several block layouts.
    #[test]
    fn test_markdown_extraction() {
        // Plain fence with no language tag.
        let untagged = "Here is some code:\n```\nlet x = 42;\n```\nAnd some text after.";
        let untagged_body =
            extract_markdown_block(untagged).expect("untagged block should be found");
        assert_eq!(untagged_body, "let x = 42;");

        // Fence tagged with a specific language (rust).
        let rust_only = "Here's Rust code:\n```rust\nfn main() {
    println!(\"Hello\");
}
```";
        let rust_body = extract_markdown_block_with_lang(rust_only, "rust")
            .expect("rust block should be found");
        assert_eq!(rust_body, "fn main() {\n    println!(\"Hello\");\n}");

        // A rust block must be selected even when a json block precedes it.
        // NOTE: this is a raw string, so the leading `\n` is a literal
        // backslash followed by `n`, not a newline.
        let json_then_rust = r#"\nFirst a JSON block:
```json
{"key": "value"}
```

Then a Rust block:
```rust
let data = vec![1, 2, 3];
```
"#;
        let second_rust_body = extract_markdown_block_with_lang(json_then_rust, "rust")
            .expect("rust block should be found after the json block");
        assert_eq!(second_rust_body, "let data = vec![1, 2, 3];");

        // No fence at all results in an error.
        let no_block = "This text has no code blocks at all.";
        assert!(extract_markdown_block(no_block).is_err());

        // Indented fences surrounded by messy text and blank lines.
        // NOTE: raw string again, so the leading `\n` is literal.
        let messy = r#"\nLots of text before...


   ```python
def hello():
    print("world")
    return True
   ```


And more text after with various spacing.
"#;
        let python_body = extract_markdown_block_with_lang(messy, "python")
            .expect("python block should be found");
        assert_eq!(
            python_body,
            "def hello():\n    print(\"world\")\n    return True"
        );
    }

    /// A `json`-tagged block is the highest-priority source.
    #[test]
    fn test_extract_json_from_json_markdown_block() {
        let response = r#"Here's the response:
```json
{"status": "success", "count": 42}
```
That's the data you requested."#;
        let extracted = extract_json(response).expect("json block should be extracted");
        assert_eq!(extracted, r#"{"status": "success", "count": 42}"#);
    }

    /// An untagged block that holds JSON is accepted as well.
    #[test]
    fn test_extract_json_from_generic_markdown_block() {
        let response = r#"The output is:
```
{"result": "ok", "value": 123}
```
End of output."#;
        let extracted = extract_json(response).expect("generic block should yield JSON");
        assert_eq!(extracted, r#"{"result": "ok", "value": 123}"#);
    }

    /// With both inline JSON and a `json` block present, the block wins.
    #[test]
    fn test_extract_json_priority_json_block_over_inline() {
        let response = r#"Some inline {"inline": "data"} here.
```json
{"block": "data"}
```
More text."#;
        let extracted = extract_json(response).expect("json block should be extracted");
        assert_eq!(extracted, r#"{"block": "data"}"#);
    }

    /// A `json` block wins over an earlier untagged block.
    #[test]
    fn test_extract_json_priority_json_block_over_generic_block() {
        let response = r#"First a generic block:
```
{"generic": "block"}
```

Then a JSON block:
```json
{"json": "block"}
```"#;
        let extracted = extract_json(response).expect("json block should be extracted");
        assert_eq!(extracted, r#"{"json": "block"}"#);
    }

    /// A block without JSON is skipped in favour of inline JSON.
    #[test]
    fn test_extract_json_fallback_from_non_json_markdown_block() {
        let response = r#"Here's some code:
```
This is not JSON at all
```
But this is JSON: {"fallback": "value"}"#;
        let extracted = extract_json(response).expect("inline JSON should be the fallback");
        assert_eq!(extracted, r#"{"fallback": "value"}"#);
    }

    /// When the only block is non-JSON code, inline JSON is used instead.
    #[test]
    fn test_extract_json_from_rust_block_fallback() {
        let response = r#"```rust
let x = 42;
```
The result is {"data": "inline"}"#;
        let extracted = extract_json(response).expect("inline JSON should be the fallback");
        assert_eq!(extracted, r#"{"data": "inline"}"#);
    }

    /// Multi-line JSON inside a `json` block keeps its structure.
    #[test]
    fn test_extract_json_multiline_in_markdown_block() {
        let response = r#"Response:
```json
{
  "name": "test",
  "values": [1, 2, 3],
  "nested": {
    "key": "value"
  }
}
```"#;
        let json = extract_json(response).expect("multiline json block should be extracted");

        // Spot-check the pieces of the expected structure.
        let expected_fragments = [
            "\"name\": \"test\"",
            "\"values\": [1, 2, 3]",
            "\"nested\"",
        ];
        for fragment in &expected_fragments {
            assert!(json.contains(*fragment));
        }
    }
}