llm_toolkit/
lib.rs

1//! 'llm-toolkit' - A low-level Rust toolkit for the LLM last mile problem.
2//!
3//! This library provides a set of sharp, reliable, and unopinionated "tools"
4//! for building robust LLM-powered applications in Rust. It focuses on solving
5//! the common and frustrating problems that occur at the boundary between a
6//! strongly-typed Rust application and the unstructured, often unpredictable
7//! string-based responses from LLM APIs.
8
9// Allow the crate to reference itself by name, which is needed for proc macros
10// to work correctly in examples, tests, and bins
11extern crate self as llm_toolkit;
12
13/// A derive macro to implement the `ToPrompt` trait for structs.
14///
15/// This macro is available only when the `derive` feature is enabled.
16/// See the [crate-level documentation](index.html#2-structured-prompts-with-derivetoprompt) for usage examples.
17#[cfg(feature = "derive")]
18pub use llm_toolkit_macros::ToPrompt;
19
20/// A derive macro to implement the `ToPromptSet` trait for structs.
21///
22/// This macro is available only when the `derive` feature is enabled.
23#[cfg(feature = "derive")]
24pub use llm_toolkit_macros::ToPromptSet;
25
26/// A derive macro to implement the `ToPromptFor` trait for structs.
27///
28/// This macro is available only when the `derive` feature is enabled.
29#[cfg(feature = "derive")]
30pub use llm_toolkit_macros::ToPromptFor;
31
32/// A macro for creating examples sections in prompts.
33///
34/// This macro is available only when the `derive` feature is enabled.
35#[cfg(feature = "derive")]
36pub use llm_toolkit_macros::examples_section;
37
38/// A procedural attribute macro for defining intent enums with automatic prompt and extractor generation.
39///
40/// This macro is available only when the `derive` feature is enabled.
41#[cfg(feature = "derive")]
42pub use llm_toolkit_macros::define_intent;
43
44/// A derive macro to implement the `Agent` trait for structs.
45///
46/// This macro is available only when the `agent` feature is enabled.
47/// It automatically generates an Agent implementation that uses ClaudeCodeAgent
48/// internally and deserializes responses into a structured output type.
49///
50/// # Example
51///
52/// ```ignore
/// use llm_toolkit::Agent;
54/// use serde::{Deserialize, Serialize};
55///
56/// #[derive(Serialize, Deserialize)]
57/// struct MyOutput {
58///     result: String,
59/// }
60///
61/// #[derive(Agent)]
62/// #[agent(expertise = "My expertise", output = "MyOutput")]
63/// struct MyAgent;
64/// ```
65#[cfg(feature = "agent")]
66pub use llm_toolkit_macros::Agent;
67
68/// An attribute macro to define agent structs with automatic trait implementations.
69///
70/// This macro is available only when the `agent` feature is enabled.
71#[cfg(feature = "agent")]
72pub use llm_toolkit_macros::agent;
73
74/// A derive macro to implement the `TypeMarker` trait for structs.
75///
76/// This macro is available only when the `agent` feature is enabled.
77/// It automatically generates a TypeMarker implementation that provides
78/// a type identifier string for type-based orchestrator output retrieval.
79///
80/// # Example
81///
82/// ```ignore
83/// use llm_toolkit::TypeMarker;
84/// use serde::{Deserialize, Serialize};
85///
86/// #[derive(Serialize, Deserialize, TypeMarker)]
87/// struct MyResponse {
88///     #[serde(default = "default_type")]
89///     __type: String,
90///     result: String,
91/// }
92///
93/// fn default_type() -> String {
94///     "MyResponse".to_string()
95/// }
96/// ```
97#[cfg(feature = "agent")]
98pub use llm_toolkit_macros::{TypeMarker, type_marker};
99
100pub mod attachment;
101pub mod extract;
102pub mod intent;
103pub mod multimodal;
104pub mod prompt;
105
106#[cfg(feature = "agent")]
107pub mod agent;
108
109#[cfg(feature = "agent")]
110pub mod orchestrator;
111
112pub use attachment::{Attachment, AttachmentSchema, ToAttachments};
113pub use extract::{FlexibleExtractor, MarkdownCodeBlockExtractor};
114pub use intent::frame::IntentFrame;
115#[allow(deprecated)]
116pub use intent::{IntentError, IntentExtractor, PromptBasedExtractor};
117pub use multimodal::ImageData;
118pub use prompt::{PromptPart, PromptSetError, ToPrompt, ToPromptFor, ToPromptSet};
119
120#[cfg(feature = "agent")]
121pub use agent::{Agent, AgentError};
122
123#[cfg(feature = "agent")]
124pub use agent::persona::{Persona, PersonaAgent};
125
126#[cfg(feature = "agent")]
127pub use orchestrator::{
128    BlueprintWorkflow, Orchestrator, OrchestratorError, StrategyMap, TypeMarker,
129};
130
131use extract::ParseError;
132
133/// Extracts a JSON string from a raw LLM response string.
134///
135/// This function uses a `FlexibleExtractor` with its standard strategies
136/// to find and extract a JSON object from a string that may contain extraneous
137/// text, such as explanations or Markdown code blocks.
138///
139/// For more advanced control over extraction strategies, see the `extract::FlexibleExtractor` struct.
140///
141/// # Returns
142///
143/// A `Result` containing the extracted JSON `String` on success, or a `ParseError`
144/// if no JSON could be extracted.
145pub fn extract_json(text: &str) -> Result<String, ParseError> {
146    // Try markdown code block first (common LLM output format)
147    if let Ok(content) = extract_markdown_block_with_lang(text, "json") {
148        return Ok(content);
149    }
150
151    // Also try generic markdown block (might contain JSON without language hint)
152    if let Ok(content) = extract_markdown_block(text) {
153        // Verify it's actually JSON by trying to extract JSON from it
154        let extractor = FlexibleExtractor::new();
155        if let Ok(json) = extractor.extract(&content) {
156            return Ok(json);
157        }
158    }
159
160    // Fall back to standard extraction strategies
161    let extractor = FlexibleExtractor::new();
162    extractor.extract(text)
163}
164
165/// Extracts content from any Markdown code block in the text.
166///
167/// This function searches for the first code block (delimited by triple backticks)
168/// and returns its content. The code block can have any language specifier or none at all.
169///
170/// # Returns
171///
172/// A `Result` containing the extracted code block content on success, or a `ParseError`
173/// if no code block is found.
174pub fn extract_markdown_block(text: &str) -> Result<String, ParseError> {
175    let extractor = MarkdownCodeBlockExtractor::new();
176    extractor.extract(text)
177}
178
179/// Extracts content from a Markdown code block with a specific language.
180///
181/// This function searches for a code block with the specified language hint
182/// (e.g., ```rust, ```python) and returns its content.
183///
184/// # Arguments
185///
186/// * `text` - The text containing the markdown code block
187/// * `lang` - The language specifier to match (e.g., "rust", "python")
188///
189/// # Returns
190///
191/// A `Result` containing the extracted code block content on success, or a `ParseError`
192/// if no code block with the specified language is found.
193pub fn extract_markdown_block_with_lang(text: &str, lang: &str) -> Result<String, ParseError> {
194    let extractor = MarkdownCodeBlockExtractor::with_language(lang.to_string());
195    extractor.extract(text)
196}
197
#[cfg(test)]
mod tests {
    use super::*;

    // Inline JSON embedded in plain prose, with no Markdown block present.
    #[test]
    fn test_json_extraction() {
        let input = "Some text before {\"key\": \"value\"} and after.";
        assert_eq!(extract_json(input).unwrap(), "{\"key\": \"value\"}");
    }

    // JSON wrapped in an XML-like tag rather than a Markdown block.
    #[test]
    fn test_standard_extraction_from_tagged_content() {
        let text = "<answer>{\"type\": \"success\"}</answer>";
        assert_eq!(extract_json(text).unwrap(), "{\"type\": \"success\"}");
    }

    // Exercises the two Markdown helpers directly across several block layouts.
    #[test]
    fn test_markdown_extraction() {
        // Simple code block with no language.
        let text1 = "Here is some code:\n```\nlet x = 42;\n```\nAnd some text after.";
        assert_eq!(extract_markdown_block(text1).unwrap(), "let x = 42;");

        // Code block with a specific language (rust).
        let text2 = "Here's Rust code:\n```rust\nfn main() {\n    println!(\"Hello\");\n}\n```";
        assert_eq!(
            extract_markdown_block_with_lang(text2, "rust").unwrap(),
            "fn main() {\n    println!(\"Hello\");\n}"
        );

        // Extracting the rust block when a json block is also present.
        // The fixture starts with a real newline: escapes such as `\n` are not
        // interpreted inside raw strings.
        let text3 = r#"
First a JSON block:
```json
{"key": "value"}
```

Then a Rust block:
```rust
let data = vec![1, 2, 3];
```
"#;
        assert_eq!(
            extract_markdown_block_with_lang(text3, "rust").unwrap(),
            "let data = vec![1, 2, 3];"
        );

        // No code block at all must be reported as an error.
        let text4 = "This text has no code blocks at all.";
        assert!(extract_markdown_block(text4).is_err());

        // Messy surrounding text, blank lines, and indented fences.
        let text5 = r#"
Lots of text before...


   ```python
def hello():
    print("world")
    return True
   ```


And more text after with various spacing.
"#;
        assert_eq!(
            extract_markdown_block_with_lang(text5, "python").unwrap(),
            "def hello():\n    print(\"world\")\n    return True"
        );
    }

    // A ```json block is the highest-priority source for `extract_json`.
    #[test]
    fn test_extract_json_from_json_markdown_block() {
        let text = r#"Here's the response:
```json
{"status": "success", "count": 42}
```
That's the data you requested."#;
        assert_eq!(
            extract_json(text).unwrap(),
            r#"{"status": "success", "count": 42}"#
        );
    }

    // A block without a language hint is accepted when it contains JSON.
    #[test]
    fn test_extract_json_from_generic_markdown_block() {
        let text = r#"The output is:
```
{"result": "ok", "value": 123}
```
End of output."#;
        assert_eq!(
            extract_json(text).unwrap(),
            r#"{"result": "ok", "value": 123}"#
        );
    }

    // When both a JSON block and inline JSON exist, the JSON block wins.
    #[test]
    fn test_extract_json_priority_json_block_over_inline() {
        let text = r#"Some inline {"inline": "data"} here.
```json
{"block": "data"}
```
More text."#;
        assert_eq!(extract_json(text).unwrap(), r#"{"block": "data"}"#);
    }

    // A ```json block wins over a generic block, even if the generic one comes first.
    #[test]
    fn test_extract_json_priority_json_block_over_generic_block() {
        let text = r#"First a generic block:
```
{"generic": "block"}
```

Then a JSON block:
```json
{"json": "block"}
```"#;
        assert_eq!(extract_json(text).unwrap(), r#"{"json": "block"}"#);
    }

    // A generic block holding non-JSON must not stop inline extraction.
    #[test]
    fn test_extract_json_fallback_from_non_json_markdown_block() {
        let text = r#"Here's some code:
```
This is not JSON at all
```
But this is JSON: {"fallback": "value"}"#;
        assert_eq!(extract_json(text).unwrap(), r#"{"fallback": "value"}"#);
    }

    // When only non-JSON language blocks exist, inline JSON is still found.
    #[test]
    fn test_extract_json_from_rust_block_fallback() {
        let text = r#"```rust
let x = 42;
```
The result is {"data": "inline"}"#;
        assert_eq!(extract_json(text).unwrap(), r#"{"data": "inline"}"#);
    }

    // Multiline JSON inside a ```json block keeps its structure.
    #[test]
    fn test_extract_json_multiline_in_markdown_block() {
        let text = r#"Response:
```json
{
  "name": "test",
  "values": [1, 2, 3],
  "nested": {
    "key": "value"
  }
}
```"#;
        let json = extract_json(text).unwrap();
        // Verify it contains the expected structure.
        assert!(json.contains("\"name\": \"test\""));
        assert!(json.contains("\"values\": [1, 2, 3]"));
        assert!(json.contains("\"nested\""));
    }
}