llm_toolkit/
lib.rs

1//! 'llm-toolkit' - A low-level Rust toolkit for the LLM last mile problem.
2//!
3//! This library provides a set of sharp, reliable, and unopinionated "tools"
4//! for building robust LLM-powered applications in Rust. It focuses on solving
5//! the common and frustrating problems that occur at the boundary between a
6//! strongly-typed Rust application and the unstructured, often unpredictable
7//! string-based responses from LLM APIs.
8
9// Allow the crate to reference itself by name, which is needed for proc macros
10// to work correctly in examples, tests, and bins
11extern crate self as llm_toolkit;
12
13// Re-export tracing for use by generated code from macros
14// Using extern crate to ensure it's accessible via absolute path
15pub extern crate tracing;
16
17/// A derive macro to implement the `ToPrompt` trait for structs.
18///
19/// This macro is available only when the `derive` feature is enabled.
20/// See the [crate-level documentation](index.html#2-structured-prompts-with-derivetoprompt) for usage examples.
21#[cfg(feature = "derive")]
22pub use llm_toolkit_macros::ToPrompt;
23
24/// A derive macro to implement the `ToPromptSet` trait for structs.
25///
26/// This macro is available only when the `derive` feature is enabled.
27#[cfg(feature = "derive")]
28pub use llm_toolkit_macros::ToPromptSet;
29
30/// A derive macro to implement the `ToPromptFor` trait for structs.
31///
32/// This macro is available only when the `derive` feature is enabled.
33#[cfg(feature = "derive")]
34pub use llm_toolkit_macros::ToPromptFor;
35
36/// A macro for creating examples sections in prompts.
37///
38/// This macro is available only when the `derive` feature is enabled.
39#[cfg(feature = "derive")]
40pub use llm_toolkit_macros::examples_section;
41
42/// A procedural attribute macro for defining intent enums with automatic prompt and extractor generation.
43///
44/// This macro is available only when the `derive` feature is enabled.
45#[cfg(feature = "derive")]
46pub use llm_toolkit_macros::define_intent;
47
48/// A derive macro to implement the `Agent` trait for structs.
49///
50/// This macro is available only when the `agent` feature is enabled.
51/// It automatically generates an Agent implementation that uses ClaudeCodeAgent
52/// internally and deserializes responses into a structured output type.
53///
54/// # Example
55///
56/// ```ignore
57/// use llm_toolkit_macros::Agent;
58/// use serde::{Deserialize, Serialize};
59///
60/// #[derive(Serialize, Deserialize)]
61/// struct MyOutput {
62///     result: String,
63/// }
64///
65/// #[derive(Agent)]
66/// #[agent(expertise = "My expertise", output = "MyOutput")]
67/// struct MyAgent;
68/// ```
69#[cfg(feature = "agent")]
70pub use llm_toolkit_macros::Agent;
71
72/// An attribute macro to define agent structs with automatic trait implementations.
73///
74/// This macro is available only when the `agent` feature is enabled.
75#[cfg(feature = "agent")]
76pub use llm_toolkit_macros::agent;
77
78/// A derive macro to implement the `TypeMarker` trait for structs.
79///
80/// This macro is available only when the `agent` feature is enabled.
81/// It automatically generates a TypeMarker implementation that provides
82/// a type identifier string for type-based orchestrator output retrieval.
83///
84/// # Example
85///
86/// ```ignore
87/// use llm_toolkit::TypeMarker;
88/// use serde::{Deserialize, Serialize};
89///
90/// #[derive(Serialize, Deserialize, TypeMarker)]
91/// struct MyResponse {
92///     #[serde(default = "default_type")]
93///     __type: String,
94///     result: String,
95/// }
96///
97/// fn default_type() -> String {
98///     "MyResponse".to_string()
99/// }
100/// ```
101#[cfg(feature = "agent")]
102pub use llm_toolkit_macros::{TypeMarker, type_marker};
103
104pub mod attachment;
105pub mod extract;
106pub mod intent;
107pub mod multimodal;
108pub mod prompt;
109
110#[cfg(feature = "agent")]
111pub mod observability;
112
113#[cfg(feature = "agent")]
114pub mod agent;
115
116#[cfg(feature = "agent")]
117pub mod orchestrator;
118
119pub use attachment::{Attachment, AttachmentSchema, ToAttachments};
120pub use extract::{FlexibleExtractor, MarkdownCodeBlockExtractor};
121pub use intent::frame::IntentFrame;
122#[allow(deprecated)]
123pub use intent::{IntentError, IntentExtractor, PromptBasedExtractor};
124pub use multimodal::ImageData;
125pub use prompt::{PromptPart, PromptSetError, ToPrompt, ToPromptFor, ToPromptSet};
126
127#[cfg(feature = "agent")]
128pub use agent::{Agent, AgentError};
129
130#[cfg(feature = "agent")]
131pub use agent::persona::{Persona, PersonaAgent};
132
133#[cfg(feature = "agent")]
134pub use orchestrator::{
135    BlueprintWorkflow, Orchestrator, OrchestratorError, StrategyMap, TypeMarker,
136};
137
138use extract::ParseError;
139
140/// Extracts a JSON string from a raw LLM response string.
141///
142/// This function uses a `FlexibleExtractor` with its standard strategies
143/// to find and extract a JSON object from a string that may contain extraneous
144/// text, such as explanations or Markdown code blocks.
145///
146/// For more advanced control over extraction strategies, see the `extract::FlexibleExtractor` struct.
147///
148/// # Returns
149///
150/// A `Result` containing the extracted JSON `String` on success, or a `ParseError`
151/// if no JSON could be extracted.
152pub fn extract_json(text: &str) -> Result<String, ParseError> {
153    // Try markdown code block first (common LLM output format)
154    if let Ok(content) = extract_markdown_block_with_lang(text, "json") {
155        return Ok(content);
156    }
157
158    // Also try generic markdown block (might contain JSON without language hint)
159    if let Ok(content) = extract_markdown_block(text) {
160        // Verify it's actually JSON by trying to extract JSON from it
161        let extractor = FlexibleExtractor::new();
162        if let Ok(json) = extractor.extract(&content) {
163            return Ok(json);
164        }
165    }
166
167    // Fall back to standard extraction strategies
168    let extractor = FlexibleExtractor::new();
169    extractor.extract(text)
170}
171
172/// Extracts content from any Markdown code block in the text.
173///
174/// This function searches for the first code block (delimited by triple backticks)
175/// and returns its content. The code block can have any language specifier or none at all.
176///
177/// # Returns
178///
179/// A `Result` containing the extracted code block content on success, or a `ParseError`
180/// if no code block is found.
181pub fn extract_markdown_block(text: &str) -> Result<String, ParseError> {
182    let extractor = MarkdownCodeBlockExtractor::new();
183    extractor.extract(text)
184}
185
186/// Extracts content from a Markdown code block with a specific language.
187///
188/// This function searches for a code block with the specified language hint
189/// (e.g., ```rust, ```python) and returns its content.
190///
191/// # Arguments
192///
193/// * `text` - The text containing the markdown code block
194/// * `lang` - The language specifier to match (e.g., "rust", "python")
195///
196/// # Returns
197///
198/// A `Result` containing the extracted code block content on success, or a `ParseError`
199/// if no code block with the specified language is found.
200pub fn extract_markdown_block_with_lang(text: &str, lang: &str) -> Result<String, ParseError> {
201    let extractor = MarkdownCodeBlockExtractor::with_language(lang.to_string());
202    extractor.extract(text)
203}
204
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `outcome` is `Ok` and that its payload equals `expected`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test line.
    #[track_caller]
    fn assert_extracted<E: std::fmt::Debug>(outcome: Result<String, E>, expected: &str) {
        assert!(outcome.is_ok());
        assert_eq!(outcome.unwrap(), expected);
    }

    #[test]
    fn test_json_extraction() {
        // Inline JSON surrounded by plain prose.
        let extracted = extract_json("Some text before {\"key\": \"value\"} and after.").unwrap();
        assert_eq!(extracted, r#"{"key": "value"}"#);
    }

    #[test]
    fn test_standard_extraction_from_tagged_content() {
        // JSON wrapped in an XML-like tag.
        let tagged = r#"<answer>{"type": "success"}</answer>"#;
        assert_extracted(extract_json(tagged), r#"{"type": "success"}"#);
    }

    #[test]
    fn test_markdown_extraction() {
        // Case 1: plain fenced block without a language tag.
        {
            let untagged = "Here is some code:\n```\nlet x = 42;\n```\nAnd some text after.";
            assert_extracted(extract_markdown_block(untagged), "let x = 42;");
        }

        // Case 2: fenced block tagged `rust`, requested by language.
        {
            let rust_only =
                "Here's Rust code:\n```rust\nfn main() {\n    println!(\"Hello\");\n}\n```";
            assert_extracted(
                extract_markdown_block_with_lang(rust_only, "rust"),
                "fn main() {\n    println!(\"Hello\");\n}",
            );
        }

        // Case 3: the `rust` block is selected even though a `json` block comes first.
        // The fixture is a raw string, so its leading `\n` is a literal backslash + `n`.
        {
            let two_blocks = r#"\nFirst a JSON block:
```json
{"key": "value"}
```

Then a Rust block:
```rust
let data = vec![1, 2, 3];
```
"#;
            assert_extracted(
                extract_markdown_block_with_lang(two_blocks, "rust"),
                "let data = vec![1, 2, 3];",
            );
        }

        // Case 4: no fenced block at all yields an error.
        {
            let no_blocks = "This text has no code blocks at all.";
            assert!(extract_markdown_block(no_blocks).is_err());
        }

        // Case 5: indented fences with noisy surrounding text and blank lines.
        // As above, the leading `\n` of this raw string is literal.
        {
            let messy = r#"\nLots of text before...


   ```python
def hello():
    print("world")
    return True
   ```


And more text after with various spacing.
"#;
            assert_extracted(
                extract_markdown_block_with_lang(messy, "python"),
                "def hello():\n    print(\"world\")\n    return True",
            );
        }
    }

    #[test]
    fn test_extract_json_from_json_markdown_block() {
        // A `json`-tagged block is the first candidate `extract_json` tries.
        let response = r#"Here's the response:
```json
{"status": "success", "count": 42}
```
That's the data you requested."#;
        assert_extracted(
            extract_json(response),
            r#"{"status": "success", "count": 42}"#,
        );
    }

    #[test]
    fn test_extract_json_from_generic_markdown_block() {
        // JSON inside an untagged fenced block is still picked up.
        let response = r#"The output is:
```
{"result": "ok", "value": 123}
```
End of output."#;
        assert_extracted(extract_json(response), r#"{"result": "ok", "value": 123}"#);
    }

    #[test]
    fn test_extract_json_priority_json_block_over_inline() {
        // With both inline JSON and a `json` block present, the block wins.
        let response = r#"Some inline {"inline": "data"} here.
```json
{"block": "data"}
```
More text."#;
        assert_extracted(extract_json(response), r#"{"block": "data"}"#);
    }

    #[test]
    fn test_extract_json_priority_json_block_over_generic_block() {
        // A `json`-tagged block wins over an earlier untagged block.
        let response = r#"First a generic block:
```
{"generic": "block"}
```

Then a JSON block:
```json
{"json": "block"}
```"#;
        assert_extracted(extract_json(response), r#"{"json": "block"}"#);
    }

    #[test]
    fn test_extract_json_fallback_from_non_json_markdown_block() {
        // The fenced block holds no JSON, so extraction falls back to the inline object.
        let response = r#"Here's some code:
```
This is not JSON at all
```
But this is JSON: {"fallback": "value"}"#;
        assert_extracted(extract_json(response), r#"{"fallback": "value"}"#);
    }

    #[test]
    fn test_extract_json_from_rust_block_fallback() {
        // Only a non-JSON (`rust`) block exists, so the inline object is used.
        let response = r#"```rust
let x = 42;
```
The result is {"data": "inline"}"#;
        assert_extracted(extract_json(response), r#"{"data": "inline"}"#);
    }

    #[test]
    fn test_extract_json_multiline_in_markdown_block() {
        // Multi-line, nested JSON inside a `json` block.
        let response = r#"Response:
```json
{
  "name": "test",
  "values": [1, 2, 3],
  "nested": {
    "key": "value"
  }
}
```"#;
        let outcome = extract_json(response);
        assert!(outcome.is_ok());
        let json = outcome.unwrap();

        // Check that the expected structure is present in the extracted text.
        let expected_fragments = [
            "\"name\": \"test\"",
            "\"values\": [1, 2, 3]",
            "\"nested\"",
        ];
        for fragment in expected_fragments {
            assert!(json.contains(fragment));
        }
    }
}