llm_toolkit/
lib.rs

//! `llm-toolkit` - A low-level Rust toolkit for the LLM "last mile" problem.
2//!
3//! This library provides a set of sharp, reliable, and unopinionated "tools"
4//! for building robust LLM-powered applications in Rust. It focuses on solving
5//! the common and frustrating problems that occur at the boundary between a
6//! strongly-typed Rust application and the unstructured, often unpredictable
7//! string-based responses from LLM APIs.
8
9// Allow the crate to reference itself by name, which is needed for proc macros
10// to work correctly in examples, tests, and bins
11extern crate self as llm_toolkit;
12
13// Re-export tracing for use by generated code from macros
14// Using extern crate to ensure it's accessible via absolute path
15pub extern crate tracing;
16
17/// A derive macro to implement the `ToPrompt` trait for structs.
18///
19/// This macro is available only when the `derive` feature is enabled.
20/// See the [crate-level documentation](index.html#2-structured-prompts-with-derivetoprompt) for usage examples.
21#[cfg(feature = "derive")]
22pub use llm_toolkit_macros::ToPrompt;
23
24/// A derive macro to implement the `ToPromptSet` trait for structs.
25///
26/// This macro is available only when the `derive` feature is enabled.
27#[cfg(feature = "derive")]
28pub use llm_toolkit_macros::ToPromptSet;
29
30/// A derive macro to implement the `ToPromptFor` trait for structs.
31///
32/// This macro is available only when the `derive` feature is enabled.
33#[cfg(feature = "derive")]
34pub use llm_toolkit_macros::ToPromptFor;
35
36/// A macro for creating examples sections in prompts.
37///
38/// This macro is available only when the `derive` feature is enabled.
39#[cfg(feature = "derive")]
40pub use llm_toolkit_macros::examples_section;
41
42/// A procedural attribute macro for defining intent enums with automatic prompt and extractor generation.
43///
44/// This macro is available only when the `derive` feature is enabled.
45#[cfg(feature = "derive")]
46pub use llm_toolkit_macros::define_intent;
47
48/// A derive macro to implement the `Agent` trait for structs.
49///
50/// This macro is available only when the `agent` feature is enabled.
51/// It automatically generates an Agent implementation that uses ClaudeCodeAgent
52/// internally and deserializes responses into a structured output type.
53///
54/// # Example
55///
56/// ```ignore
57/// use llm_toolkit_macros::Agent;
58/// use serde::{Deserialize, Serialize};
59///
60/// #[derive(Serialize, Deserialize)]
61/// struct MyOutput {
62///     result: String,
63/// }
64///
65/// #[derive(Agent)]
66/// #[agent(expertise = "My expertise", output = "MyOutput")]
67/// struct MyAgent;
68/// ```
69#[cfg(feature = "agent")]
70pub use llm_toolkit_macros::Agent;
71
72/// An attribute macro to define agent structs with automatic trait implementations.
73///
74/// This macro is available only when the `agent` feature is enabled.
75#[cfg(feature = "agent")]
76pub use llm_toolkit_macros::agent;
77
78/// A derive macro to implement the `TypeMarker` trait for structs.
79///
80/// This macro is available only when the `agent` feature is enabled.
81/// It automatically generates a TypeMarker implementation that provides
82/// a type identifier string for type-based orchestrator output retrieval.
83///
84/// # Example
85///
86/// ```ignore
87/// use llm_toolkit::TypeMarker;
88/// use serde::{Deserialize, Serialize};
89///
90/// #[derive(Serialize, Deserialize, TypeMarker)]
91/// struct MyResponse {
92///     #[serde(default = "default_type")]
93///     __type: String,
94///     result: String,
95/// }
96///
97/// fn default_type() -> String {
98///     "MyResponse".to_string()
99/// }
100/// ```
101#[cfg(feature = "agent")]
102pub use llm_toolkit_macros::{TypeMarker, type_marker};
103
104pub mod attachment;
105pub mod extract;
106pub mod intent;
107pub mod multimodal;
108pub mod prompt;
109pub mod retrieval;
110
111#[cfg(feature = "agent")]
112pub mod observability;
113
114#[cfg(feature = "agent")]
115pub mod agent;
116
117#[cfg(feature = "agent")]
118pub mod orchestrator;
119
120pub use attachment::{Attachment, AttachmentSchema, ToAttachments};
121pub use extract::{FlexibleExtractor, MarkdownCodeBlockExtractor};
122pub use intent::frame::IntentFrame;
123#[allow(deprecated)]
124pub use intent::{IntentError, IntentExtractor, PromptBasedExtractor};
125pub use multimodal::ImageData;
126pub use prompt::{PromptPart, PromptSetError, ToPrompt, ToPromptFor, ToPromptSet};
127pub use retrieval::Document;
128
129#[cfg(feature = "agent")]
130pub use agent::{Agent, AgentError};
131
132#[cfg(feature = "agent")]
133pub use agent::persona::{Persona, PersonaAgent};
134
135#[cfg(feature = "agent")]
136pub use agent::retrieval::RetrievalAwareAgent;
137
138#[cfg(feature = "agent")]
139pub use orchestrator::{
140    BlueprintWorkflow, Orchestrator, OrchestratorError, StrategyMap, TypeMarker,
141};
142
143use extract::ParseError;
144
145/// Extracts a JSON string from a raw LLM response string.
146///
147/// This function uses a `FlexibleExtractor` with its standard strategies
148/// to find and extract a JSON object from a string that may contain extraneous
149/// text, such as explanations or Markdown code blocks.
150///
151/// For more advanced control over extraction strategies, see the `extract::FlexibleExtractor` struct.
152///
153/// # Returns
154///
155/// A `Result` containing the extracted JSON `String` on success, or a `ParseError`
156/// if no JSON could be extracted.
157pub fn extract_json(text: &str) -> Result<String, ParseError> {
158    // Try markdown code block first (common LLM output format)
159    if let Ok(content) = extract_markdown_block_with_lang(text, "json") {
160        return Ok(content);
161    }
162
163    // Also try generic markdown block (might contain JSON without language hint)
164    if let Ok(content) = extract_markdown_block(text) {
165        // Verify it's actually JSON by trying to extract JSON from it
166        let extractor = FlexibleExtractor::new();
167        if let Ok(json) = extractor.extract(&content) {
168            return Ok(json);
169        }
170    }
171
172    // Fall back to standard extraction strategies
173    let extractor = FlexibleExtractor::new();
174    extractor.extract(text)
175}
176
177/// Extracts content from any Markdown code block in the text.
178///
179/// This function searches for the first code block (delimited by triple backticks)
180/// and returns its content. The code block can have any language specifier or none at all.
181///
182/// # Returns
183///
184/// A `Result` containing the extracted code block content on success, or a `ParseError`
185/// if no code block is found.
186pub fn extract_markdown_block(text: &str) -> Result<String, ParseError> {
187    let extractor = MarkdownCodeBlockExtractor::new();
188    extractor.extract(text)
189}
190
191/// Extracts content from a Markdown code block with a specific language.
192///
193/// This function searches for a code block with the specified language hint
194/// (e.g., ```rust, ```python) and returns its content.
195///
196/// # Arguments
197///
198/// * `text` - The text containing the markdown code block
199/// * `lang` - The language specifier to match (e.g., "rust", "python")
200///
201/// # Returns
202///
203/// A `Result` containing the extracted code block content on success, or a `ParseError`
204/// if no code block with the specified language is found.
205pub fn extract_markdown_block_with_lang(text: &str, lang: &str) -> Result<String, ParseError> {
206    let extractor = MarkdownCodeBlockExtractor::with_language(lang.to_string());
207    extractor.extract(text)
208}
209
/// Unit tests for the crate-level extraction helpers: `extract_json`,
/// `extract_markdown_block` and `extract_markdown_block_with_lang`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Inline JSON surrounded by plain text is found by the fallback strategies.
    #[test]
    fn test_json_extraction() {
        let input = "Some text before {\"key\": \"value\"} and after.";
        assert_eq!(
            extract_json(input).expect("inline JSON should be extracted"),
            "{\"key\": \"value\"}"
        );
    }

    /// JSON wrapped in an `<answer>` tag is extracted without the tag.
    #[test]
    fn test_standard_extraction_from_tagged_content() {
        let text = "<answer>{\"type\": \"success\"}</answer>";
        assert_eq!(
            extract_json(text).expect("tagged JSON should be extracted"),
            "{\"type\": \"success\"}"
        );
    }

    /// Covers the Markdown helpers: untagged blocks, language-tagged blocks,
    /// selecting one language among several blocks, missing blocks, and blocks
    /// whose fences are indented and surrounded by blank lines.
    #[test]
    fn test_markdown_extraction() {
        // Simple code block with no language.
        let text1 = "Here is some code:\n```\nlet x = 42;\n```\nAnd some text after.";
        assert_eq!(
            extract_markdown_block(text1).expect("untagged block should be extracted"),
            "let x = 42;"
        );

        // Code block with a specific language (rust).
        let text2 = "Here's Rust code:\n```rust\nfn main() {\n    println!(\"Hello\");\n}\n```";
        assert_eq!(
            extract_markdown_block_with_lang(text2, "rust")
                .expect("rust block should be extracted"),
            "fn main() {\n    println!(\"Hello\");\n}"
        );

        // Extracting the rust block when a json block is also present.
        // The raw string starts with a real newline: escapes such as `\n` are not
        // processed inside `r#"..."#`, so the fixture must not spell it as `\n`.
        let text3 = r#"
First a JSON block:
```json
{"key": "value"}
```

Then a Rust block:
```rust
let data = vec![1, 2, 3];
```
"#;
        assert_eq!(
            extract_markdown_block_with_lang(text3, "rust")
                .expect("rust block should be selected over the json block"),
            "let data = vec![1, 2, 3];"
        );

        // No code block at all is an error.
        let text4 = "This text has no code blocks at all.";
        assert!(extract_markdown_block(text4).is_err());

        // Messy surrounding text, blank lines and indented fences.
        // As above, the leading newline is a real one rather than a literal `\n`.
        let text5 = r#"
Lots of text before...


   ```python
def hello():
    print("world")
    return True
   ```


And more text after with various spacing.
"#;
        assert_eq!(
            extract_markdown_block_with_lang(text5, "python")
                .expect("python block should be extracted"),
            "def hello():\n    print(\"world\")\n    return True"
        );
    }

    /// A `json`-tagged Markdown block is the highest-priority source.
    #[test]
    fn test_extract_json_from_json_markdown_block() {
        let text = r#"Here's the response:
```json
{"status": "success", "count": 42}
```
That's the data you requested."#;
        assert_eq!(
            extract_json(text).expect("json block should be extracted"),
            r#"{"status": "success", "count": 42}"#
        );
    }

    /// An untagged Markdown block that contains JSON is accepted.
    #[test]
    fn test_extract_json_from_generic_markdown_block() {
        let text = r#"The output is:
```
{"result": "ok", "value": 123}
```
End of output."#;
        assert_eq!(
            extract_json(text).expect("JSON in an untagged block should be extracted"),
            r#"{"result": "ok", "value": 123}"#
        );
    }

    /// When both a `json` block and inline JSON exist, the block wins.
    #[test]
    fn test_extract_json_priority_json_block_over_inline() {
        let text = r#"Some inline {"inline": "data"} here.
```json
{"block": "data"}
```
More text."#;
        assert_eq!(
            extract_json(text).expect("json block should take priority over inline JSON"),
            r#"{"block": "data"}"#
        );
    }

    /// A `json` block wins over an earlier untagged block.
    #[test]
    fn test_extract_json_priority_json_block_over_generic_block() {
        let text = r#"First a generic block:
```
{"generic": "block"}
```

Then a JSON block:
```json
{"json": "block"}
```"#;
        assert_eq!(
            extract_json(text).expect("json block should take priority over generic block"),
            r#"{"json": "block"}"#
        );
    }

    /// When the Markdown block holds no JSON, inline JSON elsewhere is used.
    #[test]
    fn test_extract_json_fallback_from_non_json_markdown_block() {
        let text = r#"Here's some code:
```
This is not JSON at all
```
But this is JSON: {"fallback": "value"}"#;
        assert_eq!(
            extract_json(text).expect("inline JSON should be used as a fallback"),
            r#"{"fallback": "value"}"#
        );
    }

    /// When only a non-JSON (rust) block exists, inline JSON elsewhere is used.
    #[test]
    fn test_extract_json_from_rust_block_fallback() {
        let text = r#"```rust
let x = 42;
```
The result is {"data": "inline"}"#;
        assert_eq!(
            extract_json(text).expect("inline JSON should be used as a fallback"),
            r#"{"data": "inline"}"#
        );
    }

    /// Multiline JSON inside a `json` block keeps its structure.
    #[test]
    fn test_extract_json_multiline_in_markdown_block() {
        let text = r#"Response:
```json
{
  "name": "test",
  "values": [1, 2, 3],
  "nested": {
    "key": "value"
  }
}
```"#;
        let json = extract_json(text).expect("multiline json block should be extracted");
        // Spot-check the expected structure rather than the exact whitespace.
        assert!(json.contains("\"name\": \"test\""));
        assert!(json.contains("\"values\": [1, 2, 3]"));
        assert!(json.contains("\"nested\""));
    }
}