Skip to main content

llm_toolkit/
lib.rs

1//! 'llm-toolkit' - A low-level Rust toolkit for the LLM last mile problem.
2//!
3//! This library provides a set of sharp, reliable, and unopinionated "tools"
4//! for building robust LLM-powered applications in Rust. It focuses on solving
5//! the common and frustrating problems that occur at the boundary between a
6//! strongly-typed Rust application and the unstructured, often unpredictable
7//! string-based responses from LLM APIs.
8
9// Allow the crate to reference itself by name, which is needed for proc macros
10// to work correctly in examples, tests, and bins
11extern crate self as llm_toolkit;
12
13// Re-export tracing for use by generated code from macros
14// Using extern crate to ensure it's accessible via absolute path
15#[cfg(feature = "agent")]
16pub extern crate tracing;
17
18// Re-export minijinja for use by ToPrompt derive macro
19pub extern crate minijinja;
20
21// Re-export quick_xml for use by define_intent macro
22#[cfg(feature = "derive")]
23pub extern crate quick_xml;
24
25/// A derive macro to implement the `ToPrompt` trait for structs.
26///
27/// This macro is available only when the `derive` feature is enabled.
28/// See the [crate-level documentation](index.html#2-structured-prompts-with-derivetoprompt) for usage examples.
29#[cfg(feature = "derive")]
30pub use llm_toolkit_macros::ToPrompt;
31
32/// A derive macro to implement the `ToPromptSet` trait for structs.
33///
34/// This macro is available only when the `derive` feature is enabled.
35#[cfg(feature = "derive")]
36pub use llm_toolkit_macros::ToPromptSet;
37
38/// A derive macro to implement the `ToPromptFor` trait for structs.
39///
40/// This macro is available only when the `derive` feature is enabled.
41#[cfg(feature = "derive")]
42pub use llm_toolkit_macros::ToPromptFor;
43
44/// A macro for creating examples sections in prompts.
45///
46/// This macro is available only when the `derive` feature is enabled.
47#[cfg(feature = "derive")]
48pub use llm_toolkit_macros::examples_section;
49
50/// A procedural attribute macro for defining intent enums with automatic prompt and extractor generation.
51///
52/// This macro is available only when the `derive` feature is enabled.
53#[cfg(feature = "derive")]
54pub use llm_toolkit_macros::define_intent;
55
56/// A derive macro to implement the `Agent` trait for structs.
57///
58/// This macro is available only when the `agent` feature is enabled.
59/// It automatically generates an Agent implementation that uses ClaudeCodeAgent
60/// internally and deserializes responses into a structured output type.
61///
62/// # Example
63///
64/// ```ignore
65/// use llm_toolkit_macros::Agent;
66/// use serde::{Deserialize, Serialize};
67///
68/// #[derive(Serialize, Deserialize)]
69/// struct MyOutput {
70///     result: String,
71/// }
72///
73/// #[derive(Agent)]
74/// #[agent(expertise = "My expertise", output = "MyOutput")]
75/// struct MyAgent;
76/// ```
77#[cfg(feature = "agent")]
78pub use llm_toolkit_macros::Agent;
79
80/// An attribute macro to define agent structs with automatic trait implementations.
81///
82/// This macro is available only when the `agent` feature is enabled.
83#[cfg(feature = "agent")]
84pub use llm_toolkit_macros::agent;
85
86/// A derive macro to implement the `TypeMarker` trait for structs.
87///
88/// This macro is available only when the `agent` feature is enabled.
89/// It automatically generates a TypeMarker implementation that provides
90/// a type identifier string for type-based orchestrator output retrieval.
91///
92/// # Example
93///
94/// ```ignore
95/// use llm_toolkit::TypeMarker;
96/// use serde::{Deserialize, Serialize};
97///
98/// #[derive(Serialize, Deserialize, TypeMarker)]
99/// struct MyResponse {
100///     #[serde(default = "default_type")]
101///     __type: String,
102///     result: String,
103/// }
104///
105/// fn default_type() -> String {
106///     "MyResponse".to_string()
107/// }
108/// ```
109#[cfg(feature = "agent")]
110pub use llm_toolkit_macros::{TypeMarker, type_marker};
111
112pub mod attachment;
113pub mod context;
114pub mod extract;
115pub mod intent;
116pub mod models;
117pub mod multimodal;
118pub mod prompt;
119pub mod retrieval;
120
121#[cfg(feature = "agent")]
122pub mod observability;
123
124#[cfg(feature = "agent")]
125pub mod agent;
126
127#[cfg(feature = "agent")]
128pub mod orchestrator;
129
130pub use attachment::{Attachment, AttachmentSchema, ToAttachments};
131pub use context::{ContextProfile, Priority, TaskHealth};
132pub use extract::{FlexibleExtractor, MarkdownCodeBlockExtractor};
133#[cfg(feature = "agent")]
134pub use intent::expandable::{
135    Expandable, ReActConfig, ReActError, ReActResult, RegistryError, Selectable, SelectionRegistry,
136    react_loop, simple_tag_selector,
137};
138pub use intent::frame::IntentFrame;
139#[allow(deprecated)]
140pub use intent::{IntentError, IntentExtractor, PromptBasedExtractor};
141pub use models::{ClaudeModel, GeminiModel, Model, ModelError, OpenAIModel};
142pub use multimodal::ImageData;
143pub use prompt::{PromptPart, PromptSetError, ToPrompt, ToPromptFor, ToPromptSet};
144pub use retrieval::Document;
145
146#[cfg(feature = "agent")]
147pub use agent::{Agent, AgentError, AnyAgent, ToExpertise};
148
149#[cfg(feature = "agent")]
150pub use agent::persona::{ContextConfig, Persona, PersonaAgent};
151
152#[cfg(feature = "agent")]
153pub use agent::retrieval::RetrievalAwareAgent;
154
155#[cfg(feature = "agent")]
156pub use agent::expertise::{
157    Anchor, ContextualPrompt, Expertise, KnowledgeFragment, RenderContext, WeightedFragment,
158};
159
160#[cfg(feature = "agent")]
161pub use orchestrator::{
162    BlueprintWorkflow, Orchestrator, OrchestratorError, StrategyMap, TypeMarker,
163};
164
165use extract::ParseError;
166
167/// Extracts a JSON string from a raw LLM response string.
168///
169/// This function uses a `FlexibleExtractor` with its standard strategies
170/// to find and extract a JSON object from a string that may contain extraneous
171/// text, such as explanations or Markdown code blocks.
172///
173/// For more advanced control over extraction strategies, see the `extract::FlexibleExtractor` struct.
174///
175/// # Returns
176///
177/// A `Result` containing the extracted JSON `String` on success, or a `ParseError`
178/// if no JSON could be extracted.
179pub fn extract_json(text: &str) -> Result<String, ParseError> {
180    // Try markdown code block first (common LLM output format)
181    if let Ok(content) = extract_markdown_block_with_lang(text, "json") {
182        return Ok(content);
183    }
184
185    // Also try generic markdown block (might contain JSON without language hint)
186    if let Ok(content) = extract_markdown_block(text) {
187        // Verify it's actually JSON by trying to extract JSON from it
188        let extractor = FlexibleExtractor::new();
189        if let Ok(json) = extractor.extract(&content) {
190            return Ok(json);
191        }
192    }
193
194    // Fall back to standard extraction strategies
195    let extractor = FlexibleExtractor::new();
196    extractor.extract(text)
197}
198
199/// Extracts content from any Markdown code block in the text.
200///
201/// This function searches for the first code block (delimited by triple backticks)
202/// and returns its content. The code block can have any language specifier or none at all.
203///
204/// # Returns
205///
206/// A `Result` containing the extracted code block content on success, or a `ParseError`
207/// if no code block is found.
208pub fn extract_markdown_block(text: &str) -> Result<String, ParseError> {
209    let extractor = MarkdownCodeBlockExtractor::new();
210    extractor.extract(text)
211}
212
213/// Extracts content from a Markdown code block with a specific language.
214///
215/// This function searches for a code block with the specified language hint
216/// (e.g., ```rust, ```python) and returns its content.
217///
218/// # Arguments
219///
220/// * `text` - The text containing the markdown code block
221/// * `lang` - The language specifier to match (e.g., "rust", "python")
222///
223/// # Returns
224///
225/// A `Result` containing the extracted code block content on success, or a `ParseError`
226/// if no code block with the specified language is found.
227pub fn extract_markdown_block_with_lang(text: &str, lang: &str) -> Result<String, ParseError> {
228    let extractor = MarkdownCodeBlockExtractor::with_language(lang.to_string());
229    extractor.extract(text)
230}
231
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `extract_json` on `text` and fails the test if nothing is extracted.
    fn json_from(text: &str) -> String {
        extract_json(text).expect("extract_json should find JSON in the input")
    }

    #[test]
    fn test_json_extraction() {
        // Bare JSON object embedded in a sentence.
        let extracted = json_from(r#"Some text before {"key": "value"} and after."#);
        assert_eq!(extracted, r#"{"key": "value"}"#);
    }

    #[test]
    fn test_standard_extraction_from_tagged_content() {
        // JSON wrapped in an XML-like tag.
        let extracted = json_from(r#"<answer>{"type": "success"}</answer>"#);
        assert_eq!(extracted, r#"{"type": "success"}"#);
    }

    #[test]
    fn test_markdown_extraction() {
        // Fence without a language specifier.
        let untagged = "Here is some code:\n```\nlet x = 42;\n```\nAnd some text after.";
        assert_eq!(
            extract_markdown_block(untagged).expect("untagged block should be found"),
            "let x = 42;"
        );

        // Fence tagged `rust`, multi-line body.
        let rust_only = "Here's Rust code:\n```rust\nfn main() {\n    println!(\"Hello\");\n}\n```";
        assert_eq!(
            extract_markdown_block_with_lang(rust_only, "rust")
                .expect("rust block should be found"),
            "fn main() {\n    println!(\"Hello\");\n}"
        );

        // A `json` fence precedes the `rust` fence; the language filter must pick
        // the latter. NOTE: inside a raw string the leading `\n` is a literal
        // backslash followed by `n`, not a newline; it is kept as in the fixture.
        let json_then_rust = r#"\nFirst a JSON block:
```json
{"key": "value"}
```

Then a Rust block:
```rust
let data = vec![1, 2, 3];
```
"#;
        assert_eq!(
            extract_markdown_block_with_lang(json_then_rust, "rust")
                .expect("rust block should be found after the json block"),
            "let data = vec![1, 2, 3];"
        );

        // No fence anywhere in the text.
        assert!(extract_markdown_block("This text has no code blocks at all.").is_err());

        // Indented fences surrounded by blank lines and unrelated text. As above,
        // the leading `\n` is literal because this is a raw string.
        let messy = r#"\nLots of text before...


   ```python
def hello():
    print("world")
    return True
   ```


And more text after with various spacing.
"#;
        assert_eq!(
            extract_markdown_block_with_lang(messy, "python")
                .expect("python block should be found"),
            "def hello():\n    print(\"world\")\n    return True"
        );
    }

    #[test]
    fn test_extract_json_from_json_markdown_block() {
        // A `json`-tagged fence is the first source `extract_json` consults.
        let response = r#"Here's the response:
```json
{"status": "success", "count": 42}
```
That's the data you requested."#;
        assert_eq!(
            json_from(response),
            r#"{"status": "success", "count": 42}"#
        );
    }

    #[test]
    fn test_extract_json_from_generic_markdown_block() {
        // An untagged fence that happens to contain JSON.
        let response = r#"The output is:
```
{"result": "ok", "value": 123}
```
End of output."#;
        assert_eq!(json_from(response), r#"{"result": "ok", "value": 123}"#);
    }

    #[test]
    fn test_extract_json_priority_json_block_over_inline() {
        // Inline JSON appears first in the text, but the `json` fence must win.
        let response = r#"Some inline {"inline": "data"} here.
```json
{"block": "data"}
```
More text."#;
        assert_eq!(json_from(response), r#"{"block": "data"}"#);
    }

    #[test]
    fn test_extract_json_priority_json_block_over_generic_block() {
        // An untagged fence appears first, but the `json` fence must win.
        let response = r#"First a generic block:
```
{"generic": "block"}
```

Then a JSON block:
```json
{"json": "block"}
```"#;
        assert_eq!(json_from(response), r#"{"json": "block"}"#);
    }

    #[test]
    fn test_extract_json_fallback_from_non_json_markdown_block() {
        // The fence holds no JSON, so extraction falls through to the inline object.
        let response = r#"Here's some code:
```
This is not JSON at all
```
But this is JSON: {"fallback": "value"}"#;
        assert_eq!(json_from(response), r#"{"fallback": "value"}"#);
    }

    #[test]
    fn test_extract_json_from_rust_block_fallback() {
        // Only a non-JSON (`rust`) fence exists; the inline object is the answer.
        let response = r#"```rust
let x = 42;
```
The result is {"data": "inline"}"#;
        assert_eq!(json_from(response), r#"{"data": "inline"}"#);
    }

    #[test]
    fn test_extract_json_multiline_in_markdown_block() {
        // Pretty-printed JSON spanning several lines inside a `json` fence.
        let response = r#"Response:
```json
{
  "name": "test",
  "values": [1, 2, 3],
  "nested": {
    "key": "value"
  }
}
```"#;
        let json = json_from(response);
        // Spot-check the structure rather than pinning exact whitespace.
        for fragment in [r#""name": "test""#, r#""values": [1, 2, 3]"#, r#""nested""#] {
            assert!(json.contains(fragment), "missing fragment: {fragment}");
        }
    }
}