llm_toolkit/
lib.rs

//! `llm-toolkit` - A low-level Rust toolkit for the LLM last-mile problem.
2//!
3//! This library provides a set of sharp, reliable, and unopinionated "tools"
4//! for building robust LLM-powered applications in Rust. It focuses on solving
5//! the common and frustrating problems that occur at the boundary between a
6//! strongly-typed Rust application and the unstructured, often unpredictable
7//! string-based responses from LLM APIs.
8
9/// A derive macro to implement the `ToPrompt` trait for structs.
10///
11/// This macro is available only when the `derive` feature is enabled.
12/// See the [crate-level documentation](index.html#2-structured-prompts-with-derivetoprompt) for usage examples.
13#[cfg(feature = "derive")]
14pub use llm_toolkit_macros::ToPrompt;
15
16pub mod extract;
17pub mod intent;
18pub mod prompt;
19
20pub use extract::{FlexibleExtractor, MarkdownCodeBlockExtractor};
21pub use intent::{IntentError, IntentExtractor, PromptBasedExtractor};
22pub use prompt::ToPrompt;
23
24use extract::ParseError;
25
26/// Extracts a JSON string from a raw LLM response string.
27///
28/// This function uses a `FlexibleExtractor` with its standard strategies
29/// to find and extract a JSON object from a string that may contain extraneous
30/// text, such as explanations or Markdown code blocks.
31///
32/// For more advanced control over extraction strategies, see the `extract::FlexibleExtractor` struct.
33///
34/// # Returns
35///
36/// A `Result` containing the extracted JSON `String` on success, or a `ParseError`
37/// if no JSON could be extracted.
38pub fn extract_json(text: &str) -> Result<String, ParseError> {
39    let extractor = FlexibleExtractor::new();
40    // Note: The standard strategies in the copied code are TaggedContent("answer"), JsonBrackets, FirstJsonObject.
41    // We will add a markdown strategy later during refactoring.
42    extractor.extract(text)
43}
44
45/// Extracts content from any Markdown code block in the text.
46///
47/// This function searches for the first code block (delimited by triple backticks)
48/// and returns its content. The code block can have any language specifier or none at all.
49///
50/// # Returns
51///
52/// A `Result` containing the extracted code block content on success, or a `ParseError`
53/// if no code block is found.
54pub fn extract_markdown_block(text: &str) -> Result<String, ParseError> {
55    let extractor = MarkdownCodeBlockExtractor::new();
56    extractor.extract(text)
57}
58
59/// Extracts content from a Markdown code block with a specific language.
60///
61/// This function searches for a code block with the specified language hint
62/// (e.g., ```rust, ```python) and returns its content.
63///
64/// # Arguments
65///
66/// * `text` - The text containing the markdown code block
67/// * `lang` - The language specifier to match (e.g., "rust", "python")
68///
69/// # Returns
70///
71/// A `Result` containing the extracted code block content on success, or a `ParseError`
72/// if no code block with the specified language is found.
73pub fn extract_markdown_block_with_lang(text: &str, lang: &str) -> Result<String, ParseError> {
74    let extractor = MarkdownCodeBlockExtractor::with_language(lang.to_string());
75    extractor.extract(text)
76}
77
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `outcome` is `Ok` and that the extracted text equals `expected`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test line
    /// rather than at this helper.
    #[track_caller]
    fn assert_extracted<E: std::fmt::Debug>(outcome: Result<String, E>, expected: &str) {
        assert!(outcome.is_ok());
        assert_eq!(outcome.unwrap(), expected);
    }

    /// A JSON object embedded in free text is returned without the surrounding text.
    #[test]
    fn test_json_extraction() {
        let response = "Some text before {\"key\": \"value\"} and after.";
        assert_extracted(extract_json(response), "{\"key\": \"value\"}");
    }

    /// JSON wrapped in `<answer>` tags is returned without the tags.
    #[test]
    fn test_standard_extraction_from_tagged_content() {
        let tagged = "<answer>{\"type\": \"success\"}</answer>";
        assert_extracted(extract_json(tagged), "{\"type\": \"success\"}");
    }

    /// Exercises both Markdown helpers: with and without a language filter,
    /// with several blocks present, with no block at all, and with messy
    /// whitespace around the fences.
    #[test]
    fn test_markdown_extraction() {
        // Simple code block with no language.
        {
            let plain_fence = "Here is some code:\n```\nlet x = 42;\n```\nAnd some text after.";
            assert_extracted(extract_markdown_block(plain_fence), "let x = 42;");
        }

        // Code block with a specific language (rust).
        {
            let rust_fence = "Here's Rust code:\n```rust\nfn main() {
    println!(\"Hello\");
}
```";
            assert_extracted(
                extract_markdown_block_with_lang(rust_fence, "rust"),
                "fn main() {\n    println!(\"Hello\");\n}",
            );
        }

        // Extracting the rust block when a json block is also present.
        // NOTE: inside a raw string `\n` is a literal backslash followed by
        // `n`, not a newline; the input is kept exactly as written.
        {
            let two_fences = r#"\nFirst a JSON block:
```json
{"key": "value"}
```

Then a Rust block:
```rust
let data = vec![1, 2, 3];
```
"#;
            assert_extracted(
                extract_markdown_block_with_lang(two_fences, "rust"),
                "let data = vec![1, 2, 3];",
            );
        }

        // No code block present: extraction must fail.
        {
            let no_fence = "This text has no code blocks at all.";
            assert!(extract_markdown_block(no_fence).is_err());
        }

        // Messy surrounding text, indented fences and extra blank lines.
        // NOTE: the leading `\n` is literal here as well (raw string).
        {
            let messy = r#"\nLots of text before...


   ```python
def hello():
    print("world")
    return True
   ```   


And more text after with various spacing.
"#;
            assert_extracted(
                extract_markdown_block_with_lang(messy, "python"),
                "def hello():\n    print(\"world\")\n    return True",
            );
        }
    }
}
153}