llm-toolkit 0.63.1

A low-level, unopinionated Rust toolkit for the LLM last mile problem.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
//! `llm-toolkit` - A low-level Rust toolkit for the LLM last mile problem.
//!
//! This library provides a set of sharp, reliable, and unopinionated "tools"
//! for building robust LLM-powered applications in Rust. It focuses on solving
//! the common and frustrating problems that occur at the boundary between a
//! strongly-typed Rust application and the unstructured, often unpredictable
//! string-based responses from LLM APIs.

// Allow the crate to reference itself by name, which is needed for proc macros
// to work correctly in examples, tests, and bins
extern crate self as llm_toolkit;

// Re-export tracing for use by generated code from macros
// Using extern crate to ensure it's accessible via absolute path
#[cfg(feature = "agent")]
pub extern crate tracing;

// Re-export minijinja for use by ToPrompt derive macro
pub extern crate minijinja;

// Re-export quick_xml for use by define_intent macro
#[cfg(feature = "derive")]
pub extern crate quick_xml;

/// A derive macro to implement the `ToPrompt` trait for structs.
///
/// This macro is available only when the `derive` feature is enabled.
/// See the [crate-level documentation](index.html#2-structured-prompts-with-derivetoprompt) for usage examples.
#[cfg(feature = "derive")]
pub use llm_toolkit_macros::ToPrompt;

/// A derive macro to implement the `ToPromptSet` trait for structs.
///
/// This macro is available only when the `derive` feature is enabled.
#[cfg(feature = "derive")]
pub use llm_toolkit_macros::ToPromptSet;

/// A derive macro to implement the `ToPromptFor` trait for structs.
///
/// This macro is available only when the `derive` feature is enabled.
#[cfg(feature = "derive")]
pub use llm_toolkit_macros::ToPromptFor;

/// A macro for creating examples sections in prompts.
///
/// This macro is available only when the `derive` feature is enabled.
#[cfg(feature = "derive")]
pub use llm_toolkit_macros::examples_section;

/// A procedural attribute macro for defining intent enums with automatic prompt and extractor generation.
///
/// This macro is available only when the `derive` feature is enabled.
#[cfg(feature = "derive")]
pub use llm_toolkit_macros::define_intent;

/// A derive macro to implement the `Agent` trait for structs.
///
/// This macro is available only when the `agent` feature is enabled.
/// It automatically generates an Agent implementation that uses ClaudeCodeAgent
/// internally and deserializes responses into a structured output type.
///
/// # Example
///
/// ```ignore
/// use llm_toolkit_macros::Agent;
/// use serde::{Deserialize, Serialize};
///
/// #[derive(Serialize, Deserialize)]
/// struct MyOutput {
///     result: String,
/// }
///
/// #[derive(Agent)]
/// #[agent(expertise = "My expertise", output = "MyOutput")]
/// struct MyAgent;
/// ```
#[cfg(feature = "agent")]
pub use llm_toolkit_macros::Agent;

/// An attribute macro to define agent structs with automatic trait implementations.
///
/// This macro is available only when the `agent` feature is enabled.
#[cfg(feature = "agent")]
pub use llm_toolkit_macros::agent;

/// A derive macro to implement the `TypeMarker` trait for structs.
///
/// This macro is available only when the `agent` feature is enabled.
/// It automatically generates a TypeMarker implementation that provides
/// a type identifier string for type-based orchestrator output retrieval.
///
/// # Example
///
/// ```ignore
/// use llm_toolkit::TypeMarker;
/// use serde::{Deserialize, Serialize};
///
/// #[derive(Serialize, Deserialize, TypeMarker)]
/// struct MyResponse {
///     #[serde(default = "default_type")]
///     __type: String,
///     result: String,
/// }
///
/// fn default_type() -> String {
///     "MyResponse".to_string()
/// }
/// ```
#[cfg(feature = "agent")]
pub use llm_toolkit_macros::{TypeMarker, type_marker};

pub mod attachment;
pub mod context;
pub mod extract;
pub mod intent;
pub mod models;
pub mod multimodal;
pub mod prompt;
pub mod retrieval;

#[cfg(feature = "agent")]
pub mod observability;

#[cfg(feature = "agent")]
pub mod agent;

#[cfg(feature = "agent")]
pub mod orchestrator;

pub use attachment::{Attachment, AttachmentSchema, ToAttachments};
pub use context::{ContextProfile, Priority, TaskHealth};
pub use extract::{FlexibleExtractor, MarkdownCodeBlockExtractor};
#[cfg(feature = "agent")]
pub use intent::expandable::{
    Expandable, ReActConfig, ReActError, ReActResult, RegistryError, Selectable, SelectionRegistry,
    react_loop, simple_tag_selector,
};
pub use intent::frame::IntentFrame;
#[allow(deprecated)]
pub use intent::{IntentError, IntentExtractor, PromptBasedExtractor};
pub use models::{ClaudeModel, GeminiModel, Model, ModelError, OpenAIModel};
pub use multimodal::ImageData;
pub use prompt::{PromptPart, PromptSetError, ToPrompt, ToPromptFor, ToPromptSet};
pub use retrieval::Document;

#[cfg(feature = "agent")]
pub use agent::{Agent, AgentError, AnyAgent, ToExpertise};

#[cfg(feature = "agent")]
pub use agent::persona::{ContextConfig, Persona, PersonaAgent};

#[cfg(feature = "agent")]
pub use agent::retrieval::RetrievalAwareAgent;

#[cfg(feature = "agent")]
pub use agent::expertise::{
    Anchor, ContextualPrompt, Expertise, KnowledgeFragment, RenderContext, WeightedFragment,
};

#[cfg(feature = "agent")]
pub use orchestrator::{
    BlueprintWorkflow, Orchestrator, OrchestratorError, StrategyMap, TypeMarker,
};

use extract::ParseError;

/// Pulls a JSON payload out of a raw LLM response.
///
/// Candidates are tried in order, and the first success wins:
///
/// 1. A Markdown code block tagged `json`, as found by
///    `extract_markdown_block_with_lang`; its content is returned as-is,
///    without a further JSON extraction pass.
/// 2. The block returned by `extract_markdown_block`, provided a
///    `FlexibleExtractor` can extract JSON from that block's content.
/// 3. A `FlexibleExtractor` run over the complete input text.
///
/// For more advanced control over extraction strategies, see the
/// `extract::FlexibleExtractor` struct.
///
/// # Errors
///
/// Returns the `ParseError` produced by the final `FlexibleExtractor` pass
/// over `text` when none of the steps above yields a result.
pub fn extract_json(text: &str) -> Result<String, ParseError> {
    extract_markdown_block_with_lang(text, "json")
        // No `json`-tagged block: accept an untagged/other block, but only if
        // JSON can actually be extracted from its content.
        .or_else(|_| {
            extract_markdown_block(text)
                .and_then(|block| FlexibleExtractor::new().extract(&block))
        })
        // Last resort: run the standard extraction strategies on the raw text.
        .or_else(|_| FlexibleExtractor::new().extract(text))
}

/// Returns the content of a Markdown code block found in `text`.
///
/// This is a convenience wrapper that runs an extractor built with
/// `MarkdownCodeBlockExtractor::new()` (no language filter supplied) over
/// the input. As exercised by this crate's tests, the returned string is the
/// block's inner content without the surrounding triple-backtick fences.
///
/// # Errors
///
/// Propagates the `ParseError` reported by the extractor, e.g. when the text
/// contains no code block.
pub fn extract_markdown_block(text: &str) -> Result<String, ParseError> {
    MarkdownCodeBlockExtractor::new().extract(text)
}

/// Returns the content of a Markdown code block tagged with a given language.
///
/// This is a convenience wrapper that runs an extractor built with
/// `MarkdownCodeBlockExtractor::with_language` over the input, so that a
/// block such as ```rust or ```python can be targeted specifically.
///
/// # Arguments
///
/// * `text` - The text to search for a fenced code block
/// * `lang` - The language tag the block must carry (e.g., "rust", "python")
///
/// # Errors
///
/// Propagates the `ParseError` reported by the extractor, e.g. when no block
/// with the requested language tag is present.
pub fn extract_markdown_block_with_lang(text: &str, lang: &str) -> Result<String, ParseError> {
    MarkdownCodeBlockExtractor::with_language(String::from(lang)).extract(text)
}

// Unit tests for the crate-level convenience functions `extract_json`,
// `extract_markdown_block` and `extract_markdown_block_with_lang`.
#[cfg(test)]
mod tests {
    use super::*;

    // Inline JSON embedded in plain prose is returned verbatim, without the
    // surrounding text.
    #[test]
    fn test_json_extraction() {
        let input = "Some text before {\"key\": \"value\"} and after.";
        assert_eq!(extract_json(input).unwrap(), "{\"key\": \"value\"}");
    }

    // JSON wrapped in `<answer>` tags is returned without the tags.
    #[test]
    fn test_standard_extraction_from_tagged_content() {
        let text = "<answer>{\"type\": \"success\"}</answer>";
        let result = extract_json(text);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), "{\"type\": \"success\"}");
    }

    // Covers the two Markdown helpers: untagged blocks, language-tagged
    // blocks, language selection among several blocks, the error case, and
    // fences surrounded by irregular whitespace.
    #[test]
    fn test_markdown_extraction() {
        // Test simple code block with no language
        let text1 = "Here is some code:\n```\nlet x = 42;\n```\nAnd some text after.";
        let result1 = extract_markdown_block(text1);
        assert!(result1.is_ok());
        assert_eq!(result1.unwrap(), "let x = 42;");

        // Test code block with specific language (rust)
        // The literal below deliberately continues across source lines, so the
        // real newlines and the four-space indent are part of the input.
        let text2 = "Here's Rust code:\n```rust\nfn main() {
    println!(\"Hello\");
}
```";
        let result2 = extract_markdown_block_with_lang(text2, "rust");
        assert!(result2.is_ok());
        assert_eq!(result2.unwrap(), "fn main() {\n    println!(\"Hello\");\n}");

        // Test extracting rust block when json block is also present
        // NOTE: inside a raw string `\n` is a literal backslash followed by
        // `n`, not a newline; here it is only leading prose ahead of the
        // fenced blocks.
        let text3 = r#"\nFirst a JSON block:
```json
{"key": "value"}
```

Then a Rust block:
```rust
let data = vec![1, 2, 3];
```
"#;
        let result3 = extract_markdown_block_with_lang(text3, "rust");
        assert!(result3.is_ok());
        assert_eq!(result3.unwrap(), "let data = vec![1, 2, 3];");

        // Test case where no code block is found
        let text4 = "This text has no code blocks at all.";
        let result4 = extract_markdown_block(text4);
        assert!(result4.is_err());

        // Test with messy surrounding text and newlines
        // The fences are indented by three spaces; the expected content below
        // carries no trace of that fence indentation. As above, the leading
        // `\n` in the raw string is a literal backslash + `n`.
        let text5 = r#"\nLots of text before...


   ```python
def hello():
    print("world")
    return True
   ```


And more text after with various spacing.
"#;
        let result5 = extract_markdown_block_with_lang(text5, "python");
        assert!(result5.is_ok());
        assert_eq!(
            result5.unwrap(),
            "def hello():\n    print(\"world\")\n    return True"
        );
    }

    // A `json`-tagged block is the first candidate `extract_json` tries.
    #[test]
    fn test_extract_json_from_json_markdown_block() {
        // Test extraction from JSON markdown block (highest priority)
        let text = r#"Here's the response:
```json
{"status": "success", "count": 42}
```
That's the data you requested."#;
        let result = extract_json(text);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), r#"{"status": "success", "count": 42}"#);
    }

    // An untagged block whose content is JSON is accepted by `extract_json`.
    #[test]
    fn test_extract_json_from_generic_markdown_block() {
        // Test extraction from generic markdown block containing JSON
        let text = r#"The output is:
```
{"result": "ok", "value": 123}
```
End of output."#;
        let result = extract_json(text);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), r#"{"result": "ok", "value": 123}"#);
    }

    // The `json` block wins even though inline JSON appears earlier in the text.
    #[test]
    fn test_extract_json_priority_json_block_over_inline() {
        // When both JSON markdown block and inline JSON exist, JSON block should be preferred
        let text = r#"Some inline {"inline": "data"} here.
```json
{"block": "data"}
```
More text."#;
        let result = extract_json(text);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), r#"{"block": "data"}"#);
    }

    // The `json` block wins even though an untagged block with JSON comes first.
    #[test]
    fn test_extract_json_priority_json_block_over_generic_block() {
        // JSON markdown block should be preferred over generic block
        let text = r#"First a generic block:
```
{"generic": "block"}
```

Then a JSON block:
```json
{"json": "block"}
```"#;
        let result = extract_json(text);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), r#"{"json": "block"}"#);
    }

    // An untagged block without JSON is rejected, and extraction falls back
    // to the inline JSON after it.
    #[test]
    fn test_extract_json_fallback_from_non_json_markdown_block() {
        // When markdown block contains non-JSON, fallback to inline extraction
        let text = r#"Here's some code:
```
This is not JSON at all
```
But this is JSON: {"fallback": "value"}"#;
        let result = extract_json(text);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), r#"{"fallback": "value"}"#);
    }

    // A `rust`-tagged block holding no JSON does not stop the inline JSON
    // after it from being found.
    #[test]
    fn test_extract_json_from_rust_block_fallback() {
        // When only non-JSON markdown blocks exist, fallback to inline extraction
        let text = r#"```rust
let x = 42;
```
The result is {"data": "inline"}"#;
        let result = extract_json(text);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), r#"{"data": "inline"}"#);
    }

    // Pretty-printed JSON spanning several lines inside a `json` block is
    // extracted; only substrings are checked, not the exact whitespace.
    #[test]
    fn test_extract_json_multiline_in_markdown_block() {
        // Test extraction of multiline JSON from markdown block
        let text = r#"Response:
```json
{
  "name": "test",
  "values": [1, 2, 3],
  "nested": {
    "key": "value"
  }
}
```"#;
        let result = extract_json(text);
        assert!(result.is_ok());
        let json = result.unwrap();
        // Verify it contains the expected structure
        assert!(json.contains("\"name\": \"test\""));
        assert!(json.contains("\"values\": [1, 2, 3]"));
        assert!(json.contains("\"nested\""));
    }
}