llm_toolkit/extract/mod.rs
1//! Content extraction and JSON repair utilities for LLM responses.
2//!
3//! This module provides tools for extracting structured data from unstructured
4//! LLM outputs and repairing common JSON syntax errors.
5//!
6//! # Features
7//!
8//! - **Content Extraction**: Extract JSON objects, tagged content, and code blocks
9//! - **JSON Sanitization**: Auto-fix trailing commas, unclosed brackets/strings
10//! - **Fuzzy Repair**: Schema-based typo correction for tagged enums
11//!
12//! # Examples
13//!
14//! ## Extract JSON from an LLM response
15//!
16//! ```rust
17//! use llm_toolkit::extract::FlexibleExtractor;
18//!
19//! let extractor = FlexibleExtractor::new();
20//! let response = r#"Here's the data: {"status": "ok", "count": 42}"#;
21//! let json = extractor.extract(response).unwrap();
22//! assert_eq!(json, r#"{"status": "ok", "count": 42}"#);
23//! ```
24//!
25//! ## Sanitize malformed JSON
26//!
27//! ```rust
28//! use llm_toolkit::extract::sanitize_json;
29//!
30//! // Fix trailing commas
31//! let fixed = sanitize_json(r#"{"name": "Alice", "age": 30,}"#);
32//! assert_eq!(fixed, r#"{"name": "Alice", "age": 30}"#);
33//! ```
34//!
35//! ## Repair typos in tagged enums
36//!
37//! ```rust
38//! use llm_toolkit::extract::{repair_tagged_enum_json, TaggedEnumSchema, FuzzyOptions};
39//!
40//! let schema = TaggedEnumSchema::new(
41//!     "type",
42//!     &["AddDerive", "RemoveDerive"],
43//!     |_| None,
44//! );
45//!
46//! // The LLM output contains a typo: "AddDeriv" instead of "AddDerive"
47//! let result = repair_tagged_enum_json(
48//!     r#"{"type": "AddDeriv", "target": "MyStruct"}"#,
49//!     &schema,
50//!     &FuzzyOptions::default(),
51//! ).unwrap();
52//!
53//! assert!(result.repaired.to_string().contains("AddDerive"));
54//! ```
55
56pub mod core;
57pub mod error;
58pub mod extractors;
59
60pub use self::core::{ContentExtractor, ExtractionStrategy, ParsingConfig};
61pub use self::error::ParseError;
62pub use self::extractors::{FlexibleExtractor, MarkdownCodeBlockExtractor};
63
64// Re-export the `fuzzy_parser` API so its LLM JSON repair tools are reachable through this module
65pub use fuzzy_parser::{
66 Algorithm, Correction, FuzzyError, FuzzyOptions, RepairResult, TaggedEnumSchema,
67 repair_tagged_enum_json, sanitize_json,
68};