llm_toolkit/extract/
core.rs

1use super::error::ParseError;
2use serde::{Deserialize, Serialize};
3
4/// Core trait for response parsing
5pub trait ResponseParser<T> {
6    /// Parse response content into target type
7    fn parse(&self, content: &str) -> Result<T, ParseError>;
8
9    /// Extract content using configured strategies
10    fn extract_content(&self, text: &str) -> String;
11
12    /// Fallback parsing when primary parsing fails
13    fn fallback_parse(&self, content: &str, error: &ParseError) -> Result<T, ParseError>;
14}
15
/// Lookup operations for pulling tagged or structured fragments out of text.
///
/// Every method borrows its input and hands back an owned `String` wrapped in
/// an `Option`; `None` is the only way a method can signal that it produced
/// nothing.
pub trait ContentExtractor {
    /// Returns the content found inside the tag named `tag` within `text`,
    /// if any.
    fn extract_tagged(&self, text: &str, tag: &str) -> Option<String>;

    /// Returns a JSON-like fragment of `text`, if any.
    fn extract_json_like(&self, text: &str) -> Option<String>;

    /// Returns the part of `text` selected by the caller-supplied `pattern`,
    /// if any.
    ///
    /// The pattern syntax is defined by the implementor, not by this trait.
    fn extract_pattern(&self, text: &str, pattern: &str) -> Option<String>;
}
27
28/// Extraction strategy configuration
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub enum ExtractionStrategy {
31    /// Extract content within XML-like tags: <tag>content</tag>
32    TaggedContent(String),
33
34    /// Extract content within JSON braces: {...}
35    JsonBrackets,
36
37    /// Find first complete JSON object
38    FirstJsonObject,
39
40    /// Search for specific keywords and determine type
41    KeywordSearch(Vec<String>),
42
43    /// Use regex pattern for extraction
44    RegexPattern(String),
45
46    /// Return original text as-is
47    OriginalText,
48}
49
50/// Configuration for response parsing
51#[derive(Debug, Clone)]
52pub struct ParsingConfig {
53    /// Primary tag to look for (e.g., "answer", "response")
54    pub primary_tag: String,
55
56    /// Ordered list of extraction strategies to try
57    pub extraction_strategies: Vec<ExtractionStrategy>,
58
59    /// Whether to enable debug logging
60    pub debug_mode: bool,
61
62    /// Maximum content length to process
63    pub max_content_length: Option<usize>,
64}
65
66impl Default for ParsingConfig {
67    fn default() -> Self {
68        Self {
69            primary_tag: "answer".to_string(),
70            extraction_strategies: vec![
71                ExtractionStrategy::TaggedContent("answer".to_string()),
72                ExtractionStrategy::JsonBrackets,
73                ExtractionStrategy::OriginalText,
74            ],
75            debug_mode: false,
76            max_content_length: Some(50_000), // 50KB limit
77        }
78    }
79}
80
81impl ParsingConfig {
82    /// Create new config with custom tag
83    pub fn with_tag(tag: &str) -> Self {
84        Self {
85            primary_tag: tag.to_string(),
86            extraction_strategies: vec![ExtractionStrategy::TaggedContent(tag.to_string())],
87            ..Default::default()
88        }
89    }
90
91    /// Add extraction strategy
92    pub fn add_strategy(mut self, strategy: ExtractionStrategy) -> Self {
93        self.extraction_strategies.push(strategy);
94        self
95    }
96
97    /// Enable debug mode
98    pub fn with_debug(mut self) -> Self {
99        self.debug_mode = true;
100        self
101    }
102}