// agent_chain_core/output_parsers/base.rs

1//! Base parser for language model outputs.
2//!
3//! This module contains the base traits and types for output parsers,
4//! mirroring `langchain_core.output_parsers.base`.
5
6use std::fmt::Debug;
7
8use async_trait::async_trait;
9use serde::{Deserialize, Serialize};
10use serde_json::Value;
11
12use crate::error::{Error, Result};
13use crate::messages::BaseMessage;
14use crate::outputs::{ChatGeneration, Generation};
15use crate::prompt_values::PromptValue;
16use crate::runnables::RunnableConfig;
17
/// Abstract base trait for parsing the outputs of a model.
///
/// This is the most basic output parser trait. It requires implementing
/// `parse_result`, which takes a list of candidate [`Generation`] objects
/// and parses them into a specific format.
#[async_trait]
pub trait BaseLLMOutputParser: Send + Sync + Debug {
    /// The output type of this parser.
    type Output: Send + Sync + Clone + Debug;

    /// Parse a list of candidate model `Generation` objects into a specific format.
    ///
    /// # Arguments
    ///
    /// * `result` - A list of `Generation` to be parsed. The `Generation` objects are
    ///   assumed to be different candidate outputs for a single model input.
    /// * `partial` - Whether to parse the output as a partial result. This is useful
    ///   for parsers that can parse partial (e.g. streamed) results; parsers that
    ///   cannot may ignore the flag.
    ///
    /// # Returns
    ///
    /// Structured output.
    fn parse_result(&self, result: &[Generation], partial: bool) -> Result<Self::Output>;

    /// Async parse a list of candidate model `Generation` objects into a specific format.
    ///
    /// The default implementation delegates to the synchronous
    /// [`parse_result`](Self::parse_result) without yielding; override it only
    /// when parsing genuinely needs to await (e.g. performs I/O).
    async fn aparse_result(&self, result: &[Generation], partial: bool) -> Result<Self::Output> {
        self.parse_result(result, partial)
    }
}
49
/// Base trait to parse the output of an LLM call.
///
/// `BaseGenerationOutputParser` extends [`BaseLLMOutputParser`] and integrates with
/// the Runnable interface. It processes raw generation outputs from language models.
#[async_trait]
pub trait BaseGenerationOutputParser: BaseLLMOutputParser {
    /// Invoke the parser on a message input.
    ///
    /// The message is wrapped in a `ChatGeneration` (mirroring the Python
    /// implementation for message inputs) whose extracted text is then handed
    /// to [`parse_result`](BaseLLMOutputParser::parse_result).
    ///
    /// NOTE(review): the `ChatGeneration` is flattened to a text-only
    /// `Generation` before parsing, so `parse_result` never sees the original
    /// message structure — confirm this matches the intended Python parity.
    ///
    /// # Arguments
    ///
    /// * `input` - The chat message to parse.
    /// * `config` - Optional runnable configuration (currently unused).
    fn invoke(&self, input: BaseMessage, _config: Option<RunnableConfig>) -> Result<Self::Output> {
        // Wrap the message so its text content can be extracted uniformly.
        let chat_gen = ChatGeneration::new(input);
        self.parse_result(&[Generation::new(&chat_gen.text)], false)
    }

    /// Async invoke the parser on a message input.
    ///
    /// Default implementation delegates to the synchronous [`invoke`](Self::invoke).
    async fn ainvoke(
        &self,
        input: BaseMessage,
        config: Option<RunnableConfig>,
    ) -> Result<Self::Output> {
        self.invoke(input, config)
    }
}
81
82/// Base trait to parse the output of an LLM call.
83///
84/// Output parsers help structure language model responses.
85/// This is the main trait that most output parsers implement.
86///
87/// # Example
88///
89/// ```ignore
90/// struct BooleanOutputParser {
91///     true_val: String,
92///     false_val: String,
93/// }
94///
95/// impl BaseOutputParser for BooleanOutputParser {
96///     type Output = bool;
97///
98///     fn parse(&self, text: &str) -> Result<bool> {
99///         let cleaned_text = text.trim().to_uppercase();
100///         if cleaned_text == self.true_val.to_uppercase() {
101///             Ok(true)
102///         } else if cleaned_text == self.false_val.to_uppercase() {
103///             Ok(false)
104///         } else {
105///             Err(OutputParserError::parse_error(format!(
106///                 "Expected {} or {}, got {}",
107///                 self.true_val, self.false_val, cleaned_text
108///             )).into())
109///         }
110///     }
111///
112///     fn parser_type(&self) -> &str {
113///         "boolean_output_parser"
114///     }
115/// }
116/// ```
117#[async_trait]
118pub trait BaseOutputParser: Send + Sync + Debug {
119    /// The output type of this parser.
120    type Output: Send + Sync + Clone + Debug;
121
122    /// Parse a single string model output into some structure.
123    ///
124    /// # Arguments
125    ///
126    /// * `text` - String output of a language model.
127    ///
128    /// # Returns
129    ///
130    /// Structured output.
131    fn parse(&self, text: &str) -> Result<Self::Output>;
132
133    /// Async parse a single string model output into some structure.
134    ///
135    /// Default implementation calls the sync version.
136    async fn aparse(&self, text: &str) -> Result<Self::Output> {
137        self.parse(text)
138    }
139
140    /// Parse a list of candidate model `Generation` objects into a specific format.
141    ///
142    /// The return value is parsed from only the first `Generation` in the result,
143    /// which is assumed to be the highest-likelihood `Generation`.
144    ///
145    /// # Arguments
146    ///
147    /// * `result` - A list of `Generation` to be parsed.
148    /// * `partial` - Whether to parse the output as a partial result.
149    ///
150    /// # Panics
151    ///
152    /// This method will panic if `result` is empty, matching the Python behavior
153    /// which raises an IndexError when accessing `result[0]` on an empty list.
154    fn parse_result(&self, result: &[Generation], _partial: bool) -> Result<Self::Output> {
155        // Match Python behavior: access result[0] directly (panics if empty)
156        self.parse(&result[0].text)
157    }
158
159    /// Async parse a list of candidate model `Generation` objects into a specific format.
160    async fn aparse_result(&self, result: &[Generation], partial: bool) -> Result<Self::Output> {
161        self.parse_result(result, partial)
162    }
163
164    /// Parse the output of an LLM call with the input prompt for context.
165    ///
166    /// The prompt is largely provided in the event the `OutputParser` wants
167    /// to retry or fix the output in some way, and needs information from
168    /// the prompt to do so.
169    ///
170    /// # Arguments
171    ///
172    /// * `completion` - String output of a language model.
173    /// * `prompt` - Input `PromptValue`.
174    fn parse_with_prompt(
175        &self,
176        completion: &str,
177        _prompt: &dyn PromptValue,
178    ) -> Result<Self::Output> {
179        self.parse(completion)
180    }
181
182    /// Instructions on how the LLM output should be formatted.
183    ///
184    /// # Errors
185    ///
186    /// Returns an error if format instructions are not implemented for this parser.
187    /// Subclasses should override this method to provide format instructions.
188    fn get_format_instructions(&self) -> Result<String> {
189        Err(Error::Other(
190            "get_format_instructions not implemented".to_string(),
191        ))
192    }
193
194    /// Return the output parser type for serialization.
195    fn parser_type(&self) -> &str;
196
197    /// Invoke the parser on input.
198    ///
199    /// For string inputs, creates a `Generation` with the text.
200    /// For message inputs, creates a `ChatGeneration` with the message,
201    /// matching the Python implementation.
202    fn invoke(&self, input: BaseMessage, _config: Option<RunnableConfig>) -> Result<Self::Output> {
203        // Match Python: use ChatGeneration for message inputs
204        let chat_gen = ChatGeneration::new(input);
205        // ChatGeneration has a text field that extracts content from message
206        self.parse_result(&[Generation::new(&chat_gen.text)], false)
207    }
208
209    /// Async invoke the parser on input.
210    async fn ainvoke(
211        &self,
212        input: BaseMessage,
213        config: Option<RunnableConfig>,
214    ) -> Result<Self::Output> {
215        self.invoke(input, config)
216    }
217}
218
/// Error type for output parser operations.
///
/// Mirrors Python's `OutputParserException`: besides the message it carries
/// the raw LLM output and optional retry context.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputParserError {
    /// Human-readable description of what failed to parse.
    pub message: String,
    /// The raw LLM output that caused the error, if captured.
    pub llm_output: Option<String>,
    /// Whether the error context should be sent back to the LLM so it can
    /// retry (see `observation`); defaults to `false` in the constructors.
    pub send_to_llm: bool,
    /// Observation to send back to the LLM if retrying.
    pub observation: Option<String>,
}
231
232impl OutputParserError {
233    /// Create a new output parser error.
234    pub fn new(message: impl Into<String>) -> Self {
235        Self {
236            message: message.into(),
237            llm_output: None,
238            send_to_llm: false,
239            observation: None,
240        }
241    }
242
243    /// Create a parse error with the LLM output.
244    pub fn parse_error(message: impl Into<String>, llm_output: impl Into<String>) -> Self {
245        Self {
246            message: message.into(),
247            llm_output: Some(llm_output.into()),
248            send_to_llm: false,
249            observation: None,
250        }
251    }
252
253    /// Set whether this error should be sent back to the LLM.
254    pub fn with_send_to_llm(mut self, send: bool) -> Self {
255        self.send_to_llm = send;
256        self
257    }
258
259    /// Set the observation to send back to the LLM.
260    pub fn with_observation(mut self, observation: impl Into<String>) -> Self {
261        self.observation = Some(observation.into());
262        self
263    }
264
265    /// Set the LLM output.
266    pub fn with_llm_output(mut self, llm_output: impl Into<String>) -> Self {
267        self.llm_output = Some(llm_output.into());
268        self
269    }
270}
271
272impl std::fmt::Display for OutputParserError {
273    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
274        write!(f, "{}", self.message)
275    }
276}
277
// Marker impl: `OutputParserError` wraps no underlying error, so the trait's
// default `source() -> None` is correct and no methods need overriding.
impl std::error::Error for OutputParserError {}
279
impl From<OutputParserError> for Error {
    /// Convert into the crate-wide [`Error`] type.
    ///
    /// NOTE(review): this conversion is lossy — `llm_output`, `send_to_llm`,
    /// and `observation` are dropped and only `message` survives. Callers
    /// that need the retry context should handle `OutputParserError` directly
    /// before converting.
    fn from(err: OutputParserError) -> Self {
        Error::Other(err.message)
    }
}
285
286/// Convert a Generation to a Value for JSON operations.
287pub fn generation_to_value(generation: &Generation) -> Value {
288    serde_json::json!({
289        "text": generation.text,
290        "generation_info": generation.generation_info,
291    })
292}
293
294/// Convert a ChatGeneration to a Value for JSON operations.
295pub fn chat_generation_to_value(generation: &ChatGeneration) -> Value {
296    serde_json::json!({
297        "text": generation.text,
298        "message": generation.message,
299        "generation_info": generation.generation_info,
300    })
301}
302
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal parser exercising the trait's default methods: it simply
    /// upper-cases the model output.
    #[derive(Debug)]
    struct UppercaseParser;

    impl BaseOutputParser for UppercaseParser {
        type Output = String;

        fn parse(&self, text: &str) -> Result<String> {
            Ok(text.to_uppercase())
        }

        fn parser_type(&self) -> &str {
            "test"
        }
    }

    #[test]
    fn test_base_output_parser() {
        // `parse` applies the parser's transformation to the raw text.
        assert_eq!(UppercaseParser.parse("hello").unwrap(), "HELLO");
    }

    #[test]
    fn test_parse_result() {
        // The default `parse_result` parses the first generation's text.
        let generations = [Generation::new("hello")];
        let parsed = UppercaseParser.parse_result(&generations, false).unwrap();
        assert_eq!(parsed, "HELLO");
    }

    #[test]
    fn test_output_parser_error() {
        // `parse_error` records both the message and the raw LLM output.
        let err = OutputParserError::parse_error("Invalid JSON", "{invalid}");
        assert_eq!(err.message, "Invalid JSON");
        assert_eq!(err.llm_output.as_deref(), Some("{invalid}"));
    }
}
343}