tryparse/
lib.rs

1//! # tryparse
2//!
3//! A forgiving parser that converts messy LLM responses into strongly-typed Rust structs.
4//!
5//! This library handles common issues in LLM outputs like:
6//! - JSON wrapped in markdown code blocks
7//! - Trailing commas
8//! - Single quotes instead of double quotes
9//! - Unquoted object keys
10//! - Type mismatches (string numbers, etc.)
11//!
12//! ## Quick Start
13//!
14//! ```rust
15//! use tryparse::parse;
16//! use serde::Deserialize;
17//!
18//! #[derive(Deserialize, Debug)]
19//! struct User {
20//!     name: String,
21//!     age: u32,
22//! }
23//!
24//! // Parse messy LLM output with unquoted keys and string numbers
25//! let messy_response = r#"{name: "Alice", age: "30"}"#;
26//!
27//! let user: User = parse(messy_response).unwrap();
28//! assert_eq!(user.name, "Alice");
29//! assert_eq!(user.age, 30); // Automatically coerced from string
30//! ```
31//!
32//! ## Features
33//!
34//! - **Multi-Strategy Parsing**: Tries multiple approaches to extract JSON
35//! - **Smart Type Coercion**: Converts between compatible types automatically
36//! - **Transformation Tracking**: Records all modifications made during parsing
37//! - **Candidate Scoring**: Ranks multiple interpretations by quality
38//! - **Zero Configuration**: Works out of the box with sensible defaults
39//!
40//! ## Advanced Usage
41//!
42//! For more control over the parsing process:
43//!
44//! ```rust
45//! use tryparse::{parse_with_candidates, parser::FlexibleParser};
46//! use serde::Deserialize;
47//!
48//! #[derive(Deserialize)]
49//! struct Data {
50//!     value: i32,
51//! }
52//!
53//! let response = r#"{"value": "42"}"#;
54//!
55//! // Get all candidates with metadata
56//! let (result, candidates) = parse_with_candidates::<Data>(response).unwrap();
57//!
58//! // Or use the parser directly
59//! let parser = FlexibleParser::new();
60//! let flex_values = parser.parse(response).unwrap();
61//! ```
62
63pub mod constraints;
64pub mod deserializer;
65pub mod error;
66pub mod parser;
67pub mod scoring;
68pub mod value;
69
70// Ensure primitive type implementations are linked
71// This prevents "trait bound not satisfied" errors in integration tests
72#[doc(hidden)]
73pub fn __ensure_primitives_linked() {
74    deserializer::primitives::__ensure_linked();
75}
76
77use std::time::{Duration, Instant};
78
79use deserializer::{CoercingDeserializer, CoercionContext, LlmDeserialize};
80use error::{ParseError, Result};
81use parser::FlexibleParser;
82use serde::de::DeserializeOwned;
83
/// Metadata about a parsing operation.
///
/// Returned by `parse_with_metadata` to show which strategy produced the
/// winning candidate, how long the call took, and how many candidates were
/// tried along the way.
///
/// The type is comparable (`PartialEq`/`Eq`) so that metadata can be asserted
/// on in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseMetadata {
    /// Label of the parsing strategy that produced the successful candidate
    /// (for example `"direct"` or `"markdown(json)"`).
    pub strategy_used: String,
    /// Time elapsed from the start of the call until the winning candidate
    /// had been deserialized.
    pub duration: Duration,
    /// Number of candidates that were tried, *including* the one that
    /// succeeded (i.e. the 1-based position of the winner in the ranked list).
    pub candidates_evaluated: usize,
    /// Number of candidates the parser produced, counted before ranking.
    pub total_candidates: usize,
    /// Score of the winning candidate (lower is better).
    pub winning_score: u32,
}
100use value::FlexValue;
101
102/// Parses an LLM response into a strongly-typed Rust struct.
103///
104/// This is the main entry point for the library. It combines flexible parsing
105/// with smart type coercion to handle messy LLM outputs.
106///
107/// # Examples
108///
109/// ```
110/// use tryparse::parse;
111/// use serde::Deserialize;
112///
113/// #[derive(Deserialize, Debug, PartialEq)]
114/// struct User {
115///     name: String,
116///     age: u32,
117/// }
118///
119/// let response = r#"{"name": "Alice", "age": "30"}"#;
120/// let user: User = parse(response).unwrap();
121/// assert_eq!(user, User { name: "Alice".into(), age: 30 });
122/// ```
123///
124/// # Errors
125///
126/// Returns `ParseError::NoCandidates` if no valid JSON could be extracted.
127/// Returns `ParseError::DeserializeFailed` if deserialization fails for all candidates.
128pub fn parse<T: DeserializeOwned>(input: &str) -> Result<T> {
129    let (result, _candidates) = parse_with_candidates(input)?;
130    Ok(result)
131}
132
133/// Parses an LLM response and returns both the result and all candidates.
134///
135/// This variant provides access to all parsing candidates with their metadata,
136/// allowing inspection of what transformations were applied.
137///
138/// # Examples
139///
140/// ```
141/// use tryparse::parse_with_candidates;
142/// use serde::Deserialize;
143///
144/// #[derive(Deserialize)]
145/// struct Data {
146///     value: i32,
147/// }
148///
149/// let response = r#"{"value": "42"}"#;
150/// let (data, candidates) = parse_with_candidates::<Data>(response).unwrap();
151///
152/// assert_eq!(data.value, 42);
153/// assert!(!candidates.is_empty());
154/// ```
155///
156/// # Errors
157///
158/// Returns `ParseError::NoCandidates` if no valid JSON could be extracted.
159/// Returns `ParseError::DeserializeFailed` if deserialization fails for all candidates.
160pub fn parse_with_candidates<T: DeserializeOwned>(input: &str) -> Result<(T, Vec<FlexValue>)> {
161    let parser = FlexibleParser::new();
162    let candidates = parser.parse(input)?;
163
164    if candidates.is_empty() {
165        return Err(ParseError::NoCandidates);
166    }
167
168    // Try to deserialize each candidate
169    let mut errors = Vec::new();
170    let ranked = scoring::rank_candidates(candidates);
171
172    // Clone each candidate individually instead of cloning entire vector upfront.
173    // This is more efficient when early candidates succeed (common case).
174    for i in 0..ranked.len() {
175        let candidate = &ranked[i];
176        let mut deserializer = CoercingDeserializer::new(candidate.clone());
177        match T::deserialize(&mut deserializer) {
178            Ok(value) => {
179                return Ok((value, ranked));
180            }
181            Err(e) => {
182                // Collect detailed error information for this candidate
183                let source_name = match &candidate.source {
184                    value::Source::Direct => "direct".to_string(),
185                    value::Source::Markdown { lang } => {
186                        format!("markdown({})", lang.as_deref().unwrap_or(""))
187                    }
188                    value::Source::Fixed { .. } => "fixed".to_string(),
189                    value::Source::MultiJson { index } => format!("multi_json[{}]", index),
190                    value::Source::MultiJsonArray => "multi_json_array".to_string(),
191                    value::Source::Heuristic { pattern } => format!("heuristic({})", pattern),
192                    value::Source::Yaml => "yaml".to_string(),
193                };
194                let preview: String = candidate.value.to_string().chars().take(100).collect();
195                let score = scoring::score_candidate(candidate);
196
197                errors.push(error::CandidateError {
198                    source: source_name,
199                    score,
200                    preview,
201                    error: e,
202                });
203            }
204        }
205    }
206
207    // All candidates failed - return aggregated errors
208    Err(ParseError::AllCandidatesFailed(error::AllCandidatesError {
209        attempts: errors,
210    }))
211}
212
213/// Parses an LLM response and returns metadata about the parsing process.
214///
215/// This variant provides visibility into which strategy was used, timing information,
216/// and candidate evaluation details for debugging and observability.
217///
218/// # Examples
219///
220/// ```
221/// use tryparse::parse_with_metadata;
222/// use serde::Deserialize;
223///
224/// #[derive(Deserialize)]
225/// struct Data {
226///     value: i32,
227/// }
228///
229/// let response = r#"{"value": 42}"#;
230/// let (data, metadata) = parse_with_metadata::<Data>(response).unwrap();
231///
232/// println!("Strategy used: {}", metadata.strategy_used);
233/// println!("Duration: {:?}", metadata.duration);
234/// println!("Candidates evaluated: {}", metadata.candidates_evaluated);
235/// ```
236///
237/// # Errors
238///
239/// Returns `ParseError::NoCandidates` if no valid JSON could be extracted.
240/// Returns `ParseError::AllCandidatesFailed` if deserialization fails for all candidates.
241pub fn parse_with_metadata<T: DeserializeOwned>(input: &str) -> Result<(T, ParseMetadata)> {
242    let start = Instant::now();
243
244    let parser = FlexibleParser::new();
245    let candidates = parser.parse(input)?;
246
247    if candidates.is_empty() {
248        return Err(ParseError::NoCandidates);
249    }
250
251    let total_candidates = candidates.len();
252    let ranked = scoring::rank_candidates(candidates);
253
254    let mut errors = Vec::new();
255
256    for (idx, candidate) in ranked.iter().enumerate() {
257        let candidates_evaluated = idx + 1;
258        let mut deserializer = CoercingDeserializer::new(candidate.clone());
259        match T::deserialize(&mut deserializer) {
260            Ok(value) => {
261                let source_name = match &candidate.source {
262                    value::Source::Direct => "direct".to_string(),
263                    value::Source::Markdown { lang } => {
264                        format!("markdown({})", lang.as_deref().unwrap_or(""))
265                    }
266                    value::Source::Fixed { .. } => "fixed".to_string(),
267                    value::Source::MultiJson { index } => format!("multi_json[{}]", index),
268                    value::Source::MultiJsonArray => "multi_json_array".to_string(),
269                    value::Source::Heuristic { pattern } => format!("heuristic({})", pattern),
270                    value::Source::Yaml => "yaml".to_string(),
271                };
272
273                let metadata = ParseMetadata {
274                    strategy_used: source_name,
275                    duration: start.elapsed(),
276                    candidates_evaluated,
277                    total_candidates,
278                    winning_score: scoring::score_candidate(candidate),
279                };
280
281                return Ok((value, metadata));
282            }
283            Err(e) => {
284                errors.push(e);
285            }
286        }
287    }
288
289    // All candidates failed
290    Err(ParseError::AllCandidatesFailed(error::AllCandidatesError {
291        attempts: ranked
292            .iter()
293            .zip(errors)
294            .map(|(candidate, error)| {
295                let source_name = match &candidate.source {
296                    value::Source::Direct => "direct".to_string(),
297                    value::Source::Markdown { lang } => {
298                        format!("markdown({})", lang.as_deref().unwrap_or(""))
299                    }
300                    value::Source::Fixed { .. } => "fixed".to_string(),
301                    value::Source::MultiJson { index } => format!("multi_json[{}]", index),
302                    value::Source::MultiJsonArray => "multi_json_array".to_string(),
303                    value::Source::Heuristic { pattern } => format!("heuristic({})", pattern),
304                    value::Source::Yaml => "yaml".to_string(),
305                };
306                error::CandidateError {
307                    source: source_name,
308                    score: scoring::score_candidate(candidate),
309                    preview: candidate.value.to_string().chars().take(100).collect(),
310                    error,
311                }
312            })
313            .collect(),
314    }))
315}
316
317/// Parses an LLM response using a custom parser.
318///
319/// This allows you to configure the parsing strategies used.
320///
321/// # Examples
322///
323/// ```
324/// use tryparse::{parse_with_parser, parser::FlexibleParser};
325/// use serde::Deserialize;
326///
327/// #[derive(Deserialize)]
328/// struct Data {
329///     value: i32,
330/// }
331///
332/// let parser = FlexibleParser::new();
333/// let response = r#"{"value": 42}"#;
334/// let data: Data = parse_with_parser(response, &parser).unwrap();
335/// ```
336pub fn parse_with_parser<T: DeserializeOwned>(input: &str, parser: &FlexibleParser) -> Result<T> {
337    let candidates = parser.parse(input)?;
338
339    if candidates.is_empty() {
340        return Err(ParseError::NoCandidates);
341    }
342
343    let ranked = scoring::rank_candidates(candidates);
344
345    for candidate in ranked {
346        let mut deserializer = CoercingDeserializer::new(candidate);
347        if let Ok(value) = T::deserialize(&mut deserializer) {
348            return Ok(value);
349        }
350    }
351
352    Err(ParseError::NoCandidates)
353}
354
355// ================================================================================================
356// LlmDeserialize API
357// ================================================================================================
358
359/// Parses an LLM response using BAML's deserialization algorithms.
360///
361/// This function uses the custom `LlmDeserialize` trait which provides:
362/// - Fuzzy field matching (camelCase ↔ snake_case)
363/// - Enum variant fuzzy matching
364/// - Union type scoring
365/// - Two-mode coercion (strict + lenient)
366/// - Circular reference detection
367///
368/// # Examples
369///
370/// ```rust
371/// use tryparse::parse_llm;
372///
373/// #[cfg(feature = "derive")]
374/// use tryparse::deserializer::LlmDeserialize;
375///
376/// #[cfg(feature = "derive")]
377/// use tryparse_derive::LlmDeserialize;
378///
379/// #[cfg(feature = "derive")]
380/// #[derive(Debug, serde::Deserialize, LlmDeserialize, PartialEq)]
381/// struct User {
382///     name: String,
383///     age: i64,
384/// }
385///
386/// #[cfg(feature = "derive")]
387/// {
388///     // Type coercion: age as string → i64
389///     let response = r#"{"name": "Alice", "age": "30"}"#;
390///     let user: User = parse_llm(response).unwrap();
391///     assert_eq!(user.name, "Alice");
392///     assert_eq!(user.age, 30);
393/// }
394/// ```
395///
396/// # Errors
397///
398/// Returns `ParseError::NoCandidates` if no valid JSON could be extracted.
399/// Returns `ParseError::DeserializeFailed` if deserialization fails for all candidates.
400pub fn parse_llm<T: LlmDeserialize>(input: &str) -> Result<T> {
401    let (result, _candidates) = parse_llm_with_candidates(input)?;
402    Ok(result)
403}
404
405/// Parses an LLM response using BAML's algorithms and returns all candidates.
406///
407/// This variant provides access to all parsing candidates with their metadata,
408/// showing what transformations were applied by the fuzzy matching system.
409///
410/// # Examples
411///
412/// ```rust
413/// use tryparse::parse_llm_with_candidates;
414///
415/// #[cfg(feature = "derive")]
416/// use tryparse::deserializer::LlmDeserialize;
417///
418/// #[cfg(feature = "derive")]
419/// use tryparse_derive::LlmDeserialize;
420///
421/// #[cfg(feature = "derive")]
422/// #[derive(serde::Deserialize, LlmDeserialize)]
423/// struct Data {
424///     value: i64,
425/// }
426///
427/// #[cfg(feature = "derive")]
428/// {
429///     let response = r#"{"value": "42"}"#;
430///     let (data, candidates) = parse_llm_with_candidates::<Data>(response).unwrap();
431///     assert_eq!(data.value, 42);
432/// }
433/// ```
434///
435/// # Errors
436///
437/// Returns `ParseError::NoCandidates` if no valid JSON could be extracted.
438/// Returns `ParseError::DeserializeFailed` if deserialization fails for all candidates.
439pub fn parse_llm_with_candidates<T: LlmDeserialize>(input: &str) -> Result<(T, Vec<FlexValue>)> {
440    let parser = FlexibleParser::new();
441    let candidates = parser.parse(input)?;
442
443    if candidates.is_empty() {
444        return Err(ParseError::NoCandidates);
445    }
446
447    // Rank candidates by quality
448    let ranked = scoring::rank_candidates(candidates);
449
450    // BAML TWO-MODE COERCION:
451    // 1. First pass: Try strict deserialization (try_deserialize) on all candidates
452    //    This allows array candidates to win for Vec<T> before single-value wrapping
453    // 2. Second pass: Try lenient deserialization (deserialize) on all candidates
454    //    This applies coercions like single-value wrapping for Vec<T>
455
456    // First pass: Strict mode (try_deserialize)
457    for (idx, candidate) in ranked.iter().enumerate() {
458        let mut ctx = CoercionContext::new();
459        if let Some(value) = T::try_deserialize(candidate, &mut ctx) {
460            // Merge transformations from deserialization into the winning candidate
461            let mut updated_ranked = ranked.clone();
462            for transformation in ctx.transformations() {
463                updated_ranked[idx].add_transformation(transformation.clone());
464            }
465            return Ok((value, updated_ranked));
466        }
467    }
468
469    // Second pass: Lenient mode (deserialize)
470    for (idx, candidate) in ranked.iter().enumerate() {
471        let mut ctx = CoercionContext::new();
472        match T::deserialize(candidate, &mut ctx) {
473            Ok(value) => {
474                // Merge transformations from deserialization into the winning candidate
475                let mut updated_ranked = ranked.clone();
476                for transformation in ctx.transformations() {
477                    updated_ranked[idx].add_transformation(transformation.clone());
478                }
479                return Ok((value, updated_ranked));
480            }
481            Err(_) => {
482                // Continue to next candidate
483                continue;
484            }
485        }
486    }
487
488    // All candidates failed
489    Err(ParseError::NoCandidates)
490}
491
#[cfg(test)]
mod tests {
    use serde::Deserialize;

    use super::*;

    /// Minimal target type shared by most of the tests below.
    #[derive(Deserialize, Debug, PartialEq)]
    struct User {
        name: String,
        age: u32,
    }

    // Well-formed JSON must pass straight through.
    #[test]
    fn test_parse_clean_json() {
        let parsed = parse::<User>(r#"{"name": "Alice", "age": 30}"#).unwrap();
        assert_eq!(parsed.name, "Alice");
        assert_eq!(parsed.age, 30);
    }

    // A numeric field delivered as a string is coerced to the target type.
    #[test]
    fn test_parse_with_type_coercion() {
        let parsed = parse::<User>(r#"{"name": "Bob", "age": "25"}"#).unwrap();
        assert_eq!(parsed.age, 25);
    }

    // JSON inside a fenced markdown block surrounded by prose.
    #[test]
    fn test_parse_markdown() {
        let response = r#"
Here's the user:
```json
{"name": "Charlie", "age": 35}
```
"#;
        let parsed = parse::<User>(response).unwrap();
        assert_eq!(parsed.name, "Charlie");
    }

    // A trailing comma before the closing brace is tolerated.
    #[test]
    fn test_parse_with_trailing_comma() {
        let parsed = parse::<User>(r#"{"name": "Dave", "age": 40,}"#).unwrap();
        assert_eq!(parsed.name, "Dave");
    }

    // Object keys without quotes are accepted.
    #[test]
    fn test_parse_with_unquoted_keys() {
        let parsed = parse::<User>(r#"{name: "Eve", age: 45}"#).unwrap();
        assert_eq!(parsed.name, "Eve");
    }

    // Single-quoted keys and strings are accepted.
    #[test]
    fn test_parse_with_single_quotes() {
        let parsed = parse::<User>(r#"{'name': 'Frank', 'age': 50}"#).unwrap();
        assert_eq!(parsed.name, "Frank");
    }

    // The candidate-returning variant yields the value plus a non-empty candidate list.
    #[test]
    fn test_parse_with_candidates() {
        let response = r#"{"name": "Grace", "age": "55"}"#;
        let (parsed, all_candidates) = parse_with_candidates::<User>(response).unwrap();
        assert_eq!(parsed.name, "Grace");
        assert!(!all_candidates.is_empty());
    }

    // Plain prose without any JSON must be reported as an error.
    #[test]
    fn test_parse_invalid_input() {
        let outcome = parse::<User>("This is not JSON at all");
        assert!(outcome.is_err());
    }

    // A top-level array deserializes into a `Vec`.
    #[test]
    fn test_parse_array() {
        let response = r#"[{"name": "Alice", "age": 30}, {"name": "Bob", "age": 25}]"#;
        let parsed = parse::<Vec<User>>(response).unwrap();
        assert_eq!(parsed.len(), 2);
    }

    // Nested objects map onto nested structs.
    #[test]
    fn test_parse_nested_struct() {
        #[derive(Deserialize, Debug, PartialEq)]
        struct Address {
            city: String,
        }

        #[derive(Deserialize, Debug, PartialEq)]
        struct Person {
            name: String,
            address: Address,
        }

        let response = r#"{"name": "Alice", "address": {"city": "NYC"}}"#;
        let parsed = parse::<Person>(response).unwrap();
        assert_eq!(parsed.address.city, "NYC");
    }

    // A caller-supplied parser can be used in place of the default one.
    #[test]
    fn test_parse_with_custom_parser() {
        let custom = FlexibleParser::new();
        let parsed =
            parse_with_parser::<User>(r#"{"name": "Alice", "age": 30}"#, &custom).unwrap();
        assert_eq!(parsed.name, "Alice");
    }
}
597        assert_eq!(user.name, "Alice");
598    }
599}