tryparse/lib.rs
1//! # tryparse
2//!
3//! A forgiving parser that converts messy LLM responses into strongly-typed Rust structs.
4//!
5//! This library handles common issues in LLM outputs like:
6//! - JSON wrapped in markdown code blocks
7//! - Trailing commas
8//! - Single quotes instead of double quotes
9//! - Unquoted object keys
10//! - Type mismatches (string numbers, etc.)
11//!
12//! ## Quick Start
13//!
14//! ```rust
15//! use tryparse::parse;
16//! use serde::Deserialize;
17//!
18//! #[derive(Deserialize, Debug)]
19//! struct User {
20//! name: String,
21//! age: u32,
22//! }
23//!
24//! // Parse messy LLM output with unquoted keys and string numbers
25//! let messy_response = r#"{name: "Alice", age: "30"}"#;
26//!
27//! let user: User = parse(messy_response).unwrap();
28//! assert_eq!(user.name, "Alice");
29//! assert_eq!(user.age, 30); // Automatically coerced from string
30//! ```
31//!
32//! ## Features
33//!
34//! - **Multi-Strategy Parsing**: Tries multiple approaches to extract JSON
35//! - **Smart Type Coercion**: Converts between compatible types automatically
36//! - **Transformation Tracking**: Records all modifications made during parsing
37//! - **Candidate Scoring**: Ranks multiple interpretations by quality
38//! - **Zero Configuration**: Works out of the box with sensible defaults
39//!
40//! ## Advanced Usage
41//!
42//! For more control over the parsing process:
43//!
44//! ```rust
45//! use tryparse::{parse_with_candidates, parser::FlexibleParser};
46//! use serde::Deserialize;
47//!
48//! #[derive(Deserialize)]
49//! struct Data {
50//! value: i32,
51//! }
52//!
53//! let response = r#"{"value": "42"}"#;
54//!
55//! // Get all candidates with metadata
56//! let (result, candidates) = parse_with_candidates::<Data>(response).unwrap();
57//!
58//! // Or use the parser directly
59//! let parser = FlexibleParser::new();
60//! let flex_values = parser.parse(response).unwrap();
61//! ```
62
63pub mod constraints;
64pub mod deserializer;
65pub mod error;
66pub mod parser;
67pub mod scoring;
68pub mod value;
69
70// Ensure primitive type implementations are linked
71// This prevents "trait bound not satisfied" errors in integration tests
72#[doc(hidden)]
73pub fn __ensure_primitives_linked() {
74 deserializer::primitives::__ensure_linked();
75}
76
77use std::time::{Duration, Instant};
78
79use deserializer::{CoercingDeserializer, CoercionContext, LlmDeserialize};
80use error::{ParseError, Result};
81use parser::FlexibleParser;
82use serde::de::DeserializeOwned;
83
/// Metadata about a parsing operation.
///
/// Returned by [`parse_with_metadata`] next to the deserialized value. It reports
/// which candidate source succeeded, how long the call took, and how many
/// candidates had to be tried.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseMetadata {
    /// Label of the candidate source that produced the successful value,
    /// e.g. `"direct"`, `"fixed"` or `"yaml"`.
    pub strategy_used: String,
    /// Time elapsed from the start of the call until the winning candidate
    /// was deserialized.
    pub duration: Duration,
    /// Number of candidates that were tried, *including* the winning one
    /// (so this is always at least 1 on success).
    pub candidates_evaluated: usize,
    /// Number of candidates the parser produced, counted before ranking.
    pub total_candidates: usize,
    /// Score of the winning candidate (lower is better).
    pub winning_score: u32,
}
100use value::FlexValue;
101
102/// Parses an LLM response into a strongly-typed Rust struct.
103///
104/// This is the main entry point for the library. It combines flexible parsing
105/// with smart type coercion to handle messy LLM outputs.
106///
107/// # Examples
108///
109/// ```
110/// use tryparse::parse;
111/// use serde::Deserialize;
112///
113/// #[derive(Deserialize, Debug, PartialEq)]
114/// struct User {
115/// name: String,
116/// age: u32,
117/// }
118///
119/// let response = r#"{"name": "Alice", "age": "30"}"#;
120/// let user: User = parse(response).unwrap();
121/// assert_eq!(user, User { name: "Alice".into(), age: 30 });
122/// ```
123///
124/// # Errors
125///
126/// Returns `ParseError::NoCandidates` if no valid JSON could be extracted.
127/// Returns `ParseError::DeserializeFailed` if deserialization fails for all candidates.
128pub fn parse<T: DeserializeOwned>(input: &str) -> Result<T> {
129 let (result, _candidates) = parse_with_candidates(input)?;
130 Ok(result)
131}
132
133/// Parses an LLM response and returns both the result and all candidates.
134///
135/// This variant provides access to all parsing candidates with their metadata,
136/// allowing inspection of what transformations were applied.
137///
138/// # Examples
139///
140/// ```
141/// use tryparse::parse_with_candidates;
142/// use serde::Deserialize;
143///
144/// #[derive(Deserialize)]
145/// struct Data {
146/// value: i32,
147/// }
148///
149/// let response = r#"{"value": "42"}"#;
150/// let (data, candidates) = parse_with_candidates::<Data>(response).unwrap();
151///
152/// assert_eq!(data.value, 42);
153/// assert!(!candidates.is_empty());
154/// ```
155///
156/// # Errors
157///
158/// Returns `ParseError::NoCandidates` if no valid JSON could be extracted.
159/// Returns `ParseError::DeserializeFailed` if deserialization fails for all candidates.
160pub fn parse_with_candidates<T: DeserializeOwned>(input: &str) -> Result<(T, Vec<FlexValue>)> {
161 let parser = FlexibleParser::new();
162 let candidates = parser.parse(input)?;
163
164 if candidates.is_empty() {
165 return Err(ParseError::NoCandidates);
166 }
167
168 // Try to deserialize each candidate
169 let mut errors = Vec::new();
170 let ranked = scoring::rank_candidates(candidates);
171
172 // Clone each candidate individually instead of cloning entire vector upfront.
173 // This is more efficient when early candidates succeed (common case).
174 for i in 0..ranked.len() {
175 let candidate = &ranked[i];
176 let mut deserializer = CoercingDeserializer::new(candidate.clone());
177 match T::deserialize(&mut deserializer) {
178 Ok(value) => {
179 return Ok((value, ranked));
180 }
181 Err(e) => {
182 // Collect detailed error information for this candidate
183 let source_name = match &candidate.source {
184 value::Source::Direct => "direct".to_string(),
185 value::Source::Markdown { lang } => {
186 format!("markdown({})", lang.as_deref().unwrap_or(""))
187 }
188 value::Source::Fixed { .. } => "fixed".to_string(),
189 value::Source::MultiJson { index } => format!("multi_json[{}]", index),
190 value::Source::MultiJsonArray => "multi_json_array".to_string(),
191 value::Source::Heuristic { pattern } => format!("heuristic({})", pattern),
192 value::Source::Yaml => "yaml".to_string(),
193 };
194 let preview: String = candidate.value.to_string().chars().take(100).collect();
195 let score = scoring::score_candidate(candidate);
196
197 errors.push(error::CandidateError {
198 source: source_name,
199 score,
200 preview,
201 error: e,
202 });
203 }
204 }
205 }
206
207 // All candidates failed - return aggregated errors
208 Err(ParseError::AllCandidatesFailed(error::AllCandidatesError {
209 attempts: errors,
210 }))
211}
212
213/// Parses an LLM response and returns metadata about the parsing process.
214///
215/// This variant provides visibility into which strategy was used, timing information,
216/// and candidate evaluation details for debugging and observability.
217///
218/// # Examples
219///
220/// ```
221/// use tryparse::parse_with_metadata;
222/// use serde::Deserialize;
223///
224/// #[derive(Deserialize)]
225/// struct Data {
226/// value: i32,
227/// }
228///
229/// let response = r#"{"value": 42}"#;
230/// let (data, metadata) = parse_with_metadata::<Data>(response).unwrap();
231///
232/// println!("Strategy used: {}", metadata.strategy_used);
233/// println!("Duration: {:?}", metadata.duration);
234/// println!("Candidates evaluated: {}", metadata.candidates_evaluated);
235/// ```
236///
237/// # Errors
238///
239/// Returns `ParseError::NoCandidates` if no valid JSON could be extracted.
240/// Returns `ParseError::AllCandidatesFailed` if deserialization fails for all candidates.
241pub fn parse_with_metadata<T: DeserializeOwned>(input: &str) -> Result<(T, ParseMetadata)> {
242 let start = Instant::now();
243
244 let parser = FlexibleParser::new();
245 let candidates = parser.parse(input)?;
246
247 if candidates.is_empty() {
248 return Err(ParseError::NoCandidates);
249 }
250
251 let total_candidates = candidates.len();
252 let ranked = scoring::rank_candidates(candidates);
253
254 let mut errors = Vec::new();
255
256 for (idx, candidate) in ranked.iter().enumerate() {
257 let candidates_evaluated = idx + 1;
258 let mut deserializer = CoercingDeserializer::new(candidate.clone());
259 match T::deserialize(&mut deserializer) {
260 Ok(value) => {
261 let source_name = match &candidate.source {
262 value::Source::Direct => "direct".to_string(),
263 value::Source::Markdown { lang } => {
264 format!("markdown({})", lang.as_deref().unwrap_or(""))
265 }
266 value::Source::Fixed { .. } => "fixed".to_string(),
267 value::Source::MultiJson { index } => format!("multi_json[{}]", index),
268 value::Source::MultiJsonArray => "multi_json_array".to_string(),
269 value::Source::Heuristic { pattern } => format!("heuristic({})", pattern),
270 value::Source::Yaml => "yaml".to_string(),
271 };
272
273 let metadata = ParseMetadata {
274 strategy_used: source_name,
275 duration: start.elapsed(),
276 candidates_evaluated,
277 total_candidates,
278 winning_score: scoring::score_candidate(candidate),
279 };
280
281 return Ok((value, metadata));
282 }
283 Err(e) => {
284 errors.push(e);
285 }
286 }
287 }
288
289 // All candidates failed
290 Err(ParseError::AllCandidatesFailed(error::AllCandidatesError {
291 attempts: ranked
292 .iter()
293 .zip(errors)
294 .map(|(candidate, error)| {
295 let source_name = match &candidate.source {
296 value::Source::Direct => "direct".to_string(),
297 value::Source::Markdown { lang } => {
298 format!("markdown({})", lang.as_deref().unwrap_or(""))
299 }
300 value::Source::Fixed { .. } => "fixed".to_string(),
301 value::Source::MultiJson { index } => format!("multi_json[{}]", index),
302 value::Source::MultiJsonArray => "multi_json_array".to_string(),
303 value::Source::Heuristic { pattern } => format!("heuristic({})", pattern),
304 value::Source::Yaml => "yaml".to_string(),
305 };
306 error::CandidateError {
307 source: source_name,
308 score: scoring::score_candidate(candidate),
309 preview: candidate.value.to_string().chars().take(100).collect(),
310 error,
311 }
312 })
313 .collect(),
314 }))
315}
316
317/// Parses an LLM response using a custom parser.
318///
319/// This allows you to configure the parsing strategies used.
320///
321/// # Examples
322///
323/// ```
324/// use tryparse::{parse_with_parser, parser::FlexibleParser};
325/// use serde::Deserialize;
326///
327/// #[derive(Deserialize)]
328/// struct Data {
329/// value: i32,
330/// }
331///
332/// let parser = FlexibleParser::new();
333/// let response = r#"{"value": 42}"#;
334/// let data: Data = parse_with_parser(response, &parser).unwrap();
335/// ```
336pub fn parse_with_parser<T: DeserializeOwned>(input: &str, parser: &FlexibleParser) -> Result<T> {
337 let candidates = parser.parse(input)?;
338
339 if candidates.is_empty() {
340 return Err(ParseError::NoCandidates);
341 }
342
343 let ranked = scoring::rank_candidates(candidates);
344
345 for candidate in ranked {
346 let mut deserializer = CoercingDeserializer::new(candidate);
347 if let Ok(value) = T::deserialize(&mut deserializer) {
348 return Ok(value);
349 }
350 }
351
352 Err(ParseError::NoCandidates)
353}
354
355// ================================================================================================
356// LlmDeserialize API
357// ================================================================================================
358
359/// Parses an LLM response using BAML's deserialization algorithms.
360///
361/// This function uses the custom `LlmDeserialize` trait which provides:
362/// - Fuzzy field matching (camelCase ↔ snake_case)
363/// - Enum variant fuzzy matching
364/// - Union type scoring
365/// - Two-mode coercion (strict + lenient)
366/// - Circular reference detection
367///
368/// # Examples
369///
370/// ```rust
371/// use tryparse::parse_llm;
372///
373/// #[cfg(feature = "derive")]
374/// use tryparse::deserializer::LlmDeserialize;
375///
376/// #[cfg(feature = "derive")]
377/// use tryparse_derive::LlmDeserialize;
378///
379/// #[cfg(feature = "derive")]
380/// #[derive(Debug, serde::Deserialize, LlmDeserialize, PartialEq)]
381/// struct User {
382/// name: String,
383/// age: i64,
384/// }
385///
386/// #[cfg(feature = "derive")]
387/// {
388/// // Type coercion: age as string → i64
389/// let response = r#"{"name": "Alice", "age": "30"}"#;
390/// let user: User = parse_llm(response).unwrap();
391/// assert_eq!(user.name, "Alice");
392/// assert_eq!(user.age, 30);
393/// }
394/// ```
395///
396/// # Errors
397///
398/// Returns `ParseError::NoCandidates` if no valid JSON could be extracted.
399/// Returns `ParseError::DeserializeFailed` if deserialization fails for all candidates.
400pub fn parse_llm<T: LlmDeserialize>(input: &str) -> Result<T> {
401 let (result, _candidates) = parse_llm_with_candidates(input)?;
402 Ok(result)
403}
404
405/// Parses an LLM response using BAML's algorithms and returns all candidates.
406///
407/// This variant provides access to all parsing candidates with their metadata,
408/// showing what transformations were applied by the fuzzy matching system.
409///
410/// # Examples
411///
412/// ```rust
413/// use tryparse::parse_llm_with_candidates;
414///
415/// #[cfg(feature = "derive")]
416/// use tryparse::deserializer::LlmDeserialize;
417///
418/// #[cfg(feature = "derive")]
419/// use tryparse_derive::LlmDeserialize;
420///
421/// #[cfg(feature = "derive")]
422/// #[derive(serde::Deserialize, LlmDeserialize)]
423/// struct Data {
424/// value: i64,
425/// }
426///
427/// #[cfg(feature = "derive")]
428/// {
429/// let response = r#"{"value": "42"}"#;
430/// let (data, candidates) = parse_llm_with_candidates::<Data>(response).unwrap();
431/// assert_eq!(data.value, 42);
432/// }
433/// ```
434///
435/// # Errors
436///
437/// Returns `ParseError::NoCandidates` if no valid JSON could be extracted.
438/// Returns `ParseError::DeserializeFailed` if deserialization fails for all candidates.
439pub fn parse_llm_with_candidates<T: LlmDeserialize>(input: &str) -> Result<(T, Vec<FlexValue>)> {
440 let parser = FlexibleParser::new();
441 let candidates = parser.parse(input)?;
442
443 if candidates.is_empty() {
444 return Err(ParseError::NoCandidates);
445 }
446
447 // Rank candidates by quality
448 let ranked = scoring::rank_candidates(candidates);
449
450 // BAML TWO-MODE COERCION:
451 // 1. First pass: Try strict deserialization (try_deserialize) on all candidates
452 // This allows array candidates to win for Vec<T> before single-value wrapping
453 // 2. Second pass: Try lenient deserialization (deserialize) on all candidates
454 // This applies coercions like single-value wrapping for Vec<T>
455
456 // First pass: Strict mode (try_deserialize)
457 for (idx, candidate) in ranked.iter().enumerate() {
458 let mut ctx = CoercionContext::new();
459 if let Some(value) = T::try_deserialize(candidate, &mut ctx) {
460 // Merge transformations from deserialization into the winning candidate
461 let mut updated_ranked = ranked.clone();
462 for transformation in ctx.transformations() {
463 updated_ranked[idx].add_transformation(transformation.clone());
464 }
465 return Ok((value, updated_ranked));
466 }
467 }
468
469 // Second pass: Lenient mode (deserialize)
470 for (idx, candidate) in ranked.iter().enumerate() {
471 let mut ctx = CoercionContext::new();
472 match T::deserialize(candidate, &mut ctx) {
473 Ok(value) => {
474 // Merge transformations from deserialization into the winning candidate
475 let mut updated_ranked = ranked.clone();
476 for transformation in ctx.transformations() {
477 updated_ranked[idx].add_transformation(transformation.clone());
478 }
479 return Ok((value, updated_ranked));
480 }
481 Err(_) => {
482 // Continue to next candidate
483 continue;
484 }
485 }
486 }
487
488 // All candidates failed
489 Err(ParseError::NoCandidates)
490}
491
#[cfg(test)]
mod tests {
    use serde::Deserialize;

    use super::*;

    // Shared fixture type: a string field plus a numeric field that exercises coercion.
    #[derive(Deserialize, Debug, PartialEq)]
    struct User {
        name: String,
        age: u32,
    }

    // Well-formed JSON goes through unchanged.
    #[test]
    fn test_parse_clean_json() {
        let parsed = parse::<User>(r#"{"name": "Alice", "age": 30}"#).unwrap();
        assert_eq!(parsed.name, "Alice");
        assert_eq!(parsed.age, 30);
    }

    // A numeric field supplied as a string is coerced to the target type.
    #[test]
    fn test_parse_with_type_coercion() {
        let parsed = parse::<User>(r#"{"name": "Bob", "age": "25"}"#).unwrap();
        assert_eq!(parsed.age, 25);
    }

    // JSON inside a fenced markdown block surrounded by prose is extracted.
    #[test]
    fn test_parse_markdown() {
        let response = r#"
Here's the user:
```json
{"name": "Charlie", "age": 35}
```
"#;
        let parsed = parse::<User>(response).unwrap();
        assert_eq!(parsed.name, "Charlie");
    }

    // A trailing comma before the closing brace is tolerated.
    #[test]
    fn test_parse_with_trailing_comma() {
        let parsed = parse::<User>(r#"{"name": "Dave", "age": 40,}"#).unwrap();
        assert_eq!(parsed.name, "Dave");
    }

    // Object keys without quotes are accepted.
    #[test]
    fn test_parse_with_unquoted_keys() {
        let parsed = parse::<User>(r#"{name: "Eve", age: 45}"#).unwrap();
        assert_eq!(parsed.name, "Eve");
    }

    // Single-quoted keys and strings are accepted.
    #[test]
    fn test_parse_with_single_quotes() {
        let parsed = parse::<User>(r#"{'name': 'Frank', 'age': 50}"#).unwrap();
        assert_eq!(parsed.name, "Frank");
    }

    // The candidate-returning variant yields the value and a non-empty candidate list.
    #[test]
    fn test_parse_with_candidates() {
        let response = r#"{"name": "Grace", "age": "55"}"#;
        let (parsed, all_candidates) = parse_with_candidates::<User>(response).unwrap();
        assert_eq!(parsed.name, "Grace");
        assert!(!all_candidates.is_empty());
    }

    // Plain prose with no JSON in it is reported as an error.
    #[test]
    fn test_parse_invalid_input() {
        let outcome = parse::<User>("This is not JSON at all");
        assert!(outcome.is_err());
    }

    // A top-level array deserializes into a Vec of structs.
    #[test]
    fn test_parse_array() {
        let response = r#"[{"name": "Alice", "age": 30}, {"name": "Bob", "age": 25}]"#;
        let parsed = parse::<Vec<User>>(response).unwrap();
        assert_eq!(parsed.len(), 2);
    }

    // Nested objects deserialize into nested structs.
    #[test]
    fn test_parse_nested_struct() {
        #[derive(Deserialize, Debug, PartialEq)]
        struct Address {
            city: String,
        }

        #[derive(Deserialize, Debug, PartialEq)]
        struct Person {
            name: String,
            address: Address,
        }

        let response = r#"{"name": "Alice", "address": {"city": "NYC"}}"#;
        let parsed = parse::<Person>(response).unwrap();
        assert_eq!(parsed.address.city, "NYC");
    }

    // A caller-supplied parser can be used in place of the default one.
    #[test]
    fn test_parse_with_custom_parser() {
        let custom = FlexibleParser::new();
        let parsed = parse_with_parser::<User>(r#"{"name": "Alice", "age": 30}"#, &custom).unwrap();
        assert_eq!(parsed.name, "Alice");
    }
}
599}