Skip to main content

eure_document/
text.rs

//! Text type unifying strings and code in Eure.
//!
//! This module provides the [`Text`] type, which represents every text value in Eure,
//! whether it originated from string syntax (`"..."`) or code syntax (`` `...` ``).
5
6use alloc::{borrow::Cow, string::String, vec::Vec};
7use core::iter::Peekable;
8use thiserror::Error;
9
/// Language tag carried by every text value.
///
/// # Variants
///
/// - [`Plaintext`](Language::Plaintext): explicitly plain text, produced by the `"..."`
///   string syntax. Use it when the content is data/text rather than code.
///
/// - [`Implicit`](Language::Implicit): no language was written, produced by `` `...` `` or
///   ```` ``` ```` without a language tag. The language can be inferred from schema context.
///
/// - [`Other`](Language::Other): an explicit language tag, produced by `` rust`...` `` or
///   ```` ```rust ```` syntax. Use it when the language must be spelled out.
///
/// # Schema Validation
///
/// | Schema | `Plaintext` | `Implicit` | `Other("rust")` |
/// |--------|-------------|------------|-----------------|
/// | `.text` (any) | ✓ | ✓ | ✓ |
/// | `.text.plaintext` | ✓ | ✓ (coerce) | ✗ |
/// | `.text.rust` | ✗ | ✓ (coerce) | ✓ |
///
/// `Implicit` lets users write `` `let a = 1;` `` when the schema already says
/// `.text.rust`, so the language does not have to be repeated.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
pub enum Language {
    /// Explicitly plain text (from `"..."` syntax).
    ///
    /// Schemas that expect a specific language such as `.text.rust` reject this variant.
    /// Use it when the content is data/text, not code.
    #[default]
    Plaintext,
    /// No language specified (from `` `...` `` without a language tag).
    ///
    /// Can be coerced to whatever language the schema expects, which is what allows
    /// `` `let a = 1;` `` under a `.text.rust` schema.
    Implicit,
    /// Explicit language tag (from `` lang`...` `` syntax).
    ///
    /// Holds the language identifier (e.g., "rust", "sql", "email").
    Other(Cow<'static, str>),
}

impl Language {
    /// Build a `Language` from a tag string.
    ///
    /// - `""` or `"plaintext"` → [`Plaintext`](Language::Plaintext)
    /// - anything else → [`Other`](Language::Other) holding that tag
    ///
    /// This never yields [`Implicit`](Language::Implicit); construct `Language::Implicit`
    /// directly when parsing code syntax that has no language tag.
    pub fn new(s: impl Into<Cow<'static, str>>) -> Self {
        let tag = s.into();
        if matches!(&*tag, "" | "plaintext") {
            Self::Plaintext
        } else {
            Self::Other(tag)
        }
    }

    /// Returns the language name, or `None` for [`Implicit`](Language::Implicit).
    ///
    /// [`Plaintext`](Language::Plaintext) is reported as `"plaintext"`.
    pub fn as_str(&self) -> Option<&str> {
        match self {
            Self::Implicit => None,
            Self::Plaintext => Some("plaintext"),
            Self::Other(tag) => Some(tag.as_ref()),
        }
    }

    /// Returns true if this is the [`Plaintext`](Language::Plaintext) variant.
    pub fn is_plaintext(&self) -> bool {
        *self == Self::Plaintext
    }

    /// Returns true if this is the [`Implicit`](Language::Implicit) variant.
    pub fn is_implicit(&self) -> bool {
        *self == Self::Implicit
    }

    /// Returns true if this language can be coerced to the expected language.
    ///
    /// # Coercion Rules
    ///
    /// - `Implicit` on either side always matches: an implicit value is inferred from
    ///   the schema, and an implicit expectation accepts any language.
    /// - Otherwise the two languages must be equal.
    pub fn is_compatible_with(&self, expected: &Language) -> bool {
        self.is_implicit() || expected.is_implicit() || self == expected
    }

    /// Returns true if this is [`Other`](Language::Other) with a tag equal to `arg`.
    ///
    /// `Plaintext` and `Implicit` never match, whatever `arg` is.
    pub fn is_other(&self, arg: &str) -> bool {
        matches!(self, Self::Other(tag) if tag == arg)
    }
}
110
/// Serialization hint recording which syntax a text value was parsed from.
///
/// Keeping the hint lets a round trip reproduce the original syntax when possible.
/// The generic variants (`Inline`, `Block`) leave the exact form to the serializer
/// for cases where it does not matter.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SyntaxHint {
    // === String syntax variants ===
    /// Escaped string: `"..."`
    Str,
    /// Literal string: `'...'`
    LitStr,
    /// Literal string with level 1 delimiters: `<'...'>`
    LitStr1,
    /// Literal string with level 2 delimiters: `<<'...'>>`
    LitStr2,
    /// Literal string with level 3 delimiters: `<<<'...'>>>`
    LitStr3,

    // === Inline code syntax variants ===
    /// Generic inline code (serializer picks appropriate syntax)
    Inline,
    /// Single backtick inline: `` `...` ``
    Inline1,
    /// Single-delimited code: `<`...`>`
    Delim1,
    /// Double-delimited code: `<<`...`>>`
    Delim2,
    /// Triple-delimited code: `<<<`...`>>>`
    Delim3,

    // === Block code syntax variants ===
    /// Generic block code (serializer picks backtick count)
    Block,
    /// Triple backtick block: ```` ```...``` ````
    Block3,
    /// Quadruple backtick block: ````` ````...```` `````
    Block4,
    /// Quintuple backtick block
    Block5,
    /// Sextuple backtick block
    Block6,
}

impl SyntaxHint {
    /// Returns true for every string syntax, escaped or literal.
    pub fn is_string(&self) -> bool {
        self.is_escaped_string() || self.is_literal_string()
    }

    /// Returns true only for the escaped string syntax (`"..."`).
    pub fn is_escaped_string(&self) -> bool {
        *self == Self::Str
    }

    /// Returns true for the literal string syntaxes (`'...'` and its delimited forms).
    pub fn is_literal_string(&self) -> bool {
        [Self::LitStr, Self::LitStr1, Self::LitStr2, Self::LitStr3].contains(self)
    }

    /// Returns true for every inline code syntax, including the generic `Inline`.
    pub fn is_inline(&self) -> bool {
        [
            Self::Inline,
            Self::Inline1,
            Self::Delim1,
            Self::Delim2,
            Self::Delim3,
        ]
        .contains(self)
    }

    /// Returns true for every block code syntax, including the generic `Block`.
    pub fn is_block(&self) -> bool {
        [
            Self::Block,
            Self::Block3,
            Self::Block4,
            Self::Block5,
            Self::Block6,
        ]
        .contains(self)
    }
}
205
206/// A text value in Eure, unifying strings and code.
207///
208/// # Overview
209///
210/// `Text` represents all text values in Eure, regardless of whether they were
211/// written using string syntax (`"..."`) or code syntax (`` `...` ``). This
212/// unification simplifies the data model while preserving the semantic distinction
213/// through the [`language`](Text::language) field.
214///
215/// # Syntax Mapping
216///
217/// | Syntax | Language | SyntaxHint |
218/// |--------|----------|------------|
219/// | `"hello"` | `Plaintext` | `Str` |
220/// | `'hello'` | `Plaintext` | `LitStr` |
221/// | `<'hello'>` | `Plaintext` | `LitStr1` |
222/// | `` `hello` `` | `Implicit` | `Inline1` |
223/// | `` sql`SELECT` `` | `Other("sql")` | `Inline1` |
224/// | `<`hello`>` | `Implicit` | `Delim1` |
225/// | `sql<`SELECT`>` | `Other("sql")` | `Delim1` |
226/// | `<<`hello`>>` | `Implicit` | `Delim2` |
227/// | `<<<`hello`>>>` | `Implicit` | `Delim3` |
228/// | ```` ``` ```` (no lang) | `Implicit` | `Block3` |
229/// | ```` ```rust ```` | `Other("rust")` | `Block3` |
230///
231/// # Key Distinction
232///
233/// - `"..."` → `Plaintext` (explicit: "this is text, not code")
234/// - `` `...` `` without lang → `Implicit` (code, language inferred from schema)
235/// - `` lang`...` `` → `Other(lang)` (code with explicit language)
236#[derive(Debug, Clone)]
237pub struct Text {
238    /// The text content.
239    pub content: String,
240    /// The language tag for this text.
241    pub language: Language,
242    /// Hint for serialization about the original syntax.
243    /// Note: This is NOT included in equality comparison as it's formatting metadata.
244    pub syntax_hint: Option<SyntaxHint>,
245}
246
247impl PartialEq for Text {
248    fn eq(&self, other: &Self) -> bool {
249        // syntax_hint is intentionally excluded - it's formatting metadata, not semantic content
250        self.content == other.content && self.language == other.language
251    }
252}
253
254impl Text {
255    /// Create a new text value.
256    pub fn new(content: impl Into<String>, language: Language) -> Self {
257        Self {
258            content: content.into(),
259            language,
260            syntax_hint: None,
261        }
262    }
263
264    /// Create a new text value with a syntax hint.
265    ///
266    /// For block syntax hints, automatically ensures trailing newline.
267    pub fn with_syntax_hint(
268        content: impl Into<String>,
269        language: Language,
270        syntax_hint: SyntaxHint,
271    ) -> Self {
272        let mut content = content.into();
273        if syntax_hint.is_block() && !content.ends_with('\n') {
274            content.push('\n');
275        }
276        Self {
277            content,
278            language,
279            syntax_hint: Some(syntax_hint),
280        }
281    }
282
283    /// Create a plaintext value (from `"..."` syntax).
284    pub fn plaintext(content: impl Into<String>) -> Self {
285        Self {
286            content: content.into(),
287            language: Language::Plaintext,
288            syntax_hint: Some(SyntaxHint::Str),
289        }
290    }
291
292    /// Create an inline code value with implicit language (from `` `...` `` syntax).
293    pub fn inline_implicit(content: impl Into<String>) -> Self {
294        Self {
295            content: content.into(),
296            language: Language::Implicit,
297            syntax_hint: Some(SyntaxHint::Inline1),
298        }
299    }
300
301    /// Create an inline code value with explicit language (from `` lang`...` `` syntax).
302    pub fn inline(content: impl Into<String>, language: impl Into<Cow<'static, str>>) -> Self {
303        Self {
304            content: content.into(),
305            language: Language::new(language),
306            syntax_hint: Some(SyntaxHint::Inline1),
307        }
308    }
309
310    /// Create a block code value with implicit language (from ```` ``` ```` syntax without lang).
311    pub fn block_implicit(content: impl Into<String>) -> Self {
312        let mut content = content.into();
313        if !content.ends_with('\n') {
314            content.push('\n');
315        }
316        Self {
317            content,
318            language: Language::Implicit,
319            syntax_hint: Some(SyntaxHint::Block3),
320        }
321    }
322
323    /// Create a block code value with explicit language.
324    pub fn block(content: impl Into<String>, language: impl Into<Cow<'static, str>>) -> Self {
325        let mut content = content.into();
326        if !content.ends_with('\n') {
327            content.push('\n');
328        }
329        Self {
330            content,
331            language: Language::new(language),
332            syntax_hint: Some(SyntaxHint::Block3),
333        }
334    }
335
336    /// Create a block code value without adding a trailing newline. This must be used only when performing convertion to eure from another data format.
337    pub fn block_without_trailing_newline(
338        content: impl Into<String>,
339        language: impl Into<Cow<'static, str>>,
340    ) -> Self {
341        Self {
342            content: content.into(),
343            language: Language::new(language),
344            syntax_hint: Some(SyntaxHint::Block3),
345        }
346    }
347
348    /// Returns the content as a string slice.
349    pub fn as_str(&self) -> &str {
350        &self.content
351    }
352}
353
354/// Errors that can occur when parsing text.
355#[derive(Debug, PartialEq, Eq, Clone, Error)]
356pub enum TextParseError {
357    /// Invalid escape sequence encountered.
358    #[error("Invalid escape sequence: {0}")]
359    InvalidEscapeSequence(char),
360    /// Unexpected end of string after escape character.
361    #[error("Invalid end of string after escape")]
362    InvalidEndOfStringAfterEscape,
363    /// Invalid Unicode code point in escape sequence.
364    #[error("Invalid unicode code point: {0}")]
365    InvalidUnicodeCodePoint(u32),
366    /// Newline found in text binding (only single line allowed).
367    #[error("Newline in text binding")]
368    NewlineInTextBinding,
369    /// Invalid indent in code block.
370    #[error(
371        "Invalid indent on code block at line {line}: actual {actual_indent} to be indented more than {expected_indent}"
372    )]
373    IndentError {
374        line: usize,
375        actual_indent: usize,
376        expected_indent: usize,
377    },
378}
379
380impl Text {
381    /// Parse a quoted string like `"hello world"` into a Text value.
382    ///
383    /// Handles escape sequences: `\\`, `\"`, `\'`, `\n`, `\r`, `\t`, `\0`, `\u{...}`.
384    pub fn parse_quoted_string(s: &str) -> Result<Self, TextParseError> {
385        let content = parse_escape_sequences(s)?;
386        Ok(Text::plaintext(content))
387    }
388
389    /// Parse a text binding content (after the colon) like `: hello world\n`.
390    ///
391    /// Strips trailing newline and trims whitespace.
392    pub fn parse_text_binding(s: &str) -> Result<Self, TextParseError> {
393        let stripped = s.strip_suffix('\n').unwrap_or(s);
394        let stripped = stripped.strip_suffix('\r').unwrap_or(stripped);
395        if stripped.contains(['\r', '\n']) {
396            return Err(TextParseError::NewlineInTextBinding);
397        }
398        let content = String::from(stripped.trim());
399        Ok(Text::plaintext(content))
400    }
401
402    /// Parse an indented code block, removing base indentation.
403    ///
404    /// The base indentation is auto-detected from trailing whitespace in the content.
405    /// If the content ends with `\n` followed by spaces, those spaces represent
406    /// the closing delimiter's indentation and determine how much to strip.
407    pub fn parse_indented_block(
408        language: Language,
409        content: String,
410        syntax_hint: SyntaxHint,
411    ) -> Result<Self, TextParseError> {
412        // Detect base_indent from trailing whitespace after last newline
413        let base_indent = if let Some(last_newline_pos) = content.rfind('\n') {
414            let trailing = &content[last_newline_pos + 1..];
415            if trailing.chars().all(|c| c == ' ') {
416                trailing.len()
417            } else {
418                0
419            }
420        } else {
421            0
422        };
423
424        // Collect lines, excluding the trailing whitespace line (delimiter indent)
425        let lines: Vec<&str> = content.lines().collect();
426        let line_count = if base_indent > 0 && !content.ends_with('\n') && lines.len() > 1 {
427            lines.len() - 1
428        } else {
429            lines.len()
430        };
431
432        let expected_whitespace_removals = base_indent * line_count;
433        let mut result = String::with_capacity(content.len() - expected_whitespace_removals);
434
435        for (line_number, line) in lines.iter().take(line_count).enumerate() {
436            // Empty lines (including whitespace-only lines) are allowed and don't need to match the indent
437            if line.trim_start().is_empty() {
438                result.push('\n');
439                continue;
440            }
441
442            let actual_indent = line
443                .chars()
444                .take_while(|c| *c == ' ')
445                .take(base_indent)
446                .count();
447            if actual_indent < base_indent {
448                return Err(TextParseError::IndentError {
449                    line: line_number + 1,
450                    actual_indent,
451                    expected_indent: base_indent,
452                });
453            }
454            // Remove the base indent from the line
455            result.push_str(&line[base_indent..]);
456            result.push('\n');
457        }
458
459        Ok(Self {
460            content: result,
461            language,
462            syntax_hint: Some(syntax_hint),
463        })
464    }
465}
466
467/// Parse escape sequences in a string.
468fn parse_escape_sequences(s: &str) -> Result<String, TextParseError> {
469    let mut result = String::with_capacity(s.len());
470    let mut chars = s.chars().peekable();
471
472    fn parse_unicode_escape(
473        chars: &mut Peekable<impl Iterator<Item = char>>,
474    ) -> Result<char, TextParseError> {
475        match chars.next() {
476            Some('{') => {}
477            Some(ch) => return Err(TextParseError::InvalidEscapeSequence(ch)),
478            None => return Err(TextParseError::InvalidEndOfStringAfterEscape),
479        }
480
481        let mut count = 0;
482        let mut code_point = 0;
483        while let Some(ch) = chars.peek()
484            && count < 6
485        // max 6 hex digits
486        {
487            if let Some(digit) = match ch {
488                '0'..='9' => Some(*ch as u32 - '0' as u32),
489                'a'..='f' => Some(*ch as u32 - 'a' as u32 + 10),
490                'A'..='F' => Some(*ch as u32 - 'A' as u32 + 10),
491                '_' | '-' => None,
492                _ => break,
493            } {
494                code_point = code_point * 16 + digit;
495                count += 1;
496            }
497            chars.next();
498        }
499
500        let Some(result) = core::char::from_u32(code_point) else {
501            return Err(TextParseError::InvalidUnicodeCodePoint(code_point));
502        };
503
504        match chars.next() {
505            Some('}') => {}
506            Some(ch) => return Err(TextParseError::InvalidEscapeSequence(ch)),
507            None => return Err(TextParseError::InvalidEndOfStringAfterEscape),
508        }
509
510        Ok(result)
511    }
512
513    while let Some(ch) = chars.next() {
514        match ch {
515            '\\' => match chars.next() {
516                Some('\\') => result.push('\\'),
517                Some('"') => result.push('"'),
518                Some('\'') => result.push('\''),
519                Some('n') => result.push('\n'),
520                Some('r') => result.push('\r'),
521                Some('t') => result.push('\t'),
522                Some('0') => result.push('\0'),
523                Some('u') => result.push(parse_unicode_escape(&mut chars)?),
524                Some(ch) => return Err(TextParseError::InvalidEscapeSequence(ch)),
525                None => return Err(TextParseError::InvalidEndOfStringAfterEscape),
526            },
527            _ => result.push(ch),
528        }
529    }
530
531    Ok(result)
532}
533
// Re-export the error type under its old name for backwards compatibility
// during the transition.
pub use TextParseError as EureStringError;

/// Backwards-compatible type alias for EureString.
///
/// The alias is a plain `Cow<'static, str>`; it is not an alias of [`Text`].
///
/// **Deprecated**: Use [`Text`] instead.
pub type EureString = Cow<'static, str>;
541
542#[cfg(test)]
543mod tests {
544    extern crate alloc;
545
546    use super::*;
547    use alloc::format;
548
549    #[test]
550    fn test_language_new_plaintext() {
551        assert_eq!(Language::new("plaintext"), Language::Plaintext);
552        assert_eq!(Language::new(""), Language::Plaintext);
553    }
554
555    #[test]
556    fn test_language_new_other() {
557        assert_eq!(Language::new("rust"), Language::Other("rust".into()));
558        assert_eq!(Language::new("sql"), Language::Other("sql".into()));
559    }
560
561    #[test]
562    fn test_language_as_str() {
563        assert_eq!(Language::Plaintext.as_str(), Some("plaintext"));
564        assert_eq!(Language::Implicit.as_str(), None);
565        assert_eq!(Language::Other("rust".into()).as_str(), Some("rust"));
566    }
567
568    #[test]
569    fn test_language_compatibility() {
570        // Implicit is compatible with everything
571        assert!(Language::Implicit.is_compatible_with(&Language::Plaintext));
572        assert!(Language::Implicit.is_compatible_with(&Language::Other("rust".into())));
573
574        // Everything is compatible with Implicit expectation
575        assert!(Language::Plaintext.is_compatible_with(&Language::Implicit));
576        assert!(Language::Other("rust".into()).is_compatible_with(&Language::Implicit));
577
578        // Same languages are compatible
579        assert!(Language::Plaintext.is_compatible_with(&Language::Plaintext));
580        assert!(Language::Other("rust".into()).is_compatible_with(&Language::Other("rust".into())));
581
582        // Different explicit languages are not compatible
583        assert!(!Language::Plaintext.is_compatible_with(&Language::Other("rust".into())));
584        assert!(!Language::Other("rust".into()).is_compatible_with(&Language::Plaintext));
585        assert!(!Language::Other("rust".into()).is_compatible_with(&Language::Other("sql".into())));
586    }
587
588    #[test]
589    fn test_text_plaintext() {
590        let text = Text::plaintext("hello");
591        assert_eq!(text.content, "hello");
592        assert_eq!(text.language, Language::Plaintext);
593        assert_eq!(text.syntax_hint, Some(SyntaxHint::Str));
594    }
595
596    #[test]
597    fn test_text_inline_implicit() {
598        let text = Text::inline_implicit("let a = 1");
599        assert_eq!(text.content, "let a = 1");
600        assert_eq!(text.language, Language::Implicit);
601        assert_eq!(text.syntax_hint, Some(SyntaxHint::Inline1));
602    }
603
604    #[test]
605    fn test_text_inline_with_language() {
606        let text = Text::inline("SELECT *", "sql");
607        assert_eq!(text.content, "SELECT *");
608        assert_eq!(text.language, Language::Other("sql".into()));
609        assert_eq!(text.syntax_hint, Some(SyntaxHint::Inline1));
610    }
611
612    #[test]
613    fn test_text_block_implicit() {
614        let text = Text::block_implicit("fn main() {}");
615        assert_eq!(text.content, "fn main() {}\n");
616        assert_eq!(text.language, Language::Implicit);
617        assert_eq!(text.syntax_hint, Some(SyntaxHint::Block3));
618    }
619
620    #[test]
621    fn test_text_block_with_language() {
622        let text = Text::block("fn main() {}", "rust");
623        assert_eq!(text.content, "fn main() {}\n");
624        assert_eq!(text.language, Language::Other("rust".into()));
625        assert_eq!(text.syntax_hint, Some(SyntaxHint::Block3));
626    }
627
628    #[test]
629    fn test_parse_quoted_string() {
630        let text = Text::parse_quoted_string("hello\\nworld").unwrap();
631        assert_eq!(text.content, "hello\nworld");
632        assert_eq!(text.language, Language::Plaintext);
633    }
634
635    #[test]
636    fn test_parse_text_binding() {
637        let text = Text::parse_text_binding("  hello world  \n").unwrap();
638        assert_eq!(text.content, "hello world");
639        assert_eq!(text.language, Language::Plaintext);
640    }
641
642    #[test]
643    fn test_parse_text_binding_raw_backslashes() {
644        // Text bindings should NOT process escape sequences
645        let text = Text::parse_text_binding("  \\b\\w+\\b  \n").unwrap();
646        assert_eq!(text.content, "\\b\\w+\\b");
647        assert_eq!(text.language, Language::Plaintext);
648    }
649
650    #[test]
651    fn test_parse_text_binding_literal_backslash_n() {
652        // Literal \n should stay as two characters, not converted to newline
653        let text = Text::parse_text_binding("  line1\\nline2  \n").unwrap();
654        assert_eq!(text.content, "line1\\nline2");
655        assert_eq!(text.language, Language::Plaintext);
656    }
657
658    #[test]
659    fn test_parse_text_binding_windows_path() {
660        // Windows paths should work without escaping
661        let text = Text::parse_text_binding("  C:\\Users\\name\\file.txt  \n").unwrap();
662        assert_eq!(text.content, "C:\\Users\\name\\file.txt");
663    }
664
665    #[test]
666    fn test_parse_text_binding_double_backslash() {
667        // Double backslashes stay as-is (two characters each = 4 total)
668        let text = Text::parse_text_binding("  \\\\  \n").unwrap();
669        assert_eq!(text.content, "\\\\");
670    }
671
672    #[test]
673    fn test_syntax_hint_is_string() {
674        // Escaped strings
675        assert!(SyntaxHint::Str.is_string());
676        // Literal strings
677        assert!(SyntaxHint::LitStr.is_string());
678        assert!(SyntaxHint::LitStr1.is_string());
679        assert!(SyntaxHint::LitStr2.is_string());
680        assert!(SyntaxHint::LitStr3.is_string());
681        // Non-strings
682        assert!(!SyntaxHint::Inline1.is_string());
683        assert!(!SyntaxHint::Block3.is_string());
684    }
685
686    #[test]
687    fn test_syntax_hint_is_escaped_string() {
688        assert!(SyntaxHint::Str.is_escaped_string());
689        assert!(!SyntaxHint::LitStr.is_escaped_string());
690        assert!(!SyntaxHint::Inline1.is_escaped_string());
691    }
692
693    #[test]
694    fn test_syntax_hint_is_literal_string() {
695        assert!(SyntaxHint::LitStr.is_literal_string());
696        assert!(SyntaxHint::LitStr1.is_literal_string());
697        assert!(SyntaxHint::LitStr2.is_literal_string());
698        assert!(SyntaxHint::LitStr3.is_literal_string());
699        assert!(!SyntaxHint::Str.is_literal_string());
700        assert!(!SyntaxHint::Inline1.is_literal_string());
701    }
702
703    #[test]
704    fn test_syntax_hint_is_inline() {
705        assert!(SyntaxHint::Inline.is_inline());
706        assert!(SyntaxHint::Inline1.is_inline());
707        assert!(SyntaxHint::Delim1.is_inline());
708        assert!(SyntaxHint::Delim2.is_inline());
709        assert!(SyntaxHint::Delim3.is_inline());
710        assert!(!SyntaxHint::Str.is_inline());
711        assert!(!SyntaxHint::Block3.is_inline());
712    }
713
714    #[test]
715    fn test_syntax_hint_is_block() {
716        assert!(SyntaxHint::Block.is_block());
717        assert!(SyntaxHint::Block3.is_block());
718        assert!(SyntaxHint::Block4.is_block());
719        assert!(SyntaxHint::Block5.is_block());
720        assert!(SyntaxHint::Block6.is_block());
721        assert!(!SyntaxHint::Str.is_block());
722        assert!(!SyntaxHint::Inline1.is_block());
723    }
724
725    mod parse_indented_block_tests {
726        use super::*;
727        use alloc::string::ToString;
728
729        #[test]
730        fn test_parse_indented_block_single_line() {
731            // 4 spaces trailing = base_indent of 4
732            let content = "    hello\n    ".to_string();
733            let result = Text::parse_indented_block(
734                Language::Other("text".into()),
735                content,
736                SyntaxHint::Block3,
737            )
738            .unwrap();
739            assert_eq!(result.language, Language::Other("text".into()));
740            assert_eq!(result.content, "hello\n");
741        }
742
743        #[test]
744        fn test_parse_indented_block_multiple_lines() {
745            // 4 spaces trailing = base_indent of 4
746            let content = "    line1\n    line2\n    line3\n    ".to_string();
747            let result = Text::parse_indented_block(
748                Language::Other("text".into()),
749                content,
750                SyntaxHint::Block3,
751            )
752            .unwrap();
753            assert_eq!(result.content, "line1\nline2\nline3\n");
754        }
755
756        #[test]
757        fn test_parse_indented_block_with_empty_lines() {
758            // 4 spaces trailing = base_indent of 4
759            let content = "    line1\n\n    line2\n    ".to_string();
760            let result = Text::parse_indented_block(
761                Language::Other("text".into()),
762                content,
763                SyntaxHint::Block3,
764            )
765            .unwrap();
766            assert_eq!(result.content, "line1\n\nline2\n");
767        }
768
769        #[test]
770        fn test_parse_indented_block_whitespace_only_line() {
771            // 3 spaces trailing = base_indent of 3
772            let content = "    line1\n        \n    line2\n   ".to_string();
773            let result = Text::parse_indented_block(
774                Language::Other("text".into()),
775                content,
776                SyntaxHint::Block3,
777            )
778            .unwrap();
779            assert_eq!(result.content, " line1\n\n line2\n");
780        }
781
782        #[test]
783        fn test_parse_indented_block_empty_content() {
784            // Just trailing whitespace, no actual content lines
785            let content = "    ".to_string();
786            let result = Text::parse_indented_block(
787                Language::Other("text".into()),
788                content,
789                SyntaxHint::Block3,
790            )
791            .unwrap();
792            // No newline in content, so it's treated as single empty line
793            assert_eq!(result.content, "\n");
794        }
795
796        #[test]
797        fn test_parse_indented_block_implicit_language() {
798            let content = "    hello\n    ".to_string();
799            let result =
800                Text::parse_indented_block(Language::Implicit, content, SyntaxHint::Block3)
801                    .unwrap();
802            assert_eq!(result.language, Language::Implicit);
803            assert_eq!(result.content, "hello\n");
804        }
805
806        #[test]
807        fn test_parse_indented_block_insufficient_indent() {
808            // 4 spaces trailing = base_indent of 4, but line2 only has 2
809            let content = "    line1\n  line2\n    ".to_string();
810            let result = Text::parse_indented_block(
811                Language::Other("text".into()),
812                content,
813                SyntaxHint::Block3,
814            );
815            assert_eq!(
816                result,
817                Err(TextParseError::IndentError {
818                    line: 2,
819                    actual_indent: 2,
820                    expected_indent: 4,
821                })
822            );
823        }
824
825        #[test]
826        fn test_parse_indented_block_no_indent() {
827            // 4 spaces trailing = base_indent of 4, but line1 has 0
828            let content = "line1\n    line2\n    ".to_string();
829            let result = Text::parse_indented_block(
830                Language::Other("text".into()),
831                content,
832                SyntaxHint::Block3,
833            );
834            assert_eq!(
835                result,
836                Err(TextParseError::IndentError {
837                    line: 1,
838                    actual_indent: 0,
839                    expected_indent: 4,
840                })
841            );
842        }
843
844        #[test]
845        fn test_parse_indented_block_empty_string() {
846            let content = String::new();
847            let result = Text::parse_indented_block(
848                Language::Other("text".into()),
849                content,
850                SyntaxHint::Block3,
851            );
852            assert!(result.is_ok());
853        }
854
855        #[test]
856        fn test_parse_indented_block_zero_indent() {
857            // No trailing whitespace = base_indent of 0
858            let content = "line1\nline2\n".to_string();
859            let result = Text::parse_indented_block(
860                Language::Other("text".into()),
861                content,
862                SyntaxHint::Block3,
863            )
864            .unwrap();
865            assert_eq!(result.content, "line1\nline2\n");
866        }
867
868        #[test]
869        fn test_parse_indented_block_empty_line_only() {
870            // 4 spaces trailing = base_indent of 4
871            let content = "    \n    ".to_string();
872            let result = Text::parse_indented_block(
873                Language::Other("text".into()),
874                content,
875                SyntaxHint::Block3,
876            )
877            .unwrap();
878            // First line is whitespace-only, treated as empty
879            assert_eq!(result.content, "\n");
880        }
881
882        #[test]
883        fn test_parse_indented_block_whitespace_only_line_insufficient_indent() {
884            // 4 spaces trailing = base_indent of 4
885            let content = "    line1\n  \n    line2\n    ".to_string();
886            let result = Text::parse_indented_block(
887                Language::Other("text".into()),
888                content,
889                SyntaxHint::Block3,
890            )
891            .unwrap();
892            // Whitespace-only lines are treated as empty and don't need to match indent
893            assert_eq!(result.content, "line1\n\nline2\n");
894        }
895
896        #[test]
897        fn test_parse_indented_block_whitespace_only_line_no_indent() {
898            // 3 spaces trailing = base_indent of 3
899            let content = "    line1\n\n    line2\n   ".to_string();
900            let result = Text::parse_indented_block(
901                Language::Other("text".into()),
902                content,
903                SyntaxHint::Block3,
904            )
905            .unwrap();
906            // Empty line (no whitespace) should be preserved
907            assert_eq!(result.content, " line1\n\n line2\n");
908        }
909
910        // =====================================================================
911        // Deterministic tests moved from proptests
912        // =====================================================================
913
914        #[test]
915        fn test_parse_quoted_string_escape_sequences() {
916            let cases = [
917                ("\\n", "\n"),
918                ("\\r", "\r"),
919                ("\\t", "\t"),
920                ("\\0", "\0"),
921                ("\\\\", "\\"),
922                ("\\\"", "\""),
923                ("\\'", "'"),
924                ("\\u{0041}", "A"),
925                ("\\u{3042}", "あ"),
926            ];
927            for (input, expected) in cases {
928                let result = Text::parse_quoted_string(input);
929                assert!(result.is_ok(), "Failed to parse: {:?}", input);
930                assert_eq!(
931                    result.unwrap().content,
932                    expected,
933                    "Mismatch for: {:?}",
934                    input
935                );
936            }
937        }
938
939        #[test]
940        fn test_parse_quoted_string_invalid_unicode_escapes() {
941            // Missing closing brace
942            let result = Text::parse_quoted_string("\\u{0041");
943            assert!(result.is_err(), "Should fail for missing closing brace");
944
945            // Note: \u{} parses to '\0' (null character) - this is valid behavior
946
947            // Invalid hex characters (Z is not a hex digit)
948            let result = Text::parse_quoted_string("\\u{ZZZZ}");
949            assert!(result.is_err(), "Should fail for invalid hex");
950
951            // Out of range codepoint (beyond Unicode max 0x10FFFF)
952            let result = Text::parse_quoted_string("\\u{110000}");
953            assert!(result.is_err(), "Should fail for out of range codepoint");
954
955            // Missing opening brace
956            let result = Text::parse_quoted_string("\\u0041}");
957            assert!(result.is_err(), "Should fail for missing opening brace");
958        }
959
960        #[test]
961        fn test_parse_text_binding_preserves_backslashes() {
962            let inputs = [
963                ("\\n", "\\n"),
964                ("\\t", "\\t"),
965                ("C:\\Users\\test", "C:\\Users\\test"),
966                ("\\b\\w+\\b", "\\b\\w+\\b"),
967            ];
968            for (input, expected) in inputs {
969                let with_newline = format!("{}\n", input);
970                let result = Text::parse_text_binding(&with_newline);
971                assert!(result.is_ok(), "Failed to parse: {:?}", input);
972                assert_eq!(result.unwrap().content, expected);
973            }
974        }
975
976        #[test]
977        fn test_parse_text_binding_trims_tabs_and_mixed_whitespace() {
978            // Tabs
979            let result = Text::parse_text_binding("\thello\t\n");
980            assert!(result.is_ok());
981            assert_eq!(result.unwrap().content, "hello");
982
983            // Mixed spaces and tabs
984            let result = Text::parse_text_binding("  \thello\t  \n");
985            assert!(result.is_ok());
986            assert_eq!(result.unwrap().content, "hello");
987
988            // Only tabs
989            let result = Text::parse_text_binding("\t\thello world\t\t\n");
990            assert!(result.is_ok());
991            assert_eq!(result.unwrap().content, "hello world");
992        }
993
994        #[test]
995        fn test_language_new_plaintext_variants() {
996            assert_eq!(Language::new("plaintext"), Language::Plaintext);
997            assert_eq!(Language::new(""), Language::Plaintext);
998        }
999
1000        #[test]
1001        fn test_empty_content_handling() {
1002            let text = Text::plaintext("");
1003            assert_eq!(text.content, "");
1004
1005            let text = Text::inline_implicit("");
1006            assert_eq!(text.content, "");
1007
1008            let text = Text::block_implicit("");
1009            assert_eq!(text.content, "\n"); // Should add newline
1010
1011            let text = Text::block("", "rust");
1012            assert_eq!(text.content, "\n"); // Should add newline
1013        }
1014
1015        #[test]
1016        fn test_parse_indented_block_with_tabs() {
1017            // Content with tab indentation - should be rejected or handled
1018            // since parse_indented_block uses space-based indent detection
1019            let content = "\tline1\n\tline2\n\t".to_string();
1020            let result = Text::parse_indented_block(
1021                Language::Other("text".into()),
1022                content,
1023                SyntaxHint::Block3,
1024            );
1025            // Tabs count as characters, not as indent spaces, so this should work
1026            // with 0 base indent (trailing has 1 tab = no spaces for indent detection)
1027            assert!(result.is_ok() || result.is_err()); // Just ensure no panic
1028
1029            // Mixed tabs and spaces - spaces for indent, tabs in content
1030            let content = "    line\twith\ttabs\n    ".to_string();
1031            let result = Text::parse_indented_block(
1032                Language::Other("text".into()),
1033                content,
1034                SyntaxHint::Block3,
1035            );
1036            assert!(result.is_ok());
1037            let text = result.unwrap();
1038            assert_eq!(text.content, "line\twith\ttabs\n");
1039        }
1040    }
1041}
1042
1043#[cfg(test)]
1044mod proptests {
1045    extern crate std;
1046
1047    use super::*;
1048    use alloc::vec;
1049    use proptest::prelude::*;
1050    use std::format;
1051    use std::string::String;
1052    use std::vec::Vec;
1053
1054    // =========================================================================
1055    // Strategy generators
1056    // =========================================================================
1057
1058    /// Strategy for generating arbitrary Language values.
1059    fn arb_language() -> impl Strategy<Value = Language> {
1060        prop_oneof![
1061            Just(Language::Plaintext),
1062            Just(Language::Implicit),
1063            // Common language tags
1064            Just(Language::Other("rust".into())),
1065            Just(Language::Other("sql".into())),
1066            Just(Language::Other("python".into())),
1067            Just(Language::Other("javascript".into())),
1068            // Arbitrary language tags
1069            "[a-z][a-z0-9_-]{0,15}".prop_map(|s| Language::Other(s.into())),
1070        ]
1071    }
1072
1073    /// Strategy for generating arbitrary SyntaxHint values.
1074    fn arb_syntax_hint() -> impl Strategy<Value = SyntaxHint> {
1075        prop_oneof![
1076            // String variants
1077            Just(SyntaxHint::Str),
1078            Just(SyntaxHint::LitStr),
1079            Just(SyntaxHint::LitStr1),
1080            Just(SyntaxHint::LitStr2),
1081            Just(SyntaxHint::LitStr3),
1082            // Inline variants
1083            Just(SyntaxHint::Inline),
1084            Just(SyntaxHint::Inline1),
1085            Just(SyntaxHint::Delim1),
1086            Just(SyntaxHint::Delim2),
1087            Just(SyntaxHint::Delim3),
1088            // Block variants
1089            Just(SyntaxHint::Block),
1090            Just(SyntaxHint::Block3),
1091            Just(SyntaxHint::Block4),
1092            Just(SyntaxHint::Block5),
1093            Just(SyntaxHint::Block6),
1094        ]
1095    }
1096
1097    /// Strategy for generating text content without control characters.
1098    fn arb_text_content() -> impl Strategy<Value = String> {
1099        // Printable ASCII and common Unicode, excluding null and other control chars
1100        proptest::collection::vec(
1101            prop_oneof![
1102                // Printable ASCII
1103                prop::char::range(' ', '~'),
1104                // Some Unicode characters
1105                Just('日'),
1106                Just('本'),
1107                Just('語'),
1108                Just('α'),
1109                Just('β'),
1110                Just('γ'),
1111                Just('é'),
1112                Just('ñ'),
1113                Just('ü'),
1114            ],
1115            0..100,
1116        )
1117        .prop_map(|chars| chars.into_iter().collect())
1118    }
1119
1120    /// Strategy for generating text content that's valid for escaped strings.
1121    /// Excludes characters that would need escaping for simpler testing.
1122    fn arb_simple_text_content() -> impl Strategy<Value = String> {
1123        proptest::collection::vec(
1124            prop_oneof![
1125                // Printable ASCII excluding backslash and quotes
1126                prop::char::range(' ', '!'), // space and !
1127                prop::char::range('#', '&'), // # $ % &
1128                prop::char::range('(', '['), // ( through [
1129                prop::char::range(']', '~'), // ] through ~
1130            ],
1131            0..50,
1132        )
1133        .prop_map(|chars| chars.into_iter().collect())
1134    }
1135
1136    /// Strategy for single-line content (no newlines).
1137    fn arb_single_line_content() -> impl Strategy<Value = String> {
1138        proptest::collection::vec(
1139            prop_oneof![
1140                // Printable ASCII excluding newlines
1141                prop::char::range(' ', '~'),
1142            ],
1143            0..50,
1144        )
1145        .prop_map(|chars| chars.into_iter().collect())
1146    }
1147
1148    // =========================================================================
1149    // Constructor tests
1150    // =========================================================================
1151
1152    proptest! {
1153        /// Text::plaintext should always set Language::Plaintext and SyntaxHint::Str.
1154        #[test]
1155        fn plaintext_constructor_sets_correct_fields(content in arb_text_content()) {
1156            let text = Text::plaintext(content.clone());
1157            prop_assert_eq!(text.content, content);
1158            prop_assert_eq!(text.language, Language::Plaintext);
1159            prop_assert_eq!(text.syntax_hint, Some(SyntaxHint::Str));
1160        }
1161
1162        /// Text::inline_implicit should always set Language::Implicit and SyntaxHint::Inline1.
1163        #[test]
1164        fn inline_implicit_constructor_sets_correct_fields(content in arb_text_content()) {
1165            let text = Text::inline_implicit(content.clone());
1166            prop_assert_eq!(text.content, content);
1167            prop_assert_eq!(text.language, Language::Implicit);
1168            prop_assert_eq!(text.syntax_hint, Some(SyntaxHint::Inline1));
1169        }
1170
1171        /// Text::inline should set Language from parameter and SyntaxHint::Inline1.
1172        #[test]
1173        fn inline_constructor_sets_correct_fields(
1174            content in arb_text_content(),
1175            lang in "[a-z][a-z0-9]{0,10}",
1176        ) {
1177            let text = Text::inline(content.clone(), lang.clone());
1178            prop_assert_eq!(text.content, content);
1179            prop_assert_eq!(text.language, Language::new(lang));
1180            prop_assert_eq!(text.syntax_hint, Some(SyntaxHint::Inline1));
1181        }
1182
1183        /// Text::block_implicit should add trailing newline if missing.
1184        #[test]
1185        fn block_implicit_adds_trailing_newline(content in arb_text_content()) {
1186            let text = Text::block_implicit(content.clone());
1187            prop_assert!(text.content.ends_with('\n'), "Block content should end with newline");
1188            prop_assert_eq!(text.language, Language::Implicit);
1189            prop_assert_eq!(text.syntax_hint, Some(SyntaxHint::Block3));
1190        }
1191
1192        /// Text::block_implicit should not add extra newline if already present.
1193        #[test]
1194        fn block_implicit_no_double_newline(content in arb_text_content()) {
1195            let content_with_newline = format!("{}\n", content);
1196            let text = Text::block_implicit(content_with_newline.clone());
1197            prop_assert_eq!(&text.content, &content_with_newline);
1198            prop_assert!(!text.content.ends_with("\n\n") || content.ends_with('\n'),
1199                "Should not add extra newline when already present");
1200        }
1201
1202        /// Text::block should add trailing newline if missing.
1203        #[test]
1204        fn block_adds_trailing_newline(
1205            content in arb_text_content(),
1206            lang in "[a-z][a-z0-9]{0,10}",
1207        ) {
1208            let text = Text::block(content.clone(), lang.clone());
1209            prop_assert!(text.content.ends_with('\n'), "Block content should end with newline");
1210            prop_assert_eq!(text.language, Language::new(lang));
1211            prop_assert_eq!(text.syntax_hint, Some(SyntaxHint::Block3));
1212        }
1213
1214        /// Text::block_without_trailing_newline should preserve content exactly.
1215        #[test]
1216        fn block_without_trailing_newline_preserves_content(
1217            content in arb_text_content(),
1218            lang in "[a-z][a-z0-9]{0,10}",
1219        ) {
1220            let text = Text::block_without_trailing_newline(content.clone(), lang.clone());
1221            prop_assert_eq!(text.content, content);
1222            prop_assert_eq!(text.language, Language::new(lang));
1223            prop_assert_eq!(text.syntax_hint, Some(SyntaxHint::Block3));
1224        }
1225
1226        /// Text::new should not modify content.
1227        #[test]
1228        fn new_preserves_content(
1229            content in arb_text_content(),
1230            language in arb_language(),
1231        ) {
1232            let text = Text::new(content.clone(), language.clone());
1233            prop_assert_eq!(text.content, content);
1234            prop_assert_eq!(text.language, language);
1235            prop_assert_eq!(text.syntax_hint, None);
1236        }
1237
1238        /// Text::with_syntax_hint should add trailing newline for block hints.
1239        #[test]
1240        fn with_syntax_hint_adds_newline_for_block(
1241            content in arb_text_content(),
1242            language in arb_language(),
1243            hint in prop_oneof![
1244                Just(SyntaxHint::Block),
1245                Just(SyntaxHint::Block3),
1246                Just(SyntaxHint::Block4),
1247                Just(SyntaxHint::Block5),
1248                Just(SyntaxHint::Block6),
1249            ],
1250        ) {
1251            let text = Text::with_syntax_hint(content.clone(), language.clone(), hint);
1252            prop_assert!(text.content.ends_with('\n'), "Block content should end with newline");
1253            prop_assert_eq!(text.language, language);
1254            prop_assert_eq!(text.syntax_hint, Some(hint));
1255        }
1256
1257        /// Text::with_syntax_hint should not modify content for non-block hints.
1258        #[test]
1259        fn with_syntax_hint_preserves_content_for_non_block(
1260            content in arb_text_content(),
1261            language in arb_language(),
1262            hint in prop_oneof![
1263                Just(SyntaxHint::Str),
1264                Just(SyntaxHint::LitStr),
1265                Just(SyntaxHint::Inline1),
1266                Just(SyntaxHint::Delim1),
1267            ],
1268        ) {
1269            let text = Text::with_syntax_hint(content.clone(), language.clone(), hint);
1270            prop_assert_eq!(text.content, content);
1271            prop_assert_eq!(text.language, language);
1272            prop_assert_eq!(text.syntax_hint, Some(hint));
1273        }
1274    }
1275
1276    // =========================================================================
1277    // Equality tests
1278    // =========================================================================
1279
1280    proptest! {
1281        /// PartialEq should ignore syntax_hint.
1282        #[test]
1283        fn equality_ignores_syntax_hint(
1284            content in arb_text_content(),
1285            language in arb_language(),
1286            hint1 in arb_syntax_hint(),
1287            hint2 in arb_syntax_hint(),
1288        ) {
1289            let text1 = Text {
1290                content: content.clone(),
1291                language: language.clone(),
1292                syntax_hint: Some(hint1),
1293            };
1294            let text2 = Text {
1295                content: content.clone(),
1296                language: language.clone(),
1297                syntax_hint: Some(hint2),
1298            };
1299            prop_assert_eq!(text1, text2, "Equality should ignore syntax_hint");
1300        }
1301
1302        /// PartialEq should compare content.
1303        #[test]
1304        fn equality_compares_content(
1305            content1 in arb_text_content(),
1306            content2 in arb_text_content(),
1307            language in arb_language(),
1308        ) {
1309            let text1 = Text::new(content1.clone(), language.clone());
1310            let text2 = Text::new(content2.clone(), language.clone());
1311            if content1 == content2 {
1312                prop_assert_eq!(text1, text2);
1313            } else {
1314                prop_assert_ne!(text1, text2);
1315            }
1316        }
1317
1318        /// PartialEq should compare language.
1319        #[test]
1320        fn equality_compares_language(
1321            content in arb_text_content(),
1322            lang1 in arb_language(),
1323            lang2 in arb_language(),
1324        ) {
1325            let text1 = Text::new(content.clone(), lang1.clone());
1326            let text2 = Text::new(content.clone(), lang2.clone());
1327            if lang1 == lang2 {
1328                prop_assert_eq!(text1, text2);
1329            } else {
1330                prop_assert_ne!(text1, text2);
1331            }
1332        }
1333    }
1334
1335    // =========================================================================
1336    // Language tests
1337    // =========================================================================
1338
1339    proptest! {
1340        /// Language::new with other strings should produce Other.
1341        #[test]
1342        fn language_new_other(lang in "[a-z][a-z0-9]{1,15}") {
1343            if lang != "plaintext" {
1344                let result = Language::new(lang.clone());
1345                prop_assert_eq!(result, Language::Other(lang.into()));
1346            }
1347        }
1348
1349        /// Language::Implicit is compatible with everything.
1350        #[test]
1351        fn implicit_is_compatible_with_all(lang in arb_language()) {
1352            prop_assert!(Language::Implicit.is_compatible_with(&lang),
1353                "Implicit should be compatible with {:?}", lang);
1354        }
1355
1356        /// Everything is compatible with Language::Implicit.
1357        #[test]
1358        fn all_compatible_with_implicit(lang in arb_language()) {
1359            prop_assert!(lang.is_compatible_with(&Language::Implicit),
1360                "{:?} should be compatible with Implicit", lang);
1361        }
1362
1363        /// Same languages are compatible with themselves.
1364        #[test]
1365        fn same_language_compatible(lang in arb_language()) {
1366            prop_assert!(lang.is_compatible_with(&lang),
1367                "{:?} should be compatible with itself", lang);
1368        }
1369
1370        /// Language::as_str returns correct values.
1371        #[test]
1372        fn language_as_str_correct(lang in arb_language()) {
1373            match &lang {
1374                Language::Plaintext => prop_assert_eq!(lang.as_str(), Some("plaintext")),
1375                Language::Implicit => prop_assert_eq!(lang.as_str(), None),
1376                Language::Other(s) => prop_assert_eq!(lang.as_str(), Some(s.as_ref())),
1377            }
1378        }
1379    }
1380
1381    // =========================================================================
1382    // SyntaxHint tests
1383    // =========================================================================
1384
1385    proptest! {
1386        /// SyntaxHint classification methods are mutually exclusive (except Str which is both escaped and string).
1387        #[test]
1388        fn syntax_hint_classification_consistency(hint in arb_syntax_hint()) {
1389            let is_str = hint.is_string();
1390            let is_inline = hint.is_inline();
1391            let is_block = hint.is_block();
1392
1393            // At most one category should be true (inline and block are mutually exclusive with string)
1394            if is_inline {
1395                prop_assert!(!is_str, "Inline hints should not be strings");
1396                prop_assert!(!is_block, "Inline hints should not be blocks");
1397            }
1398            if is_block {
1399                prop_assert!(!is_str, "Block hints should not be strings");
1400                prop_assert!(!is_inline, "Block hints should not be inline");
1401            }
1402            if is_str {
1403                prop_assert!(!is_inline, "String hints should not be inline");
1404                prop_assert!(!is_block, "String hints should not be blocks");
1405            }
1406        }
1407
1408        /// Every SyntaxHint should belong to exactly one category.
1409        #[test]
1410        fn syntax_hint_belongs_to_one_category(hint in arb_syntax_hint()) {
1411            let categories = [
1412                hint.is_string(),
1413                hint.is_inline(),
1414                hint.is_block(),
1415            ];
1416            let count = categories.iter().filter(|&&b| b).count();
1417            prop_assert_eq!(count, 1, "Each hint should belong to exactly one category: {:?}", hint);
1418        }
1419    }
1420
1421    // =========================================================================
1422    // Parsing tests
1423    // =========================================================================
1424
1425    proptest! {
1426        /// Parsing simple content (no escapes) should round-trip through escape parsing.
1427        #[test]
1428        fn parse_quoted_string_simple_roundtrip(content in arb_simple_text_content()) {
1429            let text = Text::parse_quoted_string(&content);
1430            prop_assert!(text.is_ok(), "Failed to parse simple content: {:?}", content);
1431            let text = text.unwrap();
1432            prop_assert_eq!(text.content, content);
1433            prop_assert_eq!(text.language, Language::Plaintext);
1434        }
1435
1436        /// Invalid escape sequences should produce errors.
1437        #[test]
1438        fn parse_quoted_string_invalid_escape(c in prop::char::range('a', 'z').prop_filter(
1439            "not a valid escape",
1440            |c| !matches!(*c, 'n' | 'r' | 't' | '0' | 'u')
1441        )) {
1442            let input = format!("\\{}", c);
1443            let result = Text::parse_quoted_string(&input);
1444            prop_assert!(result.is_err(), "Should fail for invalid escape: {:?}", input);
1445            match result {
1446                Err(TextParseError::InvalidEscapeSequence(ch)) => {
1447                    prop_assert_eq!(ch, c, "Error should report the invalid char");
1448                }
1449                other => {
1450                    prop_assert!(false, "Expected InvalidEscapeSequence, got {:?}", other);
1451                }
1452            }
1453        }
1454
1455        /// parse_text_binding should trim whitespace (spaces and tabs) and strip trailing newline.
1456        #[test]
1457        fn parse_text_binding_trims_correctly(
1458            leading_space in "[ \t]{0,10}",
1459            content in arb_single_line_content().prop_filter("no whitespace only", |s| !s.trim().is_empty()),
1460            trailing_space in "[ \t]{0,10}",
1461        ) {
1462            let input = format!("{}{}{}\n", leading_space, content, trailing_space);
1463            let result = Text::parse_text_binding(&input);
1464            prop_assert!(result.is_ok(), "Failed to parse: {:?}", input);
1465            let text = result.unwrap();
1466            prop_assert_eq!(text.content, content.trim());
1467            prop_assert_eq!(text.language, Language::Plaintext);
1468        }
1469
1470        /// parse_text_binding should reject content with embedded newlines.
1471        #[test]
1472        fn parse_text_binding_rejects_embedded_newlines(
1473            before in arb_single_line_content(),
1474            after in arb_single_line_content(),
1475        ) {
1476            let input = format!("{}\n{}\n", before, after);
1477            let result = Text::parse_text_binding(&input);
1478            prop_assert!(matches!(result, Err(TextParseError::NewlineInTextBinding)),
1479                "Should reject embedded newlines: {:?}", input);
1480        }
1481
1482        /// as_str should return the content.
1483        #[test]
1484        fn as_str_returns_content(content in arb_text_content(), language in arb_language()) {
1485            let text = Text::new(content.clone(), language);
1486            prop_assert_eq!(text.as_str(), content.as_str());
1487        }
1488    }
1489
1490    // =========================================================================
1491    // parse_indented_block tests
1492    // =========================================================================
1493
1494    proptest! {
1495        /// parse_indented_block should correctly detect and remove base indentation.
1496        #[test]
1497        fn parse_indented_block_removes_base_indent(
1498            // Use lines without leading/trailing whitespace; whitespace-only lines are treated specially
1499            lines in proptest::collection::vec("[!-~]+", 1..10),
1500            indent in 0usize..8,
1501        ) {
1502            // Build indented content with trailing indent marker
1503            let indent_str: String = " ".repeat(indent);
1504            let mut content = String::new();
1505            for line in &lines {
1506                content.push_str(&indent_str);
1507                content.push_str(line);
1508                content.push('\n');
1509            }
1510            // Add trailing indent for delimiter (without newline at end)
1511            content.push_str(&indent_str);
1512
1513            let result = Text::parse_indented_block(
1514                Language::Implicit,
1515                content,
1516                SyntaxHint::Block3,
1517            );
1518            prop_assert!(result.is_ok(), "Failed to parse indented block");
1519            let text = result.unwrap();
1520
1521            // Verify each line had indent removed
1522            let result_lines: Vec<&str> = text.content.lines().collect();
1523            prop_assert_eq!(result_lines.len(), lines.len(),
1524                "Line count should match: {:?} vs {:?}", result_lines, lines);
1525            for (i, (result_line, orig_line)) in result_lines.iter().zip(lines.iter()).enumerate() {
1526                prop_assert_eq!(*result_line, orig_line.as_str(),
1527                    "Line {} should have indent removed", i);
1528            }
1529        }
1530
1531        /// parse_indented_block should preserve empty lines.
1532        #[test]
1533        fn parse_indented_block_preserves_empty_lines(
1534            line1 in arb_single_line_content(),
1535            line2 in arb_single_line_content(),
1536            indent in 2usize..6,
1537        ) {
1538            let indent_str: String = " ".repeat(indent);
1539            // Content with empty line in the middle
1540            let content = format!(
1541                "{}{}\n\n{}{}\n{}",
1542                indent_str, line1,
1543                indent_str, line2,
1544                indent_str
1545            );
1546
1547            let result = Text::parse_indented_block(
1548                Language::Implicit,
1549                content,
1550                SyntaxHint::Block3,
1551            );
1552            prop_assert!(result.is_ok(), "Failed to parse");
1553            let text = result.unwrap();
1554
1555            let expected_line1 = if line1.trim().is_empty() { "" } else { line1.as_str() };
1556            let expected_line2 = if line2.trim().is_empty() { "" } else { line2.as_str() };
1557            let expected = format!("{}\n\n{}\n", expected_line1, expected_line2);
1558            prop_assert_eq!(text.content, expected);
1559        }
1560
1561        /// parse_indented_block should return error for insufficient indent.
1562        #[test]
1563        fn parse_indented_block_error_on_insufficient_indent(
1564            line1 in arb_single_line_content().prop_filter("non-empty", |s| !s.is_empty()),
1565            // Line2 must not start with whitespace (we control indent via bad_str)
1566            // and must have non-whitespace content
1567            line2 in "[!-~]{1,20}",  // Non-whitespace printable ASCII
1568            base_indent in 4usize..8,
1569            bad_indent in 0usize..4,
1570        ) {
1571            prop_assume!(bad_indent < base_indent);
1572            let base_str: String = " ".repeat(base_indent);
1573            let bad_str: String = " ".repeat(bad_indent);
1574
1575            let content = format!(
1576                "{}{}\n{}{}\n{}",
1577                base_str, line1,
1578                bad_str, line2,  // insufficient indent
1579                base_str
1580            );
1581
1582            let result = Text::parse_indented_block(
1583                Language::Implicit,
1584                content,
1585                SyntaxHint::Block3,
1586            );
1587
1588            match result {
1589                Err(TextParseError::IndentError { line: 2, actual_indent, expected_indent }) => {
1590                    prop_assert_eq!(actual_indent, bad_indent);
1591                    prop_assert_eq!(expected_indent, base_indent);
1592                }
1593                other => {
1594                    prop_assert!(false, "Expected IndentError for line 2, got {:?}", other);
1595                }
1596            }
1597        }
1598
1599        /// parse_indented_block should handle zero indent correctly.
1600        #[test]
1601        fn parse_indented_block_zero_indent(lines in proptest::collection::vec("[!-~]+", 1..10)) {
1602            let mut content = String::new();
1603            for line in &lines {
1604                content.push_str(line);
1605                content.push('\n');
1606            }
1607            // No trailing indent
1608
1609            let result = Text::parse_indented_block(
1610                Language::Other("test".into()),
1611                content.clone(),
1612                SyntaxHint::Block3,
1613            );
1614            prop_assert!(result.is_ok(), "Failed to parse zero-indent block");
1615            let text = result.unwrap();
1616
1617            // Content should be preserved as-is
1618            let expected_lines: Vec<&str> = lines.iter().map(|s| s.as_str()).collect();
1619            let result_lines: Vec<&str> = text.content.lines().collect();
1620            prop_assert_eq!(result_lines, expected_lines);
1621        }
1622
1623        /// parse_indented_block should preserve language and syntax_hint.
1624        #[test]
1625        fn parse_indented_block_preserves_metadata(
1626            line in arb_single_line_content(),
1627            language in arb_language(),
1628            hint in prop_oneof![
1629                Just(SyntaxHint::Block3),
1630                Just(SyntaxHint::Block4),
1631                Just(SyntaxHint::Block5),
1632                Just(SyntaxHint::Block6),
1633            ],
1634        ) {
1635            let content = format!("{}\n", line);
1636            let result = Text::parse_indented_block(language.clone(), content, hint);
1637            prop_assert!(result.is_ok());
1638            let text = result.unwrap();
1639            prop_assert_eq!(text.language, language);
1640            prop_assert_eq!(text.syntax_hint, Some(hint));
1641        }
1642    }
1643
1644    // =========================================================================
1645    // Edge case tests
1646    // =========================================================================
1647
1648    proptest! {
1649        /// Unicode content should be preserved correctly.
1650        #[test]
1651        fn unicode_content_preserved(content in "[\u{0080}-\u{FFFF}]{1,50}") {
1652            let text = Text::plaintext(content.clone());
1653            prop_assert_eq!(&text.content, &content);
1654
1655            let text = Text::inline_implicit(content.clone());
1656            prop_assert_eq!(&text.content, &content);
1657        }
1658
1659        /// Text with only whitespace should be handled correctly.
1660        #[test]
1661        fn whitespace_only_content(spaces in "[ \t]{1,20}") {
1662            let text = Text::plaintext(spaces.clone());
1663            prop_assert_eq!(&text.content, &spaces);
1664
1665            // parse_text_binding should trim to empty
1666            let input = format!("{}\n", spaces);
1667            let result = Text::parse_text_binding(&input);
1668            prop_assert!(result.is_ok());
1669            prop_assert_eq!(result.unwrap().content, "");
1670        }
1671    }
1672}