1use crate::error::{err, ErrorKind, Result};
10use crate::types::*;
11use std::collections::HashMap;
12
13pub fn parse(input: &str) -> Result<Document> {
17 let mut parser = DocumentParser::new(input);
18 parser.parse()
19}
20
21pub fn parse_schema(input: &str) -> Result<Schema> {
23 parse_schema_str(input, 0)
24}
25
26pub fn parse_type_str(input: &str) -> Result<Type> {
28 let (ty, rest) = parse_type(input)?;
29 if !rest.is_empty() {
30 return Err(err(
31 ErrorKind::InvalidType,
32 0,
33 format!("trailing characters after type: {:?}", rest),
34 ));
35 }
36 Ok(ty)
37}
38
39pub fn parse_typed_value(input: &str, ty: &Type) -> Result<Value> {
41 parse_value_typed(input, ty, 0)
42}
43
44pub fn parse_untyped_value(input: &str) -> Value {
46 parse_value_untyped(input)
47}
48
49pub fn parse_inline_annotations(input: &str) -> Vec<Span> {
51 parse_spans(input)
52}
53
54pub fn parse_inline_sif(input: &str) -> Result<Document> {
56 if !input.starts_with("sif::") {
57 return Err(err(ErrorKind::InvalidHeader, 0, "inline SIF must start with 'sif::'"));
58 }
59 let rest = &input[5..];
60 let parts: Vec<&str> = rest.split("::").collect();
61 let mut doc_str = String::from("#!sif v1\n");
63 for part in parts {
64 doc_str.push_str(part);
65 doc_str.push('\n');
66 }
67 parse(&doc_str)
68}
69
70fn parse_type(input: &str) -> Result<(Type, &str)> {
79 let (base, mut rest) = parse_base_type(input)?;
80
81 let mut ty = base;
83 loop {
84 if rest.starts_with("[]") {
85 ty = Type::Array(Box::new(ty));
86 rest = &rest[2..];
87 } else if rest.starts_with('?') {
88 ty = Type::Nullable(Box::new(ty));
89 rest = &rest[1..];
90 } else {
91 break;
92 }
93 }
94 Ok((ty, rest))
95}
96
97fn parse_base_type(input: &str) -> Result<(Type, &str)> {
98 let keywords: &[(&str, Type)] = &[
100 ("datetime", Type::DateTime),
101 ("duration", Type::Duration),
102 ("float", Type::Float),
103 ("bool", Type::Bool),
104 ("uint", Type::Uint),
105 ("int", Type::Int),
106 ("str", Type::Str),
107 ("date", Type::Date),
108 ("bytes", Type::Bytes),
109 ("null", Type::Null),
110 ("any", Type::Any),
111 ("map", Type::Map),
112 ];
113
114 for (kw, ty) in keywords {
115 if input.starts_with(kw) {
116 let after = &input[kw.len()..];
117 if after.is_empty()
119 || after.starts_with("[]")
120 || after.starts_with('?')
121 || after.starts_with(':')
122 || after.starts_with('|')
123 || after.starts_with(' ')
124 || after.starts_with(',')
125 || after.starts_with(')')
126 {
127 return Ok((ty.clone(), after));
128 }
129 }
130 }
131
132 if input.starts_with("enum(") {
134 let after_paren = &input[5..];
135 let close = after_paren
136 .find(')')
137 .ok_or_else(|| err(ErrorKind::InvalidType, 0, "unterminated enum type"))?;
138 let variants_str = &after_paren[..close];
139 let variants: Vec<String> = variants_str.split(',').map(|s| s.trim().to_string()).collect();
140 if variants.iter().any(|v| v.is_empty()) {
141 return Err(err(ErrorKind::InvalidType, 0, "empty enum variant"));
142 }
143 for v in &variants {
145 if !v.chars().all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-') {
146 return Err(err(
147 ErrorKind::InvalidType,
148 0,
149 format!("invalid enum variant: {:?}", v),
150 ));
151 }
152 }
153 return Ok((Type::Enum(variants), &after_paren[close + 1..]));
154 }
155
156 Err(err(
157 ErrorKind::InvalidType,
158 0,
159 format!("unrecognized type at: {:?}", &input[..input.len().min(20)]),
160 ))
161}
162
163pub(crate) fn parse_schema_str(input: &str, line_num: usize) -> Result<Schema> {
169 let input = input.trim();
170 if input.is_empty() {
171 return Err(err(ErrorKind::InvalidSchema, line_num, "empty schema"));
172 }
173
174 let mut fields = Vec::new();
175 let mut seen_names: HashMap<&str, usize> = HashMap::new();
176
177 let tokens = split_schema_fields(input);
180
181 for token in &tokens {
182 let field = parse_field_def(token, line_num)?;
183 if let Some(prev) = seen_names.get(field.name.as_str()) {
184 return Err(err(
185 ErrorKind::DuplicateField,
186 line_num,
187 format!(
188 "duplicate field name {:?} (first at position {})",
189 field.name,
190 prev + 1
191 ),
192 ));
193 }
194 seen_names.insert(unsafe {
195 std::mem::transmute::<&str, &str>(field.name.as_str())
198 }, fields.len());
199 fields.push(field);
200 }
201
202 Ok(Schema { fields })
203}
204
/// Splits a schema body into field-definition tokens.
///
/// Tokens are separated by spaces, except that spaces inside parentheses
/// or inside a double-quoted string do not split. Inside a quoted string a
/// backslash escapes the next character, so `\"` does not end the string.
/// Each token is trimmed and empty tokens are dropped.
fn split_schema_fields(input: &str) -> Vec<&str> {
    let mut fields = Vec::new();
    let mut start = 0;
    let mut paren_depth = 0usize;
    let mut in_quotes = false;
    let mut escaped = false;

    for (i, c) in input.char_indices() {
        if escaped {
            // This character was escaped by a preceding backslash.
            escaped = false;
            continue;
        }
        match c {
            '\\' if in_quotes => escaped = true,
            '"' => in_quotes = !in_quotes,
            '(' if !in_quotes => paren_depth += 1,
            // Unbalanced ')' is tolerated: the depth never goes below zero.
            ')' if !in_quotes => paren_depth = paren_depth.saturating_sub(1),
            ' ' if paren_depth == 0 && !in_quotes => {
                let token = input[start..i].trim();
                if !token.is_empty() {
                    fields.push(token);
                }
                start = i + 1;
            }
            _ => {}
        }
    }

    let last = input[start..].trim();
    if !last.is_empty() {
        fields.push(last);
    }
    fields
}
238
239fn parse_field_def(input: &str, line_num: usize) -> Result<FieldDef> {
244 let mut s = input;
245
246 let deprecated = if s.starts_with('∅') {
248 s = &s['∅'.len_utf8()..];
249 true
250 } else {
251 false
252 };
253
254 let (left, modifiers_str) = split_on_pipe(s);
256
257 let modifiers = if let Some(mods) = modifiers_str {
259 parse_modifiers(mods, line_num)?
260 } else {
261 Vec::new()
262 };
263
264 let parts = split_field_parts(left);
268
269 if parts.len() < 2 {
270 return Err(err(
271 ErrorKind::InvalidSchema,
272 line_num,
273 format!("field definition must have at least name:type, got {:?}", input),
274 ));
275 }
276
277 let name = parts[0].to_string();
278 if name.is_empty() || name.len() > 63 {
280 return Err(err(
281 ErrorKind::InvalidSchema,
282 line_num,
283 format!("field name must be 1-63 characters, got {:?}", name),
284 ));
285 }
286 if !name
287 .chars()
288 .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '.')
289 {
290 return Err(err(
291 ErrorKind::InvalidSchema,
292 line_num,
293 format!("invalid field name characters: {:?}", name),
294 ));
295 }
296
297 let (field_type, _) = parse_type(parts[1])
298 .map_err(|e| err(ErrorKind::InvalidType, line_num, e.message))?;
299
300 let semantic = if parts.len() >= 3 && !parts[2].is_empty() {
301 Some(parts[2].to_string())
302 } else {
303 None
304 };
305
306 Ok(FieldDef {
307 name,
308 field_type,
309 semantic,
310 deprecated,
311 modifiers,
312 })
313}
314
/// Splits `input` at the first top-level `|`.
///
/// A `|` inside parentheses or inside a double-quoted string is ignored.
/// Inside a quoted string a backslash escapes the next character, so `\"`
/// does not end the string.
///
/// Returns the text before the pipe and, if a pipe was found, the text
/// after it.
fn split_on_pipe(input: &str) -> (&str, Option<&str>) {
    let mut paren_depth = 0usize;
    let mut in_quotes = false;
    let mut escaped = false;

    for (i, c) in input.char_indices() {
        if escaped {
            // This character was escaped by a preceding backslash.
            escaped = false;
            continue;
        }
        match c {
            '\\' if in_quotes => escaped = true,
            '"' => in_quotes = !in_quotes,
            '(' if !in_quotes => paren_depth += 1,
            // Unbalanced ')' is tolerated: the depth never goes below zero.
            ')' if !in_quotes => paren_depth = paren_depth.saturating_sub(1),
            '|' if paren_depth == 0 && !in_quotes => {
                return (&input[..i], Some(&input[i + 1..]));
            }
            _ => {}
        }
    }
    (input, None)
}
336
/// Splits the `name:type:semantic` portion of a field definition.
///
/// Only the first two top-level colons split; colons inside parentheses
/// are ignored and any later colon stays in the last part. The result has
/// one, two or three elements depending on how many separators were found.
fn split_field_parts(input: &str) -> Vec<&str> {
    /// Byte offset of the first `:` that is not nested inside parentheses.
    fn top_level_colon(text: &str) -> Option<usize> {
        let mut depth = 0usize;
        for (idx, ch) in text.char_indices() {
            match ch {
                '(' => depth += 1,
                ')' => depth = depth.saturating_sub(1),
                ':' if depth == 0 => return Some(idx),
                _ => {}
            }
        }
        None
    }

    match top_level_colon(input) {
        None => vec![input],
        Some(first) => {
            let name = &input[..first];
            let tail = &input[first + 1..];
            match top_level_colon(tail) {
                Some(second) => vec![name, &tail[..second], &tail[second + 1..]],
                None => vec![name, tail],
            }
        }
    }
}
402
403fn parse_modifiers(input: &str, line_num: usize) -> Result<Vec<Modifier>> {
405 let mut modifiers = Vec::new();
406 if input.is_empty() {
407 return Ok(modifiers);
408 }
409
410 let tokens = split_on_comma_unquoted(input);
412
413 for token in tokens {
414 let token = token.trim();
415 if token.is_empty() {
416 continue;
417 }
418 if let Some(eq_pos) = token.find('=') {
419 let name = token[..eq_pos].to_string();
420 let raw_value = &token[eq_pos + 1..];
421 let value = if raw_value.starts_with('"') {
422 parse_quoted_string_value(raw_value)
424 .map_err(|e| err(ErrorKind::InvalidSchema, line_num, e.message))?
425 } else {
426 raw_value.to_string()
427 };
428 modifiers.push(Modifier {
429 name,
430 value: Some(value),
431 });
432 } else {
433 modifiers.push(Modifier {
434 name: token.to_string(),
435 value: None,
436 });
437 }
438 }
439 Ok(modifiers)
440}
441
/// Splits `input` on commas that are not inside a double-quoted string.
///
/// Inside a quoted string a backslash escapes the next character, so `\"`
/// does not end the string. Pieces are returned untrimmed, and the result
/// always contains at least one element (possibly empty).
fn split_on_comma_unquoted(input: &str) -> Vec<&str> {
    let mut parts = Vec::new();
    let mut start = 0;
    let mut in_quotes = false;
    let mut escaped = false;

    for (i, c) in input.char_indices() {
        if escaped {
            // This character was escaped by a preceding backslash.
            escaped = false;
            continue;
        }
        match c {
            '\\' if in_quotes => escaped = true,
            '"' => in_quotes = !in_quotes,
            ',' if !in_quotes => {
                parts.push(&input[start..i]);
                start = i + 1;
            }
            _ => {}
        }
    }
    parts.push(&input[start..]);
    parts
}
460
461fn parse_value_typed(input: &str, ty: &Type, line_num: usize) -> Result<Value> {
465 if input == "_" {
470 return Ok(Value::Null);
471 }
472
473 if input.is_empty() {
475 return match ty {
476 Type::Str | Type::Any => Ok(Value::Str(String::new())),
477 Type::Nullable(_) => Ok(Value::Null),
478 _ => Err(err(
479 ErrorKind::TypeMismatch,
480 line_num,
481 format!("empty field for non-nullable type {}", ty),
482 )),
483 };
484 }
485
486 match ty {
487 Type::Bool => parse_bool(input, line_num),
488 Type::Int => parse_int(input, line_num),
489 Type::Uint => parse_uint(input, line_num),
490 Type::Float => parse_float(input, line_num),
491 Type::Str => Ok(Value::Str(parse_string_value(input)?)),
492 Type::Date => {
493 let s = parse_string_value(input)?;
494 validate_date(&s, line_num)?;
495 Ok(Value::Date(s))
496 }
497 Type::DateTime => {
498 let s = parse_string_value(input)?;
499 validate_datetime(&s, line_num)?;
500 Ok(Value::DateTime(s))
501 }
502 Type::Duration => {
503 let s = parse_string_value(input)?;
504 validate_duration(&s, line_num)?;
505 Ok(Value::Duration(s))
506 }
507 Type::Bytes => {
508 let s = parse_string_value(input)?;
509 let bytes = base64_decode(&s)
510 .map_err(|e| err(ErrorKind::TypeMismatch, line_num, e))?;
511 Ok(Value::Bytes(bytes))
512 }
513 Type::Enum(variants) => {
514 let s = parse_string_value(input)?;
515 if !variants.contains(&s) {
516 return Err(err(
517 ErrorKind::TypeMismatch,
518 line_num,
519 format!("value {:?} not in enum({})", s, variants.join(",")),
520 ));
521 }
522 Ok(Value::Enum(s))
523 }
524 Type::Null => {
525 Err(err(
527 ErrorKind::TypeMismatch,
528 line_num,
529 format!("expected null ('_'), got {:?}", input),
530 ))
531 }
532 Type::Any => Ok(parse_value_untyped(input)),
533 Type::Map => parse_map_value(input, line_num),
534 Type::Array(elem_ty) => parse_array_value(input, elem_ty, line_num),
535 Type::Nullable(inner) => parse_value_typed(input, inner, line_num),
536 }
537}
538
539fn parse_value_untyped(input: &str) -> Value {
541 if input == "_" {
543 return Value::Null;
544 }
545 if input == "T" {
547 return Value::Bool(true);
548 }
549 if input == "F" {
550 return Value::Bool(false);
551 }
552 if input.starts_with('[') && input.ends_with(']') {
554 if let Ok(v) = parse_array_value(input, &Type::Any, 0) {
555 return v;
556 }
557 }
558 if input.starts_with('{') && input.ends_with('}') {
560 if let Ok(v) = parse_map_value(input, 0) {
561 return v;
562 }
563 }
564 if is_int_literal(input) {
566 if let Ok(n) = input.parse::<i64>() {
567 return Value::Int(n);
568 }
569 }
570 if input.contains('.') {
572 if let Ok(n) = input.parse::<f64>() {
573 if n.is_finite() {
574 return Value::Float(n);
575 }
576 }
577 }
578 if input.starts_with('"') {
580 if let Ok(s) = parse_quoted_string_value(input) {
581 return Value::Str(s);
582 }
583 }
584 Value::Str(input.to_string())
585}
586
/// Returns `true` when `s` is an optional single leading `-` followed by
/// one or more ASCII digits.
fn is_int_literal(s: &str) -> bool {
    let digits = match s.strip_prefix('-') {
        Some(rest) => rest,
        None => s,
    };
    if digits.is_empty() {
        return false;
    }
    digits.bytes().all(|b| b.is_ascii_digit())
}
591
592fn parse_bool(input: &str, line_num: usize) -> Result<Value> {
593 match input {
594 "T" | "true" | "TRUE" | "True" | "1" => Ok(Value::Bool(true)),
595 "F" | "false" | "FALSE" | "False" | "0" => Ok(Value::Bool(false)),
596 _ => Err(err(
597 ErrorKind::TypeMismatch,
598 line_num,
599 format!("invalid bool: {:?}", input),
600 )),
601 }
602}
603
604fn parse_int(input: &str, line_num: usize) -> Result<Value> {
605 input
606 .parse::<i64>()
607 .map(Value::Int)
608 .map_err(|_| err(ErrorKind::TypeMismatch, line_num, format!("invalid int: {:?}", input)))
609}
610
611fn parse_uint(input: &str, line_num: usize) -> Result<Value> {
612 if input.starts_with('-') {
613 return Err(err(
614 ErrorKind::TypeMismatch,
615 line_num,
616 format!("negative value for uint: {:?}", input),
617 ));
618 }
619 input
620 .parse::<u64>()
621 .map(Value::Uint)
622 .map_err(|_| err(ErrorKind::TypeMismatch, line_num, format!("invalid uint: {:?}", input)))
623}
624
625fn parse_float(input: &str, line_num: usize) -> Result<Value> {
626 let n: f64 = input
627 .parse()
628 .map_err(|_| err(ErrorKind::TypeMismatch, line_num, format!("invalid float: {:?}", input)))?;
629 if !n.is_finite() {
630 return Err(err(
631 ErrorKind::TypeMismatch,
632 line_num,
633 "NaN/Inf not allowed in SIF floats",
634 ));
635 }
636 Ok(Value::Float(n))
637}
638
639fn parse_string_value(input: &str) -> Result<String> {
643 if input.starts_with('"') {
644 parse_quoted_string_value(input)
645 } else {
646 Ok(input.to_string())
647 }
648}
649
650fn parse_quoted_string_value(input: &str) -> Result<String> {
652 if !input.starts_with('"') {
653 return Err(err(ErrorKind::InvalidString, 0, "expected opening quote"));
654 }
655
656 let inner = &input[1..];
657 let mut result = String::new();
658 let mut chars = inner.chars();
659 loop {
660 match chars.next() {
661 None => {
662 return Err(err(ErrorKind::InvalidString, 0, "unterminated quoted string"));
663 }
664 Some('"') => {
665 break;
668 }
669 Some('\\') => {
670 match chars.next() {
671 Some('n') => result.push('\n'),
672 Some('t') => result.push('\t'),
673 Some('\\') => result.push('\\'),
674 Some('"') => result.push('"'),
675 Some(c) => {
676 return Err(err(
677 ErrorKind::InvalidString,
678 0,
679 format!("invalid escape sequence: \\{}", c),
680 ));
681 }
682 None => {
683 return Err(err(
684 ErrorKind::InvalidString,
685 0,
686 "unterminated escape sequence",
687 ));
688 }
689 }
690 }
691 Some(c) => result.push(c),
692 }
693 }
694 Ok(result)
695}
696
697fn parse_array_value(input: &str, elem_ty: &Type, line_num: usize) -> Result<Value> {
700 if !input.starts_with('[') || !input.ends_with(']') {
701 return Err(err(
702 ErrorKind::InvalidArray,
703 line_num,
704 format!("expected array literal, got {:?}", input),
705 ));
706 }
707 let inner = &input[1..input.len() - 1];
708 if inner.is_empty() {
709 return Ok(Value::Array(Vec::new()));
710 }
711
712 let elements = split_array_elements(inner);
713 let mut values = Vec::with_capacity(elements.len());
714 for elem in &elements {
715 let v = parse_value_typed(elem, elem_ty, line_num)?;
716 values.push(v);
717 }
718 Ok(Value::Array(values))
719}
720
/// Splits the inside of an array or map literal on top-level commas.
///
/// Commas inside nested `[...]`, nested `{...}` or a double-quoted string
/// do not split. Inside a quoted string a backslash escapes the next
/// character, so `\"` does not end the string. Pieces are returned
/// untrimmed and the result always has at least one element.
fn split_array_elements(input: &str) -> Vec<&str> {
    let mut elements = Vec::new();
    let mut start = 0;
    // Signed on purpose: an unbalanced closer drives the depth negative,
    // which suppresses further splitting.
    let mut bracket_depth: i32 = 0;
    let mut brace_depth: i32 = 0;
    let mut in_quotes = false;
    let mut escaped = false;

    for (i, c) in input.char_indices() {
        if escaped {
            // This character was escaped by a preceding backslash.
            escaped = false;
            continue;
        }
        match c {
            '\\' if in_quotes => escaped = true,
            '"' => in_quotes = !in_quotes,
            '[' if !in_quotes => bracket_depth += 1,
            ']' if !in_quotes => bracket_depth -= 1,
            '{' if !in_quotes => brace_depth += 1,
            '}' if !in_quotes => brace_depth -= 1,
            ',' if !in_quotes && bracket_depth == 0 && brace_depth == 0 => {
                elements.push(&input[start..i]);
                start = i + 1;
            }
            _ => {}
        }
    }
    elements.push(&input[start..]);
    elements
}
746
747fn parse_map_value(input: &str, line_num: usize) -> Result<Value> {
750 if !input.starts_with('{') || !input.ends_with('}') {
751 return Err(err(
752 ErrorKind::InvalidMap,
753 line_num,
754 format!("expected map literal, got {:?}", input),
755 ));
756 }
757 let inner = &input[1..input.len() - 1];
758 if inner.is_empty() {
759 return Ok(Value::Map(Vec::new()));
760 }
761
762 let entries_str = split_array_elements(inner); let mut entries = Vec::with_capacity(entries_str.len());
764 for entry in &entries_str {
765 let colon_pos = entry.find(':').ok_or_else(|| {
766 err(
767 ErrorKind::InvalidMap,
768 line_num,
769 format!("map entry missing ':' separator: {:?}", entry),
770 )
771 })?;
772 let key = &entry[..colon_pos];
773 if key.is_empty() || !key.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
775 return Err(err(
776 ErrorKind::InvalidMap,
777 line_num,
778 format!("invalid map key: {:?}", key),
779 ));
780 }
781 let val_str = &entry[colon_pos + 1..];
782 let val = parse_value_untyped(val_str);
783 entries.push((key.to_string(), val));
784 }
785 Ok(Value::Map(entries))
786}
787
788fn validate_date(s: &str, line_num: usize) -> Result<()> {
791 if s.len() < 10 {
793 return Err(err(ErrorKind::TypeMismatch, line_num, format!("invalid date: {:?}", s)));
794 }
795 let bytes = s.as_bytes();
796 if bytes[4] != b'-' || bytes[7] != b'-' {
797 return Err(err(ErrorKind::TypeMismatch, line_num, format!("invalid date format: {:?}", s)));
798 }
799 for &i in &[0, 1, 2, 3, 5, 6, 8, 9] {
801 if !bytes[i].is_ascii_digit() {
802 return Err(err(ErrorKind::TypeMismatch, line_num, format!("invalid date: {:?}", s)));
803 }
804 }
805 Ok(())
806}
807
808fn validate_datetime(s: &str, line_num: usize) -> Result<()> {
809 if s.len() < 10 {
811 return Err(err(ErrorKind::TypeMismatch, line_num, format!("invalid datetime: {:?}", s)));
812 }
813 validate_date(&s[..10], line_num)?;
814 if s.len() > 10 && s.as_bytes()[10] != b'T' {
816 return Err(err(
817 ErrorKind::TypeMismatch,
818 line_num,
819 format!("invalid datetime separator (expected 'T'): {:?}", s),
820 ));
821 }
822 Ok(())
823}
824
825fn validate_duration(s: &str, line_num: usize) -> Result<()> {
826 if !s.starts_with('P') {
827 return Err(err(
828 ErrorKind::TypeMismatch,
829 line_num,
830 format!("duration must start with 'P': {:?}", s),
831 ));
832 }
833 Ok(())
834}
835
/// Decodes standard-alphabet base64 (`A-Z a-z 0-9 + /`).
///
/// Trailing `=` padding is optional and is stripped before decoding, so
/// both `"TQ=="` and `"TQ"` decode to `b"M"`.
///
/// # Errors
///
/// Returns a message when the input contains a character outside the
/// alphabet, or when the unpadded length leaves a single dangling
/// character (one base64 character carries only 6 bits and cannot encode
/// a byte).
fn base64_decode(input: &str) -> std::result::Result<Vec<u8>, String> {
    /// Maps one base64 alphabet byte to its 6-bit value.
    fn char_val(c: u8) -> std::result::Result<u8, String> {
        match c {
            b'A'..=b'Z' => Ok(c - b'A'),
            b'a'..=b'z' => Ok(c - b'a' + 26),
            b'0'..=b'9' => Ok(c - b'0' + 52),
            b'+' => Ok(62),
            b'/' => Ok(63),
            _ => Err(format!("invalid base64 character: {:?}", c as char)),
        }
    }

    let bytes = input.trim_end_matches('=').as_bytes();
    let mut out = Vec::with_capacity(bytes.len() * 3 / 4);

    for chunk in bytes.chunks(4) {
        // Missing positions in a short final chunk stay zero.
        let mut sextets = [0u8; 4];
        for (slot, &c) in sextets.iter_mut().zip(chunk) {
            *slot = char_val(c)?;
        }
        if chunk.len() == 1 {
            return Err("truncated base64 input: dangling character".to_string());
        }

        let n = u32::from(sextets[0]) << 18
            | u32::from(sextets[1]) << 12
            | u32::from(sextets[2]) << 6
            | u32::from(sextets[3]);

        // 2 characters -> 1 byte, 3 -> 2 bytes, 4 -> 3 bytes.
        out.push((n >> 16) as u8);
        if chunk.len() > 2 {
            out.push((n >> 8) as u8);
        }
        if chunk.len() > 3 {
            out.push(n as u8);
        }
    }
    Ok(out)
}
872
873fn parse_header(line: &str, line_num: usize) -> Result<Header> {
876 let s = line.trim_end();
877 if !s.starts_with("#!sif ") {
878 return Err(err(ErrorKind::InvalidHeader, line_num, "header must start with '#!sif '"));
879 }
880 let rest = &s[6..]; let tokens = tokenize_header(rest);
884 if tokens.is_empty() {
885 return Err(err(ErrorKind::InvalidHeader, line_num, "missing version"));
886 }
887
888 let version_str = &tokens[0];
890 if !version_str.starts_with('v') {
891 return Err(err(
892 ErrorKind::InvalidHeader,
893 line_num,
894 format!("expected version like 'v1', got {:?}", version_str),
895 ));
896 }
897 let version: u32 = version_str[1..]
898 .parse()
899 .map_err(|_| err(ErrorKind::InvalidHeader, line_num, "invalid version number"))?;
900
901 if version != 1 {
902 return Err(err(
903 ErrorKind::UnsupportedVersion,
904 line_num,
905 format!("unsupported SIF version {}, only v1 is supported", version),
906 ));
907 }
908
909 let mut attributes = HashMap::new();
911 for token in &tokens[1..] {
912 if let Some(eq_pos) = token.find('=') {
913 let key = &token[..eq_pos];
914 let raw_val = &token[eq_pos + 1..];
915 let val = if raw_val.starts_with('"') {
916 parse_quoted_string_value(raw_val)
917 .map_err(|e| err(ErrorKind::InvalidHeader, line_num, e.message))?
918 } else {
919 raw_val.to_string()
920 };
921 attributes.insert(key.to_string(), val);
922 } else {
923 return Err(err(
924 ErrorKind::InvalidHeader,
925 line_num,
926 format!("invalid header attribute (missing '='): {:?}", token),
927 ));
928 }
929 }
930
931 Ok(Header { version, attributes })
932}
933
/// Splits header/directive text into space-separated tokens.
///
/// Spaces inside a double-quoted string do not split. Inside a quoted
/// string a backslash escapes the next byte, so `\"` does not end the
/// string. Runs of spaces produce no empty tokens.
fn tokenize_header(input: &str) -> Vec<&str> {
    let mut tokens = Vec::new();
    let mut start = 0;
    let mut in_quotes = false;
    let mut escaped = false;

    // Scanning bytes is safe: `"`, `\` and space are ASCII and never occur
    // inside a multi-byte UTF-8 sequence, so every cut is a char boundary.
    for (i, &byte) in input.as_bytes().iter().enumerate() {
        if escaped {
            // This byte was escaped by a preceding backslash.
            escaped = false;
            continue;
        }
        match byte {
            b'\\' if in_quotes => escaped = true,
            b'"' => in_quotes = !in_quotes,
            b' ' if !in_quotes => {
                if start < i {
                    tokens.push(&input[start..i]);
                }
                start = i + 1;
            }
            _ => {}
        }
    }

    if start < input.len() {
        tokens.push(&input[start..]);
    }
    tokens
}
962
963fn parse_directive(line: &str, line_num: usize) -> Result<Option<Directive>> {
966 let s = line.trim_end();
967
968 if s == "#recall schema" {
969 return Ok(Some(Directive::Recall));
970 }
971
972 if let Some(rest) = s.strip_prefix("#context ") {
973 return Ok(Some(Directive::Context(rest.to_string())));
974 }
975 if let Some(rest) = s.strip_prefix("#source ") {
976 return Ok(Some(Directive::Source(rest.to_string())));
977 }
978 if let Some(rest) = s.strip_prefix("#license ") {
979 return Ok(Some(Directive::License(rest.to_string())));
980 }
981 if let Some(rest) = s.strip_prefix("#error ") {
982 return Ok(Some(Directive::Error(rest.to_string())));
983 }
984 if let Some(rest) = s.strip_prefix("#filter ") {
985 return Ok(Some(Directive::Filter(rest.to_string())));
986 }
987 if let Some(rest) = s.strip_prefix("#sort ") {
988 let parts: Vec<&str> = rest.splitn(2, ' ').collect();
989 let field = parts[0].to_string();
990 let direction = if parts.len() > 1 {
991 match parts[1] {
992 "asc" => SortDirection::Asc,
993 "desc" => SortDirection::Desc,
994 _ => {
995 return Err(err(
996 ErrorKind::InvalidDirective,
997 line_num,
998 format!("invalid sort direction: {:?}", parts[1]),
999 ));
1000 }
1001 }
1002 } else {
1003 SortDirection::Asc
1004 };
1005 return Ok(Some(Directive::Sort { field, direction }));
1006 }
1007 if let Some(rest) = s.strip_prefix("#limit ") {
1008 let n: u64 = rest
1009 .trim()
1010 .parse()
1011 .map_err(|_| err(ErrorKind::InvalidDirective, line_num, "invalid limit value"))?;
1012 return Ok(Some(Directive::Limit(n)));
1013 }
1014 if let Some(rest) = s.strip_prefix("#truncated") {
1015 let attrs_str = rest.trim();
1016 let attrs = parse_header_attrs(attrs_str, line_num)?;
1017 return Ok(Some(Directive::Truncated(attrs)));
1018 }
1019 if let Some(rest) = s.strip_prefix("#relation ") {
1020 return parse_relation(rest, line_num).map(Some);
1021 }
1022
1023 if s.starts_with('#') {
1025 let name_end = s[1..]
1026 .find(' ')
1027 .map(|i| i + 1)
1028 .unwrap_or(s.len());
1029 let name = &s[1..name_end];
1030 let content = if name_end < s.len() {
1031 s[name_end + 1..].to_string()
1032 } else {
1033 String::new()
1034 };
1035 return Ok(Some(Directive::Unknown {
1036 name: name.to_string(),
1037 content,
1038 }));
1039 }
1040
1041 Ok(None)
1042}
1043
1044fn parse_header_attrs(input: &str, line_num: usize) -> Result<Vec<(String, String)>> {
1045 let mut attrs = Vec::new();
1046 if input.is_empty() {
1047 return Ok(attrs);
1048 }
1049 let tokens = tokenize_header(input);
1050 for token in tokens {
1051 if let Some(eq_pos) = token.find('=') {
1052 let key = token[..eq_pos].to_string();
1053 let raw_val = &token[eq_pos + 1..];
1054 let val = if raw_val.starts_with('"') {
1055 parse_quoted_string_value(raw_val)
1056 .map_err(|e| err(ErrorKind::InvalidDirective, line_num, e.message))?
1057 } else {
1058 raw_val.to_string()
1059 };
1060 attrs.push((key, val));
1061 }
1062 }
1063 Ok(attrs)
1064}
1065
1066fn parse_relation(input: &str, line_num: usize) -> Result<Directive> {
1067 let parts: Vec<&str> = input.splitn(3, " -> ").collect();
1069 if parts.len() != 2 {
1070 let arrow = input.find("->").ok_or_else(|| {
1072 err(
1073 ErrorKind::InvalidDirective,
1074 line_num,
1075 "relation directive missing '->'",
1076 )
1077 })?;
1078 let from_str = input[..arrow].trim();
1079 let to_str = input[arrow + 2..].trim();
1080 return Ok(Directive::Relation {
1081 from: parse_field_ref(from_str),
1082 to: parse_field_ref(to_str),
1083 });
1084 }
1085 Ok(Directive::Relation {
1086 from: parse_field_ref(parts[0].trim()),
1087 to: parse_field_ref(parts[1].trim()),
1088 })
1089}
1090
1091fn parse_field_ref(input: &str) -> FieldRef {
1092 if input.starts_with('§') {
1093 let rest = &input['§'.len_utf8()..];
1094 if let Some(dot_pos) = rest.find('.') {
1095 return FieldRef {
1096 section: Some(rest[..dot_pos].to_string()),
1097 field: rest[dot_pos + 1..].to_string(),
1098 };
1099 }
1100 }
1101 FieldRef {
1102 section: None,
1103 field: input.to_string(),
1104 }
1105}
1106
1107fn parse_block_start(line: &str, line_num: usize) -> Result<(BlockType, Vec<(String, String)>)> {
1110 let s = line.trim_end();
1111 let rest = s
1112 .strip_prefix("#block ")
1113 .ok_or_else(|| err(ErrorKind::InvalidBlock, line_num, "expected '#block <type>'"))?;
1114
1115 let tokens = tokenize_header(rest);
1116 if tokens.is_empty() {
1117 return Err(err(ErrorKind::InvalidBlock, line_num, "missing block type"));
1118 }
1119
1120 let block_type = match tokens[0] {
1121 "code" => BlockType::Code,
1122 "text" => BlockType::Text,
1123 "diff" => BlockType::Diff,
1124 "raw" => BlockType::Raw,
1125 "template" => BlockType::Template,
1126 other => {
1127 return Err(err(
1128 ErrorKind::InvalidBlock,
1129 line_num,
1130 format!("unknown block type: {:?}", other),
1131 ));
1132 }
1133 };
1134
1135 let mut attrs = Vec::new();
1136 for token in &tokens[1..] {
1137 if let Some(eq_pos) = token.find('=') {
1138 let key = token[..eq_pos].to_string();
1139 let raw_val = &token[eq_pos + 1..];
1140 let val = if raw_val.starts_with('"') {
1141 parse_quoted_string_value(raw_val)
1142 .map_err(|e| err(ErrorKind::InvalidBlock, line_num, e.message))?
1143 } else {
1144 raw_val.to_string()
1145 };
1146 attrs.push((key, val));
1147 }
1148 }
1149
1150 Ok((block_type, attrs))
1151}
1152
1153fn parse_spans(input: &str) -> Vec<Span> {
1156 let mut spans = Vec::new();
1157 let mut pos = 0;
1158 let bytes = input.as_bytes();
1159 let len = bytes.len();
1160
1161 while pos < len {
1162 if bytes[pos] == b'@' && pos + 1 < len {
1164 let sem_start = pos + 1;
1166 let mut brace_pos = None;
1167 let mut j = sem_start;
1168 while j < len {
1169 if bytes[j] == b'{' {
1170 brace_pos = Some(j);
1171 break;
1172 }
1173 if !bytes[j].is_ascii_alphanumeric() && bytes[j] != b'_' {
1174 break;
1175 }
1176 j += 1;
1177 }
1178
1179 if let Some(bp) = brace_pos {
1180 if bp > sem_start {
1181 if pos > spans_text_end(&spans, input) {
1183 let text_start = spans_text_end(&spans, input);
1184 if text_start < pos {
1185 }
1187 }
1188 let semantic = &input[sem_start..bp];
1190 let content_start = bp + 1;
1191 if let Some(content_end) = find_matching_brace(input, content_start) {
1193 let content = &input[content_start..content_end];
1194 let children = parse_spans(content);
1195 spans.push(Span::Annotated {
1196 semantic: semantic.to_string(),
1197 children,
1198 });
1199 pos = content_end + 1;
1200 continue;
1201 }
1202 }
1203 }
1204 }
1205
1206 let text_start = pos;
1208 while pos < len {
1209 if bytes[pos] == b'@' && pos + 1 < len && bytes[pos + 1].is_ascii_alphabetic() {
1210 let mut k = pos + 1;
1212 while k < len && (bytes[k].is_ascii_alphanumeric() || bytes[k] == b'_') {
1213 k += 1;
1214 }
1215 if k < len && bytes[k] == b'{' {
1216 break;
1217 }
1218 }
1219 pos += 1;
1220 }
1221 if pos > text_start {
1222 spans.push(Span::Text(input[text_start..pos].to_string()));
1223 }
1224 }
1225
1226 spans
1227}
1228
1229fn spans_text_end(_spans: &[Span], _input: &str) -> usize {
1230 0
1231}
1232
/// Finds the `}` that closes a brace already opened before `start`.
///
/// Scanning begins at byte offset `start` with a nesting depth of one.
/// Returns the byte offset of the closing brace, or `None` if the input
/// ends first (or `start` is past the end).
fn find_matching_brace(input: &str, start: usize) -> Option<usize> {
    let mut depth = 1;
    for (offset, &byte) in input.as_bytes().iter().enumerate().skip(start) {
        if byte == b'{' {
            depth += 1;
        } else if byte == b'}' {
            depth -= 1;
            if depth == 0 {
                return Some(offset);
            }
        }
    }
    None
}
1252
1253fn parse_record_line(
1256 line: &str,
1257 schema: &Schema,
1258 line_num: usize,
1259) -> Result<Record> {
1260 let s = line.trim_end_matches('\n').trim_end_matches('\r');
1261
1262 let (cdc_op, data) = if s.starts_with('Δ') {
1264 (CdcOp::Update, &s['Δ'.len_utf8()..])
1265 } else if s.starts_with('∅') {
1266 (CdcOp::Delete, &s['∅'.len_utf8()..])
1267 } else {
1268 (CdcOp::Insert, s)
1269 };
1270
1271 let raw_fields: Vec<&str> = data.split('\t').collect();
1273 let field_count = schema.field_count();
1274
1275 let mut values = Vec::with_capacity(field_count);
1276
1277 if cdc_op == CdcOp::Delete {
1280 let id_fields: Vec<usize> = schema
1281 .fields
1282 .iter()
1283 .enumerate()
1284 .filter(|(_, f)| f.semantic.as_deref() == Some("id"))
1285 .map(|(i, _)| i)
1286 .collect();
1287
1288 let mut id_idx = 0;
1289 for (i, field_def) in schema.fields.iter().enumerate() {
1290 if id_fields.contains(&i) {
1291 let raw = if id_idx < raw_fields.len() {
1292 raw_fields[id_idx]
1293 } else {
1294 ""
1295 };
1296 id_idx += 1;
1297 let value = parse_value_typed(raw, &field_def.field_type, line_num)?;
1298 values.push(value);
1299 } else {
1300 values.push(Value::Null);
1301 }
1302 }
1303 } else {
1304 for (i, field_def) in schema.fields.iter().enumerate() {
1305 let raw = if i < raw_fields.len() {
1306 raw_fields[i]
1307 } else {
1308 ""
1310 };
1311
1312 let value = parse_value_typed(raw, &field_def.field_type, line_num)?;
1313 values.push(value);
1314 }
1315 }
1316
1317 Ok(Record { values, cdc_op })
1321}
1322
/// Line-oriented cursor over the text of a SIF document.
struct DocumentParser<'a> {
    /// The input split into lines, borrowed from the original text.
    lines: Vec<&'a str>,
    /// Zero-based index of the next line to read.
    pos: usize,
}
1329
1330impl<'a> DocumentParser<'a> {
1331 fn new(input: &'a str) -> Self {
1332 let lines: Vec<&str> = input.lines().collect();
1333 Self { lines, pos: 0 }
1334 }
1335
1336 fn line_num(&self) -> usize {
1337 self.pos + 1
1338 }
1339
1340 fn peek(&self) -> Option<&'a str> {
1341 self.lines.get(self.pos).copied()
1342 }
1343
1344 fn advance(&mut self) -> Option<&'a str> {
1345 let line = self.lines.get(self.pos).copied();
1346 if line.is_some() {
1347 self.pos += 1;
1348 }
1349 line
1350 }
1351
1352 fn parse(&mut self) -> Result<Document> {
1353 if let Some(first) = self.peek() {
1355 if first.starts_with('\u{FEFF}') {
1356 let stripped = &first[3..]; self.lines[0] = stripped;
1360 }
1361 }
1362
1363 let header_line = self
1365 .advance()
1366 .ok_or_else(|| err(ErrorKind::UnexpectedEof, 1, "empty document"))?;
1367 let header = parse_header(header_line, 1)?;
1368
1369 let mut sections = Vec::new();
1371 let mut current = self.new_section();
1372
1373 while let Some(line) = self.peek() {
1374 let trimmed = line.trim_end();
1375
1376 if trimmed.is_empty() {
1378 self.advance();
1379 continue;
1380 }
1381
1382 if trimmed.starts_with("#!") {
1384 self.advance();
1385 continue;
1386 }
1387
1388 if trimmed == "---" {
1390 self.advance();
1391 sections.push(current);
1392 current = self.new_section();
1393 continue;
1394 }
1395
1396 if trimmed.starts_with('§') {
1398 self.advance();
1399 let id = &trimmed['§'.len_utf8()..];
1400 current.id = Some(id.to_string());
1401 continue;
1402 }
1403
1404 if trimmed.starts_with("#block ") {
1406 let block = self.parse_block()?;
1407 current.blocks.push(block);
1408 continue;
1409 }
1410
1411 if trimmed.starts_with("#template ") {
1413 let template = self.parse_template()?;
1414 current.templates.push(template);
1415 continue;
1416 }
1417
1418 if trimmed == "#/block" {
1420 return Err(err(
1421 ErrorKind::InvalidBlock,
1422 self.line_num(),
1423 "unexpected #/block outside block",
1424 ));
1425 }
1426
1427 if trimmed.starts_with("#schema ") {
1429 self.advance();
1430 let schema_body = &trimmed[8..];
1431 let schema = parse_schema_str(schema_body, self.line_num() - 1)?;
1432 current.schema = Some(schema);
1433 continue;
1434 }
1435
1436 if trimmed == "#recall schema" {
1438 self.advance();
1439 current.directives.push(Directive::Recall);
1440 continue;
1441 }
1442
1443 if trimmed.starts_with('#') {
1445 self.advance();
1446 if let Some(directive) = parse_directive(trimmed, self.line_num() - 1)? {
1447 current.directives.push(directive);
1448 }
1449 continue;
1450 }
1451
1452 if let Some(ref schema) = current.schema {
1454 self.advance();
1455 let schema_clone = schema.clone();
1456 let record = parse_record_line(trimmed, &schema_clone, self.line_num() - 1)?;
1457 current.records.push(record);
1458 } else {
1459 return Err(err(
1461 ErrorKind::RecordWithoutSchema,
1462 self.line_num(),
1463 "record found before any #schema directive in this section",
1464 ));
1465 }
1466 }
1467
1468 sections.push(current);
1469 Ok(Document { header, sections })
1470 }
1471
1472 fn new_section(&self) -> Section {
1473 Section {
1474 id: None,
1475 directives: Vec::new(),
1476 schema: None,
1477 records: Vec::new(),
1478 blocks: Vec::new(),
1479 templates: Vec::new(),
1480 }
1481 }
1482
1483 fn parse_block(&mut self) -> Result<Block> {
1484 let start_line = self.line_num();
1485 let start = self
1486 .advance()
1487 .ok_or_else(|| err(ErrorKind::UnexpectedEof, start_line, "expected block start"))?;
1488 let (block_type, attributes) = parse_block_start(start, start_line)?;
1489
1490 let mut content = String::new();
1491 loop {
1492 let line = self
1493 .advance()
1494 .ok_or_else(|| err(ErrorKind::InvalidBlock, start_line, "unterminated block"))?;
1495 if line.trim_end() == "#/block" {
1496 break;
1497 }
1498 if !content.is_empty() {
1499 content.push('\n');
1500 }
1501 content.push_str(line);
1502 }
1503
1504 Ok(Block {
1505 block_type,
1506 attributes,
1507 content,
1508 })
1509 }
1510
1511 fn parse_template(&mut self) -> Result<Template> {
1512 let start_line = self.line_num();
1513 let start = self
1514 .advance()
1515 .ok_or_else(|| err(ErrorKind::UnexpectedEof, start_line, "expected template start"))?;
1516
1517 let name = start
1518 .trim_end()
1519 .strip_prefix("#template ")
1520 .ok_or_else(|| err(ErrorKind::InvalidTemplate, start_line, "expected '#template <name>'"))?
1521 .trim()
1522 .to_string();
1523
1524 if name.is_empty() {
1525 return Err(err(
1526 ErrorKind::InvalidTemplate,
1527 start_line,
1528 "template name cannot be empty",
1529 ));
1530 }
1531
1532 let mut body = String::new();
1533 loop {
1534 let line = self.advance().ok_or_else(|| {
1535 err(
1536 ErrorKind::InvalidTemplate,
1537 start_line,
1538 "unterminated template",
1539 )
1540 })?;
1541 if line.trim_end() == "#/template" {
1542 break;
1543 }
1544 if !body.is_empty() {
1545 body.push('\n');
1546 }
1547 body.push_str(line);
1548 }
1549
1550 Ok(Template { name, body })
1551 }
1552}
1553
/// Crate-visible entry point that forwards to `parse_header` unchanged.
///
/// `line` is the header line and `line_num` the line number passed through
/// to `parse_header`; the result and any error are those of `parse_header`.
pub(crate) fn parse_header_public(line: &str, line_num: usize) -> Result<Header> {
    parse_header(line, line_num)
}
1559
/// Crate-visible entry point that forwards to `parse_directive` unchanged.
///
/// Returns whatever `parse_directive` returns for `line`, including
/// `Ok(None)` when it produces no directive for the line.
pub(crate) fn parse_directive_public(
    line: &str,
    line_num: usize,
) -> Result<Option<Directive>> {
    parse_directive(line, line_num)
}
1566
/// Crate-visible entry point that forwards to `parse_record_line` unchanged.
///
/// `line` is parsed against `schema`; `line_num` is passed through to
/// `parse_record_line`.
pub(crate) fn parse_record_public(
    line: &str,
    schema: &Schema,
    line_num: usize,
) -> Result<Record> {
    parse_record_line(line, schema, line_num)
}
1574
// Unit tests for the parser's public entry points and for the record-line
// parser. Fixture documents are written as string literals with `\t`
// separating record fields.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Header ----

    // A bare header yields version 1 and no attributes.
    #[test]
    fn test_minimal_header() {
        let doc = parse("#!sif v1\n").unwrap();
        assert_eq!(doc.header.version, 1);
        assert!(doc.header.attributes.is_empty());
    }

    // Unquoted `key=value` pairs on the header line become attributes.
    #[test]
    fn test_header_with_attrs() {
        let doc = parse("#!sif v1 origin=sif-cli/1.0.0 created=2026-03-08T06:50:51Z\n").unwrap();
        assert_eq!(doc.header.attributes["origin"], "sif-cli/1.0.0");
        assert_eq!(doc.header.attributes["created"], "2026-03-08T06:50:51Z");
    }

    // A quoted attribute value may contain spaces; the quotes are not kept.
    #[test]
    fn test_header_quoted_attr() {
        let doc = parse("#!sif v1 context=\"GitHub issue export for serde-rs/serde\"\n").unwrap();
        assert_eq!(
            doc.header.attributes["context"],
            "GitHub issue export for serde-rs/serde"
        );
    }

    // A header announcing `v2` is rejected.
    #[test]
    fn test_unsupported_version() {
        assert!(parse("#!sif v2\n").is_err());
    }

    // ---- Types ----

    // Every scalar type keyword maps to its `Type` variant.
    #[test]
    fn test_scalar_types() {
        assert_eq!(parse_type_str("bool").unwrap(), Type::Bool);
        assert_eq!(parse_type_str("int").unwrap(), Type::Int);
        assert_eq!(parse_type_str("uint").unwrap(), Type::Uint);
        assert_eq!(parse_type_str("float").unwrap(), Type::Float);
        assert_eq!(parse_type_str("str").unwrap(), Type::Str);
        assert_eq!(parse_type_str("date").unwrap(), Type::Date);
        assert_eq!(parse_type_str("datetime").unwrap(), Type::DateTime);
        assert_eq!(parse_type_str("duration").unwrap(), Type::Duration);
        assert_eq!(parse_type_str("bytes").unwrap(), Type::Bytes);
        assert_eq!(parse_type_str("null").unwrap(), Type::Null);
        assert_eq!(parse_type_str("any").unwrap(), Type::Any);
        assert_eq!(parse_type_str("map").unwrap(), Type::Map);
    }

    // `[]` and `?` suffixes wrap the type to their left, applied left to right.
    #[test]
    fn test_compound_types() {
        assert_eq!(
            parse_type_str("str[]").unwrap(),
            Type::Array(Box::new(Type::Str))
        );
        assert_eq!(
            parse_type_str("int?").unwrap(),
            Type::Nullable(Box::new(Type::Int))
        );
        // Nullable array of strings.
        assert_eq!(
            parse_type_str("str[]?").unwrap(),
            Type::Nullable(Box::new(Type::Array(Box::new(Type::Str))))
        );
        // Array of nullable ints.
        assert_eq!(
            parse_type_str("int?[]").unwrap(),
            Type::Array(Box::new(Type::Nullable(Box::new(Type::Int))))
        );
    }

    // `enum(a,b,c)` keeps its variants in declaration order.
    #[test]
    fn test_enum_type() {
        assert_eq!(
            parse_type_str("enum(open,closed,merged)").unwrap(),
            Type::Enum(vec![
                "open".to_string(),
                "closed".to_string(),
                "merged".to_string()
            ])
        );
    }

    // ---- Schemas ----

    // Fields are `name:type[:semantic]`, separated by spaces.
    #[test]
    fn test_basic_schema() {
        let s = parse_schema("id:uint:id title:str status:enum(open,closed)").unwrap();
        assert_eq!(s.fields.len(), 3);
        assert_eq!(s.fields[0].name, "id");
        assert_eq!(s.fields[0].field_type, Type::Uint);
        assert_eq!(s.fields[0].semantic.as_deref(), Some("id"));
        assert_eq!(s.fields[1].name, "title");
        assert_eq!(s.fields[1].field_type, Type::Str);
        assert!(s.fields[1].semantic.is_none());
        assert_eq!(s.fields[2].name, "status");
        assert_eq!(
            s.fields[2].field_type,
            Type::Enum(vec!["open".to_string(), "closed".to_string()])
        );
    }

    // A leading `∅` marks a field as deprecated and is not part of its name.
    #[test]
    fn test_deprecated_field() {
        let s = parse_schema("id:uint:id ∅old_email:str email:str:email").unwrap();
        assert!(!s.fields[0].deprecated);
        assert!(s.fields[1].deprecated);
        assert_eq!(s.fields[1].name, "old_email");
        assert!(!s.fields[2].deprecated);
    }

    // Modifiers follow `|` as comma-separated `name=value` pairs, kept in order.
    #[test]
    fn test_field_modifiers() {
        let s =
            parse_schema("stock:uint:metric|agg=sum,align=right price:float|unit=usd,fmt=currency")
                .unwrap();
        assert_eq!(s.fields[0].modifiers.len(), 2);
        assert_eq!(s.fields[0].modifiers[0].name, "agg");
        assert_eq!(s.fields[0].modifiers[0].value.as_deref(), Some("sum"));
        assert_eq!(s.fields[0].modifiers[1].name, "align");
        assert_eq!(s.fields[0].modifiers[1].value.as_deref(), Some("right"));
    }

    // ---- Values ----

    // Without a type, `_`, `T`/`F` and numerals are recognised; the rest is a string.
    #[test]
    fn test_untyped_disambiguation() {
        assert_eq!(parse_untyped_value("_"), Value::Null);
        assert_eq!(parse_untyped_value("T"), Value::Bool(true));
        assert_eq!(parse_untyped_value("F"), Value::Bool(false));
        assert_eq!(parse_untyped_value("42"), Value::Int(42));
        assert_eq!(parse_untyped_value("-7"), Value::Int(-7));
        assert_eq!(parse_untyped_value("3.14"), Value::Float(3.14));
        assert_eq!(
            parse_untyped_value("hello"),
            Value::Str("hello".to_string())
        );
        // The word `true` is not a boolean in untyped mode.
        assert_eq!(
            parse_untyped_value("true"),
            Value::Str("true".to_string())
        );
    }

    // With a declared type the raw text is interpreted according to that type.
    #[test]
    fn test_typed_values() {
        assert_eq!(
            parse_typed_value("T", &Type::Bool).unwrap(),
            Value::Bool(true)
        );
        // Unlike untyped mode, `true` is accepted for a declared bool.
        assert_eq!(
            parse_typed_value("true", &Type::Bool).unwrap(),
            Value::Bool(true)
        );
        assert_eq!(
            parse_typed_value("42", &Type::Int).unwrap(),
            Value::Int(42)
        );
        assert_eq!(
            parse_typed_value("42", &Type::Uint).unwrap(),
            Value::Uint(42)
        );
        // A negative number is not a valid uint.
        assert!(parse_typed_value("-1", &Type::Uint).is_err());
        assert_eq!(
            parse_typed_value("3.14", &Type::Float).unwrap(),
            Value::Float(3.14)
        );
    }

    // `_` parses to null; a nullable type otherwise parses as its inner type.
    #[test]
    fn test_nullable() {
        assert_eq!(
            parse_typed_value("_", &Type::Nullable(Box::new(Type::Int))).unwrap(),
            Value::Null
        );
        assert_eq!(
            parse_typed_value("42", &Type::Nullable(Box::new(Type::Int))).unwrap(),
            Value::Int(42)
        );
        // `_` is also accepted as null for a type not declared nullable.
        assert_eq!(parse_typed_value("_", &Type::Int).unwrap(), Value::Null);
    }

    // Enum values must be one of the declared variants.
    #[test]
    fn test_enum_validation() {
        let ty = Type::Enum(vec!["open".to_string(), "closed".to_string()]);
        assert_eq!(
            parse_typed_value("open", &ty).unwrap(),
            Value::Enum("open".to_string())
        );
        assert!(parse_typed_value("invalid", &ty).is_err());
    }

    // Arrays are `[a,b,c]`; elements are parsed with the element type.
    #[test]
    fn test_array_value() {
        assert_eq!(
            parse_typed_value("[1,2,3]", &Type::Array(Box::new(Type::Int))).unwrap(),
            Value::Array(vec![Value::Int(1), Value::Int(2), Value::Int(3)])
        );
        assert_eq!(
            parse_typed_value("[]", &Type::Array(Box::new(Type::Str))).unwrap(),
            Value::Array(Vec::new())
        );
    }

    // Maps are `{k:v,...}`; entry order is kept and values are parsed untyped.
    #[test]
    fn test_map_value() {
        assert_eq!(
            parse_typed_value("{name:alice,age:30}", &Type::Map).unwrap(),
            Value::Map(vec![
                ("name".to_string(), Value::Str("alice".to_string())),
                ("age".to_string(), Value::Int(30)),
            ])
        );
    }

    // Quoted strings support the `\t`, `\n` and `\"` escapes.
    #[test]
    fn test_string_escaping() {
        assert_eq!(
            parse_typed_value(r#""has a \t tab""#, &Type::Str).unwrap(),
            Value::Str("has a \t tab".to_string())
        );
        assert_eq!(
            parse_typed_value(r#""line\nbreak""#, &Type::Str).unwrap(),
            Value::Str("line\nbreak".to_string())
        );
        assert_eq!(
            parse_typed_value(r#""she said \"hello\"""#, &Type::Str).unwrap(),
            Value::Str("she said \"hello\"".to_string())
        );
    }

    // ---- Whole documents ----

    // Header, a directive, a schema and two tab-separated records.
    #[test]
    fn test_typical_document() {
        let input = "\
#!sif v1
#context Repository issues
#schema id:uint:id title:str status:enum(open,closed) created:datetime
1\tFix flatten in tagged enums\topen\t2026-01-15T10:30:00Z
2\tCow borrows owned\tclosed\t2026-01-16T08:00:00Z
";
        let doc = parse(input).unwrap();
        assert_eq!(doc.sections.len(), 1);
        let sec = &doc.sections[0];
        assert_eq!(sec.records.len(), 2);
        assert_eq!(sec.records[0].values[0], Value::Uint(1));
        assert_eq!(
            sec.records[0].values[1],
            Value::Str("Fix flatten in tagged enums".to_string())
        );
        assert_eq!(
            sec.records[0].values[2],
            Value::Enum("open".to_string())
        );
    }

    // `---` splits the document into sections, each with its own schema.
    #[test]
    fn test_multi_section() {
        let input = "\
#!sif v1
#context Repos
#schema name:str stars:uint
serde\t8947
---
#context Issues
#schema id:uint:id title:str
1\tFlatten bug
";
        let doc = parse(input).unwrap();
        assert_eq!(doc.sections.len(), 2);
        assert_eq!(doc.sections[0].records.len(), 1);
        assert_eq!(doc.sections[1].records.len(), 1);
    }

    // A `§name` line sets the id of the section it appears in.
    #[test]
    fn test_section_identifiers() {
        let input = "\
#!sif v1
§repos
#schema name:str stars:uint
serde\t8947
---
§issues
#schema id:uint:id title:str
1\tFlatten bug
";
        let doc = parse(input).unwrap();
        assert_eq!(doc.sections[0].id.as_deref(), Some("repos"));
        assert_eq!(doc.sections[1].id.as_deref(), Some("issues"));
        assert!(doc.section_by_id("repos").is_some());
    }

    // `#block <type> key=value` ... `#/block` captures the lines in between.
    #[test]
    fn test_blocks() {
        let input = "\
#!sif v1
#block code language=rust
fn main() {
    println!(\"hello\");
}
#/block
";
        let doc = parse(input).unwrap();
        assert_eq!(doc.sections[0].blocks.len(), 1);
        let block = &doc.sections[0].blocks[0];
        assert_eq!(block.block_type, BlockType::Code);
        assert_eq!(block.attributes, vec![("language".to_string(), "rust".to_string())]);
        assert!(block.content.contains("fn main()"));
    }

    // A template body can be rendered against a record using `@{field}` slots.
    #[test]
    fn test_templates() {
        let input = "\
#!sif v1
#schema id:uint name:str
#template greeting
Hello, @{name}! Your ID is @{id}.
#/template
1\talice
";
        let doc = parse(input).unwrap();
        assert_eq!(doc.sections[0].templates.len(), 1);
        let tmpl = &doc.sections[0].templates[0];
        assert_eq!(tmpl.name, "greeting");

        let schema = doc.sections[0].schema.as_ref().unwrap();
        let record = &doc.sections[0].records[0];
        let rendered = tmpl.render(record, schema);
        assert_eq!(rendered, "Hello, alice! Your ID is 1.");
    }

    // `#recall schema` does not disturb the active schema: both records parse.
    #[test]
    fn test_recall_is_noop() {
        let input = "\
#!sif v1
#schema id:uint name:str
1\talice
#recall schema
2\tbob
";
        let doc = parse(input).unwrap();
        assert_eq!(doc.sections[0].records.len(), 2);
    }

    // `@semantic{...}` annotations are split out from the surrounding text.
    #[test]
    fn test_inline_annotations() {
        let spans = parse_inline_annotations(
            "Error in @path{src/main.rs} at @line_number{42}: @error{expected usize}",
        );
        assert_eq!(spans.len(), 6);
        // The second span is the `@path{...}` annotation.
        match &spans[1] {
            Span::Annotated { semantic, children } => {
                assert_eq!(semantic, "path");
                assert_eq!(children.len(), 1);
            }
            _ => panic!("expected annotated span"),
        }
    }

    // Inline form: `sif::` prefix, then `::` between what would be lines.
    #[test]
    fn test_inline_sif() {
        let doc =
            parse_inline_sif("sif::#schema id:uint name:str::1\talice::2\tbob").unwrap();
        assert_eq!(doc.sections[0].records.len(), 2);
    }

    // A leading `Δ` marks an update record, a leading `∅` a delete record.
    #[test]
    fn test_cdc_prefixes() {
        let schema = parse_schema("id:uint:id name:str").unwrap();
        let rec = parse_record_line("Δ1\tupdated", &schema, 1).unwrap();
        assert_eq!(rec.cdc_op, CdcOp::Update);
        assert_eq!(rec.values[0], Value::Uint(1));

        let rec = parse_record_line("∅2\tdeleted", &schema, 1).unwrap();
        assert_eq!(rec.cdc_op, CdcOp::Delete);
    }

    // `#relation a -> b` is parsed into a `Directive::Relation`.
    #[test]
    fn test_relation_directive() {
        let input = "\
#!sif v1
#schema id:uint:id parent_id:uint?:ref name:str
#relation parent_id -> id
1\t_\tRoot
2\t1\tChild
";
        let doc = parse(input).unwrap();
        let directives = &doc.sections[0].directives;
        assert!(directives.iter().any(|d| matches!(d, Directive::Relation { .. })));
    }

    // A `§section.key` value in a str field is kept as a plain string.
    #[test]
    fn test_cross_section_reference() {
        let input = "\
#!sif v1
§auth
#schema name:str:id type:str
jwt\tbearer
---
§endpoints
#schema path:str:path auth:str?
/users\t§auth.jwt
";
        let doc = parse(input).unwrap();
        let rec = &doc.sections[1].records[0];
        assert_eq!(rec.values[1], Value::Str("§auth.jwt".to_string()));
    }

    // Date, datetime and duration values keep their original text.
    #[test]
    fn test_date_types() {
        let schema = parse_schema("d:date dt:datetime dur:duration").unwrap();
        let rec = parse_record_line("2026-03-14\t2026-03-14T10:30:00Z\tPT2H30M", &schema, 1).unwrap();
        assert_eq!(rec.values[0], Value::Date("2026-03-14".to_string()));
        assert_eq!(rec.values[1], Value::DateTime("2026-03-14T10:30:00Z".to_string()));
        assert_eq!(rec.values[2], Value::Duration("PT2H30M".to_string()));
    }

    // A header-only document still has one (empty) section.
    #[test]
    fn test_empty_document() {
        let doc = parse("#!sif v1\n").unwrap();
        assert_eq!(doc.sections.len(), 1);
        assert!(doc.sections[0].records.is_empty());
    }

    // A nullable array field accepts both `_` and an array literal.
    #[test]
    fn test_nullable_array() {
        let schema = parse_schema("tags:str[]?").unwrap();
        assert_eq!(
            schema.fields[0].field_type,
            Type::Nullable(Box::new(Type::Array(Box::new(Type::Str))))
        );
        let rec = parse_record_line("_", &schema, 1).unwrap();
        assert_eq!(rec.values[0], Value::Null);
        let rec = parse_record_line("[a,b]", &schema, 1).unwrap();
        assert_eq!(
            rec.values[0],
            Value::Array(vec![
                Value::Str("a".to_string()),
                Value::Str("b".to_string()),
            ])
        );
    }

    // Fields missing at the end of a record line come back as null here.
    #[test]
    fn test_missing_trailing_fields() {
        let schema = parse_schema("a:str b:str? c:str?").unwrap();
        let rec = parse_record_line("hello", &schema, 1).unwrap();
        assert_eq!(rec.values.len(), 3);
        assert_eq!(rec.values[0], Value::Str("hello".to_string()));
        assert_eq!(rec.values[1], Value::Null);
        assert_eq!(rec.values[2], Value::Null);
    }

    // An unrecognised directive is kept as `Directive::Unknown`, and parsing continues.
    #[test]
    fn test_unknown_directive_ignored() {
        let input = "\
#!sif v1
#custom_directive some value here
#schema id:uint
1
";
        let doc = parse(input).unwrap();
        assert!(doc.sections[0]
            .directives
            .iter()
            .any(|d| matches!(d, Directive::Unknown { name, .. } if name == "custom_directive")));
        assert_eq!(doc.sections[0].records.len(), 1);
    }
}