Skip to main content

haystack_core/codecs/zinc/
parser.rs

1// Zinc recursive descent parser for scalars and grids.
2
3use crate::codecs::CodecError;
4use crate::data::{HCol, HDict, HGrid};
5use crate::kinds::*;
6use chrono::{NaiveDate, NaiveTime};
7
8/// Maximum nesting depth for lists and dicts to prevent stack overflow from
9/// deeply nested inputs.
10const MAX_NESTING_DEPTH: usize = 64;
11
12/// Maximum length for string and URI literals (10 MiB) to prevent unbounded
13/// memory growth from malicious input.
14const MAX_STRING_LENGTH: usize = 10 * 1024 * 1024;
15
16/// Maximum number of elements in a list or entries in a dict to prevent
17/// unbounded memory growth from malicious input.
18const MAX_COLLECTION_SIZE: usize = 1_000_000;
19
/// Hand-written recursive descent parser for Zinc wire format.
///
/// Holds the borrowed input text together with a byte cursor into it and the
/// current list/dict nesting level.
pub struct ZincParser<'a> {
    /// Input text being parsed.
    src: &'a str,
    /// Byte offset into `src` of the next unread character.
    pos: usize,
    /// Number of list/dict literals currently open; checked against
    /// `MAX_NESTING_DEPTH` when a new list or dict is entered.
    depth: usize,
}
26
27impl<'a> ZincParser<'a> {
28    /// Create a new parser for the given input.
29    pub fn new(src: &'a str) -> Self {
30        Self {
31            src,
32            pos: 0,
33            depth: 0,
34        }
35    }
36
37    /// Create a new parser starting at the given position within the source.
38    pub fn new_at(src: &'a str, pos: usize) -> Self {
39        Self { src, pos, depth: 0 }
40    }
41
42    /// Return the current byte position of the parser.
43    pub fn pos(&self) -> usize {
44        self.pos
45    }
46
47    /// Parse a single scalar value, consuming the entire input.
48    pub fn parse_scalar(&mut self) -> Result<Kind, CodecError> {
49        let val = self.read_val()?;
50        self.skip_spaces();
51        if !self.at_end() {
52            return Err(self.err(format!(
53                "unexpected trailing input: {:?}",
54                &self.src[self.pos..]
55            )));
56        }
57        Ok(val)
58    }
59
60    // ── Navigation helpers ──
61
62    pub fn at_end(&self) -> bool {
63        self.pos >= self.src.len()
64    }
65
66    fn peek(&self) -> Option<char> {
67        self.src[self.pos..].chars().next()
68    }
69
70    fn peek_ahead(&self, n: usize) -> Option<char> {
71        self.src[self.pos..].chars().nth(n)
72    }
73
74    fn consume(&mut self) -> Option<char> {
75        let ch = self.peek()?;
76        self.pos += ch.len_utf8();
77        Some(ch)
78    }
79
80    fn consume_if(&mut self, ch: char) -> bool {
81        if self.peek() == Some(ch) {
82            self.pos += ch.len_utf8();
83            true
84        } else {
85            false
86        }
87    }
88
89    pub fn skip_spaces(&mut self) {
90        while let Some(ch) = self.peek() {
91            if ch == ' ' || ch == '\t' {
92                self.pos += 1;
93            } else {
94                break;
95            }
96        }
97    }
98
99    fn remaining(&self) -> &str {
100        &self.src[self.pos..]
101    }
102
103    fn err(&self, msg: impl Into<String>) -> CodecError {
104        CodecError::Parse {
105            pos: self.pos,
106            message: msg.into(),
107        }
108    }
109
110    // ── Value dispatch ──
111
112    pub fn read_val(&mut self) -> Result<Kind, CodecError> {
113        self.skip_spaces();
114        if self.at_end() {
115            return Ok(Kind::Null);
116        }
117
118        let ch = self.peek().unwrap();
119
120        // N → Null or NA
121        if ch == 'N' {
122            let next = self.peek_ahead(1);
123            if next == Some('A') && !self.is_alpha_at(2) {
124                self.pos += 2;
125                return Ok(Kind::NA);
126            }
127            if next.is_none() || !next.unwrap().is_alphanumeric() {
128                self.pos += 1;
129                return Ok(Kind::Null);
130            }
131            // Fall through to xstr_or_keyword for NaN etc.
132        }
133
134        // T → true
135        if ch == 'T' && !self.is_alpha_at(1) {
136            self.pos += 1;
137            return Ok(Kind::Bool(true));
138        }
139
140        // F → false
141        if ch == 'F' && !self.is_alpha_at(1) {
142            self.pos += 1;
143            return Ok(Kind::Bool(false));
144        }
145
146        // M → Marker
147        if ch == 'M' && !self.is_alpha_at(1) {
148            self.pos += 1;
149            return Ok(Kind::Marker);
150        }
151
152        // R → Remove
153        if ch == 'R' && !self.is_alpha_at(1) {
154            self.pos += 1;
155            return Ok(Kind::Remove);
156        }
157
158        // -INF (must check before general number since '-' followed by 'I' is not a digit)
159        if ch == '-' && self.remaining().starts_with("-INF") {
160            self.pos += 4;
161            return Ok(Kind::Number(Number::unitless(f64::NEG_INFINITY)));
162        }
163
164        // Number, Date, Time, DateTime (starts with digit or '-' followed by digit)
165        let is_neg_num = ch == '-' && self.peek_ahead(1).is_some_and(|c| c.is_ascii_digit());
166        if ch.is_ascii_digit() || is_neg_num {
167            return self.read_number();
168        }
169
170        // String
171        if ch == '"' {
172            let s = self.read_str()?;
173            return Ok(Kind::Str(s));
174        }
175
176        // Ref
177        if ch == '@' {
178            return self.read_ref();
179        }
180
181        // URI
182        if ch == '`' {
183            return self.read_uri();
184        }
185
186        // Symbol
187        if ch == '^' {
188            return self.read_symbol();
189        }
190
191        // Coord
192        if ch == 'C' && self.peek_ahead(1) == Some('(') {
193            return self.read_coord();
194        }
195
196        // List
197        if ch == '[' {
198            return self.read_list();
199        }
200
201        // Dict
202        if ch == '{' {
203            return self.read_dict();
204        }
205
206        // XStr or keyword (INF, NaN, NA, etc.)
207        if ch.is_uppercase() {
208            return self.read_xstr_or_keyword();
209        }
210
211        Err(self.err(format!("unexpected character '{ch}'")))
212    }
213
214    fn is_alpha_at(&self, offset: usize) -> bool {
215        self.src[self.pos..]
216            .chars()
217            .nth(offset)
218            .is_some_and(|c| c.is_alphanumeric())
219    }
220
221    // ── Number / Date / Time / DateTime ──
222
223    fn read_number(&mut self) -> Result<Kind, CodecError> {
224        // Check for date pattern: YYYY-MM-DD
225        if self.looks_like_date() {
226            return self.read_date_or_datetime();
227        }
228
229        // Check for time pattern: HH:MM
230        if self.looks_like_time() {
231            return self.read_time();
232        }
233
234        // Parse sign
235        let neg = self.consume_if('-');
236
237        // Integer part
238        let int_part = self.read_digits()?;
239
240        // Decimal part
241        let frac_part = if self.peek() == Some('.') {
242            self.pos += 1;
243            Some(self.read_digits()?)
244        } else {
245            None
246        };
247
248        // Exponent
249        let exp_part = if self.peek() == Some('e') || self.peek() == Some('E') {
250            let mut exp = String::new();
251            exp.push(self.consume().unwrap());
252            if self.peek() == Some('+') || self.peek() == Some('-') {
253                exp.push(self.consume().unwrap());
254            }
255            exp.push_str(&self.read_digits()?);
256            Some(exp)
257        } else {
258            None
259        };
260
261        // Build number string
262        let mut num_str = String::new();
263        if neg {
264            num_str.push('-');
265        }
266        num_str.push_str(&int_part);
267        if let Some(ref frac) = frac_part {
268            num_str.push('.');
269            num_str.push_str(frac);
270        }
271        if let Some(ref exp) = exp_part {
272            num_str.push_str(exp);
273        }
274
275        let val: f64 = num_str
276            .parse()
277            .map_err(|_| self.err(format!("invalid number: {num_str}")))?;
278
279        // Unit
280        let unit = self.read_unit();
281
282        Ok(Kind::Number(Number::new(
283            val,
284            if unit.is_empty() { None } else { Some(unit) },
285        )))
286    }
287
288    fn read_digits(&mut self) -> Result<String, CodecError> {
289        let start = self.pos;
290        while let Some(ch) = self.peek() {
291            if ch.is_ascii_digit() || ch == '_' {
292                self.pos += ch.len_utf8();
293            } else {
294                break;
295            }
296        }
297        let raw = &self.src[start..self.pos];
298        let result: String = raw.chars().filter(|&c| c != '_').collect();
299        if result.is_empty() {
300            return Err(self.err("expected digits"));
301        }
302        Ok(result)
303    }
304
305    fn read_unit(&mut self) -> String {
306        let start = self.pos;
307        let mut first = true;
308        while let Some(ch) = self.peek() {
309            if ch.is_alphabetic()
310                || ch as u32 > 127
311                || ch == '_'
312                || ch == '/'
313                || ch == '%'
314                || ch == '$'
315            {
316                self.pos += ch.len_utf8();
317                first = false;
318            } else if ch.is_ascii_digit() && !first {
319                // Digits allowed after first unit char
320                self.pos += 1;
321            } else {
322                break;
323            }
324        }
325        self.src[start..self.pos].to_string()
326    }
327
328    fn looks_like_date(&self) -> bool {
329        // YYYY-MM-DD: need at least 10 chars
330        let rem = self.remaining();
331        if rem.len() < 10 {
332            return false;
333        }
334        let bytes = rem.as_bytes();
335        bytes[0..4].iter().all(|b| b.is_ascii_digit())
336            && bytes[4] == b'-'
337            && bytes[5..7].iter().all(|b| b.is_ascii_digit())
338            && bytes[7] == b'-'
339            && bytes[8..10].iter().all(|b| b.is_ascii_digit())
340    }
341
342    fn looks_like_time(&self) -> bool {
343        // HH:MM
344        let rem = self.remaining();
345        if rem.len() < 5 {
346            return false;
347        }
348        let bytes = rem.as_bytes();
349        bytes[0..2].iter().all(|b| b.is_ascii_digit())
350            && bytes[2] == b':'
351            && bytes[3..5].iter().all(|b| b.is_ascii_digit())
352    }
353
354    fn read_date_or_datetime(&mut self) -> Result<Kind, CodecError> {
355        // Read YYYY-MM-DD
356        let date_str = &self.src[self.pos..self.pos + 10];
357        let date = NaiveDate::parse_from_str(date_str, "%Y-%m-%d")
358            .map_err(|e| self.err(format!("invalid date: {e}")))?;
359        self.pos += 10;
360
361        // Check for T → datetime
362        if self.peek() == Some('T') {
363            return self.read_datetime_after_date(date);
364        }
365
366        Ok(Kind::Date(date))
367    }
368
369    fn read_datetime_after_date(&mut self, date: NaiveDate) -> Result<Kind, CodecError> {
370        self.pos += 1; // skip T
371        let time_str = self.read_time_str()?;
372        let offset_str = self.read_offset()?;
373
374        // Build ISO string and parse
375        let iso = format!("{}T{}{}", date, time_str, offset_str);
376        let dt = chrono::DateTime::parse_from_str(&iso, "%Y-%m-%dT%H:%M:%S%.f%:z")
377            .or_else(|_| chrono::DateTime::parse_from_str(&iso, "%Y-%m-%dT%H:%M:%S%:z"))
378            .map_err(|e| self.err(format!("invalid datetime: {e} (from '{iso}')")))?;
379
380        // Read optional timezone name
381        self.skip_spaces();
382        let tz_name = self.read_tz_name();
383
384        let tz = if tz_name.is_empty() {
385            "UTC".to_string()
386        } else {
387            tz_name
388        };
389
390        Ok(Kind::DateTime(HDateTime::new(dt, tz)))
391    }
392
393    fn read_time_str(&mut self) -> Result<String, CodecError> {
394        let start = self.pos;
395        // HH:MM
396        if self.remaining().len() < 5 {
397            return Err(self.err("expected time HH:MM"));
398        }
399        self.pos += 5;
400        // Optional :SS
401        if self.peek() == Some(':') {
402            if self.remaining().len() < 3 {
403                return Err(self.err("incomplete seconds in time"));
404            }
405            self.pos += 3; // :SS
406            // Optional .FFF...
407            if self.peek() == Some('.') {
408                self.pos += 1;
409                while let Some(ch) = self.peek() {
410                    if ch.is_ascii_digit() {
411                        self.pos += 1;
412                    } else {
413                        break;
414                    }
415                }
416            }
417        }
418        Ok(self.src[start..self.pos].to_string())
419    }
420
421    fn read_offset(&mut self) -> Result<String, CodecError> {
422        if self.at_end() {
423            return Ok(String::new());
424        }
425        if self.peek() == Some('Z') {
426            self.pos += 1;
427            return Ok("+00:00".to_string());
428        }
429        if self.peek() == Some('+') || self.peek() == Some('-') {
430            let start = self.pos;
431            if self.remaining().len() < 3 {
432                return Err(self.err("incomplete UTC offset"));
433            }
434            self.pos += 1; // sign
435            self.pos += 2; // HH
436            if self.peek() == Some(':') {
437                if self.remaining().len() < 3 {
438                    return Err(self.err("incomplete UTC offset minutes"));
439                }
440                self.pos += 3; // :MM
441            }
442            return Ok(self.src[start..self.pos].to_string());
443        }
444        Ok(String::new())
445    }
446
447    fn read_tz_name(&mut self) -> String {
448        let start = self.pos;
449        while let Some(ch) = self.peek() {
450            if ch.is_alphanumeric() || ch == '_' || ch == '-' || ch == '/' {
451                self.pos += ch.len_utf8();
452            } else {
453                break;
454            }
455        }
456        self.src[start..self.pos].to_string()
457    }
458
459    fn read_time(&mut self) -> Result<Kind, CodecError> {
460        let time_str = self.read_time_str()?;
461        let time = NaiveTime::parse_from_str(&time_str, "%H:%M:%S%.f")
462            .or_else(|_| NaiveTime::parse_from_str(&time_str, "%H:%M:%S"))
463            .or_else(|_| NaiveTime::parse_from_str(&time_str, "%H:%M"))
464            .map_err(|e| self.err(format!("invalid time: {e}")))?;
465        Ok(Kind::Time(time))
466    }
467
468    // ── Strings ──
469
470    fn read_str(&mut self) -> Result<String, CodecError> {
471        self.pos += 1; // skip opening "
472        let mut result = String::new();
473        while !self.at_end() {
474            let ch = self.peek().unwrap();
475            if ch == '"' {
476                self.pos += 1;
477                return Ok(result);
478            }
479            if ch == '\\' {
480                self.pos += 1;
481                result.push(self.read_escape()?);
482            } else {
483                result.push(ch);
484                self.pos += ch.len_utf8();
485            }
486            if result.len() > MAX_STRING_LENGTH {
487                return Err(self.err("string exceeds maximum allowed length"));
488            }
489        }
490        Err(self.err("unterminated string"))
491    }
492
493    fn read_escape(&mut self) -> Result<char, CodecError> {
494        if self.at_end() {
495            return Err(self.err("unexpected end of escape sequence"));
496        }
497        let ch = self.consume().unwrap();
498        match ch {
499            'n' => Ok('\n'),
500            'r' => Ok('\r'),
501            't' => Ok('\t'),
502            '\\' => Ok('\\'),
503            '"' => Ok('"'),
504            '$' => Ok('$'),
505            'b' => Ok('\u{0008}'),
506            'f' => Ok('\u{000C}'),
507            'u' => {
508                if self.remaining().len() < 4 {
509                    return Err(self.err("incomplete unicode escape"));
510                }
511                let hex = &self.src[self.pos..self.pos + 4];
512                self.pos += 4;
513                let code = u32::from_str_radix(hex, 16)
514                    .map_err(|_| self.err(format!("invalid unicode escape: {hex}")))?;
515                char::from_u32(code)
516                    .ok_or_else(|| self.err(format!("invalid unicode codepoint: {code}")))
517            }
518            _ => Err(self.err(format!("unknown escape sequence: \\{ch}"))),
519        }
520    }
521
522    // ── Ref ──
523
524    fn read_ref(&mut self) -> Result<Kind, CodecError> {
525        self.pos += 1; // skip @
526        let start = self.pos;
527        while let Some(ch) = self.peek() {
528            if is_ref_char(ch) {
529                self.pos += ch.len_utf8();
530            } else {
531                break;
532            }
533        }
534        let val = self.src[start..self.pos].to_string();
535
536        // Optional display string
537        self.skip_spaces();
538        let dis = if self.peek() == Some('"') {
539            Some(self.read_str()?)
540        } else {
541            None
542        };
543
544        Ok(Kind::Ref(HRef::new(val, dis)))
545    }
546
547    // ── URI ──
548
549    fn read_uri(&mut self) -> Result<Kind, CodecError> {
550        self.pos += 1; // skip `
551        let mut result = String::new();
552        while !self.at_end() {
553            let ch = self.peek().unwrap();
554            if ch == '`' {
555                self.pos += 1;
556                return Ok(Kind::Uri(Uri::new(result)));
557            }
558            if ch == '\\' {
559                self.pos += 1;
560                if let Some(next) = self.consume() {
561                    result.push(next);
562                }
563            } else {
564                result.push(ch);
565                self.pos += ch.len_utf8();
566            }
567            if result.len() > MAX_STRING_LENGTH {
568                return Err(self.err("URI exceeds maximum allowed length"));
569            }
570        }
571        Err(self.err("unterminated URI"))
572    }
573
574    // ── Symbol ──
575
576    fn read_symbol(&mut self) -> Result<Kind, CodecError> {
577        self.pos += 1; // skip ^
578        let start = self.pos;
579        while let Some(ch) = self.peek() {
580            if is_ref_char(ch) {
581                self.pos += ch.len_utf8();
582            } else {
583                break;
584            }
585        }
586        Ok(Kind::Symbol(Symbol::new(&self.src[start..self.pos])))
587    }
588
589    // ── Coord ──
590
591    fn read_coord(&mut self) -> Result<Kind, CodecError> {
592        self.pos += 2; // skip C(
593        let start = self.pos;
594        while self.peek() != Some(',') && !self.at_end() {
595            self.pos += 1;
596        }
597        if self.at_end() {
598            return Err(self.err("unterminated coord literal, expected ','"));
599        }
600        let lat: f64 = self.src[start..self.pos]
601            .trim()
602            .parse()
603            .map_err(|_| self.err("invalid coord latitude"))?;
604        if !(-90.0..=90.0).contains(&lat) {
605            return Err(self.err("coord latitude must be between -90 and 90"));
606        }
607        self.pos += 1; // skip comma
608        let start = self.pos;
609        while self.peek() != Some(')') && !self.at_end() {
610            self.pos += 1;
611        }
612        if self.at_end() {
613            return Err(self.err("unterminated coord literal, expected ')'"));
614        }
615        let lng: f64 = self.src[start..self.pos]
616            .trim()
617            .parse()
618            .map_err(|_| self.err("invalid coord longitude"))?;
619        if !(-180.0..=180.0).contains(&lng) {
620            return Err(self.err("coord longitude must be between -180 and 180"));
621        }
622        self.pos += 1; // skip )
623        Ok(Kind::Coord(Coord::new(lat, lng)))
624    }
625
626    // ── List ──
627
628    fn read_list(&mut self) -> Result<Kind, CodecError> {
629        self.depth += 1;
630        if self.depth > MAX_NESTING_DEPTH {
631            self.depth -= 1;
632            return Err(self.err("maximum nesting depth exceeded"));
633        }
634
635        self.pos += 1; // skip [
636        let mut vals = Vec::new();
637        self.skip_spaces();
638        while !self.at_end() && self.peek() != Some(']') {
639            if vals.len() >= MAX_COLLECTION_SIZE {
640                self.depth -= 1;
641                return Err(self.err("list exceeds maximum collection size"));
642            }
643            vals.push(self.read_val()?);
644            self.skip_spaces();
645            self.consume_if(',');
646            self.skip_spaces();
647        }
648        if self.at_end() || self.peek() != Some(']') {
649            self.depth -= 1;
650            return Err(self.err("unterminated list"));
651        }
652        self.pos += 1; // skip ]
653        self.depth -= 1;
654        Ok(Kind::List(vals))
655    }
656
657    // ── Dict ──
658
659    fn read_dict(&mut self) -> Result<Kind, CodecError> {
660        self.depth += 1;
661        if self.depth > MAX_NESTING_DEPTH {
662            self.depth -= 1;
663            return Err(self.err("maximum nesting depth exceeded"));
664        }
665
666        self.pos += 1; // skip {
667        let mut dict = HDict::new();
668        self.skip_spaces();
669        while !self.at_end() && self.peek() != Some('}') {
670            if dict.len() >= MAX_COLLECTION_SIZE {
671                self.depth -= 1;
672                return Err(self.err("dict exceeds maximum collection size"));
673            }
674            let name = self.read_tag_name()?;
675            self.skip_spaces();
676            if self.peek() == Some(':') {
677                self.pos += 1;
678                self.skip_spaces();
679                let val = self.read_val()?;
680                dict.set(name, val);
681            } else {
682                dict.set(name, Kind::Marker);
683            }
684            self.skip_spaces();
685            self.consume_if(',');
686            self.skip_spaces();
687        }
688        if self.at_end() || self.peek() != Some('}') {
689            self.depth -= 1;
690            return Err(self.err("unterminated dict"));
691        }
692        self.pos += 1; // skip }
693        self.depth -= 1;
694        Ok(Kind::Dict(Box::new(dict)))
695    }
696
697    fn read_tag_name(&mut self) -> Result<String, CodecError> {
698        let start = self.pos;
699        while let Some(ch) = self.peek() {
700            if ch.is_alphanumeric() || ch == '_' {
701                self.pos += ch.len_utf8();
702            } else {
703                break;
704            }
705        }
706        let name = self.src[start..self.pos].to_string();
707        if name.is_empty() {
708            return Err(self.err("expected tag name"));
709        }
710        Ok(name)
711    }
712
713    // ── XStr or keyword ──
714
715    fn read_xstr_or_keyword(&mut self) -> Result<Kind, CodecError> {
716        let start = self.pos;
717        while let Some(ch) = self.peek() {
718            if ch.is_alphanumeric() || ch == '_' {
719                self.pos += ch.len_utf8();
720            } else {
721                break;
722            }
723        }
724        let name = &self.src[start..self.pos];
725
726        match name {
727            "INF" => return Ok(Kind::Number(Number::unitless(f64::INFINITY))),
728            "NaN" => return Ok(Kind::Number(Number::unitless(f64::NAN))),
729            "NA" => return Ok(Kind::NA),
730            _ => {}
731        }
732
733        // XStr: Type("value")
734        if self.peek() == Some('(') {
735            self.pos += 1; // skip (
736            self.skip_spaces();
737            let val = self.read_str()?;
738            self.skip_spaces();
739            if self.peek() != Some(')') {
740                return Err(self.err("unterminated XStr, expected closing ')'"));
741            }
742            self.pos += 1; // skip )
743            return Ok(Kind::XStr(XStr::new(name, val)));
744        }
745
746        Err(self.err(format!("unknown keyword '{name}'")))
747    }
748
749    /// Read an identifier (alphanumeric + underscore).
750    pub fn read_id(&mut self) -> String {
751        let start = self.pos;
752        while let Some(ch) = self.peek() {
753            if ch.is_alphanumeric() || ch == '_' {
754                self.pos += ch.len_utf8();
755            } else {
756                break;
757            }
758        }
759        self.src[start..self.pos].to_string()
760    }
761}
762
/// Whether `ch` may appear in a ref or symbol identifier: any alphanumeric
/// character or one of `_`, `:`, `-`, `.`, `~`.
fn is_ref_char(ch: char) -> bool {
    matches!(ch, '_' | ':' | '-' | '.' | '~') || ch.is_alphanumeric()
}
766
767/// Decode a single Zinc scalar value from a string.
768pub fn decode_scalar(input: &str) -> Result<Kind, CodecError> {
769    let mut parser = ZincParser::new(input.trim());
770    parser.parse_scalar()
771}
772
773// ── Grid decoding ──
774
775/// Decode a Zinc-formatted string into an HGrid.
776pub fn decode_grid(input: &str) -> Result<HGrid, CodecError> {
777    let lines: Vec<&str> = input
778        .lines()
779        .map(|l| l.trim())
780        .filter(|l| !l.is_empty() && !l.starts_with("//"))
781        .collect();
782
783    if lines.is_empty() {
784        return Ok(HGrid::new());
785    }
786
787    let mut line_idx = 0;
788
789    // Line 1: ver + grid meta
790    let ver_line = lines[line_idx];
791    line_idx += 1;
792
793    if !ver_line.starts_with("ver:") {
794        return Err(CodecError::Parse {
795            pos: 0,
796            message: format!("expected 'ver:' header, got: {ver_line:?}"),
797        });
798    }
799
800    // Skip past ver:"X.X"
801    let meta = parse_ver_line_meta(ver_line)?;
802
803    // Line 2: columns
804    if line_idx >= lines.len() {
805        return Ok(HGrid::from_parts(meta, vec![], vec![]));
806    }
807
808    let col_line = lines[line_idx];
809    line_idx += 1;
810
811    let cols = if col_line == "empty" {
812        vec![]
813    } else {
814        parse_cols(col_line)?
815    };
816
817    // Remaining lines: rows
818    let mut rows = Vec::new();
819    while line_idx < lines.len() {
820        let row_line = lines[line_idx];
821        line_idx += 1;
822        if row_line.is_empty() {
823            continue;
824        }
825        let row = parse_row(row_line, &cols)?;
826        rows.push(row);
827    }
828
829    Ok(HGrid::from_parts(meta, cols, rows))
830}
831
832fn parse_ver_line_meta(ver_line: &str) -> Result<HDict, CodecError> {
833    // Skip past ver:"X.X"
834    let mut parser = ZincParser::new(ver_line);
835    // Consume ver:"3.0" — find the first space
836    while !parser.at_end() && parser.peek() != Some(' ') {
837        parser.consume();
838    }
839    parser.skip_spaces();
840    if parser.at_end() {
841        return Ok(HDict::new());
842    }
843    parse_inline_meta(&mut parser)
844}
845
846fn parse_cols(line: &str) -> Result<Vec<HCol>, CodecError> {
847    let parts = split_csv_aware(line);
848    let mut cols = Vec::new();
849    for part in parts {
850        let part = part.trim();
851        if part.is_empty() {
852            continue;
853        }
854        let mut parser = ZincParser::new(part);
855        let name = read_col_name(&mut parser);
856        parser.skip_spaces();
857        let meta = if !parser.at_end() {
858            parse_inline_meta(&mut parser)?
859        } else {
860            HDict::new()
861        };
862        cols.push(HCol::with_meta(name, meta));
863    }
864    Ok(cols)
865}
866
867fn parse_row(line: &str, cols: &[HCol]) -> Result<HDict, CodecError> {
868    let parts = split_csv_aware(line);
869    let mut dict = HDict::new();
870    for (i, col) in cols.iter().enumerate() {
871        if i < parts.len() {
872            let cell = parts[i].trim();
873            if !cell.is_empty() && cell != "N" {
874                let mut parser = ZincParser::new(cell);
875                let val = parser.read_val()?;
876                dict.set(&col.name, val);
877            }
878        }
879    }
880    Ok(dict)
881}
882
883fn parse_inline_meta(parser: &mut ZincParser<'_>) -> Result<HDict, CodecError> {
884    let mut dict = HDict::new();
885    while !parser.at_end() {
886        parser.skip_spaces();
887        if parser.at_end() {
888            break;
889        }
890        let name = read_col_name(parser);
891        if name.is_empty() {
892            break;
893        }
894        parser.skip_spaces();
895        if parser.peek() == Some(':') {
896            parser.consume();
897            parser.skip_spaces();
898            let val = parser.read_val()?;
899            dict.set(name, val);
900        } else {
901            dict.set(name, Kind::Marker);
902        }
903        parser.skip_spaces();
904    }
905    Ok(dict)
906}
907
908fn read_col_name(parser: &mut ZincParser<'_>) -> String {
909    parser.read_id()
910}
911
/// Split a line on top-level commas, leaving commas inside quoted strings,
/// backtick URIs and bracketed structures (`()`, `[]`, `{}`) untouched.
///
/// Quoted spans are tracked at every nesting level and cover both `"..."`
/// strings and `` `...` `` URIs. Previously quotes were only recognised at
/// depth 0, so a bracket inside a string nested in a list or dict (e.g.
/// `["a]b"]`) corrupted the depth count, and a comma inside a URI split the
/// cell in two.
///
/// A backslash always protects the following character. The returned pieces
/// are not trimmed, and there is always at least one piece (possibly empty).
fn split_csv_aware(line: &str) -> Vec<String> {
    let mut parts = Vec::new();
    let mut current = String::new();
    let mut depth = 0i32;
    // Delimiter of the quoted span we are currently inside, if any.
    let mut quote: Option<char> = None;
    let mut escaped = false;

    for ch in line.chars() {
        if escaped {
            current.push(ch);
            escaped = false;
            continue;
        }
        if ch == '\\' {
            current.push(ch);
            escaped = true;
            continue;
        }
        if let Some(delim) = quote {
            // Inside a quoted span only the matching delimiter is special.
            if ch == delim {
                quote = None;
            }
            current.push(ch);
            continue;
        }
        match ch {
            '"' | '`' => {
                quote = Some(ch);
                current.push(ch);
            }
            '(' | '[' | '{' => {
                depth += 1;
                current.push(ch);
            }
            ')' | ']' | '}' => {
                depth -= 1;
                current.push(ch);
            }
            ',' if depth == 0 => {
                parts.push(std::mem::take(&mut current));
            }
            _ => {
                current.push(ch);
            }
        }
    }
    parts.push(current);
    parts
}
960
#[cfg(test)]
mod tests {
    //! Unit tests for the Zinc parser: scalar decoding, encode/decode
    //! round-trips, grid decoding, and the CSV-aware splitter.

    use super::*;
    use crate::data::{HDict, HGrid};
    use chrono::{Datelike, FixedOffset, NaiveDate, NaiveTime, TimeZone};

    // ── Shared helpers ──

    /// Encode `kind` to Zinc text, then decode that text back into a `Kind`.
    /// Panics if either direction fails.
    fn round_trip(kind: &Kind) -> Kind {
        let wire = crate::codecs::zinc::encode_scalar(kind).unwrap();
        decode_scalar(&wire).unwrap()
    }

    /// Decode `src` as a single Zinc scalar, panicking on any decode error.
    #[track_caller]
    fn parse(src: &str) -> Kind {
        decode_scalar(src).unwrap()
    }

    /// Shorthand for a unitless number value.
    fn num(val: f64) -> Kind {
        Kind::Number(Number::unitless(val))
    }

    /// Shorthand for a string value.
    fn text(s: &str) -> Kind {
        Kind::Str(s.into())
    }

    // ── Scalar round-trip tests ──

    #[test]
    fn parse_null() {
        assert_eq!(parse("N"), Kind::Null);
    }

    #[test]
    fn parse_true() {
        assert_eq!(parse("T"), Kind::Bool(true));
    }

    #[test]
    fn parse_false() {
        assert_eq!(parse("F"), Kind::Bool(false));
    }

    #[test]
    fn parse_marker() {
        assert_eq!(parse("M"), Kind::Marker);
    }

    #[test]
    fn parse_na() {
        assert_eq!(parse("NA"), Kind::NA);
    }

    #[test]
    fn parse_remove() {
        assert_eq!(parse("R"), Kind::Remove);
    }

    #[test]
    fn roundtrip_null() {
        let k = Kind::Null;
        assert_eq!(round_trip(&k), k);
    }

    #[test]
    fn roundtrip_bool_true() {
        let k = Kind::Bool(true);
        assert_eq!(round_trip(&k), k);
    }

    #[test]
    fn roundtrip_bool_false() {
        let k = Kind::Bool(false);
        assert_eq!(round_trip(&k), k);
    }

    #[test]
    fn roundtrip_marker() {
        let k = Kind::Marker;
        assert_eq!(round_trip(&k), k);
    }

    #[test]
    fn roundtrip_na() {
        let k = Kind::NA;
        assert_eq!(round_trip(&k), k);
    }

    #[test]
    fn roundtrip_remove() {
        let k = Kind::Remove;
        assert_eq!(round_trip(&k), k);
    }

    // ── Numbers ──

    #[test]
    fn parse_number_zero() {
        assert_eq!(parse("0"), num(0.0));
    }

    #[test]
    fn parse_number_integer() {
        assert_eq!(parse("42"), num(42.0));
    }

    #[test]
    fn parse_number_float() {
        assert_eq!(parse("72.5"), num(72.5));
    }

    #[test]
    fn parse_number_negative() {
        assert_eq!(parse("-23.45"), num(-23.45));
    }

    #[test]
    fn parse_number_scientific() {
        match parse("5.4e8") {
            Kind::Number(n) => assert!((n.val - 5.4e8).abs() < 1.0),
            k => panic!("expected Number, got {k:?}"),
        }
    }

    #[test]
    fn parse_number_inf() {
        match parse("INF") {
            Kind::Number(n) => assert!(n.val.is_infinite() && n.val > 0.0),
            _ => panic!("expected Number(INF)"),
        }
    }

    #[test]
    fn parse_number_neg_inf() {
        match parse("-INF") {
            Kind::Number(n) => assert!(n.val.is_infinite() && n.val < 0.0),
            _ => panic!("expected Number(-INF)"),
        }
    }

    #[test]
    fn parse_number_nan() {
        match parse("NaN") {
            Kind::Number(n) => assert!(n.val.is_nan()),
            _ => panic!("expected Number(NaN)"),
        }
    }

    #[test]
    fn parse_number_with_unit() {
        match parse("72.5\u{00B0}F") {
            Kind::Number(n) => {
                assert_eq!(n.val, 72.5);
                assert_eq!(n.unit.as_deref(), Some("\u{00B0}F"));
            }
            _ => panic!("expected Number with unit"),
        }
    }

    #[test]
    fn roundtrip_number_zero() {
        let k = num(0.0);
        assert_eq!(round_trip(&k), k);
    }

    #[test]
    fn roundtrip_number_integer() {
        let k = num(42.0);
        assert_eq!(round_trip(&k), k);
    }

    #[test]
    fn roundtrip_number_float() {
        let k = num(72.5);
        assert_eq!(round_trip(&k), k);
    }

    #[test]
    fn roundtrip_number_negative() {
        let k = num(-23.45);
        assert_eq!(round_trip(&k), k);
    }

    #[test]
    fn roundtrip_number_with_unit() {
        let original = Kind::Number(Number::new(72.5, Some("\u{00B0}F".into())));
        match round_trip(&original) {
            Kind::Number(n) => {
                assert_eq!(n.val, 72.5);
                assert_eq!(n.unit.as_deref(), Some("\u{00B0}F"));
            }
            _ => panic!("expected Number"),
        }
    }

    // The non-finite values are checked by predicate rather than equality;
    // NaN in particular never compares equal to itself.

    #[test]
    fn roundtrip_inf() {
        match round_trip(&num(f64::INFINITY)) {
            Kind::Number(n) => assert!(n.val.is_infinite() && n.val > 0.0),
            _ => panic!("expected Number(INF)"),
        }
    }

    #[test]
    fn roundtrip_neg_inf() {
        match round_trip(&num(f64::NEG_INFINITY)) {
            Kind::Number(n) => assert!(n.val.is_infinite() && n.val < 0.0),
            _ => panic!("expected Number(-INF)"),
        }
    }

    #[test]
    fn roundtrip_nan() {
        match round_trip(&num(f64::NAN)) {
            Kind::Number(n) => assert!(n.val.is_nan()),
            _ => panic!("expected Number(NaN)"),
        }
    }

    // ── Strings ──

    #[test]
    fn parse_string_empty() {
        assert_eq!(parse("\"\""), Kind::Str(String::new()));
    }

    #[test]
    fn parse_string_simple() {
        assert_eq!(parse("\"hello\""), text("hello"));
    }

    #[test]
    fn parse_string_escapes() {
        assert_eq!(parse("\"line1\\nline2\""), text("line1\nline2"));
        assert_eq!(parse("\"tab\\there\""), text("tab\there"));
        assert_eq!(parse("\"back\\\\slash\""), text("back\\slash"));
        assert_eq!(parse("\"q\\\"uote\""), text("q\"uote"));
        assert_eq!(parse("\"dollar\\$sign\""), text("dollar$sign"));
    }

    #[test]
    fn parse_string_unicode_escape() {
        assert_eq!(parse("\"\\u0041\""), text("A"));
    }

    #[test]
    fn roundtrip_string_empty() {
        let k = Kind::Str(String::new());
        assert_eq!(round_trip(&k), k);
    }

    #[test]
    fn roundtrip_string_escapes() {
        let k = text("line1\nline2\ttab\\slash\"quote$dollar");
        assert_eq!(round_trip(&k), k);
    }

    // ── Refs ──

    #[test]
    fn parse_ref_simple() {
        match parse("@site-1") {
            Kind::Ref(r) => {
                assert_eq!(r.val, "site-1");
                assert_eq!(r.dis, None);
            }
            _ => panic!("expected Ref"),
        }
    }

    #[test]
    fn parse_ref_with_dis() {
        match parse("@site-1 \"Main Site\"") {
            Kind::Ref(r) => {
                assert_eq!(r.val, "site-1");
                assert_eq!(r.dis, Some("Main Site".into()));
            }
            _ => panic!("expected Ref"),
        }
    }

    #[test]
    fn roundtrip_ref_simple() {
        let original = Kind::Ref(HRef::from_val("site-1"));
        match round_trip(&original) {
            Kind::Ref(r) => assert_eq!(r.val, "site-1"),
            _ => panic!("expected Ref"),
        }
    }

    #[test]
    fn roundtrip_ref_with_dis() {
        let original = Kind::Ref(HRef::new("site-1", Some("Main Site".into())));
        match round_trip(&original) {
            Kind::Ref(r) => {
                assert_eq!(r.val, "site-1");
                assert_eq!(r.dis, Some("Main Site".into()));
            }
            _ => panic!("expected Ref"),
        }
    }

    // ── URIs ──

    #[test]
    fn parse_uri_simple() {
        assert_eq!(
            parse("`http://example.com`"),
            Kind::Uri(Uri::new("http://example.com"))
        );
    }

    #[test]
    fn parse_uri_with_special() {
        assert_eq!(
            parse("`http://ex.com/path?q=1&b=2`"),
            Kind::Uri(Uri::new("http://ex.com/path?q=1&b=2"))
        );
    }

    #[test]
    fn roundtrip_uri() {
        let original = Kind::Uri(Uri::new("http://example.com/path"));
        assert_eq!(round_trip(&original), original);
    }

    // ── Symbols ──

    #[test]
    fn parse_symbol_simple() {
        assert_eq!(parse("^site"), Kind::Symbol(Symbol::new("site")));
    }

    #[test]
    fn parse_symbol_compound() {
        assert_eq!(parse("^hot-water"), Kind::Symbol(Symbol::new("hot-water")));
    }

    #[test]
    fn roundtrip_symbol() {
        let original = Kind::Symbol(Symbol::new("hot-water"));
        assert_eq!(round_trip(&original), original);
    }

    // ── Dates ──

    #[test]
    fn parse_date() {
        let expected = NaiveDate::from_ymd_opt(2024, 3, 13).unwrap();
        assert_eq!(parse("2024-03-13"), Kind::Date(expected));
    }

    #[test]
    fn roundtrip_date() {
        let original = Kind::Date(NaiveDate::from_ymd_opt(2024, 3, 13).unwrap());
        assert_eq!(round_trip(&original), original);
    }

    // ── Times ──

    #[test]
    fn parse_time() {
        let expected = NaiveTime::from_hms_opt(8, 12, 5).unwrap();
        assert_eq!(parse("08:12:05"), Kind::Time(expected));
    }

    #[test]
    fn parse_time_with_frac() {
        let expected = NaiveTime::from_hms_milli_opt(14, 30, 0, 123).unwrap();
        assert_eq!(parse("14:30:00.123"), Kind::Time(expected));
    }

    #[test]
    fn roundtrip_time() {
        let original = Kind::Time(NaiveTime::from_hms_opt(8, 12, 5).unwrap());
        assert_eq!(round_trip(&original), original);
    }

    #[test]
    fn roundtrip_time_frac() {
        let original = Kind::Time(NaiveTime::from_hms_milli_opt(14, 30, 0, 123).unwrap());
        assert_eq!(round_trip(&original), original);
    }

    // ── DateTimes ──

    #[test]
    fn parse_datetime() {
        match parse("2024-01-01T08:12:05-05:00 New_York") {
            Kind::DateTime(hdt) => {
                assert_eq!(hdt.tz_name, "New_York");
                assert_eq!(hdt.dt.year(), 2024);
            }
            _ => panic!("expected DateTime"),
        }
    }

    #[test]
    fn parse_datetime_utc() {
        match parse("2024-06-15T12:00:00+00:00 UTC") {
            Kind::DateTime(hdt) => assert_eq!(hdt.tz_name, "UTC"),
            _ => panic!("expected DateTime"),
        }
    }

    #[test]
    fn parse_datetime_z() {
        match parse("2024-06-15T12:00:00Z UTC") {
            Kind::DateTime(hdt) => {
                assert_eq!(hdt.tz_name, "UTC");
                assert_eq!(hdt.dt.offset(), &FixedOffset::east_opt(0).unwrap());
            }
            _ => panic!("expected DateTime"),
        }
    }

    #[test]
    fn roundtrip_datetime() {
        let dt = FixedOffset::west_opt(5 * 3600)
            .unwrap()
            .with_ymd_and_hms(2024, 1, 1, 8, 12, 5)
            .unwrap();
        let original = Kind::DateTime(HDateTime::new(dt, "New_York"));
        match round_trip(&original) {
            Kind::DateTime(hdt) => {
                assert_eq!(hdt.tz_name, "New_York");
                assert_eq!(hdt.dt, dt);
            }
            _ => panic!("expected DateTime"),
        }
    }

    #[test]
    fn roundtrip_datetime_utc() {
        let dt = FixedOffset::east_opt(0)
            .unwrap()
            .with_ymd_and_hms(2024, 6, 15, 12, 0, 0)
            .unwrap();
        let original = Kind::DateTime(HDateTime::new(dt, "UTC"));
        match round_trip(&original) {
            Kind::DateTime(hdt) => {
                assert_eq!(hdt.tz_name, "UTC");
                assert_eq!(hdt.dt, dt);
            }
            _ => panic!("expected DateTime"),
        }
    }

    // ── Coords ──

    #[test]
    fn parse_coord() {
        assert_eq!(
            parse("C(37.5458266,-77.4491888)"),
            Kind::Coord(Coord::new(37.5458266, -77.4491888))
        );
    }

    #[test]
    fn parse_coord_negative() {
        assert_eq!(
            parse("C(-33.8688,151.2093)"),
            Kind::Coord(Coord::new(-33.8688, 151.2093))
        );
    }

    #[test]
    fn roundtrip_coord() {
        let original = Kind::Coord(Coord::new(37.5458266, -77.4491888));
        assert_eq!(round_trip(&original), original);
    }

    // ── XStr ──

    #[test]
    fn parse_xstr() {
        assert_eq!(
            parse("Color(\"red\")"),
            Kind::XStr(XStr::new("Color", "red"))
        );
    }

    #[test]
    fn roundtrip_xstr() {
        let original = Kind::XStr(XStr::new("Color", "red"));
        assert_eq!(round_trip(&original), original);
    }

    // ── Lists ──

    #[test]
    fn parse_list_empty() {
        assert_eq!(parse("[]"), Kind::List(vec![]));
    }

    #[test]
    fn parse_list_mixed() {
        let expected = Kind::List(vec![num(1.0), text("two"), Kind::Marker]);
        assert_eq!(parse("[1, \"two\", M]"), expected);
    }

    #[test]
    fn parse_list_nested() {
        // Each inner list is a pair of unitless numbers.
        let pair = |a: f64, b: f64| Kind::List(vec![num(a), num(b)]);
        let expected = Kind::List(vec![pair(1.0, 2.0), pair(3.0, 4.0)]);
        assert_eq!(parse("[[1, 2], [3, 4]]"), expected);
    }

    #[test]
    fn roundtrip_list_empty() {
        let original = Kind::List(vec![]);
        assert_eq!(round_trip(&original), original);
    }

    #[test]
    fn roundtrip_list_mixed() {
        let original = Kind::List(vec![num(1.0), text("two"), Kind::Marker]);
        assert_eq!(round_trip(&original), original);
    }

    // ── Dicts ──

    #[test]
    fn parse_dict_empty() {
        assert_eq!(parse("{}"), Kind::Dict(Box::new(HDict::new())));
    }

    #[test]
    fn parse_dict_with_marker() {
        match parse("{site}") {
            Kind::Dict(d) => assert_eq!(d.get("site"), Some(&Kind::Marker)),
            _ => panic!("expected Dict"),
        }
    }

    #[test]
    fn parse_dict_with_values() {
        match parse("{dis:\"Main\" area:42}") {
            Kind::Dict(d) => {
                assert_eq!(d.get("dis"), Some(&text("Main")));
                assert_eq!(d.get("area"), Some(&num(42.0)));
            }
            _ => panic!("expected Dict"),
        }
    }

    #[test]
    fn parse_dict_mixed() {
        match parse("{site dis:\"Main\" area:42}") {
            Kind::Dict(d) => {
                assert_eq!(d.get("site"), Some(&Kind::Marker));
                assert_eq!(d.get("dis"), Some(&text("Main")));
                assert_eq!(d.get("area"), Some(&num(42.0)));
            }
            _ => panic!("expected Dict"),
        }
    }

    #[test]
    fn roundtrip_dict_empty() {
        let original = Kind::Dict(Box::new(HDict::new()));
        assert_eq!(round_trip(&original), original);
    }

    #[test]
    fn roundtrip_dict_with_values() {
        let mut tags = HDict::new();
        tags.set("dis", text("Main"));
        tags.set("site", Kind::Marker);
        let original = Kind::Dict(Box::new(tags));
        match round_trip(&original) {
            Kind::Dict(d) => {
                assert_eq!(d.get("dis"), Some(&text("Main")));
                assert_eq!(d.get("site"), Some(&Kind::Marker));
            }
            _ => panic!("expected Dict"),
        }
    }

    // ── Grid decoding tests ──

    #[test]
    fn decode_grid_empty() {
        let grid = decode_grid("ver:\"3.0\"\nempty\n").unwrap();
        assert!(grid.cols.is_empty());
        assert!(grid.rows.is_empty());
    }

    #[test]
    fn decode_grid_simple() {
        let grid =
            decode_grid("ver:\"3.0\"\ndis,area\n\"Site One\",4500\n\"Site Two\",3200\n").unwrap();
        assert_eq!(grid.num_cols(), 2);
        assert_eq!(grid.cols[0].name, "dis");
        assert_eq!(grid.cols[1].name, "area");
        assert_eq!(grid.len(), 2);

        let first = grid.row(0).unwrap();
        assert_eq!(first.get("dis"), Some(&text("Site One")));
        assert_eq!(first.get("area"), Some(&num(4500.0)));

        let second = grid.row(1).unwrap();
        assert_eq!(second.get("dis"), Some(&text("Site Two")));
        assert_eq!(second.get("area"), Some(&num(3200.0)));
    }

    #[test]
    fn decode_grid_with_meta() {
        let grid = decode_grid("ver:\"3.0\" err dis:\"some error\"\nempty\n").unwrap();
        assert!(grid.is_err());
        assert_eq!(grid.meta.get("dis"), Some(&text("some error")));
    }

    #[test]
    fn decode_grid_with_col_meta() {
        let grid = decode_grid("ver:\"3.0\"\nname,power unit:\"kW\"\n\"AHU-1\",75\n").unwrap();
        assert_eq!(grid.num_cols(), 2);
        assert_eq!(grid.cols[0].name, "name");
        assert_eq!(grid.cols[1].name, "power");
        assert_eq!(grid.cols[1].meta.get("unit"), Some(&text("kW")));
    }

    #[test]
    fn decode_grid_with_null_cells() {
        let grid = decode_grid("ver:\"3.0\"\na,b\n1,N\nN,2\n").unwrap();
        assert_eq!(grid.len(), 2);

        // An `N` cell is expected to show up as a missing tag on the row.
        let first = grid.row(0).unwrap();
        assert_eq!(first.get("a"), Some(&num(1.0)));
        assert!(first.missing("b"));

        let second = grid.row(1).unwrap();
        assert!(second.missing("a"));
        assert_eq!(second.get("b"), Some(&num(2.0)));
    }

    #[test]
    fn decode_grid_with_comments() {
        let grid = decode_grid("// comment\nver:\"3.0\"\nempty\n").unwrap();
        assert!(grid.cols.is_empty());
    }

    // ── Grid round-trip tests ──

    #[test]
    fn grid_roundtrip_empty() {
        let wire = crate::codecs::zinc::encode_grid(&HGrid::new()).unwrap();
        let decoded = decode_grid(&wire).unwrap();
        assert!(decoded.cols.is_empty());
        assert!(decoded.rows.is_empty());
    }

    #[test]
    fn grid_roundtrip_with_data() {
        // Build one row dict holding a `dis` string and an `area` number.
        let make_row = |dis: &str, area: f64| {
            let mut row = HDict::new();
            row.set("dis", text(dis));
            row.set("area", num(area));
            row
        };
        let cols = vec![HCol::new("dis"), HCol::new("area")];
        let rows = vec![make_row("Site One", 4500.0), make_row("Site Two", 3200.0)];
        let grid = HGrid::from_parts(HDict::new(), cols, rows);

        let wire = crate::codecs::zinc::encode_grid(&grid).unwrap();
        let decoded = decode_grid(&wire).unwrap();
        assert_eq!(decoded.num_cols(), 2);
        assert_eq!(decoded.len(), 2);
        assert_eq!(
            decoded.row(0).unwrap().get("dis"),
            Some(&text("Site One"))
        );
        assert_eq!(decoded.row(0).unwrap().get("area"), Some(&num(4500.0)));
    }

    #[test]
    fn grid_roundtrip_with_meta() {
        let mut meta = HDict::new();
        meta.set("err", Kind::Marker);
        meta.set("dis", text("some error"));
        let grid = HGrid::from_parts(meta, vec![], vec![]);

        let wire = crate::codecs::zinc::encode_grid(&grid).unwrap();
        let decoded = decode_grid(&wire).unwrap();
        assert!(decoded.is_err());
        assert_eq!(decoded.meta.get("dis"), Some(&text("some error")));
    }

    #[test]
    fn grid_roundtrip_error_grid() {
        let mut meta = HDict::new();
        meta.set("err", Kind::Marker);
        meta.set("dis", text("Error occurred"));
        meta.set("errTrace", text("stack trace here"));
        let grid = HGrid::from_parts(meta, vec![], vec![]);

        let wire = crate::codecs::zinc::encode_grid(&grid).unwrap();
        let decoded = decode_grid(&wire).unwrap();
        assert!(decoded.is_err());
        assert_eq!(
            decoded.meta.get("errTrace"),
            Some(&text("stack trace here"))
        );
    }

    // ── CSV-aware splitting ──

    #[test]
    fn split_csv_simple() {
        assert_eq!(split_csv_aware("a,b,c"), vec!["a", "b", "c"]);
    }

    #[test]
    fn split_csv_with_quotes() {
        // A comma inside a quoted string must not split the cell.
        assert_eq!(split_csv_aware("\"a,b\",c"), vec!["\"a,b\"", "c"]);
    }

    #[test]
    fn split_csv_with_nested() {
        // A comma inside brackets must not split the cell.
        assert_eq!(split_csv_aware("[1,2],3"), vec!["[1,2]", "3"]);
    }

    // ── -INF as negative number start ──

    #[test]
    fn parse_neg_inf_standalone() {
        match parse("-INF") {
            Kind::Number(n) => assert!(n.val.is_infinite() && n.val < 0.0),
            _ => panic!("expected Number(-INF)"),
        }
    }

    // ── Codec trait tests ──

    #[test]
    fn zinc_codec_trait() {
        use crate::codecs::Codec;
        let codec = crate::codecs::zinc::ZincCodec;
        assert_eq!(codec.mime_type(), "text/zinc");

        let scalar_wire = codec.encode_scalar(&Kind::Bool(true)).unwrap();
        assert_eq!(scalar_wire, "T");
        assert_eq!(codec.decode_scalar("T").unwrap(), Kind::Bool(true));

        let grid_wire = codec.encode_grid(&HGrid::new()).unwrap();
        let grid = codec.decode_grid(&grid_wire).unwrap();
        assert!(grid.cols.is_empty());
    }

    // ── Bug fix: trailing input rejection ──

    #[test]
    fn parse_scalar_rejects_trailing_input() {
        let rejects = |src: &str| decode_scalar(src).is_err();
        assert!(rejects("T extra garbage"));
        assert!(rejects("42 xyz"));
        assert!(rejects("\"hello\" world"));
        assert!(rejects("M extra"));
    }

    #[test]
    fn parse_scalar_allows_trailing_whitespace() {
        assert_eq!(parse("T  "), Kind::Bool(true));
        assert_eq!(parse("M "), Kind::Marker);
        assert_eq!(parse("42 "), num(42.0));
    }

    // ── Bug fix: unknown escape sequences ──

    #[test]
    fn parse_string_rejects_unknown_escapes() {
        let rejects = |src: &str| decode_scalar(src).is_err();
        assert!(rejects("\"bad\\x\""));
        assert!(rejects("\"bad\\a\""));
        assert!(rejects("\"bad\\z\""));
    }

    #[test]
    fn parse_string_accepts_valid_escapes() {
        let accepts = |src: &str| decode_scalar(src).is_ok();
        assert!(accepts("\"\\n\""));
        assert!(accepts("\"\\r\""));
        assert!(accepts("\"\\t\""));
        assert!(accepts("\"\\\\\""));
        assert!(accepts("\"\\\"\""));
        assert!(accepts("\"\\$\""));
        assert!(accepts("\"\\b\""));
        assert!(accepts("\"\\f\""));
        assert!(accepts("\"\\u0041\""));
    }
}