Skip to main content

sidereon_core/crinex/
mod.rs

1//! Hatanaka (CRINEX) observation-file decoder and encoder.
2//!
3//! Reconstructs the plain RINEX observation **text** from a Compact RINEX
4//! (CRINEX) stream, reproducing the `CRX2RNX` algorithm, and the inverse:
5//! recompacts plain RINEX observation text back to a CRINEX stream
6//! (`RNX2CRX`-style). Two stream revisions are handled: **CRINEX 1.0** (which
7//! compacts a RINEX 2 observation file) and **CRINEX 3.0** (which compacts a
8//! RINEX 3 observation file). The expanded text is what
9//! [`crate::rinex_obs::RinexObs`] then parses.
10//!
11//! The round-trip is closed at the RINEX-text level through the canonical
12//! [`ObsStream`] intermediate representation: [`decode`] takes CRINEX to plain
13//! RINEX text, [`encode_crinex`] takes plain RINEX text back to CRINEX, and
14//! [`parse_stream`] / [`encode_stream`] expose the IR boundary directly. CRINEX
15//! compression is not unique, so the encoder emits a canonical all-reset form
16//! (see [`encode_stream`]); the guarantee is that decoding the re-emitted CRINEX
17//! reproduces the original observations byte-for-byte.
18//!
19//! # What CRINEX is
20//!
21//! CRINEX is a lossless, line-oriented ASCII recompression of a RINEX
22//! observation file. The plain RINEX header is copied through unchanged (it is
23//! never compressed); only the data body is differenced. The body uses two
24//! difference engines:
25//!
26//! - a per-character **text** difference (epoch descriptor line and the trailing
27//!   LLI/SSI flag string of each satellite line); and
28//! - a per-observation higher-order **integer** difference (each observation
29//!   column, and the receiver clock offset), with arc (re)initialization marked
30//!   inline by an `order&value` token.
31//!
32//! The algorithm is fully specified by Hatanaka (2008) and the RNXCMP toolset.
33//! This is a deterministic byte-to-text transform, not a float recipe - there is
34//! no 0-ULP claim here, exactly as for the SP3 and RINEX-NAV readers. The
35//! reconstructed numbers are formatted with the same fixed-decimal layout the
36//! reference `crx2rnx` emits (value scaled back by `10^-decimals`, right-aligned
37//! in the field) and each output line has its trailing blanks trimmed, which is
38//! what makes the expansion reproduce the reference byte-for-byte.
39//!
40//! # Memory
41//!
42//! The difference state is bounded: the engines hold only the previous epoch's
43//! per-satellite reference values, not the whole stream. [`decode_to`] is the
44//! line-at-a-time form - it pushes each reconstructed line to a sink as it is
45//! produced, so the *decoder itself* never buffers the full expansion. Note that
46//! the [`decode`] convenience does collect the entire expanded text into one
47//! `String`; for a multi-megabyte daily file prefer `decode_to` with a streaming
48//! sink (e.g. feeding a record consumer) so the expansion is processed
49//! incrementally.
50
51use std::collections::HashMap;
52use std::fmt::Write as _;
53
54use crate::format::columns::{raw_field as field, raw_field_from as field_from};
55use crate::validate::{self, FieldError};
56use crate::{Error, Result};
57
/// CRINEX stream revision (the `CRINEX VERS / TYPE` line).
///
/// The decoder selects the variant from the leading `1.` / `3.` of the first
/// CRINEX header line; the plain-RINEX header scanner selects it from the
/// leading `2` / `3` of the `RINEX VERSION / TYPE` version field.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CrinexVersion {
    /// CRINEX 1.0 - compacts a RINEX 2 observation file.
    V1,
    /// CRINEX 3.0 - compacts a RINEX 3 observation file.
    V3,
}
66
/// The compression order used by the historical `RNX2CRX` (and the only order a
/// reset token may request without exceeding the classic `M = 5` history). It is
/// carried per token, so this is only a sanity ceiling.
///
/// In this file the constant sizes the [`NumDiff`] history buffer and is the
/// highest order for which `NumDiff::decompress` has an explicit coefficient
/// row.
// NOTE(review): the text above speaks of an `M = 5` history while the value is
// 6 - confirm which is intended and align the wording.
const MAX_ORDER: usize = 6;
71
/// Canonical, wire-format-agnostic observation stream recovered from a CRINEX
/// file. This is the IR the decoder produces: the difference engines are undone,
/// leaving the plain RINEX header verbatim and each epoch's recovered
/// observations as scaled integers plus reconstructed flag strings.
///
/// Two serializers consume it. [`Decoder`]'s RINEX path renders it back to plain
/// RINEX observation text (the [`decode`] output). [`encode_stream`] renders it
/// back to CRINEX. Because CRINEX compression is not unique, `encode_stream`
/// emits the canonical all-reset form rather than reproducing the original
/// CRINEX bytes; the round-trip guarantee is at the IR / RINEX-text level (see
/// [`encode_stream`]).
///
/// [`parse_stream`] builds it from a CRINEX stream; the [`encode_crinex`] entry
/// builds it from plain RINEX observation text, so the same container backs both
/// the decode and encode directions.
///
/// Equality is the derived field-by-field comparison, so two streams compare
/// equal only when version, header lines and every epoch record match.
#[derive(Debug, Clone, PartialEq)]
pub struct ObsStream {
    /// Stream revision (selects the epoch grammar).
    pub version: CrinexVersion,
    /// The embedded plain RINEX header lines, verbatim, up to and including
    /// `END OF HEADER` (the two CRINEX header lines are not part of the IR).
    /// Each entry is one line without its line terminator.
    pub header: Vec<String>,
    /// Epoch records in file order.
    pub epochs: Vec<EpochRecord>,
}
97
/// One decoded epoch: either an observation epoch or a verbatim event block.
///
/// The V3 reader chooses the variant from the epoch flag of the reconstructed
/// descriptor: a flag greater than 1 yields [`EpochRecord::Event`], anything
/// else yields [`EpochRecord::Obs`].
#[derive(Debug, Clone, PartialEq)]
pub enum EpochRecord {
    /// An observation epoch (epoch flag 0 or 1).
    Obs(ObsEpoch),
    /// An event record (epoch flag > 1): header/comment lines copied verbatim,
    /// carrying no differenced observations.
    Event {
        /// The reconstructed epoch descriptor line.
        descriptor: String,
        /// The `numsat` event lines following the descriptor, verbatim (line
        /// terminators stripped).
        lines: Vec<String>,
    },
}
112
/// A decoded observation epoch.
#[derive(Debug, Clone, PartialEq)]
pub struct ObsEpoch {
    /// The reconstructed full epoch descriptor (V3: leading `>` plus the SV list;
    /// V1: leading space plus the SV list).
    pub descriptor: String,
    /// Recovered receiver clock offset as the scaled integer the stream carried,
    /// or `None` when the epoch carried no clock token (the clock line was
    /// blank after trimming).
    pub clock: Option<i64>,
    /// Per-satellite recovered observations, in epoch SV-list order.
    pub sats: Vec<SatRecord>,
}
125
/// One satellite's recovered observations at an epoch.
#[derive(Debug, Clone, PartialEq)]
pub struct SatRecord {
    /// SV token (e.g. `G05`), already expanded for mono-system RINEX-2 streams.
    pub sv: String,
    /// Recovered scaled-integer observation values; `None` is a blanked column.
    /// On the decode path there is one entry per observation code declared for
    /// the satellite's constellation.
    pub values: Vec<Option<i64>>,
    /// The reconstructed LLI/SSI flag string (the text-difference engine state).
    pub flags: String,
}
136
/// Width of one RINEX-3 observation field: `F14.3` value + LLI + SSI
/// (14 + 1 + 1 characters).
const OBS_FIELD_WIDTH: usize = 16;
/// Width of the numeric part of one observation field (`F14.3`), i.e. the
/// field width without the two trailing flag characters.
const OBS_VALUE_WIDTH: usize = 14;
141
142/// Decode a CRINEX (Hatanaka) observation stream into the plain RINEX
143/// observation text it expands to, returning the whole text as a `String`.
144///
145/// Supports CRINEX 1.0 (RINEX 2 host) and CRINEX 3.0 (RINEX 3 host). Returns
146/// [`Error::Parse`] with a human-readable reason on a malformed stream.
147pub fn decode(crinex_text: &str) -> Result<String> {
148    let mut out = String::with_capacity(crinex_text.len() * 4);
149    decode_to(crinex_text, |line| {
150        out.push_str(line);
151        out.push('\n');
152    })?;
153    Ok(out)
154}
155
156/// Streaming decode: reconstruct the plain RINEX observation text one line at a
157/// time, pushing each line (without its trailing newline) to `emit`.
158///
159/// This is the bounded-memory form: the difference engines retain only the
160/// previous epoch's state, so a multi-megabyte daily file never holds its full
161/// expansion in a single buffer. [`decode`] is the collecting convenience.
162pub fn decode_to<W: FnMut(&str)>(crinex_text: &str, mut emit: W) -> Result<()> {
163    let mut decoder = Decoder::new();
164    let mut lines = crinex_text.lines();
165    decoder.read_crinex_header(&mut lines, &mut emit)?;
166    decoder.read_body(&mut lines, &mut emit)?;
167    Ok(())
168}
169
170/// Encode plain RINEX observation text into a CRINEX (Hatanaka) stream, the
171/// inverse of [`decode`].
172///
173/// Supports RINEX 2 (encoded as CRINEX 1.0) and RINEX 3 (encoded as CRINEX 3.0),
174/// selected from the embedded `RINEX VERSION / TYPE` header line. The text is
175/// parsed into the canonical [`ObsStream`] IR and serialized by [`encode_stream`].
176/// Because CRINEX compression is not unique, the output is the canonical
177/// all-reset form (see [`encode_stream`]); it is not byte-identical to an
178/// arbitrary `RNX2CRX` stream, but `decode(encode_crinex(rinex)) == rinex` for
179/// any RINEX observation text this round-trips. Returns [`Error::Parse`] on a
180/// malformed input.
181pub fn encode_crinex(rinex_text: &str) -> Result<String> {
182    let stream = parse_rinex_obs(rinex_text)?;
183    Ok(encode_stream(&stream))
184}
185
186/// Per-observation / clock higher-order integer difference engine.
187///
188/// Mirrors the Hatanaka `NumDiff`: a value is the original decimal scaled to an
189/// integer; the engine holds a small history and reconstructs the next value
190/// from a delta using the signed binomial (Pascal) coefficients for the current
191/// order. The order ramps up to `level` as samples arrive and is reset by an
192/// `order&value` token (arc reinitialization).
193#[derive(Debug, Clone)]
194struct NumDiff {
195    /// Iteration counter (current effective order), clamped to `level`.
196    m: usize,
197    /// Target compression level (order) for this arc.
198    level: usize,
199    /// History buffer, most-recent first.
200    buf: [i64; MAX_ORDER],
201}
202
203impl NumDiff {
204    /// Initialize a fresh arc seeded with `data` at the given `level`. The seed
205    /// is the recovered value of the init sample.
206    fn new(data: i64, level: usize) -> Self {
207        let mut buf = [0i64; MAX_ORDER];
208        buf[0] = data;
209        Self { m: 0, level, buf }
210    }
211
212    /// Reinitialize the arc (the `order&value` reset token): clear the order,
213    /// set the new level, and seed the history with the recovered value.
214    fn force_init(&mut self, data: i64, level: usize) {
215        self.m = 0;
216        self.level = level;
217        self.rotate(data);
218    }
219
220    /// Push a recovered value into the history buffer (most-recent first).
221    fn rotate(&mut self, data: i64) {
222        self.buf.copy_within(0..MAX_ORDER - 1, 1);
223        self.buf[0] = data;
224    }
225
226    /// Recover the next value from its delta, advancing the order toward
227    /// `level`.
228    fn decompress(&mut self, delta: i64) -> core::result::Result<i64, validate::ArithmeticError> {
229        let m = if self.m < self.level {
230            self.m + 1
231        } else {
232            self.m
233        };
234        let b = &self.buf;
235        let new = match m {
236            1 => checked_diff_sum(delta, &[(1, b[0])])?,
237            2 => checked_diff_sum(delta, &[(2, b[0]), (-1, b[1])])?,
238            3 => checked_diff_sum(delta, &[(3, b[0]), (-3, b[1]), (1, b[2])])?,
239            4 => checked_diff_sum(delta, &[(4, b[0]), (-6, b[1]), (4, b[2]), (-1, b[3])])?,
240            5 => checked_diff_sum(
241                delta,
242                &[(5, b[0]), (-10, b[1]), (10, b[2]), (-5, b[3]), (1, b[4])],
243            )?,
244            6 => checked_diff_sum(
245                delta,
246                &[
247                    (6, b[0]),
248                    (-15, b[1]),
249                    (20, b[2]),
250                    (-15, b[3]),
251                    (6, b[4]),
252                    (-1, b[5]),
253                ],
254            )?,
255            // m starts at 0 and is incremented before use, and `level` is
256            // capped at MAX_ORDER, so m is always in 1..=MAX_ORDER here.
257            _ => checked_diff_sum(delta, &[(1, b[0])])?,
258        };
259        self.m = m;
260        self.rotate(new);
261        Ok(new)
262    }
263}
264
265fn checked_diff_sum(
266    delta: i64,
267    terms: &[(i64, i64)],
268) -> core::result::Result<i64, validate::ArithmeticError> {
269    const FIELD: &str = "crinex numeric difference";
270    let mut sum = delta;
271    for &(coefficient, value) in terms {
272        let term = validate::checked_i64_mul(coefficient.abs(), value, FIELD)?;
273        sum = if coefficient >= 0 {
274            validate::checked_i64_add(sum, term, FIELD)?
275        } else {
276            validate::checked_i64_sub(sum, term, FIELD)?
277        };
278    }
279    Ok(sum)
280}
281
/// Character-wise text difference engine (Hatanaka `TextDiff`) for the epoch
/// descriptor line and each satellite's LLI/SSI flag string.
///
/// The state is the most recently reconstructed string, held as bytes. In a
/// compressed line a space means "keep the stored byte", `&` means "blank the
/// stored byte", and any other byte replaces the stored one. Bytes past the
/// end of the stored string are appended.
#[derive(Debug, Default, Clone)]
struct TextDiff {
    buffer: Vec<u8>,
}

impl TextDiff {
    /// Discard the stored string and take `data` literally as the new state
    /// (forced reinit / first sample).
    fn force_init(&mut self, data: &str) {
        self.buffer.clear();
        self.buffer.extend_from_slice(data.as_bytes());
    }

    /// Merge the compressed line `data` into the stored string and return the
    /// updated string.
    fn decompress(&mut self, data: &str) -> String {
        let incoming = data.as_bytes();
        // Grow the stored string with whatever part of the input lies beyond
        // its current end, so every incoming byte has a slot to act on.
        if let Some(tail) = incoming.get(self.buffer.len()..) {
            self.buffer.extend_from_slice(tail);
        }
        for (slot, &byte) in self.buffer.iter_mut().zip(incoming) {
            match byte {
                b' ' => {}
                b'&' => *slot = b' ',
                other => *slot = other,
            }
        }
        // CRINEX text is ASCII; the lossy conversion only matters for a stray
        // non-ASCII byte and keeps this path panic-free.
        String::from_utf8_lossy(&self.buffer).into_owned()
    }
}
319
/// CRINEX decoder state machine.
///
/// Holds what the header established (stream revision, observation-code
/// counts, mono-system letter) together with the difference-engine state that
/// carries over from one epoch to the next.
struct Decoder {
    /// Stream revision. Starts as `V3` in `new` and is overwritten by whichever
    /// header reader runs first.
    version: CrinexVersion,
    /// Number of observation codes declared per constellation letter, used to
    /// know how many observation fields each satellite line carries. A `' '`
    /// key holds the shared RINEX-2 count used as a fallback.
    obs_count: HashMap<char, usize>,
    /// Mono-constellation letter for a RINEX-2 file whose SV tokens omit the
    /// system letter (set from the header `RINEX VERSION / TYPE`). Left `None`
    /// when the header field is blank or `M`.
    default_system: Option<char>,
    /// Epoch descriptor text-diff engine.
    epoch_diff: TextDiff,
    /// Receiver-clock-offset difference engine; `None` until the first clock
    /// reset token is seen.
    clock_diff: Option<NumDiff>,
    /// Per-satellite observation difference engines, keyed by SV token. Each
    /// slot is `None` until that observation column sees a reset token.
    obs_diff: HashMap<String, Vec<Option<NumDiff>>>,
    /// Per-satellite flag (LLI/SSI) text-diff engines.
    flag_diff: HashMap<String, TextDiff>,
}
338
339impl Decoder {
340    fn new() -> Self {
341        Self {
342            version: CrinexVersion::V3,
343            obs_count: HashMap::new(),
344            default_system: None,
345            epoch_diff: TextDiff::default(),
346            clock_diff: None,
347            obs_diff: HashMap::new(),
348            flag_diff: HashMap::new(),
349        }
350    }
351
352    /// Consume the two CRINEX header lines (dropped) and then copy the embedded
353    /// plain RINEX header through verbatim up to and including `END OF HEADER`,
354    /// recording the per-system observation-code counts and the file version
355    /// along the way.
356    fn read_crinex_header<'a, I, W>(&mut self, lines: &mut I, emit: &mut W) -> Result<()>
357    where
358        I: Iterator<Item = &'a str>,
359        W: FnMut(&str),
360    {
361        // Line 1: CRINEX VERS / TYPE - selects the stream grammar.
362        let l1 = lines
363            .next()
364            .ok_or_else(|| Error::Parse("CRINEX stream is empty".into()))?;
365        let crx_ver = field(l1, 0, 20).trim();
366        self.version = match crx_ver {
367            v if v.starts_with("1.0") || v.starts_with("1.") => CrinexVersion::V1,
368            v if v.starts_with("3.0") || v.starts_with("3.") => CrinexVersion::V3,
369            other => {
370                return Err(Error::Parse(format!(
371                    "unsupported CRINEX version {other:?} (expected 1.0 or 3.0)"
372                )))
373            }
374        };
375        if !l1.contains("CRINEX VERS") {
376            return Err(Error::Parse(
377                "missing CRINEX VERS / TYPE header line".into(),
378            ));
379        }
380        // Line 2: CRINEX PROG / DATE - dropped (it is the compaction stamp, not
381        // part of the reconstructed RINEX).
382        lines
383            .next()
384            .ok_or_else(|| Error::Parse("CRINEX header missing PROG / DATE line".into()))?;
385
386        // Copy the embedded plain RINEX header verbatim, tracking obs counts.
387        let mut saw_end = false;
388        for raw in lines.by_ref() {
389            let line = raw.trim_end_matches(['\r', '\n']);
390            emit(line);
391
392            let label = field(line, 60, 80).trim();
393            self.classify_header_label(line, label)?;
394            if label == "END OF HEADER" {
395                saw_end = true;
396                break;
397            }
398        }
399        if !saw_end {
400            return Err(Error::Parse(
401                "CRINEX embedded RINEX header has no END OF HEADER".into(),
402            ));
403        }
404        Ok(())
405    }
406
407    /// Record the per-system observation-code counts and the mono-system
408    /// constellation letter from one labelled RINEX header record. Shared by the
409    /// CRINEX-stream header reader and the plain-RINEX header scanner so both
410    /// resolve observation widths identically.
411    fn classify_header_label(&mut self, line: &str, label: &str) -> Result<()> {
412        match label {
413            "RINEX VERSION / TYPE" => {
414                // RINEX 2 SV tokens may omit the constellation letter for a
415                // single-system file; capture it for V1 streams.
416                let sys_field = field(line, 40, 41).trim();
417                if let Some(c) = sys_field.chars().next() {
418                    if c != 'M' {
419                        self.default_system = Some(c);
420                    }
421                }
422            }
423            "# / TYPES OF OBSERV" => {
424                // RINEX 2 observation-code count (shared across systems).
425                let n = strict_obs_count(line, 0, 6, "rinex2.obs_type_count")?;
426                if let Some(sys) = self.default_system {
427                    self.obs_count.insert(sys, n);
428                }
429                // RINEX 2 has one shared list; record under a sentinel so a
430                // mono-system file resolves even if the letter was 'M'.
431                self.obs_count.entry(' ').or_insert(n);
432            }
433            "SYS / # / OBS TYPES" => {
434                let sys_field = field(line, 0, 1).trim();
435                if let Some(c) = sys_field.chars().next() {
436                    let n = strict_obs_count(line, 3, 6, "rinex3.obs_type_count")?;
437                    self.obs_count.insert(c, n);
438                }
439                // Continuation lines (blank system field) carry no count.
440            }
441            _ => {}
442        }
443        Ok(())
444    }
445
446    /// Scan a plain RINEX observation header (no CRINEX wrapper): collect the
447    /// header lines verbatim up to and including `END OF HEADER`, set the stream
448    /// revision from `RINEX VERSION / TYPE`, and record the per-system
449    /// observation-code counts via [`Self::classify_header_label`].
450    fn scan_rinex_header<'a, I>(&mut self, lines: &mut I, header: &mut Vec<String>) -> Result<()>
451    where
452        I: Iterator<Item = &'a str>,
453    {
454        let mut saw_version = false;
455        let mut saw_end = false;
456        for raw in lines.by_ref() {
457            let line = raw.trim_end_matches(['\r', '\n']);
458            let label = field(line, 60, 80).trim();
459            if label == "RINEX VERSION / TYPE" {
460                let version = field(line, 0, 9).trim();
461                self.version = match version.chars().next() {
462                    Some('2') => CrinexVersion::V1,
463                    Some('3') => CrinexVersion::V3,
464                    _ => {
465                        return Err(Error::Parse(format!(
466                            "unsupported RINEX version {version:?} (expected 2 or 3)"
467                        )))
468                    }
469                };
470                saw_version = true;
471            }
472            self.classify_header_label(line, label)?;
473            header.push(line.to_string());
474            if label == "END OF HEADER" {
475                saw_end = true;
476                break;
477            }
478        }
479        if !saw_version {
480            return Err(Error::Parse(
481                "plain RINEX header missing RINEX VERSION / TYPE".into(),
482            ));
483        }
484        if !saw_end {
485            return Err(Error::Parse(
486                "plain RINEX observation header has no END OF HEADER".into(),
487            ));
488        }
489        Ok(())
490    }
491
492    /// Decode the epoch records following the header into plain RINEX text.
493    fn read_body<'a, I, W>(&mut self, lines: &mut I, emit: &mut W) -> Result<()>
494    where
495        I: Iterator<Item = &'a str>,
496        W: FnMut(&str),
497    {
498        let version = self.version;
499        loop {
500            let record = match version {
501                CrinexVersion::V3 => self.next_epoch_v3(lines)?,
502                CrinexVersion::V1 => self.next_epoch_v1(lines)?,
503            };
504            let Some(record) = record else { break };
505            match version {
506                CrinexVersion::V3 => serialize_rinex_epoch_v3(&record, emit),
507                CrinexVersion::V1 => serialize_rinex_epoch_v1(&record, emit),
508            }
509        }
510        Ok(())
511    }
512
513    // ----------------------------------------------------------------- V3 ---
514
515    /// Parse the next V3 epoch into the canonical [`EpochRecord`], advancing the
516    /// difference engines. Returns `Ok(None)` at end of stream. Stray blank lines
517    /// between records are skipped.
518    fn next_epoch_v3<'a, I>(&mut self, lines: &mut I) -> Result<Option<EpochRecord>>
519    where
520        I: Iterator<Item = &'a str>,
521    {
522        let raw = loop {
523            match lines.next() {
524                None => return Ok(None),
525                Some(raw) => {
526                    let line = raw.trim_end_matches(['\r', '\n']);
527                    if !line.is_empty() {
528                        break line;
529                    }
530                }
531            }
532        };
533
534        // Epoch descriptor. A reset is marked by a leading '>' (the rest of the
535        // line is taken literally), otherwise the line is a TextDiff delta of the
536        // previous descriptor. The leading '>' is kept in the text-diff buffer so
537        // the delta lines' column offsets line up with the full RINEX-3 epoch line
538        // (the seconds digits sit one column to the right of where they would be
539        // in a '>'-stripped buffer).
540        let descriptor = if raw.starts_with('>') {
541            self.epoch_diff.force_init(raw);
542            self.epoch_diff.decompress("")
543        } else {
544            self.epoch_diff.decompress(raw)
545        };
546
547        // The reconstructed full epoch line is
548        // "> YYYY MM DD HH MM SS.sssssss  F NN<svlist>": the epoch flag is at
549        // column 31, the satellite count at columns 32..35, and the 3-char SV
550        // tokens begin at column 41.
551        let numsat = strict_int_field::<usize>(&descriptor, 32, 35, "v3.epoch.satellite_count")?;
552        let flag = strict_int_field::<u8>(&descriptor, 31, 32, "v3.epoch.flag")?;
553
554        // Event records (flag > 1) carry header/comment lines rather than
555        // observation lines, and the clock-offset record is omitted for them
556        // entirely. Capture the `numsat` event lines verbatim, skip differencing.
557        if flag > 1 {
558            let mut event_lines = Vec::with_capacity(numsat);
559            for _ in 0..numsat {
560                let extra = lines
561                    .next()
562                    .ok_or_else(|| Error::Parse("CRINEX V3 event record truncated".into()))?;
563                event_lines.push(extra.trim_end_matches(['\r', '\n']).to_string());
564            }
565            return Ok(Some(EpochRecord::Event {
566                descriptor,
567                lines: event_lines,
568            }));
569        }
570
571        // The clock offset is its own line (a NumDiff token, possibly blank).
572        let clock_line = lines
573            .next()
574            .ok_or_else(|| Error::Parse("CRINEX V3 epoch missing clock line".into()))?
575            .trim_end_matches(['\r', '\n']);
576        let clock = self.decode_clock_value(clock_line)?;
577
578        let sv_list = self.sv_tokens_v3(&descriptor, numsat)?;
579        let mut sats = Vec::with_capacity(sv_list.len());
580        for sv in &sv_list {
581            let data_line = lines.next().ok_or_else(|| {
582                Error::Parse("CRINEX V3 epoch truncated: missing satellite line".into())
583            })?;
584            let n_obs = self.obs_count_for(sv)?;
585            let (values, flags) =
586                self.decode_sat_values(sv, data_line.trim_end_matches(['\r', '\n']), n_obs)?;
587            sats.push(SatRecord {
588                sv: sv.clone(),
589                values,
590                flags,
591            });
592        }
593        Ok(Some(EpochRecord::Obs(ObsEpoch {
594            descriptor,
595            clock,
596            sats,
597        })))
598    }
599
600    /// Extract `numsat` 3-character SV tokens from the V3 epoch descriptor.
601    fn sv_tokens_v3(&self, descriptor: &str, numsat: usize) -> Result<Vec<String>> {
602        // The RINEX-3 epoch line pads the satellite list to column 41 of the
603        // full line (the '>' is kept in the descriptor buffer); the 3-char SV
604        // tokens run from there.
605        let list = field_from(descriptor, 41);
606        let bytes = list.as_bytes();
607        let mut out = Vec::with_capacity(numsat);
608        for i in 0..numsat {
609            out.push(fixed_sv_token(bytes, "V3", numsat, i)?.to_string());
610        }
611        Ok(out)
612    }
613
614    /// Observation-code count for an SV token's constellation.
615    fn obs_count_for(&self, sv: &str) -> Result<usize> {
616        let sys = sv.chars().next().unwrap_or(' ');
617        let count = self
618            .obs_count
619            .get(&sys)
620            .or_else(|| self.obs_count.get(&' '))
621            .copied()
622            .ok_or_else(|| {
623                Error::Parse(format!(
624                    "CRINEX satellite {sv:?} has no declared observation count"
625                ))
626            })?;
627        if count == 0 {
628            return Err(Error::Parse(format!(
629                "CRINEX satellite {sv:?} has zero declared observations"
630            )));
631        }
632        Ok(count)
633    }
634
635    /// Recover one satellite's observations from a CRINEX data line: `n_obs`
636    /// difference-coded observation tokens followed by the TextDiff flag string.
637    /// The on-wire data-line grammar is identical for V1 and V3 (the SV token is
638    /// carried by the epoch descriptor, not the data line), so both revisions
639    /// share this recovery; only the RINEX-text layout of the result differs.
640    fn decode_sat_values(
641        &mut self,
642        sv: &str,
643        line: &str,
644        n_obs: usize,
645    ) -> Result<(Vec<Option<i64>>, String)> {
646        // The observation tokens are whitespace-separated; the remainder after
647        // the last consumed token is the flag string. We walk the line token by
648        // token, tracking byte offsets so we know where the flags begin.
649        let engines = self
650            .obs_diff
651            .entry(sv.to_string())
652            .or_insert_with(|| vec![None; n_obs]);
653        if engines.len() < n_obs {
654            engines.resize(n_obs, None);
655        }
656
657        let mut values: Vec<Option<i64>> = Vec::with_capacity(n_obs);
658        let mut cursor = 0usize;
659        let bytes = line.as_bytes();
660
661        for obs_index in 0..n_obs {
662            // Skip the single separating blank between fields (the compressor
663            // writes exactly one space between tokens; a doubled space marks a
664            // blanked observation).
665            if obs_index > 0 {
666                if cursor < bytes.len() && bytes[cursor] == b' ' {
667                    cursor += 1;
668                } else if cursor >= bytes.len() {
669                    // No more tokens on the line: the rest are blank.
670                    values.push(None);
671                    continue;
672                }
673            }
674            // A blanked observation: the field is empty (immediately another
675            // separator or end of the data section).
676            if cursor >= bytes.len() || bytes[cursor] == b' ' {
677                values.push(None);
678                continue;
679            }
680            // Read the token up to the next space.
681            let tok_start = cursor;
682            while cursor < bytes.len() && bytes[cursor] != b' ' {
683                cursor += 1;
684            }
685            let token = &line[tok_start..cursor];
686            let recovered = self.apply_obs_token(sv, obs_index, token)?;
687            values.push(Some(recovered));
688        }
689
690        // The flag string is whatever remains. In RNX2CRX output the flags are
691        // separated from the last observation token by a single space.
692        let flag_raw = if cursor < bytes.len() {
693            let rest = &line[cursor..];
694            rest.strip_prefix(' ').unwrap_or(rest)
695        } else {
696            ""
697        };
698        let flags = self
699            .flag_diff
700            .entry(sv.to_string())
701            .or_default()
702            .decompress(flag_raw);
703
704        Ok((values, flags))
705    }
706
707    /// Apply one observation token (reset `order&value`, or a plain delta) and
708    /// return the recovered scaled integer.
709    fn apply_obs_token(&mut self, sv: &str, obs_index: usize, token: &str) -> Result<i64> {
710        let engines = self.obs_diff.get_mut(sv).expect("engines inserted above");
711        let slot = &mut engines[obs_index];
712        if let Some((order, value)) = parse_reset(token)? {
713            match slot {
714                Some(e) => e.force_init(value, order),
715                None => *slot = Some(NumDiff::new(value, order)),
716            }
717            Ok(value)
718        } else {
719            let delta = token.trim().parse::<i64>().map_err(|_| {
720                Error::Parse(format!(
721                    "CRINEX observation delta {token:?} is not an integer"
722                ))
723            })?;
724            let Some(engine) = slot else {
725                return Err(Error::Parse(format!(
726                    "CRINEX observation {sv}[{obs_index}] has a delta before any arc init"
727                )));
728            };
729            engine.decompress(delta).map_err(map_arithmetic_error)
730        }
731    }
732
733    /// Recover the per-epoch receiver clock offset from its line as the scaled
734    /// integer the stream carried (`None` when no clock token is present),
735    /// advancing the clock difference engine. The picosecond/nanosecond scaling
736    /// to text is applied by the RINEX-text serializer, not here.
737    fn decode_clock_value(&mut self, line: &str) -> Result<Option<i64>> {
738        let token = line.trim();
739        if token.is_empty() {
740            return Ok(None);
741        }
742        let value = if let Some((order, v)) = parse_reset(token)? {
743            match &mut self.clock_diff {
744                Some(e) => e.force_init(v, order),
745                None => self.clock_diff = Some(NumDiff::new(v, order)),
746            }
747            v
748        } else {
749            let delta = token.parse::<i64>().map_err(|_| {
750                Error::Parse(format!("CRINEX clock delta {token:?} is not an integer"))
751            })?;
752            match &mut self.clock_diff {
753                Some(e) => e.decompress(delta).map_err(map_arithmetic_error)?,
754                None => {
755                    return Err(Error::Parse(
756                        "CRINEX clock delta before any clock arc init".into(),
757                    ))
758                }
759            }
760        };
761        Ok(Some(value))
762    }
763
764    // ----------------------------------------------------------------- V1 ---
765
766    /// Parse the next V1 epoch into the canonical [`EpochRecord`]. See
767    /// [`Self::next_epoch_v3`] for the streaming contract.
768    fn next_epoch_v1<'a, I>(&mut self, lines: &mut I) -> Result<Option<EpochRecord>>
769    where
770        I: Iterator<Item = &'a str>,
771    {
772        let raw = loop {
773            match lines.next() {
774                None => return Ok(None),
775                Some(raw) => {
776                    let line = raw.trim_end_matches(['\r', '\n']);
777                    if !line.is_empty() {
778                        break line;
779                    }
780                }
781            }
782        };
783
784        // CRINEX 1.0 stores the epoch line without RINEX-2's leading blank
785        // column, but crx2rnx restores it on output and keeps the restored,
786        // space-prefixed line as the text-difference base for the next epoch.
787        // Mirror that: seed the engine with the leading space (on reset) so both
788        // the reconstruction and the standard column offsets are right. A V1
789        // epoch descriptor reset is marked by a leading '&'.
790        let descriptor = if let Some(stripped) = raw.strip_prefix('&') {
791            self.epoch_diff.force_init(&format!(" {stripped}"));
792            self.epoch_diff.decompress("")
793        } else {
794            self.epoch_diff.decompress(raw)
795        };
796
797        // V1 epoch line: " YY MM DD HH MM SS.sssssss  F NN<svlist>". numsat is at
798        // cols 29..32 of the reconstructed RINEX-2 epoch line (which the
799        // descriptor mirrors, leading space included).
800        let numsat = strict_int_field::<usize>(&descriptor, 29, 32, "v1.epoch.satellite_count")?;
801        let flag = strict_int_field::<u8>(&descriptor, 26, 29, "v1.epoch.flag")?;
802
803        // Event records (flag > 1): capture the `numsat` event lines verbatim.
804        if flag > 1 {
805            let mut event_lines = Vec::with_capacity(numsat);
806            for _ in 0..numsat {
807                let extra = lines
808                    .next()
809                    .ok_or_else(|| Error::Parse("CRINEX V1 event record truncated".into()))?;
810                event_lines.push(extra.trim_end_matches(['\r', '\n']).to_string());
811            }
812            return Ok(Some(EpochRecord::Event {
813                descriptor,
814                lines: event_lines,
815            }));
816        }
817
818        // Clock line (its own NumDiff line, possibly blank).
819        let clock_line = lines
820            .next()
821            .ok_or_else(|| Error::Parse("CRINEX V1 epoch missing clock line".into()))?
822            .trim_end_matches(['\r', '\n']);
823        let clock = self.decode_clock_value(clock_line)?;
824
825        let sv_list = self.sv_tokens_v1(&descriptor, numsat)?;
826        let mut sats = Vec::with_capacity(sv_list.len());
827        for sv in &sv_list {
828            let data_line = lines.next().ok_or_else(|| {
829                Error::Parse("CRINEX V1 epoch truncated: missing satellite line".into())
830            })?;
831            let n_obs = self.obs_count_for(sv)?;
832            let (values, flags) =
833                self.decode_sat_values(sv, data_line.trim_end_matches(['\r', '\n']), n_obs)?;
834            sats.push(SatRecord {
835                sv: sv.clone(),
836                values,
837                flags,
838            });
839        }
840        Ok(Some(EpochRecord::Obs(ObsEpoch {
841            descriptor,
842            clock,
843            sats,
844        })))
845    }
846
847    fn sv_tokens_v1(&self, descriptor: &str, numsat: usize) -> Result<Vec<String>> {
848        // RINEX-2 SV list starts at col 32 of the epoch line; tokens are 3 chars
849        // and may omit the constellation letter for a mono-system file.
850        let list = field_from(descriptor, 32);
851        let bytes = list.as_bytes();
852        let mut out = Vec::with_capacity(numsat);
853        for i in 0..numsat {
854            let mut tok = fixed_sv_token(bytes, "V1", numsat, i)?.to_string();
855            if tok.starts_with(' ') {
856                if let Some(sys) = self.default_system {
857                    let prn = tok.trim();
858                    tok = format!("{sys}{prn:>2}");
859                }
860            }
861            out.push(tok);
862        }
863        Ok(out)
864    }
865
866    // ---------------------------------------------------- plain RINEX -> IR ---
867
868    /// Parse the body of a plain RINEX-3 observation file into canonical epoch
869    /// records (the inverse of [`serialize_rinex_epoch_v3`]).
870    fn parse_rinex_epochs_v3<'a, I>(&self, lines: &mut I) -> Result<Vec<EpochRecord>>
871    where
872        I: Iterator<Item = &'a str>,
873    {
874        let mut epochs = Vec::new();
875        loop {
876            let Some(line) = next_nonblank(lines) else {
877                return Ok(epochs);
878            };
879            if !line.starts_with('>') {
880                return Err(Error::Parse(format!(
881                    "RINEX-3 epoch line must start with '>': {line:?}"
882                )));
883            }
884            let flag = strict_int_field::<u8>(&line, 31, 32, "v3.epoch.flag")?;
885            let numsat = strict_int_field::<usize>(&line, 32, 35, "v3.epoch.satellite_count")?;
886
887            if flag > 1 {
888                let event_lines = read_event_lines(lines, numsat, "RINEX-3")?;
889                epochs.push(EpochRecord::Event {
890                    descriptor: line,
891                    lines: event_lines,
892                });
893                continue;
894            }
895
896            let clock = parse_clock_field(&line, 41, 56, 12, "v3.epoch.clock")?;
897            let mut sats = Vec::with_capacity(numsat);
898            let mut sv_tokens = Vec::with_capacity(numsat);
899            for _ in 0..numsat {
900                let raw = lines.next().ok_or_else(|| {
901                    Error::Parse("RINEX-3 epoch truncated: missing satellite line".into())
902                })?;
903                let sat_line = raw.trim_end_matches(['\r', '\n']);
904                let sv = field(sat_line, 0, 3).to_string();
905                let n_obs = self.obs_count_for(&sv)?;
906                let (values, flags) = parse_sat_obs_v3(sat_line, n_obs)?;
907                sv_tokens.push(sv.clone());
908                sats.push(SatRecord { sv, values, flags });
909            }
910            let descriptor = build_descriptor_v3(&line, &sv_tokens);
911            epochs.push(EpochRecord::Obs(ObsEpoch {
912                descriptor,
913                clock,
914                sats,
915            }));
916        }
917    }
918
919    /// Parse the body of a plain RINEX-2 observation file into canonical epoch
920    /// records (the inverse of [`serialize_rinex_epoch_v1`]). Handles the
921    /// 12-satellite epoch-line wrap and the 5-observation data-line wrap.
922    fn parse_rinex_epochs_v1<'a, I>(&self, lines: &mut I) -> Result<Vec<EpochRecord>>
923    where
924        I: Iterator<Item = &'a str>,
925    {
926        let mut epochs = Vec::new();
927        loop {
928            let Some(first) = next_nonblank(lines) else {
929                return Ok(epochs);
930            };
931            let flag = strict_int_field::<u8>(&first, 26, 29, "v1.epoch.flag")?;
932            let numsat = strict_int_field::<usize>(&first, 29, 32, "v1.epoch.satellite_count")?;
933
934            if flag > 1 {
935                let event_lines = read_event_lines(lines, numsat, "RINEX-2")?;
936                epochs.push(EpochRecord::Event {
937                    descriptor: first,
938                    lines: event_lines,
939                });
940                continue;
941            }
942
943            let clock = parse_clock_field(&first, 68, 80, 9, "v1.epoch.clock")?;
944
945            // The SV list begins at column 32 and wraps after 12 satellites onto
946            // continuation lines, each padded with 32 leading blanks.
947            let mut sv_tokens: Vec<String> = Vec::with_capacity(numsat);
948            collect_sv_tokens_v1(&first, numsat.min(12), &mut sv_tokens);
949            while sv_tokens.len() < numsat {
950                let raw = lines.next().ok_or_else(|| {
951                    Error::Parse("RINEX-2 epoch SV continuation truncated".into())
952                })?;
953                let cont = raw.trim_end_matches(['\r', '\n']);
954                let need = (numsat - sv_tokens.len()).min(12);
955                collect_sv_tokens_v1(cont, need, &mut sv_tokens);
956            }
957            let sv_tokens: Vec<String> = sv_tokens
958                .into_iter()
959                .map(|tok| self.normalize_v1_sv(tok))
960                .collect();
961
962            let mut sats = Vec::with_capacity(numsat);
963            for sv in &sv_tokens {
964                let n_obs = self.obs_count_for(sv)?;
965                let row_count = n_obs.div_ceil(5);
966                let mut obs_lines = Vec::with_capacity(row_count);
967                for _ in 0..row_count {
968                    let raw = lines.next().ok_or_else(|| {
969                        Error::Parse("RINEX-2 epoch truncated: missing observation line".into())
970                    })?;
971                    obs_lines.push(raw.trim_end_matches(['\r', '\n']).to_string());
972                }
973                let (values, flags) = parse_sat_obs_v1(&obs_lines, n_obs)?;
974                sats.push(SatRecord {
975                    sv: sv.clone(),
976                    values,
977                    flags,
978                });
979            }
980            let descriptor = build_descriptor_v1(&first, &sv_tokens);
981            epochs.push(EpochRecord::Obs(ObsEpoch {
982                descriptor,
983                clock,
984                sats,
985            }));
986        }
987    }
988
989    /// Re-attach the mono-system constellation letter to a RINEX-2 SV token that
990    /// omits it, matching [`Self::sv_tokens_v1`].
991    fn normalize_v1_sv(&self, token: String) -> String {
992        if token.starts_with(' ') {
993            if let Some(sys) = self.default_system {
994                let prn = token.trim();
995                return format!("{sys}{prn:>2}");
996            }
997        }
998        token
999    }
1000}
1001
1002// ── Plain RINEX -> IR ─────────────────────────────────────────────────────────
1003
1004/// Parse plain RINEX observation text into the canonical [`ObsStream`] IR (the
1005/// inverse of the RINEX serializers driving [`decode`]). The embedded header is
1006/// captured verbatim, the revision is taken from `RINEX VERSION / TYPE`, and each
1007/// epoch's fixed-decimal observation fields are read back into scaled integers.
1008fn parse_rinex_obs(rinex_text: &str) -> Result<ObsStream> {
1009    let mut decoder = Decoder::new();
1010    let mut header: Vec<String> = Vec::new();
1011    let mut lines = rinex_text.lines();
1012    decoder.scan_rinex_header(&mut lines, &mut header)?;
1013
1014    let version = decoder.version;
1015    let epochs = match version {
1016        CrinexVersion::V3 => decoder.parse_rinex_epochs_v3(&mut lines)?,
1017        CrinexVersion::V1 => decoder.parse_rinex_epochs_v1(&mut lines)?,
1018    };
1019    Ok(ObsStream {
1020        version,
1021        header,
1022        epochs,
1023    })
1024}
1025
/// Advance `lines` to the next line that is non-empty once its trailing
/// `\r`/`\n` characters are removed, and return that stripped line.
///
/// Lines holding only spaces count as non-blank and are returned as they are.
/// Returns `None` when the iterator runs out first.
fn next_nonblank<'a, I>(lines: &mut I) -> Option<String>
where
    I: Iterator<Item = &'a str>,
{
    lines
        .by_ref()
        .map(|raw| raw.trim_end_matches(['\r', '\n']))
        .find(|stripped| !stripped.is_empty())
        .map(str::to_string)
}
1040
1041/// Read the `count` verbatim records that follow an event epoch descriptor.
1042fn read_event_lines<'a, I>(lines: &mut I, count: usize, revision: &str) -> Result<Vec<String>>
1043where
1044    I: Iterator<Item = &'a str>,
1045{
1046    let mut out = Vec::with_capacity(count);
1047    for _ in 0..count {
1048        let raw = lines
1049            .next()
1050            .ok_or_else(|| Error::Parse(format!("{revision} event record truncated")))?;
1051        out.push(raw.trim_end_matches(['\r', '\n']).to_string());
1052    }
1053    Ok(out)
1054}
1055
1056/// Read an optional receiver-clock field (`Fw.d`) as the scaled integer the
1057/// CRINEX clock engine carries, or `None` when the field is blank.
1058fn parse_clock_field(
1059    line: &str,
1060    start: usize,
1061    end: usize,
1062    decimals: usize,
1063    field_name: &'static str,
1064) -> Result<Option<i64>> {
1065    let text = field(line, start, end);
1066    if text.trim().is_empty() {
1067        Ok(None)
1068    } else {
1069        Ok(Some(parse_scaled_decimal(text, decimals, field_name)?))
1070    }
1071}
1072
1073/// Recover one RINEX-3 satellite line's observations: `n_obs` fixed 16-column
1074/// fields, each a 14-column `F14.3` value plus a 2-column LLI/SSI pair. A blank
1075/// value column is a `None`; trailing blanks may be trimmed from the line.
1076fn parse_sat_obs_v3(line: &str, n_obs: usize) -> Result<(Vec<Option<i64>>, String)> {
1077    let mut values = Vec::with_capacity(n_obs);
1078    let mut flags = String::with_capacity(n_obs * 2);
1079    for i in 0..n_obs {
1080        let base = 3 + i * OBS_FIELD_WIDTH;
1081        read_obs_field(line, base, &mut values, &mut flags)?;
1082    }
1083    Ok((values, flags))
1084}
1085
1086/// Recover one RINEX-2 satellite's observations from its wrapped data lines (five
1087/// 16-column fields per line). See [`parse_sat_obs_v3`] for the field layout.
1088fn parse_sat_obs_v1(obs_lines: &[String], n_obs: usize) -> Result<(Vec<Option<i64>>, String)> {
1089    let mut values = Vec::with_capacity(n_obs);
1090    let mut flags = String::with_capacity(n_obs * 2);
1091    for i in 0..n_obs {
1092        let line = obs_lines.get(i / 5).map_or("", String::as_str);
1093        let base = (i % 5) * OBS_FIELD_WIDTH;
1094        read_obs_field(line, base, &mut values, &mut flags)?;
1095    }
1096    Ok((values, flags))
1097}
1098
1099/// Read one observation field at column `base` of `line`, pushing the recovered
1100/// value (or `None` for a blank column) and its two LLI/SSI flag characters.
1101fn read_obs_field(
1102    line: &str,
1103    base: usize,
1104    values: &mut Vec<Option<i64>>,
1105    flags: &mut String,
1106) -> Result<()> {
1107    let value_text = field(line, base, base + OBS_VALUE_WIDTH);
1108    if value_text.trim().is_empty() {
1109        values.push(None);
1110        flags.push(' ');
1111        flags.push(' ');
1112    } else {
1113        values.push(Some(parse_scaled_decimal(value_text, 3, "observation")?));
1114        flags.push(char_at_or_space(line, base + OBS_VALUE_WIDTH));
1115        flags.push(char_at_or_space(line, base + OBS_VALUE_WIDTH + 1));
1116    }
1117    Ok(())
1118}
1119
1120/// Parse a fixed-decimal field into the scaled integer it represents, exactly:
1121/// `value * 10^decimals`. The fraction is read digit-by-digit (not through a
1122/// float) so the three-decimal observation layout and the `-.920`-style dropped
1123/// leading zero round-trip without binary-float rounding.
1124fn parse_scaled_decimal(text: &str, decimals: usize, field_name: &'static str) -> Result<i64> {
1125    let trimmed = text.trim();
1126    let (negative, body) = trimmed.strip_prefix('-').map_or_else(
1127        || (false, trimmed.strip_prefix('+').unwrap_or(trimmed)),
1128        |rest| (true, rest),
1129    );
1130    let (integer_part, fraction_part) = match body.split_once('.') {
1131        Some((integer, fraction)) => (integer, fraction),
1132        None => (body, ""),
1133    };
1134    let integer_text = if integer_part.is_empty() {
1135        "0"
1136    } else {
1137        integer_part
1138    };
1139    // Right-pad (or clip) the fraction to exactly `decimals` digits.
1140    let mut fraction = String::with_capacity(decimals);
1141    fraction.extend(fraction_part.chars().take(decimals));
1142    while fraction.len() < decimals {
1143        fraction.push('0');
1144    }
1145    let scale = 10i64.pow(decimals as u32);
1146    let integer_value = parse_scaled_component(integer_text, text, field_name)?;
1147    let fraction_value = if decimals == 0 {
1148        0
1149    } else {
1150        parse_scaled_component(&fraction, text, field_name)?
1151    };
1152    let magnitude = validate::checked_i64_mul(integer_value, scale, field_name)
1153        .and_then(|scaled| validate::checked_i64_add(scaled, fraction_value, field_name))
1154        .map_err(map_arithmetic_error)?;
1155    Ok(if negative { -magnitude } else { magnitude })
1156}
1157
1158/// Parse one all-digit component of a scaled-decimal field.
1159fn parse_scaled_component(token: &str, text: &str, field_name: &'static str) -> Result<i64> {
1160    token
1161        .parse::<i64>()
1162        .map_err(|_| Error::Parse(format!("CRINEX invalid {field_name}: {text:?}")))
1163}
1164
1165/// Build the canonical V3 epoch descriptor from the RINEX-3 epoch line and the
1166/// SV list gathered from the satellite data lines: the cols 0..35 head, padded to
1167/// column 41, then the concatenated 3-character SV tokens (where the CRINEX epoch
1168/// line and [`Decoder::sv_tokens_v3`] expect them).
1169fn build_descriptor_v3(epoch_line: &str, sv_tokens: &[String]) -> String {
1170    let mut descriptor = pad_to(field(epoch_line, 0, 35), 41);
1171    for token in sv_tokens {
1172        descriptor.push_str(token);
1173    }
1174    descriptor
1175}
1176
1177/// Build the canonical V1 epoch descriptor: the cols 0..32 head (leading space
1178/// included) then the full concatenated SV list, matching the single-line CRINEX
1179/// epoch record [`Decoder::sv_tokens_v1`] reads back.
1180fn build_descriptor_v1(epoch_line: &str, sv_tokens: &[String]) -> String {
1181    let mut descriptor = pad_to(field(epoch_line, 0, 32), 32);
1182    for token in sv_tokens {
1183        descriptor.push_str(token);
1184    }
1185    descriptor
1186}
1187
1188/// Append up to `count` 3-character SV tokens read from column 32 onward.
1189fn collect_sv_tokens_v1(line: &str, count: usize, out: &mut Vec<String>) {
1190    for i in 0..count {
1191        let start = 32 + i * 3;
1192        out.push(field(line, start, start + 3).to_string());
1193    }
1194}
1195
/// Return the byte at `index` of `line` converted to a `char`, or a space when
/// `line` has no such byte (RINEX output lines have trailing blanks trimmed).
///
/// The lookup is by byte, so the result is only meaningful for ASCII lines.
fn char_at_or_space(line: &str, index: usize) -> char {
    match line.as_bytes().get(index) {
        Some(&byte) => char::from(byte),
        None => ' ',
    }
}
1201
/// Copy `text` and append spaces until it is at least `width` bytes long.
///
/// Text that is already `width` bytes or longer is returned unchanged; nothing
/// is ever cut off.
fn pad_to(text: &str, width: usize) -> String {
    let missing = width.saturating_sub(text.len());
    let mut padded = String::with_capacity(text.len() + missing);
    padded.push_str(text);
    padded.extend(std::iter::repeat(' ').take(missing));
    padded
}
1210
1211// ── Canonical-IR parse and serializers ───────────────────────────────────────
1212
1213/// Parse a CRINEX stream into the canonical [`ObsStream`] IR (the inverse of
1214/// [`encode_stream`]). The plain RINEX header is captured verbatim and every
1215/// epoch's difference engines are undone into recovered integers and flag
1216/// strings.
1217pub fn parse_stream(crinex_text: &str) -> Result<ObsStream> {
1218    let mut decoder = Decoder::new();
1219    let mut lines = crinex_text.lines();
1220    let mut header: Vec<String> = Vec::new();
1221    decoder.read_crinex_header(&mut lines, &mut |line: &str| header.push(line.to_string()))?;
1222
1223    let version = decoder.version;
1224    let mut epochs = Vec::new();
1225    loop {
1226        let record = match version {
1227            CrinexVersion::V3 => decoder.next_epoch_v3(&mut lines)?,
1228            CrinexVersion::V1 => decoder.next_epoch_v1(&mut lines)?,
1229        };
1230        match record {
1231            Some(record) => epochs.push(record),
1232            None => break,
1233        }
1234    }
1235    Ok(ObsStream {
1236        version,
1237        header,
1238        epochs,
1239    })
1240}
1241
1242/// Serialize a canonical [`ObsStream`] back to CRINEX text (the inverse of
1243/// [`parse_stream`]).
1244///
1245/// CRINEX compression is not unique, so this emits the **canonical all-reset**
1246/// form: every observation and the receiver clock are written as `1&value`
1247/// arc-init tokens (no higher-order differencing) and every epoch descriptor is
1248/// written as a text-diff reset. Only the per-satellite LLI/SSI flag strings are
1249/// genuinely text-differenced, because the flag grammar has no inline reset
1250/// marker. The result is therefore not byte-identical to an arbitrary source
1251/// CRINEX, but it is a valid CRINEX stream that decodes to exactly the same plain
1252/// RINEX text. The round-trip guarantee is `decode(encode_stream(parse_stream(x)))
1253/// == decode(x)` and `parse_stream(encode_stream(s)) == s`.
1254pub fn encode_stream(stream: &ObsStream) -> String {
1255    let mut out = String::new();
1256    let version_label = match stream.version {
1257        CrinexVersion::V3 => "3.0",
1258        CrinexVersion::V1 => "1.0",
1259    };
1260    push_crinex_line(
1261        &mut out,
1262        &labeled_crinex(version_label, "CRINEX VERS   / TYPE"),
1263    );
1264    push_crinex_line(
1265        &mut out,
1266        &labeled_crinex("sidereon", "CRINEX PROG   / DATE"),
1267    );
1268    for header_line in &stream.header {
1269        push_crinex_line(&mut out, header_line);
1270    }
1271
1272    let mut flag_state: HashMap<String, String> = HashMap::new();
1273    for epoch in &stream.epochs {
1274        encode_epoch(epoch, stream.version, &mut flag_state, &mut out);
1275    }
1276    out
1277}
1278
1279/// Emit one epoch (observation or event) in canonical all-reset CRINEX form.
1280fn encode_epoch(
1281    epoch: &EpochRecord,
1282    version: CrinexVersion,
1283    flag_state: &mut HashMap<String, String>,
1284    out: &mut String,
1285) {
1286    match epoch {
1287        EpochRecord::Event { descriptor, lines } => {
1288            encode_descriptor(descriptor, version, out);
1289            for line in lines {
1290                push_crinex_line(out, line);
1291            }
1292        }
1293        EpochRecord::Obs(ObsEpoch {
1294            descriptor,
1295            clock,
1296            sats,
1297        }) => {
1298            encode_descriptor(descriptor, version, out);
1299            // An observation epoch always carries a clock line (possibly blank).
1300            match clock {
1301                Some(value) => push_crinex_line(out, &format!("1&{value}")),
1302                None => push_crinex_line(out, ""),
1303            }
1304            for sat in sats {
1305                let previous = flag_state.entry(sat.sv.clone()).or_default();
1306                let delta = text_diff_delta(previous.as_str(), &sat.flags);
1307                previous.clone_from(&sat.flags);
1308                push_crinex_line(out, &encode_sat_line(&sat.values, &delta));
1309            }
1310        }
1311    }
1312}
1313
1314/// Emit the epoch descriptor as a text-diff reset for the given revision.
1315fn encode_descriptor(descriptor: &str, version: CrinexVersion, out: &mut String) {
1316    match version {
1317        // The reconstructed V3 descriptor already begins with '>'; a leading '>'
1318        // is exactly the V3 reset marker, so it re-emits verbatim.
1319        CrinexVersion::V3 => push_crinex_line(out, descriptor),
1320        // The reconstructed V1 descriptor begins with the restored leading space;
1321        // the V1 reset marker '&' replaces it and the decoder re-prepends a space.
1322        CrinexVersion::V1 => push_crinex_line(out, &format!("&{}", &descriptor[1..])),
1323    }
1324}
1325
/// Build one CRINEX satellite data line.
///
/// Every observation becomes a `1&value` arc-init token and a missing
/// observation becomes an empty token; tokens are separated by single spaces.
/// One more space and then `flag_delta` (the text-diff of the flag string)
/// close the line.
fn encode_sat_line(values: &[Option<i64>], flag_delta: &str) -> String {
    let tokens: Vec<String> = values
        .iter()
        .map(|value| match value {
            Some(number) => format!("1&{number}"),
            None => String::new(),
        })
        .collect();

    let mut line = tokens.join(" ");
    // The flag string follows the last observation token after a single space.
    line.push(' ');
    line.push_str(flag_delta);
    line
}
1343
/// Compute the CRINEX text-difference delta that turns `previous` into
/// `current` under [`TextDiff::decompress`].
///
/// The delta has one byte per byte of `current`:
/// - a space where `current` equals the buffered byte (keep it);
/// - `&` where `current` is a space but the buffer holds something else
///   (blank it);
/// - the byte of `current` itself everywhere else, including positions past
///   the end of `previous`, which the decoder extends verbatim.
///
/// Bytes of `previous` beyond the length of `current` are not addressed:
/// per-satellite flag strings never shrink across epochs (the buffer only
/// grows), so no shortening case is needed. Should the assembled bytes not be
/// valid UTF-8, an empty string is returned.
fn text_diff_delta(previous: &str, current: &str) -> String {
    let buffered = previous.as_bytes();
    let encoded: Vec<u8> = current
        .bytes()
        .enumerate()
        .map(|(position, wanted)| match buffered.get(position).copied() {
            Some(held) if held == wanted => b' ',
            Some(_) if wanted == b' ' => b'&',
            _ => wanted,
        })
        .collect();
    // Inputs are ASCII LLI/SSI flag strings, so this is always valid UTF-8.
    String::from_utf8(encoded).unwrap_or_default()
}
1366
/// Append `line` and a terminating `\n` to the CRINEX output buffer `out`.
fn push_crinex_line(out: &mut String, line: &str) {
    out.reserve(line.len() + 1);
    out.extend([line, "\n"]);
}
1372
/// Build a labeled CRINEX/RINEX header record: `body` left-justified and
/// space-padded to 60 characters, immediately followed by `label`.
///
/// A `body` longer than 60 characters is kept whole, pushing the label right.
fn labeled_crinex(body: &str, label: &str) -> String {
    let mut record = format!("{body:<60}");
    record.push_str(label);
    record
}
1377
1378/// Serialize one decoded V3 epoch back to plain RINEX-3 observation text.
1379fn serialize_rinex_epoch_v3<W: FnMut(&str)>(record: &EpochRecord, emit: &mut W) {
1380    match record {
1381        EpochRecord::Event { descriptor, lines } => {
1382            emit(trim_end(field(descriptor, 0, 35)));
1383            for line in lines {
1384                emit(line);
1385            }
1386        }
1387        EpochRecord::Obs(ObsEpoch {
1388            descriptor,
1389            clock,
1390            sats,
1391        }) => {
1392            let clock_text = format_clock_v3(*clock);
1393            // Everything before the SV list (cols 0..35) plus the clock. The SV
1394            // list is not part of a RINEX-3 epoch line. The optional receiver
1395            // clock offset is an `F15.12` field at columns 41..56, with columns
1396            // 35..41 reserved blank, so the head is padded to column 41 first.
1397            let head = field(descriptor, 0, 35);
1398            let mut epoch_out = head.to_string();
1399            if !clock_text.is_empty() {
1400                while epoch_out.len() < 41 {
1401                    epoch_out.push(' ');
1402                }
1403            }
1404            epoch_out.push_str(&clock_text);
1405            emit(trim_end(&epoch_out));
1406            for sat in sats {
1407                let out = format_sat_line(&sat.sv, &sat.values, &sat.flags);
1408                emit(trim_end(&out));
1409            }
1410        }
1411    }
1412}
1413
1414/// Serialize one decoded V1 epoch back to plain RINEX-2 observation text.
1415fn serialize_rinex_epoch_v1<W: FnMut(&str)>(record: &EpochRecord, emit: &mut W) {
1416    match record {
1417        EpochRecord::Event { descriptor, lines } => {
1418            emit(trim_end(field(descriptor, 0, 32)));
1419            for line in lines {
1420                emit(line);
1421            }
1422        }
1423        EpochRecord::Obs(ObsEpoch {
1424            descriptor,
1425            clock,
1426            sats,
1427        }) => {
1428            let clock_text = format_clock_v1(*clock);
1429            // The SV list wraps after 12 satellites with a 32-space pad.
1430            let sv_list: Vec<String> = sats.iter().map(|sat| sat.sv.clone()).collect();
1431            for line in &format_epoch_v1(descriptor, &sv_list, &clock_text) {
1432                emit(trim_end(line));
1433            }
1434            for sat in sats {
1435                for line in format_sat_lines_v1(&sat.values, &sat.flags) {
1436                    emit(trim_end(&line));
1437                }
1438            }
1439        }
1440    }
1441}
1442
/// Format the recovered V3 receiver clock offset (scaled by 10^12) as the
/// `%15.12f` field appended to the epoch line; empty when no clock is carried.
///
/// The text is built from the integer directly — sign, whole part, a `.`, and
/// the twelve zero-padded fractional digits — then right-aligned in 15
/// columns (wider values are not truncated). Keeping the value out of `f64`
/// makes the output exact for every `i64`, including magnitudes above 2^53,
/// and mirrors the digit-exact field parsing used on the way in.
fn format_clock_v3(clock: Option<i64>) -> String {
    const SCALE: u64 = 1_000_000_000_000;

    let Some(value) = clock else {
        return String::new();
    };
    let sign = if value < 0 { "-" } else { "" };
    // `unsigned_abs` is defined for `i64::MIN` too, unlike `abs`.
    let magnitude = value.unsigned_abs();
    let number = format!("{sign}{}.{:012}", magnitude / SCALE, magnitude % SCALE);
    format!("{number:>15}")
}
1451
/// Format the recovered V1 receiver clock offset (scaled by 10^9) as the
/// RINEX-2 `%12.9f` field; empty when no clock is carried.
///
/// The text is built from the integer directly — sign, whole part, a `.`, and
/// the nine zero-padded fractional digits — then right-aligned in 12 columns
/// (wider values are not truncated). Keeping the value out of `f64` makes the
/// output exact for every `i64`, including magnitudes above 2^53, and mirrors
/// the digit-exact field parsing used on the way in.
fn format_clock_v1(clock: Option<i64>) -> String {
    const SCALE: u64 = 1_000_000_000;

    let Some(value) = clock else {
        return String::new();
    };
    let sign = if value < 0 { "-" } else { "" };
    // `unsigned_abs` is defined for `i64::MIN` too, unlike `abs`.
    let magnitude = value.unsigned_abs();
    let number = format!("{sign}{}.{:09}", magnitude / SCALE, magnitude % SCALE);
    format!("{number:>12}")
}
1460
1461fn strict_obs_count(
1462    line: &str,
1463    start: usize,
1464    end: usize,
1465    field_name: &'static str,
1466) -> Result<usize> {
1467    let count = strict_int_field::<usize>(line, start, end, field_name)?;
1468    if count == 0 {
1469        return Err(Error::Parse(format!(
1470            "CRINEX invalid {field_name}: observation count must be positive in {line:?}"
1471        )));
1472    }
1473    Ok(count)
1474}
1475
1476fn strict_int_field<T>(line: &str, start: usize, end: usize, field_name: &'static str) -> Result<T>
1477where
1478    T: core::str::FromStr,
1479{
1480    strict_int_token(field(line, start, end), field_name, line)
1481}
1482
1483fn strict_int_token<T>(token: &str, field_name: &'static str, line: &str) -> Result<T>
1484where
1485    T: core::str::FromStr,
1486{
1487    validate::strict_int::<T>(token, field_name).map_err(|error| map_field_error(error, line))
1488}
1489
1490fn fixed_sv_token<'a>(
1491    sv_list: &'a [u8],
1492    crinex_version: &str,
1493    numsat: usize,
1494    index: usize,
1495) -> Result<&'a str> {
1496    let start = index * 3;
1497    let end = start + 3;
1498    if end > sv_list.len() {
1499        return Err(Error::Parse(format!(
1500            "CRINEX {crinex_version} epoch SV list shorter than {numsat} satellites"
1501        )));
1502    }
1503    let token = &sv_list[start..end];
1504    if !token.is_ascii() {
1505        return Err(Error::Parse(format!(
1506            "CRINEX {crinex_version} epoch SV token {} contains non-ASCII bytes",
1507            index + 1
1508        )));
1509    }
1510    std::str::from_utf8(token).map_err(|_| {
1511        Error::Parse(format!(
1512            "CRINEX {crinex_version} epoch SV token {} is not valid UTF-8",
1513            index + 1
1514        ))
1515    })
1516}
1517
1518fn map_field_error(error: FieldError, line: &str) -> Error {
1519    Error::Parse(format!(
1520        "CRINEX invalid {}: {error} in {line:?}",
1521        error.field()
1522    ))
1523}
1524
1525fn map_arithmetic_error(error: validate::ArithmeticError) -> Error {
1526    Error::Parse(format!("CRINEX {error}"))
1527}
1528
1529/// Parse a reset token `order&value` (e.g. `3&126298057858`). Returns
1530/// `Ok(Some((order, value)))` for a reset, `Ok(None)` for a plain delta, and an
1531/// error for a malformed reset.
1532fn parse_reset(token: &str) -> Result<Option<(usize, i64)>> {
1533    let token = token.trim();
1534    if let Some(amp) = token.find('&') {
1535        let order = token[..amp]
1536            .parse::<usize>()
1537            .map_err(|_| Error::Parse(format!("CRINEX reset order in {token:?} invalid")))?;
1538        if order == 0 || order > MAX_ORDER {
1539            return Err(Error::Parse(format!(
1540                "CRINEX reset order {order} out of range 1..={MAX_ORDER}"
1541            )));
1542        }
1543        let value = token[amp + 1..]
1544            .parse::<i64>()
1545            .map_err(|_| Error::Parse(format!("CRINEX reset value in {token:?} invalid")))?;
1546        Ok(Some((order, value)))
1547    } else {
1548        Ok(None)
1549    }
1550}
1551
1552/// Format one reconstructed V3 satellite line: the SV token, then each
1553/// observation as a 16-column field (`F14.3` value + LLI + SSI), with the flag
1554/// string supplying the LLI/SSI characters.
1555fn format_sat_line(sv: &str, values: &[Option<i64>], flags: &str) -> String {
1556    let mut out = String::with_capacity(3 + values.len() * OBS_FIELD_WIDTH);
1557    out.push_str(sv);
1558    let flag_bytes = flags.as_bytes();
1559    for (i, value) in values.iter().enumerate() {
1560        match value {
1561            Some(v) => out.push_str(&format_value(*v)),
1562            None => {
1563                for _ in 0..OBS_VALUE_WIDTH {
1564                    out.push(' ');
1565                }
1566            }
1567        }
1568        // LLI + SSI from the flag string (2 chars per observation).
1569        let lli = flag_bytes.get(i * 2).copied().unwrap_or(b' ');
1570        let ssi = flag_bytes.get(i * 2 + 1).copied().unwrap_or(b' ');
1571        if value.is_some() {
1572            out.push(lli as char);
1573            out.push(ssi as char);
1574        } else {
1575            out.push(' ');
1576            out.push(' ');
1577        }
1578    }
1579    out
1580}
1581
/// Render a scaled integer observation (`value * 1e-3`) as RINEX `F14.3` text,
/// right-aligned in 14 columns, the way the reference `crx2rnx` writes it.
///
/// A negative value strictly between -1 and 0 is written without its leading
/// zero (`-.920`, not `-0.920`) - the documented RNXCMP idiosyncrasy - whereas
/// a non-negative value below one keeps it (`0.216`, `0.000`). Working on the
/// scaled integer keeps the three decimals exact, with no binary-float
/// rounding. Text wider than 14 columns is returned unpadded, not truncated.
fn format_value(scaled: i64) -> String {
    let abs = scaled.unsigned_abs();
    let (integer_part, millis) = (abs / 1000, abs % 1000);
    let text = match (scaled < 0, integer_part) {
        // Negative with a zero integer part: the leading zero is dropped.
        (true, 0) => format!("-.{millis:03}"),
        (true, _) => format!("-{integer_part}.{millis:03}"),
        (false, _) => format!("{integer_part}.{millis:03}"),
    };
    format!("{text:>14}")
}
1601
1602/// Format the RINEX-2 epoch line(s) from the reconstructed descriptor, SV list,
1603/// and clock text, wrapping the SV list after 12 satellites.
1604fn format_epoch_v1(descriptor: &str, sv_list: &[String], clock_text: &str) -> Vec<String> {
1605    // The fixed epoch header (date + flag + count) is cols 0..32 of the
1606    // descriptor.
1607    let head = field(descriptor, 0, 32).to_string();
1608    let mut lines = Vec::new();
1609    let mut first = head;
1610    for sv in sv_list.iter().take(12) {
1611        first.push_str(sv);
1612    }
1613    if !clock_text.is_empty() {
1614        // The RINEX-2 receiver clock offset sits at columns 68..80 of the first
1615        // epoch line, regardless of satellite count: pad the (up-to-12) SV slots
1616        // to column 68 before appending it.
1617        while first.len() < 68 {
1618            first.push(' ');
1619        }
1620        first.push_str(clock_text);
1621    }
1622    lines.push(first);
1623    let mut idx = 12;
1624    while idx < sv_list.len() {
1625        let chunk = sv_list[idx..(idx + 12).min(sv_list.len())].join("");
1626        lines.push(format!("{:32}{chunk}", ""));
1627        idx += 12;
1628    }
1629    lines
1630}
1631
1632/// Format the RINEX-2 observation line(s) for one satellite, wrapping after 5
1633/// observations per line (RINEX 2 layout: 16-col fields).
1634fn format_sat_lines_v1(values: &[Option<i64>], flags: &str) -> Vec<String> {
1635    let flag_bytes = flags.as_bytes();
1636    let mut lines = Vec::new();
1637    let mut line = String::new();
1638    for (i, value) in values.iter().enumerate() {
1639        if i > 0 && i % 5 == 0 {
1640            lines.push(std::mem::take(&mut line));
1641        }
1642        match value {
1643            Some(v) => line.push_str(&format_value(*v)),
1644            None => {
1645                for _ in 0..OBS_VALUE_WIDTH {
1646                    line.push(' ');
1647                }
1648            }
1649        }
1650        let lli = flag_bytes.get(i * 2).copied().unwrap_or(b' ');
1651        let ssi = flag_bytes.get(i * 2 + 1).copied().unwrap_or(b' ');
1652        if value.is_some() {
1653            line.push(lli as char);
1654            line.push(ssi as char);
1655        } else {
1656            line.push(' ');
1657            line.push(' ');
1658        }
1659    }
1660    lines.push(line);
1661    lines
1662}
1663
/// Strip trailing space characters from a reconstructed line (the reference
/// `crx2rnx` removes trailing blanks from every output line). Only `' '` is
/// removed; other trailing whitespace such as tabs is kept.
fn trim_end(line: &str) -> &str {
    line.trim_end_matches(|ch: char| ch == ' ')
}
1669
1670#[cfg(all(test, sidereon_repo_tests))]
1671mod tests;