Skip to main content

sidereon_core/
constellation.rs

//! GNSS constellation identity catalog and validation helpers.
//!
//! This is a data/catalog layer: it builds normalized satellite identity
//! records from public sources and compares those records with GNSS products.
//! It does not alter positioning solves or infer application-specific health
//! rules. It is deterministic and performs no network access; fetching the
//! source bytes is the caller's (binding's) job.
//!
//! GPS is supported first. CelesTrak `gps-ops` OMM/JSON is the base source for
//! current NORAD catalog ids and PRN assignments; the PRN is parsed from
//! `OBJECT_NAME` and rendered as the SP3/RINEX id (`"G13"`). NAVCEN's GPS
//! constellation status page can be parsed and merged as an optional overlay
//! for SVN and NANU usability details.
//!
//! The OMM input is the canonical [`Omm`](crate::astro::omm::Omm) produced by
//! the core OMM parser (`crate::astro::omm::{parse_json, parse_json_array}`):
//! this module does not re-parse OMM from scratch, it reads `OBJECT_NAME` and
//! `NORAD_CAT_ID` off already-parsed records.
//!
//! ```
//! use sidereon_core::constellation::{to_csv, BoolStyle, Record, RecordSource};
//! use sidereon_core::GnssSystem;
//!
//! let record = Record {
//!     system: GnssSystem::Gps,
//!     prn: 3,
//!     svn: None,
//!     norad_id: 40294,
//!     sp3_id: "G03".to_string(),
//!     active: true,
//!     usable: true,
//!     source: RecordSource::default(),
//! };
//! assert_eq!(
//!     to_csv(&[record], BoolStyle::Lower),
//!     "prn,norad_cat_id,active,sp3_id\n3,40294,true,G03\n"
//! );
//! ```
39
40use crate::astro::omm::Omm;
41use crate::ephemeris::Sp3;
42use crate::id::GnssSystem;
43use core::fmt;
44
/// Name of the CelesTrak GP group recorded as provenance on every record
/// built from the OMM feed.
const CELESTRAK_GPS_GROUP: &str = "gps-ops";
46
/// Errors the constellation catalog builders can report.
///
/// Follows the typed error pattern of the core parsers (for example
/// `astro::omm::OmmError`): a small enum with `Display` and
/// `std::error::Error` implementations, so malformed input surfaces as a
/// value rather than a panic.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ConstellationError {
    /// No `(PRN nn)` block could be parsed from a CelesTrak `OBJECT_NAME`, or
    /// the OMM had no object name. Carries the offending name when present.
    MissingPrn(Option<String>),
    /// The NAVCEN status bytes failed UTF-8 decoding.
    NavcenNotUtf8,
    /// No GPS constellation rows were found in the NAVCEN status HTML.
    NavcenNoRows,
    /// A NAVCEN integer cell did not parse as a positive integer.
    NavcenBadField {
        /// Name of the NAVCEN field whose cell was rejected (for example
        /// `gps-prn`).
        field: &'static str,
        /// The trimmed cell text that was rejected.
        value: String,
    },
    /// SP3 validation of a catalog produced findings. Carries their
    /// description.
    Sp3Validation(String),
}
72
73impl fmt::Display for ConstellationError {
74    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
75        match self {
76            ConstellationError::MissingPrn(Some(name)) => {
77                write!(f, "CelesTrak OBJECT_NAME has no PRN: {name:?}")
78            }
79            ConstellationError::MissingPrn(None) => {
80                write!(f, "CelesTrak record has no OBJECT_NAME")
81            }
82            ConstellationError::NavcenNotUtf8 => write!(f, "NAVCEN bytes are not valid UTF-8"),
83            ConstellationError::NavcenNoRows => write!(f, "NAVCEN HTML has no GPS rows"),
84            ConstellationError::NavcenBadField { field, value } => {
85                write!(f, "NAVCEN field {field} has invalid integer {value:?}")
86            }
87            ConstellationError::Sp3Validation(msg) => {
88                write!(f, "GNSS catalog failed SP3 validation: {msg}")
89            }
90        }
91    }
92}
93
94impl std::error::Error for ConstellationError {}
95
96/// Per-source provenance kept on a [`Record`].
97///
98/// `active` in a record means the satellite is present in the base identity
99/// source. `usable` is an advisory health flag; for the current GPS path it is
100/// `true` unless a compatible merged NAVCEN row carries an active NANU that
101/// marks the PRN unusable or decommissioned.
102#[derive(Debug, Clone, Default, PartialEq, Eq)]
103pub struct RecordSource {
104    /// CelesTrak `gps-ops` identity provenance.
105    pub celestrak: Option<CelestrakSource>,
106    /// NAVCEN overlay that was merged into this record.
107    pub navcen: Option<NavcenSource>,
108    /// A NAVCEN row that matched the PRN but was not merged because its block
109    /// type was incompatible with the CelesTrak identity (a PRN transition).
110    pub navcen_conflict: Option<NavcenSource>,
111}
112
/// Provenance fields kept from a CelesTrak `gps-ops` OMM record.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CelestrakSource {
    /// The CelesTrak GP group the record was taken from (`gps-ops`).
    pub group: String,
    /// `OBJECT_NAME` as carried by the OMM.
    pub object_name: Option<String>,
    /// `OBJECT_ID` as carried by the OMM (the international designator).
    pub object_id: Option<String>,
    /// The OMM `EPOCH`, rendered as ISO-8601 text.
    pub epoch: Option<String>,
    /// Block type derived from the object name (`IIF`, `IIR`, `IIR-M`, `III`).
    pub block_type: Option<String>,
}
127
/// Provenance fields kept from a NAVCEN status row, whether the row was merged
/// into a record or set aside as a conflict.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NavcenSource {
    /// Space Vehicle Number reported by NAVCEN.
    pub svn: Option<u16>,
    /// Block type in NAVCEN's own spelling.
    pub block_type: Option<String>,
    /// Letter of the orbital plane.
    pub plane: Option<String>,
    /// Slot inside that plane.
    pub slot: Option<String>,
    /// Type of clock.
    pub clock: Option<String>,
    /// NANU type code (for example `FCSTSUMM`, `UNUSABLE`, `DECOM`).
    pub nanu_type: Option<String>,
    /// Subject line of the NANU.
    pub nanu_subject: Option<String>,
    /// `true` when the row carried an active NANU.
    pub active_nanu: bool,
}
148
149/// A normalized GNSS satellite identity record.
150#[derive(Debug, Clone, PartialEq, Eq)]
151pub struct Record {
152    /// The constellation. GPS today; the type is system-tagged for extension.
153    pub system: GnssSystem,
154    /// The within-constellation PRN.
155    pub prn: u16,
156    /// Space Vehicle Number, when known (CelesTrak alone leaves this `None`).
157    pub svn: Option<u16>,
158    /// NORAD catalog id.
159    pub norad_id: u32,
160    /// Canonical SP3/RINEX satellite token (`G03`).
161    pub sp3_id: String,
162    /// Present in the base identity source.
163    pub active: bool,
164    /// Advisory usability flag.
165    pub usable: bool,
166    /// Source provenance.
167    pub source: RecordSource,
168}
169
170/// A parsed row from NAVCEN's GPS constellation status table.
171#[derive(Debug, Clone, PartialEq, Eq)]
172pub struct NavcenStatus {
173    /// The constellation (GPS).
174    pub system: GnssSystem,
175    /// The within-constellation PRN.
176    pub prn: u16,
177    /// Space Vehicle Number, when present.
178    pub svn: Option<u16>,
179    /// Whether the satellite is usable per the active NANU (if any).
180    pub usable: bool,
181    /// Whether the row carried an active NANU.
182    pub active_nanu: bool,
183    /// NANU type code.
184    pub nanu_type: Option<String>,
185    /// NANU subject line.
186    pub nanu_subject: Option<String>,
187    /// Orbital plane letter.
188    pub plane: Option<String>,
189    /// Slot within the plane.
190    pub slot: Option<String>,
191    /// Block type.
192    pub block_type: Option<String>,
193    /// Clock type.
194    pub clock: Option<String>,
195}
196
/// Findings produced by validating a constellation catalog.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct Validation {
    /// SP3 ids of active+usable catalog records that the compared product
    /// lacks.
    pub missing_sp3_ids: Vec<String>,
    /// PRNs carried by more than one record.
    pub duplicate_prns: Vec<u16>,
    /// NORAD ids carried by more than one record.
    pub duplicate_norad_ids: Vec<u32>,
    /// PRNs of records that are inactive or unusable.
    pub inactive_unusable_prns: Vec<u16>,
    /// SP3 ids the product has but the active+usable catalog lacks.
    pub extra_sp3_ids: Vec<String>,
}
211
212/// A single field change on a PRN that exists in both diffed snapshots.
213#[derive(Debug, Clone, PartialEq, Eq)]
214pub struct FieldChange<T> {
215    /// The constellation.
216    pub system: GnssSystem,
217    /// The PRN.
218    pub prn: u16,
219    /// The value in the previous snapshot.
220    pub from: T,
221    /// The value in the current snapshot.
222    pub to: T,
223}
224
225/// Change report between two catalog snapshots, keyed by `(system, prn)`.
226#[derive(Debug, Clone, PartialEq, Eq, Default)]
227pub struct Diff {
228    /// PRNs present only in the current snapshot.
229    pub added: Vec<Record>,
230    /// PRNs present only in the previous snapshot.
231    pub removed: Vec<Record>,
232    /// NORAD id reassignments on a held PRN.
233    pub norad_reassigned: Vec<FieldChange<u32>>,
234    /// SP3 id changes on a held PRN.
235    pub sp3_id_changed: Vec<FieldChange<String>>,
236    /// SVN changes on a held PRN.
237    pub svn_changed: Vec<FieldChange<Option<u16>>>,
238    /// Activity flips on a held PRN.
239    pub activity_changed: Vec<FieldChange<bool>>,
240    /// Usability flips on a held PRN.
241    pub usability_changed: Vec<FieldChange<bool>>,
242}
243
/// Spelling used for booleans in the CSV `active` column.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum BoolStyle {
    /// Lowercase `true` / `false`, the conventional CSV form. This is the
    /// default.
    #[default]
    Lower,
    /// Capitalized `True` / `False`, for a consumer that reads Python
    /// booleans.
    Title,
}
253
254/// Render the canonical SP3/RINEX satellite token for a GPS PRN (`7` -> `G07`).
255#[must_use]
256pub fn gps_sp3_id(prn: u16) -> String {
257    format!("{}{prn:02}", GnssSystem::Gps.letter())
258}
259
260/// Build GPS records from already-parsed CelesTrak `gps-ops` OMM records.
261///
262/// CelesTrak does not publish SVN in this feed, so records built from this
263/// source alone have `svn: None`. Records are returned sorted by PRN. Fails with
264/// [`ConstellationError::MissingPrn`] when an `OBJECT_NAME` has no `(PRN nn)`.
265pub fn from_celestrak_omm(omms: &[Omm]) -> Result<Vec<Record>, ConstellationError> {
266    let mut records = Vec::with_capacity(omms.len());
267    for omm in omms {
268        records.push(record_from_omm(omm)?);
269    }
270    records.sort_by_key(|r| (r.system, r.prn));
271    Ok(records)
272}
273
274fn record_from_omm(omm: &Omm) -> Result<Record, ConstellationError> {
275    let object_name = omm.object_name.as_deref();
276    let prn = prn_from_object_name(object_name)
277        .ok_or_else(|| ConstellationError::MissingPrn(omm.object_name.clone()))?;
278
279    Ok(Record {
280        system: GnssSystem::Gps,
281        prn,
282        svn: None,
283        norad_id: omm.norad_cat_id,
284        sp3_id: gps_sp3_id(prn),
285        active: true,
286        usable: true,
287        source: RecordSource {
288            celestrak: Some(CelestrakSource {
289                group: CELESTRAK_GPS_GROUP.to_string(),
290                object_name: omm.object_name.clone(),
291                object_id: omm.object_id.clone(),
292                epoch: Some(epoch_iso8601(omm)),
293                block_type: block_type_from_object_name(object_name),
294            }),
295            navcen: None,
296            navcen_conflict: None,
297        },
298    })
299}
300
301fn epoch_iso8601(omm: &Omm) -> String {
302    let e = &omm.epoch;
303    format!(
304        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:06}",
305        e.year, e.month, e.day, e.hour, e.minute, e.second, e.microsecond
306    )
307}
308
309/// Parse `(PRN nn)` from a CelesTrak object name, stripping leading zeros.
310///
311/// Matches the reference regex `\(PRN\s*0*([0-9]{1,3})\)` (case-insensitive),
312/// including its *search* semantics: every `(PRN` occurrence is tried, so a
313/// later valid `(PRN nn)` is found even if an earlier `(PRN ...)` does not
314/// parse. The PRN is up to three significant digits and must be positive.
315fn prn_from_object_name(name: Option<&str>) -> Option<u16> {
316    let name = name?;
317    let mut from = 0;
318    while let Some(rel) = find_ci(&name[from..], "(PRN") {
319        let after = from + rel + "(PRN".len();
320        if let Some(prn) = prn_at(&name[after..]) {
321            return Some(prn);
322        }
323        from = after;
324    }
325    None
326}
327
/// Match `\s*0*([0-9]{1,3})\)` anchored at the start of `rest`.
///
/// Skips leading whitespace and zeros, then requires one to three digits
/// immediately followed by `)`. Returns the digits as a positive PRN, or
/// `None` when the shape does not match (including an all-zero number or more
/// than three significant digits).
fn prn_at(rest: &str) -> Option<u16> {
    let unpadded = rest.trim_start().trim_start_matches('0');
    let digit_count = unpadded
        .bytes()
        .take(3)
        .take_while(u8::is_ascii_digit)
        .count();
    if digit_count == 0 {
        return None;
    }

    // The digits are ASCII, so `digit_count` is a valid char boundary.
    let (digits, tail) = unpadded.split_at(digit_count);
    if !tail.starts_with(')') {
        return None;
    }
    let prn: u16 = digits.parse().ok()?;
    (prn > 0).then_some(prn)
}
349
350/// Parse the GPS block type from a CelesTrak object name token.
351///
352/// Mirrors the reference patterns, matched as whole words in the order
353/// `IIR-M`, `III`, `IIF`, `IIR` so `BIIRM` is not caught by `BIIR`.
354fn block_type_from_object_name(name: Option<&str>) -> Option<String> {
355    let name = name?;
356    if contains_word_ci(name, "BIIRM") || contains_word_ci(name, "BIIR-M") {
357        Some("IIR-M".to_string())
358    } else if contains_word_ci(name, "BIII") {
359        Some("III".to_string())
360    } else if contains_word_ci(name, "BIIF") {
361        Some("IIF".to_string())
362    } else if contains_word_ci(name, "BIIR") {
363        Some("IIR".to_string())
364    } else {
365        None
366    }
367}
368
369/// Parse NAVCEN's GPS constellation status HTML from raw bytes.
370///
371/// The parser targets the Drupal table-field classes NAVCEN's public GPS
372/// constellation page uses, scanned without an HTML crate. Returns status rows
373/// sorted by PRN; merge them into CelesTrak records with [`merge_navcen`].
374pub fn parse_navcen(bytes: &[u8]) -> Result<Vec<NavcenStatus>, ConstellationError> {
375    let html = core::str::from_utf8(bytes).map_err(|_| ConstellationError::NavcenNotUtf8)?;
376
377    let mut statuses = Vec::new();
378    for row in tr_blocks(html) {
379        if find_ci(row, "views-field-field-gps-prn").is_none() || find_ci(row, "<td").is_none() {
380            continue;
381        }
382        statuses.push(navcen_status_from_row(row)?);
383    }
384
385    if statuses.is_empty() {
386        return Err(ConstellationError::NavcenNoRows);
387    }
388    statuses.sort_by_key(|s| s.prn);
389    Ok(statuses)
390}
391
392fn navcen_status_from_row(row: &str) -> Result<NavcenStatus, ConstellationError> {
393    let prn = navcen_required_int(row, "gps-prn")?;
394    let svn = navcen_optional_int(row, "gps-svn")?;
395    let nanu_type = navcen_text(row, "nanu-type");
396    let active_nanu = navcen_active(row);
397    let usable = !(active_nanu && unusable_nanu_type(nanu_type.as_deref()));
398
399    Ok(NavcenStatus {
400        system: GnssSystem::Gps,
401        prn,
402        svn,
403        usable,
404        active_nanu,
405        nanu_type: blank_to_none(nanu_type),
406        nanu_subject: blank_to_none(navcen_text(row, "nanu-subject")),
407        plane: blank_to_none(navcen_text(row, "gps-con-plane")),
408        slot: blank_to_none(navcen_text(row, "gps-con-slot")),
409        block_type: blank_to_none(navcen_text(row, "gps-con-block-type")),
410        clock: blank_to_none(navcen_text(row, "gps-con-clock")),
411    })
412}
413
414fn navcen_required_int(row: &str, field: &'static str) -> Result<u16, ConstellationError> {
415    let text = navcen_text(row, field);
416    parse_positive_int(text.as_deref().unwrap_or(""), field)
417}
418
419fn navcen_optional_int(row: &str, field: &'static str) -> Result<Option<u16>, ConstellationError> {
420    match navcen_text(row, field).as_deref() {
421        None | Some("") => Ok(None),
422        Some(text) => parse_positive_int(text, field).map(Some),
423    }
424}
425
426fn parse_positive_int(text: &str, field: &'static str) -> Result<u16, ConstellationError> {
427    let trimmed = text.trim();
428    match trimmed.parse::<u16>() {
429        Ok(value) if value > 0 => Ok(value),
430        _ => Err(ConstellationError::NavcenBadField {
431            field,
432            value: trimmed.to_string(),
433        }),
434    }
435}
436
437fn navcen_text(row: &str, field: &str) -> Option<String> {
438    let needle = format!("views-field-field-{field}");
439    td_inner(row, &needle).map(clean_html)
440}
441
442fn navcen_active(row: &str) -> bool {
443    td_inner(row, "nanu-active-check")
444        .map(clean_html)
445        .as_deref()
446        == Some("1")
447}
448
/// Whether a NANU type code marks the satellite as unusable.
///
/// The code is trimmed and compared ASCII-case-insensitively against the
/// fixed list below. `None` is never unusable.
fn unusable_nanu_type(nanu_type: Option<&str>) -> bool {
    const UNUSABLE_CODES: [&str; 5] = ["UNUSABLE", "DECOM", "FCSTDV", "FCSTMX", "FCSTEXTD"];

    nanu_type.map(str::trim).map_or(false, |code| {
        UNUSABLE_CODES
            .iter()
            .any(|known| code.eq_ignore_ascii_case(known))
    })
}
461
462/// Merge NAVCEN status rows into normalized records by PRN.
463///
464/// NAVCEN does not publish NORAD ids, so CelesTrak stays the identity base. When
465/// a PRN exists in both sources and the block types are compatible, this fills
466/// `svn`, updates `usable`, and records the NAVCEN provenance. A NAVCEN row that
467/// matches the PRN but carries an incompatible block type (a PRN transition) is
468/// recorded under `navcen_conflict` rather than merged. Returns records sorted
469/// by PRN.
470///
471/// As in the reference (`Map.new(statuses, &{&1.prn, &1})`), at most one status
472/// is kept per PRN; if the input carries duplicate PRNs the last one wins.
473#[must_use]
474pub fn merge_navcen(records: &[Record], statuses: &[NavcenStatus]) -> Vec<Record> {
475    let mut by_prn: std::collections::HashMap<u16, &NavcenStatus> =
476        std::collections::HashMap::with_capacity(statuses.len());
477    for status in statuses {
478        by_prn.insert(status.prn, status);
479    }
480
481    let mut merged: Vec<Record> = records
482        .iter()
483        .map(|record| match by_prn.get(&record.prn) {
484            Some(status) => merge_status(record, status),
485            None => record.clone(),
486        })
487        .collect();
488    merged.sort_by_key(|r| r.prn);
489    merged
490}
491
492fn merge_status(record: &Record, status: &NavcenStatus) -> Record {
493    let mut out = record.clone();
494    if navcen_compatible(record, status) {
495        out.svn = status.svn;
496        out.usable = status.usable;
497        out.source.navcen = Some(navcen_source(status));
498    } else {
499        out.source.navcen_conflict = Some(navcen_source(status));
500    }
501    out
502}
503
504fn navcen_source(status: &NavcenStatus) -> NavcenSource {
505    NavcenSource {
506        svn: status.svn,
507        block_type: status.block_type.clone(),
508        plane: status.plane.clone(),
509        slot: status.slot.clone(),
510        clock: status.clock.clone(),
511        nanu_type: status.nanu_type.clone(),
512        nanu_subject: status.nanu_subject.clone(),
513        active_nanu: status.active_nanu,
514    }
515}
516
517fn navcen_compatible(record: &Record, status: &NavcenStatus) -> bool {
518    let celestrak_block = record
519        .source
520        .celestrak
521        .as_ref()
522        .and_then(|c| c.block_type.as_deref());
523    let navcen_block = status
524        .block_type
525        .as_deref()
526        .map(|b| b.trim().to_ascii_uppercase());
527
528    match (celestrak_block, navcen_block) {
529        (Some(a), Some(b)) => a == b,
530        _ => true,
531    }
532}
533
534/// Export records as the compact mapping CSV.
535///
536/// The header is `prn,norad_cat_id,active,sp3_id`. The `active` column is `true`
537/// only when both `active` and `usable` hold. Records are sorted by PRN.
538#[must_use]
539pub fn to_csv(records: &[Record], booleans: BoolStyle) -> String {
540    let mut sorted: Vec<&Record> = records.iter().collect();
541    sorted.sort_by_key(|r| r.prn);
542
543    let mut out = String::from("prn,norad_cat_id,active,sp3_id\n");
544    for record in sorted {
545        let active = format_bool(operational(record), booleans);
546        out.push_str(&format!(
547            "{},{},{},{}\n",
548            record.prn, record.norad_id, active, record.sp3_id
549        ));
550    }
551    out
552}
553
554fn format_bool(value: bool, style: BoolStyle) -> &'static str {
555    match (style, value) {
556        (BoolStyle::Lower, true) => "true",
557        (BoolStyle::Lower, false) => "false",
558        (BoolStyle::Title, true) => "True",
559        (BoolStyle::Title, false) => "False",
560    }
561}
562
563fn operational(record: &Record) -> bool {
564    record.active && record.usable
565}
566
567/// Validate catalog identity without an SP3 product.
568///
569/// Reports duplicate PRNs, duplicate NORAD ids, and PRNs that are inactive or
570/// unusable.
571#[must_use]
572pub fn validate(records: &[Record]) -> Validation {
573    validation(records, None)
574}
575
576/// Validate catalog identity against a loaded SP3 product.
577///
578/// `missing_sp3_ids` reports active+usable catalog GPS ids absent from the
579/// product; `extra_sp3_ids` reports GPS ids in the product absent from the
580/// active+usable catalog.
581#[must_use]
582pub fn validate_against_sp3(records: &[Record], sp3: &Sp3) -> Validation {
583    let ids: Vec<String> = sp3
584        .header
585        .satellites
586        .iter()
587        .map(ToString::to_string)
588        .collect();
589    validation(records, Some(&ids))
590}
591
592/// Validate catalog identity against a plain list of SP3/RINEX satellite tokens.
593#[must_use]
594pub fn validate_against_sp3_ids(records: &[Record], sp3_ids: &[&str]) -> Validation {
595    let ids: Vec<String> = sp3_ids.iter().map(|id| (*id).to_string()).collect();
596    validation(records, Some(&ids))
597}
598
599fn validation(records: &[Record], sp3_ids: Option<&[String]>) -> Validation {
600    let mut report = Validation {
601        missing_sp3_ids: Vec::new(),
602        duplicate_prns: duplicates(records.iter().map(|r| r.prn)),
603        duplicate_norad_ids: duplicates(records.iter().map(|r| r.norad_id)),
604        inactive_unusable_prns: inactive_unusable_prns(records),
605        extra_sp3_ids: Vec::new(),
606    };
607
608    if let Some(sp3_ids) = sp3_ids {
609        let catalog: Vec<String> = records
610            .iter()
611            .filter(|r| operational(r))
612            .map(|r| r.sp3_id.to_ascii_uppercase())
613            .collect();
614        let product: Vec<String> = sp3_ids
615            .iter()
616            .map(|id| id.to_ascii_uppercase())
617            .filter(|id| id.starts_with('G'))
618            .collect();
619
620        report.missing_sp3_ids = set_difference(&catalog, &product);
621        report.extra_sp3_ids = set_difference(&product, &catalog);
622    }
623
624    report
625}
626
/// Values that occur more than once in `values`, each listed once, in
/// ascending order.
fn duplicates<T>(values: impl Iterator<Item = T>) -> Vec<T>
where
    T: Ord + Copy,
{
    let mut sorted: Vec<T> = values.collect();
    sorted.sort_unstable();

    // After sorting, every repeat shows up as an equal adjacent pair. A value
    // seen three or more times yields several such pairs, hence the dedup.
    let mut repeated: Vec<T> = sorted
        .windows(2)
        .filter(|pair| pair[0] == pair[1])
        .map(|pair| pair[0])
        .collect();
    repeated.dedup();
    repeated
}
647
648fn inactive_unusable_prns(records: &[Record]) -> Vec<u16> {
649    let mut prns: Vec<u16> = records
650        .iter()
651        .filter(|r| !operational(r))
652        .map(|r| r.prn)
653        .collect();
654    prns.sort_unstable();
655    prns.dedup();
656    prns
657}
658
/// Ids present in `left` but not in `right`, sorted and without repeats.
fn set_difference(left: &[String], right: &[String]) -> Vec<String> {
    let mut only_left = Vec::new();
    for id in left {
        if right.iter().all(|other| other != id) {
            only_left.push(id.clone());
        }
    }
    only_left.sort();
    only_left.dedup();
    only_left
}
669
670/// Returns `true` when a validation report has no findings.
671#[must_use]
672pub fn is_valid(report: &Validation) -> bool {
673    report.missing_sp3_ids.is_empty()
674        && report.duplicate_prns.is_empty()
675        && report.duplicate_norad_ids.is_empty()
676        && report.inactive_unusable_prns.is_empty()
677        && report.extra_sp3_ids.is_empty()
678}
679
680/// Validate against a plain SP3 id list and fail unless the catalog is clean.
681///
682/// A build-time gate: returns `Ok(())` when the report has no findings, otherwise
683/// [`ConstellationError::Sp3Validation`] describing them.
684pub fn validate_against_sp3_ids_strict(
685    records: &[Record],
686    sp3_ids: &[&str],
687) -> Result<(), ConstellationError> {
688    let report = validate_against_sp3_ids(records, sp3_ids);
689    if is_valid(&report) {
690        Ok(())
691    } else {
692        Err(ConstellationError::Sp3Validation(describe_findings(
693            &report,
694        )))
695    }
696}
697
698fn describe_findings(report: &Validation) -> String {
699    let mut parts = Vec::new();
700    if !report.missing_sp3_ids.is_empty() {
701        parts.push(format!("missing_sp3_ids: {:?}", report.missing_sp3_ids));
702    }
703    if !report.extra_sp3_ids.is_empty() {
704        parts.push(format!("extra_sp3_ids: {:?}", report.extra_sp3_ids));
705    }
706    if !report.duplicate_prns.is_empty() {
707        parts.push(format!("duplicate_prns: {:?}", report.duplicate_prns));
708    }
709    if !report.duplicate_norad_ids.is_empty() {
710        parts.push(format!(
711            "duplicate_norad_ids: {:?}",
712            report.duplicate_norad_ids
713        ));
714    }
715    if !report.inactive_unusable_prns.is_empty() {
716        parts.push(format!(
717            "inactive_unusable_prns: {:?}",
718            report.inactive_unusable_prns
719        ));
720    }
721    parts.join("; ")
722}
723
724/// Compare two catalog snapshots by `(system, prn)` identity.
725///
726/// Assumes each input has at most one record per `(system, prn)`; run
727/// [`validate`] first on hand-edited catalogs and treat duplicate findings as
728/// malformed input rather than a constellation change.
729#[must_use]
730pub fn diff(previous: &[Record], current: &[Record]) -> Diff {
731    let key = |r: &Record| (r.system, r.prn);
732
733    let added: Vec<Record> = current
734        .iter()
735        .filter(|c| !previous.iter().any(|p| key(p) == key(c)))
736        .cloned()
737        .collect();
738    let removed: Vec<Record> = previous
739        .iter()
740        .filter(|p| !current.iter().any(|c| key(c) == key(p)))
741        .cloned()
742        .collect();
743
744    let mut added = added;
745    let mut removed = removed;
746    added.sort_by_key(|r| (r.system, r.prn));
747    removed.sort_by_key(|r| (r.system, r.prn));
748
749    let mut common: Vec<(GnssSystem, u16)> = previous
750        .iter()
751        .filter_map(|p| current.iter().find(|c| key(c) == key(p)).map(|_| key(p)))
752        .collect();
753    common.sort_unstable();
754
755    let pairs: Vec<(&Record, &Record)> = common
756        .iter()
757        .map(|k| {
758            let p = previous.iter().find(|r| key(r) == *k).expect("common key");
759            let c = current.iter().find(|r| key(r) == *k).expect("common key");
760            (p, c)
761        })
762        .collect();
763
764    Diff {
765        added,
766        removed,
767        norad_reassigned: changes(&pairs, |r| r.norad_id),
768        sp3_id_changed: changes(&pairs, |r| r.sp3_id.clone()),
769        svn_changed: changes(&pairs, |r| r.svn),
770        activity_changed: changes(&pairs, |r| r.active),
771        usability_changed: changes(&pairs, |r| r.usable),
772    }
773}
774
775fn changes<T, F>(pairs: &[(&Record, &Record)], field: F) -> Vec<FieldChange<T>>
776where
777    T: PartialEq,
778    F: Fn(&Record) -> T,
779{
780    pairs
781        .iter()
782        .filter_map(|(p, c)| {
783            let from = field(p);
784            let to = field(c);
785            if from == to {
786                None
787            } else {
788                Some(FieldChange {
789                    system: p.system,
790                    prn: p.prn,
791                    from,
792                    to,
793                })
794            }
795        })
796        .collect()
797}
798
799/// Returns `true` when a diff has any findings.
800#[must_use]
801pub fn changed(diff: &Diff) -> bool {
802    !diff.added.is_empty()
803        || !diff.removed.is_empty()
804        || !diff.norad_reassigned.is_empty()
805        || !diff.sp3_id_changed.is_empty()
806        || !diff.svn_changed.is_empty()
807        || !diff.activity_changed.is_empty()
808        || !diff.usability_changed.is_empty()
809}
810
811// ── HTML/text scanning helpers (dependency-light) ────────────────────────────
812
/// Map an empty string to `None`; any other value passes through untouched.
fn blank_to_none(value: Option<String>) -> Option<String> {
    match value {
        Some(text) if !text.is_empty() => Some(text),
        _ => None,
    }
}
816
/// Byte offset of the first ASCII-case-insensitive occurrence of `needle` in
/// `haystack`. An empty needle matches at offset 0.
fn find_ci(haystack: &str, needle: &str) -> Option<usize> {
    let pattern = needle.as_bytes();
    if pattern.is_empty() {
        return Some(0);
    }
    // `windows` yields nothing when the haystack is shorter than the pattern.
    haystack
        .as_bytes()
        .windows(pattern.len())
        .position(|window| window.eq_ignore_ascii_case(pattern))
}
834
/// Whether `b` is a word byte: an ASCII letter, an ASCII digit, or `_`.
fn is_word_byte(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
838
839/// Case-insensitive whole-word match, mirroring regex `\bword\b` boundaries.
840fn contains_word_ci(haystack: &str, word: &str) -> bool {
841    let hay = haystack.as_bytes();
842    let need = word.as_bytes();
843    let n = need.len();
844    if n == 0 || hay.len() < n {
845        return false;
846    }
847    (0..=hay.len() - n).any(|i| {
848        let matched = hay[i..i + n]
849            .iter()
850            .zip(need)
851            .all(|(a, b)| a.eq_ignore_ascii_case(b));
852        if !matched {
853            return false;
854        }
855        let left_ok = i == 0 || !is_word_byte(hay[i - 1]);
856        let right_ok = i + n == hay.len() || !is_word_byte(hay[i + n]);
857        left_ok && right_ok
858    })
859}
860
861/// Split HTML into the inner text of each `<tr>...</tr>` block.
862fn tr_blocks(html: &str) -> Vec<&str> {
863    let mut out = Vec::new();
864    let mut rest = html;
865    while let Some(start) = find_ci(rest, "<tr") {
866        let Some(gt) = rest[start..].find('>') else {
867            break;
868        };
869        let content_start = start + gt + 1;
870        let Some(close) = find_ci(&rest[content_start..], "</tr>") else {
871            break;
872        };
873        out.push(&rest[content_start..content_start + close]);
874        rest = &rest[content_start + close + "</tr>".len()..];
875    }
876    out
877}
878
879/// Inner text of the first `<td>` whose attributes contain `class_needle`.
880fn td_inner<'a>(row: &'a str, class_needle: &str) -> Option<&'a str> {
881    let mut rest = row;
882    loop {
883        let start = find_ci(rest, "<td")?;
884        let gt = rest[start..].find('>')?;
885        let attrs = &rest[start..start + gt];
886        let content_start = start + gt + 1;
887        let close = find_ci(&rest[content_start..], "</td>")?;
888        let inner = &rest[content_start..content_start + close];
889        if find_ci(attrs, class_needle).is_some() {
890            return Some(inner);
891        }
892        rest = &rest[content_start + close + "</td>".len()..];
893    }
894}
895
896/// Strip tags, unescape entities, and collapse whitespace, matching the
897/// reference `clean_html`.
898fn clean_html(text: &str) -> String {
899    let mut stripped = String::with_capacity(text.len());
900    let mut in_tag = false;
901    for c in text.chars() {
902        match c {
903            '<' => in_tag = true,
904            '>' => in_tag = false,
905            _ if !in_tag => stripped.push(c),
906            _ => {}
907        }
908    }
909    let unescaped = html_unescape(&stripped);
910    unescaped.split_whitespace().collect::<Vec<_>>().join(" ")
911}
912
913/// Decode HTML entities: the named set the reference handles plus numeric
914/// character references (`&#160;`, `&#xA0;`). Numeric decoding is a superset of
915/// the reference's named-only set, so it never changes a reference-covered case
916/// but keeps generated markup (numeric `&nbsp;`, `&apos;`) from leaking literal
917/// `&#160;` into a cell and breaking, for example, optional-integer parsing.
918fn html_unescape(text: &str) -> String {
919    let mut out = String::with_capacity(text.len());
920    let mut rest = text;
921    while let Some(amp) = rest.find('&') {
922        out.push_str(&rest[..amp]);
923        let tail = &rest[amp..];
924        if let Some((decoded, consumed)) = decode_entity(tail) {
925            out.push(decoded);
926            rest = &tail[consumed..];
927        } else {
928            out.push('&');
929            rest = &tail[1..];
930        }
931    }
932    out.push_str(rest);
933    out
934}
935
/// Decode a single entity at the start of `s` (which begins with `&`).
///
/// Recognized forms:
/// - the named entities `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;` and
///   `&nbsp;` (decoded to a plain space), plus the literal `&#39;`;
/// - numeric character references `&#DDD;` (decimal) and `&#xHHH;` /
///   `&#XHHH;` (hexadecimal).
///
/// Returns the decoded char together with the number of bytes of `s` it
/// occupied, or `None` if `s` does not start with a recognized entity. A
/// numeric reference is rejected when it has no digits, contains anything
/// other than digits of its radix before the `;` (including a sign), lacks the
/// terminating `;`, overflows `u32`, or does not name a valid `char`.
fn decode_entity(s: &str) -> Option<(char, usize)> {
    for (entity, decoded) in [
        ("&amp;", '&'),
        ("&lt;", '<'),
        ("&gt;", '>'),
        ("&quot;", '"'),
        ("&#39;", '\''),
        ("&apos;", '\''),
        ("&nbsp;", ' '),
    ] {
        if s.starts_with(entity) {
            return Some((decoded, entity.len()));
        }
    }

    // Numeric character reference: &#DDD; or &#xHHH;
    let body = s.strip_prefix("&#")?;
    let (digits_and_rest, radix) = match body.strip_prefix(['x', 'X']) {
        Some(hex) => (hex, 16),
        None => (body, 10),
    };

    // Measure the leading run of digits valid for the radix and require the
    // `;` to follow it directly. Validating here, instead of slicing up to the
    // first `;` and trusting `from_str_radix`, matters for two reasons:
    // `from_str_radix` accepts a leading `+`, which would decode the malformed
    // `&#+65;`, and searching for `;` would scan arbitrarily far ahead on
    // every stray `&#`.
    let digit_len = digits_and_rest
        .bytes()
        .take_while(|&b| char::from(b).is_digit(radix))
        .count();
    if digit_len == 0 || digits_and_rest.as_bytes().get(digit_len) != Some(&b';') {
        return None;
    }

    // Overflowing values and code points that are not valid `char`s
    // (surrogates, beyond U+10FFFF) are left undecoded.
    let code = u32::from_str_radix(&digits_and_rest[..digit_len], radix).ok()?;
    let decoded = char::from_u32(code)?;

    // Consumed bytes: `&#` (plus `x`/`X` when present), the digits, and `;`.
    let prefix_len = s.len() - digits_and_rest.len();
    Some((decoded, prefix_len + digit_len + 1))
}
968
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn prn_parses_padded_and_multi_digit() {
        // Zero-padded and multi-digit PRNs both parse to their numeric value.
        let cases = [
            ("GPS BIIF-8  (PRN 03)", 3),
            ("GPS BIII-10 (PRN 13)", 13),
            ("X (PRN 003)", 3),
        ];
        for (object_name, prn) in cases {
            assert_eq!(prn_from_object_name(Some(object_name)), Some(prn));
        }
    }

    #[test]
    fn prn_search_skips_unparseable_earlier_occurrence() {
        // The first case has a leading "(PRN ...)" that does not parse; it must
        // not block the later valid one, matching the reference regex's search
        // semantics. The remaining cases yield no PRN at all.
        let cases = [
            ("GPS (PRN X) BIIF (PRN 07)", Some(7)),
            ("GPS WITHOUT PRN", None),
            ("(PRN 000)", None),
        ];
        for (object_name, expected) in cases {
            assert_eq!(prn_from_object_name(Some(object_name)), expected);
        }
    }

    #[test]
    fn html_unescape_decodes_named_and_numeric_entities() {
        let cases = [
            // Named entity and the literal `&#39;`.
            ("a &amp; b", "a & b"),
            ("&#39;x&#39;", "'x'"),
            // Numeric references for NBSP (decimal and hex) decode to U+00A0.
            ("&#160;", "\u{a0}"),
            ("&#xA0;", "\u{a0}"),
            // An unrecognized "&" is left literal rather than dropped.
            ("AT&T", "AT&T"),
        ];
        for (input, expected) in cases {
            assert_eq!(html_unescape(input), expected);
        }
    }

    #[test]
    fn optional_int_treats_numeric_nbsp_cell_as_blank() {
        // A cell holding nothing but a numeric NBSP is reported as absent
        // rather than as a parse error.
        let nbsp_only_cell = r#"<td class="views-field-field-gps-svn">&#160;</td>"#;
        assert_eq!(navcen_optional_int(nbsp_only_cell, "gps-svn"), Ok(None));
    }
}