Skip to main content

uls_parser/
dat.rs

1//! DAT file parser for pipe-delimited ULS records.
2
3use std::fs::File;
4use std::io::{BufRead, BufReader, Read};
5use std::path::Path;
6
7use phf::phf_set;
8use uls_core::codes::RecordType;
9use uls_core::records::*;
10
11use crate::{ParseError, Result};
12
13/// Known valid record type codes (2 uppercase letters).
14/// Uses compile-time perfect hash for O(1) lookup.
15static VALID_RECORD_TYPES: phf::Set<&'static str> = phf_set! {
16    "A2", "A3", "AC", "AD", "AM", "AN", "AS", "AT", "BC", "BD", "BF", "BL", "BO", "CF", "CG", "CH",
17    "CL", "CO", "CP", "CS", "CW", "EM", "EN", "F2", "FA", "FC", "FF", "FH", "FR", "FT", "HD", "HS",
18    "IR", "L2", "L3", "L4", "LA", "LF", "LH", "LM", "LO", "LS", "MC", "MF", "MH", "MI", "MK", "ML",
19    "MP", "MW", "O2", "OP", "PA", "PC", "PF", "PH", "PI", "PL", "RA", "RC", "RE", "RG", "RI", "RS",
20    "SA", "SC", "SE", "SF", "SG", "SH", "SL", "SR", "SS", "SV", "TA", "TP", "UA", "UC", "UF", "UL",
21    "UM", "VC",
22};
23
24/// Check if a string is a valid record type prefix.
25#[inline]
26fn is_valid_record_type(s: &str) -> bool {
27    s.len() == 2 && VALID_RECORD_TYPES.contains(s)
28}
29
/// One logical record read from a DAT file, split into its `|`-separated fields.
///
/// The type is `Clone` and comparable so callers and tests can copy records
/// and check them for equality directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedLine {
    /// 1-indexed number of the line on which the record starts.
    pub line_number: usize,
    /// The record type code; a copy of the first field.
    pub record_type: String,
    /// All raw fields in file order, including the record type at index 0.
    pub fields: Vec<String>,
}
40
41impl ParsedLine {
42    /// Parse a line into fields.
43    pub fn from_line(line: &str, line_number: usize) -> Result<Self> {
44        let fields: Vec<String> = line.split('|').map(|s| s.to_string()).collect();
45
46        if fields.is_empty() {
47            return Err(ParseError::InvalidFormat {
48                line: line_number,
49                message: "empty line".to_string(),
50            });
51        }
52
53        let record_type = fields[0].clone();
54
55        Ok(Self {
56            line_number,
57            record_type,
58            fields,
59        })
60    }
61
62    /// Get a field as a string slice, or empty string if out of bounds.
63    pub fn field(&self, index: usize) -> &str {
64        self.fields.get(index).map(|s| s.as_str()).unwrap_or("")
65    }
66
67    /// Get field references suitable for from_fields methods.
68    pub fn field_refs(&self) -> Vec<&str> {
69        self.fields.iter().map(|s| s.as_str()).collect()
70    }
71
72    /// Append a continuation line to this record.
73    /// The continuation text is appended to the last non-empty field.
74    pub fn append_continuation(&mut self, line: &str) {
75        // Find the last field that looks like it could have content (description field)
76        // For most records with continuation, this is field 5 (description) or similar
77        // We append to the last field before the trailing empty fields
78
79        // Trim trailing empty fields to find the real last field
80        let mut last_content_idx = self.fields.len().saturating_sub(1);
81        while last_content_idx > 0 && self.fields[last_content_idx].is_empty() {
82            last_content_idx -= 1;
83        }
84
85        // Append the continuation (with a space separator if there's existing content)
86        if !self.fields[last_content_idx].is_empty() {
87            self.fields[last_content_idx].push(' ');
88        }
89        // Strip pipe delimiters from continuation line and append
90        let continuation = line.trim_matches('|').trim();
91        self.fields[last_content_idx].push_str(continuation);
92    }
93
94    /// Convert to a typed ULS record.
95    /// Uses stack-allocated array to avoid heap allocation for field references.
96    pub fn to_record(&self) -> Result<UlsRecord> {
97        // Build field references slice - reuse existing from_fields methods
98        // Use a stack-allocated array for small records, heap for large
99        const STACK_LIMIT: usize = 64;
100
101        if self.fields.len() <= STACK_LIMIT {
102            // Stack-allocated path for most records
103            let mut refs_arr: [&str; STACK_LIMIT] = [""; STACK_LIMIT];
104            for (i, s) in self.fields.iter().take(STACK_LIMIT).enumerate() {
105                refs_arr[i] = s.as_str();
106            }
107            let refs = &refs_arr[..self.fields.len()];
108            self.to_record_from_refs(refs)
109        } else {
110            // Heap fallback for unusually large records
111            let refs = self.field_refs();
112            self.to_record_from_refs(&refs)
113        }
114    }
115
116    /// Internal helper to convert to record from field references.
117    fn to_record_from_refs(&self, refs: &[&str]) -> Result<UlsRecord> {
118        match self.record_type.as_str() {
119            "HD" => Ok(UlsRecord::Header(HeaderRecord::from_fields(refs))),
120            "EN" => Ok(UlsRecord::Entity(EntityRecord::from_fields(refs))),
121            "AM" => Ok(UlsRecord::Amateur(AmateurRecord::from_fields(refs))),
122            "AD" => Ok(UlsRecord::ApplicationDetail(
123                ApplicationDetailRecord::from_fields(refs),
124            )),
125            "HS" => Ok(UlsRecord::History(HistoryRecord::from_fields(refs))),
126            "CO" => Ok(UlsRecord::Comment(CommentRecord::from_fields(refs))),
127            "LO" => Ok(UlsRecord::Location(LocationRecord::from_fields(refs))),
128            "FR" => Ok(UlsRecord::Frequency(FrequencyRecord::from_fields(refs))),
129            "AN" => Ok(UlsRecord::Antenna(AntennaRecord::from_fields(refs))),
130            "EM" => Ok(UlsRecord::Emission(EmissionRecord::from_fields(refs))),
131            "SC" => Ok(UlsRecord::SpecialCondition(
132                SpecialConditionRecord::from_fields(refs),
133            )),
134            "SF" => Ok(UlsRecord::FreeformCondition(
135                FreeformConditionRecord::from_fields(refs),
136            )),
137            "VC" => Ok(UlsRecord::VanityCallSign(
138                VanityCallSignRecord::from_fields(refs),
139            )),
140            "AC" => Ok(UlsRecord::Aircraft(AircraftRecord::from_fields(refs))),
141            "SH" => Ok(UlsRecord::Ship(ShipRecord::from_fields(refs))),
142            // For record types not yet fully implemented, return raw
143            _ => {
144                if let Ok(rt) = self.record_type.parse::<RecordType>() {
145                    Ok(UlsRecord::Raw {
146                        record_type: rt,
147                        fields: self.fields.clone(),
148                    })
149                } else {
150                    Err(ParseError::UnknownRecordType(self.record_type.clone()))
151                }
152            }
153        }
154    }
155}
156
157/// Parse a raw line string for fields, without requiring it to be a valid record.
158fn parse_raw_fields(line: &str) -> Vec<String> {
159    line.split('|').map(|s| s.to_string()).collect()
160}
161
162/// Check if a line is a continuation (doesn't start with a valid record type).
163fn is_continuation_line(line: &str) -> bool {
164    if line.is_empty() {
165        return true;
166    }
167
168    let fields = parse_raw_fields(line);
169    if fields.is_empty() {
170        return true;
171    }
172
173    let first_field = &fields[0];
174    !is_valid_record_type(first_field)
175}
176
177/// Reader for DAT files that yields parsed lines.
178/// Automatically handles multi-line continuation records.
179pub struct DatReader<R: Read> {
180    reader: BufReader<R>,
181    line_number: usize,
182    buffer: String,
183    /// Buffered/pending record that may receive continuation lines
184    pending_record: Option<ParsedLine>,
185}
186
187impl<R: Read> DatReader<R> {
188    /// Create a new DAT reader from any Read source.
189    pub fn new(reader: R) -> Self {
190        Self {
191            reader: BufReader::new(reader),
192            line_number: 0,
193            buffer: String::new(),
194            pending_record: None,
195        }
196    }
197
198    /// Read a raw line from the file.
199    fn read_raw_line(&mut self) -> Result<Option<String>> {
200        self.buffer.clear();
201        let bytes_read = self.reader.read_line(&mut self.buffer)?;
202
203        if bytes_read == 0 {
204            return Ok(None);
205        }
206
207        self.line_number += 1;
208
209        // Trim trailing newlines/carriage returns
210        let line = self.buffer.trim_end_matches(&['\r', '\n'][..]).to_string();
211        Ok(Some(line))
212    }
213
214    /// Read the next complete record from the file.
215    /// Handles multi-line continuation by merging lines until a new record starts.
216    pub fn next_line(&mut self) -> Result<Option<ParsedLine>> {
217        loop {
218            match self.read_raw_line()? {
219                None => {
220                    // EOF - return any pending record
221                    return Ok(self.pending_record.take());
222                }
223                Some(line) => {
224                    if line.is_empty() {
225                        // Skip truly empty lines
226                        continue;
227                    }
228
229                    if is_continuation_line(&line) {
230                        // This is a continuation - append to pending record if we have one
231                        if let Some(ref mut pending) = self.pending_record {
232                            pending.append_continuation(&line);
233                        }
234                        // If no pending record, we just skip orphan continuation lines
235                        continue;
236                    }
237
238                    // This is a new record
239                    let new_record = ParsedLine::from_line(&line, self.line_number)?;
240
241                    // Return the previous pending record (if any) and buffer this new one
242                    let to_return = self.pending_record.replace(new_record);
243
244                    if to_return.is_some() {
245                        return Ok(to_return);
246                    }
247                    // If there was no pending record, loop to read more
248                }
249            }
250        }
251    }
252
253    /// Returns the current line number.
254    pub fn line_number(&self) -> usize {
255        self.line_number
256    }
257}
258
259impl DatReader<File> {
260    /// Open a DAT file for reading.
261    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
262        let file = File::open(path)?;
263        Ok(Self::new(file))
264    }
265}
266
267impl<R: Read> Iterator for DatReader<R> {
268    type Item = Result<ParsedLine>;
269
270    fn next(&mut self) -> Option<Self::Item> {
271        match self.next_line() {
272            Ok(Some(line)) => Some(Ok(line)),
273            Ok(None) => None,
274            Err(e) => Some(Err(e)),
275        }
276    }
277}
278
279/// Parse a single line (without continuation handling).
280/// Use DatReader for proper multi-line handling.
281pub fn parse_line(line: &str, line_number: usize) -> Result<ParsedLine> {
282    ParsedLine::from_line(line, line_number)
283}
284
285/// Convenience function to parse a complete DAT file into records.
286pub fn parse_file<P: AsRef<Path>>(path: P) -> Result<Vec<UlsRecord>> {
287    let reader = DatReader::open(path)?;
288    let mut records = Vec::new();
289
290    for line_result in reader {
291        let line = line_result?;
292        let record = line.to_record()?;
293        records.push(record);
294    }
295
296    Ok(records)
297}
298
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed line yields one field per `|`-separated column.
    #[test]
    fn test_parse_simple_line() {
        let parsed = ParsedLine::from_line("HD|123|456|W1AW|A|HA", 1).unwrap();

        assert_eq!(parsed.record_type, "HD");
        assert_eq!(parsed.fields.len(), 6);
        assert_eq!(parsed.field(3), "W1AW");
    }

    /// Lines count as continuations unless they start with a known record type.
    #[test]
    fn test_is_continuation_line() {
        let record_starts = ["CO|123|test", "HD|456|data"];
        for line in record_starts.iter() {
            assert!(!is_continuation_line(line), "{:?} starts a record", line);
        }

        let continuations = [
            "License cancelled",
            "||",
            "",
            "Some text without record type",
        ];
        for line in continuations.iter() {
            assert!(is_continuation_line(line), "{:?} is a continuation", line);
        }
    }

    /// Continuation text is merged into the preceding record by the reader.
    #[test]
    fn test_continuation_handling() {
        let data = concat!(
            "CO|123||W1AW|01/01/2024|First line of comment||\n",
            "continued text here||\n",
            "HD|456||W1AW|A|HA||\n",
        );

        let lines: Vec<_> = DatReader::new(data.as_bytes()).collect();
        assert_eq!(lines.len(), 2);

        // The comment record carries the merged continuation text.
        let co_record = lines[0].as_ref().unwrap();
        assert_eq!(co_record.record_type, "CO");
        assert!(co_record.field(5).contains("continued text here"));

        // The header record follows unchanged.
        let hd_record = lines[1].as_ref().unwrap();
        assert_eq!(hd_record.record_type, "HD");
    }

    /// Only the exact two-character codes in the table are accepted.
    #[test]
    fn test_is_valid_record_type() {
        for code in ["HD", "CO", "EN"].iter() {
            assert!(is_valid_record_type(code), "{:?} should be valid", code);
        }
        for code in ["XX", "License", ""].iter() {
            assert!(!is_valid_record_type(code), "{:?} should be invalid", code);
        }
    }
}