ngdp_bpsv/
document.rs

//! BPSV document representation: rows and documents in borrowed and owned forms
2
3use crate::error::{Error, Result};
4use crate::schema::BpsvSchema;
5use crate::value::BpsvValue;
6use std::collections::HashMap;
7
8/// Common functionality for BPSV row types
9pub trait BpsvRowOps {
10    /// Get the number of values in this row
11    fn len(&self) -> usize;
12
13    /// Check if the row is empty
14    fn is_empty(&self) -> bool {
15        self.len() == 0
16    }
17
18    /// Get a raw string value by index
19    fn get_raw(&self, index: usize) -> Option<&str>;
20
21    /// Get a raw string value by field name using the schema
22    fn get_raw_by_name(&self, field_name: &str, schema: &BpsvSchema) -> Option<&str> {
23        schema
24            .get_field(field_name)
25            .and_then(|field| self.get_raw(field.index))
26    }
27
28    /// Convert row to a map of field names to raw values
29    fn to_map(&self, schema: &BpsvSchema) -> Result<HashMap<String, String>> {
30        if self.len() != schema.field_count() {
31            return Err(Error::SchemaMismatch {
32                expected: schema.field_count(),
33                actual: self.len(),
34            });
35        }
36
37        let mut map = HashMap::new();
38        for (field_index, field) in schema.fields().iter().enumerate() {
39            if let Some(value) = self.get_raw(field_index) {
40                map.insert(field.name.clone(), value.to_string());
41            }
42        }
43        Ok(map)
44    }
45}
46
47/// A single row in a BPSV document with borrowed data
48#[derive(Debug, Clone, PartialEq)]
49pub struct BpsvRow<'a> {
50    /// Raw string values as they appear in the BPSV (borrowed)
51    raw_values: Vec<&'a str>,
52    /// Typed values (lazy-loaded)
53    typed_values: Option<Vec<BpsvValue>>,
54}
55
56impl BpsvRowOps for BpsvRow<'_> {
57    fn len(&self) -> usize {
58        if let Some(typed) = &self.typed_values {
59            typed.len()
60        } else {
61            self.raw_values.len()
62        }
63    }
64
65    fn get_raw(&self, index: usize) -> Option<&str> {
66        self.raw_values.get(index).copied()
67    }
68}
69
70impl<'a> BpsvRow<'a> {
71    /// Create a new row from raw string slices
72    pub fn new(values: Vec<&'a str>) -> Self {
73        Self {
74            raw_values: values,
75            typed_values: None,
76        }
77    }
78
79    /// Create a new row from typed values
80    pub fn from_typed_values(values: Vec<BpsvValue>) -> BpsvRow<'static> {
81        // For typed values, we need to allocate since we're creating new data
82        BpsvRow {
83            raw_values: vec![],
84            typed_values: Some(values),
85        }
86    }
87
88    /// Get the number of values in this row
89    pub fn len(&self) -> usize {
90        BpsvRowOps::len(self)
91    }
92
93    /// Check if the row is empty
94    pub fn is_empty(&self) -> bool {
95        BpsvRowOps::is_empty(self)
96    }
97
98    /// Get a raw string value by index
99    pub fn get_raw(&self, index: usize) -> Option<&str> {
100        BpsvRowOps::get_raw(self, index)
101    }
102
103    /// Get a raw string value by field name using the schema
104    pub fn get_raw_by_name(&self, field_name: &str, schema: &BpsvSchema) -> Option<&str> {
105        BpsvRowOps::get_raw_by_name(self, field_name, schema)
106    }
107
108    /// Get all raw values
109    pub fn raw_values(&self) -> &[&'a str] {
110        &self.raw_values
111    }
112
113    /// Parse and get typed values using the schema
114    pub fn get_typed_values(&mut self, schema: &BpsvSchema) -> Result<&[BpsvValue]> {
115        if self.typed_values.is_none() {
116            if self.raw_values.len() != schema.field_count() {
117                return Err(Error::SchemaMismatch {
118                    expected: schema.field_count(),
119                    actual: self.raw_values.len(),
120                });
121            }
122
123            let mut typed = Vec::new();
124            for (value, field) in self.raw_values.iter().zip(schema.fields()) {
125                let typed_value = BpsvValue::parse(value, &field.field_type)?;
126                typed.push(typed_value);
127            }
128            self.typed_values = Some(typed);
129        }
130
131        Ok(self.typed_values.as_ref().unwrap())
132    }
133
134    /// Get a typed value by index
135    pub fn get_typed(&mut self, index: usize, schema: &BpsvSchema) -> Result<Option<&BpsvValue>> {
136        let typed_values = self.get_typed_values(schema)?;
137        Ok(typed_values.get(index))
138    }
139
140    /// Get a typed value by field name
141    pub fn get_typed_by_name(
142        &mut self,
143        field_name: &str,
144        schema: &BpsvSchema,
145    ) -> Result<Option<&BpsvValue>> {
146        if let Some(field) = schema.get_field(field_name) {
147            self.get_typed(field.index, schema)
148        } else {
149            Err(Error::FieldNotFound {
150                field: field_name.to_string(),
151            })
152        }
153    }
154
155    /// Convert row to a map of field names to raw values
156    pub fn to_map(&self, schema: &BpsvSchema) -> Result<HashMap<String, String>> {
157        BpsvRowOps::to_map(self, schema)
158    }
159
160    /// Convert row to a map of field names to typed values
161    pub fn to_typed_map(&mut self, schema: &BpsvSchema) -> Result<HashMap<String, BpsvValue>> {
162        let typed_values = self.get_typed_values(schema)?;
163        let mut map = HashMap::new();
164
165        for (field, value) in schema.fields().iter().zip(typed_values.iter()) {
166            map.insert(field.name.clone(), value.clone());
167        }
168        Ok(map)
169    }
170
171    /// Convert to BPSV line format
172    pub fn to_bpsv_line(&self) -> String {
173        if let Some(typed) = &self.typed_values {
174            typed
175                .iter()
176                .map(|v| v.to_bpsv_string())
177                .collect::<Vec<_>>()
178                .join("|")
179        } else {
180            self.raw_values.join("|")
181        }
182    }
183}
184
185/// An owned version of BpsvRow for when we need to store data
186#[derive(Debug, Clone, PartialEq)]
187#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
188pub struct OwnedBpsvRow {
189    /// Raw string values as they appear in the BPSV
190    pub raw_values: Vec<String>,
191    /// Typed values (lazy-loaded)
192    pub typed_values: Option<Vec<BpsvValue>>,
193}
194
195impl BpsvRowOps for OwnedBpsvRow {
196    fn len(&self) -> usize {
197        if let Some(typed) = &self.typed_values {
198            typed.len()
199        } else {
200            self.raw_values.len()
201        }
202    }
203
204    fn get_raw(&self, index: usize) -> Option<&str> {
205        self.raw_values.get(index).map(|s| s.as_str())
206    }
207}
208
209impl OwnedBpsvRow {
210    /// Create a new row from owned string values
211    pub fn new(values: Vec<String>) -> Self {
212        Self {
213            raw_values: values,
214            typed_values: None,
215        }
216    }
217
218    /// Create from a borrowed row
219    pub fn from_borrowed(row: &BpsvRow<'_>) -> Self {
220        Self {
221            raw_values: row.raw_values.iter().map(|&s| s.to_string()).collect(),
222            typed_values: row.typed_values.clone(),
223        }
224    }
225
226    /// Convert to borrowed row
227    pub fn as_borrowed(&self) -> BpsvRow<'_> {
228        BpsvRow {
229            raw_values: self.raw_values.iter().map(|s| s.as_str()).collect(),
230            typed_values: self.typed_values.clone(),
231        }
232    }
233
234    /// Get the number of values
235    pub fn len(&self) -> usize {
236        BpsvRowOps::len(self)
237    }
238
239    /// Check if empty
240    pub fn is_empty(&self) -> bool {
241        BpsvRowOps::is_empty(self)
242    }
243
244    /// Get a raw string value by index
245    pub fn get_raw(&self, index: usize) -> Option<&str> {
246        BpsvRowOps::get_raw(self, index)
247    }
248
249    /// Get a raw string value by field name using the schema
250    pub fn get_raw_by_name(&self, field_name: &str, schema: &BpsvSchema) -> Option<&str> {
251        BpsvRowOps::get_raw_by_name(self, field_name, schema)
252    }
253
254    /// Convert row to a map of field names to raw values
255    pub fn to_map(&self, schema: &BpsvSchema) -> Result<HashMap<String, String>> {
256        BpsvRowOps::to_map(self, schema)
257    }
258}
259
260/// Represents a complete BPSV document with borrowed data
261#[derive(Debug, Clone, PartialEq)]
262pub struct BpsvDocument<'a> {
263    /// The original content (for zero-copy)
264    content: &'a str,
265    /// The schema defining field structure
266    schema: BpsvSchema,
267    /// Sequence number (optional)
268    sequence_number: Option<u32>,
269    /// All data rows
270    rows: Vec<BpsvRow<'a>>,
271}
272
273impl<'a> BpsvDocument<'a> {
274    /// Create a new BPSV document
275    pub fn new(content: &'a str, schema: BpsvSchema) -> Self {
276        Self {
277            content,
278            schema,
279            sequence_number: None,
280            rows: Vec::new(),
281        }
282    }
283
284    /// Parse a BPSV document from string content
285    ///
286    /// # Examples
287    ///
288    /// ```
289    /// use ngdp_bpsv::BpsvDocument;
290    ///
291    /// let content = "Region!STRING:0|BuildId!DEC:4\n## seqn = 12345\nus|1234\neu|5678";
292    ///
293    /// let doc = BpsvDocument::parse(content)?;
294    /// assert_eq!(doc.sequence_number(), Some(12345));
295    /// assert_eq!(doc.rows().len(), 2);
296    /// # Ok::<(), ngdp_bpsv::Error>(())
297    /// ```
298    pub fn parse(content: &'a str) -> Result<Self> {
299        crate::parser::BpsvParser::parse(content)
300    }
301
302    /// Get the schema
303    pub fn schema(&self) -> &BpsvSchema {
304        &self.schema
305    }
306
307    /// Get the sequence number
308    pub fn sequence_number(&self) -> Option<u32> {
309        self.sequence_number
310    }
311
312    /// Set the sequence number
313    pub fn set_sequence_number(&mut self, seqn: Option<u32>) {
314        self.sequence_number = seqn;
315    }
316
317    /// Get all rows
318    pub fn rows(&self) -> &[BpsvRow<'a>] {
319        &self.rows
320    }
321
322    /// Get a mutable reference to all rows
323    pub fn rows_mut(&mut self) -> &mut [BpsvRow<'a>] {
324        &mut self.rows
325    }
326
327    /// Get the number of rows
328    pub fn row_count(&self) -> usize {
329        self.rows.len()
330    }
331
332    /// Check if the document has no data rows
333    pub fn is_empty(&self) -> bool {
334        self.rows.is_empty()
335    }
336
337    /// Add a row from raw string slices
338    pub fn add_row(&mut self, values: Vec<&'a str>) -> Result<()> {
339        // Validate against schema
340        let validated = self.schema.validate_row_refs(&values)?;
341        self.rows.push(BpsvRow::new(validated));
342        Ok(())
343    }
344
345    /// Add a row from typed values
346    pub fn add_typed_row(&mut self, values: Vec<BpsvValue>) -> Result<()> {
347        if values.len() != self.schema.field_count() {
348            return Err(Error::SchemaMismatch {
349                expected: self.schema.field_count(),
350                actual: values.len(),
351            });
352        }
353
354        // Validate compatibility
355        for (value, field) in values.iter().zip(self.schema.fields()) {
356            if !value.is_compatible_with(&field.field_type) {
357                return Err(Error::InvalidValue {
358                    field: field.name.clone(),
359                    field_type: field.field_type.to_string(),
360                    value: value.to_bpsv_string(),
361                });
362            }
363        }
364
365        self.rows.push(BpsvRow::from_typed_values(values));
366        Ok(())
367    }
368
369    /// Get a row by index
370    pub fn get_row(&self, index: usize) -> Option<&BpsvRow<'a>> {
371        self.rows.get(index)
372    }
373
374    /// Get a mutable row by index
375    pub fn get_row_mut(&mut self, index: usize) -> Option<&mut BpsvRow<'a>> {
376        self.rows.get_mut(index)
377    }
378
379    /// Find rows where a field matches a specific value
380    pub fn find_rows_by_field(&self, field_name: &str, value: &str) -> Result<Vec<usize>> {
381        let field = self
382            .schema
383            .get_field(field_name)
384            .ok_or_else(|| Error::FieldNotFound {
385                field: field_name.to_string(),
386            })?;
387
388        let mut matching_indices = Vec::new();
389        for (index, row) in self.rows.iter().enumerate() {
390            if let Some(row_value) = row.get_raw(field.index) {
391                if row_value == value {
392                    matching_indices.push(index);
393                }
394            }
395        }
396
397        Ok(matching_indices)
398    }
399
400    /// Convert the entire document back to BPSV format
401    pub fn to_bpsv_string(&self) -> String {
402        let mut lines = Vec::new();
403
404        // Header line
405        lines.push(self.schema.to_header_line());
406
407        // Sequence number line
408        if let Some(seqn) = self.sequence_number {
409            lines.push(format!("## seqn = {seqn}"));
410        }
411
412        // Data rows
413        for row in &self.rows {
414            lines.push(row.to_bpsv_line());
415        }
416
417        lines.join("\n")
418    }
419
420    /// Get all values for a specific field
421    pub fn get_column(&self, field_name: &str) -> Result<Vec<&str>> {
422        let field = self
423            .schema
424            .get_field(field_name)
425            .ok_or_else(|| Error::FieldNotFound {
426                field: field_name.to_string(),
427            })?;
428
429        let mut values = Vec::new();
430        for row in &self.rows {
431            if let Some(value) = row.get_raw(field.index) {
432                values.push(value);
433            }
434        }
435
436        Ok(values)
437    }
438
439    /// Convert all rows to maps for easier access
440    pub fn to_maps(&self) -> Result<Vec<HashMap<String, String>>> {
441        let mut maps = Vec::new();
442        for row in &self.rows {
443            maps.push(row.to_map(&self.schema)?);
444        }
445        Ok(maps)
446    }
447
448    /// Convert to owned rows for interning
449    pub fn into_owned_rows(self) -> Vec<OwnedBpsvRow> {
450        self.rows
451            .into_iter()
452            .map(|row| OwnedBpsvRow::from_borrowed(&row))
453            .collect()
454    }
455}
456
457/// An owned version of BpsvDocument for serialization
458#[derive(Debug, Clone, PartialEq)]
459#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
460pub struct OwnedBpsvDocument {
461    /// The schema defining field structure
462    schema: BpsvSchema,
463    /// Sequence number (optional)
464    sequence_number: Option<u32>,
465    /// All data rows
466    rows: Vec<OwnedBpsvRow>,
467}
468
469impl OwnedBpsvDocument {
470    /// Create a new owned document
471    pub fn new(schema: BpsvSchema) -> Self {
472        Self {
473            schema,
474            sequence_number: None,
475            rows: Vec::new(),
476        }
477    }
478
479    /// Set the sequence number
480    pub fn set_sequence_number(&mut self, seqn: Option<u32>) {
481        self.sequence_number = seqn;
482    }
483
484    /// Add a row to the document
485    pub fn add_row(&mut self, row: OwnedBpsvRow) {
486        self.rows.push(row);
487    }
488
489    /// Get the schema
490    pub fn schema(&self) -> &BpsvSchema {
491        &self.schema
492    }
493
494    /// Get the sequence number
495    pub fn sequence_number(&self) -> Option<u32> {
496        self.sequence_number
497    }
498
499    /// Get the number of rows
500    pub fn row_count(&self) -> usize {
501        self.rows.len()
502    }
503
504    /// Get all rows
505    pub fn rows(&self) -> &[OwnedBpsvRow] {
506        &self.rows
507    }
508
509    /// Create from a borrowed document
510    pub fn from_borrowed(doc: &BpsvDocument<'_>) -> Self {
511        Self {
512            schema: doc.schema.clone(),
513            sequence_number: doc.sequence_number,
514            rows: doc.rows.iter().map(OwnedBpsvRow::from_borrowed).collect(),
515        }
516    }
517
518    /// Convert to BPSV string
519    pub fn to_bpsv_string(&self) -> String {
520        let mut lines = Vec::new();
521
522        // Header line
523        lines.push(self.schema.to_header_line());
524
525        // Sequence number line
526        if let Some(seqn) = self.sequence_number {
527            lines.push(format!("## seqn = {seqn}"));
528        }
529
530        // Data rows
531        for row in &self.rows {
532            lines.push(row.as_borrowed().to_bpsv_line());
533        }
534
535        lines.join("\n")
536    }
537}
538
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{BpsvFieldType, BpsvSchema};

    // Rows reused across tests, laid out as Region, BuildConfig, BuildId.
    const US_ROW: [&str; 3] = ["us", "abcd1234abcd1234abcd1234abcd1234", "1234"];
    const EU_ROW: [&str; 3] = ["eu", "1234abcd1234abcd1234abcd1234abcd", "5678"];
    const US_ROW_2: [&str; 3] = ["us", "deadbeefdeadbeefdeadbeefdeadbeef", "9999"];

    /// Schema with the three fields every test in this module uses
    fn create_test_schema() -> BpsvSchema {
        let mut schema = BpsvSchema::new();
        for (name, field_type) in [
            ("Region", BpsvFieldType::String(0)),
            ("BuildConfig", BpsvFieldType::Hex(16)),
            ("BuildId", BpsvFieldType::Decimal(4)),
        ] {
            schema.add_field(name.to_string(), field_type).unwrap();
        }
        schema
    }

    /// Document over empty content with the test schema and the given rows
    fn document_with(rows: &[[&'static str; 3]]) -> BpsvDocument<'static> {
        let mut doc = BpsvDocument::new("", create_test_schema());
        for row in rows {
            doc.add_row(row.to_vec()).unwrap();
        }
        doc
    }

    #[test]
    fn test_row_operations() {
        let schema = create_test_schema();
        let mut row = BpsvRow::new(US_ROW.to_vec());

        assert_eq!(row.len(), 3);
        assert_eq!(row.get_raw(0), Some("us"));
        assert_eq!(row.get_raw_by_name("Region", &schema), Some("us"));

        let typed_values = row.get_typed_values(&schema).unwrap();
        assert_eq!(typed_values.len(), 3);
        assert_eq!(typed_values[0], BpsvValue::String("us".to_string()));
        assert_eq!(
            typed_values[1],
            BpsvValue::Hex("abcd1234abcd1234abcd1234abcd1234".to_string())
        );
        assert_eq!(typed_values[2], BpsvValue::Decimal(1234));
    }

    #[test]
    fn test_document_creation() {
        let mut doc = document_with(&[US_ROW, EU_ROW]);

        doc.set_sequence_number(Some(12345));
        assert_eq!(doc.sequence_number(), Some(12345));

        assert_eq!(doc.row_count(), 2);
        assert!(!doc.is_empty());
    }

    #[test]
    fn test_find_rows() {
        let doc = document_with(&[US_ROW, EU_ROW, US_ROW_2]);

        let us_rows = doc.find_rows_by_field("Region", "us").unwrap();
        assert_eq!(us_rows, vec![0, 2]);

        let eu_rows = doc.find_rows_by_field("Region", "eu").unwrap();
        assert_eq!(eu_rows, vec![1]);
    }

    #[test]
    fn test_column_access() {
        let doc = document_with(&[US_ROW, EU_ROW]);

        let regions = doc.get_column("Region").unwrap();
        assert_eq!(regions, vec!["us", "eu"]);

        let build_ids = doc.get_column("BuildId").unwrap();
        assert_eq!(build_ids, vec!["1234", "5678"]);
    }

    #[test]
    fn test_to_bpsv_string() {
        let mut doc = document_with(&[US_ROW]);
        doc.set_sequence_number(Some(12345));

        let bpsv_string = doc.to_bpsv_string();
        let lines: Vec<&str> = bpsv_string.lines().collect();

        assert_eq!(lines[0], "Region!STRING:0|BuildConfig!HEX:16|BuildId!DEC:4");
        assert_eq!(lines[1], "## seqn = 12345");
        assert_eq!(lines[2], "us|abcd1234abcd1234abcd1234abcd1234|1234");
    }

    #[test]
    fn test_schema_mismatch() {
        let mut doc = document_with(&[]);

        // Too few values
        let too_few = doc.add_row(vec!["us"]);
        assert!(matches!(too_few, Err(Error::SchemaMismatch { .. })));

        // Too many values
        let too_many = doc.add_row(vec!["us", "hex", "123", "extra"]);
        assert!(matches!(too_many, Err(Error::SchemaMismatch { .. })));
    }
}