sqlite_wasm_reader/
record.rs

1//! SQLite record parsing
2
3use crate::{Error, Result, Value, btree::read_varint, logging::log_warn, logging::log_debug};
4use byteorder::{BigEndian, ByteOrder};
5
6#[cfg(all(target_arch = "wasm32", not(target_os = "wasi")))]
7use alloc::{vec::Vec, string::String};
8
9/// Parse a record from payload data
10pub fn parse_record(payload: &[u8]) -> Result<Vec<Value>> {
11    if payload.is_empty() {
12        return Ok(Vec::new());
13    }
14    
15    // Read header size varint
16    let (header_size, header_size_bytes) = read_varint(payload)?;
17    if header_size as usize > payload.len() {
18        return Err(Error::InvalidRecord);
19    }
20    
21    // Safety check: limit header size to prevent memory issues
22    // Record headers are typically < 1KB, 64KB is very generous
23    if header_size > 65536 {
24        return Err(Error::InvalidFormat(format!("Header size too large: {} bytes", header_size)));
25    }
26    
27    // Read serial types from header
28    let mut serial_types = Vec::new();
29    // The header_size includes the header_size varint itself
30    let header_end = header_size as usize;
31    let mut offset = header_size_bytes;
32    
33    // Safety check: limit number of serial types
34    // Most SQLite tables have < 100 columns, 10000 is very generous
35    let max_serial_types = 10_000;
36    
37    while offset < header_end {
38        if serial_types.len() >= max_serial_types {
39            // If we hit this limit, something is wrong with parsing
40            return Err(Error::InvalidFormat(format!(
41                "Too many serial types: {} (limit: {})", 
42                serial_types.len(), 
43                max_serial_types
44            )));
45        }
46        
47        let (serial_type, bytes_read) = read_varint(&payload[offset..])?;
48        offset += bytes_read;
49        serial_types.push(serial_type);
50    }
51    
52    // Skip to data section - data starts right after the header
53    offset = header_size as usize;
54    
55    // Parse values based on serial types
56    let mut values = Vec::new();
57    
58    for (i, serial_type) in serial_types.iter().enumerate() {
59        // Check if we have enough data left
60        if offset >= payload.len() {
61            log_debug(&format!("Ran out of data while parsing value {} (serial_type: {}). Payload size: {}, offset: {}", i, serial_type, payload.len(), offset));
62            // Instead of breaking, add NULL values for remaining columns
63            for _ in i..serial_types.len() {
64                values.push(Value::Null);
65            }
66            break;
67        }
68        
69        match parse_value(*serial_type, &payload[offset..]) {
70            Ok((value, bytes_read)) => {
71                offset += bytes_read;
72                values.push(value);
73            }
74            Err(e) => {
75                log_warn(&format!("Failed to parse value {} (serial_type: {}): {}. Payload size: {}, offset: {}", i, serial_type, e, payload.len(), offset));
76                // Add a null value as fallback and continue
77                values.push(Value::Null);
78                // Try to advance offset to prevent infinite loops
79                if offset < payload.len() {
80                    offset += 1;
81                }
82            }
83        }
84    }
85    
86    Ok(values)
87}
88
89/// Parse a value based on its serial type
90fn parse_value(serial_type: i64, data: &[u8]) -> Result<(Value, usize)> {
91    match serial_type {
92        0 => Ok((Value::Null, 0)),
93        1 => {
94            if data.is_empty() {
95                return Err(Error::InvalidRecord);
96            }
97            Ok((Value::Integer(data[0] as i64), 1))
98        }
99        2 => {
100            if data.len() < 2 {
101                return Err(Error::InvalidRecord);
102            }
103            Ok((Value::Integer(BigEndian::read_i16(data) as i64), 2))
104        }
105        3 => {
106            if data.len() < 3 {
107                return Err(Error::InvalidRecord);
108            }
109            let value = ((data[0] as i64) << 16) | 
110                       ((data[1] as i64) << 8) | 
111                       (data[2] as i64);
112            // Sign extend from 24-bit
113            let value = if value & 0x800000 != 0 {
114                value | 0xffffffffff000000u64 as i64
115            } else {
116                value
117            };
118            Ok((Value::Integer(value), 3))
119        }
120        4 => {
121            if data.len() < 4 {
122                return Err(Error::InvalidRecord);
123            }
124            Ok((Value::Integer(BigEndian::read_i32(data) as i64), 4))
125        }
126        5 => {
127            if data.len() < 6 {
128                return Err(Error::InvalidRecord);
129            }
130            let value = ((data[0] as i64) << 40) |
131                       ((data[1] as i64) << 32) |
132                       ((data[2] as i64) << 24) |
133                       ((data[3] as i64) << 16) |
134                       ((data[4] as i64) << 8) |
135                       (data[5] as i64);
136            // Sign extend from 48-bit
137            let value = if value & 0x800000000000 != 0 {
138                value | 0xffff000000000000u64 as i64
139            } else {
140                value
141            };
142            Ok((Value::Integer(value), 6))
143        }
144        6 => {
145            if data.len() < 8 {
146                return Err(Error::InvalidRecord);
147            }
148            Ok((Value::Integer(BigEndian::read_i64(data)), 8))
149        }
150        7 => {
151            if data.len() < 8 {
152                return Err(Error::InvalidRecord);
153            }
154            Ok((Value::Real(BigEndian::read_f64(data)), 8))
155        }
156        8 => Ok((Value::Integer(0), 0)),
157        9 => Ok((Value::Integer(1), 0)),
158        10 | 11 => Err(Error::UnsupportedFeature("Reserved serial types".into())),
159        n if n >= 12 && n % 2 == 0 => {
160            // BLOB with length (n-12)/2
161            let length = ((n - 12) / 2) as usize;
162            
163            // Safety check: limit BLOB size (increased significantly)
164            if length > 1_000_000_000 {
165                return Err(Error::InvalidFormat("BLOB too large".into()));
166            }
167            
168            if data.len() < length {
169                return Err(Error::InvalidRecord);
170            }
171            Ok((Value::Blob(data[..length].to_vec()), length))
172        }
173        n if n >= 13 && n % 2 == 1 => {
174            // String with length (n-13)/2
175            let length = ((n - 13) / 2) as usize;
176            
177            // Safety check: limit string size (increased significantly)
178            if length > 100_000_000 {
179                return Err(Error::InvalidFormat("String too large".into()));
180            }
181            
182            if data.len() < length {
183                return Err(Error::InvalidRecord);
184            }
185            let text = core::str::from_utf8(&data[..length])?;
186            Ok((Value::Text(text.to_string()), length))
187        }
188        _ => Err(Error::InvalidFormat("Invalid serial type".into())),
189    }
190}