Skip to main content

rds2rust/
materialization.rs

1use byteorder::{BigEndian, ReadBytesExt};
2use std::io::Cursor;
3
4use crate::{Complex, Error, LazyVector, Logical, RObject, Result, VectorData};
5
/// One component of a parsed materialization path.
#[derive(Debug, PartialEq)]
enum PathToken {
    /// A named component (everything between `.` / `[` separators).
    Field(String),
    /// A numeric component written in brackets, e.g. `[3]`.
    Index(usize),
}
11
/// State shared by materialization calls: the byte buffer lazy spans point
/// into, plus an optional byte budget that each materialization draws from.
pub struct MaterializationContext<'a> {
    /// Buffer that lazy spans are sliced out of.
    data: &'a [u8],
    /// Bytes still allowed to be materialized; `None` means no limit is enforced.
    remaining_budget: Option<usize>,
}
16
17impl<'a> MaterializationContext<'a> {
18    pub fn new(data: &'a [u8]) -> Self {
19        Self {
20            data,
21            remaining_budget: None,
22        }
23    }
24
25    pub fn with_budget(data: &'a [u8], budget_bytes: usize) -> Self {
26        Self {
27            data,
28            remaining_budget: Some(budget_bytes),
29        }
30    }
31
32    pub fn remaining_budget(&self) -> Option<usize> {
33        self.remaining_budget
34    }
35
36    fn check_budget(&mut self, bytes_needed: usize) -> Result<()> {
37        if let Some(remaining) = &mut self.remaining_budget {
38            if bytes_needed > *remaining {
39                return Err(Error::MemoryBudgetExceeded {
40                    needed: bytes_needed,
41                    available: *remaining,
42                });
43            }
44            *remaining -= bytes_needed;
45        }
46        Ok(())
47    }
48
49    pub fn materialize_integer_vector(&mut self, span: LazyVector) -> Result<Vec<i32>> {
50        validate_byte_len(span, std::mem::size_of::<i32>())?;
51        self.check_budget(span.byte_len as usize)?;
52        let mut cursor = Cursor::new(slice_for_span(self.data, span)?);
53        let mut vec = Vec::with_capacity(span.length);
54        for _ in 0..span.length {
55            vec.push(cursor.read_i32::<BigEndian>()?);
56        }
57        Ok(vec)
58    }
59
60    pub fn materialize_real_vector(&mut self, span: LazyVector) -> Result<Vec<f64>> {
61        validate_byte_len(span, std::mem::size_of::<f64>())?;
62        self.check_budget(span.byte_len as usize)?;
63        let mut cursor = Cursor::new(slice_for_span(self.data, span)?);
64        let mut vec = Vec::with_capacity(span.length);
65        for _ in 0..span.length {
66            vec.push(cursor.read_f64::<BigEndian>()?);
67        }
68        Ok(vec)
69    }
70
71    pub fn materialize_logical_vector(&mut self, span: LazyVector) -> Result<Vec<Logical>> {
72        validate_byte_len(span, std::mem::size_of::<i32>())?;
73        self.check_budget(span.byte_len as usize)?;
74        let mut cursor = Cursor::new(slice_for_span(self.data, span)?);
75        let mut vec = Vec::with_capacity(span.length);
76        for _ in 0..span.length {
77            let val = cursor.read_i32::<BigEndian>()?;
78            let logical = match val {
79                0 => Logical::False,
80                1 => Logical::True,
81                i32::MIN => Logical::Na,
82                _ => Logical::Na,
83            };
84            vec.push(logical);
85        }
86        Ok(vec)
87    }
88
89    pub fn materialize_raw_vector(&mut self, span: LazyVector) -> Result<Vec<u8>> {
90        validate_byte_len(span, 1)?;
91        self.check_budget(span.byte_len as usize)?;
92        let slice = slice_for_span(self.data, span)?;
93        Ok(slice.to_vec())
94    }
95
96    pub fn materialize_complex_vector(&mut self, span: LazyVector) -> Result<Vec<Complex>> {
97        validate_byte_len(span, std::mem::size_of::<Complex>())?;
98        self.check_budget(span.byte_len as usize)?;
99        let mut cursor = Cursor::new(slice_for_span(self.data, span)?);
100        let mut vec = Vec::with_capacity(span.length);
101        for _ in 0..span.length {
102            let real = cursor.read_f64::<BigEndian>()?;
103            let imaginary = cursor.read_f64::<BigEndian>()?;
104            vec.push(Complex { real, imaginary });
105        }
106        Ok(vec)
107    }
108
109    pub fn materialize_integer_data(&mut self, vector: &mut VectorData<i32>) -> Result<()> {
110        if let VectorData::Lazy(span) = *vector {
111            *vector = VectorData::Owned(self.materialize_integer_vector(span)?);
112        }
113        Ok(())
114    }
115
116    pub fn materialize_real_data(&mut self, vector: &mut VectorData<f64>) -> Result<()> {
117        if let VectorData::Lazy(span) = *vector {
118            *vector = VectorData::Owned(self.materialize_real_vector(span)?);
119        }
120        Ok(())
121    }
122
123    pub fn materialize_logical_data(&mut self, vector: &mut VectorData<Logical>) -> Result<()> {
124        if let VectorData::Lazy(span) = *vector {
125            *vector = VectorData::Owned(self.materialize_logical_vector(span)?);
126        }
127        Ok(())
128    }
129
130    pub fn materialize_raw_data(&mut self, vector: &mut VectorData<u8>) -> Result<()> {
131        if let VectorData::Lazy(span) = *vector {
132            *vector = VectorData::Owned(self.materialize_raw_vector(span)?);
133        }
134        Ok(())
135    }
136
137    pub fn materialize_complex_data(&mut self, vector: &mut VectorData<Complex>) -> Result<()> {
138        if let VectorData::Lazy(span) = *vector {
139            *vector = VectorData::Owned(self.materialize_complex_vector(span)?);
140        }
141        Ok(())
142    }
143}
144
145pub fn materialize_path(
146    obj: &mut RObject,
147    path: &str,
148    ctx: &mut MaterializationContext<'_>,
149) -> Result<bool> {
150    let tokens = parse_path_tokens(path)?;
151    materialize_tokens(obj, &tokens, ctx)
152}
153
154pub fn materialize_paths_with_budget(
155    obj: &mut RObject,
156    data: &[u8],
157    paths: &[&str],
158    budget_bytes: Option<usize>,
159) -> Result<Vec<String>> {
160    let mut ctx = match budget_bytes {
161        Some(budget) => MaterializationContext::with_budget(data, budget),
162        None => MaterializationContext::new(data),
163    };
164
165    let mut missing = Vec::new();
166    for path in paths {
167        let changed = materialize_path(obj, path, &mut ctx)?;
168        if !changed {
169            missing.push((*path).to_string());
170        }
171    }
172
173    Ok(missing)
174}
175
176fn slice_for_span(data: &[u8], span: LazyVector) -> Result<&[u8]> {
177    let start = span.offset as usize;
178    let end = span
179        .offset
180        .checked_add(span.byte_len)
181        .ok_or_else(|| Error::InvalidFormat("lazy span overflow".to_string()))?
182        as usize;
183
184    if start > data.len() {
185        return Err(Error::TruncatedLazyPayload {
186            expected: span.byte_len,
187            actual: 0,
188        });
189    }
190
191    let available = data.len() - start;
192    if end > data.len() {
193        return Err(Error::TruncatedLazyPayload {
194            expected: span.byte_len,
195            actual: available as u64,
196        });
197    }
198
199    Ok(&data[start..end])
200}
201
202fn validate_byte_len(span: LazyVector, elem_size: usize) -> Result<()> {
203    let expected = span
204        .length
205        .checked_mul(elem_size)
206        .ok_or_else(|| Error::InvalidFormat("lazy span length overflow".to_string()))?;
207    if span.byte_len != expected as u64 {
208        return Err(Error::InvalidFormat(format!(
209            "lazy span byte_len mismatch: expected {}, got {}",
210            expected, span.byte_len
211        )));
212    }
213    Ok(())
214}
215
216fn parse_path_tokens(path: &str) -> Result<Vec<PathToken>> {
217    let mut tokens = Vec::new();
218    let bytes = path.as_bytes();
219    let mut i = 0;
220
221    while i < bytes.len() {
222        match bytes[i] {
223            b'.' => {
224                i += 1;
225            }
226            b'[' => {
227                i += 1;
228                let start = i;
229                while i < bytes.len() && bytes[i].is_ascii_digit() {
230                    i += 1;
231                }
232                if start == i || i >= bytes.len() || bytes[i] != b']' {
233                    return Err(Error::InvalidFormat(format!(
234                        "invalid path index in '{}'",
235                        path
236                    )));
237                }
238                let index: usize = path[start..i]
239                    .parse()
240                    .map_err(|_| Error::InvalidFormat(format!("invalid index in '{}'", path)))?;
241                tokens.push(PathToken::Index(index));
242                i += 1;
243            }
244            _ => {
245                let start = i;
246                while i < bytes.len() && bytes[i] != b'.' && bytes[i] != b'[' {
247                    i += 1;
248                }
249                let field = path[start..i].to_string();
250                if field.is_empty() {
251                    return Err(Error::InvalidFormat(format!("invalid path '{}'", path)));
252                }
253                tokens.push(PathToken::Field(field));
254            }
255        }
256    }
257
258    Ok(tokens)
259}
260
261fn materialize_tokens(
262    obj: &mut RObject,
263    tokens: &[PathToken],
264    ctx: &mut MaterializationContext<'_>,
265) -> Result<bool> {
266    use RObject::*;
267
268    if tokens.is_empty() {
269        return materialize_vector(obj, ctx);
270    }
271
272    match &tokens[0] {
273        PathToken::Field(name) => match obj {
274            DataFrame(df) => match df.columns.get_mut(name.as_str()) {
275                Some(col) => materialize_tokens(col, &tokens[1..], ctx),
276                None => Ok(false),
277            },
278            S4Object(s4) => match s4.slots.get_mut(name.as_str()) {
279                Some(slot) => materialize_tokens(slot, &tokens[1..], ctx),
280                None => Ok(false),
281            },
282            S3Object(s3) => {
283                if name == "base" {
284                    materialize_tokens(&mut s3.base, &tokens[1..], ctx)
285                } else {
286                    Ok(false)
287                }
288            }
289            Closure {
290                formals,
291                body,
292                environment,
293            } => match name.as_str() {
294                "formals" => materialize_tokens(formals, &tokens[1..], ctx),
295                "body" => materialize_tokens(body, &tokens[1..], ctx),
296                "environment" => materialize_tokens(environment, &tokens[1..], ctx),
297                _ => Ok(false),
298            },
299            Environment {
300                enclosing,
301                frame,
302                hashtab,
303            } => match name.as_str() {
304                "enclosing" => materialize_tokens(enclosing, &tokens[1..], ctx),
305                "frame" => materialize_tokens(frame, &tokens[1..], ctx),
306                "hashtab" => materialize_tokens(hashtab, &tokens[1..], ctx),
307                _ => Ok(false),
308            },
309            Promise {
310                value,
311                expression,
312                environment,
313            } => match name.as_str() {
314                "value" => materialize_tokens(value, &tokens[1..], ctx),
315                "expression" => materialize_tokens(expression, &tokens[1..], ctx),
316                "environment" => materialize_tokens(environment, &tokens[1..], ctx),
317                _ => Ok(false),
318            },
319            Bytecode {
320                code,
321                constants,
322                expr,
323            } => match name.as_str() {
324                "code" => materialize_tokens(code, &tokens[1..], ctx),
325                "constants" => materialize_tokens(constants, &tokens[1..], ctx),
326                "expr" => materialize_tokens(expr, &tokens[1..], ctx),
327                _ => Ok(false),
328            },
329            Language { function, args } => match name.as_str() {
330                "function" => materialize_tokens(function, &tokens[1..], ctx),
331                "args" => materialize_pairlist_elements(args, &tokens[1..], ctx),
332                _ => Ok(false),
333            },
334            Pairlist(_) => Ok(false),
335            WithAttributes { object, .. } => materialize_tokens(object, tokens, ctx),
336            Shared(inner) => {
337                let mut inner = inner.write().unwrap();
338                materialize_tokens(&mut inner, tokens, ctx)
339            }
340            _ => Ok(false),
341        },
342        PathToken::Index(index) => match obj {
343            List(items) | Expression(items) => match items.get_mut(*index) {
344                Some(item) => materialize_tokens(item, &tokens[1..], ctx),
345                None => Ok(false),
346            },
347            Pairlist(elements) => materialize_pairlist_index(elements, *index, &tokens[1..], ctx),
348            _ => Ok(false),
349        },
350    }
351}
352
353fn materialize_pairlist_elements(
354    elements: &mut [crate::PairlistElement],
355    tokens: &[PathToken],
356    ctx: &mut MaterializationContext<'_>,
357) -> Result<bool> {
358    if tokens.is_empty() {
359        return Ok(false);
360    }
361    match &tokens[0] {
362        PathToken::Index(index) => materialize_pairlist_index(elements, *index, &tokens[1..], ctx),
363        _ => Ok(false),
364    }
365}
366
367fn materialize_pairlist_index(
368    elements: &mut [crate::PairlistElement],
369    index: usize,
370    tokens: &[PathToken],
371    ctx: &mut MaterializationContext<'_>,
372) -> Result<bool> {
373    let elem = match elements.get_mut(index) {
374        Some(elem) => elem,
375        None => return Ok(false),
376    };
377
378    if tokens.is_empty() {
379        return Ok(false);
380    }
381
382    match &tokens[0] {
383        PathToken::Field(name) => match name.as_str() {
384            "value" => materialize_tokens(&mut elem.value, &tokens[1..], ctx),
385            "tag_object" => match elem.tag_object.as_mut() {
386                Some(tag) => materialize_tokens(tag, &tokens[1..], ctx),
387                None => Ok(false),
388            },
389            _ => Ok(false),
390        },
391        _ => Ok(false),
392    }
393}
394
395fn materialize_vector(obj: &mut RObject, ctx: &mut MaterializationContext<'_>) -> Result<bool> {
396    use RObject::*;
397
398    match obj {
399        Integer(v) => {
400            ctx.materialize_integer_data(v)?;
401            Ok(true)
402        }
403        Real(v) => {
404            ctx.materialize_real_data(v)?;
405            Ok(true)
406        }
407        Logical(v) => {
408            ctx.materialize_logical_data(v)?;
409            Ok(true)
410        }
411        Raw(v) => {
412            ctx.materialize_raw_data(v)?;
413            Ok(true)
414        }
415        Complex(v) => {
416            ctx.materialize_complex_data(v)?;
417            Ok(true)
418        }
419        Character(_) => Err(Error::Unsupported(
420            "materialize character vectors not yet supported".to_string(),
421        )),
422        _ => Ok(false),
423    }
424}
425
426pub fn materialize_integer_vector(data: &[u8], span: LazyVector) -> Result<Vec<i32>> {
427    let mut ctx = MaterializationContext::new(data);
428    ctx.materialize_integer_vector(span)
429}
430
431pub fn materialize_real_vector(data: &[u8], span: LazyVector) -> Result<Vec<f64>> {
432    let mut ctx = MaterializationContext::new(data);
433    ctx.materialize_real_vector(span)
434}
435
436pub fn materialize_logical_vector(data: &[u8], span: LazyVector) -> Result<Vec<Logical>> {
437    let mut ctx = MaterializationContext::new(data);
438    ctx.materialize_logical_vector(span)
439}
440
441pub fn materialize_raw_vector(data: &[u8], span: LazyVector) -> Result<Vec<u8>> {
442    let mut ctx = MaterializationContext::new(data);
443    ctx.materialize_raw_vector(span)
444}
445
446pub fn materialize_complex_vector(data: &[u8], span: LazyVector) -> Result<Vec<Complex>> {
447    let mut ctx = MaterializationContext::new(data);
448    ctx.materialize_complex_vector(span)
449}
450
451pub fn materialize_integer_data(data: &[u8], vector: &mut VectorData<i32>) -> Result<()> {
452    let mut ctx = MaterializationContext::new(data);
453    ctx.materialize_integer_data(vector)
454}
455
456pub fn materialize_real_data(data: &[u8], vector: &mut VectorData<f64>) -> Result<()> {
457    let mut ctx = MaterializationContext::new(data);
458    ctx.materialize_real_data(vector)
459}
460
461pub fn materialize_logical_data(data: &[u8], vector: &mut VectorData<Logical>) -> Result<()> {
462    let mut ctx = MaterializationContext::new(data);
463    ctx.materialize_logical_data(vector)
464}
465
466pub fn materialize_raw_data(data: &[u8], vector: &mut VectorData<u8>) -> Result<()> {
467    let mut ctx = MaterializationContext::new(data);
468    ctx.materialize_raw_data(vector)
469}
470
471pub fn materialize_complex_data(data: &[u8], vector: &mut VectorData<Complex>) -> Result<()> {
472    let mut ctx = MaterializationContext::new(data);
473    ctx.materialize_complex_data(vector)
474}