1use byteorder::{BigEndian, ReadBytesExt};
2use std::io::Cursor;
3
4use crate::{Complex, Error, LazyVector, Logical, RObject, Result, VectorData};
5
/// One step of a parsed materialization path such as `cols.x[3].value`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PathToken {
    /// A named component, written bare or after a `.` separator.
    Field(String),
    /// A zero-based position written as `[N]`.
    Index(usize),
}
11
/// Borrowed view of a serialized payload plus an optional running byte budget,
/// used to turn lazy vector spans into owned vectors.
#[derive(Debug)]
pub struct MaterializationContext<'a> {
    /// Backing buffer that lazy spans index into.
    data: &'a [u8],
    /// Bytes that may still be materialized; `None` means no limit is enforced.
    remaining_budget: Option<usize>,
}
16
17impl<'a> MaterializationContext<'a> {
18 pub fn new(data: &'a [u8]) -> Self {
19 Self {
20 data,
21 remaining_budget: None,
22 }
23 }
24
25 pub fn with_budget(data: &'a [u8], budget_bytes: usize) -> Self {
26 Self {
27 data,
28 remaining_budget: Some(budget_bytes),
29 }
30 }
31
32 pub fn remaining_budget(&self) -> Option<usize> {
33 self.remaining_budget
34 }
35
36 fn check_budget(&mut self, bytes_needed: usize) -> Result<()> {
37 if let Some(remaining) = &mut self.remaining_budget {
38 if bytes_needed > *remaining {
39 return Err(Error::MemoryBudgetExceeded {
40 needed: bytes_needed,
41 available: *remaining,
42 });
43 }
44 *remaining -= bytes_needed;
45 }
46 Ok(())
47 }
48
49 pub fn materialize_integer_vector(&mut self, span: LazyVector) -> Result<Vec<i32>> {
50 validate_byte_len(span, std::mem::size_of::<i32>())?;
51 self.check_budget(span.byte_len as usize)?;
52 let mut cursor = Cursor::new(slice_for_span(self.data, span)?);
53 let mut vec = Vec::with_capacity(span.length);
54 for _ in 0..span.length {
55 vec.push(cursor.read_i32::<BigEndian>()?);
56 }
57 Ok(vec)
58 }
59
60 pub fn materialize_real_vector(&mut self, span: LazyVector) -> Result<Vec<f64>> {
61 validate_byte_len(span, std::mem::size_of::<f64>())?;
62 self.check_budget(span.byte_len as usize)?;
63 let mut cursor = Cursor::new(slice_for_span(self.data, span)?);
64 let mut vec = Vec::with_capacity(span.length);
65 for _ in 0..span.length {
66 vec.push(cursor.read_f64::<BigEndian>()?);
67 }
68 Ok(vec)
69 }
70
71 pub fn materialize_logical_vector(&mut self, span: LazyVector) -> Result<Vec<Logical>> {
72 validate_byte_len(span, std::mem::size_of::<i32>())?;
73 self.check_budget(span.byte_len as usize)?;
74 let mut cursor = Cursor::new(slice_for_span(self.data, span)?);
75 let mut vec = Vec::with_capacity(span.length);
76 for _ in 0..span.length {
77 let val = cursor.read_i32::<BigEndian>()?;
78 let logical = match val {
79 0 => Logical::False,
80 1 => Logical::True,
81 i32::MIN => Logical::Na,
82 _ => Logical::Na,
83 };
84 vec.push(logical);
85 }
86 Ok(vec)
87 }
88
89 pub fn materialize_raw_vector(&mut self, span: LazyVector) -> Result<Vec<u8>> {
90 validate_byte_len(span, 1)?;
91 self.check_budget(span.byte_len as usize)?;
92 let slice = slice_for_span(self.data, span)?;
93 Ok(slice.to_vec())
94 }
95
96 pub fn materialize_complex_vector(&mut self, span: LazyVector) -> Result<Vec<Complex>> {
97 validate_byte_len(span, std::mem::size_of::<Complex>())?;
98 self.check_budget(span.byte_len as usize)?;
99 let mut cursor = Cursor::new(slice_for_span(self.data, span)?);
100 let mut vec = Vec::with_capacity(span.length);
101 for _ in 0..span.length {
102 let real = cursor.read_f64::<BigEndian>()?;
103 let imaginary = cursor.read_f64::<BigEndian>()?;
104 vec.push(Complex { real, imaginary });
105 }
106 Ok(vec)
107 }
108
109 pub fn materialize_integer_data(&mut self, vector: &mut VectorData<i32>) -> Result<()> {
110 if let VectorData::Lazy(span) = *vector {
111 *vector = VectorData::Owned(self.materialize_integer_vector(span)?);
112 }
113 Ok(())
114 }
115
116 pub fn materialize_real_data(&mut self, vector: &mut VectorData<f64>) -> Result<()> {
117 if let VectorData::Lazy(span) = *vector {
118 *vector = VectorData::Owned(self.materialize_real_vector(span)?);
119 }
120 Ok(())
121 }
122
123 pub fn materialize_logical_data(&mut self, vector: &mut VectorData<Logical>) -> Result<()> {
124 if let VectorData::Lazy(span) = *vector {
125 *vector = VectorData::Owned(self.materialize_logical_vector(span)?);
126 }
127 Ok(())
128 }
129
130 pub fn materialize_raw_data(&mut self, vector: &mut VectorData<u8>) -> Result<()> {
131 if let VectorData::Lazy(span) = *vector {
132 *vector = VectorData::Owned(self.materialize_raw_vector(span)?);
133 }
134 Ok(())
135 }
136
137 pub fn materialize_complex_data(&mut self, vector: &mut VectorData<Complex>) -> Result<()> {
138 if let VectorData::Lazy(span) = *vector {
139 *vector = VectorData::Owned(self.materialize_complex_vector(span)?);
140 }
141 Ok(())
142 }
143}
144
145pub fn materialize_path(
146 obj: &mut RObject,
147 path: &str,
148 ctx: &mut MaterializationContext<'_>,
149) -> Result<bool> {
150 let tokens = parse_path_tokens(path)?;
151 materialize_tokens(obj, &tokens, ctx)
152}
153
154pub fn materialize_paths_with_budget(
155 obj: &mut RObject,
156 data: &[u8],
157 paths: &[&str],
158 budget_bytes: Option<usize>,
159) -> Result<Vec<String>> {
160 let mut ctx = match budget_bytes {
161 Some(budget) => MaterializationContext::with_budget(data, budget),
162 None => MaterializationContext::new(data),
163 };
164
165 let mut missing = Vec::new();
166 for path in paths {
167 let changed = materialize_path(obj, path, &mut ctx)?;
168 if !changed {
169 missing.push((*path).to_string());
170 }
171 }
172
173 Ok(missing)
174}
175
176fn slice_for_span(data: &[u8], span: LazyVector) -> Result<&[u8]> {
177 let start = span.offset as usize;
178 let end = span
179 .offset
180 .checked_add(span.byte_len)
181 .ok_or_else(|| Error::InvalidFormat("lazy span overflow".to_string()))?
182 as usize;
183
184 if start > data.len() {
185 return Err(Error::TruncatedLazyPayload {
186 expected: span.byte_len,
187 actual: 0,
188 });
189 }
190
191 let available = data.len() - start;
192 if end > data.len() {
193 return Err(Error::TruncatedLazyPayload {
194 expected: span.byte_len,
195 actual: available as u64,
196 });
197 }
198
199 Ok(&data[start..end])
200}
201
202fn validate_byte_len(span: LazyVector, elem_size: usize) -> Result<()> {
203 let expected = span
204 .length
205 .checked_mul(elem_size)
206 .ok_or_else(|| Error::InvalidFormat("lazy span length overflow".to_string()))?;
207 if span.byte_len != expected as u64 {
208 return Err(Error::InvalidFormat(format!(
209 "lazy span byte_len mismatch: expected {}, got {}",
210 expected, span.byte_len
211 )));
212 }
213 Ok(())
214}
215
216fn parse_path_tokens(path: &str) -> Result<Vec<PathToken>> {
217 let mut tokens = Vec::new();
218 let bytes = path.as_bytes();
219 let mut i = 0;
220
221 while i < bytes.len() {
222 match bytes[i] {
223 b'.' => {
224 i += 1;
225 }
226 b'[' => {
227 i += 1;
228 let start = i;
229 while i < bytes.len() && bytes[i].is_ascii_digit() {
230 i += 1;
231 }
232 if start == i || i >= bytes.len() || bytes[i] != b']' {
233 return Err(Error::InvalidFormat(format!(
234 "invalid path index in '{}'",
235 path
236 )));
237 }
238 let index: usize = path[start..i]
239 .parse()
240 .map_err(|_| Error::InvalidFormat(format!("invalid index in '{}'", path)))?;
241 tokens.push(PathToken::Index(index));
242 i += 1;
243 }
244 _ => {
245 let start = i;
246 while i < bytes.len() && bytes[i] != b'.' && bytes[i] != b'[' {
247 i += 1;
248 }
249 let field = path[start..i].to_string();
250 if field.is_empty() {
251 return Err(Error::InvalidFormat(format!("invalid path '{}'", path)));
252 }
253 tokens.push(PathToken::Field(field));
254 }
255 }
256 }
257
258 Ok(tokens)
259}
260
261fn materialize_tokens(
262 obj: &mut RObject,
263 tokens: &[PathToken],
264 ctx: &mut MaterializationContext<'_>,
265) -> Result<bool> {
266 use RObject::*;
267
268 if tokens.is_empty() {
269 return materialize_vector(obj, ctx);
270 }
271
272 match &tokens[0] {
273 PathToken::Field(name) => match obj {
274 DataFrame(df) => match df.columns.get_mut(name.as_str()) {
275 Some(col) => materialize_tokens(col, &tokens[1..], ctx),
276 None => Ok(false),
277 },
278 S4Object(s4) => match s4.slots.get_mut(name.as_str()) {
279 Some(slot) => materialize_tokens(slot, &tokens[1..], ctx),
280 None => Ok(false),
281 },
282 S3Object(s3) => {
283 if name == "base" {
284 materialize_tokens(&mut s3.base, &tokens[1..], ctx)
285 } else {
286 Ok(false)
287 }
288 }
289 Closure {
290 formals,
291 body,
292 environment,
293 } => match name.as_str() {
294 "formals" => materialize_tokens(formals, &tokens[1..], ctx),
295 "body" => materialize_tokens(body, &tokens[1..], ctx),
296 "environment" => materialize_tokens(environment, &tokens[1..], ctx),
297 _ => Ok(false),
298 },
299 Environment {
300 enclosing,
301 frame,
302 hashtab,
303 } => match name.as_str() {
304 "enclosing" => materialize_tokens(enclosing, &tokens[1..], ctx),
305 "frame" => materialize_tokens(frame, &tokens[1..], ctx),
306 "hashtab" => materialize_tokens(hashtab, &tokens[1..], ctx),
307 _ => Ok(false),
308 },
309 Promise {
310 value,
311 expression,
312 environment,
313 } => match name.as_str() {
314 "value" => materialize_tokens(value, &tokens[1..], ctx),
315 "expression" => materialize_tokens(expression, &tokens[1..], ctx),
316 "environment" => materialize_tokens(environment, &tokens[1..], ctx),
317 _ => Ok(false),
318 },
319 Bytecode {
320 code,
321 constants,
322 expr,
323 } => match name.as_str() {
324 "code" => materialize_tokens(code, &tokens[1..], ctx),
325 "constants" => materialize_tokens(constants, &tokens[1..], ctx),
326 "expr" => materialize_tokens(expr, &tokens[1..], ctx),
327 _ => Ok(false),
328 },
329 Language { function, args } => match name.as_str() {
330 "function" => materialize_tokens(function, &tokens[1..], ctx),
331 "args" => materialize_pairlist_elements(args, &tokens[1..], ctx),
332 _ => Ok(false),
333 },
334 Pairlist(_) => Ok(false),
335 WithAttributes { object, .. } => materialize_tokens(object, tokens, ctx),
336 Shared(inner) => {
337 let mut inner = inner.write().unwrap();
338 materialize_tokens(&mut inner, tokens, ctx)
339 }
340 _ => Ok(false),
341 },
342 PathToken::Index(index) => match obj {
343 List(items) | Expression(items) => match items.get_mut(*index) {
344 Some(item) => materialize_tokens(item, &tokens[1..], ctx),
345 None => Ok(false),
346 },
347 Pairlist(elements) => materialize_pairlist_index(elements, *index, &tokens[1..], ctx),
348 _ => Ok(false),
349 },
350 }
351}
352
353fn materialize_pairlist_elements(
354 elements: &mut [crate::PairlistElement],
355 tokens: &[PathToken],
356 ctx: &mut MaterializationContext<'_>,
357) -> Result<bool> {
358 if tokens.is_empty() {
359 return Ok(false);
360 }
361 match &tokens[0] {
362 PathToken::Index(index) => materialize_pairlist_index(elements, *index, &tokens[1..], ctx),
363 _ => Ok(false),
364 }
365}
366
367fn materialize_pairlist_index(
368 elements: &mut [crate::PairlistElement],
369 index: usize,
370 tokens: &[PathToken],
371 ctx: &mut MaterializationContext<'_>,
372) -> Result<bool> {
373 let elem = match elements.get_mut(index) {
374 Some(elem) => elem,
375 None => return Ok(false),
376 };
377
378 if tokens.is_empty() {
379 return Ok(false);
380 }
381
382 match &tokens[0] {
383 PathToken::Field(name) => match name.as_str() {
384 "value" => materialize_tokens(&mut elem.value, &tokens[1..], ctx),
385 "tag_object" => match elem.tag_object.as_mut() {
386 Some(tag) => materialize_tokens(tag, &tokens[1..], ctx),
387 None => Ok(false),
388 },
389 _ => Ok(false),
390 },
391 _ => Ok(false),
392 }
393}
394
395fn materialize_vector(obj: &mut RObject, ctx: &mut MaterializationContext<'_>) -> Result<bool> {
396 use RObject::*;
397
398 match obj {
399 Integer(v) => {
400 ctx.materialize_integer_data(v)?;
401 Ok(true)
402 }
403 Real(v) => {
404 ctx.materialize_real_data(v)?;
405 Ok(true)
406 }
407 Logical(v) => {
408 ctx.materialize_logical_data(v)?;
409 Ok(true)
410 }
411 Raw(v) => {
412 ctx.materialize_raw_data(v)?;
413 Ok(true)
414 }
415 Complex(v) => {
416 ctx.materialize_complex_data(v)?;
417 Ok(true)
418 }
419 Character(_) => Err(Error::Unsupported(
420 "materialize character vectors not yet supported".to_string(),
421 )),
422 _ => Ok(false),
423 }
424}
425
426pub fn materialize_integer_vector(data: &[u8], span: LazyVector) -> Result<Vec<i32>> {
427 let mut ctx = MaterializationContext::new(data);
428 ctx.materialize_integer_vector(span)
429}
430
431pub fn materialize_real_vector(data: &[u8], span: LazyVector) -> Result<Vec<f64>> {
432 let mut ctx = MaterializationContext::new(data);
433 ctx.materialize_real_vector(span)
434}
435
436pub fn materialize_logical_vector(data: &[u8], span: LazyVector) -> Result<Vec<Logical>> {
437 let mut ctx = MaterializationContext::new(data);
438 ctx.materialize_logical_vector(span)
439}
440
441pub fn materialize_raw_vector(data: &[u8], span: LazyVector) -> Result<Vec<u8>> {
442 let mut ctx = MaterializationContext::new(data);
443 ctx.materialize_raw_vector(span)
444}
445
446pub fn materialize_complex_vector(data: &[u8], span: LazyVector) -> Result<Vec<Complex>> {
447 let mut ctx = MaterializationContext::new(data);
448 ctx.materialize_complex_vector(span)
449}
450
451pub fn materialize_integer_data(data: &[u8], vector: &mut VectorData<i32>) -> Result<()> {
452 let mut ctx = MaterializationContext::new(data);
453 ctx.materialize_integer_data(vector)
454}
455
456pub fn materialize_real_data(data: &[u8], vector: &mut VectorData<f64>) -> Result<()> {
457 let mut ctx = MaterializationContext::new(data);
458 ctx.materialize_real_data(vector)
459}
460
461pub fn materialize_logical_data(data: &[u8], vector: &mut VectorData<Logical>) -> Result<()> {
462 let mut ctx = MaterializationContext::new(data);
463 ctx.materialize_logical_data(vector)
464}
465
466pub fn materialize_raw_data(data: &[u8], vector: &mut VectorData<u8>) -> Result<()> {
467 let mut ctx = MaterializationContext::new(data);
468 ctx.materialize_raw_data(vector)
469}
470
471pub fn materialize_complex_data(data: &[u8], vector: &mut VectorData<Complex>) -> Result<()> {
472 let mut ctx = MaterializationContext::new(data);
473 ctx.materialize_complex_data(vector)
474}