1use crate::functions::{Bytecode, ExecutionContext, UserFunction};
2use crate::gc_roots::InterpretContext;
3use crate::instr::Instr;
4#[cfg(feature = "native-accel")]
5use runmat_accelerate::fusion_exec::{
6 execute_centered_gram, execute_elementwise, execute_explained_variance,
7 execute_image_normalize, execute_matmul_epilogue, execute_power_step_normalize,
8 execute_reduction, FusionExecutionRequest,
9};
10#[cfg(feature = "native-accel")]
11use runmat_accelerate::{
12 activate_fusion_plan, deactivate_fusion_plan, fusion_residency, prepare_fusion_plan,
13 set_current_pc,
14};
15#[cfg(feature = "native-accel")]
16use runmat_accelerate::{
17 active_group_plan_clone, value_is_all_keyword, FusionKind, ReductionAxes, ShapeInfo,
18 ValueOrigin, VarKind,
19};
20use runmat_builtins::{Type, Value};
21use runmat_runtime::{
22 builtins::common::tensor,
23 builtins::stats::random::stochastic_evolution::stochastic_evolution_host,
24 call_builtin, gather_if_needed,
25 workspace::{self as runtime_workspace, WorkspaceResolver},
26};
27use std::cell::{Cell, RefCell};
28use std::collections::{HashMap, HashSet};
29use std::convert::TryInto;
30use std::sync::Once;
31#[cfg(feature = "native-accel")]
32use std::sync::OnceLock;
33
thread_local! {
    // Program counter of the instruction currently executing on this thread;
    // read by `mex` so error messages can report where a failure occurred.
    static CURRENT_PC: Cell<usize> = const { Cell::new(0) };
}
37
38#[inline]
39fn set_vm_pc(pc: usize) {
40 CURRENT_PC.with(|cell| cell.set(pc));
41}
42
43#[inline]
44fn current_pc() -> usize {
45 CURRENT_PC.with(|cell| cell.get())
46}
47
/// RAII guard that deactivates the thread's active fusion plan when the
/// interpreter exits (normally, via early return, or during unwinding).
#[cfg(feature = "native-accel")]
struct FusionPlanGuard;

#[cfg(feature = "native-accel")]
impl Drop for FusionPlanGuard {
    fn drop(&mut self) {
        // Mirrors the `activate_fusion_plan` call made at interpreter entry.
        deactivate_fusion_plan();
    }
}
57
/// Collects coarse timing for contiguous spans of host-executed instructions
/// when the RUNMAT_INTERPRETER_TIMING env var is truthy; spans are reported
/// through `log::debug!` when flushed.
struct InterpreterTiming {
    enabled: bool,
    // (start instant, pc of first instruction) of the currently open span.
    host_span_start: Option<(std::time::Instant, usize)>,
    // pc of the most recent instruction folded into the open span.
    host_span_last_pc: Option<usize>,
    // Number of instructions accumulated in the open span.
    host_span_instrs: u64,
    // Monotonic id attached to each flushed span for log correlation.
    seq: u64,
}
65
66impl InterpreterTiming {
67 fn new() -> Self {
68 let enabled = std::env::var("RUNMAT_INTERPRETER_TIMING")
69 .map(|v| v == "1" || v.eq_ignore_ascii_case("true") || v.eq_ignore_ascii_case("yes"))
70 .unwrap_or(false);
71 Self {
72 enabled,
73 host_span_start: None,
74 host_span_last_pc: None,
75 host_span_instrs: 0,
76 seq: 0,
77 }
78 }
79
80 fn note_host_instr(&mut self, pc: usize) {
81 if !self.enabled {
82 return;
83 }
84 if self.host_span_start.is_none() {
85 self.host_span_start = Some((std::time::Instant::now(), pc));
86 self.host_span_instrs = 0;
87 }
88 self.host_span_instrs += 1;
89 self.host_span_last_pc = Some(pc);
90 }
91
92 fn flush_host_span(&mut self, reason: &str, detail: Option<&str>) {
93 if !self.enabled {
94 return;
95 }
96 let Some((start, start_pc)) = self.host_span_start.take() else {
97 return;
98 };
99 let duration = start.elapsed();
100 let end_pc = self.host_span_last_pc.unwrap_or(start_pc);
101 let instrs = self.host_span_instrs.max(1);
102 if let Some(extra) = detail {
103 log::debug!(
104 "interpreter_host_span seq={} reason={} detail={} pc_span=[{}..{}] instrs={} duration_ns={}",
105 self.seq,
106 reason,
107 extra,
108 start_pc,
109 end_pc,
110 instrs,
111 duration.as_nanos()
112 );
113 } else {
114 log::debug!(
115 "interpreter_host_span seq={} reason={} pc_span=[{}..{}] instrs={} duration_ns={}",
116 self.seq,
117 reason,
118 start_pc,
119 end_pc,
120 instrs,
121 duration.as_nanos()
122 );
123 }
124 self.seq += 1;
125 self.host_span_last_pc = None;
126 self.host_span_instrs = 0;
127 }
128}
129
impl Drop for InterpreterTiming {
    fn drop(&mut self) {
        // Ensure any still-open host span is reported before the timer dies.
        self.flush_host_span("drop", None);
    }
}
135
/// Binary operation classes the interpreter may route through the
/// accelerator's operand-promotion logic before executing on the host.
#[derive(Clone, Copy)]
enum AutoBinaryOp {
    Elementwise,
    MatMul,
}

/// Unary operation classes eligible for accelerator operand promotion.
#[derive(Clone, Copy)]
enum AutoUnaryOp {
    Transpose,
}
146
/// Let the accelerator promote both operands of a binary op (e.g. move them
/// to device-resident storage) before execution; errors are stringified for
/// the interpreter's `Result<_, String>` convention.
#[cfg(feature = "native-accel")]
fn accel_promote_binary(op: AutoBinaryOp, a: &Value, b: &Value) -> Result<(Value, Value), String> {
    use runmat_accelerate::{promote_binary, BinaryOp};
    // Map the interpreter-local enum onto the accelerator's op taxonomy.
    let mapped = match op {
        AutoBinaryOp::Elementwise => BinaryOp::Elementwise,
        AutoBinaryOp::MatMul => BinaryOp::MatMul,
    };
    promote_binary(mapped, a, b).map_err(|e| e.to_string())
}
156
157#[cfg(not(feature = "native-accel"))]
158fn accel_promote_binary(_op: AutoBinaryOp, a: &Value, b: &Value) -> Result<(Value, Value), String> {
159 Ok((a.clone(), b.clone()))
160}
161
/// Let the accelerator promote the operand of a unary op before execution;
/// errors are stringified for the interpreter's error convention.
#[cfg(feature = "native-accel")]
fn accel_promote_unary(op: AutoUnaryOp, value: &Value) -> Result<Value, String> {
    use runmat_accelerate::{promote_unary, UnaryOp};
    // Map the interpreter-local enum onto the accelerator's op taxonomy.
    let mapped = match op {
        AutoUnaryOp::Transpose => UnaryOp::Transpose,
    };
    promote_unary(mapped, value).map_err(|e| e.to_string())
}
170
171#[cfg(not(feature = "native-accel"))]
172fn accel_promote_unary(_op: AutoUnaryOp, value: &Value) -> Result<Value, String> {
173 Ok(value.clone())
174}
175
/// Give the accelerator a chance to pre-stage/normalize builtin-call
/// arguments (per-builtin policy lives in `runmat_accelerate`).
#[cfg(feature = "native-accel")]
fn accel_prepare_args(name: &str, args: &[Value]) -> Result<Vec<Value>, String> {
    runmat_accelerate::prepare_builtin_args(name, args).map_err(|e| e.to_string())
}
180
181#[cfg(not(feature = "native-accel"))]
182fn accel_prepare_args(_name: &str, args: &[Value]) -> Result<Vec<Value>, String> {
183 Ok(args.to_vec())
184}
185
186fn call_builtin_auto(name: &str, args: &[Value]) -> Result<Value, String> {
187 let prepared = accel_prepare_args(name, args)?;
188 runmat_runtime::call_builtin(name, &prepared)
189}
190
/// True when RUNMAT_DEBUG_FUSION is set to a truthy value; the env var is
/// consulted once and the answer cached for the process lifetime.
#[cfg(feature = "native-accel")]
#[inline]
fn fusion_debug_enabled() -> bool {
    static FLAG: OnceLock<bool> = OnceLock::new();
    *FLAG.get_or_init(|| {
        std::env::var("RUNMAT_DEBUG_FUSION")
            .map(|v| v == "1" || v.eq_ignore_ascii_case("true") || v.eq_ignore_ascii_case("yes"))
            .unwrap_or(false)
    })
}
200
/// Debug aid: log the instructions surrounding a fusion group's span
/// (±3 instructions), tagging the current pc and the span boundaries.
/// Only active when fusion debugging and debug-level logging are both on.
#[cfg(feature = "native-accel")]
fn log_fusion_span_window(
    plan: &runmat_accelerate::FusionGroupPlan,
    bytecode: &Bytecode,
    pc: usize,
) {
    if !fusion_debug_enabled() || !log::log_enabled!(log::Level::Debug) {
        return;
    }
    if bytecode.instructions.is_empty() {
        return;
    }
    const WINDOW: usize = 3;
    let span = plan.group.span.clone();
    let total = bytecode.instructions.len();
    let start = span.start.saturating_sub(WINDOW);
    // Clamp the window to the instruction stream but never shrink it past the
    // span's own end (equivalent to the original two-step clamp).
    let end = (span.end + WINDOW).min(total.saturating_sub(1)).max(span.end);
    let rendered: Vec<String> = (start..=end)
        .map(|idx| {
            let instr = &bytecode.instructions[idx];
            let mut tags: Vec<&'static str> = Vec::new();
            if idx == pc {
                tags.push("pc");
            }
            if idx == span.start {
                tags.push("start");
            }
            if idx == span.end {
                tags.push("end");
            }
            let tag_str = if tags.is_empty() {
                String::new()
            } else {
                format!("<{}>", tags.join(","))
            };
            format!("{}{} {:?}", idx, tag_str, instr)
        })
        .collect();
    log::debug!(
        "fusion plan {} span window [{}..{}]: {}",
        plan.index,
        start,
        end,
        rendered.join(" | ")
    );
}
252
// Namespace used by `mex` to build MATLAB-compatible error identifiers.
const ERROR_NAMESPACE: &str = "MATLAB";
255
256#[inline]
257fn mex(id: &str, msg: &str) -> String {
258 let suffix = match id.find(':') {
261 Some(pos) => &id[pos + 1..],
262 None => id,
263 };
264 let ident = format!("{ERROR_NAMESPACE}:{suffix}");
265 let pc = current_pc();
266 format!("{ident} (pc={pc}): {msg}")
267}
268
/// One per-dimension index selection used when slicing an array.
#[derive(Clone)]
enum SliceSelector {
    /// `:` — select every index along the dimension.
    Colon,
    /// A single 1-based index.
    Scalar(usize),
    /// An explicit list of 1-based indices.
    Indices(Vec<usize>),
}
275
/// Fully-resolved slice: flat element indices plus result geometry.
#[derive(Debug, Clone)]
struct SlicePlan {
    /// 0-based linear (column-major) indices into the source array.
    indices: Vec<u32>,
    /// Shape of the sliced result.
    output_shape: Vec<usize>,
    /// Number of selected indices along each dimension.
    selection_lengths: Vec<usize>,
    /// Number of indexing dimensions the plan was built for (1 = linear).
    dims: usize,
}
283
/// Invoke `f` once for every combination in the cartesian product of `lists`,
/// odometer-style with the FIRST dimension varying fastest (column-major
/// order). Each callback receives one element picked from each list.
///
/// Does nothing when `lists` is empty. Panics if any inner list is empty
/// (the first combination indexes `lists[d][0]`).
fn cartesian_product<F: FnMut(&[usize])>(lists: &[Vec<usize>], mut f: F) {
    let dims = lists.len();
    if dims == 0 {
        return;
    }
    let mut idx = vec![0usize; dims];
    loop {
        let current: Vec<usize> = (0..dims).map(|d| lists[d][idx[d]]).collect();
        // Fix: this call was garbled to `f(¤t);` (mojibake of `&current`),
        // which is not valid Rust; pass the assembled combination by reference.
        f(&current);
        let mut d = 0usize;
        while d < dims {
            idx[d] += 1;
            if idx[d] < lists[d].len() {
                break;
            }
            idx[d] = 0;
            d += 1;
        }
        if d == dims {
            // Every dimension wrapped around: enumeration is complete.
            break;
        }
    }
}
307
/// Call `f` with every 0-based multi-dimensional position in a grid whose
/// extent along dimension `d` is `lengths[d]`, dimension 0 varying fastest.
/// No calls are made when `lengths` is empty or any extent is zero.
fn cartesian_positions<F: FnMut(&[usize])>(lengths: &[usize], mut f: F) {
    if lengths.is_empty() || lengths.contains(&0) {
        return;
    }
    let dims = lengths.len();
    let mut position = vec![0usize; dims];
    'outer: loop {
        f(&position);
        // Odometer increment: bump the fastest dimension, carrying on wrap.
        for d in 0..dims {
            position[d] += 1;
            if position[d] < lengths[d] {
                continue 'outer;
            }
            position[d] = 0;
        }
        // Every dimension wrapped around: enumeration is complete.
        break;
    }
}
330
/// Total element count implied by `shape`; an empty shape denotes a scalar
/// and yields 1.
///
/// Idiom: `Iterator::product` already returns 1 for an empty iterator, so the
/// previous explicit `is_empty` branch was redundant — behavior is unchanged.
fn total_len_from_shape(shape: &[usize]) -> usize {
    shape.iter().product()
}
338
/// Convert an index `Value` into a list of 1-based linear indices for an
/// array of `total_len` elements.
///
/// Scalars select a single element; logical arrays act as masks and must
/// match `total_len` exactly. A numeric tensor is treated as an explicit
/// index list UNLESS its element count equals `total_len`, in which case it
/// is interpreted as a 0/1 mask.
/// NOTE(review): that length-based mask heuristic also fires for genuine
/// index vectors of length `total_len` (e.g. a permutation a([2 1 3]) on a
/// 3-element array) — confirm this is the intended semantics.
fn indices_from_value_linear(value: &Value, total_len: usize) -> Result<Vec<usize>, String> {
    match value {
        Value::Num(n) => {
            // f64 -> isize cast truncates toward zero before validation.
            let idx = *n as isize;
            if idx < 1 || (idx as usize) > total_len {
                return Err(mex("IndexOutOfBounds", "Index out of bounds"));
            }
            Ok(vec![idx as usize])
        }
        Value::Int(int_val) => {
            let idx = int_val.to_i64();
            if idx < 1 || (idx as usize) > total_len {
                return Err(mex("IndexOutOfBounds", "Index out of bounds"));
            }
            Ok(vec![idx as usize])
        }
        Value::Tensor(idx_t) => {
            let len = idx_t.shape.iter().product::<usize>();
            if len == total_len {
                // Mask interpretation: keep 1-based positions of non-zeros.
                let mut indices = Vec::new();
                for (i, &val) in idx_t.data.iter().enumerate() {
                    if val != 0.0 {
                        indices.push(i + 1);
                    }
                }
                Ok(indices)
            } else {
                // Explicit index list: validate every entry against bounds.
                let mut indices = Vec::with_capacity(len);
                for &val in &idx_t.data {
                    let idx = val as isize;
                    if idx < 1 || (idx as usize) > total_len {
                        return Err(mex("IndexOutOfBounds", "Index out of bounds"));
                    }
                    indices.push(idx as usize);
                }
                Ok(indices)
            }
        }
        Value::LogicalArray(la) => {
            // Logical masks must cover the array exactly.
            if la.data.len() != total_len {
                return Err(mex(
                    "IndexShape",
                    "Logical mask length mismatch for linear indexing",
                ));
            }
            let mut indices = Vec::new();
            for (i, &b) in la.data.iter().enumerate() {
                if b != 0 {
                    indices.push(i + 1);
                }
            }
            Ok(indices)
        }
        _ => Err(mex(
            "UnsupportedIndexType",
            "Unsupported index type for linear indexing",
        )),
    }
}
398
/// Convert an index `Value` into a `SliceSelector` for one dimension of
/// length `dim_len`; per-dimension counterpart of `indices_from_value_linear`.
///
/// NOTE(review): as in the linear case, a numeric tensor whose element count
/// equals `dim_len` is interpreted as a 0/1 mask rather than an index list —
/// confirm that heuristic is intended.
fn selector_from_value_dim(value: &Value, dim_len: usize) -> Result<SliceSelector, String> {
    match value {
        Value::Num(n) => {
            // f64 -> isize cast truncates toward zero before validation.
            let idx = *n as isize;
            if idx < 1 || (idx as usize) > dim_len {
                return Err(mex("IndexOutOfBounds", "Index out of bounds"));
            }
            Ok(SliceSelector::Scalar(idx as usize))
        }
        Value::Int(int_val) => {
            let idx = int_val.to_i64();
            if idx < 1 || (idx as usize) > dim_len {
                return Err(mex("IndexOutOfBounds", "Index out of bounds"));
            }
            Ok(SliceSelector::Scalar(idx as usize))
        }
        Value::Tensor(idx_t) => {
            let len = idx_t.shape.iter().product::<usize>();
            if len == dim_len {
                // Mask interpretation: keep 1-based positions of non-zeros.
                let mut indices = Vec::new();
                for (i, &val) in idx_t.data.iter().enumerate() {
                    if val != 0.0 {
                        indices.push(i + 1);
                    }
                }
                Ok(SliceSelector::Indices(indices))
            } else {
                // Explicit index list: validate every entry against bounds.
                let mut indices = Vec::with_capacity(len);
                for &val in &idx_t.data {
                    let idx = val as isize;
                    if idx < 1 || (idx as usize) > dim_len {
                        return Err(mex("IndexOutOfBounds", "Index out of bounds"));
                    }
                    indices.push(idx as usize);
                }
                Ok(SliceSelector::Indices(indices))
            }
        }
        Value::LogicalArray(la) => {
            // Logical masks must cover the dimension exactly.
            if la.data.len() != dim_len {
                return Err(mex(
                    "IndexShape",
                    "Logical mask length mismatch for dimension",
                ));
            }
            let mut indices = Vec::new();
            for (i, &b) in la.data.iter().enumerate() {
                if b != 0 {
                    indices.push(i + 1);
                }
            }
            Ok(SliceSelector::Indices(indices))
        }
        _ => Err(mex(
            "UnsupportedIndexType",
            "Unsupported index type for slicing",
        )),
    }
}
458
/// Turn compiler-encoded slice arguments into one `SliceSelector` per
/// indexing dimension.
///
/// * `colon_mask` / `end_mask` — bit `d` set means dimension `d` was written
///   as `:` or `end` respectively.
/// * `numeric` — remaining index expressions, in dimension order, for the
///   dimensions covered by neither mask.
/// * `dims == 1` means linear indexing over the whole array.
fn build_slice_selectors(
    dims: usize,
    colon_mask: u32,
    end_mask: u32,
    numeric: &[Value],
    base_shape: &[usize],
) -> Result<Vec<SliceSelector>, String> {
    let mut selectors = Vec::with_capacity(dims);
    if dims == 1 {
        // Linear indexing: selectors range over the flattened element count.
        let total_len = total_len_from_shape(base_shape);
        if (colon_mask & 1u32) != 0 {
            // `a(:)` — select every element.
            selectors.push(SliceSelector::Indices((1..=total_len).collect()));
            return Ok(selectors);
        }
        if (end_mask & 1u32) != 0 {
            // `a(end)` — last element (clamped to at least 1 for empties).
            selectors.push(SliceSelector::Scalar(total_len.max(1)));
            return Ok(selectors);
        }
        let value = numeric.first().ok_or_else(|| {
            mex(
                "MissingNumericIndex",
                "missing numeric index for linear slice",
            )
        })?;
        let idxs = indices_from_value_linear(value, total_len)?;
        selectors.push(SliceSelector::Indices(idxs));
        return Ok(selectors);
    }

    // N-D indexing: walk dimensions, consuming numeric operands only for
    // dimensions not already covered by the colon/end masks.
    let mut numeric_iter = 0usize;
    for d in 0..dims {
        let is_colon = (colon_mask & (1u32 << d)) != 0;
        if is_colon {
            selectors.push(SliceSelector::Colon);
            continue;
        }
        // Missing trailing dimensions behave as singletons.
        let dim_len = base_shape.get(d).copied().unwrap_or(1);
        let is_end = (end_mask & (1u32 << d)) != 0;
        if is_end {
            selectors.push(SliceSelector::Scalar(dim_len));
            continue;
        }
        let value = numeric
            .get(numeric_iter)
            .ok_or_else(|| mex("MissingNumericIndex", "missing numeric index for slice"))?;
        numeric_iter += 1;
        selectors.push(selector_from_value_dim(value, dim_len)?);
    }
    Ok(selectors)
}
509
/// Resolve per-dimension selectors into a concrete `SlicePlan`: flat 0-based
/// column-major element indices plus the shape of the result.
///
/// 2-D selections preserve vector orientation (column vs row); any selection
/// totaling one element collapses to a 1x1 result.
fn build_slice_plan(
    selectors: &[SliceSelector],
    dims: usize,
    base_shape: &[usize],
) -> Result<SlicePlan, String> {
    let total_len = total_len_from_shape(base_shape);
    if dims == 1 {
        // Linear indexing: selector indices map directly to element order.
        let list = selectors
            .first()
            .cloned()
            .unwrap_or(SliceSelector::Indices(Vec::new()));
        let indices = match list {
            SliceSelector::Colon => (1..=total_len).collect::<Vec<usize>>(),
            SliceSelector::Scalar(i) => vec![i],
            SliceSelector::Indices(v) => v,
        };
        if indices.iter().any(|&i| i == 0 || i > total_len) {
            return Err(mex("IndexOutOfBounds", "Index out of bounds"));
        }
        let zero_based: Vec<u32> = indices.iter().map(|&i| (i - 1) as u32).collect();
        let count = zero_based.len();
        // Linear-slice results are column vectors (or 1x1 for <=1 element).
        let shape = if count <= 1 {
            vec![1, 1]
        } else {
            vec![count, 1]
        };
        return Ok(SlicePlan {
            indices: zero_based,
            output_shape: shape,
            selection_lengths: vec![count],
            dims,
        });
    }

    // Materialize the index list for every dimension and validate bounds.
    let mut selection_lengths = Vec::with_capacity(dims);
    let mut per_dim_lists: Vec<Vec<usize>> = Vec::with_capacity(dims);
    for (d, sel) in selectors.iter().enumerate().take(dims) {
        let dim_len = base_shape.get(d).copied().unwrap_or(1);
        let idxs = match sel {
            SliceSelector::Colon => (1..=dim_len).collect::<Vec<usize>>(),
            SliceSelector::Scalar(i) => vec![*i],
            SliceSelector::Indices(v) => v.clone(),
        };
        if idxs.iter().any(|&i| i == 0 || i > dim_len) {
            return Err(mex("IndexOutOfBounds", "Index out of bounds"));
        }
        selection_lengths.push(idxs.len());
        per_dim_lists.push(idxs);
    }

    // Any empty dimension makes the whole selection empty: no indices, but
    // the output shape still reflects the (possibly reoriented) selection.
    if selection_lengths.contains(&0) {
        let mut out_shape = selection_lengths.clone();
        if dims == 2 {
            if selection_lengths[0] > 1 && selection_lengths[1] == 1 {
                out_shape = vec![selection_lengths[0], 1];
            } else if selection_lengths[0] == 1 && selection_lengths[1] > 1 {
                out_shape = vec![1, selection_lengths[1]];
            }
        }
        return Ok(SlicePlan {
            indices: Vec::new(),
            output_shape: out_shape,
            selection_lengths,
            dims,
        });
    }

    // Pad the base shape with singleton dimensions up to `dims`.
    let mut base_norm = base_shape.to_vec();
    if base_norm.len() < dims {
        base_norm.resize(dims, 1);
    }

    // Column-major strides: stride of dim d is the product of earlier extents.
    let mut strides = vec![1usize; dims];
    for d in 1..dims {
        strides[d] = strides[d - 1] * base_norm[d - 1].max(1);
    }

    // Enumerate selections first-dimension-fastest and linearize each one.
    let mut indices = Vec::new();
    cartesian_product(&per_dim_lists, |multi| {
        let mut lin = 0usize;
        for d in 0..dims {
            let idx = multi[d] - 1;
            lin += idx * strides[d];
        }
        indices.push(lin as u32);
    });

    let mut out_shape = selection_lengths.clone();
    if dims == 2 {
        // Preserve column/row vector orientation for 2-D selections.
        if selection_lengths[0] > 1 && selection_lengths[1] == 1 {
            out_shape = vec![selection_lengths[0], 1];
        } else if selection_lengths[0] == 1 && selection_lengths[1] > 1 {
            out_shape = vec![1, selection_lengths[1]];
        }
    }
    let total_out: usize = selection_lengths.iter().product();
    if total_out == 1 {
        // A single-element selection is always reported as 1x1.
        out_shape = vec![1, 1];
    }

    Ok(SlicePlan {
        indices,
        output_shape: out_shape,
        selection_lengths,
        dims,
    })
}
617
618fn gather_string_slice(
619 sa: &runmat_builtins::StringArray,
620 plan: &SlicePlan,
621) -> Result<Value, String> {
622 if plan.indices.is_empty() {
623 let empty = runmat_builtins::StringArray::new(Vec::new(), plan.output_shape.clone())
624 .map_err(|e| format!("Slice error: {e}"))?;
625 return Ok(Value::StringArray(empty));
626 }
627 if plan.indices.len() == 1 {
628 let lin = plan.indices[0] as usize;
629 let value = sa
630 .data
631 .get(lin)
632 .cloned()
633 .ok_or_else(|| "Slice error: string index out of bounds".to_string())?;
634 return Ok(Value::String(value));
635 }
636 let mut out = Vec::with_capacity(plan.indices.len());
637 for &lin in &plan.indices {
638 let idx = lin as usize;
639 let value = sa
640 .data
641 .get(idx)
642 .cloned()
643 .ok_or_else(|| "Slice error: string index out of bounds".to_string())?;
644 out.push(value);
645 }
646 let out_sa = runmat_builtins::StringArray::new(out, plan.output_shape.clone())
647 .map_err(|e| format!("Slice error: {e}"))?;
648 Ok(Value::StringArray(out_sa))
649}
650
/// Right-hand side of a string slice assignment, pre-normalized for scatter.
enum StringAssignView {
    /// A single string broadcast to every selected element.
    Scalar(String),
    /// Array RHS with shape/strides aligned to the selection dimensions;
    /// singleton dimensions broadcast during scatter.
    Array {
        data: Vec<String>,
        shape: Vec<usize>,
        strides: Vec<usize>,
    },
}
659
660fn build_string_rhs_view(
661 rhs: &Value,
662 selection_lengths: &[usize],
663) -> Result<StringAssignView, String> {
664 let dims = selection_lengths.len().max(1);
665 match rhs {
666 Value::String(s) => Ok(StringAssignView::Scalar(s.clone())),
667 Value::Num(n) => Ok(StringAssignView::Scalar(n.to_string())),
668 Value::Int(i) => Ok(StringAssignView::Scalar(i.to_i64().to_string())),
669 Value::Tensor(t) => {
670 let mut shape = t.shape.clone();
671 if shape.len() < dims {
672 shape.resize(dims, 1);
673 } else if shape.len() > dims {
674 if shape.iter().skip(dims).any(|&s| s != 1) {
675 return Err("shape mismatch for slice assign".to_string());
676 }
677 shape.truncate(dims);
678 }
679 for (rhs_len, sel_len) in shape.iter().zip(selection_lengths.iter()) {
680 if !(*rhs_len == 1 || *rhs_len == *sel_len) {
681 return Err("shape mismatch for slice assign".to_string());
682 }
683 }
684 let mut strides = vec![1usize; dims];
685 for d in 1..dims {
686 strides[d] = strides[d - 1] * shape[d - 1].max(1);
687 }
688 let data = t.data.iter().map(|v| v.to_string()).collect();
689 Ok(StringAssignView::Array {
690 data,
691 shape,
692 strides,
693 })
694 }
695 Value::StringArray(sa) => {
696 let mut shape = sa.shape.clone();
697 if shape.len() < dims {
698 shape.resize(dims, 1);
699 } else if shape.len() > dims {
700 if shape.iter().skip(dims).any(|&s| s != 1) {
701 return Err("shape mismatch for slice assign".to_string());
702 }
703 shape.truncate(dims);
704 }
705 for (rhs_len, sel_len) in shape.iter().zip(selection_lengths.iter()) {
706 if !(*rhs_len == 1 || *rhs_len == *sel_len) {
707 return Err("shape mismatch for slice assign".to_string());
708 }
709 }
710 let mut strides = vec![1usize; dims];
711 for d in 1..dims {
712 strides[d] = strides[d - 1] * shape[d - 1].max(1);
713 }
714 Ok(StringAssignView::Array {
715 data: sa.data.clone(),
716 shape,
717 strides,
718 })
719 }
720 _ => Err("rhs must be string or string array".to_string()),
721 }
722}
723
/// Write `view` into `sa` at the destinations described by `plan`.
///
/// Walks the selection grid in the same first-dimension-fastest order used
/// when `plan.indices` was built, pairing each grid position with the next
/// precomputed linear destination. Singleton RHS dimensions broadcast.
/// Out-of-range destinations and exhausted index iterators are skipped
/// silently rather than reported.
fn scatter_string_with_plan(
    sa: &mut runmat_builtins::StringArray,
    plan: &SlicePlan,
    view: &StringAssignView,
) -> Result<(), String> {
    if plan.indices.is_empty() {
        // Empty selection: nothing to write.
        return Ok(());
    }
    // The flat destination indices were produced by the same cartesian walk,
    // so consuming them in step with the positions keeps them aligned.
    let mut idx_iter = plan.indices.iter();
    cartesian_positions(&plan.selection_lengths, |position| {
        if let Some(&lin) = idx_iter.next() {
            let replacement = match view {
                StringAssignView::Scalar(s) => s.clone(),
                StringAssignView::Array {
                    data,
                    shape,
                    strides,
                } => {
                    // Linearize the grid position into the RHS, broadcasting
                    // (index 0) along singleton RHS dimensions.
                    let mut rlin = 0usize;
                    for (d, &pos_val) in position.iter().enumerate() {
                        let rhs_len = shape.get(d).copied().unwrap_or(1);
                        let pos = if rhs_len == 1 { 0 } else { pos_val };
                        rlin += pos * strides.get(d).copied().unwrap_or(1);
                    }
                    data.get(rlin).cloned().unwrap_or_default()
                }
            };
            if let Some(slot) = sa.data.get_mut(lin as usize) {
                *slot = replacement;
            }
        }
    });
    Ok(())
}
758
/// Rewrite numeric index operands that were written as `end-k` expressions.
///
/// `end_offsets` holds (position within `numeric`, k) pairs. For each, the
/// dimension that numeric position refers to is recovered by walking the
/// dims and skipping those already consumed by the colon/end masks; the
/// operand is then replaced by `dim_len - k` as a `Value::Num`.
///
/// NOTE(review): if `position` exceeds the count of plain numeric dims, the
/// search loop leaves `dim_for_pos` at 0 and dimension 0 is used silently —
/// confirm callers can never hit that case. A resulting value <= 0 is not
/// rejected here; downstream index validation reports it as out of bounds.
fn apply_end_offsets_to_numeric(
    numeric: &[Value],
    dims: usize,
    colon_mask: u32,
    end_mask: u32,
    end_offsets: &[(usize, i64)],
    base_shape: &[usize],
) -> Vec<Value> {
    let mut adjusted = numeric.to_vec();
    for (position, offset) in end_offsets {
        if let Some(value) = adjusted.get_mut(*position) {
            // Map the numeric-operand position back to its dimension index by
            // counting only dimensions not covered by the colon/end masks.
            let mut seen_numeric = 0usize;
            let mut dim_for_pos = 0usize;
            for d in 0..dims {
                let is_colon = (colon_mask & (1u32 << d)) != 0;
                let is_end = (end_mask & (1u32 << d)) != 0;
                if is_colon || is_end {
                    continue;
                }
                if seen_numeric == *position {
                    dim_for_pos = d;
                    break;
                }
                seen_numeric += 1;
            }
            let dim_len = base_shape.get(dim_for_pos).copied().unwrap_or(1);
            let idx_val = (dim_len as isize) - (*offset as isize);
            *value = Value::Num(idx_val as f64);
        }
    }
    adjusted
}
791
792fn materialize_rhs_linear(rhs: &Value, count: usize) -> Result<Vec<f64>, String> {
793 if count == 0 {
794 return Ok(Vec::new());
795 }
796 let host_rhs = runmat_runtime::gather_if_needed(rhs)?;
797 match host_rhs {
798 Value::Num(n) => Ok(vec![n; count]),
799 Value::Int(int_val) => Ok(vec![int_val.to_f64(); count]),
800 Value::Bool(b) => Ok(vec![if b { 1.0 } else { 0.0 }; count]),
801 Value::Tensor(t) => {
802 if t.data.len() == count {
803 Ok(t.data)
804 } else if t.data.len() == 1 {
805 Ok(vec![t.data[0]; count])
806 } else {
807 Err("shape mismatch for slice assign".to_string())
808 }
809 }
810 Value::LogicalArray(la) => {
811 if la.data.len() == count {
812 let out: Vec<f64> = la
813 .data
814 .into_iter()
815 .map(|b| if b != 0 { 1.0 } else { 0.0 })
816 .collect();
817 Ok(out)
818 } else if la.data.len() == 1 {
819 let val = if la.data[0] != 0 { 1.0 } else { 0.0 };
820 Ok(vec![val; count])
821 } else {
822 Err("shape mismatch for slice assign".to_string())
823 }
824 }
825 other => Err(format!("slice assign: unsupported RHS type {:?}", other)),
826 }
827}
828
/// Expand an assignment RHS into a flat f64 list matching an N-D slice
/// selection (`selection_lengths[d]` = number of selected indices along dim
/// `d`). Scalars broadcast everywhere; each tensor/logical RHS dimension must
/// be 1 (broadcast) or equal the selection length. Output order matches the
/// first-dimension-fastest walk used by `cartesian_positions`, aligning it
/// with the destination indices built by `build_slice_plan`.
fn materialize_rhs_nd(rhs: &Value, selection_lengths: &[usize]) -> Result<Vec<f64>, String> {
    let total: usize = selection_lengths.iter().copied().product();
    if total == 0 {
        return Ok(Vec::new());
    }
    // Bring accelerator-resident data back to the host before reading it.
    let rhs_host = runmat_runtime::gather_if_needed(rhs)?;
    // Local view over the RHS: either a broadcast scalar or strided data.
    enum RhsView {
        Scalar(f64),
        Tensor {
            data: Vec<f64>,
            shape: Vec<usize>,
            strides: Vec<usize>,
        },
    }
    let view = match rhs_host {
        Value::Num(n) => RhsView::Scalar(n),
        Value::Int(iv) => RhsView::Scalar(iv.to_f64()),
        Value::Bool(b) => RhsView::Scalar(if b { 1.0 } else { 0.0 }),
        Value::Tensor(t) => {
            // Normalize rank: pad with trailing singletons, and allow extra
            // trailing dims only when they are singletons.
            let mut shape = t.shape.clone();
            if shape.len() < selection_lengths.len() {
                shape.resize(selection_lengths.len(), 1);
            }
            if shape.len() > selection_lengths.len() {
                if shape.iter().skip(selection_lengths.len()).any(|&s| s != 1) {
                    return Err("shape mismatch for slice assign".to_string());
                }
                shape.truncate(selection_lengths.len());
            }
            // Each dim must broadcast (len 1) or match the selection length.
            for (dim_len, &sel_len) in shape.iter().zip(selection_lengths.iter()) {
                if *dim_len != 1 && *dim_len != sel_len {
                    return Err("shape mismatch for slice assign".to_string());
                }
            }
            // Column-major strides over the normalized shape.
            let mut strides = vec![1usize; selection_lengths.len()];
            for d in 1..selection_lengths.len() {
                strides[d] = strides[d - 1] * shape[d - 1].max(1);
            }
            // Data length must agree with the normalized shape's volume.
            if t.data.len()
                != shape
                    .iter()
                    .copied()
                    .fold(1usize, |acc, len| acc.saturating_mul(len.max(1)))
            {
                return Err("shape mismatch for slice assign".to_string());
            }
            RhsView::Tensor {
                data: t.data,
                shape,
                strides,
            }
        }
        Value::LogicalArray(la) => {
            // Same normalization as the tensor branch, converting 0/1 bytes
            // to f64 at the end.
            if la.shape.len() > selection_lengths.len()
                && la
                    .shape
                    .iter()
                    .skip(selection_lengths.len())
                    .any(|&s| s != 1)
            {
                return Err("shape mismatch for slice assign".to_string());
            }
            let mut shape = la.shape.clone();
            if shape.len() < selection_lengths.len() {
                shape.resize(selection_lengths.len(), 1);
            } else {
                shape.truncate(selection_lengths.len());
            }
            for (dim_len, &sel_len) in shape.iter().zip(selection_lengths.iter()) {
                if *dim_len != 1 && *dim_len != sel_len {
                    return Err("shape mismatch for slice assign".to_string());
                }
            }
            let mut strides = vec![1usize; selection_lengths.len()];
            for d in 1..selection_lengths.len() {
                strides[d] = strides[d - 1] * shape[d - 1].max(1);
            }
            if la.data.len()
                != shape
                    .iter()
                    .copied()
                    .fold(1usize, |acc, len| acc.saturating_mul(len.max(1)))
            {
                return Err("shape mismatch for slice assign".to_string());
            }
            let data: Vec<f64> = la
                .data
                .into_iter()
                .map(|b| if b != 0 { 1.0 } else { 0.0 })
                .collect();
            RhsView::Tensor {
                data,
                shape,
                strides,
            }
        }
        other => return Err(format!("slice assign: unsupported RHS type {:?}", other)),
    };

    // Walk the selection grid and pull the matching RHS element for each
    // position, broadcasting (index 0) along singleton RHS dimensions.
    let mut out = Vec::with_capacity(total);
    cartesian_positions(selection_lengths, |positions| match &view {
        RhsView::Scalar(val) => out.push(*val),
        RhsView::Tensor {
            data,
            shape,
            strides,
        } => {
            let mut rlin = 0usize;
            for d in 0..positions.len() {
                let rhs_len = shape[d];
                let pos = if rhs_len == 1 { 0 } else { positions[d] };
                rlin += pos * strides[d];
            }
            let value = data.get(rlin).copied().unwrap_or(0.0);
            out.push(value);
        }
    });
    Ok(out)
}
948
thread_local! {
    /// Values of `global` variables for this thread, keyed by name.
    static GLOBALS: RefCell<HashMap<String, Value>> = RefCell::new(HashMap::new());
}

thread_local! {
    /// Persistent variable storage keyed by a (String, usize) pair —
    /// presumably (function name, variable slot); confirm against writers.
    static PERSISTENTS: RefCell<HashMap<(String, usize), Value>> = RefCell::new(HashMap::new());
}

thread_local! {
    /// Persistent variable storage keyed by a (String, String) pair —
    /// presumably (function name, variable name); confirm against writers.
    static PERSISTENTS_BY_NAME: RefCell<HashMap<(String, String), Value>> = RefCell::new(HashMap::new());
}
960
/// Borrowed view of the interpreter's variable slots, stashed in TLS so the
/// runtime's `WorkspaceResolver` can look variables up by name mid-execution.
///
/// SAFETY: `data_ptr`/`len` alias the interpreter's live `vars` vector. The
/// pointer is re-synced via `refresh_workspace_state` after the vector may
/// have reallocated, and the state is removed by `WorkspaceStateGuard`'s
/// `Drop` before `vars` goes away.
struct WorkspaceState {
    /// Variable name -> slot index in the vars vector.
    names: HashMap<String, usize>,
    /// Names that have actually been assigned; others are invisible.
    assigned: HashSet<String>,
    /// Raw pointer to the first element of the live vars vector.
    data_ptr: *const Value,
    /// Length of the vars vector at the last refresh.
    len: usize,
}

/// (name -> slot index, set of assigned names): the workspace layout handed
/// between the embedding host and the interpreter.
type WorkspaceSnapshot = (HashMap<String, usize>, HashSet<String>);
969
thread_local! {
    /// Active workspace view for the currently-running interpreter, if any.
    static WORKSPACE_STATE: RefCell<Option<WorkspaceState>> = const { RefCell::new(None) };
    /// Layout queued by `push_pending_workspace` for the next interpreter run.
    static PENDING_WORKSPACE: RefCell<Option<WorkspaceSnapshot>> = const { RefCell::new(None) };
    /// Layout left behind by the last run; drained by
    /// `take_updated_workspace_state`.
    static LAST_WORKSPACE_STATE: RefCell<Option<WorkspaceSnapshot>> = const { RefCell::new(None) };
}
975
/// RAII guard: on drop, moves the active workspace state's metadata into
/// `LAST_WORKSPACE_STATE` so the caller can recover the (possibly updated)
/// name map and assigned set after interpretation finishes.
struct WorkspaceStateGuard;

impl Drop for WorkspaceStateGuard {
    fn drop(&mut self) {
        WORKSPACE_STATE.with(|state| {
            let mut state_mut = state.borrow_mut();
            if let Some(ws) = state_mut.take() {
                // Only names/assigned survive; the raw data pointer would
                // dangle once the interpreter's vars vector is dropped.
                LAST_WORKSPACE_STATE.with(|slot| {
                    *slot.borrow_mut() = Some((ws.names, ws.assigned));
                });
            }
        });
    }
}
990
/// Install a workspace view over `vars` in TLS and return a guard that
/// archives the (names, assigned) metadata when dropped.
///
/// SAFETY contract: stores a raw pointer to `vars`' buffer, so the guard must
/// not outlive `vars`, and `refresh_workspace_state` must be called after any
/// operation that may reallocate the vector.
fn set_workspace_state(
    names: HashMap<String, usize>,
    assigned: HashSet<String>,
    vars: &[Value],
) -> WorkspaceStateGuard {
    WORKSPACE_STATE.with(|state| {
        *state.borrow_mut() = Some(WorkspaceState {
            names,
            assigned,
            data_ptr: vars.as_ptr(),
            len: vars.len(),
        });
    });
    WorkspaceStateGuard
}
1006
/// Re-point the TLS workspace view at `vars`' current buffer and length.
/// Must be called after any operation that may have reallocated the vector;
/// no-op when no workspace view is installed.
fn refresh_workspace_state(vars: &[Value]) {
    WORKSPACE_STATE.with(|state| {
        if let Some(ws) = state.borrow_mut().as_mut() {
            ws.data_ptr = vars.as_ptr();
            ws.len = vars.len();
        }
    });
}
1015
/// Resolve a variable by name from the active workspace view.
///
/// Returns `None` when no view is installed, the name is unknown or
/// unassigned, or the recorded slot index is out of range.
fn workspace_lookup(name: &str) -> Option<Value> {
    WORKSPACE_STATE.with(|state| {
        let state_ref = state.borrow();
        let ws = state_ref.as_ref()?;
        let idx = ws.names.get(name)?;
        if !ws.assigned.contains(name) {
            return None;
        }
        if *idx >= ws.len {
            return None;
        }
        // SAFETY: `idx < ws.len` was checked above, and `data_ptr`/`len` are
        // kept in sync with the live vars vector by `refresh_workspace_state`.
        unsafe {
            let ptr = ws.data_ptr.add(*idx);
            Some((*ptr).clone())
        }
    })
}
1033
/// Collect all assigned workspace variables as (name, value) pairs, sorted
/// by name; returns an empty list when no workspace view is installed.
fn workspace_snapshot() -> Vec<(String, Value)> {
    WORKSPACE_STATE.with(|state| {
        if let Some(ws) = state.borrow().as_ref() {
            let mut entries: Vec<(String, Value)> = ws
                .names
                .iter()
                .filter_map(|(name, idx)| {
                    // Skip stale slots and names never actually assigned.
                    if *idx >= ws.len {
                        return None;
                    }
                    if !ws.assigned.contains(name) {
                        return None;
                    }
                    // SAFETY: `idx < ws.len` was checked above; the
                    // pointer/len pair tracks the live vars vector.
                    unsafe {
                        let ptr = ws.data_ptr.add(*idx);
                        Some((name.clone(), (*ptr).clone()))
                    }
                })
                .collect();
            entries.sort_by(|a, b| a.0.cmp(&b.0));
            entries
        } else {
            Vec::new()
        }
    })
}
1060
1061fn workspace_global_names() -> Vec<String> {
1062 let mut names = Vec::new();
1063 GLOBALS.with(|globals| {
1064 let map = globals.borrow();
1065 for key in map.keys() {
1066 if !key.starts_with("var_") {
1067 names.push(key.clone());
1068 }
1069 }
1070 });
1071 names.sort();
1072 names
1073}
1074
/// Assign `value` to `name` in the active workspace: reuse the existing slot
/// or allocate a new one (growing `vars`), mark the name assigned, and
/// re-sync the TLS pointer/len with the possibly-reallocated vector.
/// Fails when no workspace view is installed.
fn set_workspace_variable(name: &str, value: Value, vars: &mut Vec<Value>) -> Result<(), String> {
    let mut result = Ok(());
    WORKSPACE_STATE.with(|state| {
        let mut state_mut = state.borrow_mut();
        match state_mut.as_mut() {
            Some(ws) => {
                // Known name -> reuse its slot; otherwise append a new slot.
                let idx = if let Some(idx) = ws.names.get(name).copied() {
                    idx
                } else {
                    let idx = vars.len();
                    ws.names.insert(name.to_string(), idx);
                    idx
                };
                if idx >= vars.len() {
                    // Pad intermediate slots with a numeric zero placeholder.
                    vars.resize(idx + 1, Value::Num(0.0));
                }
                vars[idx] = value;
                // The resize above may have moved the buffer; re-sync.
                ws.data_ptr = vars.as_ptr();
                ws.len = vars.len();
                ws.assigned.insert(name.to_string());
            }
            None => {
                result = Err("load: workspace state unavailable".to_string());
            }
        }
    });
    result
}
1103
1104fn assign_loaded_variables(
1105 vars: &mut Vec<Value>,
1106 entries: &[(String, Value)],
1107) -> Result<(), String> {
1108 for (name, value) in entries {
1109 set_workspace_variable(name, value.clone(), vars)?;
1110 }
1111 refresh_workspace_state(vars);
1112 Ok(())
1113}
1114
/// Register the TLS-backed workspace resolver with the runtime exactly once
/// per process, so runtime builtins can read interpreter variables by name.
fn ensure_workspace_resolver_registered() {
    static REGISTER: Once = Once::new();
    REGISTER.call_once(|| {
        runtime_workspace::register_workspace_resolver(WorkspaceResolver {
            lookup: workspace_lookup,
            snapshot: workspace_snapshot,
            globals: workspace_global_names,
        });
    });
}
1125
/// Clears the pending-workspace slot on drop, so a queued layout cannot leak
/// into an unrelated later interpreter run on the same thread.
pub struct PendingWorkspaceGuard;

impl Drop for PendingWorkspaceGuard {
    fn drop(&mut self) {
        PENDING_WORKSPACE.with(|slot| {
            slot.borrow_mut().take();
        });
    }
}
1135
/// Queue a workspace layout (name map + assigned set) for the next
/// `interpret_with_vars` call on this thread. The returned guard clears the
/// slot if it is still pending when dropped.
pub fn push_pending_workspace(
    names: HashMap<String, usize>,
    assigned: HashSet<String>,
) -> PendingWorkspaceGuard {
    PENDING_WORKSPACE.with(|slot| {
        *slot.borrow_mut() = Some((names, assigned));
    });
    PendingWorkspaceGuard
}
1145
/// Retrieve (and clear) the workspace layout left behind by the most recent
/// interpreter run on this thread, if any.
pub fn take_updated_workspace_state() -> Option<(HashMap<String, usize>, HashSet<String>)> {
    LAST_WORKSPACE_STATE.with(|slot| slot.borrow_mut().take())
}
1149
thread_local! {
    /// Per-thread list of (usize, usize) tallies — presumably (call-site or
    /// function index, invocation count); no reader/writer is visible in
    /// this chunk, so confirm the key meaning against the rest of the file.
    static CALL_COUNTS: RefCell<Vec<(usize, usize)>> = const { RefCell::new(Vec::new()) };
}
1154
/// Relational-operator dispatch used by the interpreter's main loop.
///
/// Pops `b` then `a` off `$stack` and pushes `a $op b` as Num 1.0/0.0.
/// If either operand is a `Value::Object`, operator overloading is attempted
/// first through the `call_method` builtin using `$name` — with the operand
/// order mirrored ("lt"<->"gt", "le"<->"ge") when the object is on the right
/// so the object always receives itself as the left operand. On overload
/// failure it falls back to numeric comparison via `TryInto<f64>`.
macro_rules! handle_rel_binary { ($op:tt, $name:literal, $stack:ident) => {{
    let b = $stack.pop().ok_or(mex("StackUnderflow","stack underflow"))?; let a = $stack.pop().ok_or(mex("StackUnderflow","stack underflow"))?;
    match (&a, &b) {
        (Value::Object(obj), _) => { let args = vec![Value::Object(obj.clone()), Value::String($name.to_string()), b.clone()]; match call_builtin("call_method", &args) { Ok(v) => $stack.push(v), Err(_) => { let aa: f64 = (&a).try_into()?; let bb: f64 = (&b).try_into()?; $stack.push(Value::Num(if aa $op bb {1.0}else{0.0})) } } }
        (_, Value::Object(obj)) => { let rev = match $name { "lt" => "gt", "le" => "ge", "gt" => "lt", "ge" => "le", other => other };
            let args = vec![Value::Object(obj.clone()), Value::String(rev.to_string()), a.clone()]; match call_builtin("call_method", &args) { Ok(v) => $stack.push(v), Err(_) => { let aa: f64 = (&a).try_into()?; let bb: f64 = (&b).try_into()?; $stack.push(Value::Num(if aa $op bb {1.0}else{0.0})) } } }
        _ => { let bb: f64 = (&b).try_into()?; let aa: f64 = (&a).try_into()?; $stack.push(Value::Num(if aa $op bb {1.0}else{0.0})) }
    }
}}; }
1164pub fn interpret_with_vars(
1165 bytecode: &Bytecode,
1166 initial_vars: &mut [Value],
1167 current_function_name: Option<&str>,
1168) -> Result<Vec<Value>, String> {
1169 ensure_workspace_resolver_registered();
1170 #[cfg(feature = "native-accel")]
1171 let fusion_plan = prepare_fusion_plan(bytecode.accel_graph.as_ref(), &bytecode.fusion_groups);
1172 #[cfg(feature = "native-accel")]
1173 activate_fusion_plan(fusion_plan.clone());
1174 #[cfg(feature = "native-accel")]
1175 let _fusion_guard = FusionPlanGuard;
1176 let mut stack: Vec<Value> = Vec::new();
1177 let mut vars = initial_vars.to_vec();
1178 if vars.len() < bytecode.var_count {
1179 vars.resize(bytecode.var_count, Value::Num(0.0));
1180 }
1181 let pending_state = PENDING_WORKSPACE.with(|slot| slot.borrow_mut().take());
1182 let _workspace_guard = pending_state.map(|(names, assigned)| {
1183 let filtered_assigned: HashSet<String> = assigned
1184 .into_iter()
1185 .filter(|name| names.contains_key(name))
1186 .collect();
1187 set_workspace_state(names, filtered_assigned, &vars)
1188 });
1189 refresh_workspace_state(&vars);
1190 let mut pc: usize = 0;
1191 let mut context = ExecutionContext {
1192 call_stack: Vec::new(),
1193 locals: Vec::new(),
1194 instruction_pointer: 0,
1195 functions: bytecode.functions.clone(),
1196 };
1197 let mut _gc_context = InterpretContext::new(&stack, &vars)?;
1198 let mut thread_roots: Vec<Value> = Vec::new();
1200 GLOBALS.with(|g| {
1201 for v in g.borrow().values() {
1202 thread_roots.push(v.clone());
1203 }
1204 });
1205 PERSISTENTS.with(|p| {
1206 for v in p.borrow().values() {
1207 thread_roots.push(v.clone());
1208 }
1209 });
1210 PERSISTENTS_BY_NAME.with(|p| {
1212 for v in p.borrow().values() {
1213 thread_roots.push(v.clone());
1214 }
1215 });
1216 let _ = _gc_context.register_global_values(thread_roots, "thread_globals_persistents");
1217 let current_func_name_str: String = current_function_name
1218 .map(|s| s.to_string())
1219 .unwrap_or_else(|| "<main>".to_string());
1220 let mut global_aliases: HashMap<usize, String> = HashMap::new();
1222 let mut persistent_aliases: HashMap<usize, String> = HashMap::new();
1223 let mut try_stack: Vec<(usize, Option<usize>)> = Vec::new();
1225 let mut last_exception: Option<runmat_builtins::MException> = None;
1227 let mut imports: Vec<(Vec<String>, bool)> = Vec::new();
1229 let _resolve_static =
1231 |imports: &Vec<(Vec<String>, bool)>, name: &str| -> Option<(String, String)> {
1232 for (path, wildcard) in imports {
1234 if !*wildcard {
1235 continue;
1236 }
1237 if path.len() == 1 {
1238 let class_name = path[0].clone();
1240 return Some((class_name, name.to_string()));
1242 }
1243 }
1244 None
1245 };
1246 #[inline]
1247 fn bench_start() -> Option<std::time::Instant> {
1248 None
1249 }
1250 #[inline]
1251 fn bench_end(_label: &str, _start: Option<std::time::Instant>) {}
1252 let debug_stack = std::env::var("RUNMAT_DEBUG_STACK")
1253 .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
1254 .unwrap_or(false);
1255 let mut interpreter_timing = InterpreterTiming::new();
1256 macro_rules! vm_bail {
1257 ($err:expr) => {{
1258 let e: String = $err.to_string();
1259 if let Some((catch_pc, catch_var)) = try_stack.pop() {
1260 if let Some(var_idx) = catch_var {
1261 if var_idx >= vars.len() {
1262 vars.resize(var_idx + 1, Value::Num(0.0));
1263 refresh_workspace_state(&vars);
1264 }
1265 let mex = parse_exception(&e);
1266 last_exception = Some(mex.clone());
1267 vars[var_idx] = Value::MException(mex);
1268 }
1269 pc = catch_pc;
1270 continue;
1271 } else {
1272 return Err(e);
1273 }
1274 }};
1275 }
1276 while pc < bytecode.instructions.len() {
1277 set_vm_pc(pc);
1278 #[cfg(feature = "native-accel")]
1279 set_current_pc(pc);
1280 #[cfg(feature = "native-accel")]
1281 if let (Some(plan), Some(graph)) =
1282 (active_group_plan_clone(), bytecode.accel_graph.as_ref())
1283 {
1284 if plan.group.span.start == pc {
1285 #[cfg(feature = "native-accel")]
1286 {
1287 let detail = format!(
1288 "plan={} kind={:?} span=[{}..{}]",
1289 plan.index, plan.group.kind, plan.group.span.start, plan.group.span.end
1290 );
1291 interpreter_timing.flush_host_span("before_fusion", Some(detail.as_str()));
1292 }
1293 #[cfg(feature = "native-accel")]
1294 log_fusion_span_window(&plan, bytecode, pc);
1295 match try_execute_fusion_group(&plan, graph, &mut stack, &mut vars, &context) {
1296 Ok(result) => {
1297 stack.push(result);
1298 pc = plan.group.span.end + 1;
1299 continue;
1300 }
1301 Err(err) => {
1302 log::debug!("fusion fallback at pc {}: {}", pc, err);
1303 }
1304 }
1305 }
1306 }
1307 interpreter_timing.note_host_instr(pc);
1308 if debug_stack {
1309 eprintln!(
1310 "Instr pc={} {:?} stack_len={}",
1311 pc,
1312 &bytecode.instructions[pc],
1313 stack.len()
1314 );
1315 }
1316 match bytecode.instructions[pc].clone() {
1317 Instr::AndAnd(target) => {
1318 let lhs: f64 = (&stack
1319 .pop()
1320 .ok_or(mex("StackUnderflow", "stack underflow"))?)
1321 .try_into()?;
1322 if lhs == 0.0 {
1323 pc = target;
1324 continue;
1325 }
1326 }
1327 Instr::OrOr(target) => {
1328 let lhs: f64 = (&stack
1329 .pop()
1330 .ok_or(mex("StackUnderflow", "stack underflow"))?)
1331 .try_into()?;
1332 if lhs != 0.0 {
1333 pc = target;
1334 continue;
1335 }
1336 }
1337 Instr::Swap => {
1338 let a = stack
1339 .pop()
1340 .ok_or(mex("StackUnderflow", "stack underflow"))?;
1341 let b = stack
1342 .pop()
1343 .ok_or(mex("StackUnderflow", "stack underflow"))?;
1344 stack.push(a);
1345 stack.push(b);
1346 }
1347 Instr::CallFeval(argc) => {
1348 let mut args = Vec::with_capacity(argc);
1350 for _ in 0..argc {
1351 args.push(
1352 stack
1353 .pop()
1354 .ok_or(mex("StackUnderflow", "stack underflow"))?,
1355 );
1356 }
1357 args.reverse();
1358 let func_val = stack
1360 .pop()
1361 .ok_or(mex("StackUnderflow", "stack underflow"))?;
1362 match func_val {
1363 Value::Closure(c) => {
1364 let name = c.function_name;
1366 let mut call_args = c.captures.clone();
1367 call_args.extend(args);
1368 if let Ok(result) = runmat_runtime::call_builtin(&name, &call_args) {
1370 stack.push(result);
1371 pc += 1;
1372 continue;
1373 }
1374 let func: UserFunction = match context
1375 .functions
1376 .get(&name)
1377 .or_else(|| bytecode.functions.get(&name))
1378 {
1379 Some(f) => f.clone(),
1380 None => vm_bail!(mex(
1381 "UndefinedFunction",
1382 &format!("Undefined function: {name}")
1383 )),
1384 };
1385 let arg_count = call_args.len();
1386 if !func.has_varargin {
1387 if arg_count < func.params.len() {
1388 vm_bail!(mex(
1389 "NotEnoughInputs",
1390 &format!(
1391 "Function '{name}' expects {} inputs, got {arg_count}",
1392 func.params.len()
1393 )
1394 ));
1395 }
1396 if arg_count > func.params.len() {
1397 vm_bail!(mex(
1398 "TooManyInputs",
1399 &format!(
1400 "Function '{name}' expects {} inputs, got {arg_count}",
1401 func.params.len()
1402 )
1403 ));
1404 }
1405 }
1406 let var_map = runmat_hir::remapping::create_complete_function_var_map(
1407 &func.params,
1408 &func.outputs,
1409 &func.body,
1410 );
1411 let local_var_count = var_map.len();
1412 let remapped_body =
1413 runmat_hir::remapping::remap_function_body(&func.body, &var_map);
1414 let func_vars_count = local_var_count.max(func.params.len());
1415 let mut func_vars = vec![Value::Num(0.0); func_vars_count];
1416 if func.has_varargin {
1417 let fixed = func.params.len().saturating_sub(1);
1418 for i in 0..fixed {
1419 if i < call_args.len() && i < func_vars.len() {
1420 func_vars[i] = call_args[i].clone();
1421 }
1422 }
1423 let mut rest: Vec<Value> = if call_args.len() > fixed {
1424 call_args[fixed..].to_vec()
1425 } else {
1426 Vec::new()
1427 };
1428 let cell = runmat_builtins::CellArray::new(
1429 std::mem::take(&mut rest),
1430 1,
1431 if call_args.len() > fixed {
1432 call_args.len() - fixed
1433 } else {
1434 0
1435 },
1436 )
1437 .map_err(|e| format!("varargin: {e}"))?;
1438 if fixed < func_vars.len() {
1439 func_vars[fixed] = Value::Cell(cell);
1440 }
1441 } else {
1442 for (i, _param_id) in func.params.iter().enumerate() {
1443 if i < call_args.len() && i < func_vars.len() {
1444 func_vars[i] = call_args[i].clone();
1445 }
1446 }
1447 }
1448 for (original_var_id, local_var_id) in &var_map {
1450 let local_index = local_var_id.0;
1451 let global_index = original_var_id.0;
1452 if local_index < func_vars.len() && global_index < vars.len() {
1453 let is_parameter = func
1454 .params
1455 .iter()
1456 .any(|param_id| param_id == original_var_id);
1457 if !is_parameter {
1458 func_vars[local_index] = vars[global_index].clone();
1459 }
1460 }
1461 }
1462 if func.has_varargout {
1464 if let Some(varargout_oid) = func.outputs.last() {
1465 if let Some(local_id) = var_map.get(varargout_oid) {
1466 if local_id.0 < func_vars.len() {
1467 let empty = runmat_builtins::CellArray::new(vec![], 1, 0)
1468 .map_err(|e| format!("varargout init: {e}"))?;
1469 func_vars[local_id.0] = Value::Cell(empty);
1470 }
1471 }
1472 }
1473 }
1474 let mut func_var_types = func.var_types.clone();
1475 if func_var_types.len() < local_var_count {
1476 func_var_types.resize(local_var_count, Type::Unknown);
1477 }
1478 let func_program = runmat_hir::HirProgram {
1479 body: remapped_body,
1480 var_types: func_var_types,
1481 };
1482 let func_bytecode =
1483 crate::compile_with_functions(&func_program, &bytecode.functions)?;
1484 for (k, v) in func_bytecode.functions.iter() {
1486 context.functions.insert(k.clone(), v.clone());
1487 }
1488 let func_result_vars = match interpret_function(&func_bytecode, func_vars) {
1489 Ok(v) => v,
1490 Err(e) => vm_bail!(e),
1491 };
1492 if let Some(output_var_id) = func.outputs.first() {
1493 let local_output_index =
1494 var_map.get(output_var_id).map(|id| id.0).unwrap_or(0);
1495 if local_output_index < func_result_vars.len() {
1496 stack.push(func_result_vars[local_output_index].clone());
1497 } else {
1498 stack.push(Value::Num(0.0));
1499 }
1500 } else {
1501 stack.push(Value::Num(0.0));
1502 }
1503 }
1504 other => {
1505 let mut argv = Vec::with_capacity(1 + args.len());
1507 argv.push(other);
1508 argv.extend(args);
1509 match runmat_runtime::call_builtin("feval", &argv) {
1510 Ok(result) => stack.push(result),
1511 Err(err) => vm_bail!(err),
1512 }
1513 }
1514 }
1515 }
1516 Instr::CallFevalExpandMulti(_specs) => {
1517 vm_bail!("feval expand not supported in this execution mode".to_string());
1518 }
1519 Instr::LoadConst(c) => {
1520 stack.push(Value::Num(c));
1521 if debug_stack {
1522 eprintln!(" -> LoadConst pushed {}, new_len={}", c, stack.len());
1523 }
1524 }
1525 Instr::LoadComplex(re, im) => {
1526 stack.push(Value::Complex(re, im));
1527 if debug_stack {
1528 eprintln!(
1529 " -> LoadComplex pushed ({}, {}), new_len={}",
1530 re,
1531 im,
1532 stack.len()
1533 );
1534 }
1535 }
1536 Instr::LoadBool(b) => stack.push(Value::Bool(b)),
1537 Instr::LoadString(s) => stack.push(Value::String(s)),
1538 Instr::LoadCharRow(s) => {
1539 let ca = runmat_builtins::CharArray::new(s.chars().collect(), 1, s.chars().count())
1540 .map_err(|e| mex("CharError", &e))?;
1541 stack.push(Value::CharArray(ca));
1542 }
1543 Instr::LoadVar(i) => {
1544 let v = vars[i].clone();
1545 if std::env::var("RUNMAT_DEBUG_INDEX").as_deref() == Ok("1") {
1546 match &v {
1547 Value::GpuTensor(h) => {
1548 eprintln!(
1549 "LoadVar pc={} var={} => GpuTensor shape={:?}",
1550 pc, i, h.shape
1551 );
1552 }
1553 Value::Tensor(t) => {
1554 eprintln!("LoadVar pc={} var={} => Tensor shape={:?}", pc, i, t.shape);
1555 }
1556 _ => {}
1557 }
1558 }
1559 stack.push(v)
1560 }
1561 Instr::StoreVar(i) => {
1562 let val = stack
1563 .pop()
1564 .ok_or(mex("StackUnderflow", "stack underflow"))?;
1565 if let Ok(filter) = std::env::var("RUNMAT_DEBUG_STORE_VAR") {
1566 let log_this = if filter.trim().eq_ignore_ascii_case("*") {
1567 true
1568 } else if let Ok(target) = filter.trim().parse::<usize>() {
1569 target == i
1570 } else {
1571 false
1572 };
1573 if log_this {
1574 eprintln!("StoreVar pc={} var={} value={:?}", pc, i, val);
1575 }
1576 }
1577 if std::env::var("RUNMAT_DEBUG_INDEX").as_deref() == Ok("1") {
1578 match &val {
1579 Value::GpuTensor(h) => {
1580 eprintln!(
1581 "StoreVar pc={} var={} := GpuTensor shape={:?}",
1582 pc, i, h.shape
1583 );
1584 }
1585 Value::Tensor(t) => {
1586 eprintln!("StoreVar pc={} var={} := Tensor shape={:?}", pc, i, t.shape);
1587 }
1588 _ => {}
1589 }
1590 }
1591 if i < vars.len() {
1592 #[cfg(feature = "native-accel")]
1593 clear_residency(&vars[i]);
1594 }
1595 if i >= vars.len() {
1596 vars.resize(i + 1, Value::Num(0.0));
1597 refresh_workspace_state(&vars);
1598 }
1599 vars[i] = val;
1600 let key = format!("var_{i}");
1603 GLOBALS.with(|g| {
1604 let mut m = g.borrow_mut();
1605 if m.contains_key(&key) {
1606 m.insert(key, vars[i].clone());
1607 }
1608 });
1609 if let Some(name) = global_aliases.get(&i) {
1610 GLOBALS.with(|g| {
1611 g.borrow_mut().insert(name.clone(), vars[i].clone());
1612 });
1613 }
1614 }
1615 Instr::LoadLocal(offset) => {
1616 if let Some(current_frame) = context.call_stack.last() {
1617 let local_index = current_frame.locals_start + offset;
1618 if local_index >= context.locals.len() {
1619 vm_bail!("Local variable index out of bounds".to_string());
1620 }
1621 stack.push(context.locals[local_index].clone());
1622 } else if offset < vars.len() {
1623 stack.push(vars[offset].clone());
1624 } else {
1625 stack.push(Value::Num(0.0));
1626 }
1627 }
1628 Instr::StoreLocal(offset) => {
1629 let val = stack
1630 .pop()
1631 .ok_or(mex("StackUnderflow", "stack underflow"))?;
1632 if let Some(current_frame) = context.call_stack.last() {
1633 let local_index = current_frame.locals_start + offset;
1634 while context.locals.len() <= local_index {
1635 context.locals.push(Value::Num(0.0));
1636 }
1637 #[cfg(feature = "native-accel")]
1638 if local_index < context.locals.len() {
1639 clear_residency(&context.locals[local_index]);
1640 }
1641 context.locals[local_index] = val;
1642 } else {
1643 if offset >= vars.len() {
1644 vars.resize(offset + 1, Value::Num(0.0));
1645 refresh_workspace_state(&vars);
1646 }
1647 #[cfg(feature = "native-accel")]
1648 if offset < vars.len() {
1649 clear_residency(&vars[offset]);
1650 }
1651 vars[offset] = val;
1652 let func_name = context
1654 .call_stack
1655 .last()
1656 .map(|f| f.function_name.clone())
1657 .unwrap_or_else(|| "<main>".to_string());
1658 let key = (func_name, offset);
1659 PERSISTENTS.with(|p| {
1660 let mut m = p.borrow_mut();
1661 if m.contains_key(&key) {
1662 m.insert(key, vars[offset].clone());
1663 }
1664 });
1665 }
1666 }
1667 Instr::EnterScope(local_count) => {
1668 for _ in 0..local_count {
1669 context.locals.push(Value::Num(0.0));
1670 }
1671 }
1672 Instr::ExitScope(local_count) => {
1673 for _ in 0..local_count {
1674 if let Some(val) = context.locals.pop() {
1675 #[cfg(feature = "native-accel")]
1676 clear_residency(&val);
1677 }
1678 }
1679 }
1680 Instr::RegisterImport { path, wildcard } => {
1681 imports.push((path, wildcard));
1682 }
1683 Instr::DeclareGlobal(indices) => {
1684 for i in indices.into_iter() {
1686 let key = format!("var_{i}");
1687 let val_opt = GLOBALS.with(|g| g.borrow().get(&key).cloned());
1688 if let Some(v) = val_opt {
1689 if i >= vars.len() {
1690 vars.resize(i + 1, Value::Num(0.0));
1691 refresh_workspace_state(&vars);
1692 }
1693 vars[i] = v;
1694 }
1695 }
1696 }
1697 Instr::DeclareGlobalNamed(indices, names) => {
1698 for (pos, i) in indices.into_iter().enumerate() {
1699 let name = names
1700 .get(pos)
1701 .cloned()
1702 .unwrap_or_else(|| format!("var_{i}"));
1703 let val_opt = GLOBALS.with(|g| g.borrow().get(&name).cloned());
1704 if let Some(v) = val_opt {
1705 if i >= vars.len() {
1706 vars.resize(i + 1, Value::Num(0.0));
1707 refresh_workspace_state(&vars);
1708 }
1709 vars[i] = v;
1710 }
1711 GLOBALS.with(|g| {
1712 let mut m = g.borrow_mut();
1713 if let Some(v) = m.get(&name).cloned() {
1714 m.insert(format!("var_{i}"), v);
1715 }
1716 });
1717 global_aliases.insert(i, name);
1718 }
1719 }
1720 Instr::DeclarePersistent(indices) => {
1721 let func_name = current_func_name_str.clone();
1723 for i in indices.into_iter() {
1724 let key = (func_name.clone(), i);
1725 let val_opt = PERSISTENTS.with(|p| p.borrow().get(&key).cloned());
1726 if let Some(v) = val_opt {
1727 if i >= vars.len() {
1728 vars.resize(i + 1, Value::Num(0.0));
1729 refresh_workspace_state(&vars);
1730 }
1731 vars[i] = v;
1732 }
1733 }
1734 }
1735 Instr::DeclarePersistentNamed(indices, names) => {
1736 let func_name = current_func_name_str.clone();
1737 for (pos, i) in indices.into_iter().enumerate() {
1738 let name = names
1739 .get(pos)
1740 .cloned()
1741 .unwrap_or_else(|| format!("var_{i}"));
1742 let key = (func_name.clone(), i);
1743 let val_opt = PERSISTENTS_BY_NAME
1744 .with(|p| p.borrow().get(&(func_name.clone(), name.clone())).cloned())
1745 .or_else(|| PERSISTENTS.with(|p| p.borrow().get(&key).cloned()));
1746 if let Some(v) = val_opt {
1747 if i >= vars.len() {
1748 vars.resize(i + 1, Value::Num(0.0));
1749 refresh_workspace_state(&vars);
1750 }
1751 vars[i] = v;
1752 }
1753 persistent_aliases.insert(i, name);
1754 }
1755 }
1756 Instr::Add => {
1757 let b = stack
1759 .pop()
1760 .ok_or(mex("StackUnderflow", "stack underflow"))?;
1761 let a = stack
1762 .pop()
1763 .ok_or(mex("StackUnderflow", "stack underflow"))?;
1764 match (&a, &b) {
1765 (Value::Object(obj), _) => {
1766 let args = vec![
1767 Value::Object(obj.clone()),
1768 Value::String("plus".to_string()),
1769 b.clone(),
1770 ];
1771 match call_builtin("call_method", &args) {
1772 Ok(v) => stack.push(v),
1773 Err(_) => {
1774 let v = call_builtin("plus", &[a.clone(), b.clone()])?;
1775 stack.push(v)
1776 }
1777 }
1778 }
1779 (_, Value::Object(obj)) => {
1780 let args = vec![
1781 Value::Object(obj.clone()),
1782 Value::String("plus".to_string()),
1783 a.clone(),
1784 ];
1785 match call_builtin("call_method", &args) {
1786 Ok(v) => stack.push(v),
1787 Err(_) => {
1788 let v = call_builtin("plus", &[a.clone(), b.clone()])?;
1789 stack.push(v)
1790 }
1791 }
1792 }
1793 _ => {
1794 let (a_acc, b_acc) =
1795 accel_promote_binary(AutoBinaryOp::Elementwise, &a, &b)?;
1796 let v = call_builtin("plus", &[a_acc, b_acc])?;
1797 stack.push(v)
1798 }
1799 }
1800 }
1801 Instr::Sub => {
1802 let b = stack
1803 .pop()
1804 .ok_or(mex("StackUnderflow", "stack underflow"))?;
1805 let a = stack
1806 .pop()
1807 .ok_or(mex("StackUnderflow", "stack underflow"))?;
1808 match (&a, &b) {
1809 (Value::Object(obj), _) => {
1810 let args = vec![Value::Object(obj.clone()), b.clone()];
1811 match call_builtin("minus", &args) {
1812 Ok(v) => stack.push(v),
1813 Err(_) => {
1814 let v = call_builtin("minus", &[a.clone(), b.clone()])?;
1815 stack.push(v)
1816 }
1817 }
1818 }
1819 (_, Value::Object(obj)) => {
1820 let args = vec![Value::Object(obj.clone()), a.clone()];
1821 match call_builtin("uminus", &args) {
1822 Ok(v) => stack.push(v),
1823 Err(_) => {
1824 let v = call_builtin("minus", &[a.clone(), b.clone()])?;
1825 stack.push(v)
1826 }
1827 }
1828 }
1829 _ => {
1830 let (a_acc, b_acc) =
1831 accel_promote_binary(AutoBinaryOp::Elementwise, &a, &b)?;
1832 let v = call_builtin("minus", &[a_acc, b_acc])?;
1833 stack.push(v)
1834 }
1835 }
1836 }
1837 Instr::Mul => {
1838 let b = stack
1839 .pop()
1840 .ok_or(mex("StackUnderflow", "stack underflow"))?;
1841 let a = stack
1842 .pop()
1843 .ok_or(mex("StackUnderflow", "stack underflow"))?;
1844 match (&a, &b) {
1845 (Value::Object(obj), _) => {
1846 let args = vec![
1847 Value::Object(obj.clone()),
1848 Value::String("mtimes".to_string()),
1849 b.clone(),
1850 ];
1851 match call_builtin("call_method", &args) {
1852 Ok(v) => stack.push(v),
1853 Err(_) => {
1854 let v = runmat_runtime::matrix::value_matmul(&a, &b)?;
1855 stack.push(v)
1856 }
1857 }
1858 }
1859 (_, Value::Object(obj)) => {
1860 let args = vec![
1861 Value::Object(obj.clone()),
1862 Value::String("mtimes".to_string()),
1863 a.clone(),
1864 ];
1865 match call_builtin("call_method", &args) {
1866 Ok(v) => stack.push(v),
1867 Err(_) => {
1868 let v = runmat_runtime::matrix::value_matmul(&a, &b)?;
1869 stack.push(v)
1870 }
1871 }
1872 }
1873 _ => {
1874 let (a_acc, b_acc) = accel_promote_binary(AutoBinaryOp::MatMul, &a, &b)?;
1875 let v = runmat_runtime::matrix::value_matmul(&a_acc, &b_acc)?;
1876 stack.push(v)
1877 }
1878 }
1879 }
1880 Instr::Div => {
1881 let b = stack
1882 .pop()
1883 .ok_or(mex("StackUnderflow", "stack underflow"))?;
1884 let a = stack
1885 .pop()
1886 .ok_or(mex("StackUnderflow", "stack underflow"))?;
1887 match (&a, &b) {
1888 (Value::Object(obj), _) => {
1889 let args = vec![
1890 Value::Object(obj.clone()),
1891 Value::String("mrdivide".to_string()),
1892 b.clone(),
1893 ];
1894 match call_builtin("call_method", &args) {
1895 Ok(v) => stack.push(v),
1896 Err(_) => {
1897 let (a_acc, b_acc) =
1898 accel_promote_binary(AutoBinaryOp::Elementwise, &a, &b)?;
1899 let v = runmat_runtime::call_builtin("rdivide", &[a_acc, b_acc])?;
1900 stack.push(v)
1901 }
1902 }
1903 }
1904 (_, Value::Object(obj)) => {
1905 let args = vec![
1906 Value::Object(obj.clone()),
1907 Value::String("mrdivide".to_string()),
1908 a.clone(),
1909 ];
1910 match call_builtin("call_method", &args) {
1911 Ok(v) => stack.push(v),
1912 Err(_) => {
1913 let (a_acc, b_acc) =
1914 accel_promote_binary(AutoBinaryOp::Elementwise, &a, &b)?;
1915 let v = runmat_runtime::call_builtin("rdivide", &[a_acc, b_acc])?;
1916 stack.push(v)
1917 }
1918 }
1919 }
1920 _ => {
1921 let (a_acc, b_acc) =
1922 accel_promote_binary(AutoBinaryOp::Elementwise, &a, &b)?;
1923 let v = runmat_runtime::call_builtin("rdivide", &[a_acc, b_acc])?;
1924 stack.push(v)
1925 }
1926 }
1927 }
1928 Instr::Pow => {
1929 let b = stack
1930 .pop()
1931 .ok_or(mex("StackUnderflow", "stack underflow"))?;
1932 let a = stack
1933 .pop()
1934 .ok_or(mex("StackUnderflow", "stack underflow"))?;
1935 match (&a, &b) {
1936 (Value::Object(obj), _) | (_, Value::Object(obj)) => {
1937 let arg_val = if matches!(&a, Value::Object(_)) {
1938 b.clone()
1939 } else {
1940 a.clone()
1941 };
1942 let args = vec![
1943 Value::Object(obj.clone()),
1944 Value::String("power".to_string()),
1945 arg_val,
1946 ];
1947 match call_builtin("call_method", &args) {
1948 Ok(v) => stack.push(v),
1949 Err(_) => {
1950 let v = runmat_runtime::power(&a, &b)?;
1951 stack.push(v)
1952 }
1953 }
1954 }
1955 _ => {
1956 let (a_acc, b_acc) =
1957 accel_promote_binary(AutoBinaryOp::Elementwise, &a, &b)?;
1958 let v = runmat_runtime::power(&a_acc, &b_acc)?;
1959 stack.push(v)
1960 }
1961 }
1962 }
1963 Instr::Neg => {
1964 let value = stack
1965 .pop()
1966 .ok_or(mex("StackUnderflow", "stack underflow"))?;
1967 match &value {
1968 Value::Object(obj) => {
1969 let args = vec![Value::Object(obj.clone())];
1970 match call_builtin("uminus", &args) {
1971 Ok(v) => stack.push(v),
1972 Err(_) => {
1973 let result = runmat_runtime::call_builtin(
1974 "times",
1975 &[value.clone(), runmat_builtins::Value::Num(-1.0)],
1976 )?;
1977 stack.push(result)
1978 }
1979 }
1980 }
1981 _ => {
1982 let result = runmat_runtime::call_builtin(
1983 "times",
1984 &[value.clone(), runmat_builtins::Value::Num(-1.0)],
1985 )?;
1986 stack.push(result);
1987 }
1988 }
1989 }
1990 Instr::UPlus => {
1991 let value = stack
1992 .pop()
1993 .ok_or(mex("StackUnderflow", "stack underflow"))?;
1994 match &value {
1995 Value::Object(obj) => {
1996 let args = vec![Value::Object(obj.clone())];
1997 match call_builtin("uplus", &args) {
1998 Ok(v) => stack.push(v),
1999 Err(_) => stack.push(value),
2000 }
2001 }
2002 _ => stack.push(value),
2003 }
2004 }
2005 Instr::Transpose => {
2006 let value = stack
2007 .pop()
2008 .ok_or(mex("StackUnderflow", "stack underflow"))?;
2009 let promoted = accel_promote_unary(AutoUnaryOp::Transpose, &value)?;
2010 let args = [promoted];
2011 let result = runmat_runtime::call_builtin("transpose", &args)?;
2012 stack.push(result);
2013 }
2014 Instr::ConjugateTranspose => {
2015 let value = stack
2016 .pop()
2017 .ok_or(mex("StackUnderflow", "stack underflow"))?;
2018 let promoted = accel_promote_unary(AutoUnaryOp::Transpose, &value)?;
2019 let args = [promoted];
2020 let result = runmat_runtime::call_builtin("ctranspose", &args)?;
2021 stack.push(result);
2022 }
2023 Instr::ElemMul => {
2024 let b = stack
2025 .pop()
2026 .ok_or(mex("StackUnderflow", "stack underflow"))?;
2027 let a = stack
2028 .pop()
2029 .ok_or(mex("StackUnderflow", "stack underflow"))?;
2030 match (&a, &b) {
2031 (Value::Object(obj), _) => {
2032 let args = vec![
2033 Value::Object(obj.clone()),
2034 Value::String("times".to_string()),
2035 b.clone(),
2036 ];
2037 match call_builtin("call_method", &args) {
2038 Ok(v) => stack.push(v),
2039 Err(_) => {
2040 let (a_acc, b_acc) =
2041 accel_promote_binary(AutoBinaryOp::Elementwise, &a, &b)?;
2042 stack.push(runmat_runtime::call_builtin("times", &[a_acc, b_acc])?)
2043 }
2044 }
2045 }
2046 (_, Value::Object(obj)) => {
2047 let args = vec![
2048 Value::Object(obj.clone()),
2049 Value::String("times".to_string()),
2050 a.clone(),
2051 ];
2052 match call_builtin("call_method", &args) {
2053 Ok(v) => stack.push(v),
2054 Err(_) => {
2055 let (a_acc, b_acc) =
2056 accel_promote_binary(AutoBinaryOp::Elementwise, &a, &b)?;
2057 stack.push(runmat_runtime::call_builtin("times", &[a_acc, b_acc])?)
2058 }
2059 }
2060 }
2061 _ => {
2062 let (a_acc, b_acc) =
2063 accel_promote_binary(AutoBinaryOp::Elementwise, &a, &b)?;
2064 stack.push(runmat_runtime::call_builtin("times", &[a_acc, b_acc])?)
2065 }
2066 }
2067 }
2068 Instr::ElemDiv => {
2069 let b = stack
2070 .pop()
2071 .ok_or(mex("StackUnderflow", "stack underflow"))?;
2072 let a = stack
2073 .pop()
2074 .ok_or(mex("StackUnderflow", "stack underflow"))?;
2075 match (&a, &b) {
2076 (Value::Object(obj), _) => {
2077 let args = vec![
2078 Value::Object(obj.clone()),
2079 Value::String("rdivide".to_string()),
2080 b.clone(),
2081 ];
2082 match call_builtin("call_method", &args) {
2083 Ok(v) => stack.push(v),
2084 Err(_) => {
2085 let (a_acc, b_acc) =
2086 accel_promote_binary(AutoBinaryOp::Elementwise, &a, &b)?;
2087 stack
2088 .push(runmat_runtime::call_builtin("rdivide", &[a_acc, b_acc])?)
2089 }
2090 }
2091 }
2092 (_, Value::Object(obj)) => {
2093 let args = vec![
2094 Value::Object(obj.clone()),
2095 Value::String("rdivide".to_string()),
2096 a.clone(),
2097 ];
2098 match call_builtin("call_method", &args) {
2099 Ok(v) => stack.push(v),
2100 Err(_) => {
2101 let (a_acc, b_acc) =
2102 accel_promote_binary(AutoBinaryOp::Elementwise, &a, &b)?;
2103 stack
2104 .push(runmat_runtime::call_builtin("rdivide", &[a_acc, b_acc])?)
2105 }
2106 }
2107 }
2108 _ => {
2109 let (a_acc, b_acc) =
2110 accel_promote_binary(AutoBinaryOp::Elementwise, &a, &b)?;
2111 stack.push(runmat_runtime::call_builtin("rdivide", &[a_acc, b_acc])?)
2112 }
2113 }
2114 }
2115 Instr::ElemPow => {
2116 let b = stack
2117 .pop()
2118 .ok_or(mex("StackUnderflow", "stack underflow"))?;
2119 let a = stack
2120 .pop()
2121 .ok_or(mex("StackUnderflow", "stack underflow"))?;
2122 match (&a, &b) {
2123 (Value::Object(obj), _) | (_, Value::Object(obj)) => {
2124 let args = vec![
2125 Value::Object(obj.clone()),
2126 if matches!(&a, Value::Object(_)) {
2127 b.clone()
2128 } else {
2129 a.clone()
2130 },
2131 ];
2132 match call_builtin("power", &args) {
2133 Ok(v) => stack.push(v),
2134 Err(_) => {
2135 let (a_acc, b_acc) =
2136 accel_promote_binary(AutoBinaryOp::Elementwise, &a, &b)?;
2137 stack.push(runmat_runtime::call_builtin("power", &[a_acc, b_acc])?)
2138 }
2139 }
2140 }
2141 _ => {
2142 let (a_acc, b_acc) =
2143 accel_promote_binary(AutoBinaryOp::Elementwise, &a, &b)?;
2144 stack.push(runmat_runtime::call_builtin("power", &[a_acc, b_acc])?)
2145 }
2146 }
2147 }
2148 Instr::ElemLeftDiv => {
2149 let b = stack
2150 .pop()
2151 .ok_or(mex("StackUnderflow", "stack underflow"))?;
2152 let a = stack
2153 .pop()
2154 .ok_or(mex("StackUnderflow", "stack underflow"))?;
2155 match (&a, &b) {
2156 (Value::Object(obj), _) => {
2157 let args = vec![
2158 Value::Object(obj.clone()),
2159 Value::String("ldivide".to_string()),
2160 b.clone(),
2161 ];
2162 match call_builtin("call_method", &args) {
2163 Ok(v) => stack.push(v),
2164 Err(_) => {
2165 let (b_acc, a_acc) =
2166 accel_promote_binary(AutoBinaryOp::Elementwise, &b, &a)?;
2167 stack
2168 .push(runmat_runtime::call_builtin("rdivide", &[b_acc, a_acc])?)
2169 }
2170 }
2171 }
2172 (_, Value::Object(obj)) => {
2173 let args = vec![
2174 Value::Object(obj.clone()),
2175 Value::String("ldivide".to_string()),
2176 a.clone(),
2177 ];
2178 match call_builtin("call_method", &args) {
2179 Ok(v) => stack.push(v),
2180 Err(_) => {
2181 let (b_acc, a_acc) =
2182 accel_promote_binary(AutoBinaryOp::Elementwise, &b, &a)?;
2183 stack
2184 .push(runmat_runtime::call_builtin("rdivide", &[b_acc, a_acc])?)
2185 }
2186 }
2187 }
2188 _ => {
2189 let (b_acc, a_acc) =
2190 accel_promote_binary(AutoBinaryOp::Elementwise, &b, &a)?;
2191 stack.push(runmat_runtime::call_builtin("rdivide", &[b_acc, a_acc])?)
2192 }
2193 }
2194 }
2195 Instr::LessEqual => {
2196 let b = stack
2197 .pop()
2198 .ok_or(mex("StackUnderflow", "stack underflow"))?;
2199 let a = stack
2200 .pop()
2201 .ok_or(mex("StackUnderflow", "stack underflow"))?;
2202 match (&a, &b) {
2203 (Value::Object(obj), _) => {
2204 let args = vec![
2205 Value::Object(obj.clone()),
2206 Value::String("le".to_string()),
2207 b.clone(),
2208 ];
2209 match call_builtin("call_method", &args) {
2210 Ok(v) => stack.push(v),
2211 Err(_) => {
2212 let args2 = vec![
2214 Value::Object(obj.clone()),
2215 Value::String("gt".to_string()),
2216 b.clone(),
2217 ];
2218 match call_builtin("call_method", &args2) {
2219 Ok(v) => {
2220 let truth: f64 = (&v).try_into()?;
2221 stack.push(Value::Num(if truth == 0.0 {
2222 1.0
2223 } else {
2224 0.0
2225 }));
2226 }
2227 Err(_) => {
2228 let aa: f64 = (&a).try_into()?;
2229 let bb: f64 = (&b).try_into()?;
2230 stack.push(Value::Num(if aa <= bb { 1.0 } else { 0.0 }));
2231 }
2232 }
2233 }
2234 }
2235 }
2236 (_, Value::Object(obj)) => {
2237 let args = vec![
2238 Value::Object(obj.clone()),
2239 Value::String("ge".to_string()),
2240 a.clone(),
2241 ];
2242 match call_builtin("call_method", &args) {
2243 Ok(v) => stack.push(v),
2244 Err(_) => {
2245 let args2 = vec![
2247 Value::Object(obj.clone()),
2248 Value::String("lt".to_string()),
2249 a.clone(),
2250 ];
2251 match call_builtin("call_method", &args2) {
2252 Ok(v) => {
2253 let truth: f64 = (&v).try_into()?;
2254 stack.push(Value::Num(if truth == 0.0 {
2255 1.0
2256 } else {
2257 0.0
2258 }));
2259 }
2260 Err(_) => {
2261 let aa: f64 = (&a).try_into()?;
2262 let bb: f64 = (&b).try_into()?;
2263 stack.push(Value::Num(if aa <= bb { 1.0 } else { 0.0 }));
2264 }
2265 }
2266 }
2267 }
2268 }
2269 _ => {
2270 let bb: f64 = (&b).try_into()?;
2271 let aa: f64 = (&a).try_into()?;
2272 stack.push(Value::Num(if aa <= bb { 1.0 } else { 0.0 }));
2273 }
2274 }
2275 }
2276 Instr::Less => {
2277 handle_rel_binary!(<, "lt", stack);
2278 }
2279 Instr::Greater => {
2280 handle_rel_binary!(>, "gt", stack);
2281 }
// a >= b. Dispatch order: lhs object -> its "ge" method; rhs object -> its
// "le" method (a >= b <=> b <= a); if the direct method is missing, try the
// negated complement ("lt"/"gt") and invert the truth value; finally fall
// back to scalar numeric comparison via TryInto<f64>.
Instr::GreaterEqual => {
    let b = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    let a = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    match (&a, &b) {
        (Value::Object(obj), _) => {
            let args = vec![
                Value::Object(obj.clone()),
                Value::String("ge".to_string()),
                b.clone(),
            ];
            match call_builtin("call_method", &args) {
                Ok(v) => stack.push(v),
                Err(_) => {
                    // No "ge": use !(a < b) via the object's "lt".
                    let args2 = vec![
                        Value::Object(obj.clone()),
                        Value::String("lt".to_string()),
                        b.clone(),
                    ];
                    match call_builtin("call_method", &args2) {
                        Ok(v) => {
                            let truth: f64 = (&v).try_into()?;
                            stack.push(Value::Num(if truth == 0.0 {
                                1.0
                            } else {
                                0.0
                            }));
                        }
                        Err(_) => {
                            // Neither operator method exists: numeric fallback.
                            let aa: f64 = (&a).try_into()?;
                            let bb: f64 = (&b).try_into()?;
                            stack.push(Value::Num(if aa >= bb { 1.0 } else { 0.0 }));
                        }
                    }
                }
            }
        }
        (_, Value::Object(obj)) => {
            // Mirrored dispatch on the rhs object: a >= b <=> b.le(a).
            let args = vec![
                Value::Object(obj.clone()),
                Value::String("le".to_string()),
                a.clone(),
            ];
            match call_builtin("call_method", &args) {
                Ok(v) => stack.push(v),
                Err(_) => {
                    // No "le": use !(b > a) via the object's "gt".
                    let args2 = vec![
                        Value::Object(obj.clone()),
                        Value::String("gt".to_string()),
                        a.clone(),
                    ];
                    match call_builtin("call_method", &args2) {
                        Ok(v) => {
                            let truth: f64 = (&v).try_into()?;
                            stack.push(Value::Num(if truth == 0.0 {
                                1.0
                            } else {
                                0.0
                            }));
                        }
                        Err(_) => {
                            let aa: f64 = (&a).try_into()?;
                            let bb: f64 = (&b).try_into()?;
                            stack.push(Value::Num(if aa >= bb { 1.0 } else { 0.0 }));
                        }
                    }
                }
            }
        }
        _ => {
            // Scalar numeric comparison; note b is converted first to match
            // the pop order used elsewhere in this interpreter.
            let bb: f64 = (&b).try_into()?;
            let aa: f64 = (&a).try_into()?;
            stack.push(Value::Num(if aa >= bb { 1.0 } else { 0.0 }));
        }
    }
}
// a == b. Supports object operator dispatch ("eq"), handle objects (runtime
// builtin), element-wise tensor comparison (with a 1e-12 absolute tolerance),
// tensor/scalar broadcasting in both orders, string arrays and strings, and
// a scalar numeric fallback.
Instr::Equal => {
    let b = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    let a = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    match (&a, &b) {
        (Value::Object(obj), _) => {
            let args = vec![
                Value::Object(obj.clone()),
                Value::String("eq".to_string()),
                b.clone(),
            ];
            match call_builtin("call_method", &args) {
                Ok(v) => stack.push(v),
                Err(_) => {
                    // Object has no "eq": numeric fallback.
                    let aa: f64 = (&a).try_into()?;
                    let bb: f64 = (&b).try_into()?;
                    stack.push(Value::Num(if aa == bb { 1.0 } else { 0.0 }))
                }
            }
        }
        (_, Value::Object(obj)) => {
            // "eq" is symmetric, so the rhs object's method is called with a.
            let args = vec![
                Value::Object(obj.clone()),
                Value::String("eq".to_string()),
                a.clone(),
            ];
            match call_builtin("call_method", &args) {
                Ok(v) => stack.push(v),
                Err(_) => {
                    let aa: f64 = (&a).try_into()?;
                    let bb: f64 = (&b).try_into()?;
                    stack.push(Value::Num(if aa == bb { 1.0 } else { 0.0 }))
                }
            }
        }
        (Value::HandleObject(_), _) | (_, Value::HandleObject(_)) => {
            // Handle identity/equality is delegated to the runtime builtin.
            let v = runmat_runtime::call_builtin("eq", &[a.clone(), b.clone()])?;
            stack.push(v);
        }
        (Value::Tensor(ta), Value::Tensor(tb)) => {
            if ta.shape != tb.shape {
                return Err(mex(
                    "ShapeMismatch",
                    "shape mismatch for element-wise comparison",
                ));
            }
            // NOTE(review): equality uses an absolute 1e-12 tolerance rather
            // than exact comparison — intentional fuzzy compare, presumably
            // to absorb float round-off; confirm against language semantics.
            let mut out = Vec::with_capacity(ta.data.len());
            for i in 0..ta.data.len() {
                out.push(if (ta.data[i] - tb.data[i]).abs() < 1e-12 {
                    1.0
                } else {
                    0.0
                });
            }
            stack.push(Value::Tensor(
                runmat_builtins::Tensor::new(out, ta.shape.clone())
                    .map_err(|e| format!("eq: {e}"))?,
            ));
        }
        (Value::Tensor(t), Value::Num(_)) | (Value::Tensor(t), Value::Int(_)) => {
            // Tensor == scalar: broadcast the scalar over every element.
            let s = match &b {
                Value::Num(n) => *n,
                Value::Int(i) => i.to_f64(),
                _ => 0.0,
            };
            let out: Vec<f64> = t
                .data
                .iter()
                .map(|x| if (*x - s).abs() < 1e-12 { 1.0 } else { 0.0 })
                .collect();
            stack.push(Value::Tensor(
                runmat_builtins::Tensor::new(out, t.shape.clone())
                    .map_err(|e| format!("eq: {e}"))?,
            ));
        }
        (Value::Num(_), Value::Tensor(t)) | (Value::Int(_), Value::Tensor(t)) => {
            // Scalar == tensor (mirror of the case above).
            let s = match &a {
                Value::Num(n) => *n,
                Value::Int(i) => i.to_f64(),
                _ => 0.0,
            };
            let out: Vec<f64> = t
                .data
                .iter()
                .map(|x| if (s - *x).abs() < 1e-12 { 1.0 } else { 0.0 })
                .collect();
            stack.push(Value::Tensor(
                runmat_builtins::Tensor::new(out, t.shape.clone())
                    .map_err(|e| format!("eq: {e}"))?,
            ));
        }
        (Value::StringArray(sa), Value::StringArray(sb)) => {
            if sa.shape != sb.shape {
                return Err(mex(
                    "ShapeMismatch",
                    "shape mismatch for string array comparison",
                ));
            }
            let mut out = Vec::with_capacity(sa.data.len());
            for i in 0..sa.data.len() {
                out.push(if sa.data[i] == sb.data[i] { 1.0 } else { 0.0 });
            }
            stack.push(Value::Tensor(
                runmat_builtins::Tensor::new(out, sa.shape.clone())
                    .map_err(|e| format!("eq: {e}"))?,
            ));
        }
        (Value::StringArray(sa), Value::String(s)) => {
            // String array vs. scalar string: element-wise comparison.
            let mut out = Vec::with_capacity(sa.data.len());
            for i in 0..sa.data.len() {
                out.push(if sa.data[i] == *s { 1.0 } else { 0.0 });
            }
            stack.push(Value::Tensor(
                runmat_builtins::Tensor::new(out, sa.shape.clone())
                    .map_err(|e| format!("eq: {e}"))?,
            ));
        }
        (Value::String(s), Value::StringArray(sa)) => {
            let mut out = Vec::with_capacity(sa.data.len());
            for i in 0..sa.data.len() {
                out.push(if *s == sa.data[i] { 1.0 } else { 0.0 });
            }
            stack.push(Value::Tensor(
                runmat_builtins::Tensor::new(out, sa.shape.clone())
                    .map_err(|e| format!("eq: {e}"))?,
            ));
        }
        (Value::String(a_s), Value::String(b_s)) => {
            stack.push(Value::Num(if a_s == b_s { 1.0 } else { 0.0 }));
        }
        _ => {
            // Anything else: coerce both sides to f64 and compare exactly.
            let bb: f64 = (&b).try_into()?;
            let aa: f64 = (&a).try_into()?;
            stack.push(Value::Num(if aa == bb { 1.0 } else { 0.0 }));
        }
    }
}
// a ~= b. Mirror of Instr::Equal: object dispatch tries "ne" first, then
// falls back to the negation of "eq"; tensor paths use the complementary
// tolerance test (abs diff >= 1e-12); handle objects delegate to the
// runtime "ne" builtin; final fallback is scalar numeric inequality.
Instr::NotEqual => {
    let b = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    let a = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    match (&a, &b) {
        (Value::Object(obj), _) => {
            let args = vec![
                Value::Object(obj.clone()),
                Value::String("ne".to_string()),
                b.clone(),
            ];
            match call_builtin("call_method", &args) {
                Ok(v) => stack.push(v),
                Err(_) => {
                    // No "ne": compute !(a == b) via the object's "eq".
                    let args2 = vec![
                        Value::Object(obj.clone()),
                        Value::String("eq".to_string()),
                        b.clone(),
                    ];
                    match call_builtin("call_method", &args2) {
                        Ok(v) => {
                            let truth: f64 = (&v).try_into()?;
                            stack.push(Value::Num(if truth == 0.0 {
                                1.0
                            } else {
                                0.0
                            }));
                        }
                        Err(_) => {
                            let aa: f64 = (&a).try_into()?;
                            let bb: f64 = (&b).try_into()?;
                            stack.push(Value::Num(if aa != bb { 1.0 } else { 0.0 }));
                        }
                    }
                }
            }
        }
        (_, Value::Object(obj)) => {
            // "ne" is symmetric, so the rhs object's method is called with a.
            let args = vec![
                Value::Object(obj.clone()),
                Value::String("ne".to_string()),
                a.clone(),
            ];
            match call_builtin("call_method", &args) {
                Ok(v) => stack.push(v),
                Err(_) => {
                    let args2 = vec![
                        Value::Object(obj.clone()),
                        Value::String("eq".to_string()),
                        a.clone(),
                    ];
                    match call_builtin("call_method", &args2) {
                        Ok(v) => {
                            let truth: f64 = (&v).try_into()?;
                            stack.push(Value::Num(if truth == 0.0 {
                                1.0
                            } else {
                                0.0
                            }));
                        }
                        Err(_) => {
                            let aa: f64 = (&a).try_into()?;
                            let bb: f64 = (&b).try_into()?;
                            stack.push(Value::Num(if aa != bb { 1.0 } else { 0.0 }));
                        }
                    }
                }
            }
        }
        (Value::HandleObject(_), _) | (_, Value::HandleObject(_)) => {
            let v = runmat_runtime::call_builtin("ne", &[a.clone(), b.clone()])?;
            stack.push(v);
        }
        (Value::Tensor(ta), Value::Tensor(tb)) => {
            if ta.shape != tb.shape {
                return Err(mex(
                    "ShapeMismatch",
                    "shape mismatch for element-wise comparison",
                ));
            }
            // Complement of the Equal tolerance test: unequal iff the
            // absolute difference is at least 1e-12.
            let mut out = Vec::with_capacity(ta.data.len());
            for i in 0..ta.data.len() {
                out.push(if (ta.data[i] - tb.data[i]).abs() >= 1e-12 {
                    1.0
                } else {
                    0.0
                });
            }
            stack.push(Value::Tensor(
                runmat_builtins::Tensor::new(out, ta.shape.clone())
                    .map_err(|e| format!("ne: {e}"))?,
            ));
        }
        (Value::Tensor(t), Value::Num(_)) | (Value::Tensor(t), Value::Int(_)) => {
            let s = match &b {
                Value::Num(n) => *n,
                Value::Int(i) => i.to_f64(),
                _ => 0.0,
            };
            let out: Vec<f64> = t
                .data
                .iter()
                .map(|x| if (*x - s).abs() >= 1e-12 { 1.0 } else { 0.0 })
                .collect();
            stack.push(Value::Tensor(
                runmat_builtins::Tensor::new(out, t.shape.clone())
                    .map_err(|e| format!("ne: {e}"))?,
            ));
        }
        (Value::Num(_), Value::Tensor(t)) | (Value::Int(_), Value::Tensor(t)) => {
            let s = match &a {
                Value::Num(n) => *n,
                Value::Int(i) => i.to_f64(),
                _ => 0.0,
            };
            let out: Vec<f64> = t
                .data
                .iter()
                .map(|x| if (s - *x).abs() >= 1e-12 { 1.0 } else { 0.0 })
                .collect();
            stack.push(Value::Tensor(
                runmat_builtins::Tensor::new(out, t.shape.clone())
                    .map_err(|e| format!("ne: {e}"))?,
            ));
        }
        (Value::StringArray(sa), Value::StringArray(sb)) => {
            if sa.shape != sb.shape {
                return Err(mex(
                    "ShapeMismatch",
                    "shape mismatch for string array comparison",
                ));
            }
            let mut out = Vec::with_capacity(sa.data.len());
            for i in 0..sa.data.len() {
                out.push(if sa.data[i] != sb.data[i] { 1.0 } else { 0.0 });
            }
            stack.push(Value::Tensor(
                runmat_builtins::Tensor::new(out, sa.shape.clone())
                    .map_err(|e| format!("ne: {e}"))?,
            ));
        }
        (Value::StringArray(sa), Value::String(s)) => {
            let mut out = Vec::with_capacity(sa.data.len());
            for i in 0..sa.data.len() {
                out.push(if sa.data[i] != *s { 1.0 } else { 0.0 });
            }
            stack.push(Value::Tensor(
                runmat_builtins::Tensor::new(out, sa.shape.clone())
                    .map_err(|e| format!("ne: {e}"))?,
            ));
        }
        (Value::String(s), Value::StringArray(sa)) => {
            let mut out = Vec::with_capacity(sa.data.len());
            for i in 0..sa.data.len() {
                out.push(if *s != sa.data[i] { 1.0 } else { 0.0 });
            }
            stack.push(Value::Tensor(
                runmat_builtins::Tensor::new(out, sa.shape.clone())
                    .map_err(|e| format!("ne: {e}"))?,
            ));
        }
        (Value::String(a_s), Value::String(b_s)) => {
            stack.push(Value::Num(if a_s != b_s { 1.0 } else { 0.0 }));
        }
        _ => {
            let bb: f64 = (&b).try_into()?;
            let aa: f64 = (&a).try_into()?;
            stack.push(Value::Num(if aa != bb { 1.0 } else { 0.0 }));
        }
    }
}
// Conditional branch: pop the condition, coerce to f64, and jump to `target`
// when it is exactly 0.0 (false). `continue` skips the loop's normal pc
// increment so `target` is the next instruction executed.
Instr::JumpIfFalse(target) => {
    let cond: f64 = (&stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?)
        .try_into()?;
    if cond == 0.0 {
        pc = target;
        continue;
    }
}
// Unconditional branch.
Instr::Jump(target) => {
    pc = target;
    continue;
}
// Dedicated opcode for a stochastic-evolution kernel. Operands are popped in
// reverse push order (steps, scale, drift, state) and handed to the dispatch
// helper, which selects the host/accelerated implementation.
Instr::StochasticEvolution => {
    let steps_value = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    let scale_value = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    let drift_value = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    let state_value = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    let evolved = stochastic_evolution_dispatch(
        state_value,
        drift_value,
        scale_value,
        steps_value,
    )?;
    stack.push(evolved);
}
// Call a builtin by name with `arg_count` stack arguments. Resolution order:
// 1. special-case nargin/nargout (read from the CALL_COUNTS frame stack);
// 2. direct builtin call (after accelerator argument preparation);
// 3. explicit (non-wildcard) imports whose last segment matches `name`;
// 4. wildcard imports, each prefixed onto `name`;
// 5. on total failure, unwind to the innermost try/catch frame or propagate.
// Ambiguity at steps 3/4 (more than one successful candidate) is an error.
Instr::CallBuiltin(name, arg_count) => {
    if debug_stack {
        eprintln!(
            "CallBuiltin pc={} name={} arg_count={} stack_len={} top={:?}",
            pc,
            name,
            arg_count,
            stack.len(),
            stack.last()
        );
    }
    if name == "nargin" {
        if arg_count != 0 {
            vm_bail!(mex("TooManyInputs", "nargin takes no arguments").to_string());
        }
        let (nin, _) =
            CALL_COUNTS.with(|cc| cc.borrow().last().cloned().unwrap_or((0, 0)));
        stack.push(Value::Num(nin as f64));
        // Manually advance pc because `continue` bypasses the loop's
        // normal increment.
        pc += 1;
        continue;
    }
    if name == "nargout" {
        if arg_count != 0 {
            vm_bail!(mex("TooManyInputs", "nargout takes no arguments").to_string());
        }
        let (_, nout) =
            CALL_COUNTS.with(|cc| cc.borrow().last().cloned().unwrap_or((0, 0)));
        stack.push(Value::Num(nout as f64));
        pc += 1;
        continue;
    }
    // Pop arguments (pushed left-to-right, so reverse to restore order).
    let mut args = Vec::new();

    for _ in 0..arg_count {
        args.push(
            stack
                .pop()
                .ok_or(mex("StackUnderflow", "stack underflow"))?,
        );
    }
    args.reverse();

    let prepared_primary = accel_prepare_args(&name, &args)?;
    match runmat_runtime::call_builtin(&name, &prepared_primary) {
        Ok(result) => stack.push(result),
        Err(e) => {
            // NOTE(review): import resolution *calls* each candidate to
            // test it, so a side-effecting builtin could run more than
            // once before ambiguity is detected — confirm acceptable.
            let mut specific_matches: Vec<(String, Vec<Value>, Value)> = Vec::new();
            for (path, wildcard) in &imports {
                if *wildcard {
                    continue;
                }
                if path.last().map(|s| s.as_str()) == Some(name.as_str()) {
                    let qual = path.join(".");
                    let qual_args = accel_prepare_args(&qual, &prepared_primary)?;
                    if let Ok(value) = runmat_runtime::call_builtin(&qual, &qual_args) {
                        specific_matches.push((qual, qual_args, value));
                    }
                }
            }
            if specific_matches.len() > 1 {
                let msg = specific_matches
                    .iter()
                    .map(|(q, _, _)| q.clone())
                    .collect::<Vec<_>>()
                    .join(", ");
                vm_bail!(format!("ambiguous builtin '{}' via imports: {}", name, msg)
                    .to_string());
            }
            if let Some((_, _, value)) = specific_matches.pop() {
                stack.push(value);
            } else {
                let mut wildcard_matches: Vec<(String, Vec<Value>, Value)> = Vec::new();
                for (path, wildcard) in &imports {
                    if !*wildcard {
                        continue;
                    }
                    if path.is_empty() {
                        continue;
                    }
                    // Build "pkg.sub.name" from the wildcard prefix.
                    let mut qual = String::new();
                    for (i, part) in path.iter().enumerate() {
                        if i > 0 {
                            qual.push('.');
                        }
                        qual.push_str(part);
                    }
                    qual.push('.');
                    qual.push_str(&name);
                    let qual_args = accel_prepare_args(&qual, &prepared_primary)?;
                    if let Ok(value) = runmat_runtime::call_builtin(&qual, &qual_args) {
                        wildcard_matches.push((qual, qual_args, value));
                    }
                }
                if wildcard_matches.len() > 1 {
                    let msg = wildcard_matches
                        .iter()
                        .map(|(q, _, _)| q.clone())
                        .collect::<Vec<_>>()
                        .join(", ");
                    vm_bail!(format!(
                        "ambiguous builtin '{}' via wildcard imports: {}",
                        name, msg
                    )
                    .to_string());
                }
                if let Some((_, _, value)) = wildcard_matches.pop() {
                    stack.push(value);
                } else {
                    // Bare `rethrow` re-raises the most recent exception.
                    if name == "rethrow" && args.is_empty() {
                        if let Some(le) = &last_exception {
                            vm_bail!(format!("{}: {}", le.identifier, le.message)
                                .to_string());
                        }
                    }
                    // Unresolved call: unwind into the nearest catch
                    // handler, binding the exception to the catch variable.
                    if let Some((catch_pc, catch_var)) = try_stack.pop() {
                        if let Some(var_idx) = catch_var {
                            if var_idx >= vars.len() {
                                vars.resize(var_idx + 1, Value::Num(0.0));
                                refresh_workspace_state(&vars);
                            }
                            // `mex` here shadows the error-constructor fn
                            // for the rest of this scope.
                            let mex = parse_exception(&e);
                            last_exception = Some(mex.clone());
                            vars[var_idx] = Value::MException(mex);
                        }
                        pc = catch_pc;
                        continue;
                    } else {
                        return Err(e);
                    }
                }
            }
        }
    }
}
// Call a builtin whose *last* argument is a cell-content expansion
// (`f(x, c{i})`). Stack layout (top first): indices, base, fixed args.
// Cell indexing is 1-based; a tensor index splats multiple elements;
// objects go through their `subsref` "{}" overload.
Instr::CallBuiltinExpandLast(name, fixed_argc, num_indices) => {
    let mut indices = Vec::with_capacity(num_indices);
    for _ in 0..num_indices {
        let v = stack
            .pop()
            .ok_or(mex("StackUnderflow", "stack underflow"))?;
        indices.push(v);
    }
    indices.reverse();
    let base = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    let mut fixed = Vec::with_capacity(fixed_argc);
    for _ in 0..fixed_argc {
        fixed.push(
            stack
                .pop()
                .ok_or(mex("StackUnderflow", "stack underflow"))?,
        );
    }
    fixed.reverse();
    let expanded = match (base, indices.len()) {
        // Linear indexing into a cell: c{i}.
        (Value::Cell(ca), 1) => {
            match &indices[0] {
                Value::Num(n) => {
                    let i = *n as usize;
                    if i == 0 || i > ca.data.len() {
                        return Err(mex(
                            "CellIndexOutOfBounds",
                            "Cell index out of bounds",
                        ));
                    }
                    vec![(*ca.data[i - 1]).clone()]
                }
                Value::Int(i) => {
                    let iu = i.to_i64() as usize;
                    if iu == 0 || iu > ca.data.len() {
                        return Err(mex(
                            "CellIndexOutOfBounds",
                            "Cell index out of bounds",
                        ));
                    }
                    vec![(*ca.data[iu - 1]).clone()]
                }
                // Vector index: expands into one argument per element.
                Value::Tensor(t) => {
                    let mut out: Vec<Value> = Vec::with_capacity(t.data.len());
                    for &val in &t.data {
                        let iu = val as usize;
                        if iu == 0 || iu > ca.data.len() {
                            return Err(mex(
                                "CellIndexOutOfBounds",
                                "Cell index out of bounds",
                            ));
                        }
                        out.push((*ca.data[iu - 1]).clone());
                    }
                    out
                }
                _ => return Err(mex("CellIndexType", "Unsupported cell index type")),
            }
        }
        // 2-D subscript: c{r, c} in row-major storage.
        (Value::Cell(ca), 2) => {
            let r: f64 = (&indices[0]).try_into()?;
            let c: f64 = (&indices[1]).try_into()?;
            let (ir, ic) = (r as usize, c as usize);
            if ir == 0 || ir > ca.rows || ic == 0 || ic > ca.cols {
                return Err(mex(
                    "CellSubscriptOutOfBounds",
                    "Cell subscript out of bounds",
                ));
            }
            vec![(*ca.data[(ir - 1) * ca.cols + (ic - 1)]).clone()]
        }
        (other, _) => {
            match other {
                // Objects delegate to their subsref "{}" method.
                Value::Object(obj) => {
                    let cell = runmat_builtins::CellArray::new(
                        indices.clone(),
                        1,
                        indices.len(),
                    )
                    .map_err(|e| format!("subsref build error: {e}"))?;
                    let v = match runmat_runtime::call_builtin(
                        "call_method",
                        &[
                            Value::Object(obj),
                            Value::String("subsref".to_string()),
                            Value::String("{}".to_string()),
                            Value::Cell(cell),
                        ],
                    ) {
                        Ok(v) => v,
                        Err(e) => vm_bail!(e),
                    };
                    vec![v]
                }
                _ => {
                    return Err(mex(
                        "ExpandError",
                        "CallBuiltinExpandLast requires cell or object cell access",
                    ))
                }
            }
        }
    };
    let mut args = fixed;
    args.extend(expanded.into_iter());
    match call_builtin_auto(&name, &args) {
        Ok(v) => stack.push(v),
        Err(e) => vm_bail!(e),
    }
}
// Call a builtin with a cell-content expansion in the *middle* of the
// argument list (`f(a, c{i}, b)`). Stack layout (top first): after args,
// indices, base, before args. Final call order: before ++ expanded ++ after.
Instr::CallBuiltinExpandAt(name, before_count, num_indices, after_count) => {
    let mut after: Vec<Value> = Vec::with_capacity(after_count);
    for _ in 0..after_count {
        after.push(
            stack
                .pop()
                .ok_or(mex("StackUnderflow", "stack underflow"))?,
        );
    }
    after.reverse();
    let mut indices = Vec::with_capacity(num_indices);
    for _ in 0..num_indices {
        indices.push(
            stack
                .pop()
                .ok_or(mex("StackUnderflow", "stack underflow"))?,
        );
    }
    indices.reverse();
    let base = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    let mut before: Vec<Value> = Vec::with_capacity(before_count);
    for _ in 0..before_count {
        before.push(
            stack
                .pop()
                .ok_or(mex("StackUnderflow", "stack underflow"))?,
        );
    }
    before.reverse();
    let expanded = match (base, indices.len()) {
        // Linear cell indexing (1-based); tensor index splats elements.
        (Value::Cell(ca), 1) => match &indices[0] {
            Value::Num(n) => {
                let idx = *n as usize;
                if idx == 0 || idx > ca.data.len() {
                    return Err(mex(
                        "CellIndexOutOfBounds",
                        "Cell index out of bounds",
                    ));
                }
                vec![(*ca.data[idx - 1]).clone()]
            }
            Value::Int(i) => {
                let idx = i.to_i64() as usize;
                if idx == 0 || idx > ca.data.len() {
                    return Err(mex(
                        "CellIndexOutOfBounds",
                        "Cell index out of bounds",
                    ));
                }
                vec![(*ca.data[idx - 1]).clone()]
            }
            Value::Tensor(t) => {
                let mut out: Vec<Value> = Vec::with_capacity(t.data.len());
                for &val in &t.data {
                    let iu = val as usize;
                    if iu == 0 || iu > ca.data.len() {
                        return Err(mex(
                            "CellIndexOutOfBounds",
                            "Cell index out of bounds",
                        ));
                    }
                    out.push((*ca.data[iu - 1]).clone());
                }
                out
            }
            _ => return Err(mex("CellIndexType", "Unsupported cell index type")),
        },
        // 2-D cell subscript (row-major storage).
        (Value::Cell(ca), 2) => {
            let r: f64 = (&indices[0]).try_into()?;
            let c: f64 = (&indices[1]).try_into()?;
            let (ir, ic) = (r as usize, c as usize);
            if ir == 0 || ir > ca.rows || ic == 0 || ic > ca.cols {
                return Err(mex(
                    "CellSubscriptOutOfBounds",
                    "Cell subscript out of bounds",
                ));
            }
            vec![(*ca.data[(ir - 1) * ca.cols + (ic - 1)]).clone()]
        }
        // Objects delegate to their subsref "{}" overload; non-numeric
        // indices silently coerce to 0.0 here (then fail bounds checks
        // downstream).
        (Value::Object(obj), _) => {
            let idx_vals: Vec<Value> = indices
                .iter()
                .map(|v| Value::Num((v).try_into().unwrap_or(0.0)))
                .collect();
            let cell = runmat_runtime::call_builtin("__make_cell", &idx_vals)?;
            let v = match runmat_runtime::call_builtin(
                "call_method",
                &[
                    Value::Object(obj),
                    Value::String("subsref".to_string()),
                    Value::String("{}".to_string()),
                    cell,
                ],
            ) {
                Ok(v) => v,
                Err(e) => vm_bail!(e),
            };
            vec![v]
        }
        _ => {
            return Err(mex(
                "ExpandError",
                "CallBuiltinExpandAt requires cell or object cell access",
            ))
        }
    };
    let mut args = before;
    args.extend(expanded.into_iter());
    args.extend(after.into_iter());
    match call_builtin_auto(&name, &args) {
        Ok(v) => stack.push(v),
        Err(e) => vm_bail!(e),
    }
}
// Call a builtin where *multiple* argument positions may be cell-content
// expansions, described by `specs`. Specs are walked in reverse so each
// group can be popped off the stack; `temp` is reversed at the end to
// restore source order. `expand_all` corresponds to `c{:}` (splat all).
Instr::CallBuiltinExpandMulti(name, specs) => {
    let mut args: Vec<Value> = Vec::with_capacity(specs.len());
    let mut temp: Vec<Value> = Vec::new();
    for spec in specs.iter().rev() {
        if spec.is_expand {
            let mut indices = Vec::with_capacity(spec.num_indices);
            for _ in 0..spec.num_indices {
                indices.push(
                    stack
                        .pop()
                        .ok_or(mex("StackUnderflow", "stack underflow"))?,
                );
            }
            indices.reverse();
            let base = stack
                .pop()
                .ok_or(mex("StackUnderflow", "stack underflow"))?;
            // Drop any accelerator residency for the base before its
            // contents are cloned onto the host stack.
            #[cfg(feature = "native-accel")]
            clear_residency(&base);
            let expanded = if spec.expand_all {
                match base {
                    Value::Cell(ca) => {
                        ca.data.iter().map(|p| (*(*p)).clone()).collect()
                    }
                    // Objects answer `c{:}` via subsref with an empty
                    // index cell.
                    Value::Object(obj) => {
                        let empty = runmat_builtins::CellArray::new(vec![], 1, 0)
                            .map_err(|e| format!("subsref build error: {e}"))?;
                        let v = match runmat_runtime::call_builtin(
                            "call_method",
                            &[
                                Value::Object(obj),
                                Value::String("subsref".to_string()),
                                Value::String("{}".to_string()),
                                Value::Cell(empty),
                            ],
                        ) {
                            Ok(v) => v,
                            Err(e) => vm_bail!(e),
                        };
                        match v {
                            Value::Cell(ca) => {
                                ca.data.iter().map(|p| (*(*p)).clone()).collect()
                            }
                            other => vec![other],
                        }
                    }
                    _ => return Err(mex(
                        "ExpandError",
                        "CallBuiltinExpandMulti requires cell or object for expand_all",
                    )),
                }
            } else {
                match (base, indices.len()) {
                    // Linear cell indexing (1-based); tensor index splats.
                    (Value::Cell(ca), 1) => match &indices[0] {
                        Value::Num(n) => {
                            let idx = *n as usize;
                            if idx == 0 || idx > ca.data.len() {
                                return Err(mex(
                                    "CellIndexOutOfBounds",
                                    "Cell index out of bounds",
                                ));
                            }
                            vec![(*ca.data[idx - 1]).clone()]
                        }
                        Value::Int(i) => {
                            let idx = i.to_i64() as usize;
                            if idx == 0 || idx > ca.data.len() {
                                return Err(mex(
                                    "CellIndexOutOfBounds",
                                    "Cell index out of bounds",
                                ));
                            }
                            vec![(*ca.data[idx - 1]).clone()]
                        }
                        Value::Tensor(t) => {
                            let mut out: Vec<Value> = Vec::with_capacity(t.data.len());
                            for &val in &t.data {
                                let iu = val as usize;
                                if iu == 0 || iu > ca.data.len() {
                                    return Err(mex(
                                        "CellIndexOutOfBounds",
                                        "Cell index out of bounds",
                                    ));
                                }
                                out.push((*ca.data[iu - 1]).clone());
                            }
                            out
                        }
                        _ => {
                            return Err(mex(
                                "CellIndexType",
                                "Unsupported cell index type",
                            ))
                        }
                    },
                    // 2-D cell subscript (row-major storage).
                    (Value::Cell(ca), 2) => {
                        let r: f64 = (&indices[0]).try_into()?;
                        let c: f64 = (&indices[1]).try_into()?;
                        let (ir, ic) = (r as usize, c as usize);
                        if ir == 0 || ir > ca.rows || ic == 0 || ic > ca.cols {
                            return Err(mex(
                                "CellSubscriptOutOfBounds",
                                "Cell subscript out of bounds",
                            ));
                        }
                        vec![(*ca.data[(ir - 1) * ca.cols + (ic - 1)]).clone()]
                    }
                    // Objects delegate to subsref "{}" via __make_cell.
                    (Value::Object(obj), _) => {
                        let idx_vals: Vec<Value> = indices
                            .iter()
                            .map(|v| Value::Num((v).try_into().unwrap_or(0.0)))
                            .collect();
                        let cell =
                            runmat_runtime::call_builtin("__make_cell", &idx_vals)?;
                        let v = match runmat_runtime::call_builtin(
                            "call_method",
                            &[
                                Value::Object(obj),
                                Value::String("subsref".to_string()),
                                Value::String("{}".to_string()),
                                cell,
                            ],
                        ) {
                            Ok(v) => v,
                            Err(e) => vm_bail!(e),
                        };
                        vec![v]
                    }
                    _ => return Err(mex(
                        "ExpandError",
                        "CallBuiltinExpandMulti requires cell or object cell access",
                    )),
                }
            };
            for v in expanded {
                temp.push(v);
            }
        } else {
            // Plain (non-expanded) argument: just pop it.
            temp.push(
                stack
                    .pop()
                    .ok_or(mex("StackUnderflow", "stack underflow"))?,
            );
        }
    }
    temp.reverse();
    args.extend(temp.into_iter());
    match call_builtin_auto(&name, &args) {
        Ok(v) => stack.push(v),
        Err(e) => vm_bail!(e),
    }
}
3244 Instr::PackToRow(count) => {
3245 let mut vals: Vec<f64> = Vec::with_capacity(count);
3247 let mut tmp: Vec<Value> = Vec::with_capacity(count);
3248 for _ in 0..count {
3249 tmp.push(
3250 stack
3251 .pop()
3252 .ok_or(mex("StackUnderflow", "stack underflow"))?,
3253 );
3254 }
3255 tmp.reverse();
3256 for v in tmp {
3257 let n: f64 = (&v).try_into()?;
3258 vals.push(n);
3259 }
3260 let tens = runmat_builtins::Tensor::new(vals, vec![1, count])
3261 .map_err(|e| format!("PackToRow: {e}"))?;
3262 stack.push(Value::Tensor(tens));
3263 }
3264 Instr::PackToCol(count) => {
3265 let mut vals: Vec<f64> = Vec::with_capacity(count);
3266 let mut tmp: Vec<Value> = Vec::with_capacity(count);
3267 for _ in 0..count {
3268 tmp.push(
3269 stack
3270 .pop()
3271 .ok_or(mex("StackUnderflow", "stack underflow"))?,
3272 );
3273 }
3274 tmp.reverse();
3275 for v in tmp {
3276 let n: f64 = (&v).try_into()?;
3277 vals.push(n);
3278 }
3279 let tens = runmat_builtins::Tensor::new(vals, vec![count, 1])
3280 .map_err(|e| format!("PackToCol: {e}"))?;
3281 stack.push(Value::Tensor(tens));
3282 }
3283 Instr::CallFunctionExpandMulti(name, specs) => {
3284 let mut temp: Vec<Value> = Vec::new();
3286 for spec in specs.iter().rev() {
3287 if spec.is_expand {
3288 let mut indices = Vec::with_capacity(spec.num_indices);
3289 for _ in 0..spec.num_indices {
3290 indices.push(
3291 stack
3292 .pop()
3293 .ok_or(mex("StackUnderflow", "stack underflow"))?,
3294 );
3295 }
3296 indices.reverse();
3297 let base = stack
3298 .pop()
3299 .ok_or(mex("StackUnderflow", "stack underflow"))?;
3300 let expanded = if spec.expand_all {
3301 match base {
3302 Value::Cell(ca) => ca.data.iter().map(|p| (*(*p)).clone()).collect::<Vec<Value>>(),
3303 Value::Object(obj) => {
3304 let empty = runmat_builtins::CellArray::new(vec![], 1, 0).map_err(|e| format!("subsref build error: {e}"))?;
3305 let v = match runmat_runtime::call_builtin("call_method", &[
3306 Value::Object(obj),
3307 Value::String("subsref".to_string()),
3308 Value::String("{}".to_string()),
3309 Value::Cell(empty),
3310 ]) { Ok(v) => v, Err(e) => vm_bail!(e) };
3311 match v { Value::Cell(ca) => ca.data.iter().map(|p| (*(*p)).clone()).collect::<Vec<Value>>(), other => vec![other] }
3312 }
3313 _ => return Err("CallFunctionExpandMulti requires cell or object for expand_all".to_string()),
3314 }
3315 } else {
3316 match (base, indices.len()) {
3317 (Value::Cell(ca), 1) => match &indices[0] {
3318 Value::Num(n) => {
3319 let idx = *n as usize;
3320 if idx == 0 || idx > ca.data.len() {
3321 return Err(mex(
3322 "CellIndexOutOfBounds",
3323 "Cell index out of bounds",
3324 ));
3325 }
3326 vec![(*ca.data[idx - 1]).clone()]
3327 }
3328 Value::Int(i) => {
3329 let idx = i.to_i64() as usize;
3330 if idx == 0 || idx > ca.data.len() {
3331 return Err(mex(
3332 "CellIndexOutOfBounds",
3333 "Cell index out of bounds",
3334 ));
3335 }
3336 vec![(*ca.data[idx - 1]).clone()]
3337 }
3338 Value::Tensor(t) => {
3339 let mut out: Vec<Value> = Vec::with_capacity(t.data.len());
3340 for &val in &t.data {
3341 let iu = val as usize;
3342 if iu == 0 || iu > ca.data.len() {
3343 return Err(mex(
3344 "CellIndexOutOfBounds",
3345 "Cell index out of bounds",
3346 ));
3347 }
3348 out.push((*ca.data[iu - 1]).clone());
3349 }
3350 out
3351 }
3352 _ => {
3353 return Err(mex(
3354 "CellIndexType",
3355 "Unsupported cell index type",
3356 ))
3357 }
3358 },
3359 (Value::Cell(ca), 2) => {
3360 let r: f64 = (&indices[0]).try_into()?;
3361 let c: f64 = (&indices[1]).try_into()?;
3362 let (ir, ic) = (r as usize, c as usize);
3363 if ir == 0 || ir > ca.rows || ic == 0 || ic > ca.cols {
3364 return Err(mex(
3365 "CellSubscriptOutOfBounds",
3366 "Cell subscript out of bounds",
3367 ));
3368 }
3369 vec![(*ca.data[(ir - 1) * ca.cols + (ic - 1)]).clone()]
3370 }
3371 (Value::Object(obj), _) => {
3372 let cell = runmat_builtins::CellArray::new(
3373 indices.clone(),
3374 1,
3375 indices.len(),
3376 )
3377 .map_err(|e| format!("subsref build error: {e}"))?;
3378 let v = match runmat_runtime::call_builtin(
3379 "call_method",
3380 &[
3381 Value::Object(obj),
3382 Value::String("subsref".to_string()),
3383 Value::String("{}".to_string()),
3384 Value::Cell(cell),
3385 ],
3386 ) {
3387 Ok(v) => v,
3388 Err(e) => vm_bail!(e),
3389 };
3390 vec![v]
3391 }
3392 _ => return Err(
3393 "CallFunctionExpandMulti requires cell or object cell access"
3394 .to_string(),
3395 ),
3396 }
3397 };
3398 for v in expanded {
3399 temp.push(v);
3400 }
3401 } else {
3402 temp.push(
3403 stack
3404 .pop()
3405 .ok_or(mex("StackUnderflow", "stack underflow"))?,
3406 );
3407 }
3408 }
3409 temp.reverse();
3410 let args = temp;
3411 let func: UserFunction = match bytecode.functions.get(&name) {
3412 Some(f) => f.clone(),
3413 None => vm_bail!(mex(
3414 "UndefinedFunction",
3415 &format!("Undefined function: {name}")
3416 )),
3417 };
3418 let var_map = runmat_hir::remapping::create_complete_function_var_map(
3419 &func.params,
3420 &func.outputs,
3421 &func.body,
3422 );
3423 let local_var_count = var_map.len();
3424 let remapped_body =
3425 runmat_hir::remapping::remap_function_body(&func.body, &var_map);
3426 let func_vars_count = local_var_count.max(func.params.len());
3427 let mut func_vars = vec![Value::Num(0.0); func_vars_count];
3428 for (i, _param_id) in func.params.iter().enumerate() {
3429 if i < args.len() && i < func_vars.len() {
3430 func_vars[i] = args[i].clone();
3431 }
3432 }
3433 for (original_var_id, local_var_id) in &var_map {
3434 let local_index = local_var_id.0;
3435 let global_index = original_var_id.0;
3436 if local_index < func_vars.len() && global_index < vars.len() {
3437 let is_parameter = func
3438 .params
3439 .iter()
3440 .any(|param_id| param_id == original_var_id);
3441 if !is_parameter {
3442 func_vars[local_index] = vars[global_index].clone();
3443 }
3444 }
3445 }
3446 let mut func_var_types = func.var_types.clone();
3447 if func_var_types.len() < local_var_count {
3448 func_var_types.resize(local_var_count, Type::Unknown);
3449 }
3450 let func_program = runmat_hir::HirProgram {
3451 body: remapped_body,
3452 var_types: func_var_types,
3453 };
3454 let func_bytecode =
3455 crate::compile_with_functions(&func_program, &bytecode.functions)?;
3456 for (k, v) in func_bytecode.functions.iter() {
3458 context.functions.insert(k.clone(), v.clone());
3459 }
3460 let func_result_vars = match interpret_function(&func_bytecode, func_vars) {
3461 Ok(v) => v,
3462 Err(e) => vm_bail!(e),
3463 };
3464 if let Some(output_var_id) = func.outputs.first() {
3465 let local_output_index = var_map.get(output_var_id).map(|id| id.0).unwrap_or(0);
3466 if local_output_index < func_result_vars.len() {
3467 stack.push(func_result_vars[local_output_index].clone());
3468 } else {
3469 stack.push(Value::Num(0.0));
3470 }
3471 } else {
3472 stack.push(Value::Num(0.0));
3473 }
3474 }
Instr::CallFunction(name, arg_count) => {
    // Single-output call of `name` with `arg_count` stack arguments.
    // Strategy: try the builtin registry first (fast path); if that
    // fails, fall back to a user-defined function from the bytecode's
    // function table, compiled and interpreted in a fresh frame.
    {
        // --- Builtin fast path ---
        // Pop the arguments (stack holds them last-pushed-first, so
        // reverse to restore call order).
        let mut args = Vec::new();
        for _ in 0..arg_count {
            args.push(
                stack
                    .pop()
                    .ok_or(mex("StackUnderflow", "stack underflow"))?,
            );
        }
        args.reverse();
        // Let the acceleration layer pre-process arguments (e.g. device
        // placement) before the builtin dispatch.
        let prepared_primary = accel_prepare_args(&name, &args)?;
        if let Ok(result) = runmat_runtime::call_builtin(&name, &prepared_primary) {
            stack.push(result);
            // Manually advance past this instruction; `continue` skips
            // the loop's normal fall-through.
            pc += 1;
            continue;
        }
        // Builtin dispatch failed: restore the (prepared) arguments to
        // the stack in original push order so the user-function path
        // below can re-pop them.
        for v in prepared_primary.into_iter().rev() {
            stack.push(v);
        }
    }
    // --- User-defined function path ---
    let func: UserFunction = match bytecode.functions.get(&name) {
        Some(f) => f.clone(),
        None => vm_bail!(mex(
            "UndefinedFunction",
            &format!("Undefined function: {name}")
        )),
    };
    // Re-pop the arguments pushed back by the fast path above.
    let mut args = Vec::new();
    for _ in 0..arg_count {
        args.push(
            stack
                .pop()
                .ok_or(mex("StackUnderflow", "stack underflow"))?,
        );
    }
    args.reverse();
    // Arity validation. Without varargin the count must match exactly;
    // with varargin only the fixed (non-varargin) parameters are required.
    if !func.has_varargin {
        if arg_count < func.params.len() {
            vm_bail!(mex(
                "NotEnoughInputs",
                &format!(
                    "Function '{name}' expects {} inputs, got {arg_count}",
                    func.params.len()
                )
            ));
        }
        if arg_count > func.params.len() {
            vm_bail!(mex(
                "TooManyInputs",
                &format!(
                    "Function '{name}' expects {} inputs, got {arg_count}",
                    func.params.len()
                )
            ));
        }
    } else {
        // Last parameter is varargin itself, so it is optional.
        let min_args = func.params.len().saturating_sub(1);
        if arg_count < min_args {
            vm_bail!(mex(
                "NotEnoughInputs",
                &format!("Function '{name}' expects at least {min_args} inputs, got {arg_count}")
            ));
        }
    }
    // Build the caller-scope -> callee-local variable id mapping and
    // remap the function body to local ids.
    let var_map = runmat_hir::remapping::create_complete_function_var_map(
        &func.params,
        &func.outputs,
        &func.body,
    );
    let local_var_count = var_map.len();
    let remapped_body =
        runmat_hir::remapping::remap_function_body(&func.body, &var_map);
    // Frame must hold all locals and at least one slot per parameter.
    let func_vars_count = local_var_count.max(func.params.len());
    let mut func_vars = vec![Value::Num(0.0); func_vars_count];
    if func.has_varargin {
        // Bind the fixed parameters positionally, then pack the
        // remaining arguments into a 1xN cell bound to varargin.
        let fixed = func.params.len().saturating_sub(1);
        for i in 0..fixed {
            if i < args.len() && i < func_vars.len() {
                func_vars[i] = args[i].clone();
            }
        }
        let mut rest: Vec<Value> = if args.len() > fixed {
            args[fixed..].to_vec()
        } else {
            Vec::new()
        };
        let cell = runmat_builtins::CellArray::new(
            std::mem::take(&mut rest),
            1,
            if args.len() > fixed {
                args.len() - fixed
            } else {
                0
            },
        )
        .map_err(|e| format!("varargin: {e}"))?;
        if fixed < func_vars.len() {
            func_vars[fixed] = Value::Cell(cell);
        }
    } else {
        // Simple positional binding.
        for (i, _param_id) in func.params.iter().enumerate() {
            if i < args.len() && i < func_vars.len() {
                func_vars[i] = args[i].clone();
            }
        }
    }
    // Seed non-parameter locals from the caller's variables when the
    // mapped global slot exists (parameters keep the bound arguments).
    // NOTE(review): this copies caller state for every mapped id —
    // presumably to support captured/global-like references; confirm.
    for (original_var_id, local_var_id) in &var_map {
        let local_index = local_var_id.0;
        let global_index = original_var_id.0;
        if local_index < func_vars.len() && global_index < vars.len() {
            let is_parameter = func
                .params
                .iter()
                .any(|param_id| param_id == original_var_id);
            if !is_parameter {
                func_vars[local_index] = vars[global_index].clone();
            }
        }
    }
    // Pre-initialize varargout (last declared output) to an empty 1x0
    // cell so the callee can grow it.
    if func.has_varargout {
        if let Some(varargout_oid) = func.outputs.last() {
            if let Some(local_id) = var_map.get(varargout_oid) {
                if local_id.0 < func_vars.len() {
                    let empty = runmat_builtins::CellArray::new(vec![], 1, 0)
                        .map_err(|e| format!("varargout init: {e}"))?;
                    func_vars[local_id.0] = Value::Cell(empty);
                }
            }
        }
    }
    // Pad the static type table to cover all locals.
    let mut func_var_types = func.var_types.clone();
    if func_var_types.len() < local_var_count {
        func_var_types.resize(local_var_count, Type::Unknown);
    }
    let func_program = runmat_hir::HirProgram {
        body: remapped_body,
        var_types: func_var_types,
    };
    // Compile the callee with access to the same function table so
    // nested calls resolve.
    let func_bytecode =
        crate::compile_with_functions(&func_program, &bytecode.functions)?;
    // nargout = 1, nargin = arg_count for this single-output form.
    let func_result_vars = match interpret_function_with_counts(
        &func_bytecode,
        func_vars,
        &name,
        1,
        arg_count,
    ) {
        Ok(v) => v,
        Err(e) => {
            // Callee error: unwind to the innermost enclosing try/catch
            // if one is active, binding the exception to the catch
            // variable; otherwise propagate.
            if let Some((catch_pc, catch_var)) = try_stack.pop() {
                if let Some(var_idx) = catch_var {
                    if var_idx >= vars.len() {
                        vars.resize(var_idx + 1, Value::Num(0.0));
                        refresh_workspace_state(&vars);
                    }
                    let mex = parse_exception(&e);
                    last_exception = Some(mex.clone());
                    vars[var_idx] = Value::MException(mex);
                }
                pc = catch_pc;
                continue;
            } else {
                vm_bail!(e);
            }
        }
    };
    // Push the single requested output.
    if func.has_varargout {
        // First output slot may itself be the varargout cell; unwrap
        // its first element when present.
        let first = func
            .outputs
            .first()
            .and_then(|oid| var_map.get(oid))
            .map(|lid| lid.0)
            .unwrap_or(0);
        if let Some(Value::Cell(ca)) = func_result_vars.get(first) {
            if !ca.data.is_empty() {
                stack.push((*ca.data[0]).clone());
            } else {
                stack.push(Value::Num(0.0));
            }
        } else if let Some(v) = func_result_vars.get(first) {
            stack.push(v.clone());
        } else {
            stack.push(Value::Num(0.0));
        }
    } else if let Some(output_var_id) = func.outputs.first() {
        let local_output_index = var_map.get(output_var_id).map(|id| id.0).unwrap_or(0);
        if local_output_index < func_result_vars.len() {
            stack.push(func_result_vars[local_output_index].clone());
        } else {
            stack.push(Value::Num(0.0));
        }
    } else {
        // Function declares no outputs but this instruction expects one.
        vm_bail!(mex(
            "TooManyOutputs",
            &format!("Function '{name}' does not return outputs")
        ));
    }
}
3682 Instr::CallFunctionExpandAt(name, before_count, num_indices, after_count) => {
3683 let mut after: Vec<Value> = Vec::with_capacity(after_count);
3685 for _ in 0..after_count {
3686 after.push(
3687 stack
3688 .pop()
3689 .ok_or(mex("StackUnderflow", "stack underflow"))?,
3690 );
3691 }
3692 after.reverse();
3693 let mut indices = Vec::with_capacity(num_indices);
3694 for _ in 0..num_indices {
3695 indices.push(
3696 stack
3697 .pop()
3698 .ok_or(mex("StackUnderflow", "stack underflow"))?,
3699 );
3700 }
3701 indices.reverse();
3702 let base = stack
3703 .pop()
3704 .ok_or(mex("StackUnderflow", "stack underflow"))?;
3705 let mut before: Vec<Value> = Vec::with_capacity(before_count);
3706 for _ in 0..before_count {
3707 before.push(
3708 stack
3709 .pop()
3710 .ok_or(mex("StackUnderflow", "stack underflow"))?,
3711 );
3712 }
3713 before.reverse();
3714 let expanded = match (base, indices.len()) {
3715 (Value::Cell(ca), 1) => match &indices[0] {
3716 Value::Num(n) => {
3717 let idx = *n as usize;
3718 if idx == 0 || idx > ca.data.len() {
3719 return Err(mex(
3720 "CellIndexOutOfBounds",
3721 "Cell index out of bounds",
3722 ));
3723 }
3724 vec![(*ca.data[idx - 1]).clone()]
3725 }
3726 Value::Int(i) => {
3727 let idx = i.to_i64() as usize;
3728 if idx == 0 || idx > ca.data.len() {
3729 return Err(mex(
3730 "CellIndexOutOfBounds",
3731 "Cell index out of bounds",
3732 ));
3733 }
3734 vec![(*ca.data[idx - 1]).clone()]
3735 }
3736 Value::Tensor(t) => {
3737 let mut out: Vec<Value> = Vec::with_capacity(t.data.len());
3738 for &val in &t.data {
3739 let iu = val as usize;
3740 if iu == 0 || iu > ca.data.len() {
3741 return Err(mex(
3742 "CellIndexOutOfBounds",
3743 "Cell index out of bounds",
3744 ));
3745 }
3746 out.push((*ca.data[iu - 1]).clone());
3747 }
3748 out
3749 }
3750 _ => return Err(mex("CellIndexType", "Unsupported cell index type")),
3751 },
3752 (Value::Cell(ca), 2) => {
3753 let r: f64 = (&indices[0]).try_into()?;
3754 let c: f64 = (&indices[1]).try_into()?;
3755 let (ir, ic) = (r as usize, c as usize);
3756 if ir == 0 || ir > ca.rows || ic == 0 || ic > ca.cols {
3757 return Err(mex(
3758 "CellSubscriptOutOfBounds",
3759 "Cell subscript out of bounds",
3760 ));
3761 }
3762 vec![(*ca.data[(ir - 1) * ca.cols + (ic - 1)]).clone()]
3763 }
3764 (Value::Object(obj), _) => {
3765 let idx_vals: Vec<Value> = indices
3766 .iter()
3767 .map(|v| Value::Num((v).try_into().unwrap_or(0.0)))
3768 .collect();
3769 let cell = runmat_runtime::call_builtin("__make_cell", &idx_vals)?;
3770 let v = match runmat_runtime::call_builtin(
3771 "call_method",
3772 &[
3773 Value::Object(obj),
3774 Value::String("subsref".to_string()),
3775 Value::String("{}".to_string()),
3776 cell,
3777 ],
3778 ) {
3779 Ok(v) => v,
3780 Err(e) => vm_bail!(e),
3781 };
3782 vec![v]
3783 }
3784 _ => {
3785 return Err(mex(
3786 "ExpandError",
3787 "CallBuiltinExpandAt requires cell or object cell access",
3788 ))
3789 }
3790 };
3791 let mut args = before;
3792 args.extend(expanded.into_iter());
3793 args.extend(after.into_iter());
3794 match call_builtin(&name, &args) {
3795 Ok(v) => stack.push(v),
3796 Err(e) => vm_bail!(e),
3797 }
3798 }
Instr::CallFunctionMulti(name, arg_count, out_count) => {
    // Multi-output call of user function `name`: `arg_count` arguments
    // popped from the stack, `out_count` results pushed back (left to
    // right), with varargout distribution when declared.
    let func: UserFunction = match bytecode.functions.get(&name) {
        Some(f) => f.clone(),
        None => vm_bail!(format!("undefined function: {name}")),
    };
    // Pop arguments; reverse restores call order.
    let mut args = Vec::new();
    for _ in 0..arg_count {
        args.push(
            stack
                .pop()
                .ok_or(mex("StackUnderflow", "stack underflow"))?,
        );
    }
    args.reverse();
    // Arity validation: exact match without varargin, otherwise only
    // the fixed parameters (all but the trailing varargin) are required.
    if !func.has_varargin {
        if arg_count < func.params.len() {
            vm_bail!(mex(
                "NotEnoughInputs",
                &format!(
                    "Function '{name}' expects {} inputs, got {arg_count}",
                    func.params.len()
                )
            ));
        }
        if arg_count > func.params.len() {
            vm_bail!(mex(
                "TooManyInputs",
                &format!(
                    "Function '{name}' expects {} inputs, got {arg_count}",
                    func.params.len()
                )
            ));
        }
    } else if arg_count + 1 < func.params.len() {
        vm_bail!(mex(
            "NotEnoughInputs",
            &format!(
                "Function '{name}' expects at least {} inputs, got {arg_count}",
                func.params.len() - 1
            )
        ));
    }
    // Map caller variable ids to callee-local ids and remap the body.
    let var_map = runmat_hir::remapping::create_complete_function_var_map(
        &func.params,
        &func.outputs,
        &func.body,
    );
    let local_var_count = var_map.len();
    let remapped_body =
        runmat_hir::remapping::remap_function_body(&func.body, &var_map);
    // Frame sized to cover all locals and all parameter slots.
    let func_vars_count = local_var_count.max(func.params.len());
    let mut func_vars = vec![Value::Num(0.0); func_vars_count];
    if func.has_varargin {
        // Bind fixed parameters, then pack the surplus into a 1xN cell
        // bound to the trailing varargin slot.
        let fixed = func.params.len().saturating_sub(1);
        for i in 0..fixed {
            if i < args.len() && i < func_vars.len() {
                func_vars[i] = args[i].clone();
            }
        }
        let mut rest: Vec<Value> = if args.len() > fixed {
            args[fixed..].to_vec()
        } else {
            Vec::new()
        };
        let cell = runmat_builtins::CellArray::new(
            std::mem::take(&mut rest),
            1,
            if args.len() > fixed {
                args.len() - fixed
            } else {
                0
            },
        )
        .map_err(|e| format!("varargin: {e}"))?;
        if fixed < func_vars.len() {
            func_vars[fixed] = Value::Cell(cell);
        }
    } else {
        // Simple positional binding.
        for (i, _param_id) in func.params.iter().enumerate() {
            if i < args.len() && i < func_vars.len() {
                func_vars[i] = args[i].clone();
            }
        }
    }
    // Seed non-parameter locals from caller variables where mapped.
    // NOTE(review): copies caller state into the frame — presumably to
    // support captured/global-like references; confirm intent.
    for (original_var_id, local_var_id) in &var_map {
        let local_index = local_var_id.0;
        let global_index = original_var_id.0;
        if local_index < func_vars.len() && global_index < vars.len() {
            let is_parameter = func
                .params
                .iter()
                .any(|param_id| param_id == original_var_id);
            if !is_parameter {
                func_vars[local_index] = vars[global_index].clone();
            }
        }
    }
    // Pre-initialize varargout (last declared output) to an empty cell.
    if func.has_varargout {
        if let Some(varargout_oid) = func.outputs.last() {
            if let Some(local_id) = var_map.get(varargout_oid) {
                if local_id.0 < func_vars.len() {
                    let empty = runmat_builtins::CellArray::new(vec![], 1, 0)
                        .map_err(|e| format!("varargout init: {e}"))?;
                    func_vars[local_id.0] = Value::Cell(empty);
                }
            }
        }
    }
    // Pad static type info to cover every local slot.
    let mut func_var_types = func.var_types.clone();
    if func_var_types.len() < local_var_count {
        func_var_types.resize(local_var_count, Type::Unknown);
    }
    let func_program = runmat_hir::HirProgram {
        body: remapped_body,
        var_types: func_var_types,
    };
    let func_bytecode =
        crate::compile_with_functions(&func_program, &bytecode.functions)?;
    // nargout = out_count, nargin = arg_count for this call site.
    let func_result_vars = match interpret_function_with_counts(
        &func_bytecode,
        func_vars,
        &name,
        out_count,
        arg_count,
    ) {
        Ok(v) => v,
        Err(e) => {
            // Unwind to the innermost active try/catch, binding the
            // parsed exception to the catch variable; else propagate.
            if let Some((catch_pc, catch_var)) = try_stack.pop() {
                if let Some(var_idx) = catch_var {
                    if var_idx >= vars.len() {
                        vars.resize(var_idx + 1, Value::Num(0.0));
                        refresh_workspace_state(&vars);
                    }
                    let mex = parse_exception(&e);
                    last_exception = Some(mex.clone());
                    vars[var_idx] = Value::MException(mex);
                }
                pc = catch_pc;
                continue;
            } else {
                vm_bail!(e);
            }
        }
    };
    if func.has_varargout {
        // Named outputs first (all declared outputs except the trailing
        // varargout), then draw the remainder from the varargout cell.
        let total_named = func.outputs.len().saturating_sub(1);
        let mut pushed = 0usize;
        for i in 0..total_named.min(out_count) {
            if let Some(oid) = func.outputs.get(i) {
                if let Some(local_id) = var_map.get(oid) {
                    let idx = local_id.0;
                    let v = func_result_vars
                        .get(idx)
                        .cloned()
                        .unwrap_or(Value::Num(0.0));
                    stack.push(v);
                    pushed += 1;
                }
            }
        }
        if pushed < out_count {
            if let Some(varargout_oid) = func.outputs.last() {
                if let Some(local_id) = var_map.get(varargout_oid) {
                    if let Some(Value::Cell(ca)) = func_result_vars.get(local_id.0) {
                        let available = ca.data.len();
                        let need = out_count - pushed;
                        // Error when the callee produced fewer varargout
                        // values than the call site requested.
                        if need > available {
                            vm_bail!(mex("VarargoutMismatch", &format!("Function '{name}' returned {available} varargout values, {need} requested")));
                        }
                        for vi in 0..need {
                            stack.push((*ca.data[vi]).clone());
                        }
                    }
                }
            }
        }
    } else {
        // No varargout: the request may not exceed declared outputs.
        let defined = func.outputs.len();
        if out_count > defined {
            vm_bail!(mex(
                "TooManyOutputs",
                &format!("Function '{name}' defines {defined} outputs, {out_count} requested")
            ));
        }
        // Push each requested output in declaration order; missing
        // slots default to 0.0.
        for i in 0..out_count {
            let v = func
                .outputs
                .get(i)
                .and_then(|oid| var_map.get(oid))
                .map(|lid| lid.0)
                .and_then(|idx| func_result_vars.get(idx))
                .cloned()
                .unwrap_or(Value::Num(0.0));
            stack.push(v);
        }
    }
}
4002 Instr::CallBuiltinMulti(name, arg_count, out_count) => {
4003 let mut args = Vec::new();
4005 for _ in 0..arg_count {
4006 args.push(
4007 stack
4008 .pop()
4009 .ok_or(mex("StackUnderflow", "stack underflow"))?,
4010 );
4011 }
4012 args.reverse();
4013 if name == "gather" {
4014 let eval = match runmat_runtime::builtins::acceleration::gpu::gather::evaluate(
4015 &args,
4016 ) {
4017 Ok(eval) => eval,
4018 Err(err) => vm_bail!(err),
4019 };
4020 let len = eval.len();
4021 if out_count == 0 {
4022 continue;
4023 }
4024 if len == 1 {
4025 if out_count > 1 {
4026 vm_bail!(mex("TooManyOutputs", "gather: too many output arguments"));
4027 }
4028 stack.push(eval.into_first());
4029 continue;
4030 }
4031 if out_count != len {
4032 vm_bail!(mex(
4033 "TooManyOutputs",
4034 "gather: number of outputs must match number of inputs"
4035 ));
4036 }
4037 for value in eval.into_outputs() {
4038 stack.push(value);
4039 }
4040 continue;
4041 }
4042 if name == "meshgrid" {
4043 let eval = match runmat_runtime::builtins::array::creation::meshgrid::evaluate(
4044 &args,
4045 ) {
4046 Ok(eval) => eval,
4047 Err(err) => vm_bail!(err),
4048 };
4049 if out_count == 0 {
4050 continue;
4051 }
4052 let available = eval.output_count();
4053 if out_count > available {
4054 let msg = if available == 2 {
4055 "meshgrid with two inputs supports at most two outputs"
4056 } else {
4057 "meshgrid supports at most three outputs"
4058 };
4059 vm_bail!(mex("TooManyOutputs", msg));
4060 }
4061 let first = match eval.first() {
4062 Ok(value) => value,
4063 Err(err) => vm_bail!(err),
4064 };
4065 stack.push(first);
4066 if out_count >= 2 {
4067 let second = match eval.second() {
4068 Ok(value) => value,
4069 Err(err) => vm_bail!(err),
4070 };
4071 stack.push(second);
4072 }
4073 if out_count >= 3 {
4074 let third = match eval.third() {
4075 Ok(value) => value,
4076 Err(err) => vm_bail!(err),
4077 };
4078 stack.push(third);
4079 }
4080 continue;
4081 }
4082 if name == "load" {
4083 let eval = match runmat_runtime::builtins::io::mat::load::evaluate(&args) {
4084 Ok(eval) => eval,
4085 Err(err) => vm_bail!(err),
4086 };
4087 if out_count == 0 {
4088 if let Err(err) = assign_loaded_variables(&mut vars, eval.variables()) {
4089 vm_bail!(err);
4090 }
4091 continue;
4092 }
4093 if out_count > 1 {
4094 vm_bail!(mex(
4095 "TooManyOutputs",
4096 "load supports at most one output argument"
4097 ));
4098 }
4099 stack.push(eval.first_output());
4100 for _ in 1..out_count {
4101 stack.push(Value::Num(0.0));
4102 }
4103 continue;
4104 }
4105 if name == "fopen" {
4106 let eval = match runmat_runtime::builtins::io::filetext::fopen::evaluate(&args)
4107 {
4108 Ok(eval) => eval,
4109 Err(err) => vm_bail!(err),
4110 };
4111 if out_count == 0 {
4112 continue;
4113 }
4114 let outputs = eval.outputs();
4115 for i in 0..out_count {
4116 if let Some(value) = outputs.get(i) {
4117 stack.push(value.clone());
4118 } else {
4119 stack.push(Value::Num(0.0));
4120 }
4121 }
4122 continue;
4123 }
4124 if name == "fgets" {
4125 if args.is_empty() {
4126 vm_bail!(mex(
4127 "RuntimeError",
4128 "fgets requires at least one input argument"
4129 ));
4130 }
4131 let eval = match runmat_runtime::builtins::io::filetext::fgets::evaluate(
4132 &args[0],
4133 &args[1..],
4134 ) {
4135 Ok(eval) => eval,
4136 Err(err) => vm_bail!(err),
4137 };
4138 if out_count == 0 {
4139 continue;
4140 }
4141 let outputs = eval.outputs();
4142 for i in 0..out_count {
4143 if let Some(value) = outputs.get(i) {
4144 stack.push(value.clone());
4145 } else {
4146 stack.push(Value::Num(0.0));
4147 }
4148 }
4149 continue;
4150 }
4151 if name == "fclose" {
4152 let eval = match runmat_runtime::builtins::io::filetext::fclose::evaluate(&args)
4153 {
4154 Ok(eval) => eval,
4155 Err(err) => vm_bail!(err),
4156 };
4157 if out_count == 0 {
4158 continue;
4159 }
4160 let outputs = eval.outputs();
4161 for i in 0..out_count {
4162 if let Some(value) = outputs.get(i) {
4163 stack.push(value.clone());
4164 } else {
4165 stack.push(Value::Num(0.0));
4166 }
4167 }
4168 continue;
4169 }
4170 if name == "mkdir" {
4171 let eval = match runmat_runtime::builtins::io::repl_fs::mkdir::evaluate(&args) {
4172 Ok(eval) => eval,
4173 Err(err) => vm_bail!(err),
4174 };
4175 if out_count == 0 {
4176 continue;
4177 }
4178 let outputs = eval.outputs();
4179 for i in 0..out_count {
4180 if let Some(value) = outputs.get(i) {
4181 stack.push(value.clone());
4182 } else {
4183 stack.push(Value::Num(0.0));
4184 }
4185 }
4186 continue;
4187 }
4188 if name == "setenv" {
4189 let eval = match runmat_runtime::builtins::io::repl_fs::setenv::evaluate(&args)
4190 {
4191 Ok(eval) => eval,
4192 Err(err) => vm_bail!(err),
4193 };
4194 if out_count == 0 {
4195 continue;
4196 }
4197 let outputs = eval.outputs();
4198 for i in 0..out_count {
4199 if let Some(value) = outputs.get(i) {
4200 stack.push(value.clone());
4201 } else {
4202 stack.push(Value::Num(0.0));
4203 }
4204 }
4205 continue;
4206 }
4207 if name == "savepath" {
4208 let eval =
4209 match runmat_runtime::builtins::io::repl_fs::savepath::evaluate(&args) {
4210 Ok(eval) => eval,
4211 Err(err) => vm_bail!(err),
4212 };
4213 if out_count == 0 {
4214 continue;
4215 }
4216 let outputs = eval.outputs();
4217 for i in 0..out_count {
4218 if let Some(value) = outputs.get(i) {
4219 stack.push(value.clone());
4220 } else {
4221 stack.push(Value::Num(0.0));
4222 }
4223 }
4224 continue;
4225 }
4226 if name == "copyfile" {
4227 let eval =
4228 match runmat_runtime::builtins::io::repl_fs::copyfile::evaluate(&args) {
4229 Ok(eval) => eval,
4230 Err(err) => vm_bail!(err),
4231 };
4232 if out_count == 0 {
4233 continue;
4234 }
4235 let outputs = eval.outputs();
4236 for i in 0..out_count {
4237 if let Some(value) = outputs.get(i) {
4238 stack.push(value.clone());
4239 } else {
4240 stack.push(Value::Num(0.0));
4241 }
4242 }
4243 continue;
4244 }
4245 if name == "movefile" {
4246 let eval =
4247 match runmat_runtime::builtins::io::repl_fs::movefile::evaluate(&args) {
4248 Ok(eval) => eval,
4249 Err(err) => vm_bail!(err),
4250 };
4251 if out_count == 0 {
4252 continue;
4253 }
4254 let outputs = eval.outputs();
4255 for i in 0..out_count {
4256 if let Some(value) = outputs.get(i) {
4257 stack.push(value.clone());
4258 } else {
4259 stack.push(Value::Num(0.0));
4260 }
4261 }
4262 continue;
4263 }
4264 if name == "rmdir" {
4265 let eval = match runmat_runtime::builtins::io::repl_fs::rmdir::evaluate(&args) {
4266 Ok(eval) => eval,
4267 Err(err) => vm_bail!(err),
4268 };
4269 if out_count == 0 {
4270 continue;
4271 }
4272 let outputs = eval.outputs();
4273 for i in 0..out_count {
4274 if let Some(value) = outputs.get(i) {
4275 stack.push(value.clone());
4276 } else {
4277 stack.push(Value::Num(0.0));
4278 }
4279 }
4280 continue;
4281 }
4282 if name == "orderfields" && !args.is_empty() {
4283 let eval = match runmat_runtime::builtins::structs::core::orderfields::evaluate(
4284 args[0].clone(),
4285 &args[1..],
4286 ) {
4287 Ok(eval) => eval,
4288 Err(err) => vm_bail!(err),
4289 };
4290 if out_count == 0 {
4291 continue;
4292 }
4293 let (ordered, permutation) = eval.into_values();
4294 stack.push(ordered);
4295 if out_count >= 2 {
4296 stack.push(permutation);
4297 }
4298 if out_count > 2 {
4299 for _ in 2..out_count {
4300 stack.push(Value::Num(0.0));
4301 }
4302 }
4303 continue;
4304 }
4305 if name == "chol" {
4306 if args.is_empty() {
4307 vm_bail!(mex("NotEnoughInputs", "chol requires an input matrix"));
4308 }
4309 let eval = match runmat_runtime::builtins::math::linalg::factor::chol::evaluate(
4310 args[0].clone(),
4311 &args[1..],
4312 ) {
4313 Ok(v) => v,
4314 Err(err) => vm_bail!(err),
4315 };
4316 match out_count {
4317 0 => continue,
4318 1 => {
4319 if !eval.is_positive_definite() {
4320 vm_bail!("Matrix must be positive definite.".to_string());
4321 }
4322 stack.push(eval.factor());
4323 continue;
4324 }
4325 2 => {
4326 stack.push(eval.factor());
4327 stack.push(eval.flag());
4328 continue;
4329 }
4330 _ => vm_bail!(mex(
4331 "TooManyOutputs",
4332 "chol currently supports at most two outputs"
4333 )),
4334 }
4335 }
4336 if name == "lu" {
4337 if args.is_empty() {
4338 vm_bail!(mex("NotEnoughInputs", "lu requires an input matrix"));
4339 }
4340 let eval = match runmat_runtime::builtins::math::linalg::factor::lu::evaluate(
4341 args[0].clone(),
4342 &args[1..],
4343 ) {
4344 Ok(v) => v,
4345 Err(err) => vm_bail!(err),
4346 };
4347 match out_count {
4348 0 => continue,
4349 1 => {
4350 stack.push(eval.combined());
4351 continue;
4352 }
4353 2 => {
4354 stack.push(eval.lower());
4355 stack.push(eval.upper());
4356 continue;
4357 }
4358 3 => {
4359 stack.push(eval.lower());
4360 stack.push(eval.upper());
4361 stack.push(eval.permutation());
4362 continue;
4363 }
4364 _ => vm_bail!(mex(
4365 "TooManyOutputs",
4366 "lu currently supports at most three outputs"
4367 )),
4368 }
4369 }
4370 if name == "linsolve" {
4371 if args.len() < 2 {
4372 vm_bail!(mex(
4373 "NotEnoughInputs",
4374 "linsolve requires coefficient and right-hand side inputs"
4375 ));
4376 }
4377 let eval =
4378 match runmat_runtime::builtins::math::linalg::solve::linsolve::evaluate_args(
4379 args[0].clone(),
4380 args[1].clone(),
4381 &args[2..],
4382 ) {
4383 Ok(v) => v,
4384 Err(err) => vm_bail!(err),
4385 };
4386 match out_count {
4387 0 => continue,
4388 1 => {
4389 stack.push(eval.solution());
4390 continue;
4391 }
4392 2 => {
4393 stack.push(eval.solution());
4394 stack.push(eval.reciprocal_condition());
4395 continue;
4396 }
4397 _ => vm_bail!(mex(
4398 "TooManyOutputs",
4399 "linsolve currently supports at most two outputs"
4400 )),
4401 }
4402 }
4403 if name == "qr" {
4404 if args.is_empty() {
4405 vm_bail!(mex("NotEnoughInputs", "qr requires an input matrix"));
4406 }
4407 let eval = match runmat_runtime::builtins::math::linalg::factor::qr::evaluate(
4408 args[0].clone(),
4409 &args[1..],
4410 ) {
4411 Ok(v) => v,
4412 Err(err) => vm_bail!(err),
4413 };
4414 match out_count {
4415 0 => {
4416 pc += 1;
4417 continue;
4418 }
4419 1 => {
4420 stack.push(eval.r());
4421 pc += 1;
4422 continue;
4423 }
4424 2 => {
4425 stack.push(eval.q());
4426 stack.push(eval.r());
4427 pc += 1;
4428 continue;
4429 }
4430 3 => {
4431 stack.push(eval.q());
4432 stack.push(eval.r());
4433 stack.push(eval.permutation());
4434 pc += 1;
4435 continue;
4436 }
4437 _ => vm_bail!(mex(
4438 "TooManyOutputs",
4439 "qr currently supports at most three outputs"
4440 )),
4441 }
4442 }
4443 if name == "svd" {
4444 if args.is_empty() {
4445 vm_bail!(mex("NotEnoughInputs", "svd requires an input matrix"));
4446 }
4447 let eval = match runmat_runtime::builtins::math::linalg::factor::svd::evaluate(
4448 args[0].clone(),
4449 &args[1..],
4450 ) {
4451 Ok(v) => v,
4452 Err(err) => vm_bail!(err),
4453 };
4454 match out_count {
4455 0 => continue,
4456 1 => {
4457 stack.push(eval.singular_values());
4458 continue;
4459 }
4460 2 => {
4461 stack.push(eval.u());
4462 stack.push(eval.sigma());
4463 continue;
4464 }
4465 3 => {
4466 stack.push(eval.u());
4467 stack.push(eval.sigma());
4468 stack.push(eval.v());
4469 continue;
4470 }
4471 _ => vm_bail!(mex(
4472 "TooManyOutputs",
4473 "svd currently supports at most three outputs"
4474 )),
4475 }
4476 }
4477 if name == "eig" {
4478 if args.is_empty() {
4479 vm_bail!(mex("NotEnoughInputs", "eig requires an input matrix"));
4480 }
4481 let require_left = out_count >= 3;
4482 let eval = match runmat_runtime::builtins::math::linalg::factor::eig::evaluate(
4483 args[0].clone(),
4484 &args[1..],
4485 require_left,
4486 ) {
4487 Ok(v) => v,
4488 Err(err) => vm_bail!(err),
4489 };
4490 match out_count {
4491 0 => continue,
4492 1 => {
4493 stack.push(eval.eigenvalues());
4494 continue;
4495 }
4496 2 => {
4497 stack.push(eval.right());
4498 stack.push(eval.diagonal());
4499 continue;
4500 }
4501 3 => {
4502 stack.push(eval.right());
4503 stack.push(eval.diagonal());
4504 let left = match eval.left() {
4505 Ok(value) => value,
4506 Err(err) => vm_bail!(err),
4507 };
4508 stack.push(left);
4509 continue;
4510 }
4511 _ => vm_bail!(mex(
4512 "TooManyOutputs",
4513 "eig currently supports at most three outputs"
4514 )),
4515 }
4516 }
4517 if name == "find" && !args.is_empty() {
4519 let eval = match runmat_runtime::builtins::array::indexing::find::evaluate(
4520 args[0].clone(),
4521 &args[1..],
4522 ) {
4523 Ok(eval) => eval,
4524 Err(err) => vm_bail!(err),
4525 };
4526 if out_count == 0 {
4527 continue;
4528 }
4529 if out_count <= 1 {
4530 let linear = match eval.linear_value() {
4531 Ok(v) => v,
4532 Err(err) => vm_bail!(err),
4533 };
4534 stack.push(linear);
4535 for _ in 1..out_count {
4536 stack.push(Value::Num(0.0));
4537 }
4538 } else {
4539 let rows = match eval.row_value() {
4540 Ok(v) => v,
4541 Err(err) => vm_bail!(err),
4542 };
4543 stack.push(rows);
4544 let cols = match eval.column_value() {
4545 Ok(v) => v,
4546 Err(err) => vm_bail!(err),
4547 };
4548 stack.push(cols);
4549 if out_count >= 3 {
4550 let vals = match eval.values_value() {
4551 Ok(v) => v,
4552 Err(err) => vm_bail!(err),
4553 };
4554 stack.push(vals);
4555 }
4556 if out_count > 3 {
4557 for _ in 3..out_count {
4558 stack.push(Value::Num(0.0));
4559 }
4560 }
4561 }
4562 continue;
4563 }
4564 if name == "regexp" && args.len() >= 2 {
4565 let eval = match runmat_runtime::builtins::strings::regex::regexp::evaluate(
4566 args[0].clone(),
4567 args[1].clone(),
4568 &args[2..],
4569 ) {
4570 Ok(eval) => eval,
4571 Err(err) => vm_bail!(err),
4572 };
4573 let mut values = match eval.outputs_for_multi() {
4574 Ok(values) => values,
4575 Err(err) => vm_bail!(err),
4576 };
4577 if out_count == 0 {
4578 continue;
4579 }
4580 for _ in 0..out_count {
4581 if !values.is_empty() {
4582 stack.push(values.remove(0));
4583 } else {
4584 stack.push(Value::Num(0.0));
4585 }
4586 }
4587 continue;
4588 }
4589 if name == "deconv" {
4590 if args.len() < 2 {
4591 vm_bail!(mex("MATLAB:minrhs", "Not enough input arguments."));
4592 }
4593 let eval = match runmat_runtime::builtins::math::signal::deconv::evaluate(
4594 args[0].clone(),
4595 args[1].clone(),
4596 ) {
4597 Ok(eval) => eval,
4598 Err(err) => vm_bail!(err),
4599 };
4600 if out_count == 0 {
4601 continue;
4602 }
4603 stack.push(eval.quotient());
4604 if out_count >= 2 {
4605 stack.push(eval.remainder());
4606 }
4607 if out_count > 2 {
4608 for _ in 2..out_count {
4609 stack.push(Value::Num(0.0));
4610 }
4611 }
4612 continue;
4613 }
4614 if name == "polyder" {
4615 if args.is_empty() {
4616 vm_bail!(mex("MATLAB:minrhs", "Not enough input arguments."));
4617 }
4618 if out_count <= 1 {
4619 let result = match args.len() {
4620 1 => runmat_runtime::builtins::math::poly::polyder::derivative_single(
4621 args[0].clone(),
4622 ),
4623 2 => runmat_runtime::builtins::math::poly::polyder::derivative_product(
4624 args[0].clone(),
4625 args[1].clone(),
4626 ),
4627 _ => vm_bail!("polyder: too many input arguments.".to_string()),
4628 };
4629 match result {
4630 Ok(value) => {
4631 if out_count == 0 {
4632 continue;
4633 }
4634 stack.push(value);
4635 }
4636 Err(err) => vm_bail!(err),
4637 }
4638 if out_count > 1 {
4639 for _ in 1..out_count {
4640 stack.push(Value::Num(0.0));
4641 }
4642 }
4643 continue;
4644 }
4645 if args.len() != 2 {
4646 vm_bail!(mex(
4647 "MATLAB:minrhs",
4648 "Not enough input arguments for quotient form."
4649 ));
4650 }
4651 let eval =
4652 match runmat_runtime::builtins::math::poly::polyder::evaluate_quotient(
4653 args[0].clone(),
4654 args[1].clone(),
4655 ) {
4656 Ok(eval) => eval,
4657 Err(err) => vm_bail!(err),
4658 };
4659 stack.push(eval.numerator());
4660 stack.push(eval.denominator());
4661 if out_count > 2 {
4662 for _ in 2..out_count {
4663 stack.push(Value::Num(0.0));
4664 }
4665 }
4666 continue;
4667 }
4668 if name == "polyval" {
4669 if args.len() < 2 {
4670 vm_bail!(mex("MATLAB:minrhs", "Not enough input arguments."));
4671 }
4672 let eval = match runmat_runtime::builtins::math::poly::polyval::evaluate(
4673 args[0].clone(),
4674 args[1].clone(),
4675 &args[2..],
4676 out_count >= 2,
4677 ) {
4678 Ok(eval) => eval,
4679 Err(err) => vm_bail!(err),
4680 };
4681 if out_count == 0 {
4682 continue;
4683 }
4684 stack.push(eval.value());
4685 if out_count >= 2 {
4686 let delta = match eval.delta() {
4687 Ok(v) => v,
4688 Err(err) => vm_bail!(err),
4689 };
4690 stack.push(delta);
4691 }
4692 if out_count > 2 {
4693 for _ in 2..out_count {
4694 stack.push(Value::Num(0.0));
4695 }
4696 }
4697 continue;
4698 }
4699 if name == "polyfit" {
4700 if args.len() < 3 {
4701 vm_bail!(mex("MATLAB:minrhs", "Not enough input arguments."));
4702 }
4703 let eval = match runmat_runtime::builtins::math::poly::polyfit::evaluate(
4704 args[0].clone(),
4705 args[1].clone(),
4706 args[2].clone(),
4707 &args[3..],
4708 ) {
4709 Ok(eval) => eval,
4710 Err(err) => vm_bail!(err),
4711 };
4712 if out_count == 0 {
4713 continue;
4714 }
4715 stack.push(eval.coefficients());
4716 if out_count >= 2 {
4717 stack.push(eval.stats());
4718 }
4719 if out_count >= 3 {
4720 stack.push(eval.mu());
4721 }
4722 if out_count > 3 {
4723 for _ in 3..out_count {
4724 stack.push(Value::Num(0.0));
4725 }
4726 }
4727 continue;
4728 }
4729 if name == "filter" {
4730 if args.len() < 3 {
4731 vm_bail!(mex("MATLAB:minrhs", "Not enough input arguments."));
4732 }
4733 let eval = match runmat_runtime::builtins::math::signal::filter::evaluate(
4734 args[0].clone(),
4735 args[1].clone(),
4736 args[2].clone(),
4737 &args[3..],
4738 ) {
4739 Ok(eval) => eval,
4740 Err(err) => vm_bail!(err),
4741 };
4742 if out_count == 0 {
4743 continue;
4744 }
4745 if out_count == 1 {
4746 stack.push(eval.into_value());
4747 } else {
4748 let (output, final_state) = eval.into_pair();
4749 stack.push(output);
4750 stack.push(final_state);
4751 if out_count > 2 {
4752 for _ in 2..out_count {
4753 stack.push(Value::Num(0.0));
4754 }
4755 }
4756 }
4757 continue;
4758 }
4759 if name == "sort" && !args.is_empty() {
4760 let eval = match runmat_runtime::builtins::array::sorting_sets::sort::evaluate(
4761 args[0].clone(),
4762 &args[1..],
4763 ) {
4764 Ok(eval) => eval,
4765 Err(err) => vm_bail!(err),
4766 };
4767 if out_count == 0 {
4768 continue;
4769 }
4770 let (sorted, indices) = eval.into_values();
4771 stack.push(sorted);
4772 if out_count >= 2 {
4773 stack.push(indices);
4774 }
4775 if out_count > 2 {
4776 for _ in 2..out_count {
4777 stack.push(Value::Num(0.0));
4778 }
4779 }
4780 continue;
4781 }
4782 if name == "cummin" && !args.is_empty() {
4783 let eval = match runmat_runtime::builtins::math::reduction::evaluate_cummin(
4784 args[0].clone(),
4785 &args[1..],
4786 ) {
4787 Ok(eval) => eval,
4788 Err(err) => vm_bail!(err),
4789 };
4790 if out_count == 0 {
4791 continue;
4792 }
4793 let (values, indices) = eval.into_pair();
4794 stack.push(values);
4795 if out_count >= 2 {
4796 stack.push(indices);
4797 }
4798 if out_count > 2 {
4799 for _ in 2..out_count {
4800 stack.push(Value::Num(0.0));
4801 }
4802 }
4803 continue;
4804 }
4805 if name == "min" && !args.is_empty() {
4806 let eval = match runmat_runtime::builtins::math::reduction::evaluate_min(
4807 args[0].clone(),
4808 &args[1..],
4809 ) {
4810 Ok(eval) => eval,
4811 Err(err) => vm_bail!(err),
4812 };
4813 if out_count == 0 {
4814 continue;
4815 }
4816 let (values, indices) = eval.into_pair();
4817 stack.push(values);
4818 if out_count >= 2 {
4819 stack.push(indices);
4820 }
4821 if out_count > 2 {
4822 for _ in 2..out_count {
4823 stack.push(Value::Num(0.0));
4824 }
4825 }
4826 continue;
4827 }
4828 if name == "sortrows" && !args.is_empty() {
4829 let eval =
4830 match runmat_runtime::builtins::array::sorting_sets::sortrows::evaluate(
4831 args[0].clone(),
4832 &args[1..],
4833 ) {
4834 Ok(eval) => eval,
4835 Err(err) => vm_bail!(err.to_string()),
4836 };
4837 if out_count == 0 {
4838 continue;
4839 }
4840 let (sorted, indices) = eval.into_values();
4841 stack.push(sorted);
4842 if out_count >= 2 {
4843 stack.push(indices);
4844 }
4845 if out_count > 2 {
4846 for _ in 2..out_count {
4847 stack.push(Value::Num(0.0));
4848 }
4849 }
4850 continue;
4851 }
4852 if name == "ismember" && args.len() >= 2 {
4853 let eval =
4854 match runmat_runtime::builtins::array::sorting_sets::ismember::evaluate(
4855 args[0].clone(),
4856 args[1].clone(),
4857 &args[2..],
4858 ) {
4859 Ok(eval) => eval,
4860 Err(err) => vm_bail!(err.to_string()),
4861 };
4862 if out_count == 0 {
4863 continue;
4864 }
4865 if out_count == 1 {
4866 stack.push(eval.into_mask_value());
4867 continue;
4868 }
4869 let (mask, loc) = eval.into_pair();
4870 stack.push(mask);
4871 stack.push(loc);
4872 if out_count > 2 {
4873 for _ in 2..out_count {
4874 stack.push(Value::Num(0.0));
4875 }
4876 }
4877 continue;
4878 }
4879 if name == "intersect" && args.len() >= 2 {
4880 let eval =
4881 match runmat_runtime::builtins::array::sorting_sets::intersect::evaluate(
4882 args[0].clone(),
4883 args[1].clone(),
4884 &args[2..],
4885 ) {
4886 Ok(eval) => eval,
4887 Err(err) => vm_bail!(err.to_string()),
4888 };
4889 if out_count == 0 {
4890 continue;
4891 }
4892 if out_count == 1 {
4893 stack.push(eval.into_values_value());
4894 continue;
4895 }
4896 if out_count == 2 {
4897 let (values, ia) = eval.into_pair();
4898 stack.push(values);
4899 stack.push(ia);
4900 continue;
4901 }
4902 let (values, ia, ib) = eval.into_triple();
4903 stack.push(values);
4904 stack.push(ia);
4905 stack.push(ib);
4906 if out_count > 3 {
4907 for _ in 3..out_count {
4908 stack.push(Value::Num(0.0));
4909 }
4910 }
4911 continue;
4912 }
4913 if name == "union" && args.len() >= 2 {
4914 let eval = match runmat_runtime::builtins::array::sorting_sets::union::evaluate(
4915 args[0].clone(),
4916 args[1].clone(),
4917 &args[2..],
4918 ) {
4919 Ok(eval) => eval,
4920 Err(err) => vm_bail!(err.to_string()),
4921 };
4922 if out_count == 0 {
4923 continue;
4924 }
4925 if out_count == 1 {
4926 stack.push(eval.into_values_value());
4927 continue;
4928 }
4929 if out_count == 2 {
4930 let (values, ia) = eval.into_pair();
4931 stack.push(values);
4932 stack.push(ia);
4933 continue;
4934 }
4935 let (values, ia, ib) = eval.into_triple();
4936 stack.push(values);
4937 stack.push(ia);
4938 stack.push(ib);
4939 if out_count > 3 {
4940 for _ in 3..out_count {
4941 stack.push(Value::Num(0.0));
4942 }
4943 }
4944 continue;
4945 }
4946 if name == "histcounts" && !args.is_empty() {
4947 let eval = match runmat_runtime::builtins::stats::hist::histcounts::evaluate(
4948 args[0].clone(),
4949 &args[1..],
4950 ) {
4951 Ok(eval) => eval,
4952 Err(err) => vm_bail!(err.to_string()),
4953 };
4954 if out_count == 0 {
4955 continue;
4956 }
4957 if out_count == 1 {
4958 stack.push(eval.into_counts_value());
4959 continue;
4960 }
4961 let (counts, edges) = eval.into_pair();
4962 stack.push(counts);
4963 stack.push(edges);
4964 if out_count > 2 {
4965 for _ in 2..out_count {
4966 stack.push(Value::Num(0.0));
4967 }
4968 }
4969 continue;
4970 }
4971 if name == "histcounts2" && args.len() >= 2 {
4972 let eval = match runmat_runtime::builtins::stats::hist::histcounts2::evaluate(
4973 args[0].clone(),
4974 args[1].clone(),
4975 &args[2..],
4976 ) {
4977 Ok(eval) => eval,
4978 Err(err) => vm_bail!(err.to_string()),
4979 };
4980 if out_count == 0 {
4981 continue;
4982 }
4983 if out_count == 1 {
4984 stack.push(eval.into_counts_value());
4985 continue;
4986 }
4987 if out_count == 2 {
4988 let (counts, xedges) = eval.into_pair();
4989 stack.push(counts);
4990 stack.push(xedges);
4991 continue;
4992 }
4993 let (counts, xedges, yedges) = eval.into_triple();
4994 stack.push(counts);
4995 stack.push(xedges);
4996 stack.push(yedges);
4997 if out_count > 3 {
4998 for _ in 3..out_count {
4999 stack.push(Value::Num(0.0));
5000 }
5001 }
5002 continue;
5003 }
5004 if name == "unique" && !args.is_empty() {
5005 let eval = match runmat_runtime::builtins::array::sorting_sets::unique::evaluate(
5006 args[0].clone(),
5007 &args[1..],
5008 ) {
5009 Ok(eval) => eval,
5010 Err(err) => vm_bail!(err.to_string()),
5011 };
5012 if out_count == 0 {
5013 continue;
5014 }
5015 if out_count == 1 {
5016 stack.push(eval.into_values_value());
5017 continue;
5018 }
5019 if out_count == 2 {
5020 let (values, ia) = eval.into_pair();
5021 stack.push(values);
5022 stack.push(ia);
5023 continue;
5024 }
5025 let (values, ia, ic) = eval.into_triple();
5026 stack.push(values);
5027 stack.push(ia);
5028 stack.push(ic);
5029 if out_count > 3 {
5030 for _ in 3..out_count {
5031 stack.push(Value::Num(0.0));
5032 }
5033 }
5034 continue;
5035 }
5036 match call_builtin(&name, &args) {
5037 Ok(v) => match v {
5038 Value::Tensor(t) => {
5039 let mut pushed = 0usize;
5040 for &val in t.data.iter() {
5041 if pushed >= out_count {
5042 break;
5043 }
5044 stack.push(Value::Num(val));
5045 pushed += 1;
5046 }
5047 for _ in pushed..out_count {
5048 stack.push(Value::Num(0.0));
5049 }
5050 }
5051 Value::Cell(ca) => {
5052 let mut pushed = 0usize;
5053 for v in &ca.data {
5054 if pushed >= out_count {
5055 break;
5056 }
5057 stack.push((**v).clone());
5058 pushed += 1;
5059 }
5060 for _ in pushed..out_count {
5061 stack.push(Value::Num(0.0));
5062 }
5063 }
5064 other => {
5065 stack.push(other);
5066 for _ in 1..out_count {
5067 stack.push(Value::Num(0.0));
5068 }
5069 }
5070 },
5071 Err(e) => {
5072 let mut resolved = None;
5074 for (path, wildcard) in &imports {
5075 if !*wildcard {
5076 continue;
5077 }
5078 let mut qual = String::new();
5079 for (i, part) in path.iter().enumerate() {
5080 if i > 0 {
5081 qual.push('.');
5082 }
5083 qual.push_str(part);
5084 }
5085 qual.push('.');
5086 qual.push_str(&name);
5087 if let Ok(v) = call_builtin(&qual, &args) {
5088 resolved = Some(v);
5089 break;
5090 }
5091 }
5092 if let Some(v) = resolved {
5093 match v {
5094 Value::Tensor(t) => {
5095 let mut pushed = 0usize;
5096 for &val in t.data.iter() {
5097 if pushed >= out_count {
5098 break;
5099 }
5100 stack.push(Value::Num(val));
5101 pushed += 1;
5102 }
5103 for _ in pushed..out_count {
5104 stack.push(Value::Num(0.0));
5105 }
5106 }
5107 Value::Cell(ca) => {
5108 let mut pushed = 0usize;
5109 for v in &ca.data {
5110 if pushed >= out_count {
5111 break;
5112 }
5113 stack.push((**v).clone());
5114 pushed += 1;
5115 }
5116 for _ in pushed..out_count {
5117 stack.push(Value::Num(0.0));
5118 }
5119 }
5120 other => {
5121 stack.push(other);
5122 for _ in 1..out_count {
5123 stack.push(Value::Num(0.0));
5124 }
5125 }
5126 }
5127 } else {
5128 vm_bail!(e.to_string());
5129 }
5130 }
5131 }
5132 }
5133 Instr::EnterTry(catch_pc, catch_var) => {
5134 try_stack.push((catch_pc, catch_var));
5135 }
5136 Instr::PopTry => {
5137 try_stack.pop();
5138 }
5139 Instr::CreateMatrix(rows, cols) => {
5140 let total_elements = rows * cols;
5141 let mut row_major = Vec::with_capacity(total_elements);
5142 for _ in 0..total_elements {
5143 let val: f64 = (&stack
5144 .pop()
5145 .ok_or(mex("StackUnderflow", "stack underflow"))?)
5146 .try_into()?;
5147 row_major.push(val);
5148 }
5149 row_major.reverse();
5150 let mut data = vec![0.0; total_elements];
5152 for r in 0..rows {
5153 for c in 0..cols {
5154 data[r + c * rows] = row_major[r * cols + c];
5155 }
5156 }
5157 let matrix = runmat_builtins::Tensor::new_2d(data, rows, cols)
5158 .map_err(|e| format!("Matrix creation error: {e}"))?;
5159 stack.push(Value::Tensor(matrix));
5160 }
5161 Instr::CreateMatrixDynamic(num_rows) => {
5162 let mut row_lengths = Vec::new();
5163 for _ in 0..num_rows {
5164 let row_len: f64 = (&stack
5165 .pop()
5166 .ok_or(mex("StackUnderflow", "stack underflow"))?)
5167 .try_into()?;
5168 row_lengths.push(row_len as usize);
5169 }
5170 row_lengths.reverse();
5171 let mut rows_data = Vec::new();
5172 for &row_len in row_lengths.iter().rev() {
5173 let mut row_values = Vec::new();
5174 for _ in 0..row_len {
5175 row_values.push(
5176 stack
5177 .pop()
5178 .ok_or(mex("StackUnderflow", "stack underflow"))?,
5179 );
5180 }
5181 row_values.reverse();
5182 rows_data.push(row_values);
5183 }
5184 rows_data.reverse();
5185 let result = runmat_runtime::create_matrix_from_values(&rows_data)?;
5186 stack.push(result);
5187 }
5188 Instr::CreateRange(has_step) => {
5189 if has_step {
5190 let end: f64 = (&stack
5191 .pop()
5192 .ok_or(mex("StackUnderflow", "stack underflow"))?)
5193 .try_into()?;
5194 let step: f64 = (&stack
5195 .pop()
5196 .ok_or(mex("StackUnderflow", "stack underflow"))?)
5197 .try_into()?;
5198 let start: f64 = (&stack
5199 .pop()
5200 .ok_or(mex("StackUnderflow", "stack underflow"))?)
5201 .try_into()?;
5202 let range_result = runmat_runtime::create_range(start, Some(step), end)?;
5203 stack.push(range_result);
5204 } else {
5205 let end: f64 = (&stack
5206 .pop()
5207 .ok_or(mex("StackUnderflow", "stack underflow"))?)
5208 .try_into()?;
5209 let start: f64 = (&stack
5210 .pop()
5211 .ok_or(mex("StackUnderflow", "stack underflow"))?)
5212 .try_into()?;
5213 let range_result = runmat_runtime::create_range(start, None, end)?;
5214 stack.push(range_result);
5215 }
5216 }
5217 Instr::Index(num_indices) => {
5218 let mut indices = Vec::new();
5219 let count = num_indices;
5220 for _ in 0..count {
5221 let index_val: f64 = (&stack
5222 .pop()
5223 .ok_or(mex("StackUnderflow", "stack underflow"))?)
5224 .try_into()?;
5225 indices.push(index_val);
5226 }
5227 indices.reverse();
5228 let base = stack
5229 .pop()
5230 .ok_or(mex("StackUnderflow", "stack underflow"))?;
5231 #[cfg(feature = "native-accel")]
5232 clear_residency(&base);
5233 match base {
5234 Value::Object(obj) => {
5235 let cell = runmat_builtins::CellArray::new(
5236 indices.iter().map(|n| Value::Num(*n)).collect(),
5237 1,
5238 indices.len(),
5239 )
5240 .map_err(|e| format!("subsref build error: {e}"))?;
5241 match runmat_runtime::call_builtin(
5242 "call_method",
5243 &[
5244 Value::Object(obj),
5245 Value::String("subsref".to_string()),
5246 Value::String("()".to_string()),
5247 Value::Cell(cell),
5248 ],
5249 ) {
5250 Ok(v) => stack.push(v),
5251 Err(e) => vm_bail!(e.to_string()),
5252 }
5253 }
5254 other => {
5255 let result = match runmat_runtime::perform_indexing(&other, &indices) {
5256 Ok(v) => v,
5257 Err(e) => vm_bail!(e.to_string()),
5258 };
5259 stack.push(result);
5260 }
5261 }
5262 }
5263 Instr::IndexSlice(dims, numeric_count, colon_mask, end_mask) => {
5264 let __b = bench_start();
5265 let mut numeric: Vec<Value> = Vec::with_capacity(numeric_count);
5267 for _ in 0..numeric_count {
5268 numeric.push(
5269 stack
5270 .pop()
5271 .ok_or(mex("StackUnderflow", "stack underflow"))?,
5272 );
5273 }
5274 numeric.reverse();
5275 let mut base = stack
5276 .pop()
5277 .ok_or(mex("StackUnderflow", "stack underflow"))?;
5278 let mut logical_base = false;
5279 base = match base {
5280 Value::LogicalArray(la) => {
5281 logical_base = true;
5282 let data: Vec<f64> = la
5283 .data
5284 .iter()
5285 .map(|&b| if b != 0 { 1.0 } else { 0.0 })
5286 .collect();
5287 let tensor = runmat_builtins::Tensor::new(data, la.shape.clone())
5288 .map_err(|e| format!("slice: {e}"))?;
5289 Value::Tensor(tensor)
5290 }
5291 other => other,
5292 };
5293 match base {
5294 Value::Object(obj) => {
5295 let cell =
5296 runmat_builtins::CellArray::new(numeric.to_vec(), 1, numeric.len())
5297 .map_err(|e| format!("subsref build error: {e}"))?;
5298 match runmat_runtime::call_builtin(
5299 "call_method",
5300 &[
5301 Value::Object(obj),
5302 Value::String("subsref".to_string()),
5303 Value::String("()".to_string()),
5304 Value::Cell(cell),
5305 ],
5306 ) {
5307 Ok(v) => stack.push(v),
5308 Err(e) => vm_bail!(e.to_string()),
5309 }
5310 }
5311 Value::Tensor(t) => {
5312 let rank = t.shape.len();
                        // Per-dimension selector decoded from the slice operands
                        // (colon/end masks plus the popped numeric index values).
                        #[derive(Clone)]
                        enum Sel {
                            // `:` — select every index along the dimension.
                            Colon,
                            // A single 1-based index.
                            Scalar(usize),
                            // An explicit list of 1-based indices (from an index
                            // vector or a logical mask expanded to positions).
                            Indices(Vec<usize>),
                        }
5320 let mut selectors: Vec<Sel> = Vec::with_capacity(dims);
5321 let mut num_iter = 0usize;
5322 if dims == 1 {
5323 let total = t.data.len();
5324 let mut idxs: Vec<usize> = Vec::new();
5325 let is_colon = (colon_mask & 1u32) != 0;
5326 let is_end = (end_mask & 1u32) != 0;
5327 if is_colon {
5328 idxs = (1..=total).collect();
5329 } else if is_end {
5330 idxs = vec![total];
5331 } else if let Some(v) = numeric.first() {
5332 match v {
5333 Value::Num(n) => {
5334 let i = *n as isize;
5335 if i < 1 {
5336 vm_bail!(mex(
5337 "IndexOutOfBounds",
5338 "Index out of bounds"
5339 ));
5340 }
5341 idxs = vec![i as usize];
5342 }
5343 Value::Tensor(idx_t) => {
5344 let len = idx_t.shape.iter().product::<usize>();
5345 if len == total {
5346 for (i, &val) in idx_t.data.iter().enumerate() {
5347 if val != 0.0 {
5348 idxs.push(i + 1);
5349 }
5350 }
5351 } else {
5352 for &val in &idx_t.data {
5353 let i = val as isize;
5354 if i < 1 {
5355 vm_bail!(mex(
5356 "IndexOutOfBounds",
5357 "Index out of bounds"
5358 ));
5359 }
5360 idxs.push(i as usize);
5361 }
5362 }
5363 }
5364 _ => vm_bail!(mex(
5365 "UnsupportedIndexType",
5366 "Unsupported index type"
5367 )),
5368 }
5369 } else {
5370 vm_bail!(mex("MissingNumericIndex", "missing numeric index"));
5371 }
5372 if idxs.iter().any(|&i| i == 0 || i > total) {
5373 vm_bail!(mex("IndexOutOfBounds", "Index out of bounds"));
5374 }
5375 if idxs.len() == 1 {
5376 stack.push(Value::Num(t.data[idxs[0] - 1]));
5377 } else {
5378 let mut out = Vec::with_capacity(idxs.len());
5379 for &i in &idxs {
5380 out.push(t.data[i - 1]);
5381 }
5382 let tens = runmat_builtins::Tensor::new(out, vec![idxs.len(), 1])
5383 .map_err(|e| format!("Slice error: {e}"))?;
5384 stack.push(Value::Tensor(tens));
5385 }
5386 } else {
5387 for d in 0..dims {
5388 let is_colon = (colon_mask & (1u32 << d)) != 0;
5389 let is_end = (end_mask & (1u32 << d)) != 0;
5390 if is_colon {
5391 selectors.push(Sel::Colon);
5392 } else if is_end {
5393 let dim_len = *t.shape.get(d).unwrap_or(&1);
5395 selectors.push(Sel::Scalar(dim_len));
5396 } else {
5397 let v = numeric.get(num_iter).ok_or(mex(
5398 "MissingNumericIndex",
5399 "missing numeric index",
5400 ))?;
5401 num_iter += 1;
5402 match v {
5403 Value::Num(n) => {
5404 let idx = *n as isize;
5405 if idx < 1 {
5406 return Err(mex(
5407 "IndexOutOfBounds",
5408 "Index out of bounds",
5409 ));
5410 }
5411 selectors.push(Sel::Scalar(idx as usize));
5412 }
5413 Value::Tensor(idx_t) => {
5414 let dim_len = *t.shape.get(d).unwrap_or(&1);
5416 let len = idx_t.shape.iter().product::<usize>();
5417 if len == dim_len {
5418 let mut indices = Vec::new();
5419 for (i, &val) in idx_t.data.iter().enumerate() {
5420 if val != 0.0 {
5421 indices.push(i + 1);
5422 }
5423 }
5424 selectors.push(Sel::Indices(indices));
5425 } else {
5426 let mut indices = Vec::with_capacity(len);
5428 for &val in &idx_t.data {
5429 let idx = val as isize;
5430 if idx < 1 {
5431 return Err(mex(
5432 "IndexOutOfBounds",
5433 "Index out of bounds",
5434 ));
5435 }
5436 indices.push(idx as usize);
5437 }
5438 selectors.push(Sel::Indices(indices));
5439 }
5440 }
5441 Value::LogicalArray(la) => {
5442 let dim_len = *t.shape.get(d).unwrap_or(&1);
5443 if la.data.len() == dim_len {
5444 let mut indices = Vec::new();
5445 for (i, &b) in la.data.iter().enumerate() {
5446 if b != 0 {
5447 indices.push(i + 1);
5448 }
5449 }
5450 selectors.push(Sel::Indices(indices));
5451 } else {
5452 return Err(mex(
5453 "IndexShape",
5454 "Logical mask shape mismatch",
5455 ));
5456 }
5457 }
5458 _ => {
5459 return Err(mex(
5460 "UnsupportedIndexType",
5461 "Unsupported index type",
5462 ))
5463 }
5464 }
5465 }
5466 }
5467 if dims == 2 {
5469 let rows = if rank >= 1 { t.shape[0] } else { 1 };
5470 let cols = if rank >= 2 { t.shape[1] } else { 1 };
5471 match (&selectors[0], &selectors[1]) {
5472 (Sel::Colon, Sel::Scalar(j)) => {
5474 let j0 = *j - 1;
5475 if j0 >= cols {
5476 return Err(mex(
5477 "IndexOutOfBounds",
5478 "Index out of bounds",
5479 ));
5480 }
5481 let start = j0 * rows;
5482 let out = t.data[start..start + rows].to_vec();
5483 if out.len() == 1 {
5484 stack.push(Value::Num(out[0]));
5485 } else {
5486 let tens =
5487 runmat_builtins::Tensor::new(out, vec![rows, 1])
5488 .map_err(|e| format!("Slice error: {e}"))?;
5489 stack.push(Value::Tensor(tens));
5490 }
5491 bench_end("IndexSlice2D.fast_col", __b);
5492 pc += 1;
5493 continue;
5494 }
5495 (Sel::Scalar(i), Sel::Colon) => {
5497 let i0 = *i - 1;
5498 if i0 >= rows {
5499 return Err(mex(
5500 "IndexOutOfBounds",
5501 "Index out of bounds",
5502 ));
5503 }
5504 let mut out: Vec<f64> = Vec::with_capacity(cols);
5505 for c in 0..cols {
5506 out.push(t.data[i0 + c * rows]);
5507 }
5508 if out.len() == 1 {
5509 stack.push(Value::Num(out[0]));
5510 } else {
5511 let tens =
5512 runmat_builtins::Tensor::new(out, vec![1, cols])
5513 .map_err(|e| format!("Slice error: {e}"))?;
5514 stack.push(Value::Tensor(tens));
5515 }
5516 bench_end("IndexSlice2D.fast_row", __b);
5517 pc += 1;
5518 continue;
5519 }
5520 (Sel::Colon, Sel::Indices(js)) => {
5522 if js.is_empty() {
5524 let tens = runmat_builtins::Tensor::new(
5525 Vec::new(),
5526 vec![rows, 0],
5527 )
5528 .map_err(|e| format!("Slice error: {e}"))?;
5529 stack.push(Value::Tensor(tens));
5530 } else {
5531 let mut out: Vec<f64> =
5532 Vec::with_capacity(rows * js.len());
5533 for &j in js {
5534 let j0 = j - 1;
5535 if j0 >= cols {
5536 return Err(mex(
5537 "IndexOutOfBounds",
5538 "Index out of bounds",
5539 ));
5540 }
5541 let start = j0 * rows;
5542 out.extend_from_slice(&t.data[start..start + rows]);
5543 }
5544 let tens = runmat_builtins::Tensor::new(
5545 out,
5546 vec![rows, js.len()],
5547 )
5548 .map_err(|e| format!("Slice error: {e}"))?;
5549 stack.push(Value::Tensor(tens));
5550 }
5551 bench_end("IndexSlice2D.fast_cols", __b);
5552 pc += 1;
5553 continue;
5554 }
5555 (Sel::Indices(is), Sel::Colon) => {
5557 if is.is_empty() {
5559 let tens = runmat_builtins::Tensor::new(
5560 Vec::new(),
5561 vec![0, cols],
5562 )
5563 .map_err(|e| format!("Slice error: {e}"))?;
5564 stack.push(Value::Tensor(tens));
5565 } else {
5566 let mut out: Vec<f64> =
5567 Vec::with_capacity(is.len() * cols);
5568 for c in 0..cols {
5569 for &i in is {
5570 let i0 = i - 1;
5571 if i0 >= rows {
5572 return Err(mex(
5573 "IndexOutOfBounds",
5574 "Index out of bounds",
5575 ));
5576 }
5577 out.push(t.data[i0 + c * rows]);
5578 }
5579 }
5580 let tens = runmat_builtins::Tensor::new(
5581 out,
5582 vec![is.len(), cols],
5583 )
5584 .map_err(|e| format!("Slice error: {e}"))?;
5585 stack.push(Value::Tensor(tens));
5586 }
5587 bench_end("IndexSlice2D.fast_rows_multi", __b);
5588 pc += 1;
5589 continue;
5590 }
5591 _ => {}
5592 }
5593 }
5594 {
5595 let mut out_dims: Vec<usize> = Vec::new();
5597 let mut per_dim_indices: Vec<Vec<usize>> = Vec::with_capacity(dims);
5598 for (d, sel) in selectors.iter().enumerate().take(dims) {
5599 let dim_len = *t.shape.get(d).unwrap_or(&1);
5600 let idxs = match sel {
5601 Sel::Colon => (1..=dim_len).collect::<Vec<usize>>(),
5602 Sel::Scalar(i) => vec![*i],
5603 Sel::Indices(v) => v.clone(),
5604 };
5605 if idxs.iter().any(|&i| i == 0 || i > dim_len) {
5606 return Err(mex("IndexOutOfBounds", "Index out of bounds"));
5607 }
5608 if idxs.len() > 1 {
5609 out_dims.push(idxs.len());
5610 } else {
5611 out_dims.push(1);
5612 }
5613 per_dim_indices.push(idxs);
5614 }
5615 let mut out_dims: Vec<usize> =
5616 per_dim_indices.iter().map(|v| v.len()).collect();
5617 if dims == 2 {
5620 match (
5621 &per_dim_indices[0].as_slice(),
5622 &per_dim_indices[1].as_slice(),
5623 ) {
5624 (i_list, j_list)
5626 if i_list.len() > 1 && j_list.len() == 1 =>
5627 {
5628 out_dims = vec![i_list.len(), 1];
5629 }
5630 (i_list, j_list)
5632 if i_list.len() == 1 && j_list.len() > 1 =>
5633 {
5634 out_dims = vec![1, j_list.len()];
5635 }
5636 _ => {}
5637 }
5638 }
5639 let mut strides: Vec<usize> = vec![0; dims];
5641 let full_shape: Vec<usize> = if rank < dims {
5642 let mut s = t.shape.clone();
5643 s.resize(dims, 1);
5644 s
5645 } else {
5646 t.shape.clone()
5647 };
5648 let mut acc = 1usize;
5649 for (d, stride) in strides.iter_mut().enumerate().take(dims) {
5650 *stride = acc;
5651 acc *= full_shape[d];
5652 }
5653 let total_out: usize = out_dims.iter().product();
5655 let mut out_data: Vec<f64> = Vec::with_capacity(total_out);
5656 if out_dims.contains(&0)
5657 || per_dim_indices.iter().any(|v| v.is_empty())
5658 {
5659 let out_tensor =
5661 runmat_builtins::Tensor::new(out_data, out_dims)
5662 .map_err(|e| format!("Slice error: {e}"))?;
5663 stack.push(Value::Tensor(out_tensor));
5664 } else {
5665 fn cartesian<F: FnMut(&[usize])>(
5666 lists: &[Vec<usize>],
5667 mut f: F,
5668 ) {
5669 let dims = lists.len();
5670 let mut idx = vec![0usize; dims];
5671 loop {
5672 let current: Vec<usize> =
5673 (0..dims).map(|d| lists[d][idx[d]]).collect();
5674 f(¤t);
5675 let mut d = 0usize;
5677 while d < dims {
5678 idx[d] += 1;
5679 if idx[d] < lists[d].len() {
5680 break;
5681 }
5682 idx[d] = 0;
5683 d += 1;
5684 }
5685 if d == dims {
5686 break;
5687 }
5688 }
5689 }
5690 cartesian(&per_dim_indices, |multi| {
5691 let mut lin = 0usize;
5692 for d in 0..dims {
5693 let i0 = multi[d] - 1;
5694 lin += i0 * strides[d];
5695 }
5696 out_data.push(t.data[lin]);
5697 });
5698 if out_data.len() == 1 {
5699 stack.push(Value::Num(out_data[0]));
5700 } else {
5701 let out_tensor =
5702 runmat_builtins::Tensor::new(out_data, out_dims)
5703 .map_err(|e| format!("Slice error: {e}"))?;
5704 stack.push(Value::Tensor(out_tensor));
5705 }
5706 }
5707 }
5708 }
5709 }
5710 Value::GpuTensor(handle) => {
5711 let provider = runmat_accelerate_api::provider()
5712 .ok_or_else(|| "No acceleration provider registered".to_string())?;
5713 let base_shape = handle.shape.clone();
5714 let selectors = build_slice_selectors(
5715 dims,
5716 colon_mask,
5717 end_mask,
5718 &numeric,
5719 &base_shape,
5720 )
5721 .map_err(|e| format!("slice: {e}"))?;
5722 let plan =
5723 build_slice_plan(&selectors, dims, &base_shape).map_err(|e| {
5724 if e.contains("IndexOutOfBounds") {
5725 e.clone()
5726 } else {
5727 format!("slice: {e}")
5728 }
5729 })?;
5730 if plan.indices.is_empty() {
5731 let zeros = provider
5732 .zeros(&plan.output_shape)
5733 .map_err(|e| format!("slice: {e}"))?;
5734 stack.push(Value::GpuTensor(zeros));
5735 } else {
5736 let result = provider
5737 .gather_linear(&handle, &plan.indices, &plan.output_shape)
5738 .map_err(|e| format!("slice: {e}"))?;
5739 stack.push(Value::GpuTensor(result));
5740 }
5741 }
5742 Value::StringArray(sa) => {
5743 let rank = sa.shape.len();
                        // Per-dimension selector for string-array slicing; mirrors
                        // the `Sel` used in the numeric-tensor path above.
                        #[derive(Clone)]
                        enum Sel {
                            // `:` — select every index along the dimension.
                            Colon,
                            // A single 1-based index.
                            Scalar(usize),
                            // An explicit list of 1-based indices.
                            Indices(Vec<usize>),
                        }
5750 let mut selectors: Vec<Sel> = Vec::with_capacity(dims);
5751 let mut num_iter = 0usize;
5752 if dims == 1 {
5753 let total = sa.data.len();
5754 let mut idxs: Vec<usize> = Vec::new();
5755 let is_colon = (colon_mask & 1u32) != 0;
5756 let is_end = (end_mask & 1u32) != 0;
5757 if is_colon {
5758 idxs = (1..=total).collect();
5759 } else if is_end {
5760 idxs = vec![total];
5761 } else if let Some(v) = numeric.first() {
5762 match v {
5763 Value::Num(n) => {
5764 let i = *n as isize;
5765 if i < 1 {
5766 vm_bail!(mex(
5767 "IndexOutOfBounds",
5768 "Index out of bounds"
5769 ));
5770 }
5771 idxs = vec![i as usize];
5772 }
5773 Value::Tensor(idx_t) => {
5774 let len = idx_t.shape.iter().product::<usize>();
5775 if len == total {
5776 for (i, &val) in idx_t.data.iter().enumerate() {
5777 if val != 0.0 {
5778 idxs.push(i + 1);
5779 }
5780 }
5781 } else {
5782 for &val in &idx_t.data {
5783 let i = val as isize;
5784 if i < 1 {
5785 vm_bail!(mex(
5786 "IndexOutOfBounds",
5787 "Index out of bounds"
5788 ));
5789 }
5790 idxs.push(i as usize);
5791 }
5792 }
5793 }
5794 _ => vm_bail!(mex(
5795 "UnsupportedIndexType",
5796 "Unsupported index type"
5797 )),
5798 }
5799 } else {
5800 vm_bail!(mex("MissingNumericIndex", "missing numeric index"));
5801 }
5802 if idxs.iter().any(|&i| i == 0 || i > total) {
5803 vm_bail!(mex("IndexOutOfBounds", "Index out of bounds"));
5804 }
5805 if idxs.len() == 1 {
5806 stack.push(Value::String(sa.data[idxs[0] - 1].clone()));
5808 } else {
5809 let mut out: Vec<String> = Vec::with_capacity(idxs.len());
5810 for &i in &idxs {
5811 out.push(sa.data[i - 1].clone());
5812 }
5813 let out_sa =
5814 runmat_builtins::StringArray::new(out, vec![idxs.len(), 1])
5815 .map_err(|e| format!("Slice error: {e}"))?;
5816 stack.push(Value::StringArray(out_sa));
5817 }
5818 } else {
5819 for d in 0..dims {
5820 let is_colon = (colon_mask & (1u32 << d)) != 0;
5821 let is_end = (end_mask & (1u32 << d)) != 0;
5822 if is_colon {
5823 selectors.push(Sel::Colon);
5824 } else if is_end {
5825 let dim_len = *sa.shape.get(d).unwrap_or(&1);
5826 selectors.push(Sel::Scalar(dim_len));
5827 } else {
5828 let v = numeric.get(num_iter).ok_or(mex(
5829 "MissingNumericIndex",
5830 "missing numeric index",
5831 ))?;
5832 num_iter += 1;
5833 match v {
5834 Value::Num(n) => {
5835 let idx = *n as isize;
5836 if idx < 1 {
5837 return Err(mex(
5838 "IndexOutOfBounds",
5839 "Index out of bounds",
5840 ));
5841 }
5842 selectors.push(Sel::Scalar(idx as usize));
5843 }
5844 Value::Tensor(idx_t) => {
5845 let dim_len = *sa.shape.get(d).unwrap_or(&1);
5846 let len = idx_t.shape.iter().product::<usize>();
5847 let is_binary_mask = len == dim_len
5848 && idx_t.data.iter().all(|&x| x == 0.0 || x == 1.0);
5849 if is_binary_mask {
5850 let mut v = Vec::new();
5851 for (i, &val) in idx_t.data.iter().enumerate() {
5852 if val != 0.0 {
5853 v.push(i + 1);
5854 }
5855 }
5856 selectors.push(Sel::Indices(v));
5857 } else {
5858 let mut v = Vec::with_capacity(len);
5859 for &val in &idx_t.data {
5860 let idx = val as isize;
5861 if idx < 1 {
5862 vm_bail!(mex(
5863 "IndexOutOfBounds",
5864 "Index out of bounds"
5865 ));
5866 }
5867 v.push(idx as usize);
5868 }
5869 selectors.push(Sel::Indices(v));
5870 }
5871 }
5872 _ => vm_bail!(mex(
5873 "UnsupportedIndexType",
5874 "Unsupported index type"
5875 )),
5876 }
5877 }
5878 }
5879 let mut out_dims: Vec<usize> = Vec::new();
5880 let mut per_dim_indices: Vec<Vec<usize>> = Vec::with_capacity(dims);
5881 for (d, sel) in selectors.iter().enumerate().take(dims) {
5882 let dim_len = *sa.shape.get(d).unwrap_or(&1);
5883 let idxs = match sel {
5884 Sel::Colon => (1..=dim_len).collect::<Vec<usize>>(),
5885 Sel::Scalar(i) => vec![*i],
5886 Sel::Indices(v) => v.clone(),
5887 };
5888 if idxs.iter().any(|&i| i == 0 || i > dim_len) {
5889 return Err(mex("IndexOutOfBounds", "Index out of bounds"));
5890 }
5891 if idxs.len() > 1 {
5892 out_dims.push(idxs.len());
5893 } else {
5894 out_dims.push(1);
5895 }
5896 per_dim_indices.push(idxs);
5897 }
5898 if dims == 2 {
5899 match (
5900 &per_dim_indices[0].as_slice(),
5901 &per_dim_indices[1].as_slice(),
5902 ) {
5903 (i_list, j_list) if i_list.len() > 1 && j_list.len() == 1 => {
5904 out_dims = vec![i_list.len(), 1];
5905 }
5906 (i_list, j_list) if i_list.len() == 1 && j_list.len() > 1 => {
5907 out_dims = vec![1, j_list.len()];
5908 }
5909 _ => {}
5910 }
5911 }
5912 let mut strides: Vec<usize> = vec![0; dims];
5913 let full_shape: Vec<usize> = if rank < dims {
5914 let mut s = sa.shape.clone();
5915 s.resize(dims, 1);
5916 s
5917 } else {
5918 sa.shape.clone()
5919 };
5920 let mut acc = 1usize;
5921 for (d, stride) in strides.iter_mut().enumerate().take(dims) {
5922 *stride = acc;
5923 acc *= full_shape[d];
5924 }
5925 let total_out: usize = out_dims.iter().product();
5926 if total_out == 0 {
5927 stack.push(Value::StringArray(
5928 runmat_builtins::StringArray::new(Vec::new(), out_dims)
5929 .map_err(|e| format!("Slice error: {e}"))?,
5930 ));
5931 } else {
5932 fn cartesian<F: FnMut(&[usize])>(lists: &[Vec<usize>], mut f: F) {
5933 let dims = lists.len();
5934 let mut idx = vec![0usize; dims];
5935 loop {
5936 let current: Vec<usize> =
5937 (0..dims).map(|d| lists[d][idx[d]]).collect();
5938 f(¤t);
5939 let mut d = 0usize;
5940 while d < dims {
5941 idx[d] += 1;
5942 if idx[d] < lists[d].len() {
5943 break;
5944 }
5945 idx[d] = 0;
5946 d += 1;
5947 }
5948 if d == dims {
5949 break;
5950 }
5951 }
5952 }
5953 let mut out_data: Vec<String> = Vec::with_capacity(total_out);
5954 cartesian(&per_dim_indices, |multi| {
5955 let mut lin = 0usize;
5956 for d in 0..dims {
5957 let i0 = multi[d] - 1;
5958 lin += i0 * strides[d];
5959 }
5960 out_data.push(sa.data[lin].clone());
5961 });
5962 if out_data.len() == 1 {
5963 stack.push(Value::String(out_data[0].clone()));
5964 } else {
5965 let out_sa =
5966 runmat_builtins::StringArray::new(out_data, out_dims)
5967 .map_err(|e| format!("Slice error: {e}"))?;
5968 stack.push(Value::StringArray(out_sa));
5969 }
5970 }
5971 }
5972 }
5973 other => {
5974 if dims == 1 {
5976 let is_colon = (colon_mask & 1u32) != 0;
5977 let is_end = (end_mask & 1u32) != 0;
5978 if is_colon {
5979 vm_bail!(mex(
5980 "SliceNonTensor",
5981 "Slicing only supported on tensors"
5982 ));
5983 }
5984 let idx_val: f64 = if is_end {
5985 1.0
5986 } else {
5987 match numeric.first() {
5988 Some(Value::Num(n)) => *n,
5989 Some(Value::Int(i)) => i.to_f64(),
5990 _ => 1.0,
5991 }
5992 };
5993 let v = match runmat_runtime::perform_indexing(&other, &[idx_val]) {
5994 Ok(v) => v,
5995 Err(_e) => vm_bail!(mex(
5996 "SliceNonTensor",
5997 "Slicing only supported on tensors"
5998 )),
5999 };
6000 stack.push(v);
6001 } else {
6002 vm_bail!(mex("SliceNonTensor", "Slicing only supported on tensors"));
6003 }
6004 }
6005 }
6006 if logical_base {
6007 let result = stack
6008 .pop()
6009 .ok_or(mex("SliceNonTensor", "logical slice missing result"))?;
6010 let converted = match result {
6011 Value::Tensor(t) => {
6012 let logical_data: Vec<u8> = t
6013 .data
6014 .iter()
6015 .map(|&v| if v != 0.0 { 1 } else { 0 })
6016 .collect();
6017 if logical_data.len() <= 1 {
6018 Value::Bool(logical_data.first().copied().unwrap_or(0) != 0)
6019 } else {
6020 let logical = runmat_builtins::LogicalArray::new(
6021 logical_data,
6022 t.shape.clone(),
6023 )
6024 .map_err(|e| mex("SliceNonTensor", &format!("slice: {e}")))?;
6025 Value::LogicalArray(logical)
6026 }
6027 }
6028 Value::Num(n) => Value::Bool(n != 0.0),
6029 Value::Bool(_) | Value::LogicalArray(_) => result,
6030 other => other,
6031 };
6032 stack.push(converted);
6033 }
6034 bench_end("IndexSlice", __b);
6035 }
6036 Instr::IndexRangeEnd {
6037 dims,
6038 numeric_count,
6039 colon_mask,
6040 end_mask,
6041 range_dims,
6042 range_has_step,
6043 end_offsets,
6044 } => {
6045 let mut numeric: Vec<Value> = Vec::with_capacity(numeric_count);
6047 for _ in 0..numeric_count {
6048 numeric.push(
6049 stack
6050 .pop()
6051 .ok_or(mex("StackUnderflow", "stack underflow"))?,
6052 );
6053 }
6054 numeric.reverse();
6055 let mut range_params: Vec<(f64, f64)> = Vec::with_capacity(range_dims.len());
6057 for i in (0..range_dims.len()).rev() {
6058 let has_step = range_has_step[i];
6059 let step = if has_step {
6060 let v = stack
6061 .pop()
6062 .ok_or(mex("StackUnderflow", "stack underflow"))?;
6063 match v {
6064 Value::Num(n) => n,
6065 Value::Int(i) => i.to_f64(),
6066 Value::Tensor(t) if !t.data.is_empty() => t.data[0],
6067 _ => 1.0,
6068 }
6069 } else {
6070 1.0
6071 };
6072 let v = stack
6073 .pop()
6074 .ok_or(mex("StackUnderflow", "stack underflow"))?;
6075 let start: f64 = match v {
6076 Value::Num(n) => n,
6077 Value::Int(i) => i.to_f64(),
6078 Value::Tensor(t) if !t.data.is_empty() => t.data[0],
6079 _ => 1.0,
6080 };
6081 range_params.push((start, step));
6082 }
6083 range_params.reverse();
6084 let base = stack
6085 .pop()
6086 .ok_or(mex("StackUnderflow", "stack underflow"))?;
6087 #[cfg(feature = "native-accel")]
6088 clear_residency(&base);
6089 match base {
6090 Value::Tensor(t) => {
6091 let rank = t.shape.len();
// Per-dimension selector for N-D `IndexRangeEnd` slicing.
#[derive(Clone)]
enum Sel {
    // `:` — take the full extent of the dimension.
    Colon,
    // A single 1-based index (also used for a bare `end`).
    Scalar(usize),
    // Explicit list of 1-based indices (index vector or logical mask).
    Indices(Vec<usize>),
    // `start:step:end-off` — the end is resolved later against the
    // dimension length by subtracting `end_off` from it.
    Range { start: i64, step: i64, end_off: i64 },
}
6099 let mut selectors: Vec<Sel> = Vec::with_capacity(dims);
6100 let mut num_iter = 0usize;
6101 let mut rp_iter = 0usize;
6102 for d in 0..dims {
6103 let is_colon = (colon_mask & (1u32 << d)) != 0;
6104 let is_end = (end_mask & (1u32 << d)) != 0;
6105 if is_colon {
6106 selectors.push(Sel::Colon);
6107 } else if is_end {
6108 selectors.push(Sel::Scalar(*t.shape.get(d).unwrap_or(&1)));
6109 } else if let Some(pos) = range_dims.iter().position(|&rd| rd == d) {
6110 let (st, sp) = range_params[rp_iter];
6111 rp_iter += 1;
6112 let off = end_offsets[pos];
6113 selectors.push(Sel::Range {
6114 start: st as i64,
6115 step: if sp >= 0.0 {
6116 sp as i64
6117 } else {
6118 -(sp.abs() as i64)
6119 },
6120 end_off: off,
6121 });
6122 } else {
6123 let v = numeric
6124 .get(num_iter)
6125 .ok_or(mex("MissingNumericIndex", "missing numeric index"))?;
6126 num_iter += 1;
6127 match v {
6128 Value::Num(n) => {
6129 let idx = *n as isize;
6130 if idx < 1 {
6131 vm_bail!(mex(
6132 "IndexOutOfBounds",
6133 "Index out of bounds"
6134 ));
6135 }
6136 selectors.push(Sel::Scalar(idx as usize));
6137 }
6138 Value::Tensor(idx_t) => {
6139 let dim_len = *t.shape.get(d).unwrap_or(&1);
6140 let len = idx_t.shape.iter().product::<usize>();
6141 if len == dim_len {
6142 let mut v = Vec::new();
6143 for (i, &val) in idx_t.data.iter().enumerate() {
6144 if val != 0.0 {
6145 v.push(i + 1);
6146 }
6147 }
6148 selectors.push(Sel::Indices(v));
6149 } else {
6150 let mut v = Vec::with_capacity(len);
6151 for &val in &idx_t.data {
6152 let idx = val as isize;
6153 if idx < 1 {
6154 vm_bail!(mex(
6155 "IndexOutOfBounds",
6156 "Index out of bounds"
6157 ));
6158 }
6159 v.push(idx as usize);
6160 }
6161 selectors.push(Sel::Indices(v));
6162 }
6163 }
6164 _ => vm_bail!(mex(
6165 "UnsupportedIndexType",
6166 "Unsupported index type"
6167 )),
6168 }
6169 }
6170 }
6171 let mut per_dim_indices: Vec<Vec<usize>> = Vec::with_capacity(dims);
6173 let full_shape: Vec<usize> = if rank < dims {
6174 let mut s = t.shape.clone();
6175 s.resize(dims, 1);
6176 s
6177 } else {
6178 t.shape.clone()
6179 };
6180 for (d, sel) in selectors.iter().enumerate().take(dims) {
6181 let dim_len = full_shape[d] as i64;
6182 let idxs: Vec<usize> = match sel {
6183 Sel::Colon => (1..=full_shape[d]).collect(),
6184 Sel::Scalar(i) => vec![*i],
6185 Sel::Indices(v) => v.clone(),
6186 Sel::Range {
6187 start,
6188 step,
6189 end_off,
6190 } => {
6191 let mut v = Vec::new();
6192 let mut cur = *start;
6193 let stp = *step;
6194 let end_i = dim_len - *end_off;
6195 if stp == 0 {
6196 vm_bail!(mex("IndexStepZero", "Index step cannot be zero"));
6197 }
6198 if stp > 0 {
6199 while cur <= end_i {
6200 if cur < 1 || cur > dim_len {
6201 break;
6202 }
6203 v.push(cur as usize);
6204 cur += stp;
6205 }
6206 } else {
6207 while cur >= end_i {
6208 if cur < 1 || cur > dim_len {
6209 break;
6210 }
6211 v.push(cur as usize);
6212 cur += stp;
6213 }
6214 }
6215 v
6216 }
6217 };
6218 if idxs.iter().any(|&i| i == 0 || i > full_shape[d]) {
6219 vm_bail!(mex("IndexOutOfBounds", "Index out of bounds"));
6220 }
6221 per_dim_indices.push(idxs);
6222 }
6223 let mut strides: Vec<usize> = vec![0; dims];
6225 let mut acc = 1usize;
6226 for (d, stride) in strides.iter_mut().enumerate().take(dims) {
6227 *stride = acc;
6228 acc *= full_shape[d];
6229 }
6230 let total_out: usize = per_dim_indices.iter().map(|v| v.len()).product();
6231 if total_out == 0 {
6232 stack.push(Value::Tensor(
6233 runmat_builtins::Tensor::new(Vec::new(), vec![0, 0])
6234 .map_err(|e| format!("Slice error: {e}"))?,
6235 ));
6236 continue;
6237 }
6238 let mut out_data: Vec<f64> = Vec::with_capacity(total_out);
/// Invoke `f` once for every element of the Cartesian product of `lists`,
/// with the first list varying fastest (column-major visit order, matching
/// the column-major linear-index computation at the call site).
///
/// Precondition: every inner list must be non-empty — the caller checks
/// `total_out == 0` before invoking this, so `lists[d][idx[d]]` is in-bounds.
fn cartesian<F: FnMut(&[usize])>(lists: &[Vec<usize>], mut f: F) {
    let dims = lists.len();
    let mut idx = vec![0usize; dims];
    loop {
        // Materialize the current multi-index selection.
        let current: Vec<usize> =
            (0..dims).map(|d| lists[d][idx[d]]).collect();
        f(&current);
        // Odometer-style increment: bump dimension 0 and carry into
        // higher dimensions as each position wraps around.
        let mut d = 0usize;
        while d < dims {
            idx[d] += 1;
            if idx[d] < lists[d].len() {
                break;
            }
            idx[d] = 0;
            d += 1;
        }
        // Every dimension wrapped: the product is exhausted.
        if d == dims {
            break;
        }
    }
}
6260 cartesian(&per_dim_indices, |multi| {
6261 let mut lin = 0usize;
6262 for d in 0..dims {
6263 let i0 = multi[d] - 1;
6264 lin += i0 * strides[d];
6265 }
6266 out_data.push(t.data[lin]);
6267 });
6268 if out_data.len() == 1 {
6269 stack.push(Value::Num(out_data[0]));
6270 } else {
6271 let shape: Vec<usize> =
6272 per_dim_indices.iter().map(|v| v.len().max(1)).collect();
6273 let tens = runmat_builtins::Tensor::new(out_data, shape)
6274 .map_err(|e| format!("Slice error: {e}"))?;
6275 stack.push(Value::Tensor(tens));
6276 }
6277 }
6278 Value::StringArray(sa) => {
6279 let selectors =
6280 build_slice_selectors(dims, colon_mask, end_mask, &numeric, &sa.shape)
6281 .map_err(|e| format!("slice: {e}"))?;
6282 let plan = build_slice_plan(&selectors, dims, &sa.shape).map_err(|e| {
6283 if e.contains("IndexOutOfBounds") {
6284 e.clone()
6285 } else {
6286 format!("slice: {e}")
6287 }
6288 })?;
6289 let result =
6290 gather_string_slice(&sa, &plan).map_err(|e| format!("slice: {e}"))?;
6291 stack.push(result);
6292 }
6293 _ => vm_bail!(mex("SliceNonTensor", "Slicing only supported on tensors")),
6294 }
6295 }
6296
6297 Instr::IndexSliceEx(dims, numeric_count, colon_mask, end_mask, end_offsets) => {
6298 let mut numeric: Vec<Value> = Vec::with_capacity(numeric_count);
6300 for _ in 0..numeric_count {
6301 numeric.push(
6302 stack
6303 .pop()
6304 .ok_or(mex("StackUnderflow", "stack underflow"))?,
6305 );
6306 }
6307 numeric.reverse();
6308 let mut base = stack
6309 .pop()
6310 .ok_or(mex("StackUnderflow", "stack underflow"))?;
6311 let mut numeric_values = numeric.clone();
6312 if let Value::GpuTensor(handle) = &base {
6313 let adjusted = apply_end_offsets_to_numeric(
6314 &numeric_values,
6315 dims,
6316 colon_mask,
6317 end_mask,
6318 &end_offsets,
6319 &handle.shape,
6320 );
6321 if let Some(provider) = runmat_accelerate_api::provider() {
6322 if let Ok(selectors) = build_slice_selectors(
6323 dims,
6324 colon_mask,
6325 end_mask,
6326 &adjusted,
6327 &handle.shape,
6328 ) {
6329 if let Ok(plan) = build_slice_plan(&selectors, dims, &handle.shape) {
6330 if plan.indices.is_empty() {
6331 let zeros = provider
6332 .zeros(&plan.output_shape)
6333 .map_err(|e| format!("slice: {e}"))?;
6334 stack.push(Value::GpuTensor(zeros));
6335 pc += 1;
6336 continue;
6337 } else {
6338 let result = provider
6339 .gather_linear(handle, &plan.indices, &plan.output_shape)
6340 .map_err(|e| format!("slice: {e}"))?;
6341 stack.push(Value::GpuTensor(result));
6342 pc += 1;
6343 continue;
6344 }
6345 }
6346 }
6347 let host = provider
6348 .download(handle)
6349 .map_err(|e| format!("slice: {e}"))?;
6350 let tensor = runmat_builtins::Tensor::new(host.data, host.shape)
6351 .map_err(|e| format!("slice: {e}"))?;
6352 base = Value::Tensor(tensor);
6353 numeric_values = adjusted;
6354 } else {
6355 return Err("No acceleration provider registered".to_string());
6356 }
6357 }
6358 match base {
6359 Value::Tensor(t) => {
6360 let adjusted = apply_end_offsets_to_numeric(
6361 &numeric_values,
6362 dims,
6363 colon_mask,
6364 end_mask,
6365 &end_offsets,
6366 &t.shape,
6367 );
6368 let mut tmp_stack = Vec::new();
6370 tmp_stack.push(Value::Tensor(t));
6371 for v in adjusted {
6372 tmp_stack.push(v);
6373 }
6374 let mut numeric_vals: Vec<Value> = Vec::new();
6376 let count = numeric_count;
6377 let mut idx_iter = tmp_stack.into_iter();
6378 let base = idx_iter
6379 .next()
6380 .ok_or(mex("StackUnderflow", "stack underflow"))?;
6381 for _ in 0..count {
6382 match idx_iter.next() {
6383 Some(v) => numeric_vals.push(v),
6384 None => return Err(mex("StackUnderflow", "stack underflow")),
6385 }
6386 }
6387 match base {
6388 Value::Tensor(t2) => {
6389 let rank = t2.shape.len();
// Per-dimension selector for `IndexSliceEx` N-D gathering.
#[derive(Clone)]
enum Sel {
    // `:` — take the full extent of the dimension.
    Colon,
    // A single 1-based index (also used for `end`).
    Scalar(usize),
    // Explicit list of 1-based indices (index vector or logical mask).
    Indices(Vec<usize>),
}
6397 let mut selectors: Vec<Sel> = Vec::with_capacity(dims);
6398 let mut num_iter = 0usize;
6399 if dims == 1 {
6400 let total = t2.data.len();
6401 let mut idxs: Vec<usize> = Vec::new();
6402 let is_colon = (colon_mask & 1u32) != 0;
6403 let is_end = (end_mask & 1u32) != 0;
6404 if is_colon {
6405 idxs = (1..=total).collect();
6406 } else if is_end {
6407 idxs = vec![total];
6408 } else if let Some(v) = numeric_vals.first() {
6409 match v {
6410 Value::Num(n) => {
6411 let i = *n as isize;
6412 if i < 1 {
6413 vm_bail!(mex(
6414 "IndexOutOfBounds",
6415 "Index out of bounds"
6416 ));
6417 }
6418 idxs = vec![i as usize];
6419 }
6420 Value::Tensor(idx_t) => {
6421 let len = idx_t.shape.iter().product::<usize>();
6422 if len == total {
6423 for (i, &val) in idx_t.data.iter().enumerate() {
6424 if val != 0.0 {
6425 idxs.push(i + 1);
6426 }
6427 }
6428 } else {
6429 for &val in &idx_t.data {
6430 let i = val as isize;
6431 if i < 1 {
6432 vm_bail!(mex(
6433 "IndexOutOfBounds",
6434 "Index out of bounds"
6435 ));
6436 }
6437 idxs.push(i as usize);
6438 }
6439 }
6440 }
6441 _ => vm_bail!(mex(
6442 "UnsupportedIndexType",
6443 "Unsupported index type"
6444 )),
6445 }
6446 } else {
6447 vm_bail!(mex(
6448 "MissingNumericIndex",
6449 "missing numeric index"
6450 ));
6451 }
6452 if idxs.iter().any(|&i| i == 0 || i > total) {
6453 vm_bail!(mex("IndexOutOfBounds", "Index out of bounds"));
6454 }
6455 if idxs.len() == 1 {
6456 stack.push(Value::Num(t2.data[idxs[0] - 1]));
6457 } else {
6458 let mut out = Vec::with_capacity(idxs.len());
6459 for &i in &idxs {
6460 out.push(t2.data[i - 1]);
6461 }
6462 let tens =
6463 runmat_builtins::Tensor::new(out, vec![idxs.len(), 1])
6464 .map_err(|e| format!("Slice error: {e}"))?;
6465 stack.push(Value::Tensor(tens));
6466 }
6467 } else {
6468 for d in 0..dims {
6469 let is_colon = (colon_mask & (1u32 << d)) != 0;
6470 let is_end = (end_mask & (1u32 << d)) != 0;
6471 if is_colon {
6472 selectors.push(Sel::Colon);
6473 } else if is_end {
6474 let dim_len = *t2.shape.get(d).unwrap_or(&1);
6475 selectors.push(Sel::Scalar(dim_len));
6476 } else {
6477 let v = numeric_vals.get(num_iter).ok_or(mex(
6478 "MissingNumericIndex",
6479 "missing numeric index",
6480 ))?;
6481 num_iter += 1;
6482 match v {
6483 Value::Num(n) => {
6484 let idx = *n as isize;
6485 if idx < 1 {
6486 return Err(mex(
6487 "IndexOutOfBounds",
6488 "Index out of bounds",
6489 ));
6490 }
6491 selectors.push(Sel::Scalar(idx as usize));
6492 }
6493 Value::Tensor(idx_t) => {
6494 let dim_len = *t2.shape.get(d).unwrap_or(&1);
6495 let len = idx_t.shape.iter().product::<usize>();
6496 if len == dim_len {
6497 let mut indices = Vec::new();
6498 for (i, &val) in
6499 idx_t.data.iter().enumerate()
6500 {
6501 if val != 0.0 {
6502 indices.push(i + 1);
6503 }
6504 }
6505 selectors.push(Sel::Indices(indices));
6506 } else {
6507 let mut indices = Vec::with_capacity(len);
6508 for &val in &idx_t.data {
6509 let idx = val as isize;
6510 if idx < 1 {
6511 return Err(mex(
6512 "IndexOutOfBounds",
6513 "Index out of bounds",
6514 ));
6515 }
6516 indices.push(idx as usize);
6517 }
6518 selectors.push(Sel::Indices(indices));
6519 }
6520 }
6521 Value::LogicalArray(la) => {
6522 let dim_len = *t2.shape.get(d).unwrap_or(&1);
6523 if la.data.len() == dim_len {
6524 let mut indices = Vec::new();
6525 for (i, &b) in la.data.iter().enumerate() {
6526 if b != 0 {
6527 indices.push(i + 1);
6528 }
6529 }
6530 selectors.push(Sel::Indices(indices));
6531 } else {
6532 return Err(mex(
6533 "IndexShape",
6534 "Logical mask shape mismatch",
6535 ));
6536 }
6537 }
6538 _ => {
6539 return Err(mex(
6540 "UnsupportedIndexType",
6541 "Unsupported index type",
6542 ))
6543 }
6544 }
6545 }
6546 }
6547 let mut out_dims: Vec<usize> = Vec::new();
6548 let mut per_dim_indices: Vec<Vec<usize>> =
6549 Vec::with_capacity(dims);
6550 for (d, sel) in selectors.iter().enumerate().take(dims) {
6551 let dim_len = *t2.shape.get(d).unwrap_or(&1);
6552 let idxs = match sel {
6553 Sel::Colon => (1..=dim_len).collect::<Vec<usize>>(),
6554 Sel::Scalar(i) => vec![*i],
6555 Sel::Indices(v) => v.clone(),
6556 };
6557 if idxs.iter().any(|&i| i == 0 || i > dim_len) {
6558 return Err(mex(
6559 "IndexOutOfBounds",
6560 "Index out of bounds",
6561 ));
6562 }
6563 if idxs.len() > 1 {
6564 out_dims.push(idxs.len());
6565 } else {
6566 out_dims.push(1);
6567 }
6568 per_dim_indices.push(idxs);
6569 }
6570 if dims == 2 {
6571 match (
6572 &per_dim_indices[0].as_slice(),
6573 &per_dim_indices[1].as_slice(),
6574 ) {
6575 (i_list, j_list)
6576 if i_list.len() > 1 && j_list.len() == 1 =>
6577 {
6578 out_dims = vec![i_list.len(), 1];
6579 }
6580 (i_list, j_list)
6581 if i_list.len() == 1 && j_list.len() > 1 =>
6582 {
6583 out_dims = vec![1, j_list.len()];
6584 }
6585 _ => {}
6586 }
6587 }
6588 let mut strides: Vec<usize> = vec![0; dims];
6589 let full_shape: Vec<usize> = if rank < dims {
6590 let mut s = t2.shape.clone();
6591 s.resize(dims, 1);
6592 s
6593 } else {
6594 t2.shape.clone()
6595 };
6596 let mut acc = 1usize;
6597 for d in 0..dims {
6598 strides[d] = acc;
6599 acc *= full_shape[d];
6600 }
6601 let total_out: usize = out_dims.iter().product();
6602 let mut out_data: Vec<f64> = Vec::with_capacity(total_out);
6603 if out_dims.contains(&0) {
6604 let out_tensor =
6605 runmat_builtins::Tensor::new(out_data, out_dims)
6606 .map_err(|e| format!("Slice error: {e}"))?;
6607 stack.push(Value::Tensor(out_tensor));
6608 } else {
/// Visit the Cartesian product of the per-dimension index lists,
/// first dimension fastest (column-major order, matching the stride
/// arithmetic performed by the caller's closure).
///
/// Precondition: no inner list is empty — the caller handles the
/// `out_dims.contains(&0)` case before reaching this helper.
fn cartesian<F: FnMut(&[usize])>(
    lists: &[Vec<usize>],
    mut f: F,
) {
    let dims = lists.len();
    let mut idx = vec![0usize; dims];
    loop {
        // Build the current multi-dimensional index tuple.
        let current: Vec<usize> =
            (0..dims).map(|d| lists[d][idx[d]]).collect();
        f(&current);
        // Carry-propagating increment across dimensions.
        let mut d = 0usize;
        while d < dims {
            idx[d] += 1;
            if idx[d] < lists[d].len() {
                break;
            }
            idx[d] = 0;
            d += 1;
        }
        // All positions wrapped: enumeration complete.
        if d == dims {
            break;
        }
    }
}
6633 cartesian(&per_dim_indices, |multi| {
6634 let mut lin = 0usize;
6635 for d in 0..dims {
6636 let i0 = multi[d] - 1;
6637 lin += i0 * strides[d];
6638 }
6639 out_data.push(t2.data[lin]);
6640 });
6641 if out_data.len() == 1 {
6642 stack.push(Value::Num(out_data[0]));
6643 } else {
6644 let out_tensor =
6645 runmat_builtins::Tensor::new(out_data, out_dims)
6646 .map_err(|e| format!("Slice error: {e}"))?;
6647 stack.push(Value::Tensor(out_tensor));
6648 }
6649 }
6650 }
6651 }
6652 other => {
6653 stack.push(other);
6654 }
6655 }
6656 }
6657 other => {
6658 vm_bail!(mex(
6659 "SliceNonTensor",
6660 &format!("Slicing only supported on tensors: got {other:?}")
6661 ));
6662 }
6663 }
6664 }
6665 Instr::Index1DRangeEnd { has_step, offset } => {
6666 let step_val: f64 = if has_step {
6668 let v: f64 = (&stack
6669 .pop()
6670 .ok_or(mex("StackUnderflow", "stack underflow"))?)
6671 .try_into()?;
6672 v
6673 } else {
6674 1.0
6675 };
6676 let start_val: f64 = (&stack
6677 .pop()
6678 .ok_or(mex("StackUnderflow", "stack underflow"))?)
6679 .try_into()?;
6680 let base = stack
6681 .pop()
6682 .ok_or(mex("StackUnderflow", "stack underflow"))?;
6683 match base {
6684 Value::Tensor(t) => {
6685 let total = t.data.len();
6686 let end_idx = (total as i64) - offset; let mut out: Vec<f64> = Vec::new();
6688 let mut cur = start_val as i64;
6689 let step_i = if step_val >= 0.0 {
6690 step_val as i64
6691 } else {
6692 -(step_val.abs() as i64)
6693 };
6694 if step_i == 0 {
6695 return Err(mex("IndexStepZero", "Index step cannot be zero"));
6696 }
6697 if step_i > 0 {
6698 while cur as i64 <= end_idx {
6699 let idx0 = cur as usize;
6700 if idx0 == 0 || idx0 > total {
6701 break;
6702 }
6703 out.push(t.data[idx0 - 1]);
6704 cur += step_i;
6705 }
6706 } else {
6707 while (cur as i64) >= end_idx {
6708 let idx0 = cur as usize;
6709 if idx0 == 0 || idx0 > total {
6710 break;
6711 }
6712 out.push(t.data[idx0 - 1]);
6713 cur += step_i;
6714 }
6715 }
6716 if out.len() == 1 {
6717 stack.push(Value::Num(out[0]));
6718 } else {
6719 let tens =
6720 runmat_builtins::Tensor::new(out.clone(), vec![out.len(), 1])
6721 .map_err(|e| format!("Range slice error: {e}"))?;
6722 stack.push(Value::Tensor(tens));
6723 }
6724 }
6725 _ => vm_bail!(mex("SliceNonTensor", "Slicing only supported on tensors")),
6726 }
6727 }
6728 Instr::StoreSlice(dims, numeric_count, colon_mask, end_mask) => {
6729 let __b = bench_start();
6730 let rhs = stack
6732 .pop()
6733 .ok_or(mex("StackUnderflow", "stack underflow"))?;
6734 let mut numeric: Vec<Value> = Vec::with_capacity(numeric_count);
6735 for _ in 0..numeric_count {
6736 numeric.push(
6737 stack
6738 .pop()
6739 .ok_or(mex("StackUnderflow", "stack underflow"))?,
6740 );
6741 }
6742 numeric.reverse();
6743 let base = stack
6744 .pop()
6745 .ok_or(mex("StackUnderflow", "stack underflow"))?;
6746 match base {
6747 Value::Object(obj) => {
6748 let cell =
6749 runmat_builtins::CellArray::new(numeric.clone(), 1, numeric.len())
6750 .map_err(|e| format!("subsasgn build error: {e}"))?;
6751 match runmat_runtime::call_builtin(
6752 "call_method",
6753 &[
6754 Value::Object(obj.clone()),
6755 Value::String("subsasgn".to_string()),
6756 Value::String("()".to_string()),
6757 Value::Cell(cell.clone()),
6758 rhs.clone(),
6759 ],
6760 ) {
6761 Ok(v) => stack.push(v),
6762 Err(_e) => {
6763 let qualified = format!("{}.subsasgn", obj.class_name);
6766 match runmat_runtime::call_builtin(
6767 &qualified,
6768 &[
6769 Value::Object(obj),
6770 Value::String("()".to_string()),
6771 Value::Cell(cell),
6772 rhs,
6773 ],
6774 ) {
6775 Ok(v2) => stack.push(v2),
6776 Err(e2) => vm_bail!(e2),
6777 }
6778 }
6779 }
6780 }
6781 Value::Tensor(mut t) => {
6782 if dims == 1 {
6785 let total = t.data.len();
6786 let mut lin_indices: Vec<usize> = Vec::new();
6788 let is_colon = (colon_mask & 1u32) != 0;
6789 let is_end = (end_mask & 1u32) != 0;
6790 if is_colon {
6791 lin_indices = (1..=total).collect();
6792 } else if is_end {
6793 lin_indices = vec![total];
6794 } else {
6795 let v = numeric
6796 .first()
6797 .ok_or(mex("MissingNumericIndex", "missing numeric index"))?;
6798 match v {
6799 Value::Num(n) => {
6800 let i = *n as isize;
6801 if i < 1 || (i as usize) > total {
6802 vm_bail!(mex(
6803 "IndexOutOfBounds",
6804 "Index out of bounds"
6805 ));
6806 }
6807 lin_indices.push(i as usize);
6808 }
6809 Value::Tensor(idx_t) => {
6810 let len = idx_t.shape.iter().product::<usize>();
6811 if len == total {
6812 for (i, &val) in idx_t.data.iter().enumerate() {
6813 if val != 0.0 {
6814 lin_indices.push(i + 1);
6815 }
6816 }
6817 } else {
6818 for &val in &idx_t.data {
6819 let i = val as isize;
6820 if i < 1 || (i as usize) > total {
6821 vm_bail!(mex(
6822 "IndexOutOfBounds",
6823 "Index out of bounds"
6824 ));
6825 }
6826 lin_indices.push(i as usize);
6827 }
6828 }
6829 }
6830 _ => vm_bail!(mex(
6831 "UnsupportedIndexType",
6832 "Unsupported index type"
6833 )),
6834 }
6835 }
6836 match rhs {
6838 Value::Num(v) => {
6839 for &li in &lin_indices {
6840 t.data[li - 1] = v;
6841 }
6842 }
6843 Value::Tensor(rt) => {
6844 if rt.data.len() == 1 {
6845 let v = rt.data[0];
6846 for &li in &lin_indices {
6847 t.data[li - 1] = v;
6848 }
6849 } else if rt.data.len() == lin_indices.len() {
6850 for (k, &li) in lin_indices.iter().enumerate() {
6851 t.data[li - 1] = rt.data[k];
6852 }
6853 } else {
6854 vm_bail!(
6855 "shape mismatch for linear slice assign".to_string()
6856 );
6857 }
6858 }
6859 _ => vm_bail!("rhs must be numeric or tensor".to_string()),
6860 }
6861 stack.push(Value::Tensor(t));
6862 } else {
6863 let rank = t.shape.len();
// Per-dimension selector for N-D `StoreSlice` assignment.
#[derive(Clone)]
enum Sel {
    // `:` — assign across the full extent of the dimension.
    Colon,
    // A single 1-based index (also used for `end`).
    Scalar(usize),
    // Explicit list of 1-based target indices (index vector or mask).
    Indices(Vec<usize>),
}
6870 let mut selectors: Vec<Sel> = Vec::with_capacity(dims);
6871 let mut num_iter = 0usize;
6872 for d in 0..dims {
6873 let is_colon = (colon_mask & (1u32 << d)) != 0;
6874 let is_end = (end_mask & (1u32 << d)) != 0;
6875 if is_colon {
6876 selectors.push(Sel::Colon);
6877 } else if is_end {
6878 selectors.push(Sel::Scalar(*t.shape.get(d).unwrap_or(&1)));
6879 } else {
6880 let v = numeric.get(num_iter).ok_or(mex(
6881 "MissingNumericIndex",
6882 "missing numeric index",
6883 ))?;
6884 num_iter += 1;
6885 match v {
6886 Value::Num(n) => {
6887 let idx = *n as isize;
6888 if idx < 1 {
6889 vm_bail!(mex(
6890 "IndexOutOfBounds",
6891 "Index out of bounds"
6892 ));
6893 }
6894 selectors.push(Sel::Scalar(idx as usize));
6895 }
6896 Value::Tensor(idx_t) => {
6897 let dim_len = *t.shape.get(d).unwrap_or(&1);
6898 let len = idx_t.shape.iter().product::<usize>();
6899 if len == dim_len {
6900 let mut v = Vec::new();
6901 for (i, &val) in idx_t.data.iter().enumerate() {
6902 if val != 0.0 {
6903 v.push(i + 1);
6904 }
6905 }
6906 selectors.push(Sel::Indices(v));
6907 } else {
6908 let mut v = Vec::with_capacity(len);
6909 for &val in &idx_t.data {
6910 let idx = val as isize;
6911 if idx < 1 {
6912 vm_bail!(mex(
6913 "IndexOutOfBounds",
6914 "Index out of bounds"
6915 ));
6916 }
6917 v.push(idx as usize);
6918 }
6919 selectors.push(Sel::Indices(v));
6920 }
6921 }
6922 _ => vm_bail!(mex(
6923 "UnsupportedIndexType",
6924 "Unsupported index type"
6925 )),
6926 }
6927 }
6928 }
6929 if dims == 2 {
6931 let rows = if rank >= 1 { t.shape[0] } else { 1 };
6932 let cols = if rank >= 2 { t.shape[1] } else { 1 };
6933 match (&selectors[0], &selectors[1]) {
6934 (Sel::Colon, Sel::Scalar(j)) => {
6936 let j0 = *j - 1;
6937 if j0 >= cols {
6939 let new_cols = j0 + 1;
6940 let new_rows = rows;
6941 let mut new_data = vec![0.0f64; new_rows * new_cols];
6942 for c in 0..cols {
6943 let src_off = c * rows;
6944 let dst_off = c * new_rows;
6945 new_data[dst_off..dst_off + rows].copy_from_slice(
6946 &t.data[src_off..src_off + rows],
6947 );
6948 }
6949 t.data = new_data;
6950 t.shape = vec![new_rows, new_cols];
6951 t.rows = new_rows;
6952 t.cols = new_cols;
6953 }
6954 let start = j0 * rows;
6955 match rhs {
6956 Value::Num(v) => {
6957 for r in 0..rows {
6958 t.data[start + r] = v;
6959 }
6960 }
6961 Value::Tensor(rt) => {
6962 let len = rt.data.len();
6963 if len == rows {
6964 for r in 0..rows {
6965 t.data[start + r] = rt.data[r];
6966 }
6967 } else if len == 1 {
6968 for r in 0..rows {
6969 t.data[start + r] = rt.data[0];
6970 }
6971 } else {
6972 vm_bail!("shape mismatch for slice assign"
6973 .to_string());
6974 }
6975 }
6976 _ => {
6977 vm_bail!("rhs must be numeric or tensor".to_string())
6978 }
6979 }
6980 stack.push(Value::Tensor(t));
6981 bench_end("StoreSlice2D.fast_col", __b);
6982 pc += 1;
6983 continue;
6984 }
6985 (Sel::Scalar(i), Sel::Colon) => {
6987 let i0 = *i - 1;
6988 if i0 >= rows {
6990 let new_rows = i0 + 1;
6991 let new_cols = cols;
6992 let mut new_data = vec![0.0f64; new_rows * new_cols];
6993 for c in 0..cols {
6994 for r in 0..rows {
6995 new_data[r + c * new_rows] =
6996 t.data[r + c * rows];
6997 }
6998 }
6999 t.data = new_data;
7000 t.shape = vec![new_rows, new_cols];
7001 t.rows = new_rows;
7002 t.cols = new_cols;
7003 }
7004 match rhs {
7005 Value::Num(v) => {
7006 for c in 0..cols {
7007 t.data[i0 + c * rows] = v;
7008 }
7009 }
7010 Value::Tensor(rt) => {
7011 let len = rt.data.len();
7012 if len == cols {
7013 for c in 0..cols {
7014 t.data[i0 + c * rows] = rt.data[c];
7015 }
7016 } else if len == 1 {
7017 for c in 0..cols {
7018 t.data[i0 + c * rows] = rt.data[0];
7019 }
7020 } else {
7021 vm_bail!("shape mismatch for slice assign"
7022 .to_string());
7023 }
7024 }
7025 _ => {
7026 vm_bail!("rhs must be numeric or tensor".to_string())
7027 }
7028 }
7029 stack.push(Value::Tensor(t));
7030 bench_end("StoreSlice2D.fast_row", __b);
7031 pc += 1;
7032 continue;
7033 }
7034 _ => {}
7035 }
7036 }
7037 let mut per_dim_indices: Vec<Vec<usize>> = Vec::with_capacity(dims);
7040 let full_shape: Vec<usize> = if rank < dims {
7041 let mut s = t.shape.clone();
7042 s.resize(dims, 1);
7043 s
7044 } else {
7045 t.shape.clone()
7046 };
7047 for d in 0..dims {
7048 let dim_len = full_shape[d];
7049 let idxs = match &selectors[d] {
7050 Sel::Colon => (1..=dim_len).collect(),
7051 Sel::Scalar(i) => vec![*i],
7052 Sel::Indices(v) => v.clone(),
7053 };
7054 if idxs.iter().any(|&i| i == 0 || i > dim_len) {
7055 vm_bail!(mex("IndexOutOfBounds", "Index out of bounds"));
7056 }
7057 per_dim_indices.push(idxs);
7058 }
7059 let mut strides: Vec<usize> = vec![0; dims];
7061 let mut acc = 1usize;
7062 for d in 0..dims {
7063 strides[d] = acc;
7064 acc *= full_shape[d];
7065 }
7066 let total_out: usize =
7067 per_dim_indices.iter().map(|v| v.len()).product();
// View over the assignment right-hand side: either a scalar that is
// splatted to every selected element, or tensor data broadcast
// per-dimension (each RHS dim must be 1 or match the selection length —
// validated by the caller before this view is constructed).
enum RhsView {
    Scalar(f64),
    Tensor {
        // Flat RHS element data.
        data: Vec<f64>,
        // RHS shape padded/truncated to `dims` entries.
        shape: Vec<usize>,
        // Column-major strides computed from `shape`.
        strides: Vec<usize>,
    },
}
7077 let rhs_view = match rhs {
7078 Value::Num(n) => RhsView::Scalar(n),
7079 Value::Tensor(rt) => {
7080 let mut shape = rt.shape.clone();
7082 if shape.len() < dims {
7083 shape.resize(dims, 1);
7084 }
7085 if shape.len() > dims {
7086 if shape.iter().skip(dims).any(|&s| s != 1) {
7087 vm_bail!("shape mismatch for slice assign".to_string());
7088 }
7089 shape.truncate(dims);
7090 }
7091 let mut ok = true;
7092 for d in 0..dims {
7093 let out_len = per_dim_indices[d].len();
7094 let rhs_len = shape[d];
7095 if !(rhs_len == 1 || rhs_len == out_len) {
7096 ok = false;
7097 break;
7098 }
7099 }
7100 if !ok {
7101 vm_bail!("shape mismatch for slice assign".to_string());
7102 }
7103 let mut rstrides = vec![0usize; dims];
7104 let mut racc = 1usize;
7105 for d in 0..dims {
7106 rstrides[d] = racc;
7107 racc *= shape[d];
7108 }
7109 RhsView::Tensor {
7110 data: rt.data,
7111 shape,
7112 strides: rstrides,
7113 }
7114 }
7115 _ => vm_bail!("rhs must be numeric or tensor".to_string()),
7116 };
7117 let mut _k = 0usize;
7119 let mut idx = vec![0usize; dims];
7120 if total_out == 0 {
7121 stack.push(Value::Tensor(t));
7122 } else {
7123 loop {
7124 let mut lin = 0usize;
7125 for d in 0..dims {
7126 let i0 = per_dim_indices[d][idx[d]] - 1;
7127 lin += i0 * strides[d];
7128 }
7129 match &rhs_view {
7130 RhsView::Scalar(val) => t.data[lin] = *val,
7131 RhsView::Tensor {
7132 data,
7133 shape,
7134 strides,
7135 } => {
7136 let mut rlin = 0usize;
7137 for d in 0..dims {
7138 let rhs_len = shape[d];
7139 let pos = if rhs_len == 1 { 0 } else { idx[d] };
7140 rlin += pos * strides[d];
7141 }
7142 t.data[lin] = data[rlin];
7143 }
7144 }
7145 _k += 1;
7146 let mut d = 0usize;
7148 while d < dims {
7149 idx[d] += 1;
7150 if idx[d] < per_dim_indices[d].len() {
7151 break;
7152 }
7153 idx[d] = 0;
7154 d += 1;
7155 }
7156 if d == dims {
7157 break;
7158 }
7159 }
7160 stack.push(Value::Tensor(t));
7161 }
7162 }
7163 }
7164 Value::GpuTensor(handle) => {
7165 if let Some(provider) = runmat_accelerate_api::provider() {
7166 let base_shape = handle.shape.clone();
7167 if let Ok(selectors) = build_slice_selectors(
7168 dims,
7169 colon_mask,
7170 end_mask,
7171 &numeric,
7172 &base_shape,
7173 ) {
7174 if let Ok(plan) = build_slice_plan(&selectors, dims, &base_shape) {
7175 if plan.indices.is_empty() {
7176 stack.push(Value::GpuTensor(handle));
7177 bench_end("StoreSlice", __b);
7178 pc += 1;
7179 continue;
7180 }
7181 let values_result = if plan.dims == 1 {
7182 let count =
7183 plan.selection_lengths.first().copied().unwrap_or(0);
7184 materialize_rhs_linear(&rhs, count)
7185 } else {
7186 materialize_rhs_nd(&rhs, &plan.selection_lengths)
7187 };
7188 if let Ok(values) = values_result {
7189 if values.len() == plan.indices.len() {
7190 let value_shape = vec![values.len().max(1), 1];
7191 let upload_result = if values.is_empty() {
7192 provider.zeros(&[0, 1])
7193 } else {
7194 provider.upload(
7195 &runmat_accelerate_api::HostTensorView {
7196 data: &values,
7197 shape: &value_shape,
7198 },
7199 )
7200 };
7201 if let Ok(values_handle) = upload_result {
7202 if provider
7203 .scatter_linear(
7204 &handle,
7205 &plan.indices,
7206 &values_handle,
7207 )
7208 .is_ok()
7209 {
7210 stack.push(Value::GpuTensor(handle));
7211 bench_end("StoreSlice", __b);
7212 pc += 1;
7213 continue;
7214 }
7215 }
7216 }
7217 }
7218 }
7219 }
7220 }
7221 let h = handle;
7222 if dims == 2 {
7224 let rows = h.shape.first().copied().unwrap_or(1);
7225 let cols = h.shape.get(1).copied().unwrap_or(1);
// Reduced 2-D selector for the GPU fast path: only whole-dimension
// (`:`) or single-index selections qualify for the scatter_column /
// scatter_row shortcuts below.
#[derive(Clone)]
enum Sel {
    Colon,
    Scalar(usize),
}
7232 #[allow(unused_assignments)]
7233 let mut num_iter_fast = 0usize;
7234 let sel0;
7235 let sel1;
7236 let is_colon0 = (colon_mask & (1u32 << 0)) != 0;
7238 let is_end0 = (end_mask & (1u32 << 0)) != 0;
7239 if is_colon0 {
7240 sel0 = Sel::Colon;
7241 } else if is_end0 {
7242 sel0 = Sel::Scalar(rows);
7243 } else {
7244 let v = numeric
7245 .get(num_iter_fast)
7246 .ok_or(mex("MissingNumericIndex", "missing numeric index"))?;
7247 num_iter_fast += 1;
7248 let n: f64 = v.try_into()?;
7249 if n < 1.0 {
7250 return Err(mex("IndexOutOfBounds", "Index out of bounds"));
7251 }
7252 sel0 = Sel::Scalar(n as usize);
7253 }
7254 let is_colon1 = (colon_mask & (1u32 << 1)) != 0;
7256 let is_end1 = (end_mask & (1u32 << 1)) != 0;
7257 if is_colon1 {
7258 sel1 = Sel::Colon;
7259 } else if is_end1 {
7260 sel1 = Sel::Scalar(cols);
7261 } else {
7262 let v = numeric
7263 .get(num_iter_fast)
7264 .ok_or(mex("MissingNumericIndex", "missing numeric index"))?;
7265 let n: f64 = v.try_into()?;
7266 if n < 1.0 {
7267 return Err(mex("IndexOutOfBounds", "Index out of bounds"));
7268 }
7269 sel1 = Sel::Scalar(n as usize);
7270 }
7271 let _ = num_iter_fast;
7273 if let (Sel::Colon, Sel::Scalar(j)) = (&sel0, &sel1) {
7275 let j0 = *j - 1;
7276 if j0 < cols {
7277 if let Value::GpuTensor(vh) = &rhs {
7278 let v_rows = match vh.shape.len() {
7279 1 | 2 => vh.shape[0],
7280 _ => 0,
7281 };
7282 if v_rows == rows {
7283 if let Some(p) = runmat_accelerate_api::provider() {
7284 match p.scatter_column(&h, j0, vh) {
7285 Ok(new_h) => {
7286 stack.push(Value::GpuTensor(new_h));
7287 bench_end("StoreSlice2D.fast_col", __b);
7288 pc += 1;
7289 continue;
7290 }
7291 Err(_) => { }
7292 }
7293 }
7294 }
7295 }
7296 }
7297 }
7298 if let (Sel::Scalar(i), Sel::Colon) = (&sel0, &sel1) {
7300 let i0 = *i - 1;
7301 if i0 < rows {
7302 if let Value::GpuTensor(vh) = &rhs {
7303 let v_cols = match vh.shape.len() {
7304 1 => vh.shape[0],
7305 2 => vh.shape[1],
7306 _ => 0,
7307 };
7308 if v_cols == cols {
7309 if let Some(p) = runmat_accelerate_api::provider() {
7310 match p.scatter_row(&h, i0, vh) {
7311 Ok(new_h) => {
7312 stack.push(Value::GpuTensor(new_h));
7313 bench_end("StoreSlice2D.fast_row", __b);
7314 pc += 1;
7315 continue;
7316 }
7317 Err(_) => { }
7318 }
7319 }
7320 }
7321 }
7322 }
7323 }
7324 }
7325 let provider = runmat_accelerate_api::provider()
7327 .ok_or_else(|| "No acceleration provider registered".to_string())?;
7328 let host = provider
7329 .download(&h)
7330 .map_err(|e| format!("gather for slice assign: {e}"))?;
7331 let mut t = runmat_builtins::Tensor::new(host.data, host.shape)
7332 .map_err(|e| format!("slice assign: {e}"))?;
7333 if dims == 1 {
7335 let total = t.data.len();
7336 let mut lin_indices: Vec<usize> = Vec::new();
7338 let is_colon = (colon_mask & 1u32) != 0;
7339 let is_end = (end_mask & 1u32) != 0;
7340 if is_colon {
7341 lin_indices = (1..=total).collect();
7342 } else if is_end {
7343 lin_indices = vec![total];
7344 } else {
7345 let v = numeric
7346 .first()
7347 .ok_or(mex("MissingNumericIndex", "missing numeric index"))?;
7348 match v {
7349 Value::Num(n) => {
7350 let i = *n as isize;
7351 if i < 1 || (i as usize) > total {
7352 vm_bail!(mex(
7353 "IndexOutOfBounds",
7354 "Index out of bounds"
7355 ));
7356 }
7357 lin_indices.push(i as usize);
7358 }
7359 Value::Tensor(idx_t) => {
7360 let len = idx_t.shape.iter().product::<usize>();
7361 if len == total {
7362 for (i, &val) in idx_t.data.iter().enumerate() {
7363 if val != 0.0 {
7364 lin_indices.push(i + 1);
7365 }
7366 }
7367 } else {
7368 for &val in &idx_t.data {
7369 let i = val as isize;
7370 if i < 1 || (i as usize) > total {
7371 vm_bail!(mex(
7372 "IndexOutOfBounds",
7373 "Index out of bounds"
7374 ));
7375 }
7376 lin_indices.push(i as usize);
7377 }
7378 }
7379 }
7380 _ => vm_bail!(mex(
7381 "UnsupportedIndexType",
7382 "Unsupported index type"
7383 )),
7384 }
7385 }
7386 match rhs {
7388 Value::Num(v) => {
7389 for &li in &lin_indices {
7390 t.data[li - 1] = v;
7391 }
7392 }
7393 Value::Tensor(rt) => {
7394 if rt.data.len() == 1 {
7395 let v = rt.data[0];
7396 for &li in &lin_indices {
7397 t.data[li - 1] = v;
7398 }
7399 } else if rt.data.len() == lin_indices.len() {
7400 for (k, &li) in lin_indices.iter().enumerate() {
7401 t.data[li - 1] = rt.data[k];
7402 }
7403 } else {
7404 vm_bail!(
7405 "shape mismatch for linear slice assign".to_string()
7406 );
7407 }
7408 }
7409 _ => vm_bail!("rhs must be numeric or tensor".to_string()),
7410 }
7411 let view = runmat_accelerate_api::HostTensorView {
7412 data: &t.data,
7413 shape: &t.shape,
7414 };
7415 let new_h = provider
7416 .upload(&view)
7417 .map_err(|e| format!("reupload after slice assign: {e}"))?;
7418 stack.push(Value::GpuTensor(new_h));
7419 } else {
7420 let rank = t.shape.len();
                            // Per-dimension selector for N-D slice assignment.
                            // All indices are 1-based (MATLAB convention); they are
                            // converted to 0-based offsets only when linearized.
                            #[derive(Clone)]
                            enum Sel {
                                // `:` — select the entire dimension.
                                Colon,
                                // A single 1-based index (also used for a bare `end`,
                                // which resolves to the dimension length).
                                Scalar(usize),
                                // An explicit list of 1-based indices, built either from
                                // a numeric index vector or from a logical mask whose
                                // nonzero positions are collected.
                                Indices(Vec<usize>),
                            }
7427 let mut selectors: Vec<Sel> = Vec::with_capacity(dims);
7428 let mut num_iter = 0usize;
7429 for d in 0..dims {
7430 let is_colon = (colon_mask & (1u32 << d)) != 0;
7431 let is_end = (end_mask & (1u32 << d)) != 0;
7432 if is_colon {
7433 selectors.push(Sel::Colon);
7434 } else if is_end {
7435 selectors.push(Sel::Scalar(*t.shape.get(d).unwrap_or(&1)));
7436 } else {
7437 let v = numeric.get(num_iter).ok_or(mex(
7438 "MissingNumericIndex",
7439 "missing numeric index",
7440 ))?;
7441 num_iter += 1;
7442 match v {
7443 Value::Num(n) => {
7444 let idx = *n as isize;
7445 if idx < 1 {
7446 vm_bail!(mex(
7447 "IndexOutOfBounds",
7448 "Index out of bounds"
7449 ));
7450 }
7451 selectors.push(Sel::Scalar(idx as usize));
7452 }
7453 Value::Tensor(idx_t) => {
7454 let dim_len = *t.shape.get(d).unwrap_or(&1);
7455 let len = idx_t.shape.iter().product::<usize>();
7456 if len == dim_len {
7457 let mut v = Vec::new();
7458 for (i, &val) in idx_t.data.iter().enumerate() {
7459 if val != 0.0 {
7460 v.push(i + 1);
7461 }
7462 }
7463 selectors.push(Sel::Indices(v));
7464 } else {
7465 let mut v = Vec::with_capacity(len);
7466 for &val in &idx_t.data {
7467 let idx = val as isize;
7468 if idx < 1 {
7469 vm_bail!(mex(
7470 "IndexOutOfBounds",
7471 "Index out of bounds"
7472 ));
7473 }
7474 v.push(idx as usize);
7475 }
7476 selectors.push(Sel::Indices(v));
7477 }
7478 }
7479 _ => vm_bail!(mex(
7480 "UnsupportedIndexType",
7481 "Unsupported index type"
7482 )),
7483 }
7484 }
7485 }
7486 if dims == 2 {
7488 let rows = if rank >= 1 { t.shape[0] } else { 1 };
7489 let cols = if rank >= 2 { t.shape[1] } else { 1 };
7490 match (&selectors[0], &selectors[1]) {
7491 (Sel::Colon, Sel::Scalar(j)) => {
7493 let j0 = *j - 1;
7494 if j0 >= cols {
7496 let new_cols = j0 + 1;
7497 let new_rows = rows;
7498 let mut new_data = vec![0.0f64; new_rows * new_cols];
7499 for c in 0..cols {
7500 let src_off = c * rows;
7501 let dst_off = c * new_rows;
7502 new_data[dst_off..dst_off + rows].copy_from_slice(
7503 &t.data[src_off..src_off + rows],
7504 );
7505 }
7506 t.data = new_data;
7507 t.shape = vec![new_rows, new_cols];
7508 }
7509 let start = j0 * rows;
7510 match rhs {
7512 Value::Num(v) => {
7513 for r in 0..rows {
7514 t.data[start + r] = v;
7515 }
7516 }
7517 Value::Tensor(rt) => {
7518 let len = rt.data.len();
7519 if len == rows {
7520 for r in 0..rows {
7521 t.data[start + r] = rt.data[r];
7522 }
7523 } else if len == 1 {
7524 for r in 0..rows {
7525 t.data[start + r] = rt.data[0];
7526 }
7527 } else {
7528 vm_bail!("shape mismatch for slice assign"
7529 .to_string());
7530 }
7531 }
7532 _ => {
7533 vm_bail!("rhs must be numeric or tensor".to_string())
7534 }
7535 }
7536 let view = runmat_accelerate_api::HostTensorView {
7537 data: &t.data,
7538 shape: &t.shape,
7539 };
7540 let new_h = provider.upload(&view).map_err(|e| {
7541 format!("reupload after slice assign: {e}")
7542 })?;
7543 stack.push(Value::GpuTensor(new_h));
7544 bench_end("StoreSlice2D.fast_col", __b);
7545 pc += 1;
7546 continue;
7547 }
7548 (Sel::Scalar(i), Sel::Colon) => {
7550 let i0 = *i - 1;
7551 if i0 >= rows {
7553 let new_rows = i0 + 1;
7554 let new_cols = cols;
7555 let mut new_data = vec![0.0f64; new_rows * new_cols];
7556 for c in 0..cols {
7557 for r in 0..rows {
7558 new_data[r + c * new_rows] =
7559 t.data[r + c * rows];
7560 }
7561 }
7562 t.data = new_data;
7563 t.shape = vec![new_rows, new_cols];
7564 }
7565 match rhs {
7567 Value::Num(v) => {
7568 for c in 0..cols {
7569 t.data[i0 + c * rows] = v;
7570 }
7571 }
7572 Value::Tensor(rt) => {
7573 let len = rt.data.len();
7574 if len == cols {
7575 for c in 0..cols {
7576 t.data[i0 + c * rows] = rt.data[c];
7577 }
7578 } else if len == 1 {
7579 for c in 0..cols {
7580 t.data[i0 + c * rows] = rt.data[0];
7581 }
7582 } else {
7583 vm_bail!("shape mismatch for slice assign"
7584 .to_string());
7585 }
7586 }
7587 _ => {
7588 vm_bail!("rhs must be numeric or tensor".to_string())
7589 }
7590 }
7591 let view = runmat_accelerate_api::HostTensorView {
7592 data: &t.data,
7593 shape: &t.shape,
7594 };
7595 let new_h = provider.upload(&view).map_err(|e| {
7596 format!("reupload after slice assign: {e}")
7597 })?;
7598 stack.push(Value::GpuTensor(new_h));
7599 bench_end("StoreSlice2D.fast_row", __b);
7600 pc += 1;
7601 continue;
7602 }
7603 _ => {}
7604 }
7605 }
7606 let mut per_dim_indices: Vec<Vec<usize>> = Vec::with_capacity(dims);
7609 let full_shape: Vec<usize> = if rank < dims {
7610 let mut s = t.shape.clone();
7611 s.resize(dims, 1);
7612 s
7613 } else {
7614 t.shape.clone()
7615 };
7616 for d in 0..dims {
7617 let dim_len = full_shape[d];
7618 let idxs = match &selectors[d] {
7619 Sel::Colon => (1..=dim_len).collect(),
7620 Sel::Scalar(i) => vec![*i],
7621 Sel::Indices(v) => v.clone(),
7622 };
7623 if idxs.iter().any(|&i| i == 0 || i > dim_len) {
7624 vm_bail!(mex("IndexOutOfBounds", "Index out of bounds"));
7625 }
7626 per_dim_indices.push(idxs);
7627 }
7628 let mut strides: Vec<usize> = vec![0; dims];
7630 let mut acc = 1usize;
7631 for d in 0..dims {
7632 strides[d] = acc;
7633 acc *= full_shape[d];
7634 }
7635 let total_out: usize =
7636 per_dim_indices.iter().map(|v| v.len()).product();
                            // Right-hand side of the assignment, normalized for
                            // broadcasting against the selected region.
                            enum RhsView {
                                // Scalar RHS: written to every selected element.
                                Scalar(f64),
                                // Tensor RHS: `shape` is padded/truncated to `dims`
                                // (trailing dims must be 1), and `strides` are
                                // column-major (first dimension fastest), matching
                                // the base tensor's layout.
                                Tensor {
                                    data: Vec<f64>,
                                    shape: Vec<usize>,
                                    strides: Vec<usize>,
                                },
                            }
7646 let rhs_view = match rhs {
7647 Value::Num(n) => RhsView::Scalar(n),
7648 Value::Tensor(rt) => {
7649 let mut shape = rt.shape.clone();
7651 if shape.len() < dims {
7652 shape.resize(dims, 1);
7653 }
7654 if shape.len() > dims {
7655 if shape.iter().skip(dims).any(|&s| s != 1) {
7656 vm_bail!("shape mismatch for slice assign".to_string());
7657 }
7658 shape.truncate(dims);
7659 }
7660 let mut ok = true;
7661 for d in 0..dims {
7662 let out_len = per_dim_indices[d].len();
7663 let rhs_len = shape[d];
7664 if !(rhs_len == 1 || rhs_len == out_len) {
7665 ok = false;
7666 break;
7667 }
7668 }
7669 if !ok {
7670 vm_bail!("shape mismatch for slice assign".to_string());
7671 }
7672 let mut rstrides = vec![0usize; dims];
7673 let mut racc = 1usize;
7674 for d in 0..dims {
7675 rstrides[d] = racc;
7676 racc *= shape[d];
7677 }
7678 RhsView::Tensor {
7679 data: rt.data,
7680 shape,
7681 strides: rstrides,
7682 }
7683 }
7684 _ => vm_bail!("rhs must be numeric or tensor".to_string()),
7685 };
7686 let mut _k = 0usize;
7688 let mut idx = vec![0usize; dims];
7689 if total_out == 0 {
7690 let view = runmat_accelerate_api::HostTensorView {
7691 data: &t.data,
7692 shape: &t.shape,
7693 };
7694 let new_h = provider
7695 .upload(&view)
7696 .map_err(|e| format!("reupload after slice assign: {e}"))?;
7697 stack.push(Value::GpuTensor(new_h));
7698 } else {
7699 loop {
7700 let mut lin = 0usize;
7701 for d in 0..dims {
7702 let i0 = per_dim_indices[d][idx[d]] - 1;
7703 lin += i0 * strides[d];
7704 }
7705 match &rhs_view {
7706 RhsView::Scalar(val) => t.data[lin] = *val,
7707 RhsView::Tensor {
7708 data,
7709 shape,
7710 strides,
7711 } => {
7712 let mut rlin = 0usize;
7713 for d in 0..dims {
7714 let rhs_len = shape[d];
7715 let pos = if rhs_len == 1 { 0 } else { idx[d] };
7716 rlin += pos * strides[d];
7717 }
7718 t.data[lin] = data[rlin];
7719 }
7720 }
7721 _k += 1;
7722 let mut d = 0usize;
7724 while d < dims {
7725 idx[d] += 1;
7726 if idx[d] < per_dim_indices[d].len() {
7727 break;
7728 }
7729 idx[d] = 0;
7730 d += 1;
7731 }
7732 if d == dims {
7733 break;
7734 }
7735 }
7736 let view = runmat_accelerate_api::HostTensorView {
7737 data: &t.data,
7738 shape: &t.shape,
7739 };
7740 let new_h = provider
7741 .upload(&view)
7742 .map_err(|e| format!("reupload after slice assign: {e}"))?;
7743 stack.push(Value::GpuTensor(new_h));
7744 }
7745 }
7746 }
7747 Value::StringArray(mut sa) => {
7748 let selectors =
7749 build_slice_selectors(dims, colon_mask, end_mask, &numeric, &sa.shape)
7750 .map_err(|e| format!("slice assign: {e}"))?;
7751 let plan = build_slice_plan(&selectors, dims, &sa.shape).map_err(|e| {
7752 if e.contains("IndexOutOfBounds") {
7753 e.clone()
7754 } else {
7755 format!("slice assign: {e}")
7756 }
7757 })?;
7758 if plan.indices.is_empty() {
7759 stack.push(Value::StringArray(sa));
7760 bench_end("StoreSlice", __b);
7761 pc += 1;
7762 continue;
7763 }
7764 let rhs_view = build_string_rhs_view(&rhs, &plan.selection_lengths)
7765 .map_err(|e| format!("slice assign: {e}"))?;
7766 scatter_string_with_plan(&mut sa, &plan, &rhs_view)
7767 .map_err(|e| format!("slice assign: {e}"))?;
7768 stack.push(Value::StringArray(sa));
7769 bench_end("StoreSlice", __b);
7770 pc += 1;
7771 continue;
7772 }
7774 _ => vm_bail!(
7775 "Slicing assignment only supported on tensors or string arrays".to_string()
7776 ),
7777 }
7778 bench_end("StoreSlice", __b);
7779 }
7780 Instr::StoreSliceEx(dims, numeric_count, colon_mask, end_mask, end_offsets) => {
7781 let rhs = stack
7782 .pop()
7783 .ok_or(mex("StackUnderflow", "stack underflow"))?;
7784 let mut numeric: Vec<Value> = Vec::with_capacity(numeric_count);
7785 for _ in 0..numeric_count {
7786 numeric.push(
7787 stack
7788 .pop()
7789 .ok_or(mex("StackUnderflow", "stack underflow"))?,
7790 );
7791 }
7792 numeric.reverse();
7793 let mut base = stack
7794 .pop()
7795 .ok_or(mex("StackUnderflow", "stack underflow"))?;
7796 if let Value::GpuTensor(handle) = &base {
7797 let adjusted = apply_end_offsets_to_numeric(
7798 &numeric,
7799 dims,
7800 colon_mask,
7801 end_mask,
7802 &end_offsets,
7803 &handle.shape,
7804 );
7805 if let Some(provider) = runmat_accelerate_api::provider() {
7806 if let Ok(selectors) = build_slice_selectors(
7807 dims,
7808 colon_mask,
7809 end_mask,
7810 &adjusted,
7811 &handle.shape,
7812 ) {
7813 if let Ok(plan) = build_slice_plan(&selectors, dims, &handle.shape) {
7814 let values = if plan.dims == 1 {
7815 let count =
7816 plan.selection_lengths.first().copied().unwrap_or(0);
7817 materialize_rhs_linear(&rhs, count)
7818 } else {
7819 materialize_rhs_nd(&rhs, &plan.selection_lengths)
7820 }
7821 .map_err(|e| format!("slice assign: {e}"))?;
7822 if values.len() == plan.indices.len() {
7823 let value_shape = vec![values.len().max(1), 1];
7824 let upload_result = if values.is_empty() {
7825 provider.zeros(&[0, 1])
7826 } else {
7827 provider.upload(&runmat_accelerate_api::HostTensorView {
7828 data: &values,
7829 shape: &value_shape,
7830 })
7831 };
7832 if let Ok(values_handle) = upload_result {
7833 if provider
7834 .scatter_linear(handle, &plan.indices, &values_handle)
7835 .is_ok()
7836 {
7837 stack.push(Value::GpuTensor(handle.clone()));
7838 pc += 1;
7839 continue;
7840 }
7841 }
7842 }
7843 }
7844 }
7845 let host = provider
7846 .download(handle)
7847 .map_err(|e| format!("slice assign: {e}"))?;
7848 let tensor = runmat_builtins::Tensor::new(host.data, host.shape)
7849 .map_err(|e| format!("slice assign: {e}"))?;
7850 base = Value::Tensor(tensor);
7851 } else {
7852 return Err("No acceleration provider registered".to_string());
7853 }
7854 }
7855 match base {
7856 Value::Tensor(t) => {
7857 let mut adjusted = numeric.clone();
7859 for (pos, off) in end_offsets {
7860 if let Some(v) = adjusted.get_mut(pos) {
7861 let mut seen_numeric = 0usize;
7863 let mut dim_for_pos = 0usize;
7864 for d in 0..dims {
7865 let is_colon = (colon_mask & (1u32 << d)) != 0;
7866 let is_end = (end_mask & (1u32 << d)) != 0;
7867 if is_colon || is_end {
7868 continue;
7869 }
7870 if seen_numeric == pos {
7871 dim_for_pos = d;
7872 break;
7873 }
7874 seen_numeric += 1;
7875 }
7876 let dim_len = *t.shape.get(dim_for_pos).unwrap_or(&1);
7877 let idx_val = (dim_len as isize) - (off as isize);
7878 *v = Value::Num(idx_val as f64);
7879 }
7880 }
7881 stack.push(Value::Tensor(t));
7883 for v in adjusted {
7884 stack.push(v);
7885 }
7886 stack.push(rhs);
7887 let rhs = stack
7889 .pop()
7890 .ok_or(mex("StackUnderflow", "stack underflow"))?;
7891 let mut numeric: Vec<Value> = Vec::with_capacity(numeric_count);
7892 for _ in 0..numeric_count {
7893 numeric.push(
7894 stack
7895 .pop()
7896 .ok_or(mex("StackUnderflow", "stack underflow"))?,
7897 );
7898 }
7899 numeric.reverse();
7900 let base = stack
7901 .pop()
7902 .ok_or(mex("StackUnderflow", "stack underflow"))?;
7903 match base {
7904 Value::Tensor(mut t) => {
                                #[derive(Clone)]
                                // Per-dimension selector for the StoreSliceEx host
                                // fallback path. Indices are 1-based throughout.
                                enum Sel {
                                    // `:` — the whole dimension.
                                    Colon,
                                    // One 1-based index (bare `end` resolves to the
                                    // dimension length before reaching here).
                                    Scalar(usize),
                                    // Explicit 1-based index list, from a numeric
                                    // vector or the nonzero positions of a logical mask.
                                    Indices(Vec<usize>),
                                }
7911 let mut selectors: Vec<Sel> = Vec::with_capacity(dims);
7912 let mut num_iter = 0usize;
7913 for d in 0..dims {
7914 let is_colon = (colon_mask & (1u32 << d)) != 0;
7915 let is_end = (end_mask & (1u32 << d)) != 0;
7916 if is_colon {
7917 selectors.push(Sel::Colon);
7918 } else if is_end {
7919 selectors.push(Sel::Scalar(*t.shape.get(d).unwrap_or(&1)));
7920 } else {
7921 let v = numeric.get(num_iter).ok_or(mex(
7922 "MissingNumericIndex",
7923 "missing numeric index",
7924 ))?;
7925 num_iter += 1;
7926 match v {
7927 Value::Num(n) => {
7928 let idx = *n as isize;
7929 if idx < 1 {
7930 vm_bail!(mex(
7931 "IndexOutOfBounds",
7932 "Index out of bounds"
7933 ));
7934 }
7935 selectors.push(Sel::Scalar(idx as usize));
7936 }
7937 Value::Tensor(idx_t) => {
7938 let dim_len = *t.shape.get(d).unwrap_or(&1);
7939 let len = idx_t.shape.iter().product::<usize>();
7940 if len == dim_len {
7941 let mut vi = Vec::new();
7942 for (i, &val) in idx_t.data.iter().enumerate() {
7943 if val != 0.0 {
7944 vi.push(i + 1);
7945 }
7946 }
7947 selectors.push(Sel::Indices(vi));
7948 } else {
7949 let mut vi = Vec::with_capacity(len);
7950 for &val in &idx_t.data {
7951 let idx = val as isize;
7952 if idx < 1 {
7953 vm_bail!(mex(
7954 "IndexOutOfBounds",
7955 "Index out of bounds"
7956 ));
7957 }
7958 vi.push(idx as usize);
7959 }
7960 selectors.push(Sel::Indices(vi));
7961 }
7962 }
7963 _ => vm_bail!(mex(
7964 "UnsupportedIndexType",
7965 "Unsupported index type"
7966 )),
7967 }
7968 }
7969 }
7970 let mut per_dim_indices: Vec<Vec<usize>> = Vec::with_capacity(dims);
7972 for (d, sel) in selectors.iter().enumerate().take(dims) {
7973 let dim_len = *t.shape.get(d).unwrap_or(&1);
7974 let idxs = match sel {
7975 Sel::Colon => (1..=dim_len).collect::<Vec<usize>>(),
7976 Sel::Scalar(i) => vec![*i],
7977 Sel::Indices(v) => v.clone(),
7978 };
7979 per_dim_indices.push(idxs);
7980 }
7981 let mut strides: Vec<usize> = vec![0; dims];
7982 let mut acc = 1usize;
7983 for (d, stride) in strides.iter_mut().enumerate().take(dims) {
7984 *stride = acc;
7985 acc *= *t.shape.get(d).unwrap_or(&1);
7986 }
                                // RHS of the assignment, pre-shaped for broadcast:
                                // a scalar fills every target element; a tensor is
                                // padded/truncated to `dims` with column-major strides
                                // and each dimension must be length 1 or match the
                                // selection length (validated before construction).
                                enum RhsView {
                                    Scalar(f64),
                                    Tensor {
                                        data: Vec<f64>,
                                        shape: Vec<usize>,
                                        strides: Vec<usize>,
                                    },
                                }
7996 let rhs_view =
7997 match rhs {
7998 Value::Num(n) => RhsView::Scalar(n),
7999 Value::Tensor(rt) => {
8000 let mut rshape = rt.shape.clone();
8001 if rshape.len() < dims {
8002 rshape.resize(dims, 1);
8003 }
8004 if rshape.len() > dims {
8005 if rshape.iter().skip(dims).any(|&s| s != 1) {
8006 vm_bail!("shape mismatch for slice assign"
8007 .to_string());
8008 }
8009 rshape.truncate(dims);
8010 }
8011 for d in 0..dims {
8012 let out_len = per_dim_indices[d].len();
8013 let rhs_len = rshape[d];
8014 if !(rhs_len == 1 || rhs_len == out_len) {
8015 vm_bail!("shape mismatch for slice assign"
8016 .to_string());
8017 }
8018 }
8019 let mut rstrides = vec![0usize; dims];
8020 let mut racc = 1usize;
8021 for d in 0..dims {
8022 rstrides[d] = racc;
8023 racc *= rshape[d];
8024 }
8025 RhsView::Tensor {
8026 data: rt.data,
8027 shape: rshape,
8028 strides: rstrides,
8029 }
8030 }
8031 _ => vm_bail!("rhs must be numeric or tensor".to_string()),
8032 };
8033 use std::collections::HashMap;
8035 let mut pos_maps: Vec<HashMap<usize, usize>> =
8036 Vec::with_capacity(dims);
8037 for (_d, dim_idxs) in per_dim_indices.iter().enumerate().take(dims)
8038 {
8039 let mut m = HashMap::new();
8040 for (p, &idx) in dim_idxs.iter().enumerate() {
8041 m.insert(idx, p);
8042 }
8043 pos_maps.push(m);
8044 }
8045 fn cartesian2<F: FnMut(&[usize])>(lists: &[Vec<usize>], mut f: F) {
8046 let dims = lists.len();
8047 let mut idx = vec![0usize; dims];
8048 loop {
8049 let cur: Vec<usize> =
8050 (0..dims).map(|d| lists[d][idx[d]]).collect();
8051 f(&cur);
8052 let mut d = 0usize;
8053 while d < dims {
8054 idx[d] += 1;
8055 if idx[d] < lists[d].len() {
8056 break;
8057 }
8058 idx[d] = 0;
8059 d += 1;
8060 }
8061 if d == dims {
8062 break;
8063 }
8064 }
8065 }
8066 cartesian2(&per_dim_indices, |multi| {
8067 let mut lin = 0usize;
8068 for d in 0..dims {
8069 let i0 = multi[d] - 1;
8070 lin += i0 * strides[d];
8071 }
8072 match &rhs_view {
8073 RhsView::Scalar(v) => {
8074 t.data[lin] = *v;
8075 }
8076 RhsView::Tensor {
8077 data,
8078 shape,
8079 strides: rstrides,
8080 } => {
8081 let mut rlin = 0usize;
8082 for d in 0..dims {
8083 let rhs_len = shape[d];
8084 let pos_in_dim = if rhs_len == 1 {
8085 0
8086 } else {
8087 *pos_maps[d].get(&multi[d]).unwrap_or(&0)
8088 };
8089 rlin += pos_in_dim * rstrides[d];
8090 }
8091 t.data[lin] = data[rlin];
8092 }
8093 }
8094 });
8095 stack.push(Value::Tensor(t));
8096 }
8097 Value::StringArray(mut sa) => {
8098 let selectors = build_slice_selectors(
8099 dims, colon_mask, end_mask, &numeric, &sa.shape,
8100 )
8101 .map_err(|e| format!("slice assign: {e}"))?;
8102 let plan =
8103 build_slice_plan(&selectors, dims, &sa.shape).map_err(|e| {
8104 if e.contains("IndexOutOfBounds") {
8105 e.clone()
8106 } else {
8107 format!("slice assign: {e}")
8108 }
8109 })?;
8110 if plan.indices.is_empty() {
8111 stack.push(Value::StringArray(sa));
8112 pc += 1;
8113 continue;
8114 }
8115 let rhs_view = build_string_rhs_view(&rhs, &plan.selection_lengths)
8116 .map_err(|e| format!("slice assign: {e}"))?;
8117 scatter_string_with_plan(&mut sa, &plan, &rhs_view)
8118 .map_err(|e| format!("slice assign: {e}"))?;
8119 stack.push(Value::StringArray(sa));
8120 pc += 1;
8121 continue;
8122 }
8123 other => vm_bail!(format!("StoreSliceEx unsupported base: {other:?}")),
8124 }
8125 }
8126 other => vm_bail!(format!(
8127 "StoreSliceEx only supports tensors currently, got {other:?}"
8128 )),
8129 }
8130 }
8131 Instr::StoreRangeEnd {
8132 dims,
8133 numeric_count,
8134 colon_mask,
8135 end_mask,
8136 range_dims,
8137 range_has_step,
8138 end_offsets,
8139 } => {
8140 let mut rhs = stack
8142 .pop()
8143 .ok_or(mex("StackUnderflow", "stack underflow"))?;
8144 let mut range_params: Vec<(f64, f64)> = Vec::with_capacity(range_dims.len());
8146 for i in (0..range_dims.len()).rev() {
8147 let has = range_has_step[i];
8148 let step = if has {
8149 let v: f64 = (&stack
8150 .pop()
8151 .ok_or(mex("StackUnderflow", "stack underflow"))?)
8152 .try_into()?;
8153 v
8154 } else {
8155 1.0
8156 };
8157 let st: f64 = (&stack
8158 .pop()
8159 .ok_or(mex("StackUnderflow", "stack underflow"))?)
8160 .try_into()?;
8161 range_params.push((st, step));
8162 }
8163 range_params.reverse();
8164 let mut numeric: Vec<Value> = Vec::with_capacity(numeric_count);
8165 for _ in 0..numeric_count {
8166 numeric.push(
8167 stack
8168 .pop()
8169 .ok_or(mex("StackUnderflow", "stack underflow"))?,
8170 );
8171 }
8172 numeric.reverse();
8173 let mut base = stack
8174 .pop()
8175 .ok_or(mex("StackUnderflow", "stack underflow"))?;
8176 #[cfg(feature = "native-accel")]
8177 clear_residency(&base);
8178 let base_assignable = matches!(
8180 base,
8181 Value::Object(_) | Value::Tensor(_) | Value::GpuTensor(_)
8182 );
8183 if !base_assignable
8184 && matches!(
8185 rhs,
8186 Value::Object(_) | Value::Tensor(_) | Value::GpuTensor(_)
8187 )
8188 {
8189 std::mem::swap(&mut base, &mut rhs);
8190 }
8191 match base {
8192 Value::Tensor(mut t) => {
                    #[derive(Clone)]
                    // Per-dimension selector for range-with-`end` assignment
                    // (`a(i:end-k) = rhs` style). All indices 1-based.
                    enum Sel {
                        // `:` — whole dimension.
                        Colon,
                        // Single 1-based index (bare `end` becomes the dim length).
                        Scalar(usize),
                        // Explicit 1-based index list from a numeric vector or
                        // logical mask.
                        Indices(Vec<usize>),
                        // `start:step:end-end_off` — start/step in 1-based index
                        // space; the inclusive upper bound is computed later as
                        // `dim_len - end_off`.
                        Range { start: i64, step: i64, end_off: i64 },
                    }
8200 let mut selectors: Vec<Sel> = Vec::with_capacity(dims);
8201 let mut num_iter = 0usize;
8202 let mut rp_iter = 0usize;
8203 for d in 0..dims {
8204 if let Some(pos) = range_dims.iter().position(|&rd| rd == d) {
8205 let (st, sp) = range_params[rp_iter];
8206 rp_iter += 1;
8207 let step_i = if sp >= 0.0 {
8208 sp as i64
8209 } else {
8210 -(sp.abs() as i64)
8211 };
8212 selectors.push(Sel::Range {
8213 start: st as i64,
8214 step: step_i,
8215 end_off: end_offsets[pos],
8216 });
8217 continue;
8218 }
8219 let is_colon = (colon_mask & (1u32 << d)) != 0;
8220 let is_end = (end_mask & (1u32 << d)) != 0;
8221 if is_colon {
8222 selectors.push(Sel::Colon);
8223 continue;
8224 }
8225 if is_end {
8226 selectors.push(Sel::Scalar(*t.shape.get(d).unwrap_or(&1)));
8227 continue;
8228 }
8229 let v = numeric
8230 .get(num_iter)
8231 .ok_or(mex("MissingNumericIndex", "missing numeric index"))?;
8232 num_iter += 1;
8233 match v {
8234 Value::Num(n) => {
8235 let idx = *n as isize;
8236 if idx < 1 {
8237 vm_bail!(mex("IndexOutOfBounds", "Index out of bounds"));
8238 }
8239 selectors.push(Sel::Scalar(idx as usize));
8240 }
8241 Value::Tensor(idx_t) => {
8242 let dim_len = *t.shape.get(d).unwrap_or(&1);
8243 let len = idx_t.shape.iter().product::<usize>();
8244 if len == dim_len {
8245 let mut vi = Vec::new();
8246 for (i, &val) in idx_t.data.iter().enumerate() {
8247 if val != 0.0 {
8248 vi.push(i + 1);
8249 }
8250 }
8251 selectors.push(Sel::Indices(vi));
8252 } else {
8253 let mut vi = Vec::with_capacity(len);
8254 for &val in &idx_t.data {
8255 let idx = val as isize;
8256 if idx < 1 {
8257 vm_bail!(mex(
8258 "IndexOutOfBounds",
8259 "Index out of bounds"
8260 ));
8261 }
8262 vi.push(idx as usize);
8263 }
8264 selectors.push(Sel::Indices(vi));
8265 }
8266 }
8267 _ => {
8268 vm_bail!(mex("UnsupportedIndexType", "Unsupported index type"))
8269 }
8270 }
8271 }
8272 let mut per_dim_indices: Vec<Vec<usize>> = Vec::with_capacity(dims);
8275 for (d, sel) in selectors.iter().enumerate().take(dims) {
8276 let dim_len = *t.shape.get(d).unwrap_or(&1);
8277 let idxs = match sel {
8278 Sel::Colon => (1..=dim_len).collect::<Vec<usize>>(),
8279 Sel::Scalar(i) => vec![*i],
8280 Sel::Indices(v) => v.clone(),
8281 Sel::Range {
8282 start,
8283 step,
8284 end_off,
8285 } => {
8286 let mut v = Vec::new();
8287 let mut cur = *start;
8288 let end_i = (dim_len as i64) - *end_off;
8289 let stp = *step;
8290 if stp == 0 {
8291 vm_bail!(mex("IndexStepZero", "Index step cannot be zero"));
8292 }
8293 if stp > 0 {
8294 while cur <= end_i {
8295 if cur < 1 || cur > dim_len as i64 {
8296 break;
8297 }
8298 v.push(cur as usize);
8299 cur += stp;
8300 }
8301 } else {
8302 while cur >= end_i {
8303 if cur < 1 || cur > dim_len as i64 {
8304 break;
8305 }
8306 v.push(cur as usize);
8307 cur += stp;
8308 }
8309 }
8310 v
8311 }
8312 };
8313 if idxs.iter().any(|&i| i == 0 || i > dim_len) {
8314 vm_bail!(mex("IndexOutOfBounds", "Index out of bounds"));
8315 }
8316 per_dim_indices.push(idxs);
8317 }
8318 let mut strides: Vec<usize> = vec![0; dims];
8319 let mut acc = 1usize;
8320 for (d, stride) in strides.iter_mut().enumerate().take(dims) {
8321 *stride = acc;
8322 acc *= *t.shape.get(d).unwrap_or(&1);
8323 }
8324 let selection_empty = per_dim_indices.iter().any(|v| v.is_empty());
8325 if selection_empty {
8326 stack.push(Value::Tensor(t));
8327 } else {
                        // Broadcast-ready view of the RHS: a scalar fills the whole
                        // selection; a tensor carries a shape padded/truncated to
                        // `dims` plus column-major strides, with each dimension
                        // either length 1 (broadcast) or equal to the selection
                        // length (validated before construction).
                        enum RhsView {
                            Scalar(f64),
                            Tensor {
                                data: Vec<f64>,
                                shape: Vec<usize>,
                                strides: Vec<usize>,
                            },
                        }
8337 let rhs_view = match rhs {
8338 Value::Num(n) => RhsView::Scalar(n),
8339 Value::Tensor(rt) => {
8340 if rt.data.is_empty() {
8341 vm_bail!("shape mismatch for slice assign".to_string());
8342 }
8343 let mut rshape = rt.shape.clone();
8345 if rshape.len() < dims {
8346 rshape.resize(dims, 1);
8347 }
8348 if rshape.len() > dims {
8349 if rshape.iter().skip(dims).any(|&s| s != 1) {
8350 vm_bail!("shape mismatch for slice assign".to_string());
8351 }
8352 rshape.truncate(dims);
8353 }
8354 for d in 0..dims {
8356 let out_len = per_dim_indices[d].len();
8357 let rhs_len = rshape[d];
8358 if !(rhs_len == 1 || rhs_len == out_len) {
8359 vm_bail!("shape mismatch for slice assign".to_string());
8360 }
8361 }
8362 let mut rstrides = vec![0usize; dims];
8364 let mut racc = 1usize;
8365 for d in 0..dims {
8366 rstrides[d] = racc;
8367 racc *= rshape[d];
8368 }
8369 if racc != rt.data.len() {
8370 vm_bail!("shape mismatch for slice assign".to_string());
8371 }
8372 RhsView::Tensor {
8373 data: rt.data,
8374 shape: rshape,
8375 strides: rstrides,
8376 }
8377 }
8378 _ => vm_bail!("rhs must be numeric or tensor".to_string()),
8379 };
8380 use std::collections::HashMap;
8382 let mut pos_maps: Vec<HashMap<usize, usize>> = Vec::with_capacity(dims);
8383 for dim_idxs in per_dim_indices.iter().take(dims) {
8384 let mut m: HashMap<usize, usize> = HashMap::new();
8385 for (p, &idx) in dim_idxs.iter().enumerate() {
8386 m.insert(idx, p);
8387 }
8388 pos_maps.push(m);
8389 }
8390 fn cartesian2<F: FnMut(&[usize])>(lists: &[Vec<usize>], mut f: F) {
8391 let dims = lists.len();
8392 let mut idx = vec![0usize; dims];
8393 loop {
8394 let cur: Vec<usize> =
8395 (0..dims).map(|d| lists[d][idx[d]]).collect();
8396 f(&cur);
8397 let mut d = 0usize;
8398 while d < dims {
8399 idx[d] += 1;
8400 if idx[d] < lists[d].len() {
8401 break;
8402 }
8403 idx[d] = 0;
8404 d += 1;
8405 }
8406 if d == dims {
8407 break;
8408 }
8409 }
8410 }
8411 let mut err_opt: Option<String> = None;
8413 let mut _debug_count = 0usize;
8414 cartesian2(&per_dim_indices, |multi| {
8415 if err_opt.is_some() {
8416 return;
8417 }
8418 let mut lin = 0usize;
8419 for d in 0..dims {
8420 let i0 = multi[d] - 1;
8421 lin += i0 * strides[d];
8422 }
8423 match &rhs_view {
8424 RhsView::Scalar(val) => t.data[lin] = *val,
8425 RhsView::Tensor {
8426 data,
8427 shape,
8428 strides: rstrides,
8429 } => {
8430 let mut rlin = 0usize;
8432 for d in 0..dims {
8433 let rhs_len = shape[d];
8434 let pos_in_dim = if rhs_len == 1 {
8435 0
8436 } else {
8437 *pos_maps[d].get(&multi[d]).unwrap_or(&0)
8438 };
8439 rlin += pos_in_dim * rstrides[d];
8440 }
8441 if rlin >= data.len() {
8442 err_opt =
8443 Some("shape mismatch for slice assign".to_string());
8444 return;
8445 }
8446 t.data[lin] = data[rlin];
8447 }
8448 }
8449 });
8450 let _ = (t.data.first(), t.data.len());
8451 if let Some(e) = err_opt {
8452 vm_bail!(e);
8453 }
8454 stack.push(Value::Tensor(t));
8455 }
8456 }
8457 Value::GpuTensor(h) => {
8458 let provider = runmat_accelerate_api::provider()
8459 .ok_or_else(|| "No acceleration provider registered".to_string())?;
8460 let host = provider
8461 .download(&h)
8462 .map_err(|e| format!("gather for range-end assign: {e}"))?;
8463 let mut t = runmat_builtins::Tensor::new(host.data, host.shape)
8464 .map_err(|e| format!("range-end assign: {e}"))?;
                    #[derive(Clone)]
                    // Per-dimension selector for the GPU-tensor range-with-`end`
                    // assignment path (tensor is downloaded to host first).
                    // Indices are 1-based.
                    enum Sel {
                        // `:` — whole dimension.
                        Colon,
                        // Single 1-based index (bare `end` becomes the dim length).
                        Scalar(usize),
                        // Explicit 1-based index list from a numeric vector or
                        // logical mask.
                        Indices(Vec<usize>),
                        // `start:step:end-end_off`; the inclusive upper bound is
                        // later computed as `dim_len - end_off`.
                        Range { start: i64, step: i64, end_off: i64 },
                    }
8472 let mut selectors: Vec<Sel> = Vec::with_capacity(dims);
8473 let mut num_iter = 0usize;
8474 let mut rp_iter = 0usize;
8475 for d in 0..dims {
8476 if let Some(pos) = range_dims.iter().position(|&rd| rd == d) {
8477 let (st, sp) = range_params[rp_iter];
8478 rp_iter += 1;
8479 let step_i = if sp >= 0.0 {
8480 sp as i64
8481 } else {
8482 -(sp.abs() as i64)
8483 };
8484 selectors.push(Sel::Range {
8485 start: st as i64,
8486 step: step_i,
8487 end_off: end_offsets[pos],
8488 });
8489 continue;
8490 }
8491 let is_colon = (colon_mask & (1u32 << d)) != 0;
8492 let is_end = (end_mask & (1u32 << d)) != 0;
8493 if is_colon {
8494 selectors.push(Sel::Colon);
8495 continue;
8496 }
8497 if is_end {
8498 selectors.push(Sel::Scalar(*t.shape.get(d).unwrap_or(&1)));
8499 continue;
8500 }
8501 let v = numeric
8502 .get(num_iter)
8503 .ok_or(mex("MissingNumericIndex", "missing numeric index"))?;
8504 num_iter += 1;
8505 match v {
8506 Value::Num(n) => {
8507 let idx = *n as isize;
8508 if idx < 1 {
8509 vm_bail!(mex("IndexOutOfBounds", "Index out of bounds"));
8510 }
8511 selectors.push(Sel::Scalar(idx as usize));
8512 }
8513 Value::Tensor(idx_t) => {
8514 let dim_len = *t.shape.get(d).unwrap_or(&1);
8515 let len = idx_t.shape.iter().product::<usize>();
8516 if len == dim_len {
8517 let mut vi = Vec::new();
8518 for (i, &val) in idx_t.data.iter().enumerate() {
8519 if val != 0.0 {
8520 vi.push(i + 1);
8521 }
8522 }
8523 selectors.push(Sel::Indices(vi));
8524 } else {
8525 let mut vi = Vec::with_capacity(len);
8526 for &val in &idx_t.data {
8527 let idx = val as isize;
8528 if idx < 1 {
8529 vm_bail!(mex(
8530 "IndexOutOfBounds",
8531 "Index out of bounds"
8532 ));
8533 }
8534 vi.push(idx as usize);
8535 }
8536 selectors.push(Sel::Indices(vi));
8537 }
8538 }
8539 _ => {
8540 vm_bail!(mex("UnsupportedIndexType", "Unsupported index type"))
8541 }
8542 }
8543 }
8544 let mut per_dim_indices: Vec<Vec<usize>> = Vec::with_capacity(dims);
8547 for (d, sel) in selectors.iter().enumerate().take(dims) {
8548 let dim_len = *t.shape.get(d).unwrap_or(&1);
8549 let idxs = match sel {
8550 Sel::Colon => (1..=dim_len).collect::<Vec<usize>>(),
8551 Sel::Scalar(i) => vec![*i],
8552 Sel::Indices(v) => v.clone(),
8553 Sel::Range {
8554 start,
8555 step,
8556 end_off,
8557 } => {
8558 let mut v = Vec::new();
8559 let mut cur = *start;
8560 let end_i = (dim_len as i64) - *end_off;
8561 let stp = *step;
8562 if stp == 0 {
8563 vm_bail!(mex("IndexStepZero", "Index step cannot be zero"));
8564 }
8565 if stp > 0 {
8566 while cur <= end_i {
8567 if cur < 1 || cur > dim_len as i64 {
8568 break;
8569 }
8570 v.push(cur as usize);
8571 cur += stp;
8572 }
8573 } else {
8574 while cur >= end_i {
8575 if cur < 1 || cur > dim_len as i64 {
8576 break;
8577 }
8578 v.push(cur as usize);
8579 cur += stp;
8580 }
8581 }
8582 v
8583 }
8584 };
8585 if idxs.iter().any(|&i| i == 0 || i > dim_len) {
8586 vm_bail!(mex("IndexOutOfBounds", "Index out of bounds"));
8587 }
8588 per_dim_indices.push(idxs);
8589 }
8590 let mut strides: Vec<usize> = vec![0; dims];
8591 let mut acc = 1usize;
8592 for (d, stride) in strides.iter_mut().enumerate().take(dims) {
8593 *stride = acc;
8594 acc *= *t.shape.get(d).unwrap_or(&1);
8595 }
8596 let selection_empty = per_dim_indices.iter().any(|v| v.is_empty());
8597 if selection_empty {
8598 let view = runmat_accelerate_api::HostTensorView {
8599 data: &t.data,
8600 shape: &t.shape,
8601 };
8602 let new_h = provider
8603 .upload(&view)
8604 .map_err(|e| format!("reupload after range-end assign: {e}"))?;
8605 stack.push(Value::GpuTensor(new_h));
8606 } else {
// Normalized view of the assignment RHS for the slice-assign path:
// either a scalar broadcast to every selected element, or a host tensor
// carried with its (singleton-padded) shape and column-major strides.
enum RhsView {
    Scalar(f64),
    Tensor {
        data: Vec<f64>,
        shape: Vec<usize>,
        strides: Vec<usize>,
    },
}
8616 let rhs_view = match rhs {
8617 Value::Num(n) => RhsView::Scalar(n),
Value::Tensor(rt) => {
    // An empty RHS tensor can never satisfy a non-empty selection.
    if rt.data.is_empty() {
        vm_bail!("shape mismatch for slice assign".to_string());
    }
    // Pad the RHS shape with trailing singleton dims up to the selection
    // rank; extra trailing dims are tolerated only if they are all 1.
    let mut rshape = rt.shape.clone();
    if rshape.len() < dims {
        rshape.resize(dims, 1);
    }
    if rshape.len() > dims {
        if rshape.iter().skip(dims).any(|&s| s != 1) {
            vm_bail!("shape mismatch for slice assign".to_string());
        }
        rshape.truncate(dims);
    }
    // Broadcast check: each RHS extent must equal the selected extent
    // for that dimension, or be 1 (broadcast along that dimension).
    for d in 0..dims {
        let out_len = per_dim_indices[d].len();
        let rhs_len = rshape[d];
        if !(rhs_len == 1 || rhs_len == out_len) {
            vm_bail!("shape mismatch for slice assign".to_string());
        }
    }
    // Column-major strides for the padded shape; the final accumulator
    // must equal the element count or shape and data are inconsistent.
    let mut rstrides = vec![0usize; dims];
    let mut racc = 1usize;
    for d in 0..dims {
        rstrides[d] = racc;
        racc *= rshape[d];
    }
    if racc != rt.data.len() {
        vm_bail!("shape mismatch for slice assign".to_string());
    }
    RhsView::Tensor {
        data: rt.data,
        shape: rshape,
        strides: rstrides,
    }
}
8657 _ => vm_bail!("rhs must be numeric or tensor".to_string()),
8658 };
8659 use std::collections::HashMap;
8661 let mut pos_maps: Vec<HashMap<usize, usize>> = Vec::with_capacity(dims);
8662 for dim_idxs in per_dim_indices.iter().take(dims) {
8663 let mut m: HashMap<usize, usize> = HashMap::new();
8664 for (p, &idx) in dim_idxs.iter().enumerate() {
8665 m.insert(idx, p);
8666 }
8667 pos_maps.push(m);
8668 }
8669 let mut err_opt: Option<String> = None;
/// Invokes `f` once per element of the cartesian product of `lists`, with
/// the first dimension varying fastest (column-major visit order, matching
/// the tensor layout used by the enclosing slice-assignment code).
///
/// If any list is empty the product is empty and `f` is never called.
/// (The previous implementation indexed `lists[d][0]` unconditionally and
/// would panic on an empty list; callers currently pre-filter empty
/// selections, so this guard is purely defensive and backward-compatible.)
/// With zero dimensions, `f` is called exactly once with an empty slice,
/// as before.
fn cartesian2<F: FnMut(&[usize])>(lists: &[Vec<usize>], mut f: F) {
    if lists.iter().any(|l| l.is_empty()) {
        return;
    }
    let dims = lists.len();
    let mut idx = vec![0usize; dims];
    loop {
        let cur: Vec<usize> = (0..dims).map(|d| lists[d][idx[d]]).collect();
        f(&cur);
        // Odometer-style increment: bump the lowest dimension and carry
        // into higher dimensions on wrap-around.
        let mut d = 0usize;
        while d < dims {
            idx[d] += 1;
            if idx[d] < lists[d].len() {
                break;
            }
            idx[d] = 0;
            d += 1;
        }
        // Every dimension wrapped: the full product has been visited.
        if d == dims {
            break;
        }
    }
}
8693 cartesian2(&per_dim_indices, |multi| {
8694 if err_opt.is_some() {
8695 return;
8696 }
8697 let mut lin = 0usize;
8698 for d in 0..dims {
8699 let i0 = multi[d] - 1;
8700 lin += i0 * strides[d];
8701 }
8702 match &rhs_view {
8703 RhsView::Scalar(val) => t.data[lin] = *val,
8704 RhsView::Tensor {
8705 data,
8706 shape,
8707 strides: rstrides,
8708 } => {
8709 let mut rlin = 0usize;
8710 for d in 0..dims {
8711 let rhs_len = shape[d];
8712 let pos_in_dim = if rhs_len == 1 {
8713 0
8714 } else {
8715 *pos_maps[d].get(&multi[d]).unwrap_or(&0)
8716 };
8717 rlin += pos_in_dim * rstrides[d];
8718 }
8719 if rlin >= data.len() {
8720 err_opt =
8721 Some("shape mismatch for slice assign".to_string());
8722 return;
8723 }
8724 t.data[lin] = data[rlin];
8725 }
8726 }
8727 });
8728 if let Some(e) = err_opt {
8729 vm_bail!(e);
8730 }
8731 let view = runmat_accelerate_api::HostTensorView {
8732 data: &t.data,
8733 shape: &t.shape,
8734 };
8735 let new_h = provider
8736 .upload(&view)
8737 .map_err(|e| format!("reupload after range-end assign: {e}"))?;
8738 stack.push(Value::GpuTensor(new_h));
8739 }
8740 }
Value::Object(obj) => {
    // Rebuild the full subscript list as MATLAB-style values so the
    // class's subsasgn overload can interpret it: ":" for colon dims,
    // "end" for bare end dims, a {start, step, 'end', offset} cell for
    // range-with-end dims, and plain numerics for everything else.
    let mut idx_values: Vec<Value> = Vec::with_capacity(dims);
    let mut num_iter = 0usize;
    let mut rp_iter = 0usize;
    for d in 0..dims {
        let is_colon = (colon_mask & (1u32 << d)) != 0;
        let is_end = (end_mask & (1u32 << d)) != 0;
        if is_colon {
            idx_values.push(Value::String(":".to_string()));
            continue;
        }
        if is_end {
            idx_values.push(Value::String("end".to_string()));
            continue;
        }
        if let Some(pos) = range_dims.iter().position(|&rd| rd == d) {
            // NOTE(review): range params are consumed sequentially via
            // rp_iter while the end offset is looked up by position
            // (pos); these only agree when range_dims is ascending —
            // confirm that invariant holds at emit time.
            let (st, sp) = range_params[rp_iter];
            rp_iter += 1;
            let off = end_offsets[pos];
            let cell = runmat_builtins::CellArray::new(
                vec![
                    Value::Num(st),
                    Value::Num(sp),
                    Value::String("end".to_string()),
                    Value::Num(off as f64),
                ],
                1,
                4,
            )
            .map_err(|e| format!("obj range: {e}"))?;
            idx_values.push(Value::Cell(cell));
        } else {
            let v = numeric
                .get(num_iter)
                .ok_or(mex("MissingNumericIndex", "missing numeric index"))?;
            num_iter += 1;
            match v {
                Value::Num(n) => idx_values.push(Value::Num(*n)),
                Value::Int(i) => idx_values.push(Value::Num(i.to_f64())),
                Value::Tensor(t) => idx_values.push(Value::Tensor(t.clone())),
                other => {
                    return Err(format!(
                        "Unsupported index type for object: {other:?}"
                    ))
                }
            }
        }
    }
    // Delegate to the class's subsasgn with '()' indexing semantics.
    let cell = runmat_builtins::CellArray::new(idx_values, 1, dims)
        .map_err(|e| format!("subsasgn build error: {e}"))?;
    match runmat_runtime::call_builtin(
        "call_method",
        &[
            Value::Object(obj),
            Value::String("subsasgn".to_string()),
            Value::String("()".to_string()),
            Value::Cell(cell),
            rhs,
        ],
    ) {
        Ok(v) => stack.push(v),
        Err(e) => vm_bail!(e),
    }
}
8806 _ => vm_bail!("StoreRangeEnd only supports tensors currently".to_string()),
8807 }
8808 }
Instr::StoreSlice1DRangeEnd { has_step, offset } => {
    // 1-D linear range assignment with an end-relative upper bound:
    // base(start:step:end-offset) = rhs. Stack (top down): rhs,
    // [step], start, base.
    let rhs = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    let step_val: f64 = if has_step {
        let v: f64 = (&stack
            .pop()
            .ok_or(mex("StackUnderflow", "stack underflow"))?)
            .try_into()?;
        v
    } else {
        1.0
    };
    let start_val: f64 = (&stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?)
        .try_into()?;
    let base = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    // The base is mutated below, so drop any GPU-resident copy.
    #[cfg(feature = "native-accel")]
    clear_residency(&base);
    match base {
        Value::Tensor(mut t) => {
            let total = t.data.len();
            // 1-based inclusive upper bound, counted back from `end`.
            let end_idx = (total as i64) - offset;
            let mut cur = start_val as i64;
            let step_i = if step_val >= 0.0 {
                step_val as i64
            } else {
                -(step_val.abs() as i64)
            };
            if step_i == 0 {
                return Err(mex("IndexStepZero", "Index step cannot be zero"));
            }
            // RHS element source: scalar broadcasts; a tensor is
            // consumed element-by-element (last element repeated if it
            // runs short). NOTE(review): any other RHS silently writes
            // zeros rather than erroring — confirm intended.
            let rhs_vals: Vec<f64> = match rhs {
                Value::Num(n) => vec![n],
                Value::Tensor(rt) => rt.data.clone(),
                _ => vec![0.0],
            };
            let mut rpos = 0usize;
            if step_i > 0 {
                // Ascending walk; indices are 1-based, so idx0 == 0 or
                // idx0 > total terminates without writing.
                while cur as i64 <= end_idx {
                    let idx0 = cur as usize;
                    if idx0 == 0 || idx0 > total {
                        break;
                    }
                    let v = rhs_vals
                        .get(rpos)
                        .cloned()
                        .unwrap_or(*rhs_vals.last().unwrap_or(&0.0));
                    t.data[idx0 - 1] = v;
                    rpos += 1;
                    cur += step_i;
                }
            } else {
                // Descending walk toward the end-relative bound.
                while (cur as i64) >= end_idx {
                    let idx0 = cur as usize;
                    if idx0 == 0 || idx0 > total {
                        break;
                    }
                    let v = rhs_vals
                        .get(rpos)
                        .cloned()
                        .unwrap_or(*rhs_vals.last().unwrap_or(&0.0));
                    t.data[idx0 - 1] = v;
                    rpos += 1;
                    cur += step_i;
                }
            }
            stack.push(Value::Tensor(t));
        }
        _ => vm_bail!("Store range with end only supported on tensors".to_string()),
    }
}
8886 Instr::CreateCell2D(rows, cols) => {
8887 let mut elems = Vec::with_capacity(rows * cols);
8888 for _ in 0..rows * cols {
8889 elems.push(
8890 stack
8891 .pop()
8892 .ok_or(mex("StackUnderflow", "stack underflow"))?,
8893 );
8894 }
8895 elems.reverse();
8896 let ca = runmat_builtins::CellArray::new(elems, rows, cols)
8897 .map_err(|e| format!("Cell creation error: {e}"))?;
8898 stack.push(Value::Cell(ca));
8899 }
Instr::IndexCell(num_indices) => {
    // Brace indexing: base{idx...}. Subscripts were pushed left-to-
    // right, so they come off the stack reversed.
    let mut indices = Vec::with_capacity(num_indices);
    for _ in 0..num_indices {
        let v: f64 = (&stack
            .pop()
            .ok_or(mex("StackUnderflow", "stack underflow"))?)
            .try_into()?;
        indices.push(v as usize);
    }
    indices.reverse();
    let base = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    match base {
        Value::Object(obj) => {
            // Objects route '{}' indexing through their subsref overload.
            let cell = runmat_runtime::call_builtin(
                "__make_cell",
                &indices
                    .iter()
                    .map(|n| Value::Num(*n as f64))
                    .collect::<Vec<_>>(),
            )?;
            match runmat_runtime::call_builtin(
                "call_method",
                &[
                    Value::Object(obj),
                    Value::String("subsref".to_string()),
                    Value::String("{}".to_string()),
                    cell,
                ],
            ) {
                Ok(v) => stack.push(v),
                Err(e) => vm_bail!(e.to_string()),
            }
        }
        Value::Cell(ca) => match indices.len() {
            // c{i}: 1-based linear index into the element storage.
            1 => {
                let i = indices[0];
                if i == 0 || i > ca.data.len() {
                    return Err(mex(
                        "CellIndexOutOfBounds",
                        "Cell index out of bounds",
                    ));
                }
                stack.push((*ca.data[i - 1]).clone());
            }
            // c{r,c}: 1-based subscripts; storage is row-major here
            // (index = (r-1)*cols + (c-1)).
            2 => {
                let r = indices[0];
                let c = indices[1];
                if r == 0 || r > ca.rows || c == 0 || c > ca.cols {
                    return Err(mex(
                        "CellSubscriptOutOfBounds",
                        "Cell subscript out of bounds",
                    ));
                }
                stack.push((*ca.data[(r - 1) * ca.cols + (c - 1)]).clone());
            }
            _ => return Err("Unsupported number of cell indices".to_string()),
        },
        _ => return Err("Cell indexing on non-cell".to_string()),
    }
}
Instr::IndexCellExpand(num_indices, out_count) => {
    // Expand a cell comma-list (c{:} or c{i}/c{r,c}) into exactly
    // `out_count` stack slots, padding with 0 when fewer values exist.
    let mut indices = Vec::with_capacity(num_indices);
    if num_indices > 0 {
        for _ in 0..num_indices {
            let v: f64 = (&stack
                .pop()
                .ok_or(mex("StackUnderflow", "stack underflow"))?)
                .try_into()?;
            indices.push(v as usize);
        }
        indices.reverse();
    }
    let base = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    match base {
        Value::Cell(ca) => {
            let mut values: Vec<Value> = Vec::new();
            if indices.is_empty() {
                // c{:} — every element in storage order.
                values.extend(ca.data.iter().map(|p| (*(*p)).clone()));
            } else {
                match indices.len() {
                    1 => {
                        let i = indices[0];
                        if i == 0 || i > ca.data.len() {
                            return Err(mex(
                                "CellIndexOutOfBounds",
                                "Cell index out of bounds",
                            ));
                        }
                        values.push((*ca.data[i - 1]).clone());
                    }
                    2 => {
                        let r = indices[0];
                        let c = indices[1];
                        if r == 0 || r > ca.rows || c == 0 || c > ca.cols {
                            return Err(mex(
                                "CellSubscriptOutOfBounds",
                                "Cell subscript out of bounds",
                            ));
                        }
                        values.push((*ca.data[(r - 1) * ca.cols + (c - 1)]).clone());
                    }
                    _ => return Err("Unsupported number of cell indices".to_string()),
                }
            }
            // Truncate or zero-pad to the requested output arity.
            if values.len() >= out_count {
                for v in values.iter().take(out_count) {
                    stack.push(v.clone());
                }
            } else {
                for v in &values {
                    stack.push(v.clone());
                }
                for _ in values.len()..out_count {
                    stack.push(Value::Num(0.0));
                }
            }
        }
        Value::Object(obj) => {
            // Objects yield a single subsref('{}') result; any extra
            // requested outputs are zero-padded.
            let cell = runmat_runtime::call_builtin(
                "__make_cell",
                &indices
                    .iter()
                    .map(|n| Value::Num(*n as f64))
                    .collect::<Vec<_>>(),
            )?;
            let v = match runmat_runtime::call_builtin(
                "call_method",
                &[
                    Value::Object(obj),
                    Value::String("subsref".to_string()),
                    Value::String("{}".to_string()),
                    cell,
                ],
            ) {
                Ok(v) => v,
                Err(e) => vm_bail!(e.to_string()),
            };
            stack.push(v);
            for _ in 1..out_count {
                stack.push(Value::Num(0.0));
            }
        }
        _ => return Err("Cell expansion on non-cell".to_string()),
    }
}
Instr::Pop => {
    // Discard the top of stack; an already-empty stack is deliberately
    // ignored rather than treated as underflow.
    stack.pop();
}
9060 Instr::ReturnValue => {
9061 let return_value = stack
9062 .pop()
9063 .ok_or(mex("StackUnderflow", "stack underflow"))?;
9064 stack.push(return_value);
9065 interpreter_timing.flush_host_span("return_value", None);
9066 break;
9067 }
Instr::Return => {
    // Exit the dispatch loop without producing a value; flush any open
    // host-timing span first.
    interpreter_timing.flush_host_span("return", None);
    break;
}
Instr::StoreIndex(num_indices) => {
    // Scalar indexed assignment base(idx...) = rhs for tensors, GPU
    // tensors and objects. The operand layout on the stack can be
    // irregular at this point, so the base and its indices are located
    // heuristically rather than popped positionally.
    #[allow(unused)]
    if std::env::var("RUNMAT_DEBUG_INDEX").as_deref() == Ok("1") {
        // Diagnostic snapshot of the top of stack (opt-in via env var).
        let snap = stack
            .iter()
            .rev()
            .take(6)
            .map(|v| match v {
                Value::Object(_) => "Object",
                Value::Tensor(t) => {
                    eprintln!("StoreIndex pre-snap Tensor shape={:?}", t.shape);
                    "Tensor"
                }
                Value::GpuTensor(h) => {
                    eprintln!("StoreIndex pre-snap GpuTensor shape={:?}", h.shape);
                    "GpuTensor"
                }
                Value::Num(_) => "Num",
                Value::Int(_) => "Int",
                Value::String(_) => "String",
                Value::Cell(_) => "Cell",
                _ => "Other",
            })
            .collect::<Vec<_>>();
        eprintln!("StoreIndex pre-snap pc={} stack_top_types={:?}", pc, snap);
    }
    let rhs = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    // The base is the nearest assignable value below the RHS.
    let assignable = |v: &Value| {
        matches!(v, Value::Object(_) | Value::Tensor(_) | Value::GpuTensor(_))
    };
    let base_idx_opt = (0..stack.len()).rev().find(|&j| assignable(&stack[j]));
    let base_pos = if let Some(j) = base_idx_opt {
        j
    } else {
        return Err("Index assignment only for tensors".to_string());
    };
    let base = stack.remove(base_pos);
    // The base is mutated below, so drop any GPU-resident copy.
    #[cfg(feature = "native-accel")]
    clear_residency(&base);
    let mut indices: Vec<usize> = Vec::new();
    if num_indices > 0 {
        // Fast path: the indices sit contiguously just above the base's
        // old position and are all numeric.
        let mut contiguous_ok = true;
        if base_pos + num_indices > stack.len() {
            contiguous_ok = false;
        } else {
            for k in 0..num_indices {
                let idx_pos = base_pos + k;
                match (&stack[idx_pos]).try_into() as Result<f64, _> {
                    Ok(v) => indices.push(v as usize),
                    Err(_) => {
                        contiguous_ok = false;
                        indices.clear();
                        break;
                    }
                }
            }
        }
        if contiguous_ok {
            // Remove consumed index slots top-down so positions stay valid.
            for k in (0..num_indices).rev() {
                stack.remove(base_pos + k);
            }
        } else {
            indices.clear();
        }
    }
    // Base extents used to sanity-check heuristically scanned subscripts.
    let (rows_opt, cols_opt) = match &base {
        Value::Tensor(t) => (Some(t.rows()), Some(t.cols())),
        Value::GpuTensor(h) => (
            Some(h.shape.first().copied().unwrap_or(1).max(1)),
            Some(h.shape.get(1).copied().unwrap_or(1).max(1)),
        ),
        _ => (None, None),
    };
    if indices.is_empty() {
        // Slow path: scan downward (bounded to 12 slots) for numeric
        // values that plausibly form (i, j) subscripts within the
        // base's shape, or a single linear index.
        let mut numeric_above: Vec<(usize, usize)> = Vec::new();
        let mut scan_limit = 12usize;
        let mut kk = stack.len();
        while kk > 0 && scan_limit > 0 {
            let idx = kk - 1;
            if assignable(&stack[idx]) {
                break;
            }
            if let Ok(v) = (&stack[idx]).try_into() as Result<f64, _> {
                numeric_above.push((idx, v as usize));
            }
            kk -= 1;
            scan_limit -= 1;
        }
        if numeric_above.len() >= 2 {
            // Prefer the deepest adjacent pair that fits the base shape.
            let mut picked: Option<((usize, usize), (usize, usize))> = None;
            for w in (1..numeric_above.len()).rev() {
                let (j_idx, j_val) = numeric_above[w];
                let (i_idx, i_val) = numeric_above[w - 1];
                let fits = match (rows_opt, cols_opt) {
                    (Some(r), Some(c)) => {
                        i_val >= 1 && i_val <= r && j_val >= 1 && j_val <= c
                    }
                    _ => true,
                };
                if fits {
                    picked = Some(((i_idx, i_val), (j_idx, j_val)));
                    break;
                }
            }
            if let Some(((i_idx, i_val), (j_idx, j_val))) = picked {
                // Remove the higher position first so the lower stays valid.
                let mut to_remove = [i_idx, j_idx];
                to_remove.sort_unstable();
                stack.remove(to_remove[1]);
                stack.remove(to_remove[0]);
                indices = vec![i_val, j_val];
            }
        } else if numeric_above.len() == 1 {
            let (k_idx, k_val) = numeric_above[0];
            stack.remove(k_idx);
            indices = vec![k_val];
        }
    }
    if indices.is_empty() {
        return Err("Index assignment only for tensors".to_string());
    }
    match base {
        Value::Object(obj) => {
            // Delegate to the class's subsasgn with '()' semantics.
            let cell = runmat_runtime::call_builtin(
                "__make_cell",
                &indices
                    .iter()
                    .map(|n| Value::Num(*n as f64))
                    .collect::<Vec<_>>(),
            )?;
            match runmat_runtime::call_builtin(
                "call_method",
                &[
                    Value::Object(obj),
                    Value::String("subsasgn".to_string()),
                    Value::String("()".to_string()),
                    cell,
                    rhs,
                ],
            ) {
                Ok(v) => stack.push(v),
                Err(e) => vm_bail!(e.to_string()),
            }
        }
        Value::Tensor(mut t) => {
            // Coerce the RHS to a scalar: plain numbers, 1-element
            // tensors, and 1-element GPU tensors (downloaded) accepted.
            let rhs_to_scalar = |rhs: &Value| -> Result<f64, String> {
                match rhs {
                    Value::Num(x) => Ok(*x),
                    Value::Tensor(t2) => {
                        if t2.data.len() == 1 {
                            Ok(t2.data[0])
                        } else {
                            Err("RHS must be scalar".to_string())
                        }
                    }
                    Value::GpuTensor(h2) => {
                        let total = h2.shape.iter().copied().product::<usize>();
                        if total != 1 {
                            return Err("RHS must be scalar".to_string());
                        }
                        if let Some(p) = runmat_accelerate_api::provider() {
                            let host = p
                                .download(h2)
                                .map_err(|e| format!("gather rhs: {e}"))?;
                            Ok(host.data[0])
                        } else {
                            Err("No acceleration provider registered".to_string())
                        }
                    }
                    _ => rhs
                        .try_into()
                        .map_err(|_| "RHS must be numeric".to_string()),
                }
            };
            if indices.len() == 1 {
                // 1-based linear index over the 2-D extent.
                let total = t.rows() * t.cols();
                let idx = indices[0];
                if idx == 0 || idx > total {
                    return Err(mex("IndexOutOfBounds", "Index out of bounds"));
                }
                let val: f64 = rhs_to_scalar(&rhs)?;
                t.data[idx - 1] = val;
                stack.push(Value::Tensor(t));
            } else if indices.len() == 2 {
                let i = indices[0];
                let mut j = indices[1];
                let rows = t.rows();
                let cols = t.cols();
                // NOTE(review): the column subscript is clamped into
                // [1, cols] while the row subscript is strictly bounds-
                // checked — confirm this asymmetry is intended.
                if j == 0 {
                    j = 1;
                }
                if j > cols {
                    j = cols;
                }
                if i == 0 || i > rows {
                    if std::env::var("RUNMAT_DEBUG_INDEX").as_deref() == Ok("1") {
                        eprintln!(
                            "StoreIndex Tensor OOB: i={} j(clamped)={} rows={} cols={} shape={:?}",
                            i, j, rows, cols, t.shape
                        );
                    }
                    return Err(mex("SubscriptOutOfBounds", "Subscript out of bounds"));
                }
                let val: f64 = rhs_to_scalar(&rhs)?;
                // Column-major linearization: (i-1) + (j-1)*rows.
                let idx = (i - 1) + (j - 1) * rows;
                t.data[idx] = val;
                stack.push(Value::Tensor(t));
            } else {
                return Err("Only 1D/2D scalar assignment supported".to_string());
            }
        }
        Value::GpuTensor(h) => {
            // GPU base: download to host, mutate, re-upload.
            let provider = runmat_accelerate_api::provider()
                .ok_or_else(|| "No acceleration provider registered".to_string())?;
            let host = provider
                .download(&h)
                .map_err(|e| format!("gather for assignment: {e}"))?;
            let mut t = runmat_builtins::Tensor::new(host.data, host.shape)
                .map_err(|e| format!("assignment: {e}"))?;
            // Same scalar coercion as the host-tensor arm, reusing the
            // already-resolved provider for RHS gathers.
            let rhs_to_scalar = |rhs: &Value| -> Result<f64, String> {
                match rhs {
                    Value::Num(x) => Ok(*x),
                    Value::Tensor(t2) => {
                        if t2.data.len() == 1 {
                            Ok(t2.data[0])
                        } else {
                            Err("RHS must be scalar".to_string())
                        }
                    }
                    Value::GpuTensor(h2) => {
                        let total = h2.shape.iter().copied().product::<usize>();
                        if total != 1 {
                            return Err("RHS must be scalar".to_string());
                        }
                        let host2 = provider
                            .download(h2)
                            .map_err(|e| format!("gather rhs: {e}"))?;
                        Ok(host2.data[0])
                    }
                    _ => rhs
                        .try_into()
                        .map_err(|_| "RHS must be numeric".to_string()),
                }
            };
            if indices.len() == 1 {
                let total = t.rows() * t.cols();
                let idx = indices[0];
                if idx == 0 || idx > total {
                    return Err(mex("IndexOutOfBounds", "Index out of bounds"));
                }
                let val: f64 = rhs_to_scalar(&rhs)?;
                t.data[idx - 1] = val;
            } else if indices.len() == 2 {
                let i = indices[0];
                let mut j = indices[1];
                let rows = t.rows();
                let cols = t.cols();
                if j == 0 {
                    j = 1;
                }
                if j > cols {
                    j = cols;
                }
                if i == 0 || i > rows {
                    if std::env::var("RUNMAT_DEBUG_INDEX").as_deref() == Ok("1") {
                        eprintln!(
                            "StoreIndex GpuTensor OOB: i={} j(clamped)={} rows={} cols={} shape={:?}",
                            i, j, rows, cols, t.shape
                        );
                    }
                    return Err(mex("SubscriptOutOfBounds", "Subscript out of bounds"));
                }
                let val: f64 = rhs_to_scalar(&rhs)?;
                let idx = (i - 1) + (j - 1) * rows;
                t.data[idx] = val;
            } else if indices.is_empty() {
                // NOTE(review): unreachable — empty `indices` was
                // rejected before this match; this fill-all branch is
                // dead code.
                let val: f64 = rhs_to_scalar(&rhs)?;
                for k in 0..t.data.len() {
                    t.data[k] = val;
                }
            } else {
                return Err("Only 1D/2D scalar assignment supported".to_string());
            }
            let view = runmat_accelerate_api::HostTensorView {
                data: &t.data,
                shape: &t.shape,
            };
            let new_h = provider
                .upload(&view)
                .map_err(|e| format!("reupload after assignment: {e}"))?;
            stack.push(Value::GpuTensor(new_h));
        }
        _ => {
            if std::env::var("RUNMAT_DEBUG_INDEX").as_deref() == Ok("1") {
                let kind = |v: &Value| match v {
                    Value::Object(_) => "Object",
                    Value::Tensor(_) => "Tensor",
                    Value::GpuTensor(_) => "GpuTensor",
                    Value::Num(_) => "Num",
                    Value::Int(_) => "Int",
                    _ => "Other",
                };
                eprintln!(
                    "StoreIndex default-branch pc={} base_kind={} rhs_kind={} indices={:?}",
                    pc,
                    kind(&base),
                    kind(&rhs),
                    indices
                );
            }
            return Err("Index assignment only for tensors".to_string());
        }
    }
}
Instr::StoreIndexCell(num_indices) => {
    // Brace assignment: base{idx...} = rhs for cells, or subsasgn('{}')
    // for objects.
    let rhs = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    let mut indices = Vec::new();
    for _ in 0..num_indices {
        let v: f64 = (&stack
            .pop()
            .ok_or(mex("StackUnderflow", "stack underflow"))?)
            .try_into()?;
        indices.push(v as usize);
    }
    indices.reverse();
    let base = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    // The base is mutated below, so drop any GPU-resident copy.
    #[cfg(feature = "native-accel")]
    clear_residency(&base);
    match base {
        Value::Object(obj) => {
            let cell = runmat_builtins::CellArray::new(
                indices.iter().map(|n| Value::Num(*n as f64)).collect(),
                1,
                indices.len(),
            )
            .map_err(|e| format!("subsasgn build error: {e}"))?;
            match runmat_runtime::call_builtin(
                "call_method",
                &[
                    Value::Object(obj),
                    Value::String("subsasgn".to_string()),
                    Value::String("{}".to_string()),
                    Value::Cell(cell),
                    rhs,
                ],
            ) {
                Ok(v) => stack.push(v),
                Err(e) => vm_bail!(e.to_string()),
            }
        }
        Value::Cell(mut ca) => match indices.len() {
            // c{i} = rhs: 1-based linear index.
            1 => {
                let i = indices[0];
                if i == 0 || i > ca.data.len() {
                    return Err(mex(
                        "CellIndexOutOfBounds",
                        "Cell index out of bounds",
                    ));
                }
                // GC write barrier before overwriting the old element.
                if let Some(oldv) = ca.data.get(i - 1) {
                    runmat_gc::gc_record_write(oldv, &rhs);
                }
                *ca.data[i - 1] = rhs;
                stack.push(Value::Cell(ca));
            }
            // c{i,j} = rhs: 1-based subscripts, row-major storage.
            2 => {
                let i = indices[0];
                let j = indices[1];
                if i == 0 || i > ca.rows || j == 0 || j > ca.cols {
                    return Err(mex(
                        "CellSubscriptOutOfBounds",
                        "Cell subscript out of bounds",
                    ));
                }
                let lin = (i - 1) * ca.cols + (j - 1);
                if let Some(oldv) = ca.data.get(lin) {
                    runmat_gc::gc_record_write(oldv, &rhs);
                }
                *ca.data[lin] = rhs;
                stack.push(Value::Cell(ca));
            }
            _ => return Err("Unsupported number of cell indices".to_string()),
        },
        _ => return Err("Cell assignment on non-cell".to_string()),
    }
}
Instr::LoadMember(field) => {
    // Read obj.field / struct.field; a cell base maps the field access
    // over its elements.
    let base = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    match base {
        Value::Object(obj) => {
            if let Some((p, _owner)) =
                runmat_builtins::lookup_property(&obj.class_name, &field)
            {
                // Static properties must be read via a classref, and
                // private properties are not readable here.
                if p.is_static {
                    vm_bail!(format!(
                        "Property '{}' is static; use classref('{}').{}",
                        field, obj.class_name, field
                    ));
                }
                if p.get_access == runmat_builtins::Access::Private {
                    vm_bail!(format!("Property '{}' is private", field))
                }
                if p.is_dependent {
                    // Dependent property: try the get.<field> accessor
                    // first; on failure fall through to stored values.
                    let getter = format!("get.{field}");
                    match runmat_runtime::call_builtin(
                        &getter,
                        &[Value::Object(obj.clone())],
                    ) {
                        Ok(v) => {
                            stack.push(v);
                            continue;
                        }
                        Err(_e) => {}
                    }
                }
            }
            if let Some(v) = obj.properties.get(&field) {
                stack.push(v.clone());
            } else if let Some((p2, _)) =
                runmat_builtins::lookup_property(&obj.class_name, &field)
            {
                // Dependent property without a working getter: fall back
                // to its "<field>_backing" storage slot, if present.
                if p2.is_dependent {
                    let backing = format!("{field}_backing");
                    if let Some(vb) = obj.properties.get(&backing) {
                        stack.push(vb.clone());
                        continue;
                    }
                }
                // NOTE(review): a declared property with no stored value
                // (and no usable backing slot) falls through here and
                // pushes nothing onto the stack — confirm intended.
            } else if let Some(cls) = runmat_builtins::get_class(&obj.class_name) {
                // Undeclared property: delegate to subsref('.') if the
                // class overloads it.
                if cls.methods.contains_key("subsref") {
                    match runmat_runtime::call_builtin(
                        "call_method",
                        &[
                            Value::Object(obj),
                            Value::String("subsref".to_string()),
                            Value::String(".".to_string()),
                            Value::String(field),
                        ],
                    ) {
                        Ok(v) => stack.push(v),
                        Err(e) => vm_bail!(e.to_string()),
                    }
                } else {
                    vm_bail!(format!(
                        "Undefined property '{}' for class {}",
                        field, obj.class_name
                    ));
                }
            } else {
                vm_bail!(format!("Unknown class {}", obj.class_name));
            }
        }
        Value::Struct(st) => {
            if let Some(v) = st.fields.get(&field) {
                stack.push(v.clone());
            } else {
                vm_bail!(format!("Undefined field '{}'", field));
            }
        }
        Value::Cell(ca) => {
            // Element-wise field gather: struct elements contribute their
            // field value (0.0 when the field is missing); non-struct
            // elements pass through unchanged.
            let mut out: Vec<Value> = Vec::with_capacity(ca.data.len());
            for v in &ca.data {
                match &**v {
                    Value::Struct(st) => {
                        if let Some(fv) = st.fields.get(&field) {
                            out.push(fv.clone());
                        } else {
                            out.push(Value::Num(0.0));
                        }
                    }
                    other => {
                        out.push(other.clone());
                    }
                }
            }
            let new_cell = runmat_builtins::CellArray::new(out, ca.rows, ca.cols)
                .map_err(|e| format!("cell field gather: {e}"))?;
            stack.push(Value::Cell(new_cell));
        }
        _ => vm_bail!("LoadMember on non-object".to_string()),
    }
}
Instr::LoadMemberDynamic => {
    // Read base.(name): dynamic field access with the field name popped
    // from the stack.
    let name_val = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    let base = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    let name: String = (&name_val).try_into()?;
    match base {
        Value::Object(obj) => {
            // Access checks only. NOTE(review): unlike LoadMember, there
            // is no get.<name> accessor or backing-slot handling for
            // dependent properties here — confirm intended.
            if let Some((p, _owner)) =
                runmat_builtins::lookup_property(&obj.class_name, &name)
            {
                if p.is_static {
                    vm_bail!(format!(
                        "Property '{}' is static; use classref('{}').{}",
                        name, obj.class_name, name
                    ));
                }
                if p.get_access == runmat_builtins::Access::Private {
                    vm_bail!(format!("Property '{}' is private", name))
                }
            }
            if let Some(v) = obj.properties.get(&name) {
                stack.push(v.clone());
            } else if let Some(cls) = runmat_builtins::get_class(&obj.class_name) {
                // Undeclared property: delegate to subsref('.') if the
                // class overloads it.
                if cls.methods.contains_key("subsref") {
                    match runmat_runtime::call_builtin(
                        "call_method",
                        &[
                            Value::Object(obj),
                            Value::String("subsref".to_string()),
                            Value::String(".".to_string()),
                            Value::String(name),
                        ],
                    ) {
                        Ok(v) => stack.push(v),
                        Err(e) => vm_bail!(e.to_string()),
                    }
                } else {
                    vm_bail!(format!(
                        "Undefined property '{}' for class {}",
                        name, obj.class_name
                    ));
                }
            } else {
                vm_bail!(format!("Unknown class {}", obj.class_name));
            }
        }
        Value::Struct(st) => {
            if let Some(v) = st.fields.get(&name) {
                stack.push(v.clone());
            } else {
                vm_bail!(format!("Undefined field '{}'", name));
            }
        }
        _ => vm_bail!("LoadMemberDynamic on non-struct/object".to_string()),
    }
}
Instr::StoreMember(field) => {
    // Assign obj.field / struct.field / classref.field = rhs; a cell
    // base maps the assignment over its elements.
    let rhs = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    let base = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    match base {
        Value::Object(mut obj) => {
            if let Some((p, _owner)) =
                runmat_builtins::lookup_property(&obj.class_name, &field)
            {
                // Static properties must be set via a classref, and
                // set-private properties are rejected here.
                if p.is_static {
                    vm_bail!(format!(
                        "Property '{}' is static; use classref('{}').{}",
                        field, obj.class_name, field
                    ));
                }
                if p.set_access == runmat_builtins::Access::Private {
                    vm_bail!(format!("Property '{}' is private", field))
                }
                if p.is_dependent {
                    // Dependent property: prefer the set.<field> mutator;
                    // on failure fall through to direct storage below.
                    let setter = format!("set.{field}");
                    match runmat_runtime::call_builtin(
                        &setter,
                        &[Value::Object(obj.clone()), rhs.clone()],
                    ) {
                        Ok(v) => {
                            stack.push(v);
                            continue;
                        }
                        Err(_e) => {}
                    }
                }
                // GC write barrier before overwriting the stored value.
                if let Some(oldv) = obj.properties.get(&field) {
                    runmat_gc::gc_record_write(oldv, &rhs);
                }
                obj.properties.insert(field, rhs);
                stack.push(Value::Object(obj));
            } else if let Some(cls) = runmat_builtins::get_class(&obj.class_name) {
                // Undeclared property: delegate to subsasgn('.') if the
                // class overloads it.
                if cls.methods.contains_key("subsasgn") {
                    match runmat_runtime::call_builtin(
                        "call_method",
                        &[
                            Value::Object(obj),
                            Value::String("subsasgn".to_string()),
                            Value::String(".".to_string()),
                            Value::String(field),
                            rhs,
                        ],
                    ) {
                        Ok(v) => stack.push(v),
                        Err(e) => vm_bail!(e),
                    }
                } else {
                    vm_bail!(format!(
                        "Undefined property '{}' for class {}",
                        field, obj.class_name
                    ));
                }
            } else {
                vm_bail!(format!("Unknown class {}", obj.class_name));
            }
        }
        Value::ClassRef(cls) => {
            // classref(C).field = rhs: static property assignment stored
            // on the declaring owner class.
            if let Some((p, owner)) = runmat_builtins::lookup_property(&cls, &field) {
                if !p.is_static {
                    vm_bail!(format!("Property '{}' is not static", field));
                }
                if p.set_access == runmat_builtins::Access::Private {
                    vm_bail!(format!("Property '{}' is private", field))
                }
                runmat_builtins::set_static_property_value_in_owner(
                    &owner, &field, rhs,
                )?;
                stack.push(Value::ClassRef(cls));
            } else {
                vm_bail!(format!("Unknown property '{}' on class {}", field, cls));
            }
        }
        Value::Struct(mut st) => {
            if let Some(oldv) = st.fields.get(&field) {
                runmat_gc::gc_record_write(oldv, &rhs);
            }
            st.fields.insert(field, rhs);
            stack.push(Value::Struct(st));
        }
        Value::Cell(mut ca) => {
            // Cell base: a cell RHS of identical shape distributes
            // element-wise; any other RHS is broadcast to every element.
            // Non-struct elements are promoted to single-field structs.
            let is_cell_rhs = matches!(rhs, Value::Cell(_));
            let rhs_cell = if let Value::Cell(rc) = &rhs {
                Some(rc)
            } else {
                None
            };
            if is_cell_rhs {
                if let Some(rc) = rhs_cell {
                    if rc.rows != ca.rows || rc.cols != ca.cols {
                        vm_bail!(
                            "Field assignment: cell rhs shape mismatch".to_string()
                        );
                    }
                }
            }
            for i in 0..ca.data.len() {
                let rv = if let Some(rc) = rhs_cell {
                    (*rc.data[i]).clone()
                } else {
                    rhs.clone()
                };
                match &mut *ca.data[i] {
                    Value::Struct(st) => {
                        if let Some(oldv) = st.fields.get(&field) {
                            runmat_gc::gc_record_write(oldv, &rv);
                        }
                        st.fields.insert(field.clone(), rv);
                    }
                    other => {
                        let mut st = runmat_builtins::StructValue::new();
                        st.fields.insert(field.clone(), rv);
                        *other = Value::Struct(st);
                    }
                }
            }
            stack.push(Value::Cell(ca));
        }
        _ => vm_bail!("StoreMember on non-object".to_string()),
    }
}
Instr::StoreMemberDynamic => {
    // Assign base.(name) = rhs with the field name popped from the
    // stack. Stack (top down): rhs, name, base.
    let rhs = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    let name_val = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    let base = stack
        .pop()
        .ok_or(mex("StackUnderflow", "stack underflow"))?;
    let name: String = (&name_val).try_into()?;
    match base {
        Value::Object(mut obj) => {
            // Access checks only. NOTE(review): unlike StoreMember,
            // dependent properties are not routed through set.<name>,
            // and there is no subsasgn or ClassRef fallback — confirm.
            if let Some((p, _owner)) =
                runmat_builtins::lookup_property(&obj.class_name, &name)
            {
                if p.is_static {
                    vm_bail!(format!(
                        "Property '{}' is static; use classref('{}').{}",
                        name, obj.class_name, name
                    ));
                }
                if p.set_access == runmat_builtins::Access::Private {
                    vm_bail!(format!("Property '{}' is private", name))
                }
            }
            // GC write barrier before overwriting the stored value.
            if let Some(oldv) = obj.properties.get(&name) {
                runmat_gc::gc_record_write(oldv, &rhs);
            }
            obj.properties.insert(name, rhs);
            stack.push(Value::Object(obj));
        }
        Value::Struct(mut st) => {
            if let Some(oldv) = st.fields.get(&name) {
                runmat_gc::gc_record_write(oldv, &rhs);
            }
            st.fields.insert(name, rhs);
            stack.push(Value::Struct(st));
        }
        Value::Cell(mut ca) => {
            // Cell base: distribute a same-shape cell RHS element-wise,
            // otherwise broadcast; non-struct elements become structs.
            let is_cell_rhs = matches!(rhs, Value::Cell(_));
            let rhs_cell = if let Value::Cell(rc) = &rhs {
                Some(rc)
            } else {
                None
            };
            if is_cell_rhs {
                if let Some(rc) = rhs_cell {
                    if rc.rows != ca.rows || rc.cols != ca.cols {
                        vm_bail!(
                            "Field assignment: cell rhs shape mismatch".to_string()
                        );
                    }
                }
            }
            for i in 0..ca.data.len() {
                let rv = if let Some(rc) = rhs_cell {
                    (*rc.data[i]).clone()
                } else {
                    rhs.clone()
                };
                match &mut *ca.data[i] {
                    Value::Struct(st) => {
                        if let Some(oldv) = st.fields.get(&name) {
                            runmat_gc::gc_record_write(oldv, &rv);
                        }
                        st.fields.insert(name.clone(), rv);
                    }
                    other => {
                        let mut st = runmat_builtins::StructValue::new();
                        st.fields.insert(name.clone(), rv);
                        *other = Value::Struct(st);
                    }
                }
            }
            stack.push(Value::Cell(ca));
        }
        _ => vm_bail!("StoreMemberDynamic on non-struct/object".to_string()),
    }
}
9856 Instr::CallMethod(name, arg_count) => {
9857 let mut args = Vec::with_capacity(arg_count);
9859 for _ in 0..arg_count {
9860 args.push(
9861 stack
9862 .pop()
9863 .ok_or(mex("StackUnderflow", "stack underflow"))?,
9864 );
9865 }
9866 args.reverse();
9867 let base = stack
9868 .pop()
9869 .ok_or(mex("StackUnderflow", "stack underflow"))?;
9870 match base {
9871 Value::Object(obj) => {
9872 if let Some((m, _owner)) =
9874 runmat_builtins::lookup_method(&obj.class_name, &name)
9875 {
9876 if m.is_static {
9877 vm_bail!(format!(
9878 "Method '{}' is static; use classref({}).{}",
9879 name, obj.class_name, name
9880 ));
9881 }
9882 if m.access == runmat_builtins::Access::Private {
9883 vm_bail!(format!("Method '{}' is private", name))
9884 }
9885 let mut full_args = Vec::with_capacity(1 + args.len());
9886 full_args.push(Value::Object(obj));
9887 full_args.extend(args.into_iter());
9888 let v = runmat_runtime::call_builtin(&m.function_name, &full_args)?;
9889 stack.push(v);
9890 continue;
9891 }
9892 let qualified = format!("{}.{}", obj.class_name, name);
9893 let mut full_args = Vec::with_capacity(1 + args.len());
9894 full_args.push(Value::Object(obj));
9895 full_args.extend(args.into_iter());
9896 if let Ok(v) = runmat_runtime::call_builtin(&qualified, &full_args) {
9897 stack.push(v);
9898 } else {
9899 match runmat_runtime::call_builtin(&name, &full_args) {
9900 Ok(v) => {
9901 stack.push(v);
9902 }
9903 Err(e) => {
9904 vm_bail!(e);
9905 }
9906 }
9907 }
9908 }
9909 _ => vm_bail!("CallMethod on non-object".to_string()),
9910 }
9911 }
9912 Instr::LoadMethod(name) => {
9913 let base = stack
9915 .pop()
9916 .ok_or(mex("StackUnderflow", "stack underflow"))?;
9917 match base {
9918 Value::Object(obj) => {
9919 let func_qual = format!("{}.{}", obj.class_name, name);
9920 stack.push(Value::Closure(runmat_builtins::Closure {
9921 function_name: func_qual,
9922 captures: vec![Value::Object(obj)],
9923 }));
9924 }
9925 Value::ClassRef(cls) => {
9926 if let Some((m, _owner)) = runmat_builtins::lookup_method(&cls, &name) {
9928 if !m.is_static {
9929 vm_bail!(format!("Method '{}' is not static", name));
9930 }
9931 stack.push(Value::Closure(runmat_builtins::Closure {
9932 function_name: m.function_name,
9933 captures: vec![],
9934 }));
9935 } else {
9936 vm_bail!(format!("Unknown static method '{}' on class {}", name, cls));
9937 }
9938 }
9939 _ => vm_bail!("LoadMethod requires object or classref".to_string()),
9940 }
9941 }
9942 Instr::CreateClosure(func_name, capture_count) => {
9943 let mut captures = Vec::with_capacity(capture_count);
9944 for _ in 0..capture_count {
9945 captures.push(
9946 stack
9947 .pop()
9948 .ok_or(mex("StackUnderflow", "stack underflow"))?,
9949 );
9950 }
9951 captures.reverse();
9952 stack.push(Value::Closure(runmat_builtins::Closure {
9953 function_name: func_name,
9954 captures,
9955 }));
9956 }
9957 Instr::LoadStaticProperty(class_name, prop) => {
9958 if let Some((p, owner)) = runmat_builtins::lookup_property(&class_name, &prop) {
9960 if !p.is_static {
9961 vm_bail!(format!("Property '{}' is not static", prop));
9962 }
9963 if p.get_access == runmat_builtins::Access::Private {
9964 vm_bail!(format!("Property '{}' is private", prop))
9965 }
9966 if let Some(v) = runmat_builtins::get_static_property_value(&owner, &prop) {
9967 stack.push(v);
9968 } else if let Some(v) = &p.default_value {
9969 stack.push(v.clone());
9970 } else {
9971 stack.push(Value::Num(0.0));
9972 }
9973 } else {
9974 vm_bail!(format!(
9975 "Unknown property '{}' on class {}",
9976 prop, class_name
9977 ));
9978 }
9979 }
9980 Instr::CallStaticMethod(class_name, method, arg_count) => {
9981 let mut args = Vec::with_capacity(arg_count);
9982 for _ in 0..arg_count {
9983 args.push(
9984 stack
9985 .pop()
9986 .ok_or(mex("StackUnderflow", "stack underflow"))?,
9987 );
9988 }
9989 args.reverse();
9990 if let Some((m, _owner)) = runmat_builtins::lookup_method(&class_name, &method) {
9991 if !m.is_static {
9992 vm_bail!(format!("Method '{}' is not static", method));
9993 }
9994 if m.access == runmat_builtins::Access::Private {
9995 vm_bail!(format!("Method '{}' is private", method))
9996 }
9997 let v = match runmat_runtime::call_builtin(&m.function_name, &args) {
9998 Ok(v) => v,
9999 Err(e) => vm_bail!(e),
10000 };
10001 stack.push(v);
10002 } else {
10003 vm_bail!(format!(
10004 "Unknown static method '{}' on class {}",
10005 method, class_name
10006 ));
10007 }
10008 }
10009 Instr::RegisterClass {
10010 name,
10011 super_class,
10012 properties,
10013 methods,
10014 } => {
10015 let mut prop_map = std::collections::HashMap::new();
10017 for (p, is_static, get_access, set_access) in properties {
10018 let gacc = if get_access.eq_ignore_ascii_case("private") {
10019 runmat_builtins::Access::Private
10020 } else {
10021 runmat_builtins::Access::Public
10022 };
10023 let sacc = if set_access.eq_ignore_ascii_case("private") {
10024 runmat_builtins::Access::Private
10025 } else {
10026 runmat_builtins::Access::Public
10027 };
10028 let (is_dep, clean_name) = if let Some(stripped) = p.strip_prefix("@dep:") {
10029 (true, stripped.to_string())
10030 } else {
10031 (false, p.clone())
10032 };
10033 prop_map.insert(
10034 clean_name.clone(),
10035 runmat_builtins::PropertyDef {
10036 name: clean_name,
10037 is_static,
10038 is_dependent: is_dep,
10039 get_access: gacc,
10040 set_access: sacc,
10041 default_value: None,
10042 },
10043 );
10044 }
10045 let mut method_map = std::collections::HashMap::new();
10046 for (mname, fname, is_static, access) in methods {
10047 let access = if access.eq_ignore_ascii_case("private") {
10048 runmat_builtins::Access::Private
10049 } else {
10050 runmat_builtins::Access::Public
10051 };
10052 method_map.insert(
10053 mname.clone(),
10054 runmat_builtins::MethodDef {
10055 name: mname,
10056 is_static,
10057 access,
10058 function_name: fname,
10059 },
10060 );
10061 }
10062 let def = runmat_builtins::ClassDef {
10063 name: name.clone(),
10064 parent: super_class.clone(),
10065 properties: prop_map,
10066 methods: method_map,
10067 };
10068 runmat_builtins::register_class(def);
10069 }
10070 }
10071 if debug_stack {
10072 eprintln!("After exec pc={} stack_len={}", pc, stack.len());
10073 }
10074 pc += 1;
10075 }
10076 interpreter_timing.flush_host_span("loop_complete", None);
10077 for (i, var) in vars.iter().enumerate() {
10078 if i < initial_vars.len() {
10079 initial_vars[i] = var.clone();
10080 }
10081 }
10082 Ok(vars)
10083}
10084
10085fn stochastic_evolution_dispatch(
10086 state: Value,
10087 drift: Value,
10088 scale: Value,
10089 steps: Value,
10090) -> Result<Value, String> {
10091 let steps_u32 = parse_steps_value(&steps)?;
10092 if steps_u32 == 0 {
10093 return Ok(state);
10094 }
10095
10096 #[cfg(feature = "native-accel")]
10097 {
10098 if let Some(provider) = runmat_accelerate_api::provider() {
10099 let (state_handle, state_owned) = ensure_gpu_tensor_for_stochastic(provider, &state)?;
10100 let drift_scalar = scalar_from_value_scalar(&drift, "stochastic_evolution drift")?;
10101 let scale_scalar = scalar_from_value_scalar(&scale, "stochastic_evolution scale")?;
10102 let output = provider
10103 .stochastic_evolution(&state_handle, drift_scalar, scale_scalar, steps_u32)
10104 .map_err(|e| format!("stochastic_evolution: {e}"))?;
10105 if let Some(temp) = state_owned {
10106 let _ = provider.free(&temp);
10107 }
10108 fusion_residency::mark(&output);
10109 return Ok(Value::GpuTensor(output));
10110 }
10111 }
10112
10113 let gathered_state =
10114 gather_if_needed(&state).map_err(|e| format!("stochastic_evolution: {e}"))?;
10115 let mut tensor_value = match gathered_state {
10116 Value::Tensor(t) => t,
10117 other => tensor::value_into_tensor_for("stochastic_evolution", other)?,
10118 };
10119 let drift_scalar = scalar_from_value_scalar(&drift, "stochastic_evolution drift")?;
10120 let scale_scalar = scalar_from_value_scalar(&scale, "stochastic_evolution scale")?;
10121 stochastic_evolution_host(&mut tensor_value, drift_scalar, scale_scalar, steps_u32)?;
10122 Ok(Value::Tensor(tensor_value))
10123}
10124
10125fn scalar_from_value_scalar(value: &Value, label: &str) -> Result<f64, String> {
10126 match value {
10127 Value::Num(n) => Ok(*n),
10128 Value::Int(i) => Ok(i.to_f64()),
10129 Value::Tensor(t) if t.data.len() == 1 => Ok(t.data[0]),
10130 Value::Tensor(t) => Err(format!(
10131 "{label}: expected scalar tensor, got {} elements",
10132 t.data.len()
10133 )),
10134 Value::GpuTensor(_) => {
10135 let gathered = gather_if_needed(value).map_err(|e| format!("{label}: {e}"))?;
10136 scalar_from_value_scalar(&gathered, label)
10137 }
10138 other => Err(format!("{label}: expected numeric scalar, got {:?}", other)),
10139 }
10140}
10141
10142fn parse_steps_value(value: &Value) -> Result<u32, String> {
10143 let raw = scalar_from_value_scalar(value, "stochastic_evolution steps")?;
10144 if !raw.is_finite() || raw < 0.0 {
10145 return Err("stochastic_evolution: steps must be a non-negative scalar".to_string());
10146 }
10147 Ok(raw.round() as u32)
10148}
10149
#[cfg(feature = "native-accel")]
/// Ensure `value` is resident on the GPU for the stochastic-evolution kernel.
///
/// Returns the working handle plus an optional temporary handle: `None` when
/// the value was already a `GpuTensor`, `Some(handle)` when a fresh upload was
/// made that the caller is responsible for freeing afterwards.
fn ensure_gpu_tensor_for_stochastic(
    provider: &dyn runmat_accelerate_api::AccelProvider,
    value: &Value,
) -> Result<
    (
        runmat_accelerate_api::GpuTensorHandle,
        Option<runmat_accelerate_api::GpuTensorHandle>,
    ),
    String,
> {
    // Upload a host tensor and report the handle twice: once as the working
    // handle and once as the temporary the caller must free.
    fn upload_owned(
        provider: &dyn runmat_accelerate_api::AccelProvider,
        t: &runmat_builtins::Tensor,
    ) -> Result<
        (
            runmat_accelerate_api::GpuTensorHandle,
            Option<runmat_accelerate_api::GpuTensorHandle>,
        ),
        String,
    > {
        let handle = upload_tensor_view(provider, t)?;
        Ok((handle.clone(), Some(handle)))
    }

    match value {
        Value::GpuTensor(handle) => Ok((handle.clone(), None)),
        Value::Tensor(tensor) => upload_owned(provider, tensor),
        _ => {
            let gathered =
                gather_if_needed(value).map_err(|e| format!("stochastic_evolution: {e}"))?;
            match gathered {
                Value::Tensor(t) => upload_owned(provider, &t),
                other => {
                    let tensor = tensor::value_into_tensor_for("stochastic_evolution", other)?;
                    upload_owned(provider, &tensor)
                }
            }
        }
    }
}
10184
#[cfg(feature = "native-accel")]
/// Upload a host tensor to the accelerator, borrowing its buffers in place.
fn upload_tensor_view(
    provider: &dyn runmat_accelerate_api::AccelProvider,
    tensor: &runmat_builtins::Tensor,
) -> Result<runmat_accelerate_api::GpuTensorHandle, String> {
    // The view borrows `tensor` directly; the provider copies during upload.
    provider
        .upload(&runmat_accelerate_api::HostTensorView {
            data: &tensor.data,
            shape: &tensor.shape,
        })
        .map_err(|e| e.to_string())
}
10196
#[cfg(feature = "native-accel")]
#[inline]
/// Static tag naming the runtime kind of a `Value`, used in debug summaries.
///
/// The match is exhaustive (arms listed alphabetically), so adding a `Value`
/// variant forces a compile error here rather than a silent fallthrough.
fn value_kind(value: &Value) -> &'static str {
    match value {
        Value::Bool(_) => "Bool",
        Value::Cell(_) => "Cell",
        Value::CharArray(_) => "CharArray",
        Value::ClassRef(_) => "ClassRef",
        Value::Closure(_) => "Closure",
        Value::Complex(_, _) => "Complex",
        Value::ComplexTensor(_) => "ComplexTensor",
        Value::FunctionHandle(_) => "FunctionHandle",
        Value::GpuTensor(_) => "GpuTensor",
        Value::HandleObject(_) => "HandleObject",
        Value::Int(_) => "Int",
        Value::Listener(_) => "Listener",
        Value::LogicalArray(_) => "LogicalArray",
        Value::MException(_) => "MException",
        Value::Num(_) => "Num",
        Value::Object(_) => "Object",
        Value::String(_) => "String",
        Value::StringArray(_) => "StringArray",
        Value::Struct(_) => "Struct",
        Value::Tensor(_) => "Tensor",
    }
}
#[cfg(feature = "native-accel")]
#[inline]
/// One-line debug summary for fusion input `i`: tensors report their shape,
/// strings their contents, and everything else just its kind tag.
fn summarize_value(i: usize, v: &Value) -> String {
    match v {
        Value::GpuTensor(handle) => format!("in#{i}:GpuTensor shape={:?}", handle.shape),
        Value::Tensor(tensor) => format!("in#{i}:Tensor shape={:?}", tensor.shape),
        Value::String(text) => format!("in#{i}:String({})", text),
        other => format!("in#{i}:{}", value_kind(other)),
    }
}
#[cfg(feature = "native-accel")]
/// RAII guard that temporarily detaches the top slice of the VM operand stack
/// (see `new`'s `split_off`). On drop, any still-held values are pushed back,
/// restoring the stack on error paths; `commit` disarms that restoration.
struct StackSliceGuard<'a> {
    // Raw pointer back to the owning stack, written to in `Drop`.
    // NOTE(review): storing `&'a mut Vec<Value>` would remove the `unsafe` in
    // `Drop` — confirm no caller touches the stack while the guard is live.
    stack: *mut Vec<Value>,
    // The detached tail of the stack; set to `None` by `commit`.
    slice: Option<Vec<Value>>,
    // Ties the guard to the borrow it was created from so the pointer cannot
    // outlive the stack it points into.
    _marker: std::marker::PhantomData<&'a mut Vec<Value>>,
}
10239
#[cfg(feature = "native-accel")]
impl<'a> StackSliceGuard<'a> {
    /// Detach everything at or above `slice_start` from `stack` and arm the
    /// guard to restore it on drop.
    fn new(stack: &'a mut Vec<Value>, slice_start: usize) -> Self {
        let detached = stack.split_off(slice_start);
        StackSliceGuard {
            stack: stack as *mut Vec<Value>,
            slice: Some(detached),
            _marker: std::marker::PhantomData,
        }
    }

    /// Borrow the detached values; panics if the guard was already committed.
    fn slice(&self) -> &[Value] {
        match self.slice.as_deref() {
            Some(values) => values,
            None => panic!("stack slice missing"),
        }
    }

    /// Consume the guard without restoring the detached values to the stack.
    fn commit(mut self) {
        self.slice = None;
    }
}
10259
#[cfg(feature = "native-accel")]
impl Drop for StackSliceGuard<'_> {
    // Restore the detached values to the stack unless `commit` cleared them
    // (commit sets `slice` to `None`, making this a no-op).
    fn drop(&mut self) {
        if let Some(slice) = self.slice.take() {
            // SAFETY: `stack` was derived from the `&'a mut Vec<Value>` passed
            // to `new`, and `_marker` keeps that borrow alive for `'a`, so the
            // pointer is valid and non-dangling here. Soundness additionally
            // requires that no other live reference to the stack is in use at
            // this point — NOTE(review): confirm callers never touch the stack
            // while a guard is live.
            unsafe {
                (&mut *self.stack).extend(slice);
            }
        }
    }
}
10270
10271#[cfg(feature = "native-accel")]
10272fn try_execute_fusion_group(
10273 plan: &runmat_accelerate::FusionGroupPlan,
10274 graph: &runmat_accelerate::AccelGraph,
10275 stack: &mut Vec<Value>,
10276 vars: &mut [Value],
10277 context: &ExecutionContext,
10278) -> Result<Value, String> {
10279 let mut inputs: Vec<Option<Value>> = vec![None; plan.inputs.len()];
10280
10281 for (idx, value) in &plan.constants {
10282 if let Some(slot) = inputs.get_mut(*idx) {
10283 if slot.is_none() {
10284 *slot = Some(value.clone());
10285 }
10286 }
10287 }
10288
10289 for (idx, value_id) in plan.inputs.iter().enumerate() {
10290 let info = graph
10291 .value(*value_id)
10292 .ok_or_else(|| format!("fusion: missing value metadata for id {value_id}"))?;
10293 match &info.origin {
10294 ValueOrigin::Variable { kind, index } => {
10295 let value =
10296 match kind {
10297 VarKind::Global => vars
10298 .get(*index)
10299 .cloned()
10300 .ok_or_else(|| format!("fusion: global var {index} out of range"))?,
10301 VarKind::Local => {
10302 if let Some(frame) = context.call_stack.last() {
10303 let absolute = frame.locals_start + index;
10304 context.locals.get(absolute).cloned().ok_or_else(|| {
10305 format!("fusion: local var {index} unavailable")
10306 })?
10307 } else {
10308 vars.get(*index).cloned().ok_or_else(|| {
10309 format!("fusion: local var {index} unavailable")
10310 })?
10311 }
10312 }
10313 };
10314 debug_assert!(
10315 inputs[idx].is_none(),
10316 "fusion: duplicate input slot {} for plan {}",
10317 idx,
10318 plan.index
10319 );
10320 inputs[idx] = Some(value);
10321 }
10322 ValueOrigin::Constant | ValueOrigin::NodeOutput { .. } | ValueOrigin::Unknown => {}
10323 }
10324 }
10325
10326 if log::log_enabled!(log::Level::Debug) && fusion_debug_enabled() {
10327 let stack_needed_preview = plan.stack_pattern.len();
10328 let stack_snapshot: Vec<&Value> = stack.iter().rev().take(stack_needed_preview).collect();
10329 let stack_kinds: Vec<&'static str> =
10330 stack_snapshot.iter().rev().map(|v| value_kind(v)).collect();
10331 let input_meta: Vec<String> = plan
10332 .inputs
10333 .iter()
10334 .enumerate()
10335 .map(|(i, value_id)| {
10336 if let Some(info) = graph.value(*value_id) {
10337 format!("#{i}:id={} origin={:?}", value_id, info.origin)
10338 } else {
10339 format!("#{i}:id={} origin=<missing>", value_id)
10340 }
10341 })
10342 .collect();
10343 log::debug!(
10344 "fusion group {} gather: stack_depth={} stack_needed={} stack_kinds={:?} pattern={:?} inputs={:?}",
10345 plan.index,
10346 stack.len(),
10347 stack_needed_preview,
10348 stack_kinds,
10349 &plan.stack_pattern,
10350 input_meta
10351 );
10352 }
10353
10354 let pattern_len = plan.stack_pattern.len();
10355 if stack.len() < pattern_len {
10356 if fusion_debug_enabled() {
10357 log::debug!(
10358 "fusion stack underflow: plan={} needed={} available={} pattern={:?}",
10359 plan.index,
10360 pattern_len,
10361 stack.len(),
10362 plan.stack_pattern
10363 );
10364 }
10365 return Err("fusion: stack underflow gathering inputs".to_string());
10366 }
10367 let available = pattern_len;
10368 let slice_start = stack.len() - available;
10369 let stack_guard = StackSliceGuard::new(stack, slice_start);
10370 let slice = stack_guard.slice().to_vec();
10371 let mut consumed: Vec<Option<Value>> = vec![None; pattern_len];
10372 let skip = 0;
10373
10374 for (offset, input_idx) in plan.stack_pattern.iter().enumerate() {
10375 if offset < skip {
10376 continue;
10377 }
10378 let slice_idx = offset - skip;
10379 let Some(val) = slice.get(slice_idx).cloned() else {
10380 continue;
10381 };
10382 consumed[offset] = Some(val.clone());
10383 if inputs[*input_idx].is_none() {
10384 let allow_stack_value = if plan.group.kind.is_reduction() {
10387 matches!(val, Value::GpuTensor(_) | Value::Tensor(_))
10388 } else {
10389 true
10390 };
10391 if allow_stack_value {
10392 inputs[*input_idx] = Some(val);
10393 }
10394 }
10395 }
10396
10397 for (idx, slot) in inputs.iter_mut().enumerate() {
10398 if slot.is_some() {
10399 continue;
10400 }
10401 let vid = plan.inputs[idx];
10402 let info = graph.value(vid);
10403 if let Some(info) = info {
10404 match &info.origin {
10405 ValueOrigin::Variable { kind, index } => {
10406 let value_opt = match kind {
10407 VarKind::Global => vars.get(*index).cloned(),
10408 VarKind::Local => {
10409 if let Some(frame) = context.call_stack.last() {
10410 let absolute = frame.locals_start + index;
10411 context.locals.get(absolute).cloned()
10412 } else {
10413 vars.get(*index).cloned()
10414 }
10415 }
10416 };
10417 if let Some(value) = value_opt {
10418 *slot = Some(value);
10419 continue;
10420 }
10421 }
10422 ValueOrigin::Constant => {
10423 if let Some(value) = plan.const_values.get(&vid) {
10424 *slot = Some(value.clone());
10425 continue;
10426 }
10427 }
10428 _ => {}
10429 }
10430 }
10431 if slot.is_none() {
10432 if let Some(binding) = graph.var_binding(vid) {
10433 let value_opt = match binding.kind {
10434 VarKind::Global => vars.get(binding.index).cloned(),
10435 VarKind::Local => {
10436 if let Some(frame) = context.call_stack.last() {
10437 let absolute = frame.locals_start + binding.index;
10438 context.locals.get(absolute).cloned()
10439 } else {
10440 vars.get(binding.index).cloned()
10441 }
10442 }
10443 };
10444 if let Some(value) = value_opt {
10445 *slot = Some(value);
10446 continue;
10447 }
10448 }
10449 }
10450 if slot.is_none() {
10451 if let Some(info) = info {
10452 if let ValueOrigin::NodeOutput { node, .. } = info.origin {
10453 if let Some(binding) = graph.node_binding(node) {
10454 let value_opt = match binding.kind {
10455 VarKind::Global => vars.get(binding.index).cloned(),
10456 VarKind::Local => {
10457 if let Some(frame) = context.call_stack.last() {
10458 let absolute = frame.locals_start + binding.index;
10459 context.locals.get(absolute).cloned()
10460 } else {
10461 vars.get(binding.index).cloned()
10462 }
10463 }
10464 };
10465 if let Some(value) = value_opt {
10466 *slot = Some(value);
10467 continue;
10468 }
10469 }
10470 }
10471 }
10472 }
10473 if slot.is_none() {
10474 if let Some(value) = plan.const_values.get(&vid) {
10475 *slot = Some(value.clone());
10476 }
10477 }
10478 }
10479
10480 let inputs: Vec<Value> = inputs
10481 .into_iter()
10482 .map(|opt| opt.ok_or_else(|| "fusion: missing input value".to_string()))
10483 .collect::<Result<_, _>>()?;
10484
10485 if log::log_enabled!(log::Level::Debug) {
10487 let summaries: Vec<String> = inputs
10488 .iter()
10489 .enumerate()
10490 .map(|(i, v)| summarize_value(i, v))
10491 .collect();
10492 log::debug!("fusion inputs runtime: [{}]", summaries.join(", "));
10493 }
10494
10495 let request = FusionExecutionRequest { plan, inputs };
10496 log::debug!(
10497 "dispatch fusion kind {:?}, supported {}",
10498 plan.group.kind,
10499 plan.kernel.supported
10500 );
10501 if plan.group.kind.is_elementwise() {
10502 match execute_elementwise(request) {
10503 Ok(result) => {
10504 stack_guard.commit();
10505 Ok(result)
10506 }
10507 Err(err) => Err(err.to_string()),
10508 }
10509 } else if plan.group.kind.is_reduction() {
10510 let mut axis = 0usize;
10513 let mut reduce_all = matches!(plan.reduction_axes, Some(ReductionAxes::All));
10514 if let Some(ReductionAxes::Explicit(dims)) = &plan.reduction_axes {
10515 if let Some(first) = dims.first().copied() {
10516 axis = first.saturating_sub(1);
10517 }
10518 }
10519 if log::log_enabled!(log::Level::Debug) {
10521 let meta: Vec<String> = plan
10522 .inputs
10523 .iter()
10524 .map(|vid| {
10525 if let Some(info) = graph.value(*vid) {
10526 format!(
10527 "vid={} origin={:?} shape={:?}",
10528 vid, info.origin, info.shape
10529 )
10530 } else {
10531 format!("vid={} origin=<missing>", vid)
10532 }
10533 })
10534 .collect();
10535 log::debug!("reduction gather meta: [{}]", meta.join(", "));
10536 }
10537 let has_all = reduce_all
10539 || plan.constants.values().any(value_is_all_keyword)
10540 || plan.const_values.values().any(value_is_all_keyword);
10541 if has_all {
10542 reduce_all = true;
10543 }
10544 if reduce_all && fusion_debug_enabled() {
10545 log::debug!(
10546 "fusion reduction (all) meta: data_vid={:?} inputs={:?} stack_pattern={:?}",
10547 plan.reduction_data,
10548 plan.inputs,
10549 plan.stack_pattern
10550 );
10551 }
10552 if !reduce_all {
10553 for node_id in &plan.group.nodes {
10554 if let Some(node) = graph.node(*node_id) {
10555 if let runmat_accelerate::graph::AccelNodeLabel::Builtin { name } = &node.label
10556 {
10557 if name.eq_ignore_ascii_case("mean") {
10558 for input_vid in &node.inputs {
10559 if let Some(info) = graph.value(*input_vid) {
10560 if let Some(constant) = &info.constant {
10561 if value_is_all_keyword(constant) {
10562 reduce_all = true;
10563 break;
10564 }
10565 }
10566 }
10567 }
10568 }
10569 }
10570 }
10571 if reduce_all {
10572 break;
10573 }
10574 }
10575 }
10576 if !reduce_all {
10578 if let Some(dim_vid) = plan.reduction_dim {
10579 if let Some(cv) = plan.const_values.get(&dim_vid) {
10580 axis = match cv {
10581 Value::Num(n) if *n >= 1.0 => (*n as usize).saturating_sub(1),
10582 Value::Int(i) => (i.to_f64() as usize).saturating_sub(1),
10583 _ => axis,
10584 };
10585 } else if let Some(input_idx) = plan.inputs.iter().position(|v| *v == dim_vid) {
10586 if let Some(cv) = plan.constants.get(&input_idx) {
10587 axis = match cv {
10588 Value::Num(n) if *n >= 1.0 => (*n as usize).saturating_sub(1),
10589 Value::Int(i) => (i.to_f64() as usize).saturating_sub(1),
10590 _ => axis,
10591 };
10592 }
10593 }
10594 } else {
10595 if let Some(dim_const) = plan.constants.get(&1) {
10597 axis = match dim_const {
10598 Value::Num(n) if *n >= 1.0 => (*n as usize).saturating_sub(1),
10599 Value::Int(i) => (i.to_f64() as usize).saturating_sub(1),
10600 _ => axis,
10601 };
10602 }
10603 }
10604 }
10605 let (reduce_len, num_slices) = {
10606 let mut rows_cols: Option<(usize, usize)> = None;
10608 if let Some(shape) = plan.reduction_data_shape(graph) {
10610 if shape.len() >= 2 {
10611 rows_cols = Some((shape[0].max(1), shape[1].max(1)));
10612 } else if shape.len() == 1 {
10613 rows_cols = Some((shape[0].max(1), 1));
10614 }
10615 }
10616 if rows_cols.is_none() {
10618 for &vid in &plan.inputs {
10619 if let Some(binding) = graph.var_binding(vid) {
10620 let value_opt = match binding.kind {
10621 VarKind::Global => vars.get(binding.index).cloned(),
10622 VarKind::Local => {
10623 if let Some(frame) = context.call_stack.last() {
10624 let absolute = frame.locals_start + binding.index;
10625 context.locals.get(absolute).cloned()
10626 } else {
10627 vars.get(binding.index).cloned()
10628 }
10629 }
10630 };
10631 if let Some(value) = value_opt {
10632 match value {
10633 Value::GpuTensor(h) => {
10634 rows_cols = Some((
10635 h.shape.first().copied().unwrap_or(1).max(1),
10636 h.shape.get(1).copied().unwrap_or(1).max(1),
10637 ));
10638 break;
10639 }
10640 Value::Tensor(t) => {
10641 rows_cols = Some((
10642 t.shape.first().copied().unwrap_or(1).max(1),
10643 t.shape.get(1).copied().unwrap_or(1).max(1),
10644 ));
10645 break;
10646 }
10647 _ => {}
10648 }
10649 }
10650 }
10651 }
10652 }
10653 for v in consumed.iter().filter_map(|v| v.as_ref()) {
10655 match v {
10656 Value::GpuTensor(h) => {
10657 rows_cols = Some((
10658 h.shape.first().copied().unwrap_or(1).max(1),
10659 h.shape.get(1).copied().unwrap_or(1).max(1),
10660 ));
10661 break;
10662 }
10663 Value::Tensor(t) => {
10664 rows_cols = Some((
10665 t.shape.first().copied().unwrap_or(1).max(1),
10666 t.shape.get(1).copied().unwrap_or(1).max(1),
10667 ));
10668 break;
10669 }
10670 _ => {}
10671 }
10672 }
10673 let data_value_id: Option<runmat_accelerate::graph::ValueId> = plan.reduction_data;
10674
10675 if let Some(data_id) = data_value_id {
10676 if let Some(input_index) = plan.inputs.iter().position(|vid| *vid == data_id) {
10678 if let Some(stack_offset) = plan
10680 .stack_pattern
10681 .iter()
10682 .position(|&idx| idx == input_index)
10683 {
10684 if let Some(val) = consumed.get(stack_offset).and_then(|v| v.as_ref()) {
10685 match val {
10686 Value::GpuTensor(h) => {
10687 let r = h.shape.first().copied().unwrap_or(1).max(1);
10688 let c = h.shape.get(1).copied().unwrap_or(1).max(1);
10689 rows_cols = Some((r, c));
10690 }
10691 Value::Tensor(t) => {
10692 let r = t.shape.first().copied().unwrap_or(1).max(1);
10693 let c = t.shape.get(1).copied().unwrap_or(1).max(1);
10694 rows_cols = Some((r, c));
10695 }
10696 _ => {}
10697 }
10698 }
10699 }
10700 if rows_cols.is_none() {
10702 if let Some(val) = request.inputs.get(input_index) {
10703 match val {
10704 Value::GpuTensor(h) => {
10705 let r = h.shape.first().copied().unwrap_or(1).max(1);
10706 let c = h.shape.get(1).copied().unwrap_or(1).max(1);
10707 rows_cols = Some((r, c));
10708 }
10709 Value::Tensor(t) => {
10710 let r = t.shape.first().copied().unwrap_or(1).max(1);
10711 let c = t.shape.get(1).copied().unwrap_or(1).max(1);
10712 rows_cols = Some((r, c));
10713 }
10714 _ => {}
10715 }
10716 }
10717 }
10718 }
10719 if rows_cols.is_none() {
10720 if let Some(info) = graph.value(data_id) {
10721 if let ValueOrigin::Variable { kind, index } = &info.origin {
10723 let val = match kind {
10724 VarKind::Global => vars.get(*index).cloned(),
10725 VarKind::Local => {
10726 if let Some(frame) = context.call_stack.last() {
10727 let absolute = frame.locals_start + index;
10728 context.locals.get(absolute).cloned()
10729 } else {
10730 vars.get(*index).cloned()
10731 }
10732 }
10733 };
10734 if let Some(v) = val {
10735 match v {
10736 Value::GpuTensor(h) => {
10737 rows_cols = Some((
10738 h.shape.first().copied().unwrap_or(1).max(1),
10739 h.shape.get(1).copied().unwrap_or(1).max(1),
10740 ));
10741 }
10742 Value::Tensor(t) => {
10743 rows_cols = Some((
10744 t.shape.first().copied().unwrap_or(1).max(1),
10745 t.shape.get(1).copied().unwrap_or(1).max(1),
10746 ));
10747 }
10748 _ => {}
10749 }
10750 }
10751 }
10752 if rows_cols.is_none() {
10753 if let ShapeInfo::Tensor(dims) = &info.shape {
10754 if !dims.is_empty() {
10755 let r = dims.first().and_then(|d| *d).unwrap_or(1);
10756 let c = dims.get(1).and_then(|d| *d).unwrap_or(1);
10757 rows_cols = Some((r.max(1), c.max(1)));
10758 }
10759 }
10760 }
10761 }
10762 }
10763 }
10764
10765 if rows_cols.is_none() {
10767 for v in consumed.iter().filter_map(|v| v.as_ref()) {
10768 match v {
10769 Value::GpuTensor(h) => {
10770 rows_cols = Some((
10771 h.shape.first().copied().unwrap_or(1).max(1),
10772 h.shape.get(1).copied().unwrap_or(1).max(1),
10773 ));
10774 break;
10775 }
10776 Value::Tensor(t) => {
10777 rows_cols = Some((
10778 t.shape.first().copied().unwrap_or(1).max(1),
10779 t.shape.get(1).copied().unwrap_or(1).max(1),
10780 ));
10781 break;
10782 }
10783 _ => {}
10784 }
10785 }
10786 if rows_cols.is_none() {
10787 for v in &request.inputs {
10788 match v {
10789 Value::GpuTensor(h) => {
10790 rows_cols = Some((
10791 h.shape.first().copied().unwrap_or(1).max(1),
10792 h.shape.get(1).copied().unwrap_or(1).max(1),
10793 ));
10794 break;
10795 }
10796 Value::Tensor(t) => {
10797 rows_cols = Some((
10798 t.shape.first().copied().unwrap_or(1).max(1),
10799 t.shape.get(1).copied().unwrap_or(1).max(1),
10800 ));
10801 break;
10802 }
10803 _ => {}
10804 }
10805 }
10806 }
10807 }
10808 if rows_cols.is_none() {
10810 if let ShapeInfo::Tensor(dims) = &plan.group.shape {
10811 if !dims.is_empty() {
10812 let r = dims.first().and_then(|d| *d).unwrap_or(1);
10813 let c = dims.get(1).and_then(|d| *d).unwrap_or(1);
10814 rows_cols = Some((r.max(1), c.max(1)));
10815 }
10816 }
10817 }
10818
10819 let (r, c) = rows_cols.unwrap_or((1, 1));
10820 if reduce_all {
10821 let mut total_elems: Option<usize> = None;
10822 let mut total_from_operand = false;
10823 if let Some(shape) = plan.reduction_data_shape(graph) {
10825 let prod = shape.into_iter().fold(1usize, |acc, dim| {
10826 let d = dim.max(1);
10827 acc.saturating_mul(d)
10828 });
10829 total_from_operand = true;
10830 total_elems = Some(prod.max(1));
10831 }
10832 if total_elems.is_none() {
10834 let inspect_value = |value: &Value| -> Option<usize> {
10835 match value {
10836 Value::GpuTensor(handle) => {
10837 if handle.shape.is_empty() {
10838 Some(1)
10839 } else {
10840 Some(
10841 handle
10842 .shape
10843 .iter()
10844 .copied()
10845 .map(|d| d.max(1))
10846 .fold(1usize, |acc, dim| acc.saturating_mul(dim)),
10847 )
10848 }
10849 }
10850 Value::Tensor(tensor) => {
10851 if tensor.shape.is_empty() {
10852 Some(1)
10853 } else {
10854 Some(
10855 tensor
10856 .shape
10857 .iter()
10858 .copied()
10859 .map(|d| d.max(1))
10860 .fold(1usize, |acc, dim| acc.saturating_mul(dim)),
10861 )
10862 }
10863 }
10864 _ => None,
10865 }
10866 };
10867 for value in consumed.iter().filter_map(|v| v.as_ref()) {
10868 if let Some(prod) = inspect_value(value) {
10869 total_from_operand = true;
10870 total_elems = Some(prod.max(1));
10871 break;
10872 }
10873 }
10874 if total_elems.is_none() {
10875 for value in &request.inputs {
10876 if let Some(prod) = inspect_value(value) {
10877 total_from_operand = true;
10878 total_elems = Some(prod.max(1));
10879 break;
10880 }
10881 }
10882 }
10883 }
10884 if total_elems.is_none() {
10886 if let Some(ec) = plan.element_count() {
10887 total_elems = Some(ec.max(1));
10888 }
10889 }
10890 if total_elems.is_none() || !total_from_operand {
10891 if fusion_debug_enabled() {
10892 log::debug!(
10893 "fusion reduction (all): operand extent unknown (source: {:?}); falling back to provider path",
10894 if total_from_operand { "runtime" } else { "output_shape" }
10895 );
10896 }
10897 return Err("fusion: reduction all extent unknown".to_string());
10898 }
10899 let total = total_elems.unwrap();
10900 if fusion_debug_enabled() {
10901 log::debug!(
10902 "fusion reduction (all): total_elems={} fallback_rows={} fallback_cols={}",
10903 total,
10904 r,
10905 c
10906 );
10907 }
10908 (total, 1usize)
10909 } else {
10910 if fusion_debug_enabled() {
10911 if r == 1 && c == 1 {
10912 log::debug!(
10913 "fusion reduction: unresolved shape (defaulted to 1x1); axis={}, constants={:?}",
10914 axis, plan.constants
10915 );
10916 } else {
10917 log::debug!(
10918 "fusion reduction: resolved shape rows={} cols={} axis={} constants={:?}",
10919 r,
10920 c,
10921 axis,
10922 plan.constants
10923 );
10924 }
10925 }
10926 if axis == 0 {
10927 (r, c)
10928 } else {
10929 (c, r)
10930 }
10931 }
10932 };
10933 if fusion_debug_enabled() {
10934 log::debug!(
10935 "fusion reduction: axis={} reduce_len={} num_slices={} constants={:?}",
10936 axis,
10937 reduce_len,
10938 num_slices,
10939 plan.constants
10940 );
10941 }
10942 if log::log_enabled!(log::Level::Debug) && fusion_debug_enabled() {
10943 let _rt_inputs: Vec<String> = request
10944 .inputs
10945 .iter()
10946 .enumerate()
10947 .map(|(i, v)| summarize_value(i, v))
10948 .collect();
10949 let _plan_inputs: Vec<String> = plan
10950 .inputs
10951 .iter()
10952 .map(|vid| {
10953 if let Some(info) = graph.value(*vid) {
10954 format!(
10955 "vid={} origin={:?} shape={:?}",
10956 vid, info.origin, info.shape
10957 )
10958 } else {
10959 format!("vid={} origin=<missing>", vid)
10960 }
10961 })
10962 .collect();
10963 log::debug!("reduction inputs: [{}]", _rt_inputs.join(", "));
10965 }
10966 let looks_wrong = reduce_len == 1 && num_slices == 1 && {
10968 let mut big = false;
10969 let mut check_val = |v: &Value| match v {
10970 Value::GpuTensor(h) => {
10971 let prod = h.shape.iter().copied().product::<usize>();
10972 if prod > 1 {
10973 big = true;
10974 }
10975 }
10976 Value::Tensor(t) => {
10977 let prod = t.shape.iter().copied().product::<usize>();
10978 if prod > 1 {
10979 big = true;
10980 }
10981 }
10982 _ => {}
10983 };
10984 for v in consumed.iter().filter_map(|v| v.as_ref()) {
10985 check_val(v);
10986 }
10987 for v in &request.inputs {
10988 check_val(v);
10989 }
10990 big
10991 };
10992 if looks_wrong {
10993 log::debug!(
10994 "fusion reduction: skipping fusion due to unresolved shape; falling back to provider path"
10995 );
10996 return Err("fusion: reduction shape unresolved".to_string());
10997 }
10998
10999 if std::env::var("RUNMAT_DISABLE_FUSED_REDUCTION")
11001 .ok()
11002 .as_deref()
11003 == Some("1")
11004 {
11005 return Err("fusion: fused reductions disabled".to_string());
11006 }
11007 let workgroup_size = 256u32;
11008 if log::log_enabled!(log::Level::Debug) && fusion_debug_enabled() {
11009 let _rt_inputs: Vec<String> = request
11010 .inputs
11011 .iter()
11012 .enumerate()
11013 .map(|(i, v)| summarize_value(i, v))
11014 .collect();
11015 let _plan_inputs: Vec<String> = plan
11016 .inputs
11017 .iter()
11018 .map(|vid| {
11019 if let Some(info) = graph.value(*vid) {
11020 format!(
11021 "vid={} origin={:?} shape={:?}",
11022 vid, info.origin, info.shape
11023 )
11024 } else {
11025 format!("vid={} origin=<missing>", vid)
11026 }
11027 })
11028 .collect();
11029 log::debug!(
11030 "reduction axis={} reduce_len={} num_slices={}",
11031 axis,
11032 reduce_len,
11033 num_slices
11034 );
11035 }
11036 match execute_reduction(request, reduce_len, num_slices, workgroup_size) {
11037 Ok(result) => {
11038 stack_guard.commit();
11039 Ok(result)
11040 }
11041 Err(err) => Err(err.to_string()),
11042 }
11043 } else if plan.group.kind == FusionKind::CenteredGram {
11044 match execute_centered_gram(request) {
11045 Ok(result) => {
11046 stack_guard.commit();
11047 Ok(result)
11048 }
11049 Err(err) => Err(err.to_string()),
11050 }
11051 } else if plan.group.kind == FusionKind::PowerStepNormalize {
11052 match execute_power_step_normalize(request) {
11053 Ok(result) => {
11054 stack_guard.commit();
11055 Ok(result)
11056 }
11057 Err(err) => Err(err.to_string()),
11058 }
11059 } else if plan.group.kind == FusionKind::ExplainedVariance {
11060 log::debug!("explained variance plan inputs {:?}", plan.inputs);
11061 match execute_explained_variance(request) {
11062 Ok(result) => {
11063 stack_guard.commit();
11064 Ok(result)
11065 }
11066 Err(err) => {
11067 log::debug!("explained variance fusion fallback: {}", err);
11068 Err(err.to_string())
11069 }
11070 }
11071 } else if plan.group.kind == FusionKind::MatmulEpilogue {
11072 match execute_matmul_epilogue(request) {
11073 Ok(result) => {
11074 stack_guard.commit();
11075 Ok(result)
11076 }
11077 Err(err) => Err(err.to_string()),
11078 }
11079 } else if plan.group.kind == FusionKind::ImageNormalize {
11080 match execute_image_normalize(request) {
11081 Ok(result) => {
11082 stack_guard.commit();
11083 Ok(result)
11084 }
11085 Err(err) => Err(err.to_string()),
11086 }
11087 } else {
11088 Err("fusion: unsupported fusion kind".to_string())
11090 }
11091}
11092
#[cfg(feature = "native-accel")]
fn clear_residency(value: &Value) {
    // Only GPU tensor handles carry fusion residency bookkeeping; every
    // other `Value` variant has nothing to clear and is ignored.
    let Value::GpuTensor(handle) = value else {
        return;
    };
    fusion_residency::clear(handle);
}
11099
11100fn parse_exception(err: &str) -> runmat_builtins::MException {
11101 if let Some(idx) = err.rfind(": ") {
11103 let (id, msg) = err.split_at(idx);
11104 let message = msg.trim_start_matches(':').trim().to_string();
11105 let ident = if id.trim().is_empty() {
11106 format!("{ERROR_NAMESPACE}:error")
11107 } else {
11108 id.trim().to_string()
11109 };
11110 return runmat_builtins::MException::new(ident, message);
11111 }
11112 if let Some(idx) = err.rfind(':') {
11114 let (id, msg) = err.split_at(idx);
11115 let message = msg.trim_start_matches(':').trim().to_string();
11116 let ident = if id.trim().is_empty() {
11117 format!("{ERROR_NAMESPACE}:error")
11118 } else {
11119 id.trim().to_string()
11120 };
11121 runmat_builtins::MException::new(ident, message)
11122 } else {
11123 runmat_builtins::MException::new(format!("{ERROR_NAMESPACE}:error"), err.to_string())
11124 }
11125}
11126
11127pub fn interpret(bytecode: &Bytecode) -> Result<Vec<Value>, String> {
11129 let mut vars = vec![Value::Num(0.0); bytecode.var_count];
11130 interpret_with_vars(bytecode, &mut vars, Some("<main>"))
11131}
11132
11133pub fn interpret_function(bytecode: &Bytecode, vars: Vec<Value>) -> Result<Vec<Value>, String> {
11134 interpret_function_with_counts(bytecode, vars, "<anonymous>", 0, 0)
11136}
11137
11138fn interpret_function_with_counts(
11139 bytecode: &Bytecode,
11140 mut vars: Vec<Value>,
11141 name: &str,
11142 out_count: usize,
11143 in_count: usize,
11144) -> Result<Vec<Value>, String> {
11145 let res = CALL_COUNTS.with(|cc| {
11147 cc.borrow_mut().push((in_count, out_count));
11148 let r = interpret_with_vars(bytecode, &mut vars, Some(name));
11149 cc.borrow_mut().pop();
11150 r
11151 });
11152 let func_name = name.to_string();
11154 for instr in &bytecode.instructions {
11155 match instr {
11156 crate::instr::Instr::DeclarePersistent(indices) => {
11157 for &i in indices {
11158 if i < vars.len() {
11159 let key = (func_name.clone(), i);
11160 PERSISTENTS.with(|p| {
11161 p.borrow_mut().insert(key, vars[i].clone());
11162 });
11163 }
11164 }
11165 }
11166 crate::instr::Instr::DeclarePersistentNamed(indices, names) => {
11167 for (pos, &i) in indices.iter().enumerate() {
11168 if i < vars.len() {
11169 let key = (func_name.clone(), i);
11170 let name_key = (
11171 func_name.clone(),
11172 names
11173 .get(pos)
11174 .cloned()
11175 .unwrap_or_else(|| format!("var_{i}")),
11176 );
11177 let val = vars[i].clone();
11178 PERSISTENTS.with(|p| {
11179 p.borrow_mut().insert(key, val.clone());
11180 });
11181 PERSISTENTS_BY_NAME.with(|p| {
11182 p.borrow_mut().insert(name_key, val);
11183 });
11184 }
11185 }
11186 }
11187 _ => {}
11188 }
11189 }
11190 res
11191}