minarrow 0.10.0

Apache Arrow-compatible, Rust-first columnar data library for high-performance computing, native streaming, and embedded workloads. Minimal dependencies, ultra-low-latency access, automatic 64-byte SIMD alignment, and fast compile times. Great for real-time analytics, HPC pipelines, and systems integration.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
// Copyright 2025 Peter Garfield Bower
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! # **Matrix Module** - *De-facto Matrix Memory Layout for BLAS/LAPACK ecosystem compatibility*
//!
//! Dense column-major matrix type for high-performance linear algebra.
//! BLAS/LAPACK compatible with built-inconversions from `Table` data.

use std::fmt;
use std::sync::Arc;

use crate::enums::error::MinarrowError;
use crate::enums::shape_dim::ShapeDim;
use crate::structs::buffer::Buffer;
use crate::structs::shared_buffer::SharedBuffer;
use crate::traits::{concatenate::Concatenate, shape::Shape};
use crate::{Array, Field, FieldArray, FloatArray, NumericArray, Table, Vec64};

/// # Matrix
///
/// Column-major dense matrix.
///
/// ### Description
/// This struct is compatible with Arrow, LAPACK, BLAS, and all
/// column-major numeric routines.
///
/// **This is an optional extra enabled by the `matrix` feature,
/// and is not part of the *`Apache Arrow`* framework**.
///
/// ### Properties
/// - `n_rows`: Logical number of rows.
/// - `n_cols`: Number of columns.
/// - `stride`: Physical elements per column in the buffer. Padded to 8-element
///   (64-byte) boundaries so every column starts SIMD-aligned. This is the
///   BLAS leading dimension (lda). Always `>= n_rows`.
/// - `data`: Flat buffer in column-major order with stride padding.
/// - `name`: Optional matrix name for diagnostics.
///
/// ### Null handling
/// - It is dense - nulls can be represented through `f64::NAN`
/// - However this is not always reliable, as a single *NaN* can affect vectorised
/// calculations when integrating with various frameworks.
#[repr(C, align(64))]
#[derive(Clone, PartialEq)]
pub struct Matrix {
    pub n_rows: usize,
    pub n_cols: usize,
    /// Physical column stride in elements, padded so each column is 64-byte aligned.
    pub stride: usize,
    pub data: Vec64<f64>,
    pub name: Option<String>,
}

/// Number of f64 elements per 64-byte alignment boundary.
const ALIGN_ELEMS: usize = 64 / std::mem::size_of::<f64>(); // 8

/// Round up to next multiple of ALIGN_ELEMS for 64-byte column alignment.
#[inline]
const fn aligned_stride(n_rows: usize) -> usize {
    (n_rows + ALIGN_ELEMS - 1) & !(ALIGN_ELEMS - 1)
}

impl Matrix {
    /// Constructs a new dense Matrix with shape and optional name.
    /// Data buffer is zeroed. Columns are padded to 64-byte alignment.
    pub fn new(n_rows: usize, n_cols: usize, name: Option<String>) -> Self {
        let stride = aligned_stride(n_rows);
        let len = stride * n_cols;
        let mut data = Vec64::with_capacity(len);
        data.0.resize(len, 0.0);
        Matrix { n_rows, n_cols, stride, data, name }
    }

    /// Constructs a Matrix from a pre-padded Vec64 buffer.
    /// The buffer must already have `stride * n_cols` elements with the correct
    /// stride layout. Use `from_f64_unaligned` if your data is unpadded.
    pub fn from_f64_aligned(data: Vec64<f64>, n_rows: usize, n_cols: usize, name: Option<String>) -> Self {
        let stride = aligned_stride(n_rows);
        assert_eq!(
            data.len(),
            stride * n_cols,
            "Matrix: padded buffer length does not match stride * n_cols"
        );
        Matrix { n_rows, n_cols, stride, data, name }
    }

    /// Constructs a Matrix from a flat column-major buffer without stride padding.
    /// The data is re-laid out with 64-byte aligned column padding.
    pub fn from_f64_unaligned(src: &[f64], n_rows: usize, n_cols: usize, name: Option<String>) -> Self {
        assert_eq!(
            src.len(),
            n_rows * n_cols,
            "Matrix shape does not match buffer length"
        );
        let stride = aligned_stride(n_rows);
        if stride == n_rows {
            // No padding needed, take ownership directly
            let data = Vec64::from(src);
            return Matrix { n_rows, n_cols, stride, data, name };
        }
        // Re-layout with padding between columns
        let mut data = Vec64::with_capacity(stride * n_cols);
        data.0.resize(stride * n_cols, 0.0);
        for col in 0..n_cols {
            let src_start = col * n_rows;
            let dst_start = col * stride;
            data.as_mut_slice()[dst_start..dst_start + n_rows]
                .copy_from_slice(&src[src_start..src_start + n_rows]);
        }
        Matrix { n_rows, n_cols, stride, data, name }
    }

    /// Returns the value at (row, col) (0-based). Panics if out of bounds.
    #[inline]
    pub fn get(&self, row: usize, col: usize) -> f64 {
        debug_assert!(row < self.n_rows, "Row out of bounds");
        debug_assert!(col < self.n_cols, "Col out of bounds");
        self.data[col * self.stride + row]
    }

    /// Sets the value at (row, col) (0-based). Panics if out of bounds.
    #[inline]
    pub fn set(&mut self, row: usize, col: usize, value: f64) {
        debug_assert!(row < self.n_rows, "Row out of bounds");
        debug_assert!(col < self.n_cols, "Col out of bounds");
        self.data[col * self.stride + row] = value;
    }

    /// Returns true if the matrix is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.n_rows == 0 || self.n_cols == 0
    }

    /// Returns the logical number of elements (n_rows * n_cols), not including padding.
    #[inline]
    pub fn len(&self) -> usize {
        self.n_rows * self.n_cols
    }

    /// Returns an immutable reference to the full flat buffer including padding.
    #[inline]
    pub fn as_slice(&self) -> &[f64] {
        &self.data
    }

    /// Returns a mutable reference to the full flat buffer including padding.
    #[inline]
    pub fn as_mut_slice(&mut self) -> &mut [f64] {
        &mut self.data
    }

    /// Returns a view of the matrix as a slice of columns (logical rows only, no padding).
    pub fn columns(&self) -> Vec<&[f64]> {
        (0..self.n_cols)
            .map(|col| &self.data[(col * self.stride)..(col * self.stride + self.n_rows)])
            .collect()
    }

    /// Returns a vector of mutable slices, each corresponding to a column.
    pub fn columns_mut(&mut self) -> Vec<&mut [f64]> {
        let n_rows = self.n_rows;
        let stride = self.stride;
        let n_cols = self.n_cols;
        let ptr = self.data.as_mut_slice().as_mut_ptr();
        let mut result = Vec::with_capacity(n_cols);

        for col in 0..n_cols {
            let start = col * stride;
            // SAFETY: each slice is within bounds and non-overlapping,
            // we have exclusive &mut access to self.
            unsafe {
                let col_ptr = ptr.add(start);
                let slice = std::slice::from_raw_parts_mut(col_ptr, n_rows);
                result.push(slice);
            }
        }
        result
    }

    /// Returns a single column as a slice, panics if col out of bounds.
    #[inline]
    pub fn col(&self, col: usize) -> &[f64] {
        debug_assert!(col < self.n_cols, "Col out of bounds");
        &self.data[(col * self.stride)..(col * self.stride + self.n_rows)]
    }

    /// Returns a single column as a mutable slice, panics if col out of bounds.
    #[inline]
    pub fn col_mut(&mut self, col: usize) -> &mut [f64] {
        debug_assert!(col < self.n_cols, "Col out of bounds");
        let start = col * self.stride;
        &mut self.data[start..start + self.n_rows]
    }

    /// Returns a single row as an owned Vec.
    #[inline]
    pub fn row(&self, row: usize) -> Vec<f64> {
        debug_assert!(row < self.n_rows, "Row out of bounds");
        (0..self.n_cols).map(|col| self.data[col * self.stride + row]).collect()
    }

    /// Sets the matrix name.
    #[inline]
    pub fn set_name(&mut self, name: impl Into<String>) {
        self.name = Some(name.into());
    }

    /// Returns the number of columns.
    pub fn n_cols(&self) -> usize {
        self.n_cols
    }

    /// Returns the number of rows.
    #[inline]
    pub fn n_rows(&self) -> usize {
        self.n_rows
    }

    // ********************** BLAS/LAPACK Compatibility **************

    /// Number of rows as i32 for BLAS parameter passing.
    #[inline]
    pub fn m(&self) -> i32 {
        self.n_rows as i32
    }

    /// Number of columns as i32 for BLAS parameter passing.
    #[inline]
    pub fn n(&self) -> i32 {
        self.n_cols as i32
    }

    /// Leading dimension for BLAS - equals stride, which is n_rows padded to
    /// 64-byte alignment. Pass this as the `lda` parameter to all BLAS/LAPACK calls.
    #[inline]
    pub fn lda(&self) -> i32 {
        self.stride as i32
    }

    // ********************** Table conversion **********************

    /// Convert this Matrix into a Table with zero-copy column sharing.
    ///
    /// The matrix data buffer is frozen into a SharedBuffer, and each column
    /// becomes a FloatArray backed by a window into that shared allocation.
    /// No data is copied.
    ///
    /// `fields` must have exactly `n_cols` entries, providing the name and
    /// metadata for each column.
    pub fn to_table(self, fields: Vec<Field>) -> Result<Table, MinarrowError> {
        if fields.len() != self.n_cols {
            return Err(MinarrowError::ShapeError {
                message: format!(
                    "to_table: expected {} fields for {} columns, got {}",
                    self.n_cols, self.n_cols, fields.len()
                ),
            });
        }

        let n_rows = self.n_rows;
        let n_cols = self.n_cols;
        let stride = self.stride;

        // Freeze the Vec64<f64> into a SharedBuffer (zero-copy, refcounted)
        // SAFETY: f64 is plain data with no drop logic
        let shared = unsafe { SharedBuffer::from_vec64_typed(self.data) };

        let mut cols = Vec::with_capacity(n_cols);
        for (i, field) in fields.into_iter().enumerate() {
            // Each column starts at i * stride elements, which is 64-byte aligned
            let col_offset = i * stride;
            let buf: Buffer<f64> = Buffer::from_shared_column(shared.clone(), col_offset, n_rows);
            let float_arr = FloatArray::new(buf, None);
            let array = Array::NumericArray(NumericArray::Float64(Arc::new(float_arr)));
            cols.push(FieldArray::new(field, array));
        }

        Ok(Table::new(self.name.unwrap_or_default(), Some(cols)))
    }

    /// Convert this Matrix into a Table using auto-generated column names
    /// (col_0, col_1, ...).
    pub fn to_table_gen(self) -> Table {
        let n_cols = self.n_cols;
        let fields: Vec<Field> = (0..n_cols)
            .map(|i| Field::new(format!("col_{}", i), crate::ffi::arrow_dtype::ArrowType::Float64, false, None))
            .collect();
        self.to_table(fields).unwrap()
    }
}

impl Shape for Matrix {
    fn shape(&self) -> ShapeDim {
        ShapeDim::Rank2 {
            rows: self.n_rows(),
            cols: self.n_cols(),
        }
    }
}

impl Concatenate for Matrix {
    /// Concatenates two matrices vertically (i.e., row-wise stacking).
    ///
    /// # Requirements
    /// - Both matrices must have the same number of columns
    ///
    /// # Returns
    /// A new Matrix with rows from `self` followed by rows from `other`
    ///
    /// # Errors
    /// - `IncompatibleTypeError` if column counts don't match
    fn concat(self, other: Self) -> Result<Self, MinarrowError> {
        // Check column count
        if self.n_cols != other.n_cols {
            return Err(MinarrowError::IncompatibleTypeError {
                from: "Matrix",
                to: "Matrix",
                message: Some(format!(
                    "Cannot concatenate matrices with different column counts: {} vs {}",
                    self.n_cols, other.n_cols
                )),
            });
        }

        // Handle empty matrices
        if self.is_empty() && other.is_empty() {
            return Ok(Matrix::new(
                0,
                0,
                None,
            ));
        }

        let result_n_rows = self.n_rows + other.n_rows;
        let result_n_cols = self.n_cols;
        let result_stride = aligned_stride(result_n_rows);
        let pad = result_stride - result_n_rows;
        let mut result_data = Vec64::with_capacity(result_stride * result_n_cols);

        for col in 0..result_n_cols {
            result_data.extend_from_slice(self.col(col));
            result_data.extend_from_slice(other.col(col));
            if pad > 0 {
                result_data.extend_from_slice(&[0.0; ALIGN_ELEMS][..pad]);
            }
        }

        Ok(Matrix {
            n_rows: result_n_rows,
            n_cols: result_n_cols,
            stride: result_stride,
            data: result_data,
            name: None,
        })
    }
}

// Pretty print
impl fmt::Debug for Matrix {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "Matrix{}: {} × {} [col-major]",
            self.name.as_deref().map_or(String::new(), |n| format!(" '{}'", n)),
            self.n_rows, self.n_cols
        )?;
        for row in 0..self.n_rows.min(6) {
            // Print up to 6 rows
            write!(f, "\n[")?;
            for col in 0..self.n_cols.min(8) {
                // Print up to 8 cols
                write!(f, " {:8.4}", self.get(row, col))?;
                if col != self.n_cols - 1 {
                    write!(f, ",")?;
                }
            }
            if self.n_cols > 8 {
                write!(f, " ...")?;
            }
            write!(f, " ]")?;
        }
        if self.n_rows > 6 {
            write!(f, "\n...")?;
        }
        Ok(())
    }
}

// From Vec<FloatArray<f64>> to unnamed Matrix
impl From<Vec<FloatArray<f64>>> for Matrix {
    fn from(columns: Vec<FloatArray<f64>>) -> Self {
        let n_cols = columns.len();
        let n_rows = columns.first().map(|c| c.data.len()).unwrap_or(0);
        let stride = aligned_stride(n_rows);
        let pad = stride - n_rows;
        for col in &columns {
            assert_eq!(col.data.len(), n_rows, "Column length mismatch");
        }
        let mut data = Vec64::with_capacity(stride * n_cols);
        for col in &columns {
            data.extend_from_slice(&col.data);
            if pad > 0 {
                data.extend_from_slice(&[0.0; ALIGN_ELEMS][..pad]);
            }
        }
        Matrix { n_rows, n_cols, stride, data, name: None }
    }
}

// From (Vec<FloatArray<f64>>, String) to named Matrix
impl From<(Vec<FloatArray<f64>>, String)> for Matrix {
    fn from((columns, name): (Vec<FloatArray<f64>>, String)) -> Self {
        let mut mat = Matrix::from(columns);
        mat.name = Some(name);
        mat
    }
}

// From &[FloatArray<f64>] to unnamed Matrix
impl From<&[FloatArray<f64>]> for Matrix {
    fn from(columns: &[FloatArray<f64>]) -> Self {
        let n_cols = columns.len();
        let n_rows = columns.first().map(|c| c.data.len()).unwrap_or(0);
        let stride = aligned_stride(n_rows);
        let pad = stride - n_rows;
        for col in columns {
            assert_eq!(col.data.len(), n_rows, "Column length mismatch");
        }
        let mut data = Vec64::with_capacity(stride * n_cols);
        for col in columns {
            data.extend_from_slice(&col.data);
            if pad > 0 {
                data.extend_from_slice(&[0.0; ALIGN_ELEMS][..pad]);
            }
        }
        Matrix { n_rows, n_cols, stride, data, name: None }
    }
}

impl TryFrom<&Table> for Matrix {
    type Error = MinarrowError;

    fn try_from(table: &Table) -> Result<Self, Self::Error> {
        let name = if table.name.is_empty() { None } else { Some(table.name.clone()) };
        let n_cols = table.n_cols();
        let n_rows = table.n_rows;
        let stride = aligned_stride(n_rows);
        let pad = stride - n_rows;

        let mut data = Vec64::with_capacity(stride * n_cols);
        for (col_idx, fa) in table.cols.iter().enumerate() {
            let numeric = fa.array.num_ref().map_err(|_| MinarrowError::TypeError {
                from: "non-numeric",
                to: "Float64",
                message: Some(format!("column {} is not numeric", col_idx)),
            })?;
            let f64_arr = numeric.clone().f64()?;
            if f64_arr.data.len() != n_rows {
                return Err(MinarrowError::ColumnLengthMismatch {
                    col: col_idx,
                    expected: n_rows,
                    found: f64_arr.data.len(),
                });
            }
            data.extend_from_slice(f64_arr.data.as_slice());
            if pad > 0 {
                data.extend_from_slice(&[0.0; ALIGN_ELEMS][..pad]);
            }
        }

        Ok(Matrix { n_rows, n_cols, stride, data, name })
    }
}

impl TryFrom<Table> for Matrix {
    type Error = MinarrowError;

    fn try_from(table: Table) -> Result<Self, Self::Error> {
        Matrix::try_from(&table)
    }
}

// From &[Vec<f64>] (Vec-of-cols) to unnamed Matrix
impl From<&[Vec<f64>]> for Matrix {
    fn from(columns: &[Vec<f64>]) -> Self {
        let n_cols = columns.len();
        let n_rows = columns.first().map(|c| c.len()).unwrap_or(0);
        let stride = aligned_stride(n_rows);
        let pad = stride - n_rows;
        for col in columns {
            assert_eq!(col.len(), n_rows, "Column length mismatch");
        }
        let mut data = Vec64::with_capacity(stride * n_cols);
        for col in columns {
            data.extend_from_slice(col);
            if pad > 0 {
                data.extend_from_slice(&[0.0; ALIGN_ELEMS][..pad]);
            }
        }
        Matrix { n_rows, n_cols, stride, data, name: None }
    }
}

// From flat unpadded slice with shape - re-lays out with stride padding
impl<'a> From<(&'a [f64], usize, usize, Option<String>)> for Matrix {
    fn from((slice, n_rows, n_cols, name): (&'a [f64], usize, usize, Option<String>)) -> Self {
        assert_eq!(slice.len(), n_rows * n_cols, "Slice shape mismatch");
        Matrix::from_f64_unaligned(slice, n_rows, n_cols, name)
    }
}

// ********************** Iterators ***********************

impl<'a> IntoIterator for &'a Matrix {
    type Item = &'a f64;
    type IntoIter = std::slice::Iter<'a, f64>;
    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        self.data.iter()
    }
}

impl<'a> IntoIterator for &'a mut Matrix {
    type Item = &'a mut f64;
    type IntoIter = std::slice::IterMut<'a, f64>;
    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        self.data.iter_mut()
    }
}

impl IntoIterator for Matrix {
    type Item = f64;
    type IntoIter = <Vec64<f64> as IntoIterator>::IntoIter;
    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        self.data.into_iter()
    }
}