Skip to main content

jellyfish_reader/
matrix.rs

1use crate::error::{Error, Result};
2
3/// A rectangular binary matrix over GF(2), used by Jellyfish for hash position computation.
4///
5/// The matrix has dimensions `rows × cols` where `rows ≤ 64`. It is stored in
6/// column-major format: each column is a single `u64` word. Matrix-vector
7/// multiplication is done using XOR operations.
8///
9/// This is used to compute the expected position of a k-mer in the binary/sorted
10/// file format, enabling efficient binary search for random access queries.
11#[derive(Debug, Clone)]
12pub struct RectangularBinaryMatrix {
13    /// Number of rows (≤ 64).
14    rows: usize,
15    /// Number of columns.
16    cols: usize,
17    /// Column data. None represents an identity matrix.
18    columns: Option<Vec<u64>>,
19}
20
21impl RectangularBinaryMatrix {
22    /// Create a new matrix with the given dimensions and column data.
23    pub fn new(rows: usize, cols: usize, columns: Vec<u64>) -> Result<Self> {
24        if rows > 64 {
25            return Err(Error::InvalidHeader(format!(
26                "matrix rows ({rows}) exceeds maximum of 64"
27            )));
28        }
29        if columns.len() != cols {
30            return Err(Error::InvalidHeader(format!(
31                "expected {cols} columns, got {}",
32                columns.len()
33            )));
34        }
35        Ok(Self {
36            rows,
37            cols,
38            columns: Some(columns),
39        })
40    }
41
42    /// Create an identity matrix.
43    pub fn identity(size: usize) -> Self {
44        Self {
45            rows: size.min(64),
46            cols: size,
47            columns: None,
48        }
49    }
50
51    /// Parse a matrix from the JSON header representation.
52    ///
53    /// In the Jellyfish header, a matrix is stored as an array of integers
54    /// representing the column values, preceded by dimension information.
55    pub fn from_json(value: &serde_json::Value) -> Result<Self> {
56        // Jellyfish stores matrices as: {"r": rows, "c": cols, "columns": [...]}
57        // or sometimes just as an array of column values with dimensions inferred
58        if value.is_null() {
59            return Ok(Self::identity(64));
60        }
61
62        if let Some(obj) = value.as_object() {
63            let rows = obj
64                .get("r")
65                .and_then(|v| v.as_u64())
66                .ok_or_else(|| Error::MissingField("matrix.r".to_string()))?
67                as usize;
68            let cols = obj
69                .get("c")
70                .and_then(|v| v.as_u64())
71                .ok_or_else(|| Error::MissingField("matrix.c".to_string()))?
72                as usize;
73
74            if let Some(identity) = obj.get("identity") {
75                if identity.as_bool().unwrap_or(false) {
76                    return Ok(Self::identity(rows.max(cols)));
77                }
78            }
79
80            let columns = if let Some(col_arr) = obj.get("columns").and_then(|v| v.as_array()) {
81                col_arr
82                    .iter()
83                    .map(|v| {
84                        v.as_u64()
85                            .or_else(|| v.as_i64().map(|i| i as u64))
86                            .ok_or_else(|| {
87                                Error::InvalidHeader("invalid matrix column value".to_string())
88                            })
89                    })
90                    .collect::<Result<Vec<u64>>>()?
91            } else {
92                // Try parsing "data" field (alternative format)
93                let data = obj.get("data").and_then(|v| v.as_array()).ok_or_else(|| {
94                    Error::MissingField("matrix.columns or matrix.data".to_string())
95                })?;
96                data.iter()
97                    .map(|v| {
98                        v.as_u64()
99                            .or_else(|| v.as_i64().map(|i| i as u64))
100                            .ok_or_else(|| {
101                                Error::InvalidHeader("invalid matrix data value".to_string())
102                            })
103                    })
104                    .collect::<Result<Vec<u64>>>()?
105            };
106
107            Self::new(rows, cols, columns)
108        } else {
109            Err(Error::InvalidHeader(
110                "matrix must be a JSON object".to_string(),
111            ))
112        }
113    }
114
115    /// Number of rows.
116    pub fn rows(&self) -> usize {
117        self.rows
118    }
119
120    /// Number of columns.
121    pub fn cols(&self) -> usize {
122        self.cols
123    }
124
125    /// Whether this is an identity matrix.
126    pub fn is_identity(&self) -> bool {
127        self.columns.is_none()
128    }
129
130    /// Multiply this matrix by a vector (given as packed u64 words).
131    ///
132    /// The vector represents the k-mer's packed bits. The result is a `u64`
133    /// hash position value.
134    pub fn times(&self, vector: &[u64]) -> u64 {
135        match &self.columns {
136            None => {
137                // Identity: just return the low bits
138                if vector.is_empty() { 0 } else { vector[0] }
139            }
140            Some(columns) => {
141                let mut result = 0u64;
142                let mut col_idx = 0;
143
144                for &word in vector {
145                    let mut w = word;
146                    let remaining = self.cols - col_idx;
147                    let bits_in_word = remaining.min(64);
148
149                    for _ in 0..bits_in_word {
150                        if w & 1 != 0 {
151                            result ^= columns[col_idx];
152                        }
153                        w >>= 1;
154                        col_idx += 1;
155                    }
156                }
157
158                result
159            }
160        }
161    }
162}
163
164#[cfg(test)]
165mod tests {
166    use super::*;
167
168    #[test]
169    fn test_identity_matrix() {
170        let m = RectangularBinaryMatrix::identity(64);
171        assert!(m.is_identity());
172        assert_eq!(m.times(&[0x12345678]), 0x12345678);
173        assert_eq!(m.times(&[0xDEADBEEF]), 0xDEADBEEF);
174    }
175
176    #[test]
177    fn test_identity_empty() {
178        let m = RectangularBinaryMatrix::identity(64);
179        assert_eq!(m.times(&[]), 0);
180    }
181
182    #[test]
183    fn test_zero_matrix() {
184        let cols = vec![0u64; 8];
185        let m = RectangularBinaryMatrix::new(4, 8, cols).unwrap();
186        assert_eq!(m.times(&[0xFF]), 0);
187    }
188
189    #[test]
190    fn test_simple_matrix() {
191        // 2x2 identity-like matrix: col0 = 0b01, col1 = 0b10
192        let cols = vec![0b01u64, 0b10u64];
193        let m = RectangularBinaryMatrix::new(2, 2, cols).unwrap();
194        // vector = 0b11 (both bits set) -> result = col0 XOR col1 = 0b01 ^ 0b10 = 0b11
195        assert_eq!(m.times(&[0b11]), 0b11);
196        // vector = 0b01 -> result = col0 = 0b01
197        assert_eq!(m.times(&[0b01]), 0b01);
198        // vector = 0b10 -> result = col1 = 0b10
199        assert_eq!(m.times(&[0b10]), 0b10);
200    }
201
202    #[test]
203    fn test_invalid_dimensions() {
204        assert!(RectangularBinaryMatrix::new(65, 4, vec![0; 4]).is_err());
205        assert!(RectangularBinaryMatrix::new(4, 4, vec![0; 3]).is_err());
206    }
207
208    #[test]
209    fn test_from_json_identity() {
210        let json = serde_json::json!({
211            "r": 32,
212            "c": 64,
213            "identity": true
214        });
215        let m = RectangularBinaryMatrix::from_json(&json).unwrap();
216        assert!(m.is_identity());
217    }
218
219    #[test]
220    fn test_from_json_with_columns() {
221        let json = serde_json::json!({
222            "r": 2,
223            "c": 2,
224            "columns": [1, 2]
225        });
226        let m = RectangularBinaryMatrix::from_json(&json).unwrap();
227        assert!(!m.is_identity());
228        assert_eq!(m.rows(), 2);
229        assert_eq!(m.cols(), 2);
230    }
231
232    #[test]
233    fn test_from_json_null() {
234        let json = serde_json::Value::Null;
235        let m = RectangularBinaryMatrix::from_json(&json).unwrap();
236        assert!(m.is_identity());
237    }
238}