Skip to main content

math_sparse_data/
surface.rs

//! Library-owned runtime surface for `math-sparse-data`.
2
3use runtime_core::{
4    describe_surface_response, parse_surface_input, structured_operation_response,
5    surface_operation, validate_matching_lengths, validate_max_items, OperationId, PackageSurface,
6    RuntimeCapabilities, SurfaceError, SurfaceRequest, SurfaceResponse,
7};
8use serde::Deserialize;
9
10use crate::{CooMatrix, CsrMatrix, SparseVector};
11
/// Limit handed to `validate_max_items` for value-array lengths (sparse vector
/// `values`, CSR `values`, the dense `vector` operand) and for the `topK` argument.
const MAX_VALUES: usize = 100_000;
/// Limit handed to `validate_max_items` for the number of COO `entries` in a matrix request.
const MAX_MATRIX_ENTRIES: usize = 100_000;
14
15/// Returns the package surface exposed by every transport wrapper.
16pub fn package_surface() -> PackageSurface {
17    PackageSurface {
18        library: env!("CARGO_PKG_NAME").to_string(),
19        version: env!("CARGO_PKG_VERSION").to_string(),
20        capabilities: RuntimeCapabilities::pure_rust(),
21        operations: vec![
22            surface_operation(
23                "describe",
24                "Describe package",
25                "Sparse vector and matrix contracts for text, retrieval, and feature indexing.",
26                serde_json::json!({"includeOperations": true}),
27            ),
28            surface_operation(
29                "sparse.similarity",
30                "Sparse similarity",
31                "Computes sparse dot product or cosine similarity.",
32                serde_json::json!({"left": {"dimensions": 3, "indices": [0, 2], "values": [1.0, 2.0]}, "right": {"dimensions": 3, "indices": [2], "values": [3.0]}, "metric": "dot"}),
33            ),
34            surface_operation(
35                "sparse.toDense",
36                "Sparse to dense",
37                "Converts sparse vector coordinates into a dense f32 array.",
38                serde_json::json!({"dimensions": 3, "indices": [1], "values": [2.0]}),
39            ),
40            surface_operation(
41                "sparse.matrixSummary",
42                "Sparse matrix summary",
43                "Summarizes COO or CSR sparse matrix shape, nnz, density, and row nnz.",
44                serde_json::json!({"format": "coo", "rows": 2, "cols": 2, "entries": [[0, 1, 2.0]]}),
45            ),
46            surface_operation(
47                "sparse.matrixStats",
48                "Sparse matrix stats",
49                "Summarizes sparse matrix density, row/column nnz, row/column sums, and compact nnz statistics.",
50                serde_json::json!({"matrix": {"rows": 3, "cols": 4, "entries": [[0, 1, 2.0], [1, 3, 4.0], [2, 1, -1.0]]}}),
51            ),
52            surface_operation(
53                "sparse.vectorOps",
54                "Sparse vector operations",
55                "Computes sparse vector norms, optional scaling, optional addition, and top-k entries.",
56                serde_json::json!({"vector": {"dimensions": 4, "indices": [0, 2], "values": [1.0, -3.0]}, "scale": 0.5, "topK": 1}),
57            ),
58            surface_operation(
59                "sparse.matrixVector",
60                "Sparse matrix vector multiply",
61                "Multiplies a COO or CSR sparse matrix by a finite dense vector.",
62                serde_json::json!({"format": "coo", "rows": 2, "cols": 3, "entries": [[0, 1, 2.0], [1, 2, 3.0]], "vector": [1.0, 2.0, 3.0]}),
63            ),
64            surface_operation(
65                "sparse.transpose",
66                "Sparse transpose",
67                "Transposes a COO or CSR sparse matrix and returns canonical COO entries.",
68                serde_json::json!({"format": "coo", "rows": 2, "cols": 3, "entries": [[0, 1, 2.0], [1, 2, 3.0]]}),
69            ),
70        ],
71    }
72}
73
74/// Runs one library-owned operation.
75pub fn run_surface_operation(request: SurfaceRequest) -> Result<SurfaceResponse, String> {
76    let surface = package_surface();
77    let operation = request.operation.clone();
78    let value = match request.operation.as_str() {
79        "describe" => return Ok(describe_surface_response(&surface, request)),
80        "sparse.similarity" => similarity_value(
81            operation.as_str(),
82            parse_surface_input(Some(operation.as_str()), request.input)?,
83        )?,
84        "sparse.toDense" => to_dense_value(
85            operation.as_str(),
86            parse_surface_input(Some(operation.as_str()), request.input)?,
87        )?,
88        "sparse.matrixSummary" => matrix_summary_value(
89            operation.as_str(),
90            parse_surface_input(Some(operation.as_str()), request.input)?,
91        )?,
92        "sparse.matrixStats" => matrix_stats_value(
93            operation.as_str(),
94            parse_surface_input(Some(operation.as_str()), request.input)?,
95        )?,
96        "sparse.vectorOps" => vector_ops_value(
97            operation.as_str(),
98            parse_surface_input(Some(operation.as_str()), request.input)?,
99        )?,
100        "sparse.matrixVector" => matrix_vector_value(
101            operation.as_str(),
102            parse_surface_input(Some(operation.as_str()), request.input)?,
103        )?,
104        "sparse.transpose" => transpose_value(
105            operation.as_str(),
106            parse_surface_input(Some(operation.as_str()), request.input)?,
107        )?,
108        operation => {
109            return Err(
110                SurfaceError::unsupported_operation(operation, env!("CARGO_PKG_NAME"))
111                    .to_error_string(),
112            )
113        }
114    };
115    Ok(structured_operation_response(&surface, operation, value))
116}
117
/// Wire form of a sparse vector, deserialized with camelCase keys.
///
/// `sparse_vector` checks the array lengths and then passes the three fields,
/// in this order, to `SparseVector::new`.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct SparseVectorRequest {
    /// Declared dimensionality handed to `SparseVector::new`.
    dimensions: usize,
    /// Coordinate indices; must have the same length as `values`.
    indices: Vec<usize>,
    /// Stored values; the length is capped by `MAX_VALUES`.
    values: Vec<f32>,
}
125
/// Input of the `sparse.similarity` operation.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct SimilarityRequest {
    /// Left operand; validated and built before `right`.
    left: SparseVectorRequest,
    /// Right operand.
    right: SparseVectorRequest,
    /// Metric name; `similarity_value` accepts `"dot"` and `"cosine"`.
    metric: String,
}
133
/// Wire form of a sparse matrix in either COO or CSR layout.
///
/// Used directly by `sparse.matrixSummary` and `sparse.transpose`, and nested
/// or flattened inside the other matrix requests. `matrix_from_request` reads
/// only the fields that belong to the selected `format`.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct MatrixSummaryRequest {
    /// Layout selector, `"coo"` or `"csr"`; defaults to `"coo"` when omitted.
    #[serde(default = "default_sparse_format")]
    format: String,
    /// Row count passed to the matrix constructor.
    rows: usize,
    /// Column count passed to the matrix constructor.
    cols: usize,
    /// COO triples, read only when `format` is `"coo"`; capped by `MAX_MATRIX_ENTRIES`.
    /// NOTE(review): presumably `(row, column, value)` — confirm against `CooMatrix::new`.
    #[serde(default)]
    entries: Vec<(usize, usize, f32)>,
    /// CSR row offsets (`rowOffsets` on the wire), read only when `format` is `"csr"`.
    #[serde(default)]
    row_offsets: Vec<usize>,
    /// CSR column indices (`columnIndices` on the wire); must match `values` in length.
    #[serde(default)]
    column_indices: Vec<usize>,
    /// CSR values, read only when `format` is `"csr"`; capped by `MAX_VALUES`.
    #[serde(default)]
    values: Vec<f32>,
}
150
/// Input of the `sparse.matrixStats` operation.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct MatrixStatsRequest {
    /// Matrix description, nested under the `matrix` key.
    matrix: MatrixSummaryRequest,
}
156
/// Input of the `sparse.vectorOps` operation.
///
/// The norms of `vector` are always reported; each optional field adds one
/// more key to the response when it is present.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct VectorOpsRequest {
    /// Vector that all computations start from.
    vector: SparseVectorRequest,
    /// When present, the response gains a `scaled` vector produced by `SparseVector::scale`.
    #[serde(default)]
    scale: Option<f32>,
    /// When present, the response gains an `added` vector produced by `SparseVector::add`.
    #[serde(default)]
    add: Option<SparseVectorRequest>,
    /// When present (`topK` on the wire), the response gains the top entries by
    /// absolute value; the requested count is capped by `MAX_VALUES`.
    #[serde(default)]
    top_k: Option<usize>,
}
168
/// Input of the `sparse.matrixVector` operation.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct MatrixVectorRequest {
    /// Matrix description; flattened, so its keys sit at the top level next to `vector`.
    #[serde(flatten)]
    matrix: MatrixSummaryRequest,
    /// Dense operand passed to `mul_dense_vector`; the length is capped by `MAX_VALUES`.
    vector: Vec<f32>,
}
176
177fn similarity_value(
178    operation: &str,
179    request: SimilarityRequest,
180) -> Result<serde_json::Value, String> {
181    let left = sparse_vector(operation, request.left)?;
182    let right = sparse_vector(operation, request.right)?;
183    let value = match request.metric.as_str() {
184        "dot" => left
185            .dot(&right)
186            .map_err(|error| invalid_request(operation, error.to_string()))?,
187        "cosine" => left
188            .cosine_similarity(&right)
189            .map_err(|error| invalid_request(operation, error.to_string()))?,
190        metric => {
191            return Err(SurfaceError::unsupported_value(
192                Some(OperationId::new(operation)),
193                "metric",
194                metric,
195                &["dot", "cosine"],
196            )
197            .to_error_string())
198        }
199    };
200    Ok(serde_json::json!({"metric": request.metric, "value": value}))
201}
202
203fn to_dense_value(
204    operation: &str,
205    request: SparseVectorRequest,
206) -> Result<serde_json::Value, String> {
207    let vector = sparse_vector(operation, request)?;
208    Ok(serde_json::json!({
209        "dimensions": vector.dimensions(),
210        "nnz": vector.nnz(),
211        "dense": vector.to_dense()
212    }))
213}
214
215fn matrix_summary_value(
216    operation: &str,
217    request: MatrixSummaryRequest,
218) -> Result<serde_json::Value, String> {
219    let format = request.format.clone();
220    matrix_json(&format, matrix_from_request(operation, request)?)
221}
222
223fn matrix_stats_value(
224    operation: &str,
225    request: MatrixStatsRequest,
226) -> Result<serde_json::Value, String> {
227    let matrix = matrix_from_request(operation, request.matrix)?;
228    let summary = matrix
229        .summary()
230        .map_err(|error| invalid_request(operation, error.to_string()))?;
231    Ok(serde_json::json!({
232        "rows": matrix.rows(),
233        "cols": matrix.cols(),
234        "nnz": summary.nnz,
235        "density": summary.density,
236        "rowNnz": matrix.row_nnz(),
237        "columnNnz": matrix.column_nnz(),
238        "rowSums": matrix.row_sums().map_err(|error| invalid_request(operation, error.to_string()))?,
239        "columnSums": matrix.column_sums().map_err(|error| invalid_request(operation, error.to_string()))?,
240        "summary": {
241            "rows": summary.rows,
242            "cols": summary.cols,
243            "nnz": summary.nnz,
244            "density": summary.density,
245            "rowNnzMin": summary.row_nnz_min,
246            "rowNnzMax": summary.row_nnz_max,
247            "rowNnzMean": summary.row_nnz_mean,
248            "columnNnzMin": summary.column_nnz_min,
249            "columnNnzMax": summary.column_nnz_max,
250            "columnNnzMean": summary.column_nnz_mean
251        }
252    }))
253}
254
255fn vector_ops_value(
256    operation: &str,
257    request: VectorOpsRequest,
258) -> Result<serde_json::Value, String> {
259    let vector = sparse_vector(operation, request.vector)?;
260    let mut value = serde_json::json!({
261        "dimensions": vector.dimensions(),
262        "nnz": vector.nnz(),
263        "l1Norm": vector.l1_norm().map_err(|error| invalid_request(operation, error.to_string()))?,
264        "l2Norm": vector.l2_norm().map_err(|error| invalid_request(operation, error.to_string()))?
265    });
266    if let Some(scale) = request.scale {
267        let scaled = vector
268            .scale(scale)
269            .map_err(|error| invalid_request(operation, error.to_string()))?;
270        value["scaled"] = vector_json(&scaled);
271    }
272    if let Some(add) = request.add {
273        let added = vector
274            .add(&sparse_vector(operation, add)?)
275            .map_err(|error| invalid_request(operation, error.to_string()))?;
276        value["added"] = vector_json(&added);
277    }
278    if let Some(top_k) = request.top_k {
279        validate_max_items(operation, "topK", top_k, MAX_VALUES)?;
280        value["topK"] = serde_json::json!(vector
281            .top_k_by_abs(top_k)
282            .map_err(|error| invalid_request(operation, error.to_string()))?
283            .into_iter()
284            .map(|(index, value)| serde_json::json!({"index": index, "value": value}))
285            .collect::<Vec<_>>());
286    }
287    Ok(value)
288}
289
290fn matrix_vector_value(
291    operation: &str,
292    request: MatrixVectorRequest,
293) -> Result<serde_json::Value, String> {
294    validate_value_count(operation, "vector", request.vector.len())?;
295    let matrix = matrix_from_request(operation, request.matrix)?;
296    let result = matrix
297        .mul_dense_vector(&request.vector)
298        .map_err(|error| invalid_request(operation, error.to_string()))?;
299    Ok(serde_json::json!({
300        "rows": matrix.rows(),
301        "cols": matrix.cols(),
302        "values": result
303    }))
304}
305
306fn transpose_value(
307    operation: &str,
308    request: MatrixSummaryRequest,
309) -> Result<serde_json::Value, String> {
310    let matrix = matrix_from_request(operation, request)?;
311    let transposed = matrix
312        .transpose()
313        .map_err(|error| invalid_request(operation, error.to_string()))?;
314    let coo = transposed
315        .to_coo()
316        .map_err(|error| invalid_request(operation, error.to_string()))?;
317    Ok(serde_json::json!({
318        "format": "coo",
319        "rows": coo.rows(),
320        "cols": coo.cols(),
321        "entries": coo.entries()
322    }))
323}
324
325fn matrix_json(format: &str, matrix: CsrMatrix) -> Result<serde_json::Value, String> {
326    let row_nnz = matrix
327        .rows_iter()
328        .map(|row| row.indices().len())
329        .collect::<Vec<_>>();
330    let nnz = row_nnz.iter().sum::<usize>();
331    Ok(serde_json::json!({
332        "format": format,
333        "rows": matrix.rows(),
334        "cols": matrix.cols(),
335        "nnz": nnz,
336        "density": nnz as f64 / (matrix.rows() * matrix.cols()) as f64,
337        "rowNnz": row_nnz
338    }))
339}
340
341fn matrix_from_request(
342    operation: &str,
343    request: MatrixSummaryRequest,
344) -> Result<CsrMatrix, String> {
345    match request.format.as_str() {
346        "coo" => {
347            validate_matrix_entry_count(operation, request.entries.len())?;
348            CooMatrix::new(request.rows, request.cols, request.entries)
349                .and_then(|coo| coo.to_csr())
350                .map_err(|error| invalid_request(operation, error.to_string()))
351        }
352        "csr" => {
353            validate_value_count(operation, "values", request.values.len())?;
354            validate_matching_lengths(
355                operation,
356                "columnIndices",
357                request.column_indices.len(),
358                "values",
359                request.values.len(),
360            )?;
361            CsrMatrix::new(
362                request.rows,
363                request.cols,
364                request.row_offsets,
365                request.column_indices,
366                request.values,
367            )
368            .map_err(|error| invalid_request(operation, error.to_string()))
369        }
370        format => Err(SurfaceError::unsupported_value(
371            Some(OperationId::new(operation)),
372            "format",
373            format,
374            &["coo", "csr"],
375        )
376        .to_error_string()),
377    }
378}
379
380fn vector_json(vector: &SparseVector) -> serde_json::Value {
381    serde_json::json!({
382        "dimensions": vector.dimensions(),
383        "indices": vector.indices(),
384        "values": vector.values(),
385        "nnz": vector.nnz()
386    })
387}
388
389fn sparse_vector(operation: &str, request: SparseVectorRequest) -> Result<SparseVector, String> {
390    validate_value_count(operation, "values", request.values.len())?;
391    validate_matching_lengths(
392        operation,
393        "indices",
394        request.indices.len(),
395        "values",
396        request.values.len(),
397    )?;
398    SparseVector::new(request.dimensions, request.indices, request.values)
399        .map_err(|error| invalid_request(operation, error.to_string()))
400}
401
402fn validate_value_count(operation: &str, field: &str, count: usize) -> Result<(), String> {
403    validate_max_items(operation, field, count, MAX_VALUES)
404}
405
406fn validate_matrix_entry_count(operation: &str, count: usize) -> Result<(), String> {
407    validate_max_items(operation, "entries", count, MAX_MATRIX_ENTRIES)
408}
409
410fn invalid_request(operation: &str, message: impl Into<String>) -> String {
411    SurfaceError::invalid_request(Some(OperationId::new(operation)), message).to_error_string()
412}
413
/// Serde default for a matrix request's `format` field: the COO layout.
fn default_sparse_format() -> String {
    String::from("coo")
}
417
#[cfg(test)]
mod tests {
    use super::*;

    /// Sends `input` to the operation named `operation` through the public entry point.
    fn invoke(operation: &str, input: serde_json::Value) -> Result<SurfaceResponse, String> {
        run_surface_operation(SurfaceRequest {
            operation: OperationId::new(operation),
            input,
        })
    }

    #[test]
    fn sparse_similarity_dot_works() {
        let input = serde_json::json!({
            "left": {"dimensions": 3, "indices": [0, 2], "values": [1.0, 2.0]},
            "right": {"dimensions": 3, "indices": [2], "values": [3.0]},
            "metric": "dot"
        });
        let reply = invoke("sparse.similarity", input).expect("similarity");
        assert_eq!(reply.value["value"], 6.0);
    }

    #[test]
    fn sparse_to_dense_works() {
        let input = serde_json::json!({"dimensions": 3, "indices": [1], "values": [2.0]});
        let reply = invoke("sparse.toDense", input).expect("to dense");
        assert_eq!(reply.value["dense"], serde_json::json!([0.0, 2.0, 0.0]));
    }

    #[test]
    fn sparse_matrix_summary_reports_row_counts() {
        let input = serde_json::json!({
            "format": "coo",
            "rows": 2,
            "cols": 3,
            "entries": [[0, 1, 2.0], [1, 2, 3.0]]
        });
        let reply = invoke("sparse.matrixSummary", input).expect("matrix summary");
        assert_eq!(reply.value["nnz"], 2);
        assert_eq!(reply.value["rowNnz"], serde_json::json!([1, 1]));
    }

    /// Runs each newer operation with the example request published in its own metadata.
    #[test]
    fn new_sparse_operations_run() {
        let names = [
            "sparse.vectorOps",
            "sparse.matrixVector",
            "sparse.transpose",
            "sparse.matrixStats",
        ];
        for operation in names {
            let descriptor = package_surface()
                .operations
                .into_iter()
                .find(|candidate| candidate.id.as_str() == operation)
                .expect("operation metadata");
            let outcome = run_surface_operation(SurfaceRequest {
                operation: descriptor.id,
                input: descriptor.example_request,
            });
            match outcome {
                Ok(reply) => assert!(reply.value.is_object()),
                Err(error) => panic!("{operation} failed: {error}"),
            }
        }
    }

    #[test]
    fn sparse_matrix_stats_reports_columns_and_sums() {
        let input = serde_json::json!({
            "matrix": {
                "rows": 3,
                "cols": 4,
                "entries": [[0, 1, 2.0], [1, 3, 4.0], [2, 1, -1.0]]
            }
        });
        let reply = invoke("sparse.matrixStats", input).expect("matrix stats");
        assert_eq!(reply.value["nnz"], 3);
        assert_eq!(reply.value["rowNnz"], serde_json::json!([1, 1, 1]));
        assert_eq!(reply.value["columnNnz"], serde_json::json!([0, 2, 0, 1]));
        assert_eq!(
            reply.value["rowSums"],
            serde_json::json!([2.0, 4.0, -1.0])
        );
    }
}