runmat-runtime 0.4.1

{
  "title": "readmatrix",
  "category": "io/tabular",
  "keywords": [
    "readmatrix",
    "csv",
    "delimited text",
    "numeric import",
    "table",
    "range",
    "logical"
  ],
  "summary": "Import numeric data from delimited text files into a RunMat matrix.",
  "references": [
    "https://www.mathworks.com/help/matlab/ref/readmatrix.html"
  ],
  "gpu_support": {
    "elementwise": false,
    "reduction": false,
    "precisions": [],
    "broadcasting": "none",
    "notes": "Performs host-side file I/O and parsing. GPU providers are not involved; gathered results remain CPU-resident."
  },
  "fusion": {
    "elementwise": false,
    "reduction": false,
    "max_inputs": 1,
    "constants": "inline"
  },
  "requires_feature": null,
  "tested": {
    "unit": "builtins::io::tabular::readmatrix::tests",
    "integration": "builtins::io::tabular::readmatrix::tests::readmatrix_reads_csv_data"
  },
  "description": "`readmatrix(filename)` reads numeric data from text or delimited files and returns a dense double-precision matrix. RunMat mirrors MATLAB defaults: the function automatically detects common delimiters, skips leading header lines only when requested, infers a rectangular output, and treats empty fields as missing values.",
  "behaviors": [
    "Accepts character vectors or string scalars for the file name. String arrays must contain exactly one element.",
    "Supports option structs (from `detectImportOptions`) as well as name/value arguments such as `'Delimiter'`, `'NumHeaderLines'`, `'TreatAsMissing'`, `'DecimalSeparator'`, `'ThousandsSeparator'`, and `'EmptyValue'`.",
    "Accepts `'Range'` as either an Excel-style address (`\"B2:E10\"`) or a numeric vector `[rowStart colStart rowEnd colEnd]` to slice the imported data.",
    "Automatically detects comma, tab, semicolon, pipe, or whitespace delimiters when none are specified. Detection is based on the first few non-empty data lines.",
    "Parses numeric values using MATLAB-compatible rules, recognising `NaN`, `Inf`, `-Inf`, and locale-specific decimal/thousands separators.",
    "Treats empty fields as `NaN` by default; specify `'EmptyValue', value` to inject a replacement scalar, or `'TreatAsMissing', tokens` to mark additional strings as missing.",
    "`'OutputType','logical'` coalesces non-zero numeric values (including `NaN`) to logical true, mirroring MATLAB's casting behaviour.",
    "`'Like', prototype` matches the output class and residency of an existing array. Supplying a GPU tensor keeps the parsed matrix on the device when an acceleration provider is active.",
    "Tolerates ragged rows by padding trailing elements with the configured empty value (default `NaN`).",
    "Raises descriptive errors when the file cannot be read or when a field cannot be parsed as a numeric value."
  ],
  "examples": [
    {
      "description": "Read Comma-Separated Values With Automatic Delimiter Detection",
      "input": "M = readmatrix(\"data/scores.csv\")",
      "output": "% Returns a numeric matrix containing the CSV data."
    },
    {
      "description": "Skip Header Lines Before Reading Numeric Data",
      "input": "M = readmatrix(\"data/sensor_log.txt\", 'NumHeaderLines', 2)",
      "output": "% The first two lines are skipped; the remaining numeric rows are returned."
    },
    {
      "description": "Import Tab-Delimited Text By Specifying The Delimiter",
      "input": "M = readmatrix(\"data/report.tsv\", 'Delimiter', 'tab')",
      "output": "% Numeric matrix representing the tab-delimited values."
    },
    {
      "description": "Treat Custom Tokens As Missing Values",
      "input": "M = readmatrix(\"data/results.csv\", 'TreatAsMissing', [\"NA\", \"missing\"])",
      "output": "% Entries equal to \"NA\" or \"missing\" become NaN in the output matrix."
    },
    {
      "description": "Use European Decimal And Thousands Separators",
      "input": "M = readmatrix(\"data/europe.csv\", 'Delimiter', ';', 'DecimalSeparator', ',', 'ThousandsSeparator', '.')",
      "output": "% Values like \"1.234,56\" are interpreted as 1234.56."
    },
    {
      "description": "Replace Empty Numeric Fields With A Custom Value",
      "input": "M = readmatrix(\"data/with_blanks.csv\", 'EmptyValue', 0)",
      "output": "% Blank entries become 0 instead of NaN."
    },
    {
      "description": "Import A Specific Range Of Cells",
      "input": "M = readmatrix(\"data/quarterly.csv\", 'Range', 'B2:D5')",
      "output": "% Returns only the rows and columns covered by the specified range."
    },
    {
      "description": "Convert The Result To A Logical Matrix",
      "input": "flags = readmatrix(\"data/thresholds.csv\", 'OutputType', 'logical')",
      "output": "% Non-zero entries (including NaN) become logical true, zero stays false."
    },
    {
      "description": "Keep The Result On The GPU By Matching A Prototype",
      "input": "proto = gpuArray.zeros(1);        % simple prototype to establish residency\nG = readmatrix(\"data/heavy.csv\", 'Like', proto)",
      "output": "% The parsed matrix is uploaded to the same GPU device as the prototype."
    },
    {
      "description": "Provide Options Using A Struct From detectImportOptions",
      "input": "opts = struct('Delimiter', ',', 'NumHeaderLines', 1);\nM = readmatrix(\"data/measurements.csv\", opts)",
      "output": "% Reads the file using the supplied options struct."
    }
  ],
  "faqs": [
    {
      "question": "What file encodings does `readmatrix` support?",
      "answer": "The builtin reads UTF-8 text files by default. If a file starts with a UTF-8 byte-order mark, it is ignored automatically. For other encodings, convert the file with `fileread`/`string` or external tools before calling `readmatrix`."
    },
    {
      "question": "Does `readmatrix` support Excel or binary files?",
      "answer": "This implementation focuses on delimited text files. For MAT-files use `load`, and for spreadsheets use the `readtable` / `readcell` family (planned)."
    },
    {
      "question": "How are missing values represented?",
      "answer": "Empty fields become NaN unless `'EmptyValue'` is supplied. Additional tokens can be marked missing with `'TreatAsMissing'`, which also converts them to NaN."
    },
    {
      "question": "What happens when rows have different numbers of columns?",
      "answer": "RunMat pads short rows with the empty value (default NaN) so the output remains rectangular."
    },
    {
      "question": "Can I import only part of the file?",
      "answer": "Yes. Pass `'Range', 'B3:F20'` (Excel-style addresses) or `'Range', [3 2 10 6]` (row/column indices) to slice the data before it is materialised. Rows or columns outside the range are ignored entirely."
    },
    {
      "question": "Are comment lines supported?",
      "answer": "Lines that are entirely blank are ignored. Use `'NumHeaderLines'` to skip introductory text or call `detectImportOptions` for more control."
    },
    {
      "question": "How do I read files stored on the GPU?",
      "answer": "File paths are always gathered to the CPU before reading. By default the parsed matrix is created on the host; supply `'Like', gpuArray.zeros(1)` (or any GPU prototype) to upload the result automatically, or call `gpuArray` afterwards to move it manually."
    },
    {
      "question": "Can I request single-precision output?",
      "answer": "`readmatrix` currently returns double-precision arrays, matching MATLAB defaults. Cast the result with `single(...)` when you need single precision."
    },
    {
      "question": "How are delimiters detected automatically?",
      "answer": "The builtin inspects the first few non-empty data lines and chooses the candidate delimiter (comma, tab, semicolon, pipe, or whitespace) that produces the most columns consistently. Explicit `'Delimiter'` settings override detection."
    },
    {
      "question": "How are thousands separators handled?",
      "answer": "Specify `'ThousandsSeparator'` to strip that character before parsing, e.g. `'.'` for European locales. The thousands and decimal separators must be different."
    },
    {
      "question": "Does `readmatrix` modify the current directory?",
      "answer": "No. Relative paths are resolved against the current working directory, exactly like MATLAB."
    },
    {
      "question": "What does readmatrix do in MATLAB?",
      "answer": "`readmatrix(filename)` reads numeric data from a text file (CSV, TSV, etc.) and returns it as a matrix. Non-numeric values are replaced with `NaN`."
    },
    {
      "question": "How is readmatrix different from csvread?",
      "answer": "`readmatrix` is the modern replacement for `csvread`. It auto-detects delimiters, handles headers, and supports more file formats. `csvread` only handles comma-separated files with numeric data."
    },
    {
      "question": "Can readmatrix skip header rows?",
      "answer": "Yes. Use the `'NumHeaderLines'` option: `readmatrix('file.csv', 'NumHeaderLines', 1)` skips the first row. RunMat supports this same option."
    }
  ],
  "links": [
    {
      "label": "fileread",
      "url": "./fileread"
    },
    {
      "label": "load",
      "url": "./load"
    },
    {
      "label": "gpuArray",
      "url": "./gpuarray"
    },
    {
      "label": "gather",
      "url": "./gather"
    },
    {
      "label": "csvread",
      "url": "./csvread"
    },
    {
      "label": "csvwrite",
      "url": "./csvwrite"
    },
    {
      "label": "dlmread",
      "url": "./dlmread"
    },
    {
      "label": "dlmwrite",
      "url": "./dlmwrite"
    },
    {
      "label": "writematrix",
      "url": "./writematrix"
    }
  ],
  "source": {
    "label": "`crates/runmat-runtime/src/builtins/io/tabular/readmatrix.rs`",
    "url": "https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/io/tabular/readmatrix.rs"
  },
  "gpu_behavior": [
    "`readmatrix` always executes on the host CPU. If the file name or option arguments are GPU-resident scalars, RunMat gathers them automatically before accessing the filesystem. The resulting matrix is created in host memory unless you pass `'Like', gpuPrototype`, in which case the parsed tensor is uploaded to the same provider so subsequent operations remain on the device. Acceleration providers do not need bespoke hooks for this builtin."
  ]
}