pub struct FromCsvConfig {
pub delimiter: u8,
pub has_headers: bool,
pub trim: bool,
pub max_rows: usize,
pub infer_schema: bool,
pub sample_rows: usize,
pub list_key: Option<String>,
}
Expand description
Configuration for CSV parsing.
This structure controls all aspects of CSV parsing behavior, including delimiters, headers, whitespace handling, security limits, and custom list naming.
§Examples
§Default Configuration
let config = FromCsvConfig::default();
assert_eq!(config.delimiter, b',');
assert!(config.has_headers);
assert!(config.trim);
assert_eq!(config.max_rows, 1_000_000);
assert_eq!(config.list_key, None);
§Tab-Delimited without Headers
let config = FromCsvConfig {
delimiter: b'\t',
has_headers: false,
..Default::default()
};
§Custom Row Limit for Large Datasets
let config = FromCsvConfig {
max_rows: 10_000_000, // Allow up to 10M rows
..Default::default()
};
§Disable Whitespace Trimming
let config = FromCsvConfig {
trim: false,
..Default::default()
};
§Enable Schema Inference
let config = FromCsvConfig {
infer_schema: true,
sample_rows: 200, // Sample first 200 rows
..Default::default()
};
§Custom List Key for Irregular Plurals
// For "Person" type, use "people" instead of default "persons"
let config = FromCsvConfig {
list_key: Some("people".to_string()),
..Default::default()
};
Fields§
§delimiter: u8
Field delimiter character (default: b',').
Common alternatives:
- b'\t' - Tab-separated values (TSV)
- b';' - Semicolon-separated (common in European locales)
- b'|' - Pipe-separated
has_headers: bool
Whether the first row contains column headers (default: true).
When true, the first row is interpreted as column names and not included
in the data. When false, all rows are treated as data.
trim: bool
Whether to trim leading/trailing whitespace from fields (default: true).
When true, fields like " value " become "value". This is generally
recommended to handle inconsistently formatted CSV files.
max_rows: usize
Maximum number of rows to parse (default: 1,000,000).
This security limit prevents memory exhaustion from maliciously large CSV files. Processing stops with an error if more rows are encountered.
§Security Impact
- DoS Protection: Prevents attackers from causing memory exhaustion
- Memory Bound: Limits worst-case memory usage to approximately max_rows × avg_row_size × columns
- Recommended Values:
- Small deployments: 100,000 - 1,000,000 rows
- Large deployments: 1,000,000 - 10,000,000 rows
- Batch processing: Adjust based on available RAM
§Example
// For processing very large datasets on a high-memory server
let config = FromCsvConfig {
max_rows: 50_000_000,
..Default::default()
};
infer_schema: bool
Whether to automatically infer column types from data (default: false).
When true, the parser samples the first sample_rows to determine the
most specific type for each column. When false, uses standard per-value
type inference.
§Type Inference Hierarchy (most to least specific)
- Null: All values are empty/null
- Bool: All values are “true” or “false”
- Int: All values parse as integers
- Float: All values parse as floats
- String: Fallback for all other cases
§Example
let config = FromCsvConfig {
infer_schema: true,
sample_rows: 100,
..Default::default()
};
sample_rows: usize
Number of rows to sample for schema inference (default: 100).
Only used when infer_schema is true. Larger sample sizes provide
more accurate type detection but slower initial processing.
§Trade-offs
- Small (10-50): Fast inference, may miss edge cases
- Medium (100-500): Balanced accuracy and performance
- Large (1000+): High accuracy, slower for large datasets
list_key: Option<String>
Custom key name for the matrix list in the document (default: None).
When None, the list key is automatically generated by adding ‘s’ to the
lowercased type name (e.g., “Person” → “persons”). When Some, uses the
specified custom key instead.
§Use Cases
- Irregular Plurals: “Person” → “people” instead of “persons”
- Collective Nouns: “Data” → “dataset” instead of “datas”
- Custom Naming: Any non-standard naming convention
- Case-Sensitive Keys: Preserve specific casing requirements
§Examples
§Irregular Plural
let csv = "id,name\n1,Alice\n";
let config = FromCsvConfig {
list_key: Some("people".to_string()),
..Default::default()
};
let doc = from_csv_with_config(csv, "Person", &["name"], config).unwrap();
assert!(doc.get("people").is_some()); // Uses custom plural
assert!(doc.get("persons").is_none()); // Default plural not used
§Collective Noun
let csv = "id,value\n1,42\n";
let config = FromCsvConfig {
list_key: Some("dataset".to_string()),
..Default::default()
};
let doc = from_csv_with_config(csv, "Data", &["value"], config).unwrap();
assert!(doc.get("dataset").is_some());
§Case-Sensitive Key
let csv = "id,value\n1,test\n";
let config = FromCsvConfig {
list_key: Some("MyCustomList".to_string()),
..Default::default()
};
let doc = from_csv_with_config(csv, "Item", &["value"], config).unwrap();
assert!(doc.get("MyCustomList").is_some());
Trait Implementations§
Source§impl Clone for FromCsvConfig
impl Clone for FromCsvConfig
Source§fn clone(&self) -> FromCsvConfig
fn clone(&self) -> FromCsvConfig
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
source. Read more