irox_csv/
dialects.rs

1// SPDX-License-Identifier: MIT
2// Copyright 2023 IROX Contributors
3
4//!
//! The dialects module describes the different ways a CSV file (or any other
//! file made of repeating records, each split into fields) can be represented.
7//!
8
///
/// A dialect represents the variations in how this record/field format can
/// be encoded: the string separating lines (records), the string separating
/// the fields within a record, and the string holding the comment characters.
///
/// Every field is a `&'static str`, so a `Dialect` is `Copy` and cheap to pass
/// by value. `Hash` is derived alongside `Eq` so that a dialect can be used as
/// a key in hashed collections.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub struct Dialect {
    /// Line/record separator text, e.g. `"\r\n"` or `"\n"`.
    line_separators: &'static str,
    /// Field separator text, e.g. `","`, `"\t"` or `"|"`.
    field_separators: &'static str,
    /// Comment character text, e.g. `"#"`.
    comment_chars: &'static str,
}
18impl Default for Dialect {
19    fn default() -> Self {
20        RFC4180_DIALECT
21    }
22}
23
24impl Dialect {
25    pub const fn new(
26        line_separators: &'static str,
27        field_separators: &'static str,
28        comment_chars: &'static str,
29    ) -> Dialect {
30        Dialect {
31            line_separators,
32            field_separators,
33            comment_chars,
34        }
35    }
36
37    ///
38    /// Returns the line/record separator for this tokenizer type
39    /// Defaults to "\n"
40    #[must_use]
41    pub const fn get_line_separators(&self) -> &str {
42        self.line_separators
43    }
44
45    ///
46    /// Returns the field separator for this tokenizer type,
47    /// Defaults to ","
48    #[must_use]
49    pub const fn get_field_separators(&self) -> &str {
50        self.field_separators
51    }
52
53    ///
54    /// Returns the optional comment character for this tokenizer type,
55    /// Defaults to `None`
56    #[must_use]
57    pub const fn get_comment_chars(&self) -> &str {
58        self.comment_chars
59    }
60}
61
///
/// RFC4180 Dialect, uses the industry defaults '\r\n' for record separator,
/// and ',' for field separator. The comment characters are set to '#'
/// (RFC 4180 itself does not describe comments).
///
/// This is the dialect returned by `Dialect::default()`.
pub const RFC4180_DIALECT: Dialect = Dialect::new("\r\n", ",", "#");
66
///
/// Microsoft Excel dialect, defined as an alias of [`RFC4180_DIALECT`]:
/// '\r\n' for line separators, ',' for the field separator and '#' for
/// comment characters.
pub const EXCEL_DIALECT: Dialect = RFC4180_DIALECT;
70
///
/// Standard unix dialect, uses '\n' instead of CRLF for line separators,
/// ',' for the field separator and '#' for comment characters.
pub const UNIX_DIALECT: Dialect = Dialect::new("\n", ",", "#");
74
///
/// Tab dialect, uses '\n' for newlines, '\t' for the field separator and
/// '#' for comment characters.
pub const UNIX_TAB_DIALECT: Dialect = Dialect::new("\n", "\t", "#");
78
///
/// Excel tab dialect, uses '\r\n' for newlines, '\t' for the field separator
/// and '#' for comment characters.
pub const EXCEL_TAB_DIALECT: Dialect = Dialect::new("\r\n", "\t", "#");
82
///
/// Piped Field Dialect, uses vertical pipes '|' for the field separators,
/// '\n' for newlines and '#' for comment characters.
pub const PIPE_FIELD_DIALECT: Dialect = Dialect::new("\n", "|", "#");