irox_csv/dialects.rs
1// SPDX-License-Identifier: MIT
2// Copyright 2023 IROX Contributors
3
4//!
//! The dialects module describes the different ways a CSV file (or any other
//! file made of repeating records, each split into fields) can be represented.
7//!
8
/// One concrete flavor of a record/field text format.
///
/// A dialect bundles the strings that vary between encodings of the same
/// format: the line/record separators, the field separators, and the comment
/// characters.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Dialect {
    /// Line/record separator(s).
    line_separators: &'static str,
    /// Field separator(s).
    field_separators: &'static str,
    /// Comment character(s).
    comment_chars: &'static str,
}
18impl Default for Dialect {
19 fn default() -> Self {
20 RFC4180_DIALECT
21 }
22}
23
24impl Dialect {
25 pub const fn new(
26 line_separators: &'static str,
27 field_separators: &'static str,
28 comment_chars: &'static str,
29 ) -> Dialect {
30 Dialect {
31 line_separators,
32 field_separators,
33 comment_chars,
34 }
35 }
36
37 ///
38 /// Returns the line/record separator for this tokenizer type
39 /// Defaults to "\n"
40 #[must_use]
41 pub const fn get_line_separators(&self) -> &str {
42 self.line_separators
43 }
44
45 ///
46 /// Returns the field separator for this tokenizer type,
47 /// Defaults to ","
48 #[must_use]
49 pub const fn get_field_separators(&self) -> &str {
50 self.field_separators
51 }
52
53 ///
54 /// Returns the optional comment character for this tokenizer type,
55 /// Defaults to `None`
56 #[must_use]
57 pub const fn get_comment_chars(&self) -> &str {
58 self.comment_chars
59 }
60}
61
62///
63/// RFC4180 Dialect, uses the industry defaults '\r\n' for record separator,
64/// and ',' for field separator.
65pub const RFC4180_DIALECT: Dialect = Dialect::new("\r\n", ",", "#");
66
67///
68/// Microsoft Excel tokenizer, effectively the same as RFC4180.
69pub const EXCEL_DIALECT: Dialect = RFC4180_DIALECT;
70
71///
72/// Standard unix dialect, uses '\n' instead of CRLF for line separators.
73pub const UNIX_DIALECT: Dialect = Dialect::new("\n", ",", "#");
74
75///
76/// Tab dialect, uses '\n' for newlines and '\t' for the field separator.
77pub const UNIX_TAB_DIALECT: Dialect = Dialect::new("\n", "\t", "#");
78
79///
80/// Excel tab dialect, uses '\r\n' for newlines and '\t' for the field separator.
81pub const EXCEL_TAB_DIALECT: Dialect = Dialect::new("\r\n", "\t", "#");
82
83///
84/// Piped Field Dialect, uses vertical pipes '|' for the field separators
85pub const PIPE_FIELD_DIALECT: Dialect = Dialect::new("\n", "|", "#");