clickhouse_format/output/
tsv.rs1use core::marker::PhantomData;
2use std::collections::HashMap;
3
4use csv::{ReaderBuilder, StringRecordsIntoIter};
5use serde::de::DeserializeOwned;
6
7use crate::format_name::FormatName;
8
9use super::{tsv_raw::TsvRawOutput, Output, OutputResult};
10
/// Output handler for the `Tsv` format.
///
/// `T` is the row type that each record is deserialized into. The optional
/// column names and types are stored as given and forwarded to the raw TSV
/// deserializer when records are processed.
pub struct TsvOutput<T> {
    /// Column names, if the caller supplied any.
    names: Option<Vec<String>>,
    /// Column types, if the caller supplied any.
    types: Option<Vec<String>>,
    /// Binds the row type `T` to this value without storing a `T`.
    phantom: PhantomData<T>,
}

// Written by hand instead of `#[derive(Debug)]`: a derive would add a
// `T: Debug` bound even though no `T` value is ever stored.
impl<T> core::fmt::Debug for TsvOutput<T> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("TsvOutput")
            .field("names", &self.names)
            .field("types", &self.types)
            .finish()
    }
}
16impl<T> Default for TsvOutput<T> {
17 fn default() -> Self {
18 Self::new()
19 }
20}
21impl<T> TsvOutput<T> {
22 pub fn new() -> Self {
23 Self {
24 names: None,
25 types: None,
26 phantom: PhantomData,
27 }
28 }
29 pub fn with_names(names: Vec<String>) -> Self {
30 Self {
31 names: Some(names),
32 types: None,
33 phantom: PhantomData,
34 }
35 }
36 pub fn with_names_and_types(names: Vec<String>, types: Vec<String>) -> Self {
37 Self {
38 names: Some(names),
39 types: Some(types),
40 phantom: PhantomData,
41 }
42 }
43 pub(crate) fn from_raw_parts(names: Option<Vec<String>>, types: Option<Vec<String>>) -> Self {
44 Self {
45 names,
46 types,
47 phantom: PhantomData,
48 }
49 }
50}
51
52impl<T> Output for TsvOutput<T>
53where
54 T: DeserializeOwned,
55{
56 type Row = T;
57 type Info = Option<HashMap<String, String>>;
58
59 type Error = csv::Error;
60
61 fn format_name() -> FormatName {
62 FormatName::Tsv
63 }
64
65 fn deserialize(&self, slice: &[u8]) -> OutputResult<Self::Row, Self::Info, Self::Error> {
66 let rdr = ReaderBuilder::new()
67 .delimiter(b'\t')
68 .has_headers(false)
69 .from_reader(slice);
70
71 self.deserialize_with_records(rdr.into_records())
72 }
73}
74impl<T> TsvOutput<T>
75where
76 T: DeserializeOwned,
77{
78 pub(crate) fn deserialize_with_records(
79 &self,
80 records: StringRecordsIntoIter<&[u8]>,
81 ) -> OutputResult<<Self as Output>::Row, <Self as Output>::Info, <Self as Output>::Error> {
82 TsvRawOutput::from_raw_parts(self.names.to_owned(), self.types.to_owned())
84 .deserialize_with_records(records)
85 }
86}
87
#[cfg(test)]
mod tests {
    use super::*;

    use std::{fs, path::PathBuf};

    use crate::test_helpers::{TestStringsRow, TEST_STRINGS_ROW_1};

    /// Deserializes the `tests/files/TSV.tsv` fixture twice: once into string
    /// maps keyed by explicitly supplied column names, and once into
    /// `TestStringsRow` without names.
    #[test]
    fn simple() -> Result<(), Box<dyn std::error::Error>> {
        let file_path = PathBuf::from("tests/files/TSV.tsv");
        let content = fs::read_to_string(&file_path)?;

        // The fixture's file stem must parse to this output's format name.
        let format_from_stem: FormatName = file_path
            .file_stem()
            .unwrap()
            .to_string_lossy()
            .parse()
            .unwrap();
        assert_eq!(
            TsvOutput::<HashMap<String, String>>::format_name(),
            format_from_stem
        );

        // Map rows: values are looked up by the column names given here.
        let names: Vec<String> = ["array1", "array2", "tuple1", "tuple2", "map1"]
            .iter()
            .map(|name| (*name).to_owned())
            .collect();
        let map_output = TsvOutput::<HashMap<String, String>>::with_names(names);
        let (rows, info) = map_output.deserialize(content.as_bytes())?;
        let first_row = rows.first().unwrap();
        assert_eq!(first_row.get("tuple1").unwrap(), "(1,'a')");
        assert_eq!(info, None);

        // Struct rows: no column names are supplied.
        let struct_output = TsvOutput::<TestStringsRow>::new();
        let (rows, info) = struct_output.deserialize(content.as_bytes())?;
        assert_eq!(rows.first().unwrap(), &*TEST_STRINGS_ROW_1);
        assert_eq!(info, None);

        Ok(())
    }
}