1use std::cell::RefCell;
2use std::error::Error;
3use std::fs::File;
4use std::io::{BufReader, Read};
5use std::iter::Iterator;
6use std::path::{Path, PathBuf};
7use std::rc::Rc;
8use std::result::Result;
9
10use crate::encodings::Encoding;
11use crate::strings_extractor::{new_strings_extractor, StringsExtractor};
12use crate::strings_writer::{JsonWriter, StringWriter, VectorWriter};
13use crate::ErrorResult;
14
/// Minimum string length assigned by the `new` constructors of the config types in this file.
const DEFAULT_MIN_LENGTH: usize = 3;
/// Encodings reported by `get_encodings` when a config's own encoding list is empty.
const DEFAULT_ENCODINGS: [Encoding; 1] = [Encoding::ASCII];
17
/// A source of input bytes together with the settings used when extracting strings from it.
///
/// Implemented in this file by `FileConfig`, `StdinConfig` and `BytesConfig`.
pub trait Config {
    /// Calls `func` once per input byte, passing the byte's zero-based position in the
    /// input and the byte itself. The implementations in this file stop at the first
    /// error returned by `func` and hand it back to the caller.
    #[doc(hidden)]
    fn consume<F>(&self, func: F) -> ErrorResult
    where
        F: FnMut(usize, u8) -> ErrorResult;
    /// Returns the minimum length passed to every strings extractor.
    #[doc(hidden)]
    fn get_min_length(&self) -> usize;
    /// Returns the encodings for which an extractor is created.
    #[doc(hidden)]
    fn get_encodings(&self) -> Vec<Encoding>;
}
28
// Expands to the two accessor methods of `Config` that are identical for every
// config type. The surrounding type must have a `min_length: usize` field and an
// `encodings: Vec<Encoding>` field.
macro_rules! impl_config {
    () => {
        /// Returns the configured minimum string length.
        fn get_min_length(&self) -> usize {
            self.min_length
        }

        /// Returns a copy of the configured encodings, or of `DEFAULT_ENCODINGS`
        /// when no encoding has been configured.
        fn get_encodings(&self) -> Vec<Encoding> {
            match self.encodings.as_slice() {
                [] => DEFAULT_ENCODINGS.to_vec(),
                configured => configured.to_vec(),
            }
        }
    };
}
42
// Expands to the builder-style setters shared by every config type. The
// surrounding type must have a `min_length: usize` field and an
// `encodings: Vec<Encoding>` field.
macro_rules! impl_default {
    () => {
        /// Returns the config with its minimum string length replaced by `min_length`.
        pub fn with_min_length(self, min_length: usize) -> Self {
            Self { min_length, ..self }
        }

        /// Returns the config with `encoding` appended to its list of encodings.
        pub fn with_encoding(self, encoding: Encoding) -> Self {
            let mut updated = self;
            updated.encodings.push(encoding);
            updated
        }

        /// Returns the config with its whole list of encodings replaced by `encodings`.
        pub fn with_encodings(self, encodings: Vec<Encoding>) -> Self {
            Self { encodings, ..self }
        }
    };
}
61
/// Settings for extracting strings from a file on disk.
pub struct FileConfig<'a> {
    /// Path of the file that is opened when the input is consumed.
    pub file_path: &'a Path,
    /// Minimum length passed to every strings extractor.
    pub min_length: usize,
    /// Encodings to extract; when empty, `DEFAULT_ENCODINGS` is used instead.
    pub encodings: Vec<Encoding>,
    /// Capacity of the `BufReader` placed around the opened file.
    pub buffer_size: usize,
}
68
69impl<'a> FileConfig<'a> {
70 const DEFAULT_BUFFER_SIZE: usize = 1024 * 1024;
71
72 pub fn new(file_path: &'a Path) -> Self {
73 FileConfig {
74 file_path,
75 min_length: DEFAULT_MIN_LENGTH,
76 encodings: vec![],
77 buffer_size: FileConfig::DEFAULT_BUFFER_SIZE,
78 }
79 }
80
81 pub fn with_buffer_size(mut self, buffer_size: usize) -> Self {
82 self.buffer_size = buffer_size;
83 self
84 }
85
86 impl_default!();
87}
88
89impl<'a> Config for FileConfig<'a> {
90 fn consume<F>(&self, mut func: F) -> ErrorResult
91 where
92 F: FnMut(usize, u8) -> ErrorResult,
93 {
94 let file_result = File::open(self.file_path);
95 if let Err(err) = file_result {
96 return Err(Box::new(err));
97 }
98 let file = file_result.unwrap();
99 let buf_reader = BufReader::with_capacity(self.buffer_size, file);
100 buf_reader
101 .bytes()
102 .enumerate()
103 .try_for_each(|(i, b)| func(i, b.unwrap()))?;
104 Ok(())
105 }
106
107 impl_config!();
108}
109
/// Settings for extracting strings from standard input.
pub struct StdinConfig {
    /// Minimum length passed to every strings extractor.
    pub min_length: usize,
    /// Encodings to extract; when empty, `DEFAULT_ENCODINGS` is used instead.
    pub encodings: Vec<Encoding>,
    /// Capacity of the `BufReader` placed around standard input.
    pub buffer_size: usize,
}
115
116impl Default for StdinConfig {
117 fn default() -> Self {
118 Self::new()
119 }
120}
121
122impl StdinConfig {
123 const DEFAULT_BUFFER_SIZE: usize = 1024 * 1024;
124
125 pub fn new() -> Self {
126 StdinConfig {
127 min_length: DEFAULT_MIN_LENGTH,
128 encodings: vec![],
129 buffer_size: StdinConfig::DEFAULT_BUFFER_SIZE,
130 }
131 }
132
133 pub fn with_buffer_size(mut self, buffer_size: usize) -> Self {
134 self.buffer_size = buffer_size;
135 self
136 }
137
138 impl_default!();
139}
140
141impl Config for StdinConfig {
142 fn consume<F>(&self, mut func: F) -> ErrorResult
143 where
144 F: FnMut(usize, u8) -> ErrorResult,
145 {
146 let buf_reader = BufReader::with_capacity(self.buffer_size, std::io::stdin());
147 buf_reader
148 .bytes()
149 .enumerate()
150 .try_for_each(|(i, b)| func(i, b.unwrap()))?;
151 Ok(())
152 }
153
154 impl_config!();
155}
156
/// Settings for extracting strings from an in-memory byte buffer.
pub struct BytesConfig {
    /// The bytes that are scanned when the input is consumed.
    pub bytes: Vec<u8>,
    /// Minimum length passed to every strings extractor.
    pub min_length: usize,
    /// Encodings to extract; when empty, `DEFAULT_ENCODINGS` is used instead.
    pub encodings: Vec<Encoding>,
}
162
163impl BytesConfig {
164 pub fn new(bytes: Vec<u8>) -> Self {
165 BytesConfig {
166 bytes,
167 min_length: DEFAULT_MIN_LENGTH,
168 encodings: vec![],
169 }
170 }
171
172 impl_default!();
173}
174
175impl Config for BytesConfig {
176 fn consume<F>(&self, mut func: F) -> ErrorResult
177 where
178 F: FnMut(usize, u8) -> ErrorResult,
179 {
180 self.bytes
181 .iter()
182 .enumerate()
183 .try_for_each(|(i, b)| func(i, *b))?;
184 Ok(())
185 }
186
187 impl_config!();
188}
189
190fn _strings<T: Config, W: StringWriter>(
191 strings_config: &T,
192 strings_writer: Rc<RefCell<W>>,
193) -> ErrorResult {
194 let min_length = strings_config.get_min_length();
195 let mut strings_extractors: Vec<Box<dyn StringsExtractor>> = strings_config
196 .get_encodings()
197 .iter()
198 .map(|e| new_strings_extractor(strings_writer.clone(), *e, min_length))
199 .collect();
200 strings_config.consume(|offset: usize, c: u8| {
201 strings_extractors
202 .iter_mut()
203 .try_for_each(|strings_extractor| -> ErrorResult {
204 if strings_extractor.can_consume(c) {
205 strings_extractor.consume(offset as u64, c)?;
206 } else {
207 strings_extractor.stop_consume()?;
208 }
209 Ok(())
210 })?;
211 Ok(())
212 })?;
213 strings_extractors
214 .iter_mut()
215 .try_for_each(|strings_extractor| -> ErrorResult {
216 strings_extractor.stop_consume()?;
217 Ok(())
218 })?;
219 Ok(())
220}
221
222pub fn strings<T: Config>(strings_config: &T) -> Result<Vec<(String, u64)>, Box<dyn Error>> {
250 let vector_writer = Rc::new(RefCell::new(VectorWriter::new()));
251 _strings(strings_config, vector_writer.clone())?;
252 let result = Ok(vector_writer.borrow_mut().get_strings());
253 result
254}
255
256pub fn dump_strings<T: Config>(strings_config: &T, output: PathBuf) -> ErrorResult {
267 let output_file = File::create(output)?;
268 let vector_writer = Rc::new(RefCell::new(JsonWriter::new(output_file)));
269 _strings(strings_config, vector_writer.clone())?;
270 vector_writer.borrow_mut().finish()?;
271 Ok(())
272}