rust_strings/
strings.rs

1use std::cell::RefCell;
2use std::error::Error;
3use std::fs::File;
4use std::io::{BufReader, Read};
5use std::iter::Iterator;
6use std::path::{Path, PathBuf};
7use std::rc::Rc;
8use std::result::Result;
9
10use crate::encodings::Encoding;
11use crate::strings_extractor::{new_strings_extractor, StringsExtractor};
12use crate::strings_writer::{JsonWriter, StringWriter, VectorWriter};
13use crate::ErrorResult;
14
// Initial `min_length` assigned by the `new` constructors of the config types in this file.
15const DEFAULT_MIN_LENGTH: usize = 3;
// Encodings reported by `get_encodings` when a config's `encodings` list is empty.
16const DEFAULT_ENCODINGS: [Encoding; 1] = [Encoding::ASCII];
17
18pub trait Config {
19    #[doc(hidden)]
20    fn consume<F>(&self, func: F) -> ErrorResult
21    where
22        F: FnMut(usize, u8) -> ErrorResult;
23    #[doc(hidden)]
24    fn get_min_length(&self) -> usize;
25    #[doc(hidden)]
26    fn get_encodings(&self) -> Vec<Encoding>;
27}
28
// Expands to the two `Config` getters shared by the config types in this file.
// The surrounding type must have `min_length: usize` and
// `encodings: Vec<Encoding>` fields.
macro_rules! impl_config {
    () => {
        fn get_min_length(&self) -> usize {
            self.min_length
        }

        fn get_encodings(&self) -> Vec<Encoding> {
            match self.encodings.as_slice() {
                // Nothing was selected explicitly: fall back to the default set.
                [] => DEFAULT_ENCODINGS.to_vec(),
                selected => selected.to_vec(),
            }
        }
    };
}
42
// Expands to the builder-style setters shared by the config types in this file.
// The surrounding type must have `min_length: usize` and
// `encodings: Vec<Encoding>` fields.
macro_rules! impl_default {
    () => {
        /// Returns the config with `min_length` replaced.
        pub fn with_min_length(self, min_length: usize) -> Self {
            Self { min_length, ..self }
        }

        /// Returns the config with `encoding` appended to its encoding list.
        pub fn with_encoding(mut self, encoding: Encoding) -> Self {
            self.encodings.push(encoding);
            self
        }

        /// Returns the config with its whole encoding list replaced by `encodings`.
        pub fn with_encodings(self, encodings: Vec<Encoding>) -> Self {
            Self { encodings, ..self }
        }
    };
}
61
62pub struct FileConfig<'a> {
63    pub file_path: &'a Path,
64    pub min_length: usize,
65    pub encodings: Vec<Encoding>,
66    pub buffer_size: usize,
67}
68
69impl<'a> FileConfig<'a> {
70    const DEFAULT_BUFFER_SIZE: usize = 1024 * 1024;
71
72    pub fn new(file_path: &'a Path) -> Self {
73        FileConfig {
74            file_path,
75            min_length: DEFAULT_MIN_LENGTH,
76            encodings: vec![],
77            buffer_size: FileConfig::DEFAULT_BUFFER_SIZE,
78        }
79    }
80
81    pub fn with_buffer_size(mut self, buffer_size: usize) -> Self {
82        self.buffer_size = buffer_size;
83        self
84    }
85
86    impl_default!();
87}
88
89impl<'a> Config for FileConfig<'a> {
90    fn consume<F>(&self, mut func: F) -> ErrorResult
91    where
92        F: FnMut(usize, u8) -> ErrorResult,
93    {
94        let file_result = File::open(self.file_path);
95        if let Err(err) = file_result {
96            return Err(Box::new(err));
97        }
98        let file = file_result.unwrap();
99        let buf_reader = BufReader::with_capacity(self.buffer_size, file);
100        buf_reader
101            .bytes()
102            .enumerate()
103            .try_for_each(|(i, b)| func(i, b.unwrap()))?;
104        Ok(())
105    }
106
107    impl_config!();
108}
109
110pub struct StdinConfig {
111    pub min_length: usize,
112    pub encodings: Vec<Encoding>,
113    pub buffer_size: usize,
114}
115
116impl Default for StdinConfig {
117    fn default() -> Self {
118        Self::new()
119    }
120}
121
122impl StdinConfig {
123    const DEFAULT_BUFFER_SIZE: usize = 1024 * 1024;
124
125    pub fn new() -> Self {
126        StdinConfig {
127            min_length: DEFAULT_MIN_LENGTH,
128            encodings: vec![],
129            buffer_size: StdinConfig::DEFAULT_BUFFER_SIZE,
130        }
131    }
132
133    pub fn with_buffer_size(mut self, buffer_size: usize) -> Self {
134        self.buffer_size = buffer_size;
135        self
136    }
137
138    impl_default!();
139}
140
141impl Config for StdinConfig {
142    fn consume<F>(&self, mut func: F) -> ErrorResult
143    where
144        F: FnMut(usize, u8) -> ErrorResult,
145    {
146        let buf_reader = BufReader::with_capacity(self.buffer_size, std::io::stdin());
147        buf_reader
148            .bytes()
149            .enumerate()
150            .try_for_each(|(i, b)| func(i, b.unwrap()))?;
151        Ok(())
152    }
153
154    impl_config!();
155}
156
157pub struct BytesConfig {
158    pub bytes: Vec<u8>,
159    pub min_length: usize,
160    pub encodings: Vec<Encoding>,
161}
162
163impl BytesConfig {
164    pub fn new(bytes: Vec<u8>) -> Self {
165        BytesConfig {
166            bytes,
167            min_length: DEFAULT_MIN_LENGTH,
168            encodings: vec![],
169        }
170    }
171
172    impl_default!();
173}
174
175impl Config for BytesConfig {
176    fn consume<F>(&self, mut func: F) -> ErrorResult
177    where
178        F: FnMut(usize, u8) -> ErrorResult,
179    {
180        self.bytes
181            .iter()
182            .enumerate()
183            .try_for_each(|(i, b)| func(i, *b))?;
184        Ok(())
185    }
186
187    impl_config!();
188}
189
190fn _strings<T: Config, W: StringWriter>(
191    strings_config: &T,
192    strings_writer: Rc<RefCell<W>>,
193) -> ErrorResult {
194    let min_length = strings_config.get_min_length();
195    let mut strings_extractors: Vec<Box<dyn StringsExtractor>> = strings_config
196        .get_encodings()
197        .iter()
198        .map(|e| new_strings_extractor(strings_writer.clone(), *e, min_length))
199        .collect();
200    strings_config.consume(|offset: usize, c: u8| {
201        strings_extractors
202            .iter_mut()
203            .try_for_each(|strings_extractor| -> ErrorResult {
204                if strings_extractor.can_consume(c) {
205                    strings_extractor.consume(offset as u64, c)?;
206                } else {
207                    strings_extractor.stop_consume()?;
208                }
209                Ok(())
210            })?;
211        Ok(())
212    })?;
213    strings_extractors
214        .iter_mut()
215        .try_for_each(|strings_extractor| -> ErrorResult {
216            strings_extractor.stop_consume()?;
217            Ok(())
218        })?;
219    Ok(())
220}
221
222/// Extract strings from binary data.
223///
224/// Examples:
225/// ```
226/// use rust_strings::{FileConfig, BytesConfig, strings, Encoding};
227/// use std::path::Path;
228///
229/// let config = FileConfig::new(Path::new("/bin/ls")).with_min_length(5);
230/// let extracted_strings = strings(&config);
231///
232/// // Extract utf16le strings
233/// let config = FileConfig::new(Path::new("C:\\Windows\\notepad.exe"))
234///     .with_min_length(15)
235///     .with_encoding(Encoding::UTF16LE);
236/// let extracted_strings = strings(&config);
237///
238/// // Extract ascii and utf16le strings
239/// let config = FileConfig::new(Path::new("C:\\Windows\\notepad.exe"))
240///     .with_min_length(15)
241///     .with_encoding(Encoding::ASCII)
242///     .with_encoding(Encoding::UTF16LE);
243/// let extracted_strings = strings(&config);
244///
245/// let config = BytesConfig::new(b"test\x00".to_vec());
246/// let extracted_strings = strings(&config);
247/// assert_eq!(vec![(String::from("test"), 0)], extracted_strings.unwrap());
248/// ```
249pub fn strings<T: Config>(strings_config: &T) -> Result<Vec<(String, u64)>, Box<dyn Error>> {
250    let vector_writer = Rc::new(RefCell::new(VectorWriter::new()));
251    _strings(strings_config, vector_writer.clone())?;
252    let result = Ok(vector_writer.borrow_mut().get_strings());
253    result
254}
255
256/// Dump strings from binary data to json file.
257///
258/// Examples:
259/// ```
260/// use std::path::PathBuf;
261/// use rust_strings::{BytesConfig, dump_strings};
262///
263/// let config = BytesConfig::new(b"test\x00".to_vec());
264/// dump_strings(&config, PathBuf::from("strings.json"));
265///
266pub fn dump_strings<T: Config>(strings_config: &T, output: PathBuf) -> ErrorResult {
267    let output_file = File::create(output)?;
268    let vector_writer = Rc::new(RefCell::new(JsonWriter::new(output_file)));
269    _strings(strings_config, vector_writer.clone())?;
270    vector_writer.borrow_mut().finish()?;
271    Ok(())
272}