dm_database_parser_sqllog/parser/
builder.rs1use std::fs::File;
2use std::io::{Read, Seek, SeekFrom};
3use std::path::{Path, PathBuf};
4use std::str;
5
6use crate::error::ParseError;
7use crate::parser::LogParser;
8use crate::parser::encoding::FileEncodingHint;
9
10pub struct LogParserBuilder {
12 path: PathBuf,
13 encoding_hint: Option<FileEncodingHint>,
14}
15
16impl LogParserBuilder {
17 pub fn new<P: AsRef<Path>>(path: P) -> Self {
19 Self {
20 path: path.as_ref().to_path_buf(),
21 encoding_hint: None,
22 }
23 }
24
25 pub fn encoding_hint(mut self, hint: FileEncodingHint) -> Self {
27 self.encoding_hint = Some(hint);
28 self
29 }
30
31 pub fn build(self) -> Result<LogParser, ParseError> {
35 let encoding = match self.encoding_hint {
36 Some(hint) => hint,
37 None => detect_encoding(&self.path)?,
38 };
39 Ok(LogParser { path: self.path, encoding })
40 }
41}
42
43fn detect_encoding(path: &Path) -> Result<FileEncodingHint, ParseError> {
44 let mut file = File::open(path).map_err(|e| ParseError::IoError(e.to_string()))?;
45
46 let mut head_buf = Vec::with_capacity(64 * 1024);
47 file.by_ref()
48 .take(64 * 1024)
49 .read_to_end(&mut head_buf)
50 .map_err(|e| ParseError::IoError(e.to_string()))?;
51
52 let head_ok = str::from_utf8(&head_buf).is_ok();
53
54 let file_size = file
55 .seek(SeekFrom::End(0))
56 .map_err(|e| ParseError::IoError(e.to_string()))?;
57
58 let tail_ok = if file_size > head_buf.len() as u64 {
59 let tail_start = file_size.saturating_sub(4 * 1024).max(head_buf.len() as u64);
60 file.seek(SeekFrom::Start(tail_start))
61 .map_err(|e| ParseError::IoError(e.to_string()))?;
62 let mut tail_buf = Vec::with_capacity(4 * 1024);
63 file.read_to_end(&mut tail_buf)
64 .map_err(|e| ParseError::IoError(e.to_string()))?;
65 str::from_utf8(&tail_buf).is_ok()
66 } else {
67 true
68 };
69
70 Ok(if head_ok && tail_ok {
71 FileEncodingHint::Utf8
72 } else {
73 FileEncodingHint::Gb18030
74 })
75}