Skip to main content

dm_database_parser_sqllog/parser/
builder.rs

1use std::fs::File;
2use std::io::{Read, Seek, SeekFrom};
3use std::path::{Path, PathBuf};
4use std::str;
5
6use crate::error::ParseError;
7use crate::parser::LogParser;
8use crate::parser::encoding::FileEncodingHint;
9
10/// 配置并构建 [`LogParser`] 的构建器模式 API。
11pub struct LogParserBuilder {
12    path: PathBuf,
13    encoding_hint: Option<FileEncodingHint>,
14}
15
16impl LogParserBuilder {
17    /// 创建一个新的 `LogParserBuilder`。
18    pub fn new<P: AsRef<Path>>(path: P) -> Self {
19        Self {
20            path: path.as_ref().to_path_buf(),
21            encoding_hint: None,
22        }
23    }
24
25    /// 设置文件编码提示。
26    pub fn encoding_hint(mut self, hint: FileEncodingHint) -> Self {
27        self.encoding_hint = Some(hint);
28        self
29    }
30
31    /// 构建并返回 [`LogParser`] 实例。
32    ///
33    /// 仅读取文件头尾各最多 64 KB / 4 KB 用于编码探测,不加载整个文件。
34    pub fn build(self) -> Result<LogParser, ParseError> {
35        let encoding = match self.encoding_hint {
36            Some(hint) => hint,
37            None => detect_encoding(&self.path)?,
38        };
39        Ok(LogParser { path: self.path, encoding })
40    }
41}
42
43fn detect_encoding(path: &Path) -> Result<FileEncodingHint, ParseError> {
44    let mut file = File::open(path).map_err(|e| ParseError::IoError(e.to_string()))?;
45
46    let mut head_buf = Vec::with_capacity(64 * 1024);
47    file.by_ref()
48        .take(64 * 1024)
49        .read_to_end(&mut head_buf)
50        .map_err(|e| ParseError::IoError(e.to_string()))?;
51
52    let head_ok = str::from_utf8(&head_buf).is_ok();
53
54    let file_size = file
55        .seek(SeekFrom::End(0))
56        .map_err(|e| ParseError::IoError(e.to_string()))?;
57
58    let tail_ok = if file_size > head_buf.len() as u64 {
59        let tail_start = file_size.saturating_sub(4 * 1024).max(head_buf.len() as u64);
60        file.seek(SeekFrom::Start(tail_start))
61            .map_err(|e| ParseError::IoError(e.to_string()))?;
62        let mut tail_buf = Vec::with_capacity(4 * 1024);
63        file.read_to_end(&mut tail_buf)
64            .map_err(|e| ParseError::IoError(e.to_string()))?;
65        str::from_utf8(&tail_buf).is_ok()
66    } else {
67        true
68    };
69
70    Ok(if head_ok && tail_ok {
71        FileEncodingHint::Utf8
72    } else {
73        FileEncodingHint::Gb18030
74    })
75}