Skip to main content

dm_database_sqllog2db/
parser.rs

//! SQL log parsing module.
//! Uses the dm-database-parser-sqllog library to parse SQL log files produced by the Dameng (DM) database.
3use crate::error::{Error, ParserError, Result};
4use log::{debug, info, warn};
5use std::path::{Path, PathBuf};
6
7/// SQL 日志解析器
8#[derive(Debug)]
9pub(crate) struct SqllogParser {
10    /// 日志输入列表(每条可为文件路径、目录路径或 glob 模式)
11    inputs: Vec<String>,
12}
13
14impl SqllogParser {
15    /// 创建新的 SQL 日志解析器,接受多个输入路径/模式
16    pub(crate) fn new(inputs: Vec<String>) -> Self {
17        Self { inputs }
18    }
19
20    /// 返回所有日志文件的路径列表(已合并去重排序)
21    /// 空结果返回 Ok(空 Vec);NoFilesFound 由 `handle_run` 层触发
22    pub(crate) fn log_files(&self) -> Result<Vec<PathBuf>> {
23        let mut all = Vec::new();
24        for input in &self.inputs {
25            let mut files = Self::expand_single(input)?;
26            all.append(&mut files);
27        }
28        all.sort();
29        all.dedup();
30        Ok(all)
31    }
32
33    /// 展开单条 input(文件/目录/glob 模式)为日志文件列表
34    fn expand_single(input: &str) -> Result<Vec<PathBuf>> {
35        // Glob 模式检测
36        if input.contains('*') || input.contains('?') || input.contains('[') {
37            return Self::scan_glob(input);
38        }
39
40        let path = Path::new(input);
41
42        if !path.exists() {
43            return Err(Error::Parser(ParserError::PathNotFound {
44                path: PathBuf::from(input),
45            }));
46        }
47
48        let mut log_files = Vec::new();
49
50        if path.is_file() {
51            // 单个文件
52            info!("Parsing single log file: {}", path.display());
53            log_files.push(path.to_path_buf());
54        } else if path.is_dir() {
55            // 目录:扫描所有 .log 文件
56            info!("Scanning log directory: {}", path.display());
57
58            let entries = std::fs::read_dir(path).map_err(|e| {
59                Error::Parser(ParserError::ReadDirFailed {
60                    path: PathBuf::from(input),
61                    reason: e.to_string(),
62                })
63            })?;
64
65            for entry in entries {
66                let entry = entry.map_err(|e| {
67                    Error::Parser(ParserError::ReadDirFailed {
68                        path: PathBuf::from(input),
69                        reason: e.to_string(),
70                    })
71                })?;
72
73                let entry_path = entry.path();
74
75                if entry_path.is_file() && entry_path.extension().is_some_and(|ext| ext == "log") {
76                    debug!("Found log file: {}", entry_path.display());
77                    log_files.push(entry_path);
78                }
79            }
80
81            if log_files.is_empty() {
82                warn!("No .log files found in directory {}", path.display());
83            } else {
84                info!("Found {} log files", log_files.len());
85            }
86        } else {
87            return Err(Error::Parser(ParserError::InvalidPath {
88                path: PathBuf::from(input),
89                reason: "既不是文件也不是目录".to_string(),
90                line_number: None,
91            }));
92        }
93
94        log_files.sort();
95        Ok(log_files)
96    }
97
98    /// 使用 glob 模式扫描日志文件
99    fn scan_glob(pattern: &str) -> Result<Vec<PathBuf>> {
100        // Windows 路径用反斜杠,glob crate 只接受正斜杠,统一替换
101        #[cfg(windows)]
102        let pattern_normalized = pattern.replace('\\', "/");
103        #[cfg(not(windows))]
104        let pattern_normalized = pattern.to_owned();
105        let pattern = pattern_normalized.as_str();
106
107        let mut log_files: Vec<PathBuf> = glob::glob(pattern)
108            .map_err(|e| {
109                Error::Parser(ParserError::InvalidPath {
110                    path: PathBuf::from(pattern),
111                    reason: format!("invalid glob pattern: {e}. Check glob syntax (e.g. wildcards must not include unmatched brackets)"),
112                    line_number: None,
113                })
114            })?
115            .filter_map(std::result::Result::ok)
116            .filter(|p| p.is_file() && p.extension().is_some_and(|ext| ext == "log"))
117            .collect();
118
119        log_files.sort();
120
121        if log_files.is_empty() {
122            warn!("No .log files matched glob pattern: {pattern}");
123        } else {
124            info!(
125                "Glob matched {} log files for pattern: {pattern}",
126                log_files.len()
127            );
128        }
129
130        Ok(log_files)
131    }
132}
133
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::{Path, PathBuf};

    /// Creates an empty file called `name` under `dir` and returns its full path.
    fn touch(dir: &Path, name: &str) -> PathBuf {
        let target = dir.join(name);
        std::fs::write(&target, "").unwrap();
        target
    }

    /// Renders a path as the string form in which it is handed to the parser.
    fn as_input(path: &Path) -> String {
        path.to_string_lossy().into_owned()
    }

    /// Builds a parser over the given inputs.
    fn parser_for(inputs: Vec<String>) -> SqllogParser {
        SqllogParser::new(inputs)
    }

    /// A path that does not exist is reported as an error.
    #[test]
    fn test_log_files_nonexistent_path() {
        let outcome = parser_for(vec!["/this/does/not/exist/at/all".to_string()]).log_files();
        assert!(outcome.is_err());
    }

    /// An empty directory yields an empty list, not an error.
    #[test]
    fn test_log_files_empty_directory() {
        let tmp = tempfile::TempDir::new().unwrap();
        let found = parser_for(vec![as_input(tmp.path())]).log_files().unwrap();
        assert!(found.is_empty());
    }

    /// A directory containing one `.log` file yields exactly that file.
    #[test]
    fn test_log_files_with_log_file() {
        let tmp = tempfile::TempDir::new().unwrap();
        touch(tmp.path(), "test.log");
        let found = parser_for(vec![as_input(tmp.path())]).log_files().unwrap();
        assert_eq!(found.len(), 1);
    }

    /// Files with other extensions are skipped when scanning a directory.
    #[test]
    fn test_log_files_ignores_non_log_files() {
        let tmp = tempfile::TempDir::new().unwrap();
        for name in ["test.log", "test.txt", "test.csv"] {
            touch(tmp.path(), name);
        }
        let found = parser_for(vec![as_input(tmp.path())]).log_files().unwrap();
        assert_eq!(found.len(), 1);
    }

    /// A single file input is returned unchanged.
    #[test]
    fn test_log_files_single_file() {
        let tmp = tempfile::TempDir::new().unwrap();
        let file_path = touch(tmp.path(), "single.log");
        let found = parser_for(vec![as_input(&file_path)]).log_files().unwrap();
        assert_eq!(found.len(), 1);
        assert_eq!(found[0], file_path);
    }

    /// Directory results come back in sorted order regardless of creation order.
    #[test]
    fn test_log_files_sorted() {
        let tmp = tempfile::TempDir::new().unwrap();
        for name in ["c.log", "a.log", "b.log"] {
            touch(tmp.path(), name);
        }
        let found = parser_for(vec![as_input(tmp.path())]).log_files().unwrap();
        assert_eq!(found.len(), 3);

        let mut names = Vec::with_capacity(found.len());
        for file in &found {
            names.push(file.file_name().unwrap().to_string_lossy().into_owned());
        }
        assert_eq!(names, vec!["a.log", "b.log", "c.log"]);
    }

    /// A glob pattern only picks up the matching `.log` files.
    #[test]
    fn test_log_files_glob_pattern() {
        let tmp = tempfile::TempDir::new().unwrap();
        for name in ["2025-01.log", "2025-02.log", "other.txt"] {
            touch(tmp.path(), name);
        }
        let pattern = format!("{}/*.log", tmp.path().display());
        let found = parser_for(vec![pattern]).log_files().unwrap();
        assert_eq!(found.len(), 2);
    }

    /// A glob pattern that matches nothing yields an empty list, not an error.
    #[test]
    fn test_log_files_glob_no_match() {
        let tmp = tempfile::TempDir::new().unwrap();
        let pattern = format!("{}/nomatch*.log", tmp.path().display());
        let found = parser_for(vec![pattern]).log_files().unwrap();
        assert!(found.is_empty());
    }

    /// A `[` without a closing `]` is an invalid glob pattern and must be rejected.
    #[test]
    fn test_log_files_invalid_glob_pattern() {
        let outcome = parser_for(vec!["/tmp/[invalid".to_string()]).log_files();
        assert!(outcome.is_err());
    }

    /// Several inputs are merged, and a repeated input does not produce duplicates.
    #[test]
    fn test_log_files_multi_input_merge_and_dedup() {
        let base = tempfile::TempDir::new().unwrap();
        let dir_a = base.path().join("a");
        let dir_b = base.path().join("b");
        for dir in [&dir_a, &dir_b] {
            std::fs::create_dir_all(dir).unwrap();
        }
        touch(&dir_a, "x.log");
        touch(&dir_b, "y.log");

        // `dir_a` is passed twice on purpose: de-duplication should filter the repeat out.
        let inputs = vec![as_input(&dir_a), as_input(&dir_b), as_input(&dir_a)];
        let files = parser_for(inputs).log_files().unwrap();
        assert_eq!(
            files.len(),
            2,
            "dedup should produce 2 files, got: {files:?}"
        );
        // Verify lexicographic ordering.
        assert!(files[0] < files[1]);
    }

    /// File, directory and glob inputs can be combined in one call.
    #[test]
    fn test_log_files_multi_input_mixes_file_dir_glob() {
        let base = tempfile::TempDir::new().unwrap();
        let dir1 = base.path().join("dir1");
        let dir2 = base.path().join("dir2");
        for dir in [&dir1, &dir2] {
            std::fs::create_dir_all(dir).unwrap();
        }

        let single_log = touch(base.path(), "single.log");
        touch(&dir1, "a.log");
        touch(&dir2, "c.log");

        let glob_pattern = format!("{}/*.log", dir2.display());
        let inputs = vec![as_input(&single_log), as_input(&dir1), glob_pattern];
        let files = parser_for(inputs).log_files().unwrap();
        assert_eq!(files.len(), 3, "expected 3 files, got: {files:?}");
    }
}