dm_database_sqllog2db/
parser.rs1use crate::error::{Error, ParserError, Result};
4use log::{debug, info, warn};
5use std::path::{Path, PathBuf};
6
/// Resolves user-supplied inputs into the concrete set of log files to parse.
///
/// The parser only stores the raw input strings; all filesystem access is
/// deferred until the file list is actually requested.
#[derive(Debug)]
pub(crate) struct SqllogParser {
    /// Raw inputs exactly as supplied by the caller. Each entry is later
    /// interpreted as a single file, a directory, or a glob pattern.
    inputs: Vec<String>,
}
13
14impl SqllogParser {
15 pub(crate) fn new(inputs: Vec<String>) -> Self {
17 Self { inputs }
18 }
19
20 pub(crate) fn log_files(&self) -> Result<Vec<PathBuf>> {
23 let mut all = Vec::new();
24 for input in &self.inputs {
25 let mut files = Self::expand_single(input)?;
26 all.append(&mut files);
27 }
28 all.sort();
29 all.dedup();
30 Ok(all)
31 }
32
33 fn expand_single(input: &str) -> Result<Vec<PathBuf>> {
35 if input.contains('*') || input.contains('?') || input.contains('[') {
37 return Self::scan_glob(input);
38 }
39
40 let path = Path::new(input);
41
42 if !path.exists() {
43 return Err(Error::Parser(ParserError::PathNotFound {
44 path: PathBuf::from(input),
45 }));
46 }
47
48 let mut log_files = Vec::new();
49
50 if path.is_file() {
51 info!("Parsing single log file: {}", path.display());
53 log_files.push(path.to_path_buf());
54 } else if path.is_dir() {
55 info!("Scanning log directory: {}", path.display());
57
58 let entries = std::fs::read_dir(path).map_err(|e| {
59 Error::Parser(ParserError::ReadDirFailed {
60 path: PathBuf::from(input),
61 reason: e.to_string(),
62 })
63 })?;
64
65 for entry in entries {
66 let entry = entry.map_err(|e| {
67 Error::Parser(ParserError::ReadDirFailed {
68 path: PathBuf::from(input),
69 reason: e.to_string(),
70 })
71 })?;
72
73 let entry_path = entry.path();
74
75 if entry_path.is_file() && entry_path.extension().is_some_and(|ext| ext == "log") {
76 debug!("Found log file: {}", entry_path.display());
77 log_files.push(entry_path);
78 }
79 }
80
81 if log_files.is_empty() {
82 warn!("No .log files found in directory {}", path.display());
83 } else {
84 info!("Found {} log files", log_files.len());
85 }
86 } else {
87 return Err(Error::Parser(ParserError::InvalidPath {
88 path: PathBuf::from(input),
89 reason: "既不是文件也不是目录".to_string(),
90 line_number: None,
91 }));
92 }
93
94 log_files.sort();
95 Ok(log_files)
96 }
97
98 fn scan_glob(pattern: &str) -> Result<Vec<PathBuf>> {
100 #[cfg(windows)]
102 let pattern_normalized = pattern.replace('\\', "/");
103 #[cfg(not(windows))]
104 let pattern_normalized = pattern.to_owned();
105 let pattern = pattern_normalized.as_str();
106
107 let mut log_files: Vec<PathBuf> = glob::glob(pattern)
108 .map_err(|e| {
109 Error::Parser(ParserError::InvalidPath {
110 path: PathBuf::from(pattern),
111 reason: format!("invalid glob pattern: {e}. Check glob syntax (e.g. wildcards must not include unmatched brackets)"),
112 line_number: None,
113 })
114 })?
115 .filter_map(std::result::Result::ok)
116 .filter(|p| p.is_file() && p.extension().is_some_and(|ext| ext == "log"))
117 .collect();
118
119 log_files.sort();
120
121 if log_files.is_empty() {
122 warn!("No .log files matched glob pattern: {pattern}");
123 } else {
124 info!(
125 "Glob matched {} log files for pattern: {pattern}",
126 log_files.len()
127 );
128 }
129
130 Ok(log_files)
131 }
132}
133
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates an empty file at `path`.
    fn touch(path: &Path) {
        std::fs::write(path, "").unwrap();
    }

    /// Converts a path into the string form accepted by `SqllogParser::new`.
    fn as_input(path: &Path) -> String {
        path.to_string_lossy().into_owned()
    }

    /// Builds a parser over `inputs` and resolves its log files.
    fn resolve(inputs: Vec<String>) -> Result<Vec<PathBuf>> {
        SqllogParser::new(inputs).log_files()
    }

    // A path that does not exist must be reported as an error.
    #[test]
    fn test_log_files_nonexistent_path() {
        let outcome = resolve(vec!["/this/does/not/exist/at/all".to_string()]);
        assert!(outcome.is_err());
    }

    // An empty directory resolves to an empty list, not an error.
    #[test]
    fn test_log_files_empty_directory() {
        let tmp = tempfile::TempDir::new().unwrap();
        let found = resolve(vec![as_input(tmp.path())]).unwrap();
        assert!(found.is_empty());
    }

    // A directory containing one `.log` file yields exactly that file.
    #[test]
    fn test_log_files_with_log_file() {
        let tmp = tempfile::TempDir::new().unwrap();
        touch(&tmp.path().join("test.log"));
        let found = resolve(vec![as_input(tmp.path())]).unwrap();
        assert_eq!(found.len(), 1);
    }

    // Files with other extensions are skipped during a directory scan.
    #[test]
    fn test_log_files_ignores_non_log_files() {
        let tmp = tempfile::TempDir::new().unwrap();
        for name in ["test.log", "test.txt", "test.csv"] {
            touch(&tmp.path().join(name));
        }
        let found = resolve(vec![as_input(tmp.path())]).unwrap();
        assert_eq!(found.len(), 1);
    }

    // A single file input is returned unchanged.
    #[test]
    fn test_log_files_single_file() {
        let tmp = tempfile::TempDir::new().unwrap();
        let file_path = tmp.path().join("single.log");
        touch(&file_path);
        let found = resolve(vec![as_input(&file_path)]).unwrap();
        assert_eq!(found.len(), 1);
        assert_eq!(found[0], file_path);
    }

    // Results come back sorted regardless of creation order.
    #[test]
    fn test_log_files_sorted() {
        let tmp = tempfile::TempDir::new().unwrap();
        for name in ["c.log", "a.log", "b.log"] {
            touch(&tmp.path().join(name));
        }
        let found = resolve(vec![as_input(tmp.path())]).unwrap();
        assert_eq!(found.len(), 3);
        let mut names = Vec::new();
        for file in &found {
            names.push(file.file_name().unwrap().to_string_lossy().into_owned());
        }
        assert_eq!(names, vec!["a.log", "b.log", "c.log"]);
    }

    // A glob pattern matches only the `.log` files it covers.
    #[test]
    fn test_log_files_glob_pattern() {
        let tmp = tempfile::TempDir::new().unwrap();
        for name in ["2025-01.log", "2025-02.log", "other.txt"] {
            touch(&tmp.path().join(name));
        }
        let pattern = format!("{}/*.log", tmp.path().display());
        let found = resolve(vec![pattern]).unwrap();
        assert_eq!(found.len(), 2);
    }

    // A glob that matches nothing yields an empty list, not an error.
    #[test]
    fn test_log_files_glob_no_match() {
        let tmp = tempfile::TempDir::new().unwrap();
        let pattern = format!("{}/nomatch*.log", tmp.path().display());
        let found = resolve(vec![pattern]).unwrap();
        assert!(found.is_empty());
    }

    // An unmatched bracket makes the pattern invalid and must surface as an error.
    #[test]
    fn test_log_files_invalid_glob_pattern() {
        let outcome = resolve(vec!["/tmp/[invalid".to_string()]);
        assert!(outcome.is_err());
    }

    // Repeating an input must not duplicate its files in the merged result.
    #[test]
    fn test_log_files_multi_input_merge_and_dedup() {
        let base = tempfile::TempDir::new().unwrap();
        let dir_a = base.path().join("a");
        let dir_b = base.path().join("b");
        std::fs::create_dir_all(&dir_a).unwrap();
        std::fs::create_dir_all(&dir_b).unwrap();
        touch(&dir_a.join("x.log"));
        touch(&dir_b.join("y.log"));

        let input_a = as_input(&dir_a);
        let input_b = as_input(&dir_b);
        let found = resolve(vec![input_a.clone(), input_b, input_a]).unwrap();
        assert_eq!(
            found.len(),
            2,
            "dedup should produce 2 files, got: {found:?}"
        );
        assert!(found[0] < found[1]);
    }

    // File, directory and glob inputs can be mixed in one invocation.
    #[test]
    fn test_log_files_multi_input_mixes_file_dir_glob() {
        let base = tempfile::TempDir::new().unwrap();
        let dir1 = base.path().join("dir1");
        let dir2 = base.path().join("dir2");
        std::fs::create_dir_all(&dir1).unwrap();
        std::fs::create_dir_all(&dir2).unwrap();

        let single_log = base.path().join("single.log");
        touch(&single_log);
        touch(&dir1.join("a.log"));
        touch(&dir2.join("c.log"));

        let glob_pattern = format!("{}/*.log", dir2.display());
        let found = resolve(vec![as_input(&single_log), as_input(&dir1), glob_pattern]).unwrap();
        assert_eq!(found.len(), 3, "expected 3 files, got: {found:?}");
    }
}