1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
use crate::document::{Document, Paragraph};
use crate::error::Error;
use crate::reader;
use globwalk::DirEntry;
use regex::{Captures, Regex, RegexBuilder};
use std::path::Path;
use std::result::Result as StdResult;
/// Parser that locates files through glob patterns and extracts
/// paragraphs from their contents with a regular expression.
#[derive(Clone, Debug)]
pub struct Parser<'a> {
    /// Glob patterns; each one is joined onto the base path given to `parse`.
    pub glob_path: &'a [&'a str],
    /// File names that must appear among the globbed files.
    /// Empty names are skipped during the check.
    pub required_files: &'a [&'a str],
    /// Regular expression whose captures are collected from each file's contents.
    pub regex: Regex,
}
impl<'a> Parser<'a> {
pub fn new(
glob_path: &'a [&'a str],
required_files: &'a [&'a str],
regex: &'a str,
) -> Result<Self, Error> {
Ok(Self {
glob_path,
required_files,
regex: RegexBuilder::new(regex).multi_line(true).build()?,
})
}
pub fn parse(&self, base_path: &Path) -> Result<Vec<Document>, Error> {
let mut documents = Vec::new();
let mut glob_files = Vec::new();
for glob in self.glob_path {
glob_files.extend(
globwalk::glob(base_path.join(glob).to_str().ok_or(Error::Utf8Error)?)?
.filter_map(StdResult::ok)
.collect::<Vec<DirEntry>>(),
);
}
if glob_files.is_empty() {
return Err(Error::EmptyFileListError);
}
self.required_files
.iter()
.filter(|file_name| !file_name.is_empty())
.try_for_each(|file_name| {
glob_files
.iter()
.find(|file| file.file_name().to_str() == Some(file_name))
.map(drop)
.ok_or_else(|| Error::MissingFileError(file_name.to_string()))
})?;
for file in glob_files {
let input = {
#[cfg(feature = "gzip")]
if file.path().extension().and_then(|ext| ext.to_str()) == Some("gz") {
reader::read_gzip(file.path())
} else {
reader::read_to_string(file.path())
}
#[cfg(not(feature = "gzip"))]
reader::read_to_string(file.path())
}?;
let capture_group = self
.regex
.captures_iter(&input)
.collect::<Vec<Captures<'_>>>();
documents.push(Document::new(
Paragraph::from_captures(capture_group, &input)?,
file.path().to_path_buf(),
));
}
Ok(documents)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parses this crate's own manifest and checks that the `[package]`
    /// paragraph is found and contains the package name.
    #[test]
    fn test_document_parser() -> Result<(), Error> {
        let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        let parser = Parser::new(&["Cargo.t*"], &[], r#"^(\[package\])\n"#)?;
        let mut parsed = parser.parse(manifest_dir.as_path())?;

        let name_entry = format!("name = \"{}\"", env!("CARGO_PKG_NAME"));
        assert!(parsed[0].paragraphs[0].contents.contains(&name_entry));

        // Blank the contents so the comparison below does not depend on
        // the rest of the manifest.
        parsed[0].paragraphs[0].contents.clear();
        let expected = Document {
            paragraphs: vec![Paragraph {
                title: String::from("[package]"),
                contents: String::new(),
            }],
            path: manifest_dir.join("Cargo.toml"),
        };
        assert_eq!(expected, parsed[0]);
        Ok(())
    }
}