hawkeye_fmt/header/
parser.rs1use std::fmt::Display;
16use std::fmt::Formatter;
17use std::fs::File;
18use std::io::BufRead;
19use std::io::BufReader;
20use std::path::Path;
21
22use crate::header::model::HeaderDef;
23
/// Outcome of scanning a file for an existing header, as produced by [`parse_header`].
#[derive(Debug)]
pub struct HeaderParser {
    /// Byte offset into the file content where the header region starts
    /// (after a UTF-8 BOM and any lines matched by the skip-line pattern).
    pub begin_pos: usize,
    /// Byte offset where the detected existing header region ends, or `None`
    /// when no existing header was recognised.
    pub end_pos: Option<usize>,
    /// The file content that was scanned; ownership is handed back to the caller.
    pub file_content: FileContent,
}
31
32pub fn parse_header(
33 mut file_content: FileContent,
34 header_def: &HeaderDef,
35 keywords: &[String],
36) -> HeaderParser {
37 let mut line = file_content.next_line();
38
39 let begin_pos = find_first_position(&mut line, &mut file_content, header_def);
41
42 let existing_header = existing_header(&mut line, &mut file_content, header_def, keywords);
44
45 let end_pos = if existing_header {
47 let mut end = file_content.pos;
49 line = file_content.next_line();
50 if begin_pos == 0 {
51 while line.as_ref().map(|l| l.trim().is_empty()).unwrap_or(false) {
52 end = file_content.pos;
53 line = file_content.next_line();
54 }
55 }
56 if header_def.end_line.ends_with('\n')
57 && line.as_ref().map(|l| l.trim().is_empty()).unwrap_or(false)
58 {
59 end = file_content.pos;
60 }
61 Some(end)
62 } else {
63 None
64 };
65
66 HeaderParser {
67 begin_pos,
68 end_pos,
69 file_content,
70 }
71}
72
73fn find_first_position(
74 line: &mut Option<String>,
75 file_content: &mut FileContent,
76 header_def: &HeaderDef,
77) -> usize {
78 const UTF8_BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];
79
80 let mut begin_pos = 0;
81
82 if let Some(l) = line.as_ref() {
83 if l.as_bytes().starts_with(&UTF8_BOM) {
85 log::debug!("Detected UTF-8 BOM for {file_content}; skip");
86 begin_pos = 3;
87 file_content.reset_to(3);
88 }
89 }
90
91 if header_def.skip_line_pattern.is_some() {
92 while line
94 .as_ref()
95 .map(|l| !header_def.is_skip_line(l))
96 .unwrap_or(false)
97 {
98 begin_pos = file_content.pos;
99 *line = file_content.next_line();
100 }
101
102 while line
105 .as_ref()
106 .map(|l| header_def.is_skip_line(l))
107 .unwrap_or(false)
108 {
109 begin_pos = file_content.pos;
110 *line = file_content.next_line();
111 }
112
113 if line.is_none() {
116 begin_pos = 0;
117 file_content.reset();
118 *line = file_content.next_line();
119
120 if let Some(l) = line.as_ref() {
122 if l.as_bytes().starts_with(&UTF8_BOM) {
123 begin_pos = 3;
124 file_content.reset_to(3);
125 }
126 }
127 }
128 }
129
130 begin_pos
131}
132
/// Checks whether a header in the style of `header_def` is already present at
/// the current scan position, advancing `line` / `file_content` over it.
///
/// * `line` - the current line; updated as lines are consumed.
/// * `file_content` - the cursor the lines are read from.
/// * `header_def` - supplies the first/last header line checks, the per-line
///   prefix and the `multiple_lines` / `allow_blank_lines` flags.
/// * `keywords` - every keyword must occur in the lowercased text of the
///   collected header lines.
///
/// Returns `false` when the first non-blank line is not a first header line,
/// when nothing follows that first line, or when any keyword is missing.
fn existing_header(
    line: &mut Option<String>,
    file_content: &mut FileContent,
    header_def: &HeaderDef,
    keywords: &[String],
) -> bool {
    // Skip blank lines in front of the candidate header.
    while line.as_ref().map(|l| l.trim().is_empty()).unwrap_or(false) {
        *line = file_content.next_line();
    }

    // The first non-blank line must be recognised as a first header line.
    let l = match line.as_ref() {
        Some(l) if header_def.is_first_header_line(l) => l,
        _ => return false,
    };

    let mut got_header = false;
    // Lowercased text of the header lines seen so far, used for the keyword check.
    let mut in_place_header = String::new();
    in_place_header.push_str(&l.to_lowercase());

    *line = file_content.next_line();

    // Styles that allow blank lines may have some right after the first line.
    if header_def.allow_blank_lines {
        while line.as_ref().map(|l| l.trim().is_empty()).unwrap_or(false) {
            *line = file_content.next_line();
        }
    }

    if let Some(l) = line.as_ref() {
        // Prefix each header line is expected to start with. Trailing whitespace
        // is trimmed, unless that leaves nothing for a single-line style, in
        // which case the untrimmed prefix is used.
        let before = {
            let mut before = header_def.before_each_line.trim_end();
            if before.is_empty() && !header_def.multiple_lines {
                before = header_def.before_each_line.as_str();
            }
            before
        };

        let found_end = {
            let mut found_end = false;
            if (header_def.multiple_lines && header_def.is_last_header_line(l))
                || l.trim().is_empty()
            {
                // The header ends immediately: closing line or blank line.
                in_place_header.push_str(&l.to_lowercase());
                found_end = true;
            } else {
                // Collect every line carrying the prefix; for multi-line styles
                // stop (without advancing) on the closing line.
                loop {
                    match line.as_ref() {
                        Some(l) if l.starts_with(before) => {
                            in_place_header.push_str(&l.to_lowercase());
                            if header_def.multiple_lines && header_def.is_last_header_line(l) {
                                found_end = true;
                                break;
                            }
                        }
                        _ => break,
                    }
                    *line = file_content.next_line();
                }

                // Stopping on a blank line or at the end of the file also counts
                // as having found the end.
                if line.as_ref().map(|l| l.trim().is_empty()).unwrap_or(true) {
                    found_end = true;
                }
            }
            found_end
        };

        if header_def.multiple_lines && header_def.allow_blank_lines && !found_end {
            // Skip blank lines, then move the cursor back to the start of the
            // most recently read line.
            loop {
                if !line.as_ref().map(|l| l.trim().is_empty()).unwrap_or(false) {
                    break;
                }
                *line = file_content.next_line();
            }
            file_content.rewind();
        } else if !header_def.multiple_lines && !found_end {
            // Give the line that stopped the scan back to the caller.
            file_content.rewind();
        }

        if !header_def.multiple_lines {
            // Single-line style: keep consuming prefixed lines that are not the
            // last header line; if that runs into the end of the file, restore
            // the cursor to where this step started.
            let pos = file_content.pos;
            while line
                .as_ref()
                .map(|l| {
                    !header_def.is_last_header_line(l)
                        && (header_def.allow_blank_lines || !l.trim().is_empty())
                        && l.starts_with(before)
                })
                .unwrap_or(false)
            {
                *line = file_content.next_line();
            }
            if line.is_none() {
                file_content.reset_to(pos);
            }
        } else if line.is_some() {
            // Multi-line style: look one line ahead and keep it only when it is
            // the last header line; otherwise restore the cursor.
            let pos = file_content.pos;
            *line = file_content.next_line();
            if line
                .as_ref()
                .map(|l| !header_def.is_last_header_line(l))
                .unwrap_or(true)
            {
                file_content.reset_to(pos);
            }
        }

        // It only counts as a header when every keyword occurs in the collected
        // (lowercased) text.
        got_header = true;
        for keyword in keywords {
            if !in_place_header.contains(keyword) {
                got_header = false;
                break;
            }
        }
    }
    got_header
}
261
/// Text of a file held in memory together with a byte cursor for reading it
/// line by line.
#[derive(Debug)]
pub struct FileContent {
    /// Byte offset into `content` at which the next `next_line` call starts.
    pos: usize,
    /// Offset restored by `rewind`: the start of the line most recently returned
    /// by `next_line`, or the target of the last `reset_to`.
    old_pos: usize,
    /// File text in which every `\r\n` line ending has been replaced by `\n`.
    content: String,
    /// Lossy string form of the path the text was loaded from.
    filepath: String,
}

impl Display for FileContent {
    /// Writes the path the content was loaded from.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.filepath)
    }
}

impl FileContent {
    /// Loads `file`, converting `\r\n` line endings to `\n`.
    ///
    /// A final line without a trailing newline is kept as is, and a `\r` that is
    /// not directly followed by `\n` is preserved.
    ///
    /// # Errors
    ///
    /// Returns the I/O error raised while opening or reading the file.
    pub fn new(file: &Path) -> std::io::Result<Self> {
        let mut reader = BufReader::new(File::open(file)?);
        let mut content = String::new();
        let mut raw = String::new();

        while reader.read_line(&mut raw)? > 0 {
            match raw.strip_suffix('\n') {
                Some(body) => {
                    // Drop at most one '\r' in front of the newline.
                    content.push_str(body.strip_suffix('\r').unwrap_or(body));
                    content.push('\n');
                }
                // Last line of a file that does not end with a newline.
                None => content.push_str(&raw),
            }
            raw.clear();
        }

        Ok(Self {
            pos: 0,
            old_pos: 0,
            content,
            filepath: file.to_string_lossy().into_owned(),
        })
    }

    /// Moves the cursor to `pos` and makes `pos` the rewind target as well.
    pub fn reset_to(&mut self, pos: usize) {
        self.pos = pos;
        self.old_pos = pos;
    }

    /// Moves the cursor back to the start of the content.
    pub fn reset(&mut self) {
        self.reset_to(0);
    }

    /// Moves the cursor back to the start of the most recently read line.
    pub fn rewind(&mut self) {
        self.pos = self.old_pos;
    }

    /// Returns `true` once the cursor is at or past the end of the content.
    pub fn end_reached(&self) -> bool {
        self.content.len() <= self.pos
    }

    /// Returns the line starting at the cursor, without its `\n`, and advances
    /// the cursor past it. Returns `None` when the end has been reached.
    pub fn next_line(&mut self) -> Option<String> {
        if self.end_reached() {
            return None;
        }

        let start = self.pos;
        let rest = &self.content[start..];
        let (text, next_pos) = match rest.find('\n') {
            Some(offset) => (&rest[..offset], start + offset + 1),
            None => (rest, self.content.len()),
        };
        let text = text.to_string();

        self.old_pos = start;
        self.pos = next_pos;

        Some(text)
    }

    /// Returns a copy of the whole content.
    pub fn content(&self) -> String {
        self.content.as_str().to_owned()
    }

    /// Inserts `s` at byte offset `index`; the cursor is left unchanged.
    pub fn insert(&mut self, index: usize, s: &str) {
        self.content.insert_str(index, s);
    }

    /// Removes the bytes in `start..end`; the cursor is left unchanged.
    pub fn delete(&mut self, start: usize, end: usize) {
        self.content.replace_range(start..end, "");
    }
}