Skip to main content

hawkeye_fmt/header/
parser.rs

1// Copyright 2024 tison <wander4096@gmail.com>
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::Display;
16use std::fmt::Formatter;
17use std::fs::File;
18use std::io::BufRead;
19use std::io::BufReader;
20use std::path::Path;
21
22use crate::header::model::HeaderDef;
23
24#[derive(Debug)]
25pub struct HeaderParser {
26    pub begin_pos: usize,
27    /// Some if header exists; None if header does not exist.
28    pub end_pos: Option<usize>,
29    pub file_content: FileContent,
30}
31
32pub fn parse_header(
33    mut file_content: FileContent,
34    header_def: &HeaderDef,
35    keywords: &[String],
36) -> HeaderParser {
37    let mut line = file_content.next_line();
38
39    // 1. find begin position
40    let begin_pos = find_first_position(&mut line, &mut file_content, header_def);
41
42    // 2. has header
43    let existing_header = existing_header(&mut line, &mut file_content, header_def, keywords);
44
45    // 3. find end position
46    let end_pos = if existing_header {
47        // we check if there is a header, if the next line is the blank line of the header
48        let mut end = file_content.pos;
49        line = file_content.next_line();
50        if begin_pos == 0 {
51            while line.as_ref().map(|l| l.trim().is_empty()).unwrap_or(false) {
52                end = file_content.pos;
53                line = file_content.next_line();
54            }
55        }
56        if header_def.end_line.ends_with('\n')
57            && line.as_ref().map(|l| l.trim().is_empty()).unwrap_or(false)
58        {
59            end = file_content.pos;
60        }
61        Some(end)
62    } else {
63        None
64    };
65
66    HeaderParser {
67        begin_pos,
68        end_pos,
69        file_content,
70    }
71}
72
73fn find_first_position(
74    line: &mut Option<String>,
75    file_content: &mut FileContent,
76    header_def: &HeaderDef,
77) -> usize {
78    const UTF8_BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];
79
80    let mut begin_pos = 0;
81
82    if let Some(l) = line.as_ref() {
83        // skip UTF-8 BOM if exists
84        if l.as_bytes().starts_with(&UTF8_BOM) {
85            log::debug!("Detected UTF-8 BOM for {file_content}; skip");
86            begin_pos = 3;
87            file_content.reset_to(3);
88        }
89    }
90
91    if header_def.skip_line_pattern.is_some() {
92        // the format expect to find lines to be skipped
93        while line
94            .as_ref()
95            .map(|l| !header_def.is_skip_line(l))
96            .unwrap_or(false)
97        {
98            begin_pos = file_content.pos;
99            *line = file_content.next_line();
100        }
101
102        // at least we have found the line to skip, or we are the end of the file
103        // this time we are going to skip next lines if they match the skip pattern
104        while line
105            .as_ref()
106            .map(|l| header_def.is_skip_line(l))
107            .unwrap_or(false)
108        {
109            begin_pos = file_content.pos;
110            *line = file_content.next_line();
111        }
112
113        // After skipping everything we are at the end of the file
114        // Header has to be at the file beginning
115        if line.is_none() {
116            begin_pos = 0;
117            file_content.reset();
118            *line = file_content.next_line();
119
120            // recheck for UTF-8 BOM
121            if let Some(l) = line.as_ref() {
122                if l.as_bytes().starts_with(&UTF8_BOM) {
123                    begin_pos = 3;
124                    file_content.reset_to(3);
125                }
126            }
127        }
128    }
129
130    begin_pos
131}
132
133fn existing_header(
134    line: &mut Option<String>,
135    file_content: &mut FileContent,
136    header_def: &HeaderDef,
137    keywords: &[String],
138) -> bool {
139    // skip blank lines
140    while line.as_ref().map(|l| l.trim().is_empty()).unwrap_or(false) {
141        *line = file_content.next_line();
142    }
143
144    // check if there is already a header
145    let l = match line.as_ref() {
146        Some(l) if header_def.is_first_header_line(l) => l,
147        _ => return false,
148    };
149
150    let mut got_header = false;
151    let mut in_place_header = String::new();
152    in_place_header.push_str(&l.to_lowercase());
153
154    *line = file_content.next_line();
155
156    // skip blank lines before header text
157    if header_def.allow_blank_lines {
158        while line.as_ref().map(|l| l.trim().is_empty()).unwrap_or(false) {
159            *line = file_content.next_line();
160        }
161    }
162
163    // first header detected line & potential blank lines have been detected
164    // following lines should be header lines
165    if let Some(l) = line.as_ref() {
166        let before = {
167            let mut before = header_def.before_each_line.trim_end();
168            if before.is_empty() && !header_def.multiple_lines {
169                before = header_def.before_each_line.as_str();
170            }
171            before
172        };
173
174        let found_end = {
175            let mut found_end = false;
176            if (header_def.multiple_lines && header_def.is_last_header_line(l))
177                || l.trim().is_empty()
178            {
179                in_place_header.push_str(&l.to_lowercase());
180                found_end = true;
181            } else {
182                loop {
183                    match line.as_ref() {
184                        Some(l) if l.starts_with(before) => {
185                            in_place_header.push_str(&l.to_lowercase());
186                            if header_def.multiple_lines && header_def.is_last_header_line(l) {
187                                found_end = true;
188                                break;
189                            }
190                        }
191                        _ => break,
192                    }
193                    *line = file_content.next_line();
194                }
195
196                if line.as_ref().map(|l| l.trim().is_empty()).unwrap_or(true) {
197                    found_end = true;
198                }
199            }
200            found_end
201        };
202
203        // skip blank lines after header text
204        if header_def.multiple_lines && header_def.allow_blank_lines && !found_end {
205            loop {
206                if !line.as_ref().map(|l| l.trim().is_empty()).unwrap_or(false) {
207                    break;
208                }
209                *line = file_content.next_line();
210            }
211            file_content.rewind();
212        } else if !header_def.multiple_lines && !found_end {
213            file_content.rewind();
214        }
215
216        if !header_def.multiple_lines {
217            // keep track of the position for headers where the end line is the same as the
218            // before each line
219            let pos = file_content.pos;
220            // check if the line is the end line
221            while line
222                .as_ref()
223                .map(|l| {
224                    !header_def.is_last_header_line(l)
225                        && (header_def.allow_blank_lines || !l.trim().is_empty())
226                        && l.starts_with(before)
227                })
228                .unwrap_or(false)
229            {
230                *line = file_content.next_line();
231            }
232            if line.is_none() {
233                file_content.reset_to(pos);
234            }
235        } else if line.is_some() {
236            // we could end up there if we still have some lines, but not matching "before".
237            // This can be the last line in a multi line header
238            let pos = file_content.pos;
239            *line = file_content.next_line();
240            if line
241                .as_ref()
242                .map(|l| !header_def.is_last_header_line(l))
243                .unwrap_or(true)
244            {
245                file_content.reset_to(pos);
246            }
247        }
248
249        got_header = true;
250        for keyword in keywords {
251            if !in_place_header.contains(keyword) {
252                got_header = false;
253                break;
254            }
255        }
256    }
257    // else - we detected previously a one line comment block that matches the header
258    // detection it is not a header it is a comment
259    got_header
260}
261
/// In-memory text of a file plus a line-oriented read cursor.
///
/// Line endings are normalised while loading: every `\r\n` becomes `\n`.
/// Positions are byte offsets into the normalised text.
#[derive(Debug)]
pub struct FileContent {
    /// Offset at which the next call to `next_line` starts reading.
    pos: usize,
    /// Offset at which the most recently returned line started.
    old_pos: usize,
    /// Normalised file text.
    content: String,
    /// Path of the file, used for display purposes.
    filepath: String,
}

impl Display for FileContent {
    /// Writes the path of the underlying file.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.filepath.as_str())
    }
}

impl FileContent {
    /// Loads `file` into memory, converting `\r\n` line endings to `\n`.
    ///
    /// # Errors
    ///
    /// Returns the I/O error raised while opening or reading the file.
    pub fn new(file: &Path) -> std::io::Result<Self> {
        let mut reader = BufReader::new(File::open(file)?);
        let mut content = String::new();
        let mut buf = String::new();

        while reader.read_line(&mut buf)? > 0 {
            match buf.strip_suffix('\n') {
                // Terminated line: drop at most one `\r` in front of the `\n`.
                Some(body) => {
                    content.push_str(body.strip_suffix('\r').unwrap_or(body));
                    content.push('\n');
                }
                // Final line without a terminator is copied verbatim.
                None => content.push_str(&buf),
            }
            buf.clear();
        }

        Ok(Self {
            pos: 0,
            old_pos: 0,
            content,
            filepath: file.to_string_lossy().into_owned(),
        })
    }

    /// Moves the cursor to `pos` and makes `pos` the rewind target as well.
    pub fn reset_to(&mut self, pos: usize) {
        self.pos = pos;
        self.old_pos = pos;
    }

    /// Moves the cursor back to the start of the content.
    pub fn reset(&mut self) {
        self.reset_to(0);
    }

    /// Moves the cursor back to where the most recently read line started.
    pub fn rewind(&mut self) {
        self.pos = self.old_pos;
    }

    /// Returns `true` once the cursor is at or past the end of the content.
    pub fn end_reached(&self) -> bool {
        self.content.len() <= self.pos
    }

    /// Returns the line at the cursor without its `\n` and advances past it,
    /// or `None` when the end of the content has been reached.
    pub fn next_line(&mut self) -> Option<String> {
        if self.end_reached() {
            return None;
        }

        let start = self.pos;
        let remaining = &self.content[start..];
        let (text, consumed) = match remaining.find('\n') {
            Some(lf) => (&remaining[..lf], lf + 1),
            None => (remaining, remaining.len()),
        };
        let text = text.to_string();

        self.old_pos = start;
        self.pos = start + consumed;

        Some(text)
    }

    /// Returns a copy of the whole content.
    pub fn content(&self) -> String {
        self.content.to_owned()
    }

    /// Inserts `s` at byte offset `index`.
    pub fn insert(&mut self, index: usize, s: &str) {
        self.content.insert_str(index, s);
    }

    /// Removes the bytes in `start..end`.
    pub fn delete(&mut self, start: usize, end: usize) {
        drop(self.content.drain(start..end));
    }
}