gix_attributes/
parse.rs

1use std::borrow::Cow;
2
3use crate::{name, AssignmentRef, Name, NameRef, StateRef};
4use bstr::{BStr, ByteSlice};
5use kstring::KStringRef;
6
7/// The kind of attribute that was parsed.
8#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
9#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
10pub enum Kind {
11    /// A pattern to match paths against
12    Pattern(gix_glob::Pattern),
13    /// The name of the macro to define, always a valid attribute name
14    Macro(Name),
15}
16
17mod error {
18    use bstr::BString;
19    /// The error returned by [`parse::Lines`][crate::parse::Lines].
20    #[derive(thiserror::Error, Debug)]
21    #[allow(missing_docs)]
22    pub enum Error {
23        #[error("Line {line_number} has a negative pattern, for literal characters use \\!: {line}")]
24        PatternNegation { line_number: usize, line: BString },
25        #[error("Attribute in line {line_number} has non-ascii characters or starts with '-': {attribute}")]
26        AttributeName { line_number: usize, attribute: BString },
27        #[error("Macro in line {line_number} has non-ascii characters or starts with '-': {macro_name}")]
28        MacroName { line_number: usize, macro_name: BString },
29        #[error("Could not unquote attributes line")]
30        Unquote(#[from] gix_quote::ansi_c::undo::Error),
31    }
32}
33pub use error::Error;
34
35/// An iterator over attribute assignments, parsed line by line.
36pub struct Lines<'a> {
37    lines: bstr::Lines<'a>,
38    line_no: usize,
39}
40
41/// An iterator over attribute assignments in a single line.
42pub struct Iter<'a> {
43    attrs: bstr::Fields<'a>,
44}
45
46impl<'a> Iter<'a> {
47    /// Create a new instance to parse attribute assignments from `input`.
48    pub fn new(input: &'a BStr) -> Self {
49        Iter { attrs: input.fields() }
50    }
51
52    fn parse_attr(&self, attr: &'a [u8]) -> Result<AssignmentRef<'a>, name::Error> {
53        let mut tokens = attr.splitn(2, |b| *b == b'=');
54        let attr = tokens.next().expect("attr itself").as_bstr();
55        let possibly_value = tokens.next();
56        let (attr, state) = if attr.first() == Some(&b'-') {
57            (&attr[1..], StateRef::Unset)
58        } else if attr.first() == Some(&b'!') {
59            (&attr[1..], StateRef::Unspecified)
60        } else {
61            (attr, possibly_value.map_or(StateRef::Set, StateRef::from_bytes))
62        };
63        Ok(AssignmentRef::new(check_attr(attr)?, state))
64    }
65}
66
67fn check_attr(attr: &BStr) -> Result<NameRef<'_>, name::Error> {
68    fn attr_valid(attr: &BStr) -> bool {
69        if attr.first() == Some(&b'-') {
70            return false;
71        }
72
73        attr.bytes()
74            .all(|b| matches!(b, b'-' | b'.' | b'_' | b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9'))
75    }
76
77    attr_valid(attr)
78        .then(|| NameRef(KStringRef::from_ref(attr.to_str().expect("no illformed utf8"))))
79        .ok_or_else(|| name::Error { attribute: attr.into() })
80}
81
82impl<'a> Iterator for Iter<'a> {
83    type Item = Result<AssignmentRef<'a>, name::Error>;
84
85    fn next(&mut self) -> Option<Self::Item> {
86        let attr = self.attrs.next().filter(|a| !a.is_empty())?;
87        self.parse_attr(attr).into()
88    }
89}
90
91/// Instantiation
92impl<'a> Lines<'a> {
93    /// Create a new instance to parse all attributes in all lines of the input `bytes`.
94    pub fn new(bytes: &'a [u8]) -> Self {
95        let bom = unicode_bom::Bom::from(bytes);
96        Lines {
97            lines: bytes[bom.len()..].lines(),
98            line_no: 0,
99        }
100    }
101}
102
103impl<'a> Iterator for Lines<'a> {
104    type Item = Result<(Kind, Iter<'a>, usize), Error>;
105
106    fn next(&mut self) -> Option<Self::Item> {
107        fn skip_blanks(line: &BStr) -> &BStr {
108            line.find_not_byteset(BLANKS).map_or(line, |pos| &line[pos..])
109        }
110        for line in self.lines.by_ref() {
111            self.line_no += 1;
112            let line = skip_blanks(line.into());
113            if line.first() == Some(&b'#') {
114                continue;
115            }
116            match parse_line(line, self.line_no) {
117                None => continue,
118                Some(res) => return Some(res),
119            }
120        }
121        None
122    }
123}
124
125fn parse_line(line: &BStr, line_number: usize) -> Option<Result<(Kind, Iter<'_>, usize), Error>> {
126    if line.is_empty() {
127        return None;
128    }
129
130    let (line, attrs): (Cow<'_, _>, _) = if line.starts_with(b"\"") {
131        let (unquoted, consumed) = match gix_quote::ansi_c::undo(line) {
132            Ok(res) => res,
133            Err(err) => return Some(Err(err.into())),
134        };
135        (unquoted, &line[consumed..])
136    } else {
137        line.find_byteset(BLANKS)
138            .map(|pos| (line[..pos].as_bstr().into(), line[pos..].as_bstr()))
139            .unwrap_or((line.into(), [].as_bstr()))
140    };
141
142    let kind_res = match line.strip_prefix(b"[attr]") {
143        Some(macro_name) => check_attr(macro_name.into())
144            .map_err(|err| Error::MacroName {
145                line_number,
146                macro_name: err.attribute,
147            })
148            .map(|name| Kind::Macro(name.to_owned())),
149        None => {
150            let pattern = gix_glob::Pattern::from_bytes(line.as_ref())?;
151            if pattern.mode.contains(gix_glob::pattern::Mode::NEGATIVE) {
152                Err(Error::PatternNegation {
153                    line: line.into_owned(),
154                    line_number,
155                })
156            } else {
157                Ok(Kind::Pattern(pattern))
158            }
159        }
160    };
161    let kind = match kind_res {
162        Ok(kind) => kind,
163        Err(err) => return Some(Err(err)),
164    };
165    Ok((kind, Iter::new(attrs), line_number)).into()
166}
167
/// The bytes treated as blanks when separating tokens in a line: space, tab and carriage return.
const BLANKS: &[u8] = &[b' ', b'\t', b'\r'];