docsearch/
simple_path.rs

//! Implementation of the simple path as described by the Rust reference, together with
//! crate-specific helpers that make such paths convenient to use.
3
4use std::{
5    fmt::{self, Display},
6    str::FromStr,
7};
8
9use crate::{error::ParseError, STD_CRATES};
10
/// Path for any item within a crate (or just the crate itself) like `std::vec::Vec`,
/// `anyhow::Result` or `thiserror`.
///
/// New paths are created by the [`FromStr`] trait:
///
/// ```rust
/// "anyhow::Result".parse::<docsearch::SimplePath>().unwrap();
/// ```
///
/// The type implements the common value traits, so paths can be debug-printed, cloned,
/// compared for equality and used as keys in hash-based collections.
// Field 0 is the full path string, field 1 is the byte length of its leading crate-name
// segment (it equals the length of the whole string when the path has no `::`).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SimplePath(String, usize);
20
21impl SimplePath {
22    /// Get back the original string.
23    #[allow(clippy::missing_const_for_fn)]
24    #[must_use]
25    pub fn into_inner(self) -> String {
26        self.0
27    }
28
29    /// Crate name part of this path.
30    ///
31    /// This can be used as argument for the [`start_search`](crate::start_search) function.
32    #[must_use]
33    pub fn crate_name(&self) -> &str {
34        &self.0[..self.1]
35    }
36
37    /// Whether this path is for the standard library.
38    #[must_use]
39    pub fn is_std(&self) -> bool {
40        STD_CRATES.contains(&self.crate_name())
41    }
42
43    /// Whether the path only contains the crate name and no item information.
44    pub(crate) fn is_crate_only(&self) -> bool {
45        self.0.len() == self.1
46    }
47}
48
49impl FromStr for SimplePath {
50    type Err = ParseError;
51
52    fn from_str(s: &str) -> Result<Self, Self::Err> {
53        if s.is_empty() {
54            return Err(Self::Err::TooShort);
55        }
56
57        if !s.split("::").all(is_identifier) {
58            return Err(Self::Err::InvalidIdentifier);
59        }
60
61        let index = s.find("::").unwrap_or(s.len());
62
63        Ok(Self(s.to_owned(), index))
64    }
65}
66
67impl AsRef<str> for SimplePath {
68    fn as_ref(&self) -> &str {
69        &self.0
70    }
71}
72
73impl Display for SimplePath {
74    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
75        self.0.fmt(f)
76    }
77}
78
79/// Check whether the given value is an identifier or a keyword.
80///
81/// An identifier is any nonempty Unicode string of the following form:
82///
83/// Either
84///
85/// - The first character has property [`XID_start`].
86/// - The remaining characters have property [`XID_continue`].
87///
88/// Or
89///
90/// - The first character is `_`.
91/// - The identifier is more than one character. `_` alone is not an identifier.
92/// - The remaining characters have property [`XID_continue`].
93///
94/// [`XID_start`]: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3AXID_Start%3A%5D&abb=on&g=&i=
95/// [`XID_continue`]: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3AXID_Continue%3A%5D&abb=on&g=&i=
96fn is_identifier_or_keyword(value: &str) -> bool {
97    fn variant_one(first_char: char, value: &str) -> bool {
98        unicode_ident::is_xid_start(first_char)
99            && value.chars().skip(1).all(unicode_ident::is_xid_continue)
100    }
101
102    fn variant_two(first_char: char, value: &str) -> bool {
103        first_char == '_'
104            && value.chars().skip(1).count() > 0
105            && value.chars().skip(1).all(unicode_ident::is_xid_continue)
106    }
107
108    let first_char = match value.chars().next() {
109        Some(ch) => ch,
110        None => return false,
111    };
112
113    variant_one(first_char, value) || variant_two(first_char, value)
114}
115
116/// Check whether the given value is a raw identifier.
117///
118/// A raw identifier is any nonempty Unicode string of the following form:
119///
120/// - The value starts with `r#`.
121/// - The followed content is a valid [identifier or keyword](is_identifier_or_keyword).
122/// - The followed content is none of: `crate`, `self`, `super`, `Self`.
123fn is_raw_identifier(value: &str) -> bool {
124    const KEYWORDS: &[&str] = &["crate", "self", "super", "Self"];
125
126    value
127        .strip_prefix("r#")
128        .map(|value| is_identifier_or_keyword(value) && !KEYWORDS.contains(&value))
129        .unwrap_or_default()
130}
131
132/// Check whether the given value is a non-keyword identifier.
133///
134/// A non-keyword identifier is any nonempty Unicode string of the following form:
135///
136/// - The value is a valid [identifier or keyword](is_identifier_or_keyword).
137/// - The value is not a [strict] or [reserved] keyword.
138///
139/// [strict]: https://doc.rust-lang.org/stable/reference/keywords.html#strict-keywords
140/// [reserved]: https://doc.rust-lang.org/stable/reference/keywords.html#reserved-keywords
141fn is_non_keyword_identifier(value: &str) -> bool {
142    const STRICT_KEYWORDS: &[&str] = &[
143        "as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn",
144        "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref",
145        "return", "self", "Self", "static", "struct", "super", "trait", "true", "type", "unsafe",
146        "use", "where", "while", "async", "await", "dyn",
147    ];
148    const RESERVED_KEYWORDS: &[&str] = &[
149        "abstract", "become", "box", "do", "final", "macro", "override", "priv", "typeof",
150        "unsized", "virtual", "yield",
151    ];
152
153    is_identifier_or_keyword(value)
154        && !STRICT_KEYWORDS.contains(&value)
155        && !RESERVED_KEYWORDS.contains(&value)
156}
157
158/// Check whether the given value is an identifier.
159///
160/// An identifier is any nonempty Unicode string of the following form:
161///
162/// Either
163///
164/// - The value is [raw identifier](is_raw_identifier).
165///
166/// Or
167///
168/// - The value is a [non-keyword identifier](is_non_keyword_identifier).
169fn is_identifier(value: &str) -> bool {
170    is_non_keyword_identifier(value) || is_raw_identifier(value)
171}
172
#[cfg(test)]
mod tests {
    use super::SimplePath;

    /// Plain crate names, nested items, underscore-led names and raw keywords must all parse.
    #[test]
    fn parse_valid() {
        const VALID: [&str; 5] = ["anyhow", "anyhow::Result", "special::__", "__", "r#unsafe"];

        for path in VALID.iter() {
            let parsed = path.parse::<SimplePath>();
            assert!(parsed.is_ok());
        }
    }

    /// Empty input, empty segments, a lone `_`, bare keywords and forbidden raw identifiers
    /// must all be rejected.
    #[test]
    fn parse_invalid() {
        const INVALID: [&str; 7] = ["", "a::::b", "::", "_", "unsafe", "Self", "r#Self"];

        for path in INVALID.iter() {
            let parsed = path.parse::<SimplePath>();
            assert!(parsed.is_err());
        }
    }
}