//! Implementation of the simple path as defined by the Rust reference, together with helpers
//! specific to this crate that make the path easy to work with.

use std::{
    fmt::{self, Display},
    str::FromStr,
};

use crate::{error::ParseError, STD_CRATES};

/// Path for any item within a crate (or just the crate itself) like `std::vec::Vec`,
/// `anyhow::Result` or `thiserror`.
///
/// New paths are created by the [`FromStr`] trait:
///
/// ```rust
/// "anyhow::Result".parse::<docsearch::SimplePath>().unwrap();
/// ```
///
/// The common standard traits are derived so that paths can be debug-printed, cloned, compared
/// and used as keys in hash based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SimplePath(
    /// The complete path, exactly as it was passed to the parser.
    String,
    /// Byte length of the leading crate name, which is the offset of the first `::` separator or
    /// the length of the whole string if there is no separator.
    usize,
);

impl SimplePath {
    /// Get back the original string.
    #[allow(clippy::missing_const_for_fn)]
    #[must_use]
    pub fn into_inner(self) -> String {
        self.0
    }

    /// Crate name part of this path.
    ///
    /// This can be used as argument for the [`start_search`](crate::start_search) function.
    #[must_use]
    pub fn crate_name(&self) -> &str {
        &self.0[..self.1]
    }

    /// Whether this path is for the standard library.
    #[must_use]
    pub fn is_std(&self) -> bool {
        STD_CRATES.contains(&self.crate_name())
    }

    /// Whether the path only contains the crate name and no item information.
    pub(crate) fn is_crate_only(&self) -> bool {
        self.0.len() == self.1
    }
}

impl FromStr for SimplePath {
    type Err = ParseError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if s.is_empty() {
            return Err(Self::Err::TooShort);
        }

        if !s.split("::").all(is_identifier) {
            return Err(Self::Err::InvalidIdentifier);
        }

        let index = s.find("::").unwrap_or(s.len());

        Ok(Self(s.to_owned(), index))
    }
}

impl AsRef<str> for SimplePath {
    /// Borrow the complete path as a string slice.
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}

impl Display for SimplePath {
    /// Write the complete path, delegating to the string's own [`Display`] implementation so that
    /// formatter options like width and alignment are applied to it.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        Display::fmt(self.0.as_str(), f)
    }
}

/// Check whether the given value has the shape of an identifier or a keyword.
///
/// The value is accepted if it is a nonempty Unicode string that matches one of two forms.
///
/// Regular form:
///
/// - The first character has property [`XID_start`].
/// - All following characters have property [`XID_continue`].
///
/// Underscore form:
///
/// - The first character is `_`.
/// - At least one more character follows. A lone `_` is not an identifier.
/// - All following characters have property [`XID_continue`].
///
/// [`XID_start`]: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3AXID_Start%3A%5D&abb=on&g=&i=
/// [`XID_continue`]: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3AXID_Continue%3A%5D&abb=on&g=&i=
fn is_identifier_or_keyword(value: &str) -> bool {
    let mut chars = value.chars();

    match chars.next() {
        // An empty value can never be an identifier.
        None => false,
        Some(head) => {
            // Everything after the first character.
            let tail = chars.as_str();

            let regular_start = unicode_ident::is_xid_start(head);
            let underscore_start = head == '_' && !tail.is_empty();

            // Both forms share the same requirement for the remaining characters.
            (regular_start || underscore_start) && tail.chars().all(unicode_ident::is_xid_continue)
        }
    }
}

/// Check whether the given value is a raw identifier.
///
/// The value is accepted if all of the following hold:
///
/// - It begins with the `r#` prefix.
/// - The part after the prefix is a valid [identifier or keyword](is_identifier_or_keyword).
/// - The part after the prefix is not one of `crate`, `self`, `super` or `Self`.
fn is_raw_identifier(value: &str) -> bool {
    // Keywords that may not be turned into raw identifiers.
    const FORBIDDEN: &[&str] = &["crate", "self", "super", "Self"];

    match value.strip_prefix("r#") {
        Some(ident) => !FORBIDDEN.contains(&ident) && is_identifier_or_keyword(ident),
        // Without the prefix it can't be a raw identifier.
        None => false,
    }
}

/// Check whether the given value is a non-keyword identifier.
///
/// A non-keyword identifier is any nonempty Unicode string of the following form:
///
/// - The value is a valid [identifier or keyword](is_identifier_or_keyword).
/// - The value is not a [strict] or [reserved] keyword.
///
/// The keyword lists follow the 2018 edition and later, which means that `async`, `await` and
/// `dyn` count as strict keywords and `try` counts as a reserved keyword. A keyword can still be
/// used as part of a path in its [raw identifier](is_raw_identifier) form, for example `r#try`.
///
/// [strict]: https://doc.rust-lang.org/stable/reference/keywords.html#strict-keywords
/// [reserved]: https://doc.rust-lang.org/stable/reference/keywords.html#reserved-keywords
fn is_non_keyword_identifier(value: &str) -> bool {
    const STRICT_KEYWORDS: &[&str] = &[
        "as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn",
        "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref",
        "return", "self", "Self", "static", "struct", "super", "trait", "true", "type", "unsafe",
        "use", "where", "while", "async", "await", "dyn",
    ];
    // `try` is reserved beginning with the 2018 edition, the same edition that introduced the
    // `async`, `await` and `dyn` keywords listed above.
    const RESERVED_KEYWORDS: &[&str] = &[
        "abstract", "become", "box", "do", "final", "macro", "override", "priv", "typeof",
        "unsized", "virtual", "yield", "try",
    ];

    is_identifier_or_keyword(value)
        && !STRICT_KEYWORDS.contains(&value)
        && !RESERVED_KEYWORDS.contains(&value)
}

/// Check whether the given value is an identifier.
///
/// The value is accepted if it is a nonempty Unicode string that is either
///
/// - a [raw identifier](is_raw_identifier), or
/// - a [non-keyword identifier](is_non_keyword_identifier).
fn is_identifier(value: &str) -> bool {
    if is_raw_identifier(value) {
        return true;
    }

    is_non_keyword_identifier(value)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Well-formed paths, including underscore-only and raw identifiers, must parse.
    #[test]
    fn parse_valid() {
        const VALID: [&str; 5] = ["anyhow", "anyhow::Result", "special::__", "__", "r#unsafe"];

        for input in &VALID {
            assert!(input.parse::<SimplePath>().is_ok());
        }
    }

    /// Empty input, empty segments, a lone underscore and keywords must be rejected.
    #[test]
    fn parse_invalid() {
        const INVALID: [&str; 7] = ["", "a::::b", "::", "_", "unsafe", "Self", "r#Self"];

        for input in &INVALID {
            assert!(input.parse::<SimplePath>().is_err());
        }
    }
}