uv_pypi_types/
identifier.rs

1use serde::{Serialize, Serializer};
2#[cfg(feature = "schemars")]
3use std::borrow::Cow;
4use std::fmt::Display;
5use std::str::FromStr;
6use thiserror::Error;
7
/// A Python identifier, validated with simplified rules.
///
/// The validation approximates Python's lexical rules
/// (<https://docs.python.org/3.13/reference/lexical_analysis.html#identifiers>) rather than
/// implementing them exactly: characters are classified with Rust's `char::is_alphabetic`
/// and `char::is_alphanumeric`, and the input is not converted to NFKC. This is good
/// enough for our validation purposes.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Identifier(Box<str>);
16
17#[derive(Debug, Clone, Error)]
18pub enum IdentifierParseError {
19    #[error("An identifier must not be empty")]
20    Empty,
21    #[error(
22        "Invalid first character `{first}` for identifier `{identifier}`, expected an underscore or an alphabetic character"
23    )]
24    InvalidFirstChar { first: char, identifier: Box<str> },
25    #[error(
26        "Invalid character `{invalid_char}` at position {pos} for identifier `{identifier}`, \
27        expected an underscore or an alphanumeric character"
28    )]
29    InvalidChar {
30        pos: usize,
31        invalid_char: char,
32        identifier: Box<str>,
33    },
34}
35
36impl Identifier {
37    pub fn new(identifier: impl Into<Box<str>>) -> Result<Self, IdentifierParseError> {
38        let identifier = identifier.into();
39        let mut chars = identifier.chars().enumerate();
40        let (_, first_char) = chars.next().ok_or(IdentifierParseError::Empty)?;
41        if first_char != '_' && !first_char.is_alphabetic() {
42            return Err(IdentifierParseError::InvalidFirstChar {
43                first: first_char,
44                identifier,
45            });
46        }
47
48        for (pos, current_char) in chars {
49            if current_char != '_' && !current_char.is_alphanumeric() {
50                return Err(IdentifierParseError::InvalidChar {
51                    // Make the position 1-indexed
52                    pos: pos + 1,
53                    invalid_char: current_char,
54                    identifier,
55                });
56            }
57        }
58
59        Ok(Self(identifier))
60    }
61}
62
63impl FromStr for Identifier {
64    type Err = IdentifierParseError;
65
66    fn from_str(identifier: &str) -> Result<Self, Self::Err> {
67        Self::new(identifier.to_string())
68    }
69}
70
71impl Display for Identifier {
72    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
73        write!(f, "{}", self.0)
74    }
75}
76
77impl AsRef<str> for Identifier {
78    fn as_ref(&self) -> &str {
79        &self.0
80    }
81}
82
83impl<'de> serde::de::Deserialize<'de> for Identifier {
84    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
85    where
86        D: serde::de::Deserializer<'de>,
87    {
88        let s = String::deserialize(deserializer)?;
89        Self::from_str(&s).map_err(serde::de::Error::custom)
90    }
91}
92
93impl Serialize for Identifier {
94    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
95    where
96        S: Serializer,
97    {
98        Serialize::serialize(&self.0, serializer)
99    }
100}
101
#[cfg(feature = "schemars")]
impl schemars::JsonSchema for Identifier {
    /// Name under which this type appears in generated schemas.
    fn schema_name() -> Cow<'static, str> {
        "Identifier".into()
    }

    /// Describes an identifier as a string constrained by a regular expression.
    fn json_schema(_generator: &mut schemars::generate::SchemaGenerator) -> schemars::Schema {
        // NOTE(review): the pattern only admits ASCII digits (`0-9`) after the first
        // character, whereas `Identifier::new` accepts anything `char::is_alphanumeric`
        // does. Confirm whether the schema is meant to be this much stricter.
        schemars::json_schema!({
            "type": "string",
            "pattern": r"^[_\p{Alphabetic}][_0-9\p{Alphabetic}]*$",
            "description": "An identifier in Python"
        })
    }
}
116
#[cfg(test)]
mod tests {
    use super::*;
    use insta::assert_snapshot;

    /// Parses `input`, which must be rejected, and returns the resulting error.
    fn parse_err(input: &str) -> IdentifierParseError {
        input.parse::<Identifier>().unwrap_err()
    }

    /// Well-formed identifiers, including non-ASCII ones, are accepted.
    #[test]
    fn valid() {
        let valid_ids = [
            "abc",
            "_abc",
            "a_bc",
            "a123",
            "snake_case",
            "camelCase",
            "PascalCase",
            // A single character is valid
            "_",
            "a",
            // Unicode
            "α",
            "férrîs",
            "안녕하세요",
        ];

        for valid_id in valid_ids {
            assert!(valid_id.parse::<Identifier>().is_ok(), "{valid_id}");
        }
    }

    /// The empty string is rejected with a dedicated message.
    #[test]
    fn empty() {
        assert_snapshot!(parse_err(""), @"An identifier must not be empty");
    }

    /// Digits and punctuation are rejected in the first position.
    #[test]
    fn invalid_first_char() {
        assert_snapshot!(
            parse_err("1foo"),
            @"Invalid first character `1` for identifier `1foo`, expected an underscore or an alphabetic character"
        );
        assert_snapshot!(
            parse_err("$foo"),
            @"Invalid first character `$` for identifier `$foo`, expected an underscore or an alphabetic character"
        );
        assert_snapshot!(
            parse_err(".foo"),
            @"Invalid first character `.` for identifier `.foo`, expected an underscore or an alphabetic character"
        );
    }

    /// Disallowed characters after the first are reported with their 1-indexed position.
    #[test]
    fn invalid_char() {
        // A dot in module names equals a path separator, which is a separate problem.
        assert_snapshot!(
            parse_err("foo.bar"),
            @"Invalid character `.` at position 4 for identifier `foo.bar`, expected an underscore or an alphanumeric character"
        );
        assert_snapshot!(
            parse_err("foo-bar"),
            @"Invalid character `-` at position 4 for identifier `foo-bar`, expected an underscore or an alphanumeric character"
        );
        assert_snapshot!(
            parse_err("foo_bar$"),
            @"Invalid character `$` at position 8 for identifier `foo_bar$`, expected an underscore or an alphanumeric character"
        );
        assert_snapshot!(
            parse_err("foo🦀bar"),
            @"Invalid character `🦀` at position 4 for identifier `foo🦀bar`, expected an underscore or an alphanumeric character"
        );
    }
}
187}