apache_avro/
validator.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::{schema::Namespace, AvroResult, Error};
19use regex_lite::Regex;
20use std::sync::OnceLock;
21
/// A validator that validates names and namespaces according to the Avro specification.
///
/// It is the default implementation behind every validator trait declared in this
/// module and is installed lazily by the `validate_*` functions when no custom
/// validator has been registered beforehand.
struct SpecificationValidator;
24
25/// A trait that validates schema names.
26/// To register a custom one use [set_schema_name_validator].
27pub trait SchemaNameValidator: Send + Sync {
28    /// Returns the regex used to validate the schema name
29    /// according to the Avro specification.
30    fn regex(&self) -> &'static Regex {
31        static SCHEMA_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
32        SCHEMA_NAME_ONCE.get_or_init(|| {
33            Regex::new(
34                // An optional namespace (with optional dots) followed by a name without any dots in it.
35                r"^((?P<namespace>([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?)\.)?(?P<name>[A-Za-z_][A-Za-z0-9_]*)$",
36            )
37                .unwrap()
38        })
39    }
40
41    /// Validates the schema name and returns the name and the optional namespace,
42    /// or [Error::InvalidSchemaName] if it is invalid.
43    fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)>;
44}
45
46impl SchemaNameValidator for SpecificationValidator {
47    fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)> {
48        let regex = SchemaNameValidator::regex(self);
49        let caps = regex
50            .captures(schema_name)
51            .ok_or_else(|| Error::InvalidSchemaName(schema_name.to_string(), regex.as_str()))?;
52        Ok((
53            caps["name"].to_string(),
54            caps.name("namespace").map(|s| s.as_str().to_string()),
55        ))
56    }
57}
58
/// Process-wide slot holding the schema name validator in use.
///
/// It is written at most once: either by [set_schema_name_validator] or, on first
/// use, by [validate_schema_name], which installs the default [SpecificationValidator].
static NAME_VALIDATOR_ONCE: OnceLock<Box<dyn SchemaNameValidator + Send + Sync>> = OnceLock::new();
60
/// Sets a custom schema name validator.
///
/// Returns `Ok(())` if the registration was successful. If a validator (custom or
/// default) has already been registered, the registration fails and the rejected
/// `validator` argument is handed back to the caller inside `Err`.
///
/// **Note**: This function must be called before parsing any schema, because
/// validating the first schema name registers the default validator and the
/// registration is one time only!
pub fn set_schema_name_validator(
    validator: Box<dyn SchemaNameValidator + Send + Sync>,
) -> Result<(), Box<dyn SchemaNameValidator + Send + Sync>> {
    // Logged before the attempt, i.e. regardless of whether the registration succeeds.
    debug!("Setting a custom schema name validator.");
    NAME_VALIDATOR_ONCE.set(validator)
}
74
75pub(crate) fn validate_schema_name(schema_name: &str) -> AvroResult<(String, Namespace)> {
76    NAME_VALIDATOR_ONCE
77        .get_or_init(|| {
78            debug!("Going to use the default name validator.");
79            Box::new(SpecificationValidator)
80        })
81        .validate(schema_name)
82}
83
84/// A trait that validates schema namespaces.
85/// To register a custom one use [set_schema_namespace_validator].
86pub trait SchemaNamespaceValidator: Send + Sync {
87    /// Returns the regex used to validate the schema namespace
88    /// according to the Avro specification.
89    fn regex(&self) -> &'static Regex {
90        static NAMESPACE_ONCE: OnceLock<Regex> = OnceLock::new();
91        NAMESPACE_ONCE.get_or_init(|| {
92            Regex::new(r"^([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?$").unwrap()
93        })
94    }
95
96    /// Validates the schema namespace or [Error::InvalidNamespace] if it is invalid.
97    fn validate(&self, namespace: &str) -> AvroResult<()>;
98}
99
100impl SchemaNamespaceValidator for SpecificationValidator {
101    fn validate(&self, ns: &str) -> AvroResult<()> {
102        let regex = SchemaNamespaceValidator::regex(self);
103        if !regex.is_match(ns) {
104            return Err(Error::InvalidNamespace(ns.to_string(), regex.as_str()));
105        } else {
106            Ok(())
107        }
108    }
109}
110
/// Process-wide slot holding the schema namespace validator in use.
///
/// It is written at most once: either by [set_schema_namespace_validator] or, on first
/// use, by [validate_namespace], which installs the default [SpecificationValidator].
static NAMESPACE_VALIDATOR_ONCE: OnceLock<Box<dyn SchemaNamespaceValidator + Send + Sync>> =
    OnceLock::new();
113
114/// Sets a custom schema namespace validator.
115///
116/// Returns a unit if the registration was successful or the already
117/// registered validator if the registration failed.
118///
119/// **Note**: This function must be called before parsing any schema because this will
120/// register the default validator and the registration is one time only!
121pub fn set_schema_namespace_validator(
122    validator: Box<dyn SchemaNamespaceValidator + Send + Sync>,
123) -> Result<(), Box<dyn SchemaNamespaceValidator + Send + Sync>> {
124    NAMESPACE_VALIDATOR_ONCE.set(validator)
125}
126
127pub(crate) fn validate_namespace(ns: &str) -> AvroResult<()> {
128    NAMESPACE_VALIDATOR_ONCE
129        .get_or_init(|| {
130            debug!("Going to use the default namespace validator.");
131            Box::new(SpecificationValidator)
132        })
133        .validate(ns)
134}
135
136/// A trait that validates enum symbol names.
137/// To register a custom one use [set_enum_symbol_name_validator].
138pub trait EnumSymbolNameValidator: Send + Sync {
139    /// Returns the regex used to validate the symbols of enum schema
140    /// according to the Avro specification.
141    fn regex(&self) -> &'static Regex {
142        static ENUM_SYMBOL_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
143        ENUM_SYMBOL_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap())
144    }
145
146    /// Validates the symbols of an Enum schema name and returns nothing (unit),
147    /// or [Error::EnumSymbolName] if it is invalid.
148    fn validate(&self, name: &str) -> AvroResult<()>;
149}
150
151impl EnumSymbolNameValidator for SpecificationValidator {
152    fn validate(&self, symbol: &str) -> AvroResult<()> {
153        let regex = EnumSymbolNameValidator::regex(self);
154        if !regex.is_match(symbol) {
155            return Err(Error::EnumSymbolName(symbol.to_string()));
156        }
157
158        Ok(())
159    }
160}
161
/// Process-wide slot holding the enum symbol name validator in use.
///
/// It is written at most once: either by [set_enum_symbol_name_validator] or, on first
/// use, by [validate_enum_symbol_name], which installs the default [SpecificationValidator].
static ENUM_SYMBOL_NAME_VALIDATOR_ONCE: OnceLock<Box<dyn EnumSymbolNameValidator + Send + Sync>> =
    OnceLock::new();
164
165/// Sets a custom enum symbol name validator.
166///
167/// Returns a unit if the registration was successful or the already
168/// registered validator if the registration failed.
169///
170/// **Note**: This function must be called before parsing any schema because this will
171/// register the default validator and the registration is one time only!
172pub fn set_enum_symbol_name_validator(
173    validator: Box<dyn EnumSymbolNameValidator + Send + Sync>,
174) -> Result<(), Box<dyn EnumSymbolNameValidator + Send + Sync>> {
175    ENUM_SYMBOL_NAME_VALIDATOR_ONCE.set(validator)
176}
177
178pub(crate) fn validate_enum_symbol_name(symbol: &str) -> AvroResult<()> {
179    ENUM_SYMBOL_NAME_VALIDATOR_ONCE
180        .get_or_init(|| {
181            debug!("Going to use the default enum symbol name validator.");
182            Box::new(SpecificationValidator)
183        })
184        .validate(symbol)
185}
186
187/// A trait that validates record field names.
188/// To register a custom one use [set_record_field_name_validator].
189pub trait RecordFieldNameValidator: Send + Sync {
190    /// Returns the regex used to validate the record field names
191    /// according to the Avro specification.
192    fn regex(&self) -> &'static Regex {
193        static FIELD_NAME_ONCE: OnceLock<Regex> = OnceLock::new();
194        FIELD_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap())
195    }
196
197    /// Validates the record field's names and returns nothing (unit),
198    /// or [Error::FieldName] if it is invalid.
199    fn validate(&self, name: &str) -> AvroResult<()>;
200}
201
202impl RecordFieldNameValidator for SpecificationValidator {
203    fn validate(&self, field_name: &str) -> AvroResult<()> {
204        let regex = RecordFieldNameValidator::regex(self);
205        if !regex.is_match(field_name) {
206            return Err(Error::FieldName(field_name.to_string()));
207        }
208
209        Ok(())
210    }
211}
212
/// Process-wide slot holding the record field name validator in use.
///
/// It is written at most once: either by [set_record_field_name_validator] or, on first
/// use, by [validate_record_field_name], which installs the default [SpecificationValidator].
static RECORD_FIELD_NAME_VALIDATOR_ONCE: OnceLock<Box<dyn RecordFieldNameValidator + Send + Sync>> =
    OnceLock::new();
215
216/// Sets a custom record field name validator.
217///
218/// Returns a unit if the registration was successful or the already
219/// registered validator if the registration failed.
220///
221/// **Note**: This function must be called before parsing any schema because this will
222/// register the default validator and the registration is one time only!
223pub fn set_record_field_name_validator(
224    validator: Box<dyn RecordFieldNameValidator + Send + Sync>,
225) -> Result<(), Box<dyn RecordFieldNameValidator + Send + Sync>> {
226    RECORD_FIELD_NAME_VALIDATOR_ONCE.set(validator)
227}
228
229pub(crate) fn validate_record_field_name(field_name: &str) -> AvroResult<()> {
230    RECORD_FIELD_NAME_VALIDATOR_ONCE
231        .get_or_init(|| {
232            debug!("Going to use the default record field name validator.");
233            Box::new(SpecificationValidator)
234        })
235        .validate(field_name)
236}
237
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::Name;
    use apache_avro_test_helper::TestResult;

    // The default schema name validator accepts a plain identifier.
    #[test]
    fn avro_3900_default_name_validator_with_valid_ns() -> TestResult {
        let _parsed = validate_schema_name("example")?;
        Ok(())
    }

    // The default schema name validator rejects a dash.
    #[test]
    fn avro_3900_default_name_validator_with_invalid_ns() -> TestResult {
        let outcome = validate_schema_name("com-example");
        assert!(outcome.is_err());
        Ok(())
    }

    // A fully qualified name whose namespace part is invalid must be rejected with
    // an `InvalidSchemaName` error that carries the input and the regex source.
    #[test]
    fn test_avro_3897_disallow_invalid_namespaces_in_fully_qualified_name() -> TestResult {
        let validator = SpecificationValidator;

        for full_name in ["ns.0.record1", "ns..record1"] {
            let name = Name::new(full_name);
            assert!(name.is_err());

            let expected = Error::InvalidSchemaName(
                full_name.to_string(),
                SchemaNameValidator::regex(&validator).as_str(),
            )
            .to_string();
            let err = name.err().map(|e| e.to_string()).unwrap();
            pretty_assertions::assert_eq!(expected, err);
        }

        Ok(())
    }

    // The default namespace validator accepts a dotted namespace.
    #[test]
    fn avro_3900_default_namespace_validator_with_valid_ns() -> TestResult {
        let () = validate_namespace("com.example")?;
        Ok(())
    }

    // The default namespace validator rejects a dash.
    #[test]
    fn avro_3900_default_namespace_validator_with_invalid_ns() -> TestResult {
        let outcome = validate_namespace("com-example");
        assert!(outcome.is_err());
        Ok(())
    }

    // The default enum symbol validator accepts a plain identifier.
    #[test]
    fn avro_3900_default_enum_symbol_validator_with_valid_symbol_name() -> TestResult {
        let () = validate_enum_symbol_name("spades")?;
        Ok(())
    }

    // The default enum symbol validator rejects a dash.
    #[test]
    fn avro_3900_default_enum_symbol_validator_with_invalid_symbol_name() -> TestResult {
        let outcome = validate_enum_symbol_name("com-example");
        assert!(outcome.is_err());
        Ok(())
    }

    // The default record field validator accepts a plain identifier.
    #[test]
    fn avro_3900_default_record_field_validator_with_valid_name() -> TestResult {
        let () = validate_record_field_name("test")?;
        Ok(())
    }

    // The default record field validator rejects a dash.
    #[test]
    fn avro_3900_default_record_field_validator_with_invalid_name() -> TestResult {
        let outcome = validate_record_field_name("com-example");
        assert!(outcome.is_err());
        Ok(())
    }
}