// japanese_codepoints/validation.rs

1//! Validation utilities for code-point collections.
2//!
3//! This module provides:
4//!
5//! * [`ValidationError`] – a structured error returned when a string contains
6//!   characters outside an allowed set.
7//! * [`validate_all_in_any`] – validate text against the *union* of several
8//!   character sets simultaneously.
9//! * Convenience macros for common Japanese character-set checks.
10
11use std::fmt;
12
13use crate::CodePoints;
14
15// ── error type ────────────────────────────────────────────────────────────────
16
/// Describes a single code-point validation failure.
///
/// A `ValidationError` carries the offending code point, its position in the
/// input string, and a human-readable message. Its [`fmt::Display`] output is
/// exactly the contents of [`message`](Self::message).
///
/// # Examples
///
/// ```rust
/// use japanese_codepoints::CodePoints;
///
/// let cp = CodePoints::ascii_printable();
/// let err = cp.validate("hello\0world").unwrap_err();
/// assert_eq!(err.code_point, 0);   // NULL character
/// assert_eq!(err.position, 5);     // index of '\0'
/// assert!(err.to_string().contains("U+0000"));
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationError {
    /// The Unicode code point that is not allowed by the character set.
    pub code_point: u32,
    /// Zero-based *character* index (not byte index) within the input string.
    pub position: usize,
    /// A human-readable description of the error.
    pub message: String,
}

impl fmt::Display for ValidationError {
    /// Writes [`message`](ValidationError::message) verbatim.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.message)
    }
}

impl std::error::Error for ValidationError {}

impl ValidationError {
    /// Creates a `ValidationError` for the given code point and character
    /// index, with the default message
    /// `invalid character '<c>' (U+XXXX) at position <n>`.
    ///
    /// * Printable characters appear in the message as-is.
    /// * Control characters (NUL, newline, ESC, …) appear in escaped form
    ///   (`\0`, `\n`, `\u{1b}`), so a message built from untrusted input
    ///   cannot inject raw control bytes into logs or terminals.
    /// * A `code_point` that is not a Unicode scalar value is shown as
    ///   U+FFFD; the `U+XXXX` part still reports the original number.
    pub fn new(code_point: u32, position: usize) -> Self {
        let ch = char::from_u32(code_point).unwrap_or('\u{FFFD}');
        // Only control characters are escaped; quotes, backslashes and every
        // other printable character keep their literal form.
        let shown: String = if ch.is_control() {
            ch.escape_debug().collect()
        } else {
            ch.to_string()
        };
        Self::with_message(
            code_point,
            position,
            format!(
                "invalid character '{}' (U+{:04X}) at position {}",
                shown, code_point, position
            ),
        )
    }

    /// Creates a `ValidationError` with an explicit message, overriding the
    /// default formatting. The message is stored unmodified.
    pub fn with_message(code_point: u32, position: usize, message: impl Into<String>) -> Self {
        Self {
            code_point,
            position,
            message: message.into(),
        }
    }
}
75
76// ── multi-set validation ──────────────────────────────────────────────────────
77
78/// Validates that **every** character in `text` belongs to **at least one** of
79/// the provided character sets.
80///
81/// This is the idiomatic way to validate text that may legitimately contain
82/// characters from multiple scripts — for example Japanese hiragana mixed with
83/// ASCII punctuation.
84///
85/// # Edge cases
86///
87/// * An empty `text` returns `Ok(())` (vacuously valid).
88/// * An empty `sets` slice returns `Err` for any non-empty `text`.
89///
90/// # Examples
91///
92/// ```rust
93/// use japanese_codepoints::{CodePoints, validation::validate_all_in_any};
94///
95/// let hiragana = CodePoints::new(vec![0x3042, 0x3044]); // あ, い
96/// let katakana = CodePoints::new(vec![0x30A2]);          // ア
97///
98/// assert!(validate_all_in_any("あア", &[&hiragana, &katakana]).is_ok());
99/// assert!(validate_all_in_any("あx", &[&hiragana, &katakana]).is_err());
100/// ```
101pub fn validate_all_in_any(text: &str, sets: &[&CodePoints]) -> Result<(), ValidationError> {
102    for (i, c) in text.chars().enumerate() {
103        if !sets.iter().any(|set| set.contains_char(c)) {
104            return Err(ValidationError::new(c as u32, i));
105        }
106    }
107    Ok(())
108}
109
110// ── macros ────────────────────────────────────────────────────────────────────
111
/// Validates that `$value` contains only code points present in `$codepoints`.
///
/// Expands to `$codepoints.validate($value)` and evaluates to whatever that
/// call returns: `Ok(())` on success; `Err([`ValidationError`])` on failure.
///
/// A trailing comma after the last argument is accepted, so multi-line
/// invocations can be written like ordinary function calls.
///
/// # Examples
///
/// ```rust
/// use japanese_codepoints::{validate_codepoints, CodePoints};
///
/// let cp = CodePoints::ascii_printable();
/// assert!(validate_codepoints!("hello", &cp).is_ok());
/// assert!(validate_codepoints!("hello\0", &cp).is_err());
/// assert!(validate_codepoints!(
///     "hello",
///     &cp,
/// ).is_ok());
/// ```
#[macro_export]
macro_rules! validate_codepoints {
    ($value:expr, $codepoints:expr $(,)?) => {
        $codepoints.validate($value)
    };
}
131
/// Extended validation with additional patterns.
///
/// Both forms call `$codepoints.validate($value)`. Each accepts an optional
/// trailing comma after its last argument.
///
/// ## Custom error message
///
/// `validate_codepoints_advanced!(value, codepoints, message)` replaces the
/// `message` field of the returned error with `message.to_string()`. The
/// error's other fields are left untouched.
///
/// ```rust
/// use japanese_codepoints::{validate_codepoints_advanced, CodePoints};
///
/// let cp = CodePoints::ascii_printable();
/// let r = validate_codepoints_advanced!("hi", &cp, "Only ASCII allowed");
/// assert!(r.is_ok());
///
/// let r = validate_codepoints_advanced!("hi\0", &cp, "Only ASCII allowed",);
/// assert_eq!(r.unwrap_err().message, "Only ASCII allowed");
/// ```
///
/// ## Detailed (default position-aware message)
///
/// `validate_codepoints_advanced!(value, detailed codepoints)` returns the
/// result of `validate` unchanged.
///
/// ```rust
/// use japanese_codepoints::{validate_codepoints_advanced, CodePoints};
///
/// let cp = CodePoints::ascii_printable();
/// let r = validate_codepoints_advanced!("hi\0there", detailed &cp);
/// assert!(r.is_err());
/// ```
#[macro_export]
macro_rules! validate_codepoints_advanced {
    // Custom error message — overrides the default ValidationError message.
    // `$error_msg` sits inside the `map_err` closure, so it is only evaluated
    // when validation fails.
    ($value:expr, $codepoints:expr, $error_msg:expr $(,)?) => {
        $codepoints.validate($value).map_err(|mut e| {
            e.message = $error_msg.to_string();
            e
        })
    };

    // Detailed — identical to validate_codepoints! but kept for symmetry.
    ($value:expr, detailed $codepoints:expr $(,)?) => {
        $codepoints.validate($value)
    };
}
168
169// ── feature-gated convenience macros ─────────────────────────────────────────
170
/// Checks that every character of `$text` is a JIS X 0208 **hiragana**
/// character.
///
/// Expands to `$crate::jisx0208::Hiragana::cached().validate($text)`.
/// Only available with the `codepoints-jisx0208` feature.
///
/// # Examples
///
/// ```rust
/// # #[cfg(feature = "codepoints-jisx0208")]
/// use japanese_codepoints::validate_hiragana;
/// # #[cfg(feature = "codepoints-jisx0208")]
/// assert!(validate_hiragana!("あいうえお").is_ok());
/// # #[cfg(feature = "codepoints-jisx0208")]
/// assert!(validate_hiragana!("Hello").is_err());
/// ```
#[cfg(feature = "codepoints-jisx0208")]
#[macro_export]
macro_rules! validate_hiragana {
    ($text:expr) => {
        $crate::jisx0208::Hiragana::cached().validate($text)
    };
}
190
/// Checks that every character of `$text` is a JIS X 0208 **katakana**
/// character.
///
/// Expands to `$crate::jisx0208::Katakana::cached().validate($text)`.
/// Only available with the `codepoints-jisx0208` feature.
///
/// # Examples
///
/// ```rust
/// # #[cfg(feature = "codepoints-jisx0208")]
/// use japanese_codepoints::validate_katakana;
/// # #[cfg(feature = "codepoints-jisx0208")]
/// assert!(validate_katakana!("アイウエオ").is_ok());
/// # #[cfg(feature = "codepoints-jisx0208")]
/// assert!(validate_katakana!("あいうえお").is_err());
/// ```
#[cfg(feature = "codepoints-jisx0208")]
#[macro_export]
macro_rules! validate_katakana {
    ($text:expr) => {
        $crate::jisx0208::Katakana::cached().validate($text)
    };
}
210
/// Checks that every character of `$text` is **hiragana or katakana**.
///
/// The two JIS X 0208 kana sets are combined through
/// `validate_all_in_any`, so a character passes when either set contains it
/// and the two scripts may be mixed freely. Only available with the
/// `codepoints-jisx0208` feature.
///
/// # Examples
///
/// ```rust
/// # #[cfg(feature = "codepoints-jisx0208")]
/// use japanese_codepoints::validate_japanese_kana;
/// # #[cfg(feature = "codepoints-jisx0208")]
/// assert!(validate_japanese_kana!("あいアイ").is_ok());
/// # #[cfg(feature = "codepoints-jisx0208")]
/// assert!(validate_japanese_kana!("Hello").is_err());
/// ```
#[cfg(feature = "codepoints-jisx0208")]
#[macro_export]
macro_rules! validate_japanese_kana {
    ($text:expr) => {{
        // The allowed sets are built before `$text` is evaluated.
        let kana: &[&$crate::CodePoints] = &[
            $crate::jisx0208::Hiragana::cached().codepoints(),
            $crate::jisx0208::Katakana::cached().codepoints(),
        ];
        $crate::validation::validate_all_in_any($text, kana)
    }};
}
237
/// Checks that every character of `$text` is **hiragana, katakana, or ASCII
/// printable**.
///
/// The three sets are combined through `validate_all_in_any`, so a character
/// passes when any one of them contains it. Only available with the
/// `codepoints-jisx0208` feature.
///
/// # Examples
///
/// ```rust
/// # #[cfg(feature = "codepoints-jisx0208")]
/// use japanese_codepoints::validate_japanese_mixed;
/// # #[cfg(feature = "codepoints-jisx0208")]
/// assert!(validate_japanese_mixed!("こんにちはHello").is_ok());
/// # #[cfg(feature = "codepoints-jisx0208")]
/// assert!(validate_japanese_mixed!("漢字").is_err());
/// ```
#[cfg(feature = "codepoints-jisx0208")]
#[macro_export]
macro_rules! validate_japanese_mixed {
    ($text:expr) => {{
        // The allowed sets are built before `$text` is evaluated.
        let allowed: &[&$crate::CodePoints] = &[
            $crate::jisx0208::Hiragana::cached().codepoints(),
            $crate::jisx0208::Katakana::cached().codepoints(),
            $crate::CodePoints::ascii_printable_cached(),
        ];
        $crate::validation::validate_all_in_any($text, allowed)
    }};
}
263
/// Validates that `$value` contains only JIS X 0201 **halfwidth katakana**.
///
/// Expands to `$crate::jisx0201::Katakana::cached().validate($value)`.
/// Only available with the `codepoints-jisx0201` feature.
///
/// # Examples
///
/// Halfwidth katakana (U+FF71 `ｱ` …) is accepted; the fullwidth forms
/// (U+30A2 `ア` …) are different code points and are rejected.
///
/// ```rust
/// # #[cfg(feature = "codepoints-jisx0201")]
/// use japanese_codepoints::validate_jisx0201_katakana;
/// # #[cfg(feature = "codepoints-jisx0201")]
/// assert!(validate_jisx0201_katakana!("ｱｲｳｴｵ").is_ok());       // halfwidth
/// # #[cfg(feature = "codepoints-jisx0201")]
/// assert!(validate_jisx0201_katakana!("アイウエオ").is_err()); // fullwidth
/// ```
#[cfg(feature = "codepoints-jisx0201")]
#[macro_export]
macro_rules! validate_jisx0201_katakana {
    ($value:expr) => {
        $crate::jisx0201::Katakana::cached().validate($value)
    };
}
283
/// Checks that every character of `$text` is a JIS X 0201 **Latin letter**.
///
/// Expands to `$crate::jisx0201::LatinLetters::cached().validate($text)`.
/// Only available with the `codepoints-jisx0201` feature.
///
/// # Examples
///
/// ```rust
/// # #[cfg(feature = "codepoints-jisx0201")]
/// use japanese_codepoints::validate_jisx0201_latin;
/// # #[cfg(feature = "codepoints-jisx0201")]
/// assert!(validate_jisx0201_latin!("Hello¥").is_ok());
/// # #[cfg(feature = "codepoints-jisx0201")]
/// assert!(validate_jisx0201_latin!("こんにちは").is_err());
/// ```
#[cfg(feature = "codepoints-jisx0201")]
#[macro_export]
macro_rules! validate_jisx0201_latin {
    ($text:expr) => {
        $crate::jisx0201::LatinLetters::cached().validate($text)
    };
}
303
#[cfg(test)]
mod tests {
    use super::*;

    /// The default message mentions both the code point and the position.
    #[test]
    fn test_validation_error_display() {
        let rendered = ValidationError::new(0x3046, 2).to_string();
        assert!(rendered.contains("U+3046"));
        assert!(rendered.contains("position 2"));
    }

    /// `with_message` stores the supplied text and keeps the code point.
    #[test]
    fn test_validation_error_with_message() {
        let custom = ValidationError::with_message(0x41, 0, "custom msg");
        assert_eq!(custom.message, "custom msg");
        assert_eq!(custom.code_point, 0x41);
    }

    /// Union semantics across two sets, plus details of the reported error.
    #[test]
    fn test_validate_all_in_any() {
        let hiragana = CodePoints::new(vec![0x3042, 0x3044]); // あ, い
        let katakana = CodePoints::new(vec![0x30A2, 0x30A4]); // ア, イ

        assert_eq!(validate_all_in_any("あア", &[&hiragana, &katakana]), Ok(()));
        assert_eq!(validate_all_in_any("あい", &[&hiragana]), Ok(()));
        assert_eq!(validate_all_in_any("", &[&hiragana]), Ok(()));

        let ValidationError {
            code_point,
            position,
            ..
        } = validate_all_in_any("あx", &[&hiragana, &katakana]).unwrap_err();
        assert_eq!(code_point, 0x78); // 'x'
        assert_eq!(position, 1);
    }

    /// With no sets at all, only the empty string is valid.
    #[test]
    fn test_validate_all_in_any_empty_sets() {
        let no_sets: [&CodePoints; 0] = [];
        assert_eq!(validate_all_in_any("", &no_sets), Ok(())); // vacuously ok
        assert!(validate_all_in_any("a", &no_sets).is_err());
    }

    /// Three sets combined; a character outside all of them is rejected.
    #[test]
    fn test_validate_all_in_any_three_sets() {
        let hiragana = CodePoints::new(vec![0x3042]); // あ
        let katakana = CodePoints::new(vec![0x30A2]); // ア
        let ascii = CodePoints::ascii_printable();
        let all = [&hiragana, &katakana, &ascii];

        assert_eq!(validate_all_in_any("あアA", &all), Ok(()));
        // π (U+03C0) not in any set
        assert!(validate_all_in_any("あアAπ", &all).is_err());
    }
}