japanese_codepoints/validation.rs
1//! Validation utilities for code-point collections.
2//!
3//! This module provides:
4//!
5//! * [`ValidationError`] – a structured error returned when a string contains
6//! characters outside an allowed set.
7//! * [`validate_all_in_any`] – validate text against the *union* of several
8//! character sets simultaneously.
9//! * Convenience macros for common Japanese character-set checks.
10
11use std::fmt;
12
13use crate::CodePoints;
14
15// ── error type ────────────────────────────────────────────────────────────────
16
/// Describes a single code-point validation failure.
///
/// A `ValidationError` records the offending code point, its zero-based
/// character position in the input string, and a human-readable message.
/// Its [`Display`](fmt::Display) output is exactly the `message` field.
///
/// # Examples
///
/// ```rust
/// use japanese_codepoints::CodePoints;
///
/// let cp = CodePoints::ascii_printable();
/// let err = cp.validate("hello\0world").unwrap_err();
/// assert_eq!(err.code_point, 0); // NULL character
/// assert_eq!(err.position, 5); // index of '\0'
/// assert!(err.to_string().contains("U+0000"));
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationError {
    /// The Unicode code point that is not allowed by the character set.
    pub code_point: u32,
    /// Zero-based *character* index (not byte index) within the input string.
    pub position: usize,
    /// A human-readable description of the error.
    pub message: String,
}

impl fmt::Display for ValidationError {
    /// Writes the stored `message` verbatim.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.message)
    }
}

impl std::error::Error for ValidationError {}

impl ValidationError {
    /// Creates a `ValidationError` for the given code point and character
    /// index, generating the default message
    /// `invalid character '<c>' (U+XXXX) at position <n>`.
    ///
    /// The character is rendered the way Rust prints a `char` with `{:?}`:
    /// printable characters appear as-is, while control characters, combining
    /// marks and the single quote are escaped (`'\0'`, `'\n'`, `'\''`, …).
    /// This keeps raw control bytes out of logs and terminals.
    ///
    /// If `code_point` is not a valid Unicode scalar value, U+FFFD is shown in
    /// place of the character; the `code_point` field and the `U+XXXX` part of
    /// the message still carry the original value.
    pub fn new(code_point: u32, position: usize) -> Self {
        // Surrogates and values above U+10FFFF have no `char` representation.
        let ch = char::from_u32(code_point).unwrap_or('\u{FFFD}');
        // `{:?}` on a `char` yields a single-quoted, escaped literal.
        let message = format!(
            "invalid character {:?} (U+{:04X}) at position {}",
            ch, code_point, position
        );
        Self::with_message(code_point, position, message)
    }

    /// Creates a `ValidationError` with an explicit message, overriding the
    /// default formatting. The message is stored unchanged.
    pub fn with_message(code_point: u32, position: usize, message: impl Into<String>) -> Self {
        Self {
            code_point,
            position,
            message: message.into(),
        }
    }
}
75
76// ── multi-set validation ──────────────────────────────────────────────────────
77
78/// Validates that **every** character in `text` belongs to **at least one** of
79/// the provided character sets.
80///
81/// This is the idiomatic way to validate text that may legitimately contain
82/// characters from multiple scripts — for example Japanese hiragana mixed with
83/// ASCII punctuation.
84///
85/// # Edge cases
86///
87/// * An empty `text` returns `Ok(())` (vacuously valid).
88/// * An empty `sets` slice returns `Err` for any non-empty `text`.
89///
90/// # Examples
91///
92/// ```rust
93/// use japanese_codepoints::{CodePoints, validation::validate_all_in_any};
94///
95/// let hiragana = CodePoints::new(vec![0x3042, 0x3044]); // あ, い
96/// let katakana = CodePoints::new(vec![0x30A2]); // ア
97///
98/// assert!(validate_all_in_any("あア", &[&hiragana, &katakana]).is_ok());
99/// assert!(validate_all_in_any("あx", &[&hiragana, &katakana]).is_err());
100/// ```
101pub fn validate_all_in_any(text: &str, sets: &[&CodePoints]) -> Result<(), ValidationError> {
102 for (i, c) in text.chars().enumerate() {
103 if !sets.iter().any(|set| set.contains_char(c)) {
104 return Err(ValidationError::new(c as u32, i));
105 }
106 }
107 Ok(())
108}
109
110// ── macros ────────────────────────────────────────────────────────────────────
111
/// Validates that `$value` contains only code points present in `$codepoints`.
///
/// Expands to `$codepoints.validate($value)`. With a `CodePoints` set this
/// returns `Ok(())` on success and an `Err` holding a [`ValidationError`] on
/// failure. A trailing comma after the last argument is accepted.
///
/// # Examples
///
/// ```rust
/// use japanese_codepoints::{validate_codepoints, CodePoints};
///
/// let cp = CodePoints::ascii_printable();
/// assert!(validate_codepoints!("hello", &cp).is_ok());
/// assert!(validate_codepoints!("hello\0", &cp).is_err());
/// ```
#[macro_export]
macro_rules! validate_codepoints {
    ($value:expr, $codepoints:expr $(,)?) => {
        $codepoints.validate($value)
    };
}
131
/// Extended validation with additional patterns.
///
/// Two forms are supported, and both accept an optional trailing comma:
///
/// * `validate_codepoints_advanced!(value, codepoints, message)` — runs
///   `codepoints.validate(value)` and, on failure, replaces the error's
///   `message` field with `message.to_string()`. The message expression is
///   only evaluated when validation fails.
/// * `validate_codepoints_advanced!(value, detailed codepoints)` — runs
///   `codepoints.validate(value)` and returns its result unchanged.
///
/// ## Custom error message
///
/// ```rust
/// use japanese_codepoints::{validate_codepoints_advanced, CodePoints};
///
/// let cp = CodePoints::ascii_printable();
/// let r = validate_codepoints_advanced!("hi", &cp, "Only ASCII allowed");
/// assert!(r.is_ok());
/// ```
///
/// ## Detailed (default position-aware message)
///
/// ```rust
/// use japanese_codepoints::{validate_codepoints_advanced, CodePoints};
///
/// let cp = CodePoints::ascii_printable();
/// let r = validate_codepoints_advanced!("hi\0there", detailed &cp);
/// assert!(r.is_err());
/// ```
#[macro_export]
macro_rules! validate_codepoints_advanced {
    // Custom error message — overrides the message carried by the error while
    // leaving its other fields untouched.
    ($value:expr, $codepoints:expr, $error_msg:expr $(,)?) => {
        $codepoints.validate($value).map_err(|mut e| {
            e.message = $error_msg.to_string();
            e
        })
    };

    // Detailed — same expansion as `validate_codepoints!`, kept for symmetry.
    ($value:expr, detailed $codepoints:expr $(,)?) => {
        $codepoints.validate($value)
    };
}
168
169// ── feature-gated convenience macros ─────────────────────────────────────────
170
/// Validates that `$value` contains only JIS X 0208 **hiragana** characters.
///
/// Expands to a `validate` call on the cached `jisx0208::Hiragana` set.
/// A trailing comma after the argument is accepted. Only available with the
/// `codepoints-jisx0208` feature.
///
/// # Examples
///
/// ```rust
/// # #[cfg(feature = "codepoints-jisx0208")]
/// use japanese_codepoints::validate_hiragana;
/// # #[cfg(feature = "codepoints-jisx0208")]
/// assert!(validate_hiragana!("あいうえお").is_ok());
/// # #[cfg(feature = "codepoints-jisx0208")]
/// assert!(validate_hiragana!("Hello").is_err());
/// ```
#[cfg(feature = "codepoints-jisx0208")]
#[macro_export]
macro_rules! validate_hiragana {
    ($value:expr $(,)?) => {
        $crate::jisx0208::Hiragana::cached().validate($value)
    };
}
190
/// Validates that `$value` contains only JIS X 0208 **katakana** characters.
///
/// Expands to a `validate` call on the cached `jisx0208::Katakana` set.
/// A trailing comma after the argument is accepted. Only available with the
/// `codepoints-jisx0208` feature.
///
/// # Examples
///
/// ```rust
/// # #[cfg(feature = "codepoints-jisx0208")]
/// use japanese_codepoints::validate_katakana;
/// # #[cfg(feature = "codepoints-jisx0208")]
/// assert!(validate_katakana!("アイウエオ").is_ok());
/// # #[cfg(feature = "codepoints-jisx0208")]
/// assert!(validate_katakana!("あいうえお").is_err());
/// ```
#[cfg(feature = "codepoints-jisx0208")]
#[macro_export]
macro_rules! validate_katakana {
    ($value:expr $(,)?) => {
        $crate::jisx0208::Katakana::cached().validate($value)
    };
}
210
/// Validates that `$value` contains only **hiragana or katakana** characters.
///
/// Each character must belong to at least one of the two JIS X 0208 sets;
/// mixing is allowed. The check is delegated to
/// `validation::validate_all_in_any`. A trailing comma after the argument is
/// accepted. Only available with the `codepoints-jisx0208` feature.
///
/// # Examples
///
/// ```rust
/// # #[cfg(feature = "codepoints-jisx0208")]
/// use japanese_codepoints::validate_japanese_kana;
/// # #[cfg(feature = "codepoints-jisx0208")]
/// assert!(validate_japanese_kana!("あいアイ").is_ok());
/// # #[cfg(feature = "codepoints-jisx0208")]
/// assert!(validate_japanese_kana!("Hello").is_err());
/// ```
#[cfg(feature = "codepoints-jisx0208")]
#[macro_export]
macro_rules! validate_japanese_kana {
    ($value:expr $(,)?) => {{
        // The explicit slice type lets the array literal coerce to `&[&CodePoints]`.
        let sets: &[&$crate::CodePoints] = &[
            $crate::jisx0208::Hiragana::cached().codepoints(),
            $crate::jisx0208::Katakana::cached().codepoints(),
        ];
        $crate::validation::validate_all_in_any($value, sets)
    }};
}
237
/// Validates that `$value` contains only **hiragana, katakana, or ASCII
/// printable** characters.
///
/// Each character must belong to at least one of the three sets. The check is
/// delegated to `validation::validate_all_in_any`. A trailing comma after the
/// argument is accepted. Only available with the `codepoints-jisx0208`
/// feature.
///
/// # Examples
///
/// ```rust
/// # #[cfg(feature = "codepoints-jisx0208")]
/// use japanese_codepoints::validate_japanese_mixed;
/// # #[cfg(feature = "codepoints-jisx0208")]
/// assert!(validate_japanese_mixed!("こんにちはHello").is_ok());
/// # #[cfg(feature = "codepoints-jisx0208")]
/// assert!(validate_japanese_mixed!("漢字").is_err());
/// ```
#[cfg(feature = "codepoints-jisx0208")]
#[macro_export]
macro_rules! validate_japanese_mixed {
    ($value:expr $(,)?) => {{
        // The explicit slice type lets the array literal coerce to `&[&CodePoints]`.
        let sets: &[&$crate::CodePoints] = &[
            $crate::jisx0208::Hiragana::cached().codepoints(),
            $crate::jisx0208::Katakana::cached().codepoints(),
            $crate::CodePoints::ascii_printable_cached(),
        ];
        $crate::validation::validate_all_in_any($value, sets)
    }};
}
263
/// Validates that `$value` contains only JIS X 0201 **halfwidth katakana**.
///
/// Expands to a `validate` call on the cached `jisx0201::Katakana` set.
/// A trailing comma after the argument is accepted. Only available with the
/// `codepoints-jisx0201` feature.
///
/// # Examples
///
/// ```rust
/// # #[cfg(feature = "codepoints-jisx0201")]
/// use japanese_codepoints::validate_jisx0201_katakana;
/// // Halfwidth forms (U+FF71 …) are accepted.
/// # #[cfg(feature = "codepoints-jisx0201")]
/// assert!(validate_jisx0201_katakana!("ｱｲｳｴｵ").is_ok());
/// // Fullwidth katakana (U+30A2 …) are rejected.
/// # #[cfg(feature = "codepoints-jisx0201")]
/// assert!(validate_jisx0201_katakana!("アイウエオ").is_err());
/// ```
#[cfg(feature = "codepoints-jisx0201")]
#[macro_export]
macro_rules! validate_jisx0201_katakana {
    ($value:expr $(,)?) => {
        $crate::jisx0201::Katakana::cached().validate($value)
    };
}
283
/// Validates that `$value` contains only JIS X 0201 **Latin letters**.
///
/// Expands to a `validate` call on the cached `jisx0201::LatinLetters` set.
/// A trailing comma after the argument is accepted. Only available with the
/// `codepoints-jisx0201` feature.
///
/// # Examples
///
/// ```rust
/// # #[cfg(feature = "codepoints-jisx0201")]
/// use japanese_codepoints::validate_jisx0201_latin;
/// # #[cfg(feature = "codepoints-jisx0201")]
/// assert!(validate_jisx0201_latin!("Hello¥").is_ok());
/// # #[cfg(feature = "codepoints-jisx0201")]
/// assert!(validate_jisx0201_latin!("こんにちは").is_err());
/// ```
#[cfg(feature = "codepoints-jisx0201")]
#[macro_export]
macro_rules! validate_jisx0201_latin {
    ($value:expr $(,)?) => {
        $crate::jisx0201::LatinLetters::cached().validate($value)
    };
}
303
#[cfg(test)]
mod tests {
    use super::*;

    /// The default message names both the code point and the position.
    #[test]
    fn test_validation_error_display() {
        let rendered = ValidationError::new(0x3046, 2).to_string();
        assert!(rendered.contains("U+3046"));
        assert!(rendered.contains("position 2"));
    }

    /// `with_message` stores the supplied text and keeps the code point.
    #[test]
    fn test_validation_error_with_message() {
        let error = ValidationError::with_message(0x41, 0, "custom msg");
        assert_eq!(error.message, "custom msg");
        assert_eq!(error.code_point, 0x41);
    }

    /// Characters are accepted when any one of the sets contains them.
    #[test]
    fn test_validate_all_in_any() {
        let hiragana = CodePoints::new(vec![0x3042, 0x3044]); // あ, い
        let katakana = CodePoints::new(vec![0x30A2, 0x30A4]); // ア, イ
        let both = [&hiragana, &katakana];

        assert!(validate_all_in_any("あア", &both).is_ok());
        assert!(validate_all_in_any("あい", &[&hiragana]).is_ok());
        assert!(validate_all_in_any("", &[&hiragana]).is_ok());

        match validate_all_in_any("あx", &both) {
            Err(failure) => {
                assert_eq!(failure.code_point, 0x78); // 'x'
                assert_eq!(failure.position, 1);
            }
            Ok(()) => panic!("'x' is in neither set and must be rejected"),
        }
    }

    /// With no sets, only the empty string is valid.
    #[test]
    fn test_validate_all_in_any_empty_sets() {
        let no_sets: &[&CodePoints] = &[];
        assert!(validate_all_in_any("", no_sets).is_ok()); // vacuously ok
        assert!(validate_all_in_any("a", no_sets).is_err());
    }

    /// The union works across more than two sets.
    #[test]
    fn test_validate_all_in_any_three_sets() {
        let hiragana = CodePoints::new(vec![0x3042]); // あ
        let katakana = CodePoints::new(vec![0x30A2]); // ア
        let ascii = CodePoints::ascii_printable();
        let all = [&hiragana, &katakana, &ascii];

        assert!(validate_all_in_any("あアA", &all).is_ok());
        // π (U+03C0) is not in any of the three sets.
        assert!(validate_all_in_any("あアAπ", &all).is_err());
    }
}
352}