Skip to main content

copybook_codepage/
lib.rs

1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
2// SPDX-License-Identifier: AGPL-3.0-or-later
3//! Codepage domain types and helpers.
4//!
5//! This crate contains codepage-related enums and codepage-specific constants
6//! used by charset and numeric handling.
7
8#[cfg(feature = "clap")]
9use clap::ValueEnum;
10use serde::{Deserialize, Serialize};
11use std::str::FromStr;
12
13/// Character encoding specification
14///
15/// # Examples
16///
17/// ```
18/// use copybook_codepage::Codepage;
19///
20/// let cp = Codepage::CP037;
21/// assert!(cp.is_ebcdic());
22/// assert_eq!(cp.code_page_number(), Some(37));
23/// assert_eq!(cp.description(), "EBCDIC Code Page 037 (US/Canada)");
24/// ```
25#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
26#[cfg_attr(feature = "clap", derive(ValueEnum))]
27pub enum Codepage {
28    /// ASCII encoding
29    ASCII,
30    /// EBCDIC Code Page 037 (US/Canada)
31    #[cfg_attr(feature = "clap", value(name = "cp037"))]
32    CP037,
33    /// EBCDIC Code Page 273 (Germany/Austria)
34    #[cfg_attr(feature = "clap", value(name = "cp273"))]
35    CP273,
36    /// EBCDIC Code Page 500 (International)
37    #[cfg_attr(feature = "clap", value(name = "cp500"))]
38    CP500,
39    /// EBCDIC Code Page 1047 (Open Systems)
40    #[cfg_attr(feature = "clap", value(name = "cp1047"))]
41    CP1047,
42    /// EBCDIC Code Page 1140 (US/Canada with Euro)
43    #[cfg_attr(feature = "clap", value(name = "cp1140"))]
44    CP1140,
45}
46
47impl Codepage {
48    /// Check if this is an ASCII codepage
49    #[must_use]
50    #[inline]
51    pub const fn is_ascii(self) -> bool {
52        matches!(self, Self::ASCII)
53    }
54
55    /// Check if this is an EBCDIC codepage
56    #[must_use]
57    #[inline]
58    pub const fn is_ebcdic(self) -> bool {
59        !self.is_ascii()
60    }
61
62    /// Get the numeric code page identifier
63    #[must_use]
64    #[inline]
65    pub const fn code_page_number(self) -> Option<u16> {
66        match self {
67            Self::ASCII => None,
68            Self::CP037 => Some(37),
69            Self::CP273 => Some(273),
70            Self::CP500 => Some(500),
71            Self::CP1047 => Some(1047),
72            Self::CP1140 => Some(1140),
73        }
74    }
75
76    /// Get a human-readable description of the codepage
77    #[must_use]
78    #[inline]
79    pub const fn description(self) -> &'static str {
80        match self {
81            Self::ASCII => "ASCII encoding",
82            Self::CP037 => "EBCDIC Code Page 037 (US/Canada)",
83            Self::CP273 => "EBCDIC Code Page 273 (Germany/Austria)",
84            Self::CP500 => "EBCDIC Code Page 500 (International)",
85            Self::CP1047 => "EBCDIC Code Page 1047 (Open Systems)",
86            Self::CP1140 => "EBCDIC Code Page 1140 (US/Canada with Euro)",
87        }
88    }
89}
90
91impl std::fmt::Display for Codepage {
92    #[inline]
93    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
94        match self {
95            Self::ASCII => write!(f, "ascii"),
96            Self::CP037 => write!(f, "cp037"),
97            Self::CP273 => write!(f, "cp273"),
98            Self::CP500 => write!(f, "cp500"),
99            Self::CP1047 => write!(f, "cp1047"),
100            Self::CP1140 => write!(f, "cp1140"),
101        }
102    }
103}
104
105impl FromStr for Codepage {
106    type Err = std::convert::Infallible;
107
108    #[inline]
109    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
110        Ok(match s.to_lowercase().as_str() {
111            "ascii" => Self::ASCII,
112            "cp273" => Self::CP273,
113            "cp500" => Self::CP500,
114            "cp1047" => Self::CP1047,
115            "cp1140" => Self::CP1140,
116            // Default to CP037 for backward compatibility
117            _ => Self::CP037,
118        })
119    }
120}
121
122/// Policy for handling unmappable characters during decode
123///
124/// # Examples
125///
126/// ```
127/// use copybook_codepage::UnmappablePolicy;
128///
129/// let policy = UnmappablePolicy::Replace;
130/// assert_eq!(format!("{policy}"), "replace");
131/// ```
132#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
133#[cfg_attr(feature = "clap", derive(ValueEnum))]
134pub enum UnmappablePolicy {
135    /// Error on unmappable characters
136    #[cfg_attr(feature = "clap", value(name = "error"))]
137    Error,
138    /// Replace with U+FFFD
139    #[cfg_attr(feature = "clap", value(name = "replace"))]
140    Replace,
141    /// Skip unmappable characters
142    #[cfg_attr(feature = "clap", value(name = "skip"))]
143    Skip,
144}
145
146impl std::fmt::Display for UnmappablePolicy {
147    #[inline]
148    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
149        match self {
150            Self::Error => write!(f, "error"),
151            Self::Replace => write!(f, "replace"),
152            Self::Skip => write!(f, "skip"),
153        }
154    }
155}
156
157impl FromStr for UnmappablePolicy {
158    type Err = std::convert::Infallible;
159
160    #[inline]
161    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
162        Ok(match s.to_lowercase().as_str() {
163            "replace" => Self::Replace,
164            "skip" => Self::Skip,
165            _ => Self::Error, // Default to Error for backward compatibility
166        })
167    }
168}
169
// EBCDIC zoned decimal sign table, indexed by the zone nibble (high 4 bits).
// Each entry is an `(is_signed, is_negative)` pair.
static EBCDIC_ZONED_SIGNS: [(bool, bool); 16] = {
    const UNSIGNED: (bool, bool) = (false, false);
    const POSITIVE: (bool, bool) = (true, false);
    const NEGATIVE: (bool, bool) = (true, true);

    // Zones 0x0_ through 0xB_ and 0xE_ carry no sign, so start from an
    // all-unsigned table and override only the three signed zones.
    let mut table = [UNSIGNED; 16];
    table[0xC] = POSITIVE; // 0xC_: positive
    table[0xD] = NEGATIVE; // 0xD_: negative
    table[0xF] = POSITIVE; // 0xF_: positive (default)
    table
};
189
// ASCII overpunch signs need byte-level logic, so every zone in this table is
// deliberately marked unsigned; that keeps ASCII code paths from accidentally
// deriving a sign from the zone nibble.
static ASCII_ZONED_SIGNS: [(bool, bool); 16] = {
    const UNSIGNED: (bool, bool) = (false, false);
    [UNSIGNED; 16]
};
193
194/// Get zoned decimal sign table for a codepage.
195#[must_use]
196#[inline]
197pub fn get_zoned_sign_table(codepage: Codepage) -> &'static [(bool, bool); 16] {
198    match codepage {
199        Codepage::ASCII => &ASCII_ZONED_SIGNS,
200        _ => &EBCDIC_ZONED_SIGNS,
201    }
202}
203
204/// Get the space byte value for a codepage.
205///
206/// Returns `0x20` for ASCII, `0x40` for all EBCDIC codepages.
207#[must_use]
208#[inline]
209pub const fn space_byte(codepage: Codepage) -> u8 {
210    match codepage {
211        Codepage::ASCII => 0x20,
212        Codepage::CP037
213        | Codepage::CP273
214        | Codepage::CP500
215        | Codepage::CP1047
216        | Codepage::CP1140 => 0x40,
217    }
218}
219
/// Unit tests for the codepage enums, their string conversions, the zoned
/// sign tables and `space_byte`.
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Every `Codepage` variant, in declaration order.
    const ALL_CODEPAGES: [Codepage; 6] = [
        Codepage::ASCII,
        Codepage::CP037,
        Codepage::CP273,
        Codepage::CP500,
        Codepage::CP1047,
        Codepage::CP1140,
    ];

    /// Every non-ASCII `Codepage` variant.
    const EBCDIC_CODEPAGES: [Codepage; 5] = [
        Codepage::CP037,
        Codepage::CP273,
        Codepage::CP500,
        Codepage::CP1047,
        Codepage::CP1140,
    ];

    /// Every `UnmappablePolicy` variant, in declaration order.
    const ALL_POLICIES: [UnmappablePolicy; 3] = [
        UnmappablePolicy::Error,
        UnmappablePolicy::Replace,
        UnmappablePolicy::Skip,
    ];

    /// Parses through the `FromStr` impl under test.
    fn parse_codepage(text: &str) -> Codepage {
        <Codepage as std::str::FromStr>::from_str(text).unwrap()
    }

    /// Parses through the `FromStr` impl under test.
    fn parse_policy(text: &str) -> UnmappablePolicy {
        <UnmappablePolicy as std::str::FromStr>::from_str(text).unwrap()
    }

    #[test]
    fn test_space_byte_ascii() {
        assert_eq!(space_byte(Codepage::ASCII), 0x20);
    }

    #[test]
    fn test_space_byte_ebcdic() {
        for cp in EBCDIC_CODEPAGES {
            assert_eq!(space_byte(cp), 0x40, "unexpected space byte for {cp}");
        }
    }

    #[test]
    fn test_codepage_is_ascii() {
        assert!(Codepage::ASCII.is_ascii());
        assert!(!Codepage::CP037.is_ascii());
    }

    #[test]
    fn test_codepage_is_ebcdic() {
        assert!(!Codepage::ASCII.is_ebcdic());
        assert!(Codepage::CP037.is_ebcdic());
    }

    #[test]
    fn test_codepage_code_page_number() {
        assert_eq!(Codepage::ASCII.code_page_number(), None);
        assert_eq!(Codepage::CP037.code_page_number(), Some(37));
        assert_eq!(Codepage::CP1140.code_page_number(), Some(1140));
    }

    #[test]
    fn test_codepage_from_str_defaults_to_cp037() {
        assert_eq!(parse_codepage("unknown"), Codepage::CP037);
    }

    #[test]
    fn test_unmappable_policy_from_str_defaults_to_error() {
        assert_eq!(parse_policy("unknown"), UnmappablePolicy::Error);
    }

    #[test]
    fn test_get_zoned_sign_table_ascii_is_unsigned() {
        let table = get_zoned_sign_table(Codepage::ASCII);
        assert!(table.iter().all(|entry| *entry == (false, false)));
    }

    #[test]
    fn test_get_zoned_sign_table_ebcdic_has_signed_entries() {
        let table = get_zoned_sign_table(Codepage::CP037);
        assert_eq!(table[0xC], (true, false));
        assert_eq!(table[0xD], (true, true));
        assert_eq!(table[0xF], (true, false));
    }

    // --- Codepage::description tests ---

    #[test]
    fn test_codepage_description_all_variants() {
        let expected = [
            (Codepage::ASCII, "ASCII encoding"),
            (Codepage::CP037, "EBCDIC Code Page 037 (US/Canada)"),
            (Codepage::CP273, "EBCDIC Code Page 273 (Germany/Austria)"),
            (Codepage::CP500, "EBCDIC Code Page 500 (International)"),
            (Codepage::CP1047, "EBCDIC Code Page 1047 (Open Systems)"),
            (Codepage::CP1140, "EBCDIC Code Page 1140 (US/Canada with Euro)"),
        ];
        for (cp, description) in expected {
            assert_eq!(cp.description(), description);
        }
    }

    // --- Codepage Display tests ---

    #[test]
    fn test_codepage_display_all_variants() {
        let expected = [
            (Codepage::ASCII, "ascii"),
            (Codepage::CP037, "cp037"),
            (Codepage::CP273, "cp273"),
            (Codepage::CP500, "cp500"),
            (Codepage::CP1047, "cp1047"),
            (Codepage::CP1140, "cp1140"),
        ];
        for (cp, name) in expected {
            assert_eq!(format!("{cp}"), name);
        }
    }

    // --- Codepage FromStr tests ---

    #[test]
    fn test_codepage_from_str_all_valid_variants() {
        assert_eq!(parse_codepage("ascii"), Codepage::ASCII);
        assert_eq!(parse_codepage("cp273"), Codepage::CP273);
        assert_eq!(parse_codepage("cp500"), Codepage::CP500);
        assert_eq!(parse_codepage("cp1047"), Codepage::CP1047);
        assert_eq!(parse_codepage("cp1140"), Codepage::CP1140);
    }

    #[test]
    fn test_codepage_from_str_case_insensitive() {
        assert_eq!(parse_codepage("ASCII"), Codepage::ASCII);
        assert_eq!(parse_codepage("CP273"), Codepage::CP273);
        assert_eq!(parse_codepage("Cp500"), Codepage::CP500);
    }

    #[test]
    fn test_codepage_from_str_empty_string_defaults_to_cp037() {
        assert_eq!(parse_codepage(""), Codepage::CP037);
    }

    // --- Codepage is_ebcdic exhaustive ---

    #[test]
    fn test_codepage_is_ebcdic_all_variants() {
        assert!(!Codepage::ASCII.is_ebcdic());
        for cp in EBCDIC_CODEPAGES {
            assert!(cp.is_ebcdic(), "{cp} should be EBCDIC");
        }
    }

    // --- Codepage code_page_number exhaustive ---

    #[test]
    fn test_codepage_code_page_number_all_variants() {
        let expected = [
            (Codepage::ASCII, None),
            (Codepage::CP037, Some(37)),
            (Codepage::CP273, Some(273)),
            (Codepage::CP500, Some(500)),
            (Codepage::CP1047, Some(1047)),
            (Codepage::CP1140, Some(1140)),
        ];
        for (cp, number) in expected {
            assert_eq!(cp.code_page_number(), number, "wrong number for {cp}");
        }
    }

    // --- UnmappablePolicy Display tests ---

    #[test]
    fn test_unmappable_policy_display_all_variants() {
        assert_eq!(format!("{}", UnmappablePolicy::Error), "error");
        assert_eq!(format!("{}", UnmappablePolicy::Replace), "replace");
        assert_eq!(format!("{}", UnmappablePolicy::Skip), "skip");
    }

    // --- UnmappablePolicy FromStr tests ---

    #[test]
    fn test_unmappable_policy_from_str_all_valid() {
        assert_eq!(parse_policy("replace"), UnmappablePolicy::Replace);
        assert_eq!(parse_policy("skip"), UnmappablePolicy::Skip);
        assert_eq!(parse_policy("error"), UnmappablePolicy::Error);
    }

    #[test]
    fn test_unmappable_policy_from_str_case_insensitive() {
        assert_eq!(parse_policy("REPLACE"), UnmappablePolicy::Replace);
        assert_eq!(parse_policy("SKIP"), UnmappablePolicy::Skip);
    }

    // --- Zoned sign table exhaustive ---

    #[test]
    fn test_get_zoned_sign_table_ebcdic_unsigned_nibbles() {
        let table = get_zoned_sign_table(Codepage::CP037);
        for (i, &entry) in table.iter().enumerate().take(0xB + 1) {
            assert_eq!(entry, (false, false), "Expected unsigned at nibble 0x{i:X}");
        }
        assert_eq!(table[0xE], (false, false));
    }

    #[test]
    fn test_get_zoned_sign_table_all_ebcdic_codepages_same() {
        let reference = get_zoned_sign_table(Codepage::CP037);
        for cp in EBCDIC_CODEPAGES {
            assert_eq!(reference, get_zoned_sign_table(cp), "table differs for {cp}");
        }
    }

    // --- Serde round-trip ---

    #[test]
    fn test_codepage_serde_roundtrip() {
        let cp = Codepage::CP037;
        let json = serde_json::to_string(&cp).unwrap();
        let deserialized: Codepage = serde_json::from_str(&json).unwrap();
        assert_eq!(cp, deserialized);
    }

    #[test]
    fn test_unmappable_policy_serde_roundtrip() {
        let policy = UnmappablePolicy::Replace;
        let json = serde_json::to_string(&policy).unwrap();
        let deserialized: UnmappablePolicy = serde_json::from_str(&json).unwrap();
        assert_eq!(policy, deserialized);
    }

    // --- Additional coverage ---

    // The lint is silenced because this test exists to exercise `Clone::clone`
    // itself; a plain `let cloned = cp;` would only exercise `Copy`.
    #[test]
    #[allow(clippy::clone_on_copy)]
    fn test_codepage_clone_preserves_value() {
        let cp = Codepage::CP500;
        let cloned = Clone::clone(&cp);
        assert_eq!(cp, cloned);
    }

    #[test]
    fn test_codepage_eq_different_variants() {
        // Compare each variant with its successor in declaration order.
        for pair in ALL_CODEPAGES.windows(2) {
            assert_ne!(pair[0], pair[1]);
        }
    }

    #[test]
    fn test_codepage_debug_format() {
        assert_eq!(format!("{:?}", Codepage::CP037), "CP037");
        assert_eq!(format!("{:?}", Codepage::ASCII), "ASCII");
    }

    #[test]
    fn test_codepage_serde_all_variants_roundtrip() {
        for cp in ALL_CODEPAGES {
            let json = serde_json::to_string(&cp).unwrap();
            let deserialized: Codepage = serde_json::from_str(&json).unwrap();
            assert_eq!(cp, deserialized, "Roundtrip failed for {cp}");
        }
    }

    #[test]
    fn test_codepage_from_str_cp037_explicit() {
        // The canonical name must resolve to CP037. This assertion alone cannot
        // tell a dedicated match from the unknown-input fallback, which also
        // yields CP037.
        assert_eq!(parse_codepage("cp037"), Codepage::CP037);
    }

    #[test]
    fn test_codepage_display_roundtrip_via_from_str() {
        // Covers every variant, CP037 included.
        for cp in ALL_CODEPAGES {
            let displayed = cp.to_string();
            let parsed: Codepage = displayed.parse().unwrap();
            assert_eq!(cp, parsed, "Display/FromStr roundtrip failed for {cp}");
        }
    }

    // The lint is silenced because this test exists to exercise `Clone::clone`
    // itself; a plain `let cloned = policy;` would only exercise `Copy`.
    #[test]
    #[allow(clippy::clone_on_copy)]
    fn test_unmappable_policy_clone_preserves_value() {
        let policy = UnmappablePolicy::Skip;
        let cloned = Clone::clone(&policy);
        assert_eq!(policy, cloned);
    }

    #[test]
    fn test_unmappable_policy_debug_format() {
        assert_eq!(format!("{:?}", UnmappablePolicy::Error), "Error");
        assert_eq!(format!("{:?}", UnmappablePolicy::Replace), "Replace");
        assert_eq!(format!("{:?}", UnmappablePolicy::Skip), "Skip");
    }

    #[test]
    fn test_unmappable_policy_serde_all_variants_roundtrip() {
        for policy in ALL_POLICIES {
            let json = serde_json::to_string(&policy).unwrap();
            let deserialized: UnmappablePolicy = serde_json::from_str(&json).unwrap();
            assert_eq!(policy, deserialized, "Roundtrip failed for {policy}");
        }
    }

    #[test]
    fn test_unmappable_policy_display_roundtrip_via_from_str() {
        for policy in ALL_POLICIES {
            let displayed = policy.to_string();
            let parsed: UnmappablePolicy = displayed.parse().unwrap();
            assert_eq!(policy, parsed, "Display/FromStr roundtrip failed for {policy}");
        }
    }

    #[test]
    fn test_unmappable_policy_eq_different_variants() {
        assert_ne!(UnmappablePolicy::Error, UnmappablePolicy::Replace);
        assert_ne!(UnmappablePolicy::Replace, UnmappablePolicy::Skip);
        assert_ne!(UnmappablePolicy::Skip, UnmappablePolicy::Error);
    }

    #[test]
    fn test_unmappable_policy_from_str_empty_defaults_to_error() {
        assert_eq!(parse_policy(""), UnmappablePolicy::Error);
    }

    #[test]
    fn test_space_byte_consistency_with_is_ebcdic() {
        for cp in ALL_CODEPAGES {
            if cp.is_ebcdic() {
                assert_eq!(space_byte(cp), 0x40, "EBCDIC {cp} should have space 0x40");
            } else {
                assert_eq!(space_byte(cp), 0x20, "ASCII should have space 0x20");
            }
        }
    }

    #[test]
    fn test_codepage_is_ascii_and_is_ebcdic_mutually_exclusive() {
        for cp in ALL_CODEPAGES {
            assert_ne!(
                cp.is_ascii(),
                cp.is_ebcdic(),
                "is_ascii and is_ebcdic must be mutually exclusive for {cp}"
            );
        }
    }

    #[test]
    fn test_get_zoned_sign_table_ebcdic_positive_nibble_f() {
        let table = get_zoned_sign_table(Codepage::CP037);
        // Zone 0xF_ is reported as signed and positive (the default sign).
        let (is_signed, is_negative) = table[0xF];
        assert!(is_signed);
        assert!(!is_negative);
    }

    #[test]
    fn test_get_zoned_sign_table_ebcdic_negative_nibble_d() {
        let table = get_zoned_sign_table(Codepage::CP037);
        let (is_signed, is_negative) = table[0xD];
        assert!(is_signed);
        assert!(is_negative);
    }
}