Skip to main content

gmcrypto_core/asn1/
reader.rs

1//! Strict-canonical DER reader primitives.
2//!
3//! Each primitive returns `Option<(T, &[u8])>` — the parsed value and
4//! the remaining input. `None` for any malformed input; **single
5//! shape, every failure folds to `None`** per the project failure-
6//! mode invariant. No structured error type is exposed.
7//!
8//! Readers borrow into the input slice (zero-allocation). Strict-
9//! canonical-INTEGER discipline matches the v0.2 `asn1::sig` rules
10//! (reject empty content, reject sign-bit-set first byte without
11//! `0x00` pad, reject redundant `0x00` leading-pad). The canonical
12//! single-byte zero (`02 01 00`) is **accepted** here; callers that
13//! disallow zero (e.g. SM2 signatures where `r, s ∈ [1, n-1]`)
14//! check `result.bytes() == &[0x00]` post-read.
15//!
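//! Maximum supported DER length: just under **16 MiB** — the
//! `0x83`-prefixed long form (three big-endian length octets), i.e.
//! at most 2^24 − 1 content bytes. Longer length encodings (`0x84`
//! and up) and the indefinite form (`0x80`) are rejected on read.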
18
19use alloc::vec::Vec;
20
/// Identifier octet of a universal-class, primitive INTEGER.
pub const TAG_INTEGER: u8 = 0x02;
/// Identifier octet of a universal-class, primitive BIT STRING.
pub const TAG_BIT_STRING: u8 = 0x03;
/// Identifier octet of a universal-class, primitive OCTET STRING.
pub const TAG_OCTET_STRING: u8 = 0x04;
/// Identifier octet of a universal-class, primitive NULL.
pub const TAG_NULL: u8 = 0x05;
/// Identifier octet of a universal-class, primitive OBJECT IDENTIFIER.
pub const TAG_OID: u8 = 0x06;
/// Identifier octet of a universal-class, constructed SEQUENCE.
pub const TAG_SEQUENCE: u8 = 0x30;
/// Identifier octet of a universal-class, constructed SET.
pub const TAG_SET: u8 = 0x31;

/// DER length ceiling for the reader/writer: 16 MiB (2^24 bytes).
///
/// For the reader this is an exclusive bound: `read_length` decodes
/// at most three length octets, so the largest length it can return
/// is `MAX_DER_LEN - 1`.
pub const MAX_DER_LEN: usize = 1 << 24;
38
/// Consume one identifier octet, requiring it to equal `expected`.
///
/// Returns the input after the tag octet, or `None` when the input
/// is empty or starts with a different octet.
#[must_use]
pub fn read_tag(input: &[u8], expected: u8) -> Option<&[u8]> {
    match input {
        [tag, rest @ ..] if *tag == expected => Some(rest),
        _ => None,
    }
}
45
/// Decode a DER definite-length field from the front of `input`.
///
/// Accepted encodings:
/// - short form: a single octet `0x00..=0x7F` holding the length;
/// - `0x81 LL`: lengths `128..=255`;
/// - `0x82 HH LL`: lengths `256..=65_535`;
/// - `0x83 HH MM LL`: lengths `65_536..=16_777_215`.
///
/// Returns the decoded length together with the input that follows
/// the length octets. `None` for truncated input, for a long form
/// whose value would fit a shorter form (non-minimal, hence not
/// DER), for the indefinite-length marker `0x80`, and for `0x84` and
/// above — so every accepted length stays below `MAX_DER_LEN`.
#[must_use]
pub fn read_length(input: &[u8]) -> Option<(usize, &[u8])> {
    match input {
        // Short form: the octet is the length.
        [short @ 0x00..=0x7F, rest @ ..] => Some((usize::from(*short), rest)),
        // One length octet; values below 128 belong in the short form.
        [0x81, len, rest @ ..] if *len >= 0x80 => Some((usize::from(*len), rest)),
        // Two big-endian length octets; values below 256 fit `0x81 LL`.
        [0x82, hi, lo, rest @ ..] => {
            let len = (usize::from(*hi) << 8) | usize::from(*lo);
            if len >= 256 {
                Some((len, rest))
            } else {
                None
            }
        }
        // Three big-endian length octets; values below 65_536 fit `0x82 HH LL`.
        [0x83, b2, b1, b0, rest @ ..] => {
            let len = (usize::from(*b2) << 16) | (usize::from(*b1) << 8) | usize::from(*b0);
            if len >= 65_536 {
                Some((len, rest))
            } else {
                None
            }
        }
        // Empty or truncated input, a non-minimal `0x81`, the
        // indefinite form `0x80`, and four-or-more length octets.
        _ => None,
    }
}
85
86/// Read a tag-length-value triple, asserting tag equals `expected`.
87///
88/// Returns the value bytes (borrowed) and the remainder after the
89/// value.
90#[must_use]
91pub fn read_tlv(input: &[u8], expected: u8) -> Option<(&[u8], &[u8])> {
92    let rest = read_tag(input, expected)?;
93    let (len, rest) = read_length(rest)?;
94    if rest.len() < len {
95        return None;
96    }
97    Some(rest.split_at(len))
98}
99
100/// Read a DER INTEGER.
101///
102/// Returns the canonical unsigned big-endian content bytes (with the
103/// disambiguating `0x00` pad stripped, if present) and the
104/// remainder. The single-byte `[0x00]` (canonical zero) is returned
105/// as-is; callers that disallow zero must check `bytes == &[0x00]`
106/// post-read.
107///
108/// Strict-canonical rules per X.690 §8.3.2 / §10.2:
109/// - empty content rejected;
110/// - sign-bit-set first byte without `0x00` pad rejected (would be
111///   negative in two's complement);
112/// - redundant `0x00` leading-pad (BER style) rejected.
113#[must_use]
114pub fn read_integer(input: &[u8]) -> Option<(&[u8], &[u8])> {
115    let (bytes, rest) = read_tlv(input, TAG_INTEGER)?;
116    if bytes.is_empty() {
117        return None;
118    }
119    if bytes[0] & 0x80 != 0 {
120        return None; // would be negative in two's complement
121    }
122    let unsigned = if bytes[0] == 0x00 {
123        if bytes.len() == 1 {
124            // Canonical encoding of zero — accept and return [0x00].
125            bytes
126        } else if bytes[1] & 0x80 == 0 {
127            // Leading 0x00 followed by a high-bit-clear byte is
128            // redundant (BER, not DER).
129            return None;
130        } else {
131            &bytes[1..]
132        }
133    } else {
134        bytes
135    };
136    Some((unsigned, rest))
137}
138
139/// Read a DER OCTET STRING.
140///
141/// Returns the value bytes (borrowed) and the remainder.
142#[must_use]
143pub fn read_octet_string(input: &[u8]) -> Option<(&[u8], &[u8])> {
144    read_tlv(input, TAG_OCTET_STRING)
145}
146
147/// Read a DER NULL — must be exactly `05 00`.
148#[must_use]
149pub fn read_null(input: &[u8]) -> Option<&[u8]> {
150    let (value, rest) = read_tlv(input, TAG_NULL)?;
151    if !value.is_empty() {
152        return None;
153    }
154    Some(rest)
155}
156
157/// Read a DER OBJECT IDENTIFIER.
158///
159/// Returns the encoded sub-identifier bytes (per X.690 §8.19, no
160/// outer `06 LEN` framing) and the remainder. Callers compare to
161/// fixed encodings from [`super::oid`].
162///
163/// Sanity checks: non-empty content; the final byte's high bit is
164/// clear (no continuation past the last sub-identifier).
165#[must_use]
166pub fn read_oid(input: &[u8]) -> Option<(&[u8], &[u8])> {
167    let (value, rest) = read_tlv(input, TAG_OID)?;
168    if value.is_empty() {
169        return None;
170    }
171    // The final sub-identifier byte must not have the continuation bit set.
172    if value[value.len() - 1] & 0x80 != 0 {
173        return None;
174    }
175    Some((value, rest))
176}
177
178/// Read a DER BIT STRING.
179///
180/// Returns `(unused_bits, value_bytes, rest)`. `unused_bits` is the
181/// count from the first content byte; for the SPKI uncompressed-
182/// point case this MUST be `0` — caller checks. `unused_bits > 7`
183/// is rejected. An empty BIT STRING value (no `unused_bits` byte
184/// at all) is also rejected.
185#[must_use]
186pub fn read_bit_string(input: &[u8]) -> Option<(u8, &[u8], &[u8])> {
187    let (value, rest) = read_tlv(input, TAG_BIT_STRING)?;
188    let (unused, bit_bytes) = value.split_first()?;
189    if *unused > 7 {
190        return None;
191    }
192    Some((*unused, bit_bytes, rest))
193}
194
195/// Read a DER SEQUENCE.
196///
197/// Returns the body bytes (borrowed) and the remainder after the
198/// sequence. Callers iterate the body via further `read_*` calls
199/// and check that the body slice is fully consumed.
200#[must_use]
201pub fn read_sequence(input: &[u8]) -> Option<(&[u8], &[u8])> {
202    read_tlv(input, TAG_SEQUENCE)
203}
204
205/// Read a context-tagged `[n] EXPLICIT` field.
206///
207/// Returns the inner (constructed-content) bytes and the remainder.
208/// Tag numbers above 30 (which would require multi-byte tag form)
209/// are rejected — none of the W2 wire formats need them.
210#[must_use]
211pub fn read_context_tagged_explicit(input: &[u8], n: u8) -> Option<(&[u8], &[u8])> {
212    if n > 30 {
213        return None;
214    }
215    // Class = context (0xA0); P/C = constructed (0x20). These two share the
216    // upper nibble 0xA0; tag number occupies the low 5 bits.
217    let tag = 0xA0 | n;
218    read_tlv(input, tag)
219}
220
221/// Read a context-tagged `[n] IMPLICIT` primitive field (no inner
222/// constructed wrapper). Returns the value bytes and the remainder.
223#[must_use]
224pub fn read_context_tagged_implicit(input: &[u8], n: u8) -> Option<(&[u8], &[u8])> {
225    if n > 30 {
226        return None;
227    }
228    // Class = context (0x80); P/C = primitive (0x00).
229    let tag = 0x80 | n;
230    read_tlv(input, tag)
231}
232
/// Read a `SEQUENCE OF` body and collect items via a closure.
///
/// `read_item` is called on the not-yet-consumed part of `body` and
/// must return one parsed item plus the input that follows it.
/// Iteration stops once the body is exhausted, so the whole body is
/// always consumed on success. An empty body yields an empty `Vec`.
///
/// Returns `None` if any individual item read fails, or if
/// `read_item` reports success without shrinking the remaining
/// input. The latter guard guarantees termination: without it a
/// closure that consumes nothing would loop forever while the
/// result vector grows.
#[must_use]
pub fn collect_sequence_of<'a, T, F>(body: &'a [u8], mut read_item: F) -> Option<Vec<T>>
where
    F: FnMut(&'a [u8]) -> Option<(T, &'a [u8])>,
{
    let mut items = Vec::new();
    let mut cursor = body;
    while !cursor.is_empty() {
        let (item, next) = read_item(cursor)?;
        // Every step must make progress, otherwise the loop cannot end.
        if next.len() >= cursor.len() {
            return None;
        }
        items.push(item);
        cursor = next;
    }
    Some(items)
}
253
/// Unit tests for the DER reader primitives, grouped per reader.
#[cfg(test)]
mod tests {
    use super::*;

    // ---------- read_tag ----------

    #[test]
    fn tag_match_and_mismatch() {
        assert_eq!(read_tag(&[0x30, 0x00], 0x30), Some(&[0x00][..]));
        assert_eq!(read_tag(&[0x03, 0x00], 0x30), None);
        assert_eq!(read_tag(&[], 0x30), None);
    }

    // ---------- read_length ----------

    #[test]
    fn length_one_byte_forms() {
        assert_eq!(read_length(&[0x00]), Some((0, &[][..])));
        assert_eq!(read_length(&[0x7F]), Some((127, &[][..])));
        // Trailing input after the length is preserved.
        assert_eq!(
            read_length(&[0x05, 0xAA, 0xBB]),
            Some((5, &[0xAA, 0xBB][..]))
        );
    }

    #[test]
    fn length_two_byte_form() {
        assert_eq!(read_length(&[0x81, 0x80]), Some((128, &[][..])));
        assert_eq!(read_length(&[0x81, 0xFF]), Some((255, &[][..])));
    }

    #[test]
    fn length_two_byte_non_minimal_rejected() {
        // 0x81 0x05 should have used the 1-byte form (0x05).
        assert_eq!(read_length(&[0x81, 0x05]), None);
        assert_eq!(read_length(&[0x81, 0x7F]), None);
    }

    #[test]
    fn length_three_byte_form() {
        assert_eq!(read_length(&[0x82, 0x01, 0x00]), Some((256, &[][..])));
        assert_eq!(read_length(&[0x82, 0xFF, 0xFF]), Some((65_535, &[][..])));
    }

    #[test]
    fn length_three_byte_non_minimal_rejected() {
        // 0x82 0x00 0x05 — len = 5 — should have used the 1-byte form.
        assert_eq!(read_length(&[0x82, 0x00, 0x05]), None);
        // 0x82 0x00 0xFF — len = 255 — should have used the 2-byte form.
        assert_eq!(read_length(&[0x82, 0x00, 0xFF]), None);
    }

    #[test]
    fn length_four_byte_form() {
        assert_eq!(
            read_length(&[0x83, 0x01, 0x00, 0x00]),
            Some((65_536, &[][..]))
        );
        assert_eq!(
            read_length(&[0x83, 0xFF, 0xFF, 0xFF]),
            Some((16_777_215, &[][..]))
        );
    }

    #[test]
    fn length_four_byte_non_minimal_rejected() {
        assert_eq!(read_length(&[0x83, 0x00, 0xFF, 0xFF]), None);
    }

    #[test]
    fn length_above_max_rejected() {
        // 0x84 indicates 4 content bytes — not supported.
        assert_eq!(read_length(&[0x84, 0x01, 0x00, 0x00, 0x00]), None);
    }

    #[test]
    fn length_indefinite_form_rejected() {
        // 0x80 is the BER indefinite-length marker — never valid DER.
        assert_eq!(read_length(&[0x80]), None);
        assert_eq!(read_length(&[0x80, 0x00, 0x00]), None);
    }

    #[test]
    fn length_truncated_rejected() {
        assert_eq!(read_length(&[]), None);
        assert_eq!(read_length(&[0x81]), None);
        assert_eq!(read_length(&[0x82, 0x01]), None);
        assert_eq!(read_length(&[0x83, 0x01, 0x00]), None);
    }

    // ---------- read_tlv ----------

    #[test]
    fn tlv_truncated_value_rejected() {
        // Declares three value bytes but only one is present.
        assert!(read_tlv(&[0x04, 0x03, 0x01], TAG_OCTET_STRING).is_none());
    }

    // ---------- read_integer ----------

    #[test]
    fn integer_canonical_zero() {
        let (bytes, rest) = read_integer(&[0x02, 0x01, 0x00]).expect("zero");
        assert_eq!(bytes, &[0x00]);
        assert!(rest.is_empty());
    }

    #[test]
    fn integer_small_positive() {
        let (bytes, _) = read_integer(&[0x02, 0x01, 0x01]).unwrap();
        assert_eq!(bytes, &[0x01]);
        let (bytes, _) = read_integer(&[0x02, 0x01, 0x7F]).unwrap();
        assert_eq!(bytes, &[0x7F]);
    }

    #[test]
    fn integer_strips_disambiguating_pad() {
        // 0x80 alone would be negative; 0x00 0x80 is the canonical
        // unsigned encoding of 128.
        let (bytes, _) = read_integer(&[0x02, 0x02, 0x00, 0x80]).unwrap();
        assert_eq!(bytes, &[0x80]);
    }

    #[test]
    fn integer_rejects_redundant_pad() {
        // 0x00 0x01 — high bit of 0x01 is clear, so the 0x00 pad is
        // redundant (BER, non-canonical).
        assert!(read_integer(&[0x02, 0x02, 0x00, 0x01]).is_none());
    }

    #[test]
    fn integer_rejects_negative() {
        // 0x80 alone — sign bit set, no pad → would be negative in
        // two's complement. SM2 has no negative integers on the wire.
        assert!(read_integer(&[0x02, 0x01, 0x80]).is_none());
        assert!(read_integer(&[0x02, 0x01, 0xFF]).is_none());
    }

    #[test]
    fn integer_rejects_empty_content() {
        assert!(read_integer(&[0x02, 0x00]).is_none());
    }

    #[test]
    fn integer_rejects_wrong_tag() {
        assert!(read_integer(&[0x03, 0x01, 0x01]).is_none());
    }

    #[test]
    fn integer_preserves_remainder() {
        let (bytes, rest) = read_integer(&[0x02, 0x01, 0x05, 0xDE, 0xAD]).unwrap();
        assert_eq!(bytes, &[0x05]);
        assert_eq!(rest, &[0xDE, 0xAD]);
    }

    // ---------- read_octet_string ----------

    #[test]
    fn octet_string_round_trip() {
        let (value, rest) = read_octet_string(&[0x04, 0x03, 0x01, 0x02, 0x03]).unwrap();
        assert_eq!(value, &[0x01, 0x02, 0x03]);
        assert!(rest.is_empty());
    }

    #[test]
    fn octet_string_empty() {
        let (value, rest) = read_octet_string(&[0x04, 0x00]).unwrap();
        assert!(value.is_empty());
        assert!(rest.is_empty());
    }

    // ---------- read_null ----------

    #[test]
    fn null_canonical() {
        assert_eq!(read_null(&[0x05, 0x00]), Some(&[][..]));
        assert_eq!(read_null(&[0x05, 0x00, 0xFF]), Some(&[0xFF][..]));
    }

    #[test]
    fn null_with_content_rejected() {
        assert!(read_null(&[0x05, 0x01, 0x00]).is_none());
    }

    // ---------- read_oid ----------

    #[test]
    fn oid_id_pbkdf2() {
        // 1.2.840.113549.1.5.12 → 06 09 2A 86 48 86 F7 0D 01 05 0C
        let der = [
            0x06, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x05, 0x0C,
        ];
        let (value, rest) = read_oid(&der).unwrap();
        assert_eq!(value, &der[2..]);
        assert!(rest.is_empty());
    }

    #[test]
    fn oid_empty_rejected() {
        assert!(read_oid(&[0x06, 0x00]).is_none());
    }

    #[test]
    fn oid_unterminated_continuation_rejected() {
        // 0x80 has the high bit set → continuation, but it's the
        // last byte — malformed.
        assert!(read_oid(&[0x06, 0x01, 0x80]).is_none());
        assert!(read_oid(&[0x06, 0x02, 0x2A, 0x80]).is_none());
    }

    // ---------- read_bit_string ----------

    #[test]
    fn bit_string_zero_unused() {
        let (unused, bytes, rest) = read_bit_string(&[0x03, 0x03, 0x00, 0xAB, 0xCD]).unwrap();
        assert_eq!(unused, 0);
        assert_eq!(bytes, &[0xAB, 0xCD]);
        assert!(rest.is_empty());
    }

    #[test]
    fn bit_string_unused_above_7_rejected() {
        assert!(read_bit_string(&[0x03, 0x02, 0x08, 0xFF]).is_none());
    }

    #[test]
    fn bit_string_empty_value_rejected() {
        // 0x03 0x00 — no unused-bits byte at all.
        assert!(read_bit_string(&[0x03, 0x00]).is_none());
    }

    // ---------- read_sequence ----------

    #[test]
    fn sequence_round_trip() {
        // SEQUENCE { INTEGER 1, INTEGER 2 } = 30 06 02 01 01 02 01 02
        let der = [0x30, 0x06, 0x02, 0x01, 0x01, 0x02, 0x01, 0x02];
        let (body, rest) = read_sequence(&der).unwrap();
        assert_eq!(body, &der[2..]);
        assert!(rest.is_empty());
        // Iterate body.
        let (a, body) = read_integer(body).unwrap();
        let (b, body) = read_integer(body).unwrap();
        assert_eq!(a, &[0x01]);
        assert_eq!(b, &[0x02]);
        assert!(body.is_empty());
    }

    // ---------- context tags ----------

    #[test]
    fn context_explicit_round_trip() {
        // [0] EXPLICIT INTEGER 1 = A0 03 02 01 01
        let der = [0xA0, 0x03, 0x02, 0x01, 0x01];
        let (inner, rest) = read_context_tagged_explicit(&der, 0).unwrap();
        assert!(rest.is_empty());
        let (val, _) = read_integer(inner).unwrap();
        assert_eq!(val, &[0x01]);
    }

    #[test]
    fn context_implicit_round_trip() {
        // [1] IMPLICIT OCTET STRING "ab" = 81 02 61 62
        let der = [0x81, 0x02, 0x61, 0x62];
        let (value, rest) = read_context_tagged_implicit(&der, 1).unwrap();
        assert_eq!(value, b"ab");
        assert!(rest.is_empty());
    }

    #[test]
    fn context_explicit_wrong_number_rejected() {
        let der = [0xA0, 0x03, 0x02, 0x01, 0x01];
        assert!(read_context_tagged_explicit(&der, 1).is_none());
    }

    #[test]
    fn context_explicit_above_30_rejected() {
        // We don't support multi-byte tag form.
        assert!(read_context_tagged_explicit(&[0xBF, 0x1F, 0x00], 31).is_none());
    }

    #[test]
    fn context_implicit_above_30_rejected() {
        // Same restriction for the primitive (IMPLICIT) variant.
        assert!(read_context_tagged_implicit(&[0x9F, 0x1F, 0x00], 31).is_none());
    }

    // ---------- collect_sequence_of ----------

    #[test]
    fn collect_three_integers() {
        let body = [0x02, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x01, 0x03];
        let items = collect_sequence_of(&body, |b| {
            let (v, rest) = read_integer(b)?;
            Some((v[0], rest))
        })
        .unwrap();
        assert_eq!(items, alloc::vec![1u8, 2, 3]);
    }

    #[test]
    fn collect_stops_on_short_input() {
        let body = [0x02, 0x01, 0x01, 0x02, 0x01]; // truncated second INTEGER
        let result: Option<Vec<u8>> = collect_sequence_of(&body, |b| {
            let (v, rest) = read_integer(b)?;
            Some((v[0], rest))
        });
        assert!(result.is_none());
    }
}
534}