of_dn_parser/
lib.rs

//! Distinguished name (DN) parser and formatter following OpenFinance
//! Brasil's DCR 1.0 standard.
3
4use std::{
5    borrow::Cow,
6    result,
7    str::{self, FromStr, Utf8Error},
8    string::FromUtf8Error,
9};
10
11use derive_more::{Display, Error, From};
12
13#[cfg(test)]
14mod test;
15
/// Characters that carry special meaning in the string form of a DN.
///
/// The serializer prefixes each of them with a backslash, and the parser
/// accepts any of them as the single character following a backslash.
const ESCAPABLE_SYMBOLS: [char; 10] = [
    ' ', '"', '#', '+', ',', ';', '<', '=', '>', '\\',
];
18
19/// Possible errors when parsing distinguished names.
20#[derive(Debug, Display, Error, From)]
21pub enum Error {
22    /// Could not decode a hex string.
23    Hex(hex::FromHexError),
24    /// Found an invalid RDN type.
25    #[display(fmt = "invalid RDN type: {_0}")]
26    #[from(ignore)]
27    InvalidType(#[error(not(source))] String),
28    /// Found an invalid value for the specified RDN type.
29    #[display(fmt = "invalid value for {ty:?}: {value}")]
30    #[from(ignore)]
31    InvalidValue { ty: RdnType, value: String },
32    /// Found a character in a position where it is invalid.
33    #[display(fmt = "unexpected character: {_0:?}")]
34    #[from(ignore)]
35    UnexpectedCharacter(#[error(not(source))] char),
36    /// String ended unexpectedly.
37    #[display(fmt = "unexpected EOF")]
38    UnexpectedEof,
39    /// We don't support nor need to support multi-value RDNs.
40    #[display(fmt = "multi-value RDNs are not supported")]
41    UnsupportedMultiValueRdns,
42    /// Found a non-UTF-8 string.
43    FromUtf8(FromUtf8Error),
44    /// Found a non-UTF-8 string.
45    Utf8(Utf8Error),
46}
47
48/// Parsing result type.
49pub type Result<T> = result::Result<T, Error>;
50
51/// A distinguished name (DN).
52///
53/// DNs are composed of a sequence of key-value pairs called relative
54/// distinguished names (RDNs).
55#[derive(Clone, Debug)]
56pub struct DistinguishedName {
57    rdns: Vec<RelativeDistinguishedName>,
58}
59
60impl DistinguishedName {
61    /// Find the value of the first occurence of the given RDN type.
62    pub fn find(&self, ty: RdnType) -> Option<&str> {
63        self.rdns
64            .iter()
65            .find_map(|x| if x.ty() == ty { Some(x.value()) } else { None })
66    }
67
68    /// Returns an iterator over all RDNs of this DN.
69    pub fn iter(&self) -> impl Iterator<Item = &RelativeDistinguishedName> {
70        self.rdns.iter()
71    }
72
73    /// Get the organization ID of this certificate. The way organization IDs
74    /// are specified in OpenFinance certificates is a special kind of mess so
75    /// we need a specific function for this.
76    pub fn organization_id(&self) -> Result<Option<Cow<str>>> {
77        // For newer certificates, the organization ID should be the value of
78        // `OU` and the spec also leaves open the possibility of it being the
79        // value of `organizationalUnitName`
80        let org_id = self
81            .find(RdnType::Ou)
82            .or_else(|| self.find(RdnType::OrganizationalUnitName));
83        if let Some(org_id) = org_id {
84            return Ok(Some(org_id.into()));
85        }
86
87        // For older certificates, we have nightmare as the value of
88        // `organizationIdentifier`
89        let Some(org_id) = self.find(RdnType::OrganizationIdentifier) else {
90            return Ok(None);
91        };
92        let org_id = org_id.to_ascii_lowercase();
93
94        Ok(Some(extract_organization_id(&org_id)?.into()))
95    }
96
97    /// Create a comparator for this DN.
98    /// [RFC4518](https://datatracker.ietf.org/doc/html/rfc451) requires that
99    /// DNs be transformed before comparison, which is implemented by this
100    /// comparator.
101    pub fn comparator(&self) -> Result<DnComparator> {
102        DnComparator::new(self)
103    }
104
105    /// Serialize into the OpenFinance variant string format:
106    /// <https://openfinancebrasil.atlassian.net/wiki/spaces/OF/pages/240649661/EN+Open+Finance+Brasil+Financial-grade+API+Dynamic+Client+Registration+1.0+Implementers+Draft+3#7.1.2.-Certificate-Distinguished-Name-Parsing>.
107    pub fn to_of_string(&self) -> String {
108        let mut res = String::new();
109        for (i, rdn) in self.rdns.iter().rev().enumerate() {
110            if i > 0 {
111                res.push(',');
112            }
113
114            let ty = rdn.ty();
115            let value = rdn.value();
116            res += ty.as_of_str();
117            res.push('=');
118            if ty.of_encodes_as_hex() {
119                res.push('#');
120                res += &hex::encode(value);
121            } else {
122                res.reserve(value.len());
123                for c in value.chars() {
124                    if ESCAPABLE_SYMBOLS.contains(&c) {
125                        // Note: for simplicity we'll be escaping everything
126                        // we can unconditionally even when this is not
127                        // necesary
128                        res.push('\\');
129                    }
130                    res.push(c);
131                }
132            }
133        }
134
135        res
136    }
137}
138
139/// Parse from the canonical string format:
140/// <https://datatracker.ietf.org/doc/html/rfc4514>.
141impl FromStr for DistinguishedName {
142    type Err = Error;
143
144    fn from_str(s: &str) -> Result<Self> {
145        // This format is faily straightforward and so the parser is
146        // implemented manually. Parser crates wouldn't help by much.
147        let mut rdns = Vec::new();
148        let mut acc = Vec::new();
149        let mut escaping = Escaping::None;
150        let mut value_is_hex = false;
151        let mut ty = None::<RdnType>;
152        let chars = s.bytes().map(ParseItem::from).chain([ParseItem::Eof]);
153        for c in chars {
154            if escaping.is_pending() {
155                let ParseItem::Byte(c) = c else {
156                    // Cannot end a DN with a backslash
157                    return Err(Error::UnexpectedEof);
158                };
159                if let Some(escaped) = escaping.consume(c)? {
160                    acc.push(escaped);
161                }
162
163                continue;
164            }
165
166            match c {
167                // A DN is a list of RDNs separated by commas
168                ParseItem::Byte(b',') | ParseItem::Eof => {
169                    let value = str::from_utf8(&acc)?.trim();
170                    if value.is_empty() {
171                        if c.is_eof() && ty.is_none() {
172                            // EOF and the DN is complete
173                            break;
174                        } else {
175                            // We already parsed a type but this RDN is
176                            // missing a value
177                            return if c.is_eof() {
178                                Err(Error::UnexpectedEof)
179                            } else {
180                                Err(Error::UnexpectedCharacter(','))
181                            };
182                        }
183                    }
184
185                    // If we're ending the definition of this RDN then we must
186                    // already have parsed an RDN type
187                    let rdn_type = ty.ok_or_else(|| {
188                        if c.is_eof() {
189                            Error::UnexpectedEof
190                        } else {
191                            Error::UnexpectedCharacter(',')
192                        }
193                    })?;
194                    ty = None;
195
196                    // Decode the value. This may be a hex encoded string
197                    let rdn_value = if value_is_hex {
198                        value_is_hex = false;
199                        let value = hex::decode(value)?;
200
201                        String::from_utf8(value)?
202                    } else {
203                        value.to_owned()
204                    };
205                    acc.clear();
206
207                    rdns.push(RelativeDistinguishedName::new(rdn_type, rdn_value));
208                }
209                // An RDN is an RDN type and a value separated by an equals
210                // sign
211                ParseItem::Byte(b'=') => {
212                    if ty.is_some() {
213                        // Something like 'a = b = c' is not a valid RDN
214                        return Err(Error::UnexpectedCharacter('='));
215                    }
216
217                    let ty_str = str::from_utf8(&acc)?.trim();
218                    if ty_str.is_empty() {
219                        return Err(Error::UnexpectedCharacter('='));
220                    }
221
222                    ty = Some(ty_str.parse()?);
223                    acc.clear();
224                }
225                // A backslash starts an escape sequence
226                ParseItem::Byte(b'\\') => {
227                    escaping = Escaping::Started;
228                }
229                // An octothorpe right after the equals sign means that the
230                // value is an encoded hex string
231                ParseItem::Byte(b'#') => {
232                    if acc.is_empty() {
233                        value_is_hex = true;
234                    } else {
235                        acc.push(b'#');
236                    }
237                }
238                // A plus sign is used to define multi-valued RDNs but we have
239                // no need for this here
240                ParseItem::Byte(b'+') => return Err(Error::UnsupportedMultiValueRdns),
241                // Every other byte is a literal
242                ParseItem::Byte(c) => acc.push(c),
243            }
244        }
245
246        // For some reason the string format serializes RDNs in the inverse
247        // order
248        rdns.reverse();
249
250        Ok(Self { rdns })
251    }
252}
253
/// One unit of parser input: either a byte of the source string or the
/// end-of-input marker.
#[derive(Clone, Copy)]
enum ParseItem {
    /// A single byte of the input.
    Byte(u8),
    /// Marks the end of the input.
    Eof,
}
259
260impl ParseItem {
261    fn is_eof(self) -> bool {
262        matches!(self, Self::Eof)
263    }
264}
265
266impl From<u8> for ParseItem {
267    fn from(value: u8) -> Self {
268        Self::Byte(value)
269    }
270}
271
/// State of the backslash escape currently being read, if any.
#[derive(Clone, Copy)]
enum Escaping {
    /// No escape sequence is in progress.
    None,
    /// A backslash was read; the next byte decides the kind of escape.
    Started,
    /// The first byte of a two-byte hex escape was read and is held here
    /// until the second one arrives.
    Hex(u8),
}
278
279impl Escaping {
280    fn is_pending(self) -> bool {
281        matches!(self, Self::Started | Self::Hex(_))
282    }
283
284    fn consume(&mut self, c: u8) -> Result<Option<u8>> {
285        match *self {
286            Self::Started => {
287                if ESCAPABLE_SYMBOLS.contains(&(c as char)) {
288                    *self = Self::None;
289
290                    Ok(Some(c))
291                } else {
292                    *self = Self::Hex(c);
293
294                    Ok(None)
295                }
296            }
297            Self::Hex(previous) => {
298                *self = Self::None;
299                let mut byte = [0; 1];
300                hex::decode_to_slice([previous, c], &mut byte)?;
301
302                Ok(Some(byte[0]))
303            }
304            Self::None => {
305                unreachable!("BUG: called `Escaping::consume` when no escaping is active")
306            }
307        }
308    }
309}
310
311/// A transformed [DistinguishedName] suitable for comparisons.
312#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
313pub struct DnComparator {
314    rdns: Vec<RdnComparator>,
315}
316
317impl DnComparator {
318    /// Create a new comparator from a [DistinguishedName].
319    pub fn new(dn: &DistinguishedName) -> Result<Self> {
320        let rdns = dn.iter().map(RdnComparator::new).collect::<Result<_>>()?;
321
322        Ok(Self { rdns })
323    }
324}
325
326/// A key-value pair that is part of a [DistinguishedName].
327///
328/// Multi-value RDNs are not supported.
329#[derive(Clone, Debug)]
330pub struct RelativeDistinguishedName {
331    ty: RdnType,
332    value: String,
333}
334
335impl RelativeDistinguishedName {
336    /// Create a new RDN.
337    pub fn new(ty: RdnType, value: String) -> Self {
338        Self { ty, value }
339    }
340
341    /// Get the type of this RDN.
342    pub fn ty(&self) -> RdnType {
343        self.ty
344    }
345
346    /// Get the value of this RDN.
347    pub fn value(&self) -> &str {
348        &self.value
349    }
350}
351
352/// A transformed [RelativeDistinguishedName] suitable for comparisons.
353#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
354pub struct RdnComparator {
355    ty: RdnType,
356    value: String,
357}
358
359impl RdnComparator {
360    /// Create a new comparator from a [RelativeDistinguishedName].
361    pub fn new(rdn: &RelativeDistinguishedName) -> Result<Self> {
362        let ty = rdn.ty();
363
364        // Prepare the value so it can be compared correctly. Comparison
365        // between values is fuzzy. Some characters must be replaced before
366        // comparison, while others must be removed.
367        //
368        // <https://datatracker.ietf.org/doc/html/rfc4518#section-2>
369        //
370        // TODO: this is not 100% complete.
371        let mut value = rdn
372            .value()
373            .chars()
374            .filter_map(|c| {
375                if c == '\u{0340}'
376                    || c == '\u{0341}'
377                    || c == '\u{200E}'
378                    || c == '\u{200F}'
379                    || ('\u{202A}'..='\u{202E}').contains(&c)
380                    || ('\u{206A}'..='\u{206F}').contains(&c)
381                    || ('\u{E000}'..='\u{F8FF}').contains(&c)
382                    || ('\u{F0000}'..='\u{FFFFD}').contains(&c)
383                    || ('\u{100000}'..='\u{10FFFD}').contains(&c)
384                    || c == '\u{FFFD}'
385                {
386                    // These characters are prohibited
387                    Some(Err(Error::UnexpectedCharacter(c)))
388                } else if c == '\u{0009}'
389                    || c == '\u{000A}'
390                    || c == '\u{000B}'
391                    || c == '\u{000C}'
392                    || c == '\u{000D}'
393                    || c == '\u{0085}'
394                    || c.is_whitespace()
395                {
396                    // These characters are compared as if they were a simple
397                    // space
398                    Some(Ok(' '))
399                } else if c == '\u{00AD}'
400                    || c == '\u{1806}'
401                    || c == '\u{034F}'
402                    || ('\u{180B}'..='\u{180D}').contains(&c)
403                    || ('\u{FE0F}'..='\u{FF00}').contains(&c)
404                    || c == '\u{FFFC}'
405                    || c.is_control()
406                    || c == '\u{200B}'
407                {
408                    // These characters are ignored during comparison
409                    None
410                } else {
411                    // Character is used in comparisons
412                    Some(Ok(c))
413                }
414            })
415            .collect::<Result<String>>()?;
416        if !ty.is_comparison_case_sensitive() {
417            value.make_ascii_lowercase();
418        }
419
420        // Specifically this RDN requires extra processing
421        if ty == RdnType::OrganizationIdentifier {
422            value = extract_organization_id(&value)?;
423        }
424
425        Ok(Self {
426            ty,
427            value: value.trim().to_owned(),
428        })
429    }
430}
431
/// The attribute type of a relative distinguished name.
///
/// This identifies a single component of a full DN. Only a fixed set of RDN
/// types is supported:
///
/// > the Authorization Server shall accept only the AttributeTypes
/// > (descriptors) defined in the last paragraph of clause 3 RFC4514 in
/// > string format, it shall also accept in OID format, with their values in
/// > ASN.1, all the AttributeTypes defined in Distinguished Name Open Finance
/// > Brasil x.509 Certificate Standards or added by the Certificate
/// > Authority.
///
/// <https://openfinancebrasil.atlassian.net/wiki/spaces/OF/pages/240650099/EN+Padr+o+de+Certificados+Open+Finance+Brasil+2.0#5.2.2.1.-Open-Finance-Brasil-Attributes>
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum RdnType {
    /// Common name (`CN`).
    Cn,
    /// Locality name (`L`).
    L,
    /// State or province name (`ST`).
    St,
    /// Organization name (`O`).
    O,
    /// Organizational unit name, written with the short descriptor `OU`.
    Ou,
    /// Country name (`C`).
    C,
    /// Street address (`STREET`).
    Street,
    /// Domain component (`DC`).
    Dc,
    /// User ID (`UID`).
    Uid,
    /// Type of business category (OID `2.5.4.15`).
    BusinessCategory,
    /// Jurisdiction country name (OID `1.3.6.1.4.1.311.60.2.1.3`).
    JurisdictionCountryName,
    /// National Register of Legal Personnel (CNPJ) of the legal entity
    /// holding the certificate (OID `2.5.4.5`).
    SerialNumber,
    /// Participant Code associated with the CNPJ listed in the Directory
    /// Service of Open Finance Brasil (OID `2.5.4.97`).
    OrganizationIdentifier,
    /// Participant Code associated with the CNPJ listed in the Directory
    /// Service of Open Finance Brasil, written as `organizationalUnitName`
    /// or as OID `2.5.4.11`.
    OrganizationalUnitName,
}
479
480impl RdnType {
481    fn as_of_str(self) -> &'static str {
482        match self {
483            Self::Cn => "CN",
484            Self::L => "L",
485            Self::St => "ST",
486            Self::O => "O",
487            Self::Ou => "OU",
488            Self::C => "C",
489            Self::Street => "Street",
490            Self::Dc => "DC",
491            Self::Uid => "UID",
492            Self::BusinessCategory => "2.5.4.15",
493            Self::JurisdictionCountryName => "1.3.6.1.4.1.311.60.2.1.3",
494            Self::SerialNumber => "2.5.4.5",
495            Self::OrganizationIdentifier => "2.5.4.97",
496            Self::OrganizationalUnitName => "2.5.4.11",
497        }
498    }
499
500    fn of_encodes_as_hex(self) -> bool {
501        matches!(
502            self,
503            Self::BusinessCategory
504                | Self::JurisdictionCountryName
505                | Self::SerialNumber
506                | Self::OrganizationIdentifier
507                | Self::OrganizationalUnitName
508        )
509    }
510
511    fn is_comparison_case_sensitive(self) -> bool {
512        matches!(
513            self,
514            Self::Cn
515                | Self::L
516                | Self::St
517                | Self::O
518                | Self::Ou
519                | Self::C
520                | Self::JurisdictionCountryName
521                | Self::OrganizationalUnitName
522        )
523    }
524}
525
526/// Parse from the canonical string format:
527/// <https://datatracker.ietf.org/doc/html/rfc4514>.
528impl FromStr for RdnType {
529    type Err = Error;
530
531    fn from_str(s: &str) -> Result<Self> {
532        match s.to_lowercase().as_str() {
533            // https://datatracker.ietf.org/doc/html/rfc4519#section-2.3
534            "cn" | "2.5.4.3" => Ok(Self::Cn),
535            // https://datatracker.ietf.org/doc/html/rfc4519#section-2.16
536            "l" | "2.5.4.7" => Ok(Self::L),
537            // https://datatracker.ietf.org/doc/html/rfc4519#section-2.33
538            "st" | "2.5.4.8" => Ok(Self::St),
539            // https://datatracker.ietf.org/doc/html/rfc4519#section-2.19
540            "o" | "2.5.4.10" => Ok(Self::O),
541            // https://datatracker.ietf.org/doc/html/rfc4519#section-2.20
542            "ou" => Ok(Self::Ou),
543            // https://datatracker.ietf.org/doc/html/rfc4519#section-2.2
544            "c" | "2.5.4.6" => Ok(Self::C),
545            // https://datatracker.ietf.org/doc/html/rfc4519#section-2.34
546            "street" | "2.5.4.9" => Ok(Self::Street),
547            // https://datatracker.ietf.org/doc/html/rfc4519#section-2.4
548            "dc" | "0.9.2342.19200300.100.1.25" => Ok(Self::Dc),
549            // https://datatracker.ietf.org/doc/html/rfc4519#section-2.39
550            "uid" | "0.9.2342.19200300.100.1.1" => Ok(Self::Uid),
551            // https://datatracker.ietf.org/doc/html/rfc4519#section-2.1
552            "businesscategory" | "2.5.4.15" => Ok(Self::BusinessCategory),
553            // https://oidref.com/1.3.6.1.4.1.311.60.2.1.3
554            "jurisdictioncountryname" | "jurisdictionc" | "1.3.6.1.4.1.311.60.2.1.3" => {
555                Ok(Self::JurisdictionCountryName)
556            }
557            // https://datatracker.ietf.org/doc/html/rfc4519#section-2.31
558            "serialnumber" | "2.5.4.5" => Ok(Self::SerialNumber),
559            // https://oidref.com/2.5.4.97
560            "organizationidentifier" | "2.5.4.97" => Ok(Self::OrganizationIdentifier),
561            // https://openfinancebrasil.atlassian.net/wiki/spaces/OF/pages/240650099/EN+Padr+o+de+Certificados+Open+Finance+Brasil+2.0#5.2.2.1.-Open-Finance-Brasil-Attributes
562            "organizationalunitname" | "2.5.4.11" => Ok(Self::OrganizationalUnitName),
563            _ => Err(Error::InvalidType(s.to_owned())),
564        }
565    }
566}
567
568// Clean the value of `organizationIdentifier` according to the OF spec.
569//
570// One day the people working on the OpenFinance spec woke up with the most
571// brilliant idea ever: how about we add extra arbitrary complexity for
572// absolutely no reason at all? 'Genius!' they thought. And so in their
573// infinite wisdom they added the following:
574//
575// [...] convert ASN.1 values from OID 2.5.4.97 organizationIdentifier to
576// human readable text [...] retrieve the full value of the OID 2.5.4.97
577// contained in the subject_DN. [...] Apply a filter using regular expression
578// to retrieve the org_id after ('OFBBR-')
579//
580// https://openfinancebrasil.atlassian.net/wiki/spaces/OF/pages/240649661/EN+Open+Finance+Brasil+Financial-grade+API+Dynamic+Client+Registration+1.0+Implementers+Draft+3#7.1.2.-Certificate-Distinguished-Name-Parsing
581//
582// That is, for `organizationIdentifier` ONLY, it is permissible to have any
583// amount of garbage before `OFBBR-`. Luckly we can assume here that this
584// value is lower case and we don't need an actual regex.
585fn extract_organization_id(org_id: &str) -> Result<String> {
586    const PREFIX: &str = "ofbbr-";
587
588    let mut idx = org_id.find(PREFIX).ok_or_else(|| Error::InvalidValue {
589        ty: RdnType::OrganizationIdentifier,
590        value: org_id.to_owned(),
591    })?;
592    idx += PREFIX.len();
593
594    Ok(org_id[idx..].to_owned())
595}