of_dn_parser/lib.rs
1//! Distinguished name (DN) parser and formatter following OpenFinance
2//! Brasil's DCR 1.0 standard.
3
4use std::{
5 borrow::Cow,
6 result,
7 str::{self, FromStr, Utf8Error},
8 string::FromUtf8Error,
9};
10
11use derive_more::{Display, Error, From};
12
13#[cfg(test)]
14mod test;
15
// List of symbols that must be escaped with a backslash.
//
// When parsing, a backslash followed by one of these characters yields that
// character literally; when serializing values, each of these characters is
// always emitted with a preceding backslash.
const ESCAPABLE_SYMBOLS: [char; 10] = [' ', '"', '#', '+', ',', ';', '<', '=', '>', '\\'];
18
/// Possible errors when parsing distinguished names.
#[derive(Debug, Display, Error, From)]
pub enum Error {
    /// Could not decode a hex string.
    ///
    /// Raised both for a malformed two-digit `\XX` escape and for a malformed
    /// `#`-prefixed hex value.
    Hex(hex::FromHexError),
    /// Found an invalid RDN type.
    ///
    /// The payload is the type string that could not be recognized.
    #[display(fmt = "invalid RDN type: {_0}")]
    #[from(ignore)]
    InvalidType(#[error(not(source))] String),
    /// Found an invalid value for the specified RDN type.
    #[display(fmt = "invalid value for {ty:?}: {value}")]
    #[from(ignore)]
    InvalidValue {
        /// Type of the RDN whose value was rejected.
        ty: RdnType,
        /// The rejected value.
        value: String,
    },
    /// Found a character in a position where it is invalid.
    ///
    /// Also used when a value contains a character that is prohibited during
    /// comparison.
    #[display(fmt = "unexpected character: {_0:?}")]
    #[from(ignore)]
    UnexpectedCharacter(#[error(not(source))] char),
    /// String ended unexpectedly.
    #[display(fmt = "unexpected EOF")]
    UnexpectedEof,
    /// We don't support nor need to support multi-value RDNs.
    #[display(fmt = "multi-value RDNs are not supported")]
    UnsupportedMultiValueRdns,
    /// Found a non-UTF-8 string (owned bytes, e.g. a decoded hex value).
    FromUtf8(FromUtf8Error),
    /// Found a non-UTF-8 string (borrowed bytes, e.g. the raw type or value).
    Utf8(Utf8Error),
}
47
/// Parsing result type: a [`result::Result`] whose error is this module's
/// [`Error`].
pub type Result<T> = result::Result<T, Error>;
50
51/// A distinguished name (DN).
52///
53/// DNs are composed of a sequence of key-value pairs called relative
54/// distinguished names (RDNs).
55#[derive(Clone, Debug)]
56pub struct DistinguishedName {
57 rdns: Vec<RelativeDistinguishedName>,
58}
59
60impl DistinguishedName {
61 /// Find the value of the first occurence of the given RDN type.
62 pub fn find(&self, ty: RdnType) -> Option<&str> {
63 self.rdns
64 .iter()
65 .find_map(|x| if x.ty() == ty { Some(x.value()) } else { None })
66 }
67
68 /// Returns an iterator over all RDNs of this DN.
69 pub fn iter(&self) -> impl Iterator<Item = &RelativeDistinguishedName> {
70 self.rdns.iter()
71 }
72
73 /// Get the organization ID of this certificate. The way organization IDs
74 /// are specified in OpenFinance certificates is a special kind of mess so
75 /// we need a specific function for this.
76 pub fn organization_id(&self) -> Result<Option<Cow<str>>> {
77 // For newer certificates, the organization ID should be the value of
78 // `OU` and the spec also leaves open the possibility of it being the
79 // value of `organizationalUnitName`
80 let org_id = self
81 .find(RdnType::Ou)
82 .or_else(|| self.find(RdnType::OrganizationalUnitName));
83 if let Some(org_id) = org_id {
84 return Ok(Some(org_id.into()));
85 }
86
87 // For older certificates, we have nightmare as the value of
88 // `organizationIdentifier`
89 let Some(org_id) = self.find(RdnType::OrganizationIdentifier) else {
90 return Ok(None);
91 };
92 let org_id = org_id.to_ascii_lowercase();
93
94 Ok(Some(extract_organization_id(&org_id)?.into()))
95 }
96
97 /// Create a comparator for this DN.
98 /// [RFC4518](https://datatracker.ietf.org/doc/html/rfc451) requires that
99 /// DNs be transformed before comparison, which is implemented by this
100 /// comparator.
101 pub fn comparator(&self) -> Result<DnComparator> {
102 DnComparator::new(self)
103 }
104
105 /// Serialize into the OpenFinance variant string format:
106 /// <https://openfinancebrasil.atlassian.net/wiki/spaces/OF/pages/240649661/EN+Open+Finance+Brasil+Financial-grade+API+Dynamic+Client+Registration+1.0+Implementers+Draft+3#7.1.2.-Certificate-Distinguished-Name-Parsing>.
107 pub fn to_of_string(&self) -> String {
108 let mut res = String::new();
109 for (i, rdn) in self.rdns.iter().rev().enumerate() {
110 if i > 0 {
111 res.push(',');
112 }
113
114 let ty = rdn.ty();
115 let value = rdn.value();
116 res += ty.as_of_str();
117 res.push('=');
118 if ty.of_encodes_as_hex() {
119 res.push('#');
120 res += &hex::encode(value);
121 } else {
122 res.reserve(value.len());
123 for c in value.chars() {
124 if ESCAPABLE_SYMBOLS.contains(&c) {
125 // Note: for simplicity we'll be escaping everything
126 // we can unconditionally even when this is not
127 // necesary
128 res.push('\\');
129 }
130 res.push(c);
131 }
132 }
133 }
134
135 res
136 }
137}
138
139/// Parse from the canonical string format:
140/// <https://datatracker.ietf.org/doc/html/rfc4514>.
141impl FromStr for DistinguishedName {
142 type Err = Error;
143
144 fn from_str(s: &str) -> Result<Self> {
145 // This format is faily straightforward and so the parser is
146 // implemented manually. Parser crates wouldn't help by much.
147 let mut rdns = Vec::new();
148 let mut acc = Vec::new();
149 let mut escaping = Escaping::None;
150 let mut value_is_hex = false;
151 let mut ty = None::<RdnType>;
152 let chars = s.bytes().map(ParseItem::from).chain([ParseItem::Eof]);
153 for c in chars {
154 if escaping.is_pending() {
155 let ParseItem::Byte(c) = c else {
156 // Cannot end a DN with a backslash
157 return Err(Error::UnexpectedEof);
158 };
159 if let Some(escaped) = escaping.consume(c)? {
160 acc.push(escaped);
161 }
162
163 continue;
164 }
165
166 match c {
167 // A DN is a list of RDNs separated by commas
168 ParseItem::Byte(b',') | ParseItem::Eof => {
169 let value = str::from_utf8(&acc)?.trim();
170 if value.is_empty() {
171 if c.is_eof() && ty.is_none() {
172 // EOF and the DN is complete
173 break;
174 } else {
175 // We already parsed a type but this RDN is
176 // missing a value
177 return if c.is_eof() {
178 Err(Error::UnexpectedEof)
179 } else {
180 Err(Error::UnexpectedCharacter(','))
181 };
182 }
183 }
184
185 // If we're ending the definition of this RDN then we must
186 // already have parsed an RDN type
187 let rdn_type = ty.ok_or_else(|| {
188 if c.is_eof() {
189 Error::UnexpectedEof
190 } else {
191 Error::UnexpectedCharacter(',')
192 }
193 })?;
194 ty = None;
195
196 // Decode the value. This may be a hex encoded string
197 let rdn_value = if value_is_hex {
198 value_is_hex = false;
199 let value = hex::decode(value)?;
200
201 String::from_utf8(value)?
202 } else {
203 value.to_owned()
204 };
205 acc.clear();
206
207 rdns.push(RelativeDistinguishedName::new(rdn_type, rdn_value));
208 }
209 // An RDN is an RDN type and a value separated by an equals
210 // sign
211 ParseItem::Byte(b'=') => {
212 if ty.is_some() {
213 // Something like 'a = b = c' is not a valid RDN
214 return Err(Error::UnexpectedCharacter('='));
215 }
216
217 let ty_str = str::from_utf8(&acc)?.trim();
218 if ty_str.is_empty() {
219 return Err(Error::UnexpectedCharacter('='));
220 }
221
222 ty = Some(ty_str.parse()?);
223 acc.clear();
224 }
225 // A backslash starts an escape sequence
226 ParseItem::Byte(b'\\') => {
227 escaping = Escaping::Started;
228 }
229 // An octothorpe right after the equals sign means that the
230 // value is an encoded hex string
231 ParseItem::Byte(b'#') => {
232 if acc.is_empty() {
233 value_is_hex = true;
234 } else {
235 acc.push(b'#');
236 }
237 }
238 // A plus sign is used to define multi-valued RDNs but we have
239 // no need for this here
240 ParseItem::Byte(b'+') => return Err(Error::UnsupportedMultiValueRdns),
241 // Every other byte is a literal
242 ParseItem::Byte(c) => acc.push(c),
243 }
244 }
245
246 // For some reason the string format serializes RDNs in the inverse
247 // order
248 rdns.reverse();
249
250 Ok(Self { rdns })
251 }
252}
253
/// One unit of parser input: a raw byte of the string, or the marker that
/// signals the end of the input.
#[derive(Clone, Copy)]
enum ParseItem {
    /// A single byte of the input string.
    Byte(u8),
    /// End of input.
    Eof,
}

impl ParseItem {
    /// Whether this item is the end-of-input marker.
    fn is_eof(self) -> bool {
        match self {
            Self::Eof => true,
            Self::Byte(_) => false,
        }
    }
}

/// Wrap a raw input byte.
impl From<u8> for ParseItem {
    fn from(value: u8) -> Self {
        ParseItem::Byte(value)
    }
}
271
272#[derive(Clone, Copy)]
273enum Escaping {
274 None,
275 Started,
276 Hex(u8),
277}
278
279impl Escaping {
280 fn is_pending(self) -> bool {
281 matches!(self, Self::Started | Self::Hex(_))
282 }
283
284 fn consume(&mut self, c: u8) -> Result<Option<u8>> {
285 match *self {
286 Self::Started => {
287 if ESCAPABLE_SYMBOLS.contains(&(c as char)) {
288 *self = Self::None;
289
290 Ok(Some(c))
291 } else {
292 *self = Self::Hex(c);
293
294 Ok(None)
295 }
296 }
297 Self::Hex(previous) => {
298 *self = Self::None;
299 let mut byte = [0; 1];
300 hex::decode_to_slice([previous, c], &mut byte)?;
301
302 Ok(Some(byte[0]))
303 }
304 Self::None => {
305 unreachable!("BUG: called `Escaping::consume` when no escaping is active")
306 }
307 }
308 }
309}
310
311/// A transformed [DistinguishedName] suitable for comparisons.
312#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
313pub struct DnComparator {
314 rdns: Vec<RdnComparator>,
315}
316
317impl DnComparator {
318 /// Create a new comparator from a [DistinguishedName].
319 pub fn new(dn: &DistinguishedName) -> Result<Self> {
320 let rdns = dn.iter().map(RdnComparator::new).collect::<Result<_>>()?;
321
322 Ok(Self { rdns })
323 }
324}
325
326/// A key-value pair that is part of a [DistinguishedName].
327///
328/// Multi-value RDNs are not supported.
329#[derive(Clone, Debug)]
330pub struct RelativeDistinguishedName {
331 ty: RdnType,
332 value: String,
333}
334
335impl RelativeDistinguishedName {
336 /// Create a new RDN.
337 pub fn new(ty: RdnType, value: String) -> Self {
338 Self { ty, value }
339 }
340
341 /// Get the type of this RDN.
342 pub fn ty(&self) -> RdnType {
343 self.ty
344 }
345
346 /// Get the value of this RDN.
347 pub fn value(&self) -> &str {
348 &self.value
349 }
350}
351
352/// A transformed [RelativeDistinguishedName] suitable for comparisons.
353#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
354pub struct RdnComparator {
355 ty: RdnType,
356 value: String,
357}
358
359impl RdnComparator {
360 /// Create a new comparator from a [RelativeDistinguishedName].
361 pub fn new(rdn: &RelativeDistinguishedName) -> Result<Self> {
362 let ty = rdn.ty();
363
364 // Prepare the value so it can be compared correctly. Comparison
365 // between values is fuzzy. Some characters must be replaced before
366 // comparison, while others must be removed.
367 //
368 // <https://datatracker.ietf.org/doc/html/rfc4518#section-2>
369 //
370 // TODO: this is not 100% complete.
371 let mut value = rdn
372 .value()
373 .chars()
374 .filter_map(|c| {
375 if c == '\u{0340}'
376 || c == '\u{0341}'
377 || c == '\u{200E}'
378 || c == '\u{200F}'
379 || ('\u{202A}'..='\u{202E}').contains(&c)
380 || ('\u{206A}'..='\u{206F}').contains(&c)
381 || ('\u{E000}'..='\u{F8FF}').contains(&c)
382 || ('\u{F0000}'..='\u{FFFFD}').contains(&c)
383 || ('\u{100000}'..='\u{10FFFD}').contains(&c)
384 || c == '\u{FFFD}'
385 {
386 // These characters are prohibited
387 Some(Err(Error::UnexpectedCharacter(c)))
388 } else if c == '\u{0009}'
389 || c == '\u{000A}'
390 || c == '\u{000B}'
391 || c == '\u{000C}'
392 || c == '\u{000D}'
393 || c == '\u{0085}'
394 || c.is_whitespace()
395 {
396 // These characters are compared as if they were a simple
397 // space
398 Some(Ok(' '))
399 } else if c == '\u{00AD}'
400 || c == '\u{1806}'
401 || c == '\u{034F}'
402 || ('\u{180B}'..='\u{180D}').contains(&c)
403 || ('\u{FE0F}'..='\u{FF00}').contains(&c)
404 || c == '\u{FFFC}'
405 || c.is_control()
406 || c == '\u{200B}'
407 {
408 // These characters are ignored during comparison
409 None
410 } else {
411 // Character is used in comparisons
412 Some(Ok(c))
413 }
414 })
415 .collect::<Result<String>>()?;
416 if !ty.is_comparison_case_sensitive() {
417 value.make_ascii_lowercase();
418 }
419
420 // Specifically this RDN requires extra processing
421 if ty == RdnType::OrganizationIdentifier {
422 value = extract_organization_id(&value)?;
423 }
424
425 Ok(Self {
426 ty,
427 value: value.trim().to_owned(),
428 })
429 }
430}
431
/// A relative distinguished name type.
///
/// This is the key of a single component of a full DN. Only a select set of
/// RDN types is supported:
///
/// > the Authorization Server shall accept only the AttributeTypes
/// > (descriptors) defined in the last paragraph of clause 3 RFC4514 in
/// > string format, it shall also accept in OID format, with their values in
/// > ASN.1, all the AttributeTypes defined in Distinguished Name Open Finance
/// > Brasil x.509 Certificate Standards or added by the Certificate
/// > Authority.
///
/// <https://openfinancebrasil.atlassian.net/wiki/spaces/OF/pages/240650099/EN+Padr+o+de+Certificados+Open+Finance+Brasil+2.0#5.2.2.1.-Open-Finance-Brasil-Attributes>
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum RdnType {
    /// Common name (`CN`).
    Cn,
    /// Locality name (`L`).
    L,
    /// State or province name (`ST`).
    St,
    /// Organization name (`O`).
    O,
    /// Organizational unit name (`OU`).
    Ou,
    /// Country name (`C`).
    C,
    /// Street address.
    Street,
    /// Domain component (`DC`).
    Dc,
    /// User ID (`UID`).
    Uid,
    /// Type of business category.
    BusinessCategory,
    /// Jurisdiction country name.
    JurisdictionCountryName,
    /// National Register of Legal Personnel (CNPJ) of the legal entity
    /// holding the certificate.
    SerialNumber,
    /// Participant Code associated with the CNPJ listed in the Directory
    /// Service of Open Finance Brasil.
    OrganizationIdentifier,
    /// Participant Code associated with the CNPJ listed in the Directory
    /// Service of Open Finance Brasil.
    OrganizationalUnitName,
}

impl RdnType {
    /// Name under which this type is written in the OpenFinance string
    /// format: a short descriptor for some types, a dotted OID for the rest.
    fn as_of_str(self) -> &'static str {
        match self {
            // Written as short descriptors
            Self::C => "C",
            Self::Cn => "CN",
            Self::Dc => "DC",
            Self::L => "L",
            Self::O => "O",
            Self::Ou => "OU",
            Self::St => "ST",
            Self::Street => "Street",
            Self::Uid => "UID",
            // Written as dotted OIDs
            Self::SerialNumber => "2.5.4.5",
            Self::OrganizationalUnitName => "2.5.4.11",
            Self::BusinessCategory => "2.5.4.15",
            Self::OrganizationIdentifier => "2.5.4.97",
            Self::JurisdictionCountryName => "1.3.6.1.4.1.311.60.2.1.3",
        }
    }

    /// Whether values of this type are written as a `#`-prefixed hex string
    /// in the OpenFinance string format.
    fn of_encodes_as_hex(self) -> bool {
        match self {
            Self::BusinessCategory
            | Self::JurisdictionCountryName
            | Self::SerialNumber
            | Self::OrganizationIdentifier
            | Self::OrganizationalUnitName => true,
            Self::Cn
            | Self::L
            | Self::St
            | Self::O
            | Self::Ou
            | Self::C
            | Self::Street
            | Self::Dc
            | Self::Uid => false,
        }
    }

    /// Whether values of this type keep their letter case when they are
    /// prepared for comparison.
    fn is_comparison_case_sensitive(self) -> bool {
        match self {
            Self::Cn
            | Self::L
            | Self::St
            | Self::O
            | Self::Ou
            | Self::C
            | Self::JurisdictionCountryName
            | Self::OrganizationalUnitName => true,
            Self::Street
            | Self::Dc
            | Self::Uid
            | Self::BusinessCategory
            | Self::SerialNumber
            | Self::OrganizationIdentifier => false,
        }
    }
}
525
526/// Parse from the canonical string format:
527/// <https://datatracker.ietf.org/doc/html/rfc4514>.
528impl FromStr for RdnType {
529 type Err = Error;
530
531 fn from_str(s: &str) -> Result<Self> {
532 match s.to_lowercase().as_str() {
533 // https://datatracker.ietf.org/doc/html/rfc4519#section-2.3
534 "cn" | "2.5.4.3" => Ok(Self::Cn),
535 // https://datatracker.ietf.org/doc/html/rfc4519#section-2.16
536 "l" | "2.5.4.7" => Ok(Self::L),
537 // https://datatracker.ietf.org/doc/html/rfc4519#section-2.33
538 "st" | "2.5.4.8" => Ok(Self::St),
539 // https://datatracker.ietf.org/doc/html/rfc4519#section-2.19
540 "o" | "2.5.4.10" => Ok(Self::O),
541 // https://datatracker.ietf.org/doc/html/rfc4519#section-2.20
542 "ou" => Ok(Self::Ou),
543 // https://datatracker.ietf.org/doc/html/rfc4519#section-2.2
544 "c" | "2.5.4.6" => Ok(Self::C),
545 // https://datatracker.ietf.org/doc/html/rfc4519#section-2.34
546 "street" | "2.5.4.9" => Ok(Self::Street),
547 // https://datatracker.ietf.org/doc/html/rfc4519#section-2.4
548 "dc" | "0.9.2342.19200300.100.1.25" => Ok(Self::Dc),
549 // https://datatracker.ietf.org/doc/html/rfc4519#section-2.39
550 "uid" | "0.9.2342.19200300.100.1.1" => Ok(Self::Uid),
551 // https://datatracker.ietf.org/doc/html/rfc4519#section-2.1
552 "businesscategory" | "2.5.4.15" => Ok(Self::BusinessCategory),
553 // https://oidref.com/1.3.6.1.4.1.311.60.2.1.3
554 "jurisdictioncountryname" | "jurisdictionc" | "1.3.6.1.4.1.311.60.2.1.3" => {
555 Ok(Self::JurisdictionCountryName)
556 }
557 // https://datatracker.ietf.org/doc/html/rfc4519#section-2.31
558 "serialnumber" | "2.5.4.5" => Ok(Self::SerialNumber),
559 // https://oidref.com/2.5.4.97
560 "organizationidentifier" | "2.5.4.97" => Ok(Self::OrganizationIdentifier),
561 // https://openfinancebrasil.atlassian.net/wiki/spaces/OF/pages/240650099/EN+Padr+o+de+Certificados+Open+Finance+Brasil+2.0#5.2.2.1.-Open-Finance-Brasil-Attributes
562 "organizationalunitname" | "2.5.4.11" => Ok(Self::OrganizationalUnitName),
563 _ => Err(Error::InvalidType(s.to_owned())),
564 }
565 }
566}
567
568// Clean the value of `organizationIdentifier` according to the OF spec.
569//
570// One day the people working on the OpenFinance spec woke up with the most
571// brilliant idea ever: how about we add extra arbitrary complexity for
572// absolutely no reason at all? 'Genius!' they thought. And so in their
573// infinite wisdom they added the following:
574//
575// [...] convert ASN.1 values from OID 2.5.4.97 organizationIdentifier to
576// human readable text [...] retrieve the full value of the OID 2.5.4.97
577// contained in the subject_DN. [...] Apply a filter using regular expression
578// to retrieve the org_id after ('OFBBR-')
579//
580// https://openfinancebrasil.atlassian.net/wiki/spaces/OF/pages/240649661/EN+Open+Finance+Brasil+Financial-grade+API+Dynamic+Client+Registration+1.0+Implementers+Draft+3#7.1.2.-Certificate-Distinguished-Name-Parsing
581//
582// That is, for `organizationIdentifier` ONLY, it is permissible to have any
583// amount of garbage before `OFBBR-`. Luckly we can assume here that this
584// value is lower case and we don't need an actual regex.
585fn extract_organization_id(org_id: &str) -> Result<String> {
586 const PREFIX: &str = "ofbbr-";
587
588 let mut idx = org_id.find(PREFIX).ok_or_else(|| Error::InvalidValue {
589 ty: RdnType::OrganizationIdentifier,
590 value: org_id.to_owned(),
591 })?;
592 idx += PREFIX.len();
593
594 Ok(org_id[idx..].to_owned())
595}