Skip to main content

uuid/
parser.rs

1// Copyright 2013-2014 The Rust Project Developers.
2// Copyright 2018 The Uuid Project Developers.
3//
4// See the COPYRIGHT file at the top-level directory of this distribution.
5//
6// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
7// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
8// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
9// option. This file may not be copied, modified, or distributed
10// except according to those terms.
11
12//! [`Uuid`] parsing constructs and utilities.
13//!
14//! [`Uuid`]: ../struct.Uuid.html
15
16use crate::{
17    error::*,
18    std::{convert::TryFrom, str},
19    Uuid,
20};
21
22#[cfg(feature = "std")]
23use crate::std::string::String;
24
25impl str::FromStr for Uuid {
26    type Err = Error;
27
28    fn from_str(uuid_str: &str) -> Result<Self, Self::Err> {
29        Uuid::parse_str(uuid_str)
30    }
31}
32
33impl TryFrom<&'_ str> for Uuid {
34    type Error = Error;
35
36    fn try_from(uuid_str: &'_ str) -> Result<Self, Self::Error> {
37        Uuid::parse_str(uuid_str)
38    }
39}
40
/// Fallible conversion from an owned `String`.
///
/// Only compiled when the `std` feature is enabled.
#[cfg(feature = "std")]
impl TryFrom<String> for Uuid {
    type Error = Error;

    /// Borrows the string's contents and defers to the `TryFrom<&str>`
    /// conversion.
    fn try_from(uuid_str: String) -> Result<Self, Self::Error> {
        let borrowed: &str = uuid_str.as_str();
        Self::try_from(borrowed)
    }
}
49
50impl Uuid {
51    /// Parses a `Uuid` from a string of hexadecimal digits with optional
52    /// hyphens.
53    ///
54    /// Any of the formats generated by this module (simple, hyphenated, urn,
55    /// Microsoft GUID) are supported by this parsing function.
56    ///
57    /// Prefer [`try_parse`] unless you need detailed user-facing diagnostics.
58    /// This method will be eventually deprecated in favor of `try_parse`.
59    ///
60    /// # Examples
61    ///
62    /// Parse a hyphenated UUID:
63    ///
64    /// ```
65    /// # use uuid::{Uuid, Version, Variant};
66    /// # fn main() -> Result<(), uuid::Error> {
67    /// let uuid = Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?;
68    ///
69    /// assert_eq!(Some(Version::Random), uuid.get_version());
70    /// assert_eq!(Variant::RFC4122, uuid.get_variant());
71    /// # Ok(())
72    /// # }
73    /// ```
74    ///
75    /// [`try_parse`]: #method.try_parse
76    pub fn parse_str(input: &str) -> Result<Uuid, Error> {
77        try_parse(input.as_bytes())
78            .map(Uuid::from_bytes)
79            .map_err(InvalidUuid::into_err)
80    }
81
82    /// Parses a `Uuid` from a string of hexadecimal digits with optional
83    /// hyphens.
84    ///
85    /// This function is similar to [`parse_str`], in fact `parse_str` shares
86    /// the same underlying parser. The difference is that if `try_parse`
87    /// fails, it won't generate very useful error messages. The `parse_str`
88    /// function will eventually be deprecated in favor of `try_parse`.
89    ///
90    /// To parse a UUID from a byte stream instead of a UTF8 string, see
91    /// [`try_parse_ascii`].
92    ///
93    /// # Examples
94    ///
95    /// Parse a hyphenated UUID:
96    ///
97    /// ```
98    /// # use uuid::{Uuid, Version, Variant};
99    /// # fn main() -> Result<(), uuid::Error> {
100    /// let uuid = Uuid::try_parse("550e8400-e29b-41d4-a716-446655440000")?;
101    ///
102    /// assert_eq!(Some(Version::Random), uuid.get_version());
103    /// assert_eq!(Variant::RFC4122, uuid.get_variant());
104    /// # Ok(())
105    /// # }
106    /// ```
107    ///
108    /// [`parse_str`]: #method.parse_str
109    /// [`try_parse_ascii`]: #method.try_parse_ascii
110    pub const fn try_parse(input: &str) -> Result<Uuid, Error> {
111        Self::try_parse_ascii(input.as_bytes())
112    }
113
114    /// Parses a `Uuid` from a string of hexadecimal digits with optional
115    /// hyphens.
116    ///
117    /// The input is expected to be a string of ASCII characters. This method
118    /// can be more convenient than [`try_parse`] if the UUID is being
119    /// parsed from a byte stream instead of from a UTF8 string.
120    ///
121    /// # Examples
122    ///
123    /// Parse a hyphenated UUID:
124    ///
125    /// ```
126    /// # use uuid::{Uuid, Version, Variant};
127    /// # fn main() -> Result<(), uuid::Error> {
128    /// let uuid = Uuid::try_parse_ascii(b"550e8400-e29b-41d4-a716-446655440000")?;
129    ///
130    /// assert_eq!(Some(Version::Random), uuid.get_version());
131    /// assert_eq!(Variant::RFC4122, uuid.get_variant());
132    /// # Ok(())
133    /// # }
134    /// ```
135    ///
136    /// [`try_parse`]: #method.try_parse
137    pub const fn try_parse_ascii(input: &[u8]) -> Result<Uuid, Error> {
138        match try_parse(input) {
139            Ok(bytes) => Ok(Uuid::from_bytes(bytes)),
140            // If parsing fails then we don't know exactly what went wrong
141            // In this case, we just return a generic error
142            Err(_) => Err(Error(ErrorKind::ParseOther)),
143        }
144    }
145}
146
147const fn try_parse(input: &'_ [u8]) -> Result<[u8; 16], InvalidUuid<'_>> {
148    match (input.len(), input) {
149        // Inputs of 32 bytes must be a non-hyphenated UUID
150        (32, s) => parse_simple(s, true),
151        // Hyphenated UUIDs may be wrapped in various ways:
152        // - `{UUID}` for braced UUIDs
153        // - `urn:uuid:UUID` for URNs
154        // - `UUID` for a regular hyphenated UUID
155        (36, s)
156        | (38, [b'{', s @ .., b'}'])
157        | (45, [b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..]) => {
158            parse_hyphenated(s)
159        }
160        // Any other shaped input is immediately invalid
161        _ => Err(InvalidUuid(input, RequestedUuid::Any)),
162    }
163}
164
165#[inline]
166#[allow(dead_code)]
167pub(crate) const fn parse_braced(input: &'_ [u8]) -> Result<[u8; 16], InvalidUuid<'_>> {
168    if let (38, [b'{', s @ .., b'}']) = (input.len(), input) {
169        parse_hyphenated(s)
170    } else {
171        Err(InvalidUuid(input, RequestedUuid::Braced))
172    }
173}
174
175#[inline]
176#[allow(dead_code)]
177pub(crate) const fn parse_urn(input: &'_ [u8]) -> Result<[u8; 16], InvalidUuid<'_>> {
178    if let (45, [b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..]) =
179        (input.len(), input)
180    {
181        parse_hyphenated(s)
182    } else {
183        Err(InvalidUuid(input, RequestedUuid::Urn))
184    }
185}
186
187#[inline]
188pub(crate) const fn parse_simple(
189    s: &'_ [u8],
190    speculative: bool,
191) -> Result<[u8; 16], InvalidUuid<'_>> {
192    // This length check here removes all other bounds
193    // checks in this function
194    if s.len() != 32 {
195        return Err(InvalidUuid(
196            s,
197            if speculative {
198                RequestedUuid::Any
199            } else {
200                RequestedUuid::Simple
201            },
202        ));
203    }
204
205    let mut buf: [u8; 16] = [0; 16];
206    let mut i = 0;
207
208    while i < 16 {
209        // Convert a two-char hex value (like `A8`)
210        // into a byte (like `10101000`)
211        let h1 = HEX_TABLE[s[i * 2] as usize];
212        let h2 = HEX_TABLE[s[i * 2 + 1] as usize];
213
214        // We use `0xff` as a sentinel value to indicate
215        // an invalid hex character sequence (like the letter `G`)
216        if h1 | h2 == 0xff {
217            return Err(InvalidUuid(
218                s,
219                if speculative {
220                    RequestedUuid::Any
221                } else {
222                    RequestedUuid::Simple
223                },
224            ));
225        }
226
227        // The upper nibble needs to be shifted into position
228        // to produce the final byte value
229        buf[i] = SHL4_TABLE[h1 as usize] | h2;
230        i += 1;
231    }
232
233    Ok(buf)
234}
235
236#[inline]
237pub(crate) const fn parse_hyphenated(s: &'_ [u8]) -> Result<[u8; 16], InvalidUuid<'_>> {
238    // This length check here removes all other bounds
239    // checks in this function
240    if s.len() != 36 {
241        return Err(InvalidUuid(s, RequestedUuid::Hyphenated));
242    }
243
244    // We look at two hex-encoded values (4 chars) at a time because
245    // that's the size of the smallest group in a hyphenated UUID.
246    // The indexes we're interested in are:
247    //
248    // uuid     : 936da01f-9abd-4d9d-80c7-02af85c822a8
249    //            |   |   ||   ||   ||   ||   |   |
250    // hyphens  : |   |   8|  13|  18|  23|   |   |
251    // positions: 0   4    9   14   19   24  28  32
252
253    // First, ensure the hyphens appear in the right places
254    match [s[8], s[13], s[18], s[23]] {
255        [b'-', b'-', b'-', b'-'] => {}
256        _ => return Err(InvalidUuid(s, RequestedUuid::Hyphenated)),
257    }
258
259    let positions: [u8; 8] = [0, 4, 9, 14, 19, 24, 28, 32];
260    let mut buf: [u8; 16] = [0; 16];
261    let mut j = 0;
262
263    while j < 8 {
264        let i = positions[j];
265
266        // The decoding here is the same as the simple case
267        // We're just dealing with two values instead of one
268        let h1 = HEX_TABLE[s[i as usize] as usize];
269        let h2 = HEX_TABLE[s[(i + 1) as usize] as usize];
270        let h3 = HEX_TABLE[s[(i + 2) as usize] as usize];
271        let h4 = HEX_TABLE[s[(i + 3) as usize] as usize];
272
273        if h1 | h2 | h3 | h4 == 0xff {
274            return Err(InvalidUuid(s, RequestedUuid::Hyphenated));
275        }
276
277        buf[j * 2] = SHL4_TABLE[h1 as usize] | h2;
278        buf[j * 2 + 1] = SHL4_TABLE[h3 as usize] | h4;
279        j += 1;
280    }
281
282    Ok(buf)
283}
284
/// Maps every possible byte to the value of the hex digit it spells.
///
/// ASCII `0`-`9`, `a`-`f` and `A`-`F` map to `0..=15`; every other byte
/// maps to the sentinel `0xff`.
const HEX_TABLE: &[u8; 256] = &{
    // Start with every byte marked invalid, then fill in the real digits
    let mut table = [0xff_u8; 256];

    let mut decimal: u8 = 0;
    while decimal < 10 {
        table[(b'0' + decimal) as usize] = decimal;
        decimal += 1;
    }

    // Both letter cases decode to the same values, 10 through 15
    let mut letter: u8 = 0;
    while letter < 6 {
        table[(b'a' + letter) as usize] = 10 + letter;
        table[(b'A' + letter) as usize] = 10 + letter;
        letter += 1;
    }

    table
};
304
/// Maps every possible byte to that byte shifted left by four bits, with
/// the bits that fall off the top discarded.
const SHL4_TABLE: &[u8; 256] = &{
    let mut table = [0_u8; 256];
    let mut n: usize = 0;

    while n < 256 {
        // `n` is always below 256 here, so the cast is lossless
        table[n] = (n as u8) << 4;
        n += 1;
    }

    table
};
319
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        fmt::*,
        std::{str::FromStr, string::ToString},
        tests::some_uuid_iter,
    };

    // Asserts that `$result` is exactly `Err(Error($kind))`.
    macro_rules! assert_parse_error {
        ($result:expr, $kind:expr) => {
            assert_eq!($result, Err(Error($kind)))
        };
    }

    #[test]
    fn test_parse_valid() {
        // The same UUID spelled in each supported layout
        let hyphenated = Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
        let simple = Uuid::parse_str("67e5504410b1426f9247bb680e5fe0c8").unwrap();
        let urn = Uuid::parse_str("urn:uuid:67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
        let guid = Uuid::parse_str("{67e55044-10b1-426f-9247-bb680e5fe0c8}").unwrap();

        assert_eq!(hyphenated, simple);
        assert_eq!(hyphenated, urn);
        assert_eq!(hyphenated, guid);

        let accepted = [
            "00000000000000000000000000000000",
            "67e55044-10b1-426f-9247-bb680e5fe0c8",
            "F9168C5E-CEB2-4faa-B6BF-329BF39FA1E4",
            "67e5504410b1426f9247bb680e5fe0c8",
            "01020304-1112-2122-3132-414243444546",
            "urn:uuid:67e55044-10b1-426f-9247-bb680e5fe0c8",
            "{6d93bade-bd9f-4e13-8914-9474e1e3567b}",
        ];

        for text in accepted.iter() {
            assert!(Uuid::parse_str(text).is_ok());
        }

        // Both all-zero spellings decode to the nil UUID
        let nil = Uuid::nil();
        assert_eq!(
            Uuid::parse_str("00000000000000000000000000000000").unwrap(),
            nil
        );
        assert_eq!(
            Uuid::parse_str("00000000-0000-0000-0000-000000000000").unwrap(),
            nil
        );
    }

    #[test]
    fn test_parse_invalid() {
        // Rejected by `Uuid::parse_str`
        assert_parse_error!(Uuid::parse_str(""), ErrorKind::ParseLength { len: 0 });

        assert_parse_error!(
            Uuid::parse_str("{}"),
            ErrorKind::ParseGroupCount { count: 1 }
        );

        assert_parse_error!(
            Uuid::parse_str("!"),
            ErrorKind::ParseChar {
                character: '!',
                index: 0,
            }
        );

        assert_parse_error!(
            Uuid::parse_str("F9168C5E-CEB2-4faa-B6BF-329BF39FA1E45"),
            ErrorKind::ParseGroupLength {
                group: 4,
                len: 13,
                index: 25,
            }
        );

        assert_parse_error!(
            Uuid::parse_str("F9168C5E-CEB2-4faa-BBF-329BF39FA1E4"),
            ErrorKind::ParseGroupLength {
                group: 3,
                len: 3,
                index: 20,
            }
        );

        assert_parse_error!(
            Uuid::parse_str("F9168C5E-CEB2-4faa-BGBF-329BF39FA1E4"),
            ErrorKind::ParseChar {
                character: 'G',
                index: 20,
            }
        );

        assert_parse_error!(
            Uuid::parse_str("F9168C5E-CEB2F4faaFB6BFF329BF39FA1E4"),
            ErrorKind::ParseGroupCount { count: 2 }
        );

        assert_parse_error!(
            Uuid::parse_str("F9168C5E-CEB2-4faaFB6BFF329BF39FA1E4"),
            ErrorKind::ParseGroupCount { count: 3 }
        );

        assert_parse_error!(
            Uuid::parse_str("F9168C5E-CEB2-4faa-B6BFF329BF39FA1E4"),
            ErrorKind::ParseGroupCount { count: 4 }
        );

        assert_parse_error!(
            Uuid::parse_str("F9168C5E-CEB2-4faa"),
            ErrorKind::ParseGroupCount { count: 3 }
        );

        assert_parse_error!(
            Uuid::parse_str("F9168C5E-CEB2-4faaXB6BFF329BF39FA1E4"),
            ErrorKind::ParseChar {
                character: 'X',
                index: 18,
            }
        );

        assert_parse_error!(
            Uuid::parse_str("{F9168C5E-CEB2-4faa9B6BFF329BF39FA1E41"),
            ErrorKind::ParseChar {
                character: '{',
                index: 0,
            }
        );

        assert_parse_error!(
            Uuid::parse_str("{F9168C5E-CEB2-4faa9B6BFF329BF39FA1E41}"),
            ErrorKind::ParseGroupCount { count: 3 }
        );

        assert_parse_error!(
            Uuid::parse_str("F9168C5E-CEB-24fa-eB6BFF32-BF39FA1E4"),
            ErrorKind::ParseGroupLength {
                group: 1,
                len: 3,
                index: 10,
            }
        );

        assert_parse_error!(
            Uuid::parse_str("01020304-1112-2122-3132-41424344"),
            ErrorKind::ParseGroupLength {
                group: 4,
                len: 8,
                index: 25,
            }
        );

        assert_parse_error!(
            Uuid::parse_str("67e5504410b1426f9247bb680e5fe0c"),
            ErrorKind::ParseLength { len: 31 }
        );

        assert_parse_error!(
            Uuid::parse_str("67e5504410b1426f9247bb680e5fe0c88"),
            ErrorKind::ParseLength { len: 33 }
        );

        assert_parse_error!(
            Uuid::parse_str("67e5504410b1426f9247bb680e5fe0cg8"),
            ErrorKind::ParseChar {
                character: 'g',
                index: 31,
            }
        );

        assert_parse_error!(
            Uuid::parse_str("67e5504410b1426%9247bb680e5fe0c8"),
            ErrorKind::ParseChar {
                character: '%',
                index: 15,
            }
        );

        assert_parse_error!(
            Uuid::parse_str("231231212212423424324323477343246663"),
            ErrorKind::ParseGroupCount { count: 1 }
        );

        assert_parse_error!(
            Uuid::parse_str("{00000000000000000000000000000000}"),
            ErrorKind::ParseGroupCount { count: 1 }
        );

        assert_parse_error!(
            Uuid::parse_str("67e5504410b1426f9247bb680e5fe0c"),
            ErrorKind::ParseLength { len: 31 }
        );

        assert_parse_error!(
            Uuid::parse_str("67e550X410b1426f9247bb680e5fe0cd"),
            ErrorKind::ParseChar {
                character: 'X',
                index: 6,
            }
        );

        assert_parse_error!(
            Uuid::parse_str("67e550-4105b1426f9247bb680e5fe0c"),
            ErrorKind::ParseGroupCount { count: 2 }
        );

        assert_parse_error!(
            Uuid::parse_str("F9168C5E-CEB2-4faa-B6BF1-02BF39FA1E4"),
            ErrorKind::ParseGroupLength {
                group: 3,
                len: 5,
                index: 20,
            }
        );

        assert_parse_error!(
            Uuid::parse_str("\u{bcf3c}"),
            ErrorKind::ParseChar {
                character: '\u{bcf3c}',
                index: 0,
            }
        );

        // Rejected by the format-specific `FromStr` implementations
        assert_parse_error!(
            Hyphenated::from_str(""),
            ErrorKind::ParseLength { len: 0 }
        );

        assert_parse_error!(
            Hyphenated::from_str("550e8400e29b41d4a716446655440000"),
            ErrorKind::ParseGroupCount { count: 1 }
        );

        assert_parse_error!(
            Simple::from_str("550e8400-e29b-41d4-a716-446655440000"),
            ErrorKind::ParseChar {
                character: '-',
                index: 8
            }
        );

        assert_parse_error!(
            Urn::from_str("550e8400-e29b-41d4-a716-446655440000"),
            ErrorKind::ParseChar {
                character: '5',
                index: 0
            }
        );
        assert_parse_error!(
            Urn::from_str(":550e8400-e29b-41d4-a716-446655440000"),
            ErrorKind::ParseChar {
                character: ':',
                index: 0
            }
        );

        assert_parse_error!(
            Braced::from_str("550e8400-e29b-41d4-a716-446655440000"),
            ErrorKind::ParseChar {
                character: '5',
                index: 0
            }
        );
        assert_parse_error!(
            Braced::from_str("{{550e8400-e29b-41d4-a716-446655440000}}"),
            ErrorKind::ParseChar {
                character: '{',
                index: 1
            }
        );

        // Inputs containing control and non-ASCII characters
        assert_parse_error!(
            Uuid::from_str("{6e0----------9=4O-0e5\u{14}e0c4\u{ec2f}8}"),
            ErrorKind::ParseChar {
                character: '=',
                index: 15,
            }
        );

        assert_parse_error!(
            Uuid::from_str("urn:uuid:urae0c8"),
            ErrorKind::ParseChar {
                character: 'u',
                index: 9,
            }
        );
    }

    #[test]
    fn test_roundtrip_default() {
        for original in some_uuid_iter() {
            let text = original.to_string();
            let reparsed = Uuid::parse_str(&text).unwrap();
            assert_eq!(original, reparsed);
        }
    }

    #[test]
    fn test_roundtrip_hyphenated() {
        for original in some_uuid_iter() {
            let text = original.hyphenated().to_string();
            let reparsed = Uuid::parse_str(&text).unwrap();
            assert_eq!(original, reparsed);
        }
    }

    #[test]
    fn test_roundtrip_simple() {
        for original in some_uuid_iter() {
            let text = original.simple().to_string();
            let reparsed = Uuid::parse_str(&text).unwrap();
            assert_eq!(original, reparsed);
        }
    }

    #[test]
    fn test_roundtrip_urn() {
        for original in some_uuid_iter() {
            let text = original.urn().to_string();
            let reparsed = Uuid::parse_str(&text).unwrap();
            assert_eq!(original, reparsed);
        }
    }

    #[test]
    fn test_roundtrip_braced() {
        for original in some_uuid_iter() {
            let text = original.braced().to_string();
            let reparsed = Uuid::parse_str(&text).unwrap();
            assert_eq!(original, reparsed);
        }
    }

    #[test]
    fn test_roundtrip_parse_urn() {
        for original in some_uuid_iter() {
            let text = original.urn().to_string();
            let bytes = parse_urn(text.as_bytes()).unwrap();
            assert_eq!(original, Uuid::from_bytes(bytes));
        }
    }

    #[test]
    fn test_roundtrip_parse_braced() {
        for original in some_uuid_iter() {
            let text = original.braced().to_string();
            let bytes = parse_braced(text.as_bytes()).unwrap();
            assert_eq!(original, Uuid::from_bytes(bytes));
        }
    }

    #[test]
    fn test_try_parse_ascii_non_utf8() {
        // A NUL byte in place of a hex digit must be rejected
        let input = b"67e55044-10b1-426f-9247-bb680e5\0e0c8";
        assert!(Uuid::try_parse_ascii(input).is_err());
    }
}