ssdeep/internals/hash/
parser_state.rs

1// SPDX-License-Identifier: MIT
2// SPDX-FileCopyrightText: Copyright (C) 2023–2025 Tsukasa OI <floss_ssdeep@irq.a4lg.com>.
3
4//! Internal parser state and its handling.
5
/// The reason why parsing a fuzzy hash string failed.
///
/// # Compatibility Note
///
/// Starting with version 0.3, no particular representation is specified
/// for this enum, because its exact representation is not important.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum ParseErrorKind {
    /// The block size field is empty.
    BlockSizeIsEmpty,

    /// The block size field begins with the digit zero (`'0'`).
    BlockSizeStartsWithZero,

    /// The block size is not a valid one.
    BlockSizeIsInvalid,

    /// The block size is too large to be parsed.
    BlockSizeIsTooLarge,

    /// A block hash (either block hash 1 or 2) is too long.
    BlockHashIsTooLong,

    /// An unexpected character is encountered (may occur in any part).
    UnexpectedCharacter,

    /// The string ended unexpectedly (may occur in any part).
    UnexpectedEndOfString,
}
36
37impl core::fmt::Display for ParseErrorKind {
38    #[rustfmt::skip]
39    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
40        f.write_str(match self { // grcov-excl-br-line:MATCH_ENUM
41            ParseErrorKind::BlockHashIsTooLong      => "block hash is too long",
42            ParseErrorKind::BlockSizeIsEmpty        => "block size field is empty",
43            ParseErrorKind::BlockSizeStartsWithZero => "block size starts with '0'",
44            ParseErrorKind::BlockSizeIsInvalid      => "block size is not valid",
45            ParseErrorKind::BlockSizeIsTooLarge     => "block size is too large",
46            ParseErrorKind::UnexpectedCharacter     => "an unexpected character is encountered",
47            ParseErrorKind::UnexpectedEndOfString   => "end-of-string is not expected",
48        })
49    }
50}
51
/// The part of a fuzzy hash string that (possibly) caused a parse error.
///
/// The corresponding parts are described in the
/// ["Fuzzy Hash Internals" section of `FuzzyHashData`](crate::internals::hash::FuzzyHashData#fuzzy-hash-internals).
///
/// The file name part is currently ignored by the parser, so it has no
/// variant in this enumeration for now.
///
/// # Compatibility Note
///
/// The `FileName` variant will be added on the next major release.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ParseErrorOrigin {
    /// The block size part.
    BlockSize,

    /// The block hash 1 part.
    BlockHash1,

    /// The block hash 2 part.
    BlockHash2,
}
74
75impl core::fmt::Display for ParseErrorOrigin {
76    #[rustfmt::skip]
77    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
78        f.write_str(match self { // grcov-excl-br-line:MATCH_ENUM
79            ParseErrorOrigin::BlockSize  => "block size",
80            ParseErrorOrigin::BlockHash1 => "block hash 1",
81            ParseErrorOrigin::BlockHash2 => "block hash 2",
82        })
83    }
84}
85
86// grcov-excl-br-start:STRUCT_MEMBER
87
88/// The error type for parse operations of a fuzzy hash.
89///
90/// See also: [`ParseErrorInfo`]
91#[derive(Debug, Clone, Copy, PartialEq, Eq)]
92pub struct ParseError(
93    // Use pub(crate) to enable direct initialization.
94    pub(crate) ParseErrorKind,
95    pub(crate) ParseErrorOrigin,
96    pub(crate) usize,
97);
98
99// grcov-excl-br-stop
100
101/// The trait implementing a fuzzy hash parse error.
102pub trait ParseErrorInfo {
103    /// Returns the cause of the error.
104    fn kind(&self) -> ParseErrorKind;
105
106    /// Returns the part which (possibly) caused the error.
107    fn origin(&self) -> ParseErrorOrigin;
108
109    /// Returns the offset which (possibly) caused the error.
110    ///
111    /// Note that this offset may not be exact but may be usable as a hint.
112    fn offset(&self) -> usize;
113}
114
115impl ParseErrorInfo for ParseError {
116    fn kind(&self) -> ParseErrorKind {
117        self.0
118    }
119    fn origin(&self) -> ParseErrorOrigin {
120        self.1
121    }
122    fn offset(&self) -> usize {
123        self.2
124    }
125}
126
127impl core::fmt::Display for ParseError {
128    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
129        write!(
130            f,
131            "error occurred while parsing a fuzzy hash ({1}, at byte offset {2}): {0}",
132            self.kind(),
133            self.origin(),
134            self.offset()
135        )
136    }
137}
138
139crate::internals::macros::impl_error!(ParseError {});
140
/// The state reached after parsing a block hash.
///
/// Some of these states always represent an error condition, while
/// the others are valid depending on the context.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum BlockHashParseState {
    /// Parsing reached the end of the string.
    MetEndOfString,

    /// Parsing stopped at a comma character (`,`).
    MetComma,

    /// Parsing stopped at a colon character (`:`).
    MetColon,

    /// The block hash is too long, so that it would cause an overflow.
    OverflowError,

    /// A character that is not a valid Base64 alphabet
    /// (or is just unexpected) is encountered.
    Base64Error,
}
162
163pub(crate) mod tests;