ssdeep/internals/hash/parser_state.rs
1// SPDX-License-Identifier: MIT
2// SPDX-FileCopyrightText: Copyright (C) 2023–2025 Tsukasa OI <floss_ssdeep@irq.a4lg.com>.
3
4//! Internal parser state and its handling.
5
/// The reason why parsing a fuzzy hash string failed.
///
/// # Compatibility Note
///
/// Starting with version 0.3, this enum no longer has a specified
/// representation, because its concrete representation is not important.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum ParseErrorKind {
    /// Block size: the field is empty.
    BlockSizeIsEmpty,

    /// Block size: the field begins with the digit zero (`'0'`).
    BlockSizeStartsWithZero,

    /// Block size: the value is not valid.
    BlockSizeIsInvalid,

    /// Block size: the value is too large to parse.
    BlockSizeIsTooLarge,

    /// Block hash (either 1 or 2): the block hash is too long.
    BlockHashIsTooLong,

    /// Any part: an unexpected character was encountered.
    UnexpectedCharacter,

    /// Any part: the string ended unexpectedly.
    UnexpectedEndOfString,
}
36
37impl core::fmt::Display for ParseErrorKind {
38 #[rustfmt::skip]
39 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
40 f.write_str(match self { // grcov-excl-br-line:MATCH_ENUM
41 ParseErrorKind::BlockHashIsTooLong => "block hash is too long",
42 ParseErrorKind::BlockSizeIsEmpty => "block size field is empty",
43 ParseErrorKind::BlockSizeStartsWithZero => "block size starts with '0'",
44 ParseErrorKind::BlockSizeIsInvalid => "block size is not valid",
45 ParseErrorKind::BlockSizeIsTooLarge => "block size is too large",
46 ParseErrorKind::UnexpectedCharacter => "an unexpected character is encountered",
47 ParseErrorKind::UnexpectedEndOfString => "end-of-string is not expected",
48 })
49 }
50}
51
/// The part of a fuzzy hash that (possibly) caused a parse error.
///
/// For the corresponding parts, see the
/// ["Fuzzy Hash Internals" section of `FuzzyHashData`](crate::internals::hash::FuzzyHashData#fuzzy-hash-internals).
///
/// The parser currently ignores the file name part, so that part
/// has no variant in this enumeration for now.
///
/// # Compatibility Note
///
/// The `FileName` variant will be added on the next major release.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ParseErrorOrigin {
    /// The block size part.
    BlockSize,

    /// The first block hash (block hash 1).
    BlockHash1,

    /// The second block hash (block hash 2).
    BlockHash2,
}
74
75impl core::fmt::Display for ParseErrorOrigin {
76 #[rustfmt::skip]
77 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
78 f.write_str(match self { // grcov-excl-br-line:MATCH_ENUM
79 ParseErrorOrigin::BlockSize => "block size",
80 ParseErrorOrigin::BlockHash1 => "block hash 1",
81 ParseErrorOrigin::BlockHash2 => "block hash 2",
82 })
83 }
84}
85
86// grcov-excl-br-start:STRUCT_MEMBER
87
88/// The error type for parse operations of a fuzzy hash.
89///
90/// See also: [`ParseErrorInfo`]
91#[derive(Debug, Clone, Copy, PartialEq, Eq)]
92pub struct ParseError(
93 // Use pub(crate) to enable direct initialization.
94 pub(crate) ParseErrorKind,
95 pub(crate) ParseErrorOrigin,
96 pub(crate) usize,
97);
98
99// grcov-excl-br-stop
100
101/// The trait implementing a fuzzy hash parse error.
102pub trait ParseErrorInfo {
103 /// Returns the cause of the error.
104 fn kind(&self) -> ParseErrorKind;
105
106 /// Returns the part which (possibly) caused the error.
107 fn origin(&self) -> ParseErrorOrigin;
108
109 /// Returns the offset which (possibly) caused the error.
110 ///
111 /// Note that this offset may not be exact but may be usable as a hint.
112 fn offset(&self) -> usize;
113}
114
115impl ParseErrorInfo for ParseError {
116 fn kind(&self) -> ParseErrorKind {
117 self.0
118 }
119 fn origin(&self) -> ParseErrorOrigin {
120 self.1
121 }
122 fn offset(&self) -> usize {
123 self.2
124 }
125}
126
127impl core::fmt::Display for ParseError {
128 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
129 write!(
130 f,
131 "error occurred while parsing a fuzzy hash ({1}, at byte offset {2}): {0}",
132 self.kind(),
133 self.origin(),
134 self.offset()
135 )
136 }
137}
138
139crate::internals::macros::impl_error!(ParseError {});
140
/// The state reached once a block hash has been parsed.
///
/// Some of these states always indicate an error condition, while
/// others are valid or not depending on the context.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum BlockHashParseState {
    /// Parsing reached the end of the string.
    MetEndOfString,

    /// Parsing stopped at a comma character (`,`).
    MetComma,

    /// Parsing stopped at a colon character (`:`).
    MetColon,

    /// The block hash is too long, so continuing would cause an overflow.
    OverflowError,

    /// A character that is not in the Base64 alphabet
    /// (or is simply unexpected) was encountered.
    Base64Error,
}
162
163pub(crate) mod tests;