1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
use base64::prelude::*;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// A collection of errors that can occur when interacting
/// with the Arachnid Shield API.
///
/// The `#[from]` attributes on [`ReqwestFailed`](Self::ReqwestFailed) and
/// [`IOError`](Self::IOError) generate `From` conversions, so `reqwest::Error`
/// and `std::io::Error` values can be propagated with `?`.
#[derive(thiserror::Error, Debug)]
pub enum ArachnidShieldError {
    /// Represents any failed but complete interaction with the Arachnid Shield API.
    ///
    /// This usually indicates that either the provided media was unsupported,
    /// or the API user credentials could not be validated.
    ///
    /// To set up an account, visit [Arachnid Shield]'s [contact us] page and
    /// get in touch with us.
    ///
    /// [Arachnid Shield]: https://projectarachnid.ca/en/#shield
    /// [contact us]: https://projectarachnid.ca/en/contact/
    #[error("Arachnid Shield API request failed: {0}")]
    APIError(ErrorDetail),
    /// Represents anything that could go wrong while initiating
    /// a request to the Arachnid Shield API.
    ///
    /// The message and source are forwarded from the wrapped `reqwest::Error`.
    #[error(transparent)]
    ReqwestFailed(#[from] reqwest::Error),
    /// Represents that the mime type we intended to send to Arachnid Shield
    /// could not be sent because it is not a valid HTTP header value.
    ///
    /// The payload is the offending mime type, as it appears in the message.
    #[error("Provided mime type: {0} is not a valid http header")]
    BadMimeType(String),
    /// Represents anything that can go wrong while reading media
    /// from a file.
    ///
    /// The message and source are forwarded from the wrapped `std::io::Error`.
    #[error(transparent)]
    IOError(#[from] std::io::Error),
    /// Represents that the mime type could not be recognized
    /// when processing a file.
    ///
    /// The payload identifies the file, as it appears in the message.
    #[error("Could not identify mime type for file: {0}")]
    FailedToRecognizeMimeType(String),
}
/// A match object representing the image in our database that has the
/// same cryptographic hash as the scanned image.
///
/// NOTE(review): the `distance` field describes a similarity distance, which
/// suggests this record is also used for near matches — confirm against the API.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct MatchDetails {
    /// The base-32 representation of the SHA1 hash of the media.
    pub sha1_base32: String,
    /// The base-16 (hexadecimal) representation of the SHA256 hash of the media.
    pub sha256_hex: String,
    /// The classification category for this media, if any.
    pub classification: Option<MediaClassification>,
    /// The numeric distance between the two images.
    ///
    /// A distance below 5000 represents a close match; below 2000 is very close.
    pub distance: usize,
}
/// A record of a near match (based on perceptual hashing) to a known image in our database.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct NearMatchDetails {
    /// The time, in seconds, in the submitted video file where the match was found.
    ///
    /// For still images this will be 0.
    pub timestamp: f64,
    /// The base-32 representation of the SHA1 cryptographic hash of the media in our database.
    pub sha1_base32: String,
    /// The base-16 (hexadecimal) representation of the SHA256 cryptographic hash of the media
    /// in our database.
    pub sha256_hex: String,
    /// The classification of the media in our database, if any.
    pub classification: Option<MediaClassification>,
}
/// A record of a media (+ metadata) that has been scanned by the Arachnid Shield API,
/// and potentially any visual or cryptographic matches attached to it.
#[derive(Debug, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub struct ScannedMedia {
    /// The base-32 representation of the SHA1 cryptographic hash of the media.
    pub sha1_base32: String,
    /// The base-16 (hexadecimal) representation of the SHA256 cryptographic hash of the media.
    pub sha256_hex: String,
    /// The total size, in bytes, of the media that was scanned.
    pub size_bytes: usize,
    /// The classification assigned to this media, if any.
    pub classification: Option<MediaClassification>,
    /// The technology that was used to verify a match between two media, if any.
    pub match_type: Option<MatchType>,
    /// Records of near matches (based on perceptual hashing) to known images in our database.
    ///
    /// This field has no serde default, so it must be present in the payload
    /// for deserialization to succeed.
    pub near_match_details: Vec<NearMatchDetails>,
}
impl ScannedMedia {
    /// Determine whether the scanned media was explicitly classified as having
    /// no known match.
    ///
    /// Returns `true` only when the classification is
    /// `Some(MediaClassification::NoKnownMatch)`. A missing classification
    /// (`None`) yields `false`.
    #[inline]
    pub fn no_known_match(&self) -> bool {
        matches!(self.classification, Some(MediaClassification::NoKnownMatch))
    }

    /// Determine whether the scanned media matches any known media.
    ///
    /// Returns `true` when a classification is present and it is anything other
    /// than `MediaClassification::NoKnownMatch`. A missing classification
    /// (`None`) yields `false`, so this is not simply the negation of
    /// [`no_known_match`](Self::no_known_match).
    #[inline]
    pub fn matches_known_media(&self) -> bool {
        match self.classification {
            Some(MediaClassification::NoKnownMatch) | None => false,
            Some(_) => true,
        }
    }
}
/// A record of a match for a PDQ hash that has been scanned by the Arachnid Shield API.
#[derive(Debug, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub struct ScannedPdqHash {
    /// The classification assigned to this media.
    pub classification: MediaClassification,
    /// The technology that was used to verify a match between two media, if any.
    pub match_type: Option<MatchType>,
    /// A record of a near match (based on perceptual hashing) to a known image in our database,
    /// if any.
    pub near_match_details: Option<NearMatchDetails>,
}
impl ScannedPdqHash {
    /// Determine whether the scanned PDQ hash matches any known media.
    ///
    /// Returns `true` for every classification other than
    /// `MediaClassification::NoKnownMatch`.
    #[inline]
    pub fn matches_known_media(&self) -> bool {
        !matches!(self.classification, MediaClassification::NoKnownMatch)
    }
}
/// A record of the matches for PDQ hashes that have been scanned by the Arachnid Shield API.
#[derive(Deserialize, Serialize, Debug)]
pub struct ScannedPdqHashes {
    /// A collection of the match details for scanned PDQ hashes.
    ///
    /// NOTE(review): the map keys are presumably the submitted (base64-encoded)
    /// PDQ hashes — confirm against the API documentation.
    pub scanned_hashes: HashMap<String, ScannedPdqHash>,
}
/// The possible categories that an image or video could be classified as.
///
/// Variants are (de)serialized in kebab-case: `csam`,
/// `harmful-abusive-material` and `no-known-match`.
///
/// ### Note
///
/// Video files are classified based on their frames. So, if any frame from a video matches a
/// known `csam` image, the video will be classified as `csam`.
/// Similarly, if any frame matches a `harmful-abusive-material` image, the video will be
/// classified as `harmful-abusive-material`.
/// If both `csam` and `harmful-abusive-material` frames are matched in a single video, the
/// classification `csam` will be returned.
///
/// More classification types may be added in the future.
#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
#[non_exhaustive]
pub enum MediaClassification {
    /// Child sexual abuse material, also known as "child pornography".
    #[serde(rename = "csam")]
    CSAM,
    /// Content considered harmful to children. This includes all images or videos associated
    /// with the abusive incident, and nude or partially nude images or videos of children that
    /// have become publicly available and are used in a sexualized context or connected to
    /// sexual commentary.
    HarmfulAbusiveMaterial,
    /// The media was not an exact match or near match to any classified CSAM or
    /// harmful/abusive material in our database.
    NoKnownMatch,
}
/// The technology that was used to verify a match between two media.
///
/// This indicates whether the submitted media matched media in our database exactly
/// (by cryptographic hash) or visually (by visual hash).
///
/// Variants are (de)serialized in lowercase: `exact` and `near`.
///
/// The enum is `#[non_exhaustive]`, so matches outside this crate need a wildcard arm.
/// It derives `Copy` and `PartialEq`/`Eq`, so values can be compared directly,
/// e.g. `match_type == Some(MatchType::Exact)`.
#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum MatchType {
    /// An exact cryptographic hash match using SHA1
    Exact,
    /// A visual near-match using PhotoDNA
    Near,
}
/// A representation of a request to scan media from a url.
///
/// Serializes to a JSON object with a single `url` key.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct ScanMediaFromUrl {
    /// The url to get the media from.
    url: String,
}
impl ScanMediaFromUrl {
    /// Build a new request to scan the media found at `url`.
    ///
    /// The url is copied into an owned `String`; no validation is performed here.
    pub fn new(url: &str) -> Self {
        let url = String::from(url);
        Self { url }
    }

    /// Serialize this request into the JSON bytes used as the request body.
    ///
    /// # Panics
    ///
    /// Panics if `serde_json` fails to serialize the request.
    pub(crate) fn body(&self) -> Vec<u8> {
        let encoded = serde_json::to_vec(self);
        encoded.expect("Should be able to serialize a ScanMediaFromUrl request.")
    }
}
/// A representation of a request to scan media from provided bytes.
#[derive(Debug)]
pub struct ScanMediaFromBytes {
    /// The raw contents of the media, in bytes.
    data: Vec<u8>,
    /// The mime type of the given media.
    mime_type: mime::Mime,
}
impl ScanMediaFromBytes {
    /// Build a new request to scan media from the provided bytes.
    ///
    /// `data` is converted into an owned `Vec<u8>`; `mime_type` is stored as given.
    pub fn new(data: impl Into<Vec<u8>>, mime_type: mime::Mime) -> Self {
        let data: Vec<u8> = data.into();
        Self { data, mime_type }
    }

    /// Get the mime type of the media, rendered as an owned string.
    pub fn mime_type(&self) -> String {
        let Self { mime_type, .. } = self;
        mime_type.to_string()
    }

    /// Consume this request and hand back the raw media bytes it holds.
    pub(crate) fn body(self) -> Vec<u8> {
        let Self { data, .. } = self;
        data
    }
}
/// A representation of a request to scan a list of PDQ hashes.
///
/// Serializes to a JSON object with a single `hashes` key.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct ScanPdqHashes {
    /// The list of base64-encoded hashes.
    hashes: Vec<String>,
}
impl ScanPdqHashes {
    /// Build a request from raw PDQ hashes, each supplied as 32 bytes.
    ///
    /// Every hash is encoded to a base64 string with `BASE64_STANDARD`,
    /// preserving the input order.
    pub fn new(data: &[[u8; 32]]) -> Self {
        let hashes = data
            .iter()
            .map(|raw_hash| BASE64_STANDARD.encode(raw_hash))
            .collect::<Vec<String>>();
        Self { hashes }
    }

    /// Serialize this request into the JSON bytes used as the request body.
    ///
    /// # Panics
    ///
    /// Panics if `serde_json` fails to serialize the request.
    pub(crate) fn body(&self) -> Vec<u8> {
        let encoded = serde_json::to_vec(self);
        encoded.expect("Should be able to serialize a ScanPdqHashes request.")
    }
}
/// A container for any error messages that
/// the Arachnid Shield API sends us.
///
/// (De)serializes as a JSON object with a single `detail` key.
#[derive(Debug, Serialize, Deserialize)]
pub struct ErrorDetail {
    /// The actual error message inside the container.
    pub detail: String,
}
/// Borrow the error message as a plain string slice.
impl AsRef<str> for ErrorDetail {
    fn as_ref(&self) -> &str {
        let Self { detail, .. } = self;
        detail.as_str()
    }
}
/// Display the error message exactly as received, with nothing added.
impl std::fmt::Display for ErrorDetail {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Written verbatim: width/fill flags on the outer formatter are not applied.
        f.write_str(&self.detail)
    }
}