1use csv::ReaderBuilder;
2
3use http::StatusCode;
4use std::net::IpAddr;
5
6use chrono::{DateTime, Utc};
7use serde::de::{self, Deserializer, Visitor};
8use serde::ser::Serializer;
9use serde::Deserialize;
10use serde_with::serde_as;
11use serde_with::{DeserializeAs, SerializeAs};
12use std::fmt;
13use std::fmt::Display;
14use std::marker::PhantomData;
15use std::option::Option;
16use std::str::FromStr;
17
/// `serde_with` adapter that treats the placeholder string `"-"` as "no value".
///
/// Deserializing maps `"-"` (or a unit value) to `None` and parses any other
/// text with [`FromStr`] into `Some(value)`. Serializing writes `Some(value)`
/// through its [`Display`] form and `None` as the literal `"-"`.
pub struct DefaultStringToNone;
20
21impl<'de, Str> DeserializeAs<'de, Option<Str>> for DefaultStringToNone
22where
23 Str: FromStr,
24 Str::Err: Display,
25{
26 fn deserialize_as<D>(deserializer: D) -> Result<Option<Str>, D::Error>
27 where
28 D: Deserializer<'de>,
29 {
30 struct OptionStringEmptyNone<S>(PhantomData<S>);
31 impl<'de, S> Visitor<'de> for OptionStringEmptyNone<S>
32 where
33 S: FromStr,
34 S::Err: Display,
35 {
36 type Value = Option<S>;
37
38 fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
39 formatter.write_str("a string")
40 }
41
42 fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
43 where
44 E: de::Error,
45 {
46 match value {
47 "-" => Ok(None),
48 v => S::from_str(v).map(Some).map_err(de::Error::custom),
49 }
50 }
51
52 fn visit_unit<E>(self) -> Result<Self::Value, E>
54 where
55 E: de::Error,
56 {
57 Ok(None)
58 }
59 }
60
61 deserializer.deserialize_any(OptionStringEmptyNone(PhantomData))
62 }
63}
64
65impl<T> SerializeAs<Option<T>> for DefaultStringToNone
66where
67 T: Display,
68{
69 fn serialize_as<S>(source: &Option<T>, serializer: S) -> Result<S::Ok, S::Error>
70 where
71 S: Serializer,
72 {
73 if let Some(value) = source {
74 serializer.collect_str(value)
75 } else {
76 serializer.serialize_str("-")
77 }
78 }
79}
80
81pub fn deserialize_number_from_string<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
82where
83 D: Deserializer<'de>,
84 T: FromStr + serde::Deserialize<'de>,
85 <T as FromStr>::Err: Display,
86{
87 #[derive(Deserialize)]
88 #[serde(untagged)]
89 enum StringOrInt<T> {
90 String(String),
91 Number(T),
92 }
93
94 match StringOrInt::<T>::deserialize(deserializer)? {
95 StringOrInt::String(s) => {
96 if s == "-" {
97 Ok(None)
98 } else {
99 s.parse::<T>().map(Some).map_err(serde::de::Error::custom)
100 }
101 }
102 StringOrInt::Number(i) => Ok(Some(i)),
103 }
104}
105
/// `serde_with` adapter that treats the placeholder string `"-"` as "no address".
///
/// Deserializing maps `"-"` to `None` and parses any other text with
/// [`FromStr`] into `Some(value)`. Serializing writes `Some(value)` through its
/// [`Display`] form and `None` as the literal `"-"`. Despite the name, the
/// implementations are generic over any `T: FromStr` / `T: Display`.
pub struct DefaultIpAddrToNone;
111
112impl<'de, T> DeserializeAs<'de, Option<T>> for DefaultIpAddrToNone
113where
114 T: FromStr,
115 T::Err: Display,
116{
117 fn deserialize_as<D>(deserializer: D) -> Result<Option<T>, D::Error>
118 where
119 D: Deserializer<'de>,
120 {
121 struct OptionIpAddrToNone<S>(PhantomData<S>);
122 impl<'de, S> Visitor<'de> for OptionIpAddrToNone<S>
123 where
124 S: FromStr,
125 S::Err: Display,
126 {
127 type Value = Option<S>;
128
129 fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
130 formatter.write_str("a string")
131 }
132
133 fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
134 where
135 E: de::Error,
136 {
137 match value {
138 "-" => Ok(None),
139 v => S::from_str(v).map(Some).map_err(de::Error::custom),
140 }
141 }
142 }
143
144 deserializer.deserialize_any(OptionIpAddrToNone(PhantomData))
145 }
146}
147
148impl<T> SerializeAs<Option<T>> for DefaultIpAddrToNone
149where
150 T: Display,
151{
152 fn serialize_as<S>(source: &Option<T>, serializer: S) -> Result<S::Ok, S::Error>
153 where
154 S: Serializer,
155 {
156 if let Some(value) = source {
157 serializer.collect_str(value)
158 } else {
159 serializer.serialize_str("-")
160 }
161 }
162}
163
164mod my_date_format {
166 use chrono::{DateTime, NaiveDateTime, Utc};
167 use serde::{self, Deserialize, Deserializer, Serializer};
168
169 const FORMAT: &str = "%d/%b/%Y:%H:%M:%S %z";
170
171 pub fn serialize<S>(date: &DateTime<Utc>, serializer: S) -> Result<S::Ok, S::Error>
172 where
173 S: Serializer,
174 {
175 let s = format!("{}", date.format(FORMAT));
176 serializer.serialize_str(&s)
177 }
178
179 pub fn deserialize<'de, D>(deserializer: D) -> Result<DateTime<Utc>, D::Error>
180 where
181 D: Deserializer<'de>,
182 {
183 let s = String::deserialize(deserializer)?;
184 let dt = NaiveDateTime::parse_from_str(&s, FORMAT).map_err(serde::de::Error::custom)?;
185 Ok(DateTime::<Utc>::from_naive_utc_and_offset(dt, Utc))
186 }
187}
188
/// One parsed S3 access log record.
///
/// Fields that use `DefaultStringToNone`, `DefaultIpAddrToNone` or
/// `deserialize_number_from_string` read the placeholder `-` as `None`. Plain
/// `String` fields have no such adapter, so a `-` placeholder is stored as the
/// string `"-"`.
///
/// NOTE(review): `convert_wsc_str_to_s3_access_log_record` feeds this type
/// header-less, space-delimited records, so the declaration order of the
/// fields is presumably significant — verify before reordering.
#[serde_as]
#[derive(Debug, Deserialize, PartialEq)]
pub struct S3AccessLogRecord {
    pub bucket_owner: String,
    pub bucket_name: String,
    /// Timestamp, read and written through `my_date_format`.
    #[serde(with = "my_date_format")]
    pub time: DateTime<Utc>,
    /// Remote address; `-` is read as `None`.
    #[serde_as(as = "DefaultIpAddrToNone")]
    pub remote_ip: Option<IpAddr>,
    /// Requester identity; `-` is read as `None`.
    #[serde_as(as = "DefaultStringToNone")]
    pub requester: Option<String>,
    pub request_id: String,
    pub operation: String,
    pub key: String,
    pub request_uri: String,
    /// HTTP status, handled by `http_serde::status_code`.
    #[serde(with = "http_serde::status_code")]
    pub http_status: StatusCode,
    pub error_code: String,
    /// Accepts a number or numeric text; `-` is read as `None`.
    #[serde(deserialize_with = "deserialize_number_from_string")]
    pub bytes_sent: Option<u64>,
    /// Accepts a number or numeric text; `-` is read as `None`.
    #[serde(deserialize_with = "deserialize_number_from_string")]
    pub object_size: Option<u64>,
    /// Unlike the neighbouring counters this field is a plain `u64`, so it has
    /// no `-` handling.
    pub total_time: u64,
    /// Accepts a number or numeric text; `-` is read as `None`.
    #[serde(deserialize_with = "deserialize_number_from_string")]
    pub turn_around_time: Option<u64>,
    pub referer: String,
    pub user_agent: String,
    pub version_id: String,
    pub host_id: String,
    pub signature_version: String,
    pub cipher_suite: String,
    pub authentication_type: String,
    pub host_header: String,
    /// `-` is read as `None`.
    #[serde_as(as = "DefaultStringToNone")]
    pub tls_version: Option<String>,
    /// `-` is read as `None`.
    #[serde_as(as = "DefaultStringToNone")]
    pub access_point_arn: Option<String>,
    /// `-` is read as `None`.
    #[serde_as(as = "DefaultStringToNone")]
    pub acl_required: Option<String>,
}
229
230pub fn convert_wsc_str_to_s3_access_log_record(
231 wsv: &str,
232 ignore_error: bool,
233) -> Vec<S3AccessLogRecord> {
234 let valid_wsv = wsv.replace(['[', ']'], "\"");
235 let mut reader = ReaderBuilder::new()
236 .has_headers(false)
237 .delimiter(b' ')
238 .from_reader(valid_wsv.as_bytes());
239 if ignore_error {
240 return reader
241 .deserialize::<S3AccessLogRecord>()
242 .filter_map(|res| res.ok())
243 .collect();
244 } else {
245 return reader
246 .deserialize::<S3AccessLogRecord>()
247 .map(|res| res.expect("error will parsing csv content"))
248 .collect();
249 }
250}
251
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{DateTime, NaiveDateTime, Utc};
    use serde::Deserialize;
    use serde::Serialize;
    use serde_test::{assert_de_tokens_error, Token};
    use std::net::Ipv4Addr;

    /// Fixture whose only field goes through `deserialize_number_from_string`.
    #[derive(Debug, PartialEq, Deserialize, Serialize)]
    struct DeserializeNumberFromStringTest {
        #[serde(deserialize_with = "deserialize_number_from_string")]
        string_as_number: Option<u64>,
    }

    /// Smoke test: every field of `S3AccessLogRecord` can be filled in by hand.
    #[test]
    fn it_instanciate_s3_access_log_record_struct() {
        const TIME_FORMAT: &str = "%d/%b/%Y:%H:%M:%S %z";
        let naive = NaiveDateTime::parse_from_str("11/Nov/2023:03:37:50 +0000", TIME_FORMAT)
            .unwrap();

        let _record = S3AccessLogRecord {
            bucket_owner: String::from(
                "7e1c2dcc1527ebbd9a81efbefb6a7d5945b7c6fe00160f682c2b7c056d301e83",
            ),
            bucket_name: String::from("aws-website-demonchy-5v3aj"),
            time: DateTime::<Utc>::from_naive_utc_and_offset(naive, Utc),
            remote_ip: Some(Ipv4Addr::new(130, 176, 48, 151).into()),
            requester: None,
            request_id: String::from("YDYP07R0QHFNH76W"),
            operation: String::from("WEBSITE.GET.OBJECT"),
            key: String::from("favicon.ico"),
            request_uri: String::from("GET /favicon.ico HTTP/1.1"),
            http_status: StatusCode::NOT_FOUND,
            error_code: String::from("NoSuchKey"),
            bytes_sent: Some(346),
            object_size: None,
            total_time: 39,
            turn_around_time: None,
            referer: String::from("-"),
            user_agent: String::from("Amazon CloudFront"),
            version_id: String::from("-"),
            host_id: String::from(
                "m3PGwDN1s8smqpOSEELewHILMcdm7xri7/UsWHBhRrT0w23Pp0YcEmgboXyHFTv7qR7RvFMrUgo=",
            ),
            signature_version: String::from("-"),
            cipher_suite: String::from("-"),
            authentication_type: String::from("-"),
            host_header: String::from(
                "aws-website-demonchy-5v3aj.s3-website-us-east-1.amazonaws.com",
            ),
            tls_version: None,
            access_point_arn: None,
            acl_required: None,
        };
    }

    /// A bare negative integer token is rejected where the fixture struct is expected.
    #[test]
    fn it_deserialize_number_from_string_convert_negative_number_to_u64_error() {
        let tokens = [Token::I8(-14)];
        assert_de_tokens_error::<DeserializeNumberFromStringTest>(
            &tokens,
            "invalid type: integer `-14`, expected struct DeserializeNumberFromStringTest",
        );
    }
}