s3_access_log_rust/
lib.rs

1use csv::ReaderBuilder;
2
3use http::StatusCode;
4use std::net::IpAddr;
5
6use chrono::{DateTime, Utc};
7use serde::de::{self, Deserializer, Visitor};
8use serde::ser::Serializer;
9use serde::Deserialize;
10use serde_with::serde_as;
11use serde_with::{DeserializeAs, SerializeAs};
12use std::fmt;
13use std::fmt::Display;
14use std::marker::PhantomData;
15use std::option::Option;
16use std::str::FromStr;
17
18// copy paste from https://docs.rs/serde_with/latest/src/serde_with/de/impls.rs.html#939-981
19pub struct DefaultStringToNone;
20
21impl<'de, Str> DeserializeAs<'de, Option<Str>> for DefaultStringToNone
22where
23    Str: FromStr,
24    Str::Err: Display,
25{
26    fn deserialize_as<D>(deserializer: D) -> Result<Option<Str>, D::Error>
27    where
28        D: Deserializer<'de>,
29    {
30        struct OptionStringEmptyNone<S>(PhantomData<S>);
31        impl<'de, S> Visitor<'de> for OptionStringEmptyNone<S>
32        where
33            S: FromStr,
34            S::Err: Display,
35        {
36            type Value = Option<S>;
37
38            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
39                formatter.write_str("a string")
40            }
41
42            fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
43            where
44                E: de::Error,
45            {
46                match value {
47                    "-" => Ok(None),
48                    v => S::from_str(v).map(Some).map_err(de::Error::custom),
49                }
50            }
51
52            // handles the `null` case
53            fn visit_unit<E>(self) -> Result<Self::Value, E>
54            where
55                E: de::Error,
56            {
57                Ok(None)
58            }
59        }
60
61        deserializer.deserialize_any(OptionStringEmptyNone(PhantomData))
62    }
63}
64
65impl<T> SerializeAs<Option<T>> for DefaultStringToNone
66where
67    T: Display,
68{
69    fn serialize_as<S>(source: &Option<T>, serializer: S) -> Result<S::Ok, S::Error>
70    where
71        S: Serializer,
72    {
73        if let Some(value) = source {
74            serializer.collect_str(value)
75        } else {
76            serializer.serialize_str("-")
77        }
78    }
79}
80
81pub fn deserialize_number_from_string<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
82where
83    D: Deserializer<'de>,
84    T: FromStr + serde::Deserialize<'de>,
85    <T as FromStr>::Err: Display,
86{
87    #[derive(Deserialize)]
88    #[serde(untagged)]
89    enum StringOrInt<T> {
90        String(String),
91        Number(T),
92    }
93
94    match StringOrInt::<T>::deserialize(deserializer)? {
95        StringOrInt::String(s) => {
96            if s == "-" {
97                Ok(None)
98            } else {
99                s.parse::<T>().map(Some).map_err(serde::de::Error::custom)
100            }
101        }
102        StringOrInt::Number(i) => Ok(Some(i)),
103    }
104}
105
106//https://users.rust-lang.org/t/serde-serialization-with-option-of-external-struct/67746/2
107//https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=f42e6ff34e6171af99c432dbbd3b8df1
108
109// copy paste from https://docs.rs/serde_with/latest/src/serde_with/de/impls.rs.html#939-981
110pub struct DefaultIpAddrToNone;
111
112impl<'de, T> DeserializeAs<'de, Option<T>> for DefaultIpAddrToNone
113where
114    T: FromStr,
115    T::Err: Display,
116{
117    fn deserialize_as<D>(deserializer: D) -> Result<Option<T>, D::Error>
118    where
119        D: Deserializer<'de>,
120    {
121        struct OptionIpAddrToNone<S>(PhantomData<S>);
122        impl<'de, S> Visitor<'de> for OptionIpAddrToNone<S>
123        where
124            S: FromStr,
125            S::Err: Display,
126        {
127            type Value = Option<S>;
128
129            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
130                formatter.write_str("a string")
131            }
132
133            fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
134            where
135                E: de::Error,
136            {
137                match value {
138                    "-" => Ok(None),
139                    v => S::from_str(v).map(Some).map_err(de::Error::custom),
140                }
141            }
142        }
143
144        deserializer.deserialize_any(OptionIpAddrToNone(PhantomData))
145    }
146}
147
148impl<T> SerializeAs<Option<T>> for DefaultIpAddrToNone
149where
150    T: Display,
151{
152    fn serialize_as<S>(source: &Option<T>, serializer: S) -> Result<S::Ok, S::Error>
153    where
154        S: Serializer,
155    {
156        if let Some(value) = source {
157            serializer.collect_str(value)
158        } else {
159            serializer.serialize_str("-")
160        }
161    }
162}
163
164// from https://serde.rs/custom-date-format.html#date-in-a-custom-format
165mod my_date_format {
166    use chrono::{DateTime, NaiveDateTime, Utc};
167    use serde::{self, Deserialize, Deserializer, Serializer};
168
169    const FORMAT: &str = "%d/%b/%Y:%H:%M:%S %z";
170
171    pub fn serialize<S>(date: &DateTime<Utc>, serializer: S) -> Result<S::Ok, S::Error>
172    where
173        S: Serializer,
174    {
175        let s = format!("{}", date.format(FORMAT));
176        serializer.serialize_str(&s)
177    }
178
179    pub fn deserialize<'de, D>(deserializer: D) -> Result<DateTime<Utc>, D::Error>
180    where
181        D: Deserializer<'de>,
182    {
183        let s = String::deserialize(deserializer)?;
184        let dt = NaiveDateTime::parse_from_str(&s, FORMAT).map_err(serde::de::Error::custom)?;
185        Ok(DateTime::<Utc>::from_naive_utc_and_offset(dt, Utc))
186    }
187}
188
189#[serde_as]
190#[derive(Debug, Deserialize, PartialEq)]
191pub struct S3AccessLogRecord {
192    pub bucket_owner: String,
193    pub bucket_name: String,
194    #[serde(with = "my_date_format")]
195    pub time: DateTime<Utc>,
196    #[serde_as(as = "DefaultIpAddrToNone")]
197    pub remote_ip: Option<IpAddr>,
198    #[serde_as(as = "DefaultStringToNone")]
199    pub requester: Option<String>, //The canonical user ID of the requester, or a - for unauthenticated requests. If the requester was an IAM user, this field returns the requester's IAM user name along with the AWS account root user that the IAM user belongs to. This identifier is the same one used for access control purposes.
200    pub request_id: String, //A string generated by Amazon S3 to uniquely identify each request.
201    pub operation: String,
202    pub key: String,
203    pub request_uri: String,
204    #[serde(with = "http_serde::status_code")]
205    pub http_status: StatusCode,
206    pub error_code: String,
207    #[serde(deserialize_with = "deserialize_number_from_string")]
208    pub bytes_sent: Option<u64>, // The number of response bytes sent, excluding HTTP protocol overhead, or - if zero. WTF !!!
209    #[serde(deserialize_with = "deserialize_number_from_string")]
210    pub object_size: Option<u64>, // can also be - but the doc don't mention it !!!
211    pub total_time: u64,
212    #[serde(deserialize_with = "deserialize_number_from_string")]
213    pub turn_around_time: Option<u64>,
214    pub referer: String,
215    pub user_agent: String,
216    pub version_id: String,
217    pub host_id: String,
218    pub signature_version: String,
219    pub cipher_suite: String,
220    pub authentication_type: String,
221    pub host_header: String,
222    #[serde_as(as = "DefaultStringToNone")]
223    pub tls_version: Option<String>,
224    #[serde_as(as = "DefaultStringToNone")]
225    pub access_point_arn: Option<String>,
226    #[serde_as(as = "DefaultStringToNone")]
227    pub acl_required: Option<String>,
228}
229
230pub fn convert_wsc_str_to_s3_access_log_record(
231    wsv: &str,
232    ignore_error: bool,
233) -> Vec<S3AccessLogRecord> {
234    let valid_wsv = wsv.replace(['[', ']'], "\"");
235    let mut reader = ReaderBuilder::new()
236        .has_headers(false)
237        .delimiter(b' ')
238        .from_reader(valid_wsv.as_bytes());
239    if ignore_error {
240        return reader
241            .deserialize::<S3AccessLogRecord>()
242            .filter_map(|res| res.ok())
243            .collect();
244    } else {
245        return reader
246            .deserialize::<S3AccessLogRecord>()
247            .map(|res| res.expect("error will parsing csv content"))
248            .collect();
249    }
250}
251
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{DateTime, NaiveDateTime, Utc};
    use serde::Deserialize;
    use serde::Serialize;
    use serde_test::{assert_de_tokens_error, Token};
    use std::net::Ipv4Addr;

    /// Minimal struct that routes one field through `deserialize_number_from_string`.
    #[derive(Debug, PartialEq, Deserialize, Serialize)]
    struct DeserializeNumberFromStringTest {
        #[serde(deserialize_with = "deserialize_number_from_string")]
        string_as_number: Option<u64>,
    }

    /// Builds a fully populated record; the test passes if construction succeeds.
    #[test]
    fn it_instanciate_s3_access_log_record_struct() {
        let naive =
            NaiveDateTime::parse_from_str("11/Nov/2023:03:37:50 +0000", "%d/%b/%Y:%H:%M:%S %z")
                .unwrap();
        let timestamp = DateTime::<Utc>::from_naive_utc_and_offset(naive, Utc);
        let client_ip = std::net::IpAddr::V4(Ipv4Addr::new(130, 176, 48, 151));

        let _record = S3AccessLogRecord {
            bucket_owner: String::from(
                "7e1c2dcc1527ebbd9a81efbefb6a7d5945b7c6fe00160f682c2b7c056d301e83",
            ),
            bucket_name: String::from("aws-website-demonchy-5v3aj"),
            time: timestamp,
            remote_ip: Some(client_ip),
            requester: None,
            request_id: String::from("YDYP07R0QHFNH76W"),
            operation: String::from("WEBSITE.GET.OBJECT"),
            key: String::from("favicon.ico"),
            request_uri: String::from("GET /favicon.ico HTTP/1.1"),
            http_status: StatusCode::NOT_FOUND,
            error_code: String::from("NoSuchKey"),
            bytes_sent: Some(346),
            object_size: None,
            total_time: 39,
            turn_around_time: None,
            referer: String::from("-"),
            user_agent: String::from("Amazon CloudFront"),
            version_id: String::from("-"),
            host_id: String::from(
                "m3PGwDN1s8smqpOSEELewHILMcdm7xri7/UsWHBhRrT0w23Pp0YcEmgboXyHFTv7qR7RvFMrUgo=",
            ),
            signature_version: String::from("-"),
            cipher_suite: String::from("-"),
            authentication_type: String::from("-"),
            host_header: String::from(
                "aws-website-demonchy-5v3aj.s3-website-us-east-1.amazonaws.com",
            ),
            tls_version: None,
            access_point_arn: None,
            acl_required: None,
        };
    }

    /// A bare negative integer token is not a struct, so deserialization must fail.
    #[test]
    fn it_deserialize_number_from_string_convert_negative_number_to_u64_error() {
        let tokens = [Token::I8(-14)];
        assert_de_tokens_error::<DeserializeNumberFromStringTest>(
            &tokens,
            "invalid type: integer `-14`, expected struct DeserializeNumberFromStringTest",
        );
    }
}
312        )
313    }
314}