cloudfront_logs/
shared.rs

1use crate::{COMMENT_U8, TABS, TAB_U8};
2
3/// Validates a log line
4///
5/// This function checks if
6/// * the line is not empty,
7/// * not a comment line
8/// * and has the correct number of fields.
9///
10/// # Examples
11///
12/// ```rust
13/// use cloudfront_logs::validate_line;
14///
15/// let okay_line = "2019-12-04	21:02:31	LAX1	392	192.0.2.100	GET	d111111abcdef8.cloudfront.net	/index.html	200	-	Mozilla/5.0%20(Windows%20NT%2010.0;%20Win64;%20x64)%20AppleWebKit/537.36%20(KHTML,%20like%20Gecko)%20Chrome/78.0.3904.108%20Safari/537.36	-	-	Hit	SOX4xwn4XV6Q4rgb7XiVGOHms_BGlTAC4KyHmureZmBNrjGdRLiNIQ==	d111111abcdef8.cloudfront.net	https	23	0.001	-	TLSv1.2	ECDHE-RSA-AES128-GCM-SHA256	Hit	HTTP/2.0	-	-	11040	0.001	Hit	text/html	78	-	-";
16/// let broken_line = "2019-12-04	21:02:31	LAX1	392	192.0.2.100	GET	d111111abcdef8.cloudfront.net	/index.html	200	-	Mozilla/5.0%20(Windows%20NT%2010.0;%20Win64;%20x64)%20AppleWebKit/537.36%20(KHTML,%20like%20Gecko)%20Chrome/78.0.3904.108%20Safari/537.36	-	-	Hit	SOX4xwn4XV6Q4rgb7XiVGOHms_BGlTAC4KyHmureZmBNrjGdRLiNIQ==	d111111abcdef8.cloudfront.net	https	23	0.001	-	TLSv1.2	ECDHE-RSA-AES128-GCM-SHA256	Hit	HTTP/2.0	-	-";
17/// let empty_line = "";
18/// let comment_line = "#Version: 1.0";
19///
20/// assert!(validate_line(okay_line).is_ok());
21/// assert!(validate_line(broken_line).is_err());
22/// assert!(validate_line(empty_line).is_err());
23/// assert!(validate_line(comment_line).is_err());
24/// ```
25#[inline]
26pub fn validate_line(line: &str) -> Result<(), &'static str> {
27    let bytes = line.as_bytes();
28    if bytes.is_empty() {
29        return Err("Invalid log line (empty)");
30    }
31    if bytes[0] == COMMENT_U8 {
32        return Err("Invalid log line (comment)");
33    }
34    if memchr::memchr_iter(TAB_U8, bytes).count() != TABS {
35        return Err("Invalid log line (field count)");
36    }
37    Ok(())
38}
39
40#[inline]
41pub(crate) fn split(line: &str) -> MemchrTabSplitter<'_> {
42    MemchrTabSplitter::new(line)
43}
44
45#[derive(Debug, Clone)]
46pub(crate) struct MemchrTabSplitter<'a> {
47    pub(crate) data: &'a str,
48    pub(crate) prev: usize,
49    pub(crate) end: usize,
50    pub(crate) iter: memchr::Memchr<'a>,
51}
52
53impl<'a> MemchrTabSplitter<'a> {
54    pub(crate) fn new(data: &'a str) -> Self {
55        let prev = 0;
56        let end = data.len();
57        let iter = memchr::memchr_iter(TAB_U8, data.as_bytes());
58        Self {
59            data,
60            prev,
61            end,
62            iter,
63        }
64    }
65}
66
67impl<'a> Iterator for MemchrTabSplitter<'a> {
68    type Item = &'a str;
69
70    fn next(&mut self) -> Option<Self::Item> {
71        let current_tab = self.iter.next();
72        if let Some(tab_idx) = current_tab {
73            assert!(tab_idx > 0, "Found tab stop at index 0 (invalid log line)");
74
75            let from = self.prev;
76            let to = tab_idx;
77            self.prev = to + 1;
78            Some(&self.data[from..to])
79        } else {
80            // get field after the last tab stop
81            if self.prev < self.end {
82                let from = self.prev;
83                self.prev = self.end;
84                Some(&self.data[from..])
85            } else {
86                None
87            }
88        }
89    }
90}
91
// Parses a field which may be absent;
// if the input is "-", returns Ok(None), otherwise parses the input as T
// and returns Ok(Some(value)), or the parse error of T on failure;
// -> parse_as_option(iter.next().unwrap()).map_err(|_e| "…")?
// -> parse_as_option(str_input).map_err(|_e| "…")?
pub(crate) fn parse_as_option<T: std::str::FromStr>(s: &str) -> Result<Option<T>, T::Err> {
    if s == "-" {
        Ok(None)
    } else {
        // `Some` is passed directly as the mapping function (no wrapping closure needed)
        s.parse().map(Some)
    }
}
102
// chain friendly sibling of parse_as_option;
// "-" becomes None, any other input becomes Some(parse result),
// which lets the caller transpose it into a Result<Option<T>, _>;
// -> iter.next().and_then(as_optional_t).transpose().map_err(|_e| "…")?
pub(crate) fn as_optional_t<T: std::str::FromStr>(s: &str) -> Option<Result<T, T::Err>> {
    match s {
        "-" => None,
        value => Some(value.parse()),
    }
}
112
// Extension trait producing an owned, optional String;
// the placeholder "-" maps to None, every other input to Some(String)
pub(crate) trait ToOptionalString {
    fn to_optional_string(&self) -> Option<String>;
}

impl ToOptionalString for &str {
    fn to_optional_string(&self) -> Option<String> {
        // `self` is a `&&str` here, so look through one reference before matching
        match *self {
            "-" => None,
            text => Some(text.to_owned()),
        }
    }
}
128
// Extension trait producing a borrowed, optional str;
// the placeholder "-" maps to None, every other input to Some(&str);
// only compiled when the "parquet" feature is enabled
#[cfg(feature = "parquet")]
pub(crate) trait AsOptionalStr {
    fn as_optional_str(&self) -> Option<&str>;
}

#[cfg(feature = "parquet")]
impl AsOptionalStr for str {
    fn as_optional_str(&self) -> Option<&str> {
        match self {
            "-" => None,
            other => Some(other),
        }
    }
}