rhit/nginx_log/
log_line.rs

1use {
2    crate::*,
3    std::{
4        num::ParseIntError,
5        str::FromStr,
6    },
7    thiserror::Error,
8};
9
/// Errors that can be produced while parsing an access-log line into a
/// [`LogLine`].
///
/// The two wrapping variants are marked `#[from]`, so the underlying error
/// types convert into `ParseLogError` automatically when propagated with `?`.
#[derive(Debug, Error)]
pub enum ParseLogError {
    /// The line is not a valid log line. The `String` payload is printed in
    /// the error message (presumably the offending line; this variant is not
    /// constructed in this file, so verify against the producers).
    #[error("invalid log line {0:?}")]
    InvalidLogLine(String),
    /// A character that was searched for was not found; the payload is that
    /// character.
    #[error("character not found {0:?}")]
    CharNotFound(char),
    /// The date/time part could not be parsed; wraps the underlying
    /// `ParseDateTimeError`.
    #[error("date parse error")]
    InvalidDateTime(#[from] ParseDateTimeError),
    /// A field expected to be an integer failed to parse; wraps the standard
    /// library's `ParseIntError`.
    #[error("expected int")]
    IntExpected(#[from] ParseIntError),
}
21
/// A line in the access log, describing a hit.
///
/// Instances are built by the `FromStr` implementation in this file; the
/// field descriptions below reflect what that parser stores.
// perf note: parsing the remote address as an IP is costly
// (the app is about 3% faster with this field replaced by a string,
// which is how it is stored here)
#[derive(Debug)]
pub struct LogLine {
    /// Remote address, kept as the raw text found before the first space of
    /// the line (not parsed as an IP, see the perf note above).
    pub remote_addr: Box<str>,
    /// Timestamp of the hit, parsed from the `[...]` section of the line.
    pub date_time: DateTime,
    /// Value returned by `DateIndexed::date_idx`. The parser always sets it
    /// to 0; presumably it is assigned afterwards by the code that indexes
    /// dates (TODO confirm against callers).
    pub date_idx: usize,
    /// Request method; `Method::None` when the request section contains no
    /// space-separated method token.
    pub method: Method,
    /// Request path, with anything from the first `?` onwards removed.
    pub path: Box<str>,
    /// Status code, parsed as an integer.
    pub status: u16,
    /// Number of bytes sent, parsed as an integer; also what
    /// `DateIndexed::bytes` returns.
    pub bytes_sent: u64,
    /// Content of the quoted section that follows the byte count.
    pub referer: Box<str>,
}
36
37impl DateIndexed for LogLine {
38    fn date_idx(&self) -> usize {
39        self.date_idx
40    }
41    fn bytes(&self) -> u64 {
42        self.bytes_sent
43    }
44}
45impl DateIndexed for &LogLine {
46    fn date_idx(&self) -> usize {
47        self.date_idx
48    }
49    fn bytes(&self) -> u64 {
50        self.bytes_sent
51    }
52}
53
54impl LogLine {
55    pub fn is_resource(&self) -> bool {
56        let s = &self.path;
57        s.ends_with(".png")
58            || s.ends_with(".css")
59            || s.ends_with(".svg")
60            || s.ends_with(".jpg")
61            || s.ends_with(".jpeg")
62            || s.ends_with(".gif")
63            || s.ends_with(".ico")
64            || s.ends_with(".js")
65            || s.ends_with(".woff2")
66            || s.ends_with(".webp")
67        // verified to be much much slower:
68        // lazy_regex::bytes_regex_is_match!(
69        //     "\\.(png|css|svg|jpe?g|gif|ico|js|woff2|webp)$",
70        //     self.path.as_bytes(),
71        // )
72    }
73    pub fn date(&self) -> Date {
74        self.date_time.date
75    }
76    pub fn time(&self) -> Time {
77        self.date_time.time
78    }
79}
80
81impl FromStr for LogLine {
82    type Err = ParseLogError;
83    fn from_str(s: &str) -> Result<Self, Self::Err> {
84        let mut ranger = Ranger::new(s);
85        let remote_addr = ranger.until(' ')?.into();
86        let date_time = DateTime::from_nginx(ranger.between('[', ']')?)?;
87        let mut request = ranger.between('"', '"')?.split(' ');
88        let (method, path) = match (request.next(), request.next()) {
89            (Some(method), Some(path)) => (Method::from(method), path),
90            (Some(path), None) => (Method::None, path),
91            _ => unreachable!(),
92        };
93        let path = path.split('?').next().unwrap().into();
94        let status = ranger.between(' ', ' ')?.parse()?;
95        let bytes_sent = ranger.between(' ', ' ')?.parse()?;
96        let referer = ranger.between('"', '"')?.into();
97        Ok(LogLine {
98            remote_addr,
99            date_time,
100            date_idx: 0,
101            method,
102            path,
103            status,
104            bytes_sent,
105            referer,
106        })
107    }
108}
109
/// Parsing tests run against real-world access-log lines.
#[cfg(test)]
mod log_line_parsing_tests {

    use super::*;

    // A regular hit: the path carries a query string.
    static SIO_PULL_LINE: &str = r#"10.232.28.160 - - [22/Jan/2021:02:49:30 +0000] "GET /socket.io/?EIO=3&transport=polling&t=NSd_nu- HTTP/1.1" 200 99 "https://miaou.dystroy.org/3" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36""#;

    /// All the checked fields are extracted, and the query string is removed
    /// from the path.
    #[test]
    fn parse_sio_line() {
        let hit: LogLine = SIO_PULL_LINE.parse().unwrap();
        assert_eq!(&hit.remote_addr[..], "10.232.28.160");
        assert_eq!(hit.method, Method::Get);
        assert_eq!(&hit.path[..], "/socket.io/");
        assert_eq!(hit.status, 200);
        assert_eq!(hit.bytes_sent, 99);
        assert_eq!(&hit.referer[..], "https://miaou.dystroy.org/3");
    }

    // The request section is escaped binary data without any space, hence
    // without method.
    static NO_VERB_LINE: &str = r#"119.142.145.250 - - [10/Jan/2021:10:27:01 +0000] "\x16\x03\x01\x00u\x01\x00\x00q\x03\x039a\xDF\xCA\x90\xB1\xB4\xC2SB\x96\xF0\xB7\x96CJD\xE1\xBF\x0E\xE1Y\xA2\x87v\x1D\xED\xBDo\x05A\x9D\x00\x00\x1A\xC0/\xC0+\xC0\x11\xC0\x07\xC0\x13\xC0\x09\xC0\x14\xC0" 400 173 "-" "-""#;

    /// A request without method still parses, with `Method::None`.
    #[test]
    fn parse_no_method_line() {
        let hit: LogLine = NO_VERB_LINE.parse().unwrap();
        assert_eq!(hit.method, Method::None);
        assert_eq!(hit.status, 400);
        assert_eq!(hit.bytes_sent, 173);
    }

    // The timestamp is written as `2021-03-03T09:08:37+08:00` and a quoted
    // field follows the user agent.
    static ISSUE_3_LINE: &str = r#"0.0.0.0 - - [2021-03-03T09:08:37+08:00] "GET /zhly/assets/guide/audit-opinion.png HTTP/1.1" 200 3911 "http://0.0.0.0:8091/zhly/" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4427.5 Safari/537.36" "-""#;

    /// The line of issue 3 parses.
    #[test]
    fn parse_issue_3_line() {
        let hit: LogLine = ISSUE_3_LINE.parse().unwrap();
        assert_eq!(&hit.remote_addr[..], "0.0.0.0");
        assert_eq!(hit.method, Method::Get);
        assert_eq!(hit.status, 200);
    }
}
146