// cloudfront_logs/consts.rs

/// CloudFront log format version header; currently only 1.0 is known and supported.
///
/// This line is the first line of the log file and is used to identify the version of the
/// log file format.
#[allow(unused)]
pub(crate) const VERSION_COMMENT: &str = "#Version: 1.0";

/// CloudFront log fields header comment.
///
/// This line is the second line of the log file and is used to identify the fields in the
/// log file. After the `#Fields: ` prefix it lists 33 field names separated by single spaces.
///
/// Also check the official documentation for the list of fields and their description:
/// <https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html#LogFileFormat>
#[allow(unused)]
pub(crate) const FIELDS_COMMENT: &str = "#Fields: date time x-edge-location sc-bytes c-ip cs-method cs(Host) cs-uri-stem sc-status cs(Referer) cs(User-Agent) cs-uri-query cs(Cookie) x-edge-result-type x-edge-request-id x-host-header cs-protocol cs-bytes time-taken x-forwarded-for ssl-protocol ssl-cipher x-edge-response-result-type cs-protocol-version fle-status fle-encrypted-fields c-port time-to-first-byte x-edge-detailed-result-type sc-content-type sc-content-len sc-range-start sc-range-end";

/// Comment marker, as char
pub(crate) const COMMENT: char = '#';

/// Comment marker, as u8
///
/// `#` is ASCII, so the cast to a single byte is lossless.
pub(crate) const COMMENT_U8: u8 = COMMENT as u8;

/// Field delimiter, as char
pub(crate) const TAB: char = '\t';

/// Field delimiter, as u8
///
/// The tab character is ASCII, so the cast to a single byte is lossless.
pub(crate) const TAB_U8: u8 = TAB as u8;

/// Number of field separators in the log line
pub(crate) const TABS: usize = 32;

/// Number of fields in the log line
///
/// Always one more than the number of separators ([`TABS`]).
#[allow(unused)]
pub(crate) const FIELDS: usize = TABS + 1;

/// Format description for the `time` crate describing a calendar date as
/// `[year]-[month]-[day]`.
///
/// Only available with the `time` feature.
// NOTE(review): presumably matches the `date` field of a log line; confirm against the parser.
#[cfg(feature = "time")]
pub const TIME_DATE_FMT: &[time::format_description::FormatItem<'_>] =
    time::macros::format_description!("[year]-[month]-[day]");

/// Format description for the `time` crate describing a time of day as
/// `[hour]:[minute]:[second]`.
///
/// Only available with the `time` feature.
// NOTE(review): presumably matches the `time` field of a log line; confirm against the parser.
#[cfg(feature = "time")]
pub const TIME_TIME_FMT: &[time::format_description::FormatItem<'_>] =
    time::macros::format_description!("[hour]:[minute]:[second]");

/// strftime-style date pattern (`%Y-%m-%d`).
///
/// Only available with the `chrono` feature.
#[cfg(feature = "chrono")]
pub const CHRONO_DATE_FMT: &str = "%Y-%m-%d";

/// strftime-style time-of-day pattern (`%H:%M:%S`).
///
/// Only available with the `chrono` feature.
#[cfg(feature = "chrono")]
pub const CHRONO_TIME_FMT: &str = "%H:%M:%S";

/// Parquet schemas in textual message format.
///
/// Only available with the `parquet` feature.
#[cfg(feature = "parquet")]
pub mod parquet_schemata {
    /// Schema version 0.
    ///
    /// Declares 34 columns: `date`, `time`, a combined `datetime` column, and the remaining
    /// log fields with `-` / `(...)` in their names replaced by snake_case identifiers.
    // taken from auto-generated schema via parquet test samples
    pub const V0: &str = r#"message rust_schema {
REQUIRED INT32 date (DATE);
REQUIRED BYTE_ARRAY time (STRING);
REQUIRED INT64 datetime (TIMESTAMP_MILLIS);
REQUIRED BYTE_ARRAY x_edge_location (STRING);
REQUIRED INT64 sc_bytes (INTEGER(64,false));
REQUIRED BYTE_ARRAY c_ip (STRING);
REQUIRED BYTE_ARRAY cs_method (STRING);
REQUIRED BYTE_ARRAY cs_host (STRING);
REQUIRED BYTE_ARRAY cs_uri_stem (STRING);
REQUIRED INT32 sc_status (INTEGER(16,false));
OPTIONAL BYTE_ARRAY cs_referer (STRING);
REQUIRED BYTE_ARRAY cs_user_agent (STRING);
OPTIONAL BYTE_ARRAY cs_uri_query (STRING);
OPTIONAL BYTE_ARRAY cs_cookie (STRING);
REQUIRED BYTE_ARRAY x_edge_result_type (STRING);
REQUIRED BYTE_ARRAY x_edge_request_id (STRING);
REQUIRED BYTE_ARRAY x_host_header (STRING);
REQUIRED BYTE_ARRAY cs_protocol (STRING);
REQUIRED INT64 cs_bytes (INTEGER(64,false));
REQUIRED DOUBLE time_taken;
OPTIONAL BYTE_ARRAY x_forwarded_for (STRING);
OPTIONAL BYTE_ARRAY ssl_protocol (STRING);
OPTIONAL BYTE_ARRAY ssl_cipher (STRING);
REQUIRED BYTE_ARRAY x_edge_response_result_type (STRING);
REQUIRED BYTE_ARRAY cs_protocol_version (STRING);
OPTIONAL BYTE_ARRAY fle_status (STRING);
OPTIONAL INT64 fle_encrypted_fields (INTEGER(64,false));
REQUIRED INT32 c_port (INTEGER(16,false));
REQUIRED DOUBLE time_to_first_byte;
REQUIRED BYTE_ARRAY x_edge_detailed_result_type (STRING);
OPTIONAL BYTE_ARRAY sc_content_type (STRING);
OPTIONAL INT64 sc_content_len (INTEGER(64,false));
OPTIONAL INT64 sc_range_start (INTEGER(64,true));
OPTIONAL INT64 sc_range_end (INTEGER(64,true));
    }"#;

    /// Schema version 1.
    ///
    /// Identical to [`V0`] except that `datetime` is annotated as `TIMESTAMP(MILLIS,true)`
    /// instead of `TIMESTAMP_MILLIS`.
    // derived from V0, but with considerations of the parquet format spec;
    // see https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
    //
    // notes:
    // * we cannot fix "time" yet, until upstream has proper type support
    // * docs are slightly confusing, keep in mind:
    //   * TIMESTAMP(MILLIS,true) is the correct way of writing the logical TS type
    //   * docs say "INT(64,false)", but we still have to write "INTEGER(64,false)"
    pub const V1: &str = r#"message rust_schema {
REQUIRED INT32 date (DATE);
REQUIRED BYTE_ARRAY time (STRING);
REQUIRED INT64 datetime (TIMESTAMP(MILLIS,true));
REQUIRED BYTE_ARRAY x_edge_location (STRING);
REQUIRED INT64 sc_bytes (INTEGER(64,false));
REQUIRED BYTE_ARRAY c_ip (STRING);
REQUIRED BYTE_ARRAY cs_method (STRING);
REQUIRED BYTE_ARRAY cs_host (STRING);
REQUIRED BYTE_ARRAY cs_uri_stem (STRING);
REQUIRED INT32 sc_status (INTEGER(16,false));
OPTIONAL BYTE_ARRAY cs_referer (STRING);
REQUIRED BYTE_ARRAY cs_user_agent (STRING);
OPTIONAL BYTE_ARRAY cs_uri_query (STRING);
OPTIONAL BYTE_ARRAY cs_cookie (STRING);
REQUIRED BYTE_ARRAY x_edge_result_type (STRING);
REQUIRED BYTE_ARRAY x_edge_request_id (STRING);
REQUIRED BYTE_ARRAY x_host_header (STRING);
REQUIRED BYTE_ARRAY cs_protocol (STRING);
REQUIRED INT64 cs_bytes (INTEGER(64,false));
REQUIRED DOUBLE time_taken;
OPTIONAL BYTE_ARRAY x_forwarded_for (STRING);
OPTIONAL BYTE_ARRAY ssl_protocol (STRING);
OPTIONAL BYTE_ARRAY ssl_cipher (STRING);
REQUIRED BYTE_ARRAY x_edge_response_result_type (STRING);
REQUIRED BYTE_ARRAY cs_protocol_version (STRING);
OPTIONAL BYTE_ARRAY fle_status (STRING);
OPTIONAL INT64 fle_encrypted_fields (INTEGER(64,false));
REQUIRED INT32 c_port (INTEGER(16,false));
REQUIRED DOUBLE time_to_first_byte;
REQUIRED BYTE_ARRAY x_edge_detailed_result_type (STRING);
OPTIONAL BYTE_ARRAY sc_content_type (STRING);
OPTIONAL INT64 sc_content_len (INTEGER(64,false));
OPTIONAL INT64 sc_range_start (INTEGER(64,true));
OPTIONAL INT64 sc_range_end (INTEGER(64,true));
    }"#;
}