pingap_logger/
access.rs

1// Copyright 2024-2025 Tree xie.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use bytes::BytesMut;
16use chrono::{Local, Utc};
17use pingap_core::{get_hostname, Ctx, HOST_NAME_TAG};
18use pingap_util::{format_byte_size, format_duration};
19use pingora::http::ResponseHeader;
20use pingora::proxy::Session;
21use regex::Regex;
22use substring::Substring;
23
24// Enum representing different types of log tags that can be used in the logging format
25#[derive(Debug, Clone, PartialEq)]
26pub enum TagCategory {
27    Fill,     // Static text
28    Host,     // Server hostname
29    Method,   // HTTP method (GET, POST, etc.)
30    Path,     // Request path
31    Proto,    // Protocol version
32    Query,    // Query parameters
33    Remote,   // Remote address
34    ClientIp, // Client IP address
35    Scheme,
36    Uri,
37    Referrer,
38    UserAgent,
39    When,
40    WhenUtcIso,
41    WhenUnix,
42    Size,
43    SizeHuman,
44    Status,
45    Latency,
46    LatencyHuman,
47    Cookie,
48    RequestHeader,
49    ResponseHeader,
50    Context,
51    PayloadSize,
52    PayloadSizeHuman,
53    RequestId,
54}
55
56// Represents a single tag in the log format
57#[derive(Debug, Clone)]
58pub struct Tag {
59    pub category: TagCategory,
60    pub data: Option<String>, // Optional data associated with the tag
61}
62
63#[derive(Debug, Default, Clone)]
64pub struct Parser {
65    pub needs_timestamp: bool,
66    pub capacity: usize,
67    pub tags: Vec<Tag>,
68}
69
70// Parses special tags with prefixes like ~, >, <, :, $
71fn format_extra_tag(key: &str) -> Option<Tag> {
72    // Requires at least 2 chars (prefix + content)
73    if key.len() < 2 {
74        return None;
75    }
76    let key = key.substring(1, key.len() - 1);
77    let ch = key.substring(0, 1);
78    let value = key.substring(1, key.len());
79    match ch {
80        "~" => Some(Tag {
81            // Cookie values
82            category: TagCategory::Cookie,
83            data: Some(value.to_string()),
84        }),
85        ">" => Some(Tag {
86            // Request headers
87            category: TagCategory::RequestHeader,
88            data: Some(value.to_string()),
89        }),
90        "<" => Some(Tag {
91            // Response headers
92            category: TagCategory::ResponseHeader,
93            data: Some(value.to_string()),
94        }),
95        ":" => Some(Tag {
96            category: TagCategory::Context,
97            data: Some(value.to_string()),
98        }),
99        "$" => {
100            if key.as_bytes() == HOST_NAME_TAG {
101                Some(Tag {
102                    category: TagCategory::Fill,
103                    data: Some(get_hostname().to_string()),
104                })
105            } else {
106                Some(Tag {
107                    category: TagCategory::Fill,
108                    data: Some(std::env::var(value).unwrap_or_default()),
109                })
110            }
111        },
112        _ => None,
113    }
114}
115
116// Predefined log formats
117static COMBINED: &str = r###"{remote} "{method} {uri} {proto}" {status} {size_human} "{referer}" "{user_agent}""###;
118static COMMON: &str =
119    r###"{remote} "{method} {uri} {proto}" {status} {size_human}""###;
120static SHORT: &str = r###"{remote} {method} {uri} {proto} {status} {size_human} - {latency}ms"###;
121static TINY: &str = r###"{method} {uri} {status} {size_human} - {latency}ms"###;
122
123impl From<&str> for Parser {
124    fn from(value: &str) -> Self {
125        let value = match value {
126            "combined" => COMBINED,
127            "common" => COMMON,
128            "short" => SHORT,
129            "tiny" => TINY,
130            _ => value,
131        };
132        let reg = Regex::new(r"(\{[a-zA-Z_<>\-~:$]+*\})").unwrap();
133        let mut current = 0;
134        let mut end = 0;
135        let mut tags = vec![];
136
137        while let Some(result) = reg.find_at(value, current) {
138            if end < result.start() {
139                tags.push(Tag {
140                    category: TagCategory::Fill,
141                    data: Some(
142                        value.substring(end, result.start()).to_string(),
143                    ),
144                });
145            }
146            let key = result.as_str();
147
148            match key {
149                "{host}" => tags.push(Tag {
150                    category: TagCategory::Host,
151                    data: None,
152                }),
153                "{method}" => tags.push(Tag {
154                    category: TagCategory::Method,
155                    data: None,
156                }),
157                "{path}" => tags.push(Tag {
158                    category: TagCategory::Path,
159                    data: None,
160                }),
161                "{proto}" => tags.push(Tag {
162                    category: TagCategory::Proto,
163                    data: None,
164                }),
165                "{query}" => tags.push(Tag {
166                    category: TagCategory::Query,
167                    data: None,
168                }),
169                "{remote}" => tags.push(Tag {
170                    category: TagCategory::Remote,
171                    data: None,
172                }),
173                "{client_ip}" => tags.push(Tag {
174                    category: TagCategory::ClientIp,
175                    data: None,
176                }),
177                "{scheme}" => tags.push(Tag {
178                    category: TagCategory::Scheme,
179                    data: None,
180                }),
181                "{uri}" => tags.push(Tag {
182                    category: TagCategory::Uri,
183                    data: None,
184                }),
185                "{referer}" => tags.push(Tag {
186                    category: TagCategory::Referrer,
187                    data: None,
188                }),
189                "{user_agent}" => tags.push(Tag {
190                    category: TagCategory::UserAgent,
191                    data: None,
192                }),
193                "{when}" => tags.push(Tag {
194                    category: TagCategory::When,
195                    data: None,
196                }),
197                "{when_utc_iso}" => tags.push(Tag {
198                    category: TagCategory::WhenUtcIso,
199                    data: None,
200                }),
201                "{when_unix}" => tags.push(Tag {
202                    category: TagCategory::WhenUnix,
203                    data: None,
204                }),
205                "{size}" => tags.push(Tag {
206                    category: TagCategory::Size,
207                    data: None,
208                }),
209                "{size_human}" => tags.push(Tag {
210                    category: TagCategory::SizeHuman,
211                    data: None,
212                }),
213                "{status}" => tags.push(Tag {
214                    category: TagCategory::Status,
215                    data: None,
216                }),
217                "{latency}" => tags.push(Tag {
218                    category: TagCategory::Latency,
219                    data: None,
220                }),
221                "{latency_human}" => tags.push(Tag {
222                    category: TagCategory::LatencyHuman,
223                    data: None,
224                }),
225                "{payload_size}" => tags.push(Tag {
226                    category: TagCategory::PayloadSize,
227                    data: None,
228                }),
229                "{payload_size_human}" => tags.push(Tag {
230                    category: TagCategory::PayloadSizeHuman,
231                    data: None,
232                }),
233                "{request_id}" => tags.push(Tag {
234                    category: TagCategory::RequestId,
235                    data: None,
236                }),
237                _ => {
238                    if let Some(tag) = format_extra_tag(key) {
239                        tags.push(tag);
240                    }
241                },
242            }
243
244            end = result.end();
245            current = result.start() + 1;
246        }
247        if end < value.len() {
248            tags.push(Tag {
249                category: TagCategory::Fill,
250                data: Some(value.substring(end, value.len()).to_string()),
251            });
252        }
253        let needs_timestamp = tags.iter().any(|t| {
254            matches!(
255                t.category,
256                TagCategory::When
257                    | TagCategory::WhenUtcIso
258                    | TagCategory::WhenUnix
259                    | TagCategory::Latency
260                    | TagCategory::LatencyHuman
261            )
262        });
263        let capacity = Parser::estimate_capacity(&tags);
264        Parser {
265            capacity,
266            tags,
267            needs_timestamp,
268        }
269    }
270}
271
272fn get_resp_header_value<'a>(
273    resp_header: &'a ResponseHeader,
274    key: &str,
275) -> Option<&'a [u8]> {
276    resp_header.headers.get(key).map(|v| v.as_bytes())
277}
278
279impl Parser {
280    // Add a method to estimate capacity based on tag types
281    fn estimate_capacity(tags: &[Tag]) -> usize {
282        // Base size plus estimation for each tag type
283        let mut size = 128; // Base size
284        for tag in tags {
285            size += match tag.category {
286                TagCategory::Fill => tag.data.as_ref().map_or(0, |s| s.len()),
287                TagCategory::Uri | TagCategory::Path => 64, // URIs can be long
288                TagCategory::UserAgent => 100, // User agents are often long
289                // Add more specific estimates for other tag types
290                _ => 16, // Default estimate for other tags
291            };
292        }
293        size
294    }
295    // Formats a log entry based on the session and context
296    pub fn format(&self, session: &Session, ctx: &Ctx) -> String {
297        // Better capacity estimation based on tag types and count
298        let mut buf = BytesMut::with_capacity(self.capacity);
299        let req_header = session.req_header();
300
301        // Then only calculate if needed
302        let (now, now_ms) = if self.needs_timestamp {
303            let n = Utc::now();
304            (Some(n), Some(n.timestamp_millis() as u64))
305        } else {
306            (None, None)
307        };
308
309        // Process each tag in the format string
310        for tag in self.tags.iter() {
311            match tag.category {
312                TagCategory::Fill => {
313                    // Static text, just append it
314                    if let Some(data) = &tag.data {
315                        buf.extend_from_slice(data.as_bytes());
316                    }
317                },
318                TagCategory::Host => {
319                    // Add the host from request headers
320                    if let Some(host) = pingap_core::get_host(req_header) {
321                        buf.extend_from_slice(host.as_bytes());
322                    }
323                },
324                TagCategory::Method => {
325                    buf.extend_from_slice(
326                        req_header.method.as_str().as_bytes(),
327                    );
328                },
329                TagCategory::Path => {
330                    buf.extend_from_slice(req_header.uri.path().as_bytes());
331                },
332                TagCategory::Proto => {
333                    if session.is_http2() {
334                        buf.extend_from_slice(b"HTTP/2.0");
335                    } else {
336                        buf.extend_from_slice(b"HTTP/1.1");
337                    }
338                },
339                TagCategory::Query => {
340                    if let Some(query) = req_header.uri.query() {
341                        buf.extend_from_slice(query.as_bytes());
342                    }
343                },
344                TagCategory::Remote => {
345                    if let Some(addr) = &ctx.remote_addr {
346                        buf.extend_from_slice(addr.as_bytes());
347                    }
348                },
349                TagCategory::ClientIp => {
350                    if let Some(client_ip) = &ctx.client_ip {
351                        buf.extend_from_slice(client_ip.as_bytes());
352                    } else {
353                        buf.extend_from_slice(
354                            pingap_core::get_client_ip(session).as_bytes(),
355                        );
356                    }
357                },
358                TagCategory::Scheme => {
359                    if ctx.tls_version.is_some() {
360                        buf.extend_from_slice(b"https");
361                    } else {
362                        buf.extend_from_slice(b"http");
363                    }
364                },
365                TagCategory::Uri => {
366                    if let Some(value) = req_header.uri.path_and_query() {
367                        buf.extend_from_slice(value.as_str().as_bytes());
368                    }
369                },
370                TagCategory::Referrer => {
371                    let value = session.get_header_bytes("Referer");
372                    buf.extend_from_slice(value);
373                },
374                TagCategory::UserAgent => {
375                    let value = session.get_header_bytes("User-Agent");
376                    buf.extend_from_slice(value);
377                },
378                TagCategory::When => {
379                    if let Some(now) = &now {
380                        buf.extend_from_slice(
381                            now.with_timezone(&Local).to_rfc3339().as_bytes(),
382                        );
383                    }
384                },
385                TagCategory::WhenUtcIso => {
386                    if let Some(now) = &now {
387                        buf.extend_from_slice(now.to_rfc3339().as_bytes());
388                    }
389                },
390                TagCategory::WhenUnix => {
391                    if let Some(now_ms) = now_ms {
392                        buf.extend_from_slice(
393                            itoa::Buffer::new().format(now_ms).as_bytes(),
394                        );
395                    }
396                },
397                TagCategory::Size => {
398                    buf.extend_from_slice(
399                        itoa::Buffer::new()
400                            .format(session.body_bytes_sent())
401                            .as_bytes(),
402                    );
403                },
404                TagCategory::SizeHuman => {
405                    buf = format_byte_size(buf, session.body_bytes_sent());
406                },
407                TagCategory::Status => {
408                    if let Some(status) = ctx.status {
409                        buf.extend_from_slice(status.as_str().as_bytes());
410                    } else {
411                        buf.extend_from_slice(b"0");
412                    }
413                },
414                TagCategory::Latency => {
415                    if let Some(now_ms) = now_ms {
416                        let ms = now_ms - ctx.created_at;
417                        buf.extend_from_slice(
418                            itoa::Buffer::new().format(ms).as_bytes(),
419                        );
420                    }
421                },
422                TagCategory::LatencyHuman => {
423                    if let Some(now_ms) = now_ms {
424                        let ms = now_ms - ctx.created_at;
425                        buf = format_duration(buf, ms);
426                    }
427                },
428                TagCategory::Cookie => {
429                    if let Some(cookie) = &tag.data {
430                        if let Some(value) =
431                            pingap_core::get_cookie_value(req_header, cookie)
432                        {
433                            buf.extend_from_slice(value.as_bytes());
434                        }
435                    }
436                },
437                TagCategory::RequestHeader => {
438                    if let Some(key) = &tag.data {
439                        if let Some(value) = req_header.headers.get(key) {
440                            buf.extend_from_slice(value.as_bytes());
441                        }
442                    }
443                },
444                TagCategory::ResponseHeader => {
445                    if let Some(resp_header) = session.response_written() {
446                        if let Some(key) = &tag.data {
447                            if let Some(value) =
448                                get_resp_header_value(resp_header, key)
449                            {
450                                buf.extend_from_slice(value);
451                            }
452                        }
453                    }
454                },
455                TagCategory::PayloadSize => {
456                    buf.extend_from_slice(
457                        itoa::Buffer::new().format(ctx.payload_size).as_bytes(),
458                    );
459                },
460                TagCategory::PayloadSizeHuman => {
461                    buf = format_byte_size(buf, ctx.payload_size);
462                },
463                TagCategory::RequestId => {
464                    if let Some(key) = &ctx.request_id {
465                        buf.extend_from_slice(key.as_bytes());
466                    }
467                },
468                TagCategory::Context => {
469                    if let Some(key) = &tag.data {
470                        buf = ctx.append_value(buf, key.as_str());
471                    }
472                },
473            };
474        }
475
476        std::string::String::from_utf8(buf.into()).unwrap_or_default()
477    }
478}
479
480#[cfg(test)]
481mod tests {
482    use super::{format_extra_tag, Parser, Tag, TagCategory};
483    use http::Method;
484    use pingap_core::Ctx;
485    use pingora::proxy::Session;
486    use pretty_assertions::assert_eq;
487    use tokio_test::io::Builder;
488
489    #[test]
490    fn test_format_extra_tag() {
491        assert_eq!(true, format_extra_tag(":").is_none());
492
493        let cookie = format_extra_tag("{~deviceId}").unwrap();
494        assert_eq!(TagCategory::Cookie, cookie.category);
495        assert_eq!("deviceId", cookie.data.unwrap());
496
497        let req_header = format_extra_tag("{>X-User}").unwrap();
498        assert_eq!(TagCategory::RequestHeader, req_header.category);
499        assert_eq!("X-User", req_header.data.unwrap());
500
501        let resp_header = format_extra_tag("{<X-Response-Id}").unwrap();
502        assert_eq!(TagCategory::ResponseHeader, resp_header.category);
503        assert_eq!("X-Response-Id", resp_header.data.unwrap());
504
505        let hostname = format_extra_tag("{$hostname}").unwrap();
506        assert_eq!(TagCategory::Fill, hostname.category);
507        assert_eq!(false, hostname.data.unwrap().is_empty());
508
509        let env = format_extra_tag("{$HOME}").unwrap();
510        assert_eq!(TagCategory::Fill, env.category);
511        assert_eq!(false, env.data.unwrap().is_empty());
512    }
513    #[test]
514    fn test_parse_format() {
515        let tests = [
516            (
517                "{host}",
518                Tag {
519                    category: TagCategory::Host,
520                    data: None,
521                },
522            ),
523            (
524                "{method}",
525                Tag {
526                    category: TagCategory::Method,
527                    data: None,
528                },
529            ),
530            (
531                "{path}",
532                Tag {
533                    category: TagCategory::Path,
534                    data: None,
535                },
536            ),
537            (
538                "{proto}",
539                Tag {
540                    category: TagCategory::Proto,
541                    data: None,
542                },
543            ),
544            (
545                "{query}",
546                Tag {
547                    category: TagCategory::Query,
548                    data: None,
549                },
550            ),
551            (
552                "{remote}",
553                Tag {
554                    category: TagCategory::Remote,
555                    data: None,
556                },
557            ),
558            (
559                "{client_ip}",
560                Tag {
561                    category: TagCategory::ClientIp,
562                    data: None,
563                },
564            ),
565            (
566                "{scheme}",
567                Tag {
568                    category: TagCategory::Scheme,
569                    data: None,
570                },
571            ),
572            (
573                "{uri}",
574                Tag {
575                    category: TagCategory::Uri,
576                    data: None,
577                },
578            ),
579            (
580                "{referer}",
581                Tag {
582                    category: TagCategory::Referrer,
583                    data: None,
584                },
585            ),
586            (
587                "{user_agent}",
588                Tag {
589                    category: TagCategory::UserAgent,
590                    data: None,
591                },
592            ),
593            (
594                "{when}",
595                Tag {
596                    category: TagCategory::When,
597                    data: None,
598                },
599            ),
600            (
601                "{when_utc_iso}",
602                Tag {
603                    category: TagCategory::WhenUtcIso,
604                    data: None,
605                },
606            ),
607            (
608                "{when_unix}",
609                Tag {
610                    category: TagCategory::WhenUnix,
611                    data: None,
612                },
613            ),
614            (
615                "{size}",
616                Tag {
617                    category: TagCategory::Size,
618                    data: None,
619                },
620            ),
621            (
622                "{size_human}",
623                Tag {
624                    category: TagCategory::SizeHuman,
625                    data: None,
626                },
627            ),
628            (
629                "{status}",
630                Tag {
631                    category: TagCategory::Status,
632                    data: None,
633                },
634            ),
635            (
636                "{latency}",
637                Tag {
638                    category: TagCategory::Latency,
639                    data: None,
640                },
641            ),
642            (
643                "{latency_human}",
644                Tag {
645                    category: TagCategory::LatencyHuman,
646                    data: None,
647                },
648            ),
649            (
650                "{payload_size}",
651                Tag {
652                    category: TagCategory::PayloadSize,
653                    data: None,
654                },
655            ),
656            (
657                "{payload_size_human}",
658                Tag {
659                    category: TagCategory::PayloadSizeHuman,
660                    data: None,
661                },
662            ),
663            (
664                "{request_id}",
665                Tag {
666                    category: TagCategory::RequestId,
667                    data: None,
668                },
669            ),
670        ];
671
672        for (value, tag) in tests {
673            let p = Parser::from(value);
674            assert_eq!(tag.category, p.tags[0].category);
675        }
676    }
677
678    #[tokio::test]
679    async fn test_logger() {
680        let p: Parser =
681            "{host} {method} {path} {proto} {query} {remote} {client_ip} \
682{scheme} {uri} {referer} {user_agent} {size} \
683{size_human} {status} {payload_size} {payload_size_human} \
684{~deviceId} {>accept} {:upstream_reused} {:upstream_addr} \
685{:processing} {:upstream_connect_time_human} {:location} \
686{:connection_time_human} {:tls_version} {request_id}"
687                .into();
688        let headers = [
689            "Host: github.com",
690            "Referer: https://github.com/",
691            "User-Agent: pingap/0.1.1",
692            "Cookie: deviceId=abc",
693            "Accept: application/json",
694        ]
695        .join("\r\n");
696        let input_header =
697            format!("GET /vicanso/pingap?size=1 HTTP/1.1\r\n{headers}\r\n\r\n");
698        let mock_io = Builder::new().read(input_header.as_bytes()).build();
699
700        let mut session = Session::new_h1(Box::new(mock_io));
701        session.read_request().await.unwrap();
702        assert_eq!(Method::GET, session.req_header().method);
703
704        let ctx = Ctx {
705            upstream_reused: true,
706            upstream_address: "192.186.1.1:6188".to_string(),
707            remote_addr: Some("10.1.1.1".to_string()),
708            client_ip: Some("1.1.1.1".to_string()),
709            processing: 1,
710            upstream_connect_time: Some(100),
711            location: "test".to_string(),
712            connection_time: 300,
713            tls_version: Some("1.2".to_string()),
714            request_id: Some("nanoid".to_string()),
715            ..Default::default()
716        };
717        let log = p.format(&session, &ctx);
718        assert_eq!(
719            "github.com GET /vicanso/pingap HTTP/1.1 size=1 10.1.1.1 1.1.1.1 https /vicanso/pingap?size=1 https://github.com/ pingap/0.1.1 0 0B 0 0 0B abc application/json true 192.186.1.1:6188 1 100ms test 300ms 1.2 nanoid",
720            log
721        );
722
723        let p: Parser = "{when_utc_iso}".into();
724        let log = p.format(&session, &ctx);
725        assert_eq!(true, log.len() > 25);
726
727        let p: Parser = "{when}".into();
728        let log = p.format(&session, &ctx);
729        assert_eq!(true, log.len() > 25);
730
731        let p: Parser = "{when_unix}".into();
732        let log = p.format(&session, &ctx);
733        assert_eq!(true, log.len() == 13);
734    }
735}