vrl 0.32.0

Vector Remap Language
Documentation
{
  "anchor": "parse_nginx_log",
  "name": "parse_nginx_log",
  "category": "Parse",
  "description": "Parses Nginx access and error log lines. Lines can be in [`combined`](https://nginx.org/en/docs/http/ngx_http_log_module.html), [`ingress_upstreaminfo`](https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/log-format/), [`main`](https://hg.nginx.org/pkg-oss/file/tip/debian/debian/nginx.conf) or [`error`](https://github.com/nginx/nginx/blob/branches/stable-1.18/src/core/ngx_log.c#L102) format.",
  "arguments": [
    {
      "name": "value",
      "description": "The string to parse.",
      "required": true,
      "type": [
        "string"
      ]
    },
    {
      "name": "format",
      "description": "The format to use for parsing the log.",
      "required": true,
      "type": [
        "string"
      ],
      "enum": {
        "combined": "Nginx combined format",
        "error": "Default Nginx error format",
        "ingress_upstreaminfo": "Provides detailed upstream information (Nginx Ingress Controller)",
        "main": "Nginx main format used by Docker images"
      }
    },
    {
      "name": "timestamp_format",
      "description": "The [date/time format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html#specifiers) to use for parsing the timestamp. The time is parsed\nin local time if the timestamp doesn't specify a timezone. The default format is `%d/%b/%Y:%T %z` for\ncombined logs and `%Y/%m/%d %H:%M:%S` for error logs.",
      "required": false,
      "type": [
        "string"
      ],
      "default": "%d/%b/%Y:%T %z"
    }
  ],
  "return": {
    "types": [
      "object"
    ]
  },
  "internal_failure_reasons": [
    "`value` does not match the specified format.",
    "`timestamp_format` is not a valid format string.",
    "The timestamp in `value` fails to parse using the provided `timestamp_format`."
  ],
  "examples": [
    {
      "title": "Parse via Nginx log format (combined)",
      "source": "parse_nginx_log!(\n    s'172.17.0.1 - alice [01/Apr/2021:12:02:31 +0000] \"POST /not-found HTTP/1.1\" 404 153 \"http://localhost/somewhere\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36\" \"2.75\"',\n    \"combined\",\n)\n",
      "return": {
        "agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36",
        "client": "172.17.0.1",
        "compression": "2.75",
        "referer": "http://localhost/somewhere",
        "request": "POST /not-found HTTP/1.1",
        "size": 153,
        "status": 404,
        "timestamp": "2021-04-01T12:02:31Z",
        "user": "alice"
      }
    },
    {
      "title": "Parse via Nginx log format (error)",
      "source": "parse_nginx_log!(\n    s'2021/04/01 13:02:31 [error] 31#31: *1 open() \"/usr/share/nginx/html/not-found\" failed (2: No such file or directory), client: 172.17.0.1, server: localhost, request: \"POST /not-found HTTP/1.1\", host: \"localhost:8081\"',\n    \"error\"\n)\n",
      "return": {
        "cid": 1,
        "client": "172.17.0.1",
        "host": "localhost:8081",
        "message": "open() \"/usr/share/nginx/html/not-found\" failed (2: No such file or directory)",
        "pid": 31,
        "request": "POST /not-found HTTP/1.1",
        "server": "localhost",
        "severity": "error",
        "tid": 31,
        "timestamp": "2021-04-01T13:02:31Z"
      }
    },
    {
      "title": "Parse via Nginx log format (ingress_upstreaminfo)",
      "source": "parse_nginx_log!(\n    s'0.0.0.0 - bob [18/Mar/2023:15:00:00 +0000] \"GET /some/path HTTP/2.0\" 200 12312 \"https://10.0.0.1/some/referer\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36\" 462 0.050 [some-upstream-service-9000] [some-other-upstream-5000] 10.0.50.80:9000 19437 0.049 200 752178adb17130b291aefd8c386279e7',\n    \"ingress_upstreaminfo\"\n)\n",
      "return": {
        "body_bytes_size": 12312,
        "http_referer": "https://10.0.0.1/some/referer",
        "http_user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
        "proxy_alternative_upstream_name": "some-other-upstream-5000",
        "proxy_upstream_name": "some-upstream-service-9000",
        "remote_addr": "0.0.0.0",
        "remote_user": "bob",
        "req_id": "752178adb17130b291aefd8c386279e7",
        "request": "GET /some/path HTTP/2.0",
        "request_length": 462,
        "request_time": 0.05,
        "status": 200,
        "timestamp": "2023-03-18T15:00:00Z",
        "upstream_addr": "10.0.50.80:9000",
        "upstream_response_length": 19437,
        "upstream_response_time": 0.049,
        "upstream_status": 200
      }
    },
    {
      "title": "Parse via Nginx log format (main)",
      "source": "parse_nginx_log!(\n    s'172.24.0.3 - alice [31/Dec/2024:17:32:06 +0000] \"GET / HTTP/1.1\" 200 615 \"https://domain.tld/path\" \"curl/8.11.1\" \"1.2.3.4, 10.10.1.1\"',\n    \"main\"\n)\n",
      "return": {
        "body_bytes_size": 615,
        "http_referer": "https://domain.tld/path",
        "http_user_agent": "curl/8.11.1",
        "http_x_forwarded_for": "1.2.3.4, 10.10.1.1",
        "remote_addr": "172.24.0.3",
        "remote_user": "alice",
        "request": "GET / HTTP/1.1",
        "status": 200,
        "timestamp": "2024-12-31T17:32:06Z"
      }
    }
  ],
  "notices": [
    "Missing information in the log message may be indicated by `-`. These fields are\nomitted from the result.",
    "In the case of the `ingress_upstreaminfo` format, the following fields may be safely omitted\nfrom the log message: `remote_addr`, `remote_user`, `http_referer`, `http_user_agent`,\n`proxy_alternative_upstream_name`, `upstream_addr`, `upstream_response_length`,\n`upstream_response_time`, `upstream_status`."
  ],
  "pure": true
}