token-count 0.4.0

Count tokens for LLM models using exact tokenization
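
A minimal sketch of exact token counting, assuming the tiktoken library provides the cl100k_base encoding (token-count's own API is not shown here and may differ):

import tiktoken

enc = tiktoken.get_encoding("cl100k_base")  # exact BPE encoding used by GPT-4 / GPT-3.5-turbo
tokens = enc.encode("Hello world")          # encode the string into a list of token ids
print(len(tokens))                          # 2, matching the fixture below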
Test fixtures: input strings and their expected token counts for the cl100k_base and o200k_base encodings.
{
  "cl100k_base": [
    {
      "input": "Hello world",
      "expected_tokens": 2
    },
    {
      "input": "The quick brown fox jumps over the lazy dog",
      "expected_tokens": 9
    },
    {
      "input": "",
      "expected_tokens": 0
    },
    {
      "input": " ",
      "expected_tokens": 1
    },
    {
      "input": "\n",
      "expected_tokens": 1
    },
    {
      "input": "a",
      "expected_tokens": 1
    },
    {
      "input": "Hello δΈ–η•Œ 🌍",
      "expected_tokens": 8
    },
    {
      "input": "Emoji test: πŸŽ‰πŸŽŠπŸŽˆ",
      "expected_tokens": 12
    },
    {
      "input": "Japanese: γ“γ‚“γ«γ‘γ―δΈ–η•Œ",
      "expected_tokens": 7
    },
    {
      "input": "Mixed: Hello δΈ–η•Œ test 123",
      "expected_tokens": 10
    }
  ],
  "o200k_base": [
    {
      "input": "Hello world",
      "expected_tokens": 2
    },
    {
      "input": "The quick brown fox jumps over the lazy dog",
      "expected_tokens": 9
    },
    {
      "input": "",
      "expected_tokens": 0
    },
    {
      "input": " ",
      "expected_tokens": 1
    },
    {
      "input": "Hello δΈ–η•Œ 🌍",
      "expected_tokens": 4
    }
  ]
}
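
The fixtures above can be checked mechanically against an exact tokenizer. The sketch below again assumes tiktoken (o200k_base requires a recent tiktoken release); it inlines a small subset of the fixtures for brevity and compares len(encode(input)) with expected_tokens:

import tiktoken

# Subset of the fixtures above: each entry pairs an input string with the
# token count expected from exact tokenization under that encoding.
FIXTURES = {
    "cl100k_base": [
        {"input": "Hello world", "expected_tokens": 2},
        {"input": "The quick brown fox jumps over the lazy dog", "expected_tokens": 9},
        {"input": "", "expected_tokens": 0},
    ],
    "o200k_base": [
        {"input": "Hello world", "expected_tokens": 2},
    ],
}

for encoding_name, cases in FIXTURES.items():
    enc = tiktoken.get_encoding(encoding_name)   # load the exact tokenizer by name
    for case in cases:
        actual = len(enc.encode(case["input"]))  # exact token count for this input
        status = "ok" if actual == case["expected_tokens"] else "mismatch"
        print(f"{encoding_name}: {case['input']!r} -> {actual} ({status})")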