{
"version": "1.0",
"model_categories": {
"multi_vector": {
"description": "Multi-vector models outputting one vector per token (ColBERT, ColPali)",
"models": [
{
"id": "colbert-v2",
"type": "colbert",
"name": "ColBERT v2",
"huggingface_id": "colbert-ir/colbertv2.0",
"organization": "Stanford NLP",
"release_date": "2022",
"architecture": {
"type": "bert",
"variant": "bert-base",
"has_projection": true,
"projection_dims": 128
},
"specs": {
"parameters": "110M",
"embedding_dim": 128,
"hidden_dim": 768,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 30522
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": true,
"quantization": ["fp32", "fp16", "int8", "binary"]
},
"performance": {
"beir_avg": 0.52,
"ms_marco_mrr10": 0.39
},
"license": "MIT",
"description": "Original ColBERT v2 from Stanford, baseline for late interaction retrieval. Uses BERT-base with projection layer to 128 dimensions."
},
{
"id": "colbert-small",
"type": "colbert",
"name": "ColBERT Small",
"huggingface_id": "answerdotai/answerai-colbert-small-v1",
"organization": "Answer.AI",
"release_date": "2024",
"architecture": {
"type": "distilbert",
"variant": "distilbert-base",
"has_projection": true,
"projection_dims": 96
},
"specs": {
"parameters": "33M",
"embedding_dim": 96,
"hidden_dim": 384,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 30522
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": true,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.45,
"ms_marco_mrr10": 0.32
},
"license": "Apache-2.0",
"description": "Compact ColBERT variant based on DistilBERT. Recommended for development and testing due to smaller size and faster inference."
},
{
"id": "jina-colbert-v2",
"type": "colbert",
"name": "Jina ColBERT v2",
"huggingface_id": "jinaai/jina-colbert-v2",
"organization": "Jina AI",
"release_date": "2024",
"architecture": {
"type": "jina-bert",
"variant": "jina-bert-v2-base-en",
"has_projection": false,
"projection_dims": null
},
"specs": {
"parameters": "560M",
"embedding_dim": {
"default": 768,
"matryoshka": {
"min": 64,
"max": 768,
"supported": [64, 96, 128, 256, 384, 512, 768],
"strategy": "truncate_output"
}
},
"hidden_dim": 768,
"context_length": 8192,
"max_position_embeddings": 8192,
"vocab_size": 30528
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en", "de", "fr", "es", "it", "pt", "nl", "pl", "ru", "zh", "ja", "ko", "ar", "hi", "th", "tr", "vi", "id", "ms", "fa", "uk", "ro", "cs", "sv", "da", "no", "fi", "el", "he", "bg", "hr", "sk", "sl", "et", "lv", "lt", "hu", "ca", "eu", "gl", "cy", "sq", "mk", "sr", "bs", "mt", "is", "ga", "af", "sw", "zu", "xh", "st", "tn", "ny", "sn", "yo", "ig", "ha", "am", "ti", "om", "so", "mg", "mi", "sm", "to", "fj", "haw", "ht", "qu", "gn", "ay", "tt", "ug", "kk", "ky", "tg", "uz", "tk", "mn", "bo", "dz", "ne", "si", "my", "km", "lo"],
"modalities": ["text"],
"multi_vector": true,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.54,
"ms_marco_mrr10": 0.42
},
"license": "Apache-2.0",
"description": "Multilingual ColBERT supporting 89 languages with extended 8K context length. Supports Matryoshka representations from 64 to 768 dimensions."
},
{
"id": "jina-colbert-v2-96",
"type": "colbert",
"name": "Jina ColBERT v2 (96-dim)",
"huggingface_id": "jinaai/jina-colbert-v2",
"organization": "Jina AI",
"release_date": "2024",
"architecture": {
"type": "jina-bert",
"variant": "jina-bert-v2-base-en",
"has_projection": false,
"projection_dims": null
},
"specs": {
"parameters": "560M",
"embedding_dim": 96,
"hidden_dim": 768,
"context_length": 8192,
"max_position_embeddings": 8192,
"vocab_size": 30528
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en", "de", "fr", "es", "it", "pt", "nl", "pl", "ru", "zh", "ja", "ko", "ar", "hi", "th", "tr", "vi", "id", "ms", "fa", "uk", "ro", "cs", "sv", "da", "no", "fi", "el", "he", "bg", "hr", "sk", "sl", "et", "lv", "lt", "hu", "ca", "eu", "gl", "cy", "sq", "mk", "sr", "bs", "mt", "is", "ga", "af", "sw", "zu", "xh", "st", "tn", "ny", "sn", "yo", "ig", "ha", "am", "ti", "om", "so", "mg", "mi", "sm", "to", "fj", "haw", "ht", "qu", "gn", "ay", "tt", "ug", "kk", "ky", "tg", "uz", "tk", "mn", "bo", "dz", "ne", "si", "my", "km", "lo"],
"modalities": ["text"],
"multi_vector": true,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.53,
"ms_marco_mrr10": 0.41
},
"license": "Apache-2.0",
"description": "Jina ColBERT v2 at 96 dimensions for compact storage with minimal quality loss."
},
{
"id": "jina-colbert-v2-64",
"type": "colbert",
"name": "Jina ColBERT v2 (64-dim)",
"huggingface_id": "jinaai/jina-colbert-v2",
"organization": "Jina AI",
"release_date": "2024",
"architecture": {
"type": "jina-bert",
"variant": "jina-bert-v2-base-en",
"has_projection": false,
"projection_dims": null
},
"specs": {
"parameters": "560M",
"embedding_dim": 64,
"hidden_dim": 768,
"context_length": 8192,
"max_position_embeddings": 8192,
"vocab_size": 30528
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en", "de", "fr", "es", "it", "pt", "nl", "pl", "ru", "zh", "ja", "ko", "ar", "hi", "th", "tr", "vi", "id", "ms", "fa", "uk", "ro", "cs", "sv", "da", "no", "fi", "el", "he", "bg", "hr", "sk", "sl", "et", "lv", "lt", "hu", "ca", "eu", "gl", "cy", "sq", "mk", "sr", "bs", "mt", "is", "ga", "af", "sw", "zu", "xh", "st", "tn", "ny", "sn", "yo", "ig", "ha", "am", "ti", "om", "so", "mg", "mi", "sm", "to", "fj", "haw", "ht", "qu", "gn", "ay", "tt", "ug", "kk", "ky", "tg", "uz", "tk", "mn", "bo", "dz", "ne", "si", "my", "km", "lo"],
"modalities": ["text"],
"multi_vector": true,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.51,
"ms_marco_mrr10": 0.39
},
"license": "Apache-2.0",
"description": "Jina ColBERT v2 at 64 dimensions for maximum compactness."
},
{
"id": "colpali-v1.2",
"type": "vision-language",
"name": "ColPali v1.2 Merged",
"huggingface_id": "vidore/colpali-v1.2-merged",
"organization": "vidore",
"release_date": "2024",
"architecture": {
"type": "paligemma",
"variant": "paligemma-3b-mix-448",
"has_projection": true,
"projection_dims": 128,
"image_size": 448,
"patch_size": 14
},
"specs": {
"parameters": "3B",
"embedding_dim": 128,
"hidden_dim": 2048,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 257216
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["multilingual"],
"modalities": ["vision", "text"],
"multi_vector": true,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.0
},
"license": "gemma",
"description": "Vision-language ColBERT model for document retrieval. Encodes page images as multi-vector patch embeddings for OCR-free document search using late interaction. Based on PaliGemma-3B with fixed initialization and right padding."
},
{
"id": "colpali-v1.3-hf",
"type": "vision-language",
"name": "ColPali v1.3 HF",
"huggingface_id": "vidore/colpali-v1.3-hf",
"organization": "vidore",
"release_date": "2024",
"architecture": {
"type": "paligemma",
"variant": "paligemma-3b-mix-448",
"has_projection": true,
"projection_dims": 128,
"image_size": 448,
"patch_size": 14
},
"specs": {
"parameters": "3B",
"embedding_dim": 128,
"hidden_dim": 2048,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 257216
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["multilingual"],
"modalities": ["vision", "text"],
"multi_vector": true,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.0
},
"license": "gemma",
"description": "Latest ColPali vision-language model for document retrieval. Encodes page images as multi-vector patch embeddings for OCR-free document search using late interaction. Improved performance over v1.2 with average NDCG@5 of 0.546 on ViDoRe benchmark."
},
{
"id": "gte-modern-colbert",
"type": "colbert",
"name": "GTE-ModernColBERT v1",
"huggingface_id": "lightonai/GTE-ModernColBERT-v1",
"organization": "LightOn AI",
"release_date": "2025",
"architecture": {
"type": "modernbert",
"variant": "gte-modernbert-base",
"has_projection": false,
"projection_dims": null
},
"specs": {
"parameters": "130M",
"embedding_dim": 768,
"hidden_dim": 768,
"context_length": 8192,
"max_position_embeddings": 8192,
"vocab_size": 50370
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": true,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.68,
"ms_marco_mrr10": 0.75
},
"license": "Apache-2.0",
"description": "Modern ColBERT model based on ModernBERT architecture with improved reasoning performance. Uses global-local attention and extended 8K context length."
},
{
"id": "bge-m3-multi",
"type": "unified",
"name": "BGE-M3 (Multi-Vector Mode)",
"huggingface_id": "BAAI/bge-m3",
"organization": "BAAI",
"release_date": "2024",
"architecture": {
"type": "xlm-roberta",
"variant": "xlm-roberta-large",
"has_projection": false,
"projection_dims": null
},
"specs": {
"parameters": "568M",
"embedding_dim": 1024,
"hidden_dim": 1024,
"context_length": 8192,
"max_position_embeddings": 8192,
"vocab_size": 250002
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en", "zh", "es", "fr", "de", "ar", "hi", "ja", "ko", "ru", "th", "tr", "vi", "id", "ms", "fa", "uk", "ro", "cs", "sv", "da", "no", "fi", "el", "he", "bg", "hr", "sk", "sl", "et", "lv", "lt", "hu", "ca", "eu", "gl", "cy", "sq", "mk", "sr", "bs", "mt", "is", "ga", "af", "sw", "zu", "xh", "st", "tn", "ny", "sn", "yo", "ig", "ha", "am", "ti", "om", "so", "mg", "mi", "sm", "to", "fj", "haw", "ht", "qu", "gn", "ay", "tt", "ug", "kk", "ky", "tg", "uz", "tk", "mn", "bo", "dz", "ne", "si", "my", "km", "lo", "pl", "it", "pt", "nl"],
"modalities": ["text"],
"multi_vector": true,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.55,
"ms_marco_mrr10": 0.44
},
"license": "MIT",
"description": "Unified embedding model supporting dense, sparse, and multi-vector representations. Supports 100+ languages with 8K context. 1024 dimensions per token in multi-vector mode."
}
]
},
"dense": {
"description": "Dense models outputting single vector per input (BERT, GTE, E5, etc.)",
"models": [
{
"id": "gte-qwen2-7b",
"type": "dense",
"name": "GTE-Qwen2-7B",
"huggingface_id": "Alibaba-NLP/gte-Qwen2-7B-instruct",
"organization": "Alibaba",
"release_date": "2024",
"architecture": {
"type": "qwen2",
"variant": "qwen2-7b-decoder",
"has_projection": false,
"projection_dims": null
},
"pooling": {
"strategy": "mean",
"normalize": true
},
"specs": {
"parameters": "7B",
"embedding_dim": {
"default": 3584,
"matryoshka": {
"min": 512,
"max": 3584,
"supported": [512, 1024, 2048, 3584],
"strategy": "truncate_pooled"
}
},
"hidden_dim": 3584,
"context_length": 32768,
"max_position_embeddings": 32768,
"vocab_size": 151936
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en", "zh", "multilingual"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8", "int4"]
},
"performance": {
"beir_avg": 0.62,
"ms_marco_mrr10": 0.51
},
"license": "Apache-2.0",
"description": "State-of-the-art dense retrieval model based on Qwen2-7B decoder with bidirectional attention. Supports Matryoshka dimensions from 512 to 3584."
},
{
"id": "nomic-embed-v1.5",
"type": "dense",
"name": "Nomic Embed v1.5",
"huggingface_id": "nomic-ai/nomic-embed-text-v1.5",
"organization": "Nomic AI",
"release_date": "2024",
"architecture": {
"type": "bert",
"variant": "nomic-bert",
"has_projection": false,
"projection_dims": null
},
"pooling": {
"strategy": "mean",
"normalize": true
},
"specs": {
"parameters": "137M",
"embedding_dim": {
"default": 768,
"matryoshka": {
"min": 64,
"max": 768,
"supported": [64, 128, 256, 512, 768],
"strategy": "truncate_pooled"
}
},
"hidden_dim": 768,
"context_length": 8192,
"max_position_embeddings": 8192,
"vocab_size": 30528
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.54,
"ms_marco_mrr10": 0.43
},
"license": "Apache-2.0",
"description": "Efficient embedding model with strong performance and Matryoshka support. Extended context window of 8K tokens."
},
{
"id": "bge-base-en-v1.5",
"type": "dense",
"name": "BGE-Base-EN-v1.5",
"huggingface_id": "BAAI/bge-base-en-v1.5",
"organization": "BAAI",
"release_date": "2023",
"architecture": {
"type": "bert",
"variant": "bert-base",
"has_projection": false,
"projection_dims": null
},
"pooling": {
"strategy": "mean",
"normalize": true
},
"specs": {
"parameters": "109M",
"embedding_dim": 768,
"hidden_dim": 768,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 30522
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.53,
"ms_marco_mrr10": 0.42
},
"license": "MIT",
"description": "Strong baseline English embedding model from BAAI. Fixed 768-dimensional embeddings with excellent performance."
},
{
"id": "snowflake-arctic-l",
"type": "dense",
"name": "Snowflake Arctic Embed L",
"huggingface_id": "Snowflake/snowflake-arctic-embed-l-v2.0",
"organization": "Snowflake",
"release_date": "2024",
"architecture": {
"type": "bert",
"variant": "bert-large",
"has_projection": false,
"projection_dims": null
},
"pooling": {
"strategy": "mean",
"normalize": true
},
"specs": {
"parameters": "335M",
"embedding_dim": {
"default": 1024,
"matryoshka": {
"min": 256,
"max": 1024,
"supported": [256, 512, 768, 1024],
"strategy": "truncate_pooled"
}
},
"hidden_dim": 1024,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 30522
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.56,
"ms_marco_mrr10": 0.45
},
"license": "Apache-2.0",
"description": "High-performance large embedding model with Matryoshka support from Snowflake. Strong retrieval performance."
},
{
"id": "qwen3-embedding-8b",
"type": "dense",
"name": "Qwen3-Embedding-8B",
"huggingface_id": "Qwen/Qwen3-Embedding-8B",
"organization": "Alibaba",
"release_date": "2025",
"architecture": {
"type": "qwen3",
"variant": "qwen3-8b-decoder",
"has_projection": false,
"projection_dims": null
},
"pooling": {
"strategy": "mean",
"normalize": true
},
"specs": {
"parameters": "8B",
"embedding_dim": {
"default": 4096,
"matryoshka": {
"min": 32,
"max": 4096,
"supported": [32, 64, 128, 256, 512, 1024, 2048, 4096],
"strategy": "truncate_pooled"
}
},
"hidden_dim": 4096,
"context_length": 32768,
"max_position_embeddings": 32768,
"vocab_size": 151936
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en", "zh", "multilingual"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8", "int4"]
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.0
},
"license": "Apache-2.0",
"description": "State-of-the-art multilingual dense retrieval model based on Qwen3-8B with bidirectional attention. Ranks #1 on MTEB multilingual leaderboard with score of 70.58. Supports Matryoshka dimensions from 32 to 4096 and instruction-aware embeddings across 100+ languages."
},
{
"id": "qwen3-embedding-4b",
"type": "dense",
"name": "Qwen3-Embedding-4B",
"huggingface_id": "Qwen/Qwen3-Embedding-4B",
"organization": "Alibaba",
"release_date": "2025",
"architecture": {
"type": "qwen3",
"variant": "qwen3-4b-decoder",
"has_projection": false,
"projection_dims": null
},
"pooling": {
"strategy": "mean",
"normalize": true
},
"specs": {
"parameters": "4B",
"embedding_dim": {
"default": 2560,
"matryoshka": {
"min": 32,
"max": 2560,
"supported": [32, 64, 128, 256, 512, 1024, 2048, 2560],
"strategy": "truncate_pooled"
}
},
"hidden_dim": 2560,
"context_length": 32768,
"max_position_embeddings": 32768,
"vocab_size": 151936
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en", "zh", "multilingual"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8", "int4"]
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.0
},
"license": "Apache-2.0",
"description": "Efficient multilingual dense retrieval model based on Qwen3-4B. Achieves 69.45 MTEB score with strong retrieval performance. Supports Matryoshka dimensions from 32 to 2560 and instruction-aware embeddings across 100+ languages."
},
{
"id": "qwen3-embedding-0.6b",
"type": "dense",
"name": "Qwen3-Embedding-0.6B",
"huggingface_id": "Qwen/Qwen3-Embedding-0.6B",
"organization": "Alibaba",
"release_date": "2025",
"architecture": {
"type": "qwen3",
"variant": "qwen3-0.6b-decoder",
"has_projection": false,
"projection_dims": null
},
"pooling": {
"strategy": "mean",
"normalize": true
},
"specs": {
"parameters": "0.6B",
"embedding_dim": {
"default": 1024,
"matryoshka": {
"min": 32,
"max": 1024,
"supported": [32, 64, 128, 256, 512, 1024],
"strategy": "truncate_pooled"
}
},
"hidden_dim": 1024,
"context_length": 32768,
"max_position_embeddings": 32768,
"vocab_size": 151936
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en", "zh", "multilingual"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8", "int4"]
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.0
},
"license": "Apache-2.0",
"description": "Compact multilingual dense retrieval model based on Qwen3-0.6B with 28 layers. Achieves 64.33 MTEB score with efficient inference. Supports Matryoshka dimensions from 32 to 1024 and instruction-aware embeddings across 100+ languages."
},
{
"id": "jina-embeddings-v3",
"type": "dense",
"name": "Jina Embeddings v3",
"huggingface_id": "jinaai/jina-embeddings-v3",
"organization": "Jina AI",
"release_date": "2024",
"architecture": {
"type": "xlm-roberta",
"variant": "jina-xlm-roberta",
"has_projection": false,
"projection_dims": null
},
"pooling": {
"strategy": "mean",
"normalize": true
},
"specs": {
"parameters": "570M",
"embedding_dim": {
"default": 1024,
"matryoshka": {
"min": 32,
"max": 1024,
"supported": [32, 64, 128, 256, 512, 768, 1024],
"strategy": "truncate_pooled"
}
},
"hidden_dim": 1024,
"context_length": 8192,
"max_position_embeddings": 8192,
"vocab_size": 250002
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en", "zh", "de", "fr", "es", "it", "pt", "ja", "ko", "ar", "ru", "multilingual"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.0
},
"license": "Apache-2.0",
"description": "Frontier multilingual embedding model with 570M parameters based on XLM-RoBERTa with 24 layers. Features task-specific LoRA adapters for retrieval, classification, and text-matching. Supports 89 languages with extended 8K context using RoPE. Ranks 2nd on MTEB English leaderboard for models under 1B parameters."
}
]
},
"sparse": {
"description": "Sparse models with vocabulary-sized vectors (SPLADE, uniCOIL)",
"models": [
{
"id": "splade-v3",
"type": "sparse",
"name": "SPLADE v3",
"huggingface_id": "naver/splade-v3",
"organization": "Naver Labs",
"release_date": "2023",
"architecture": {
"type": "bert",
"variant": "distilbert-base",
"has_projection": false,
"projection_dims": null
},
"specs": {
"parameters": "66M",
"embedding_dim": 30522,
"hidden_dim": 768,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 30522
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16"]
},
"performance": {
"beir_avg": 0.49,
"ms_marco_mrr10": 0.38
},
"license": "CC-BY-NC-SA-4.0",
"description": "Sparse lexical retrieval using learned term expansion. Vocabulary-sized vectors with 99.82% sparsity for efficient inverted index storage."
},
{
"id": "minicoil-v1",
"type": "sparse",
"name": "miniCOIL v1",
"huggingface_id": "Qdrant/minicoil-v1",
"organization": "Qdrant",
"release_date": "2024",
"architecture": {
"type": "bert",
"variant": "distilbert-base",
"has_projection": true,
"projection_dims": 4
},
"specs": {
"parameters": "66M",
"embedding_dim": 4,
"hidden_dim": 768,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 30522
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.46,
"ms_marco_mrr10": 0.35
},
"license": "Apache-2.0",
"description": "Compact sparse retrieval model with 4-dimensional term vectors. Highly efficient for inverted index storage with competitive performance."
},
{
"id": "splade-pp-en-v1",
"type": "sparse",
"name": "SPLADE++ EN v1",
"huggingface_id": "prithivida/Splade_PP_en_v1",
"organization": "prithivida",
"release_date": "2024",
"architecture": {
"type": "bert",
"variant": "bert-base-uncased",
"has_projection": false,
"has_mlm_head": true
},
"specs": {
"parameters": "109M",
"embedding_dim": 30522,
"hidden_dim": 768,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 30522
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin",
"onnx": "onnx/model.onnx"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": false,
"quantization": []
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.3722
},
        "license": "Apache-2.0",
"description": "SPLADE++ efficient sparse embedding model with automatic token expansion for retrieval tasks. Uses BERT-base with MLM head and restrictive FLOPS schedule (doc:128, query:24 tokens)."
},
{
"id": "splade-pp-en-v2",
"type": "sparse",
"name": "SPLADE++ EN v2",
"huggingface_id": "prithivida/Splade_PP_en_v2",
"organization": "prithivida",
"release_date": "2024",
"architecture": {
"type": "bert",
"variant": "bert-base-uncased",
"has_projection": false,
"has_mlm_head": true
},
"specs": {
"parameters": "109M",
"embedding_dim": 30522,
"hidden_dim": 768,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 30522
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin",
"onnx": "onnx/model.onnx"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": false,
"quantization": []
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.378
},
        "license": "Apache-2.0",
"description": "Improved SPLADE++ v2 with middle-trained BERT-base (MLM loss) for better corpus awareness. Achieves 37.8 MRR@10 with efficient token budget and 48.81ms retrieval latency."
}
]
},
"timeseries": {
"description": "Time series foundation models (TimesFM, Chronos)",
"models": [
{
"id": "timesfm-1.0-200m",
"type": "timeseries",
"name": "TimesFM 1.0 200M",
"huggingface_id": "google/timesfm-1.0-200m",
"organization": "Google Research",
"release_date": "2024",
"architecture": {
"type": "decoder-transformer",
"variant": "patched-decoder",
"has_projection": false,
"projection_dims": null
},
"specs": {
"parameters": "200M",
"embedding_dim": 1280,
"hidden_dim": 1280,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 0
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": [],
"modalities": ["timeseries"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.0
},
"license": "Apache-2.0",
"description": "Pre-trained time series foundation model using decoder-only transformer. Supports context of 512 time points with strong zero-shot forecasting."
},
{
"id": "chronos-bolt-small",
"type": "timeseries",
"name": "Chronos Bolt Small",
"huggingface_id": "amazon/chronos-bolt-small",
"organization": "Amazon",
"release_date": "2024",
"architecture": {
"type": "t5",
"variant": "t5-encoder-decoder",
"has_projection": true,
"projection_dims": 512
},
"specs": {
"parameters": "48M",
"embedding_dim": 512,
"hidden_dim": 2048,
"context_length": 2048,
"prediction_length": 64,
"max_position_embeddings": 2048,
"vocab_size": 2,
"num_encoder_layers": 6,
"num_decoder_layers": 6,
"num_heads": 8,
"d_kv": 64,
"patch_size": 32,
"num_quantiles": 9
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": [],
"modalities": ["timeseries"],
"multi_vector": false,
"probabilistic": true,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.0
},
"license": "Apache-2.0",
"description": "Efficient T5 encoder-decoder model for probabilistic time series forecasting. Uses continuous patch-based encoding (NOT discrete tokenization like original Chronos). Predicts 9 quantiles for uncertainty quantification. 250x faster than original Chronos. Trained on 100B observations with direct multi-step forecasting."
}
]
},
"geometric": {
"description": "Non-Euclidean embeddings (hyperbolic, spherical, quaternion)",
"models": []
}
}
}