{
"version": "1.0",
"model_categories": {
"multi_vector": {
"description": "Multi-vector models outputting one vector per token (ColBERT, ColPali)",
"models": [
{
"id": "colbert-v2",
"type": "colbert",
"name": "ColBERT v2",
"huggingface_id": "colbert-ir/colbertv2.0",
"organization": "Stanford NLP",
"release_date": "2022",
"architecture": {
"type": "bert",
"variant": "bert-base",
"has_projection": true,
"projection_dims": 128
},
"specs": {
"parameters": "110M",
"embedding_dim": 128,
"hidden_dim": 768,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 30522
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": true,
"quantization": ["fp32", "fp16", "int8", "binary"]
},
"performance": {
"beir_avg": 0.52,
"ms_marco_mrr10": 0.39
},
"license": "MIT",
"description": "Original ColBERT v2 from Stanford, baseline for late interaction retrieval. Uses BERT-base with projection layer to 128 dimensions."
},
{
"id": "colbert-small",
"type": "colbert",
"name": "ColBERT Small",
"huggingface_id": "answerdotai/answerai-colbert-small-v1",
"organization": "Answer.AI",
"release_date": "2024",
"architecture": {
"type": "distilbert",
"variant": "distilbert-base",
"has_projection": true,
"projection_dims": 96
},
"specs": {
"parameters": "33M",
"embedding_dim": 96,
"hidden_dim": 384,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 30522
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": true,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.45,
"ms_marco_mrr10": 0.32
},
"license": "Apache-2.0",
"description": "Compact ColBERT variant based on DistilBERT. Recommended for development and testing due to smaller size and faster inference."
},
{
"id": "jina-colbert-v2",
"type": "colbert",
"name": "Jina ColBERT v2",
"huggingface_id": "jinaai/jina-colbert-v2",
"organization": "Jina AI",
"release_date": "2024",
"architecture": {
"type": "jina-bert",
"variant": "jina-bert-v2-base-en",
"has_projection": false,
"projection_dims": null
},
"specs": {
"parameters": "560M",
"embedding_dim": {
"default": 768,
"matryoshka": {
"min": 64,
"max": 768,
"supported": [64, 96, 128, 256, 384, 512, 768],
"strategy": "truncate_output"
}
},
"hidden_dim": 768,
"context_length": 8192,
"max_position_embeddings": 8192,
"vocab_size": 30528
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en", "de", "fr", "es", "it", "pt", "nl", "pl", "ru", "zh", "ja", "ko", "ar", "hi", "th", "tr", "vi", "id", "ms", "fa", "uk", "ro", "cs", "sv", "da", "no", "fi", "el", "he", "bg", "hr", "sk", "sl", "et", "lv", "lt", "hu", "ca", "eu", "gl", "cy", "sq", "mk", "sr", "bs", "mt", "is", "ga", "af", "sw", "zu", "xh", "st", "tn", "ny", "sn", "yo", "ig", "ha", "am", "ti", "om", "so", "mg", "mi", "sm", "to", "fj", "haw", "ht", "qu", "gn", "ay", "tt", "ug", "kk", "ky", "tg", "uz", "tk", "mn", "bo", "dz", "ne", "si", "my", "km", "lo"],
"modalities": ["text"],
"multi_vector": true,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.54,
"ms_marco_mrr10": 0.42
},
"license": "Apache-2.0",
"description": "Multilingual ColBERT supporting 89 languages with extended 8K context length. Supports Matryoshka representations from 64 to 768 dimensions."
},
{
"id": "jina-colbert-v2-96",
"type": "colbert",
"name": "Jina ColBERT v2 (96-dim)",
"huggingface_id": "jinaai/jina-colbert-v2",
"organization": "Jina AI",
"release_date": "2024",
"architecture": {
"type": "jina-bert",
"variant": "jina-bert-v2-base-en",
"has_projection": false,
"projection_dims": null
},
"specs": {
"parameters": "560M",
"embedding_dim": 96,
"hidden_dim": 768,
"context_length": 8192,
"max_position_embeddings": 8192,
"vocab_size": 30528
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en", "de", "fr", "es", "it", "pt", "nl", "pl", "ru", "zh", "ja", "ko", "ar", "hi", "th", "tr", "vi", "id", "ms", "fa", "uk", "ro", "cs", "sv", "da", "no", "fi", "el", "he", "bg", "hr", "sk", "sl", "et", "lv", "lt", "hu", "ca", "eu", "gl", "cy", "sq", "mk", "sr", "bs", "mt", "is", "ga", "af", "sw", "zu", "xh", "st", "tn", "ny", "sn", "yo", "ig", "ha", "am", "ti", "om", "so", "mg", "mi", "sm", "to", "fj", "haw", "ht", "qu", "gn", "ay", "tt", "ug", "kk", "ky", "tg", "uz", "tk", "mn", "bo", "dz", "ne", "si", "my", "km", "lo"],
"modalities": ["text"],
"multi_vector": true,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.53,
"ms_marco_mrr10": 0.41
},
"license": "Apache-2.0",
"description": "Jina ColBERT v2 at 96 dimensions for compact storage with minimal quality loss."
},
{
"id": "jina-colbert-v2-64",
"type": "colbert",
"name": "Jina ColBERT v2 (64-dim)",
"huggingface_id": "jinaai/jina-colbert-v2",
"organization": "Jina AI",
"release_date": "2024",
"architecture": {
"type": "jina-bert",
"variant": "jina-bert-v2-base-en",
"has_projection": false,
"projection_dims": null
},
"specs": {
"parameters": "560M",
"embedding_dim": 64,
"hidden_dim": 768,
"context_length": 8192,
"max_position_embeddings": 8192,
"vocab_size": 30528
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en", "de", "fr", "es", "it", "pt", "nl", "pl", "ru", "zh", "ja", "ko", "ar", "hi", "th", "tr", "vi", "id", "ms", "fa", "uk", "ro", "cs", "sv", "da", "no", "fi", "el", "he", "bg", "hr", "sk", "sl", "et", "lv", "lt", "hu", "ca", "eu", "gl", "cy", "sq", "mk", "sr", "bs", "mt", "is", "ga", "af", "sw", "zu", "xh", "st", "tn", "ny", "sn", "yo", "ig", "ha", "am", "ti", "om", "so", "mg", "mi", "sm", "to", "fj", "haw", "ht", "qu", "gn", "ay", "tt", "ug", "kk", "ky", "tg", "uz", "tk", "mn", "bo", "dz", "ne", "si", "my", "km", "lo"],
"modalities": ["text"],
"multi_vector": true,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.51,
"ms_marco_mrr10": 0.39
},
"license": "Apache-2.0",
"description": "Jina ColBERT v2 at 64 dimensions for maximum compactness."
},
{
"id": "colpali-v1.2",
"type": "vision-language",
"name": "ColPali v1.2 Merged",
"huggingface_id": "vidore/colpali-v1.2-merged",
"organization": "vidore",
"release_date": "2024",
"architecture": {
"type": "paligemma",
"variant": "paligemma-3b-mix-448",
"has_projection": true,
"projection_dims": 128,
"image_size": 448,
"patch_size": 14
},
"specs": {
"parameters": "3B",
"embedding_dim": 128,
"hidden_dim": 2048,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 257216
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["multilingual"],
"modalities": ["vision", "text"],
"multi_vector": true,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.0
},
"license": "gemma",
"description": "Vision-language ColBERT model for document retrieval. Encodes page images as multi-vector patch embeddings for OCR-free document search using late interaction. Based on PaliGemma-3B with fixed initialization and right padding."
},
{
"id": "colpali-v1.3-hf",
"type": "vision-language",
"name": "ColPali v1.3 HF",
"huggingface_id": "vidore/colpali-v1.3-hf",
"organization": "vidore",
"release_date": "2024",
"architecture": {
"type": "paligemma",
"variant": "paligemma-3b-mix-448",
"has_projection": true,
"projection_dims": 128,
"image_size": 448,
"patch_size": 14
},
"specs": {
"parameters": "3B",
"embedding_dim": 128,
"hidden_dim": 2048,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 257216
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["multilingual"],
"modalities": ["vision", "text"],
"multi_vector": true,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.0
},
"license": "gemma",
"description": "Latest ColPali vision-language model for document retrieval. Encodes page images as multi-vector patch embeddings for OCR-free document search using late interaction. Improved performance over v1.2 with average NDCG@5 of 0.546 on ViDoRe benchmark."
},
{
"id": "gte-modern-colbert",
"type": "colbert",
"name": "GTE-ModernColBERT v1",
"huggingface_id": "lightonai/GTE-ModernColBERT-v1",
"organization": "LightOn AI",
"release_date": "2025",
"architecture": {
"type": "modernbert",
"variant": "gte-modernbert-base",
"has_projection": false,
"projection_dims": null
},
"specs": {
"parameters": "130M",
"embedding_dim": 768,
"hidden_dim": 768,
"context_length": 8192,
"max_position_embeddings": 8192,
"vocab_size": 50370
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": true,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.68,
"ms_marco_mrr10": 0.75
},
"license": "Apache-2.0",
"description": "Modern ColBERT model based on ModernBERT architecture with improved reasoning performance. Uses global-local attention and extended 8K context length."
},
{
"id": "bge-m3-multi",
"type": "unified",
"name": "BGE-M3 (Multi-Vector Mode)",
"huggingface_id": "BAAI/bge-m3",
"organization": "BAAI",
"release_date": "2024",
"architecture": {
"type": "xlm-roberta",
"variant": "xlm-roberta-large",
"has_projection": false,
"projection_dims": null
},
"specs": {
"parameters": "568M",
"embedding_dim": 1024,
"hidden_dim": 1024,
"context_length": 8192,
"max_position_embeddings": 8192,
"vocab_size": 250002
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en", "zh", "es", "fr", "de", "ar", "hi", "ja", "ko", "ru", "th", "tr", "vi", "id", "ms", "fa", "uk", "ro", "cs", "sv", "da", "no", "fi", "el", "he", "bg", "hr", "sk", "sl", "et", "lv", "lt", "hu", "ca", "eu", "gl", "cy", "sq", "mk", "sr", "bs", "mt", "is", "ga", "af", "sw", "zu", "xh", "st", "tn", "ny", "sn", "yo", "ig", "ha", "am", "ti", "om", "so", "mg", "mi", "sm", "to", "fj", "haw", "ht", "qu", "gn", "ay", "tt", "ug", "kk", "ky", "tg", "uz", "tk", "mn", "bo", "dz", "ne", "si", "my", "km", "lo", "pl", "it", "pt", "nl"],
"modalities": ["text"],
"multi_vector": true,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.55,
"ms_marco_mrr10": 0.44
},
"license": "MIT",
"description": "Unified embedding model supporting dense, sparse, and multi-vector representations. Supports 100+ languages with 8K context. 1024 dimensions per token in multi-vector mode."
}
]
},
"dense": {
"description": "Dense models outputting single vector per input (BERT, GTE, E5, etc.)",
"models": [
{
"id": "gte-qwen2-7b",
"type": "dense",
"name": "GTE-Qwen2-7B",
"huggingface_id": "Alibaba-NLP/gte-Qwen2-7B-instruct",
"organization": "Alibaba",
"release_date": "2024",
"architecture": {
"type": "qwen2",
"variant": "qwen2-7b-decoder",
"has_projection": false,
"projection_dims": null
},
"pooling": {
"strategy": "mean",
"normalize": true
},
"specs": {
"parameters": "7B",
"embedding_dim": {
"default": 3584,
"matryoshka": {
"min": 512,
"max": 3584,
"supported": [512, 1024, 2048, 3584],
"strategy": "truncate_pooled"
}
},
"hidden_dim": 3584,
"context_length": 32768,
"max_position_embeddings": 32768,
"vocab_size": 151936
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en", "zh", "multilingual"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8", "int4"]
},
"performance": {
"beir_avg": 0.62,
"ms_marco_mrr10": 0.51
},
"license": "Apache-2.0",
"description": "State-of-the-art dense retrieval model based on Qwen2-7B decoder with bidirectional attention. Supports Matryoshka dimensions from 512 to 3584."
},
{
"id": "nomic-embed-v1.5",
"type": "dense",
"name": "Nomic Embed v1.5",
"huggingface_id": "nomic-ai/nomic-embed-text-v1.5",
"organization": "Nomic AI",
"release_date": "2024",
"architecture": {
"type": "bert",
"variant": "nomic-bert",
"has_projection": false,
"projection_dims": null
},
"pooling": {
"strategy": "mean",
"normalize": true
},
"specs": {
"parameters": "137M",
"embedding_dim": {
"default": 768,
"matryoshka": {
"min": 64,
"max": 768,
"supported": [64, 128, 256, 512, 768],
"strategy": "truncate_pooled"
}
},
"hidden_dim": 768,
"context_length": 8192,
"max_position_embeddings": 8192,
"vocab_size": 30528
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.54,
"ms_marco_mrr10": 0.43
},
"license": "Apache-2.0",
"description": "Efficient embedding model with strong performance and Matryoshka support. Extended context window of 8K tokens."
},
{
"id": "bge-base-en-v1.5",
"type": "dense",
"name": "BGE-Base-EN-v1.5",
"huggingface_id": "BAAI/bge-base-en-v1.5",
"organization": "BAAI",
"release_date": "2023",
"architecture": {
"type": "bert",
"variant": "bert-base",
"has_projection": false,
"projection_dims": null
},
"pooling": {
"strategy": "mean",
"normalize": true
},
"specs": {
"parameters": "109M",
"embedding_dim": 768,
"hidden_dim": 768,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 30522
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.53,
"ms_marco_mrr10": 0.42
},
"license": "MIT",
"description": "Strong baseline English embedding model from BAAI. Fixed 768-dimensional embeddings with excellent performance."
},
{
"id": "snowflake-arctic-l",
"type": "dense",
"name": "Snowflake Arctic Embed L",
"huggingface_id": "Snowflake/snowflake-arctic-embed-l-v2.0",
"organization": "Snowflake",
"release_date": "2024",
"architecture": {
"type": "bert",
"variant": "bert-large",
"has_projection": false,
"projection_dims": null
},
"pooling": {
"strategy": "mean",
"normalize": true
},
"specs": {
"parameters": "335M",
"embedding_dim": {
"default": 1024,
"matryoshka": {
"min": 256,
"max": 1024,
"supported": [256, 512, 768, 1024],
"strategy": "truncate_pooled"
}
},
"hidden_dim": 1024,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 30522
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.56,
"ms_marco_mrr10": 0.45
},
"license": "Apache-2.0",
"description": "High-performance large embedding model with Matryoshka support from Snowflake. Strong retrieval performance."
},
{
"id": "qwen3-embedding-8b",
"type": "dense",
"name": "Qwen3-Embedding-8B",
"huggingface_id": "Qwen/Qwen3-Embedding-8B",
"organization": "Alibaba",
"release_date": "2025",
"architecture": {
"type": "qwen3",
"variant": "qwen3-8b-decoder",
"has_projection": false,
"projection_dims": null
},
"pooling": {
"strategy": "mean",
"normalize": true
},
"specs": {
"parameters": "8B",
"embedding_dim": {
"default": 4096,
"matryoshka": {
"min": 32,
"max": 4096,
"supported": [32, 64, 128, 256, 512, 1024, 2048, 4096],
"strategy": "truncate_pooled"
}
},
"hidden_dim": 4096,
"context_length": 32768,
"max_position_embeddings": 32768,
"vocab_size": 151936
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en", "zh", "multilingual"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8", "int4"]
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.0
},
"license": "Apache-2.0",
"description": "State-of-the-art multilingual dense retrieval model based on Qwen3-8B with bidirectional attention. Ranks #1 on MTEB multilingual leaderboard with score of 70.58. Supports Matryoshka dimensions from 32 to 4096 and instruction-aware embeddings across 100+ languages."
},
{
"id": "qwen3-embedding-4b",
"type": "dense",
"name": "Qwen3-Embedding-4B",
"huggingface_id": "Qwen/Qwen3-Embedding-4B",
"organization": "Alibaba",
"release_date": "2025",
"architecture": {
"type": "qwen3",
"variant": "qwen3-4b-decoder",
"has_projection": false,
"projection_dims": null
},
"pooling": {
"strategy": "mean",
"normalize": true
},
"specs": {
"parameters": "4B",
"embedding_dim": {
"default": 2560,
"matryoshka": {
"min": 32,
"max": 2560,
"supported": [32, 64, 128, 256, 512, 1024, 2048, 2560],
"strategy": "truncate_pooled"
}
},
"hidden_dim": 2560,
"context_length": 32768,
"max_position_embeddings": 32768,
"vocab_size": 151936
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en", "zh", "multilingual"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8", "int4"]
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.0
},
"license": "Apache-2.0",
"description": "Efficient multilingual dense retrieval model based on Qwen3-4B. Achieves 69.45 MTEB score with strong retrieval performance. Supports Matryoshka dimensions from 32 to 2560 and instruction-aware embeddings across 100+ languages."
},
{
"id": "qwen3-embedding-0.6b",
"type": "dense",
"name": "Qwen3-Embedding-0.6B",
"huggingface_id": "Qwen/Qwen3-Embedding-0.6B",
"organization": "Alibaba",
"release_date": "2025",
"architecture": {
"type": "qwen3",
"variant": "qwen3-0.6b-decoder",
"has_projection": false,
"projection_dims": null
},
"pooling": {
"strategy": "mean",
"normalize": true
},
"specs": {
"parameters": "0.6B",
"embedding_dim": {
"default": 1024,
"matryoshka": {
"min": 32,
"max": 1024,
"supported": [32, 64, 128, 256, 512, 1024],
"strategy": "truncate_pooled"
}
},
"hidden_dim": 1024,
"context_length": 32768,
"max_position_embeddings": 32768,
"vocab_size": 151936
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en", "zh", "multilingual"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8", "int4"]
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.0
},
"license": "Apache-2.0",
"description": "Compact multilingual dense retrieval model based on Qwen3-0.6B with 28 layers. Achieves 64.33 MTEB score with efficient inference. Supports Matryoshka dimensions from 32 to 1024 and instruction-aware embeddings across 100+ languages."
},
{
"id": "jina-embeddings-v3",
"type": "dense",
"name": "Jina Embeddings v3",
"huggingface_id": "jinaai/jina-embeddings-v3",
"organization": "Jina AI",
"release_date": "2024",
"architecture": {
"type": "xlm-roberta",
"variant": "jina-xlm-roberta",
"has_projection": false,
"projection_dims": null
},
"pooling": {
"strategy": "mean",
"normalize": true
},
"specs": {
"parameters": "570M",
"embedding_dim": {
"default": 1024,
"matryoshka": {
"min": 32,
"max": 1024,
"supported": [32, 64, 128, 256, 512, 768, 1024],
"strategy": "truncate_pooled"
}
},
"hidden_dim": 1024,
"context_length": 8192,
"max_position_embeddings": 8192,
"vocab_size": 250002
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en", "zh", "de", "fr", "es", "it", "pt", "ja", "ko", "ar", "ru", "multilingual"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.0
},
"license": "Apache-2.0",
"description": "Frontier multilingual embedding model with 570M parameters based on XLM-RoBERTa with 24 layers. Features task-specific LoRA adapters for retrieval, classification, and text-matching. Supports 89 languages with extended 8K context using RoPE. Ranks 2nd on MTEB English leaderboard for models under 1B parameters."
}
]
},
"sparse": {
"description": "Sparse models with vocabulary-sized vectors (SPLADE, uniCOIL)",
"models": [
{
"id": "splade-v3",
"type": "sparse",
"name": "SPLADE v3",
"huggingface_id": "naver/splade-v3",
"organization": "Naver Labs",
"release_date": "2023",
"architecture": {
"type": "bert",
"variant": "distilbert-base",
"has_projection": false,
"projection_dims": null
},
"specs": {
"parameters": "66M",
"embedding_dim": 30522,
"hidden_dim": 768,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 30522
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16"]
},
"performance": {
"beir_avg": 0.49,
"ms_marco_mrr10": 0.38
},
"license": "CC-BY-NC-SA-4.0",
"description": "Sparse lexical retrieval using learned term expansion. Vocabulary-sized vectors with 99.82% sparsity for efficient inverted index storage."
},
{
"id": "minicoil-v1",
"type": "sparse",
"name": "miniCOIL v1",
"huggingface_id": "Qdrant/minicoil-v1",
"organization": "Qdrant",
"release_date": "2024",
"architecture": {
"type": "bert",
"variant": "distilbert-base",
"has_projection": true,
"projection_dims": 4
},
"specs": {
"parameters": "66M",
"embedding_dim": 4,
"hidden_dim": 768,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 30522
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.46,
"ms_marco_mrr10": 0.35
},
"license": "Apache-2.0",
"description": "Compact sparse retrieval model with 4-dimensional term vectors. Highly efficient for inverted index storage with competitive performance."
},
{
"id": "splade-pp-en-v1",
"type": "sparse",
"name": "SPLADE++ EN v1",
"huggingface_id": "prithivida/Splade_PP_en_v1",
"organization": "prithivida",
"release_date": "2024",
"architecture": {
"type": "bert",
"variant": "bert-base-uncased",
"has_projection": false,
"has_mlm_head": true
},
"specs": {
"parameters": "109M",
"embedding_dim": 30522,
"hidden_dim": 768,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 30522
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin",
"onnx": "onnx/model.onnx"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": false,
"quantization": []
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.3722
},
        "license": "Apache-2.0",
"description": "SPLADE++ efficient sparse embedding model with automatic token expansion for retrieval tasks. Uses BERT-base with MLM head and restrictive FLOPS schedule (doc:128, query:24 tokens)."
},
{
"id": "splade-pp-en-v2",
"type": "sparse",
"name": "SPLADE++ EN v2",
"huggingface_id": "prithivida/Splade_PP_en_v2",
"organization": "prithivida",
"release_date": "2024",
"architecture": {
"type": "bert",
"variant": "bert-base-uncased",
"has_projection": false,
"has_mlm_head": true
},
"specs": {
"parameters": "109M",
"embedding_dim": 30522,
"hidden_dim": 768,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 30522
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin",
"onnx": "onnx/model.onnx"
}
},
"capabilities": {
"languages": ["en"],
"modalities": ["text"],
"multi_vector": false,
"quantization": []
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.378
},
        "license": "Apache-2.0",
"description": "Improved SPLADE++ v2 with middle-trained BERT-base (MLM loss) for better corpus awareness. Achieves 37.8 MRR@10 with efficient token budget and 48.81ms retrieval latency."
}
]
},
"timeseries": {
"description": "Time series foundation models (TimesFM, Chronos)",
"models": [
{
"id": "timesfm-1.0-200m",
"type": "timeseries",
"name": "TimesFM 1.0 200M",
"huggingface_id": "google/timesfm-1.0-200m",
"organization": "Google Research",
"release_date": "2024",
"architecture": {
"type": "decoder-transformer",
"variant": "patched-decoder",
"has_projection": false,
"projection_dims": null
},
"specs": {
"parameters": "200M",
"embedding_dim": 1280,
"hidden_dim": 1280,
"context_length": 512,
"max_position_embeddings": 512,
"vocab_size": 0
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": [],
"modalities": ["timeseries"],
"multi_vector": false,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.0
},
"license": "Apache-2.0",
"description": "Pre-trained time series foundation model using decoder-only transformer. Supports context of 512 time points with strong zero-shot forecasting."
},
{
"id": "chronos-bolt-small",
"type": "timeseries",
"name": "Chronos Bolt Small",
"huggingface_id": "amazon/chronos-bolt-small",
"organization": "Amazon",
"release_date": "2024",
"architecture": {
"type": "t5",
"variant": "t5-encoder-decoder",
"has_projection": true,
"projection_dims": 512
},
"specs": {
"parameters": "48M",
"embedding_dim": 512,
"hidden_dim": 2048,
"context_length": 2048,
"prediction_length": 64,
"max_position_embeddings": 2048,
"vocab_size": 2,
"num_encoder_layers": 6,
"num_decoder_layers": 6,
"num_heads": 8,
"d_kv": 64,
"patch_size": 32,
"num_quantiles": 9
},
"files": {
"tokenizer": "tokenizer.json",
"config": "config.json",
"weights": {
"safetensors": "model.safetensors",
"pytorch": "pytorch_model.bin"
}
},
"capabilities": {
"languages": [],
"modalities": ["timeseries"],
"multi_vector": false,
"probabilistic": true,
"quantization": ["fp32", "fp16", "int8"]
},
"performance": {
"beir_avg": 0.0,
"ms_marco_mrr10": 0.0
},
"license": "Apache-2.0",
"description": "Efficient T5 encoder-decoder model for probabilistic time series forecasting. Uses continuous patch-based encoding (NOT discrete tokenization like original Chronos). Predicts 9 quantiles for uncertainty quantification. 250x faster than original Chronos. Trained on 100B observations with direct multi-step forecasting."
}
]
},
"geometric": {
"description": "Non-Euclidean embeddings (hyperbolic, spherical, quaternion)",
"models": []
}
}
}