---
# Compose project running four Hugging Face text-embeddings-inference (TEI)
# containers, one per embedding model. Every service publishes container
# port 80 on its own loopback-only host port (127.0.0.1:8090-8093), mounts
# the shared model_cache volume at /data and joins retrieval_net.
#
# NOTE(review): the GPU services pin tag 86-1.8 while the CPU services pin
# cpu-1.6 — confirm the version difference is intentional.
name: codescout-retrieval-matrix

services:
  # SPLADE model served with "splade" pooling in float16; requests one
  # NVIDIA GPU. Reachable from the host at 127.0.0.1:8091.
  embedder-sparse:
    # NOTE(review): the "86-" tag prefix presumably selects the CUDA
    # compute-capability 8.6 build — confirm it matches the host GPU.
    image: ghcr.io/huggingface/text-embeddings-inference:86-1.8
    container_name: codescout-matrix-sparse
    restart: unless-stopped
    command:
      - "--model-id"
      - "naver/splade-cocondenser-ensembledistil"
      - "--pooling"
      - "splade"
      - "--dtype"
      - "float16"
      - "--auto-truncate"
    ports:
      - "127.0.0.1:8091:80"
    volumes:
      - model_cache:/data
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    networks:
      - retrieval_net

  # jina-embeddings-v2-base-code served in float16; requests one NVIDIA
  # GPU. Reachable from the host at 127.0.0.1:8090.
  embedder-jina-base:
    image: ghcr.io/huggingface/text-embeddings-inference:86-1.8
    container_name: codescout-matrix-jina-base
    restart: unless-stopped
    command:
      - "--model-id"
      - "jinaai/jina-embeddings-v2-base-code"
      - "--dtype"
      - "float16"
      - "--auto-truncate"
    ports:
      - "127.0.0.1:8090:80"
    # Only this service sets an explicit /dev/shm size.
    # NOTE(review): confirm whether embedder-sparse needs the same.
    shm_size: 2g
    volumes:
      - model_cache:/data
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    networks:
      - retrieval_net

  # bge-small-en-v1.5 on the CPU image in float32 (no GPU reservation).
  # Reachable from the host at 127.0.0.1:8092.
  embedder-bge-small:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
    container_name: codescout-matrix-bge-small
    restart: unless-stopped
    command:
      - "--model-id"
      - "BAAI/bge-small-en-v1.5"
      - "--dtype"
      - "float32"
      - "--auto-truncate"
    ports:
      - "127.0.0.1:8092:80"
    volumes:
      - model_cache:/data
    networks:
      - retrieval_net

  # jina-embeddings-v2-small-en on the CPU image in float32 (no GPU
  # reservation). Reachable from the host at 127.0.0.1:8093.
  embedder-jina-small:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
    container_name: codescout-matrix-jina-small
    restart: unless-stopped
    command:
      - "--model-id"
      - "jinaai/jina-embeddings-v2-small-en"
      - "--dtype"
      - "float32"
      - "--auto-truncate"
    ports:
      - "127.0.0.1:8093:80"
    volumes:
      - model_cache:/data
    networks:
      - retrieval_net

volumes:
  # Declared external: Compose does not create this volume, so it must
  # already exist under the given name before the project is started.
  # NOTE(review): the name suggests it is owned by a separate
  # "codescout-retrieval" project — confirm that project is set up first.
  model_cache:
    external: true
    name: codescout-retrieval_model_cache

networks:
  # Declared external: Compose attaches to this pre-existing network
  # instead of creating a project-scoped one.
  retrieval_net:
    external: true
    name: codescout-retrieval_retrieval_net