# Pod manifest: runs a single `llama-factory` container, exposing TCP port
# 8000 and mounting a PVC-backed Hugging Face cache directory.
apiVersion: v1
kind: Pod
metadata:
  name: llama-factory
  namespace: orign
spec:
  containers:
    # NOTE(review): the image name/tag mentions "ascend910" and "cann8", while
    # this Pod requests `nvidia.com/gpu` and selects `l40s` nodes below —
    # confirm the image matches the accelerator actually being scheduled.
    - image: swr.cn-east-317.qdrgznjszx.com/donggang/llama-factory-ascend910:cann8-py310-torch2.2.0-ubuntu18.04
      # Always contact the registry on container start, so a re-pushed tag
      # is picked up.
      imagePullPolicy: Always
      name: llama-factory
      ports:
        - containerPort: 8000
          protocol: TCP
      resources:
        # Requests mirror limits for both memory and the GPU extended
        # resource. The GPU count is quoted so it stays a string quantity.
        limits:
          memory: 40Gi
          nvidia.com/gpu: "1"
        requests:
          memory: 40Gi
          nvidia.com/gpu: "1"
      volumeMounts:
        # Backed by the `huggingface-cache` volume declared under `volumes`.
        - mountPath: /root/.cache/huggingface
          name: huggingface-cache
  # Only schedule onto nodes carrying both of these labels.
  nodeSelector:
    gpu-type: l40s-2xlarge
    role: gpu
  tolerations:
    # Tolerate the NoSchedule taint `gpu=true`.
    - effect: NoSchedule
      key: gpu
      operator: Equal
      value: "true"
    # Tolerate any NoSchedule taint keyed `nvidia.com/gpu`, whatever its value.
    - effect: NoSchedule
      key: nvidia.com/gpu
      operator: Exists
  volumes:
    # The claim `huggingface-cache-pvc` is not defined in this manifest; it is
    # looked up in the Pod's namespace.
    - name: huggingface-cache
      persistentVolumeClaim:
        claimName: huggingface-cache-pvc