ai00_core/
reload.rs

1use std::path::PathBuf;
2
3use derivative::Derivative;
4use salvo::oapi::ToSchema;
5use serde::{Deserialize, Serialize};
6use web_rwkv::runtime::model::Quant;
7
8use crate::StateId;
9
/// Model configuration: where the model lives, how it is quantized, and its
/// runtime limits.
///
/// Because of `#[serde(default)]`, any field missing from the input is filled
/// in from `Model::default()` when deserializing.
10#[derive(Debug, Clone, Derivative, Serialize, Deserialize, ToSchema)]
11#[derivative(Default)]
12#[serde(default)]
13pub struct Model {
14    /// Path to the folder containing all models. Defaults to `assets/models`.
    /// The key `model_path` is accepted as an alias when deserializing.
15    #[derivative(Default(value = "\"assets/models\".into()"))]
16    #[serde(alias = "model_path")]
17    #[salvo(schema(value_type = String))]
18    pub path: PathBuf,
19    /// Name of the model. The key `model_name` is accepted as an alias when deserializing.
    // NOTE(review): presumably a file name resolved relative to `path` — confirm against the loader.
20    #[serde(alias = "model_name")]
21    #[salvo(schema(value_type = String))]
22    pub name: PathBuf,
23    /// Specifies the layers that need to be quantized.
    // NOTE(review): the type is a single `usize`, so this is presumably a layer count — confirm.
24    pub quant: usize,
25    /// Quantization type (`Int8` or `NF4`).
    // The OpenAPI schema for this field is taken from `super::sealed::Quant` rather than
    // from the `web_rwkv` type itself.
26    #[salvo(schema(value_type = super::sealed::Quant))]
27    pub quant_type: Quant,
28    /// Precision for intermediate tensors (`Fp16` or `Fp32`).
29    pub precision: Precision,
30    /// Maximum number of tokens to be processed in parallel at once. Defaults to 128.
31    #[derivative(Default(value = "128"))]
32    pub token_chunk_size: usize,
33    /// Number of states that are cached on GPU. Defaults to 8.
34    #[derivative(Default(value = "8"))]
35    pub max_batch: usize,
36}
37
38/// Low-rank adaptor (LoRA) description: the file to load and how strongly to blend it.
///
/// Because of `#[serde(default)]`, any field missing from the input is filled
/// in from `Lora::default()` when deserializing.
39#[derive(Debug, Clone, Derivative, Serialize, Deserialize, ToSchema)]
40#[derivative(Default)]
41#[serde(default)]
42pub struct Lora {
43    /// Path to the LoRA. Exposed as a string in the OpenAPI schema.
44    #[salvo(schema(value_type = String))]
45    pub path: PathBuf,
46    /// Blend factor. Defaults to 1.0.
47    #[derivative(Default(value = "1.0"))]
48    pub alpha: f32,
49}
50
51/// State-tuned initial state.
///
/// Because of `#[serde(default)]`, fields missing from the input are filled in
/// from `State::default()` when deserializing, except `id`, which has its own
/// serde default function.
52#[derive(Debug, Clone, Derivative, Serialize, Deserialize, ToSchema)]
53#[derivative(Default)]
54#[serde(default)]
55pub struct State {
56    /// Path to the initial state. Exposed as a string in the OpenAPI schema.
57    #[salvo(schema(value_type = String))]
58    pub path: PathBuf,
59    /// Optional given name for the state.
60    pub name: Option<String>,
61    /// UUID for this state. When absent from the input, serde calls `StateId::new`.
    // NOTE(review): `State::default()` builds this field through `StateId`'s `Default`
    // impl instead (there is no `derivative` value override); whether that yields the
    // same kind of id as `StateId::new` is not visible in this file — confirm.
62    #[serde(default = "StateId::new")]
63    pub id: StateId,
64    /// Whether this state should be loaded on startup.
65    pub default: bool,
66}
67
/// Tokenizer configuration.
///
/// Because of `#[serde(default)]`, a missing `path` is filled in from
/// `Tokenizer::default()` when deserializing.
68#[derive(Debug, Derivative, Clone, Serialize, Deserialize, ToSchema)]
69#[derivative(Default)]
70#[serde(default)]
71pub struct Tokenizer {
    /// Path to the tokenizer file.
    /// Defaults to `assets/tokenizer/rwkv_vocab_v20230424.json`.
72    #[derivative(Default(value = "\"assets/tokenizer/rwkv_vocab_v20230424.json\".into()"))]
73    #[salvo(schema(value_type = String))]
74    pub path: PathBuf,
75}
76
/// Options that apply to BNF schemas.
///
/// Because of `#[serde(default)]`, any field missing from the input is filled
/// in from `BnfOption::default()` when deserializing.
77#[derive(Debug, Derivative, Clone, Serialize, Deserialize, ToSchema)]
78#[derivative(Default)]
79#[serde(default)]
80pub struct BnfOption {
81    /// Enable the cache that accelerates the expansion of certain short schemas.
    /// Defaults to `true`.
82    #[derivative(Default(value = "true"))]
83    pub enable_bytes_cache: bool,
84    /// The initial nonterminal of the BNF schemas. Defaults to `"start"`.
85    #[derivative(Default(value = "\"start\".into()"))]
86    pub start_nonterminal: String,
87}
88
/// Floating-point precision selector; [`Precision::Fp16`] is the default.
// `Model::precision` describes this as the precision used for intermediate tensors.
89#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize, ToSchema)]
90pub enum Precision {
    /// 16-bit floating point. This is what `Precision::default()` returns.
91    #[default]
92    Fp16,
    /// 32-bit floating point.
93    Fp32,
94}
95
/// Adapter option; [`AdapterOption::Auto`] is the default.
// NOTE(review): nothing in this file shows which kind of adapter this selects or how
// each mode behaves — presumably GPU adapter selection; confirm against the code that
// consumes this enum.
96#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize, ToSchema)]
97pub enum AdapterOption {
    /// The variant returned by `AdapterOption::default()`.
98    #[default]
99    Auto,
    // NOTE(review): semantics are not visible here — presumably favors a lower-cost
    // adapter; confirm.
100    Economical,
    /// Carries a caller-supplied `usize`.
    // NOTE(review): presumably the index of the adapter to use — confirm.
101    Manual(usize),
102}