use anyhow::Error;
use serde::{Deserialize, Serialize};
use trustformers_core::errors::{invalid_config, Result};
use trustformers_core::tensor::Tensor;
use trustformers_core::{Config, Layer, Model};
#[cfg(feature = "llama")]
use crate::llama::{LlamaConfig, LlamaModel};
/// Configuration for code-specialized language models (CodeLlama, StarCoder,
/// DeepSeek-Coder, Qwen-Coder) layered on top of a base `LlamaConfig`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeSpecializedConfig {
    /// Underlying Llama-family transformer hyperparameters.
    pub base_config: LlamaConfig,
    /// Code-specific vocabulary size; `None` falls back to
    /// `base_config.vocab_size` (see `effective_vocab_size`).
    pub code_vocab_size: Option<usize>,
    /// Whether fill-in-the-middle (FIM) completion is enabled.
    pub fill_in_middle: bool,
    /// Programming languages this configuration claims to support;
    /// matched case-insensitively by `supports_language`.
    pub supported_languages: Vec<String>,
    /// Special token strings used for FIM and repository-level context.
    pub special_tokens: CodeSpecialTokens,
    /// Maximum code context length in tokens; must be > 0 (see `validate`).
    pub code_context_length: usize,
    /// NOTE(review): stored but never read anywhere in this file — presumably
    /// consumed by attention machinery elsewhere in the crate; confirm.
    pub hierarchical_attention: bool,
    /// Which published model family/variant this config describes.
    pub model_variant: CodeModelVariant,
}
/// Special token strings for fill-in-the-middle (FIM) prompting and
/// repository-level context. Defaults are CodeLlama-style spellings; the
/// StarCoder preset substitutes its own (see `starcoder_15b`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeSpecialTokens {
    /// Marker preceding the prefix segment in a FIM prompt.
    pub fim_prefix: String,
    /// Marker for the middle segment (the part to be generated).
    pub fim_middle: String,
    /// Marker preceding the suffix segment in a FIM prompt.
    pub fim_suffix: String,
    /// End-of-text token.
    pub eot_token: String,
    /// Token introducing a repository name in repo-level context.
    pub repo_token: String,
    /// Token introducing a file name in repo-level context.
    pub file_token: String,
}
/// Identifies which published code-model family/variant a
/// `CodeSpecializedConfig` describes; drives `architecture()` naming and the
/// preset constructors.
///
/// Fieldless, so it is `Copy`; `Eq`/`Hash` complete the comparison derives
/// (deriving `PartialEq` without `Eq` trips clippy's
/// `derive_partial_eq_without_eq` and blocks use as a map key).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum CodeModelVariant {
    /// Base CodeLlama models.
    CodeLlama,
    /// Instruction-tuned CodeLlama.
    CodeLlamaInstruct,
    /// Python-specialized CodeLlama.
    CodeLlamaPython,
    /// StarCoder (fine-tuned).
    StarCoder,
    /// StarCoderBase (pre-trained only).
    StarCoderBase,
    /// StarCoder2 family.
    StarCoder2,
    /// DeepSeek-Coder base models.
    DeepSeekCoder,
    /// Instruction-tuned DeepSeek-Coder.
    DeepSeekCoderInstruct,
    /// Qwen2.5-Coder family.
    QwenCoder,
}
impl Default for CodeSpecialTokens {
    /// CodeLlama-style FIM markers plus generic repo/file metadata tokens.
    /// Family presets that use different spellings (e.g. StarCoder's
    /// `<fim_prefix>`) replace this value wholesale.
    fn default() -> Self {
        Self {
            fim_prefix: String::from("<PRE>"),
            fim_middle: String::from("<MID>"),
            fim_suffix: String::from("<SUF>"),
            eot_token: String::from("<|endoftext|>"),
            repo_token: String::from("<|repo_token|>"),
            file_token: String::from("<|file_token|>"),
        }
    }
}
impl Default for CodeSpecializedConfig {
    /// General-purpose multi-language baseline. The named preset constructors
    /// spread over this value and override only the fields that differ per
    /// model family.
    fn default() -> Self {
        // Languages enabled out of the box; checked case-insensitively by
        // `supports_language`.
        let languages = [
            "python",
            "javascript",
            "typescript",
            "java",
            "cpp",
            "c",
            "rust",
            "go",
            "html",
            "css",
            "sql",
            "bash",
        ];
        Self {
            base_config: LlamaConfig::default(),
            code_vocab_size: None,
            fill_in_middle: true,
            supported_languages: languages.iter().map(|s| s.to_string()).collect(),
            special_tokens: CodeSpecialTokens::default(),
            code_context_length: 16384,
            hierarchical_attention: true,
            model_variant: CodeModelVariant::CodeLlama,
        }
    }
}
impl CodeSpecializedConfig {
    /// CodeLlama 7B base preset (32,016-token vocab, 16K code context).
    pub fn code_llama_7b() -> Self {
        Self {
            base_config: LlamaConfig::code_llama_7b(),
            code_vocab_size: Some(32016),
            code_context_length: 16384,
            model_variant: CodeModelVariant::CodeLlama,
            ..Self::default()
        }
    }
    /// CodeLlama 13B base preset, derived from the Llama-2 13B base config
    /// with code vocab and 16K positions overridden.
    pub fn code_llama_13b() -> Self {
        Self {
            base_config: LlamaConfig {
                vocab_size: 32016,
                hidden_size: 5120,
                intermediate_size: 13824,
                num_hidden_layers: 40,
                num_attention_heads: 40,
                max_position_embeddings: 16384,
                ..LlamaConfig::llama2_13b()
            },
            code_vocab_size: Some(32016),
            code_context_length: 16384,
            model_variant: CodeModelVariant::CodeLlama,
            ..Self::default()
        }
    }
    /// CodeLlama 34B base preset (grouped-query attention: 8 KV heads).
    /// NOTE(review): unlike the 13B preset, this spreads from
    /// `LlamaConfig::default()` rather than a size-specific base — confirm the
    /// remaining defaulted fields (e.g. RoPE settings) are right for 34B.
    pub fn code_llama_34b() -> Self {
        Self {
            base_config: LlamaConfig {
                vocab_size: 32016,
                hidden_size: 8192,
                intermediate_size: 22016,
                num_hidden_layers: 48,
                num_attention_heads: 64,
                num_key_value_heads: Some(8), max_position_embeddings: 16384,
                ..LlamaConfig::default()
            },
            code_vocab_size: Some(32016),
            code_context_length: 16384,
            model_variant: CodeModelVariant::CodeLlama,
            ..Self::default()
        }
    }
    /// 7B preset retargeted to the instruction-tuned variant.
    pub fn code_llama_7b_instruct() -> Self {
        Self {
            model_variant: CodeModelVariant::CodeLlamaInstruct,
            ..Self::code_llama_7b()
        }
    }
    /// 13B preset retargeted to the instruction-tuned variant.
    pub fn code_llama_13b_instruct() -> Self {
        Self {
            model_variant: CodeModelVariant::CodeLlamaInstruct,
            ..Self::code_llama_13b()
        }
    }
    /// 34B preset retargeted to the instruction-tuned variant.
    pub fn code_llama_34b_instruct() -> Self {
        Self {
            model_variant: CodeModelVariant::CodeLlamaInstruct,
            ..Self::code_llama_34b()
        }
    }
    /// 7B preset specialized for Python only.
    pub fn code_llama_7b_python() -> Self {
        Self {
            supported_languages: vec!["python".to_string()],
            model_variant: CodeModelVariant::CodeLlamaPython,
            ..Self::code_llama_7b()
        }
    }
    /// 13B preset specialized for Python only.
    pub fn code_llama_13b_python() -> Self {
        Self {
            supported_languages: vec!["python".to_string()],
            model_variant: CodeModelVariant::CodeLlamaPython,
            ..Self::code_llama_13b()
        }
    }
    /// 34B preset specialized for Python only.
    pub fn code_llama_34b_python() -> Self {
        Self {
            supported_languages: vec!["python".to_string()],
            model_variant: CodeModelVariant::CodeLlamaPython,
            ..Self::code_llama_34b()
        }
    }
    /// StarCoder 15B preset: 49,152-token vocab, 8K context, and StarCoder's
    /// own FIM/repo/file token spellings.
    pub fn starcoder_15b() -> Self {
        Self {
            base_config: LlamaConfig {
                vocab_size: 49152,
                hidden_size: 6144,
                intermediate_size: 24576,
                num_hidden_layers: 40,
                num_attention_heads: 48,
                max_position_embeddings: 8192,
                ..LlamaConfig::default()
            },
            code_vocab_size: Some(49152),
            code_context_length: 8192,
            model_variant: CodeModelVariant::StarCoder,
            special_tokens: CodeSpecialTokens {
                fim_prefix: "<fim_prefix>".to_string(),
                fim_middle: "<fim_middle>".to_string(),
                fim_suffix: "<fim_suffix>".to_string(),
                eot_token: "<|endoftext|>".to_string(),
                repo_token: "<reponame>".to_string(),
                file_token: "<filename>".to_string(),
            },
            ..Self::default()
        }
    }
    /// StarCoderBase 15B: identical to `starcoder_15b` except for the variant
    /// tag.
    pub fn starcoder_base_15b() -> Self {
        Self {
            model_variant: CodeModelVariant::StarCoderBase,
            ..Self::starcoder_15b()
        }
    }
    /// StarCoder2 7B preset (GQA with 4 KV heads, 16K context).
    /// NOTE(review): StarCoder2 presets keep the CodeLlama-style default
    /// special tokens instead of the `<fim_*>` spellings used by
    /// `starcoder_15b` — confirm this is intentional.
    pub fn starcoder2_7b() -> Self {
        Self {
            base_config: LlamaConfig {
                vocab_size: 49152,
                hidden_size: 4096,
                intermediate_size: 16384,
                num_hidden_layers: 32,
                num_attention_heads: 32,
                num_key_value_heads: Some(4), max_position_embeddings: 16384,
                ..LlamaConfig::default()
            },
            code_vocab_size: Some(49152),
            code_context_length: 16384,
            model_variant: CodeModelVariant::StarCoder2,
            ..Self::default()
        }
    }
    /// StarCoder2 15B preset (GQA with 6 KV heads, 16K context).
    pub fn starcoder2_15b() -> Self {
        Self {
            base_config: LlamaConfig {
                vocab_size: 49152,
                hidden_size: 6144,
                intermediate_size: 24576,
                num_hidden_layers: 40,
                num_attention_heads: 48,
                num_key_value_heads: Some(6), max_position_embeddings: 16384,
                ..LlamaConfig::default()
            },
            code_vocab_size: Some(49152),
            code_context_length: 16384,
            model_variant: CodeModelVariant::StarCoder2,
            ..Self::default()
        }
    }
    /// DeepSeek-Coder 1.3B base preset.
    /// NOTE(review): DeepSeek presets inherit the CodeLlama-style default FIM
    /// tokens; DeepSeek-Coder publishes its own FIM token strings — confirm.
    pub fn deepseek_coder_1b() -> Self {
        Self {
            base_config: LlamaConfig {
                vocab_size: 32000,
                hidden_size: 2048,
                intermediate_size: 5504,
                num_hidden_layers: 24,
                num_attention_heads: 16,
                max_position_embeddings: 16384,
                ..LlamaConfig::default()
            },
            code_vocab_size: Some(32000),
            code_context_length: 16384,
            model_variant: CodeModelVariant::DeepSeekCoder,
            ..Self::default()
        }
    }
    /// DeepSeek-Coder 6.7B base preset.
    pub fn deepseek_coder_7b() -> Self {
        Self {
            base_config: LlamaConfig {
                vocab_size: 32000,
                hidden_size: 4096,
                intermediate_size: 11008,
                num_hidden_layers: 32,
                num_attention_heads: 32,
                max_position_embeddings: 16384,
                ..LlamaConfig::default()
            },
            code_vocab_size: Some(32000),
            code_context_length: 16384,
            model_variant: CodeModelVariant::DeepSeekCoder,
            ..Self::default()
        }
    }
    /// DeepSeek-Coder 33B base preset (GQA with 8 KV heads).
    pub fn deepseek_coder_33b() -> Self {
        Self {
            base_config: LlamaConfig {
                vocab_size: 32000,
                hidden_size: 7168,
                intermediate_size: 20480,
                num_hidden_layers: 62,
                num_attention_heads: 56,
                num_key_value_heads: Some(8), max_position_embeddings: 16384,
                ..LlamaConfig::default()
            },
            code_vocab_size: Some(32000),
            code_context_length: 16384,
            model_variant: CodeModelVariant::DeepSeekCoder,
            ..Self::default()
        }
    }
    /// 1.3B preset retargeted to the instruction-tuned variant.
    pub fn deepseek_coder_1b_instruct() -> Self {
        Self {
            model_variant: CodeModelVariant::DeepSeekCoderInstruct,
            ..Self::deepseek_coder_1b()
        }
    }
    /// 6.7B preset retargeted to the instruction-tuned variant.
    pub fn deepseek_coder_7b_instruct() -> Self {
        Self {
            model_variant: CodeModelVariant::DeepSeekCoderInstruct,
            ..Self::deepseek_coder_7b()
        }
    }
    /// 33B preset retargeted to the instruction-tuned variant.
    pub fn deepseek_coder_33b_instruct() -> Self {
        Self {
            model_variant: CodeModelVariant::DeepSeekCoderInstruct,
            ..Self::deepseek_coder_33b()
        }
    }
    /// Qwen2.5-Coder 1.5B preset (151,936-token vocab, 128K context,
    /// GQA with 2 KV heads).
    pub fn qwen_coder_1_5b() -> Self {
        Self {
            base_config: LlamaConfig {
                vocab_size: 151936,
                hidden_size: 1536,
                intermediate_size: 8960,
                num_hidden_layers: 28,
                num_attention_heads: 12,
                num_key_value_heads: Some(2),
                max_position_embeddings: 131072,
                ..LlamaConfig::default()
            },
            code_vocab_size: Some(151936),
            code_context_length: 131072,
            model_variant: CodeModelVariant::QwenCoder,
            ..Self::default()
        }
    }
    /// Qwen2.5-Coder 7B preset (128K context, GQA with 4 KV heads).
    pub fn qwen_coder_7b() -> Self {
        Self {
            base_config: LlamaConfig {
                vocab_size: 151936,
                hidden_size: 3584,
                intermediate_size: 18944,
                num_hidden_layers: 28,
                num_attention_heads: 28,
                num_key_value_heads: Some(4),
                max_position_embeddings: 131072,
                ..LlamaConfig::default()
            },
            code_vocab_size: Some(151936),
            code_context_length: 131072,
            model_variant: CodeModelVariant::QwenCoder,
            ..Self::default()
        }
    }
    /// Qwen2.5-Coder 32B preset (128K context, GQA with 8 KV heads).
    pub fn qwen_coder_32b() -> Self {
        Self {
            base_config: LlamaConfig {
                vocab_size: 151936,
                hidden_size: 5120,
                intermediate_size: 27392,
                num_hidden_layers: 64,
                num_attention_heads: 40,
                num_key_value_heads: Some(8),
                max_position_embeddings: 131072,
                ..LlamaConfig::default()
            },
            code_vocab_size: Some(151936),
            code_context_length: 131072,
            model_variant: CodeModelVariant::QwenCoder,
            ..Self::default()
        }
    }
    /// Maps a Hugging Face repo id or a short alias to its preset, or `None`
    /// for unknown names. The short aliases are the ones returned by
    /// `available_models`.
    pub fn from_pretrained_name(name: &str) -> Option<Self> {
        match name {
            "codellama/CodeLlama-7b-hf" | "code-llama-7b" => Some(Self::code_llama_7b()),
            "codellama/CodeLlama-13b-hf" | "code-llama-13b" => Some(Self::code_llama_13b()),
            "codellama/CodeLlama-34b-hf" | "code-llama-34b" => Some(Self::code_llama_34b()),
            "codellama/CodeLlama-7b-Instruct-hf" | "code-llama-7b-instruct" => {
                Some(Self::code_llama_7b_instruct())
            },
            "codellama/CodeLlama-13b-Instruct-hf" | "code-llama-13b-instruct" => {
                Some(Self::code_llama_13b_instruct())
            },
            "codellama/CodeLlama-34b-Instruct-hf" | "code-llama-34b-instruct" => {
                Some(Self::code_llama_34b_instruct())
            },
            "codellama/CodeLlama-7b-Python-hf" | "code-llama-7b-python" => {
                Some(Self::code_llama_7b_python())
            },
            "codellama/CodeLlama-13b-Python-hf" | "code-llama-13b-python" => {
                Some(Self::code_llama_13b_python())
            },
            "codellama/CodeLlama-34b-Python-hf" | "code-llama-34b-python" => {
                Some(Self::code_llama_34b_python())
            },
            "bigcode/starcoder" | "starcoder-15b" => Some(Self::starcoder_15b()),
            "bigcode/starcoderbase" | "starcoder-base-15b" => Some(Self::starcoder_base_15b()),
            "bigcode/starcoder2-7b" | "starcoder2-7b" => Some(Self::starcoder2_7b()),
            "bigcode/starcoder2-15b" | "starcoder2-15b" => Some(Self::starcoder2_15b()),
            "deepseek-ai/deepseek-coder-1.3b-base" | "deepseek-coder-1b" => {
                Some(Self::deepseek_coder_1b())
            },
            "deepseek-ai/deepseek-coder-6.7b-base" | "deepseek-coder-7b" => {
                Some(Self::deepseek_coder_7b())
            },
            "deepseek-ai/deepseek-coder-33b-base" | "deepseek-coder-33b" => {
                Some(Self::deepseek_coder_33b())
            },
            "deepseek-ai/deepseek-coder-1.3b-instruct" | "deepseek-coder-1b-instruct" => {
                Some(Self::deepseek_coder_1b_instruct())
            },
            "deepseek-ai/deepseek-coder-6.7b-instruct" | "deepseek-coder-7b-instruct" => {
                Some(Self::deepseek_coder_7b_instruct())
            },
            "deepseek-ai/deepseek-coder-33b-instruct" | "deepseek-coder-33b-instruct" => {
                Some(Self::deepseek_coder_33b_instruct())
            },
            "Qwen/Qwen2.5-Coder-1.5B" | "qwen-coder-1.5b" => Some(Self::qwen_coder_1_5b()),
            "Qwen/Qwen2.5-Coder-7B" | "qwen-coder-7b" => Some(Self::qwen_coder_7b()),
            "Qwen/Qwen2.5-Coder-32B" | "qwen-coder-32b" => Some(Self::qwen_coder_32b()),
            _ => None,
        }
    }
    /// Short alias names accepted by `from_pretrained_name`.
    pub fn available_models() -> Vec<&'static str> {
        vec![
            "code-llama-7b",
            "code-llama-13b",
            "code-llama-34b",
            "code-llama-7b-instruct",
            "code-llama-13b-instruct",
            "code-llama-34b-instruct",
            "code-llama-7b-python",
            "code-llama-13b-python",
            "code-llama-34b-python",
            "starcoder-15b",
            "starcoder-base-15b",
            "starcoder2-7b",
            "starcoder2-15b",
            "deepseek-coder-1b",
            "deepseek-coder-7b",
            "deepseek-coder-33b",
            "deepseek-coder-1b-instruct",
            "deepseek-coder-7b-instruct",
            "deepseek-coder-33b-instruct",
            "qwen-coder-1.5b",
            "qwen-coder-7b",
            "qwen-coder-32b",
        ]
    }
    /// Validates this config: delegates to the base config's validation, then
    /// checks the code-specific invariants (`code_context_length > 0` and a
    /// non-empty `supported_languages` list).
    ///
    /// # Errors
    /// Returns an `invalid_config` error naming the offending field.
    pub fn validate(&self) -> Result<()> {
        self.base_config.validate()?;
        if self.code_context_length == 0 {
            return Err(invalid_config(
                "code_context_length",
                "Code context length must be greater than 0",
            ));
        }
        if self.supported_languages.is_empty() {
            return Err(invalid_config(
                "supported_languages",
                "At least one programming language must be supported",
            ));
        }
        Ok(())
    }
    /// The vocabulary size actually used: `code_vocab_size` when set,
    /// otherwise the base config's `vocab_size`.
    pub fn effective_vocab_size(&self) -> usize {
        self.code_vocab_size.unwrap_or(self.base_config.vocab_size)
    }
    /// Whether fill-in-the-middle completion is enabled.
    pub fn supports_fim(&self) -> bool {
        self.fill_in_middle
    }
    /// Case-insensitive membership test against `supported_languages`.
    pub fn supports_language(&self, language: &str) -> bool {
        self.supported_languages.iter().any(|lang| lang.eq_ignore_ascii_case(language))
    }
    /// Human-readable architecture name for the configured variant.
    pub fn architecture(&self) -> &'static str {
        match self.model_variant {
            CodeModelVariant::CodeLlama => "CodeLlama",
            CodeModelVariant::CodeLlamaInstruct => "CodeLlama-Instruct",
            CodeModelVariant::CodeLlamaPython => "CodeLlama-Python",
            CodeModelVariant::StarCoder => "StarCoder",
            CodeModelVariant::StarCoderBase => "StarCoderBase",
            CodeModelVariant::StarCoder2 => "StarCoder2",
            CodeModelVariant::DeepSeekCoder => "DeepSeekCoder",
            CodeModelVariant::DeepSeekCoderInstruct => "DeepSeekCoder-Instruct",
            CodeModelVariant::QwenCoder => "QwenCoder",
        }
    }
}
/// A base transformer specialized for code: wraps a `LlamaModel` built from
/// the `base_config` embedded in a `CodeSpecializedConfig`.
pub struct CodeSpecializedModel {
    // Underlying Llama-family transformer that performs the forward pass.
    base_model: LlamaModel,
    // Full specialized configuration, retained for capability queries.
    config: CodeSpecializedConfig,
}
impl CodeSpecializedModel {
pub fn new(config: CodeSpecializedConfig) -> Result<Self> {
config.validate()?;
let base_model = LlamaModel::new(config.base_config.clone())?;
Ok(Self { base_model, config })
}
pub fn config(&self) -> &CodeSpecializedConfig {
&self.config
}
pub fn supports_fim(&self) -> bool {
self.config.supports_fim()
}
pub fn supports_language(&self, language: &str) -> bool {
self.config.supports_language(language)
}
pub fn supported_languages(&self) -> &[String] {
&self.config.supported_languages
}
pub fn from_pretrained_name(name: &str) -> Result<Self> {
let config = CodeSpecializedConfig::from_pretrained_name(name)
.ok_or_else(|| Error::msg(format!("Unknown code model: {}", name)))?;
Self::new(config)
}
}
impl Layer for CodeSpecializedModel {
    // Token ids in, tensor out — delegated unchanged to the base model.
    type Input = Vec<u32>; type Output = Tensor;
    /// Forwards `input` token ids through the wrapped `LlamaModel`.
    fn forward(&self, input: Self::Input) -> Result<Self::Output> {
        self.base_model.forward(input)
    }
}
/// Causal-LM head on top of `CodeSpecializedModel`: projects hidden states to
/// vocabulary logits through an untied linear layer.
pub struct CodeSpecializedForCausalLM {
    // Base model producing hidden states.
    model: CodeSpecializedModel,
    // Projection to `effective_vocab_size()` logits (bias disabled in `new`).
    lm_head: trustformers_core::layers::Linear,
}
impl CodeSpecializedForCausalLM {
pub fn new(config: CodeSpecializedConfig) -> Result<Self> {
let vocab_size = config.effective_vocab_size();
let hidden_size = config.base_config.hidden_size;
let model = CodeSpecializedModel::new(config)?;
let lm_head = trustformers_core::layers::Linear::new(hidden_size, vocab_size, false);
Ok(Self { model, lm_head })
}
pub fn config(&self) -> &CodeSpecializedConfig {
self.model.config()
}
pub fn from_pretrained_name(name: &str) -> Result<Self> {
let config = CodeSpecializedConfig::from_pretrained_name(name)
.ok_or_else(|| Error::msg(format!("Unknown code model: {}", name)))?;
Self::new(config)
}
}
impl Layer for CodeSpecializedForCausalLM {
    type Input = Vec<u32>;
    type Output = Tensor;

    /// Runs the base model on `input`, then projects the resulting hidden
    /// states through the LM head to produce logits.
    fn forward(&self, input: Self::Input) -> Result<Self::Output> {
        self.lm_head.forward(self.model.forward(input)?)
    }
}
// Family-specific aliases: all four families share the same implementation
// and differ only in their preset configurations.
// CodeLlama family.
pub type CodeLlamaConfig = CodeSpecializedConfig;
pub type CodeLlamaModel = CodeSpecializedModel;
pub type CodeLlamaForCausalLM = CodeSpecializedForCausalLM;
// StarCoder / StarCoder2 family.
pub type StarCoderConfig = CodeSpecializedConfig;
pub type StarCoderModel = CodeSpecializedModel;
pub type StarCoderForCausalLM = CodeSpecializedForCausalLM;
// DeepSeek-Coder family.
pub type DeepSeekCoderConfig = CodeSpecializedConfig;
pub type DeepSeekCoderModel = CodeSpecializedModel;
pub type DeepSeekCoderForCausalLM = CodeSpecializedForCausalLM;
// Qwen2.5-Coder family.
pub type QwenCoderConfig = CodeSpecializedConfig;
pub type QwenCoderModel = CodeSpecializedModel;
pub type QwenCoderForCausalLM = CodeSpecializedForCausalLM;
#[cfg(test)]
mod tests {
    use super::*;
    // Preset sanity: CodeLlama 7B vocab/context/variant and FIM enabled.
    #[test]
    fn test_code_specialized_config_creation() {
        let config = CodeSpecializedConfig::code_llama_7b();
        assert_eq!(config.base_config.vocab_size, 32016);
        assert_eq!(config.code_context_length, 16384);
        assert_eq!(config.model_variant, CodeModelVariant::CodeLlama);
        assert!(config.supports_fim());
    }
    // StarCoder preset uses its own vocab, 8K context, and `<fim_*>` tokens.
    #[test]
    fn test_starcoder_config() {
        let config = CodeSpecializedConfig::starcoder_15b();
        assert_eq!(config.base_config.vocab_size, 49152);
        assert_eq!(config.code_context_length, 8192);
        assert_eq!(config.model_variant, CodeModelVariant::StarCoder);
        assert_eq!(config.special_tokens.fim_prefix, "<fim_prefix>");
    }
    // DeepSeek-Coder preset sanity.
    #[test]
    fn test_deepseek_coder_config() {
        let config = CodeSpecializedConfig::deepseek_coder_7b();
        assert_eq!(config.base_config.vocab_size, 32000);
        assert_eq!(config.code_context_length, 16384);
        assert_eq!(config.model_variant, CodeModelVariant::DeepSeekCoder);
    }
    // Qwen2.5-Coder preset sanity (128K context).
    #[test]
    fn test_qwen_coder_config() {
        let config = CodeSpecializedConfig::qwen_coder_7b();
        assert_eq!(config.base_config.vocab_size, 151936);
        assert_eq!(config.code_context_length, 131072);
        assert_eq!(config.model_variant, CodeModelVariant::QwenCoder);
    }
    // Known aliases resolve; unknown names yield None.
    #[test]
    fn test_from_pretrained_name() {
        let config = CodeSpecializedConfig::from_pretrained_name("code-llama-7b");
        assert!(config.is_some());
        let config = config.expect("operation failed");
        assert_eq!(config.model_variant, CodeModelVariant::CodeLlama);
        let config = CodeSpecializedConfig::from_pretrained_name("starcoder-15b");
        assert!(config.is_some());
        let config = config.expect("operation failed");
        assert_eq!(config.model_variant, CodeModelVariant::StarCoder);
        let config = CodeSpecializedConfig::from_pretrained_name("unknown-model");
        assert!(config.is_none());
    }
    // The alias catalog covers every family and has the expected size.
    #[test]
    fn test_available_models() {
        let models = CodeSpecializedConfig::available_models();
        assert!(models.contains(&"code-llama-7b"));
        assert!(models.contains(&"starcoder-15b"));
        assert!(models.contains(&"deepseek-coder-7b"));
        assert!(models.contains(&"qwen-coder-7b"));
        assert!(models.len() >= 20); }
    // Language matching is case-insensitive; unlisted languages are rejected.
    #[test]
    fn test_language_support() {
        let config = CodeSpecializedConfig::default();
        assert!(config.supports_language("python"));
        assert!(config.supports_language("Python"));
        assert!(config.supports_language("PYTHON"));
        assert!(config.supports_language("rust"));
        assert!(!config.supports_language("cobol"));
    }
    // Python-specialized presets narrow the language list to python only.
    #[test]
    fn test_python_specialized_config() {
        let config = CodeSpecializedConfig::code_llama_7b_python();
        assert_eq!(config.supported_languages.len(), 1);
        assert!(config.supports_language("python"));
        assert!(!config.supports_language("java"));
        assert_eq!(config.model_variant, CodeModelVariant::CodeLlamaPython);
    }
    // Instruct constructors only retarget the variant tag.
    #[test]
    fn test_instruct_variants() {
        let config = CodeSpecializedConfig::code_llama_7b_instruct();
        assert_eq!(config.model_variant, CodeModelVariant::CodeLlamaInstruct);
        let config = CodeSpecializedConfig::deepseek_coder_7b_instruct();
        assert_eq!(
            config.model_variant,
            CodeModelVariant::DeepSeekCoderInstruct
        );
    }
    // validate() rejects zero context length and an empty language list.
    #[test]
    fn test_config_validation() {
        let config = CodeSpecializedConfig::default();
        assert!(config.validate().is_ok());
        let invalid_config = CodeSpecializedConfig {
            code_context_length: 0,
            ..CodeSpecializedConfig::default()
        };
        assert!(invalid_config.validate().is_err());
        let mut invalid_config = CodeSpecializedConfig::default();
        invalid_config.supported_languages.clear();
        assert!(invalid_config.validate().is_err());
    }
    // architecture() reflects the variant tag.
    #[test]
    fn test_architecture_names() {
        let config = CodeSpecializedConfig::code_llama_7b();
        assert_eq!(config.architecture(), "CodeLlama");
        let config = CodeSpecializedConfig::starcoder_15b();
        assert_eq!(config.architecture(), "StarCoder");
        let config = CodeSpecializedConfig::deepseek_coder_7b_instruct();
        assert_eq!(config.architecture(), "DeepSeekCoder-Instruct");
    }
    // code_vocab_size takes precedence; None falls back to base vocab.
    #[test]
    fn test_effective_vocab_size() {
        let config = CodeSpecializedConfig::code_llama_7b();
        assert_eq!(config.effective_vocab_size(), 32016);
        let mut config = CodeSpecializedConfig {
            code_vocab_size: None,
            ..CodeSpecializedConfig::default()
        };
        config.base_config.vocab_size = 50000;
        assert_eq!(config.effective_vocab_size(), 50000);
    }
    // Smoke test: a tiny config builds both the base model and the LM head.
    #[test]
    fn test_model_creation() {
        let config = CodeSpecializedConfig {
            base_config: LlamaConfig {
                vocab_size: 1000,
                hidden_size: 64,
                intermediate_size: 256,
                num_hidden_layers: 2,
                num_attention_heads: 4,
                max_position_embeddings: 512,
                ..LlamaConfig::default()
            },
            code_context_length: 512,
            ..CodeSpecializedConfig::default()
        };
        let model = CodeSpecializedModel::new(config.clone());
        assert!(model.is_ok());
        let model = model.expect("operation failed");
        assert!(model.supports_fim());
        assert!(model.supports_language("python"));
        let causal_lm = CodeSpecializedForCausalLM::new(config);
        assert!(causal_lm.is_ok());
    }
    // GQA presets carry the expected key/value head counts.
    #[test]
    fn test_grouped_query_attention_configs() {
        let config = CodeSpecializedConfig::code_llama_34b();
        assert_eq!(config.base_config.num_key_value_heads, Some(8));
        let config = CodeSpecializedConfig::starcoder2_7b();
        assert_eq!(config.base_config.num_key_value_heads, Some(4));
        let config = CodeSpecializedConfig::deepseek_coder_33b();
        assert_eq!(config.base_config.num_key_value_heads, Some(8));
    }
    // Context lengths differ per family (16K / 8K / 128K).
    #[test]
    fn test_context_lengths() {
        let config = CodeSpecializedConfig::code_llama_7b();
        assert_eq!(config.code_context_length, 16384);
        let config = CodeSpecializedConfig::starcoder_15b();
        assert_eq!(config.code_context_length, 8192);
        let config = CodeSpecializedConfig::qwen_coder_7b();
        assert_eq!(config.code_context_length, 131072);
    }
}