use crate::config::LocateAnythingConfig;
use anyhow::{Context, Result};
use serde::Deserialize;
use std::path::Path;
/// Image-related special-token strings used when assembling a prompt.
///
/// [`ProcessorPromptConfig::from_model_dir`] deserializes this from the
/// `processor_config.json` file in a model directory; the field names double
/// as the JSON keys.
#[derive(Debug, Clone, Deserialize)]
pub struct ProcessorPromptConfig {
    /// Text placed immediately before the run of image tokens.
    pub image_start_token: String,
    /// Text placed immediately after the run of image tokens.
    pub image_end_token: String,
    /// Text of a single image token; repeated once per image slot when a
    /// placeholder is expanded.
    pub image_token: String,
}
impl ProcessorPromptConfig {
pub fn from_model_dir(model_dir: &Path) -> Result<Self> {
let path = model_dir.join("processor_config.json");
let raw = std::fs::read_to_string(&path).with_context(|| format!("read {path:?}"))?;
serde_json::from_str(&raw).with_context(|| format!("parse {path:?}"))
}
pub fn expand_image_placeholder(&self, text: &str, n_image_tokens: usize) -> String {
let span = format!(
"<image 1>{}{}{}",
self.image_start_token,
self.image_token.repeat(n_image_tokens),
self.image_end_token
);
text.replace("<image-1>", &span)
}
pub fn build_chat_prompt_string(
&self,
user_body_with_placeholder: &str,
n_image_tokens: usize,
) -> String {
let user_expanded =
self.expand_image_placeholder(user_body_with_placeholder, n_image_tokens);
format!(
"<|im_start|>system\nYou are a helpful assistant.\n<|im_end|>\n\
<|im_start|>user\n\
{user_expanded}\
<|im_end|>\n\
<|im_start|>assistant\n"
)
}
}
/// Builds the chat prompt as token ids, encoding it piece by piece.
///
/// The pieces are encoded separately and concatenated in this order: the fixed
/// system turn, the user-turn opener, the `<image 1>` label, the image start
/// token, `n_image_tokens` copies of `cfg.image_token_index`, the image end
/// token, the user text, and finally the user-turn close plus assistant-turn
/// opener.
///
/// A leading `<image-1>` placeholder is stripped from
/// `user_text_with_placeholder`. If the text does not start with it, the text
/// is encoded as-is and the image span is still emitted ahead of it.
///
/// # Errors
///
/// Returns an error if `processor_config.json` in `model_dir` cannot be loaded
/// or if encoding any piece fails.
#[cfg(feature = "tokenizer")]
pub fn build_processor_prompt_ids(
    model_dir: &Path,
    cfg: &LocateAnythingConfig,
    tokenizer: &tokenizers::Tokenizer,
    user_text_with_placeholder: &str,
    n_image_tokens: usize,
) -> Result<Vec<u32>> {
    let prompt_cfg = ProcessorPromptConfig::from_model_dir(model_dir)?;
    let body = match user_text_with_placeholder.strip_prefix("<image-1>") {
        Some(rest) => rest,
        None => user_text_with_placeholder,
    };

    let mut prompt_ids = crate::tokenizer::encode(
        tokenizer,
        "<|im_start|>system\nYou are a helpful assistant.\n<|im_end|>\n",
    )?;

    // Pieces that precede the image-token run, in emission order.
    for piece in [
        "<|im_start|>user\n",
        "<image 1>",
        prompt_cfg.image_start_token.as_str(),
    ] {
        prompt_ids.extend(crate::tokenizer::encode(tokenizer, piece)?);
    }

    // The image slots are filled with the configured id directly rather than
    // by encoding the image token text.
    prompt_ids.extend(std::iter::repeat_n(cfg.image_token_index, n_image_tokens));

    // Pieces that follow the image-token run, in emission order.
    for piece in [
        prompt_cfg.image_end_token.as_str(),
        body,
        "<|im_end|>\n<|im_start|>assistant\n",
    ] {
        prompt_ids.extend(crate::tokenizer::encode(tokenizer, piece)?);
    }

    Ok(prompt_ids)
}
/// Returns the single-phrase grounding prompt for `phrase`, prefixed with the
/// `<image-1>` placeholder.
#[cfg(feature = "tokenizer")]
pub fn ground_single_with_image_placeholder(phrase: &str) -> String {
    let grounding_prompt = crate::prompts::ground_single(phrase);
    format!("<image-1>{grounding_prompt}")
}