rlx_locateanything/
processor_prompt.rs1use crate::config::LocateAnythingConfig;
23use anyhow::{Context, Result};
24use serde::Deserialize;
25use std::path::Path;
26
27#[derive(Debug, Clone, Deserialize)]
28pub struct ProcessorPromptConfig {
29 pub image_start_token: String,
30 pub image_end_token: String,
31 pub image_token: String,
32}
33
34impl ProcessorPromptConfig {
35 pub fn from_model_dir(model_dir: &Path) -> Result<Self> {
36 let path = model_dir.join("processor_config.json");
37 let raw = std::fs::read_to_string(&path).with_context(|| format!("read {path:?}"))?;
38 serde_json::from_str(&raw).with_context(|| format!("parse {path:?}"))
39 }
40
41 pub fn expand_image_placeholder(&self, text: &str, n_image_tokens: usize) -> String {
43 let span = format!(
44 "<image 1>{}{}{}",
45 self.image_start_token,
46 self.image_token.repeat(n_image_tokens),
47 self.image_end_token
48 );
49 text.replace("<image-1>", &span)
50 }
51
52 pub fn build_chat_prompt_string(
54 &self,
55 user_body_with_placeholder: &str,
56 n_image_tokens: usize,
57 ) -> String {
58 let user_expanded =
59 self.expand_image_placeholder(user_body_with_placeholder, n_image_tokens);
60 format!(
61 "<|im_start|>system\nYou are a helpful assistant.\n<|im_end|>\n\
62 <|im_start|>user\n\
63 {user_expanded}\
64 <|im_end|>\n\
65 <|im_start|>assistant\n"
66 )
67 }
68}
69
/// Builds the prompt as token ids, segment by segment.
///
/// The layout is: a fixed system turn, the user-turn header, the literal
/// `<image 1>`, the image start token, `n_image_tokens` copies of
/// `cfg.image_token_index`, the image end token, the user body, and finally
/// the end-of-turn marker plus an opened assistant turn.
///
/// The image span is always emitted at the start of the user turn. A leading
/// `<image-1>` placeholder is stripped from `user_text_with_placeholder`;
/// any other occurrence is left in place and tokenized as ordinary text.
///
/// `processor_config.json` is read from `model_dir` on every call.
///
/// # Errors
///
/// Returns an error if the processor config cannot be loaded or if encoding
/// any text segment fails.
#[cfg(feature = "tokenizer")]
pub fn build_processor_prompt_ids(
    model_dir: &Path,
    cfg: &LocateAnythingConfig,
    tokenizer: &tokenizers::Tokenizer,
    user_text_with_placeholder: &str,
    n_image_tokens: usize,
) -> Result<Vec<u32>> {
    let prompt_cfg = ProcessorPromptConfig::from_model_dir(model_dir)?;
    let user_body = match user_text_with_placeholder.strip_prefix("<image-1>") {
        Some(rest) => rest,
        None => user_text_with_placeholder,
    };

    let encode = |text: &str| crate::tokenizer::encode(tokenizer, text);

    let mut ids = encode("<|im_start|>system\nYou are a helpful assistant.\n<|im_end|>\n")?;

    // Segments that precede the image-token slots.
    for segment in [
        "<|im_start|>user\n",
        "<image 1>",
        prompt_cfg.image_start_token.as_str(),
    ] {
        ids.extend(encode(segment)?);
    }

    // Image slots are filled with the configured id directly, not by tokenizing text.
    ids.extend(std::iter::repeat_n(cfg.image_token_index, n_image_tokens));

    // Segments that follow the image-token slots.
    for segment in [
        prompt_cfg.image_end_token.as_str(),
        user_body,
        "<|im_end|>\n<|im_start|>assistant\n",
    ] {
        ids.extend(encode(segment)?);
    }

    Ok(ids)
}
110
/// Returns the `ground_single` prompt for `phrase`, prefixed with the
/// `<image-1>` placeholder.
#[cfg(feature = "tokenizer")]
pub fn ground_single_with_image_placeholder(phrase: &str) -> String {
    let grounding_prompt = crate::prompts::ground_single(phrase);
    format!("<image-1>{grounding_prompt}")
}