1use std::collections::BTreeMap;
44use std::fs;
45use std::path::Path;
46
47use devboy_core::ToolValueModel;
48use serde::{Deserialize, Serialize};
49use thiserror::Error;
50
51use crate::token_counter::Tokenizer;
52
53#[derive(Error, Debug)]
54pub enum ConfigError {
55 #[error("adaptive-config I/O: {0}")]
56 Io(#[from] std::io::Error),
57 #[error("adaptive-config parse: {0}")]
58 Parse(#[from] toml::de::Error),
59 #[error("adaptive-config serialize: {0}")]
60 Serialize(#[from] toml::ser::Error),
61 #[error("adaptive-config unsupported schema version {0} (expected 1)")]
62 UnsupportedSchemaVersion(u32),
63}
64
65pub type Result<T> = std::result::Result<T, ConfigError>;
66
/// Schema version this build writes; accepted older documents are bumped to
/// it when loaded.
pub const CURRENT_SCHEMA_VERSION: u32 = 4;

/// Oldest schema version that loading still accepts.
pub const MIN_SUPPORTED_SCHEMA_VERSION: u32 = 1;
71
72#[derive(Debug, Clone, Serialize, Deserialize)]
74pub struct AdaptiveConfig {
75 #[serde(default = "default_schema_version")]
76 pub schema_version: u32,
77 #[serde(default)]
78 pub dedup: DedupConfig,
79 #[serde(default)]
80 pub templates: TemplatesConfig,
81 #[serde(default)]
82 pub mckp: MckpConfig,
83 #[serde(default)]
84 pub telemetry: TelemetryConfig,
85 #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
87 pub endpoint_overrides: BTreeMap<String, EndpointOverride>,
88 #[serde(default)]
90 pub profiles: ProfilesConfig,
91 #[serde(default)]
93 pub hints: HintsConfig,
94 #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
100 pub tools: BTreeMap<String, ToolValueModel>,
101 #[serde(default)]
104 pub enrichment: EnrichmentConfig,
105}
106
107fn default_schema_version() -> u32 {
108 CURRENT_SCHEMA_VERSION
109}
110
111impl Default for AdaptiveConfig {
112 fn default() -> Self {
113 Self {
114 schema_version: CURRENT_SCHEMA_VERSION,
115 dedup: DedupConfig::default(),
116 templates: TemplatesConfig::default(),
117 mckp: MckpConfig::default(),
118 telemetry: TelemetryConfig::default(),
119 endpoint_overrides: BTreeMap::new(),
120 profiles: ProfilesConfig::default(),
121 hints: HintsConfig::default(),
122 tools: BTreeMap::new(),
123 enrichment: EnrichmentConfig::default(),
124 }
125 }
126}
127
128impl AdaptiveConfig {
129 pub fn load_or_default(path: impl AsRef<Path>) -> Result<Self> {
132 let path = path.as_ref();
133 if !path.exists() {
134 return Ok(Self::default());
135 }
136 let s = fs::read_to_string(path)?;
137 let mut cfg: AdaptiveConfig = toml::from_str(&s)?;
138 cfg.upgrade_in_place()?;
139 Ok(cfg)
140 }
141
142 pub fn load(path: impl AsRef<Path>) -> Result<Self> {
144 let s = fs::read_to_string(path)?;
145 let mut cfg: AdaptiveConfig = toml::from_str(&s)?;
146 cfg.upgrade_in_place()?;
147 Ok(cfg)
148 }
149
150 fn upgrade_in_place(&mut self) -> Result<()> {
161 if self.schema_version > CURRENT_SCHEMA_VERSION {
162 return Err(ConfigError::UnsupportedSchemaVersion(self.schema_version));
163 }
164 if self.schema_version < MIN_SUPPORTED_SCHEMA_VERSION {
165 return Err(ConfigError::UnsupportedSchemaVersion(self.schema_version));
166 }
167 if self.schema_version < CURRENT_SCHEMA_VERSION {
169 self.schema_version = CURRENT_SCHEMA_VERSION;
170 }
171 Ok(())
172 }
173
174 pub fn save(&self, path: impl AsRef<Path>) -> Result<()> {
176 let path = path.as_ref();
177 if let Some(parent) = path.parent() {
178 fs::create_dir_all(parent)?;
179 }
180 let s = toml::to_string_pretty(self)?;
181 let tmp = path.with_extension("toml.tmp");
183 fs::write(&tmp, s)?;
184 fs::rename(&tmp, path)?;
185 Ok(())
186 }
187
188 pub fn effective_dedup_enabled(&self, endpoint: &str) -> bool {
192 if let Some(o) = self.endpoint_overrides.get(endpoint)
193 && let Some(v) = o.dedup_enabled
194 {
195 return v;
196 }
197 self.dedup.enabled_for(endpoint)
198 }
199
200 pub fn effective_min_body_chars(&self, endpoint: &str) -> usize {
203 self.endpoint_overrides
204 .get(endpoint)
205 .and_then(|o| o.min_body_chars)
206 .unwrap_or(self.dedup.min_body_chars)
207 }
208
209 pub fn effective_lru_size(&self, endpoint: &str) -> usize {
214 let per_ep = self
215 .endpoint_overrides
216 .get(endpoint)
217 .and_then(|o| o.lru_size);
218 match per_ep {
219 Some(n) => n.max(self.dedup.lru_size),
220 None => self.dedup.lru_size,
221 }
222 }
223
224 pub fn max_lru_size(&self) -> usize {
228 let mut n = self.dedup.lru_size;
229 for o in self.endpoint_overrides.values() {
230 if let Some(v) = o.lru_size {
231 n = n.max(v);
232 }
233 }
234 n.max(1)
235 }
236
237 pub fn effective_tokenizer_profile(&self) -> &TokenizerProfile {
242 let active = self.profiles.tokenizer.active.as_str();
243 let id = if active == "auto" || active.is_empty() {
244 "anthropic_class"
245 } else {
246 active
247 };
248 self.profiles
249 .tokenizer
250 .variants
251 .get(id)
252 .or_else(|| self.profiles.tokenizer.variants.get("anthropic_class"))
253 .unwrap_or_else(|| {
254 static FALLBACK: std::sync::OnceLock<TokenizerProfile> = std::sync::OnceLock::new();
259 FALLBACK.get_or_init(TokenizerProfile::default)
260 })
261 }
262
263 pub fn effective_token_count(&self, text: &str) -> usize {
268 self.effective_tokenizer_profile().count_tokens(text)
269 }
270
271 pub fn effective_template(&self, endpoint: &str) -> Option<&str> {
274 if let Some(o) = self.endpoint_overrides.get(endpoint)
275 && let Some(t) = o.template_id.as_deref()
276 {
277 return Some(t);
278 }
279 self.templates.template_for(endpoint)
280 }
281
282 pub fn effective_tool_value_model(&self, tool_name: &str) -> Option<&ToolValueModel> {
290 if let Some(m) = self.tools.get(tool_name) {
291 return Some(m);
292 }
293 self.tools.get("*")
294 }
295
296 pub fn merge_right_wins(&mut self, other: AdaptiveConfig) {
299 self.dedup = other.dedup;
300 self.templates = other.templates;
301 self.mckp = other.mckp;
302 self.telemetry = other.telemetry;
303 self.profiles = other.profiles;
304 self.hints = other.hints;
305 for (k, v) in other.endpoint_overrides {
306 self.endpoint_overrides.insert(k, v);
307 }
308 for (k, v) in other.tools {
312 self.tools.insert(k, v);
313 }
314 }
315}
316
317#[derive(Debug, Clone, Serialize, Deserialize)]
320pub struct DedupConfig {
321 #[serde(default = "default_lru_size")]
323 pub lru_size: usize,
324 #[serde(default)]
326 pub hint_verbosity: HintVerbosity,
327 #[serde(default)]
329 pub near_ref_enabled: bool,
330 #[serde(default = "default_min_body_chars")]
332 pub min_body_chars: usize,
333 #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
335 pub enabled_per_endpoint: BTreeMap<String, bool>,
336}
337
/// Serde default for `DedupConfig::lru_size`.
fn default_lru_size() -> usize {
    const LRU_SIZE: usize = 5;
    LRU_SIZE
}

/// Serde default for `DedupConfig::min_body_chars`.
fn default_min_body_chars() -> usize {
    const MIN_BODY_CHARS: usize = 200;
    MIN_BODY_CHARS
}
344
345impl Default for DedupConfig {
346 fn default() -> Self {
347 Self {
348 lru_size: default_lru_size(),
349 hint_verbosity: HintVerbosity::Standard,
350 near_ref_enabled: false,
351 min_body_chars: default_min_body_chars(),
352 enabled_per_endpoint: BTreeMap::new(),
353 }
354 }
355}
356
357impl DedupConfig {
358 pub fn enabled_for(&self, endpoint: &str) -> bool {
360 self.enabled_per_endpoint
361 .get(endpoint)
362 .copied()
363 .unwrap_or(true)
364 }
365}
366
367#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
373#[serde(rename_all = "snake_case")]
374pub enum HintVerbosity {
375 Terse,
377 #[default]
379 Standard,
380 Verbose,
382}
383
384impl HintVerbosity {
385 pub fn to_runtime(self) -> crate::dedup::HintVerbosity {
388 match self {
389 Self::Terse => crate::dedup::HintVerbosity::Terse,
390 Self::Standard => crate::dedup::HintVerbosity::Standard,
391 Self::Verbose => crate::dedup::HintVerbosity::Verbose,
392 }
393 }
394}
395
396#[derive(Debug, Clone, Serialize, Deserialize)]
399pub struct TemplatesConfig {
400 #[serde(default = "default_active_templates")]
402 pub active: Vec<String>,
403 #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
405 pub endpoint_overrides: BTreeMap<String, String>,
406}
407
/// Serde default for `TemplatesConfig::active`: the three built-in template
/// ids, in a fixed order.
fn default_active_templates() -> Vec<String> {
    const BUILT_IN: [&str; 3] = ["csv_from_md", "pipeline_deep_mckp", "mr_diff_fence"];
    BUILT_IN.iter().map(|id| (*id).to_owned()).collect()
}
415
416impl Default for TemplatesConfig {
417 fn default() -> Self {
418 Self {
419 active: default_active_templates(),
420 endpoint_overrides: BTreeMap::new(),
421 }
422 }
423}
424
425impl TemplatesConfig {
426 pub fn is_template_active(&self, id: &str) -> bool {
427 self.active.iter().any(|s| s == id)
428 }
429 pub fn template_for(&self, endpoint: &str) -> Option<&str> {
430 self.endpoint_overrides.get(endpoint).map(String::as_str)
431 }
432}
433
434#[derive(Debug, Clone, Serialize, Deserialize)]
437pub struct MckpConfig {
438 #[serde(default = "default_recursion_depth")]
440 pub recursion_depth: usize,
441 #[serde(default = "default_formats_enabled")]
443 pub formats_enabled: Vec<String>,
444 #[serde(default)]
445 pub shape_thresholds: ShapeThresholds,
446}
447
/// Serde default for the MCKP recursion depth.
fn default_recursion_depth() -> usize {
    const DEPTH: usize = 5;
    DEPTH
}

/// Serde default for `MckpConfig::formats_enabled`: the five built-in format
/// ids, in a fixed order.
fn default_formats_enabled() -> Vec<String> {
    const BUILT_IN: [&str; 5] = ["csv_from_md", "deep_mckp", "kv", "csv", "json_compact"];
    BUILT_IN.iter().map(|id| (*id).to_owned()).collect()
}
461
462impl Default for MckpConfig {
463 fn default() -> Self {
464 Self {
465 recursion_depth: default_recursion_depth(),
466 formats_enabled: default_formats_enabled(),
467 shape_thresholds: ShapeThresholds::default(),
468 }
469 }
470}
471
472impl MckpConfig {
473 pub fn format_enabled(&self, id: &str) -> bool {
474 self.formats_enabled.iter().any(|s| s == id)
475 }
476}
477
478#[derive(Debug, Clone, Serialize, Deserialize)]
479pub struct ShapeThresholds {
480 #[serde(default = "thr_md_cols")]
482 pub markdown_table_min_cols: usize,
483 #[serde(default = "thr_arr_items")]
485 pub array_of_objects_min_items: usize,
486 #[serde(default = "thr_key_stability")]
488 pub array_of_objects_min_key_stability: f32,
489 #[serde(default = "thr_flat_fields")]
491 pub flat_object_min_fields: usize,
492}
493
/// Serde default for `markdown_table_min_cols`.
fn thr_md_cols() -> usize {
    2_usize
}

/// Serde default for `array_of_objects_min_items`.
fn thr_arr_items() -> usize {
    4_usize
}

/// Serde default for `array_of_objects_min_key_stability`.
fn thr_key_stability() -> f32 {
    0.7_f32
}

/// Serde default for `flat_object_min_fields`.
fn thr_flat_fields() -> usize {
    8_usize
}
506
507impl Default for ShapeThresholds {
508 fn default() -> Self {
509 Self {
510 markdown_table_min_cols: thr_md_cols(),
511 array_of_objects_min_items: thr_arr_items(),
512 array_of_objects_min_key_stability: thr_key_stability(),
513 flat_object_min_fields: thr_flat_fields(),
514 }
515 }
516}
517
518#[derive(Debug, Clone, Serialize, Deserialize)]
521pub struct TelemetryConfig {
522 #[serde(default = "default_telemetry_enabled")]
526 pub enabled: bool,
527 #[serde(default, skip_serializing_if = "Option::is_none")]
530 pub path: Option<String>,
531 #[serde(default = "default_rotate_mib")]
538 pub rotate_mib: u32,
539 #[serde(default = "default_sample_rate")]
541 pub sample_rate: f32,
542 #[serde(default = "default_flush_every")]
544 pub flush_every_n: usize,
545}
546
/// Serde default for `TelemetryConfig::enabled`.
fn default_telemetry_enabled() -> bool {
    const ENABLED: bool = false;
    ENABLED
}

/// Serde default for `TelemetryConfig::rotate_mib`.
fn default_rotate_mib() -> u32 {
    100_u32
}

/// Serde default for `TelemetryConfig::sample_rate`.
fn default_sample_rate() -> f32 {
    1.0_f32
}

/// Serde default for `TelemetryConfig::flush_every_n`.
fn default_flush_every() -> usize {
    25_usize
}
559
560impl Default for TelemetryConfig {
561 fn default() -> Self {
562 Self {
563 enabled: default_telemetry_enabled(),
564 path: None,
565 rotate_mib: default_rotate_mib(),
566 sample_rate: default_sample_rate(),
567 flush_every_n: default_flush_every(),
568 }
569 }
570}
571
572#[derive(Debug, Clone, Serialize, Deserialize)]
581pub struct EnrichmentConfig {
582 #[serde(default = "default_enrichment_enabled")]
587 pub enabled: bool,
588
589 #[serde(default = "default_max_parallel_prefetches")]
595 pub max_parallel_prefetches: u32,
596
597 #[serde(default = "default_prefetch_budget_tokens")]
602 pub prefetch_budget_tokens: u32,
603
604 #[serde(default = "default_prefetch_timeout_ms")]
614 pub prefetch_timeout_ms: u32,
615
616 #[serde(default = "default_respect_rate_limits")]
622 pub respect_rate_limits: bool,
623}
624
/// Serde default for `EnrichmentConfig::enabled`.
fn default_enrichment_enabled() -> bool {
    const ENABLED: bool = false;
    ENABLED
}

/// Serde default for `EnrichmentConfig::max_parallel_prefetches`.
fn default_max_parallel_prefetches() -> u32 {
    3_u32
}

/// Serde default for `EnrichmentConfig::prefetch_budget_tokens`.
fn default_prefetch_budget_tokens() -> u32 {
    8_000_u32
}

/// Serde default for `EnrichmentConfig::prefetch_timeout_ms`.
fn default_prefetch_timeout_ms() -> u32 {
    1_000_u32
}

/// Serde default for `EnrichmentConfig::respect_rate_limits`.
fn default_respect_rate_limits() -> bool {
    const RESPECT: bool = true;
    RESPECT
}
640
641impl Default for EnrichmentConfig {
642 fn default() -> Self {
643 Self {
644 enabled: default_enrichment_enabled(),
645 max_parallel_prefetches: default_max_parallel_prefetches(),
646 prefetch_budget_tokens: default_prefetch_budget_tokens(),
647 prefetch_timeout_ms: default_prefetch_timeout_ms(),
648 respect_rate_limits: default_respect_rate_limits(),
649 }
650 }
651}
652
653#[derive(Debug, Clone, Default, Serialize, Deserialize)]
658pub struct EndpointOverride {
659 #[serde(default, skip_serializing_if = "Option::is_none")]
660 pub dedup_enabled: Option<bool>,
661 #[serde(default, skip_serializing_if = "Option::is_none")]
662 pub lru_size: Option<usize>,
663 #[serde(default, skip_serializing_if = "Option::is_none")]
664 pub template_id: Option<String>,
665 #[serde(default, skip_serializing_if = "Option::is_none")]
666 pub min_body_chars: Option<usize>,
667}
668
669#[derive(Debug, Clone, Default, Serialize, Deserialize)]
685pub struct ProfilesConfig {
686 #[serde(default)]
687 pub tokenizer: TokenizerProfilesConfig,
688 #[serde(default)]
689 pub llm: LlmProfilesConfig,
690 #[serde(default)]
691 pub agent: AgentProfilesConfig,
692 #[serde(default)]
693 pub data: DataProfilesConfig,
694}
695
696#[derive(Debug, Clone, Serialize, Deserialize)]
710pub struct TokenizerProfile {
711 pub chars_per_token: f32,
714 #[serde(default)]
717 pub bpe: Tokenizer,
718 #[serde(default = "default_inline_json_cost")]
721 pub inline_json_cost: f32,
722 #[serde(default = "default_toon_overhead")]
725 pub toon_overhead: f32,
726 #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
728 pub format_factors: BTreeMap<String, f32>,
729}
730
/// Serde default for `TokenizerProfile::inline_json_cost`.
fn default_inline_json_cost() -> f32 {
    1.0_f32
}

/// Serde default for `TokenizerProfile::toon_overhead`.
fn default_toon_overhead() -> f32 {
    1.0_f32
}
737
738impl Default for TokenizerProfile {
739 fn default() -> Self {
740 Self {
741 chars_per_token: 4.0,
742 bpe: Tokenizer::Heuristic,
743 inline_json_cost: default_inline_json_cost(),
744 toon_overhead: default_toon_overhead(),
745 format_factors: BTreeMap::new(),
746 }
747 }
748}
749
750impl TokenizerProfile {
751 pub fn count_tokens(&self, text: &str) -> usize {
756 if text.is_empty() {
757 return 0;
758 }
759 match self.bpe {
760 Tokenizer::Heuristic => {
761 let cpt = if self.chars_per_token > 0.0 {
762 self.chars_per_token as f64
763 } else {
764 3.5
765 };
766 (text.len() as f64 / cpt).ceil() as usize
767 }
768 tk => tk.count(text),
769 }
770 }
771}
772
773#[derive(Debug, Clone, Serialize, Deserialize)]
774pub struct TokenizerProfilesConfig {
775 #[serde(default = "default_active_auto")]
777 pub active: String,
778 #[serde(default = "default_tokenizer_variants")]
779 pub variants: BTreeMap<String, TokenizerProfile>,
780}
781
/// Serde default for the `active` key of the profile sections: `"auto"`.
fn default_active_auto() -> String {
    String::from("auto")
}
785
786fn default_tokenizer_variants() -> BTreeMap<String, TokenizerProfile> {
787 let mut m = BTreeMap::new();
788 m.insert(
789 "anthropic_class".into(),
790 TokenizerProfile {
791 chars_per_token: 3.5,
792 bpe: Tokenizer::O200kBase,
793 inline_json_cost: 2.2,
794 toon_overhead: 1.13,
795 format_factors: BTreeMap::new(),
796 },
797 );
798 m.insert(
799 "openai_o200k".into(),
800 TokenizerProfile {
801 chars_per_token: 4.0,
802 bpe: Tokenizer::O200kBase,
803 inline_json_cost: 1.0,
804 toon_overhead: 0.60,
805 format_factors: BTreeMap::new(),
806 },
807 );
808 m.insert(
809 "openai_cl100k".into(),
810 TokenizerProfile {
811 chars_per_token: 3.7,
812 bpe: Tokenizer::Cl100kBase,
813 inline_json_cost: 1.0,
814 toon_overhead: 0.60,
815 format_factors: BTreeMap::new(),
816 },
817 );
818 m.insert(
819 "ollama_bpe".into(),
820 TokenizerProfile {
821 chars_per_token: 3.8,
822 bpe: Tokenizer::Heuristic,
823 inline_json_cost: 1.0,
824 toon_overhead: 1.00,
825 format_factors: BTreeMap::new(),
826 },
827 );
828 m
829}
830
831impl Default for TokenizerProfilesConfig {
832 fn default() -> Self {
833 Self {
834 active: default_active_auto(),
835 variants: default_tokenizer_variants(),
836 }
837 }
838}
839
840impl TokenizerProfilesConfig {
841 pub fn get(&self, id: &str) -> Option<&TokenizerProfile> {
843 self.variants.get(id)
844 }
845}
846
847#[derive(Debug, Clone, Serialize, Deserialize)]
850pub struct LlmProfile {
851 pub tokenizer: String,
853 #[serde(default = "default_prefer_explicit_keys")]
857 pub prefer_explicit_keys: bool,
858 #[serde(default = "default_context_window")]
860 pub context_window: u32,
861 #[serde(default, skip_serializing_if = "Option::is_none")]
864 pub max_inline_nested: Option<u32>,
865}
866
/// Serde default for `LlmProfile::prefer_explicit_keys`.
fn default_prefer_explicit_keys() -> bool {
    const PREFER: bool = true;
    PREFER
}

/// Serde default for `LlmProfile::context_window`.
fn default_context_window() -> u32 {
    const WINDOW: u32 = 32_000;
    WINDOW
}
873
874impl Default for LlmProfile {
875 fn default() -> Self {
876 Self {
877 tokenizer: "ollama_bpe".to_string(),
878 prefer_explicit_keys: default_prefer_explicit_keys(),
879 context_window: default_context_window(),
880 max_inline_nested: None,
881 }
882 }
883}
884
885#[derive(Debug, Clone, Serialize, Deserialize)]
886pub struct LlmProfilesConfig {
887 #[serde(default = "default_active_auto")]
890 pub active: String,
891 #[serde(default = "default_llm_variants")]
892 pub variants: BTreeMap<String, LlmProfile>,
893}
894
895fn default_llm_variants() -> BTreeMap<String, LlmProfile> {
896 let mut m = BTreeMap::new();
897 m.insert(
898 "default".into(),
899 LlmProfile {
900 tokenizer: "openai_o200k".into(),
901 prefer_explicit_keys: true,
902 context_window: 32_000,
903 max_inline_nested: Some(256),
904 },
905 );
906 m.insert(
907 "glm-5.1".into(),
908 LlmProfile {
909 tokenizer: "anthropic_class".into(),
910 prefer_explicit_keys: true,
911 context_window: 128_000,
912 max_inline_nested: Some(128),
913 },
914 );
915 m.insert(
916 "claude-sonnet-4.6".into(),
917 LlmProfile {
918 tokenizer: "anthropic_class".into(),
919 prefer_explicit_keys: true,
920 context_window: 200_000,
921 max_inline_nested: Some(64),
922 },
923 );
924 m.insert(
925 "gpt-oss:20b".into(),
926 LlmProfile {
927 tokenizer: "ollama_bpe".into(),
928 prefer_explicit_keys: false,
929 context_window: 8_192,
930 max_inline_nested: Some(512),
931 },
932 );
933 m.insert(
934 "gemma4:26b".into(),
935 LlmProfile {
936 tokenizer: "ollama_bpe".into(),
937 prefer_explicit_keys: false,
938 context_window: 8_192,
939 max_inline_nested: Some(512),
940 },
941 );
942 m
943}
944
945impl Default for LlmProfilesConfig {
946 fn default() -> Self {
947 Self {
948 active: default_active_auto(),
949 variants: default_llm_variants(),
950 }
951 }
952}
953
954impl LlmProfilesConfig {
955 pub fn resolve<'a>(&'a self, session_model_id: Option<&str>) -> &'a LlmProfile {
958 let key: &str = if self.active == "auto" {
959 session_model_id.unwrap_or("default")
960 } else {
961 self.active.as_str()
962 };
963 self.variants
964 .get(key)
965 .or_else(|| self.variants.get("default"))
966 .unwrap_or_else(|| {
967 static FALLBACK: std::sync::OnceLock<LlmProfile> = std::sync::OnceLock::new();
970 FALLBACK.get_or_init(LlmProfile::default)
971 })
972 }
973}
974
975#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
978#[serde(rename_all = "snake_case")]
979pub enum Priority {
980 Latency,
981 #[default]
982 Balanced,
983 Accuracy,
984}
985
986#[derive(Debug, Clone, Serialize, Deserialize)]
987pub struct AgentProfile {
988 #[serde(default)]
989 pub priority: Priority,
990 #[serde(default = "default_recursion_depth")]
991 pub mckp_recursion_depth: usize,
992 #[serde(default = "default_hint_aggressiveness")]
994 pub hint_aggressiveness: f32,
995 #[serde(default)]
996 pub near_ref_enabled: bool,
997}
998
/// Serde default for `AgentProfile::hint_aggressiveness`.
fn default_hint_aggressiveness() -> f32 {
    0.5_f32
}
1002
1003impl Default for AgentProfile {
1004 fn default() -> Self {
1005 Self {
1006 priority: Priority::Balanced,
1007 mckp_recursion_depth: default_recursion_depth(),
1008 hint_aggressiveness: default_hint_aggressiveness(),
1009 near_ref_enabled: false,
1010 }
1011 }
1012}
1013
1014#[derive(Debug, Clone, Serialize, Deserialize)]
1015pub struct AgentProfilesConfig {
1016 #[serde(default = "default_active_auto")]
1017 pub active: String,
1018 #[serde(default = "default_auto_window")]
1020 pub auto_detect_window: usize,
1021 #[serde(default = "default_agent_variants")]
1022 pub variants: BTreeMap<String, AgentProfile>,
1023}
1024
/// Serde default for `AgentProfilesConfig::auto_detect_window`.
fn default_auto_window() -> usize {
    const WINDOW: usize = 50;
    WINDOW
}
1028
1029fn default_agent_variants() -> BTreeMap<String, AgentProfile> {
1030 let mut m = BTreeMap::new();
1031 m.insert("default".into(), AgentProfile::default());
1032 m.insert(
1033 "file_search_heavy".into(),
1034 AgentProfile {
1035 priority: Priority::Latency,
1036 mckp_recursion_depth: 3,
1037 hint_aggressiveness: 0.3,
1038 near_ref_enabled: false,
1039 },
1040 );
1041 m.insert(
1042 "marathon_refactor".into(),
1043 AgentProfile {
1044 priority: Priority::Accuracy,
1045 mckp_recursion_depth: 7,
1046 hint_aggressiveness: 0.7,
1047 near_ref_enabled: true,
1048 },
1049 );
1050 m
1051}
1052
1053impl Default for AgentProfilesConfig {
1054 fn default() -> Self {
1055 Self {
1056 active: default_active_auto(),
1057 auto_detect_window: default_auto_window(),
1058 variants: default_agent_variants(),
1059 }
1060 }
1061}
1062
1063impl AgentProfilesConfig {
1064 pub fn resolve<'a>(&'a self, stats: &SessionStats) -> &'a AgentProfile {
1067 let key: &str = if self.active == "auto" {
1068 classify_agent(stats)
1069 } else {
1070 self.active.as_str()
1071 };
1072 self.variants
1073 .get(key)
1074 .or_else(|| self.variants.get("default"))
1075 .unwrap_or_else(|| {
1076 static FALLBACK: std::sync::OnceLock<AgentProfile> = std::sync::OnceLock::new();
1077 FALLBACK.get_or_init(AgentProfile::default)
1078 })
1079 }
1080}
1081
1082fn classify_agent(stats: &SessionStats) -> &'static str {
1089 if stats.event_count >= 500 && stats.compaction_count >= 3 {
1090 "marathon_refactor"
1091 } else if stats.event_count <= 200 && stats.read_share >= 0.5 {
1092 "file_search_heavy"
1093 } else {
1094 "default"
1095 }
1096}
1097
1098#[derive(Debug, Clone, Serialize, Deserialize)]
1101pub struct DataProfile {
1102 pub endpoint_pattern: String,
1104 #[serde(default, skip_serializing_if = "Option::is_none")]
1106 pub preferred_format: Option<String>,
1107 #[serde(default, skip_serializing_if = "Vec::is_empty")]
1109 pub hint_set: Vec<String>,
1110}
1111
1112#[derive(Debug, Clone, Serialize, Deserialize)]
1113pub struct DataProfilesConfig {
1114 #[serde(default = "default_active_auto")]
1115 pub active: String,
1116 #[serde(default = "default_data_variants")]
1117 pub variants: BTreeMap<String, DataProfile>,
1118}
1119
1120fn default_data_variants() -> BTreeMap<String, DataProfile> {
1121 let mut m = BTreeMap::new();
1122 m.insert(
1123 "gitlab_issues".into(),
1124 DataProfile {
1125 endpoint_pattern: "mcp__gitlab__get_issues".into(),
1126 preferred_format: Some("csv_from_md".into()),
1127 hint_set: vec!["near_ref".into()],
1128 },
1129 );
1130 m.insert(
1131 "github_pulls".into(),
1132 DataProfile {
1133 endpoint_pattern: "mcp__github__list_pulls".into(),
1134 preferred_format: Some("csv_from_md".into()),
1135 hint_set: vec!["near_ref".into()],
1136 },
1137 );
1138 m.insert(
1139 "k8s_logs".into(),
1140 DataProfile {
1141 endpoint_pattern: "mcp__k8s__get_logs".into(),
1142 preferred_format: Some("pipeline_deep_mckp".into()),
1143 hint_set: vec!["timestamp_ref".into()],
1144 },
1145 );
1146 m.insert(
1147 "mr_diffs".into(),
1148 DataProfile {
1149 endpoint_pattern: "mcp__gitlab__get_mr_diff".into(),
1150 preferred_format: Some("mr_diff_fence".into()),
1151 hint_set: Vec::new(),
1152 },
1153 );
1154 m
1155}
1156
1157impl Default for DataProfilesConfig {
1158 fn default() -> Self {
1159 Self {
1160 active: default_active_auto(),
1161 variants: default_data_variants(),
1162 }
1163 }
1164}
1165
1166impl DataProfilesConfig {
1167 pub fn match_endpoint(&self, endpoint: &str) -> Option<&DataProfile> {
1169 if self.active != "auto" {
1171 return self.variants.get(&self.active);
1172 }
1173 self.variants
1174 .values()
1175 .find(|v| endpoint == v.endpoint_pattern || endpoint.starts_with(&v.endpoint_pattern))
1176 }
1177}
1178
1179#[derive(Debug, Clone, Serialize, Deserialize)]
1182pub struct HintTypeRule {
1183 #[serde(default = "default_true")]
1184 pub enabled: bool,
1185 #[serde(default, skip_serializing_if = "Option::is_none")]
1188 pub max_per_session: Option<u32>,
1189 #[serde(default = "default_any_model")]
1191 pub applies_to_models: Vec<String>,
1192}
1193
/// Serde default for boolean keys that are on unless stated otherwise.
fn default_true() -> bool {
    const ON: bool = true;
    ON
}

/// Serde default for `HintTypeRule::applies_to_models`: the single wildcard
/// `"*"`.
fn default_any_model() -> Vec<String> {
    let mut models = Vec::with_capacity(1);
    models.push(String::from("*"));
    models
}
1200
1201impl Default for HintTypeRule {
1202 fn default() -> Self {
1203 Self {
1204 enabled: true,
1205 max_per_session: None,
1206 applies_to_models: default_any_model(),
1207 }
1208 }
1209}
1210
1211impl HintTypeRule {
1212 pub fn applies_to(&self, model_id: &str) -> bool {
1214 if !self.enabled {
1215 return false;
1216 }
1217 self.applies_to_models
1218 .iter()
1219 .any(|m| m == "*" || m == model_id)
1220 }
1221}
1222
1223#[derive(Debug, Clone, Serialize, Deserialize)]
1224pub struct HintsConfig {
1225 #[serde(default)]
1226 pub default_verbosity: HintVerbosity,
1227 #[serde(default = "default_hint_types")]
1228 pub types: BTreeMap<String, HintTypeRule>,
1229}
1230
1231fn default_hint_types() -> BTreeMap<String, HintTypeRule> {
1232 let mut m = BTreeMap::new();
1233 m.insert(
1234 "near_ref".into(),
1235 HintTypeRule {
1236 enabled: true,
1237 max_per_session: Some(50),
1238 applies_to_models: default_any_model(),
1239 },
1240 );
1241 m.insert(
1242 "timestamp_ref".into(),
1243 HintTypeRule {
1244 enabled: true,
1245 max_per_session: Some(100),
1246 applies_to_models: default_any_model(),
1247 },
1248 );
1249 m.insert(
1250 "delta".into(),
1251 HintTypeRule {
1252 enabled: false, max_per_session: Some(20),
1254 applies_to_models: default_any_model(),
1255 },
1256 );
1257 m.insert(
1258 "schema_explainer".into(),
1260 HintTypeRule {
1261 enabled: false,
1262 max_per_session: None,
1263 applies_to_models: default_any_model(),
1264 },
1265 );
1266 m.insert(
1267 "inline_format_hint".into(),
1269 HintTypeRule {
1270 enabled: true,
1271 max_per_session: Some(10),
1272 applies_to_models: vec!["gpt-oss:20b".into(), "gemma4:26b".into()],
1273 },
1274 );
1275 m
1276}
1277
1278impl Default for HintsConfig {
1279 fn default() -> Self {
1280 Self {
1281 default_verbosity: HintVerbosity::Standard,
1282 types: default_hint_types(),
1283 }
1284 }
1285}
1286
1287impl HintsConfig {
1288 pub fn allow(&self, type_id: &str, model_id: &str) -> bool {
1292 match self.types.get(type_id) {
1293 Some(rule) => rule.applies_to(model_id),
1294 None => false, }
1296 }
1297}
1298
/// Session statistics that feed agent classification.
///
/// Not serialized; the derived default is all zeros.
#[derive(Debug, Clone, Default)]
pub struct SessionStats {
    /// Number of events in the session.
    pub event_count: usize,
    /// Number of compactions in the session.
    pub compaction_count: usize,
    /// Read share; classification compares it against 0.5.
    pub read_share: f32,
}
1310
1311#[derive(Debug, Clone, Default)]
1314pub struct SessionContext {
1315 pub model_id: Option<String>,
1316 pub stats: SessionStats,
1317}
1318
1319#[derive(Debug, Clone)]
1322pub struct EffectiveConfig {
1323 pub tokenizer: TokenizerProfile,
1324 pub llm: LlmProfile,
1325 pub agent: AgentProfile,
1326 pub hints: HintsConfig,
1327 pub mckp: MckpConfig,
1330}
1331
1332impl EffectiveConfig {
1333 pub fn resolve(cfg: &AdaptiveConfig, ctx: &SessionContext) -> Self {
1335 let llm = cfg.profiles.llm.resolve(ctx.model_id.as_deref()).clone();
1336 let tokenizer_id = if cfg.profiles.tokenizer.active == "auto" {
1337 llm.tokenizer.as_str()
1338 } else {
1339 cfg.profiles.tokenizer.active.as_str()
1340 };
1341 let tokenizer = cfg
1342 .profiles
1343 .tokenizer
1344 .get(tokenizer_id)
1345 .cloned()
1346 .unwrap_or_default();
1347 let agent = cfg.profiles.agent.resolve(&ctx.stats).clone();
1348 let mut mckp = cfg.mckp.clone();
1349 mckp.recursion_depth = agent.mckp_recursion_depth;
1350 Self {
1351 tokenizer,
1352 llm,
1353 agent,
1354 hints: cfg.hints.clone(),
1355 mckp,
1356 }
1357 }
1358
1359 pub fn preferred_format_for<'a>(
1361 &self,
1362 cfg: &'a AdaptiveConfig,
1363 endpoint: &str,
1364 ) -> Option<&'a str> {
1365 if let Some(dp) = cfg.profiles.data.match_endpoint(endpoint)
1366 && let Some(f) = dp.preferred_format.as_deref()
1367 {
1368 return Some(f);
1369 }
1370 cfg.effective_template(endpoint)
1371 }
1372
1373 pub fn allow_hint(&self, type_id: &str) -> bool {
1376 let model_id = "default"; let _ = model_id;
1379 self.hints.allow(type_id, "*")
1380 }
1381}
1382
1383#[cfg(test)]
1386mod tests {
1387 use super::*;
1388
1389 #[test]
1390 fn default_is_valid() {
1391 let cfg = AdaptiveConfig::default();
1392 assert_eq!(cfg.schema_version, CURRENT_SCHEMA_VERSION);
1393 assert_eq!(cfg.dedup.lru_size, 5);
1394 assert!(cfg.dedup.enabled_for("anything"));
1395 assert!(cfg.templates.is_template_active("csv_from_md"));
1396 assert!(cfg.mckp.format_enabled("deep_mckp"));
1397 }
1398
1399 #[test]
1400 fn roundtrip_toml() {
1401 let mut cfg = AdaptiveConfig::default();
1402 cfg.dedup.lru_size = 7;
1403 cfg.dedup.near_ref_enabled = true;
1404 cfg.dedup
1405 .enabled_per_endpoint
1406 .insert("mcp__test__get".into(), false);
1407 cfg.templates
1408 .endpoint_overrides
1409 .insert("mcp__test__get".into(), "csv_from_md".into());
1410 cfg.endpoint_overrides.insert(
1411 "Bash:git_log".into(),
1412 EndpointOverride {
1413 dedup_enabled: Some(false),
1414 ..Default::default()
1415 },
1416 );
1417
1418 let s = toml::to_string_pretty(&cfg).unwrap();
1419 let parsed: AdaptiveConfig = toml::from_str(&s).unwrap();
1420 assert_eq!(parsed.dedup.lru_size, 7);
1421 assert!(parsed.dedup.near_ref_enabled);
1422 assert!(!parsed.dedup.enabled_for("mcp__test__get"));
1423 assert_eq!(
1424 parsed.templates.template_for("mcp__test__get"),
1425 Some("csv_from_md")
1426 );
1427 }
1428
1429 #[test]
1430 fn unknown_schema_version_is_rejected() {
1431 let cfg = AdaptiveConfig {
1432 schema_version: 99,
1433 ..Default::default()
1434 };
1435 let s = toml::to_string(&cfg).unwrap();
1436 let err = toml::from_str::<AdaptiveConfig>(&s).ok().and_then(|c| {
1437 if c.schema_version != CURRENT_SCHEMA_VERSION {
1438 Some(c.schema_version)
1439 } else {
1440 None
1441 }
1442 });
1443 assert_eq!(err, Some(99));
1444 }
1445
1446 #[test]
1447 fn load_or_default_handles_missing_file() {
1448 let p = std::env::temp_dir().join("definitely_does_not_exist_12345.toml");
1449 let cfg = AdaptiveConfig::load_or_default(&p).unwrap();
1450 assert_eq!(cfg.schema_version, CURRENT_SCHEMA_VERSION);
1451 }
1452
1453 #[test]
1454 fn save_and_load_roundtrip() {
1455 let pid = std::process::id();
1456 let p = std::env::temp_dir().join(format!("devboy_cfg_test_{pid}.toml"));
1457 let mut cfg = AdaptiveConfig::default();
1458 cfg.dedup.lru_size = 10;
1459 cfg.mckp.recursion_depth = 7;
1460 cfg.save(&p).unwrap();
1461 let loaded = AdaptiveConfig::load(&p).unwrap();
1462 assert_eq!(loaded.dedup.lru_size, 10);
1463 assert_eq!(loaded.mckp.recursion_depth, 7);
1464 std::fs::remove_file(&p).ok();
1465 }
1466
1467 #[test]
1470 fn default_profiles_have_expected_variants() {
1471 let cfg = AdaptiveConfig::default();
1472 assert!(cfg.profiles.tokenizer.get("anthropic_class").is_some());
1474 assert!(cfg.profiles.tokenizer.get("openai_o200k").is_some());
1475 assert!(cfg.profiles.tokenizer.get("ollama_bpe").is_some());
1476 assert!(cfg.profiles.llm.variants.contains_key("default"));
1478 assert!(cfg.profiles.llm.variants.contains_key("glm-5.1"));
1479 assert!(cfg.profiles.llm.variants.contains_key("gpt-oss:20b"));
1480 assert!(cfg.profiles.agent.variants.contains_key("default"));
1482 assert!(
1483 cfg.profiles
1484 .agent
1485 .variants
1486 .contains_key("file_search_heavy")
1487 );
1488 assert!(
1489 cfg.profiles
1490 .agent
1491 .variants
1492 .contains_key("marathon_refactor")
1493 );
1494 assert!(cfg.profiles.data.variants.contains_key("gitlab_issues"));
1496 assert!(cfg.profiles.data.variants.contains_key("k8s_logs"));
1497 }
1498
1499 #[test]
1500 fn anthropic_tokenizer_has_inline_json_penalty() {
1501 let cfg = AdaptiveConfig::default();
1502 let p = cfg.profiles.tokenizer.get("anthropic_class").unwrap();
1503 assert!(p.inline_json_cost > 2.0);
1506 assert!((p.toon_overhead - 1.13).abs() < 0.001);
1507 }
1508
1509 #[test]
1510 fn llm_resolve_picks_exact_model_match() {
1511 let cfg = AdaptiveConfig::default();
1512 let p = cfg.profiles.llm.resolve(Some("glm-5.1"));
1513 assert_eq!(p.tokenizer, "anthropic_class");
1514 assert_eq!(p.context_window, 128_000);
1515 }
1516
1517 #[test]
1518 fn llm_resolve_falls_back_to_default_for_unknown() {
1519 let cfg = AdaptiveConfig::default();
1520 let p = cfg.profiles.llm.resolve(Some("unknown-model-xyz"));
1521 assert_eq!(p.tokenizer, "openai_o200k");
1523 }
1524
1525 #[test]
1526 fn agent_classifier_picks_marathon_for_long_session() {
1527 let cfg = AdaptiveConfig::default();
1528 let stats = SessionStats {
1529 event_count: 800,
1530 compaction_count: 5,
1531 read_share: 0.3,
1532 };
1533 let p = cfg.profiles.agent.resolve(&stats);
1534 assert_eq!(p.priority, Priority::Accuracy);
1535 assert_eq!(p.mckp_recursion_depth, 7);
1536 assert!(p.near_ref_enabled);
1537 }
1538
1539 #[test]
1540 fn agent_classifier_picks_file_search_for_short_read_heavy() {
1541 let cfg = AdaptiveConfig::default();
1542 let stats = SessionStats {
1543 event_count: 80,
1544 compaction_count: 0,
1545 read_share: 0.7,
1546 };
1547 let p = cfg.profiles.agent.resolve(&stats);
1548 assert_eq!(p.priority, Priority::Latency);
1549 assert_eq!(p.mckp_recursion_depth, 3);
1550 }
1551
1552 #[test]
1553 fn agent_classifier_default_for_balanced_session() {
1554 let cfg = AdaptiveConfig::default();
1555 let stats = SessionStats {
1556 event_count: 300,
1557 compaction_count: 0,
1558 read_share: 0.4,
1559 };
1560 let p = cfg.profiles.agent.resolve(&stats);
1561 assert_eq!(p.priority, Priority::Balanced);
1562 }
1563
1564 #[test]
1565 fn data_profile_matches_endpoint_prefix() {
1566 let cfg = AdaptiveConfig::default();
1567 let dp = cfg.profiles.data.match_endpoint("mcp__gitlab__get_issues");
1568 assert!(dp.is_some());
1569 assert_eq!(dp.unwrap().preferred_format.as_deref(), Some("csv_from_md"));
1570 }
1571
1572 #[test]
1573 fn data_profile_returns_none_for_unmatched() {
1574 let cfg = AdaptiveConfig::default();
1575 let dp = cfg.profiles.data.match_endpoint("Bash:git_log");
1576 assert!(dp.is_none());
1577 }
1578
1579 #[test]
1580 fn hint_policy_disables_schema_explainer_by_default() {
1581 let cfg = AdaptiveConfig::default();
1584 assert!(!cfg.hints.allow("schema_explainer", "glm-5.1"));
1585 assert!(!cfg.hints.allow("schema_explainer", "gpt-oss:20b"));
1586 }
1587
1588 #[test]
1589 fn hint_policy_inline_format_hint_only_for_local_models() {
1590 let cfg = AdaptiveConfig::default();
1591 assert!(cfg.hints.allow("inline_format_hint", "gpt-oss:20b"));
1592 assert!(cfg.hints.allow("inline_format_hint", "gemma4:26b"));
1593 assert!(!cfg.hints.allow("inline_format_hint", "glm-5.1"));
1594 assert!(!cfg.hints.allow("inline_format_hint", "claude-sonnet-4.6"));
1595 }
1596
1597 #[test]
1598 fn hint_policy_unknown_type_fails_closed() {
1599 let cfg = AdaptiveConfig::default();
1600 assert!(!cfg.hints.allow("never_seen_hint_type", "anything"));
1601 }
1602
1603 #[test]
1604 fn effective_config_resolves_glm_to_anthropic_tokenizer() {
1605 let cfg = AdaptiveConfig::default();
1606 let ctx = SessionContext {
1607 model_id: Some("glm-5.1".to_string()),
1608 stats: SessionStats::default(),
1609 };
1610 let eff = EffectiveConfig::resolve(&cfg, &ctx);
1611 assert_eq!(eff.llm.tokenizer, "anthropic_class");
1612 assert!(eff.tokenizer.inline_json_cost > 2.0);
1613 assert_eq!(eff.llm.context_window, 128_000);
1614 }
1615
1616 #[test]
1617 fn effective_config_recursion_depth_from_agent_profile() {
1618 let cfg = AdaptiveConfig::default();
1619 let ctx = SessionContext {
1620 model_id: Some("gpt-oss:20b".to_string()),
1621 stats: SessionStats {
1622 event_count: 1000,
1623 compaction_count: 5,
1624 read_share: 0.2,
1625 },
1626 };
1627 let eff = EffectiveConfig::resolve(&cfg, &ctx);
1628 assert_eq!(eff.mckp.recursion_depth, 7);
1630 assert_eq!(eff.agent.priority, Priority::Accuracy);
1631 }
1632
1633 #[test]
1634 fn effective_config_preferred_format_from_data_profile() {
1635 let cfg = AdaptiveConfig::default();
1636 let ctx = SessionContext::default();
1637 let eff = EffectiveConfig::resolve(&cfg, &ctx);
1638 let f = eff.preferred_format_for(&cfg, "mcp__gitlab__get_issues");
1639 assert_eq!(f, Some("csv_from_md"));
1640 }
1641
1642 #[test]
1643 fn schema_v1_file_upgrades_to_v2_in_memory() {
1644 let v1 = r#"
1646schema_version = 1
1647
1648[dedup]
1649lru_size = 7
1650
1651[mckp]
1652recursion_depth = 6
1653"#;
1654 let pid = std::process::id();
1655 let p = std::env::temp_dir().join(format!("devboy_cfg_v1_{pid}.toml"));
1656 std::fs::write(&p, v1).unwrap();
1657 let loaded = AdaptiveConfig::load(&p).unwrap();
1658 assert_eq!(loaded.schema_version, CURRENT_SCHEMA_VERSION);
1659 assert_eq!(loaded.dedup.lru_size, 7);
1660 assert_eq!(loaded.mckp.recursion_depth, 6);
1661 assert!(loaded.profiles.tokenizer.get("anthropic_class").is_some());
1663 assert!(loaded.hints.types.contains_key("near_ref"));
1664 std::fs::remove_file(&p).ok();
1665 }
1666
1667 #[test]
1668 fn future_schema_version_is_rejected_on_load() {
1669 let s = format!("schema_version = {}\n[dedup]\n", CURRENT_SCHEMA_VERSION + 1);
1670 let pid = std::process::id();
1671 let p = std::env::temp_dir().join(format!("devboy_cfg_future_{pid}.toml"));
1672 std::fs::write(&p, s).unwrap();
1673 let err = AdaptiveConfig::load(&p);
1674 assert!(matches!(err, Err(ConfigError::UnsupportedSchemaVersion(_))));
1675 std::fs::remove_file(&p).ok();
1676 }
1677
1678 #[test]
1679 fn profiles_roundtrip_through_toml() {
1680 let mut cfg = AdaptiveConfig::default();
1681 cfg.profiles.llm.active = "claude-sonnet-4.6".to_string();
1682 cfg.profiles.agent.active = "marathon_refactor".to_string();
1683 cfg.hints.types.get_mut("near_ref").unwrap().max_per_session = Some(99);
1684 let s = toml::to_string_pretty(&cfg).unwrap();
1685 let parsed: AdaptiveConfig = toml::from_str(&s).unwrap();
1686 assert_eq!(parsed.profiles.llm.active, "claude-sonnet-4.6");
1687 assert_eq!(parsed.profiles.agent.active, "marathon_refactor");
1688 assert_eq!(parsed.hints.types["near_ref"].max_per_session, Some(99));
1689 }
1690
1691 #[test]
1692 fn endpoint_override_roundtrip() {
1693 let mut cfg = AdaptiveConfig::default();
1694 cfg.endpoint_overrides.insert(
1695 "mcp__xxx__yyy".into(),
1696 EndpointOverride {
1697 dedup_enabled: Some(true),
1698 lru_size: Some(10),
1699 template_id: Some("custom".into()),
1700 min_body_chars: Some(50),
1701 },
1702 );
1703 let s = toml::to_string_pretty(&cfg).unwrap();
1704 let parsed: AdaptiveConfig = toml::from_str(&s).unwrap();
1705 let o = parsed.endpoint_overrides.get("mcp__xxx__yyy").unwrap();
1706 assert_eq!(o.lru_size, Some(10));
1707 assert_eq!(o.template_id.as_deref(), Some("custom"));
1708 }
1709
1710 #[test]
1711 fn effective_dedup_enabled_falls_back_correctly() {
1712 let mut cfg = AdaptiveConfig::default();
1713 assert!(cfg.effective_dedup_enabled("anything"));
1715 cfg.dedup.enabled_per_endpoint.insert("a".into(), false);
1717 assert!(!cfg.effective_dedup_enabled("a"));
1718 cfg.endpoint_overrides.insert(
1720 "a".into(),
1721 EndpointOverride {
1722 dedup_enabled: Some(true),
1723 ..Default::default()
1724 },
1725 );
1726 assert!(cfg.effective_dedup_enabled("a"));
1727 }
1728
1729 #[test]
1730 fn effective_min_body_chars_uses_override() {
1731 let mut cfg = AdaptiveConfig::default();
1732 assert_eq!(cfg.effective_min_body_chars("x"), cfg.dedup.min_body_chars);
1733 cfg.endpoint_overrides.insert(
1734 "x".into(),
1735 EndpointOverride {
1736 min_body_chars: Some(42),
1737 ..Default::default()
1738 },
1739 );
1740 assert_eq!(cfg.effective_min_body_chars("x"), 42);
1741 }
1742
1743 #[test]
1744 fn effective_lru_size_uses_override_when_larger() {
1745 let mut cfg = AdaptiveConfig::default();
1746 cfg.dedup.lru_size = 5;
1747 cfg.endpoint_overrides.insert(
1748 "big".into(),
1749 EndpointOverride {
1750 lru_size: Some(15),
1751 ..Default::default()
1752 },
1753 );
1754 assert_eq!(cfg.effective_lru_size("big"), 15);
1756 cfg.endpoint_overrides.insert(
1758 "small".into(),
1759 EndpointOverride {
1760 lru_size: Some(2),
1761 ..Default::default()
1762 },
1763 );
1764 assert_eq!(cfg.effective_lru_size("small"), 5);
1765 }
1766
1767 #[test]
1768 fn max_lru_size_across_all_overrides() {
1769 let mut cfg = AdaptiveConfig::default();
1770 cfg.dedup.lru_size = 5;
1771 cfg.endpoint_overrides.insert(
1772 "a".into(),
1773 EndpointOverride {
1774 lru_size: Some(12),
1775 ..Default::default()
1776 },
1777 );
1778 cfg.endpoint_overrides.insert(
1779 "b".into(),
1780 EndpointOverride {
1781 lru_size: Some(8),
1782 ..Default::default()
1783 },
1784 );
1785 assert_eq!(cfg.max_lru_size(), 12);
1786 }
1787
1788 #[test]
1789 fn effective_template_prefers_endpoint_override() {
1790 let mut cfg = AdaptiveConfig::default();
1791 cfg.templates
1792 .endpoint_overrides
1793 .insert("x".into(), "csv_from_md".into());
1794 assert_eq!(cfg.effective_template("x"), Some("csv_from_md"));
1795 cfg.endpoint_overrides.insert(
1796 "x".into(),
1797 EndpointOverride {
1798 template_id: Some("custom_tpl".into()),
1799 ..Default::default()
1800 },
1801 );
1802 assert_eq!(cfg.effective_template("x"), Some("custom_tpl"));
1803 }
1804
1805 #[test]
1806 fn merge_right_wins_overwrites_sections() {
1807 let mut a = AdaptiveConfig::default();
1808 a.endpoint_overrides.insert(
1809 "keep".into(),
1810 EndpointOverride {
1811 dedup_enabled: Some(false),
1812 ..Default::default()
1813 },
1814 );
1815 let mut b = AdaptiveConfig::default();
1816 b.dedup.lru_size = 42;
1817 b.endpoint_overrides.insert(
1818 "keep".into(),
1819 EndpointOverride {
1820 dedup_enabled: Some(true),
1821 ..Default::default()
1822 },
1823 );
1824 b.endpoint_overrides.insert(
1825 "new".into(),
1826 EndpointOverride {
1827 dedup_enabled: Some(true),
1828 ..Default::default()
1829 },
1830 );
1831 a.merge_right_wins(b);
1832 assert_eq!(a.dedup.lru_size, 42);
1833 assert_eq!(a.endpoint_overrides["keep"].dedup_enabled, Some(true));
1834 assert!(a.endpoint_overrides.contains_key("new"));
1835 }
1836
1837 #[test]
1838 fn hint_verbosity_to_runtime_mapping() {
1839 assert_eq!(
1840 HintVerbosity::Terse.to_runtime(),
1841 crate::dedup::HintVerbosity::Terse
1842 );
1843 assert_eq!(
1844 HintVerbosity::Standard.to_runtime(),
1845 crate::dedup::HintVerbosity::Standard
1846 );
1847 assert_eq!(
1848 HintVerbosity::Verbose.to_runtime(),
1849 crate::dedup::HintVerbosity::Verbose
1850 );
1851 }
1852
1853 #[test]
1854 fn mckp_config_format_disabled_is_respected() {
1855 let mut cfg = MckpConfig::default();
1856 assert!(cfg.format_enabled("csv"));
1857 cfg.formats_enabled = vec![];
1858 assert!(!cfg.format_enabled("csv"));
1859 }
1860
1861 #[test]
1862 fn templates_is_template_active_false_for_unknown() {
1863 let t = TemplatesConfig::default();
1864 assert!(!t.is_template_active("not_a_real_template"));
1865 assert!(t.is_template_active("csv_from_md"));
1866 }
1867
1868 #[test]
1869 fn tokenizer_profile_heuristic_uses_chars_per_token() {
1870 let p = TokenizerProfile {
1871 chars_per_token: 4.0,
1872 bpe: Tokenizer::Heuristic,
1873 ..Default::default()
1874 };
1875 assert_eq!(p.count_tokens("abcdefgh"), 2);
1877 assert_eq!(p.count_tokens(""), 0);
1879 }
1880
1881 #[test]
1882 fn tokenizer_profile_bpe_overrides_heuristic() {
1883 let p = TokenizerProfile {
1884 chars_per_token: 1.0,
1886 bpe: Tokenizer::O200kBase,
1887 ..Default::default()
1888 };
1889 let n = p.count_tokens("hello world");
1891 assert!(n > 0 && n < 5, "BPE should win, got {n}");
1892 }
1893
1894 #[test]
1895 fn default_tokenizer_variants_have_real_bpe_for_modern_models() {
1896 let variants = default_tokenizer_variants();
1897 assert_eq!(
1898 variants.get("anthropic_class").unwrap().bpe,
1899 Tokenizer::O200kBase
1900 );
1901 assert_eq!(
1902 variants.get("openai_o200k").unwrap().bpe,
1903 Tokenizer::O200kBase
1904 );
1905 assert_eq!(
1906 variants.get("openai_cl100k").unwrap().bpe,
1907 Tokenizer::Cl100kBase
1908 );
1909 assert_eq!(
1911 variants.get("ollama_bpe").unwrap().bpe,
1912 Tokenizer::Heuristic
1913 );
1914 }
1915
1916 #[test]
1919 fn schema_v3_default_carries_empty_tools_map() {
1920 let cfg = AdaptiveConfig::default();
1921 assert_eq!(cfg.schema_version, CURRENT_SCHEMA_VERSION);
1922 assert!(cfg.tools.is_empty());
1929 }
1930
1931 #[test]
1932 fn schema_v1_v2_v3_files_upgrade_to_current_with_empty_tools() {
1933 for raw in [
1937 "schema_version = 1\n",
1938 "schema_version = 2\n[profiles.tokenizer]\nactive = \"auto\"\n",
1939 "schema_version = 3\n[tools.Read]\nvalue_class = \"critical\"\n",
1940 ] {
1941 let mut cfg: AdaptiveConfig = toml::from_str(raw).unwrap();
1942 cfg.upgrade_in_place().unwrap();
1943 assert_eq!(cfg.schema_version, CURRENT_SCHEMA_VERSION);
1944 assert!(!cfg.enrichment.enabled);
1947 }
1948 }
1949
1950 #[test]
1951 fn enrichment_config_round_trips_with_overrides() {
1952 let raw = r#"
1953schema_version = 4
1954
1955[enrichment]
1956enabled = true
1957max_parallel_prefetches = 5
1958prefetch_budget_tokens = 12000
1959prefetch_timeout_ms = 1500
1960respect_rate_limits = false
1961"#;
1962 let cfg: AdaptiveConfig = toml::from_str(raw).unwrap();
1963 assert!(cfg.enrichment.enabled);
1964 assert_eq!(cfg.enrichment.max_parallel_prefetches, 5);
1965 assert_eq!(cfg.enrichment.prefetch_budget_tokens, 12000);
1966 assert_eq!(cfg.enrichment.prefetch_timeout_ms, 1500);
1967 assert!(!cfg.enrichment.respect_rate_limits);
1968
1969 let s = toml::to_string_pretty(&cfg).unwrap();
1970 let back: AdaptiveConfig = toml::from_str(&s).unwrap();
1971 assert!(back.enrichment.enabled);
1972 assert_eq!(back.enrichment.prefetch_timeout_ms, 1500);
1973 }
1974
1975 #[test]
1976 fn enrichment_defaults_are_safe() {
1977 let cfg = AdaptiveConfig::default();
1978 assert!(!cfg.enrichment.enabled);
1981 assert_eq!(cfg.enrichment.max_parallel_prefetches, 3);
1982 assert_eq!(cfg.enrichment.prefetch_budget_tokens, 8000);
1983 assert_eq!(cfg.enrichment.prefetch_timeout_ms, 1000);
1984 assert!(cfg.enrichment.respect_rate_limits);
1985 }
1986
1987 #[test]
1988 fn effective_tool_value_model_exact_match_wins() {
1989 let mut cfg = AdaptiveConfig::default();
1990 cfg.tools.insert(
1991 "Read".into(),
1992 devboy_core::ToolValueModel::critical_with_size(2.5),
1993 );
1994 let m = cfg.effective_tool_value_model("Read").unwrap();
1995 assert_eq!(m.cost_model.typical_kb, 2.5);
1996 assert_eq!(m.value_class, devboy_core::ValueClass::Critical);
1997 }
1998
1999 #[test]
2000 fn effective_tool_value_model_falls_back_to_wildcard() {
2001 let mut cfg = AdaptiveConfig::default();
2002 cfg.tools
2003 .insert("*".into(), devboy_core::ToolValueModel::audit_only());
2004 let m = cfg.effective_tool_value_model("UnknownTool").unwrap();
2005 assert_eq!(m.value_class, devboy_core::ValueClass::AuditOnly);
2006 }
2007
2008 #[test]
2009 fn effective_tool_value_model_none_when_unconfigured() {
2010 let cfg = AdaptiveConfig::default();
2011 assert!(cfg.effective_tool_value_model("Read").is_none());
2012 }
2013
2014 #[test]
2015 fn round_trip_via_toml_with_tools_block() {
2016 let mut cfg = AdaptiveConfig::default();
2017 cfg.tools.insert(
2018 "Read".into(),
2019 devboy_core::ToolValueModel::critical_with_size(2.5),
2020 );
2021 cfg.tools.insert(
2022 "TaskUpdate".into(),
2023 devboy_core::ToolValueModel::audit_only(),
2024 );
2025 let s = toml::to_string_pretty(&cfg).unwrap();
2026 assert!(s.contains("[tools.Read]"));
2027 assert!(s.contains("[tools.TaskUpdate]"));
2028 let back: AdaptiveConfig = toml::from_str(&s).unwrap();
2029 assert_eq!(back.tools.len(), 2);
2030 assert_eq!(
2031 back.effective_tool_value_model("Read")
2032 .unwrap()
2033 .cost_model
2034 .typical_kb,
2035 2.5
2036 );
2037 }
2038
2039 #[test]
2040 fn merge_right_wins_unions_tools_blocks() {
2041 let mut left = AdaptiveConfig::default();
2042 left.tools.insert(
2043 "Read".into(),
2044 devboy_core::ToolValueModel::critical_with_size(2.5),
2045 );
2046 left.tools
2047 .insert("Bash".into(), devboy_core::ToolValueModel::default());
2048
2049 let mut right = AdaptiveConfig::default();
2050 right.tools.insert(
2051 "Read".into(),
2052 devboy_core::ToolValueModel::critical_with_size(99.0),
2053 );
2054 right.tools.insert(
2055 "WebFetch".into(),
2056 devboy_core::ToolValueModel::critical_with_size(1.2),
2057 );
2058
2059 left.merge_right_wins(right);
2060 assert_eq!(
2062 left.effective_tool_value_model("Read")
2063 .unwrap()
2064 .cost_model
2065 .typical_kb,
2066 99.0
2067 );
2068 assert!(left.effective_tool_value_model("Bash").is_some());
2070 assert!(left.effective_tool_value_model("WebFetch").is_some());
2072 }
2073}