1use serde::{Deserialize, Serialize};
79use std::fs::File;
80use std::io::{self, BufReader};
81use std::path::{Path, PathBuf};
82
/// Top-level daemon configuration, deserialized from a JSON file by
/// [`ConfigFile::load`].
///
/// Exactly one of `model` (legacy single-backend shape) or `backends`
/// (multi-backend list) must be present; validation rejects configs that set
/// both or neither.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConfigFile {
    /// Defaults to `true` when omitted.
    ///
    /// NOTE(review): presumably controls whether missing model files are
    /// downloaded automatically — confirm against the consumer.
    #[serde(default = "default_auto_pull")]
    pub auto_pull: bool,

    /// Optional path; a leading `~/` (or `~\`) is replaced with the user's
    /// home directory while loading.
    ///
    /// NOTE(review): presumably the directory where model files live — confirm.
    #[serde(default)]
    pub models_home: Option<PathBuf>,

    /// Legacy single-backend shape. Mutually exclusive with `backends`;
    /// `resolved_backends` promotes it to a single llamacpp entry.
    #[serde(default)]
    pub model: Option<ModelConfig>,

    /// Defaults to 8192 when omitted; must be greater than zero. Applied to
    /// the promoted entry when the legacy `model` shape is used.
    #[serde(default = "default_n_ctx")]
    pub n_ctx: u32,

    /// Defaults to 0 when omitted. Applied to the promoted entry when the
    /// legacy `model` shape is used.
    #[serde(default)]
    pub n_gpu_layers: i32,

    /// Optional string that is not validated in this file.
    ///
    /// NOTE(review): presumably the bind address of an admin endpoint — confirm.
    #[serde(default)]
    pub admin_addr: Option<String>,

    /// Multi-backend shape. Mutually exclusive with `model`; when present the
    /// list must be non-empty and every entry name must be unique.
    #[serde(default)]
    pub backends: Option<Vec<BackendEntry>>,

    /// Optional listener settings; absent by default.
    #[serde(default)]
    pub listen: Option<ListenConfig>,
}
149
/// Optional `listen` block of the config file.
///
/// Every field is optional. When `tcp`, `tcp_v2` or `tcp_embed` is set,
/// validation rejects a value that is empty or whitespace-only.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ListenConfig {
    /// Address string (the tests use values such as `"127.0.0.1:9090"`).
    #[serde(default)]
    pub tcp: Option<String>,

    /// Address string for a second listener.
    ///
    /// NOTE(review): presumably serves a v2 protocol — confirm.
    #[serde(default)]
    pub tcp_v2: Option<String>,

    /// Address string for a third listener.
    ///
    /// NOTE(review): presumably serves embedding requests — confirm.
    #[serde(default)]
    pub tcp_embed: Option<String>,

    /// Not validated in this file.
    ///
    /// NOTE(review): presumably the NAME of an environment variable holding
    /// the API key, not the key itself — confirm against the consumer.
    #[serde(default)]
    pub api_key_env: Option<String>,
}
189
/// One element of the `backends` list.
///
/// Internally tagged by a `"kind"` field with kebab-case variant names, so
/// the accepted kinds are `"llamacpp"`, `"openai-compat"` and
/// `"bedrock-invoke"`. An unknown kind fails deserialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "kebab-case")]
pub enum BackendEntry {
    /// `"kind": "llamacpp"` — see [`LlamacppEntry`].
    Llamacpp(LlamacppEntry),
    /// `"kind": "openai-compat"` — see [`OpenaiCompatEntry`].
    OpenaiCompat(OpenaiCompatEntry),
    /// `"kind": "bedrock-invoke"` — see [`BedrockInvokeEntry`].
    BedrockInvoke(BedrockInvokeEntry),
}
211
212impl BackendEntry {
213 pub fn name(&self) -> &str {
216 match self {
217 BackendEntry::Llamacpp(e) => &e.name,
218 BackendEntry::OpenaiCompat(e) => &e.name,
219 BackendEntry::BedrockInvoke(e) => &e.name,
220 }
221 }
222}
223
/// Payload of a `"kind": "llamacpp"` backend entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlamacppEntry {
    /// Backend name; must be non-empty and unique across the `backends` list.
    pub name: String,

    /// Model description; validated with the same rules as the legacy
    /// top-level `model`.
    pub model: ModelConfig,

    /// Defaults to 8192 when omitted; must be greater than zero.
    #[serde(default = "default_n_ctx")]
    pub n_ctx: u32,

    /// Defaults to 0 when omitted.
    #[serde(default)]
    pub n_gpu_layers: i32,

    /// Defaults to `false` when omitted.
    ///
    /// NOTE(review): presumably marks this backend as an embedding model — confirm.
    #[serde(default)]
    pub embed: bool,

    /// Optional integer; left out of serialized output when `None`.
    ///
    /// NOTE(review): presumably a pooling-type code passed to the engine — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub embed_pooling: Option<i32>,

    /// Defaults to 2048 when omitted. Not range-checked in this file.
    #[serde(default = "default_embed_n_ctx")]
    pub embed_n_ctx: u32,
}
268
/// Payload of a `"kind": "openai-compat"` backend entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenaiCompatEntry {
    /// Backend name; must be non-empty and unique across the `backends` list.
    pub name: String,

    /// Must be non-blank and start with `http://` or `https://`.
    pub base_url: String,

    /// Model identifier string; must be non-blank.
    pub model: String,

    /// Not validated in this file.
    ///
    /// NOTE(review): presumably the NAME of an environment variable holding
    /// the API key — confirm against the consumer.
    #[serde(default)]
    pub api_key_env: Option<String>,

    /// Defaults to 300 when omitted; must be greater than zero.
    #[serde(default = "default_openai_timeout_secs")]
    pub timeout_secs: u64,
}
299
/// Payload of a `"kind": "bedrock-invoke"` backend entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BedrockInvokeEntry {
    /// Backend name; must be non-empty and unique across the `backends` list.
    pub name: String,

    /// Must be non-blank (the tests use `"us-east-1"`).
    pub region: String,

    /// Must be non-blank.
    pub model_id: String,

    /// Not validated in this file.
    ///
    /// NOTE(review): presumably the NAME of an environment variable holding
    /// a bearer token — confirm against the consumer.
    #[serde(default)]
    pub bearer_token_env: Option<String>,

    /// Not validated in this file.
    ///
    /// NOTE(review): presumably overrides the endpoint derived from `region` — confirm.
    #[serde(default)]
    pub endpoint: Option<String>,

    /// Defaults to 300 when omitted; must be greater than zero.
    #[serde(default = "default_bedrock_timeout_secs")]
    pub timeout_secs: u64,
}
347
/// Description of a model file, used both as the legacy top-level `model`
/// and inside llamacpp backend entries. Converts into
/// `crate::fetch::ModelSpec` via `From<&ModelConfig>`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelConfig {
    /// Must be non-empty.
    pub name: String,
    /// Must be exactly 64 lowercase hexadecimal characters.
    pub sha256: String,
    /// Optional; not validated in this file.
    #[serde(default)]
    pub size_bytes: Option<u64>,
    /// Must start with `https://`.
    pub source_url: String,
    /// Optional free-form string (the tests use `"apache-2.0"`).
    #[serde(default)]
    pub license: Option<String>,
}
373
/// Serde default for `ConfigFile::auto_pull`: enabled unless the file says otherwise.
fn default_auto_pull() -> bool {
    const AUTO_PULL_WHEN_OMITTED: bool = true;
    AUTO_PULL_WHEN_OMITTED
}
377
/// Serde default for the `n_ctx` field of both `ConfigFile` and `LlamacppEntry`.
fn default_n_ctx() -> u32 {
    const N_CTX_WHEN_OMITTED: u32 = 8_192;
    N_CTX_WHEN_OMITTED
}
381
/// Serde default for `LlamacppEntry::embed_n_ctx`; also used when a legacy
/// `model` is promoted to a backend entry.
fn default_embed_n_ctx() -> u32 {
    const EMBED_N_CTX_WHEN_OMITTED: u32 = 2_048;
    EMBED_N_CTX_WHEN_OMITTED
}
385
/// Serde default for `OpenaiCompatEntry::timeout_secs`.
fn default_openai_timeout_secs() -> u64 {
    const OPENAI_TIMEOUT_SECS_WHEN_OMITTED: u64 = 300;
    OPENAI_TIMEOUT_SECS_WHEN_OMITTED
}
389
/// Serde default for `BedrockInvokeEntry::timeout_secs`.
fn default_bedrock_timeout_secs() -> u64 {
    const BEDROCK_TIMEOUT_SECS_WHEN_OMITTED: u64 = 300;
    BEDROCK_TIMEOUT_SECS_WHEN_OMITTED
}
393
/// Looks up the current user's home directory from the environment.
///
/// Reads `HOME` on Unix targets and `USERPROFILE` everywhere else. Returns
/// `None` when the variable is not set; the value is not otherwise checked.
fn home_dir() -> Option<PathBuf> {
    // Only the variable name differs between platforms, so select it at
    // compile time and share the lookup.
    let variable = if cfg!(unix) { "HOME" } else { "USERPROFILE" };
    let raw = std::env::var_os(variable)?;
    Some(PathBuf::from(raw))
}
404
405pub fn default_config_path() -> PathBuf {
409 if let Ok(p) = std::env::var("INFERD_CONFIG") {
410 return PathBuf::from(p);
411 }
412 let home = home_dir().unwrap_or_else(|| PathBuf::from("."));
413 home.join(".inferd").join("config.json")
414}
415
/// Errors produced while loading and validating a [`ConfigFile`].
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    /// The config file does not exist at the given path.
    #[error("config file not found: {0}")]
    NotFound(PathBuf),
    /// The config file could not be opened for a reason other than not existing.
    #[error("io reading {path}: {source}")]
    Io {
        /// Path that was being opened.
        path: PathBuf,
        /// Underlying I/O error.
        #[source]
        source: io::Error,
    },
    /// The file contents could not be deserialized as a [`ConfigFile`].
    #[error("parse {path}: {source}")]
    Parse {
        /// Path that was being parsed.
        path: PathBuf,
        /// Underlying `serde_json` error.
        #[source]
        source: serde_json::Error,
    },
    /// The file parsed but failed validation; the message names the offending field.
    #[error("invalid config: {0}")]
    Invalid(String),
}
444
445impl ConfigFile {
446 pub fn load(path: &Path) -> Result<Self, ConfigError> {
448 let file = File::open(path).map_err(|e| {
449 if e.kind() == io::ErrorKind::NotFound {
450 ConfigError::NotFound(path.to_path_buf())
451 } else {
452 ConfigError::Io {
453 path: path.to_path_buf(),
454 source: e,
455 }
456 }
457 })?;
458 let reader = BufReader::new(file);
459 let mut cfg: ConfigFile =
460 serde_json::from_reader(reader).map_err(|e| ConfigError::Parse {
461 path: path.to_path_buf(),
462 source: e,
463 })?;
464 cfg.expand_paths();
465 cfg.validate()?;
466 Ok(cfg)
467 }
468
469 fn expand_paths(&mut self) {
470 if let Some(p) = self.models_home.as_ref()
471 && let Some(stripped) = p
472 .to_str()
473 .and_then(|s| s.strip_prefix("~/").or_else(|| s.strip_prefix("~\\")))
474 && let Some(home) = home_dir()
475 {
476 self.models_home = Some(home.join(stripped));
477 }
478 }
479
480 fn validate(&self) -> Result<(), ConfigError> {
481 match (&self.model, &self.backends) {
482 (Some(_), Some(_)) => {
483 return Err(ConfigError::Invalid(
484 "config: `model` and `backends` are mutually exclusive — \
485 pick one shape, not both"
486 .into(),
487 ));
488 }
489 (None, None) => {
490 return Err(ConfigError::Invalid(
491 "config: must specify either `model` (legacy single-backend) \
492 or `backends` (multi-backend list)"
493 .into(),
494 ));
495 }
496 _ => {}
497 }
498 if self.n_ctx == 0 {
499 return Err(ConfigError::Invalid("n_ctx must be > 0".into()));
500 }
501 if let Some(m) = &self.model {
502 validate_model_config(m)?;
503 }
504 if let Some(listen) = &self.listen {
505 if let Some(addr) = &listen.tcp
506 && addr.trim().is_empty()
507 {
508 return Err(ConfigError::Invalid(
509 "listen.tcp must not be empty when set".into(),
510 ));
511 }
512 if let Some(addr) = &listen.tcp_v2
513 && addr.trim().is_empty()
514 {
515 return Err(ConfigError::Invalid(
516 "listen.tcp_v2 must not be empty when set".into(),
517 ));
518 }
519 if let Some(addr) = &listen.tcp_embed
520 && addr.trim().is_empty()
521 {
522 return Err(ConfigError::Invalid(
523 "listen.tcp_embed must not be empty when set".into(),
524 ));
525 }
526 }
527 if let Some(list) = &self.backends {
528 if list.is_empty() {
529 return Err(ConfigError::Invalid(
530 "backends list must not be empty".into(),
531 ));
532 }
533 let mut seen = std::collections::HashSet::with_capacity(list.len());
534 for entry in list {
535 let name = entry.name();
536 if name.is_empty() {
537 return Err(ConfigError::Invalid(
538 "backends[].name must not be empty".into(),
539 ));
540 }
541 if !seen.insert(name.to_string()) {
542 return Err(ConfigError::Invalid(format!(
543 "duplicate backends[].name {name:?} — names must be unique"
544 )));
545 }
546 match entry {
547 BackendEntry::Llamacpp(e) => {
548 validate_model_config(&e.model)?;
549 if e.n_ctx == 0 {
550 return Err(ConfigError::Invalid(format!(
551 "backends[{name:?}].n_ctx must be > 0"
552 )));
553 }
554 }
555 BackendEntry::OpenaiCompat(e) => {
556 if e.base_url.trim().is_empty() {
557 return Err(ConfigError::Invalid(format!(
558 "backends[{name:?}].base_url must not be empty"
559 )));
560 }
561 if !(e.base_url.starts_with("https://")
562 || e.base_url.starts_with("http://"))
563 {
564 return Err(ConfigError::Invalid(format!(
565 "backends[{name:?}].base_url must be http:// or https:// \
566 (got {:?})",
567 e.base_url
568 )));
569 }
570 if e.model.trim().is_empty() {
571 return Err(ConfigError::Invalid(format!(
572 "backends[{name:?}].model must not be empty"
573 )));
574 }
575 if e.timeout_secs == 0 {
576 return Err(ConfigError::Invalid(format!(
577 "backends[{name:?}].timeout_secs must be > 0"
578 )));
579 }
580 }
581 BackendEntry::BedrockInvoke(e) => {
582 if e.region.trim().is_empty() {
583 return Err(ConfigError::Invalid(format!(
584 "backends[{name:?}].region must not be empty"
585 )));
586 }
587 if e.model_id.trim().is_empty() {
588 return Err(ConfigError::Invalid(format!(
589 "backends[{name:?}].model_id must not be empty"
590 )));
591 }
592 if e.timeout_secs == 0 {
593 return Err(ConfigError::Invalid(format!(
594 "backends[{name:?}].timeout_secs must be > 0"
595 )));
596 }
597 }
598 }
599 }
600 }
601 Ok(())
602 }
603
604 pub fn resolved_backends(&self) -> Vec<BackendEntry> {
609 if let Some(list) = &self.backends {
610 return list.clone();
611 }
612 let m = self
616 .model
617 .as_ref()
618 .expect("validate() guarantees one of model|backends is set")
619 .clone();
620 vec![BackendEntry::Llamacpp(LlamacppEntry {
621 name: m.name.clone(),
622 model: m,
623 n_ctx: self.n_ctx,
624 n_gpu_layers: self.n_gpu_layers,
625 embed: false,
630 embed_pooling: None,
631 embed_n_ctx: default_embed_n_ctx(),
632 })]
633 }
634}
635
636fn validate_model_config(m: &ModelConfig) -> Result<(), ConfigError> {
637 if m.name.is_empty() {
638 return Err(ConfigError::Invalid("model.name must not be empty".into()));
639 }
640 if !m.source_url.starts_with("https://") {
641 return Err(ConfigError::Invalid(format!(
642 "model.source_url must be https:// (got {:?})",
643 m.source_url
644 )));
645 }
646 if m.sha256.len() != 64
647 || !m
648 .sha256
649 .bytes()
650 .all(|b| b.is_ascii_hexdigit() && !b.is_ascii_uppercase())
651 {
652 return Err(ConfigError::Invalid(
653 "model.sha256 must be 64 lowercase hex chars".into(),
654 ));
655 }
656 Ok(())
657}
658
659impl From<&ModelConfig> for crate::fetch::ModelSpec {
660 fn from(m: &ModelConfig) -> Self {
661 crate::fetch::ModelSpec {
662 name: m.name.clone(),
663 source_url: m.source_url.clone(),
664 sha256_hex: m.sha256.clone(),
665 size_bytes: m.size_bytes,
666 license: m.license.clone(),
667 source: None,
668 }
669 }
670}
671
// Unit tests for config loading. Each test writes a JSON fixture to a
// temporary file and runs it through `ConfigFile::load`, so parsing, path
// expansion and validation are exercised together.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Writes `s` to a fresh temporary file and returns the handle; the file
    /// is removed when the handle is dropped.
    fn write_config(s: &str) -> tempfile::NamedTempFile {
        let mut f = tempfile::NamedTempFile::new().unwrap();
        f.write_all(s.as_bytes()).unwrap();
        f.flush().unwrap();
        f
    }

    /// A valid config in the legacy single-`model` shape.
    fn good_json() -> String {
        r#"{
            "auto_pull": true,
            "models_home": "/tmp/inferd-models-home",
            "model": {
                "name": "gemma-4-e4b",
                "sha256": "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
                "size_bytes": 5126304928,
                "source_url": "https://huggingface.co/unsloth/gemma-4-E4B-it-GGUF/resolve/main/gemma-4-E4B-it-UD-Q4_K_XL.gguf",
                "license": "apache-2.0"
            },
            "n_ctx": 8192,
            "n_gpu_layers": 0
        }"#
        .to_string()
    }

    /// Every field of a well-formed legacy config round-trips through `load`.
    #[test]
    fn load_well_formed_config() {
        let f = write_config(&good_json());
        let cfg = ConfigFile::load(f.path()).unwrap();
        let m = cfg.model.as_ref().expect("legacy model present");
        assert_eq!(m.name, "gemma-4-e4b");
        assert_eq!(m.size_bytes, Some(5_126_304_928));
        assert_eq!(m.license.as_deref(), Some("apache-2.0"));
        assert!(cfg.auto_pull);
        assert_eq!(cfg.n_ctx, 8192);
        assert_eq!(
            cfg.models_home,
            Some(PathBuf::from("/tmp/inferd-models-home"))
        );
    }

    /// A path that does not exist maps to `NotFound`, not `Io`.
    #[test]
    fn missing_file_returns_not_found() {
        let path = std::env::temp_dir().join("inferd-config-does-not-exist.json");
        // Best-effort cleanup in case an earlier run left the file behind.
        let _ = std::fs::remove_file(&path);
        let err = ConfigFile::load(&path).unwrap_err();
        assert!(matches!(err, ConfigError::NotFound(_)));
    }

    /// Malformed JSON maps to `Parse`.
    #[test]
    fn invalid_json_returns_parse_error() {
        let f = write_config("{ not valid json");
        let err = ConfigFile::load(f.path()).unwrap_err();
        assert!(matches!(err, ConfigError::Parse { .. }));
    }

    /// A plain-http model `source_url` is rejected.
    #[test]
    fn http_url_rejected() {
        let bad = good_json().replace("https://", "http://");
        let f = write_config(&bad);
        let err = ConfigFile::load(f.path()).unwrap_err();
        match err {
            ConfigError::Invalid(msg) => assert!(msg.contains("https://")),
            other => panic!("expected Invalid, got {other:?}"),
        }
    }

    /// An uppercase digest is rejected even though it is valid hex.
    #[test]
    fn uppercase_sha_rejected() {
        let bad = good_json().replace(
            "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
            "30D1E7949597A3446726064E80B876FD1B5CBA4AA6EEC53D27AFA420E731FB36",
        );
        let f = write_config(&bad);
        let err = ConfigFile::load(f.path()).unwrap_err();
        match err {
            ConfigError::Invalid(msg) => assert!(msg.contains("lowercase hex")),
            other => panic!("expected Invalid, got {other:?}"),
        }
    }

    /// A digest shorter than 64 characters is rejected.
    #[test]
    fn short_sha_rejected() {
        let bad = good_json().replace(
            "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
            "30d1e7",
        );
        let f = write_config(&bad);
        let err = ConfigFile::load(f.path()).unwrap_err();
        assert!(matches!(err, ConfigError::Invalid(_)));
    }

    /// Omitted optional fields take their serde defaults.
    #[test]
    fn defaults_when_optional_fields_missing() {
        let json = r#"{
            "model": {
                "name": "gemma-4-e4b",
                "sha256": "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
                "source_url": "https://example.com/x.gguf"
            }
        }"#;
        let f = write_config(json);
        let cfg = ConfigFile::load(f.path()).unwrap();
        let m = cfg.model.as_ref().expect("legacy model present");
        assert!(cfg.auto_pull);
        assert_eq!(cfg.n_ctx, 8192);
        assert_eq!(cfg.n_gpu_layers, 0);
        assert!(m.size_bytes.is_none());
        assert!(cfg.models_home.is_none());
        assert!(m.license.is_none());
    }

    /// The `From<&ModelConfig>` conversion copies fields without validating them.
    #[test]
    fn modelconfig_converts_to_fetch_modelspec() {
        let cfg = ModelConfig {
            name: "x".into(),
            sha256: "abc".into(),
            size_bytes: Some(42),
            source_url: "https://e/x.gguf".into(),
            license: Some("mit".into()),
        };
        let spec: crate::fetch::ModelSpec = (&cfg).into();
        assert_eq!(spec.name, "x");
        assert_eq!(spec.size_bytes, Some(42));
        assert_eq!(spec.sha256_hex, "abc");
        assert_eq!(spec.license.as_deref(), Some("mit"));
    }

    /// A valid config in the multi-backend shape: one llamacpp entry and one
    /// openai-compat entry.
    fn good_multi_backend_json() -> String {
        r#"{
            "models_home": "/tmp/inferd-models-home",
            "backends": [
                {
                    "kind": "llamacpp",
                    "name": "local-gemma",
                    "model": {
                        "name": "gemma-4-e4b",
                        "sha256": "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
                        "source_url": "https://example.com/gemma.gguf"
                    },
                    "n_ctx": 8192,
                    "n_gpu_layers": 35
                },
                {
                    "kind": "openai-compat",
                    "name": "anthropic-fallback",
                    "base_url": "https://api.anthropic.com",
                    "model": "claude-opus-4-7",
                    "api_key_env": "ANTHROPIC_API_KEY"
                }
            ]
        }"#
        .to_string()
    }

    /// Both entries of the multi-backend fixture deserialize into the right
    /// variants, with defaults applied.
    #[test]
    fn load_multi_backend_config() {
        let f = write_config(&good_multi_backend_json());
        let cfg = ConfigFile::load(f.path()).unwrap();
        assert!(cfg.model.is_none());
        let list = cfg.backends.as_ref().expect("backends present");
        assert_eq!(list.len(), 2);
        match &list[0] {
            BackendEntry::Llamacpp(e) => {
                assert_eq!(e.name, "local-gemma");
                assert_eq!(e.model.name, "gemma-4-e4b");
                assert_eq!(e.n_ctx, 8192);
                assert_eq!(e.n_gpu_layers, 35);
            }
            other => panic!("expected llamacpp, got {other:?}"),
        }
        match &list[1] {
            BackendEntry::OpenaiCompat(e) => {
                assert_eq!(e.name, "anthropic-fallback");
                assert_eq!(e.base_url, "https://api.anthropic.com");
                assert_eq!(e.model, "claude-opus-4-7");
                assert_eq!(e.api_key_env.as_deref(), Some("ANTHROPIC_API_KEY"));
                assert_eq!(e.timeout_secs, 300);
            }
            other => panic!("expected openai-compat, got {other:?}"),
        }
    }

    /// Setting both `model` and `backends` is an error.
    #[test]
    fn rejects_both_model_and_backends() {
        let json = r#"{
            "model": {
                "name": "gemma-4-e4b",
                "sha256": "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
                "source_url": "https://example.com/x.gguf"
            },
            "backends": [
                {
                    "kind": "openai-compat",
                    "name": "x",
                    "base_url": "https://api.openai.com",
                    "model": "gpt-4o-mini"
                }
            ]
        }"#;
        let f = write_config(json);
        let err = ConfigFile::load(f.path()).unwrap_err();
        match err {
            ConfigError::Invalid(msg) => assert!(msg.contains("mutually exclusive")),
            other => panic!("expected Invalid, got {other:?}"),
        }
    }

    /// Setting neither `model` nor `backends` is an error.
    #[test]
    fn rejects_neither_model_nor_backends() {
        let json = r#"{ "auto_pull": true }"#;
        let f = write_config(json);
        let err = ConfigFile::load(f.path()).unwrap_err();
        match err {
            ConfigError::Invalid(msg) => assert!(msg.contains("must specify either")),
            other => panic!("expected Invalid, got {other:?}"),
        }
    }

    /// An explicitly empty `backends` list is an error.
    #[test]
    fn rejects_empty_backends_list() {
        let json = r#"{ "backends": [] }"#;
        let f = write_config(json);
        let err = ConfigFile::load(f.path()).unwrap_err();
        match err {
            ConfigError::Invalid(msg) => assert!(msg.contains("must not be empty")),
            other => panic!("expected Invalid, got {other:?}"),
        }
    }

    /// Two backends sharing a name are rejected.
    #[test]
    fn rejects_duplicate_backend_names() {
        let json = r#"{
            "backends": [
                {
                    "kind": "openai-compat",
                    "name": "dup",
                    "base_url": "https://api.openai.com",
                    "model": "gpt-4o-mini"
                },
                {
                    "kind": "openai-compat",
                    "name": "dup",
                    "base_url": "https://api.anthropic.com",
                    "model": "claude-opus-4-7"
                }
            ]
        }"#;
        let f = write_config(json);
        let err = ConfigFile::load(f.path()).unwrap_err();
        match err {
            ConfigError::Invalid(msg) => assert!(msg.contains("duplicate")),
            other => panic!("expected Invalid, got {other:?}"),
        }
    }

    /// An empty `base_url` on an openai-compat entry is rejected.
    #[test]
    fn rejects_openai_compat_without_base_url() {
        let json = r#"{
            "backends": [
                {
                    "kind": "openai-compat",
                    "name": "x",
                    "base_url": "",
                    "model": "gpt-4o-mini"
                }
            ]
        }"#;
        let f = write_config(json);
        let err = ConfigFile::load(f.path()).unwrap_err();
        assert!(matches!(err, ConfigError::Invalid(_)));
    }

    /// A `base_url` scheme other than http/https is rejected.
    #[test]
    fn rejects_openai_compat_with_bad_scheme() {
        let json = r#"{
            "backends": [
                {
                    "kind": "openai-compat",
                    "name": "x",
                    "base_url": "ftp://api.openai.com",
                    "model": "gpt-4o-mini"
                }
            ]
        }"#;
        let f = write_config(json);
        let err = ConfigFile::load(f.path()).unwrap_err();
        match err {
            ConfigError::Invalid(msg) => assert!(msg.contains("http")),
            other => panic!("expected Invalid, got {other:?}"),
        }
    }

    /// Unlike model `source_url`, an openai-compat `base_url` may be plain http.
    #[test]
    fn accepts_openai_compat_with_localhost_http() {
        let json = r#"{
            "backends": [
                {
                    "kind": "openai-compat",
                    "name": "ollama",
                    "base_url": "http://localhost:11434",
                    "model": "llama3.1:8b"
                }
            ]
        }"#;
        let f = write_config(json);
        let cfg = ConfigFile::load(f.path()).unwrap();
        assert_eq!(cfg.resolved_backends().len(), 1);
    }

    /// An unrecognised `kind` fails at deserialization, so it is a `Parse`
    /// error rather than `Invalid`.
    #[test]
    fn rejects_unknown_kind() {
        let json = r#"{
            "backends": [
                {
                    "kind": "future-thing-not-supported",
                    "name": "x"
                }
            ]
        }"#;
        let f = write_config(json);
        let err = ConfigFile::load(f.path()).unwrap_err();
        assert!(matches!(err, ConfigError::Parse { .. }));
    }

    /// A bedrock-invoke entry deserializes with its optional fields defaulted.
    #[test]
    fn loads_bedrock_invoke_entry() {
        let json = r#"{
            "backends": [
                {
                    "kind": "bedrock-invoke",
                    "name": "bedrock-claude",
                    "region": "us-east-1",
                    "model_id": "anthropic.claude-3-5-sonnet-20241022-v2:0",
                    "bearer_token_env": "AWS_BEARER_TOKEN_BEDROCK"
                }
            ]
        }"#;
        let f = write_config(json);
        let cfg = ConfigFile::load(f.path()).unwrap();
        let list = cfg.backends.as_ref().unwrap();
        assert_eq!(list.len(), 1);
        match &list[0] {
            BackendEntry::BedrockInvoke(e) => {
                assert_eq!(e.name, "bedrock-claude");
                assert_eq!(e.region, "us-east-1");
                assert_eq!(e.model_id, "anthropic.claude-3-5-sonnet-20241022-v2:0");
                assert_eq!(
                    e.bearer_token_env.as_deref(),
                    Some("AWS_BEARER_TOKEN_BEDROCK")
                );
                assert!(e.endpoint.is_none());
                assert_eq!(e.timeout_secs, 300);
            }
            other => panic!("expected bedrock-invoke, got {other:?}"),
        }
    }

    /// An empty `region` on a bedrock-invoke entry is rejected.
    #[test]
    fn rejects_bedrock_invoke_without_region() {
        let json = r#"{
            "backends": [
                {
                    "kind": "bedrock-invoke",
                    "name": "x",
                    "region": "",
                    "model_id": "anthropic.claude-3-5-sonnet-20241022-v2:0"
                }
            ]
        }"#;
        let f = write_config(json);
        let err = ConfigFile::load(f.path()).unwrap_err();
        match err {
            ConfigError::Invalid(msg) => assert!(msg.contains("region")),
            other => panic!("expected Invalid, got {other:?}"),
        }
    }

    /// An empty `model_id` on a bedrock-invoke entry is rejected.
    #[test]
    fn rejects_bedrock_invoke_without_model_id() {
        let json = r#"{
            "backends": [
                {
                    "kind": "bedrock-invoke",
                    "name": "x",
                    "region": "us-east-1",
                    "model_id": ""
                }
            ]
        }"#;
        let f = write_config(json);
        let err = ConfigFile::load(f.path()).unwrap_err();
        match err {
            ConfigError::Invalid(msg) => assert!(msg.contains("model_id")),
            other => panic!("expected Invalid, got {other:?}"),
        }
    }

    /// The legacy `model` shape resolves to exactly one llamacpp backend that
    /// inherits the top-level `n_ctx` and `n_gpu_layers`.
    #[test]
    fn legacy_model_promotes_to_one_backend() {
        let f = write_config(&good_json());
        let cfg = ConfigFile::load(f.path()).unwrap();
        let resolved = cfg.resolved_backends();
        assert_eq!(resolved.len(), 1);
        match &resolved[0] {
            BackendEntry::Llamacpp(e) => {
                assert_eq!(e.name, "gemma-4-e4b");
                assert_eq!(e.n_ctx, 8192);
                assert_eq!(e.n_gpu_layers, 0);
            }
            other => panic!("expected llamacpp, got {other:?}"),
        }
    }

    /// With `backends` set, `resolved_backends` returns the list in order.
    #[test]
    fn multi_backend_resolved_passes_through() {
        let f = write_config(&good_multi_backend_json());
        let cfg = ConfigFile::load(f.path()).unwrap();
        let resolved = cfg.resolved_backends();
        assert_eq!(resolved.len(), 2);
        assert_eq!(resolved[0].name(), "local-gemma");
        assert_eq!(resolved[1].name(), "anthropic-fallback");
    }

    /// `listen` stays `None` when the block is omitted.
    #[test]
    fn listen_block_absent_by_default() {
        let f = write_config(&good_json());
        let cfg = ConfigFile::load(f.path()).unwrap();
        assert!(cfg.listen.is_none());
    }

    /// The `listen` fields are carried through unchanged.
    #[test]
    fn listen_block_carries_tcp_and_api_key_env() {
        let json = r#"{
            "model": {
                "name": "gemma-4-e4b",
                "sha256": "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
                "source_url": "https://example.com/x.gguf"
            },
            "listen": {
                "tcp": "127.0.0.1:9090",
                "tcp_v2": "127.0.0.1:9091",
                "api_key_env": "INFERD_TCP_API_KEY"
            }
        }"#;
        let f = write_config(json);
        let cfg = ConfigFile::load(f.path()).unwrap();
        let listen = cfg.listen.as_ref().expect("listen present");
        assert_eq!(listen.tcp.as_deref(), Some("127.0.0.1:9090"));
        assert_eq!(listen.tcp_v2.as_deref(), Some("127.0.0.1:9091"));
        assert_eq!(listen.api_key_env.as_deref(), Some("INFERD_TCP_API_KEY"));
    }

    /// The embed fields of a llamacpp entry default to off / unset / 2048.
    #[test]
    fn llamacpp_entry_embed_defaults_off() {
        let f = write_config(&good_multi_backend_json());
        let cfg = ConfigFile::load(f.path()).unwrap();
        let list = cfg.backends.as_ref().unwrap();
        match &list[0] {
            BackendEntry::Llamacpp(e) => {
                assert!(!e.embed);
                assert!(e.embed_pooling.is_none());
                assert_eq!(e.embed_n_ctx, 2048);
            }
            other => panic!("expected llamacpp, got {other:?}"),
        }
    }

    /// Explicit embed fields on a llamacpp entry are carried through.
    #[test]
    fn llamacpp_entry_carries_embed_fields() {
        let json = r#"{
            "backends": [
                {
                    "kind": "llamacpp",
                    "name": "embeddings",
                    "embed": true,
                    "embed_pooling": 1,
                    "embed_n_ctx": 1024,
                    "model": {
                        "name": "embeddinggemma-300m",
                        "sha256": "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
                        "source_url": "https://example.com/embed.gguf"
                    }
                }
            ]
        }"#;
        let f = write_config(json);
        let cfg = ConfigFile::load(f.path()).unwrap();
        let list = cfg.backends.as_ref().unwrap();
        match &list[0] {
            BackendEntry::Llamacpp(e) => {
                assert!(e.embed);
                assert_eq!(e.embed_pooling, Some(1));
                assert_eq!(e.embed_n_ctx, 1024);
            }
            other => panic!("expected llamacpp, got {other:?}"),
        }
    }

    /// A promoted legacy model never has embedding enabled.
    #[test]
    fn legacy_promotion_keeps_embed_off() {
        let f = write_config(&good_json());
        let cfg = ConfigFile::load(f.path()).unwrap();
        let list = cfg.resolved_backends();
        match &list[0] {
            BackendEntry::Llamacpp(e) => {
                assert!(!e.embed);
                assert!(e.embed_pooling.is_none());
                assert_eq!(e.embed_n_ctx, 2048);
            }
            other => panic!("expected llamacpp, got {other:?}"),
        }
    }

    /// A whitespace-only `listen.tcp` counts as empty and is rejected.
    #[test]
    fn listen_rejects_empty_tcp() {
        let json = r#"{
            "model": {
                "name": "gemma-4-e4b",
                "sha256": "30d1e7949597a3446726064e80b876fd1b5cba4aa6eec53d27afa420e731fb36",
                "source_url": "https://example.com/x.gguf"
            },
            "listen": { "tcp": " " }
        }"#;
        let f = write_config(json);
        let err = ConfigFile::load(f.path()).unwrap_err();
        match err {
            ConfigError::Invalid(msg) => assert!(msg.contains("listen.tcp")),
            other => panic!("expected Invalid, got {other:?}"),
        }
    }
}