1use std::path::PathBuf;
13
14use mdmodels::{llm::extraction::query_openai, prelude::DataModel};
15use serde_json::Value;
16use thiserror::Error;
17
/// EnzymeML specification (v2), embedded at compile time; parsed into a
/// `DataModel` and used as the schema for structured extraction.
const SPECS: &str = include_str!("../specs/specifications/v2.md");

/// System prompt used when the caller supplies none: instructs the model to
/// extract scientific facts and respond in JSON.
const DEFAULT_SYSTEM_PROMPT: &str =
    "You are a helpful scientific assistant that is capable to identify scientific facts and data from a given text. You are also capable of extracting information from a given text and returning it in a structured format. Please return the information in a JSON format. Think step by step and work precisely.";
24
25pub fn query_llm(
47 prompt: impl Into<PromptInput>,
48 system_prompt: Option<impl Into<PromptInput>>,
49 llm_model: Option<String>,
50 api_key: Option<String>,
51) -> Result<Value, LLMError> {
52 let llm_model = llm_model.unwrap_or_else(|| "gpt-4o".to_string());
53 let api_key = match api_key {
54 Some(key) => key,
55 None => std::env::var("OPENAI_API_KEY").map_err(LLMError::EnvError)?,
56 };
57
58 let prompt: String = prompt.into().try_into()?;
59 let system: String = if let Some(system_prompt) = system_prompt {
60 system_prompt.into().try_into()?
61 } else {
62 DEFAULT_SYSTEM_PROMPT.to_string()
63 };
64
65 let model = DataModel::from_markdown_string(SPECS)
66 .map_err(|e| LLMError::DataModelError(e.to_string()))?;
67
68 tokio::runtime::Runtime::new()
69 .unwrap()
70 .block_on(query_openai(
71 prompt.as_str(),
72 system.as_str(),
73 &model,
74 "EnzymeMLDocument",
75 &llm_model,
76 false,
77 Some(api_key),
78 ))
79 .map_err(LLMError::LLMServiceError)
80}
81
/// Source of prompt text for `query_llm`: either inline text or a path to a
/// file whose contents are read when the input is converted to a `String`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PromptInput {
    /// Path to a file containing the prompt text.
    File(PathBuf),
    /// The prompt text itself.
    String(String),
}
93
94impl TryInto<String> for PromptInput {
95 type Error = LLMError;
96
97 fn try_into(self) -> Result<String, Self::Error> {
98 match self {
99 PromptInput::String(s) => Ok(s),
100 PromptInput::File(path) => {
101 Ok(std::fs::read_to_string(path).map_err(LLMError::FileError)?)
102 }
103 }
104 }
105}
106
107impl From<String> for PromptInput {
108 fn from(s: String) -> Self {
109 PromptInput::String(s)
110 }
111}
112
113impl From<&str> for PromptInput {
114 fn from(s: &str) -> Self {
115 PromptInput::String(s.to_string())
116 }
117}
118
119impl From<PathBuf> for PromptInput {
120 fn from(path: PathBuf) -> Self {
121 PromptInput::File(path)
122 }
123}
124
/// Errors produced by `query_llm` and the `PromptInput` conversions.
#[derive(Debug, Error)]
pub enum LLMError {
    /// I/O failure while reading a prompt file.
    // NOTE(review): the message says "File not found" but this variant wraps
    // any `std::io::Error` (e.g. permission denied) — consider "File error".
    #[error("File not found: {0}")]
    FileError(#[from] std::io::Error),
    /// `OPENAI_API_KEY` was neither passed in nor set in the environment.
    #[error("Environment variable not found: {0}")]
    EnvError(#[from] std::env::VarError),
    /// Failure reported by the underlying LLM client call.
    #[error("LLM service error: {0}")]
    LLMServiceError(#[from] Box<dyn std::error::Error>),
    /// The embedded specification could not be parsed into a `DataModel`.
    #[error("LLM model error: {0}")]
    DataModelError(String),
    /// JSON (de)serialization failure.
    #[error("Serde error: {0}")]
    SerdeError(#[from] serde_json::Error),
}