Skip to main content

owl_ms_language_server/
web.rs

1use itertools::Itertools;
2use log::{debug, error};
3use std::{collections::HashMap, path::PathBuf, sync::Mutex, time::Duration};
4use thiserror::Error;
5use ureq::{http::StatusCode, Agent};
6
7/// Trait for simple http get requests. It can be mocked with the static client.
8pub trait HttpClient: Send + Sync + std::fmt::Debug {
9    /// # Errors
10    ///
11    /// This function will return an error if the get request is not successfull.
12    fn get(&self, url: &str) -> Result<String>;
13}
14
15#[derive(Error, Debug)]
16pub enum Error {
17    #[error("Ureq Error: {0}")]
18    Ureq(#[from] ureq::Error),
19    #[error("The request to {0} could not be fulfilled because: {1}")]
20    Web(String, &'static str), // Url and reason
21}
22
23pub type Result<T> = std::result::Result<T, Error>;
24
25#[derive(Debug)]
26pub struct UreqClient {
27    agent: Agent,
28    state: Mutex<ClientState>,
29}
30
/// Mutable bookkeeping shared by all requests made through one client.
#[derive(Default, Debug)]
struct ClientState {
    /// Number of network attempts made so far, keyed by URL.
    retry_count: HashMap<String, usize>,
    /// Bodies of successfully fetched responses, keyed by URL.
    cache: HashMap<String, String>,
}
36
37impl Default for UreqClient {
38    fn default() -> Self {
39        Self {
40            agent: Agent::config_builder()
41                .timeout_global(Some(Duration::from_secs(5)))
42                .build()
43                .into(),
44            state: Mutex::default(),
45        }
46    }
47}
48
49impl HttpClient for UreqClient {
50    fn get(&self, url: &str) -> Result<String> {
51        // Some common ontologies that are statictly included in the binary
52        match url {
53            "http://www.w3.org/2000/01/rdf-schema#" => {
54                return Ok(include_str!("../static/rdfs.owl").to_string())
55            }
56            "http://www.w3.org/2002/07/owl#" => {
57                return Ok(include_str!("../static/owl.owl").to_string())
58            }
59            "http://www.w3.org/1999/02/22-rdf-syntax-ns#" => {
60                return Ok(include_str!("../static/rdf.owl").to_string())
61            }
62            "http://purl.org/dc/elements/1.1/" => {
63                return Ok(include_str!("../static/dc.owl").to_string())
64            }
65            _ => {}
66        }
67
68        let mut state = self.state.lock().expect("Client should not panic");
69
70        if let Some(value) = state.cache.get(url) {
71            debug!("Found value in cache");
72            return Ok(value.clone());
73        }
74        if let Some(count) = state.retry_count.get_mut(url) {
75            // TODO more sensible numbers :>
76            if count >= &mut 1 {
77                return Err(Error::Web(url.into(), "too many requests"));
78            }
79            *count += 1;
80        } else {
81            state.retry_count.insert(url.into(), 1);
82        }
83
84        let mut response = self
85            .agent
86            .get(url)
87            .header("Accept", "application/rdf+xml")
88            .call()?;
89
90        if response.status() == StatusCode::NOT_ACCEPTABLE {
91            error!("not acceptable {url}");
92            return Err(Error::Web(url.to_string(), "Not acceptable"));
93        }
94
95        // "content-type" can be something like "text/html; charset=UTF-8". So lets look for substrings.
96        if response
97            .headers()
98            .get("Content-Type")
99            .and_then(|v| v.to_str().ok())
100            .is_some_and(|v| v.split(';').map(str::trim).contains("text/html"))
101        {
102            return Err(Error::Web(
103                url.to_string(),
104                "Content type not supported (got html)",
105            ));
106        }
107
108        let read_to_string = response.body_mut().read_to_string()?;
109        state.cache.insert(url.into(), read_to_string.clone());
110        Ok(read_to_string)
111    }
112}
113
114/// This client is simply offline
115#[derive(Debug)]
116pub struct OfflineClient;
117
118impl HttpClient for OfflineClient {
119    fn get(&self, url: &str) -> Result<String> {
120        Err(Error::Web(url.to_string(), "You are offline"))
121    }
122}
123
/// Escapes a URL into a flat file name for the web cache.
///
/// `/`, `:` and `#` are spelled out as `_slash_`, `_colon_` and `_hash_`;
/// every other non-alphanumeric character becomes `_`, and `.cache` is
/// appended. The mapping must stay exactly as it is: changing it will break
/// the existing web cache.
#[must_use]
pub fn url_to_filename(url: &str) -> PathBuf {
    const SUFFIX: &str = ".cache";

    let mut name = String::with_capacity(url.len() + SUFFIX.len());
    for ch in url.chars() {
        match ch {
            '/' => name.push_str("_slash_"),
            ':' => name.push_str("_colon_"),
            '#' => name.push_str("_hash_"),
            other if other.is_alphanumeric() => name.push(other),
            _ => name.push('_'),
        }
    }
    name.push_str(SUFFIX);

    PathBuf::from(name)
}
136
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Asserts that `url` is escaped to exactly the file name `expected`.
    #[track_caller]
    fn assert_filename(url: &str, expected: &str) {
        assert_eq!(url_to_filename(url), PathBuf::from(expected));
    }

    #[test]
    fn test_simple_url() {
        assert_filename(
            "https://example.com",
            "https_colon__slash__slash_example_com.cache",
        );
    }

    #[test]
    fn test_url_with_path() {
        assert_filename(
            "https://example.com/path/to/page",
            "https_colon__slash__slash_example_com_slash_path_slash_to_slash_page.cache",
        );
    }

    #[test]
    fn test_url_with_port() {
        assert_filename(
            "http://localhost:8080/api",
            "http_colon__slash__slash_localhost_colon_8080_slash_api.cache",
        );
    }

    #[test]
    fn test_url_with_query_params() {
        assert_filename(
            "https://api.example.com?key=value&id=123",
            "https_colon__slash__slash_api_example_com_key_value_id_123.cache",
        );
    }

    #[test]
    fn test_url_with_hash() {
        assert_filename(
            "https://example.com/page#section",
            "https_colon__slash__slash_example_com_slash_page_hash_section.cache",
        );
    }

    #[test]
    fn test_url_with_special_chars() {
        assert_filename(
            "https://example.com/path@file!name",
            "https_colon__slash__slash_example_com_slash_path_file_name.cache",
        );
    }

    #[test]
    fn test_url_with_multiple_special_chars() {
        assert_filename(
            "https://example.com/path?q=test&sort=asc#top",
            "https_colon__slash__slash_example_com_slash_path_q_test_sort_asc_hash_top.cache",
        );
    }

    #[test]
    fn test_ftp_url() {
        assert_filename(
            "ftp://files.example.org:21/download",
            "ftp_colon__slash__slash_files_example_org_colon_21_slash_download.cache",
        );
    }

    #[test]
    fn test_empty_string() {
        // Only the extension remains when there is nothing to escape.
        assert_filename("", ".cache");
    }

    #[test]
    fn test_url_with_dashes_and_underscores() {
        assert_filename(
            "https://my-site.com/my_page",
            "https_colon__slash__slash_my_site_com_slash_my_page.cache",
        );
    }

    #[test]
    fn test_cache_extension_always_added() {
        let file_name = url_to_filename("simple");
        assert!(file_name.to_string_lossy().ends_with(".cache"));
    }

    #[test]
    fn test_url_with_encoded_chars() {
        // The percent sign of an escape sequence is itself replaced by `_`.
        assert_filename(
            "https://example.com/path%20with%20spaces",
            "https_colon__slash__slash_example_com_slash_path_20with_20spaces.cache",
        );
    }
}