jsona_util/schema/
associations.rs

1use anyhow::anyhow;
2use jsona::dom::{KeyOrIndex, Node};
3use jsona_schema::{Schema, SchemaType};
4use once_cell::sync::Lazy;
5use parking_lot::{RwLock, RwLockReadGuard};
6use regex::Regex;
7use serde::{Deserialize, Serialize};
8use serde_json::{json, Value};
9use std::{fmt::Debug, sync::Arc};
10use url::Url;
11
12use crate::{
13    environment::Environment,
14    schema::Fetcher,
15    util::{url::is_url, GlobRule},
16    HashMap,
17};
18
/// Name of the root annotation whose string value references the schema of a document.
pub const SCHEMA_REF_KEY: &str = "@jsonaschema";

/// A string-typed schema described as "A ref to jsona schema".
///
/// NOTE(review): presumably describes the value of the `@jsonaschema` annotation;
/// its consumers are not visible in this file.
pub static SCHEMA_REF_SCHEMA: Lazy<Schema> = Lazy::new(|| Schema {
    schema_type: Some(SchemaType::String.into()),
    description: Some("A ref to jsona schema".into()),
    ..Default::default()
});

/// Schema store index used when no explicit store URL is supplied.
static DEFAULT_SCHEMASTORE_URI: Lazy<Url> = Lazy::new(|| {
    // The literal is a fixed, well-formed URL, so parsing is not expected to fail.
    Url::parse("https://cdn.jsdelivr.net/npm/@jsona/schemastore@latest/index.json").unwrap()
});

/// Matches a bare schema name: one or more ASCII letters, `_` or `-` (digits do not match).
static RE_SCHEMA_NAME: Lazy<Regex> = Lazy::new(|| Regex::new(r"^([A-Za-z_-]+)$").unwrap());
32
/// Priorities assigned to schema associations.
///
/// When several associations match the same document, the one with the
/// numerically highest priority is selected.
pub mod priority {
    /// Associations loaded from a schema store index.
    pub const STORE: usize = 30;
    /// NOTE(review): presumably associations coming from a configuration file;
    /// not used in this file.
    pub const CONFIG: usize = 50;
    /// NOTE(review): presumably associations coming from language-server
    /// configuration; not used in this file.
    pub const LSP_CONFIG: usize = 70;
    /// Associations declared by a `@jsonaschema` annotation in the document itself.
    pub const SCHEMA_FIELD: usize = 80;
    /// The largest representable priority.
    pub const MAX: usize = usize::MAX;
}
40
/// Labels stored under the `"source"` key of an association's metadata,
/// identifying where the association came from.
pub mod source {
    /// The association was loaded from a schema store index.
    pub const STORE: &str = "store";
    /// NOTE(review): presumably a configuration file; not used in this file.
    pub const CONFIG: &str = "config";
    /// NOTE(review): presumably language-server configuration; not used in this file.
    pub const LSP_CONFIG: &str = "lsp_config";
    /// The association was declared by a `@jsonaschema` annotation in the document.
    pub const SCHEMA_FIELD: &str = "@jsonaschema";
}
47
/// Registry that maps document URLs to the schema associated with them.
///
/// All mutable state lives behind `Arc<RwLock<..>>`, so clones of this value
/// share the same associations and caches.
#[derive(Clone)]
pub struct SchemaAssociations<E: Environment> {
    /// Environment used to turn a schema reference string into a URL.
    env: E,
    /// Fetcher used to download the schema store index.
    fetcher: Fetcher<E>,
    /// Registered `(rule, association)` pairs, in insertion order.
    associations: Arc<RwLock<Vec<(AssociationRule, SchemaAssociation)>>>,
    /// Memoized lookup results per document URL; `None` records "no match".
    cache: Arc<RwLock<HashMap<Url, Option<Arc<SchemaAssociation>>>>>,
    /// Schema URLs keyed by the schema name declared in a schema store index.
    store_schema_urls: Arc<RwLock<HashMap<String, Url>>>,
}
56
57impl<E: Environment> SchemaAssociations<E> {
58    pub(crate) fn new(env: E, fetcher: Fetcher<E>) -> Self {
59        Self {
60            env,
61            fetcher,
62            associations: Default::default(),
63            cache: Arc::new(RwLock::new(HashMap::default())),
64            store_schema_urls: Arc::new(RwLock::new(HashMap::default())),
65        }
66    }
67    pub fn add(&self, rule: AssociationRule, assoc: SchemaAssociation) {
68        tracing::debug!("add an association {:?} {:?}", rule, assoc);
69        self.associations.write().push((rule, assoc));
70        self.cache.write().clear();
71    }
72
73    pub fn read(&self) -> RwLockReadGuard<'_, Vec<(AssociationRule, SchemaAssociation)>> {
74        self.associations.read()
75    }
76
77    pub fn clear(&self) {
78        self.associations.write().clear();
79        self.cache.write().clear();
80        self.store_schema_urls.write().clear();
81    }
82
83    pub async fn add_from_schemastore(
84        &self,
85        url: &Option<Url>,
86        base: &Option<Url>,
87    ) -> Result<(), anyhow::Error> {
88        let url = url.as_ref().unwrap_or(&DEFAULT_SCHEMASTORE_URI);
89        let schemastore = self.load_schemastore(url).await?;
90        tracing::info!(%url, "use schema store");
91        for schema in &schemastore.0 {
92            if self
93                .store_schema_urls
94                .write()
95                .insert(schema.name.clone(), schema.url.clone())
96                .is_some()
97            {
98                tracing::warn!("schema name {} already exist", schema.name);
99            }
100            let include = schema
101                .file_match
102                .iter()
103                .filter_map(|v| GlobRule::preprocessing_pattern(v, base));
104            match GlobRule::new(include, [] as [&str; 0]) {
105                Ok(rule) => {
106                    self.add(
107                        rule.into(),
108                        SchemaAssociation {
109                            url: schema.url.clone(),
110                            meta: json!({
111                                "name": schema.name,
112                                "description": schema.description,
113                                "source": source::STORE,
114                            }),
115                            priority: priority::STORE,
116                        },
117                    );
118                }
119                Err(error) => {
120                    tracing::warn!(
121                        %error,
122                        schema_name = %schema.name,
123                        source = %url,
124                        "invalid glob pattern(s)"
125                    );
126                }
127            }
128        }
129        Ok(())
130    }
131
132    /// Adds the schema from a `@jsonaschema` annotation in the root.
133    pub fn add_from_document(&self, doc_url: &Url, node: &Node) {
134        let mut dirty = false;
135        self.associations
136            .write()
137            .retain(|(rule, assoc)| match rule {
138                AssociationRule::Url(u) => {
139                    if u == doc_url && assoc.meta["source"] == source::SCHEMA_FIELD {
140                        dirty = true;
141                        false
142                    } else {
143                        true
144                    }
145                }
146                _ => true,
147            });
148        if dirty {
149            self.cache.write().clear();
150        }
151        if let Some(url) = node
152            .get(&KeyOrIndex::annotation(SCHEMA_REF_KEY))
153            .and_then(|v| v.as_string().cloned())
154            .and_then(|v| self.get_schema_url(v.value()))
155        {
156            self.add(
157                AssociationRule::Url(doc_url.clone()),
158                SchemaAssociation {
159                    url,
160                    priority: priority::SCHEMA_FIELD,
161                    meta: json!({ "source": source::SCHEMA_FIELD }),
162                },
163            )
164        }
165    }
166
167    pub fn get_schema_url(&self, schema_ref: &str) -> Option<Url> {
168        if RE_SCHEMA_NAME.is_match(schema_ref) {
169            self.store_schema_urls.read().get(schema_ref).cloned()
170        } else {
171            self.env.to_url(schema_ref)
172        }
173    }
174
175    pub fn schema_key_complete_schema(&self) -> Schema {
176        let enum_value: Vec<_> = self
177            .store_schema_urls
178            .read()
179            .keys()
180            .map(|v| json!(v))
181            .collect();
182        Schema {
183            schema_type: Some(SchemaType::String.into()),
184            enum_value: Some(enum_value),
185            ..Default::default()
186        }
187    }
188
189    pub fn query_for(&self, file: &Url) -> Option<Arc<SchemaAssociation>> {
190        if let Some(assoc) = self.cache.read().get(file).cloned() {
191            return assoc;
192        }
193        let assoc = self
194            .associations
195            .read()
196            .iter()
197            .filter_map(|(rule, assoc)| {
198                if rule.is_match(file) {
199                    Some(assoc.clone())
200                } else {
201                    None
202                }
203            })
204            .max_by_key(|assoc| assoc.priority)
205            .map(Arc::new);
206        self.cache.write().insert(file.clone(), assoc.clone());
207        assoc
208    }
209
210    async fn load_schemastore(&self, index_url: &Url) -> Result<SchemaStore, anyhow::Error> {
211        self.fetcher
212            .fetch(index_url)
213            .await
214            .and_then(|v| serde_json::from_slice(&v).map_err(|e| anyhow!("{}", e)))
215    }
216}
217
/// Decides which document URLs an association applies to.
#[derive(Clone)]
pub enum AssociationRule {
    /// Applies to URLs accepted by the glob rule.
    Glob(GlobRule),
    /// Applies to exactly this URL.
    Url(Url),
}
223
224impl Debug for AssociationRule {
225    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
226        match self {
227            AssociationRule::Glob(glob) => write!(f, "Glob({:?})", glob),
228            AssociationRule::Url(url) => write!(f, "Url({})", url),
229        }
230    }
231}
232
233impl From<GlobRule> for AssociationRule {
234    fn from(v: GlobRule) -> Self {
235        Self::Glob(v)
236    }
237}
238
239impl From<Url> for AssociationRule {
240    fn from(v: Url) -> Self {
241        Self::Url(v)
242    }
243}
244
245impl AssociationRule {
246    pub fn batch(patterns: &[String], base: &Option<Url>) -> Result<Vec<Self>, anyhow::Error> {
247        let mut rules = vec![];
248        let mut glob_includes = vec![];
249        for pattern in patterns {
250            if is_url(pattern) {
251                rules.push(Self::Url(pattern.parse()?));
252            } else if let Some(p) = GlobRule::preprocessing_pattern(pattern, base) {
253                glob_includes.push(p);
254            }
255        }
256        if !glob_includes.is_empty() {
257            rules.push(Self::Glob(GlobRule::new(&glob_includes, &[] as &[&str])?));
258        }
259        Ok(rules)
260    }
261
262    pub fn glob(pattern: &str) -> Result<Self, anyhow::Error> {
263        Ok(Self::Glob(GlobRule::new(&[pattern], &[] as &[&str])?))
264    }
265
266    pub fn is_match(&self, url: &Url) -> bool {
267        match self {
268            AssociationRule::Glob(g) => g.is_match_url(url),
269            AssociationRule::Url(u) => u == url,
270        }
271    }
272}
273
/// A schema attached to the documents matched by an [`AssociationRule`].
#[derive(Clone)]
pub struct SchemaAssociation {
    /// Free-form JSON metadata. Associations created in this file store a
    /// `"source"` label here, plus `"name"` and `"description"` for schema
    /// store entries.
    pub meta: Value,
    /// URL of the schema.
    pub url: Url,
    /// Rank used to choose between several matching associations; higher wins.
    pub priority: usize,
}
280
281impl Debug for SchemaAssociation {
282    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
283        f.debug_struct("SchemaAssociation")
284            .field(
285                "meta",
286                &serde_json::to_string(&self.meta).unwrap_or_default(),
287            )
288            .field("url", &self.url.to_string())
289            .field("priority", &self.priority)
290            .finish()
291    }
292}
293
/// A schema store index: a list of schema entries.
///
/// Because of `#[serde(transparent)]`, it is (de)serialized directly as the
/// inner array rather than as a wrapper object.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(transparent)]
pub struct SchemaStore(Vec<SchemaStoreMeta>);
297
/// One entry of a schema store index.
///
/// Field names are camelCase on the wire, so `file_match` is read from and
/// written to `fileMatch`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SchemaStoreMeta {
    /// Name of the schema; empty when missing from the input.
    #[serde(default)]
    pub name: String,
    /// Description of the schema; empty when missing from the input.
    #[serde(default)]
    pub description: String,
    /// URL of the schema; required.
    pub url: Url,
    /// File patterns this schema applies to; empty when missing from the input.
    #[serde(default)]
    pub file_match: Vec<String>,
}
309
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds rules from `patterns` resolved against `base` and reports whether
    /// any of them matches `target`.
    fn any_rule_matches(target: &str, patterns: &[&str], base: &str) -> bool {
        let patterns: Vec<String> = patterns.iter().map(|p| p.to_string()).collect();
        let target: Url = target.parse().unwrap();
        let base: Option<Url> = base.parse().ok();
        AssociationRule::batch(&patterns, &base)
            .unwrap()
            .iter()
            .any(|rule| rule.is_match(&target))
    }

    #[test]
    fn test_association_rule() {
        // (target, pattern, base): every case is expected to match.
        let matching_cases = [
            ("file:///home/u1/abc", "abc", "file:///home/u1"),
            ("file:///home/u1/abc", "abc", "file:///home/u1/"),
            ("file:///home/u1/abc", "ab*", "file:///home/u1"),
            ("file:///home/u1/abc", "*bc", "file:///home/u1"),
            ("file:///home/u1/abcd", "ab*", "file:///home/u1"),
            ("file:///home/u1/p1/abc", "abc", "file:///home/u1"),
            ("file:///home/u1/p1/abc", "*abc", "file:///home/u1"),
            ("file:///home/u1/p1/abc", "*/abc", "file:///home/u1"),
            ("file:///home/u1/p1/abc", "**/abc", "file:///home/u1"),
            ("file:///c%3A/abc", "abc", "file:///c%3A/abc"),
            ("file:///c%3A/abc", "abc", "file:///c%3A"),
        ];
        for (target, pattern, base) in matching_cases.iter() {
            assert!(
                any_rule_matches(target, &[*pattern], base),
                "pattern {:?} with base {:?} should match {:?}",
                pattern,
                base,
                target
            );
        }
    }
}