Skip to main content

annis_web/client/
corpora.rs

1use graphannis::AnnotationGraph;
2use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
3use serde::{Deserialize, Serialize};
4
5use crate::{
6    errors::AppError,
7    state::{GlobalAppState, SessionArg},
8    Result,
9};
10
11/// Get a sorted list of all corpus names
12pub async fn list(session: &SessionArg, state: &GlobalAppState) -> Result<Vec<String>> {
13    let client = state.create_client(session)?;
14    let request = client.get(state.service_url.join("corpora")?).build()?;
15    let mut corpora: Vec<String> = client.execute(request).await?.json().await?;
16    corpora.sort_by_key(|k| k.to_lowercase());
17
18    Ok(corpora)
19}
20
21#[derive(Serialize, Debug)]
22struct SubgraphRequest {
23    node_ids: Vec<String>,
24    segmentation: Option<String>,
25    left: usize,
26    right: usize,
27}
28
29const QUERY: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(b'>');
30
31/// Get the subgraph for a given match
32pub async fn subgraph(
33    session: &SessionArg,
34    corpus: &str,
35    node_ids: Vec<String>,
36    segmentation: Option<String>,
37    left: usize,
38    right: usize,
39    state: &GlobalAppState,
40) -> Result<AnnotationGraph> {
41    let url = state.service_url.join(&format!(
42        "corpora/{}/subgraph",
43        utf8_percent_encode(corpus, QUERY)
44    ))?;
45    let client = state.create_client(session)?;
46
47    let body = SubgraphRequest {
48        node_ids,
49        segmentation,
50        left,
51        right,
52    };
53
54    let request = client
55        .request(reqwest::Method::POST, url.clone())
56        .json(&body)
57        .build()?;
58
59    let response = client.execute(request).await?;
60    if response.status().is_success() {
61        let response_body = response.text().await?;
62
63        let (g, _config) = graphannis_core::graph::serialization::graphml::import::<
64            graphannis::model::AnnotationComponentType,
65            _,
66            _,
67        >(response_body.as_bytes(), true, |_| {})?;
68
69        Ok(g)
70    } else {
71        Err(AppError::Backend {
72            status_code: response.status(),
73            url: response.url().clone(),
74        })
75    }
76}
77
78#[derive(Serialize)]
79struct ComponentsRequest {
80    #[serde(rename = "type")]
81    ctype: Option<String>,
82    name: Option<String>,
83}
84
85#[derive(Deserialize, Debug)]
86struct ComponentResponse {
87    #[serde(rename = "type")]
88    _ctype: Option<String>,
89    name: String,
90    layer: String,
91}
92
93/// List all segmentions (in addition to the token layer) for a given corpus.
94pub async fn segmentations(
95    session: &SessionArg,
96    corpus: &str,
97    state: &GlobalAppState,
98) -> Result<Vec<String>> {
99    let url = state.service_url.join(&format!(
100        "corpora/{}/components",
101        utf8_percent_encode(corpus, QUERY)
102    ))?;
103    let client = state.create_client(session)?;
104
105    let query_params = ComponentsRequest {
106        ctype: Some("Ordering".to_string()),
107        name: None,
108    };
109
110    let request = client
111        .request(reqwest::Method::GET, url.clone())
112        .query(&query_params)
113        .build()?;
114
115    let ordering_components: Vec<ComponentResponse> = client.execute(request).await?.json().await?;
116    let result: Vec<String> = ordering_components
117        .into_iter()
118        .filter(|c| !c.name.is_empty() && c.layer != "annis")
119        .map(|c| c.name)
120        .collect();
121    Ok(result)
122}