acorn/
lib.rs

1//! # 🌱 ACORN Library
2//! > "Plant an ACORN and grow your research"
3//!
4//! `acorn-lib` is a one-stop-shop for everything related to building and maintaining research activity data (RAD)-related technology, including the Accessible Content Optimization for Research Needs (ACORN) tool.
5//! The modules, structs, enums and constants found here support the ACORN CLI, which checks, analyzes, and exports research activity data into useable formats.
6//!
7#[cfg(feature = "std")]
8use crate::analyzer::{link_check, Check};
9#[cfg(feature = "std")]
10use crate::io::network_get_request;
11#[cfg(feature = "std")]
12use crate::prelude::PathBuf;
13use crate::util::Label;
14use derive_more::Display;
15use serde::{Deserialize, Serialize};
16#[cfg(feature = "std")]
17use tracing::debug;
18use tracing::{error, trace, warn};
19use uriparse::URI;
20use urlencoding::encode;
21
22#[cfg(feature = "analyzer")]
23pub mod analyzer;
24#[cfg(feature = "doctor")]
25pub mod doctor;
26#[cfg(feature = "std")]
27pub mod io;
28#[cfg(feature = "powerpoint")]
29pub mod powerpoint;
30pub mod prelude;
31pub mod schema;
32pub mod util;
33
34/// Abstraction for file and folder locations that can be local (e.g., file:///path/to/project) or remote (e.g., <https://gitlab.com/project>)
35#[derive(Clone, Debug, Display, Serialize, Deserialize)]
36#[serde(untagged)]
37pub enum Location {
38    /// Just the URI string (assumes remote location)
39    Simple(String),
40    /// Local file path
41    #[display("{uri}")]
42    Detailed {
43        /// URI Scheme
44        ///
45        /// See [RFC 8089] for more information
46        ///
47        /// [RFC 8089]: https://datatracker.ietf.org/doc/rfc8089/
48        scheme: Scheme,
49        /// Full URI value
50        uri: String,
51    },
52}
53/// Struct for release data from GitLab or GitHub
54#[derive(Clone, Debug, Serialize, Deserialize)]
55pub struct Release {
56    /// Name of release
57    pub name: String,
58    /// Tag name of release
59    /// ### Example
60    /// > `v1.0.0`
61    pub tag_name: String,
62    /// Prose description of release
63    #[serde(alias = "body")]
64    pub description: String,
65    /// Date of release creation
66    pub created_at: String,
67    /// Date of release publication
68    #[serde(alias = "published_at")]
69    pub released_at: String,
70    /// Release response message
71    pub message: Option<String>,
72}
73/// Git hosting repository data
74#[derive(Clone, Debug, Display, Serialize, Deserialize)]
75#[serde(tag = "provider", rename_all = "lowercase")]
76pub enum Repository {
77    /// Generic Git repository
78    /// ### Note
79    /// > This repository type should be used for local and offline repositories. Having the associated data be version controlled by Git is recommended, but not required.
80    #[display("git")]
81    Git {
82        /// Repository location information
83        location: Location,
84    },
85    /// GitHub
86    ///
87    /// See <https://docs.github.com/en/rest/reference/repos>
88    #[display("github")]
89    GitHub {
90        /// Repository location information
91        #[serde(alias = "uri")]
92        location: Location,
93    },
94    /// GitLab
95    ///
96    /// See <https://docs.gitlab.com/api/repositories/#list-repository-tree>
97    #[display("gitlab")]
98    GitLab {
99        /// Integer ID of GitLab project
100        ///
101        /// See <https://docs.gitlab.com/api/projects/#get-a-single-project> for more information
102        id: Option<u64>,
103        /// Repository location information
104        #[serde(alias = "uri")]
105        location: Location,
106    },
107}
108/// URI Scheme
109///
110/// See [RFC 8089] for more information
111///
112/// [RFC 8089]: https://datatracker.ietf.org/doc/rfc8089/
113#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq)]
114#[serde(rename_all = "lowercase")]
115pub enum Scheme {
116    /// Secure HTTP
117    #[display("https")]
118    HTTPS,
119    /// Local file or folder
120    #[display("file")]
121    File,
122    /// Unsupported scheme (e.g., insecure, not implemented, etc.)
123    Unsupported,
124}
125impl Location {
126    /// Get associated location hash
127    /// > Useful for standardizing file path handling across local and remote contexts
128    /// ### Example
129    /// ```rust
130    /// use acorn::Location;
131    ///
132    /// let location = Location::Simple("https://code.ornl.gov/research-enablement/buckets/nssd".to_string());
133    /// assert_eq!(location.hash(), "code_ornl_gov_research-enablement_buckets_nssd");
134    /// ```
135    pub fn hash(&self) -> String {
136        let uri = self.uri().unwrap();
137        let host = match uri.host() {
138            | Some(value) => value.clone().to_string().replace('.', "_"),
139            | None => "".to_string(),
140        };
141        let segments = uri
142            .path()
143            .segments()
144            .iter()
145            .map(|s| s.to_string())
146            .filter(|s| !(s.is_empty() || s.eq(".")))
147            .collect::<Vec<_>>();
148        [host, segments.join("_").to_lowercase()]
149            .into_iter()
150            .filter(|x| !x.is_empty())
151            .collect::<Vec<String>>()
152            .join("_")
153    }
154    /// Get associated location value scheme (e.g., https, file, etc.)
155    /// ### Example
156    /// ```rust
157    /// use acorn::{Location, Scheme};
158    ///
159    /// let location = Location::Simple("https://code.ornl.gov/research-enablement/buckets/nssd".to_string());
160    /// assert_eq!(location.scheme(), Scheme::HTTPS);
161    /// let location = Location::Simple("file://localhost/buckets/nssd".to_string());
162    /// assert_eq!(location.scheme(), Scheme::File);
163    /// ```
164    pub fn scheme(&self) -> Scheme {
165        match self {
166            | Location::Simple(value) => match URI::try_from(value.as_str()) {
167                | Ok(uri) => match uri.scheme() {
168                    | uriparse::Scheme::HTTPS => Scheme::HTTPS,
169                    | uriparse::Scheme::File => Scheme::File,
170                    | _ => Scheme::Unsupported,
171                },
172                | Err(_) => Scheme::Unsupported,
173            },
174            | Location::Detailed { scheme, .. } => scheme.clone(),
175        }
176    }
177    /// Check if a location exists (i.e., is reachable and accessible)
178    #[cfg(feature = "std")]
179    pub async fn exists(self) -> bool {
180        let uri = self.uri();
181        match self.scheme() {
182            | Scheme::HTTPS => match uri {
183                | Some(uri) => match link_check(Some(uri.into())).await {
184                    | Check { success, .. } if success => true,
185                    | _ => false,
186                },
187                | None => false,
188            },
189            | Scheme::File => match uri {
190                | Some(value) => PathBuf::from(value.path().to_string()).exists(),
191                | None => false,
192            },
193            | Scheme::Unsupported => false,
194        }
195    }
196    /// Extract and return URI from a location value
197    pub fn uri(&self) -> Option<URI<'_>> {
198        fn parse_uri(value: &str) -> Option<URI<'_>> {
199            match URI::try_from(value) {
200                | Ok(value) => Some(value),
201                | Err(why) => {
202                    warn!("=> {} Parse URI - {why}", Label::fail());
203                    None
204                }
205            }
206        }
207        match self {
208            | Location::Simple(value) => parse_uri(value),
209            | Location::Detailed { uri, .. } => parse_uri(uri),
210        }
211    }
212}
213impl Repository {
214    /// Return whether or not the associated URI for a repository is local (e.g., has "file" scheme)
215    pub fn is_local(self) -> bool {
216        let local_schemes = [Scheme::File];
217        local_schemes.contains(&self.location().scheme())
218    }
219    /// Get metadata for latest release of a Gitlab or GitHub repository
220    #[cfg(feature = "std")]
221    pub fn latest_release(self) -> Option<Release> {
222        match self.releases() {
223            | releases if releases.is_empty() => None,
224            | releases => {
225                let release = releases[0].clone();
226                trace!("=> {} Latest {:#?}", Label::using(), release);
227                Some(release)
228            }
229        }
230    }
231    /// Get repository location
232    pub fn location(self) -> Location {
233        match self {
234            | Repository::Git { location, .. } => location,
235            | Repository::GitHub { location, .. } => location,
236            | Repository::GitLab { location, .. } => location,
237        }
238    }
239    /// Get repository ID
240    pub fn id(&self) -> Option<String> {
241        match self {
242            | Repository::Git { .. } => None,
243            | Repository::GitHub { .. } => None,
244            | Repository::GitLab { id, location } => match location.uri() {
245                | Some(value) => {
246                    let mut path = value.path().to_string();
247                    path.remove(0);
248                    let encoded = encode(&path).to_string();
249                    trace!(encoded, "=> {} ID", Label::using());
250                    Some(encoded)
251                }
252                | None => {
253                    warn!("=> {} Parse GitLab URI", Label::fail());
254                    match id {
255                        | Some(value) => Some(value.to_string()),
256                        | None => None,
257                    }
258                }
259            },
260        }
261    }
262    #[cfg(feature = "std")]
263    fn releases(self) -> Vec<Release> {
264        let maybe_url = match &self {
265            | Repository::Git { .. } => None,
266            | Repository::GitHub { location } => match location.uri() {
267                | Some(uri) => {
268                    let host = uri.host().unwrap().to_string();
269                    let path = uri.path();
270                    let endpoint = Some(format!("https://api.{host}/repos{path}/releases"));
271                    endpoint
272                }
273                | None => {
274                    error!("=> {} Parse GitHub URI", Label::fail());
275                    None
276                }
277            },
278            | Repository::GitLab { location, .. } => match self.id() {
279                | Some(id) => match location.uri() {
280                    | Some(uri) => {
281                        let host = uri.host().unwrap().to_string();
282                        Some(format!("https://{host}/api/v4/projects/{id}/releases"))
283                    }
284                    | None => {
285                        error!("=> {} Parse GitLab URI", Label::fail());
286                        None
287                    }
288                },
289                | None => None,
290            },
291        };
292        if let Some(url) = maybe_url {
293            debug!(url, "=> {}", Label::using());
294            match network_get_request(url).send() {
295                | Ok(response) => {
296                    let text = response.text();
297                    match text {
298                        | Ok(text) => {
299                            if text.contains("API rate limit exceeded") {
300                                println!("API rate limit exceeded");
301                                error!("=> {} GitHub API rate limit exceeded", Label::fail());
302                                vec![]
303                            } else {
304                                let releases: Vec<Release> = match serde_json::from_str(&text) {
305                                    | Ok(values) => values,
306                                    | Err(why) => {
307                                        error!("=> {} Parse {} API JSON response - {why}", self, Label::fail());
308                                        vec![]
309                                    }
310                                };
311                                releases
312                            }
313                        }
314                        | Err(why) => {
315                            error!("=> {} Parse {} API text response - {why}", self, Label::fail());
316                            vec![]
317                        }
318                    }
319                }
320                | Err(why) => {
321                    error!("=> {} Download {} releases - {why}", self, Label::fail());
322                    vec![]
323                }
324            }
325        } else {
326            vec![]
327        }
328    }
329    /// Get URL for raw data of a file at a given path
330    pub fn raw_url(&self, path: String) -> Option<String> {
331        match self {
332            | Repository::GitHub { location, .. } => match location.uri() {
333                | Some(ref value) => Some(format!("https://raw.githubusercontent.com{}/refs/heads/main/{path}", value.path())),
334                | None => {
335                    error!("=> {} Parse GitHub URI", Label::fail());
336                    None
337                }
338            },
339            | Repository::GitLab { location, .. } => Some(format!("{location}/-/raw/main/{path}")),
340            | Repository::Git { .. } => None,
341        }
342    }
343}