Skip to main content

acorn/
lib.rs

1//! # 🌱 ACORN Library
2//! > "Plant an ACORN and grow your research"
3//!
//! `acorn-lib` is a one-stop shop for everything related to building and maintaining research activity data (RAD)-related technology, including the Accessible Content Optimization for Research Needs (ACORN) tool.
//! The modules, structs, enums, and constants found here support the ACORN CLI, which checks, analyzes, and exports research activity data into usable formats.
6//!
7#[cfg(feature = "std")]
8use crate::analyzer::{link_check, Check};
9#[cfg(feature = "std")]
10use crate::io::network_get_request;
11#[cfg(feature = "std")]
12use crate::prelude::PathBuf;
13use crate::util::Label;
14use derive_more::Display;
15use serde::{Deserialize, Serialize};
16#[cfg(feature = "std")]
17use tracing::debug;
18use tracing::{error, trace, warn};
19use uriparse::URI;
20use urlencoding::encode;
21
22#[cfg(feature = "analyzer")]
23pub mod analyzer;
24#[cfg(feature = "doctor")]
25pub mod doctor;
26#[cfg(feature = "std")]
27pub mod io;
28#[cfg(feature = "powerpoint")]
29pub mod powerpoint;
30pub mod prelude;
31pub mod schema;
32pub mod util;
33
34/// Abstraction for file and folder locations that can be local (e.g., file:///path/to/project) or remote (e.g., <https://gitlab.com/project>)
35#[derive(Clone, Debug, Display, Serialize, Deserialize)]
36#[serde(untagged)]
37pub enum Location {
38    /// Just the URI string (assumes remote location)
39    Simple(String),
40    /// Location defined by URI and scheme - intended for use with remote or local locations
41    #[display("{uri}")]
42    Detailed {
43        /// URI Scheme
44        ///
45        /// See [RFC 8089] for more information
46        ///
47        /// [RFC 8089]: https://datatracker.ietf.org/doc/rfc8089/
48        scheme: Scheme,
49        /// Full URI value
50        uri: String,
51    },
52}
53/// Git hosting repository data
54#[derive(Clone, Debug, Display, Serialize, Deserialize)]
55#[serde(tag = "provider", rename_all = "lowercase")]
56pub enum Repository {
57    /// Generic Git repository
58    /// ### Note
59    /// > This repository type should be used for local and offline repositories. Having the associated data be version controlled by Git is recommended, but not required.
60    #[display("git")]
61    Git {
62        /// Repository location information
63        location: Location,
64    },
65    /// GitHub
66    ///
67    /// See <https://docs.github.com/en/rest/reference/repos>
68    #[display("github")]
69    GitHub {
70        /// Repository location information
71        #[serde(alias = "uri")]
72        location: Location,
73    },
74    /// GitLab
75    ///
76    /// See <https://docs.gitlab.com/api/repositories/#list-repository-tree>
77    #[display("gitlab")]
78    GitLab {
79        /// Integer ID of GitLab project
80        ///
81        /// See <https://docs.gitlab.com/api/projects/#get-a-single-project> for more information
82        id: Option<u64>,
83        /// Repository location information
84        #[serde(alias = "uri")]
85        location: Location,
86    },
87}
88/// URI Scheme
89///
90/// See [RFC 8089] for more information
91///
92/// [RFC 8089]: https://datatracker.ietf.org/doc/rfc8089/
93#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq)]
94#[serde(rename_all = "lowercase")]
95pub enum Scheme {
96    /// Secure HTTP
97    #[display("https")]
98    HTTPS,
99    /// Insecure HTTP included primarily for contexts necessitating its use (ex., local development)
100    #[display("http")]
101    HTTP,
102    /// Local file or folder
103    #[display("file")]
104    File,
105    /// Unsupported scheme (e.g., insecure, not implemented, etc.)
106    Unsupported,
107}
108/// Struct for release data from GitLab or GitHub
109#[derive(Clone, Debug, Serialize, Deserialize)]
110pub struct Release {
111    /// Name of release
112    pub name: String,
113    /// Tag name of release
114    /// ### Example
115    /// > `v1.0.0`
116    pub tag_name: String,
117    /// Prose description of release
118    #[serde(alias = "body")]
119    pub description: String,
120    /// Date of release creation
121    pub created_at: String,
122    /// Date of release publication
123    #[serde(alias = "published_at")]
124    pub released_at: String,
125    /// Release response message
126    pub message: Option<String>,
127}
128impl Location {
129    /// Get associated location hash
130    /// > Useful for standardizing file path handling across local and remote contexts
131    /// ### Example
132    /// ```rust
133    /// use acorn::Location;
134    ///
135    /// let location = Location::Simple("https://code.ornl.gov/research-enablement/buckets/nssd".to_string());
136    /// assert_eq!(location.hash(), "code_ornl_gov_research-enablement_buckets_nssd");
137    /// ```
138    pub fn hash(&self) -> String {
139        let uri = self.uri().unwrap();
140        let host = match uri.host() {
141            | Some(value) => value.clone().to_string().replace('.', "_"),
142            | None => "".to_string(),
143        };
144        let segments = uri
145            .path()
146            .segments()
147            .iter()
148            .map(|s| s.to_string())
149            .filter(|s| !(s.is_empty() || s.eq(".")))
150            .collect::<Vec<_>>();
151        [host, segments.join("_").to_lowercase()]
152            .into_iter()
153            .filter(|x| !x.is_empty())
154            .collect::<Vec<String>>()
155            .join("_")
156    }
157    /// Get associated location value scheme (e.g., https, file, etc.)
158    /// ### Example
159    /// ```rust
160    /// use acorn::{Location, Scheme};
161    ///
162    /// let location = Location::Simple("https://code.ornl.gov/research-enablement/buckets/nssd".to_string());
163    /// assert_eq!(location.scheme(), Scheme::HTTPS);
164    /// let location = Location::Simple("file://localhost/buckets/nssd".to_string());
165    /// assert_eq!(location.scheme(), Scheme::File);
166    /// ```
167    pub fn scheme(&self) -> Scheme {
168        match self {
169            | Location::Simple(value) => match URI::try_from(value.as_str()) {
170                | Ok(uri) => match uri.scheme() {
171                    | uriparse::Scheme::HTTPS => Scheme::HTTPS,
172                    | uriparse::Scheme::HTTP => Scheme::HTTP,
173                    | uriparse::Scheme::File => Scheme::File,
174                    | _ => Scheme::Unsupported,
175                },
176                | Err(_) => Scheme::Unsupported,
177            },
178            | Location::Detailed { scheme, .. } => scheme.clone(),
179        }
180    }
181    /// Check if a location exists (i.e., is reachable and accessible)
182    #[cfg(feature = "std")]
183    pub async fn exists(self) -> bool {
184        let uri = self.uri();
185        let scheme = self.scheme();
186        if scheme == Scheme::HTTP {
187            warn!("=> {} HTTP is supported but only advised in local development scenarios", Label::skip());
188        }
189        match scheme {
190            | Scheme::HTTPS | Scheme::HTTP => match uri {
191                | Some(uri) => match link_check(Some(uri.into())).await {
192                    | Check { success, .. } if success => true,
193                    | _ => false,
194                },
195                | None => false,
196            },
197            | Scheme::File => match uri {
198                | Some(value) => PathBuf::from(value.path().to_string()).exists(),
199                | None => false,
200            },
201            | Scheme::Unsupported => false,
202        }
203    }
204    /// Extract and return URI from a location value
205    pub fn uri(&self) -> Option<URI<'static>> {
206        fn parse_uri(value: String) -> Option<URI<'static>> {
207            let leaked: &'static str = Box::leak(value.into_boxed_str());
208            match URI::try_from(leaked) {
209                | Ok(value) => Some(value),
210                | Err(why) => {
211                    warn!("=> {} Parse URI - {why}", Label::fail());
212                    None
213                }
214            }
215        }
216        match self {
217            | Location::Simple(value) => parse_uri(value.clone()),
218            | Location::Detailed { scheme, uri } => match URI::try_from(uri.as_str()) {
219                | Ok(parsed) => {
220                    let authority = parsed.authority().map(|auth| auth.to_string());
221                    let path = parsed.path().to_string();
222                    let query = parsed.query().map(|q| format!("?{q}")).unwrap_or_default();
223                    let fragment = parsed.fragment().map(|f| format!("#{f}")).unwrap_or_default();
224                    let rebuilt = match authority {
225                        | Some(auth) if !auth.is_empty() => format!("{scheme}://{auth}{path}{query}{fragment}"),
226                        | _ => format!("{scheme}:{path}{query}{fragment}"),
227                    };
228                    parse_uri(rebuilt)
229                }
230                | Err(_) => {
231                    let rebuilt = format!("{scheme}://{uri}");
232                    parse_uri(rebuilt)
233                }
234            },
235        }
236    }
237}
238impl Repository {
239    /// Return whether or not the associated URI for a repository is local (e.g., has "file" scheme)
240    pub fn is_local(self) -> bool {
241        let local_schemes = [Scheme::File];
242        local_schemes.contains(&self.location().scheme())
243    }
244    /// Get metadata for latest release of a Gitlab or GitHub repository
245    #[cfg(feature = "std")]
246    pub fn latest_release(self) -> Option<Release> {
247        match self.releases() {
248            | releases if releases.is_empty() => None,
249            | releases => {
250                let release = releases[0].clone();
251                trace!("=> {} Latest {:#?}", Label::using(), release);
252                Some(release)
253            }
254        }
255    }
256    /// Get repository location
257    pub fn location(self) -> Location {
258        match self {
259            | Repository::Git { location, .. } => location,
260            | Repository::GitHub { location, .. } => location,
261            | Repository::GitLab { location, .. } => location,
262        }
263    }
264    /// Get repository ID
265    pub fn id(&self) -> Option<String> {
266        match self {
267            | Repository::Git { .. } => None,
268            | Repository::GitHub { .. } => None,
269            | Repository::GitLab { id, location } => match location.uri() {
270                | Some(value) => {
271                    let mut path = value.path().to_string();
272                    path.remove(0);
273                    let encoded = encode(&path).to_string();
274                    trace!(encoded, "=> {} ID", Label::using());
275                    Some(encoded)
276                }
277                | None => {
278                    warn!("=> {} Parse GitLab URI", Label::fail());
279                    match id {
280                        | Some(value) => Some(value.to_string()),
281                        | None => None,
282                    }
283                }
284            },
285        }
286    }
287    #[cfg(feature = "std")]
288    fn releases(self) -> Vec<Release> {
289        let maybe_url = match &self {
290            | Repository::Git { .. } => None,
291            | Repository::GitHub { location } => match location.uri() {
292                | Some(uri) => {
293                    let host = uri.host().unwrap().to_string();
294                    let path = uri.path();
295                    let endpoint = Some(format!("https://api.{host}/repos{path}/releases"));
296                    endpoint
297                }
298                | None => {
299                    error!("=> {} Parse GitHub URI", Label::fail());
300                    None
301                }
302            },
303            | Repository::GitLab { location, .. } => match self.id() {
304                | Some(id) => match location.uri() {
305                    | Some(uri) => {
306                        let host = uri.host().unwrap().to_string();
307                        Some(format!("https://{host}/api/v4/projects/{id}/releases"))
308                    }
309                    | None => {
310                        error!("=> {} Parse GitLab URI", Label::fail());
311                        None
312                    }
313                },
314                | None => None,
315            },
316        };
317        if let Some(url) = maybe_url {
318            debug!(url, "=> {}", Label::using());
319            match network_get_request(url).send() {
320                | Ok(response) => {
321                    let text = response.text();
322                    match text {
323                        | Ok(text) => {
324                            if text.contains("API rate limit exceeded") {
325                                println!("API rate limit exceeded");
326                                error!("=> {} GitHub API rate limit exceeded", Label::fail());
327                                vec![]
328                            } else {
329                                let releases: Vec<Release> = match serde_json::from_str(&text) {
330                                    | Ok(values) => values,
331                                    | Err(why) => {
332                                        error!("=> {} Parse {} API JSON response - {why}", self, Label::fail());
333                                        vec![]
334                                    }
335                                };
336                                releases
337                            }
338                        }
339                        | Err(why) => {
340                            error!("=> {} Parse {} API text response - {why}", self, Label::fail());
341                            vec![]
342                        }
343                    }
344                }
345                | Err(why) => {
346                    error!("=> {} Download {} releases - {why}", self, Label::fail());
347                    vec![]
348                }
349            }
350        } else {
351            vec![]
352        }
353    }
354    /// Get URL for raw data of a file at a given path
355    pub fn raw_url(&self, path: String) -> Option<String> {
356        match self {
357            | Repository::GitHub { location, .. } => match location.uri() {
358                | Some(ref value) => Some(format!("https://raw.githubusercontent.com{}/refs/heads/main/{path}", value.path())),
359                | None => {
360                    error!("=> {} Parse GitHub URI", Label::fail());
361                    None
362                }
363            },
364            | Repository::GitLab { location, .. } => Some(format!("{location}/-/raw/main/{path}")),
365            | Repository::Git { .. } => None,
366        }
367    }
368}