fastobo/ast/
doc.rs

1use std::fmt::Display;
2use std::fmt::Formatter;
3use std::fmt::Result as FmtResult;
4use std::fmt::Write;
5use std::iter::FromIterator;
6
7use fastobo_derive_internal::FromStr;
8
9use crate::ast::*;
10use crate::error::CardinalityError;
11use crate::error::SyntaxError;
12use crate::parser::Cache;
13use crate::parser::FromPair;
14use crate::semantics::Identified;
15use crate::semantics::Orderable;
16use crate::syntax::pest::iterators::Pair;
17use crate::syntax::Rule;
18
/// A complete OBO document in format version 1.4.
///
/// A document pairs one header frame with a list of entity frames; when
/// displayed, the header is written first and the entities follow.
#[derive(Clone, Default, Debug, Hash, Eq, FromStr, PartialEq)]
pub struct OboDoc {
    /// The header frame of the document.
    header: HeaderFrame,
    /// The entity frames of the document.
    entities: Vec<EntityFrame>,
}
25
26/// Constructors and builder methods.
27///
28/// # Parser
29/// Use `from_file` to parse a file on the local filesystem, or `from_stream`
30/// to parse a `BufRead` implementor (`BufRead` is needed instead of `Read` as
31/// the parser is line-based):
32/// ```rust
33/// # extern crate fastobo;
34/// # use std::io::BufReader;
35/// # use std::fs::File;
36/// # use fastobo::ast::*;
37/// let doc1 = fastobo::from_file("tests/data/ms.obo").unwrap();
38///
39/// // This is equivalent to (but with the file path set in eventual errors):
40/// let mut r = BufReader::new(File::open("tests/data/ms.obo").unwrap());
41/// let doc2 = fastobo::from_reader(&mut r).unwrap();
42///
43/// // FIXME: threaded parser may not maintain ordering YET
44/// // assert_eq!(doc1, doc2);
45/// ```
46///
47/// # Builder Pattern
48/// The builder pattern makes it easy to create an `OboDoc` from an interator
49/// of `EntityFrame`, in order to add an `HeaderFrame` after all the entities
50/// where collected:
51/// ```rust
52/// # extern crate fastobo;
53/// # use fastobo::ast::*;
54/// use std::iter::FromIterator;
55///
56/// let entities = vec![TermFrame::new(ClassIdent::from(PrefixedIdent::new("TEST", "001")))];
57/// let doc = OboDoc::from_iter(entities.into_iter())
58///     .and_header(HeaderFrame::from(HeaderClause::FormatVersion(Box::new("1.4".into()))));
59/// ```
60impl OboDoc {
61    /// Create a new empty OBO document.
62    pub fn new() -> Self {
63        Default::default()
64    }
65
66    /// Create a new OBO document with the provided frame.
67    pub fn with_header(header: HeaderFrame) -> Self {
68        Self {
69            header,
70            entities: Default::default(),
71        }
72    }
73
74    /// Use the provided frame as the header of the OBO document.
75    #[must_use]
76    pub fn and_header(mut self, header: HeaderFrame) -> Self {
77        self.header = header;
78        self
79    }
80
81    /// Create a new OBO document with the provided entity frames.
82    pub fn with_entities(entities: Vec<EntityFrame>) -> Self {
83        Self {
84            header: Default::default(),
85            entities,
86        }
87    }
88
89    /// Use the provided entity frames as the content of the OBO document.
90    #[must_use]
91    pub fn and_entities(mut self, entities: Vec<EntityFrame>) -> Self {
92        self.entities = entities;
93        self
94    }
95}
96
97/// Shared and mutable getters.
98impl OboDoc {
99    /// Get a reference to the header of the OBO document.
100    pub fn header(&self) -> &HeaderFrame {
101        &self.header
102    }
103
104    /// Get a mutable reference to the header of the OBO document.
105    pub fn header_mut(&mut self) -> &mut HeaderFrame {
106        &mut self.header
107    }
108
109    /// Get a reference to the entities of the OBO document.
110    pub fn entities(&self) -> &Vec<EntityFrame> {
111        &self.entities
112    }
113
114    /// Get a reference to the entities of the OBO document.
115    pub fn entities_mut(&mut self) -> &mut Vec<EntityFrame> {
116        &mut self.entities
117    }
118
119    /// Check whether or not the document is empty.
120    ///
121    /// An empty document has no header clauses and no entity frames.
122    #[inline]
123    pub fn is_empty(&self) -> bool {
124        self.header().is_empty() && self.entities().is_empty()
125    }
126}
127
128/// Additional methods for `OboDoc` that can be used to edit the syntax tree.
129///
130/// The OBO 1.4 semantics are used to process header macros or to add the
131/// default OBO namespace to all the frames of the document.
132impl OboDoc {
133    /// Assign the ontology default namespace to all frames without one.
134    ///
135    /// This function will not check the cardinality of `namespace` clauses in
136    /// entity frames: it will only add a single `namespace` clause to all
137    /// frames that have none.
138    ///
139    /// # Errors
140    ///
141    /// If all frames already have a `namespace` clause, this function will
142    /// not check the contents of the header, return `Ok(())`. However, if
143    /// a frame requires the assignment of the default namespace, then a
144    /// [`CardinalityError`](../error/enum.CardinalityError.html) may be raised depending on the header contents.
145    ///
146    /// # Example
147    /// ```rust
148    /// # extern crate fastobo;
149    /// # use pretty_assertions::assert_eq;
150    /// # use std::str::FromStr;
151    /// # use std::string::ToString;
152    /// # use fastobo::ast::*;
153    /// let mut doc = OboDoc::from_str(
154    /// "default-namespace: test
155    ///
156    /// [Term]
157    /// id: TST:01
158    ///
159    /// [Term]
160    /// id: PATO:0000001
161    /// namespace: quality
162    /// ").unwrap();
163    ///
164    /// doc.assign_namespaces().unwrap();
165    /// assert_eq!(doc.to_string(),
166    /// "default-namespace: test
167    ///
168    /// [Term]
169    /// id: TST:01
170    /// namespace: test
171    ///
172    /// [Term]
173    /// id: PATO:0000001
174    /// namespace: quality
175    /// ");
176    ///
177    pub fn assign_namespaces(&mut self) -> Result<(), CardinalityError> {
178        macro_rules! expand {
179            ($frame:ident, $clause:ident, $ns:ident, $outer:lifetime) => {{
180                if !$frame
181                    .iter()
182                    .any(|clause| matches!(clause.as_ref(), $clause::Namespace(_)))
183                {
184                    match $ns {
185                        Err(e) => return Err(e.clone()),
186                        Ok(&ns) => {
187                            $frame.push(Line::from($clause::Namespace(Box::new(ns.clone()))))
188                        }
189                    }
190                }
191            }};
192        }
193
194        use self::EntityFrame::*;
195
196        // Force borrowck to split borrows: we shoudl be able to borrow
197        // the header AND the entities at the same time.
198        let ns = self.header.default_namespace();
199        let ns_ref = ns.as_ref();
200        for entity in &mut self.entities {
201            match entity {
202                Term(x) => expand!(x, TermClause, ns_ref, 'outer),
203                Typedef(x) => expand!(x, TypedefClause, ns_ref, 'outer),
204                Instance(x) => expand!(x, InstanceClause, ns_ref, 'outer),
205            }
206        }
207
208        Ok(())
209    }
210
211    /// Process macros in the header frame, adding clauses to relevant entities.
212    ///
213    /// Header macros are used to expand an ontology by overloading the
214    /// actual semantics of  `xref` clauses contained in several entity frames.
215    /// In case the translated clauses are already present in the document,
216    /// they *won't* be added a second time.
217    ///
218    /// The following implicit macros will be processed even if they are not
219    /// part of the document:
220    /// - `treat-xrefs-as-equivalent: RO`
221    /// - `treat-xrefs-as-equivalent: BFO`
222    ///
223    /// # Note
224    /// After processing the document, neither the original frame `xrefs`
225    /// nor the `treat-xrefs` header clauses will be removed from the AST.
226    ///
227    /// # See also
228    /// - [Header Macro Translation](http://owlcollab.github.io/oboformat/doc/obo-syntax.html#4.4.2)
229    ///   section of the syntax and semantics guide.
230    pub fn treat_xrefs(&mut self) {
231        use self::HeaderClause::*;
232
233        // Force borrowck to split borrows: we should be able to mutably
234        // borrow the header AND the entities at the same time.
235        let entities = &mut self.entities;
236
237        // Apply implicit macros for `BFO` and `RO`
238        crate::semantics::as_equivalent(entities, &IdentPrefix::new("BFO"));
239        crate::semantics::as_equivalent(entities, &IdentPrefix::new("RO"));
240
241        // Apply all `treat-xrefs` macros to the document.
242        for clause in &self.header {
243            match clause {
244                TreatXrefsAsEquivalent(prefix) => crate::semantics::as_equivalent(entities, prefix),
245                TreatXrefsAsIsA(prefix) => crate::semantics::as_is_a(entities, prefix),
246                TreatXrefsAsHasSubclass(prefix) => {
247                    crate::semantics::as_has_subclass(entities, prefix)
248                }
249                TreatXrefsAsGenusDifferentia(prefix, rel, cls) => {
250                    crate::semantics::as_genus_differentia(entities, prefix, rel, cls)
251                }
252                TreatXrefsAsReverseGenusDifferentia(prefix, rel, cls) => {
253                    crate::semantics::as_reverse_genus_differentia(entities, prefix, rel, cls)
254                }
255                TreatXrefsAsRelationship(prefix, rel) => {
256                    crate::semantics::as_relationship(entities, prefix, rel)
257                }
258                _ => (),
259            }
260        }
261    }
262
263    /// Check if the OBO document is fully labeled.
264    ///
265    /// An OBO ontology is fully labeled if every frame has exactly one `name`
266    /// clause. This is equivalent to the definition in the [OBO specification]
267    /// if we suppose an invalid OBO document is never *fully labeled*.
268    ///
269    /// [OBO specification]: http://owlcollab.github.io/oboformat/doc/obo-syntax.html#6.1.5
270    pub fn is_fully_labeled(&self) -> bool {
271        self.entities.iter().all(|frame| match frame {
272            EntityFrame::Term(f) => f.name().is_ok(),
273            EntityFrame::Typedef(f) => f.name().is_ok(),
274            EntityFrame::Instance(f) => f.name().is_ok(),
275        })
276    }
277}
278
279impl AsRef<[EntityFrame]> for OboDoc {
280    fn as_ref(&self) -> &[EntityFrame] {
281        self.entities.as_slice()
282    }
283}
284
285impl AsRef<Vec<EntityFrame>> for OboDoc {
286    fn as_ref(&self) -> &Vec<EntityFrame> {
287        &self.entities
288    }
289}
290
291impl AsMut<Vec<EntityFrame>> for OboDoc {
292    fn as_mut(&mut self) -> &mut Vec<EntityFrame> {
293        &mut self.entities
294    }
295}
296
297impl Display for OboDoc {
298    fn fmt(&self, f: &mut Formatter) -> FmtResult {
299        self.header.fmt(f)?;
300        if !self.header.is_empty() && !self.entities.is_empty() {
301            f.write_char('\n')?;
302        }
303
304        let mut entities = self.entities.iter().peekable();
305        while let Some(entity) = entities.next() {
306            entity.fmt(f)?;
307            if entities.peek().is_some() {
308                f.write_char('\n')?;
309            }
310        }
311        Ok(())
312    }
313}
314
315impl<E> FromIterator<E> for OboDoc
316where
317    E: Into<EntityFrame>,
318{
319    fn from_iter<T>(iter: T) -> Self
320    where
321        T: IntoIterator<Item = E>,
322    {
323        Self::with_entities(iter.into_iter().map(Into::into).collect())
324    }
325}
326
327impl Orderable for OboDoc {
328    /// Sort the document in the right serialization order.
329    fn sort(&mut self) {
330        self.header.sort_unstable();
331        // FIXME(@althonos): should probably not require cloning here.
332        self.entities
333            .sort_unstable_by(|e1, e2| e1.as_id().cmp(e2.as_id()));
334        for entity in &mut self.entities {
335            entity.sort()
336        }
337    }
338
339    /// Check if the document is sorted in the right serialization order.
340    fn is_sorted(&self) -> bool {
341        // Check entities are sorted on their identifier.
342        for i in 1..self.entities.len() {
343            if self.entities[i - 1].as_id() > self.entities[i].as_id() {
344                return false;
345            }
346        }
347
348        // Check every entity is sorted.
349        for entity in &self.entities {
350            if !entity.is_sorted() {
351                return false;
352            }
353        }
354
355        // Check the header is sorted.
356        self.header.is_sorted()
357    }
358}
359
360impl<'i> FromPair<'i> for OboDoc {
361    const RULE: Rule = Rule::OboDoc;
362    unsafe fn from_pair_unchecked(
363        pair: Pair<'i, Rule>,
364        cache: &Cache,
365    ) -> Result<Self, SyntaxError> {
366        let mut inner = pair.into_inner();
367
368        let mut entities = Vec::new();
369        let header = HeaderFrame::from_pair_unchecked(inner.next().unwrap(), cache)?;
370
371        let mut pair = inner.next().unwrap();
372        while pair.as_rule() != Rule::EOI {
373            entities.push(EntityFrame::from_pair_unchecked(pair, cache)?);
374            pair = inner.next().unwrap();
375        }
376        Ok(OboDoc { header, entities })
377    }
378}
379
#[cfg(test)]
mod tests {

    use super::*;

    use std::iter::FromIterator;
    use std::str::FromStr;

    use pretty_assertions::assert_eq;
    use textwrap_macros::dedent;

    /// Parse `text` into a document, panicking if it cannot be parsed.
    fn parse(text: &str) -> OboDoc {
        OboDoc::from_str(text).unwrap()
    }

    #[test]
    fn from_str() {
        // Neither an empty file nor a file made of empty lines should
        // produce anything but an empty `OboDoc`.
        for blank in ["", "\n\n"].iter() {
            self::assert_eq!(parse(blank), OboDoc::default());
        }

        // A simple file should parse.
        let parsed = parse(dedent!(
            r#"
            format-version: 1.2

            [Term]
            id: TEST:001
            "#
        ));

        let version = HeaderClause::FormatVersion(Box::new(UnquotedString::new("1.2")));
        let term = TermFrame::new(ClassIdent::from(PrefixedIdent::new("TEST", "001")));
        let expected =
            OboDoc::from_iter(Some(term)).and_header(HeaderFrame::from_iter(vec![version]));
        self::assert_eq!(parsed, expected);
    }

    #[test]
    fn to_string() {
        // Empty `OboDoc` should give empty string.
        self::assert_eq!(OboDoc::default().to_string(), "");

        // `OboDoc` with only header frame should not add newline separator.
        let clauses = vec![
            HeaderClause::FormatVersion(Box::new(UnquotedString::new("1.2"))),
            HeaderClause::Remark(Box::new(UnquotedString::new("this is a test"))),
        ];
        let header_only = OboDoc::with_header(HeaderFrame::from(clauses));
        let expected = dedent!(
            r#"
            format-version: 1.2
            remark: this is a test
            "#
        )
        .trim_start_matches('\n');
        self::assert_eq!(header_only.to_string(), expected);
    }

    #[test]
    fn is_fully_labeled() {
        // Each case pairs a document with the expected labeling status.
        let cases = [
            ("[Term]\nid: TEST:001\n", false),
            ("[Term]\nid: TEST:001\nname: test item\n", true),
            (
                dedent!(
                    r#"
                    [Term]
                    id: TEST:001
                    name: test item

                    [Term]
                    id: TEST:002
                    name: test item two
                    "#
                ),
                true,
            ),
            (
                dedent!(
                    r#"
                    [Term]
                    id: TEST:001
                    name: test item

                    [Term]
                    id: TEST:002
                    "#
                ),
                false,
            ),
            (
                dedent!(
                    r#"
                    [Term]
                    id: TEST:001

                    [Term]
                    id: TEST:002
                    name: test item two
                    "#
                ),
                false,
            ),
        ];

        for (text, expected) in cases.iter() {
            self::assert_eq!(parse(text).is_fully_labeled(), *expected);
        }
    }
}