fastobo/ast/doc.rs
1use std::fmt::Display;
2use std::fmt::Formatter;
3use std::fmt::Result as FmtResult;
4use std::fmt::Write;
5use std::iter::FromIterator;
6
7use fastobo_derive_internal::FromStr;
8
9use crate::ast::*;
10use crate::error::CardinalityError;
11use crate::error::SyntaxError;
12use crate::parser::Cache;
13use crate::parser::FromPair;
14use crate::semantics::Identified;
15use crate::semantics::Orderable;
16use crate::syntax::pest::iterators::Pair;
17use crate::syntax::Rule;
18
/// A complete OBO document in format version 1.4.
///
/// A document is made of a single header frame followed by any number of
/// entity frames.
#[derive(Clone, Default, Debug, Hash, Eq, FromStr, PartialEq)]
pub struct OboDoc {
    /// The header frame of the document.
    header: HeaderFrame,
    /// The entity frames of the document, in the order they are stored.
    entities: Vec<EntityFrame>,
}
25
26/// Constructors and builder methods.
27///
28/// # Parser
29/// Use `from_file` to parse a file on the local filesystem, or `from_stream`
30/// to parse a `BufRead` implementor (`BufRead` is needed instead of `Read` as
31/// the parser is line-based):
32/// ```rust
33/// # extern crate fastobo;
34/// # use std::io::BufReader;
35/// # use std::fs::File;
36/// # use fastobo::ast::*;
37/// let doc1 = fastobo::from_file("tests/data/ms.obo").unwrap();
38///
39/// // This is equivalent to (but with the file path set in eventual errors):
40/// let mut r = BufReader::new(File::open("tests/data/ms.obo").unwrap());
41/// let doc2 = fastobo::from_reader(&mut r).unwrap();
42///
43/// // FIXME: threaded parser may not maintain ordering YET
44/// // assert_eq!(doc1, doc2);
45/// ```
46///
47/// # Builder Pattern
48/// The builder pattern makes it easy to create an `OboDoc` from an interator
49/// of `EntityFrame`, in order to add an `HeaderFrame` after all the entities
50/// where collected:
51/// ```rust
52/// # extern crate fastobo;
53/// # use fastobo::ast::*;
54/// use std::iter::FromIterator;
55///
56/// let entities = vec![TermFrame::new(ClassIdent::from(PrefixedIdent::new("TEST", "001")))];
57/// let doc = OboDoc::from_iter(entities.into_iter())
58/// .and_header(HeaderFrame::from(HeaderClause::FormatVersion(Box::new("1.4".into()))));
59/// ```
60impl OboDoc {
61 /// Create a new empty OBO document.
62 pub fn new() -> Self {
63 Default::default()
64 }
65
66 /// Create a new OBO document with the provided frame.
67 pub fn with_header(header: HeaderFrame) -> Self {
68 Self {
69 header,
70 entities: Default::default(),
71 }
72 }
73
74 /// Use the provided frame as the header of the OBO document.
75 #[must_use]
76 pub fn and_header(mut self, header: HeaderFrame) -> Self {
77 self.header = header;
78 self
79 }
80
81 /// Create a new OBO document with the provided entity frames.
82 pub fn with_entities(entities: Vec<EntityFrame>) -> Self {
83 Self {
84 header: Default::default(),
85 entities,
86 }
87 }
88
89 /// Use the provided entity frames as the content of the OBO document.
90 #[must_use]
91 pub fn and_entities(mut self, entities: Vec<EntityFrame>) -> Self {
92 self.entities = entities;
93 self
94 }
95}
96
97/// Shared and mutable getters.
98impl OboDoc {
99 /// Get a reference to the header of the OBO document.
100 pub fn header(&self) -> &HeaderFrame {
101 &self.header
102 }
103
104 /// Get a mutable reference to the header of the OBO document.
105 pub fn header_mut(&mut self) -> &mut HeaderFrame {
106 &mut self.header
107 }
108
109 /// Get a reference to the entities of the OBO document.
110 pub fn entities(&self) -> &Vec<EntityFrame> {
111 &self.entities
112 }
113
114 /// Get a reference to the entities of the OBO document.
115 pub fn entities_mut(&mut self) -> &mut Vec<EntityFrame> {
116 &mut self.entities
117 }
118
119 /// Check whether or not the document is empty.
120 ///
121 /// An empty document has no header clauses and no entity frames.
122 #[inline]
123 pub fn is_empty(&self) -> bool {
124 self.header().is_empty() && self.entities().is_empty()
125 }
126}
127
128/// Additional methods for `OboDoc` that can be used to edit the syntax tree.
129///
130/// The OBO 1.4 semantics are used to process header macros or to add the
131/// default OBO namespace to all the frames of the document.
132impl OboDoc {
133 /// Assign the ontology default namespace to all frames without one.
134 ///
135 /// This function will not check the cardinality of `namespace` clauses in
136 /// entity frames: it will only add a single `namespace` clause to all
137 /// frames that have none.
138 ///
139 /// # Errors
140 ///
141 /// If all frames already have a `namespace` clause, this function will
142 /// not check the contents of the header, return `Ok(())`. However, if
143 /// a frame requires the assignment of the default namespace, then a
144 /// [`CardinalityError`](../error/enum.CardinalityError.html) may be raised depending on the header contents.
145 ///
146 /// # Example
147 /// ```rust
148 /// # extern crate fastobo;
149 /// # use pretty_assertions::assert_eq;
150 /// # use std::str::FromStr;
151 /// # use std::string::ToString;
152 /// # use fastobo::ast::*;
153 /// let mut doc = OboDoc::from_str(
154 /// "default-namespace: test
155 ///
156 /// [Term]
157 /// id: TST:01
158 ///
159 /// [Term]
160 /// id: PATO:0000001
161 /// namespace: quality
162 /// ").unwrap();
163 ///
164 /// doc.assign_namespaces().unwrap();
165 /// assert_eq!(doc.to_string(),
166 /// "default-namespace: test
167 ///
168 /// [Term]
169 /// id: TST:01
170 /// namespace: test
171 ///
172 /// [Term]
173 /// id: PATO:0000001
174 /// namespace: quality
175 /// ");
176 ///
177 pub fn assign_namespaces(&mut self) -> Result<(), CardinalityError> {
178 macro_rules! expand {
179 ($frame:ident, $clause:ident, $ns:ident, $outer:lifetime) => {{
180 if !$frame
181 .iter()
182 .any(|clause| matches!(clause.as_ref(), $clause::Namespace(_)))
183 {
184 match $ns {
185 Err(e) => return Err(e.clone()),
186 Ok(&ns) => {
187 $frame.push(Line::from($clause::Namespace(Box::new(ns.clone()))))
188 }
189 }
190 }
191 }};
192 }
193
194 use self::EntityFrame::*;
195
196 // Force borrowck to split borrows: we shoudl be able to borrow
197 // the header AND the entities at the same time.
198 let ns = self.header.default_namespace();
199 let ns_ref = ns.as_ref();
200 for entity in &mut self.entities {
201 match entity {
202 Term(x) => expand!(x, TermClause, ns_ref, 'outer),
203 Typedef(x) => expand!(x, TypedefClause, ns_ref, 'outer),
204 Instance(x) => expand!(x, InstanceClause, ns_ref, 'outer),
205 }
206 }
207
208 Ok(())
209 }
210
211 /// Process macros in the header frame, adding clauses to relevant entities.
212 ///
213 /// Header macros are used to expand an ontology by overloading the
214 /// actual semantics of `xref` clauses contained in several entity frames.
215 /// In case the translated clauses are already present in the document,
216 /// they *won't* be added a second time.
217 ///
218 /// The following implicit macros will be processed even if they are not
219 /// part of the document:
220 /// - `treat-xrefs-as-equivalent: RO`
221 /// - `treat-xrefs-as-equivalent: BFO`
222 ///
223 /// # Note
224 /// After processing the document, neither the original frame `xrefs`
225 /// nor the `treat-xrefs` header clauses will be removed from the AST.
226 ///
227 /// # See also
228 /// - [Header Macro Translation](http://owlcollab.github.io/oboformat/doc/obo-syntax.html#4.4.2)
229 /// section of the syntax and semantics guide.
230 pub fn treat_xrefs(&mut self) {
231 use self::HeaderClause::*;
232
233 // Force borrowck to split borrows: we should be able to mutably
234 // borrow the header AND the entities at the same time.
235 let entities = &mut self.entities;
236
237 // Apply implicit macros for `BFO` and `RO`
238 crate::semantics::as_equivalent(entities, &IdentPrefix::new("BFO"));
239 crate::semantics::as_equivalent(entities, &IdentPrefix::new("RO"));
240
241 // Apply all `treat-xrefs` macros to the document.
242 for clause in &self.header {
243 match clause {
244 TreatXrefsAsEquivalent(prefix) => crate::semantics::as_equivalent(entities, prefix),
245 TreatXrefsAsIsA(prefix) => crate::semantics::as_is_a(entities, prefix),
246 TreatXrefsAsHasSubclass(prefix) => {
247 crate::semantics::as_has_subclass(entities, prefix)
248 }
249 TreatXrefsAsGenusDifferentia(prefix, rel, cls) => {
250 crate::semantics::as_genus_differentia(entities, prefix, rel, cls)
251 }
252 TreatXrefsAsReverseGenusDifferentia(prefix, rel, cls) => {
253 crate::semantics::as_reverse_genus_differentia(entities, prefix, rel, cls)
254 }
255 TreatXrefsAsRelationship(prefix, rel) => {
256 crate::semantics::as_relationship(entities, prefix, rel)
257 }
258 _ => (),
259 }
260 }
261 }
262
263 /// Check if the OBO document is fully labeled.
264 ///
265 /// An OBO ontology is fully labeled if every frame has exactly one `name`
266 /// clause. This is equivalent to the definition in the [OBO specification]
267 /// if we suppose an invalid OBO document is never *fully labeled*.
268 ///
269 /// [OBO specification]: http://owlcollab.github.io/oboformat/doc/obo-syntax.html#6.1.5
270 pub fn is_fully_labeled(&self) -> bool {
271 self.entities.iter().all(|frame| match frame {
272 EntityFrame::Term(f) => f.name().is_ok(),
273 EntityFrame::Typedef(f) => f.name().is_ok(),
274 EntityFrame::Instance(f) => f.name().is_ok(),
275 })
276 }
277}
278
279impl AsRef<[EntityFrame]> for OboDoc {
280 fn as_ref(&self) -> &[EntityFrame] {
281 self.entities.as_slice()
282 }
283}
284
285impl AsRef<Vec<EntityFrame>> for OboDoc {
286 fn as_ref(&self) -> &Vec<EntityFrame> {
287 &self.entities
288 }
289}
290
291impl AsMut<Vec<EntityFrame>> for OboDoc {
292 fn as_mut(&mut self) -> &mut Vec<EntityFrame> {
293 &mut self.entities
294 }
295}
296
297impl Display for OboDoc {
298 fn fmt(&self, f: &mut Formatter) -> FmtResult {
299 self.header.fmt(f)?;
300 if !self.header.is_empty() && !self.entities.is_empty() {
301 f.write_char('\n')?;
302 }
303
304 let mut entities = self.entities.iter().peekable();
305 while let Some(entity) = entities.next() {
306 entity.fmt(f)?;
307 if entities.peek().is_some() {
308 f.write_char('\n')?;
309 }
310 }
311 Ok(())
312 }
313}
314
315impl<E> FromIterator<E> for OboDoc
316where
317 E: Into<EntityFrame>,
318{
319 fn from_iter<T>(iter: T) -> Self
320 where
321 T: IntoIterator<Item = E>,
322 {
323 Self::with_entities(iter.into_iter().map(Into::into).collect())
324 }
325}
326
327impl Orderable for OboDoc {
328 /// Sort the document in the right serialization order.
329 fn sort(&mut self) {
330 self.header.sort_unstable();
331 // FIXME(@althonos): should probably not require cloning here.
332 self.entities
333 .sort_unstable_by(|e1, e2| e1.as_id().cmp(e2.as_id()));
334 for entity in &mut self.entities {
335 entity.sort()
336 }
337 }
338
339 /// Check if the document is sorted in the right serialization order.
340 fn is_sorted(&self) -> bool {
341 // Check entities are sorted on their identifier.
342 for i in 1..self.entities.len() {
343 if self.entities[i - 1].as_id() > self.entities[i].as_id() {
344 return false;
345 }
346 }
347
348 // Check every entity is sorted.
349 for entity in &self.entities {
350 if !entity.is_sorted() {
351 return false;
352 }
353 }
354
355 // Check the header is sorted.
356 self.header.is_sorted()
357 }
358}
359
360impl<'i> FromPair<'i> for OboDoc {
361 const RULE: Rule = Rule::OboDoc;
362 unsafe fn from_pair_unchecked(
363 pair: Pair<'i, Rule>,
364 cache: &Cache,
365 ) -> Result<Self, SyntaxError> {
366 let mut inner = pair.into_inner();
367
368 let mut entities = Vec::new();
369 let header = HeaderFrame::from_pair_unchecked(inner.next().unwrap(), cache)?;
370
371 let mut pair = inner.next().unwrap();
372 while pair.as_rule() != Rule::EOI {
373 entities.push(EntityFrame::from_pair_unchecked(pair, cache)?);
374 pair = inner.next().unwrap();
375 }
376 Ok(OboDoc { header, entities })
377 }
378}
379
#[cfg(test)]
mod tests {

    use super::*;

    use std::iter::FromIterator;
    use std::str::FromStr;

    use pretty_assertions::assert_eq;
    use textwrap_macros::dedent;

    /// Parsing of empty, blank and minimal documents.
    #[test]
    fn from_str() {
        // Empty file should give empty `OboDoc`.
        let empty = OboDoc::from_str("").unwrap();
        self::assert_eq!(empty, OboDoc::default());

        // Empty lines should be ignored.
        let blank = OboDoc::from_str("\n\n").unwrap();
        self::assert_eq!(blank, OboDoc::default());

        // A simple file should parse.
        let parsed = OboDoc::from_str(dedent!(
            r#"
            format-version: 1.2

            [Term]
            id: TEST:001
            "#
        ))
        .unwrap();

        let version = HeaderClause::FormatVersion(Box::new(UnquotedString::new("1.2")));
        let header = HeaderFrame::from_iter(vec![version]);
        let term = TermFrame::new(ClassIdent::from(PrefixedIdent::new("TEST", "001")));
        let expected = OboDoc::from_iter(Some(term)).and_header(header);
        self::assert_eq!(parsed, expected);
    }

    /// Serialization of empty and header-only documents.
    #[test]
    fn to_string() {
        // Empty `OboDoc` should give empty string.
        let empty = OboDoc::default();
        self::assert_eq!(empty.to_string(), "");

        // `OboDoc` with only header frame should not add newline separator.
        let clauses = vec![
            HeaderClause::FormatVersion(Box::new(UnquotedString::new("1.2"))),
            HeaderClause::Remark(Box::new(UnquotedString::new("this is a test"))),
        ];
        let header_only = OboDoc::with_header(HeaderFrame::from(clauses));
        let expected = dedent!(
            r#"
            format-version: 1.2
            remark: this is a test
            "#
        )
        .trim_start_matches('\n');
        self::assert_eq!(header_only.to_string(), expected);
    }

    /// A document is fully labeled only when every frame has a `name` clause.
    #[test]
    fn is_fully_labeled() {
        let cases: [(&str, bool); 5] = [
            // Single frame without a name.
            ("[Term]\nid: TEST:001\n", false),
            // Single frame with a name.
            ("[Term]\nid: TEST:001\nname: test item\n", true),
            // Two frames, both named.
            (
                dedent!(
                    r#"
                    [Term]
                    id: TEST:001
                    name: test item

                    [Term]
                    id: TEST:002
                    name: test item two
                    "#
                ),
                true,
            ),
            // Two frames, second one unnamed.
            (
                dedent!(
                    r#"
                    [Term]
                    id: TEST:001
                    name: test item

                    [Term]
                    id: TEST:002
                    "#
                ),
                false,
            ),
            // Two frames, first one unnamed.
            (
                dedent!(
                    r#"
                    [Term]
                    id: TEST:001

                    [Term]
                    id: TEST:002
                    name: test item two
                    "#
                ),
                false,
            ),
        ];

        for &(text, expected) in cases.iter() {
            let doc = OboDoc::from_str(text).unwrap();
            assert!(
                doc.is_fully_labeled() == expected,
                "unexpected `is_fully_labeled` result for document: {:?}",
                text
            );
        }
    }
}