// onenote_parser/onenote/mod.rs

1use crate::errors::{ErrorKind, Result};
2use crate::fsshttpb::packaging::OneStorePackaging;
3use crate::onenote::notebook::Notebook;
4use crate::onenote::section::{Section, SectionEntry, SectionGroup};
5use crate::onestore::parse_store;
6use crate::reader::Reader;
7use std::ffi::OsStr;
8use std::fs::File;
9use std::io::{BufReader, Read};
10use std::path::Path;
11
12pub(crate) mod content;
13pub(crate) mod embedded_file;
14pub(crate) mod iframe;
15pub(crate) mod image;
16pub(crate) mod ink;
17pub(crate) mod list;
18pub(crate) mod note_tag;
19pub(crate) mod notebook;
20pub(crate) mod outline;
21pub(crate) mod page;
22pub(crate) mod page_content;
23pub(crate) mod page_series;
24pub(crate) mod rich_text;
25pub(crate) mod section;
26pub(crate) mod table;
27
/// The OneNote file parser.
///
/// `Parser` is a unit struct and carries no state of its own. Use
/// [`Parser::parse_notebook`] to load a whole notebook from its `.onetoc2`
/// table of contents, or [`Parser::parse_section`] to load a single `.one`
/// section file.
#[derive(Debug)]
pub struct Parser;
30
31impl Parser {
32    /// Create a new OneNote file parser.
33    pub fn new() -> Parser {
34        Parser {}
35    }
36
37    /// Parse a OneNote notebook.
38    ///
39    /// The `path` argument must point to a `.onetoc2` file. This will parse the
40    /// table of contents of the notebook as well as all contained
41    /// sections from the folder that the table of contents file is in.
42    pub fn parse_notebook(&mut self, path: &Path) -> Result<Notebook> {
43        let file = File::open(path)?;
44        let data = Parser::read(file)?;
45        let packaging = OneStorePackaging::parse(&mut Reader::new(data.as_slice()))?;
46        let store = parse_store(&packaging)?;
47
48        if store.schema_guid() != guid!({E4DBFD38-E5C7-408B-A8A1-0E7B421E1F5F}) {
49            return Err(ErrorKind::NotATocFile {
50                file: path.to_string_lossy().to_string(),
51            }
52            .into());
53        }
54
55        let base_dir = path.parent().expect("no base dir found");
56        let sections = notebook::parse_toc(store.data_root())?
57            .iter()
58            .map(|name| {
59                let mut file = base_dir.to_path_buf();
60                file.push(name);
61
62                file
63            })
64            .filter(|p| p.exists())
65            .filter(|p| !p.ends_with("OneNote_RecycleBin"))
66            .map(|path| {
67                if path.is_file() {
68                    self.parse_section(&path).map(SectionEntry::Section)
69                } else {
70                    self.parse_section_group(&path)
71                        .map(SectionEntry::SectionGroup)
72                }
73            })
74            .collect::<Result<_>>()?;
75
76        Ok(Notebook { entries: sections })
77    }
78
79    /// Parse a OneNote section file.
80    ///
81    /// The `path` argument must point to a `.one` file that contains a
82    /// OneNote section.
83    pub fn parse_section(&mut self, path: &Path) -> Result<Section> {
84        let file = File::open(path)?;
85        let data = Parser::read(file)?;
86        let packaging = OneStorePackaging::parse(&mut Reader::new(data.as_slice()))?;
87        let store = parse_store(&packaging)?;
88
89        if store.schema_guid() != guid!({1F937CB4-B26F-445F-B9F8-17E20160E461}) {
90            return Err(ErrorKind::NotASectionFile {
91                file: path.to_string_lossy().to_string(),
92            }
93            .into());
94        }
95
96        section::parse_section(
97            store,
98            path.file_name()
99                .expect("file without file name")
100                .to_string_lossy()
101                .to_string(),
102        )
103    }
104
105    fn parse_section_group(&mut self, path: &Path) -> Result<SectionGroup> {
106        let display_name = path
107            .file_name()
108            .expect("file without file name")
109            .to_string_lossy()
110            .to_string();
111
112        for entry in path.read_dir()? {
113            let entry = entry?;
114            let is_toc = entry
115                .path()
116                .extension()
117                .map(|ext| ext == OsStr::new("onetoc2"))
118                .unwrap_or_default();
119
120            if is_toc {
121                return self
122                    .parse_notebook(&entry.path())
123                    .map(|group| SectionGroup {
124                        display_name,
125                        entries: group.entries,
126                    });
127            }
128        }
129
130        Err(ErrorKind::TocFileMissing {
131            dir: path.as_os_str().to_string_lossy().into_owned(),
132        }
133        .into())
134    }
135
136    fn read(file: File) -> Result<Vec<u8>> {
137        let size = file.metadata()?.len();
138        let mut data = Vec::with_capacity(size as usize);
139
140        let mut buf = BufReader::new(file);
141        buf.read_to_end(&mut data)?;
142
143        Ok(data)
144    }
145}
146
147impl Default for Parser {
148    fn default() -> Self {
149        Self::new()
150    }
151}