onenote_parser/onenote/
mod.rs

1use crate::errors::{ErrorKind, Result};
2use crate::fsshttpb::packaging::OneStorePackaging;
3use crate::onenote::notebook::Notebook;
4use crate::onenote::section::{Section, SectionEntry, SectionGroup};
5use crate::onestore::parse_store;
6use crate::reader::Reader;
7use std::ffi::OsStr;
8use std::fs::File;
9use std::io::{BufReader, Read};
10use std::path::Path;
11
12pub(crate) mod content;
13pub(crate) mod embedded_file;
14pub(crate) mod iframe;
15pub(crate) mod image;
16pub(crate) mod ink;
17pub(crate) mod list;
18pub(crate) mod math_inline_object;
19pub(crate) mod note_tag;
20pub(crate) mod notebook;
21pub(crate) mod outline;
22pub(crate) mod page;
23pub(crate) mod page_content;
24pub(crate) mod page_series;
25pub(crate) mod rich_text;
26pub(crate) mod section;
27pub(crate) mod table;
28
/// The OneNote file parser.
///
/// Entry points:
///
/// - [`Parser::parse_notebook`] loads a notebook from a `.onetoc2` file,
/// - [`Parser::parse_section`] loads a single `.one` section from disk,
/// - [`Parser::parse_section_buffer`] loads a section from an in-memory
///   buffer.
///
/// These methods expect OneDrive downloads (FSSHTTP packaging) and return an
/// error if the input is not the expected file type.
///
/// # Thread safety
///
/// The parser is a unit struct that holds no state and can be shared across
/// threads.
pub struct Parser;
40
41impl Parser {
42    /// Create a new OneNote file parser.
43    ///
44    /// The parser holds no state; reuse a single instance across multiple
45    /// parses if desired.
46    pub fn new() -> Parser {
47        Parser {}
48    }
49
50    /// Parse a OneNote notebook.
51    ///
52    /// The `path` argument must point to a `.onetoc2` file. This will parse the
53    /// table of contents of the notebook as well as all contained
54    /// sections from the folder that the table of contents file is in.
55    ///
56    /// Returns [`ErrorKind::NotATocFile`] if the file is not a notebook table of
57    /// contents.
58    pub fn parse_notebook(&self, path: &Path) -> Result<Notebook> {
59        let file = File::open(path)?;
60        let data = Parser::read(file)?;
61        let packaging = OneStorePackaging::parse(&mut Reader::new(data.as_slice()))?;
62        let store = parse_store(&packaging)?;
63
64        if store.schema_guid() != guid!("E4DBFD38-E5C7-408B-A8A1-0E7B421E1F5F") {
65            return Err(ErrorKind::NotATocFile {
66                file: path.to_string_lossy().to_string(),
67            }
68            .into());
69        }
70
71        let base_dir = path.parent().ok_or_else(|| ErrorKind::InvalidPath {
72            message: "path has no parent directory".into(),
73        })?;
74        let (entries, color) = notebook::parse_toc(store.data_root())?;
75        let sections = entries
76            .iter()
77            .map(|name| {
78                let mut file = base_dir.to_path_buf();
79                file.push(name);
80
81                file
82            })
83            .filter(|p| p.exists())
84            .filter(|p| !p.ends_with("OneNote_RecycleBin"))
85            .map(|path| {
86                if path.is_file() {
87                    self.parse_section(&path).map(SectionEntry::Section)
88                } else {
89                    self.parse_section_group(&path)
90                        .map(SectionEntry::SectionGroup)
91                }
92            })
93            .collect::<Result<_>>()?;
94
95        Ok(Notebook {
96            entries: sections,
97            color,
98        })
99    }
100
101    /// Parse a OneNote section buffer.
102    ///
103    /// The `data` argument must contain a OneNote section.
104    /// The `file_name` is used to populate section metadata and error messages.
105    ///
106    /// Returns [`ErrorKind::NotASectionFile`] if the buffer does not contain a
107    /// section file.
108    pub fn parse_section_buffer(&self, data: &[u8], file_name: &Path) -> Result<Section> {
109        let packaging = OneStorePackaging::parse(&mut Reader::new(data))?;
110        let store = parse_store(&packaging)?;
111
112        if store.schema_guid() != guid!("1F937CB4-B26F-445F-B9F8-17E20160E461") {
113            return Err(ErrorKind::NotASectionFile {
114                file: file_name.to_string_lossy().into_owned(),
115            }
116            .into());
117        }
118
119        section::parse_section(store, file_name.to_string_lossy().into_owned())
120    }
121
122    /// Parse a OneNote section file.
123    ///
124    /// The `path` argument must point to a `.one` file that contains a
125    /// OneNote section.
126    ///
127    /// Returns [`ErrorKind::NotASectionFile`] if the file does not contain a
128    /// section.
129    pub fn parse_section(&self, path: &Path) -> Result<Section> {
130        let file = File::open(path)?;
131        let data = Parser::read(file)?;
132        let packaging = OneStorePackaging::parse(&mut Reader::new(data.as_slice()))?;
133        let store = parse_store(&packaging)?;
134
135        if store.schema_guid() != guid!("1F937CB4-B26F-445F-B9F8-17E20160E461") {
136            return Err(ErrorKind::NotASectionFile {
137                file: path.to_string_lossy().to_string(),
138            }
139            .into());
140        }
141
142        section::parse_section(
143            store,
144            path.file_name()
145                .ok_or_else(|| ErrorKind::InvalidPath {
146                    message: "path has no file name".into(),
147                })?
148                .to_string_lossy()
149                .to_string(),
150        )
151    }
152
153    fn parse_section_group(&self, path: &Path) -> Result<SectionGroup> {
154        let display_name = path
155            .file_name()
156            .ok_or_else(|| ErrorKind::InvalidPath {
157                message: "path has no file name".into(),
158            })?
159            .to_string_lossy()
160            .to_string();
161
162        for entry in path.read_dir()? {
163            let entry = entry?;
164            let is_toc = entry
165                .path()
166                .extension()
167                .map(|ext| ext == OsStr::new("onetoc2"))
168                .unwrap_or_default();
169
170            if is_toc {
171                return self
172                    .parse_notebook(&entry.path())
173                    .map(|group| SectionGroup {
174                        display_name,
175                        entries: group.entries,
176                    });
177            }
178        }
179
180        Err(ErrorKind::TocFileMissing {
181            dir: path.as_os_str().to_string_lossy().into_owned(),
182        }
183        .into())
184    }
185
186    fn read(file: File) -> Result<Vec<u8>> {
187        let size = file.metadata()?.len();
188        let mut data = Vec::with_capacity(size as usize);
189
190        let mut buf = BufReader::new(file);
191        buf.read_to_end(&mut data)?;
192
193        Ok(data)
194    }
195}
196
197impl Default for Parser {
198    fn default() -> Self {
199        Self::new()
200    }
201}