// onenote_parser/onenote/mod.rs
use crate::errors::{ErrorKind, Result};
2use crate::fsshttpb::packaging::OneStorePackaging;
3use crate::onenote::notebook::Notebook;
4use crate::onenote::section::{Section, SectionEntry, SectionGroup};
5use crate::onestore::parse_store;
6use crate::reader::Reader;
7use std::ffi::OsStr;
8use std::fs::File;
9use std::io::{BufReader, Read};
10use std::path::Path;
11
12pub(crate) mod content;
13pub(crate) mod embedded_file;
14pub(crate) mod iframe;
15pub(crate) mod image;
16pub(crate) mod ink;
17pub(crate) mod list;
18pub(crate) mod note_tag;
19pub(crate) mod notebook;
20pub(crate) mod outline;
21pub(crate) mod page;
22pub(crate) mod page_content;
23pub(crate) mod page_series;
24pub(crate) mod rich_text;
25pub(crate) mod section;
26pub(crate) mod table;
27
/// Entry point for parsing OneNote notebooks and sections from disk.
///
/// The parser itself carries no state; create one with [`Parser::new`] (or
/// `Parser::default()`) and call [`Parser::parse_notebook`] or
/// [`Parser::parse_section`] on it.
#[derive(Debug)]
pub struct Parser;
30
31impl Parser {
32 pub fn new() -> Parser {
34 Parser {}
35 }
36
37 pub fn parse_notebook(&mut self, path: &Path) -> Result<Notebook> {
43 let file = File::open(path)?;
44 let data = Parser::read(file)?;
45 let packaging = OneStorePackaging::parse(&mut Reader::new(data.as_slice()))?;
46 let store = parse_store(&packaging)?;
47
48 if store.schema_guid() != guid!({E4DBFD38-E5C7-408B-A8A1-0E7B421E1F5F}) {
49 return Err(ErrorKind::NotATocFile {
50 file: path.to_string_lossy().to_string(),
51 }
52 .into());
53 }
54
55 let base_dir = path.parent().expect("no base dir found");
56 let sections = notebook::parse_toc(store.data_root())?
57 .iter()
58 .map(|name| {
59 let mut file = base_dir.to_path_buf();
60 file.push(name);
61
62 file
63 })
64 .filter(|p| p.exists())
65 .filter(|p| !p.ends_with("OneNote_RecycleBin"))
66 .map(|path| {
67 if path.is_file() {
68 self.parse_section(&path).map(SectionEntry::Section)
69 } else {
70 self.parse_section_group(&path)
71 .map(SectionEntry::SectionGroup)
72 }
73 })
74 .collect::<Result<_>>()?;
75
76 Ok(Notebook { entries: sections })
77 }
78
79 pub fn parse_section(&mut self, path: &Path) -> Result<Section> {
84 let file = File::open(path)?;
85 let data = Parser::read(file)?;
86 let packaging = OneStorePackaging::parse(&mut Reader::new(data.as_slice()))?;
87 let store = parse_store(&packaging)?;
88
89 if store.schema_guid() != guid!({1F937CB4-B26F-445F-B9F8-17E20160E461}) {
90 return Err(ErrorKind::NotASectionFile {
91 file: path.to_string_lossy().to_string(),
92 }
93 .into());
94 }
95
96 section::parse_section(
97 store,
98 path.file_name()
99 .expect("file without file name")
100 .to_string_lossy()
101 .to_string(),
102 )
103 }
104
105 fn parse_section_group(&mut self, path: &Path) -> Result<SectionGroup> {
106 let display_name = path
107 .file_name()
108 .expect("file without file name")
109 .to_string_lossy()
110 .to_string();
111
112 for entry in path.read_dir()? {
113 let entry = entry?;
114 let is_toc = entry
115 .path()
116 .extension()
117 .map(|ext| ext == OsStr::new("onetoc2"))
118 .unwrap_or_default();
119
120 if is_toc {
121 return self
122 .parse_notebook(&entry.path())
123 .map(|group| SectionGroup {
124 display_name,
125 entries: group.entries,
126 });
127 }
128 }
129
130 Err(ErrorKind::TocFileMissing {
131 dir: path.as_os_str().to_string_lossy().into_owned(),
132 }
133 .into())
134 }
135
136 fn read(file: File) -> Result<Vec<u8>> {
137 let size = file.metadata()?.len();
138 let mut data = Vec::with_capacity(size as usize);
139
140 let mut buf = BufReader::new(file);
141 buf.read_to_end(&mut data)?;
142
143 Ok(data)
144 }
145}
146
147impl Default for Parser {
148 fn default() -> Self {
149 Self::new()
150 }
151}