blades/
sources.rs

1// Blades  Copyright (C) 2021 Maroš Grego
2//
3// This file is part of Blades. This program comes with ABSOLUTELY NO WARRANTY;
4// This is free software, and you are welcome to redistribute it under the
5// conditions of the GNU General Public License version 3.0.
6//
7// You should have received a copy of the GNU General Public License
8// along with Blades.  If not, see <http://www.gnu.org/licenses/>
9use crate::page::Page;
10
11use std::ffi::OsStr;
12use std::fs::{read_dir, File};
13use std::io::{self, Read};
14use std::ops::Range;
15use std::path::{is_separator, PathBuf};
16use std::time::SystemTime;
17
18/// A structure that can parse Page from binary data.
19/// Is typically a deserializer or an enum of deserializers.
20pub trait Parser: Default + Sized {
21    /// The error that can happen during parsing.
22    type Error: std::error::Error;
23
24    /// The kind of parser that should be used, based on the file extension.
25    fn from_extension(_extension: &OsStr) -> Option<Self> {
26        Some(Self::default())
27    }
28
29    /// Parse the binary data into a Page.
30    fn parse<'a>(&self, data: &'a [u8]) -> Result<Page<'a>, Self::Error>;
31}
32
33/// Data about where the source of a one particular file is located
34pub struct Source<P: Parser> {
35    /// Range in the slice of data
36    pub(crate) source: Range<usize>,
37    /// Range in the slice of data
38    pub(crate) path: Range<usize>,
39    /// Range in the slice of sources
40    pub(crate) pages: Range<usize>,
41    /// Range in the slice of sources
42    pub(crate) subsections: Range<usize>,
43    pub(crate) is_section: bool,
44    pub(crate) parent: usize,
45    pub(crate) date: Option<SystemTime>,
46    pub(crate) to_load: Option<PathBuf>,
47    pub(crate) format: P,
48}
49
50/// All of the site source files
51pub struct Sources<P: Parser> {
52    /// Binary data read of all the files
53    pub(crate) data: Vec<u8>,
54    /// Info about where the particular files are loaded
55    sources: Vec<Source<P>>,
56}
57
58impl<P: Parser> Source<P> {
59    #[inline]
60    fn new(
61        path: Range<usize>,
62        src: Range<usize>,
63        parent: usize,
64        date: Option<SystemTime>,
65        format: P,
66    ) -> Self {
67        Self {
68            source: src,
69            path,
70            pages: 0..0,
71            subsections: 0..0,
72            is_section: false,
73            parent,
74            date,
75            to_load: None,
76            format,
77        }
78    }
79
80    /// Create a placeholder source, not referencing any data.
81    #[inline]
82    fn empty(section: PathBuf, parent: usize) -> Self {
83        Self {
84            source: 0..0,
85            path: 0..0,
86            pages: 0..0,
87            subsections: 0..0,
88            is_section: true,
89            parent,
90            date: None,
91            to_load: Some(section),
92            format: P::default(),
93        }
94    }
95}
96
97impl<P: Parser> Sources<P> {
98    /// Add all the sources from the current directory to `self`.
99    #[inline]
100    fn step(
101        &mut self,
102        index: usize,
103        path: PathBuf,
104        dirs: &mut Vec<PathBuf>,
105        content_dir: &str,
106    ) -> Result<(), io::Error> {
107        let start = self.sources.len();
108        let mut index_file = None;
109        for (path, date, format) in read_dir(&path)?
110            .filter_map(Result::ok)
111            .filter(|entry| {
112                entry
113                    .file_type()
114                    .map(|ft| {
115                        if ft.is_dir() {
116                            dirs.push(entry.path());
117                            false
118                        } else {
119                            ft.is_file()
120                        }
121                    })
122                    .unwrap_or(false)
123            })
124            .map(|entry| {
125                let date = entry.metadata().and_then(|m| m.created()).ok();
126                (entry.path(), date)
127            })
128            .filter_map(|(path, date)| {
129                let ext = path.extension().unwrap_or_default();
130                let format = P::from_extension(ext)?;
131                if path.file_stem()? == "index" {
132                    index_file = Some((path, date, format));
133                    return None;
134                };
135                Some((path, date, format))
136            })
137        {
138            let start = self.data.len();
139            let read = File::open(&path)?.read_to_end(&mut self.data)?;
140            let mid = start + read;
141            let path = path.to_string_lossy();
142            let p = path.strip_prefix(content_dir).unwrap_or(&path);
143            let p = p.strip_prefix(is_separator).unwrap_or(p);
144            let ext_start = p.rfind('.').unwrap_or(p.len());
145            self.data.extend_from_slice(p[..ext_start].as_ref());
146            let end = self.data.len();
147            self.sources
148                .push(Source::new(mid..end, start..mid, index, date, format));
149        }
150        let end = self.sources.len();
151
152        for dir in dirs.drain(..) {
153            self.sources.push(Source::empty(dir, index));
154        }
155        let len = self.sources.len();
156
157        let source_start = self.data.len();
158        let read = if let Some((path, date, format)) = index_file {
159            self.sources[index].date = date;
160            self.sources[index].format = format;
161            File::open(path)?.read_to_end(&mut self.data)?
162        } else {
163            0
164        };
165        let mid = source_start + read;
166
167        let path = path.to_string_lossy();
168        let p = path.strip_prefix(content_dir).unwrap_or(&path);
169        let p = p.strip_prefix(is_separator).unwrap_or(p);
170        self.data.extend_from_slice(p.as_ref());
171        let source_end = self.data.len();
172
173        self.sources[index].path = mid..source_end;
174        self.sources[index].source = source_start..mid;
175        self.sources[index].pages = start..end;
176        if len > end {
177            self.sources[index].subsections = end..len;
178        }
179
180        Ok(())
181    }
182
183    /// Load all the sources from the directory
184    pub fn load(dir: &str) -> Result<Self, io::Error> {
185        let mut sources = Self {
186            data: Vec::with_capacity(65536),
187            sources: Vec::with_capacity(64),
188        };
189        sources.sources.push(Source::empty(dir.into(), 0));
190
191        let mut dirs_buffer = Vec::new();
192        let mut i = 0;
193        // Check all the sources whether they contain something more to load.
194        while i < sources.sources.len() {
195            if let Some(path) = sources.sources[i].to_load.take() {
196                sources.step(i, path, &mut dirs_buffer, dir)?;
197            }
198            i += 1;
199        }
200
201        Ok(sources)
202    }
203
204    /// Get a reference of the inner list of sources.
205    pub fn sources(&self) -> &[Source<P>] {
206        &self.sources
207    }
208}