1use std::collections::{HashMap, HashSet, hash_map::DefaultHasher};
2use std::hash::{Hash, Hasher};
3use std::io::Error as IOError;
4use std::ops::Range;
5use std::path::{Path, PathBuf};
6use std::sync::{Arc, mpsc};
7use std::{fs, mem, slice};
8
9use threadpool::ThreadPool;
10
11use crate::line_parser::{IncludePath, Line, parse_line};
12
/// Ordered lists of directories that are searched when resolving includes.
///
/// Two lists are kept: the base paths, and the quoted paths. The quoted list
/// is always searched before the base list (see [`SearchPaths::quoted_paths`]).
#[derive(Debug, Clone, Default)]
pub struct SearchPaths {
    base_paths: Vec<PathBuf>,
    quoted_paths: Vec<PathBuf>,
}

impl SearchPaths {
    /// Creates an empty set of search paths; equivalent to `SearchPaths::default()`.
    pub fn new() -> Self {
        Self::default()
    }

    /// Appends `path` to the end of the base search path list.
    pub fn push_base_path<P>(&mut self, path: P)
    where
        P: AsRef<Path>,
    {
        self.base_paths.push(path.as_ref().to_path_buf());
    }

    /// Appends `path` to the end of the quoted search path list.
    pub fn push_quoted_path<P>(&mut self, path: P)
    where
        P: AsRef<Path>,
    {
        self.quoted_paths.push(path.as_ref().to_path_buf());
    }

    /// Iterates over the base search paths in insertion order.
    pub fn base_paths(&self) -> impl Iterator<Item = &PathBuf> {
        self.base_paths.iter()
    }

    /// Iterates over the quoted search paths in insertion order, followed by
    /// all base search paths in insertion order.
    pub fn quoted_paths(&self) -> impl Iterator<Item = &PathBuf> {
        self.quoted_paths.iter().chain(self.base_paths.iter())
    }
}
79
/// Error produced while loading and parsing the include graph.
#[derive(Debug)]
pub enum Error {
    /// An include directive named a file that was not found in any of the
    /// searched locations.
    FileNotFound(FileNotFoundError),

    /// An underlying I/O operation failed.
    IO(IOError),

    /// A line of a source file could not be parsed.
    Parse(ParseError),
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The enum is a transparent wrapper: show the wrapped error's message.
        match self {
            Error::FileNotFound(err) => std::fmt::Display::fmt(err, f),
            Error::IO(err) => std::fmt::Display::fmt(err, f),
            Error::Parse(err) => std::fmt::Display::fmt(err, f),
        }
    }
}

impl std::error::Error for Error {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        // `Display` already forwards to the wrapped error, so forward its
        // cause as well rather than reporting the wrapped error twice.
        match self {
            Error::IO(err) => std::error::Error::source(err),
            Error::FileNotFound(_) | Error::Parse(_) => None,
        }
    }
}

impl From<FileNotFoundError> for Error {
    fn from(err: FileNotFoundError) -> Self {
        Error::FileNotFound(err)
    }
}

impl From<IOError> for Error {
    fn from(err: IOError) -> Self {
        Error::IO(err)
    }
}

impl From<ParseError> for Error {
    fn from(err: ParseError) -> Self {
        Error::Parse(err)
    }
}

/// Details about an include directive whose target file was not found.
#[derive(Debug)]
pub struct FileNotFoundError {
    included_path: PathBuf,
    source_file: PathBuf,
    source: String,
    line_number: usize,
}

impl FileNotFoundError {
    /// The path as it was written in the include directive.
    pub fn included_path(&self) -> &Path {
        &self.included_path
    }

    /// The file that contains the include directive.
    pub fn source_file(&self) -> &Path {
        &self.source_file
    }

    /// The text of the file that contains the include directive.
    pub fn source(&self) -> &str {
        &self.source
    }

    /// The zero-based index of the line that holds the include directive.
    pub fn line_number(&self) -> usize {
        self.line_number
    }
}

impl std::fmt::Display for FileNotFoundError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `line_number` is stored zero-based (the parser in this file starts
        // counting at 0); people expect one-based line numbers in messages.
        write!(
            f,
            "include `{}` could not be resolved (included from {}, line {})",
            self.included_path.display(),
            self.source_file.display(),
            self.line_number + 1
        )
    }
}

// NOTE: the inherent `source()` accessor above takes precedence over
// `std::error::Error::source` in method-call syntax; the trait method keeps its
// default (no underlying cause).
impl std::error::Error for FileNotFoundError {}

/// Details about a source line that failed to parse.
#[derive(Debug)]
pub struct ParseError {
    message: String,
    source_file: PathBuf,
    source: String,
    line_number: usize,
}

impl ParseError {
    /// The message reported by the line parser.
    pub fn message(&self) -> &str {
        &self.message
    }

    /// The file in which the parse failure occurred.
    pub fn source_file(&self) -> &Path {
        &self.source_file
    }

    /// The text of the file in which the parse failure occurred.
    pub fn source(&self) -> &str {
        &self.source
    }

    /// The zero-based index of the line that failed to parse.
    pub fn line_number(&self) -> usize {
        self.line_number
    }
}

impl std::fmt::Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Stored zero-based, displayed one-based (conventional `file:line: msg`).
        write!(
            f,
            "{}:{}: {}",
            self.source_file.display(),
            self.line_number + 1,
            self.message
        )
    }
}

// NOTE: the inherent `source()` accessor above takes precedence over
// `std::error::Error::source` in method-call syntax; the trait method keeps its
// default (no underlying cause).
impl std::error::Error for ParseError {}
181
182pub fn preprocess<P, S>(
197 entry_point: P,
198 search_paths: SearchPaths,
199 output_sink: S,
200) -> Result<S, Error>
201where
202 P: AsRef<Path>,
203 S: OutputSink,
204{
205 preprocess_with_source_tracker(entry_point, search_paths, output_sink, NoTrack)
206}
207
208pub fn preprocess_with_source_tracker<P, S, T>(
217 entry_point: P,
218 search_paths: SearchPaths,
219 mut output_sink: S,
220 mut source_tracker: T,
221) -> Result<S, Error>
222where
223 P: AsRef<Path>,
224 S: OutputSink,
225 T: SourceTracker,
226{
227 let parsed = Parsed::try_init(entry_point, search_paths)?;
228
229 parsed.write(&mut output_sink, &mut source_tracker);
230
231 Ok(output_sink)
232}
233
234enum LoadState {
235 Loaded(ParsedNode),
236 Pending,
237}
238
239impl LoadState {
240 fn loaded(&self) -> Option<&ParsedNode> {
241 if let LoadState::Loaded(node) = self {
242 Some(node)
243 } else {
244 None
245 }
246 }
247}
248
249struct Parsed {
250 lookup: HashMap<u64, LoadState>,
251 root_key: u64,
252}
253
254impl Parsed {
255 fn try_init<P>(entry_point: P, search_paths: SearchPaths) -> Result<Self, Error>
256 where
257 P: AsRef<Path>,
258 {
259 let mut lookup = HashMap::new();
260 let (tx, rx) = mpsc::channel();
261 let pool = ThreadPool::new(num_cpus::get());
262 let entry_path = entry_point.as_ref().canonicalize()?;
263
264 let mut hasher = DefaultHasher::new();
265
266 entry_path.hash(&mut hasher);
267
268 let root_key = hasher.finish();
269 let root_node = ParsedNode::try_parse(entry_path, &search_paths);
270
271 lookup.insert(root_key, LoadState::Pending);
272
273 tx.send(root_node).unwrap();
274
275 let search_paths = Arc::new(search_paths);
276 let mut balance = 1;
277
278 loop {
279 if balance == 0 {
280 break;
281 }
282
283 let node = rx.recv().unwrap()?;
284
285 balance -= 1;
286
287 'inner: for chunk in node.chunks() {
289 if let NodeChunk::Include(path) = chunk {
290 let mut hasher = DefaultHasher::new();
291
292 path.hash(&mut hasher);
293
294 let key = hasher.finish();
295
296 if lookup.contains_key(&key) {
297 continue 'inner;
299 }
300
301 lookup.insert(key, LoadState::Pending);
303 balance += 1;
304
305 let tx_clone = tx.clone();
306 let search_paths_clone = search_paths.clone();
307 let path_buf = path.to_path_buf();
308
309 pool.execute(move || {
310 tx_clone
311 .send(ParsedNode::try_parse(path_buf, &search_paths_clone))
312 .unwrap();
313 });
314 }
315 }
316
317 lookup.insert(node.key(), LoadState::Loaded(node));
318 }
319
320 Ok(Parsed { lookup, root_key })
321 }
322
323 fn get_by_key(&self, key: u64) -> Option<&ParsedNode> {
324 self.lookup.get(&key).and_then(|node| node.loaded())
325 }
326
327 fn get_by_path<P>(&self, path: P) -> Option<&ParsedNode>
328 where
329 P: AsRef<Path>,
330 {
331 let mut hasher = DefaultHasher::new();
332
333 path.as_ref().hash(&mut hasher);
334
335 let key = hasher.finish();
336
337 self.get_by_key(key)
338 }
339
340 fn write<S, T>(&self, output_sink: &mut S, source_tracker: &mut T)
341 where
342 S: OutputSink,
343 T: SourceTracker,
344 {
345 let mut stack = Vec::new();
346 let mut seen = HashSet::new();
347
348 let root_node = self.get_by_key(self.root_key).unwrap();
349
350 if root_node.once() {
351 seen.insert(root_node.key());
352 }
353
354 let mut current_node = root_node;
355 let mut current_chunk = 0;
356
357 loop {
358 if let Some(chunk) = current_node.get_chunk(current_chunk) {
359 match chunk {
360 NodeChunk::Text(chunk) => {
361 output_sink.sink_source_mapped(SourceMappedChunk {
362 text: chunk.text(),
363 source_path: current_node.path(),
364 source_range: chunk.byte_range(),
365 });
366
367 current_chunk += 1;
368 }
369 NodeChunk::Include(path) => {
370 let node = self.get_by_path(path).unwrap();
371
372 if node.once() && seen.contains(&node.key()) {
373 current_chunk += 1;
374 } else {
375 seen.insert(node.key());
376
377 stack.push((current_node.key(), current_chunk));
378
379 current_node = node;
380 current_chunk = 0;
381 }
382 }
383 }
384 } else {
385 if let Some((parent_key, child_chunk)) = stack.pop() {
386 output_sink.sink("\n");
388
389 current_node = self.get_by_key(parent_key).unwrap();
390 current_chunk = child_chunk + 1;
391 } else {
392 break;
393 }
394 }
395 }
396
397 for node in self.lookup.values() {
398 let node = node.loaded().unwrap();
399
400 source_tracker.track(node.path(), node.source());
401 }
402 }
403}
404
/// Owned, storage-friendly form of a chunk of a parsed file.
#[derive(Debug)]
enum NodeChunkInternal {
    /// A byte range of plain text within the file's source string.
    Text(Range<usize>),
    /// An include directive, holding the path of the included file.
    Include(PathBuf),
}

/// A run of plain text borrowed from a file's source string.
struct TextChunk<'a> {
    byte_range: Range<usize>,
    text: &'a str,
}

impl<'a> TextChunk<'a> {
    /// The text of this chunk.
    ///
    /// The returned slice borrows from the underlying source (`'a`), not from
    /// this short-lived wrapper, so it may outlive the `TextChunk` itself.
    fn text(&self) -> &'a str {
        self.text
    }

    /// The byte range this chunk occupies in the source string.
    fn byte_range(&self) -> Range<usize> {
        self.byte_range.clone()
    }
}

/// Borrowed view of a chunk of a parsed file.
enum NodeChunk<'a> {
    /// A run of plain text.
    Text(TextChunk<'a>),
    /// An include directive, holding the path of the included file.
    Include(&'a Path),
}
430
431struct ParsedNode {
432 path: PathBuf,
433 key: u64,
434 once: bool,
435 source: String,
436 chunk_buffer: Vec<NodeChunkInternal>,
437}
438
439impl ParsedNode {
440 fn try_parse(path: PathBuf, search_paths: &SearchPaths) -> Result<Self, Error> {
441 let source = fs::read_to_string(&path)?;
442 let source_len = source.len();
443
444 let mut remainder = source.as_str();
445 let mut line_number = 0;
446 let mut chunk_buffer = Vec::new();
447 let mut once = false;
448 let mut current_text_range = 0..0;
449
450 while remainder.len() > 0 {
451 let (new_remainder, line) = parse_line(remainder).map_err(|err| {
452 let mut buf = PathBuf::new();
453
454 buf.push(&path);
455
456 ParseError {
457 source_file: buf,
458 line_number,
459 source: source.clone(),
460 message: err.to_string(),
461 }
462 })?;
463
464 let pos = source_len - new_remainder.len();
465
466 if line == Line::Text {
467 current_text_range.end = pos;
468 } else {
469 let range = mem::replace(&mut current_text_range, pos..pos);
470
471 if range.len() > 0 {
472 chunk_buffer.push(NodeChunkInternal::Text(range))
473 }
474 }
475
476 match line {
477 Line::Include(target) => {
478 let resolved = try_resolve_include_path(
479 target,
480 (path.as_ref(), &source, line_number),
481 search_paths,
482 )?;
483
484 chunk_buffer.push(NodeChunkInternal::Include(resolved));
485 }
486 Line::PragmaOnce => {
487 once = true;
488 }
489 Line::Text => (),
490 }
491
492 remainder = new_remainder;
493 line_number += 1;
494 }
495
496 if current_text_range.len() != 0 {
497 chunk_buffer.push(NodeChunkInternal::Text(current_text_range))
498 }
499
500 let mut hasher = DefaultHasher::new();
501
502 path.hash(&mut hasher);
503
504 let key = hasher.finish();
505
506 Ok(ParsedNode {
507 path,
508 key,
509 once,
510 source,
511 chunk_buffer,
512 })
513 }
514
515 fn path(&self) -> &Path {
516 self.path.as_ref()
517 }
518
519 fn key(&self) -> u64 {
520 self.key
521 }
522
523 fn source(&self) -> &str {
524 &self.source
525 }
526
527 fn once(&self) -> bool {
528 self.once
529 }
530
531 fn get_chunk(&self, index: usize) -> Option<NodeChunk<'_>> {
532 self.chunk_buffer.get(index).map(|chunk| match chunk {
533 NodeChunkInternal::Text(range) => NodeChunk::Text(TextChunk {
534 byte_range: range.clone(),
535 text: &self.source[range.clone()],
536 }),
537 NodeChunkInternal::Include(path) => NodeChunk::Include(path.as_path()),
538 })
539 }
540
541 fn chunks(&self) -> NodeChunks<'_> {
542 let ParsedNode {
543 source,
544 chunk_buffer,
545 ..
546 } = self;
547
548 NodeChunks {
549 source,
550 chunks: chunk_buffer.iter(),
551 }
552 }
553}
554
555struct NodeChunks<'a> {
556 source: &'a String,
557 chunks: slice::Iter<'a, NodeChunkInternal>,
558}
559
560impl<'a> Iterator for NodeChunks<'a> {
561 type Item = NodeChunk<'a>;
562
563 fn next(&mut self) -> Option<Self::Item> {
564 let NodeChunks { source, chunks } = self;
565
566 if let Some(chunk) = chunks.next() {
567 let chunk = match chunk {
568 NodeChunkInternal::Text(range) => NodeChunk::Text(TextChunk {
569 byte_range: range.clone(),
570 text: &source[range.clone()],
571 }),
572 NodeChunkInternal::Include(path) => NodeChunk::Include(path),
573 };
574
575 Some(chunk)
576 } else {
577 None
578 }
579 }
580}
581
/// A piece of output text together with the place in a source file it was
/// taken from.
#[derive(Debug, Clone)]
pub struct SourceMappedChunk<'a> {
    text: &'a str,
    source_path: &'a Path,
    source_range: Range<usize>,
}

impl<'a> SourceMappedChunk<'a> {
    /// The text of this chunk.
    ///
    /// The slice borrows from the underlying source (`'a`), so it stays valid
    /// after the chunk value itself has been dropped.
    pub fn text(&self) -> &'a str {
        self.text
    }

    /// The path of the source file the text was taken from.
    ///
    /// Like [`SourceMappedChunk::text`], the result is valid for `'a`.
    pub fn source_path(&self) -> &'a Path {
        self.source_path
    }

    /// The byte range the text occupies within the source file.
    pub fn source_range(&self) -> Range<usize> {
        self.source_range.clone()
    }
}
608
609pub trait OutputSink {
625 fn sink(&mut self, chunk: &str);
627
628 fn sink_source_mapped(&mut self, source_mapped_chunk: SourceMappedChunk);
630}
631
632impl OutputSink for String {
633 fn sink(&mut self, chunk: &str) {
634 self.push_str(chunk);
635 }
636
637 fn sink_source_mapped(&mut self, source_mapped_chunk: SourceMappedChunk) {
638 self.push_str(source_mapped_chunk.text)
639 }
640}
641
/// Receives the path and text of source files.
pub trait SourceTracker {
    /// Called with the path of a source file and its complete text.
    fn track(&mut self, path: &Path, source: &str);
}

/// A mutable reference to a tracker is itself a tracker; calls are forwarded
/// to the referenced value.
impl<T> SourceTracker for &'_ mut T
where
    T: SourceTracker,
{
    fn track(&mut self, path: &Path, source: &str) {
        (**self).track(path, source)
    }
}

/// Tracker that ignores everything it is given.
struct NoTrack;

impl SourceTracker for NoTrack {
    fn track(&mut self, _: &Path, _: &str) {}
}
669
670fn try_resolve_include_path(
671 include_path: IncludePath,
672 included_from: (&Path, &str, usize),
673 search_paths: &SearchPaths,
674) -> Result<PathBuf, Error> {
675 let mut resolved = None;
676
677 let path = match include_path {
678 IncludePath::Angle(path) => {
679 for search_path in search_paths.base_paths() {
680 let join = search_path.join(path);
681
682 if join.is_file() {
683 resolved = Some(join);
684
685 break;
686 }
687 }
688
689 path
690 }
691 IncludePath::Quote(path) => {
692 let join = included_from.0.parent().unwrap().join(path);
693
694 if join.is_file() {
695 resolved = Some(join);
696 } else {
697 for search_path in search_paths.quoted_paths() {
698 let join = search_path.join(path);
699
700 if join.is_file() {
701 resolved = Some(join);
702
703 break;
704 }
705 }
706 }
707
708 path
709 }
710 };
711
712 if let Some(resolved) = resolved {
713 Ok(resolved.canonicalize()?)
714 } else {
715 Err(FileNotFoundError {
716 included_path: path.to_path_buf(),
717 source_file: included_from.0.to_path_buf(),
718 source: included_from.1.to_string(),
719 line_number: included_from.2,
720 }
721 .into())
722 }
723}