codespan_preprocessed 0.8.0

Beautiful diagnostic reporting for M4 (or cpp) preprocessed text files
Documentation
use std::ops::{Range, Index};
use std::iter;
use codespan_reporting::files;
use codespan_reporting::files::{Files, SimpleFile};
use std::cmp::Ordering;
use std::io::Read;
use std::path::Path;


/// One `#line` directive found in the input text.
#[derive(Clone, Debug)]
struct LineDirective {
    /// Index (0-based) of the input line holding the directive.
    line_index: usize,
    /// Byte index of the first character of the directive line.
    byte_index: usize,
    /// Difference between the index of a line in the input and the
    /// index of the same line as announced by the directive.
    offset: isize,
    /// Byte range, in the input, of the file name given by the
    /// directive (`None` when the directive carries no file name).
    filename: Option<Range<usize>>,
}

/// A slice of the input file.
///
/// The input is cut into consecutive parts, a new part starting
/// after each location directive. A reference to such a part is
/// what this crate hands to `codespan_reporting` as file identifier.
#[derive(Clone, Debug, PartialEq)]
pub struct FileSlice {
    /// Byte range, in the input, of the name of the originating file.
    name: Range<usize>,
    /// Byte range of the input covered by this slice.
    bytes: Range<usize>,
    /// Range of input line indices (0-based) covered by this slice.
    lines: Range<usize>,
    /// Difference between the index of a line in the input and the
    /// index reported for it in diagnostics.
    offset: isize,
}

/// The codemap of a preprocessed file.
///
/// It owns the preprocessed text and records how that text is cut
/// into lines and into [`FileSlice`]s.
#[derive(Debug)]
pub struct PreprocessedFile<Source> {
    /// The slices of the input, in the order they appear in the text.
    ids: Vec<FileSlice>,
    /// Byte range of every line of the input, end of line excluded.
    lines: Vec<Range<usize>>,
    /// The preprocessed text itself.
    contents: Source,
}


impl<'a, S:'a+AsRef<str>> Files<'a> for PreprocessedFile<S>
{
    type FileId = &'a FileSlice;
    type Name = &'a str;
    type Source = &'a str;

    fn name(&'a self, id: Self::FileId) -> Result<Self::Name, files::Error> {
        Ok(self.contents.as_ref().index(id.name.clone()))
    }

    fn source(&'a self, _: Self::FileId) -> Result<Self::Source, files::Error> {
        Ok(self.contents.as_ref())
    }

    fn line_index(&'a self, id: Self::FileId, byte_index: usize) -> Result<usize, files::Error>
    {
        if id.bytes.end <= byte_index {
            Ok((id.lines.end as isize -1 -id.offset) as usize)
        } else if byte_index < id.bytes.start {
            Err(files::Error::FileMissing)
        } else {
            Ok((self.lines.binary_search_by(|bytes| {
                if byte_index < bytes.start { Ordering::Greater } else if byte_index > bytes.end { Ordering::Less } else { Ordering::Equal }
            }).unwrap() as isize - id.offset) as usize)
        }
    }

    fn line_range(&'a self, id: Self::FileId, line_index: usize) -> Result<Range<usize>, files::Error>
    {
        self.lines.get((line_index as isize+id.offset) as usize).cloned()
            .ok_or(files::Error::LineTooLarge { given: line_index, max: self.lines.len() })
    }
}

impl<Source> PreprocessedFile<Source>
    where
        Source: AsRef<str>
{
    pub fn new(contents: Source) -> Self
    {
        let mut line_endings = contents
            .as_ref()
            .match_indices('\n')
            .map(|(b,_)| b )
            .collect::<Vec<_>>();

        // if the last line is not terminated with an EOL, assume it
        match line_endings.last() {
            // nothing to do,the last line will have an EOL
            Some(l) if *l == contents.as_ref().len() - 1 => { }
            // the data has no EOL at the end...
            _ => line_endings.push(contents.as_ref().len() )
        }

        let line_ranges =
            iter::once(0)
                .chain(line_endings.iter().map(|e| *e+1))
                .zip(line_endings.iter())
                .map(|(s,e)| s .. *e)
                .collect::<Vec<_>>();

        let directives =
            line_ranges.iter()
                .enumerate()
                .filter(|(_, r)| contents.as_ref()[r.start..r.end].starts_with("#line"))
                .map(|(l, r)| {
                    let str = &contents.as_ref()[r.start..r.end];
                    if let Some(sep) = str[6..].find(' ') {
                        let sep = sep + 6;
                        LineDirective {
                            line_index: l,
                            byte_index: r.start,
                            offset: l as isize + 2 - str[6..sep].parse::<isize>().unwrap(),
                            filename: Some(r.start+sep+2..r.start+str.len()-1)
                        }
                    } else {
                        LineDirective {
                            line_index: l,
                            byte_index: r.start,
                            offset: l as isize + 2 - str[6..].parse::<isize>().unwrap(),
                            filename: None
                        }
                    }
                })
                .collect::<Vec<_>>();

        let mut current = 0..0;
        let mut files = Vec::with_capacity(directives.len()+2);

        if let Some(first) = directives.first() {
            if first.line_index > 0 {
                files.push(FileSlice {
                    name: current.clone(),
                    bytes: 0..first.byte_index,
                    lines: 0..first.line_index,
                    offset: 0
                });
            }
            files.extend(directives.iter()
                .zip(directives.iter().skip(1))
                .map(|(start, end)| {
                    if let Some(filename) = start.filename.clone() {
                        current = filename;
                    }
                    FileSlice {
                        name: current.clone(),
                        bytes: line_ranges[start.line_index+1].start .. end.byte_index ,
                        lines: start.line_index+1 .. end.line_index,
                        offset: start.offset
                    }
                }));

            let last_directive = directives.last().unwrap();

            // if the file ends with a directive (which should never
            // happen when it comes from m4 or cpp), we ignore this last one
            // since it will generate out of bounds for line_ranges access
            if last_directive.line_index+1 < line_ranges.len() {
                // ok, here, we know that there is some chars behind the directive
                files.push(FileSlice {
                    name: last_directive.filename.clone().unwrap_or(current),
                    bytes: line_ranges[last_directive.line_index+1].start .. line_ranges.last().unwrap().end,
                    lines: last_directive.line_index+1 .. line_ranges.len(),
                    offset: last_directive.offset
                });
            }
        } else {
            files.push(FileSlice {
                name: current,
                bytes: 0..line_ranges.last().unwrap().end,
                lines: 0..line_ranges.len(),
                offset: 0
            })
        }

        PreprocessedFile {
            ids: files,
            lines: line_ranges,
            contents
        }
    }

    #[inline]
    pub fn source(&self) -> &str { self.contents.as_ref() }

    #[inline]
    pub fn len(&self) -> usize { self.source().len() }

    #[inline]
    pub fn is_empty(&self) -> bool { self.source().is_empty() }
}

impl PreprocessedFile<String> {
    /// Reads the file `filename` and builds its codemap.
    ///
    /// A `#line 1 "<filename>"` directive is prepended to the text read,
    /// so that diagnostics are located in `filename` from its first line.
    ///
    /// # Errors
    ///
    /// Returns the I/O error raised when opening or reading the file;
    /// a file that is not valid UTF-8 is reported as an error of kind
    /// `InvalidData`.
    pub fn open<P: AsRef<Path>>(filename: P) -> Result<Self, std::io::Error> {
        let path = filename.as_ref();
        let text = std::fs::read_to_string(path)?;
        // prepend '#line' directive to correctly locate diagnosis
        let contents = format!("#line 1 \"{}\"\n{}", path.to_string_lossy(), text);
        Ok(PreprocessedFile::new(contents))
    }

    /// Reads the standard input up to its end and builds its codemap.
    ///
    /// # Errors
    ///
    /// Returns the I/O error raised when reading the standard input;
    /// an input that is not valid UTF-8 is reported as an error of kind
    /// `InvalidData`.
    pub fn from_stdin() -> Result<Self, std::io::Error> {
        let mut contents = String::new();
        std::io::stdin().read_to_string(&mut contents)?;
        Ok(PreprocessedFile::new(contents))
    }
}

/// A [`Files`] database able to find a file identifier from a byte
/// index alone.
pub trait EasyLocation<'a>: Files<'a> {
    /// Returns the identifier of the file to use for `byte_index`.
    fn file_id(&'a self, byte_index: usize) -> <Self as Files<'a>>::FileId;
}

impl<'a, S: 'a + AsRef<str>> EasyLocation<'a> for PreprocessedFile<S> {
    /// Returns the slice of the input to use for `byte_index`.
    ///
    /// When no slice contains `byte_index`, the first slice located
    /// after it is returned or, if there is none, the last slice.
    ///
    /// # Panics
    ///
    /// Panics if the codemap holds no slice at all.
    fn file_id(&'a self, byte_index: usize) -> <Self as Files<'a>>::FileId {
        // The slices are stored in the byte order of the input, which
        // allows a binary search (both bounds of a slice are accepted).
        let found = self.ids.binary_search_by(|slice| {
            if slice.bytes.start > byte_index {
                Ordering::Greater
            } else if slice.bytes.end < byte_index {
                Ordering::Less
            } else {
                Ordering::Equal
            }
        });

        // On a miss, the insertion point designates the next slice.
        let position = match found {
            Ok(hit) => hit,
            Err(insertion) => insertion,
        };

        self.ids
            .get(position)
            .or_else(|| self.ids.last())
            .unwrap()
    }
}


impl<'a, N, S> EasyLocation<'a> for SimpleFile<N, S>
where
    N: 'a + std::fmt::Display + Clone,
    S: 'a + AsRef<str>,
{
    /// Returns the file identifier of a `SimpleFile`, which is the unit
    /// value: the byte index is not needed to find it.
    fn file_id(&'a self, _byte_index: usize) -> <Self as Files<'a>>::FileId {}
}