1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72

mod content_match;
mod content_search_result;
mod magic_numbers;
mod extensions;
mod needle;

pub use {
    content_match::ContentMatch,
    content_search_result::ContentSearchResult,
    needle::Needle,
    std::io::{ BufRead, BufReader},
};

use {
    memmap::Mmap,
    std::{
        fs::File,
        io,
        path::Path,
    },
};

/// Size limit in bytes (10 MiB): `get_mmap_if_not_binary` returns no map
/// for a file whose length is strictly greater than this value.
pub const MAX_FILE_SIZE: usize = 10 * 1024 * 1024;

/// Open the file at `hay_path` and return a memory map of its content.
///
/// # Errors
///
/// Returns the I/O error raised either when opening the file or when
/// creating the map.
pub fn get_mmap<P: AsRef<Path>>(hay_path: P) -> io::Result<Mmap> {
    File::open(hay_path.as_ref()).and_then(|file| {
        // NOTE(review): mapping a file is `unsafe` because nothing here
        // prevents the file from being modified or truncated while the map
        // is alive; this function doesn't guard against that.
        unsafe { Mmap::map(&file) }
    })
}

/// Return the memory map of the file, or `None` when the file is deemed
/// binary.
///
/// Three checks are made, in this order:
/// 1. the extension (when there's one and it's valid UTF-8) is tested with
///    `extensions::is_known_binary`, before the file is even opened;
/// 2. the mapped length is compared to `MAX_FILE_SIZE`;
/// 3. the mapped content is tested with `magic_numbers::is_known_binary`.
///
/// # Errors
///
/// Returns the error of `get_mmap` when the file can't be opened or mapped.
pub fn get_mmap_if_not_binary<P: AsRef<Path>>(hay_path: P) -> io::Result<Option<Mmap>> {
    let has_binary_extension = hay_path
        .as_ref()
        .extension()
        .and_then(|s| s.to_str())
        .map_or(false, |ext| extensions::is_known_binary(&ext));
    if has_binary_extension {
        return Ok(None);
    }
    let hay = get_mmap(&hay_path)?;
    // the size is checked first so that the content of a too big file
    // is never examined
    let searchable = hay.len() <= MAX_FILE_SIZE && !magic_numbers::is_known_binary(&hay);
    if searchable {
        Ok(Some(hay))
    } else {
        Ok(None)
    }
}

/// Tell whether the file should be skipped when searching text content.
///
/// The answer is `false` only when `get_mmap_if_not_binary` successfully
/// returns a map; it is `true` when the file is deemed binary and also
/// when the file can't be opened or mapped.
///
/// This function is quite slow as it creates a memmap just to check
/// a few bytes. When the memmap is needed anyway, prefer calling
/// `get_mmap_if_not_binary` directly.
pub fn is_path_binary<P: AsRef<Path>>(path: P) -> bool {
    // an I/O error is handled the same way as a binary file: no map
    let text_map = get_mmap_if_not_binary(path).unwrap_or(None);
    text_map.is_none()
}

/// Return the 1-based number of the line reached once `pos` bytes of the
/// file at `path` have been read.
///
/// Lines are read in order and their byte lengths (line terminator
/// included) are accumulated; the answer is the first line at which this
/// running total is greater than or equal to `pos`. Consequently a `pos`
/// exactly equal to the total length of the first `k` lines maps to line
/// `k`, and a `pos` of 0 maps to line 1 of any non-empty file.
///
/// Lines are handled as raw bytes, so the file doesn't have to be valid
/// UTF-8.
///
/// # Errors
///
/// Returns the underlying error when the file can't be opened or read, and
/// an error of kind [`io::ErrorKind::UnexpectedEof`] when the file holds
/// fewer than `pos` bytes (which is the case of every empty file).
pub fn line_count_at_pos<P: AsRef<Path>>(path: P, pos: usize) -> io::Result<usize> {
    let mut reader = BufReader::new(File::open(path)?);
    // A byte buffer rather than a `String`: only lengths matter here, and
    // `read_line` would fail on content which isn't valid UTF-8.
    let mut line = Vec::new();
    let mut line_count = 1;
    let mut bytes_count = 0;
    while reader.read_until(b'\n', &mut line)? > 0 {
        bytes_count += line.len();
        if bytes_count >= pos {
            return Ok(line_count);
        }
        line_count += 1;
        // `read_until` appends, so the buffer must be emptied between lines
        line.clear();
    }
    Err(io::Error::new(io::ErrorKind::UnexpectedEof, "too short"))
}