extern crate petgraph;
extern crate ptree;
#[macro_use]
extern crate lazy_static;
extern crate base58;
extern crate regex;
extern crate sha2;
use encoding::label::encoding_from_whatwg_label;
use encoding::DecoderTrap;
use normalize_line_endings::normalized;
use petgraph::algo::is_cyclic_directed;
use petgraph::dot::Dot;
use petgraph::prelude::*;
use petgraph::visit::Walker;
use ptree::graph::print_graph;
use regex::Regex;
use std::fmt;
use std::fs::File;
use std::io;
use std::io::Read;
use std::iter::FromIterator;
use std::path::Path;
use std::path::PathBuf;
/// Decodes raw bytes into a `String`, guessing the source encoding with `chardet`.
///
/// The detected charset is mapped to a WHATWG encoding label and the bytes are
/// decoded with `DecoderTrap::Ignore`. When `normalize_endings` is `true` the
/// decoded text is additionally run through `normalized`, after which it is
/// asserted to contain no `'\r'`.
///
/// Returns an empty string when no decoder is known for the detected label.
///
/// # Panics
///
/// Panics if the decoder reports an error, or if a `'\r'` survives
/// line-ending normalization.
pub fn decode_data_as_utf8(byte_str: &[u8], normalize_endings: bool) -> String {
    let detection = chardet::detect(&byte_str);
    let label = chardet::charset2encoding(&detection.0);

    // No decoder for the guessed charset: give up with empty text.
    let decoder = match encoding_from_whatwg_label(label) {
        Some(decoder) => decoder,
        None => return String::new(),
    };

    let decoded = decoder
        .decode(&byte_str, DecoderTrap::Ignore)
        .expect("Error decoding utf-8 data");
    if !normalize_endings {
        return decoded;
    }

    let unified = String::from_iter(normalized(decoded.chars()));
    assert!(!unified.contains('\r'));
    unified
}
/// One `#include` directive located in a scanned source text.
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct Include {
/// Path named by the directive. `resolve_includes` overwrites it with the
/// canonicalized path whenever canonicalization succeeds.
pub include_path: PathBuf,
/// Byte offset in the scanned text where the matched directive starts.
pub range_start: usize,
/// Byte offset in the scanned text just past the end of the matched directive.
pub range_end: usize,
/// `true` for the quoted form (`#include "..."`), `false` for the
/// angle-bracket form (`#include <...>`).
pub relative_path: bool,
}
/// A file in the include graph together with the results of processing it.
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct IncludeNode {
/// Working directory supplied when the node was created; `traverse_build`
/// resolves angle-bracket includes against the same directory.
pub working_dir: PathBuf,
/// Path of the file this node represents; read by `data_as_string`.
pub include_file: PathBuf,
/// Identity (see `compute_identity`) of the file's text as read from disk.
/// `None` until `traverse_patch` or `traverse_flatten` has visited the node.
pub source_identity: Option<String>,
/// Identity of the text after its include directives were replaced.
/// `None` until `traverse_patch` or `traverse_flatten` has visited the node.
pub patched_identity: Option<String>,
/// The text after its include directives were replaced by the most recent
/// `traverse_patch` / `traverse_flatten` pass; empty on a fresh node.
pub flattened: String,
}
impl fmt::Display for IncludeNodeWeight {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let source_identity = match self.node.source_identity {
Some(ref identity) => &identity,
None => "INVALID",
};
let patched_identity = match self.node.patched_identity {
Some(ref identity) => &identity,
None => "INVALID",
};
write!(
f,
"(s:[{}] p:[{}] f:[{:#?}])",
&source_identity,
&patched_identity,
&self.node.include_file.file_name().unwrap_or_default()
)
}
}
impl IncludeNode {
    /// Creates a node for `include_file` with both identities unset and no
    /// flattened text.
    pub fn new(working_dir: &Path, include_file: &Path) -> Self {
        Self {
            working_dir: working_dir.to_path_buf(),
            include_file: include_file.to_path_buf(),
            source_identity: None,
            patched_identity: None,
            flattened: String::new(),
        }
    }

    /// Reads `include_file` from disk and decodes it with
    /// `decode_data_as_utf8`.
    ///
    /// Any error while reading the file is swallowed and yields an empty
    /// string.
    pub fn data_as_string(&self, normalize_endings: bool) -> String {
        match read_file(&self.include_file) {
            Ok(bytes) => decode_data_as_utf8(&bytes, normalize_endings),
            Err(_) => String::new(),
        }
    }
}
/// Node weight stored in the include graph.
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct IncludeNodeWeight {
/// The file this graph node represents.
pub node: IncludeNode,
/// Include directives that `traverse_build` resolved for this file.
pub includes: Vec<Include>,
/// Set by `traverse_build` on the node it adds while the graph is still
/// empty; `get_root_node` searches for it.
pub(crate) is_root: bool,
}
/// Edge weight type; `traverse_build` stores the `level` of the including
/// file on every edge it adds.
pub type IncludeNodeLevel = u32;
/// Graph of include files: nodes carry an `IncludeNodeWeight`, and
/// `traverse_build` adds an edge from each file to every file it includes.
pub type IncludeNodeGraph = Graph<IncludeNodeWeight, IncludeNodeLevel>;
/// Returns the identity of `data`: its SHA-256 digest encoded as Base58.
pub fn compute_identity(data: &[u8]) -> String {
    use base58::ToBase58;
    use sha2::{Digest, Sha256};

    // One-shot digest; equivalent to feeding a fresh hasher once.
    Sha256::digest(data).to_base58()
}
pub fn traverse_build(
mut graph: &mut IncludeNodeGraph,
working_dir: &Path,
include_file: &Path,
level: IncludeNodeLevel,
normalize_endings: bool,
) -> NodeIndex {
let include_dir = include_file.parent().unwrap();
let include_node = IncludeNode::new(&working_dir, &include_file);
let include_text = include_node.data_as_string(normalize_endings);
let includes = resolve_includes(&include_text, &working_dir, &include_dir);
let (graph_node, outgoing_nodes) = if graph.node_count() == 0 {
let graph_node = graph.add_node(IncludeNodeWeight {
node: include_node,
includes: includes.clone(),
is_root: true,
});
let outgoing_nodes = includes
.iter()
.map(|ref include| {
traverse_build(
&mut graph,
&working_dir,
&include.include_path,
level + 1,
normalize_endings,
)
})
.collect::<Vec<NodeIndex>>();
(graph_node, outgoing_nodes)
} else {
let outgoing_nodes = includes
.iter()
.map(|ref include| {
traverse_build(
&mut graph,
&working_dir,
&include.include_path,
level + 1,
normalize_endings,
)
})
.collect::<Vec<NodeIndex>>();
let graph_node = graph.add_node(IncludeNodeWeight {
node: include_node,
includes,
is_root: false,
});
(graph_node, outgoing_nodes)
};
outgoing_nodes.iter().for_each(|outgoing_node| {
let edge = graph.add_edge(graph_node, *outgoing_node, level);
assert!(!is_cyclic_directed(&*graph));
if is_cyclic_directed(&*graph) {
graph.remove_edge(edge);
}
});
graph_node
}
pub fn traverse_patch(graph: &mut IncludeNodeGraph, root_node: NodeIndex, normalize_endings: bool) {
let dfs_nodes = DfsPostOrder::new(&*graph, root_node)
.iter(&*graph)
.collect::<Vec<NodeIndex>>();
dfs_nodes.iter().for_each(|node_index| {
let neighbors = graph
.neighbors_directed(*node_index, Direction::Outgoing)
.map(|neighbor| {
let neighbor_weight = &graph[neighbor];
assert!(neighbor_weight.node.source_identity.is_some());
assert!(neighbor_weight.node.patched_identity.is_some());
(
neighbor_weight.node.include_file.clone(),
neighbor_weight
.node
.patched_identity
.as_ref()
.unwrap()
.clone(),
)
})
.collect::<Vec<(PathBuf, String)>>();
if let Some(ref mut node_weight) = graph.node_weight_mut(*node_index) {
let mut node = &mut node_weight.node;
let mut include_text = node.data_as_string(normalize_endings);
node.source_identity = Some(compute_identity(&include_text.as_bytes()));
for (ref include_file, ref patched_identity) in neighbors {
if let Some(ref include) = node_weight
.includes
.iter()
.find(|&include| &include.include_path == include_file)
{
let patch = format!("#include \"{}\"", patched_identity);
include_text.replace_range(include.range_start..include.range_end, &patch);
}
}
node.patched_identity = Some(compute_identity(&include_text.as_bytes()));
node.flattened = include_text;
}
});
}
pub fn traverse_flatten(
graph: &mut IncludeNodeGraph,
root_node: NodeIndex,
normalize_endings: bool,
) {
let dfs_nodes = DfsPostOrder::new(&*graph, root_node)
.iter(&*graph)
.collect::<Vec<NodeIndex>>();
dfs_nodes.iter().for_each(|node_index| {
let neighbors = graph
.neighbors_directed(*node_index, Direction::Outgoing)
.map(|neighbor| {
let neighbor_weight = &graph[neighbor];
(
neighbor_weight.node.include_file.clone(),
neighbor_weight.node.flattened.to_owned(),
)
})
.collect::<Vec<(PathBuf, String)>>();
if let Some(ref mut node_weight) = graph.node_weight_mut(*node_index) {
let mut node = &mut node_weight.node;
let mut include_text = node.data_as_string(normalize_endings);
node.source_identity = Some(compute_identity(&include_text.as_bytes()));
for (ref include_file, ref flattened) in neighbors {
if let Some(ref include) = node_weight
.includes
.iter()
.find(|&include| &include.include_path == include_file)
{
let patch = format!(
"// EMBED-START - {:?}\n{}\n// EMBED-FINISH - {:?}",
&include_file, &flattened, &include_file
);
include_text.replace_range(include.range_start..include.range_end, &patch);
}
}
node.patched_identity = Some(compute_identity(&include_text.as_bytes()));
node.flattened = include_text;
}
});
}
/// Returns `true` when metadata for `path` can be queried, i.e. the path
/// exists and is accessible.
pub fn path_exists<P: AsRef<Path>>(path: P) -> bool {
    path.as_ref().exists()
}
/// Converts `path` to an owned `String`, or `None` when the path is not
/// valid Unicode.
pub fn path_to_string(path: &Path) -> Option<String> {
    path.as_os_str().to_str().map(str::to_string)
}
/// Returns `include_file` relative to the canonicalized `working_dir`.
///
/// `include_file` itself is not canonicalized. When `working_dir` cannot be
/// canonicalized, or is not a prefix of `include_file`, the path is returned
/// unchanged.
pub fn path_strip_base(working_dir: &Path, include_file: &Path) -> PathBuf {
    working_dir
        .canonicalize()
        .ok()
        .and_then(|base| include_file.strip_prefix(&base).ok().map(Path::to_path_buf))
        .unwrap_or_else(|| include_file.to_path_buf())
}
/// Reads the entire contents of the file at `path`.
///
/// The file is read until end-of-file rather than for the length reported by
/// its metadata, so files whose reported length is zero or stale (for example
/// procfs entries, or a file that is modified while being read) are still
/// returned in full.
///
/// # Errors
///
/// Returns the underlying I/O error when the file cannot be opened or read.
pub fn read_file<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
    let mut file = File::open(path)?;
    // The reported length is only an allocation hint; a missing, wrong or
    // truncated value merely costs a reallocation.
    let capacity_hint = file
        .metadata()
        .map(|meta| meta.len() as usize)
        .unwrap_or(0);
    let mut data = Vec::with_capacity(capacity_hint);
    file.read_to_end(&mut data)?;
    Ok(data)
}
pub fn parse_includes(input: &str) -> Vec<Include> {
lazy_static! {
static ref ABSOLUTE_PATH_REGEX: Regex = Regex::new(r#"(?m)^*\#\s*include\s*<([^<>]+)>"#)
.expect("failed to compile absolute include path regex");
}
lazy_static! {
static ref RELATIVE_PATH_REGEX: Regex = Regex::new(r#"(?m)^*\#\s*include\s*"([^"]+)""#)
.expect("failed to compile relative include path regex");
}
let mut references: Vec<Include> = Vec::with_capacity(8);
let absolute_results = ABSOLUTE_PATH_REGEX.find_iter(input);
for absolute_result in absolute_results {
let range_start = absolute_result.start();
let range_end = absolute_result.end();
let range_text = &input[range_start..range_end];
let range_caps = ABSOLUTE_PATH_REGEX.captures(range_text).unwrap();
let include_path = range_caps.get(1).map_or("", |m| m.as_str());
if !include_path.is_empty() {
references.push(Include {
include_path: Path::new(include_path).to_path_buf(),
range_start,
range_end,
relative_path: false,
});
}
}
let relative_results = RELATIVE_PATH_REGEX.find_iter(input);
for relative_result in relative_results {
let range_start = relative_result.start();
let range_end = relative_result.end();
let range_text = &input[range_start..range_end];
let range_text = range_text.trim().trim_matches('\n');
let range_caps = RELATIVE_PATH_REGEX.captures(range_text).unwrap();
let include_path = range_caps.get(1).map_or("", |m| m.as_str());
if !include_path.is_empty() {
references.push(Include {
include_path: Path::new(include_path).to_path_buf(),
range_start,
range_end,
relative_path: true,
});
}
}
references
}
pub fn resolve_includes(text: &str, working_dir: &Path, include_dir: &Path) -> Vec<Include> {
let mut includes = parse_includes(&text);
for include in &mut includes {
let parent_path = if include.relative_path {
include_dir
} else {
working_dir
};
let full_path = parent_path.join(&include.include_path);
if let Ok(ref canonicalized) = &full_path.canonicalize() {
include.include_path = canonicalized.to_path_buf();
}
}
includes.retain(|include| {
let include_path = Path::new(&include.include_path);
let exists = path_exists(&include_path);
if !exists {
println!("Include path is invalid: {:?}", include_path);
}
exists
});
includes.sort_by(|a, b| a.range_start.cmp(&b.range_start));
includes
}
/// Prints the graph as a tree starting at `root_node`, using
/// `ptree::graph::print_graph`, and returns that call's result.
pub fn graph_to_stdout(graph: &IncludeNodeGraph, root_node: NodeIndex) -> std::io::Result<()> {
    print_graph(graph, root_node)
}
/// Renders the graph through petgraph's `Dot` formatter and returns the
/// resulting text.
pub fn graph_to_dot(graph: &IncludeNodeGraph) -> String {
    let dot = Dot::new(graph);
    dot.to_string()
}
/// Returns a clone of every node's `IncludeNode`, in the graph's node order.
pub fn graph_to_node_vec(graph: &IncludeNodeGraph) -> Vec<IncludeNode> {
    let raw_nodes = graph.raw_nodes();
    let mut nodes = Vec::with_capacity(raw_nodes.len());
    for raw_node in raw_nodes {
        nodes.push(raw_node.weight.node.clone());
    }
    nodes
}
pub fn get_root_node(graph: &IncludeNodeGraph) -> Option<IncludeNode> {
if let Some(ref node) = graph.raw_nodes().iter().find(|&node| node.weight.is_root) {
Some(node.weight.node.clone())
} else {
None
}
}