use std::{
path::{Path, PathBuf},
str::FromStr,
};
#[cfg(feature = "network")]
use std::{fs::File, io};
use url::Url;
use crate::{
Options, Preprocessor, SafeMode,
error::{Error, Positioning, SourceLocation},
model::{HEADER, LeveloffsetRange, Position, SourceRange, substitute},
};
use super::tag::{DELIMITERS, Filter as TagFilter, Name as TagName, apply_tag_filters};
/// A parsed `include::target[attributes]` directive, together with the context
/// needed to resolve its target and report errors.
#[derive(Debug)]
pub(crate) struct Include {
/// Path that a `Target::Path` target is joined onto when it is resolved.
file_parent: PathBuf,
/// What to include: a filesystem path or an `http(s)` URL.
target: Target,
/// Value of the `leveloffset` attribute, if one was given.
level_offset: Option<isize>,
/// Selections parsed from the `lines` attribute.
line_range: Vec<LinesRange>,
/// Tag names collected from the `tag` and `tags` attributes.
tags: Vec<TagName>,
/// Value of the `indent` attribute, if one was given.
indent: Option<usize>,
/// Value of the `encoding` attribute; handed to the file decoder when reading.
encoding: Option<String>,
/// Comma-separated entries of the `opts` attribute. `optional` suppresses the
/// warning emitted when the target file does not exist.
opts: Vec<String>,
/// Copy of the parser options that were in effect when the directive was parsed.
options: Options,
/// Line of the directive; used as the line of reported error locations.
line_number: usize,
/// Offset of the start of the directive's line, as supplied by the caller.
current_offset: usize,
/// File containing the directive, if known; used in reported error locations.
current_file: Option<PathBuf>,
}
/// One selection from the `lines` attribute of an include directive.
///
/// Line numbers are 1-based; they are converted to 0-based indices when the
/// selection is applied.
#[derive(Debug)]
enum LinesRange {
/// A single line number. `0` is rejected when the selection is applied.
Single(usize),
/// A `start..end` span. `start` is validated like a single line number; `end`
/// is interpreted by `Include::resolve_end_line`, which special-cases `-1`.
Range(usize, isize),
}
/// The thing an include directive points at.
#[derive(Debug)]
pub(crate) enum Target {
/// A filesystem path, kept exactly as written after attribute substitution.
Path(PathBuf),
/// A target that started with `http://` or `https://` and parsed as a URL.
Url(Url),
}
/// Borrowed location of the directive being parsed. It is passed into the
/// grammar so that the `Include` it builds can carry the location along.
#[derive(Debug, Clone, Copy)]
struct LocationContext<'a> {
/// Line of the directive.
line_number: usize,
/// Offset of the start of the directive's line.
current_offset: usize,
/// File containing the directive, if known.
current_file: Option<&'a Path>,
}
// PEG grammar for a single `include::target[attributes]` directive line.
//
// Grammar arguments:
// - `path`: stored on the resulting `Include` as `file_parent`.
// - `options`: supplies the document attributes used for target substitution
//   and is cloned into the resulting `Include`.
// - `location`: copied into the resulting `Include` for error reporting.
peg::parser! {
grammar include_parser(
path: &std::path::Path,
options: &Options,
location: LocationContext<'_>
) for str {
// Entry rule. Matches `include::`, a target, and a bracketed (possibly empty)
// attribute list, then builds the `Include`. The rule yields a `Result` so that
// URL and attribute errors surface as `Error` values instead of grammar failures.
pub(crate) rule include() -> Result<Include, Error>
= "include::" target:target() "[" attrs:attributes()? "]" {
// `substitute` is applied to the raw target with the document attributes
// before the target is classified.
let target_raw = substitute(&target, HEADER, &options.document_attributes);
// Only the `http://` and `https://` prefixes select a URL target; everything
// else is kept as a path. URL parse failures are propagated with `?`.
let target =
if target_raw.starts_with("http://") || target_raw.starts_with("https://") {
Target::Url(Url::parse(&target_raw)?)
} else {
Target::Path(PathBuf::from(target_raw))
};
// Start from "no attributes given"; `parse_attributes` fills in the rest.
let mut include = Include {
file_parent: path.to_path_buf(),
target,
level_offset: None,
line_range: Vec::new(),
tags: Vec::new(),
indent: None,
encoding: None,
opts: Vec::new(),
options: options.clone(),
line_number: location.line_number,
current_offset: location.current_offset,
current_file: location.current_file.map(Path::to_path_buf),
};
if let Some(attrs) = attrs {
include.parse_attributes(attrs)?;
}
Ok(include)
}
// One or more characters up to (not including) the first `[`, space, or tab.
rule target() -> String
= t:$((!['[' | ' ' | '\t'] [_])+) {
t.to_string()
}
// A non-empty, comma-separated list of `key=value` pairs.
rule attributes() -> Vec<(String, String)>
= pair:attribute_pair() pairs:("," p:attribute_pair() { p })* {
let mut attrs = vec![pair];
attrs.extend(pairs);
attrs
}
// A single `key=value` pair; no whitespace is accepted around `=`.
rule attribute_pair() -> (String, String)
= k:attribute_key() "=" v:attribute_value() {
(k, v)
}
// The recognised attribute names. `tags` is listed before `tag` because the
// choice is ordered and the longer name has to be tried first.
rule attribute_key() -> String
= k:$("leveloffset" / "lines" / "tags" / "tag" / "indent" / "encoding" / "opts") {
k.to_string()
}
// Either a double-quoted value (everything up to the next `"`, no escapes) or a
// bare value running up to the next `,` or `]`. A bare value may be empty.
rule attribute_value() -> String
= "\"" v:$((!['"'] [_])*) "\"" { v.to_string() }
/ v:$((![','] ![']'] [_])*) { v.to_string() }
}
}
impl FromStr for LinesRange {
    type Err = Error;

    /// Parses a single `lines` selection without any source location.
    ///
    /// Errors therefore carry the fallback location chosen by
    /// `LinesRange::create_error` (line 1, no file).
    fn from_str(spec: &str) -> Result<Self, Self::Err> {
        let no_location = None;
        Self::from_str_with_location(spec, no_location)
    }
}
impl LinesRange {
fn create_error(line_range: &str, location: Option<(usize, usize, Option<&Path>)>) -> Error {
let (line_number, _current_offset, current_file) = location.unwrap_or((1, 0, None));
Error::InvalidLineRange(
Box::new(SourceLocation {
file: current_file.map(Path::to_path_buf),
positioning: Positioning::Position(Position {
line: line_number,
column: 1,
}),
}),
line_range.to_string(),
)
}
fn from_str_with_location(
line_range: &str,
location: Option<(usize, usize, Option<&Path>)>,
) -> Result<Self, Error> {
if line_range.contains("..") {
let mut parts = line_range.split("..");
let start = parts
.next()
.ok_or_else(|| Self::create_error(line_range, location))?
.parse()
.map_err(|_| Self::create_error(line_range, location))?;
let end = parts
.next()
.ok_or_else(|| Self::create_error(line_range, location))?
.parse()
.map_err(|_| Self::create_error(line_range, location))?;
Ok(LinesRange::Range(start, end))
} else {
Ok(LinesRange::Single(line_range.parse().map_err(|e| {
tracing::error!(?line_range, ?e, "Failed to parse line range");
Self::create_error(line_range, location)
})?))
}
}
fn parse(
value: &str,
line_number: usize,
current_offset: usize,
current_file: Option<&Path>,
) -> Result<Vec<Self>, Error> {
let location = Some((line_number, current_offset, current_file));
let separator = if value.contains(';') {
';'
} else if value.contains(',') {
','
} else {
return Ok(vec![Self::from_str_with_location(value, location)?]);
};
value
.split(separator)
.map(|part| Self::from_str_with_location(part, location))
.collect()
}
}
/// Outcome of evaluating an include directive.
#[derive(Debug)]
pub(crate) struct IncludeResult {
/// Lines selected from the target after tag/line filtering and re-indentation.
/// Empty when nothing was included.
pub(crate) lines: Vec<String>,
/// The directive's `leveloffset` added to the document's current `leveloffset`
/// attribute; `None` when the directive did not set `leveloffset`.
pub(crate) effective_leveloffset: Option<isize>,
/// Level-offset ranges reported by preprocessing the included file; empty when
/// the file was not run through the preprocessor.
pub(crate) nested_leveloffset_ranges: Vec<LeveloffsetRange>,
/// Path that was read, or `None` when nothing was included.
pub(crate) file: Option<PathBuf>,
/// Source ranges reported by preprocessing the included file; empty when the
/// file was not run through the preprocessor.
pub(crate) nested_source_ranges: Vec<SourceRange>,
}
impl Include {
fn parse_attributes(&mut self, attributes: Vec<(String, String)>) -> Result<(), Error> {
for (key, value) in attributes {
match key.as_ref() {
"leveloffset" => {
self.level_offset = Some(value.parse().map_err(|_| {
Error::InvalidLevelOffset(
Box::new(SourceLocation {
file: self.current_file.clone(),
positioning: Positioning::Position(Position {
line: self.line_number,
column: 1,
}),
}),
value.clone(),
)
})?);
}
"lines" => {
self.line_range.extend(LinesRange::parse(
&value,
self.line_number,
self.current_offset,
self.current_file.as_deref(),
)?);
}
"tag" => self.tags.push(TagName::from(value)),
"tags" => {
self.tags.extend(value.split(DELIMITERS).map(TagName::from));
}
"indent" => {
self.indent = Some(value.parse().map_err(|_| {
Error::InvalidIndent(
Box::new(SourceLocation {
file: self.current_file.clone(),
positioning: Positioning::Position(Position {
line: self.line_number,
column: 1,
}),
}),
value.clone(),
)
})?);
}
"encoding" => {
self.encoding = Some(value.clone());
}
"opts" => {
self.opts.extend(value.split(',').map(str::to_string));
}
unknown => {
tracing::error!(?unknown, "unknown attribute key in include directive");
return Err(Error::InvalidIncludeDirective(
Box::new(SourceLocation {
file: self.current_file.clone(),
positioning: Positioning::Position(Position {
line: self.line_number,
column: 1,
}),
}),
unknown.to_string(),
));
}
}
}
Ok(())
}
pub(crate) fn parse(
file_parent: &Path,
line: &str,
line_number: usize,
line_start_offset: usize,
current_file: Option<&Path>,
options: &Options,
) -> Result<Self, Error> {
let location = LocationContext {
line_number,
current_offset: line_start_offset,
current_file,
};
include_parser::include(line, file_parent, options, location).map_err(|e| {
tracing::error!(?line, error=?e, "failed to parse include directive");
let location = e.location;
Error::Parse(
Box::new(crate::SourceLocation {
file: current_file.map(Path::to_path_buf),
positioning: crate::Positioning::Position(Position {
line: line_number,
column: location.column,
}),
}),
e.expected.to_string(),
)
})?
}
/// Turns the directive's target into a local path to read.
///
/// Path targets are joined onto `file_parent`. URL targets are delegated to
/// `resolve_url_target`, which may yield `None` when remote includes are not
/// permitted.
fn resolve_target_path(&self) -> Result<Option<PathBuf>, Error> {
    let relative = match &self.target {
        Target::Url(url) => return self.resolve_url_target(url),
        Target::Path(path) => path,
    };
    Ok(Some(self.file_parent.join(relative)))
}
#[allow(clippy::unnecessary_wraps)] fn resolve_url_target(&self, url: &Url) -> Result<Option<PathBuf>, Error> {
if self.options.safe_mode > SafeMode::Server {
tracing::warn!(safe_mode=?self.options.safe_mode, "URL includes are disabled by default. If you want to enable them, must run in `SERVER` mode or less.");
return Ok(None);
}
if self
.options
.document_attributes
.get("allow-uri-read")
.is_none()
{
tracing::warn!(
"URL includes are disabled by default. If you want to enable them, set the 'allow-uri-read' attribute to 'true' in the document attributes or in the command line."
);
return Ok(None);
}
#[cfg(not(feature = "network"))]
{
tracing::warn!(url=?url, "network support is disabled, cannot fetch remote includes");
Ok(None)
}
#[cfg(feature = "network")]
{
let mut temp_path = std::env::temp_dir();
let Some(file_name) = url.path_segments().and_then(std::iter::Iterator::last) else {
tracing::error!(url=?url, "failed to extract file name from URL");
return Ok(None);
};
temp_path.push(file_name);
let mut response = ureq::get(url.as_str())
.call()
.map_err(|e| Error::HttpRequest(e.to_string()))?;
let mut file = File::create(&temp_path)?;
io::copy(&mut response.body_mut().as_reader(), &mut file)?;
tracing::debug!(?temp_path, url=?url, "downloaded file from URL");
Ok(Some(temp_path))
}
}
fn apply_content_filters(&self, content_lines: &[String]) -> Vec<String> {
let mut lines = Vec::new();
if !self.tags.is_empty() {
let filters: Vec<TagFilter> = self
.tags
.iter()
.map(|t| TagFilter::parse(t.as_str()))
.collect();
let selected_indices = apply_tag_filters(content_lines, &filters);
if self.line_range.is_empty() {
for idx in selected_indices {
if let Some(line) = content_lines.get(idx) {
lines.push(line.clone());
}
}
} else {
let line_range_indices = self.collect_line_range_indices(content_lines.len());
for idx in selected_indices {
if line_range_indices.contains(&idx)
&& let Some(line) = content_lines.get(idx)
{
lines.push(line.clone());
}
}
}
} else if self.line_range.is_empty() {
lines.extend(content_lines.iter().cloned());
} else {
self.extend_lines_with_ranges(content_lines, &mut lines);
}
lines
}
/// Re-indents `lines` so that the least-indented non-blank line starts after
/// exactly `indent` spaces.
///
/// The common leading whitespace is measured and removed in characters rather
/// than bytes, so a line that starts with multi-byte whitespace is never cut in
/// the middle of a character. Relative indentation between lines is kept.
/// Lines that are empty or whitespace-only become empty strings.
fn apply_indent(lines: &[String], indent: usize) -> Vec<String> {
    let leading_whitespace = |line: &str| line.chars().take_while(|c| c.is_whitespace()).count();

    // Blank lines carry no meaningful indentation and must not pull the minimum
    // down to zero.
    let min_indent = lines
        .iter()
        .filter(|line| !line.trim().is_empty())
        .map(|line| leading_whitespace(line))
        .min()
        .unwrap_or(0);
    let prefix = " ".repeat(indent);

    lines
        .iter()
        .map(|line| {
            if line.trim().is_empty() {
                return String::new();
            }
            // Byte offset of the first character to keep. Every non-blank line
            // has at least `min_indent` leading whitespace characters followed by
            // content, so the fallback is only a safeguard.
            let cut = line
                .char_indices()
                .nth(min_indent)
                .map_or(line.len(), |(offset, _)| offset);
            format!("{prefix}{}", &line[cut..])
        })
        .collect()
}
/// Reads and decodes `file_path`, returning its text together with any nested
/// level-offset and source ranges.
///
/// Files whose extension is one of `adoc`, `asciidoc`, `ad`, `asc` or `txt`
/// (compared case-sensitively) are run through the preprocessor, which supplies
/// the nested ranges. Any other file is only normalized and reports no ranges.
///
/// # Errors
///
/// Propagates failures from reading/decoding the file and from preprocessing.
pub(crate) fn read_content_from_file(
    &self,
    file_path: &Path,
) -> Result<(String, Vec<LeveloffsetRange>, Vec<SourceRange>), Error> {
    const PREPROCESSED_EXTENSIONS: [&str; 5] = ["adoc", "asciidoc", "ad", "asc", "txt"];

    let content = crate::preprocessor::read_and_decode_file(file_path, self.encoding.as_deref())?;

    let is_preprocessed = file_path.extension().is_some_and(|ext| {
        let ext = ext.to_string_lossy();
        PREPROCESSED_EXTENSIONS.contains(&ext.as_ref())
    });
    if !is_preprocessed {
        return Ok((Preprocessor::normalize(&content), Vec::new(), Vec::new()));
    }

    match super::Preprocessor.process_inner(&content, Some(file_path), &self.options) {
        Ok(result) => Ok((result.text, result.leveloffset_ranges, result.source_ranges)),
        Err(error) => {
            tracing::error!(path=?file_path, ?error, "failed to process file");
            Err(error)
        }
    }
}
/// Evaluates the directive and returns the lines to splice into the document.
///
/// An empty result (no lines, no file) is returned when the target cannot be
/// resolved or does not exist. A missing file is logged as a warning unless
/// `opts` contains `optional`.
///
/// # Errors
///
/// Propagates failures from resolving the target and from reading the file.
pub(crate) fn lines(&self) -> Result<IncludeResult, Error> {
    let nothing_included = || IncludeResult {
        lines: Vec::new(),
        effective_leveloffset: None,
        nested_leveloffset_ranges: Vec::new(),
        file: None,
        nested_source_ranges: Vec::new(),
    };

    let path = match self.resolve_target_path()? {
        Some(path) if path.exists() => path,
        Some(path) => {
            let is_optional = self.opts.iter().any(|opt| opt == "optional");
            if !is_optional {
                tracing::warn!(path=?path, "file is missing - include directive won't be processed");
            }
            return Ok(nothing_included());
        }
        None => return Ok(nothing_included()),
    };

    let (content, nested_leveloffset_ranges, nested_source_ranges) =
        self.read_content_from_file(&path)?;
    let effective_leveloffset = self.calculate_effective_leveloffset();

    let content_lines: Vec<String> = content.lines().map(String::from).collect();
    let mut lines = self.apply_content_filters(&content_lines);
    if let Some(indent) = self.indent {
        lines = Self::apply_indent(&lines, indent);
    }

    Ok(IncludeResult {
        lines,
        effective_leveloffset,
        nested_leveloffset_ranges,
        file: Some(path),
        nested_source_ranges,
    })
}
/// Combines the directive's `leveloffset` with the document's current one.
///
/// Returns `None` when the directive did not set `leveloffset`. Otherwise the
/// document's `leveloffset` attribute (treated as 0 when missing or not an
/// integer) is added to the directive's value.
fn calculate_effective_leveloffset(&self) -> Option<isize> {
    let requested = self.level_offset?;
    let document_offset = self
        .options
        .document_attributes
        .get_string("leveloffset")
        .and_then(|s| s.parse::<isize>().ok())
        .unwrap_or(0);
    Some(document_offset + requested)
}
/// Converts a 1-based line number into a 0-based index.
///
/// Line number `0` is invalid: a warning is logged and `None` is returned.
fn validate_line_number(num: usize) -> Option<usize> {
    let index = num.checked_sub(1);
    if index.is_none() {
        tracing::warn!(?num, "invalid line number in include directive");
    }
    index
}
fn resolve_end_line(end: isize, max_size: usize) -> Option<usize> {
match end {
-1 => Some(max_size),
n if n > 0 => match usize::try_from(n - 1) {
Ok(val) => Some(val),
Err(e) => {
tracing::error!(?end, ?e, "failed to cast end line number to usize");
None
}
},
_ => {
tracing::error!(?end, "invalid end line number in include directive");
None
}
}
}
/// Collects the 0-based indices covered by the directive's `lines` selections.
///
/// `content_lines_count` is the number of lines in the included content. A
/// single line number outside the content is skipped. A range is skipped as a
/// whole when either end is invalid or outside the content, or when it starts
/// after it ends.
fn collect_line_range_indices(
    &self,
    content_lines_count: usize,
) -> std::collections::HashSet<usize> {
    let mut indices = std::collections::HashSet::new();

    for selection in &self.line_range {
        match *selection {
            LinesRange::Single(number) => {
                let index = Self::validate_line_number(number)
                    .filter(|idx| *idx < content_lines_count);
                indices.extend(index);
            }
            LinesRange::Range(start, end) => {
                let Some(first) = Self::validate_line_number(start) else {
                    continue;
                };
                let Some(last) = Self::resolve_end_line(end, content_lines_count) else {
                    continue;
                };
                let in_bounds = first < content_lines_count && last < content_lines_count;
                if in_bounds && first <= last {
                    indices.extend(first..=last);
                }
            }
        }
    }

    indices
}
pub(crate) fn extend_lines_with_ranges(
&self,
content_lines: &[String],
lines: &mut Vec<String>,
) {
let content_lines_count = content_lines.len();
for line in &self.line_range {
match line {
LinesRange::Single(line_number) => {
if let Some(idx) = Self::validate_line_number(*line_number)
&& idx < content_lines_count
&& let Some(line) = content_lines.get(idx)
{
lines.push(line.clone());
}
}
LinesRange::Range(start, end) => {
let Some(start_idx) = Self::validate_line_number(*start) else {
continue;
};
let Some(end_idx) = Self::resolve_end_line(*end, content_lines_count) else {
continue;
};
if start_idx < content_lines_count
&& end_idx < content_lines_count
&& start_idx <= end_idx
&& let Some(new_lines) = content_lines.get(start_idx..=end_idx)
{
lines.extend_from_slice(new_lines);
}
}
}
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parses `line` as a directive on line 1 of an unnamed document whose
    /// parent directory is `/tmp`, using default options.
    fn parse_directive(line: &str) -> Result<Include, Error> {
        let parent = PathBuf::from("/tmp");
        Include::parse(&parent, line, 1, 0, None, &Options::default())
    }

    /// Turns string literals into the owned lines `apply_indent` expects.
    fn owned(lines: &[&str]) -> Vec<String> {
        lines.iter().map(|line| (*line).to_string()).collect()
    }

    #[test]
    fn test_parse_simple_include() -> Result<(), Error> {
        let include = parse_directive("include::target.adoc[]")?;
        assert!(matches!(
            include.target,
            Target::Path(ref path) if path.as_path() == Path::new("target.adoc")
        ));
        Ok(())
    }

    #[test]
    fn test_parse_include_with_attributes() -> Result<(), Error> {
        let include =
            parse_directive("include::target.adoc[leveloffset=+1,lines=1..5,tag=example]")?;
        assert_eq!(include.level_offset, Some(1));
        assert_eq!(include.tags, vec![TagName::from("example")]);
        assert!(!include.line_range.is_empty());
        Ok(())
    }

    #[test]
    fn test_parse_include_with_url() -> Result<(), Error> {
        let include = parse_directive("include::https://example.com/doc.adoc[]")?;
        assert!(matches!(
            include.target,
            Target::Url(ref url) if url.as_str() == "https://example.com/doc.adoc"
        ));
        Ok(())
    }

    #[test]
    fn test_parse_quoted_attributes() -> Result<(), Error> {
        let include =
            parse_directive(r#"include::target.adoc[tag="example code",encoding="utf-8"]"#)?;
        assert_eq!(include.tags, vec![TagName::from("example code")]);
        assert_eq!(include.encoding, Some("utf-8".to_string()));
        Ok(())
    }

    #[test]
    fn test_parse_include_with_tags_attribute() -> Result<(), Error> {
        let include = parse_directive("include::target.adoc[tags=intro;main;conclusion]")?;
        assert_eq!(
            include.tags,
            vec![
                TagName::from("intro"),
                TagName::from("main"),
                TagName::from("conclusion")
            ]
        );
        Ok(())
    }

    #[test]
    fn test_parse_include_with_negated_tag() -> Result<(), Error> {
        let include = parse_directive("include::target.adoc[tags=*;!debug]")?;
        assert_eq!(
            include.tags,
            vec![TagName::from("*"), TagName::from("!debug")]
        );
        Ok(())
    }

    #[test]
    fn test_parse_include_with_wildcard() -> Result<(), Error> {
        let include = parse_directive("include::target.adoc[tags=**]")?;
        assert_eq!(include.tags, vec![TagName::from("**")]);
        Ok(())
    }

    #[test]
    fn test_parse_include_with_indent() -> Result<(), Error> {
        let include = parse_directive("include::target.adoc[indent=4]")?;
        assert_eq!(include.indent, Some(4));
        Ok(())
    }

    #[test]
    fn test_apply_indent_basic() {
        // No common indentation to strip: every line just gains four spaces.
        let lines = owned(&["def hello", "  puts \"Hello\"", "end"]);
        let result = Include::apply_indent(&lines, 4);
        assert_eq!(
            result,
            vec!["    def hello", "      puts \"Hello\"", "    end"]
        );
    }

    #[test]
    fn test_apply_indent_zero() {
        // The common four-space indentation is removed; relative indentation stays.
        let lines = owned(&["    def hello", "      puts \"Hello\"", "    end"]);
        let result = Include::apply_indent(&lines, 0);
        assert_eq!(result, vec!["def hello", "  puts \"Hello\"", "end"]);
    }

    #[test]
    fn test_apply_indent_empty_lines() {
        // Empty and whitespace-only lines come back empty and do not affect the
        // common indentation.
        let lines = owned(&["def hello", "", "  puts \"Hello\"", "   ", "end"]);
        let result = Include::apply_indent(&lines, 2);
        assert_eq!(
            result,
            vec!["  def hello", "", "    puts \"Hello\"", "", "  end"]
        );
    }

    #[test]
    fn test_apply_indent_mixed_whitespace() {
        // One leading tab is common to all lines and is replaced by two spaces;
        // the second tab on the middle line is preserved.
        let lines = owned(&["\tdef hello", "\t\tputs \"Hello\"", "\tend"]);
        let result = Include::apply_indent(&lines, 2);
        assert_eq!(
            result,
            vec!["  def hello", "  \tputs \"Hello\"", "  end"]
        );
    }
}