html_streaming_editor/
lib.rs

1use log::debug;
2use peg::str::LineCol;
3use snafu::{Backtrace, ResultExt, Snafu};
4use std::fs::File;
5use std::io::{BufRead, BufReader, Read};
6
7pub(crate) use crate::css::{
8    CssAttributeComparison, CssAttributeSelector, CssPseudoClass, CssSelector, CssSelectorList,
9    CssSelectorPath, CssSelectorStep,
10};
11use crate::html::HtmlContent;
12use crate::string_creating::StringValueCreatingPipeline;
13
14pub use crate::html::HtmlRenderable;
15
16mod css;
17mod element_creating;
18mod element_processing;
19mod html;
20mod parsing;
21mod string_creating;
22
23#[derive(Debug, Snafu)]
24pub enum StreamingEditorError {
25    #[snafu(display("Failed to read input from"))]
26    ReadingInputFailed {
27        source: std::io::Error,
28        backtrace: Backtrace,
29    },
30    #[snafu(display("Failed to write output into"))]
31    WritingOutputFailed { source: std::io::Error },
32    #[snafu(display("Failed to parse input HTML"))]
33    ParsingInputFailed {
34        source: tl::ParseError,
35        backtrace: Backtrace,
36    },
37    #[snafu(display("Failed to convert parsed HTML into memory model"))]
38    LoadingParsedHtmlFailed {
39        #[snafu(backtrace)]
40        source: html::HtmlDomError,
41    },
42    #[snafu(display("Failed to parse pipeline"))]
43    ParsingPipelineFailed {
44        source: peg::error::ParseError<LineCol>,
45        backtrace: Backtrace,
46    },
47    #[snafu(display("Failed to run pipeline"))]
48    RunningPipelineFailed {
49        #[snafu(backtrace)]
50        source: PipelineError,
51    },
52}
53
54#[derive(Debug, Snafu)]
55#[snafu(visibility(pub(crate)))]
56pub enum PipelineError {
57    #[snafu(display("Command at index {index} failed"))]
58    CommandFailed {
59        index: usize,
60        #[snafu(backtrace)]
61        source: CommandError,
62    },
63}
64
65#[derive(Debug, Snafu)]
66#[snafu(visibility(pub(crate)))]
67pub enum CommandError {
68    #[snafu(display("Sub-Pipeline failed"))]
69    SubpipelineFailed {
70        #[snafu(backtrace)]
71        #[snafu(source(from(PipelineError, Box::new)))]
72        source: Box<PipelineError>,
73    },
74    #[snafu(display("Failed to read input from"))]
75    ReadingCommandInputFailed {
76        source: std::io::Error,
77        backtrace: Backtrace,
78    },
79    #[snafu(display("Failed to parse input HTML"))]
80    ParsingCommandInputFailed {
81        source: tl::ParseError,
82        backtrace: Backtrace,
83    },
84    #[snafu(display("Failed to convert parsed HTML into memory model"))]
85    LoadingParsedCommandHtmlFailed {
86        #[snafu(backtrace)]
87        source: crate::html::HtmlDomError,
88    },
89    #[snafu(display("Failed to parse regular expression"))]
90    ParsingRegexFailed {
91        source: regex::Error,
92        backtrace: Backtrace,
93    },
94}
95
/// Runs an editing pipeline against HTML read from a borrowed reader.
///
/// The editor only borrows its input; the reader must outlive the editor.
pub struct HtmlStreamingEditor<'a> {
    /// Reader the HTML document is read from.
    input: &'a mut dyn BufRead,
}
99
100impl<'a> HtmlStreamingEditor<'a> {
101    pub fn new(input: &'a mut dyn BufRead) -> Self {
102        HtmlStreamingEditor { input }
103    }
104
105    pub fn run(
106        self,
107        pipeline_definition: &str,
108    ) -> Result<Vec<Box<dyn HtmlRenderable>>, StreamingEditorError> {
109        let pipeline =
110            parsing::grammar::pipeline(pipeline_definition).context(ParsingPipelineFailedSnafu)?;
111        debug!("Parsed Pipeline: {:#?}", &pipeline);
112
113        let mut string_content = String::new();
114        self.input
115            .read_to_string(&mut string_content)
116            .context(ReadingInputFailedSnafu)?;
117
118        let dom = tl::parse(&string_content, tl::ParserOptions::default())
119            .context(ParsingInputFailedSnafu)?;
120        let root_element = HtmlContent::import(dom).context(LoadingParsedHtmlFailedSnafu)?;
121        let result = pipeline
122            .run_on(vec![root_element])
123            .context(RunningPipelineFailedSnafu)?;
124
125        debug!("Final Result: {:#?}", &result);
126        Ok(result
127            .iter()
128            .map(|n| Box::new(n.clone()) as Box<dyn HtmlRenderable>)
129            .collect::<Vec<_>>())
130    }
131}
132
133pub fn report<E: 'static>(err: &E)
134where
135    E: std::error::Error,
136    E: snafu::ErrorCompat,
137    E: Send + Sync,
138{
139    eprintln!("[ERROR] {}", err);
140    if let Some(source) = err.source() {
141        eprintln!();
142        eprintln!("Caused by:");
143        for (i, e) in std::iter::successors(Some(source), |e| e.source()).enumerate() {
144            eprintln!("   {}: {}", i, e);
145        }
146    }
147
148    if let Some(backtrace) = snafu::ErrorCompat::backtrace(err) {
149        eprintln!("Backtrace:");
150        eprintln!("{}", backtrace);
151    }
152}
153
154/// Is the value directly defined or is it a sub-pipeline?
155#[derive(Debug, PartialEq, Clone)]
156pub(crate) enum ValueSource<'a> {
157    StringValue(&'a str),
158    SubPipeline(StringValueCreatingPipeline<'a>),
159}
160
161impl<'a> ValueSource<'a> {
162    pub fn render(
163        &self,
164        element: &rctree::Node<HtmlContent>,
165    ) -> Result<Vec<String>, PipelineError> {
166        match self {
167            ValueSource::StringValue(value) => Ok(vec![String::from(*value)]),
168            ValueSource::SubPipeline(pipeline) => pipeline.run_on(element),
169        }
170    }
171}
172
173pub(crate) fn load_html_file(file_path: &str) -> Result<rctree::Node<HtmlContent>, CommandError> {
174    let file = File::open(file_path).context(ReadingCommandInputFailedSnafu)?;
175    let mut buffered_reader = BufReader::new(file);
176
177    let mut string_content = String::new();
178    buffered_reader
179        .read_to_string(&mut string_content)
180        .context(ReadingCommandInputFailedSnafu)?;
181
182    let dom = tl::parse(&string_content, tl::ParserOptions::default())
183        .context(ParsingCommandInputFailedSnafu)?;
184
185    HtmlContent::import(dom).context(LoadingParsedCommandHtmlFailedSnafu)
186}
187
/// Test helper: parses the HTML snippet `html` and imports it as an HTML tree.
///
/// # Panics
///
/// Panics if the snippet cannot be parsed or the parsed HTML cannot be imported.
#[cfg(test)]
pub(crate) fn load_inline_html(html: &str) -> rctree::Node<HtmlContent> {
    let options = tl::ParserOptions::default();
    let parsed = tl::parse(html, options).unwrap();
    let imported = HtmlContent::import(parsed);

    imported.unwrap()
}