ptero/method/
line_extend.rs

1//! # Description
2//!
//! This encoder extends a line with an extra word (so that it becomes longer than the pivot) to encode a bit.
//! If the line length is greater than the pivot, the bit 1 is encoded; otherwise the bit 0 is encoded.
//!
6//! For more info about pivot see [LineByPivotIterator](../../text/struct.LineByPivotIterator.html).
7//!
8//! # Behavior
9//!
//! This encoder can return [EncodingError](../struct.EncodingError.html) when the bit 1 has to be encoded
//! but no extra words are left.
12use std::error::Error;
13
14use log::trace;
15use regex::Regex;
16
17use crate::{
18    binary::Bit,
19    context::{Context, ContextError, PivotByLineContext, PivotByRawLineContext},
20    decoder::Decoder,
21    encoder::{Capacity, Encoder, EncoderResult, EncodingError},
22};
23
24use super::Method;
25
/// Delimiter placed between words (a plain ASCII space).
pub const ASCII_DELIMITER: char = ' ';

/// Every character that may appear at the end of a line.
///
/// This set differs from the one used in [crate::method::trailing_unicode]: it includes all
/// possible characters, not only the curated subset used in encoding.
pub const POSSIBLE_LINE_ENDINGS_SET: [char; 32] = [
    '\u{0020}', // SPACE
    '\u{2000}', // EN QUAD
    '\u{2001}', // EM QUAD
    '\u{2002}', // EN SPACE
    '\u{2003}', // EM SPACE
    '\u{2004}', // THREE-PER-EM SPACE
    '\u{2005}', // FOUR-PER-EM SPACE
    '\u{2006}', // SIX-PER-EM SPACE
    '\u{2007}', // FIGURE SPACE
    '\u{2009}', // THIN SPACE
    '\u{200A}', // HAIR SPACE
    '\u{200B}', // ZERO WIDTH SPACE
    '\u{200C}', // ZERO WIDTH NON-JOINER
    '\u{200D}', // ZERO WIDTH JOINER
    '\u{200E}', // LEFT-TO-RIGHT MARK
    '\u{2028}', // LINE SEPARATOR
    '\u{202A}', // LEFT-TO-RIGHT EMBEDDING
    '\u{202C}', // POP DIRECTIONAL FORMATTING
    '\u{202D}', // LEFT-TO-RIGHT OVERRIDE
    '\u{202F}', // NARROW NO-BREAK SPACE
    '\u{205F}', // MEDIUM MATHEMATICAL SPACE
    '\u{2060}', // WORD JOINER
    '\u{2061}', // FUNCTION APPLICATION
    '\u{2062}', // INVISIBLE TIMES
    '\u{2063}', // INVISIBLE SEPARATOR
    '\u{2064}', // INVISIBLE PLUS
    '\u{2066}', // LEFT-TO-RIGHT ISOLATE
    '\u{2068}', // FIRST STRONG ISOLATE
    '\u{2069}', // POP DIRECTIONAL ISOLATE
    '\u{3000}', // IDEOGRAPHIC SPACE
    '\u{FEFF}', // ZERO WIDTH NO-BREAK SPACE (BOM)
    '\u{00A0}', // NO-BREAK SPACE
];
37
/// Unit structure representing the line extension method.
///
/// Accepts only the following contexts: [PivotByLineContext](crate::context::PivotByLineContext) for the
/// [Encoder](crate::encoder::Encoder) trait and
/// [PivotByRawLineContext](crate::context::PivotByRawLineContext) for the
/// [Decoder](crate::decoder::Decoder) trait.
/// *The decoder needs to consume raw lines to be able to decode information using the pivot.*
pub struct LineExtendMethod;

impl LineExtendMethod {
    /// Creates a new line extension method.
    ///
    /// The method is a unit structure, so construction never fails.
    pub fn new() -> Self {
        Self
    }
}

impl Default for LineExtendMethod {
    /// Returns the same value as [LineExtendMethod::new].
    fn default() -> Self {
        Self::new()
    }
}
56
57impl Capacity for LineExtendMethod {
58    fn bitrate(&self) -> usize {
59        1
60    }
61}
62
63impl Encoder<PivotByLineContext> for LineExtendMethod {
64    fn partial_encode(
65        &self,
66        context: &mut PivotByLineContext,
67        data: &mut dyn Iterator<Item = Bit>,
68    ) -> Result<EncoderResult, Box<dyn Error>> {
69        Ok(match data.next() {
70            Some(Bit(1)) => {
71                // TODO: Provide mapping for ContextError -> EncodingError
72                let word = context
73                    .next_word()
74                    .ok_or_else(EncodingError::no_words_error)?;
75                trace!("Extending line with '{}'", &word);
76                let text = context.get_current_text_mut()?;
77                text.push(ASCII_DELIMITER);
78                text.push_str(word.as_str());
79                EncoderResult::Success
80            }
81            None => EncoderResult::NoDataLeft,
82            _ => {
83                trace!("Leaving line as-is");
84                EncoderResult::Success
85            }
86        })
87    }
88}
89
90impl Decoder<PivotByRawLineContext> for LineExtendMethod {
91    fn partial_decode(&self, context: &PivotByRawLineContext) -> Result<Vec<Bit>, ContextError> {
92        let repeated_whitespace_pattern = Regex::new(r"\s+").unwrap();
93        let cleaned_line = repeated_whitespace_pattern
94            .replace_all(context.get_current_text()?, " ");
95        let bit = if cleaned_line.trim_end_matches(&POSSIBLE_LINE_ENDINGS_SET[..]).len() > context.get_pivot() {
96            trace!("Line is extended over the {} length", context.get_pivot());
97            Bit(1)
98        } else {
99            trace!("Line not extended");
100            Bit(0)
101        };
102        Ok(vec![bit])
103    }
104}
105
106impl Method<PivotByLineContext, PivotByRawLineContext> for LineExtendMethod {
107    fn method_name(&self) -> String {
108        "LineExtendMethod".to_string()
109    }
110}