//! Decoding of source input whose encoding is not supported out of the box,
//! delegated to a user-provided `CustomDecoder`.
use std::error::Error;

// `List<T>` is the sequence container used in this module: the crate's
// `ExternalList<T>` when the `compile-with-external-structures` feature is
// enabled, the standard `Vec<T>` otherwise.
#[cfg(feature = "compile-with-external-structures")]
use crate::containers::ExternalList;
#[cfg(feature = "compile-with-external-structures")]
type List<T> = ExternalList<T>;
#[cfg(not(feature = "compile-with-external-structures"))]
type List<T> = Vec<T>;

// `StringPtr` is the owned string type used in this module: the crate's
// `ExternalStringPtr` when the `compile-with-external-structures` feature is
// enabled, the standard `String` otherwise.
#[cfg(feature = "compile-with-external-structures")]
use crate::containers::ExternalStringPtr;
#[cfg(feature = "compile-with-external-structures")]
type StringPtr = ExternalStringPtr;
#[cfg(not(feature = "compile-with-external-structures"))]
type StringPtr = String;

/// Signature of a custom decoding function.
///
/// A decoder is used when the input source has an encoding
/// that is not supported out of the box.
///
/// Supported encodings are:
/// 1. UTF-8
/// 2. ASCII-8BIT (or BINARY, it's an alias)
///
/// So if your source looks like this:
///
/// ```text
/// # encoding: koi8-r
/// \xFF = 42
/// ```
///
/// you need to provide a decoder that converts this byte sequence
/// into UTF-8 bytes.
///
/// The function takes the encoding name and the initial input as arguments
/// and returns `Ok(decoded)` with a vector of bytes, or `Err(error)` that
/// will be returned in the `ParserResult::diagnostics` vector.
pub type CustomDecoderFn = dyn Fn(StringPtr, List<u8>) -> CustomDecoderResult;

/// Custom decoder, a wrapper around an optional decoding function
pub struct CustomDecoder {
    /// The wrapped decoding function; `None` when no custom decoder is set.
    f: Option<Box<CustomDecoderFn>>,
}

impl CustomDecoder {
    /// Constructs a decoder based on a given function
    pub fn new(f: Box<CustomDecoderFn>) -> Self {
        Self { f: Some(f) }
    }

    /// Constructs an empty decoder that wraps no function. Default value.
    pub fn none() -> Self {
        Self { f: None }
    }

    /// Returns a reference to the wrapped decoding function,
    /// or `None` if this decoder wraps no function
    pub fn as_option(&self) -> Option<&CustomDecoderFn> {
        // `as_deref` turns `&Option<Box<F>>` into `Option<&F>`.
        self.f.as_deref()
    }

    /// Moves the wrapped function (if any) out into a new `CustomDecoder`,
    /// leaving `self` empty
    pub(crate) fn take(&mut self) -> Self {
        Self { f: self.f.take() }
    }
}

impl std::fmt::Debug for CustomDecoder {
    /// Prints the struct with the field `f` shown as `Some("function")` when a
    /// decoding function is set and `None` otherwise (the function itself is
    /// not printed).
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let placeholder = match self.as_option() {
            Some(_) => Some("function"),
            None => None,
        };

        let mut builder = formatter.debug_struct("CustomDecoder");
        builder.field("f", &placeholder);
        builder.finish()
    }
}

impl Default for CustomDecoder {
    fn default() -> Self {
        Self::none()
    }
}

/// Result that is returned from a decoding function
///
/// A dedicated enum declared with `#[repr(C)]`, mirroring the shape of
/// the standard `Result`.
#[repr(C)]
#[derive(Debug)]
pub enum CustomDecoderResult {
    /// Decoding succeeded; carries the decoded bytes
    Ok(List<u8>),

    /// Decoding failed; carries the reason
    Err(InputError),
}

impl CustomDecoderResult {
    pub(crate) fn to_result(self) -> Result<List<u8>, InputError> {
        match self {
            Self::Ok(value) => Ok(value),
            Self::Err(err) => Err(err),
        }
    }
}

/// An enum with all possible kinds of errors that can be returned
/// from a decoder
#[derive(Debug)]
#[repr(C)]
pub enum InputError {
    /// Emitted when no custom decoder is provided but the input has a custom encoding.
    /// Carries the name of the encoding.
    ///
    /// You can return this error from your custom decoder if you don't support the given encoding.
    UnsupportedEncoding(StringPtr),

    /// Generic error that can be emitted from a custom decoder.
    /// Carries a string payload supplied by the decoder.
    DecodingError(StringPtr),
}

impl std::fmt::Display for InputError {
    /// Renders the error using its `Debug` representation.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        formatter.write_fmt(format_args!("{:?}", self))
    }
}

// Marks `InputError` as a standard error type; no trait methods are overridden,
// so the defaults provided by `std::error::Error` apply.
impl Error for InputError {}

/// Decodes `input` according to the encoding named by `enc`.
///
/// Encodings supported out of the box (`UTF-8`, `ASCII-8BIT` and its alias
/// `BINARY`, compared after uppercasing `enc`) return `input` unchanged.
///
/// Any other encoding is delegated to `decoder`: its function is called with
/// `enc` and `input`, and whatever it returns is returned as is. If `decoder`
/// wraps no function, `InputError::UnsupportedEncoding(enc)` is returned.
pub fn decode_input(
    input: List<u8>,
    enc: StringPtr,
    decoder: CustomDecoder,
) -> CustomDecoderResult {
    // NOTE(review): when `StringPtr` is `String`, `to_uppercase` is Unicode-aware
    // and allocates; an ASCII-only case-insensitive comparison may be what is
    // intended for encoding names — confirm before changing.
    match enc.to_uppercase().as_str() {
        // Natively supported: the bytes are passed through untouched.
        "UTF-8" | "ASCII-8BIT" | "BINARY" => CustomDecoderResult::Ok(input),
        _ => match decoder.as_option() {
            Some(f) => f(enc, input),
            None => CustomDecoderResult::Err(InputError::UnsupportedEncoding(enc)),
        },
    }
}