1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
//! A `Parser` for parsing inputs that are chunks from streaming sources.

use std::{rc::Rc, hash::Hash};

use crate::{
    Parser as ParserStruct, ParseIterItem,
    parser::{BoxDatumAllocator, HashMapOperatorBindings},
    text::{TextVec, chunk::PosStrish},
    combiner::{OpFn, ApFn},
    source_stream::StrishIterSourceStream,
};

use super::helper::collect_up_to_first_err;


#[doc(no_inline)]
pub use crate::parser::DefaultCharClassifier as CharClassifier;


/// Chosen so that the application can choose whether or not to use all the
/// capacity of an input stream's `String` chunks, and so that referenced parts
/// of chunks are zero-copy, and so that `char` positions are tracked (relative
/// to the input stream), and so that, when chunks are broken around escape
/// characters, a simple `Vec` is used to logically concatenate them.
///
/// This `Text` type is a `TextVec` of `PosStrish<Rc<String>>` chunks.
pub type Text = TextVec<PosStrish<Rc<String>>>;

/// Chosen so that the [`Datum`]s in the AST values returned from parsing are
/// simply allocated in heap `Box`es without limit.
///
/// This `DatumAllocator` type is a `BoxDatumAllocator` of our `Text` type and
/// of the given `Extra` type.
///
/// The `Extra` type parameter determines the type used in the [`Datum::Extra`]
/// variant of our `Datum` type, and it defaults to `()`.
///
/// [`Datum`]: ../../../kul_core/enum.Datum.html
/// [`Datum::Extra`]: ../../../kul_core/enum.Datum.html#variant.Extra
pub type DatumAllocator<Extra = ()> = BoxDatumAllocator<Text, Extra>;

/// Chosen so that you may establish bindings simply using the `std` [`HashMap`]
/// and with flexible trait objects for the function types.
///
/// This `OperatorBindings` type is a `HashMapOperatorBindings` that binds
/// operator sub-forms, as our [`Datum`] type, to [`Combiner`] macro
/// functions/closures, as `Box`ed `dyn` trait objects.
///
/// The `Extra` type parameter determines the type used in the [`Datum::Extra`]
/// variant of our `Datum` type.  It defaults to `()`.
///
/// The `CombinerError` type parameter determines the type used in the
/// [`Error::FailedCombiner`] variant of the crate's error type which your
/// `Combiner` functions may return.  It defaults to `()`.
///
/// [`Datum`]: ../../../kul_core/enum.Datum.html
/// [`Combiner`]: ../../../kul_core/enum.Combiner.html
/// [`HashMap`]: http://doc.rust-lang.org/std/collections/struct.HashMap.html
/// [`Datum::Extra`]: ../../../kul_core/enum.Datum.html#variant.Extra
/// [`Error::FailedCombiner`]: ../../../kul_core/enum.Error.html#variant.FailedCombiner
pub type OperatorBindings<Extra = (), CombinerError = ()>
    = HashMapOperatorBindings<DatumAllocator<Extra>,
                              Box<OpFn<DatumAllocator<Extra>,
                                       CombinerError>>,
                              Box<ApFn<DatumAllocator<Extra>,
                                       CombinerError>>,
                              CombinerError>;

/// A [`Parser`] for parsing inputs that are chunks from a streaming source, as
/// a sequence of `String` chunks yielded by an `Iterator`, that uses: the
/// default characters as delimiters, `Box`es to allocate the [`Datum`]s in ASTs
/// returned from parsing, and [`HashMap`]s to bind operator sub-forms to
/// [`Combiner`] macro functions.
///
/// The AST values returned from parsing are zero-copy with regard to the input
/// parts they refer to, achieved by shared ownership, i.e. `Rc`-wrapping, of
/// the input `String` chunks.
///
/// You must decide which types you want to use in the [`Datum::Extra`] variant
/// and for any errors returned by your macro/"combiner" functions in the
/// [`Error::FailedCombiner`] variant.  If unsure, the `()` type is suitable for
/// either or both, and this is their default.
///
/// [`Parser`]: ../../../kul_core/struct.Parser.html
/// [`Datum`]: ../../../kul_core/enum.Datum.html
/// [`HashMap`]: http://doc.rust-lang.org/std/collections/struct.HashMap.html
/// [`Combiner`]: ../../../kul_core/enum.Combiner.html
/// [`Datum::Extra`]: ../../../kul_core/enum.Datum.html#variant.Extra
/// [`Error::FailedCombiner`]: ../../../kul_core/enum.Error.html#variant.FailedCombiner
pub type Parser<Extra = (), CombinerError = ()>
    = ParserStruct<CharClassifier,
                   DatumAllocator<Extra>,
                   OperatorBindings<Extra, CombinerError>>;

/// The `Result` of parsing a top-level form, and the type of elements returned
/// by [`parse_stream`] and [`parse_stream_with`].
///
/// An `Ok` value contains a [`Datum`] value that is the AST (abstract syntax
/// tree) of the parsed top-level form.
///
/// An `Err` value contains an [`Error`] value variant that describes the error
/// caused by either a syntax error in the input or by one of your [`Combiner`]
/// functions returning a value of your chosen `CombinerError` type parameter.
///
/// [`parse_stream`]: fn.parse_stream.html
/// [`parse_stream_with`]: fn.parse_stream_with.html
/// [`Datum`]: ../../../kul_core/enum.Datum.html
/// [`Error`]: ../../../kul_core/enum.Error.html
/// [`Combiner`]: ../../../kul_core/enum.Combiner.html
pub type TopFormResult<Extra = (), CombinerError = ()>
    = ParseIterItem<DatumAllocator<Extra>,
                    OperatorBindings<Extra, CombinerError>>;


/// Make a new `Parser` that uses the given `OperatorBindings` value and that
/// uses the types chosen by this module.
///
/// You may call the returned `Parser`'s [`parse`] method and use the
/// [`ParseIter`] values however you can.
///
/// [`parse`]: ../../../kul_core/struct.Parser.html#method.parse
/// [`ParseIter`]: ../../../kul_core/struct.ParseIter.html
#[inline]
pub fn parser<Extra, CombinerError>(
    bindings: OperatorBindings<Extra, CombinerError>
) -> Parser<Extra, CombinerError>
    where Extra: Hash + Eq,
{
    ParserStruct {
        classifier: CharClassifier,
        allocator: DatumAllocator::default(),
        bindings,
    }
}

/// Parse the given stream, which is an `Iterator` of `String` chunks, using a
/// [`Parser`] that uses the given `OperatorBindings` value, and return a vector
/// of the results as `Datum` ASTs for each successfully-parsed top-level form
/// and/or an `Error`.
///
/// This enables you to give bindings for parsing certain nest forms in your
/// custom ways and enables you to [choose](type.OperatorBindings.html) what to
/// use for the `Extra` and `CombinerError` type parameters.
///
/// If an error is returned by the parser, parsing will be immediately aborted
/// and the last element of the returned vector will be that error.  If instead
/// you want to continue trying to parse after an error occurred, use the
/// [`parser` function] to directly work with a `Parser`.
///
/// [`Parser`]: type.Parser.html
/// [`parser` function]: fn.parser.html
pub fn parse_stream_with<I, Extra, CombinerError>(
    input: I,
    bindings: OperatorBindings<Extra, CombinerError>
)
    -> Vec<TopFormResult<Extra, CombinerError>>
    where I: Iterator<Item = String>,
          Extra: Hash + Eq,
{
    let input_source_stream = StrishIterSourceStream::new(input.map(Rc::new));
    collect_up_to_first_err(parser(bindings).parse(input_source_stream))
}

/// Parse the given stream, which is an `Iterator` of `String` chunks, using a
/// [`Parser`] with no bindings of operators, and return a vector of the results
/// as `Datum` ASTs for each successfully-parsed top-level form and/or an
/// `Error`.
///
/// This will parse all nest forms recursively in the same way and produce the
/// [`Datum::Combination`] variant to represent each in the returned ASTs.  This
/// produces a simplistic kind of S-expression structure that is useful when you
/// don't want any special parsing of nested text, either because you want to
/// prevent that but still be able to analyze the basic structure, or because
/// your application is so simple that it only uses the format/language/syntax
/// to this extent.
///
/// This uses `()` for the types in the `Datum::Extra` and
/// `Error::FailedCombiner` variants, because these variants cannot be produced
/// when there are no bindings.
///
/// See [`parse_stream_with`], which this uses, for the description of how
/// errors are returned.
///
/// [`Parser`]: type.Parser.html
/// [`Datum::Combination`]: ../../../kul_core/enum.Datum.html#variant.Combination
/// [`parse_stream_with`]: fn.parse_stream_with.html
#[inline]
#[allow(clippy::module_name_repetitions)]
pub fn parse_stream<I>(input: I) -> Vec<TopFormResult>
    where I: Iterator<Item = String>,
{
    let empty_bindings = OperatorBindings::default();
    parse_stream_with(input, empty_bindings)
}


#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        Datum, Combiner, Text as _, Error,
        datum::DatumBox,
        text::chunk::CharPos,
    };
    use std::{collections::HashMap, iter::FromIterator};

    fn read_into(dest: &mut [u8], from: &[u8]) {
        dest.copy_from_slice(from);
    }

    fn buffer_stream_chunk(from: &str) -> String {
        let buf_size = from.len();
        let mut b = vec![0; buf_size];
        read_into(&mut b, from.as_bytes());
        String::from_utf8(b).unwrap()
    }

    /// Mock stream source
    fn stream<'a>(from: &'a [&'static str]) -> impl Iterator<Item = String> + 'a {
        from.iter().map(|&s| buffer_stream_chunk(s))
    }

    #[test]
    fn parse_stream() {
        assert_eq!(super::parse_stream(stream(&[])), []);
        assert_eq!(super::parse_stream(stream(&[""])), []);
        assert_eq!(super::parse_stream(stream(&["a"])),
                   [Ok(Datum::Text(TextVec::from_str("a")))]);
        assert_eq!(super::parse_stream(stream(&["{", "}"])),
                   [Ok(Datum::EmptyNest)]);
        assert_eq!(super::parse_stream(stream(&["{b", "", "oo}"])),
                   [Ok(Datum::Combination {
                       operator: DatumBox::new(Datum::Text(TextVec::from_str("boo"))),
                       operands: DatumBox::new(Datum::EmptyList),
                   })]);
        assert_eq!(super::parse_stream(stream(&["c{} d", ""])),
                   [Ok(Datum::Text(TextVec::from_str("c"))),
                    Ok(Datum::EmptyNest),
                    Ok(Datum::Text(TextVec::from_str(" d")))]);
        assert_eq!(super::parse_stream(stream(&["e ", "{", "f {", "}"])),
                   [Ok(Datum::Text(TextVec::from_str("e "))),
                    Err(Error::MissingEndChar)]);
        assert_eq!(super::parse_stream(stream(&["", "λ} h"])),
                   [Err(Error::UnbalancedEndChar(CharPos(1)))]);
    }

    #[test]
    fn parse_stream_with() {
        fn bindings() -> OperatorBindings<char, f32> {
            let pairs: Vec<(_, Combiner<Box<OpFn<_, _>>, Box<ApFn<_, _>>>)> = vec![
                (Datum::Text(TextVec::from_str("op")),
                 Combiner::Operative(Box::new(|_, _, _| Ok(Some(Datum::Extra('λ')))))),
                (Datum::Text(TextVec::from_str("e")),
                 Combiner::Applicative(Box::new(|_, _, _|
                                                Err(Error::FailedCombiner(1.5))))),
            ];
            OperatorBindings::new(HashMap::from_iter(pairs.into_iter()))
        }
        assert_eq!(super::parse_stream_with(stream(&[""]), bindings()), []);
        assert_eq!(super::parse_stream_with(stream(&["a"]), bindings()),
                   [Ok(Datum::Text(TextVec::from_str("a")))]);
        assert_eq!(super::parse_stream_with(stream(&["{}"]), bindings()),
                   [Ok(Datum::EmptyNest)]);
        assert_eq!(super::parse_stream_with(stream(&["{o", "p ignored}"]), bindings()),
                   [Ok(Datum::Extra('λ'))]);
        assert_eq!(super::parse_stream_with(stream(&["{z", "z ", "", "}"]), bindings()),
                   [Ok(Datum::Combination {
                           operator: DatumBox::new(Datum::Text(TextVec::from_str("zz"))),
                           operands: DatumBox::new(Datum::EmptyList),
                       })]);
        assert_eq!(super::parse_stream_with(stream(&["", "{", "e}aborted"]), bindings()),
                   [Err(Error::FailedCombiner(1.5))]);
    }
}


// Note: Tests of the `parser` function are in the integration test, because
// that is required for the impl of TestOperatorBindings for
// HashMapOperatorBindings to work, for using `test_suite0`.  (That impl doesn't
// work with unit tests because the test harness compilation actually doesn't
// use the same type that it's impl'ed for.)