serde_jsonlines/lib.rs
1#![cfg_attr(docsrs, feature(doc_cfg))]
2//! [JSON Lines](https://jsonlines.org) (a.k.a. newline-delimited JSON) is a
3//! simple format for storing sequences of JSON values in which each value is
4//! serialized on a single line and terminated by a newline sequence. The
5//! `serde-jsonlines` crate provides functionality for reading & writing these
6//! documents (whether all at once or line by line) using [`serde`]'s
7//! serialization & deserialization features.
8//!
9//! Basic usage involves simply importing the [`BufReadExt`] or [`WriteExt`]
10//! extension trait and then using the [`json_lines()`][BufReadExt::json_lines]
11//! or [`write_json_lines()`][WriteExt::write_json_lines] method on a `BufRead`
12//! or `Write` value to read or write a sequence of JSON Lines values.
13//! Convenience functions are also provided for the common case of reading or
14//! writing a JSON Lines file given as a filepath.
15//!
16//! At a lower level, values can be read or written one at a time (which is
17//! useful if, say, different lines are different types) by wrapping a
18//! `BufRead` or `Write` value in a [`JsonLinesReader`] or [`JsonLinesWriter`]
19//! and then calling the wrapped structure's [`read()`][JsonLinesReader::read]
20//! or [`write()`][JsonLinesWriter::write] method, respectively.
21//!
22//! When the `async` feature is enabled, analogous types for working with JSON
23//! Lines asynchronously under [`tokio`] become available.
24//!
25//! Example
26//! =======
27//!
28//! ```no_run
29//! use serde::{Deserialize, Serialize};
30//! use serde_jsonlines::{json_lines, write_json_lines};
31//! use std::io::Result;
32//!
33//! #[derive(Debug, Deserialize, Eq, PartialEq, Serialize)]
34//! pub struct Structure {
35//! pub name: String,
36//! pub size: i32,
37//! pub on: bool,
38//! }
39//!
40//! fn main() -> Result<()> {
41//! let values = vec![
42//! Structure {
43//! name: "Foo Bar".into(),
44//! size: 42,
45//! on: true,
46//! },
47//! Structure {
48//! name: "Quux".into(),
49//! size: 23,
50//! on: false,
51//! },
52//! Structure {
53//! name: "Gnusto Cleesh".into(),
54//! size: 17,
55//! on: true,
56//! },
57//! ];
58//! write_json_lines("example.jsonl", &values)?;
59//! let values2 = json_lines("example.jsonl")?.collect::<Result<Vec<Structure>>>()?;
60//! assert_eq!(values, values2);
61//! Ok(())
62//! }
63//! ```
64
65use serde::{de::DeserializeOwned, Serialize};
66use std::fs::{File, OpenOptions};
67use std::io::{BufRead, BufReader, BufWriter, Result, Write};
68use std::marker::PhantomData;
69use std::path::Path;
70
71#[cfg(feature = "async")]
72mod asynclib;
73#[cfg(feature = "async")]
74pub use asynclib::*;
75
76/// A type alias for a [`JsonLinesIter`] on a buffered file object.
77///
78/// This is the return type of [`json_lines()`].
79pub type JsonLinesFileIter<T> = JsonLinesIter<BufReader<File>, T>;
80
/// A writer that emits values in the JSON Lines format.
///
/// `JsonLinesWriter` wraps a [`std::io::Write`] instance.  Each
/// [`serde::Serialize`] value handed to it is serialized as one line of JSON
/// and then terminated with a newline.
///
/// # Example
///
/// ```no_run
/// use serde::Serialize;
/// use serde_jsonlines::JsonLinesWriter;
/// use std::fs::{read_to_string, File};
///
/// #[derive(Serialize)]
/// pub struct Structure {
///     pub name: String,
///     pub size: i32,
///     pub on: bool,
/// }
///
/// fn main() -> std::io::Result<()> {
///     {
///         let fp = File::create("example.jsonl")?;
///         let mut writer = JsonLinesWriter::new(fp);
///         writer.write_all([
///             Structure {
///                 name: "Foo Bar".into(),
///                 size: 42,
///                 on: true,
///             },
///             Structure {
///                 name: "Quux".into(),
///                 size: 23,
///                 on: false,
///             },
///             Structure {
///                 name: "Gnusto Cleesh".into(),
///                 size: 17,
///                 on: true,
///             },
///         ])?;
///         writer.flush()?;
///     }
///     // End the block to close the writer
///     assert_eq!(
///         read_to_string("example.jsonl")?,
///         concat!(
///             "{\"name\":\"Foo Bar\",\"size\":42,\"on\":true}\n",
///             "{\"name\":\"Quux\",\"size\":23,\"on\":false}\n",
///             "{\"name\":\"Gnusto Cleesh\",\"size\":17,\"on\":true}\n",
///         )
///     );
///     Ok(())
/// }
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct JsonLinesWriter<W> {
    // The wrapped output sink that serialized lines are written to.
    inner: W,
}

impl<W> JsonLinesWriter<W> {
    /// Wrap a [`std::io::Write`] instance in a new `JsonLinesWriter`
    pub fn new(writer: W) -> Self {
        Self { inner: writer }
    }

    /// Borrow the wrapped writer immutably
    pub fn get_ref(&self) -> &W {
        &self.inner
    }

    /// Borrow the wrapped writer mutably
    pub fn get_mut(&mut self) -> &mut W {
        &mut self.inner
    }

    /// Take the `JsonLinesWriter` apart, handing back the wrapped writer
    pub fn into_inner(self) -> W {
        let Self { inner } = self;
        inner
    }
}
162
163impl<W: Write> JsonLinesWriter<W> {
164 /// Serialize a value as a line of JSON and write it to the underlying
165 /// writer, followed by a newline.
166 ///
167 /// Note that separate calls to this method may write different types of
168 /// values.
169 ///
170 /// # Errors
171 ///
172 /// Has the same error conditions as [`serde_json::to_writer()`] and
173 /// [`std::io::Write::write_all()`].
174 pub fn write<T>(&mut self, value: &T) -> Result<()>
175 where
176 T: ?Sized + Serialize,
177 {
178 serde_json::to_writer(&mut self.inner, value)?;
179 self.inner.write_all(b"\n")?;
180 Ok(())
181 }
182
183 /// Serialize each item in an iterator as a line of JSON, and write out
184 /// each one followed by a newline to the underlying writer.
185 ///
186 /// All values in a single call to `write_all()` must be the same type, but
187 /// separate calls may write different types.
188 ///
189 /// # Errors
190 ///
191 /// Has the same error conditions as [`write()`][JsonLinesWriter::write].
192 pub fn write_all<T, I>(&mut self, items: I) -> Result<()>
193 where
194 I: IntoIterator<Item = T>,
195 T: Serialize,
196 {
197 for value in items {
198 self.write(&value)?;
199 }
200 Ok(())
201 }
202
203 /// Flush the underlying writer.
204 ///
205 /// Neither [`write()`][JsonLinesWriter::write] nor
206 /// [`write_all()`][JsonLinesWriter::write_all] flush the writer, so you
207 /// must explicitly call this method if you need output flushed.
208 ///
209 /// # Errors
210 ///
211 /// Has the same error conditions as [`std::io::Write::flush()`].
212 pub fn flush(&mut self) -> Result<()> {
213 self.inner.flush()
214 }
215}
216
217/// A structure for reading JSON values from JSON Lines input.
218///
219/// A `JsonLinesReader` wraps a [`std::io::BufRead`] instance and parses each
220/// line as a [`serde::de::DeserializeOwned`] value in JSON.
221///
222/// # Example
223///
224/// ```no_run
225/// use serde::Deserialize;
226/// use serde_jsonlines::JsonLinesReader;
227/// use std::fs::{write, File};
228/// use std::io::BufReader;
229///
230/// #[derive(Debug, Deserialize, PartialEq)]
231/// pub struct Structure {
232/// pub name: String,
233/// pub size: i32,
234/// pub on: bool,
235/// }
236///
237/// fn main() -> std::io::Result<()> {
238/// write(
239/// "example.jsonl",
240/// concat!(
241/// "{\"name\": \"Foo Bar\", \"on\":true,\"size\": 42 }\n",
242/// "{ \"name\":\"Quux\", \"on\" : false ,\"size\": 23}\n",
243/// " {\"name\": \"Gnusto Cleesh\" , \"on\": true, \"size\": 17}\n",
244/// ),
245/// )?;
246/// let fp = BufReader::new(File::open("example.jsonl")?);
247/// let reader = JsonLinesReader::new(fp);
248/// let items = reader
249/// .read_all::<Structure>()
250/// .collect::<std::io::Result<Vec<_>>>()?;
251/// assert_eq!(
252/// items,
253/// [
254/// Structure {
255/// name: "Foo Bar".into(),
256/// size: 42,
257/// on: true,
258/// },
259/// Structure {
260/// name: "Quux".into(),
261/// size: 23,
262/// on: false,
263/// },
264/// Structure {
265/// name: "Gnusto Cleesh".into(),
266/// size: 17,
267/// on: true,
268/// },
269/// ]
270/// );
271/// Ok(())
272/// }
273/// ```
274#[derive(Clone, Debug, Eq, PartialEq)]
275pub struct JsonLinesReader<R> {
276 inner: R,
277}
278
279impl<R> JsonLinesReader<R> {
280 /// Construct a new `JsonLinesReader` from a [`std::io::BufRead`] instance
281 pub fn new(reader: R) -> Self {
282 JsonLinesReader { inner: reader }
283 }
284
285 /// Consume the `JsonLinesReader` and return the underlying reader
286 pub fn into_inner(self) -> R {
287 self.inner
288 }
289
290 /// Get a reference to the underlying reader
291 pub fn get_ref(&self) -> &R {
292 &self.inner
293 }
294
295 /// Get a mutable reference to the underlying reader
296 pub fn get_mut(&mut self) -> &mut R {
297 &mut self.inner
298 }
299
300 /// Consume the `JsonLinesReader` and return an iterator over the
301 /// deserialized JSON values from each line.
302 ///
303 /// The returned iterator has an `Item` type of `std::io::Result<T>`. Each
304 /// call to `next()` has the same error conditions as
305 /// [`read()`][JsonLinesReader::read].
306 ///
307 /// Note that all deserialized values will be of the same type. If you
308 /// wish to read lines of varying types, use the
309 /// [`read()`][JsonLinesReader::read] method instead.
310 pub fn read_all<T>(self) -> JsonLinesIter<R, T> {
311 JsonLinesIter {
312 reader: self,
313 _output: PhantomData,
314 }
315 }
316}
317
318impl<R: BufRead> JsonLinesReader<R> {
319 /// Read & deserialize a line of JSON from the underlying reader.
320 ///
321 /// If end-of-file is reached, this method returns `Ok(None)`.
322 ///
323 /// Note that separate calls to this method may read different types of
324 /// values.
325 ///
326 /// # Errors
327 ///
328 /// Has the same error conditions as [`std::io::BufRead::read_line()`] and
329 /// [`serde_json::from_str()`]. Note that, in the latter case (which can
330 /// be identified by the [`std::io::Error`] having a [`serde_json::Error`]
331 /// value as its payload), continuing to read from the `JsonLinesReader`
332 /// afterwards will pick up on the next line as though the error never
333 /// happened, so invalid JSON can be easily ignored if you so wish.
334 pub fn read<T>(&mut self) -> Result<Option<T>>
335 where
336 T: DeserializeOwned,
337 {
338 let mut s = String::new();
339 let r = self.inner.read_line(&mut s)?;
340 if r == 0 {
341 Ok(None)
342 } else {
343 Ok(Some(serde_json::from_str::<T>(&s)?))
344 }
345 }
346}
347
348/// An iterator over the lines of a [`BufRead`] value `R` that decodes each
349/// line as JSON of type `T`.
350///
351/// This iterator yields items of type `Result<T, std::io::Error>`. Errors
352/// occurr under the same conditions as for [`JsonLinesReader::read()`].
353///
354/// Iterators of this type are returned by [`JsonLinesReader::read_all()`],
355/// [`BufReadExt::json_lines()`], and [`json_lines()`].
356#[derive(Clone, Debug, Eq, PartialEq)]
357pub struct JsonLinesIter<R, T> {
358 reader: JsonLinesReader<R>,
359 _output: PhantomData<T>,
360}
361
362impl<R, T> JsonLinesIter<R, T> {
363 /// Construct a new `JsonLinesIter` from a [`std::io::BufRead`] instance
364 pub fn new(reader: R) -> Self {
365 JsonLinesIter {
366 reader: JsonLinesReader::new(reader),
367 _output: PhantomData,
368 }
369 }
370
371 /// Consume the `JsonLinesIter` and return the underlying reader
372 pub fn into_inner(self) -> R {
373 self.reader.into_inner()
374 }
375
376 /// Get a reference to the underlying reader
377 pub fn get_ref(&self) -> &R {
378 self.reader.get_ref()
379 }
380
381 /// Get a mutable reference to the underlying reader
382 pub fn get_mut(&mut self) -> &mut R {
383 self.reader.get_mut()
384 }
385}
386
387impl<R, T> Iterator for JsonLinesIter<R, T>
388where
389 T: DeserializeOwned,
390 R: BufRead,
391{
392 type Item = Result<T>;
393
394 fn next(&mut self) -> Option<Result<T>> {
395 self.reader.read().transpose()
396 }
397}
398
399/// An extension trait for the [`std::io::Write`] trait that adds a
400/// `write_json_lines()` method
401///
402/// # Example
403///
404/// ```no_run
405/// use serde::Serialize;
406/// use serde_jsonlines::WriteExt;
407/// use std::fs::{read_to_string, File};
408/// use std::io::Write;
409///
410/// #[derive(Serialize)]
411/// pub struct Structure {
412/// pub name: String,
413/// pub size: i32,
414/// pub on: bool,
415/// }
416///
417/// fn main() -> std::io::Result<()> {
418/// {
419/// let mut fp = File::create("example.jsonl")?;
420/// fp.write_json_lines([
421/// Structure {
422/// name: "Foo Bar".into(),
423/// size: 42,
424/// on: true,
425/// },
426/// Structure {
427/// name: "Quux".into(),
428/// size: 23,
429/// on: false,
430/// },
431/// Structure {
432/// name: "Gnusto Cleesh".into(),
433/// size: 17,
434/// on: true,
435/// },
436/// ])?;
437/// fp.flush()?;
438/// }
439/// // End the block to close the writer
440/// assert_eq!(
441/// read_to_string("example.jsonl")?,
442/// concat!(
443/// "{\"name\":\"Foo Bar\",\"size\":42,\"on\":true}\n",
444/// "{\"name\":\"Quux\",\"size\":23,\"on\":false}\n",
445/// "{\"name\":\"Gnusto Cleesh\",\"size\":17,\"on\":true}\n",
446/// )
447/// );
448/// Ok(())
449/// }
450/// ```
451pub trait WriteExt: Write {
452 /// Serialize each item in an iterator as a line of JSON, and write out
453 /// each one followed by a newline.
454 ///
455 /// All values in a single call to `write_json_lines()` must be the same
456 /// type, but separate calls may write different types.
457 ///
458 /// This method does not flush.
459 ///
460 /// # Errors
461 ///
462 /// Has the same error conditions as [`serde_json::to_writer()`] and
463 /// [`std::io::Write::write_all()`].
464 fn write_json_lines<T, I>(&mut self, items: I) -> Result<()>
465 where
466 I: IntoIterator<Item = T>,
467 T: Serialize,
468 {
469 for value in items {
470 serde_json::to_writer(&mut *self, &value)?;
471 self.write_all(b"\n")?;
472 }
473 Ok(())
474 }
475}
476
477impl<W: Write> WriteExt for W {}
478
479/// An extension trait for the [`std::io::BufRead`] trait that adds a
480/// `json_lines()` method
481///
482/// # Example
483///
484/// ```no_run
485/// use serde::Deserialize;
486/// use serde_jsonlines::BufReadExt;
487/// use std::fs::{write, File};
488/// use std::io::{BufReader, Result};
489///
490/// #[derive(Debug, Deserialize, PartialEq)]
491/// pub struct Structure {
492/// pub name: String,
493/// pub size: i32,
494/// pub on: bool,
495/// }
496///
497/// fn main() -> Result<()> {
498/// write(
499/// "example.jsonl",
500/// concat!(
501/// "{\"name\": \"Foo Bar\", \"on\":true,\"size\": 42 }\n",
502/// "{ \"name\":\"Quux\", \"on\" : false ,\"size\": 23}\n",
503/// " {\"name\": \"Gnusto Cleesh\" , \"on\": true, \"size\": 17}\n",
504/// ),
505/// )?;
506/// let fp = BufReader::new(File::open("example.jsonl")?);
507/// let items = fp.json_lines::<Structure>().collect::<Result<Vec<_>>>()?;
508/// assert_eq!(
509/// items,
510/// [
511/// Structure {
512/// name: "Foo Bar".into(),
513/// size: 42,
514/// on: true,
515/// },
516/// Structure {
517/// name: "Quux".into(),
518/// size: 23,
519/// on: false,
520/// },
521/// Structure {
522/// name: "Gnusto Cleesh".into(),
523/// size: 17,
524/// on: true,
525/// },
526/// ]
527/// );
528/// Ok(())
529/// }
530/// ```
531pub trait BufReadExt: BufRead {
532 /// Consume the reader and return an iterator over the deserialized JSON
533 /// values from each line.
534 ///
535 /// The returned iterator has an `Item` type of `std::io::Result<T>`. Each
536 /// call to `next()` has the same error conditions as
537 /// [`JsonLinesReader::read()`].
538 ///
539 /// Note that all deserialized values will be of the same type.
540 fn json_lines<T>(self) -> JsonLinesIter<Self, T>
541 where
542 Self: Sized,
543 {
544 JsonLinesIter::new(self)
545 }
546}
547
548impl<R: BufRead> BufReadExt for R {}
549
550/// Write an iterator of values to the file at `path` as JSON Lines.
551///
552/// If the file does not already exist, it is created. If it does exist, any
553/// contents are discarded.
554///
555/// # Errors
556///
557/// Has the same error conditions as [`File::create()`],
558/// [`serde_json::to_writer()`], [`std::io::Write::write_all()`], and
559/// [`std::io::Write::flush()`].
560///
561/// # Example
562///
563/// ```no_run
564/// use serde::Serialize;
565/// use serde_jsonlines::write_json_lines;
566/// use std::fs::read_to_string;
567///
568/// #[derive(Serialize)]
569/// pub struct Structure {
570/// pub name: String,
571/// pub size: i32,
572/// pub on: bool,
573/// }
574///
575/// fn main() -> std::io::Result<()> {
576/// write_json_lines(
577/// "example.jsonl",
578/// [
579/// Structure {
580/// name: "Foo Bar".into(),
581/// size: 42,
582/// on: true,
583/// },
584/// Structure {
585/// name: "Quux".into(),
586/// size: 23,
587/// on: false,
588/// },
589/// Structure {
590/// name: "Gnusto Cleesh".into(),
591/// size: 17,
592/// on: true,
593/// },
594/// ],
595/// )?;
596/// assert_eq!(
597/// read_to_string("example.jsonl")?,
598/// concat!(
599/// "{\"name\":\"Foo Bar\",\"size\":42,\"on\":true}\n",
600/// "{\"name\":\"Quux\",\"size\":23,\"on\":false}\n",
601/// "{\"name\":\"Gnusto Cleesh\",\"size\":17,\"on\":true}\n",
602/// )
603/// );
604/// Ok(())
605/// }
606/// ```
607pub fn write_json_lines<P, I, T>(path: P, items: I) -> Result<()>
608where
609 P: AsRef<Path>,
610 I: IntoIterator<Item = T>,
611 T: Serialize,
612{
613 let mut fp = BufWriter::new(File::create(path)?);
614 fp.write_json_lines(items)?;
615 fp.flush()
616}
617
618/// Append an iterator of values to the file at `path` as JSON Lines.
619///
620/// If the file does not already exist, it is created. If it does exist, the
621/// new lines are added after any lines that are already present.
622///
623/// # Errors
624///
625/// Has the same error conditions as [`File::create()`],
626/// [`serde_json::to_writer()`], [`std::io::Write::write_all()`], and
627/// [`std::io::Write::flush()`].
628///
629/// # Example
630///
631/// ```no_run
632/// use serde::Serialize;
633/// use serde_jsonlines::append_json_lines;
634/// use std::fs::read_to_string;
635///
636/// #[derive(Serialize)]
637/// pub struct Structure {
638/// pub name: String,
639/// pub size: i32,
640/// pub on: bool,
641/// }
642///
643/// fn main() -> std::io::Result<()> {
644/// append_json_lines(
645/// "example.jsonl",
646/// [
647/// Structure {
648/// name: "Foo Bar".into(),
649/// size: 42,
650/// on: true,
651/// },
652/// Structure {
653/// name: "Quux".into(),
654/// size: 23,
655/// on: false,
656/// },
657/// ],
658/// )?;
659/// assert_eq!(
660/// read_to_string("example.jsonl")?,
661/// concat!(
662/// "{\"name\":\"Foo Bar\",\"size\":42,\"on\":true}\n",
663/// "{\"name\":\"Quux\",\"size\":23,\"on\":false}\n",
664/// )
665/// );
666/// append_json_lines(
667/// "example.jsonl",
668/// [
669/// Structure {
670/// name: "Gnusto Cleesh".into(),
671/// size: 17,
672/// on: true,
673/// },
674/// Structure {
675/// name: "baz".into(),
676/// size: 69105,
677/// on: false,
678/// },
679/// ],
680/// )?;
681/// assert_eq!(
682/// read_to_string("example.jsonl")?,
683/// concat!(
684/// "{\"name\":\"Foo Bar\",\"size\":42,\"on\":true}\n",
685/// "{\"name\":\"Quux\",\"size\":23,\"on\":false}\n",
686/// "{\"name\":\"Gnusto Cleesh\",\"size\":17,\"on\":true}\n",
687/// "{\"name\":\"baz\",\"size\":69105,\"on\":false}\n",
688/// )
689/// );
690/// Ok(())
691/// }
692/// ```
693pub fn append_json_lines<P, I, T>(path: P, items: I) -> Result<()>
694where
695 P: AsRef<Path>,
696 I: IntoIterator<Item = T>,
697 T: Serialize,
698{
699 let mut fp = BufWriter::new(OpenOptions::new().append(true).create(true).open(path)?);
700 fp.write_json_lines(items)?;
701 fp.flush()
702}
703
704/// Iterate over JSON Lines values from a file.
705///
706/// `json_lines(path)` returns an iterator of values deserialized from the JSON
707/// Lines in the file at `path`.
708///
709/// The returned iterator has an `Item` type of `std::io::Result<T>`. Each
710/// call to `next()` has the same error conditions as
711/// [`JsonLinesReader::read()`].
712///
713/// # Errors
714///
715/// Has the same error conditions as [`File::open()`].
716///
717/// # Example
718///
719/// ```no_run
720/// use serde::Deserialize;
721/// use serde_jsonlines::json_lines;
722/// use std::fs::write;
723/// use std::io::Result;
724///
725/// #[derive(Debug, Deserialize, PartialEq)]
726/// pub struct Structure {
727/// pub name: String,
728/// pub size: i32,
729/// pub on: bool,
730/// }
731///
732/// fn main() -> Result<()> {
733/// write(
734/// "example.jsonl",
735/// concat!(
736/// "{\"name\": \"Foo Bar\", \"on\":true,\"size\": 42 }\n",
737/// "{ \"name\":\"Quux\", \"on\" : false ,\"size\": 23}\n",
738/// " {\"name\": \"Gnusto Cleesh\" , \"on\": true, \"size\": 17}\n",
739/// ),
740/// )?;
741/// let items = json_lines::<Structure, _>("example.jsonl")?.collect::<Result<Vec<_>>>()?;
742/// assert_eq!(
743/// items,
744/// [
745/// Structure {
746/// name: "Foo Bar".into(),
747/// size: 42,
748/// on: true,
749/// },
750/// Structure {
751/// name: "Quux".into(),
752/// size: 23,
753/// on: false,
754/// },
755/// Structure {
756/// name: "Gnusto Cleesh".into(),
757/// size: 17,
758/// on: true,
759/// },
760/// ]
761/// );
762/// Ok(())
763/// }
764/// ```
765pub fn json_lines<T, P: AsRef<Path>>(path: P) -> Result<JsonLinesFileIter<T>> {
766 let fp = BufReader::new(File::open(path)?);
767 Ok(fp.json_lines())
768}