disco_quick/
reader.rs

1pub use crate::artist::ArtistsReader;
2pub use crate::label::LabelsReader;
3pub use crate::master::MastersReader;
4pub use crate::release::ReleasesReader;
5use flate2::read::GzDecoder;
6use quick_xml::events::Event;
7use quick_xml::Error as XmlError;
8use std::fmt;
9use std::fs::File;
10use std::io::{BufRead, BufReader, Error as IoError, Seek};
11use std::path::Path;
12use thiserror::Error;
13
14pub type XmlReader = quick_xml::Reader<Box<dyn BufRead>>;
15
16pub fn get_xml_reader(path: &Path) -> Result<XmlReader, IoError> {
17    let file = File::open(path)?;
18    let gz = GzDecoder::new(file);
19    let reader: Box<dyn BufRead> = if gz.header().is_some() {
20        Box::new(BufReader::new(gz))
21    } else {
22        let mut reader = gz.into_inner();
23        reader.rewind()?;
24        Box::new(BufReader::new(reader))
25    };
26    Ok(quick_xml::Reader::from_reader(reader))
27}
28
29pub enum DiscogsReader {
30    Artists(Box<ArtistsReader>),
31    Labels(Box<LabelsReader>),
32    Masters(Box<MastersReader>),
33    Releases(Box<ReleasesReader>),
34}
35
36impl DiscogsReader {
37    /// Open an XML file at the given path, and return the appropriate reader based on its contents.
38    /// The file can be either uncompressed or gzip compressed.
39    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<DiscogsReader, ReaderError> {
40        // Since GzDecoder doesn't impl Seek, we open the file twice. Once to read the start tag,
41        // then again so the parsers can read from the start of the file, which is necessary for
42        // old versions of the dump that contain e.g. <artist> as the first tag, not <artists>
43        let path = path.as_ref();
44        let start_tag = {
45            let xml_reader = get_xml_reader(path)?;
46            read_start_tag(xml_reader)?
47        };
48        let xml_reader = get_xml_reader(path)?;
49        let buf = Vec::with_capacity(4096);
50        let reader = match start_tag.as_ref() {
51            "artists" | "artist" => {
52                DiscogsReader::Artists(Box::new(ArtistsReader::new(xml_reader, buf)))
53            }
54            "labels" | "label" => {
55                DiscogsReader::Labels(Box::new(LabelsReader::new(xml_reader, buf)))
56            }
57            "masters" | "master" => {
58                DiscogsReader::Masters(Box::new(MastersReader::new(xml_reader, buf)))
59            }
60            "releases" | "release" => {
61                DiscogsReader::Releases(Box::new(ReleasesReader::new(xml_reader, buf)))
62            }
63            _ => {
64                return Err(ReaderError::InvalidStartTag(start_tag));
65            }
66        };
67        Ok(reader)
68    }
69}
70
71fn read_start_tag(mut reader: XmlReader) -> Result<String, ReaderError> {
72    let mut buf = Vec::with_capacity(4096);
73    let start_event = loop {
74        match reader.read_event_into(&mut buf)? {
75            Event::Start(ev) => break ev,
76            Event::Eof => return Err(ReaderError::NoStartTag),
77            _ => continue,
78        }
79    };
80    Ok(String::from_utf8_lossy(start_event.name().as_ref()).into_owned())
81}
82
83#[derive(Error, Debug)]
84pub enum ReaderError {
85    #[error(transparent)]
86    IoError(#[from] IoError),
87    #[error(transparent)]
88    XmlError(#[from] XmlError),
89    #[error("No start tag present in file")]
90    NoStartTag,
91    #[error("Invalid start tag present in file: {0}")]
92    InvalidStartTag(String),
93}
94
95impl fmt::Display for DiscogsReader {
96    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
97        let name = match &self {
98            DiscogsReader::Artists(_) => "artists",
99            DiscogsReader::Labels(_) => "labels",
100            DiscogsReader::Masters(_) => "masters",
101            DiscogsReader::Releases(_) => "releases",
102        };
103        write!(f, "{name}")
104    }
105}