1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
use byteorder::{BigEndian, ReadBytesExt};
use bytes::Buf;
use osm_pbf_proto::fileformat::blob::Data;
pub use osm_pbf_proto::fileformat::{Blob as PbfBlob, BlobHeader as PbfBlobHeader};
use osm_pbf_proto::osmformat::{HeaderBlock, PrimitiveBlock as PbfPrimitiveBlock};
use osm_pbf_proto::protobuf::{self as pb, CodedInputStream, Message};
use std::fs::File;
use std::io::{self, Read};
use std::iter;
use std::path::Path;

use crate::data::OSMDataBlob;
use crate::error::{Error, Result};

/// Largest value, in bytes, accepted for the length prefix that precedes each
/// blob header (64 KiB). Larger prefixes are rejected in `_read_blob_header`.
const MAX_HEADER_SIZE: u32 = 64 * 1024;
/// Largest value, in bytes, accepted for a blob header's `datasize` (32 MiB).
///
/// NOTE(review): despite the name, the check in `_read_blob_header` compares
/// this against the `datasize` declared in the header, not against the size of
/// any decompressed output — confirm that this is the intended limit.
const MAX_UNCOMPRESSED_DATA_SIZE: usize = 32 * 1024 * 1024;

/// A blob payload that is either still in its serialized [`PbfBlob`] form or
/// has already been decoded into a message of type `M`.
#[derive(PartialEq, Clone, Debug)]
pub enum Blob<M> {
    /// The blob message as parsed from the input; its payload is not decoded yet.
    Encoded(PbfBlob),
    /// The decoded message.
    Decoded(M),
}

impl<M> Blob<M> {
    const INST: Self = Self::Encoded(PbfBlob {
        raw_size: None,
        data: None,
        special_fields: pb::SpecialFields::new(),
    });

    #[inline]
    const fn new(blob: PbfBlob) -> Self {
        Self::Encoded(blob)
    }
}

impl<M> Default for Blob<M> {
    /// Returns an `Encoded` blob wrapping a freshly created, empty `PbfBlob`.
    fn default() -> Self {
        let empty = PbfBlob::new();
        Self::Encoded(empty)
    }
}

impl<M: Message> Blob<M> {
    /// Consumes the blob and returns the decoded message.
    ///
    /// # Errors
    /// Propagates any error returned by [`Blob::decode`].
    pub fn decode_into(mut self) -> Result<M> {
        self.decode()?;
        match self {
            Self::Decoded(message) => Ok(message),
            // A successful `decode` always leaves the blob in the `Decoded` state.
            Self::Encoded(_) => unreachable!(),
        }
    }

    pub fn decode(&mut self) -> Result<&mut M> {
        if let Self::Encoded(d) = self {
            let r = match &d.data {
                Some(Data::Raw(r)) => M::parse_from_tokio_bytes(r)?,
                Some(Data::ZlibData(z)) => {
                    let mut decoder = flate2::bufread::ZlibDecoder::new(io::Cursor::new(z));
                    M::parse_from_reader(&mut decoder)?
                }
                Some(Data::LzmaData(z)) => {
                    let mut decoder = xz2::bufread::XzDecoder::new(io::Cursor::new(z));
                    M::parse_from_reader(&mut decoder)?
                }
                None => M::new(),
                _ => {
                    return Err(Error::UnsupportedEncoding);
                }
            };
            *self = Self::Decoded(r);
        }
        let Self::Decoded(d) = self else {
            unreachable!();
        };
        Ok(d)
    }

    /// Reads a serialized blob message from `is` and merges its payload
    /// straight into a fresh `M`, without building an intermediate `PbfBlob`.
    ///
    /// Fields are dispatched on their raw tag, i.e. `(field_number << 3) | wire_type`;
    /// all handled payload fields are length-delimited (wire type 2).
    ///
    /// NOTE(review): tags without a dedicated arm — including the zlib/LZMA
    /// tags when the corresponding cargo feature is disabled — fall through to
    /// the skip arm, so a blob using such an encoding produces a message with
    /// nothing merged into it rather than an "unsupported encoding" error.
    ///
    /// # Errors
    /// Propagates stream/parse errors and the result of `check_initialized`.
    pub fn parse_and_decode(is: &mut CodedInputStream<'_>) -> pb::Result<M> {
        let mut data = M::new();
        while let Some(tag) = is.read_raw_tag_or_eof()? {
            match tag {
                10 => {
                    // Raw (1): the payload is the serialized message itself.
                    let len = is.read_raw_varint64()?;
                    // Restrict the stream to this field so `merge_from` stops at its end.
                    let old_limit = is.push_limit(len)?;
                    data.merge_from(is)?;
                    is.pop_limit(old_limit);
                }
                #[cfg(feature = "zlib")]
                26 => {
                    // ZlibData (3): decompress while parsing.
                    let len = is.read_raw_varint64()?;
                    let old_limit = is.push_limit(len)?;
                    let read: &mut dyn io::BufRead = is;
                    {
                        // The inner `is` shadows the outer stream only inside this scope.
                        let mut decoder = flate2::bufread::ZlibDecoder::new(read);
                        let mut is = CodedInputStream::new(&mut decoder);
                        data.merge_from(&mut is)?;
                    }
                    is.pop_limit(old_limit);
                }
                #[cfg(feature = "lzma")]
                34 => {
                    // LzmaData (4): decompress while parsing.
                    let len = is.read_raw_varint64()?;
                    let old_limit = is.push_limit(len)?;
                    let read: &mut dyn io::BufRead = is;
                    {
                        // The inner `is` shadows the outer stream only inside this scope.
                        let mut decoder = xz2::bufread::XzDecoder::new(read);
                        let mut is = CodedInputStream::new(&mut decoder);
                        data.merge_from(&mut is)?;
                    }
                    is.pop_limit(old_limit);
                }
                /*
                Not implemented; these tags currently hit the skip arm below.
                42 => { // OBSOLETEzip2Data (5)
                    todo!()
                },
                50 => { // Lz4Data (6)
                    todo!()
                },
                58 => { // ZstdData (7)
                    todo!()
                },
                */
                tag => {
                    // Any other field (e.g. the raw size or an unhandled encoding) is skipped.
                    pb::rt::skip_field_for_tag(tag, is)?;
                }
            };
        }
        data.check_initialized()?;
        Ok(data)
    }
}

/// Reader over the blobs of a PBF stream.
///
/// Holds the header block together with the underlying reader the remaining
/// blobs are pulled from.
#[derive(Debug)]
pub struct Blobs<R> {
    /// Header block; filled by `_read_header_block` when the value is built
    /// through `from_buf_read`.
    header: HeaderBlock,
    /// Underlying source the blobs are read from.
    reader: R,
}

impl<R> Blobs<R> {
    /// Consumes `self` and hands back the underlying reader; the stored
    /// header block is dropped.
    #[inline]
    pub fn into_reader(self) -> R {
        let Self { reader, .. } = self;
        reader
    }

    /// Borrows the stored header block.
    #[inline]
    pub fn header(&self) -> &HeaderBlock {
        let Self { header, .. } = self;
        header
    }
}

impl<R: AsRef<[u8]>> Blobs<io::Cursor<R>> {
    /// Builds a reader over an in-memory byte container by wrapping it in an
    /// `io::Cursor` and delegating to `from_buf_read`.
    ///
    /// # Errors
    /// Propagates any error from `from_buf_read`.
    #[inline]
    pub fn from_bytes(bytes: R) -> Result<Self> {
        let cursor = io::Cursor::new(bytes);
        Self::from_buf_read(cursor)
    }
}

impl<R: Read> Blobs<io::BufReader<R>> {
    /// Builds a reader over any `Read` source by wrapping it in an
    /// `io::BufReader` and delegating to `from_buf_read`.
    ///
    /// # Errors
    /// Propagates any error from `from_buf_read`.
    #[inline]
    pub fn from_read(read: R) -> Result<Self> {
        let buffered = io::BufReader::new(read);
        Self::from_buf_read(buffered)
    }
}

impl Blobs<io::BufReader<File>> {
    /// Opens the file at `path` and builds a buffered reader over it via
    /// `from_read`.
    ///
    /// # Errors
    /// Fails if the file cannot be opened, or with any error from `from_read`.
    #[inline]
    pub fn from_path(path: impl AsRef<Path>) -> Result<Self> {
        Self::from_read(File::open(path)?)
    }
}

impl<R: io::Seek> Blobs<R> {
    #[inline]
    pub fn rewind(&mut self) -> Result<()> {
        self.reader.rewind()?;
        Ok(())
    }
}

impl<R: io::BufRead> Blobs<R> {
    /// Builds a reader over `reader` and immediately reads the header block
    /// from it.
    ///
    /// # Errors
    /// Propagates any error from `_read_header_block`.
    #[inline]
    pub fn from_buf_read(reader: R) -> Result<Self> {
        let header = HeaderBlock::new();
        let mut blobs = Self { header, reader };
        blobs._read_header_block()?;
        Ok(blobs)
    }

    /// Reads the next blob header: a big-endian `u32` length prefix followed
    /// by that many bytes of serialized `PbfBlobHeader`.
    ///
    /// Returns `Ok(None)` when reading the length prefix reports
    /// `UnexpectedEof`, which is treated as the regular end of the stream.
    /// NOTE(review): a prefix cut off after 1–3 bytes presumably reports the
    /// same error kind and is therefore not distinguished from a clean end —
    /// verify against `byteorder`'s `read_u32`.
    ///
    /// # Errors
    /// * `Error::IoError` for any other I/O failure while reading the prefix.
    /// * `Error::BlobHeaderToLarge` if the prefix exceeds `MAX_HEADER_SIZE`.
    /// * `Error::BlobDataToLarge` if the header's `datasize` exceeds
    ///   `MAX_UNCOMPRESSED_DATA_SIZE`.
    /// * Any error from parsing the header message.
    fn _read_blob_header(&mut self) -> Result<Option<PbfBlobHeader>> {
        let prefix = match self.reader.read_u32::<BigEndian>() {
            Ok(len) => len,
            // Running out of input right here is the expected end of the stream.
            Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
            Err(e) => return Err(Error::IoError(e)),
        };
        if prefix > MAX_HEADER_SIZE {
            return Err(Error::BlobHeaderToLarge);
        }

        let header: PbfBlobHeader = self.read_msg_exact(prefix as usize)?;
        let declared = header.datasize() as usize;
        if declared > MAX_UNCOMPRESSED_DATA_SIZE {
            return Err(Error::BlobDataToLarge);
        }
        Ok(Some(header))
    }

    /// Parses one message of type `M` from the next `exact_size` bytes of the
    /// underlying reader.
    ///
    /// The reader is capped with `take(exact_size)`, so the parser sees at most
    /// that many bytes.
    ///
    /// # Errors
    /// Propagates parse errors and the result of the final `check_eof`.
    fn read_msg_exact<M: Message>(&mut self, exact_size: usize) -> Result<M> {
        let mut limited = self.reader.by_ref().take(exact_size as u64);
        let mut input = CodedInputStream::from_buf_read(&mut limited);
        // Parse straight from the stream we already built. `parse_from_reader`
        // would wrap it in a second, separately buffered `CodedInputStream`
        // and copy every byte through that extra buffer.
        let msg = M::parse_from(&mut input)?;
        input.check_eof()?;
        Ok(msg)
    }

    /// Reads the next blob header and the still-encoded blob that follows it,
    /// whatever its type.
    ///
    /// Returns `Ok(None)` once `_read_blob_header` reports the end of the stream.
    ///
    /// # Errors
    /// Propagates errors from reading the header or parsing the blob.
    pub fn next_blob(&mut self) -> Result<Option<(PbfBlobHeader, PbfBlob)>> {
        match self._read_blob_header()? {
            None => Ok(None),
            Some(header) => {
                let size = header.datasize() as usize;
                let blob: PbfBlob = self.read_msg_exact(size)?;
                Ok(Some((header, blob)))
            }
        }
    }

    fn _read_header_block(&mut self) -> Result<()> {
        let Some(header) = self._read_blob_header()? else {
            return Err(io::ErrorKind::UnexpectedEof.into());
        };
        if header.type_() != "OSMHeader" {
            return Err(Error::UnexpectedBlobType(header.type_().to_string()));
        }
        let mut input = self.reader.by_ref().take(header.datasize() as u64);
        let mut input = CodedInputStream::from_buf_read(&mut input);
        self.header = Blob::parse_and_decode(&mut input)?;
        input.check_eof()?;
        Ok(())
    }

    pub fn next_primitive_block(&mut self) -> Result<Option<OSMDataBlob>> {
        let Some(header) = self._read_blob_header()? else {
            return Ok(None);
        };
        if header.type_() != "OSMData" {
            return Err(Error::UnexpectedBlobType(header.type_().to_string()));
        }
        let blob: PbfBlob = self.read_msg_exact(header.datasize() as usize)?;
        Ok(Some(Blob::Encoded(blob)))
    }

    pub fn next_primitive_block_decoded(&mut self) -> Result<Option<PbfPrimitiveBlock>> {
        let Some(header) = self._read_blob_header()? else {
            return Ok(None);
        };
        if header.type_() != "OSMData" {
            return Err(Error::UnexpectedBlobType(header.type_().to_string()));
        }
        let mut input = self.reader.by_ref().take(header.datasize() as u64);
        let mut input = CodedInputStream::from_buf_read(&mut input);
        let decoded = Blob::parse_and_decode(&mut input)?;
        input.check_eof()?;
        Ok(Some(decoded))
    }
}

impl<R: io::BufRead + io::Seek> Blobs<R> {
    /// Returns the next blob whose header satisfies `cond`, seeking past the
    /// payload of every blob that does not.
    ///
    /// Returns `Ok(None)` once `_read_blob_header` reports the end of the stream.
    ///
    /// # Errors
    /// Propagates errors from reading headers, seeking, or parsing the blob.
    fn next_blob_with(
        &mut self,
        cond: impl Fn(&PbfBlobHeader) -> bool,
    ) -> Result<Option<(PbfBlobHeader, PbfBlob)>> {
        loop {
            let header = match self._read_blob_header()? {
                Some(header) => header,
                None => return Ok(None),
            };
            let size = header.datasize() as u32;
            if !cond(&header) {
                // Not wanted: jump over the payload without parsing it.
                self.reader.seek(io::SeekFrom::Current(size as i64))?;
                continue;
            }
            let blob: PbfBlob = self.read_msg_exact(size as usize)?;
            return Ok(Some((header, blob)));
        }
    }
}

impl<R: io::BufRead> Iterator for Blobs<R> {
    type Item = Result<OSMDataBlob>;

    /// Yields the result of `next_primitive_block`, turning its `Ok(None)`
    /// into the end of iteration and passing errors through as items.
    #[inline]
    fn next(&mut self) -> Option<Result<OSMDataBlob>> {
        match self.next_primitive_block() {
            Ok(Some(blob)) => Some(Ok(blob)),
            Ok(None) => None,
            Err(err) => Some(Err(err)),
        }
    }
}

// Marks the iterator as fused.
// NOTE(review): `next` attempts another read from the underlying reader on
// every call, so this holds only if the reader keeps reporting end-of-file
// once it has done so — confirm for the readers used with `Blobs`.
impl<R: io::BufRead> iter::FusedIterator for Blobs<R> {}