osm_io/osm/pbf/
file_block.rs

1use std::fs::File;
2use std::io::{Cursor, Read, Seek, SeekFrom, Write};
3
4use anyhow::{anyhow, Context};
5use flate2::bufread::ZlibDecoder;
6use flate2::Compression;
7use flate2::write::ZlibEncoder;
8use prost::Message;
9
10use crate::osmpbf;
11use crate::osm::model::bounding_box::BoundingBox;
12use crate::osm::model::element::Element;
13use crate::osm::pbf::blob_desc::BlobDesc;
14use crate::osm::pbf::compression_type::CompressionType;
15use crate::osm::pbf::file_block_metadata::FileBlockMetadata;
16use crate::osm::pbf::osm_data::OsmData;
17use crate::osm::pbf::osm_header::OsmHeader;
18use crate::osmpbf::BlobHeader;
19use crate::osmpbf::blob::Data;
20
/// A header or data file block in *.osm.pbf file
///
/// `FileBlock::new` selects the variant from the blob type string:
/// `"OSMHeader"` produces [`FileBlock::Header`] and `"OSMData"` produces
/// [`FileBlock::Data`].
#[derive(Debug)]
pub enum FileBlock {
    /// A block holding a decoded OSM header.
    Header {
        /// Metadata built from the blob type and the block index.
        metadata: FileBlockMetadata,
        /// The decoded header payload.
        header: OsmHeader,
    },
    /// A block holding decoded OSM data.
    Data {
        /// Metadata built from the blob type and the block index.
        metadata: FileBlockMetadata,
        /// The decoded data payload.
        data: OsmData,
    },
}
33
34impl FileBlock {
35    pub(crate) fn new(index: usize, blob_type: String, data: Vec<u8>) -> Result<FileBlock, anyhow::Error> {
36        let blob_type_str = blob_type.as_str();
37        match blob_type_str {
38            "OSMHeader" => {
39                Ok(
40                    FileBlock::Header {
41                        metadata: FileBlockMetadata::new(blob_type, index),
42                        header: OsmHeader::from_bytes(data)?,
43                    }
44                )
45            }
46            "OSMData" => {
47                Ok(
48                    FileBlock::Data {
49                        metadata: FileBlockMetadata::new(blob_type, index),
50                        data: OsmData::new(data)?,
51                    }
52                )
53            }
54            _ => {
55                Err(anyhow!("Failed to decode file block"))
56            }
57        }
58    }
59
60    #[allow(dead_code)]
61    pub(crate) fn index(&self) -> usize {
62        match self {
63            FileBlock::Header { metadata, header: _header } => {
64                metadata.index()
65            }
66            FileBlock::Data { metadata, data: _data } => {
67                metadata.index()
68            }
69        }
70    }
71
72    pub(crate) fn from_elements(index: usize, elements: Vec<Element>) -> FileBlock {
73        FileBlock::Data {
74            metadata: FileBlockMetadata::new("OSMData".to_string(), index),
75            data: OsmData::from_elements(elements, None),
76        }
77    }
78
79    #[allow(dead_code)]
80    pub(crate) fn compute_bounding_box(&self) -> Option<BoundingBox> {
81        match self {
82            FileBlock::Header { metadata: _, header } => {
83                header.info().bounding_box().clone()
84            }
85            FileBlock::Data { metadata: _, data } => {
86                data.compute_bounding_box()
87            }
88        }
89    }
90
91    pub(crate) fn from_header(osm_header: OsmHeader) -> FileBlock {
92        FileBlock::Header {
93            metadata: FileBlockMetadata::new("OSMHeader".to_string(), 0),
94            header: osm_header.clone(),
95        }
96    }
97
98    fn zlib_decode(data: Vec<u8>, raw_size: usize) -> Result<Vec<u8>, anyhow::Error> {
99        let mut decoder = ZlibDecoder::new(data.as_slice());
100        let mut decoded = vec![0_u8; raw_size];
101        decoder.read_exact(&mut decoded)?;
102        Ok(decoded)
103    }
104
105    fn zlib_encode(buf: Vec<u8>, compression_level: Compression) -> Result<Vec<u8>, anyhow::Error> {
106        let mut encoder = ZlibEncoder::new(Vec::new(), compression_level);
107        encoder.write_all(buf.as_slice())?;
108        encoder.flush()?;
109        let encoded = encoder.finish()?;
110        Ok(encoded)
111    }
112
113    pub(crate) fn read_blob_data(blob: osmpbf::Blob) -> Result<Vec<u8>, anyhow::Error> {
114        match blob.data {
115            None => {
116                Err(
117                    anyhow!("Input file too short")
118                )
119            }
120            Some(data) => {
121                match data {
122                    Data::Raw(raw_data) => {
123                        // Uncompressed data - return as-is
124                        Ok(raw_data)
125                    }
126                    Data::ZlibData(zlib_data) => {
127                        // for now ignore that the uncompressed size is optional
128                        FileBlock::zlib_decode(zlib_data, blob.raw_size.unwrap() as usize)
129                    }
130                    Data::LzmaData(_) => {
131                        Err(
132                            // TODO:
133                            anyhow!("Lzma data type not implemented")
134                        )
135                    }
136                    Data::ObsoleteBzip2Data(_) => {
137                        Err(
138                            anyhow!("Obsolete Bzip data type not implemented")
139                        )
140                    }
141                    Data::Lz4Data(_) => {
142                        Err(
143                            // TODO:
144                            anyhow!("Lz4 data type not implemented")
145                        )
146                    }
147                    Data::ZstdData(_) => {
148                        Err(
149                            anyhow!("Zstd data type not implemented")
150                        )
151                    }
152                }
153            }
154        }
155    }
156
157    pub(crate) fn from_blob_desc(blob_desc: &BlobDesc) -> Result<FileBlock, anyhow::Error> {
158        let mut file = File::open(blob_desc.path()).with_context(
159            || anyhow!("Failed to open {:?} for reading", blob_desc.path())
160        )?;
161        file.seek(SeekFrom::Start(blob_desc.start())).with_context(
162            || anyhow!("Failed seek to {} in {:?} ", blob_desc.start(), blob_desc.path())
163        )?;
164        let mut blob_buffer = vec![0; blob_desc.length() as usize];
165        file.read_exact(&mut blob_buffer).ok().with_context(
166            || anyhow!("Failed to read {} bytes from {:?} ", blob_desc.length(), blob_desc.path())
167        )?;
168        Self::deserialize(blob_desc, &mut blob_buffer)
169    }
170
171    pub(crate) fn serialize(file_block: &FileBlock, compression: CompressionType) -> Result<(Vec<u8>, Vec<u8>), anyhow::Error> {
172        let (blob_type, compression_level, block_data) = match file_block {
173            FileBlock::Header { metadata: _, header } => {
174                ("OSMHeader".to_string(), Compression::none(), header.serialize()?)
175            }
176            FileBlock::Data { metadata: _, data } => {
177                ("OSMData".to_string(), Compression::default(), data.serialize()?)
178            }
179        };
180
181        let mut raw_size = None;
182        let mut data = None;
183        if !block_data.is_empty() {
184            raw_size = Some(block_data.len() as i32);
185            data = match compression {
186                CompressionType::Uncompressed => {
187                    Some(Data::Raw(block_data))
188                }
189                CompressionType::Zlib => {
190                    let encoded = Self::zlib_encode(block_data, compression_level)?;
191                    Some(Data::ZlibData(encoded))
192                }
193            };
194        }
195
196        let blob = osmpbf::Blob {
197            raw_size,
198            data,
199        };
200        let body = blob.encode_to_vec();
201
202        let blob_header = BlobHeader {
203            r#type: blob_type,
204            indexdata: None,
205            datasize: body.len() as i32,
206        };
207
208
209        let header = blob_header.encode_to_vec();
210
211        Ok((header, body))
212    }
213
214    fn deserialize(blob_desc: &BlobDesc, blob_buffer: &mut Vec<u8>) -> Result<FileBlock, anyhow::Error> {
215        // use BlobDesc rather than BlobHeader to skip reading again the blob header
216        let protobuf_blob = osmpbf::Blob::decode(&mut Cursor::new(blob_buffer)).with_context(
217            || anyhow!("Failed to decode a message from blob {} from {:?}", blob_desc.index(), blob_desc.path())
218        )?;
219        let data = FileBlock::read_blob_data(protobuf_blob)?;
220        FileBlock::new(blob_desc.index(), blob_desc.t(), data)
221    }
222
223    #[allow(dead_code)]
224    pub(crate) fn metadata(&self) -> &FileBlockMetadata {
225        match self {
226            FileBlock::Header { metadata, header: _ } => {
227                metadata
228            }
229            FileBlock::Data { metadata, data: _ } => {
230                metadata
231            }
232        }
233    }
234
235    pub(crate) fn as_osm_header(&self) -> Result<&OsmHeader, anyhow::Error> {
236        match self {
237            FileBlock::Header { header, .. } => {
238                Ok(header)
239            }
240            FileBlock::Data { .. } => {
241                Err(anyhow!("Not an OSMHeader"))
242            }
243        }
244    }
245    pub(crate) fn is_osm_header(&self) -> bool {
246        match self {
247            FileBlock::Header { header: _, .. } => {
248                true
249            }
250            FileBlock::Data { .. } => {
251                false
252            }
253        }
254    }
255
256    pub(crate) fn is_osm_data(&self) -> bool {
257        !self.is_osm_header()
258    }
259
260    #[allow(dead_code)]
261    pub(crate) fn as_osm_data(&self) -> Result<&OsmData, anyhow::Error> {
262        match self {
263            FileBlock::Header { .. } => {
264                Err(anyhow!("Not an OSMData"))
265            }
266            FileBlock::Data { data, .. } => {
267                Ok(data)
268            }
269        }
270    }
271
272    #[allow(dead_code)]
273    pub(crate) fn elements(&self) -> &Vec<Element> {
274        self.as_osm_data().unwrap().elements()
275    }
276
277    pub(crate) fn take_elements(&mut self) -> Vec<Element> {
278        match self {
279            FileBlock::Header { .. } => {
280                panic!("Not a Data variant")
281            }
282            FileBlock::Data { data, .. } => {
283                data.take_elements()
284            }
285        }
286    }
287}
288
289impl Default for FileBlock {
290    fn default() -> Self {
291        FileBlock::Data { metadata: Default::default(), data: Default::default() }
292    }
293}