Skip to main content

rpdfium_core/
fx_stream.rs

//! PDF source data abstraction.
//!
//! The [`PdfSource`] trait abstracts over the source of PDF data, allowing
//! the same parsing code to work with heap-owned data (`Vec<u8>`, `Arc<[u8]>`)
//! and future backends like memory-mapped files.
//!
//! On WASM, PDF data is provided as `Vec<u8>` from a JavaScript `ArrayBuffer`.
//! On native platforms, `Arc<[u8]>` is the default. A future `MmapSource`
//! behind the `mmap` feature flag will enable memory-mapped file access.
10
11use std::sync::Arc;
12
/// A contiguous block of PDF bytes that can be shared between threads.
///
/// Any type that exposes its contents through [`AsRef<[u8]>`] and is
/// `Send + Sync` can opt in with an empty `impl`: both methods below have
/// default bodies derived from the exposed byte slice.
pub trait PdfSource: AsRef<[u8]> + Send + Sync {
    /// Number of bytes in the source, taken from the slice returned by `as_ref`.
    fn len(&self) -> usize {
        let bytes: &[u8] = self.as_ref();
        bytes.len()
    }

    /// Returns `true` when [`len`](Self::len) reports zero bytes.
    ///
    /// The check goes through `len`, so an implementor that overrides `len`
    /// gets a consistent `is_empty` for free.
    fn is_empty(&self) -> bool {
        matches!(self.len(), 0)
    }
}
29
30/// Heap-owned byte slice (safe, works everywhere including WASM).
31impl PdfSource for Arc<[u8]> {}
32
33/// Owned byte vector (safe, works everywhere including WASM).
34impl PdfSource for Vec<u8> {}
35
/// Sink that accepts bytes one block at a time.
///
/// Interface-only port of `IFX_WriteStream` (fx_stream.h).
pub trait WriteStream {
    /// Append `data` to the stream and report the outcome as a `bool`.
    ///
    /// An empty slice is a no-op and returns `true`.
    fn write_block(&mut self, data: &[u8]) -> bool;
}
43
44/// Trait for a write stream that exposes its current byte offset.
45///
46/// Interface-only port of `IFX_ArchiveStream` (fx_stream.h).
47pub trait ArchiveStream: WriteStream {
48    fn current_offset(&self) -> i64;
49}
50
51/// Trait for a seekable writable stream.
52///
53/// Interface-only port of `IFX_SeekableWriteStream` (fx_stream.h).
54pub trait SeekableWriteStream: WriteStream {
55    /// Returns the total size of the stream in bytes.
56    ///
57    /// Corresponds to `IFX_StreamWithSize::GetSize()` in PDFium upstream.
58    fn size(&self) -> i64;
59
60    /// Upstream-aligned alias for [`size`](Self::size).
61    ///
62    /// Corresponds to `IFX_StreamWithSize::GetSize()` in PDFium upstream.
63    #[inline]
64    fn get_size(&self) -> i64 {
65        self.size()
66    }
67
68    fn flush(&mut self) -> bool;
69}
70
71/// Trait combining seekable read and write access.
72///
73/// Interface-only port of `IFX_SeekableStream` (fx_stream.h).
74pub trait SeekableStream: SeekableWriteStream {}
75
76/// Concrete `WriteStream` implementation for `Vec<u8>`.
77impl WriteStream for Vec<u8> {
78    fn write_block(&mut self, data: &[u8]) -> bool {
79        self.extend_from_slice(data);
80        true
81    }
82}
83
/// Byte sink for PDF output that also knows its current offset.
///
/// Port of `IFX_ArchiveStream` (fx_stream.h): write + offset tracking.
/// Failures are reported through `io::Result` rather than a `bool`
/// (idiomatic Rust error handling).
pub trait PdfWrite {
    /// Write all of `data` to the output, or return the I/O error that prevented it.
    fn write_bytes(&mut self, data: &[u8]) -> std::io::Result<()>;

    /// Byte offset in the output at which the next write will land.
    fn position(&self) -> u64;
}
94
95/// `PdfWrite` implementation for `Vec<u8>`.
96impl PdfWrite for Vec<u8> {
97    fn write_bytes(&mut self, data: &[u8]) -> std::io::Result<()> {
98        self.extend_from_slice(data);
99        Ok(())
100    }
101
102    fn position(&self) -> u64 {
103        self.len() as u64
104    }
105}
106
/// Position-tracking writer wrapper.
///
/// Pairs a writer with a running byte offset. The methods that write through
/// it are provided for any `W` implementing [`std::io::Write`]; the bound is
/// stated on those `impl` blocks rather than on the type itself.
///
/// `Debug` is available whenever the wrapped writer is `Debug`.
#[derive(Debug)]
pub struct CountingWriter<W> {
    /// The wrapped writer that receives the bytes.
    inner: W,
    /// Running byte offset: the starting offset plus every byte written so far.
    pos: u64,
}
114
115impl<W: std::io::Write> CountingWriter<W> {
116    /// Create a new counting writer starting at offset 0.
117    pub fn new(inner: W) -> Self {
118        Self { inner, pos: 0 }
119    }
120
121    /// Create a counting writer with an initial offset (e.g. for incremental appends).
122    pub fn with_offset(inner: W, offset: u64) -> Self {
123        Self { inner, pos: offset }
124    }
125
126    /// Consume the writer and return the inner writer.
127    pub fn into_inner(self) -> W {
128        self.inner
129    }
130
131    /// Flush the underlying writer.
132    pub fn flush(&mut self) -> std::io::Result<()> {
133        self.inner.flush()
134    }
135}
136
137impl<W: std::io::Write> PdfWrite for CountingWriter<W> {
138    fn write_bytes(&mut self, data: &[u8]) -> std::io::Result<()> {
139        self.inner.write_all(data)?;
140        self.pos += data.len() as u64;
141        Ok(())
142    }
143
144    fn position(&self) -> u64 {
145        self.pos
146    }
147}
148
/// In-memory stream supporting both read and write access.
///
/// Port of `CFX_MemoryStream` (cfx_memorystream.h/cpp).
/// Implements both [`PdfSource`] (read) and [`PdfWrite`] (write).
///
/// The common value traits are derived so a stream can be printed while
/// debugging, duplicated, and compared byte-for-byte.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MemoryStream {
    /// Backing buffer holding every byte of the stream.
    data: Vec<u8>,
}
156
157impl MemoryStream {
158    /// Create an empty `MemoryStream`.
159    pub fn new() -> Self {
160        Self { data: Vec::new() }
161    }
162
163    /// Create a `MemoryStream` from existing bytes.
164    pub fn from_bytes(data: Vec<u8>) -> Self {
165        Self { data }
166    }
167
168    /// Consume the stream and return the underlying bytes.
169    pub fn into_bytes(self) -> Vec<u8> {
170        self.data
171    }
172
173    /// Return a slice of the stream's bytes.
174    pub fn as_bytes(&self) -> &[u8] {
175        &self.data
176    }
177}
178
179impl Default for MemoryStream {
180    fn default() -> Self {
181        Self::new()
182    }
183}
184
185impl AsRef<[u8]> for MemoryStream {
186    fn as_ref(&self) -> &[u8] {
187        &self.data
188    }
189}
190
191// Vec<u8> is Send + Sync, so MemoryStream { data: Vec<u8> } is too.
192impl PdfSource for MemoryStream {}
193
194impl PdfWrite for MemoryStream {
195    fn write_bytes(&mut self, data: &[u8]) -> std::io::Result<()> {
196        self.data.extend_from_slice(data);
197        Ok(())
198    }
199
200    fn position(&self) -> u64 {
201        self.data.len() as u64
202    }
203}
204
205impl WriteStream for MemoryStream {
206    fn write_block(&mut self, data: &[u8]) -> bool {
207        self.data.extend_from_slice(data);
208        true
209    }
210}
211
#[cfg(test)]
mod tests {
    use super::*;

    // ---- PdfSource on Arc<[u8]> and Vec<u8> ----

    #[test]
    fn test_arc_source_len() {
        let src: Arc<[u8]> = Arc::from(&b"hello"[..]);
        assert!(!PdfSource::is_empty(&src));
        assert_eq!(PdfSource::len(&src), 5);
    }

    #[test]
    fn test_arc_source_empty() {
        let src: Arc<[u8]> = Arc::from(&b""[..]);
        assert!(PdfSource::is_empty(&src));
        assert_eq!(PdfSource::len(&src), 0);
    }

    #[test]
    fn test_vec_source_len() {
        let src = Vec::<u8>::from(&b"hello world"[..]);
        assert!(!PdfSource::is_empty(&src));
        assert_eq!(PdfSource::len(&src), 11);
    }

    #[test]
    fn test_vec_source_empty() {
        let src = Vec::<u8>::new();
        assert!(PdfSource::is_empty(&src));
        assert_eq!(PdfSource::len(&src), 0);
    }

    #[test]
    fn test_arc_source_as_ref() {
        let src: Arc<[u8]> = Arc::from(&b"PDF"[..]);
        let bytes: &[u8] = src.as_ref();
        assert_eq!(bytes, b"PDF");
    }

    #[test]
    fn test_vec_source_as_ref() {
        let src = b"PDF".to_vec();
        let bytes: &[u8] = src.as_ref();
        assert_eq!(bytes, b"PDF");
    }

    #[test]
    fn test_pdf_source_is_send_sync() {
        // `PdfSource` has `Send + Sync` as supertraits, so this bound alone
        // is enough to prove both.
        fn requires_source<T>()
        where
            T: PdfSource,
        {
        }
        requires_source::<Arc<[u8]>>();
        requires_source::<Vec<u8>>();
    }

    #[test]
    fn test_pdf_source_object_safe() {
        // The trait must stay usable behind `dyn`.
        let src = b"test".to_vec();
        let erased: &dyn PdfSource = &src;
        assert_eq!(erased.len(), 4);
    }

    // ---- WriteStream / PdfWrite on Vec<u8> ----

    #[test]
    fn test_vec_write_stream_write_block() {
        let mut out = Vec::<u8>::new();
        for chunk in [&b"hello"[..], &b" world"[..]] {
            assert!(out.write_block(chunk));
        }
        assert_eq!(out.as_slice(), b"hello world");
    }

    #[test]
    fn test_vec_write_stream_empty_slice() {
        let mut out = Vec::<u8>::new();
        assert!(out.write_block(&[]));
        assert!(out.is_empty());
    }

    #[test]
    fn test_vec_pdf_write_tracks_position() {
        let mut out = Vec::<u8>::new();
        assert_eq!(out.position(), 0);
        out.write_bytes(b"abc").unwrap();
        assert_eq!(out.position(), 3);
        out.write_bytes(b"de").unwrap();
        assert_eq!(out.position(), 5);
    }

    // ---- CountingWriter ----

    #[test]
    fn test_counting_writer_tracks_position() {
        let mut counter = CountingWriter::new(Vec::<u8>::new());
        assert_eq!(counter.position(), 0);
        counter.write_bytes(b"hello").unwrap();
        assert_eq!(counter.position(), 5);
        counter.write_bytes(b" world").unwrap();
        assert_eq!(counter.position(), 11);
    }

    #[test]
    fn test_counting_writer_with_offset() {
        let mut counter = CountingWriter::with_offset(Vec::<u8>::new(), 100);
        assert_eq!(counter.position(), 100);
        counter.write_bytes(b"data").unwrap();
        assert_eq!(counter.position(), 104);
    }

    #[test]
    fn test_counting_writer_into_inner() {
        let mut counter = CountingWriter::new(Vec::<u8>::new());
        counter.write_bytes(b"test").unwrap();
        let recovered = counter.into_inner();
        assert_eq!(recovered.as_slice(), b"test");
    }

    // ---- MemoryStream ----

    #[test]
    fn test_memory_stream_default_is_empty() {
        let stream = MemoryStream::default();
        assert_eq!(stream.position(), 0);
        assert!(stream.as_bytes().is_empty());
    }

    #[test]
    fn test_memory_stream_new_is_empty() {
        let stream = MemoryStream::new();
        assert!(stream.as_bytes().is_empty());
    }

    #[test]
    fn test_memory_stream_from_bytes() {
        let stream = MemoryStream::from_bytes(Vec::from(&b"hello"[..]));
        assert_eq!(stream.position(), 5);
        assert_eq!(stream.as_bytes(), b"hello");
    }

    #[test]
    fn test_memory_stream_write_and_read_back() {
        let mut stream = MemoryStream::new();
        for part in [&b"PDF"[..], &b"-1.7"[..]] {
            stream.write_bytes(part).unwrap();
        }
        assert_eq!(stream.as_bytes(), b"PDF-1.7");
        assert_eq!(stream.position(), 7);
    }

    #[test]
    fn test_memory_stream_into_bytes() {
        let mut stream = MemoryStream::new();
        stream.write_bytes(b"data").unwrap();
        let recovered = stream.into_bytes();
        assert_eq!(recovered.as_slice(), b"data");
    }

    #[test]
    fn test_memory_stream_write_block() {
        let mut stream = MemoryStream::new();
        assert!(stream.write_block(b"block"));
        assert_eq!(stream.as_bytes(), b"block");
    }

    #[test]
    fn test_memory_stream_as_pdf_source() {
        let stream = MemoryStream::from_bytes(Vec::from(&b"hello"[..]));
        assert!(!PdfSource::is_empty(&stream));
        assert_eq!(PdfSource::len(&stream), 5);
    }

    #[test]
    fn test_memory_stream_is_send_sync() {
        fn requires_send_sync<T>()
        where
            T: Send + Sync,
        {
        }
        requires_send_sync::<MemoryStream>();
    }
}