xisf_rs/
lib.rs

1//! [![Crates.io](https://img.shields.io/crates/v/xisf-rs)](https://crates.io/crates/xisf-rs)
2//! ![Minimum rustc version](https://img.shields.io/badge/rustc-1.64+-lightgray.svg)
3//! ![License](https://img.shields.io/crates/l/xisf-rs.svg)
4//!
5//! An unaffiliated implementation of Pleiades Astrophoto's open-source Extensible Image Serialization Format (XISF) file format,
6//! the native image format for their flagship editing software PixInsight. Aims for 100% support for
7//! [spec version 1.0](https://pixinsight.com/doc/docs/XISF-1.0-spec/XISF-1.0-spec.html), as opposed to implementations such as
8//! [libXISF](https://gitea.nouspiro.space/nou/libXISF) or Pleiades Astrophoto's own [PixInsight Class Libraries](https://gitlab.com/pixinsight/PCL),
9//! which are written with 2D images in mind.
10//!
//! See the examples folder for a simple XISF to FITS converter powered by this library.
12//! <div class="warning">
13//!
14//! The examples folder is set up as part of a workspace, which turns off example auto-discovery.
15//! The command to run the program in subfolder `examples/NAME` from the root directory
16//! is `cargo run -p NAME` rather than `cargo run --example NAME`.
17//!
18//! </div>
19
20#![warn(missing_debug_implementations, rust_2018_idioms, missing_docs)]
21
22use byteorder::{LittleEndian, ReadBytesExt};
23use error_stack::{Report, Result, ResultExt, report};
24use libxml::{
25    parser::Parser as XmlParser,
26    readonly::RoNode,
27    xpath::Context as XpathContext,
28};
29use std::{
30    collections::HashMap,
31    ffi::CStr,
32    fs::File,
33    io::{BufReader, Read},
34    path::Path,
35};
36
37pub mod error;
38use error::{ParseNodeError, ParseNodeErrorKind::{self, *}, ReadFileError, ReadPropertyError};
39
40pub mod data_block;
41use data_block::{ChecksumAlgorithm, CompressionAlgorithm, CompressionLevel, Context};
42
43pub mod image;
44use image::Image;
45
46mod reference;
47pub(crate) use reference::*;
48
49pub mod property;
50use property::*;
51
52mod metadata;
53use metadata::Metadata;
54
/// Flags to alter the behavior of the reader
///
/// Every setter takes `&mut self` and returns `&mut Self`, so calls can be chained on an
/// existing binding. Bind the options to a variable first: chaining directly off
/// [`ReadOptions::new()`] and storing the result would only keep a reference to a
/// temporary that is dropped at the end of the statement.
///
/// # Example
///
/// ```rust
/// # use xisf_rs::ReadOptions;
/// let mut opts = ReadOptions::new();
/// opts.import_fits_keywords(false)
///     .clamp_to_bounds(false);
/// ```
#[derive(Clone, Debug)]
pub struct ReadOptions {
    /// Read FITSKeyword headers from the XML header
    pub(crate) import_fits_keywords: bool,
    /// Import FITSKeyword headers as XISF properties with the prefix FITS:
    pub(crate) fits_keywords_as_properties: bool,
    /// Clamp all pixel samples to the range specified in the bounds attribute
    pub(crate) clamp_to_bounds: bool,
}
impl ReadOptions {
    /// Alias for [`Default::default()`]
    pub fn new() -> Self {
        Self::default()
    }
    /// Read FITSKeyword headers from the XML header
    pub fn import_fits_keywords(&mut self, import: bool) -> &mut Self {
        self.import_fits_keywords = import;
        self
    }
    /// Import FITSKeyword headers as XISF &lt;Property&gt; tags with the prefix FITS:
    ///
    /// Has no effect if [`Self::import_fits_keywords()`] is false
    ///
    /// <div class="warning">Not currently respected</div>
    pub fn fits_keywords_as_properties(&mut self, convert: bool) -> &mut Self {
        self.fits_keywords_as_properties = convert;
        self
    }
    /// Clamp all pixel samples to the range specified in the bounds attribute
    ///
    /// For floating-point images: NaNs, infinities, and negative zeros are replaced with the lower bound
    ///
    /// <div class="warning">Not currently respected</div>
    pub fn clamp_to_bounds(&mut self, clamp: bool) -> &mut Self {
        self.clamp_to_bounds = clamp;
        self
    }
}
impl Default for ReadOptions {
    /// FITS keywords are imported (but not converted to properties) and samples are clamped
    fn default() -> Self {
        Self {
            import_fits_keywords: true,
            fits_keywords_as_properties: false,
            clamp_to_bounds: true,
        }
    }
}
109
110
111/// Flags to alter the behavior of the writer
112///
113/// # Example
114///
115/// ```rust
116/// # use xisf_rs::WriteOptions;
117/// # use xisf_rs::data_block::{ChecksumAlgorithm, CompressionAlgorithm, CompressionLevel};
118/// let opts = WriteOptions::new("My Awesome Astronomy Program")
119///     .checksum_algorithm(Some(ChecksumAlgorithm::Sha3_512))
120///     .compression_algorithm(Some((CompressionAlgorithm::Zlib, CompressionLevel::AUTO)));
121/// ```
122#[derive(Clone, Debug)]
123pub struct WriteOptions {
124    /// Name of the application using this library
125    pub(crate) creator_application: String,
126    /// Write FITS headers as FITSKeyword elements in the XML header
127    pub(crate) export_fits_keywords: bool,
128    /// Algorithm used for XISF data block checksum calculations
129    pub(crate) checksum_alg: Option<ChecksumAlgorithm>,
130    /// Algorithm used to compress XISF data blocks
131    pub(crate) compression_alg: Option<(CompressionAlgorithm, CompressionLevel)>,
132    /// Lower bound for floating-point pixel samples
133    pub(crate) fp_lower_bound: f64,
134    /// Upper bound for floating-point pixel samples
135    pub(crate) fp_upper_bound: f64,
136    /// Data blocks are allocated with block sizes of integer multiples of this value, in bytes
137    pub(crate) block_alignment_size: u16,
138    /// Max size (in bytes) that an XISF data block can be before it can no longer be inlined/embedded.
139    /// Recommended value: 3/4 the size of block_alignment_size (or a multiple of it), since base64 takes 4 chars to encode 3 bytes.
140    /// That is, a maximum-size inline data block can be base64-encoded into a buffer the same size as the block alignment size
141    pub(crate) max_inline_block_size: u16,
142}
143impl WriteOptions {
144    /// Creates a new `WriteOptions` with the given app name,
145    /// and all other options set to their default values
146    pub fn new(app_name: impl Into<String>) -> Self {
147        Self {
148            creator_application: app_name.into(),
149            export_fits_keywords: true,
150            checksum_alg: None,
151            compression_alg: None,
152            fp_lower_bound: 0.0,
153            fp_upper_bound: 1.0,
154            block_alignment_size: 4096,
155            max_inline_block_size: 3072, // a block of 3072 bytes takes 4096 bytes in base64 encoding
156        }
157    }
158
159    /// Name of the application using this library
160    pub fn app_name(&mut self, name: String) -> &mut Self {
161        self.creator_application = name;
162        self
163    }
164    /// Write FITS headers as FITSKeyword elements in the XML header
165    pub fn export_fits_keywords(&mut self, export: bool) -> &mut Self {
166        self.export_fits_keywords = export;
167        self
168    }
169    /// Algorithm used for XISF data block checksum calculations
170    pub fn checksum_algorithm(&mut self, alg: Option<ChecksumAlgorithm>) -> &mut Self {
171        self.checksum_alg = alg;
172        self
173    }
174    /// Algorithm used to compress XISF data blocks
175    pub fn compression_algorithm(&mut self, alg: Option<(CompressionAlgorithm, CompressionLevel)>) -> &mut Self {
176        self.compression_alg = alg;
177        self
178    }
179    /// Lower bound for floating-point pixel samples
180    pub fn fp_lower_bound(&mut self, low: f64) -> &mut Self {
181        self.fp_lower_bound = low;
182        self
183    }
184    /// Upper bound for floating-point pixel samples
185    pub fn fp_upper_bound(&mut self, high: f64) -> &mut Self {
186        self.fp_upper_bound = high;
187        self
188    }
189    /// Data blocks are allocated with block sizes of integer multiples of this value, in bytes
190    pub fn block_alignment_size(&mut self, size: u16) -> &mut Self {
191        self.block_alignment_size = size;
192        self
193    }
194    /// Max size (in bytes) that an XISF data block can be before it can no longer be inlined/embedded.
195    /// Recommended value: 3/4 the size of block_alignment_size (or a multiple of it), since base64 takes 4 chars to encode 3 bytes.
196    /// That is, a maximum-size inline data block can be base64-encoded into a buffer the same size as the block alignment size
197    pub fn max_inline_block_size(&mut self, size: u16) -> &mut Self {
198        self.max_inline_block_size = size;
199        self
200    }
201}
202
203fn report(kind: ParseNodeErrorKind) -> Report<ParseNodeError> {
204    report!(context(kind))
205}
206const fn context(kind: ParseNodeErrorKind) -> ParseNodeError {
207    ParseNodeError::new("xisf", kind)
208}
209
210/// An XISF file
211///
212/// Not limited to monolithic files (that is, XISH files are also supported)
213#[derive(Clone, Debug)]
214pub struct XISF {
215    images: Vec<Image>,
216    properties: HashMap<String, PropertyContent>,
217    metadata: Metadata,
218}
219impl XISF {
220    /// Opens a file from disk
221    pub fn open(filename: impl AsRef<Path>, opts: &ReadOptions) -> Result<(Self, Context), ReadFileError> {
222        let filename_path = filename.as_ref();
223        let filename_str = filename_path.to_string_lossy().to_string();
224        let _span_guard = tracing::debug_span!("open", filename = filename_str).entered();
225
226        let f = File::open(filename_path)
227            .change_context(ReadFileError)
228            .attach_printable_lazy(|| format!("Failed to open file {filename_str} for reading"))?;
229        let mut reader = BufReader::new(f);
230
231        let extension = filename_path.extension()
232            .and_then(|ext| ext.to_str())
233            .map(|ext| ext.to_lowercase());
234
235        let mut header_buf;
236        let ctx;
237        if let Some("xisf") = extension.as_deref() {
238            // verify that the first 8 bytes of the file are XISF0100
239            const CORRECT_SIGNATURE: [u8; 8] = *b"XISF0100";
240            let mut signature_buf = [0u8; 8];
241            reader
242                .read_exact(&mut signature_buf)
243                .change_context(ReadFileError)
244                .attach_printable("Failed to read 8-byte field \"file format signature\" at start of file")?;
245            if signature_buf != CORRECT_SIGNATURE {
246                return Err(report!(ReadFileError))
247                    .attach_printable(format!("Illegal file format signature: expected {CORRECT_SIGNATURE:?} (XISF0100), found {signature_buf:?}"));
248            }
249
250            // next 4 bytes are a little-endian encoded unsigned integer specifying the length of the XML header
251            let header_length = reader
252                .read_u32::<LittleEndian>()
253                .change_context(ReadFileError)
254                .attach_printable("Error parsing 4-byte field \"XML header length\" as little-endian u32")?;
255            tracing::debug!("Header size: {} bytes", header_length);
256
257            const RESERVED_BYTES: i64 = 4;
258            reader
259                .seek_relative(RESERVED_BYTES)
260                .change_context(ReadFileError)
261                .attach_printable("Failed to skip 4 reserved bytes")?;
262
263            // read header to buffer
264            header_buf = vec![0u8; header_length as usize];
265            reader
266                .read_exact(&mut header_buf)
267                .change_context(ReadFileError)
268                .attach_printable_lazy(|| format!("Failed to read {header_length}-byte XML header from file"))?;
269            ctx = Context::monolithic(reader);
270        } else if let Some("xish") = extension.as_deref() {
271            header_buf = vec![];
272            reader.read_to_end(&mut header_buf)
273                .change_context(ReadFileError)
274                .attach_printable("Failed to read XML header from XISH file")?;
275
276            // this unwrap is safe because:
277            // 1. when called on "dir/file.ext", returns Some("dir")
278            // 2. when called on "file.ext", returns Some("")
279            // 3. we know at minimum that the path contains a file because we just opened it
280            ctx = Context::distributed(filename_path.parent().unwrap().to_owned());
281        } else if let Some(bad) = extension {
282            return Err(report!(ReadFileError))
283                .attach_printable(format!("Unsupported file extension: {bad}"))
284        } else {
285            return Err(report!(ReadFileError))
286                .attach_printable("File must have an extension to be able to distinguish XISF files from XISH files")
287        };
288
289        // parse the header
290        let xml = XmlParser::default().parse_string(header_buf)
291            .change_context(ReadFileError)
292            .attach_printable("Failed to parse XML header")?;
293
294        // TODO: make an interface for xmlsec that fits my needs
295        // - supports embedded keys, propagates errors instead of panicking, cross-platform
296        // // verify signature before reading anything at all
297        // let ctx = XmlSecSignatureContext::new();
298        // match ctx.verify_document(&xml) {
299        //     Ok(true) | Err(XmlSecError::NodeNotFound) => (),
300        //     Ok(false) => return Err(report!(ReadFileError))
301        //         .attach_printable("XML header failed cryptographic signature verification"),
302        //     Err(e) => return Err(report!(e))
303        //         .change_context(ReadFileError)
304        //         .attach_printable("Error while verifying cryptographic signature of XML header"),
305        // }
306
307        // verify xml declaration version and encoding
308        let version = unsafe { CStr::from_ptr((*xml.doc_ptr()).version as *const i8) };
309        if version.to_bytes() != "1.0".as_bytes() {
310            return Err(report!(ReadFileError))
311                .attach_printable("XISF spec requires XML version 1.0 in XML declaration")
312        }
313
314        let encoding = unsafe { CStr::from_ptr((*xml.doc_ptr()).encoding as *const i8) };
315        if encoding.to_bytes() != "UTF-8".as_bytes() {
316            return Err(report!(ReadFileError))
317                .attach_printable("XISF spec requires UTF-8 encoding in XML declaration")
318        }
319
320        let root = xml.get_root_readonly()
321            .ok_or(report!(ReadFileError))
322            .attach_printable("No root element found in XML header")?;
323
324        // we need to pass down a global xpath context in order to resolve <Reference> elements
325        let xpath = XpathContext::new(&xml)
326            .map_err(|_| report!(ReadFileError))
327            .attach_printable("Failed to create XPATH context for XML header")?;
328
329        // xisf root element should have a default namespace, but does not associate a prefix with it
330        // in order to select these nodes by name with xpath, we have to assign them a prefix ourselves
331        // ! spec doesn't require this namespace to exist -- how to handle?
332        // frankly I don't know see the point of having the namespace is if you don't make it mandatory
333        xpath.register_namespace("xisf", "http://www.pixinsight.com/xisf")
334            .map_err(|_| report!(ReadFileError))
335            .attach_printable("Failed to associate prefix to xisf namespace in XML header")?;
336
337        if root.get_name() != "xisf" {
338            return Err(report!(ReadFileError))
339                .attach_printable("Root element in XML header must be named \"xisf\"");
340        } else {
341            Ok((
342                Self::parse_root_node(root, &xpath, opts)
343                    .change_context(ReadFileError)?,
344                ctx
345            ))
346
347        }
348    }
349
350    fn parse_root_node(node: RoNode, xpath: &XpathContext, opts: &ReadOptions) -> Result<XISF, ParseNodeError> {
351
352        // * this is mutable because we use .remove() instead of .get()
353        // that way, after we've extracted everything we recognize,
354        // we can just iterate through what remains and emit warnings
355        // saying we don't know what so-and-so attribute means
356        let mut attrs = node.get_attributes();
357
358        // do not validate namespace and schema attributes (spec says SHOULD have them, not MUST)
359        // version MUST be 1.0, though
360        match attrs.remove("version").as_deref() {
361            Some("1.0") => (),
362            None => return Err(report(MissingAttr))
363                .attach_printable("Missing version attribute for <xisf> element in XML header"),
364            Some(bad) => return Err(report(InvalidAttr))
365                .attach_printable(format!("Invalid version attribute for <xisf> element in XML header: expected \"1.0\", found \"{bad}\"")),
366        }
367
368        // we don't actually care if xsi:schemaLocation exists, or what its value is
369        // just calling remove here so it doesn't create a warning below if it exists
370        attrs.remove("schemaLocation");
371
372        for remaining in attrs.into_iter() {
373            tracing::warn!("Ignoring unrecognized attribute {}=\"{}\"", remaining.0, remaining.1);
374        }
375
376        let mut images = vec![];
377        let mut properties = HashMap::new();
378        let mut metadata = None;
379        for mut child in node.get_child_nodes() {
380            child = child.follow_reference(xpath).change_context(context(InvalidReference))?;
381            match child.get_name().as_str() {
382                "Image" => images.push(Image::parse_node(child, xpath, opts)?),
383                "Property" => {
384                    let prop = Property::parse_node(child)?;
385                    if properties.insert(prop.id.clone(), prop.content).is_some() {
386                        tracing::warn!("Duplicate property found with id {} -- discarding the previous one", prop.id);
387                    }
388                },
389                "Metadata" => {
390                    if metadata.replace(Metadata::parse_node(child, xpath)?).is_some() {
391                        tracing::warn!("Duplicate Metadata element found -- discarding the previous one");
392                    }
393                }
394                // TODO: check if the unrecognized node has a uid tag with a reference to it somewhere before emitting a warning
395                _ => tracing::warn!("Ignoring unrecognized child node <{}>", child.get_name()),
396            }
397        }
398        let metadata = metadata
399            .ok_or(report(MissingChild))
400            .attach_printable("Missing Metadata element")?;
401
402        Ok(XISF {
403            images,
404            properties,
405            metadata,
406        })
407    }
408
409    /// Returns an iterator over all images in the file
410    pub fn images(&self) -> impl Iterator<Item = &Image> {
411        self.images.iter()
412    }
413    /// Returns the total number of images in the file
414    ///
415    /// <div class="warning">
416    ///
417    /// Although XISF is colloquially an image format, the XISF spec allows for files with no images at all.
418    /// Such files may instead have arbitrary data stored in tables, or root-level XISF properties.
419    ///
420    /// </div>
421    pub fn num_images(&self) -> usize {
422        self.images.len()
423    }
424    /// Returns a reference to the `i`-th image in the file
425    ///
426    /// # Panics
427    /// If `i` is outside the range `0..num_images()`
428    pub fn image(&self, i: usize) -> &Image {
429        &self.images[i]
430    }
431
432    /// Returns true iff an XISF property is present with the given ID
433    pub fn has_property(&self, id: impl AsRef<str>) -> bool {
434        self.properties.contains_key(id.as_ref())
435    }
436
437    /// Attempts to parse an XISF property with the given ID as type T
438    ///
439    /// To read a value and comment pair, use the pattern `let (value, comment) = properties.parse_property("ID", &xisf)?;`
440    pub fn parse_property<T: FromProperty>(&self, id: impl AsRef<str>, ctx: &Context) -> Result<T, ReadPropertyError> {
441        let content = self.properties.get(id.as_ref())
442            .ok_or(report!(ReadPropertyError::NotFound))?;
443        T::from_property(&content, ctx)
444            .change_context(ReadPropertyError::InvalidFormat)
445    }
446    /// Returns the raw content of the XISF property matching the given ID`
447    pub fn raw_property(&self, id: impl AsRef<str>) -> Option<&PropertyContent> {
448        self.properties.get(id.as_ref())
449    }
450    /// Iterates through all XISF properties as (id, type+value+comment) tuples,
451    /// in the order they appear in file, returned as raw unparsed strings/data blocks.
452    pub fn all_raw_properties(&self) -> impl Iterator<Item = (&String, &PropertyContent)> {
453        self.properties.iter()
454    }
455
456    /// Returns true iff the Metadata element contains an XISF property with the given ID
457    pub fn has_metadata(&self, id: impl AsRef<str>) -> bool {
458        self.metadata.contains_key(id.as_ref())
459    }
460
461    /// Attempts to parse an XISF property from the Metadata element with the given ID as type T
462    ///
463    /// To read a value and comment pair, use the pattern `let (value, comment) = properties.parse_property("ID", &xisf)?;`
464    pub fn parse_metadata<T: FromProperty>(&self, id: impl AsRef<str>, ctx: &Context) -> Result<T, ReadPropertyError> {
465        let content = self.metadata.get(id.as_ref())
466            .ok_or(report!(ReadPropertyError::NotFound))?;
467        T::from_property(&content, ctx)
468            .change_context(ReadPropertyError::InvalidFormat)
469    }
470    /// Returns the raw content of the XISF property in the Metadata element matching the given ID`
471    pub fn raw_metadata(&self, id: impl AsRef<str>) -> Option<&PropertyContent> {
472        self.metadata.get(id.as_ref())
473    }
474    /// Iterates through all XISF properties as (id, type+value+comment) tuples,
475    /// in the order they appear in file, returned as raw unparsed strings/data blocks.
476    pub fn all_raw_metadata(&self) -> impl Iterator<Item = (&String, &PropertyContent)> {
477        self.metadata.iter()
478    }
479}