1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
//! Scans a progressive JPEG file to find which byte ranges of the file are critical for displaying it at key stages.
//! This knowledge can be used to serve JPEGs optimally over HTTP/2 connections. This library can generate cf-priority-change headers
//! compatible with [prioritization syntax used by Cloudflare](https://blog.cloudflare.com/parallel-streaming-of-progressive-images/).

pub use crate::error::*;
use crate::jpeg::*;
use std::fmt::Write;

mod error;
/// Low-level access to the basic JPEG structure, for advanced usage.
///
/// The parsing submodules stay crate-private; only the types re-exported
/// below are part of the public API.
pub mod jpeg {
    pub(crate) mod byteorder;
    pub(crate) mod decoder;
    pub(crate) mod marker;
    pub(crate) mod parser;

    pub use decoder::{Decoder, MarkerData, MarkerPosition};
    pub use marker::Marker;
    pub use parser::{CodingProcess, Component, Dimensions, FrameInfo, ScanInfo};
}

#[cfg(target_arch = "wasm32")]
use wasm_bindgen::prelude::*;

/// Key positions in a progressive image file
///
/// Each `Option` field stays `None` when the corresponding position was not found
/// (for example, `Scans::from_file` stops after `metadata_end` for images that
/// are not progressive).
///
/// ```rust,no_run
/// # let input_file = vec![];
/// cloudflare_soos::Scans::from_file(&input_file)?.cf_priority_change_headers()?;
/// # Ok::<_, cloudflare_soos::Error>(())
/// ```
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Scans {
    /// Byte position where metadata ends.
    /// This many bytes precede the start of pixel data.
    /// It's usually <200 bytes, unless the image has color profiles or other bloat.
    pub metadata_end: Option<usize>,
    /// Byte position where the first (lowest-quality) progressive scan ends.
    /// This many bytes are needed to display anything on screen.
    /// It's usually 12-15% of the file size.
    pub first_scan_end: Option<usize>,
    /// Byte position where most of the OK-quality pixel data ends.
    /// This many bytes are needed to display a good-enough image.
    /// It's usually about 50% of the file size.
    pub good_scan_end: Option<usize>,
    /// Size of the whole image file, in bytes.
    pub file_size: usize,
}

impl Scans {
/// Analyze an image file to find byte ranges of its metadata and progressive scans
pub fn from_file(input_file: &[u8]) -> Result<Self> {
    let mut decoder = Decoder::new(input_file);
    let markers = decoder.decode()?;

    let mut found = Scans::default();
    found.file_size = input_file.len();
    let mut seen_sof = false;
    let mut seen_dc = false;
    let mut number_of_components = 3;
    let mut is_progressive = false;
    let mut dc_components_seen = [false; 3];
    let mut luma_coeff_bits_missing = [255u8; 16]; // there are 64 coefs, but only the first 16 are important for us

    for m in markers {
        // markers specify only their starting position, so to know the end position,
        // we need to wait until the next marker.
        if seen_sof {
            if found.metadata_end.is_none() {
                found.metadata_end = Some(m.position);
                if !is_progressive {
                    break;
                }
            } else if seen_dc {
                if found.first_scan_end.is_none() {
                    found.first_scan_end = Some(m.position);
                }
                // mark scan as reaching good-enough quality when either enough bits were sent (the cuttoff value is empirical),
                // or we've crossed half of the file, so we have to stop searching anyway.
                else if found.good_scan_end.is_none() && (m.position >= input_file.len()/2 || coeff_fill_factor(&luma_coeff_bits_missing) >= 91) {
                    found.good_scan_end = Some(m.position);
                    break;
                }
            }
        }

        match m.marker {
            // Image dimensions
            MarkerData::Frame(frame) => {
                number_of_components = frame.components.len();
                is_progressive = frame.coding_process == CodingProcess::DctProgressive;
                seen_sof = true;
            },
            // Progressive scan
            MarkerData::Scan(scan) => {
                // DC is 0th component
                if *scan.spectral_selection.start() == 0 {
                    for comp in scan.component_indices.iter().copied() {
                        if let Some(seen) = dc_components_seen.get_mut(comp as usize) {
                            *seen = true;
                        }
                    }
                }
                // DC may be split into multiple scans. Don't show incomplete DC to avoid green faces problem.
                if dc_components_seen.iter().take(number_of_components).all(|&v| v) {
                    // We don't care how many bits of DC were sent. As soon as there is something in full color, we're good.
                    seen_dc = true;
                }

                // Measure only quality of luma, assuming encoders know how to sensibly interleave chroma
                if scan.component_indices.contains(&0) {
                    for bit in scan.spectral_selection {
                        if (bit as usize) < luma_coeff_bits_missing.len() {
                            luma_coeff_bits_missing[bit as usize] = luma_coeff_bits_missing[bit as usize].min(scan.successive_approximation_low);
                        }
                    }
                }
            },
            MarkerData::Other(_) => {},
        }
    }
    Ok(found)
}
}

/// Scores how many coefficient bits have been sent, to judge whether the image looks good enough.
///
/// Each of the 16 tracked coefficients contributes a score based on how many of its
/// bits are still missing; the scores are added up. The weighting is entirely
/// speculative: it allows either a few precise ACs or many OK-ish ACs.
/// It is *not* JPEG's quality scale!
/// FIXME: this should be scaled by quantization table
fn coeff_fill_factor(bits_missing: &[u8; 16]) -> u16 {
    // Score for a coefficient with 0, 1, 2 or 3 bits missing. More than that scores nothing.
    const SCORES: [u16; 4] = [12, 8, 5, 1];

    // DC is included as well, because posterization is also an issue
    bits_missing
        .iter()
        .map(|&missing| SCORES.get(usize::from(missing)).copied().unwrap_or(0))
        .sum()
}

// Use `wee_alloc` as the global allocator, for wasm32 builds only.
// NOTE(review): the `wee_alloc` crate appears to be unmaintained — confirm whether
// the default allocator should be used instead.
#[cfg(target_arch = "wasm32")]
#[global_allocator]
static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT;

/// WebAssembly entry point: computes the `cf-priority-change` header value for a JPEG image.
///
/// Returns `None` when the image can't be analyzed, or when no useful scan positions
/// are found in it. Only the `cf-priority-change` value is returned; the initial
/// `cf-priority` value computed alongside it is dropped, because this binding
/// returns a single string.
#[cfg(target_arch = "wasm32")]
#[wasm_bindgen]
pub fn cf_priority_change_header_wasm(image: &[u8]) -> Option<String> {
    let (_priority, priority_change) = Scans::from_file(image)
        .ok()?
        .cf_priority_change_headers()
        .ok()?;
    Some(priority_change)
}

impl Scans {
/// Builds header values that re-prioritize the download at each key position of the file.
///
/// Assumes the priorities are:
///
///  * 50 = critical js/css, fonts + image metadata
///  * 30 = regular js/css, followed by other images + JPEG DC
///  * 20 = low-priority image bodies
///  * 10 = idle
///
/// Returns `cf-priority` and `cf-priority-change` header values, respectively.
///
/// # Errors
///
/// Returns [`Error::Format`] when none of the positions is known, so there is
/// nothing to put in the `cf-priority-change` header.
pub fn cf_priority_change_headers(&self) -> Result<(String, String)> {
    // Metadata is worth prioritizing only when it is small. With a fat color profile
    // or Adobe Garbage, sending "just" the metadata is not fast.
    let initial_priority = match self.metadata_end {
        Some(end) if end < (self.file_size / 8 + 100).min(2000) => "50/0",
        _ => "30/1", // default for images
    };

    // Each step is (byte position, priority to switch to from that position on).
    let after_metadata = self.metadata_end.map(|end| {
        // If there's a DC preview, send it quickly. A baseline image
        // (or a progressive, but huge one) has to wait.
        let priority = match self.first_scan_end {
            // small DC can be downloaded in one go, to save on re-rendering
            Some(dc_end) if dc_end < 25000 => "30/0",
            Some(_) => "30/1",
            // tiny baseline image
            None if self.file_size < 2000 => "30/n",
            // baseline image
            None => "20/n",
        };
        (end, priority)
    });
    let after_first_scan = self.first_scan_end.map(|end| {
        let priority = match self.good_scan_end {
            Some(good_end) if good_end < 100_000 => "20/1",
            _ => "20/n",
        };
        (end, priority)
    });
    let after_good_scan = self.good_scan_end.map(|end| {
        let priority = if self.file_size < 100_000 { "10/1" } else { "10/n" };
        (end, priority)
    });

    let mut change_header = String::with_capacity(127);
    let steps = [after_metadata, after_first_scan, after_good_scan];
    for (position, priority) in steps.iter().flatten() {
        if !change_header.is_empty() {
            change_header.push(',');
        }
        // Writing into a `String` can't fail
        let _ = write!(change_header, "{}:{}", position, priority);
    }

    if change_header.is_empty() {
        Err(Error::Format("Can't find useful scans"))
    } else {
        Ok((String::from(initial_priority), change_header))
    }
}
}

/// Test helper: turns two string slices into the owned pair
/// that `cf_priority_change_headers` returns on success.
#[cfg(test)]
fn s(a: &str, b: &str) -> (String, String) {
    (a.to_owned(), b.to_owned())
}

/// Headers for images without progressive scans: only `metadata_end` is known (or nothing at all).
#[test]
fn test_baseline() {
    // no positions known at all: there is nothing to put in the header, so it's an error
    assert!((Scans {
        metadata_end: None,
        first_scan_end: None,
        good_scan_end: None,
        file_size: 100_000}).cf_priority_change_headers().is_err());

    // regular baseline image: small metadata is prioritized, the body gets 20/n
    assert_eq!(s("50/0", "101:20/n"), (Scans {
        metadata_end: Some(101),
        first_scan_end: None,
        good_scan_end: None,
        file_size: 100_000}).cf_priority_change_headers().unwrap());

    // tiny image (file smaller than 2000 bytes): the body keeps priority 30
    assert_eq!(s("50/0", "101:30/n"), (Scans {
        metadata_end: Some(101),
        first_scan_end: None,
        good_scan_end: None,
        file_size: 1000}).cf_priority_change_headers().unwrap());

    // fat metadata (above the 2000-byte cap): stays at the default 30/1
    assert_eq!(s("30/1", "9999:20/n"), (Scans {
        metadata_end: Some(9999),
        first_scan_end: None,
        good_scan_end: None,
        file_size: 100_000}).cf_priority_change_headers().unwrap());

    // relatively fat metadata: 1000 bytes exceeds file_size/8 + 100 = 475 for a 3000-byte file
    assert_eq!(s("30/1", "1000:20/n"), (Scans {
        metadata_end: Some(1000),
        first_scan_end: None,
        good_scan_end: None,
        file_size: 3000}).cf_priority_change_headers().unwrap());
}

/// Headers for progressive images, where all three positions are known.
#[test]
fn test_progressive() {
    // typical case: small metadata, small DC scan, large file
    assert_eq!(s("50/0", "1000:30/0,10000:20/n,100000:10/n"), (Scans {
        metadata_end: Some(1_000),
        first_scan_end: Some(10_000),
        good_scan_end: Some(100_000),
        file_size: 200_000}).cf_priority_change_headers().unwrap());

    // fat metadata (above the 2000-byte cap): initial priority stays at the default 30/1
    assert_eq!(s("30/1", "4000:30/0,10000:20/n,100000:10/n"), (Scans {
        metadata_end: Some(4_000),
        first_scan_end: Some(10_000),
        good_scan_end: Some(100_000),
        file_size: 200_000}).cf_priority_change_headers().unwrap());

    // fat DC (first scan ends at 25000 bytes or later): 30/1 instead of 30/0
    assert_eq!(s("50/0", "1000:30/1,50000:20/n,100000:10/n"), (Scans {
        metadata_end: Some(1_000),
        first_scan_end: Some(50_000),
        good_scan_end: Some(100_000),
        file_size: 200_000}).cf_priority_change_headers().unwrap());

    // small good scan (ends before 100000 bytes): 20/1 instead of 20/n
    assert_eq!(s("50/0", "1000:30/0,10000:20/1,11000:10/n"), (Scans {
        metadata_end: Some(1_000),
        first_scan_end: Some(10_000),
        good_scan_end: Some(11_000),
        file_size: 200_000}).cf_priority_change_headers().unwrap());
}