cloudflare_soos/
lib.rs

1//! Scans a progressive JPEG file to find which byte ranges of the file are critical for displaying it at key stages.
2//! This knowledge can be used to serve JPEGs optimally over HTTP/2 connections. This library can generate cf-priority-change headers
3//! compatible with [prioritization syntax used by Cloudflare](https://blog.cloudflare.com/parallel-streaming-of-progressive-images/).
4
5use std::fmt::Display;
6
7mod error;
8pub use crate::error::*;
9
10/// For advanced usage, low-level access to the basic JPEG structure
11pub mod jpeg;
12
13#[cfg(feature = "gif")]
14mod gif;
15mod png;
16
17#[cfg(target_arch = "wasm32")]
18use wasm_bindgen::prelude::*;
19
/// Key positions in a progressive image file
///
/// Every position is a byte offset from the start of the file. A position is `None`
/// when it is not known for the image.
///
/// ```rust,no_run
/// # let input_file = vec![];
/// cloudflare_soos::Scans::from_file(&input_file)?.cf_priority_change_headers()?;
/// # Ok::<_, cloudflare_soos::Error>(())
/// ```
#[derive(Debug, Clone, Default, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Scans {
    /// Byte position where metadata ends.
    /// This many bytes precede the start of pixel data.
    /// It's usually <200 bytes, unless the image has color profiles or other bloat.
    pub metadata_end: Option<usize>,
    /// All metadata + minimum amount of data to make browsers render _anything_
    /// (in case they don't reserve space based on metadata)
    pub frame_render_start: Option<usize>,
    /// Byte position where the first (lowest-quality) progressive scan ends.
    /// This many bytes are needed to display anything on screen.
    /// It's usually 12-15% of the file size.
    pub first_scan_end: Option<usize>,
    /// Byte position where most of ok-quality pixel data ends.
    /// This many bytes are needed to display a good-enough image.
    /// It's usually about 50% of the file size.
    pub good_scan_end: Option<usize>,
    /// Size of the whole image file, in bytes. The size is only used for heuristics,
    /// so the value may be approximate, but it must be greater than all other
    /// positions set in this struct.
    pub file_size: usize,
}
50
// On wasm32 builds only: install `wee_alloc` as the process-wide allocator.
// NOTE(review): presumably chosen to keep the generated wasm module small — confirm.
#[cfg(target_arch = "wasm32")]
#[global_allocator]
static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT;
54
/// wasm32 entry point: analyzes `image` and returns the `cf-priority` and
/// `cf-priority-change` header values, or `None` if the file could not be
/// analyzed or no headers could be generated for it.
#[cfg(target_arch = "wasm32")]
#[wasm_bindgen]
pub fn cf_priority_change_header_wasm(image: &[u8]) -> Option<(String, String)> {
    let scans = Scans::from_file(image).ok()?;
    scans.cf_priority_change_headers().ok()
}
60
/// wasm32 entry point: analyzes `image` and returns the RFC 9218 style priority
/// header value, or `None` if the file could not be analyzed or no header could
/// be generated for it.
#[cfg(target_arch = "wasm32")]
#[wasm_bindgen]
pub fn rfc9218_priority_change_headers_wasm(image: &[u8]) -> Option<String> {
    let scans = Scans::from_file(image).ok()?;
    scans.rfc9218_priority_change_headers().ok()
}
66
// Minimum chunk sizes for HTTP/2 and RFC 9218 (HTTP/3) prioritization changes.
// There's no point changing priority for just 20 bytes: an H/2 frame takes half of that.
69const MIN_H2_CHUNK_SIZE: usize = 20;
// There's no point changing priority for just 32 bytes: an H/3 frame's headers take 16 bytes.
71const MIN_H3_CHUNK_SIZE: usize = 32;
72
/// Concurrency component of a priority value.
///
/// It is rendered after the slash in `priority/concurrency` values, using the
/// character noted on each variant. The exact scheduling semantics are defined
/// by the server's prioritization syntax rather than by this crate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Concurrency {
    /// Rendered as `0`.
    ExclusiveSequential,
    /// Rendered as `1`.
    SharedSequential,
    /// Rendered as `n`.
    Shared,
}
79
80impl Default for Concurrency {
81    fn default() -> Self {
82        Self::ExclusiveSequential
83    }
84}
85
86impl Display for Concurrency {
87    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
88        let value = match self {
89            Concurrency::ExclusiveSequential => "0",
90            Concurrency::SharedSequential => "1",
91            Concurrency::Shared => "n",
92        };
93        write!(f, "{value}")
94    }
95}
96
97#[derive(Default)]
98struct PriorityChanges {
99    next_offset: usize,
100    changes: Vec<PriorityChange>,
101}
102
103struct PriorityChange {
104    offset: usize,
105    http2_priority: u8,
106    concurrency: Concurrency,
107}
108
109impl PriorityChange {
110    fn get_http2_priority_chunk(&self) -> String {
111        format!("{}:{}", self.offset, self.get_http2_priority())
112    }
113
114    #[inline(always)]
115    fn get_http2_priority(&self) -> String {
116        format!("{}/{}", self.http2_priority, self.concurrency)
117    }
118
119    fn get_http3_priority_chunk(&self) -> String {
120        format!("{};{}", self.offset, self.get_http3_priority())
121    }
122
123    #[inline(always)]
124    fn get_http3_priority(&self) -> String {
125        // the lower urgency level the more priority it has over other frames
126        let urgency = match self.http2_priority {
127            0..=9 => 6,
128            10..=19 => 5,
129            20..=29 => 4,
130            30..=39 => 3,
131            40..=49 => 2,
132            50..=59 => 1,
133            60..=63 => 0,
134            _ => 7, // not supported
135        };
136
137        let (urgency, incremental) = match self.concurrency {
138            Concurrency::ExclusiveSequential => (urgency, false),
139            // Shared concurrency reduces priority (urgency is a reversed measure)
140            Concurrency::SharedSequential => (urgency + 1, false),
141            Concurrency::Shared => (urgency + 1, true),
142        };
143
144        if incremental {
145            format!("u={urgency};i")
146        } else {
147            format!("u={urgency};i=?0")
148        }
149    }
150}
151
152impl PriorityChanges {
153    fn add(&mut self, offset: usize, http2_priority: u8, concurrency: Concurrency) {
154        self.changes.push(PriorityChange {
155            offset: self.next_offset,
156            http2_priority,
157            concurrency,
158        });
159
160        self.next_offset = offset;
161    }
162
163    fn has_changes(&self) -> bool {
164        self.changes.len() > 1
165    }
166
167    fn get_http2_priority(&self) -> Result<String> {
168        self.changes
169            .first()
170            .map(PriorityChange::get_http2_priority)
171            .ok_or(Error::Format("no priority header"))
172    }
173
174    fn get_http2_priority_changes(&self) -> Result<String> {
175        let mut min_pos = 0;
176        let change = self
177            .changes
178            .iter()
179            // return changes w/o first priority
180            .skip(1)
181            .filter(|&p| {
182                // filter out small chunks
183                let keep = p.offset > min_pos;
184                min_pos = p.offset + MIN_H2_CHUNK_SIZE;
185                keep
186            })
187            .map(PriorityChange::get_http2_priority_chunk)
188            .collect::<Vec<String>>()
189            .join(",");
190
191        if change.is_empty() {
192            return Err(Error::Format("Can't find useful scans"));
193        }
194
195        Ok(change)
196    }
197
198    fn get_http3_priority(&self) -> Result<String> {
199        self.changes
200            .first()
201            .map(PriorityChange::get_http3_priority)
202            .ok_or(Error::Format("no priority header"))
203    }
204
205    fn get_rfc9218_priority_changes(&self) -> Result<String> {
206        let mut min_pos = 0;
207        let change = self
208            .changes
209            .iter()
210            // returns changes w/o first priority
211            .skip(1)
212            .filter(|&p| {
213                // filter out small chunks
214                let keep = p.offset > min_pos;
215                min_pos = p.offset + MIN_H3_CHUNK_SIZE;
216                keep
217            })
218            .map(PriorityChange::get_http3_priority_chunk)
219            .collect::<Vec<String>>()
220            .join(" ");
221
222        if change.is_empty() {
223            return Err(Error::Format("Can't find useful scans"));
224        }
225
226        Ok(format!("cf-chb=({change})"))
227    }
228}
229
230impl Scans {
231    /// Analyze an image file to find byte ranges of its metadata and progressive scans
232    pub fn from_file(input_file: &[u8]) -> Result<Self> {
233        match input_file {
234            [0xff, ..] => crate::jpeg::scans(input_file),
235            #[cfg(feature = "gif")]
236            [b'G', ..] => crate::gif::scans(input_file),
237            [0x89, ..] => crate::png::scans(input_file),
238            _ => Err(Error::Unsupported),
239        }
240    }
241
242    /// Assumes the HTTP2 priorities are:
243    ///
244    ///  * 50 = critical js/css, fonts + image metadata
245    ///  * 30 = regular js/css, followed by other images + JPEG DC
246    ///  * 20 = low-priority image bodies
247    ///  * 10 = idle
248    ///
249    /// Returns `cf-priority` and `cf-priority-change` header values, respectively.
250    pub fn cf_priority_change_headers(&self) -> Result<(String, String)> {
251        let priority_changes = self.get_priority_changes()?;
252        Ok((
253            priority_changes.get_http2_priority()?,
254            priority_changes.get_http2_priority_changes()?,
255        ))
256    }
257
258    /// <https://www.rfc-editor.org/rfc/rfc9218.html>
259    pub fn rfc9218_priority_change_headers(&self) -> Result<String> {
260        let priority_changes = self.get_priority_changes()?;
261        Ok(format!("{},{}",
262            priority_changes.get_http3_priority()?,
263            priority_changes.get_rfc9218_priority_changes()?,
264        ))
265    }
266
267    fn get_priority_changes(&self) -> Result<PriorityChanges> {
268        let mut metadata_end = self.metadata_end.unwrap_or(0);
269        let is_progressive = self
270            .first_scan_end
271            .map_or(false, |len| len < self.file_size / 2);
272
273        // if there's a fat color profile or Adobe Garbage,
274        // then sending "just" the metadata is not fast, and shouldn't be prioritized.
275        let fat_metadata_limit = ((self.file_size / 8) + 180).min(2000);
276        let fat_frame_start_limit = ((self.file_size / 8) + 500).min(8000);
277
278        // if the metadata can cheaply include first frame/scan info, then send both at once (reduce H/2 framing overhead)
279        let rendered_anything = self
280            .frame_render_start
281            .or(self.first_scan_end)
282            .or(self.good_scan_end);
283        if let Some(rendered_anything) = rendered_anything {
284            if rendered_anything < fat_metadata_limit
285                && rendered_anything < metadata_end + metadata_end / 8 + 100
286            {
287                metadata_end = rendered_anything;
288            }
289        }
290
291        // let mut chunks = Vec::with_capacity(8);
292        let mut priority_changes = PriorityChanges::default();
293
294        // This is important, because it decides when the whole image starts sending
295        if is_progressive && metadata_end < fat_metadata_limit {
296            // "50/0" fast and worth accelerating
297            priority_changes.add(metadata_end, 50, Concurrency::ExclusiveSequential);
298        } else if self.file_size < 1200
299            || is_progressive
300            || rendered_anything.map_or(false, |n| n < 2000)
301        {
302            // "30/1" at least we can show something quick
303            priority_changes.add(metadata_end, 30, Concurrency::SharedSequential);
304        } else {
305            // "21/n" lost cause: baseline image with bloated metadata
306            priority_changes.add(metadata_end, 21, Concurrency::Shared);
307        };
308
309        // Browsers don't always reserve space based on metadata availability alone,
310        // so here's trying again, with more data
311        if let Some(frame_render_start) = self.frame_render_start {
312            if frame_render_start > metadata_end
313                && self
314                    .first_scan_end
315                    .map_or(true, |dc| frame_render_start < dc)
316            {
317                if frame_render_start < fat_frame_start_limit {
318                    if frame_render_start < 1000 {
319                        priority_changes.add(frame_render_start, 50, Concurrency::SharedSequential);
320                    } else {
321                        priority_changes.add(frame_render_start, 40, Concurrency::SharedSequential);
322                    }
323                } else {
324                    priority_changes.add(frame_render_start, 30, Concurrency::Shared);
325                }
326            }
327        }
328
329        if let Some(first_scan_end) = self.first_scan_end {
330            // small DC can be downloaded in one go, to save on re-rendering
331            priority_changes.add(
332                first_scan_end,
333                30,
334                if first_scan_end < 25000 {
335                    Concurrency::ExclusiveSequential
336                } else {
337                    Concurrency::SharedSequential
338                },
339            );
340        }
341
342        if let Some(good_scan_end) = self.good_scan_end {
343            priority_changes.add(
344                good_scan_end,
345                20,
346                if good_scan_end < 100_000 {
347                    Concurrency::SharedSequential
348                } else {
349                    Concurrency::Shared
350                },
351            );
352        }
353
354        let rendered_already = self.first_scan_end.is_some() || self.good_scan_end.is_some();
355        let bytes_left = self
356            .file_size
357            .saturating_sub(self.good_scan_end.or(self.first_scan_end).unwrap_or(0));
358        let is_big = bytes_left > 80_000;
359        let is_tiny = bytes_left < 1_000;
360
361        let (priority, concurrency) = if rendered_already {
362            // if it's on screen, it's not urgent to send anything more
363            (
364                10,
365                if is_big {
366                    Concurrency::Shared
367                } else {
368                    Concurrency::SharedSequential
369                },
370            )
371        } else if is_tiny {
372            (30, Concurrency::SharedSequential)
373        } else if is_big {
374            (20, Concurrency::Shared)
375        } else {
376            (20, Concurrency::SharedSequential)
377        };
378
379        priority_changes.add(self.file_size, priority, concurrency);
380        if !priority_changes.has_changes() {
381            return Err(Error::Format("Can't find useful scans"));
382        }
383        Ok(priority_changes)
384    }
385}
386
/// Test helper: turns a pair of string slices into the owned pair returned by
/// `cf_priority_change_headers`.
#[cfg(test)]
fn s(a: &str, b: &str) -> (String, String) {
    (a.to_owned(), b.to_owned())
}
391
/// Non-progressive images: no scan positions are known, only (some of) the
/// metadata end, the frame start and the file size.
#[test]
fn test_baseline() {
    // Nothing known besides the file size: both header flavours must fail.
    let unknown = Scans {
        file_size: 100_000,
        ..Scans::default()
    };

    let res = unknown.cf_priority_change_headers();
    assert!(res.is_err(), "expected error, h/2: {res:?}");

    let res = unknown.rfc9218_priority_change_headers();
    assert!(res.is_err(), "expected error, h/3: {res:?}");

    // (label, input, expected h/2 header pair, expected h/3 header)
    let cases = [
        (
            "regular baseline image",
            Scans {
                metadata_end: Some(101),
                frame_render_start: Some(181),
                first_scan_end: None,
                good_scan_end: None,
                file_size: 100_000,
            },
            ("30/1", "181:20/n"),
            "u=4;i=?0,cf-chb=(181;u=5;i)",
        ),
        (
            "tiny image",
            Scans {
                metadata_end: Some(101),
                frame_render_start: None,
                first_scan_end: None,
                good_scan_end: None,
                file_size: 1000,
            },
            ("30/1", "101:20/1"),
            "u=4;i=?0,cf-chb=(101;u=5;i=?0)",
        ),
        (
            "fat metadata",
            Scans {
                metadata_end: Some(9999),
                frame_render_start: None,
                first_scan_end: None,
                good_scan_end: None,
                file_size: 100_000,
            },
            ("21/n", "9999:20/n"),
            "u=5;i,cf-chb=(9999;u=5;i)",
        ),
        (
            "relatively fat metadata",
            Scans {
                metadata_end: Some(1000),
                frame_render_start: None,
                first_scan_end: None,
                good_scan_end: None,
                file_size: 3000,
            },
            ("21/n", "1000:20/1"),
            "u=5;i,cf-chb=(1000;u=5;i=?0)",
        ),
    ];

    for (label, scans, (h2_priority, h2_changes), h3_header) in cases {
        assert_eq!(
            s(h2_priority, h2_changes),
            scans.cf_priority_change_headers().unwrap(),
            "{label}, h/2"
        );

        assert_eq!(
            h3_header.to_string(),
            scans.rfc9218_priority_change_headers().unwrap(),
            "{label}, h/3"
        );
    }
}
500
/// Progressive images: both the first scan and the good-enough scan are known.
#[test]
fn test_progressive() {
    // (label, metadata_end, first_scan_end, good_scan_end,
    //  expected h/2 header pair, expected h/3 header)
    // All cases use a 200 000 byte file and no explicit frame start.
    let cases = [
        (
            "scan",
            1_000,
            10_000,
            100_000,
            ("50/0", "1000:30/0,10000:20/n,100000:10/n"),
            "u=1;i=?0,cf-chb=(1000;u=3;i=?0 10000;u=5;i 100000;u=6;i)",
        ),
        (
            "fat metadata",
            4_000,
            10_000,
            100_000,
            ("30/1", "4000:30/0,10000:20/n,100000:10/n"),
            "u=4;i=?0,cf-chb=(4000;u=3;i=?0 10000;u=5;i 100000;u=6;i)",
        ),
        (
            "fat DC",
            1_000,
            50_000,
            100_000,
            ("50/0", "1000:30/1,50000:20/n,100000:10/n"),
            "u=1;i=?0,cf-chb=(1000;u=4;i=?0 50000;u=5;i 100000;u=6;i)",
        ),
        (
            "small good scan",
            1_000,
            10_000,
            11_000,
            ("50/0", "1000:30/0,10000:20/1,11000:10/n"),
            "u=1;i=?0,cf-chb=(1000;u=3;i=?0 10000;u=5;i=?0 11000;u=6;i)",
        ),
    ];

    for (label, metadata_end, first_scan_end, good_scan_end, (h2_priority, h2_changes), h3_header) in
        cases
    {
        let scans = Scans {
            metadata_end: Some(metadata_end),
            frame_render_start: None,
            first_scan_end: Some(first_scan_end),
            good_scan_end: Some(good_scan_end),
            file_size: 200_000,
        };

        assert_eq!(
            s(h2_priority, h2_changes),
            scans.cf_priority_change_headers().unwrap(),
            "{label}, h/2"
        );

        assert_eq!(
            h3_header.to_string(),
            scans.rfc9218_priority_change_headers().unwrap(),
            "{label}, h/3"
        );
    }
}