// agent_sdk_toolkit/workspace/read_pipeline.rs

//! Format-aware workspace read pipeline and metadata records. Use this module to
//! detect file kind, choose bounded extraction behavior, and describe truncation or
//! parser fallbacks. Pipeline functions read local files but must not leak raw binary
//! content by default.
use std::path::Path;

use serde::{Deserialize, Serialize};

10#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
11/// Workspace workspace read detection request or result value.
12/// Creating the value does not touch the filesystem; workspace executors document read, write, edit, or search effects.
13pub struct WorkspaceReadDetection {
14    /// Kind/category for this record, capability, event, or detected
15    /// resource.
16    pub kind: WorkspaceFileKind,
17    /// Detected or declared MIME type used for reader selection and
18    /// provider-safe summaries.
19    pub mime_type: String,
20    /// Lowercase file extension used as one detection signal; it is not
21    /// trusted as sole authority.
22    pub extension: Option<String>,
23    /// Whether the input is treated as binary so raw bytes are not exposed by
24    /// default.
25    pub binary: bool,
26    /// Confidence level for file-kind detection.
27    pub confidence: WorkspaceFileTypeConfidence,
28}
29
30#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
31/// Workspace workspace media metadata request or result value.
32/// Creating the value does not touch the filesystem; workspace executors document read, write, edit, or search effects.
33pub struct WorkspaceMediaMetadata {
34    /// Detected media, document, archive, or parser format.
35    pub format: String,
36    /// Detected image or media width in pixels when available.
37    pub width: Option<u32>,
38    /// Detected image or media height in pixels when available.
39    pub height: Option<u32>,
40    /// Decoded image color type when the parser can determine it.
41    pub color_type: Option<String>,
42    /// Whether the parser decoded the media/document enough to produce
43    /// structured metadata.
44    pub decoded: bool,
45    /// Parser or fallback path that produced this metadata.
46    pub parser: String,
47    /// Descriptions of embedded previews discovered in RAW or container
48    /// media.
49    pub embedded_previews: Vec<WorkspaceEmbeddedPreviewMetadata>,
50    /// RAW sensor metadata discovered without demosaicing full image data.
51    pub raw_sensor: Option<WorkspaceRawSensorMetadata>,
52    /// Apple Photos adjustment sidecar metadata, when a sidecar is present.
53    pub apple_photos: Option<WorkspaceApplePhotosMetadata>,
54    /// Non-fatal warnings from bounded readers, parsers, or policy
55    /// downgrades.
56    pub warnings: Vec<String>,
57}
58
59#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
60/// Workspace workspace document metadata request or result value.
61/// Creating the value does not touch the filesystem; workspace executors document read, write, edit, or search effects.
62pub struct WorkspaceDocumentMetadata {
63    /// Parser or fallback path that produced this metadata.
64    pub parser: String,
65    /// Count of page items observed or included in this record.
66    pub page_count: Option<usize>,
67    /// Number of text characters extracted before truncation or parser
68    /// limits.
69    pub extracted_chars: usize,
70    /// OCR requirement or sidecar metadata for scanned PDFs/images.
71    pub ocr: Option<WorkspaceOcrMetadata>,
72    /// Non-fatal warnings from bounded readers, parsers, or policy
73    /// downgrades.
74    pub warnings: Vec<String>,
75}
76
77#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
78/// Workspace workspace ocr metadata request or result value.
79/// Creating the value does not touch the filesystem; workspace executors document read, write, edit, or search effects.
80pub struct WorkspaceOcrMetadata {
81    /// Parser or fallback path that produced this metadata.
82    pub parser: String,
83    /// Path to a sidecar file used for OCR, Apple Photos adjustments, or
84    /// legacy extraction.
85    pub sidecar_path: Option<String>,
86    /// Observed byte length for the source, sidecar, or extracted record.
87    pub byte_len: u64,
88    /// Number of text characters extracted before truncation or parser
89    /// limits.
90    pub extracted_chars: usize,
91    /// Whether output was shortened by byte, item, page, archive, or parser
92    /// limits.
93    pub truncated: bool,
94    /// Non-fatal warnings from bounded readers, parsers, or policy
95    /// downgrades.
96    pub warnings: Vec<String>,
97}
98
99#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
100/// Workspace workspace embedded preview metadata request or result value.
101/// Creating the value does not touch the filesystem; workspace executors document read, write, edit, or search effects.
102pub struct WorkspaceEmbeddedPreviewMetadata {
103    /// Detected or declared MIME type used for reader selection and
104    /// provider-safe summaries.
105    pub mime_type: String,
106    /// Observed byte length for the source, sidecar, or extracted record.
107    pub byte_len: u64,
108    /// Byte offset where this excerpt, prefix, or sample begins.
109    pub offset: Option<u64>,
110    /// Stable hash for the bytes or canonical payload used for stale checks
111    /// and fingerprints.
112    pub content_hash: String,
113}
114
115#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
116/// Workspace workspace raw sensor metadata request or result value.
117/// Creating the value does not touch the filesystem; workspace executors document read, write, edit, or search effects.
118pub struct WorkspaceRawSensorMetadata {
119    /// Optional bits per sample value.
120    /// When absent, callers should use the documented default or skip that optional behavior.
121    pub bits_per_sample: Option<u16>,
122    /// Optional compression value.
123    /// When absent, callers should use the documented default or skip that optional behavior.
124    pub compression: Option<u16>,
125    /// Optional photometric interpretation value.
126    /// When absent, callers should use the documented default or skip that optional behavior.
127    pub photometric_interpretation: Option<u16>,
128    /// Count of strip items observed or included in this record.
129    pub strip_count: usize,
130    /// strip byte len used for bounds checks, summaries, or truncation
131    /// evidence.
132    pub strip_byte_len: u64,
133    /// Whether decoded pixels is enabled.
134    /// Policy, validation, or routing code uses this flag to choose the explicit behavior.
135    pub decoded_pixels: bool,
136    /// Deterministic sample hash used for stale checks, package evidence, or
137    /// replay comparisons.
138    pub sample_hash: Option<String>,
139    /// Non-fatal warnings from bounded readers, parsers, or policy
140    /// downgrades.
141    pub warnings: Vec<String>,
142}
143
144#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
145/// Workspace workspace apple photos metadata request or result value.
146/// Creating the value does not touch the filesystem; workspace executors document read, write, edit, or search effects.
147pub struct WorkspaceApplePhotosMetadata {
148    /// Path to a sidecar file used for OCR, Apple Photos adjustments, or
149    /// legacy extraction.
150    pub sidecar_path: String,
151    /// Observed byte length for the source, sidecar, or extracted record.
152    pub byte_len: u64,
153    /// Count of adjustment items observed or included in this record.
154    pub adjustment_count: usize,
155    /// Parser or fallback path that produced this metadata.
156    pub parser: String,
157    /// Non-fatal warnings from bounded readers, parsers, or policy
158    /// downgrades.
159    pub warnings: Vec<String>,
160}
161
162#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
163/// Workspace workspace archive entry request or result value.
164/// Creating the value does not touch the filesystem; workspace executors document read, write, edit, or search effects.
165pub struct WorkspaceArchiveEntry {
166    /// Workspace-relative or resource path selected by the request or result.
167    pub path: String,
168    /// Observed byte length for the source, sidecar, or extracted record.
169    pub byte_len: u64,
170    /// Whether directory is enabled.
171    /// Policy, validation, or routing code uses this flag to choose the explicit behavior.
172    pub directory: bool,
173}
174
175#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
176/// Workspace workspace archive metadata request or result value.
177/// Creating the value does not touch the filesystem; workspace executors document read, write, edit, or search effects.
178pub struct WorkspaceArchiveMetadata {
179    /// Parser or fallback path that produced this metadata.
180    pub parser: String,
181    /// Count of entry items observed or included in this record.
182    pub entry_count: usize,
183    /// Bounded entries included in this record. Limits and truncation are
184    /// represented by companion metadata when applicable.
185    pub entries: Vec<WorkspaceArchiveEntry>,
186    /// Whether output was shortened by byte, item, page, archive, or parser
187    /// limits.
188    pub truncated: bool,
189    /// Non-fatal warnings from bounded readers, parsers, or policy
190    /// downgrades.
191    pub warnings: Vec<String>,
192}
193
194#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
195/// Workspace workspace sqlite table metadata request or result value.
196/// Creating the value does not touch the filesystem; workspace executors document read, write, edit, or search effects.
197pub struct WorkspaceSqliteTableMetadata {
198    /// Human-readable or protocol-visible name for this SDK item.
199    pub name: String,
200    /// Kind/category for this record, capability, event, or detected
201    /// resource.
202    pub kind: String,
203    /// Bounded columns included in this record. Limits and truncation are
204    /// represented by companion metadata when applicable.
205    pub columns: Vec<String>,
206    /// Bounded sample rows included in this record. Limits and truncation are
207    /// represented by companion metadata when applicable.
208    pub sample_rows: Vec<Vec<String>>,
209}
210
211#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
212/// Workspace workspace sqlite metadata request or result value.
213/// Creating the value does not touch the filesystem; workspace executors document read, write, edit, or search effects.
214pub struct WorkspaceSqliteMetadata {
215    /// Parser or fallback path that produced this metadata.
216    pub parser: String,
217    /// Count of table items observed or included in this record.
218    pub table_count: usize,
219    /// Bounded tables included in this record. Limits and truncation are
220    /// represented by companion metadata when applicable.
221    pub tables: Vec<WorkspaceSqliteTableMetadata>,
222    /// Whether output was shortened by byte, item, page, archive, or parser
223    /// limits.
224    pub truncated: bool,
225    /// Non-fatal warnings from bounded readers, parsers, or policy
226    /// downgrades.
227    pub warnings: Vec<String>,
228}
229
230#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
231/// Workspace workspace resource metadata request or result value.
232/// Creating the value does not touch the filesystem; workspace executors document read, write, edit, or search effects.
233pub struct WorkspaceResourceMetadata {
234    /// URI scheme resolved by the resource reader.
235    pub scheme: String,
236    /// Source label or ref for this item; it is metadata and does not fetch
237    /// content by itself.
238    pub source: String,
239    /// Observed byte length for the source, sidecar, or extracted record.
240    pub byte_len: u64,
241    /// Parser or fallback path that produced this metadata.
242    pub parser: String,
243    /// Non-fatal warnings from bounded readers, parsers, or policy
244    /// downgrades.
245    pub warnings: Vec<String>,
246}
247
248#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
249#[serde(rename_all = "snake_case")]
250/// Enumerates the finite workspace file kind cases.
251/// Serialized names are part of the SDK contract; update fixtures when variants change.
252pub enum WorkspaceFileKind {
253    /// Use this variant when the contract needs to represent text; selecting it has no side effect by itself.
254    Text,
255    /// Use this variant when the contract needs to represent markdown; selecting it has no side effect by itself.
256    Markdown,
257    /// Use this variant when the contract needs to represent json; selecting it has no side effect by itself.
258    Json,
259    /// Use this variant when the contract needs to represent pdf; selecting it has no side effect by itself.
260    Pdf,
261    /// Use this variant when the contract needs to represent image; selecting it has no side effect by itself.
262    Image,
263    /// Use this variant when the contract needs to represent raw image; selecting it has no side effect by itself.
264    RawImage,
265    /// Use this variant when the contract needs to represent office document; selecting it has no side effect by itself.
266    OfficeDocument,
267    /// Use this variant when the contract needs to represent archive; selecting it has no side effect by itself.
268    Archive,
269    /// Use this variant when the contract needs to represent sqlite database; selecting it has no side effect by itself.
270    SqliteDatabase,
271    /// Use this variant when the contract needs to represent url resource; selecting it has no side effect by itself.
272    UrlResource,
273    /// Use this variant when the contract needs to represent binary; selecting it has no side effect by itself.
274    Binary,
275}
276
277#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
278#[serde(rename_all = "snake_case")]
279/// Enumerates the finite workspace file type confidence cases.
280/// Serialized names are part of the SDK contract; update fixtures when variants change.
281pub enum WorkspaceFileTypeConfidence {
282    /// Use this variant when the contract needs to represent magic; selecting it has no side effect by itself.
283    Magic,
284    /// Use this variant when the contract needs to represent extension; selecting it has no side effect by itself.
285    Extension,
286    /// Use this variant when the contract needs to represent utf8; selecting it has no side effect by itself.
287    Utf8,
288    /// Use this variant when the contract needs to represent fallback; selecting it has no side effect by itself.
289    Fallback,
290}
291
292#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
293#[serde(rename_all = "snake_case")]
294/// Enumerates the finite workspace reader step cases.
295/// Serialized names are part of the SDK contract; update fixtures when variants change.
296pub enum WorkspaceReaderStep {
297    /// Use this variant when the contract needs to represent detect file type; selecting it has no side effect by itself.
298    DetectFileType,
299    /// Use this variant when the contract needs to represent decode utf8 text; selecting it has no side effect by itself.
300    DecodeUtf8Text,
301    /// Use this variant when the contract needs to represent extract pdf text; selecting it has no side effect by itself.
302    ExtractPdfText,
303    /// Use this variant when the contract needs to represent inspect image metadata; selecting it has no side effect by itself.
304    InspectImageMetadata,
305    /// Use this variant when the contract needs to represent inspect raw image metadata; selecting it has no side effect by itself.
306    InspectRawImageMetadata,
307    /// Use this variant when the contract needs to represent inspect raw preview; selecting it has no side effect by itself.
308    InspectRawPreview,
309    /// Use this variant when the contract needs to represent inspect apple photos adjustments; selecting it has no side effect by itself.
310    InspectApplePhotosAdjustments,
311    /// Use this variant when the contract needs to represent apply ocr fallback; selecting it has no side effect by itself.
312    ApplyOcrFallback,
313    /// Use this variant when the contract needs to represent extract office text; selecting it has no side effect by itself.
314    ExtractOfficeText,
315    /// Use this variant when the contract needs to represent extract legacy office text; selecting it has no side effect by itself.
316    ExtractLegacyOfficeText,
317    /// Use this variant when the contract needs to represent list archive entries; selecting it has no side effect by itself.
318    ListArchiveEntries,
319    /// Use this variant when the contract needs to represent inspect sqlite database; selecting it has no side effect by itself.
320    InspectSqliteDatabase,
321    /// Use this variant when the contract needs to represent read data url; selecting it has no side effect by itself.
322    ReadDataUrl,
323    /// Use this variant when the contract needs to represent fail closed external resource; selecting it has no side effect by itself.
324    FailClosedExternalResource,
325    /// Use this variant when the contract needs to represent read bounded prefix; selecting it has no side effect by itself.
326    ReadBoundedPrefix,
327    /// Use this variant when the contract needs to represent summarize binary; selecting it has no side effect by itself.
328    SummarizeBinary,
329}
330
331/// Detect workspace file.
332/// This inspects the path and byte prefix to choose a reader route and performs no I/O.
333pub fn detect_workspace_file(path: &Path, bytes: &[u8]) -> WorkspaceReadDetection {
334    let extension = path
335        .extension()
336        .and_then(|extension| extension.to_str())
337        .map(|extension| extension.to_ascii_lowercase());
338
339    if bytes.starts_with(b"%PDF-") {
340        return detected(
341            WorkspaceFileKind::Pdf,
342            "application/pdf",
343            extension,
344            true,
345            WorkspaceFileTypeConfidence::Magic,
346        );
347    }
348
349    if bytes.starts_with(b"SQLite format 3\0") {
350        return detected(
351            WorkspaceFileKind::SqliteDatabase,
352            "application/vnd.sqlite3",
353            extension,
354            true,
355            WorkspaceFileTypeConfidence::Magic,
356        );
357    }
358
359    if let Some((kind, mime_type)) = detect_magic_image(bytes, extension.as_deref()) {
360        return detected(
361            kind,
362            mime_type,
363            extension,
364            true,
365            WorkspaceFileTypeConfidence::Magic,
366        );
367    }
368
369    if bytes.starts_with(b"PK\x03\x04") {
370        return match extension.as_deref() {
371            Some("docx") => detected(
372                WorkspaceFileKind::OfficeDocument,
373                "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
374                extension,
375                true,
376                WorkspaceFileTypeConfidence::Magic,
377            ),
378            Some("xlsx") => detected(
379                WorkspaceFileKind::OfficeDocument,
380                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
381                extension,
382                true,
383                WorkspaceFileTypeConfidence::Magic,
384            ),
385            Some("pptx") => detected(
386                WorkspaceFileKind::OfficeDocument,
387                "application/vnd.openxmlformats-officedocument.presentationml.presentation",
388                extension,
389                true,
390                WorkspaceFileTypeConfidence::Magic,
391            ),
392            _ => detected(
393                WorkspaceFileKind::Archive,
394                "application/zip",
395                extension,
396                true,
397                WorkspaceFileTypeConfidence::Magic,
398            ),
399        };
400    }
401
402    if bytes.starts_with(b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1") {
403        return detected(
404            WorkspaceFileKind::OfficeDocument,
405            "application/vnd.ms-office",
406            extension,
407            true,
408            WorkspaceFileTypeConfidence::Magic,
409        );
410    }
411
412    if bytes.starts_with(b"\x1f\x8b") {
413        return detected(
414            WorkspaceFileKind::Archive,
415            "application/gzip",
416            extension,
417            true,
418            WorkspaceFileTypeConfidence::Magic,
419        );
420    }
421
422    if is_tar_archive(bytes) {
423        return detected(
424            WorkspaceFileKind::Archive,
425            "application/x-tar",
426            extension,
427            true,
428            WorkspaceFileTypeConfidence::Magic,
429        );
430    }
431
432    if is_raw_extension(extension.as_deref()) {
433        return detected(
434            WorkspaceFileKind::RawImage,
435            "image/x-raw",
436            extension,
437            true,
438            WorkspaceFileTypeConfidence::Extension,
439        );
440    }
441
442    if let Some((kind, mime_type)) = detect_extension_kind(extension.as_deref()) {
443        if matches!(
444            kind,
445            WorkspaceFileKind::Text | WorkspaceFileKind::Markdown | WorkspaceFileKind::Json
446        ) && !is_utf8_text(bytes)
447        {
448            return detected(
449                WorkspaceFileKind::Binary,
450                "application/octet-stream",
451                extension,
452                true,
453                WorkspaceFileTypeConfidence::Fallback,
454            );
455        }
456        let binary = !matches!(
457            kind,
458            WorkspaceFileKind::Text | WorkspaceFileKind::Markdown | WorkspaceFileKind::Json
459        );
460        return detected(
461            kind,
462            mime_type,
463            extension,
464            binary,
465            WorkspaceFileTypeConfidence::Extension,
466        );
467    }
468
469    if is_utf8_text(bytes) {
470        return detected(
471            WorkspaceFileKind::Text,
472            "text/plain; charset=utf-8",
473            extension,
474            false,
475            WorkspaceFileTypeConfidence::Utf8,
476        );
477    }
478
479    detected(
480        WorkspaceFileKind::Binary,
481        "application/octet-stream",
482        extension,
483        true,
484        WorkspaceFileTypeConfidence::Fallback,
485    )
486}
487
488fn detected(
489    kind: WorkspaceFileKind,
490    mime_type: &str,
491    extension: Option<String>,
492    binary: bool,
493    confidence: WorkspaceFileTypeConfidence,
494) -> WorkspaceReadDetection {
495    WorkspaceReadDetection {
496        kind,
497        mime_type: mime_type.to_string(),
498        extension,
499        binary,
500        confidence,
501    }
502}
503
504fn detect_magic_image(
505    bytes: &[u8],
506    extension: Option<&str>,
507) -> Option<(WorkspaceFileKind, &'static str)> {
508    if bytes.starts_with(b"\x89PNG\r\n\x1a\n") {
509        return Some((WorkspaceFileKind::Image, "image/png"));
510    }
511    if bytes.starts_with(b"\xff\xd8\xff") {
512        return Some((WorkspaceFileKind::Image, "image/jpeg"));
513    }
514    if bytes.starts_with(b"GIF87a") || bytes.starts_with(b"GIF89a") {
515        return Some((WorkspaceFileKind::Image, "image/gif"));
516    }
517    if bytes.starts_with(b"II*\0") || bytes.starts_with(b"MM\0*") {
518        if is_raw_extension(extension) {
519            return Some((WorkspaceFileKind::RawImage, "image/x-raw"));
520        }
521        return Some((WorkspaceFileKind::Image, "image/tiff"));
522    }
523    if bytes.len() >= 12 && bytes.starts_with(b"RIFF") && &bytes[8..12] == b"WEBP" {
524        return Some((WorkspaceFileKind::Image, "image/webp"));
525    }
526    if bytes.len() >= 12 && &bytes[4..8] == b"ftyp" {
527        let brand = &bytes[8..12];
528        if matches!(
529            brand,
530            b"heic" | b"heix" | b"hevc" | b"hevx" | b"mif1" | b"msf1"
531        ) {
532            return Some((WorkspaceFileKind::Image, "image/heic"));
533        }
534        if matches!(brand, b"avif" | b"avis") {
535            return Some((WorkspaceFileKind::Image, "image/avif"));
536        }
537        if matches!(brand, b"crx " | b"crx2" | b"crx3") {
538            return Some((WorkspaceFileKind::RawImage, "image/x-canon-cr3"));
539        }
540    }
541    None
542}
543
544fn detect_extension_kind(extension: Option<&str>) -> Option<(WorkspaceFileKind, &'static str)> {
545    match extension? {
546        "md" | "markdown" => Some((WorkspaceFileKind::Markdown, "text/markdown; charset=utf-8")),
547        "json" => Some((WorkspaceFileKind::Json, "application/json")),
548        "txt" | "rs" | "toml" | "yaml" | "yml" | "js" | "ts" | "tsx" | "jsx" | "py" | "go"
549        | "java" | "c" | "cc" | "cpp" | "h" | "hpp" | "swift" | "sh" | "zsh" | "bash" => {
550            Some((WorkspaceFileKind::Text, "text/plain; charset=utf-8"))
551        }
552        "pdf" => Some((WorkspaceFileKind::Pdf, "application/pdf")),
553        "png" => Some((WorkspaceFileKind::Image, "image/png")),
554        "jpg" | "jpeg" => Some((WorkspaceFileKind::Image, "image/jpeg")),
555        "gif" => Some((WorkspaceFileKind::Image, "image/gif")),
556        "webp" => Some((WorkspaceFileKind::Image, "image/webp")),
557        "heic" | "heif" => Some((WorkspaceFileKind::Image, "image/heic")),
558        "avif" => Some((WorkspaceFileKind::Image, "image/avif")),
559        "tif" | "tiff" => Some((WorkspaceFileKind::Image, "image/tiff")),
560        "doc" | "docx" | "xls" | "xlsx" | "ppt" | "pptx" | "rtf" | "epub" => Some((
561            WorkspaceFileKind::OfficeDocument,
562            "application/octet-stream",
563        )),
564        "sqlite" | "sqlite3" | "db" => {
565            Some((WorkspaceFileKind::SqliteDatabase, "application/vnd.sqlite3"))
566        }
567        "zip" | "tar" | "tgz" | "gz" => {
568            Some((WorkspaceFileKind::Archive, "application/octet-stream"))
569        }
570        _ => None,
571    }
572}
573
/// Returns `true` when `extension` is one of the recognised camera RAW extensions.
///
/// The comparison is case-sensitive, so `extension` must already be lowercase. `None` is never
/// a RAW extension.
fn is_raw_extension(extension: Option<&str>) -> bool {
    matches!(
        extension,
        Some(
            "dng"
                | "cr2"
                | "cr3"
                | "nef"
                | "arw"
                | "raf"
                | "rw2"
                | "orf"
                | "pef"
                | "srw"
                | "x3f"
                | "erf"
                | "kdc"
        )
    )
}

/// Returns `true` when `bytes` look like UTF-8 text.
///
/// The buffer must contain no NUL byte and must be valid UTF-8, with one exception: a
/// multi-byte character cut off at the very end of the buffer is tolerated. Callers may pass a
/// bounded prefix of a larger file, and the cut can land in the middle of a character; without
/// this allowance, valid non-ASCII text would be reported as binary. An invalid sequence
/// anywhere else still fails the check. An empty buffer counts as text.
fn is_utf8_text(bytes: &[u8]) -> bool {
    if bytes.contains(&0) {
        return false;
    }
    match std::str::from_utf8(bytes) {
        Ok(_) => true,
        // `error_len()` is `None` only when the input ended inside an otherwise valid sequence.
        Err(error) => error.error_len().is_none(),
    }
}

/// Returns `true` when `bytes` carry the `ustar` magic at byte offset 257.
///
/// The buffer only needs to be long enough to contain the five magic bytes (262 bytes);
/// shorter buffers never match.
fn is_tar_archive(bytes: &[u8]) -> bool {
    // `get` returns `None` for a short buffer, so no separate length check is needed.
    bytes.get(257..262) == Some(&b"ustar"[..])
}