agent_sdk_toolkit/workspace/read_pipeline.rs
1//! Format-aware workspace read pipeline and metadata records. Use this module to
2//! detect file kind, choose bounded extraction behavior, and describe truncation or
3//! parser fallbacks. Pipeline functions read local files but must not leak raw binary
4//! content by default.
5//!
6use std::path::Path;
7
8use serde::{Deserialize, Serialize};
9
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
/// Outcome of file-kind detection for a workspace read.
///
/// This is a plain data record: constructing it performs no I/O. In this module it is
/// produced by `detect_workspace_file` from a path and a byte slice.
pub struct WorkspaceReadDetection {
    /// Detected file kind, used to choose a reader route.
    pub kind: WorkspaceFileKind,
    /// Detected or declared MIME type used for reader selection and
    /// provider-safe summaries.
    pub mime_type: String,
    /// ASCII-lowercased file extension, if the path has one. It is only one
    /// detection signal and is not trusted as sole authority.
    pub extension: Option<String>,
    /// Whether the input is treated as binary so raw bytes are not exposed by
    /// default.
    pub binary: bool,
    /// Which signal (signature, extension, UTF-8 check, or fallback) decided
    /// the detected kind.
    pub confidence: WorkspaceFileTypeConfidence,
}
29
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
/// Structured metadata describing an image or other media file.
///
/// This is a plain data record: constructing it performs no I/O.
pub struct WorkspaceMediaMetadata {
    /// Detected media, document, archive, or parser format.
    pub format: String,
    /// Detected image or media width in pixels when available.
    pub width: Option<u32>,
    /// Detected image or media height in pixels when available.
    pub height: Option<u32>,
    /// Decoded image color type when the parser can determine it.
    pub color_type: Option<String>,
    /// Whether the parser decoded the media/document enough to produce
    /// structured metadata.
    pub decoded: bool,
    /// Parser or fallback path that produced this metadata.
    pub parser: String,
    /// Descriptions of embedded previews discovered in RAW or container
    /// media.
    pub embedded_previews: Vec<WorkspaceEmbeddedPreviewMetadata>,
    /// RAW sensor metadata discovered without demosaicing full image data.
    pub raw_sensor: Option<WorkspaceRawSensorMetadata>,
    /// Apple Photos adjustment sidecar metadata, when a sidecar is present.
    pub apple_photos: Option<WorkspaceApplePhotosMetadata>,
    /// Non-fatal warnings from bounded readers, parsers, or policy
    /// downgrades.
    pub warnings: Vec<String>,
}
58
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
/// Structured metadata describing text extraction from a document.
///
/// This is a plain data record: constructing it performs no I/O.
pub struct WorkspaceDocumentMetadata {
    /// Parser or fallback path that produced this metadata.
    pub parser: String,
    /// Number of pages observed, when the parser can report it.
    pub page_count: Option<usize>,
    /// Number of text characters extracted before truncation or parser
    /// limits.
    pub extracted_chars: usize,
    /// OCR requirement or sidecar metadata for scanned PDFs/images.
    pub ocr: Option<WorkspaceOcrMetadata>,
    /// Non-fatal warnings from bounded readers, parsers, or policy
    /// downgrades.
    pub warnings: Vec<String>,
}
76
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
/// Structured metadata describing OCR text associated with a document or image.
///
/// This is a plain data record: constructing it performs no I/O.
pub struct WorkspaceOcrMetadata {
    /// Parser or fallback path that produced this metadata.
    pub parser: String,
    /// Path to the sidecar file the OCR text came from, when one was used.
    pub sidecar_path: Option<String>,
    /// Observed byte length for the source, sidecar, or extracted record.
    pub byte_len: u64,
    /// Number of text characters extracted before truncation or parser
    /// limits.
    pub extracted_chars: usize,
    /// Whether output was shortened by byte, item, page, archive, or parser
    /// limits.
    pub truncated: bool,
    /// Non-fatal warnings from bounded readers, parsers, or policy
    /// downgrades.
    pub warnings: Vec<String>,
}
98
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
/// Description of one preview image embedded in a RAW or container file.
///
/// This is a plain data record: constructing it performs no I/O. Instances are carried
/// in `WorkspaceMediaMetadata::embedded_previews`.
pub struct WorkspaceEmbeddedPreviewMetadata {
    /// Detected or declared MIME type used for reader selection and
    /// provider-safe summaries.
    pub mime_type: String,
    /// Observed byte length of the preview.
    pub byte_len: u64,
    /// Byte offset where the preview begins, when known.
    // NOTE(review): presumably relative to the start of the containing file — confirm.
    pub offset: Option<u64>,
    /// Stable hash for the bytes or canonical payload used for stale checks
    /// and fingerprints.
    pub content_hash: String,
}
114
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
/// Structured metadata describing RAW sensor data in a camera image.
///
/// This is a plain data record: constructing it performs no I/O. It is carried in
/// `WorkspaceMediaMetadata::raw_sensor`.
// NOTE(review): the numeric fields look like TIFF/EXIF tag values (BitsPerSample,
// Compression, PhotometricInterpretation) — confirm against the RAW reader.
pub struct WorkspaceRawSensorMetadata {
    /// Bits per sample, when the reader found it.
    pub bits_per_sample: Option<u16>,
    /// Numeric compression code, when the reader found it.
    pub compression: Option<u16>,
    /// Numeric photometric interpretation code, when the reader found it.
    pub photometric_interpretation: Option<u16>,
    /// Number of image data strips observed or included in this record.
    pub strip_count: usize,
    /// Strip byte length used for bounds checks, summaries, or truncation
    /// evidence.
    pub strip_byte_len: u64,
    /// Whether pixel data was decoded.
    // NOTE(review): presumably false when only metadata was inspected — confirm.
    pub decoded_pixels: bool,
    /// Deterministic sample hash used for stale checks, package evidence, or
    /// replay comparisons.
    pub sample_hash: Option<String>,
    /// Non-fatal warnings from bounded readers, parsers, or policy
    /// downgrades.
    pub warnings: Vec<String>,
}
143
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
/// Structured metadata describing an Apple Photos adjustment sidecar.
///
/// This is a plain data record: constructing it performs no I/O. It is carried in
/// `WorkspaceMediaMetadata::apple_photos`.
pub struct WorkspaceApplePhotosMetadata {
    /// Path to the Apple Photos adjustment sidecar file.
    pub sidecar_path: String,
    /// Observed byte length for the source, sidecar, or extracted record.
    pub byte_len: u64,
    /// Number of adjustments observed or included in this record.
    pub adjustment_count: usize,
    /// Parser or fallback path that produced this metadata.
    pub parser: String,
    /// Non-fatal warnings from bounded readers, parsers, or policy
    /// downgrades.
    pub warnings: Vec<String>,
}
161
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
/// One entry listed from an archive.
///
/// This is a plain data record: constructing it performs no I/O. Instances are carried
/// in `WorkspaceArchiveMetadata::entries`.
pub struct WorkspaceArchiveEntry {
    /// Path of the entry.
    // NOTE(review): presumably the path as stored inside the archive — confirm.
    pub path: String,
    /// Observed byte length of the entry.
    // NOTE(review): compressed vs. uncompressed size is not visible here — confirm.
    pub byte_len: u64,
    /// Whether this entry is a directory.
    pub directory: bool,
}
174
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
/// Structured metadata describing an archive listing.
///
/// This is a plain data record: constructing it performs no I/O.
pub struct WorkspaceArchiveMetadata {
    /// Parser or fallback path that produced this metadata.
    pub parser: String,
    /// Number of entries observed or included in this record.
    pub entry_count: usize,
    /// Bounded entries included in this record. Limits and truncation are
    /// represented by companion metadata when applicable.
    pub entries: Vec<WorkspaceArchiveEntry>,
    /// Whether output was shortened by byte, item, page, archive, or parser
    /// limits.
    pub truncated: bool,
    /// Non-fatal warnings from bounded readers, parsers, or policy
    /// downgrades.
    pub warnings: Vec<String>,
}
193
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
/// Description of one object found in a SQLite database.
///
/// This is a plain data record: constructing it performs no I/O. Instances are carried
/// in `WorkspaceSqliteMetadata::tables`.
pub struct WorkspaceSqliteTableMetadata {
    /// Name of the database object.
    pub name: String,
    /// Kind/category of the database object.
    // NOTE(review): presumably the schema type such as "table" or "view" — confirm.
    pub kind: String,
    /// Bounded columns included in this record. Limits and truncation are
    /// represented by companion metadata when applicable.
    pub columns: Vec<String>,
    /// Bounded sample rows included in this record, each row a list of
    /// stringified cell values. Limits and truncation are represented by
    /// companion metadata when applicable.
    pub sample_rows: Vec<Vec<String>>,
}
210
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
/// Structured metadata describing a SQLite database inspection.
///
/// This is a plain data record: constructing it performs no I/O.
pub struct WorkspaceSqliteMetadata {
    /// Parser or fallback path that produced this metadata.
    pub parser: String,
    /// Number of tables observed or included in this record.
    pub table_count: usize,
    /// Bounded tables included in this record. Limits and truncation are
    /// represented by companion metadata when applicable.
    pub tables: Vec<WorkspaceSqliteTableMetadata>,
    /// Whether output was shortened by byte, item, page, archive, or parser
    /// limits.
    pub truncated: bool,
    /// Non-fatal warnings from bounded readers, parsers, or policy
    /// downgrades.
    pub warnings: Vec<String>,
}
229
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
/// Structured metadata describing a URI-addressed resource.
///
/// This is a plain data record: constructing it performs no I/O.
pub struct WorkspaceResourceMetadata {
    /// URI scheme resolved by the resource reader.
    pub scheme: String,
    /// Source label or ref for this item; it is metadata and does not fetch
    /// content by itself.
    pub source: String,
    /// Observed byte length for the source, sidecar, or extracted record.
    pub byte_len: u64,
    /// Parser or fallback path that produced this metadata.
    pub parser: String,
    /// Non-fatal warnings from bounded readers, parsers, or policy
    /// downgrades.
    pub warnings: Vec<String>,
}
247
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[serde(rename_all = "snake_case")]
/// File kinds that detection can report.
///
/// Variants serialize as snake_case strings (for example `raw_image`,
/// `sqlite_database`). Serialized names are part of the SDK contract; update fixtures
/// when variants change. Selecting a variant has no side effect by itself.
pub enum WorkspaceFileKind {
    /// Plain text, including recognized source-code extensions and otherwise
    /// unrecognized content that is valid UTF-8 without NUL bytes.
    Text,
    /// Markdown text (`md` / `markdown` extension).
    Markdown,
    /// JSON text (`json` extension).
    Json,
    /// PDF document (`%PDF-` signature or `pdf` extension).
    Pdf,
    /// Raster image such as PNG, JPEG, GIF, TIFF, WebP, HEIC, or AVIF.
    Image,
    /// Camera RAW image, recognized by a RAW extension or a Canon CR3 brand.
    RawImage,
    /// Office-style document: OOXML (`docx`/`xlsx`/`pptx`), OLE compound files, or
    /// other document extensions such as `rtf` and `epub`.
    OfficeDocument,
    /// Archive or compressed container: ZIP, gzip, or tar.
    Archive,
    /// SQLite database (`SQLite format 3` signature or a database extension).
    SqliteDatabase,
    /// URL-addressed resource. The detection function in this file does not produce
    /// this kind.
    UrlResource,
    /// Opaque binary content; the fallback when nothing else matches.
    Binary,
}
276
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[serde(rename_all = "snake_case")]
/// Which signal decided a detected file kind.
///
/// Variants serialize as snake_case strings (`magic`, `extension`, `utf8`, `fallback`).
/// Serialized names are part of the SDK contract; update fixtures when variants change.
/// Selecting a variant has no side effect by itself.
pub enum WorkspaceFileTypeConfidence {
    /// A known byte signature matched the content.
    Magic,
    /// The kind was chosen from the file extension without a matching signature.
    Extension,
    /// No signature or known extension matched, but the bytes are valid UTF-8 text.
    Utf8,
    /// Nothing matched, or a text-like extension carried non-text bytes; the content
    /// is reported as binary.
    Fallback,
}
291
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[serde(rename_all = "snake_case")]
/// Named steps of the workspace read pipeline.
///
/// Variants serialize as snake_case strings (for example `detect_file_type`,
/// `decode_utf8_text`). Serialized names are part of the SDK contract; update fixtures
/// when variants change. Selecting a variant has no side effect by itself.
// NOTE(review): the code that executes these steps is not in this part of the file;
// the per-variant descriptions below restate the variant names only.
pub enum WorkspaceReaderStep {
    /// Detect the file type.
    DetectFileType,
    /// Decode the content as UTF-8 text.
    DecodeUtf8Text,
    /// Extract text from a PDF.
    ExtractPdfText,
    /// Inspect image metadata.
    InspectImageMetadata,
    /// Inspect RAW image metadata.
    InspectRawImageMetadata,
    /// Inspect a preview embedded in a RAW image.
    InspectRawPreview,
    /// Inspect Apple Photos adjustments.
    InspectApplePhotosAdjustments,
    /// Apply the OCR fallback.
    ApplyOcrFallback,
    /// Extract text from an Office document.
    ExtractOfficeText,
    /// Extract text from a legacy Office document.
    ExtractLegacyOfficeText,
    /// List archive entries.
    ListArchiveEntries,
    /// Inspect a SQLite database.
    InspectSqliteDatabase,
    /// Read a `data:` URL.
    ReadDataUrl,
    /// Fail closed on an external resource.
    FailClosedExternalResource,
    /// Read a bounded prefix of the content.
    ReadBoundedPrefix,
    /// Summarize binary content.
    SummarizeBinary,
}
330
331/// Detect workspace file.
332/// This inspects the path and byte prefix to choose a reader route and performs no I/O.
333pub fn detect_workspace_file(path: &Path, bytes: &[u8]) -> WorkspaceReadDetection {
334 let extension = path
335 .extension()
336 .and_then(|extension| extension.to_str())
337 .map(|extension| extension.to_ascii_lowercase());
338
339 if bytes.starts_with(b"%PDF-") {
340 return detected(
341 WorkspaceFileKind::Pdf,
342 "application/pdf",
343 extension,
344 true,
345 WorkspaceFileTypeConfidence::Magic,
346 );
347 }
348
349 if bytes.starts_with(b"SQLite format 3\0") {
350 return detected(
351 WorkspaceFileKind::SqliteDatabase,
352 "application/vnd.sqlite3",
353 extension,
354 true,
355 WorkspaceFileTypeConfidence::Magic,
356 );
357 }
358
359 if let Some((kind, mime_type)) = detect_magic_image(bytes, extension.as_deref()) {
360 return detected(
361 kind,
362 mime_type,
363 extension,
364 true,
365 WorkspaceFileTypeConfidence::Magic,
366 );
367 }
368
369 if bytes.starts_with(b"PK\x03\x04") {
370 return match extension.as_deref() {
371 Some("docx") => detected(
372 WorkspaceFileKind::OfficeDocument,
373 "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
374 extension,
375 true,
376 WorkspaceFileTypeConfidence::Magic,
377 ),
378 Some("xlsx") => detected(
379 WorkspaceFileKind::OfficeDocument,
380 "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
381 extension,
382 true,
383 WorkspaceFileTypeConfidence::Magic,
384 ),
385 Some("pptx") => detected(
386 WorkspaceFileKind::OfficeDocument,
387 "application/vnd.openxmlformats-officedocument.presentationml.presentation",
388 extension,
389 true,
390 WorkspaceFileTypeConfidence::Magic,
391 ),
392 _ => detected(
393 WorkspaceFileKind::Archive,
394 "application/zip",
395 extension,
396 true,
397 WorkspaceFileTypeConfidence::Magic,
398 ),
399 };
400 }
401
402 if bytes.starts_with(b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1") {
403 return detected(
404 WorkspaceFileKind::OfficeDocument,
405 "application/vnd.ms-office",
406 extension,
407 true,
408 WorkspaceFileTypeConfidence::Magic,
409 );
410 }
411
412 if bytes.starts_with(b"\x1f\x8b") {
413 return detected(
414 WorkspaceFileKind::Archive,
415 "application/gzip",
416 extension,
417 true,
418 WorkspaceFileTypeConfidence::Magic,
419 );
420 }
421
422 if is_tar_archive(bytes) {
423 return detected(
424 WorkspaceFileKind::Archive,
425 "application/x-tar",
426 extension,
427 true,
428 WorkspaceFileTypeConfidence::Magic,
429 );
430 }
431
432 if is_raw_extension(extension.as_deref()) {
433 return detected(
434 WorkspaceFileKind::RawImage,
435 "image/x-raw",
436 extension,
437 true,
438 WorkspaceFileTypeConfidence::Extension,
439 );
440 }
441
442 if let Some((kind, mime_type)) = detect_extension_kind(extension.as_deref()) {
443 if matches!(
444 kind,
445 WorkspaceFileKind::Text | WorkspaceFileKind::Markdown | WorkspaceFileKind::Json
446 ) && !is_utf8_text(bytes)
447 {
448 return detected(
449 WorkspaceFileKind::Binary,
450 "application/octet-stream",
451 extension,
452 true,
453 WorkspaceFileTypeConfidence::Fallback,
454 );
455 }
456 let binary = !matches!(
457 kind,
458 WorkspaceFileKind::Text | WorkspaceFileKind::Markdown | WorkspaceFileKind::Json
459 );
460 return detected(
461 kind,
462 mime_type,
463 extension,
464 binary,
465 WorkspaceFileTypeConfidence::Extension,
466 );
467 }
468
469 if is_utf8_text(bytes) {
470 return detected(
471 WorkspaceFileKind::Text,
472 "text/plain; charset=utf-8",
473 extension,
474 false,
475 WorkspaceFileTypeConfidence::Utf8,
476 );
477 }
478
479 detected(
480 WorkspaceFileKind::Binary,
481 "application/octet-stream",
482 extension,
483 true,
484 WorkspaceFileTypeConfidence::Fallback,
485 )
486}
487
488fn detected(
489 kind: WorkspaceFileKind,
490 mime_type: &str,
491 extension: Option<String>,
492 binary: bool,
493 confidence: WorkspaceFileTypeConfidence,
494) -> WorkspaceReadDetection {
495 WorkspaceReadDetection {
496 kind,
497 mime_type: mime_type.to_string(),
498 extension,
499 binary,
500 confidence,
501 }
502}
503
504fn detect_magic_image(
505 bytes: &[u8],
506 extension: Option<&str>,
507) -> Option<(WorkspaceFileKind, &'static str)> {
508 if bytes.starts_with(b"\x89PNG\r\n\x1a\n") {
509 return Some((WorkspaceFileKind::Image, "image/png"));
510 }
511 if bytes.starts_with(b"\xff\xd8\xff") {
512 return Some((WorkspaceFileKind::Image, "image/jpeg"));
513 }
514 if bytes.starts_with(b"GIF87a") || bytes.starts_with(b"GIF89a") {
515 return Some((WorkspaceFileKind::Image, "image/gif"));
516 }
517 if bytes.starts_with(b"II*\0") || bytes.starts_with(b"MM\0*") {
518 if is_raw_extension(extension) {
519 return Some((WorkspaceFileKind::RawImage, "image/x-raw"));
520 }
521 return Some((WorkspaceFileKind::Image, "image/tiff"));
522 }
523 if bytes.len() >= 12 && bytes.starts_with(b"RIFF") && &bytes[8..12] == b"WEBP" {
524 return Some((WorkspaceFileKind::Image, "image/webp"));
525 }
526 if bytes.len() >= 12 && &bytes[4..8] == b"ftyp" {
527 let brand = &bytes[8..12];
528 if matches!(
529 brand,
530 b"heic" | b"heix" | b"hevc" | b"hevx" | b"mif1" | b"msf1"
531 ) {
532 return Some((WorkspaceFileKind::Image, "image/heic"));
533 }
534 if matches!(brand, b"avif" | b"avis") {
535 return Some((WorkspaceFileKind::Image, "image/avif"));
536 }
537 if matches!(brand, b"crx " | b"crx2" | b"crx3") {
538 return Some((WorkspaceFileKind::RawImage, "image/x-canon-cr3"));
539 }
540 }
541 None
542}
543
544fn detect_extension_kind(extension: Option<&str>) -> Option<(WorkspaceFileKind, &'static str)> {
545 match extension? {
546 "md" | "markdown" => Some((WorkspaceFileKind::Markdown, "text/markdown; charset=utf-8")),
547 "json" => Some((WorkspaceFileKind::Json, "application/json")),
548 "txt" | "rs" | "toml" | "yaml" | "yml" | "js" | "ts" | "tsx" | "jsx" | "py" | "go"
549 | "java" | "c" | "cc" | "cpp" | "h" | "hpp" | "swift" | "sh" | "zsh" | "bash" => {
550 Some((WorkspaceFileKind::Text, "text/plain; charset=utf-8"))
551 }
552 "pdf" => Some((WorkspaceFileKind::Pdf, "application/pdf")),
553 "png" => Some((WorkspaceFileKind::Image, "image/png")),
554 "jpg" | "jpeg" => Some((WorkspaceFileKind::Image, "image/jpeg")),
555 "gif" => Some((WorkspaceFileKind::Image, "image/gif")),
556 "webp" => Some((WorkspaceFileKind::Image, "image/webp")),
557 "heic" | "heif" => Some((WorkspaceFileKind::Image, "image/heic")),
558 "avif" => Some((WorkspaceFileKind::Image, "image/avif")),
559 "tif" | "tiff" => Some((WorkspaceFileKind::Image, "image/tiff")),
560 "doc" | "docx" | "xls" | "xlsx" | "ppt" | "pptx" | "rtf" | "epub" => Some((
561 WorkspaceFileKind::OfficeDocument,
562 "application/octet-stream",
563 )),
564 "sqlite" | "sqlite3" | "db" => {
565 Some((WorkspaceFileKind::SqliteDatabase, "application/vnd.sqlite3"))
566 }
567 "zip" | "tar" | "tgz" | "gz" => {
568 Some((WorkspaceFileKind::Archive, "application/octet-stream"))
569 }
570 _ => None,
571 }
572}
573
/// Report whether `extension` is one of the recognized camera RAW extensions.
///
/// The comparison is exact and case-sensitive, so callers must pass a lowercase
/// extension. `None` is never a RAW extension.
fn is_raw_extension(extension: Option<&str>) -> bool {
    const RAW_EXTENSIONS: &[&str] = &[
        "dng", "cr2", "cr3", "nef", "arw", "raf", "rw2", "orf", "pef", "srw", "x3f", "erf", "kdc",
    ];

    match extension {
        Some(candidate) => RAW_EXTENSIONS.contains(&candidate),
        None => false,
    }
}
594
/// Report whether `bytes` can be treated as text: valid UTF-8 with no NUL bytes.
///
/// An empty slice counts as text. A slice that ends part-way through a multi-byte
/// sequence is not valid UTF-8 and is therefore rejected.
fn is_utf8_text(bytes: &[u8]) -> bool {
    let has_nul = bytes.contains(&0);
    !has_nul && std::str::from_utf8(bytes).is_ok()
}
598
/// Report whether `bytes` carries the `ustar` magic of a tar header.
///
/// The magic occupies bytes 257..262, so a buffer of at least 262 bytes is required;
/// anything shorter returns `false`. The checked slice lookup is the only bounds
/// check needed, so a buffer of exactly 262 bytes is accepted.
fn is_tar_archive(bytes: &[u8]) -> bool {
    const MAGIC_OFFSET: usize = 257;
    const MAGIC: &[u8] = b"ustar";

    bytes.get(MAGIC_OFFSET..MAGIC_OFFSET + MAGIC.len()) == Some(MAGIC)
}
601}