pub struct Record<T: BodyKind> { /* private fields */ }
Expand description
A single WARC record.
A record can be constructed by a RecordBuilder, or by reading from a stream.
The type parameter T indicates the representation of this record’s body.
A record is guaranteed to be valid according to the specification it conforms to, except:
- The validity of the WARC-Record-ID header is not checked
- Date information not in the UTC timezone will be silently converted to UTC
Use the Display trait to generate the formatted representation.
Implementations§
Source§impl<T: BodyKind> Record<T>
impl<T: BodyKind> Record<T>
Sourcepub fn new() -> Record<EmptyBody>
pub fn new() -> Record<EmptyBody>
Create a new empty record with default values.
Using a RecordBuilder is more efficient when creating records from known data.
The record returned contains an empty body, and the following fields:
- WARC-Record-ID: generated by generate_record_id()
- WARC-Date: the current moment in time
- WARC-Type: resource
- Content-Length: 0
Examples found in repository?
5fn main() -> Result<(), std::io::Error> {
6 let date = Utc::now();
7 let body = format!("wrote to the file on {}", date);
8 let body = body.into_bytes();
9
10 let mut headers = Record::<BufferedBody>::new();
11 headers.set_warc_type(RecordType::WarcInfo);
12 headers.set_date(date);
13 headers
14 .set_header(WarcHeader::IPAddress, "127.0.0.1")
15 .expect("BUG: should be a valid IP address");
16 let record = headers.add_body(body);
17
18 let mut file = WarcWriter::from_path("warc_example.warc")?;
19
20 let bytes_written = file.write(&record)?;
21
22 println!("{} bytes written.", bytes_written);
23
24 Ok(())
25}
Sourcepub fn with_body<B: Into<Vec<u8>>>(body: B) -> Record<BufferedBody>
pub fn with_body<B: Into<Vec<u8>>>(body: B) -> Record<BufferedBody>
Create a new record with a known body.
Using a RecordBuilder is more efficient when creating records from known data.
The record returned contains the passed body buffer, and the following fields:
- WARC-Record-ID: generated by generate_record_id()
- WARC-Date: the current moment in time
- WARC-Type: resource
- Content-Length: body.len()
Sourcepub fn generate_record_id() -> String
pub fn generate_record_id() -> String
Generate and return a new value suitable for use in the WARC-Record-ID header.
§Compatibility
The standard only places a small number of constraints on this field:
- This value is globally unique “for its period of use”
- This value is a valid URI
- This value “clearly indicate[s] a documented and registered scheme to which it conforms.”
These guarantees will be upheld by all generated outputs, where the “period of use” is presumed to be indefinite and unlimited.
However, any specific algorithm used to generate values is not part of the crate’s public API for purposes of semantic versioning.
§Implementation
The current implementation generates random values based on UUID version 4.
Examples found in repository?
6fn main() {
7 let body = "hello warc! 👋".to_owned();
8
9 let headers = RawRecordHeader {
10 version: "1.0".to_owned(),
11 headers: vec![
12 (
13 WarcHeader::RecordID,
14 Record::<BufferedBody>::generate_record_id().into_bytes(),
15 ),
16 (
17 WarcHeader::WarcType,
18 RecordType::WarcInfo.to_string().into_bytes(),
19 ),
20 (
21 WarcHeader::Date,
22 Utc::now()
23 .to_rfc3339_opts(SecondsFormat::Secs, true)
24 .into_bytes(),
25 ),
26 (WarcHeader::IPAddress, "127.0.0.1".to_owned().into_bytes()),
27 (
28 WarcHeader::ContentLength,
29 body.len().to_string().into_bytes(),
30 ),
31 ]
32 .into_iter()
33 .collect(),
34 };
35
36 println!("{}{}", headers, body);
37}
More examples
6fn main() -> Result<(), std::io::Error> {
7 let date = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
8 let body = format!("wrote to the file on {}", date);
9 let body = body.into_bytes();
10
11 let headers = RawRecordHeader {
12 version: "1.0".to_owned(),
13 headers: vec![
14 (
15 WarcHeader::RecordID,
16 Record::<BufferedBody>::generate_record_id().into_bytes(),
17 ),
18 (
19 WarcHeader::WarcType,
20 RecordType::WarcInfo.to_string().into_bytes(),
21 ),
22 (WarcHeader::Date, date.into_bytes()),
23 (WarcHeader::IPAddress, "127.0.0.1".to_owned().into_bytes()),
24 (
25 WarcHeader::ContentLength,
26 body.len().to_string().into_bytes(),
27 ),
28 ]
29 .into_iter()
30 .collect(),
31 };
32
33 let mut file = WarcWriter::from_path("warc_example.warc")?;
34
35 let bytes_written = file.write_raw(headers, &body)?;
36
37 println!("{} bytes written.", bytes_written);
38
39 Ok(())
40}
6fn main() -> Result<(), std::io::Error> {
7 let date = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
8 let body = format!("wrote to the file on {}", date);
9 let body = body.into_bytes();
10
11 let headers = RawRecordHeader {
12 version: "1.0".to_owned(),
13 headers: vec![
14 (
15 WarcHeader::RecordID,
16 Record::<BufferedBody>::generate_record_id().into_bytes(),
17 ),
18 (
19 WarcHeader::WarcType,
20 RecordType::WarcInfo.to_string().into_bytes(),
21 ),
22 (WarcHeader::Date, date.into_bytes()),
23 (WarcHeader::IPAddress, "127.0.0.1".to_owned().into_bytes()),
24 (
25 WarcHeader::ContentLength,
26 body.len().to_string().into_bytes(),
27 ),
28 ]
29 .into_iter()
30 .collect(),
31 };
32
33 let mut file = WarcWriter::from_path_gzip("warc_example.warc.gz")?;
34
35 let bytes_written = file.write_raw(headers, &body)?;
36
37 // NB: the compression stream must be finish()ed, or the file will be truncated
38 let gzip_stream = file.into_inner()?;
39 gzip_stream.finish().into_result()?;
40
41 println!("{} bytes written.", bytes_written);
42
43 Ok(())
44}
Sourcepub fn warc_version(&self) -> &str
pub fn warc_version(&self) -> &str
Return the WARC version string of this record.
Sourcepub fn set_warc_version<S: Into<String>>(&mut self, id: S)
pub fn set_warc_version<S: Into<String>>(&mut self, id: S)
Set the WARC version string of this record.
Sourcepub fn warc_id(&self) -> &str
pub fn warc_id(&self) -> &str
Return the WARC-Record-ID header for this record.
Examples found in repository?
4fn main() -> Result<(), std::io::Error> {
5 let file = WarcReader::from_path("warc_example.warc")?;
6
7 let mut count = 0;
8 for record in file.iter_records() {
9 count += 1;
10 match record {
11 Err(err) => println!("ERROR: {}\r\n", err),
12 Ok(record) => {
13 println!("{}: {}", WarcHeader::RecordID, record.warc_id(),);
14 println!("{}: {}", WarcHeader::Date, record.date(),);
15 println!();
16 }
17 }
18 }
19
20 println!("Total records: {}", count);
21
22 Ok(())
23}
More examples
4fn main() -> Result<(), std::io::Error> {
5 let file = WarcReader::from_path_gzip("warc_example.warc.gz")?;
6
7 let mut count = 0;
8 for record in file.iter_records() {
9 count += 1;
10 match record {
11 Err(err) => println!("ERROR: {}\r\n", err),
12 Ok(record) => {
13 println!("{}: {}", WarcHeader::RecordID, record.warc_id());
14 println!("{}: {}", WarcHeader::Date, record.date());
15 println!();
16 }
17 }
18 }
19
20 println!("Total records: {}", count);
21
22 Ok(())
23}
Sourcepub fn set_warc_id<S: Into<String>>(&mut self, id: S)
pub fn set_warc_id<S: Into<String>>(&mut self, id: S)
Set the WARC-Record-ID header for this record.
Note that this value is not checked for validity.
Sourcepub fn warc_type(&self) -> &RecordType
pub fn warc_type(&self) -> &RecordType
Return the WARC-Type header for this record.
Sourcepub fn set_warc_type(&mut self, type_: RecordType)
pub fn set_warc_type(&mut self, type_: RecordType)
Set the WARC-Type header for this record.
Examples found in repository?
5fn main() -> Result<(), std::io::Error> {
6 let date = Utc::now();
7 let body = format!("wrote to the file on {}", date);
8 let body = body.into_bytes();
9
10 let mut headers = Record::<BufferedBody>::new();
11 headers.set_warc_type(RecordType::WarcInfo);
12 headers.set_date(date);
13 headers
14 .set_header(WarcHeader::IPAddress, "127.0.0.1")
15 .expect("BUG: should be a valid IP address");
16 let record = headers.add_body(body);
17
18 let mut file = WarcWriter::from_path("warc_example.warc")?;
19
20 let bytes_written = file.write(&record)?;
21
22 println!("{} bytes written.", bytes_written);
23
24 Ok(())
25}
Sourcepub fn date(&self) -> &DateTime<Utc>
pub fn date(&self) -> &DateTime<Utc>
Return the WARC-Date header for this record.
Examples found in repository?
4fn main() -> Result<(), std::io::Error> {
5 let file = WarcReader::from_path("warc_example.warc")?;
6
7 let mut count = 0;
8 for record in file.iter_records() {
9 count += 1;
10 match record {
11 Err(err) => println!("ERROR: {}\r\n", err),
12 Ok(record) => {
13 println!("{}: {}", WarcHeader::RecordID, record.warc_id(),);
14 println!("{}: {}", WarcHeader::Date, record.date(),);
15 println!();
16 }
17 }
18 }
19
20 println!("Total records: {}", count);
21
22 Ok(())
23}
More examples
4fn main() -> Result<(), std::io::Error> {
5 let file = WarcReader::from_path_gzip("warc_example.warc.gz")?;
6
7 let mut count = 0;
8 for record in file.iter_records() {
9 count += 1;
10 match record {
11 Err(err) => println!("ERROR: {}\r\n", err),
12 Ok(record) => {
13 println!("{}: {}", WarcHeader::RecordID, record.warc_id());
14 println!("{}: {}", WarcHeader::Date, record.date());
15 println!();
16 }
17 }
18 }
19
20 println!("Total records: {}", count);
21
22 Ok(())
23}
Sourcepub fn set_date(&mut self, date: DateTime<Utc>)
pub fn set_date(&mut self, date: DateTime<Utc>)
Set the WARC-Date header for this record.
Examples found in repository?
5fn main() -> Result<(), std::io::Error> {
6 let date = Utc::now();
7 let body = format!("wrote to the file on {}", date);
8 let body = body.into_bytes();
9
10 let mut headers = Record::<BufferedBody>::new();
11 headers.set_warc_type(RecordType::WarcInfo);
12 headers.set_date(date);
13 headers
14 .set_header(WarcHeader::IPAddress, "127.0.0.1")
15 .expect("BUG: should be a valid IP address");
16 let record = headers.add_body(body);
17
18 let mut file = WarcWriter::from_path("warc_example.warc")?;
19
20 let bytes_written = file.write(&record)?;
21
22 println!("{} bytes written.", bytes_written);
23
24 Ok(())
25}
Sourcepub fn truncated_type(&self) -> &Option<TruncatedType>
pub fn truncated_type(&self) -> &Option<TruncatedType>
Return the WARC-Truncated header for this record.
Sourcepub fn set_truncated_type(&mut self, truncated_type: TruncatedType)
pub fn set_truncated_type(&mut self, truncated_type: TruncatedType)
Set the WARC-Truncated header for this record.
Sourcepub fn clear_truncated_type(&mut self)
pub fn clear_truncated_type(&mut self)
Remove the WARC-Truncated header for this record.
Sourcepub fn header(&self, header: WarcHeader) -> Option<Cow<'_, str>>
pub fn header(&self, header: WarcHeader) -> Option<Cow<'_, str>>
Return the WARC header requested if present in this record, or None.
Examples found in repository?
10fn main() -> std::io::Result<()> {
11 let mut args = std::env::args_os().skip(1);
12
13 let warc_name = args
14 .next()
15 .ok_or_else(|| usage_err!("compressed warc filename not supplied"))?;
16
17 let filtered_file_names: Vec<_> = args.map(|s| s.to_string_lossy().to_string()).collect();
18 if filtered_file_names.is_empty() {
19 Err(usage_err!("one or more filtered file names not supplied"))?;
20 }
21
22 let mut file = WarcReader::from_path_gzip(warc_name)?;
23
24 let mut count = 0;
25 let mut skipped = 0;
26 let mut stream_iter = file.stream_records();
27 while let Some(record) = stream_iter.next_item() {
28 let record = record.expect("read of headers ok");
29 count += 1;
30 match record.header(WarcHeader::TargetURI).map(|s| s.to_string()) {
31 Some(v) if has_matching_filename(&v, &filtered_file_names) => {
32 println!("Matches filename, skipping record");
33 skipped += 1;
34 }
35 _ => {
36 let buffered = record.into_buffered().expect("read of record ok");
37 println!(
38 "Found record. Data:\n{}",
39 String::from_utf8_lossy(buffered.body())
40 );
41 }
42 }
43 }
44
45 println!("Total records: {}\nSkipped records: {}", count, skipped);
46
47 Ok(())
48}
Sourcepub fn set_header<V>(
&mut self,
header: WarcHeader,
value: V,
) -> Result<Option<Cow<'_, str>>, WarcError>
pub fn set_header<V>( &mut self, header: WarcHeader, value: V, ) -> Result<Option<Cow<'_, str>>, WarcError>
Set a WARC header in this record, returning the previous value if present.
§Errors
If setting a header whose value has a well-formedness test, an error is returned if the value is not well-formed.
Examples found in repository?
5fn main() -> Result<(), std::io::Error> {
6 let date = Utc::now();
7 let body = format!("wrote to the file on {}", date);
8 let body = body.into_bytes();
9
10 let mut headers = Record::<BufferedBody>::new();
11 headers.set_warc_type(RecordType::WarcInfo);
12 headers.set_date(date);
13 headers
14 .set_header(WarcHeader::IPAddress, "127.0.0.1")
15 .expect("BUG: should be a valid IP address");
16 let record = headers.add_body(body);
17
18 let mut file = WarcWriter::from_path("warc_example.warc")?;
19
20 let bytes_written = file.write(&record)?;
21
22 println!("{} bytes written.", bytes_written);
23
24 Ok(())
25}
Sourcepub fn content_length(&self) -> u64
pub fn content_length(&self) -> u64
Return the Content-Length header for this record.
This value is guaranteed to match the actual length of the body.
Source§impl Record<EmptyBody>
impl Record<EmptyBody>
Sourcepub fn add_body<B: Into<Vec<u8>>>(self, body: B) -> Record<BufferedBody>
pub fn add_body<B: Into<Vec<u8>>>(self, body: B) -> Record<BufferedBody>
Add a known body to this record, transforming it into a buffered body record.
Examples found in repository?
5fn main() -> Result<(), std::io::Error> {
6 let date = Utc::now();
7 let body = format!("wrote to the file on {}", date);
8 let body = body.into_bytes();
9
10 let mut headers = Record::<BufferedBody>::new();
11 headers.set_warc_type(RecordType::WarcInfo);
12 headers.set_date(date);
13 headers
14 .set_header(WarcHeader::IPAddress, "127.0.0.1")
15 .expect("BUG: should be a valid IP address");
16 let record = headers.add_body(body);
17
18 let mut file = WarcWriter::from_path("warc_example.warc")?;
19
20 let bytes_written = file.write(&record)?;
21
22 println!("{} bytes written.", bytes_written);
23
24 Ok(())
25}
Sourcepub fn add_fixed_stream<'r, R: Read + 'r>(
self,
stream: &'r mut R,
len: &'r mut u64,
) -> Result<Record<StreamingBody<'r, R>>>
pub fn add_fixed_stream<'r, R: Read + 'r>( self, stream: &'r mut R, len: &'r mut u64, ) -> Result<Record<StreamingBody<'r, R>>>
Add a streaming body to this record, whose expected size may not match the actual stream length.
Source§impl Record<BufferedBody>
impl Record<BufferedBody>
Sourcepub fn strip_body(self) -> Record<EmptyBody>
pub fn strip_body(self) -> Record<EmptyBody>
Strip the body from this record.
Sourcepub fn body(&self) -> &[u8] ⓘ
pub fn body(&self) -> &[u8] ⓘ
Return the body of this record.
Examples found in repository?
10fn main() -> std::io::Result<()> {
11 let mut args = std::env::args_os().skip(1);
12
13 let warc_name = args
14 .next()
15 .ok_or_else(|| usage_err!("compressed warc filename not supplied"))?;
16
17 let filtered_file_names: Vec<_> = args.map(|s| s.to_string_lossy().to_string()).collect();
18 if filtered_file_names.is_empty() {
19 Err(usage_err!("one or more filtered file names not supplied"))?;
20 }
21
22 let mut file = WarcReader::from_path_gzip(warc_name)?;
23
24 let mut count = 0;
25 let mut skipped = 0;
26 let mut stream_iter = file.stream_records();
27 while let Some(record) = stream_iter.next_item() {
28 let record = record.expect("read of headers ok");
29 count += 1;
30 match record.header(WarcHeader::TargetURI).map(|s| s.to_string()) {
31 Some(v) if has_matching_filename(&v, &filtered_file_names) => {
32 println!("Matches filename, skipping record");
33 skipped += 1;
34 }
35 _ => {
36 let buffered = record.into_buffered().expect("read of record ok");
37 println!(
38 "Found record. Data:\n{}",
39 String::from_utf8_lossy(buffered.body())
40 );
41 }
42 }
43 }
44
45 println!("Total records: {}\nSkipped records: {}", count, skipped);
46
47 Ok(())
48}
Sourcepub fn body_mut(&mut self) -> &mut [u8] ⓘ
pub fn body_mut(&mut self) -> &mut [u8] ⓘ
Return a reference to mutate the body of this record, but without changing its length.
To update the body of the record or change its length, use the replace_body method instead.
Sourcepub fn replace_body<V: Into<Vec<u8>>>(&mut self, new_body: V)
pub fn replace_body<V: Into<Vec<u8>>>(&mut self, new_body: V)
Replace the body of this record with the given body.
Sourcepub fn into_raw_parts(self) -> (RawRecordHeader, Vec<u8>)
pub fn into_raw_parts(self) -> (RawRecordHeader, Vec<u8>)
Transform this record into a raw record containing the same data.
Source§impl<'t, T: Read + 't> Record<StreamingBody<'t, T>>
impl<'t, T: Read + 't> Record<StreamingBody<'t, T>>
Sourcepub fn into_buffered(self) -> Result<Record<BufferedBody>>
pub fn into_buffered(self) -> Result<Record<BufferedBody>>
Returns a record with a buffered body by collecting the streaming body.
§Errors
This method can fail if the underlying stream returns an error. If this happens, the state of the stream is not guaranteed.
Examples found in repository?
10fn main() -> std::io::Result<()> {
11 let mut args = std::env::args_os().skip(1);
12
13 let warc_name = args
14 .next()
15 .ok_or_else(|| usage_err!("compressed warc filename not supplied"))?;
16
17 let filtered_file_names: Vec<_> = args.map(|s| s.to_string_lossy().to_string()).collect();
18 if filtered_file_names.is_empty() {
19 Err(usage_err!("one or more filtered file names not supplied"))?;
20 }
21
22 let mut file = WarcReader::from_path_gzip(warc_name)?;
23
24 let mut count = 0;
25 let mut skipped = 0;
26 let mut stream_iter = file.stream_records();
27 while let Some(record) = stream_iter.next_item() {
28 let record = record.expect("read of headers ok");
29 count += 1;
30 match record.header(WarcHeader::TargetURI).map(|s| s.to_string()) {
31 Some(v) if has_matching_filename(&v, &filtered_file_names) => {
32 println!("Matches filename, skipping record");
33 skipped += 1;
34 }
35 _ => {
36 let buffered = record.into_buffered().expect("read of record ok");
37 println!(
38 "Found record. Data:\n{}",
39 String::from_utf8_lossy(buffered.body())
40 );
41 }
42 }
43 }
44
45 println!("Total records: {}\nSkipped records: {}", count, skipped);
46
47 Ok(())
48}
Trait Implementations§
Source§impl Clone for Record<BufferedBody>
impl Clone for Record<BufferedBody>
Source§impl Default for Record<BufferedBody>
impl Default for Record<BufferedBody>
Source§impl Display for Record<BufferedBody>
impl Display for Record<BufferedBody>
Source§impl<'t, T: Read + 't> Read for Record<StreamingBody<'t, T>>
impl<'t, T: Read + 't> Read for Record<StreamingBody<'t, T>>
Source§fn read(&mut self, dst: &mut [u8]) -> Result<usize, Error>
fn read(&mut self, dst: &mut [u8]) -> Result<usize, Error>
1.36.0 · Source§fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> Result<usize, Error>
fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> Result<usize, Error>
Like read, except that it reads into a slice of buffers. Read more
Source§fn is_read_vectored(&self) -> bool
fn is_read_vectored(&self) -> bool
This is a nightly-only experimental API. (can_vector)
1.0.0 · Source§fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<usize, Error>
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<usize, Error>
Reads all bytes until EOF in this source, placing them into buf. Read more
1.0.0 · Source§fn read_to_string(&mut self, buf: &mut String) -> Result<usize, Error>
fn read_to_string(&mut self, buf: &mut String) -> Result<usize, Error>
Reads all bytes until EOF in this source, appending them to buf. Read more
1.6.0 · Source§fn read_exact(&mut self, buf: &mut [u8]) -> Result<(), Error>
fn read_exact(&mut self, buf: &mut [u8]) -> Result<(), Error>
Reads the exact number of bytes required to fill buf. Read more
Source§fn read_buf(&mut self, buf: BorrowedCursor<'_>) -> Result<(), Error>
fn read_buf(&mut self, buf: BorrowedCursor<'_>) -> Result<(), Error>
This is a nightly-only experimental API. (read_buf)
Source§fn read_buf_exact(&mut self, cursor: BorrowedCursor<'_>) -> Result<(), Error>
fn read_buf_exact(&mut self, cursor: BorrowedCursor<'_>) -> Result<(), Error>
This is a nightly-only experimental API. (read_buf)
Reads the exact number of bytes required to fill cursor. Read more
1.0.0 · Source§fn by_ref(&mut self) -> &mut Self where
Self: Sized,
fn by_ref(&mut self) -> &mut Self where
Self: Sized,
Creates a “by reference” adaptor for this instance of Read. Read more