use std::io::BufRead;
use std::marker::PhantomData;
use countio::Counter;
use url::Url;
use crate::{
parse::Parser,
record::{BYTE_LIMIT, RECORD_LIMIT},
Error,
};
/// Line-oriented parser over a reader `R` that yields records of type `D`.
///
/// The parser tracks how many lines it has consumed and wraps the reader in
/// a [`Counter`] whose `reader_bytes()` total is compared against
/// `BYTE_LIMIT`, so both limits can be checked before each read.
pub struct TxtParser<R, D> {
// Zero-sized marker tying the parser to its record type `D`.
record_type: PhantomData<D>,
// Wrapped reader; its `reader_bytes()` value is checked against `BYTE_LIMIT`.
pub(crate) reader: Counter<R>,
// Lines consumed so far, counted whether or not they parsed successfully.
pub(crate) records: usize,
}
impl<R, D> TxtParser<R, D> {
pub(crate) fn from_reader(reader: R) -> Self {
Self {
record_type: PhantomData,
reader: Counter::new(reader),
records: 0,
}
}
pub fn get_ref(&self) -> &R {
self.reader.get_ref()
}
pub fn get_mut(&mut self) -> &mut R {
self.reader.get_mut()
}
pub fn into_inner(self) -> R {
self.reader.into_inner()
}
pub(crate) fn try_if_readable(&mut self) -> Result<(), Error> {
if self.records + 1 > RECORD_LIMIT {
return Err(Error::EntryLimit { over: 1 });
}
if self.reader.reader_bytes() > BYTE_LIMIT {
let over = self.reader.reader_bytes() - BYTE_LIMIT;
return Err(Error::ByteLimit { over });
}
Ok(())
}
}
impl<R: BufRead> Parser<R, Url> for TxtParser<R, Url> {
    type Error = Error;

    /// Creates a parser that reads URLs from `reader`, one per line.
    fn new(reader: R) -> Result<Self, Self::Error> {
        Ok(Self::from_reader(reader))
    }

    /// Reads lines until one parses as a [`Url`] and returns it.
    ///
    /// Lines that fail to parse are skipped, but still count towards the
    /// record total. Returns `Ok(None)` once the reader is exhausted.
    ///
    /// # Errors
    ///
    /// Returns the limit error from `try_if_readable` when the record or
    /// byte limit has been reached, or the converted I/O error if reading
    /// a line fails.
    fn read(&mut self) -> Result<Option<Url>, Self::Error> {
        // A single buffer is reused for every skipped line instead of
        // allocating a fresh `String` per iteration.
        let mut line = String::new();
        loop {
            self.try_if_readable()?;

            // `read_line` appends, so the previous line must be dropped first.
            line.clear();
            if self.reader.read_line(&mut line)? == 0 {
                return Ok(None);
            }

            self.records += 1;
            if let Ok(address) = Url::parse(line.as_str()) {
                return Ok(Some(address));
            }
        }
    }

    /// Consumes the parser and returns the underlying reader.
    fn close(self) -> Result<R, Self::Error> {
        Ok(self.into_inner())
    }
}
impl<R, D> std::fmt::Debug for TxtParser<R, D> {
    /// Formats the parser as `TxtParser { bytes, records }`; the reader
    /// itself is not printed, so `R` needs no `Debug` bound.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let bytes = self.reader.reader_bytes();
        let mut out = f.debug_struct("TxtParser");
        out.field("bytes", &bytes);
        out.field("records", &self.records);
        out.finish()
    }
}
#[cfg(feature = "tokio")]
#[cfg_attr(docsrs, doc(cfg(feature = "tokio")))]
mod tokio {
    //! Asynchronous counterpart of the blocking `Parser` implementation.

    use async_trait::async_trait;
    use tokio::io::{AsyncBufRead, AsyncBufReadExt};
    use url::Url;

    use crate::{
        parse::{AsyncParser, TxtParser},
        Error,
    };

    #[async_trait]
    impl<R: AsyncBufRead + Unpin + Send> AsyncParser<R, Url> for TxtParser<R, Url> {
        type Error = Error;

        /// Creates a parser that reads URLs from `reader`, one per line.
        async fn new(reader: R) -> Result<Self, Self::Error> {
            Ok(Self::from_reader(reader))
        }

        /// Reads lines until one parses as a [`Url`] and returns it.
        ///
        /// Lines that fail to parse are skipped, but still count towards
        /// the record total. Returns `Ok(None)` once the reader is
        /// exhausted.
        ///
        /// # Errors
        ///
        /// Returns the limit error from `try_if_readable` when the record
        /// or byte limit has been reached, or the converted I/O error if
        /// reading a line fails.
        async fn read(&mut self) -> Result<Option<Url>, Self::Error> {
            // A single buffer is reused for every skipped line instead of
            // allocating a fresh `String` per iteration.
            let mut line = String::new();
            loop {
                self.try_if_readable()?;

                // `read_line` appends, so the previous line must be dropped
                // first.
                line.clear();
                if self.reader.read_line(&mut line).await? == 0 {
                    return Ok(None);
                }

                self.records += 1;
                if let Ok(address) = Url::parse(line.as_str()) {
                    return Ok(Some(address));
                }
            }
        }

        /// Consumes the parser and returns the underlying reader.
        async fn close(self) -> Result<R, Self::Error> {
            Ok(self.into_inner())
        }
    }
}
#[cfg(test)]
mod test {
    use url::Url;

    use crate::{parse::TxtParser, Error};

    /// Two-line input shared by the blocking and async tests.
    const SITEMAP: &str =
        "https://www.example.com/file1.html\nhttps://www.example.com/file2.html";

    /// The URL expected from the first `read` call.
    const FIRST_URL: &str = "https://www.example.com/file1.html";

    /// The blocking parser yields the first URL of the input.
    #[test]
    fn synk() -> Result<(), Error> {
        // Imported locally so `new`/`read`/`close` resolve to the blocking
        // trait rather than the async one.
        use crate::parse::Parser;

        let mut parser = TxtParser::new(SITEMAP.as_bytes())?;
        let first = parser.read()?;
        parser.close()?;

        let expected = Url::parse(FIRST_URL).ok();
        assert_eq!(first, expected);
        Ok(())
    }

    /// The async parser yields the first URL of the input.
    #[cfg(feature = "tokio")]
    #[tokio::test]
    async fn asynk() -> Result<(), Error> {
        // Imported locally so `new`/`read`/`close` resolve to the async
        // trait rather than the blocking one.
        use crate::parse::AsyncParser;

        let mut parser = TxtParser::new(SITEMAP.as_bytes()).await?;
        let first = parser.read().await?;
        parser.close().await?;

        let expected = Url::parse(FIRST_URL).ok();
        assert_eq!(first, expected);
        Ok(())
    }
}