use log::trace;
use std::collections::HashMap;
use crate::crawling::ExitCode;
use crate::crawling::Request;
use crate::database::crawler::structs::CrawlerDatabase;
use crate::database::error::SmuggleDatabaseErrorExtension;
use crate::database::id::CrawlLogEntryId;
use crate::database::id::NumericDatabseId;
use crate::database::id::RequestId;
use crate::database::DatabaseError;
use crate::database::sqlite_helper::*;
impl CrawlerDatabase {
    /// Inserts `request` as a new row of the `request` table and returns the
    /// id of that row.
    ///
    /// The request's URL is resolved to a URL id first (and added to the URL
    /// table if it is not known yet) via `get_or_add_url_id`.
    ///
    /// # Errors
    ///
    /// Returns a [`DatabaseError`] if the database is not writable, if the
    /// URL id cannot be obtained, or if preparing/executing the `INSERT`
    /// fails.
    pub fn store_request(
        &mut self,
        request: &Request,
    ) -> Result<RequestId, DatabaseError> {
        self.base().assert_writable("store_request")?;
        trace!("crawler_db.store_request()");

        let url_id = self.base_mut().get_or_add_url_id(&request.url)?;
        let mut statement = self.connection().prepare_cached(
            "INSERT INTO request (
                crawl_log_id,
                url_id,
                time_sent_unix_utc,
                robotstxt_approved,
                request_duration_ms,
                exit_code,
                server_last_modified_unix_utc,
                http_status_code,
                http_etag
            ) Values (?,?,?,?,?, ?,?,?,?)
            ",
        )?;
        // The tuple order must match the column list of the INSERT above.
        statement.execute((
            request.crawl_log_entry,
            url_id,
            request.time_sent.timestamp(),
            request.robotstxt_approved,
            request.request_duration_ms,
            request.exit_code.to_number(),
            to_unix_timestamp_opt(request.server_last_modified),
            request.http.as_ref().map(|h| h.status_code),
            // Bound by reference: the value is only read while the statement
            // executes, so there is no need to clone it.
            request.http.as_ref().map(|h| &h.etag),
        ))?;

        Ok(RequestId::new(self.connection().last_insert_rowid()))
    }

    /// Loads the request stored under `request_id`.
    ///
    /// # Errors
    ///
    /// Returns a [`DatabaseError`] if the query fails (this includes the case
    /// where no row with that id exists, which `query_row` reports as an
    /// error), if a column cannot be converted, or if looking up the
    /// request's URL fails.
    pub fn get_request(&self, request_id: RequestId) -> Result<Request, DatabaseError> {
        trace!("crawler_db.get_request()");
        self.fetch_request_row(request_id)
    }

    /// Loads all requests listed in `request_ids`, keyed by their id.
    ///
    /// Ids that occur more than once in `request_ids` produce a single map
    /// entry. The ids are looked up one at a time; the first failing lookup
    /// aborts the whole call and no partial result is returned.
    ///
    /// # Errors
    ///
    /// Returns a [`DatabaseError`] under the same conditions as
    /// [`CrawlerDatabase::get_request`], for any of the given ids.
    pub fn get_request_bulk(
        &self,
        request_ids: &[RequestId],
    ) -> Result<HashMap<RequestId, Request>, DatabaseError> {
        trace!("crawler_db.get_request_bulk()");

        let mut results = HashMap::with_capacity(request_ids.len());
        for &request_id in request_ids {
            // The helper goes through the connection's statement cache, so
            // the SELECT is not re-compiled for every id.
            results.insert(request_id, self.fetch_request_row(request_id)?);
        }
        Ok(results)
    }

    /// Reads a single `request` row and converts it into a [`Request`].
    ///
    /// Shared by [`CrawlerDatabase::get_request`] and
    /// [`CrawlerDatabase::get_request_bulk`] so that the column list and the
    /// positional indices used to decode it live in exactly one place.
    fn fetch_request_row(&self, request_id: RequestId) -> Result<Request, DatabaseError> {
        let mut statement = self.connection().prepare_cached(
            "
            SELECT
                crawl_log_id,
                url_id,
                time_sent_unix_utc,
                request_duration_ms,
                robotstxt_approved,
                exit_code,
                server_last_modified_unix_utc,
                http_status_code,
                http_etag
            FROM request
            WHERE request_id = ?
            ",
        )?;
        // Column indices below refer to the SELECT list above (0-based).
        let request = statement.query_row((request_id,), |row| {
            Ok(Request {
                crawl_log_entry: CrawlLogEntryId::new(row.get(0)?),
                // The URL lookup yields a database error, but this closure
                // has to return rusqlite's error type; the error is carried
                // through it and converted back by the `?` after `query_row`.
                url: self
                    .base()
                    .get_url_by_id(row.get(1)?)
                    .smuggle_through_rusqlite()?,
                time_sent: from_unix_timestamp_or_epoch(row.get(2)?),
                request_duration_ms: row.get(3)?,
                robotstxt_approved: row.get(4)?,
                exit_code: ExitCode::from_number(row.get(5)?),
                server_last_modified: from_unix_timestamp_opt(row.get(6)?),
                // Columns 7 and 8 are http_status_code and http_etag.
                http: http_summary_from_row(row, 7, 8)?,
            })
        })?;
        Ok(request)
    }
}