unobtanium 3.0.0

Opinionated Web search engine library with crawler and viewer companion.
Documentation
use blake2::Blake2b512;
use blake2::Digest;
use serde::{Serialize,Deserialize};
use whatlang::Lang;
use whatlang::detect_lang;

use crate::types::Blake2b512Digest;

/// Collection of text inside a file sorted by semantic meaning.
///
/// Every category is stored as one plain `String`. The `add_*` methods of
/// this type append to a category and put a single `\n` between successive
/// entries, so a category built through them is a newline-separated list.
///
/// Serde behaviour (identical for every field): a field missing from the
/// input deserializes to the empty string (`default`), and a field that is
/// empty is left out of the serialized output (`skip_serializing_if`).
#[derive(Debug,Clone,Serialize,Deserialize,Default)]
pub struct TextPile {
	
	/// Normal paragraph text that makes up the main content of a file.
	#[serde(default, skip_serializing_if="String::is_empty")]
	pub text: String,

	/// Text without any meaningful semantics attached.
	#[serde(default, skip_serializing_if="String::is_empty")]
	pub secondary_text: String,

	/// Major headlines; in HTML this is h2 and h3 (and additional h1).
	#[serde(default, skip_serializing_if="String::is_empty")]
	pub big_headlines: String,

	/// Minor headlines.
	#[serde(default, skip_serializing_if="String::is_empty")]
	pub small_headlines: String,

	/// Text that is marked as being code.
	#[serde(default, skip_serializing_if="String::is_empty")]
	pub code_text: String,

	/// Text that is marked as being quoted.
	#[serde(default, skip_serializing_if="String::is_empty")]
	pub quote_text: String,
	
}

impl TextPile {

	/// Creates a pile in which every text category is the empty string.
	pub fn new() -> Self {
		Self::default()
	}

	/// Returns `true` only when all six text categories are empty strings.
	///
	/// Whitespace counts as content here; call [`TextPile::trim`] first if
	/// whitespace-only categories should be treated as empty.
	pub fn is_empty(&self) -> bool {
		self.text.is_empty()
			&& self.secondary_text.is_empty()
			&& self.big_headlines.is_empty()
			&& self.small_headlines.is_empty()
			&& self.code_text.is_empty()
			&& self.quote_text.is_empty()
	}

	/// Strips leading and trailing whitespace from every category.
	///
	/// The strings are trimmed in place, so no new allocation is made.
	pub fn trim(&mut self) {
		Self::trim_in_place(&mut self.text);
		Self::trim_in_place(&mut self.secondary_text);
		Self::trim_in_place(&mut self.big_headlines);
		Self::trim_in_place(&mut self.small_headlines);
		Self::trim_in_place(&mut self.code_text);
		Self::trim_in_place(&mut self.quote_text);
	}

	/// Converts the pile into an `Option`: `None` when [`TextPile::is_empty`]
	/// holds, otherwise `Some(self)` with the pile unchanged.
	pub fn to_opt(self) -> Option<Self> {
		if self.is_empty() {
			None
		} else {
			Some(self)
		}
	}

	/// Appends `new` to `text`, separated from existing content by `\n`.
	/// An empty `new` is ignored.
	pub fn add_text(&mut self, new: &str) {
		Self::append_line(&mut self.text, new);
	}

	/// Appends `new` to `secondary_text`, separated from existing content
	/// by `\n`. An empty `new` is ignored.
	pub fn add_secondary_text(&mut self, new: &str) {
		Self::append_line(&mut self.secondary_text, new);
	}

	/// Appends `new` to `big_headlines`, separated from existing content
	/// by `\n`. An empty `new` is ignored.
	pub fn add_big_headline(&mut self, new: &str) {
		Self::append_line(&mut self.big_headlines, new);
	}

	/// Appends `new` to `small_headlines`, separated from existing content
	/// by `\n`. An empty `new` is ignored.
	pub fn add_small_headline(&mut self, new: &str) {
		Self::append_line(&mut self.small_headlines, new);
	}

	/// Appends `new` to `code_text`, separated from existing content by `\n`.
	/// An empty `new` is ignored.
	pub fn add_code_text(&mut self, new: &str) {
		Self::append_line(&mut self.code_text, new);
	}

	/// Appends `new` to `quote_text`, separated from existing content by `\n`.
	/// An empty `new` is ignored.
	pub fn add_quote_text(&mut self, new: &str) {
		Self::append_line(&mut self.quote_text, new);
	}

	/// Hashes all six categories into one BLAKE2b-512 digest.
	///
	/// The categories are fed to the hasher in declaration order (`text`,
	/// `secondary_text`, `big_headlines`, `small_headlines`, `code_text`,
	/// `quote_text`) with the spacer `"\n\n\n"` between each pair. Changing
	/// the order or the spacer changes every digest produced by this method.
	pub fn calculate_blake2b512_digest(&self) -> Blake2b512Digest {
		const SPACER: &str = "\n\n\n";

		let sections = [
			&self.text,
			&self.secondary_text,
			&self.big_headlines,
			&self.small_headlines,
			&self.code_text,
			&self.quote_text,
		];

		let mut hasher = Blake2b512::new();
		for (index, section) in sections.iter().enumerate() {
			// The spacer goes between sections only, never before the first
			// or after the last one.
			if index > 0 {
				hasher.update(SPACER.as_bytes());
			}
			hasher.update(section.as_bytes());
		}

		hasher.into()
	}

	/// Runs whatlang's `detect_lang` on `text` and returns its result as is,
	/// including `None` when whatlang reports no language.
	pub fn detect_text_main_language(&self) -> Option<Lang> {
		detect_lang(&self.text)
	}

	/// Runs whatlang's `detect_lang` on `secondary_text` and returns its
	/// result as is, including `None` when whatlang reports no language.
	pub fn detect_secondary_text_main_language(&self) -> Option<Lang> {
		detect_lang(&self.secondary_text)
	}

	/// Appends `new` to `target`, inserting a `\n` first when `target`
	/// already has content. An empty `new` leaves `target` untouched.
	fn append_line(target: &mut String, new: &str) {
		if new.is_empty() {
			return;
		}
		if !target.is_empty() {
			target.push('\n');
		}
		target.push_str(new);
	}

	/// Removes leading and trailing whitespace from `value` without
	/// allocating a new string.
	fn trim_in_place(value: &mut String) {
		// Cut the tail first so the front removal has fewer bytes to shift.
		let kept_end = value.trim_end().len();
		value.truncate(kept_end);

		let leading = value.len() - value.trim_start().len();
		value.replace_range(..leading, "");
	}
}