#![allow(dead_code)]
use crate::db::AnalyticsDb;
use crate::error::Result;
use serde::{Deserialize, Serialize};
/// Category of entity that `extract_entities` can report.
///
/// Because of `rename_all = "lowercase"`, serde reads and writes each variant
/// as its lowercase name (for example `"person"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum EntityType {
/// People, unnested from the `persons` column of the `gkg` table.
Person,
/// Organizations, unnested from the `organizations` column of the `gkg` table.
Organization,
/// Places, read from `action_geo_fullname` in the `events` table.
Location,
/// Event participants, read from the `actor1_*` / `actor2_*` columns of `events`.
Actor,
/// Themes, unnested from the `themes` column of the `gkg` table.
Theme,
/// Actors, locations, persons and organizations combined.
/// NOTE(review): `extract_entities` does not include themes for `All` — confirm this is intended.
All,
}
impl EntityType {
pub fn as_str(&self) -> &'static str {
match self {
Self::Person => "person",
Self::Organization => "organization",
Self::Location => "location",
Self::Actor => "actor",
Self::Theme => "theme",
Self::All => "all",
}
}
}
/// One aggregated entity row produced by the extractors in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntityResult {
/// Display name. Actors are rendered as `name (code)` and locations as
/// `name [country]` when the secondary column is non-empty.
pub name: String,
/// Which extractor produced this row.
pub entity_type: EntityType,
/// Value of `COUNT(*)` for the group; `0` when the column is missing or not an integer.
pub count: i64,
/// Value of `AVG(...)` over the tone column; `None` when it cannot be read as a float.
pub avg_tone: Option<f64>,
/// `MIN` of the date column, rendered as the decimal string of the integer value;
/// `None` when the column cannot be read as an integer.
pub first_seen: Option<String>,
/// `MAX` of the date column, rendered the same way as `first_seen`.
pub last_seen: Option<String>,
}
/// Options controlling which entities are extracted and how many are returned.
#[derive(Debug, Clone)]
pub struct EntitiesConfig {
/// Entity category to extract; `EntityType::All` combines several extractors.
pub entity_type: EntityType,
/// Minimum group size; used as `HAVING COUNT(*) >= min_count` in every query.
pub min_count: u32,
/// Maximum number of rows, used both as the SQL `LIMIT` of each query and to
/// truncate the final merged list.
pub limit: u32,
/// Optional inclusive lower date bound. Hyphens are stripped before the value
/// is placed in the SQL filter, so `2024-01-15` is compared as `20240115`.
/// NOTE(review): only the actor and location extractors read this field.
pub start_date: Option<String>,
/// Optional inclusive upper date bound, handled like `start_date`.
pub end_date: Option<String>,
}
impl Default for EntitiesConfig {
fn default() -> Self {
Self {
entity_type: EntityType::All,
min_count: 5,
limit: 50,
start_date: None,
end_date: None,
}
}
}
/// Runs the extractor(s) selected by `config.entity_type` and returns the
/// merged rows, highest count first, capped at `config.limit` entries.
///
/// `EntityType::All` concatenates actors, locations, persons and
/// organizations (in that order) before sorting. The sort is stable, so rows
/// with equal counts keep that relative order.
///
/// NOTE(review): themes are not part of `All` — confirm this is intentional.
///
/// # Errors
///
/// Returns the first error produced by any of the underlying extractors.
pub fn extract_entities(db: &AnalyticsDb, config: &EntitiesConfig) -> Result<Vec<EntityResult>> {
    let mut entities = match config.entity_type {
        EntityType::Actor => extract_actors(db, config)?,
        EntityType::Location => extract_locations(db, config)?,
        EntityType::Person => extract_persons_from_gkg(db, config)?,
        EntityType::Organization => extract_organizations_from_gkg(db, config)?,
        EntityType::Theme => extract_themes_from_gkg(db, config)?,
        EntityType::All => {
            let mut combined = extract_actors(db, config)?;
            combined.extend(extract_locations(db, config)?);
            combined.extend(extract_persons_from_gkg(db, config)?);
            combined.extend(extract_organizations_from_gkg(db, config)?);
            combined
        }
    };

    // Descending by count; `sort_by_key` is stable like the comparator form.
    entities.sort_by_key(|entity| std::cmp::Reverse(entity.count));
    entities.truncate(config.limit as usize);
    Ok(entities)
}
fn build_date_filter(config: &EntitiesConfig, date_column: &str) -> String {
let mut conditions = Vec::new();
if let Some(ref start) = config.start_date {
conditions.push(format!("{} >= {}", date_column, start.replace('-', "")));
}
if let Some(ref end) = config.end_date {
conditions.push(format!("{} <= {}", date_column, end.replace('-', "")));
}
if conditions.is_empty() {
String::new()
} else {
format!("WHERE {}", conditions.join(" AND "))
}
}
/// Aggregates actor mentions from the `events` table.
///
/// The `actor1_*` and `actor2_*` columns are unioned into a single stream and
/// grouped by code and name. Groups smaller than `config.min_count` are
/// dropped, and at most `config.limit` rows are returned, highest count first.
/// The optional date range from `config` is applied to `sql_date` in both
/// branches of the union.
///
/// Each result is named `name (code)`, or just `code` when the name column is
/// empty or not a string.
///
/// # Errors
///
/// Propagates any error returned by `db.query`.
fn extract_actors(db: &AnalyticsDb, config: &EntitiesConfig) -> Result<Vec<EntityResult>> {
    // `build_date_filter` yields either "" or "WHERE <conditions>". Each union
    // branch already has its own WHERE, so the date conditions are appended
    // with AND; emitting a second WHERE would make the statement invalid.
    let date_filter = build_date_filter(config, "sql_date");
    let date_condition = if date_filter.is_empty() {
        String::new()
    } else {
        let bounds = date_filter
            .strip_prefix("WHERE ")
            .unwrap_or(date_filter.as_str());
        format!("AND {}", bounds)
    };
    let having_clause = format!("HAVING COUNT(*) >= {}", config.min_count);
    let sql = format!(
        r#"
SELECT
actor_code,
actor_name,
COUNT(*) as cnt,
AVG(avg_tone) as tone,
MIN(sql_date) as first_seen,
MAX(sql_date) as last_seen
FROM (
SELECT actor1_code as actor_code, actor1_name as actor_name, avg_tone, sql_date
FROM events
WHERE actor1_code IS NOT NULL {0}
UNION ALL
SELECT actor2_code as actor_code, actor2_name as actor_name, avg_tone, sql_date
FROM events
WHERE actor2_code IS NOT NULL {0}
)
GROUP BY actor_code, actor_name
{1}
ORDER BY cnt DESC
LIMIT {2}
"#,
        date_condition, having_clause, config.limit
    );

    let query_result = db.query(&sql)?;
    let results = query_result
        .rows
        .iter()
        .map(|row| {
            let code = row.get(0).and_then(|v| v.as_str()).unwrap_or("");
            let name = row.get(1).and_then(|v| v.as_str()).unwrap_or("");
            let display_name = if name.is_empty() {
                code.to_string()
            } else {
                format!("{} ({})", name, code)
            };
            EntityResult {
                name: display_name,
                entity_type: EntityType::Actor,
                count: row.get(2).and_then(|v| v.as_i64()).unwrap_or(0),
                avg_tone: row.get(3).and_then(|v| v.as_f64()),
                // Dates come back as integers and are exposed as their decimal text.
                first_seen: row.get(4).and_then(|v| v.as_i64()).map(|d| d.to_string()),
                last_seen: row.get(5).and_then(|v| v.as_i64()).map(|d| d.to_string()),
            }
        })
        .collect();
    Ok(results)
}
/// Aggregates event locations from the `events` table.
///
/// Rows are grouped by `action_geo_fullname` and `action_geo_country_code`,
/// filtered to groups of at least `config.min_count` rows, and capped at
/// `config.limit`, highest count first. The optional date range from `config`
/// is applied to `sql_date`.
///
/// Each result is named `name [country]`, or just `name` when the country
/// column is empty or not a string.
///
/// # Errors
///
/// Propagates any error returned by `db.query`.
fn extract_locations(db: &AnalyticsDb, config: &EntitiesConfig) -> Result<Vec<EntityResult>> {
    let date_filter = build_date_filter(config, "sql_date");
    // The NOT NULL check always needs exactly one WHERE: without date bounds
    // it opens the clause itself, otherwise it is chained onto them with AND.
    let (clause_head, connector) = if date_filter.is_empty() {
        ("WHERE", "")
    } else {
        (date_filter.as_str(), "AND")
    };
    let min_count_clause = format!("HAVING COUNT(*) >= {}", config.min_count);
    let query = format!(
        r#"
SELECT
action_geo_fullname,
action_geo_country_code,
COUNT(*) as cnt,
AVG(avg_tone) as tone,
MIN(sql_date) as first_seen,
MAX(sql_date) as last_seen
FROM events
{}
{} action_geo_fullname IS NOT NULL
GROUP BY action_geo_fullname, action_geo_country_code
{}
ORDER BY cnt DESC
LIMIT {}
"#,
        clause_head, connector, min_count_clause, config.limit
    );

    let fetched = db.query(&query)?;
    let places = fetched
        .rows
        .iter()
        .map(|columns| {
            let place = columns.get(0).and_then(|v| v.as_str()).unwrap_or("");
            let country = columns.get(1).and_then(|v| v.as_str()).unwrap_or("");
            let label = if country.is_empty() {
                place.to_string()
            } else {
                format!("{} [{}]", place, country)
            };
            EntityResult {
                name: label,
                entity_type: EntityType::Location,
                count: columns.get(2).and_then(|v| v.as_i64()).unwrap_or(0),
                avg_tone: columns.get(3).and_then(|v| v.as_f64()),
                first_seen: columns.get(4).and_then(|v| v.as_i64()).map(|d| d.to_string()),
                last_seen: columns.get(5).and_then(|v| v.as_i64()).map(|d| d.to_string()),
            }
        })
        .collect();
    Ok(places)
}
/// Aggregates person mentions from the `gkg` table.
///
/// The `persons` column is unnested so each person becomes its own row, then
/// rows are grouped by person. Groups smaller than `config.min_count` are
/// dropped and at most `config.limit` rows are returned, highest count first.
///
/// NOTE(review): `config.start_date` / `config.end_date` are not applied to
/// this query — confirm whether the `gkg` `date` column should be filtered.
///
/// # Errors
///
/// Propagates any error returned by `db.query`.
fn extract_persons_from_gkg(db: &AnalyticsDb, config: &EntitiesConfig) -> Result<Vec<EntityResult>> {
    let min_count_clause = format!("HAVING COUNT(*) >= {}", config.min_count);
    let query = format!(
        r#"
SELECT
person,
COUNT(*) as cnt,
AVG(tone) as tone,
MIN(date) as first_seen,
MAX(date) as last_seen
FROM (
SELECT UNNEST(persons) as person, tone, date
FROM gkg
WHERE persons IS NOT NULL
)
GROUP BY person
{}
ORDER BY cnt DESC
LIMIT {}
"#,
        min_count_clause, config.limit
    );

    let fetched = db.query(&query)?;
    let people = fetched
        .rows
        .iter()
        .map(|columns| EntityResult {
            // A missing or non-string name falls back to the empty string.
            name: columns
                .get(0)
                .and_then(|v| v.as_str())
                .unwrap_or_default()
                .to_owned(),
            entity_type: EntityType::Person,
            count: columns.get(1).and_then(|v| v.as_i64()).unwrap_or(0),
            avg_tone: columns.get(2).and_then(|v| v.as_f64()),
            first_seen: columns.get(3).and_then(|v| v.as_i64()).map(|d| d.to_string()),
            last_seen: columns.get(4).and_then(|v| v.as_i64()).map(|d| d.to_string()),
        })
        .collect();
    Ok(people)
}
/// Aggregates organization mentions from the `gkg` table.
///
/// The `organizations` column is unnested so each organization becomes its
/// own row, then rows are grouped by organization. Groups smaller than
/// `config.min_count` are dropped and at most `config.limit` rows are
/// returned, highest count first.
///
/// NOTE(review): `config.start_date` / `config.end_date` are not applied to
/// this query — confirm whether the `gkg` `date` column should be filtered.
///
/// # Errors
///
/// Propagates any error returned by `db.query`.
fn extract_organizations_from_gkg(db: &AnalyticsDb, config: &EntitiesConfig) -> Result<Vec<EntityResult>> {
    let min_count_clause = format!("HAVING COUNT(*) >= {}", config.min_count);
    let query = format!(
        r#"
SELECT
org,
COUNT(*) as cnt,
AVG(tone) as tone,
MIN(date) as first_seen,
MAX(date) as last_seen
FROM (
SELECT UNNEST(organizations) as org, tone, date
FROM gkg
WHERE organizations IS NOT NULL
)
GROUP BY org
{}
ORDER BY cnt DESC
LIMIT {}
"#,
        min_count_clause, config.limit
    );

    let fetched = db.query(&query)?;
    let organizations = fetched
        .rows
        .iter()
        .map(|columns| EntityResult {
            // A missing or non-string name falls back to the empty string.
            name: columns
                .get(0)
                .and_then(|v| v.as_str())
                .unwrap_or_default()
                .to_owned(),
            entity_type: EntityType::Organization,
            count: columns.get(1).and_then(|v| v.as_i64()).unwrap_or(0),
            avg_tone: columns.get(2).and_then(|v| v.as_f64()),
            first_seen: columns.get(3).and_then(|v| v.as_i64()).map(|d| d.to_string()),
            last_seen: columns.get(4).and_then(|v| v.as_i64()).map(|d| d.to_string()),
        })
        .collect();
    Ok(organizations)
}
/// Aggregates theme mentions from the `gkg` table.
///
/// The `themes` column is unnested so each theme becomes its own row, then
/// rows are grouped by theme. Groups smaller than `config.min_count` are
/// dropped and at most `config.limit` rows are returned, highest count first.
///
/// NOTE(review): `config.start_date` / `config.end_date` are not applied to
/// this query — confirm whether the `gkg` `date` column should be filtered.
///
/// # Errors
///
/// Propagates any error returned by `db.query`.
fn extract_themes_from_gkg(db: &AnalyticsDb, config: &EntitiesConfig) -> Result<Vec<EntityResult>> {
    let min_count_clause = format!("HAVING COUNT(*) >= {}", config.min_count);
    let query = format!(
        r#"
SELECT
theme,
COUNT(*) as cnt,
AVG(tone) as tone,
MIN(date) as first_seen,
MAX(date) as last_seen
FROM (
SELECT UNNEST(themes) as theme, tone, date
FROM gkg
WHERE themes IS NOT NULL
)
GROUP BY theme
{}
ORDER BY cnt DESC
LIMIT {}
"#,
        min_count_clause, config.limit
    );

    let fetched = db.query(&query)?;
    let themes = fetched
        .rows
        .iter()
        .map(|columns| EntityResult {
            // A missing or non-string name falls back to the empty string.
            name: columns
                .get(0)
                .and_then(|v| v.as_str())
                .unwrap_or_default()
                .to_owned(),
            entity_type: EntityType::Theme,
            count: columns.get(1).and_then(|v| v.as_i64()).unwrap_or(0),
            avg_tone: columns.get(2).and_then(|v| v.as_f64()),
            first_seen: columns.get(3).and_then(|v| v.as_i64()).map(|d| d.to_string()),
            last_seen: columns.get(4).and_then(|v| v.as_i64()).map(|d| d.to_string()),
        })
        .collect();
    Ok(themes)
}