use serde_json::Value;
use tail_fin_common::TailFinError;
use crate::types::Article;
/// Builds an [`Article`] from a tweet-lookup JSON response.
///
/// The tweet node is read from `/data/tweetResult/result`, falling back to
/// `/data/tweet/result`. When that node has a `tweet` member, the member is
/// used in its place. The title is the `string_value` of the first card
/// binding value keyed `"title"` that carries one (empty when there is none),
/// and the body is the article's `content_state` rendered to Markdown.
///
/// # Errors
///
/// Returns [`TailFinError::Parse`] when no tweet node is found, when the tweet
/// has no `article_results/result/content_state`, or when `blocks` is missing
/// or is not an array.
pub fn parse_article_response(data: &Value, tweet_id: &str) -> Result<Article, TailFinError> {
    let tweet_node = match data
        .pointer("/data/tweetResult/result")
        .or_else(|| data.pointer("/data/tweet/result"))
    {
        Some(node) => node,
        None => return Err(TailFinError::Parse("Tweet not found in response".into())),
    };

    // Some responses nest the actual tweet one level deeper under `tweet`.
    let tweet_data = match tweet_node.get("tweet") {
        Some(inner) => inner,
        None => tweet_node,
    };

    // A missing card, a non-array `binding_values`, or no usable "title"
    // entry all collapse to an empty title.
    let title = tweet_data
        .pointer("/card/legacy/binding_values")
        .and_then(Value::as_array)
        .into_iter()
        .flatten()
        .filter(|entry| entry.get("key").and_then(Value::as_str) == Some("title"))
        .find_map(|entry| entry.pointer("/value/string_value").and_then(Value::as_str))
        .map(str::to_owned)
        .unwrap_or_default();

    let content_state = match tweet_data.pointer("/article_results/result/content_state") {
        Some(state) => state,
        None => {
            return Err(TailFinError::Parse(
                "No article content found. This tweet may not be an article.".into(),
            ))
        }
    };

    let blocks = match content_state.get("blocks").and_then(Value::as_array) {
        Some(list) => list,
        None => return Err(TailFinError::Parse("Article has no blocks.".into())),
    };

    // An absent entity map is passed on as JSON null, so entity lookups simply miss.
    let entity_map = content_state.get("entityMap").unwrap_or(&Value::Null);

    Ok(Article {
        tweet_id: tweet_id.to_owned(),
        title,
        markdown: blocks_to_markdown(blocks, entity_map),
    })
}
fn blocks_to_markdown(blocks: &[Value], entity_map: &Value) -> String {
let mut lines: Vec<String> = Vec::new();
let mut i = 0;
while i < blocks.len() {
let block = &blocks[i];
let text = block.get("text").and_then(|v| v.as_str()).unwrap_or("");
let block_type = block
.get("type")
.and_then(|v| v.as_str())
.unwrap_or("unstyled");
let depth = block.get("depth").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
if block_type == "code-block" {
let mut code_lines = vec![text.to_string()];
while i + 1 < blocks.len() {
let next = &blocks[i + 1];
let next_type = next.get("type").and_then(|v| v.as_str()).unwrap_or("");
if next_type != "code-block" {
break;
}
let next_text = next.get("text").and_then(|v| v.as_str()).unwrap_or("");
code_lines.push(next_text.to_string());
i += 1;
}
lines.push(format!("```\n{}\n```", code_lines.join("\n")));
i += 1;
continue;
}
let styled_text = apply_inline_styles(text, block, entity_map);
let indent = " ".repeat(depth);
let line = match block_type {
"header-one" => format!("# {}", styled_text),
"header-two" => format!("## {}", styled_text),
"header-three" => format!("### {}", styled_text),
"blockquote" => format!("> {}", styled_text),
"unordered-list-item" => format!("{}- {}", indent, styled_text),
"ordered-list-item" => format!("{}1. {}", indent, styled_text),
_ => styled_text,
};
lines.push(line);
i += 1;
}
lines.join("\n\n")
}
fn apply_inline_styles(text: &str, block: &Value, entity_map: &Value) -> String {
if text.is_empty() {
return String::new();
}
let chars: Vec<char> = text.chars().collect();
let len = chars.len();
let mut markers: Vec<(usize, usize, String, String)> = Vec::new();
if let Some(ranges) = block.get("inlineStyleRanges").and_then(|v| v.as_array()) {
for range in ranges {
let style = range.get("style").and_then(|v| v.as_str()).unwrap_or("");
let offset = range.get("offset").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
let length = range.get("length").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
let (open, close) = match style {
"BOLD" => ("**", "**"),
"ITALIC" => ("*", "*"),
"CODE" => ("`", "`"),
_ => continue,
};
markers.push((offset, length, open.to_string(), close.to_string()));
}
}
if let Some(ranges) = block.get("entityRanges").and_then(|v| v.as_array()) {
for range in ranges {
let key = range
.get("key")
.and_then(|v| v.as_u64())
.map(|k| k.to_string())
.or_else(|| {
range
.get("key")
.and_then(|v| v.as_str())
.map(|s| s.to_string())
});
let offset = range.get("offset").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
let length = range.get("length").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
if let Some(key) = key {
let entity = entity_map.get(&key);
if let Some(entity) = entity {
let entity_type = entity.get("type").and_then(|v| v.as_str()).unwrap_or("");
if entity_type == "LINK" {
if let Some(url) = entity.pointer("/data/url").and_then(|v| v.as_str()) {
markers.push((offset, length, "[".to_string(), format!("]({})", url)));
}
}
}
}
}
}
if markers.is_empty() {
return text.to_string();
}
markers.sort_by(|a, b| a.0.cmp(&b.0).then(b.1.cmp(&a.1)));
let mut opens: Vec<Vec<&str>> = vec![vec![]; len + 1];
let mut closes: Vec<Vec<&str>> = vec![vec![]; len + 1];
for (offset, length, open, close) in &markers {
let start = (*offset).min(len);
let end = (*offset + *length).min(len);
opens[start].push(open);
closes[end].push(close);
}
let mut result = String::with_capacity(text.len() * 2);
for i in 0..=len {
for close in closes[i].iter().rev() {
result.push_str(close);
}
for open in &opens[i] {
result.push_str(open);
}
if i < len {
result.push(chars[i]);
}
}
result
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::Value;

    /// Builds a depth-0 block of the given type with no style or entity ranges.
    fn plain_block(text: &str, kind: &str) -> Value {
        serde_json::json!({ "text": text, "type": kind, "depth": 0 })
    }

    /// A full response with headers, quotes and list items parses into the
    /// expected Markdown fragments.
    #[test]
    fn test_parse_article_basic() {
        let content_state = serde_json::json!({
            "blocks": [
                { "text": "My Title", "type": "header-one", "depth": 0 },
                { "text": "A paragraph of text.", "type": "unstyled", "depth": 0 },
                { "text": "A quote", "type": "blockquote", "depth": 0 },
                { "text": "Item one", "type": "unordered-list-item", "depth": 0 },
                { "text": "Item two", "type": "unordered-list-item", "depth": 1 },
            ],
            "entityMap": {}
        });
        let data = serde_json::json!({
            "data": {
                "tweetResult": {
                    "result": {
                        "rest_id": "123",
                        "article_results": {
                            "result": { "content_state": content_state }
                        }
                    }
                }
            }
        });

        let article = parse_article_response(&data, "123").unwrap();

        assert_eq!(article.tweet_id, "123");
        let expected_fragments = [
            "# My Title",
            "A paragraph of text.",
            "> A quote",
            "- Item one",
            " - Item two",
        ];
        for fragment in &expected_fragments {
            assert!(article.markdown.contains(*fragment));
        }
    }

    /// A tweet without article content is reported as an error.
    #[test]
    fn test_parse_article_no_content() {
        let data = serde_json::json!({
            "data": {
                "tweetResult": {
                    "result": {
                        "rest_id": "456",
                        "legacy": { "full_text": "Just a tweet" }
                    }
                }
            }
        });

        assert!(parse_article_response(&data, "456").is_err());
    }

    /// A lone code block is wrapped in a fence.
    #[test]
    fn test_blocks_to_markdown_code_single() {
        let blocks = vec![plain_block("fn main() {}", "code-block")];

        let md = blocks_to_markdown(&blocks, &Value::Null);

        assert!(md.contains("```"));
        assert!(md.contains("fn main() {}"));
    }

    /// Adjacent code blocks share one fence, one block per line.
    #[test]
    fn test_blocks_to_markdown_code_coalesced() {
        let blocks: Vec<Value> = ["line 1", "line 2", "line 3"]
            .iter()
            .map(|text| plain_block(text, "code-block"))
            .collect();

        let md = blocks_to_markdown(&blocks, &Value::Null);

        assert_eq!(md.matches("```").count(), 2);
        assert!(md.contains("line 1\nline 2\nline 3"));
    }

    /// BOLD and CODE style ranges become `**..**` and backtick spans.
    #[test]
    fn test_blocks_to_markdown_inline_styles() {
        let styled = serde_json::json!({
            "text": "Hello bold and code world",
            "type": "unstyled",
            "depth": 0,
            "inlineStyleRanges": [
                { "style": "BOLD", "offset": 6, "length": 4 },
                { "style": "CODE", "offset": 15, "length": 4 },
            ],
            "entityRanges": []
        });

        let md = blocks_to_markdown(&[styled], &Value::Null);

        assert!(md.contains("**bold**"));
        assert!(md.contains("`code`"));
    }

    /// A LINK entity referenced by numeric key becomes a Markdown link.
    #[test]
    fn test_blocks_to_markdown_link_entity() {
        let entity_map = serde_json::json!({
            "0": {
                "type": "LINK",
                "data": { "url": "https://example.com" }
            }
        });
        let linked = serde_json::json!({
            "text": "Click here for more",
            "type": "unstyled",
            "depth": 0,
            "inlineStyleRanges": [],
            "entityRanges": [
                { "key": 0, "offset": 6, "length": 4 }
            ]
        });

        let md = blocks_to_markdown(&[linked], &entity_map);

        assert!(md.contains("[here](https://example.com)"));
    }
}