tail-fin-twitter 0.5.1

Twitter/X adapter for tail-fin: timeline, search, profile, bookmarks, likes, thread, post, like, follow, block, bookmark, reply, trending, lists, article, download, notifications
Documentation
use serde_json::Value;

use tail_fin_common::TailFinError;

use crate::types::Article;

/// Extract an [`Article`] (title plus Markdown body) from a TweetResultByRestId response.
///
/// The tweet is looked up at `/data/tweetResult/result`, falling back to
/// `/data/tweet/result`. When that object carries a nested `tweet` member, the nested
/// object is used instead of the outer one.
///
/// The title is the `string_value` of the first card binding whose key is `"title"`;
/// it is left empty when no such binding exists. The body is rendered from the
/// `content_state` blocks found under `/article_results/result`.
///
/// # Errors
///
/// Returns [`TailFinError::Parse`] when the tweet result is missing, when there is no
/// `content_state`, or when `content_state.blocks` is absent or not an array.
pub fn parse_article_response(data: &Value, tweet_id: &str) -> Result<Article, TailFinError> {
    let result = match data.pointer("/data/tweetResult/result") {
        Some(found) => found,
        None => data
            .pointer("/data/tweet/result")
            .ok_or_else(|| TailFinError::Parse("Tweet not found in response".into()))?,
    };

    // Prefer the nested `tweet` object when present; otherwise the result is the tweet.
    let tweet_data = match result.get("tweet") {
        Some(inner) => inner,
        None => result,
    };

    // First binding keyed "title" that actually carries a string value wins.
    let title = tweet_data
        .pointer("/card/legacy/binding_values")
        .and_then(Value::as_array)
        .and_then(|bindings| {
            bindings
                .iter()
                .filter(|binding| binding.get("key").and_then(Value::as_str) == Some("title"))
                .find_map(|binding| {
                    binding
                        .pointer("/value/string_value")
                        .and_then(Value::as_str)
                })
        })
        .map(str::to_owned)
        .unwrap_or_default();

    let content_state = match tweet_data.pointer("/article_results/result/content_state") {
        Some(state) => state,
        None => {
            return Err(TailFinError::Parse(
                "No article content found. This tweet may not be an article.".into(),
            ))
        }
    };

    let blocks = match content_state.get("blocks").and_then(Value::as_array) {
        Some(list) => list,
        None => return Err(TailFinError::Parse("Article has no blocks.".into())),
    };

    // A missing entity map is treated as JSON null, so entity lookups simply find nothing.
    let entity_map = content_state.get("entityMap").unwrap_or(&Value::Null);

    Ok(Article {
        tweet_id: tweet_id.to_string(),
        title,
        markdown: blocks_to_markdown(blocks, entity_map),
    })
}

/// Choose a backtick fence that the given code cannot terminate early.
///
/// The fence is one backtick longer than the longest backtick run inside `code`,
/// and never shorter than the conventional three.
fn code_fence(code: &str) -> String {
    let longest_run = code
        .split(|c: char| c != '`')
        .map(str::len)
        .max()
        .unwrap_or(0);
    "`".repeat(longest_run.max(2) + 1)
}

/// Render draft.js content blocks as Markdown, with a blank line between rendered blocks.
///
/// * Consecutive `code-block` entries are merged into a single fenced block, one source
///   block per line. The fence grows beyond three backticks when the code itself
///   contains a run of three or more backticks.
/// * `header-one` through `header-six` become `#` through `######` headings.
/// * `blockquote` becomes a `> ` line; list items are indented two spaces per `depth`.
/// * Any other (or missing) block type is emitted as a plain paragraph.
///
/// Inline styles and link entities are applied to every block except code blocks,
/// whose text is emitted verbatim.
fn blocks_to_markdown(blocks: &[Value], entity_map: &Value) -> String {
    fn block_type(block: &Value) -> Option<&str> {
        block.get("type").and_then(|v| v.as_str())
    }
    fn block_text(block: &Value) -> &str {
        block.get("text").and_then(|v| v.as_str()).unwrap_or("")
    }

    let mut rendered: Vec<String> = Vec::with_capacity(blocks.len());
    let mut remaining = blocks.iter().peekable();

    while let Some(block) = remaining.next() {
        let text = block_text(block);
        let kind = block_type(block).unwrap_or("unstyled");

        // Coalesce adjacent code-block entries into one fenced block.
        if kind == "code-block" {
            let mut code = String::from(text);
            while let Some(next) =
                remaining.next_if(|candidate| block_type(candidate) == Some("code-block"))
            {
                code.push('\n');
                code.push_str(block_text(next));
            }
            let fence = code_fence(&code);
            rendered.push(format!("{}\n{}\n{}", fence, code, fence));
            continue;
        }

        let depth = block.get("depth").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
        let styled = apply_inline_styles(text, block, entity_map);

        let line = match kind {
            "header-one" => format!("# {}", styled),
            "header-two" => format!("## {}", styled),
            "header-three" => format!("### {}", styled),
            "header-four" => format!("#### {}", styled),
            "header-five" => format!("##### {}", styled),
            "header-six" => format!("###### {}", styled),
            "blockquote" => format!("> {}", styled),
            "unordered-list-item" => format!("{}- {}", "  ".repeat(depth), styled),
            // Every ordered item is numbered "1."; Markdown renderers renumber sequentially.
            "ordered-list-item" => format!("{}1. {}", "  ".repeat(depth), styled),
            _ => styled,
        };

        rendered.push(line);
    }

    rendered.join("\n\n")
}

/// Apply draft.js `inlineStyleRanges` and `entityRanges` to produce Markdown inline formatting.
///
/// Handles BOLD (**), ITALIC (*), CODE (`), and LINK entities ([text](url)).
fn apply_inline_styles(text: &str, block: &Value, entity_map: &Value) -> String {
    if text.is_empty() {
        return String::new();
    }

    let chars: Vec<char> = text.chars().collect();
    let len = chars.len();

    // Collect style markers: (offset, length, marker_open, marker_close)
    let mut markers: Vec<(usize, usize, String, String)> = Vec::new();

    // Inline styles (BOLD, ITALIC, CODE)
    if let Some(ranges) = block.get("inlineStyleRanges").and_then(|v| v.as_array()) {
        for range in ranges {
            let style = range.get("style").and_then(|v| v.as_str()).unwrap_or("");
            let offset = range.get("offset").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
            let length = range.get("length").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
            let (open, close) = match style {
                "BOLD" => ("**", "**"),
                "ITALIC" => ("*", "*"),
                "CODE" => ("`", "`"),
                _ => continue,
            };
            markers.push((offset, length, open.to_string(), close.to_string()));
        }
    }

    // Entity ranges (LINK, etc.)
    if let Some(ranges) = block.get("entityRanges").and_then(|v| v.as_array()) {
        for range in ranges {
            let key = range
                .get("key")
                .and_then(|v| v.as_u64())
                .map(|k| k.to_string())
                .or_else(|| {
                    range
                        .get("key")
                        .and_then(|v| v.as_str())
                        .map(|s| s.to_string())
                });
            let offset = range.get("offset").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
            let length = range.get("length").and_then(|v| v.as_u64()).unwrap_or(0) as usize;

            if let Some(key) = key {
                let entity = entity_map.get(&key);
                if let Some(entity) = entity {
                    let entity_type = entity.get("type").and_then(|v| v.as_str()).unwrap_or("");
                    if entity_type == "LINK" {
                        if let Some(url) = entity.pointer("/data/url").and_then(|v| v.as_str()) {
                            markers.push((offset, length, "[".to_string(), format!("]({})", url)));
                        }
                    }
                }
            }
        }
    }

    if markers.is_empty() {
        return text.to_string();
    }

    // Sort by offset (stable), then longest first so outer wraps come first
    markers.sort_by(|a, b| a.0.cmp(&b.0).then(b.1.cmp(&a.1)));

    // Build result by inserting markers at character boundaries
    // Collect open/close events per char index
    let mut opens: Vec<Vec<&str>> = vec![vec![]; len + 1];
    let mut closes: Vec<Vec<&str>> = vec![vec![]; len + 1];

    for (offset, length, open, close) in &markers {
        let start = (*offset).min(len);
        let end = (*offset + *length).min(len);
        opens[start].push(open);
        closes[end].push(close);
    }

    let mut result = String::with_capacity(text.len() * 2);
    for i in 0..=len {
        // Close markers (reverse order for proper nesting)
        for close in closes[i].iter().rev() {
            result.push_str(close);
        }
        // Open markers
        for open in &opens[i] {
            result.push_str(open);
        }
        if i < len {
            result.push(chars[i]);
        }
    }

    result
}

#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::{json, Value};

    /// Wrap a tweet `result` object in the `data.tweetResult.result` envelope.
    fn envelope(result: Value) -> Value {
        json!({ "data": { "tweetResult": { "result": result } } })
    }

    /// Build a content block with no style or entity ranges.
    fn block(kind: &str, text: &str, depth: u64) -> Value {
        json!({ "text": text, "type": kind, "depth": depth })
    }

    /// Headings, paragraphs, quotes and (nested) list items get their Markdown prefixes.
    #[test]
    fn test_parse_article_basic() {
        let payload = envelope(json!({
            "rest_id": "123",
            "article_results": {
                "result": {
                    "content_state": {
                        "blocks": [
                            block("header-one", "My Title", 0),
                            block("unstyled", "A paragraph of text.", 0),
                            block("blockquote", "A quote", 0),
                            block("unordered-list-item", "Item one", 0),
                            block("unordered-list-item", "Item two", 1),
                        ],
                        "entityMap": {}
                    }
                }
            }
        }));

        let article = parse_article_response(&payload, "123").unwrap();

        assert_eq!(article.tweet_id, "123");
        let expected_fragments = [
            "# My Title",
            "A paragraph of text.",
            "> A quote",
            "- Item one",
            "  - Item two",
        ];
        for fragment in expected_fragments.iter() {
            assert!(
                article.markdown.contains(*fragment),
                "markdown is missing {:?}",
                fragment
            );
        }
    }

    /// A plain tweet without article content is reported as an error.
    #[test]
    fn test_parse_article_no_content() {
        let payload = envelope(json!({
            "rest_id": "456",
            "legacy": { "full_text": "Just a tweet" }
        }));

        assert!(parse_article_response(&payload, "456").is_err());
    }

    /// A lone code block is fenced and keeps its text.
    #[test]
    fn test_blocks_to_markdown_code_single() {
        let blocks = [block("code-block", "fn main() {}", 0)];

        let md = blocks_to_markdown(&blocks, &Value::Null);

        assert!(md.contains("```"));
        assert!(md.contains("fn main() {}"));
    }

    /// Adjacent code blocks collapse into one fenced block, one source block per line.
    #[test]
    fn test_blocks_to_markdown_code_coalesced() {
        let blocks: Vec<Value> = ["line 1", "line 2", "line 3"]
            .iter()
            .map(|text| block("code-block", text, 0))
            .collect();

        let md = blocks_to_markdown(&blocks, &Value::Null);

        // Exactly one opening and one closing fence, not three pairs.
        assert_eq!(md.matches("```").count(), 2);
        assert!(md.contains("line 1\nline 2\nline 3"));
    }

    /// BOLD and CODE style ranges wrap the covered text in their markers.
    #[test]
    fn test_blocks_to_markdown_inline_styles() {
        let styled = json!({
            "text": "Hello bold and code world",
            "type": "unstyled",
            "depth": 0,
            "inlineStyleRanges": [
                { "style": "BOLD", "offset": 6, "length": 4 },
                { "style": "CODE", "offset": 15, "length": 4 },
            ],
            "entityRanges": []
        });

        let md = blocks_to_markdown(&[styled], &Value::Null);

        assert!(md.contains("**bold**"));
        assert!(md.contains("`code`"));
    }

    /// A LINK entity referenced by a numeric key becomes a Markdown link.
    #[test]
    fn test_blocks_to_markdown_link_entity() {
        let entity_map = json!({
            "0": {
                "type": "LINK",
                "data": { "url": "https://example.com" }
            }
        });
        let linked = json!({
            "text": "Click here for more",
            "type": "unstyled",
            "depth": 0,
            "inlineStyleRanges": [],
            "entityRanges": [
                { "key": 0, "offset": 6, "length": 4 }
            ]
        });

        let md = blocks_to_markdown(&[linked], &entity_map);

        assert!(md.contains("[here](https://example.com)"));
    }
}