tail-fin-youtube 0.7.8

YouTube adapter for tail-fin: search, video, channel, comments, transcript via InnerTube API
Documentation
use serde_json::Value;

use crate::types::Comment;

/// Parse comments from InnerTube `/youtubei/v1/next` response.
pub fn parse_comments(data: &Value, count: usize) -> Vec<Comment> {
    let mut comments = Vec::new();

    let items = data
        .pointer("/onResponseReceivedEndpoints")
        .and_then(|v| v.as_array())
        .and_then(|endpoints| {
            endpoints.iter().find_map(|ep| {
                ep.pointer("/reloadContinuationItemsCommand/continuationItems")
                    .or_else(|| ep.pointer("/appendContinuationItemsAction/continuationItems"))
                    .and_then(|v| v.as_array())
            })
        });

    if let Some(items) = items {
        for item in items {
            if comments.len() >= count {
                break;
            }
            // Try legacy format: commentThreadRenderer/comment/commentRenderer
            if let Some(renderer) = item
                .pointer("/commentThreadRenderer/comment/commentRenderer")
                .or_else(|| item.get("commentRenderer"))
            {
                if let Some(c) = parse_comment_renderer(renderer) {
                    comments.push(c);
                    continue;
                }
            }
            // New format: commentThreadRenderer/commentViewModel/commentViewModel
            if let Some(vm) =
                item.pointer("/commentThreadRenderer/commentViewModel/commentViewModel")
            {
                if let Some(c) = parse_comment_view_model(vm) {
                    comments.push(c);
                }
            }
        }
    }

    comments
}

fn parse_comment_renderer(renderer: &Value) -> Option<Comment> {
    let author = renderer
        .pointer("/authorText/simpleText")
        .and_then(|v| v.as_str())
        .unwrap_or("")
        .to_string();

    let text = renderer
        .pointer("/contentText/runs")
        .and_then(|v| v.as_array())
        .map(|runs| {
            runs.iter()
                .filter_map(|r| r.get("text").and_then(|v| v.as_str()))
                .collect::<Vec<_>>()
                .join("")
        })
        .unwrap_or_default();

    let likes = renderer
        .pointer("/voteCount/simpleText")
        .and_then(|v| v.as_str())
        .map(|s| s.to_string());

    let published_at = renderer
        .pointer("/publishedTimeText/runs/0/text")
        .and_then(|v| v.as_str())
        .map(|s| s.to_string());

    let reply_count = renderer.pointer("/replyCount").and_then(|v| v.as_u64());

    Some(Comment {
        author,
        text,
        likes,
        published_at,
        reply_count,
    })
}

/// Parse a commentViewModel (new YouTube format) into a Comment.
fn parse_comment_view_model(vm: &Value) -> Option<Comment> {
    let author = vm
        .pointer("/commentAuthorBadge/commandRuns/0/onTap/innertubeCommand/browseEndpoint/canonicalBaseUrl")
        .and_then(|v| v.as_str())
        .map(|s| s.trim_start_matches('/').to_string())
        .or_else(|| {
            vm.pointer("/authorButtonA11y")
                .and_then(|v| v.as_str())
                .map(|s| s.to_string())
        })
        .unwrap_or_default();

    let text = vm
        .pointer("/contentText/content")
        .and_then(|v| v.as_str())
        .unwrap_or("")
        .to_string();

    let likes = vm
        .pointer("/toolbar/likeCountNotliked")
        .or_else(|| vm.pointer("/toolbar/likeCountLiked"))
        .and_then(|v| v.as_str())
        .map(|s| s.to_string());

    let published_at = vm
        .pointer("/publishedTimeText/content")
        .and_then(|v| v.as_str())
        .map(|s| s.to_string());

    let reply_count = vm.pointer("/toolbar/replyCount").and_then(|v| {
        v.as_str()
            .and_then(|s| s.parse::<u64>().ok())
            .or_else(|| v.as_u64())
    });

    Some(Comment {
        author,
        text,
        likes,
        published_at,
        reply_count,
    })
}

/// Parse comments from frameworkUpdates.entityBatchUpdate.mutations (new YouTube format).
///
/// This is the approach used by OpenCLI — YouTube now returns comment data
/// as commentEntityPayload objects in the mutations array.
pub fn parse_comments_from_mutations(data: &Value, count: usize) -> Vec<Comment> {
    let mut comments = Vec::new();

    let mutations = data
        .pointer("/frameworkUpdates/entityBatchUpdate/mutations")
        .and_then(|v| v.as_array());

    if let Some(mutations) = mutations {
        for mutation in mutations {
            if comments.len() >= count {
                break;
            }
            let payload = mutation.pointer("/payload/commentEntityPayload");
            if let Some(p) = payload {
                let author = p
                    .pointer("/author/displayName")
                    .and_then(|v| v.as_str())
                    .unwrap_or("")
                    .to_string();
                let text = p
                    .pointer("/properties/content/content")
                    .and_then(|v| v.as_str())
                    .unwrap_or("")
                    .to_string();
                let likes = p
                    .pointer("/toolbar/likeCountNotliked")
                    .and_then(|v| v.as_str())
                    .map(|s| s.to_string());
                let published_at = p
                    .pointer("/properties/publishedTime")
                    .and_then(|v| v.as_str())
                    .map(|s| s.to_string());
                let reply_count = p.pointer("/toolbar/replyCount").and_then(|v| {
                    v.as_str()
                        .and_then(|s| s.parse::<u64>().ok())
                        .or_else(|| v.as_u64())
                });

                if !author.is_empty() || !text.is_empty() {
                    comments.push(Comment {
                        author,
                        text,
                        likes,
                        published_at,
                        reply_count,
                    });
                }
            }
        }
    }

    comments
}

// Unit tests for the comment parsers. Every fixture is a hand-written JSON
// value built with `serde_json::json!`; no network access is involved.
#[cfg(test)]
mod tests {
    use super::*;

    // ── parse_comments_from_mutations ────────────────────────────────────

    // A single fully-populated commentEntityPayload maps onto every Comment
    // field; the string reply count "3" is parsed into the number 3.
    #[test]
    fn test_parse_comments_from_mutations() {
        let data = serde_json::json!({
            "frameworkUpdates": {
                "entityBatchUpdate": {
                    "mutations": [
                        {
                            "payload": {
                                "commentEntityPayload": {
                                    "author": { "displayName": "Alice" },
                                    "properties": {
                                        "content": { "content": "Great video!" },
                                        "publishedTime": "2 days ago"
                                    },
                                    "toolbar": {
                                        "likeCountNotliked": "42",
                                        "replyCount": "3"
                                    }
                                }
                            }
                        }
                    ]
                }
            }
        });
        let comments = parse_comments_from_mutations(&data, 10);
        assert_eq!(comments.len(), 1);
        assert_eq!(comments[0].author, "Alice");
        assert_eq!(comments[0].text, "Great video!");
        assert_eq!(comments[0].likes.as_deref(), Some("42"));
        assert_eq!(comments[0].published_at.as_deref(), Some("2 days ago"));
        assert_eq!(comments[0].reply_count, Some(3));
    }

    // Three identical mutations with a limit of 2 must yield exactly 2 comments.
    #[test]
    fn test_parse_comments_from_mutations_limit() {
        let mutation = serde_json::json!({
            "payload": {
                "commentEntityPayload": {
                    "author": { "displayName": "Bob" },
                    "properties": { "content": { "content": "nice" } },
                    "toolbar": {}
                }
            }
        });
        let data = serde_json::json!({
            "frameworkUpdates": {
                "entityBatchUpdate": {
                    "mutations": [mutation.clone(), mutation.clone(), mutation.clone()]
                }
            }
        });
        let comments = parse_comments_from_mutations(&data, 2);
        assert_eq!(comments.len(), 2);
    }

    #[test]
    fn test_parse_comments_from_mutations_empty_skipped() {
        // Mutations with empty author AND empty text should be skipped
        let data = serde_json::json!({
            "frameworkUpdates": {
                "entityBatchUpdate": {
                    "mutations": [{
                        "payload": {
                            "commentEntityPayload": {
                                "author": { "displayName": "" },
                                "properties": { "content": { "content": "" } },
                                "toolbar": {}
                            }
                        }
                    }]
                }
            }
        });
        let comments = parse_comments_from_mutations(&data, 10);
        assert!(comments.is_empty());
    }

    // ── parse_comments ──────────────────────────────────────────────────

    // Legacy commentRenderer under reloadContinuationItemsCommand: the two
    // text runs are concatenated and the numeric replyCount is read as-is.
    #[test]
    fn test_parse_comments_legacy_format() {
        let data = serde_json::json!({
            "onResponseReceivedEndpoints": [{
                "reloadContinuationItemsCommand": {
                    "continuationItems": [{
                        "commentThreadRenderer": {
                            "comment": {
                                "commentRenderer": {
                                    "authorText": { "simpleText": "TestUser" },
                                    "contentText": {
                                        "runs": [
                                            { "text": "Hello " },
                                            { "text": "world!" }
                                        ]
                                    },
                                    "voteCount": { "simpleText": "15" },
                                    "publishedTimeText": { "runs": [{ "text": "1 day ago" }] },
                                    "replyCount": 5
                                }
                            }
                        }
                    }]
                }
            }]
        });
        let comments = parse_comments(&data, 10);
        assert_eq!(comments.len(), 1);
        assert_eq!(comments[0].author, "TestUser");
        assert_eq!(comments[0].text, "Hello world!");
        assert_eq!(comments[0].likes.as_deref(), Some("15"));
        assert_eq!(comments[0].published_at.as_deref(), Some("1 day ago"));
        assert_eq!(comments[0].reply_count, Some(5));
    }

    // New commentViewModel format: with no author badge present, the author
    // comes from `authorButtonA11y`; the string reply count "7" is parsed.
    #[test]
    fn test_parse_comments_view_model_format() {
        let data = serde_json::json!({
            "onResponseReceivedEndpoints": [{
                "reloadContinuationItemsCommand": {
                    "continuationItems": [{
                        "commentThreadRenderer": {
                            "commentViewModel": {
                                "commentViewModel": {
                                    "authorButtonA11y": "ViewModelUser",
                                    "contentText": { "content": "VM comment text" },
                                    "toolbar": {
                                        "likeCountNotliked": "99",
                                        "replyCount": "7"
                                    },
                                    "publishedTimeText": { "content": "3 hours ago" }
                                }
                            }
                        }
                    }]
                }
            }]
        });
        let comments = parse_comments(&data, 10);
        assert_eq!(comments.len(), 1);
        assert_eq!(comments[0].author, "ViewModelUser");
        assert_eq!(comments[0].text, "VM comment text");
        assert_eq!(comments[0].likes.as_deref(), Some("99"));
        assert_eq!(comments[0].published_at.as_deref(), Some("3 hours ago"));
        assert_eq!(comments[0].reply_count, Some(7));
    }

    // Items delivered under appendContinuationItemsAction (instead of
    // reloadContinuationItemsCommand) are parsed as well.
    #[test]
    fn test_parse_comments_append_continuation_path() {
        let data = serde_json::json!({
            "onResponseReceivedEndpoints": [{
                "appendContinuationItemsAction": {
                    "continuationItems": [{
                        "commentThreadRenderer": {
                            "comment": {
                                "commentRenderer": {
                                    "authorText": { "simpleText": "AppendUser" },
                                    "contentText": { "runs": [{ "text": "appended" }] }
                                }
                            }
                        }
                    }]
                }
            }]
        });
        let comments = parse_comments(&data, 10);
        assert_eq!(comments.len(), 1);
        assert_eq!(comments[0].author, "AppendUser");
        assert_eq!(comments[0].text, "appended");
    }

    // Three legacy comments with a limit of 2 must yield exactly 2 comments.
    #[test]
    fn test_parse_comments_count_limit() {
        // Builds one minimal legacy comment thread for the given author name.
        let make_comment = |name: &str| {
            serde_json::json!({
                "commentThreadRenderer": {
                    "comment": {
                        "commentRenderer": {
                            "authorText": { "simpleText": name },
                            "contentText": { "runs": [{ "text": "hi" }] }
                        }
                    }
                }
            })
        };
        let data = serde_json::json!({
            "onResponseReceivedEndpoints": [{
                "reloadContinuationItemsCommand": {
                    "continuationItems": [
                        make_comment("A"),
                        make_comment("B"),
                        make_comment("C")
                    ]
                }
            }]
        });
        let comments = parse_comments(&data, 2);
        assert_eq!(comments.len(), 2);
    }

    // An empty JSON object has no endpoints, so nothing is returned.
    #[test]
    fn test_parse_comments_empty_response() {
        let data = serde_json::json!({});
        assert!(parse_comments(&data, 10).is_empty());
    }
}