1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# SPDX-License-Identifier: PolyForm-Noncommercial-1.0.0
[]
= "twitter"
# Host-anchored pattern. `matches()` (provider.rs) uses `Regex::is_match`,
# which SEARCHES (unanchored) — so an un-anchored `(?:x|twitter)\.com/...`
# pattern matches the substring inside mirror hosts (api.fxtwitter.com,
# vxtwitter.com, fixupx.com, …). That caused nab to re-apply this rule to its
# OWN rewritten FxTwitter URL (infinite-rewrite / double-fetch). The
# `^https?://(?:www\.|mobile\.)?` prefix pins the host to the real x.com /
# twitter.com (optionally with www./mobile.), so mirrors never match.
#
# A single regex with two alternatives keeps this rule to ONE pattern entry:
#   - `(?:x|twitter)\.com/.+/status/\d+` — status posts, handled by the
#     FxTwitter rewrite + JSON extraction below (incl. long-form X Articles
#     reachable via /status/).
#   - `x\.com/i/article/\d+` — direct X Article URLs. These have NO
#     /status/<id>, so they CANNOT be rewritten to FxTwitter (article-id !=
#     status-id; FxTwitter is keyed on status-id). We still match them so the
#     URL is classified as Twitter/X-family rather than falling through to a
#     raw fetch that returns a JS shell. The `[rewrite].from` regex
#     deliberately does NOT cover this shape, so `rewrite_url` is a no-op and
#     the URL is left untouched for the authenticated engine render path. A
#     separate Rust engine keys its render on the SAME URL shape:
#       (?i)^https?://(?:www\.|mobile\.)?x\.com/i/article/\d+
# INTEGRATION NOTE: because this whole rule is engine="api" (one engine per
# [site]), engine_for_url() returns Api — NOT Browser — for article URLs too.
# If the engine render path keys on engine_for_url()==Browser it will be
# shadowed; it must key on the article URL regex independently (after the API
# provider's try_extract returns None). The clean alternative — a separate
# defaults/twitter-article.toml with engine="browser" registered in
# mod.rs::embedded_rules() — is out of scope for this two-file change.
= [
"(?i)^https?://(?:www\\.|mobile\\.)?(?:(?:x|twitter)\\.com/.+/status/\\d+|x\\.com/i/article/\\d+)",
]
[]
# Host-anchored like the [site] pattern, but covering only its first
# alternative (status URLs). `www.`/`mobile.` are accepted and dropped; the
# handle ($1) and status-id ($2) are preserved, and anything after the
# status-id is discarded. This regex intentionally does NOT match
# /i/article/<id> URLs (no FxTwitter article endpoint exists), so those pass
# through `rewrite_url` unchanged.
= "(?i)^https?://(?:www\\.|mobile\\.)?(?:x|twitter)\\.com/([^/?#]+)/status/(\\d+).*"
= "https://api.fxtwitter.com/$1/status/$2"
[]
= "application/json"
# FxTwitter returns {"code":404,"tweet":null} with HTTP 200 for deleted/private
# tweets. Guard extraction on the tweet object being present and non-null so
# the provider fails fast (rather than emitting a confusing "check json paths"
# warning) and lets nab fall back to the normal HTTP fetch path.
= ".tweet"
[]
= ".tweet.author.name"
= ".tweet.author.screen_name"
= ".tweet.text"
= ".tweet.created_at"
= ".tweet.url"
= ".tweet.likes"
= ".tweet.retweets"
= ".tweet.replies"
= ".tweet.views"
# X Articles / Notes (long-form posts) — FxTwitter puts the full content under .tweet.article
= ".tweet.article.title"
= ".tweet.article.preview_text"
= ".tweet.article.content.blocks[].text"
[]
= """
## @{author_handle} ({author_name})
# {article_title}
{article_content}
{text}
📊 {likes|number} likes · {retweets|number} reposts · {replies|number} replies
👁 {views|number} views
🕐 {date}
[View on X]({url})
"""
[]
= "Twitter/X"
= "{author_name} (@{author_handle})"
= ""
= "date"
= "url"
[]
= "likes"
= "retweets"
= "replies"
= "views"