This commit is contained in:
stefanodvx 2025-04-14 13:05:43 +02:00
parent 264c97183e
commit 3faede7b1c
74 changed files with 6228 additions and 1 deletions

181
ext/twitter/main.go Normal file
View file

@ -0,0 +1,181 @@
package twitter
import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"regexp"
	"time"

	"govd/enums"
	"govd/models"
	"govd/util"
)
// Location of the web API used to fetch tweet details.
const (
// apiHostname is the host name of the API. It is not referenced by the
// code visible in this file.
apiHostname = "x.com"
// apiEndpoint is the full URL of the TweetDetail GraphQL operation that
// GetTweetAPI requests.
apiEndpoint = "https://x.com/i/api/graphql/zZXycP0V6H7m-2r0mOnFcA/TweetDetail"
)
// HTTPClient is the shared client used for every HTTP request issued by this
// package (short-link resolution and the TweetDetail API call).
//
// A finite timeout is configured because the zero-value http.Client never
// times out, which would let a stalled connection block an extraction
// indefinitely.
var HTTPClient = &http.Client{Timeout: 30 * time.Second}
// ShortExtractor handles t.co short links.
//
// Its Run function downloads the short link with a Chrome user agent and
// scans the response body for the first URL that matches
// Extractor.URLPattern. That URL is returned in ExtractorResponse.URL; no
// media is extracted here.
var ShortExtractor = &models.Extractor{
	Name:       "Twitter (Short)",
	CodeName:   "twitter:short",
	Type:       enums.ExtractorTypeSingle,
	Category:   enums.ExtractorCategorySocial,
	URLPattern: regexp.MustCompile(`https?://t\.co/(?P<id>\w+)`),
	IsRedirect: true,
	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
		request, err := http.NewRequest("GET", ctx.MatchedContentURL, nil)
		if err != nil {
			return nil, fmt.Errorf("failed to create req: %w", err)
		}
		request.Header.Set("User-Agent", util.ChromeUA)

		response, err := HTTPClient.Do(request)
		if err != nil {
			return nil, fmt.Errorf("failed to send request: %w", err)
		}
		defer response.Body.Close()

		page, err := io.ReadAll(response.Body)
		if err != nil {
			return nil, fmt.Errorf("failed to read body: %w", err)
		}

		// Element 0 of the submatch slice is the complete matched tweet URL.
		if groups := Extractor.URLPattern.FindStringSubmatch(string(page)); groups != nil {
			return &models.ExtractorResponse{URL: groups[0]}, nil
		}
		return nil, fmt.Errorf("failed to find url in body")
	},
}
// Extractor handles tweet status URLs on twitter.com and x.com, optionally
// prefixed with "vx". The numeric status ID is captured in the named group
// "id".
//
// Its Run function delegates to MediaListFromAPI and wraps the resulting
// media list in an ExtractorResponse.
var Extractor = &models.Extractor{
	Name:       "Twitter",
	CodeName:   "twitter",
	Type:       enums.ExtractorTypeSingle,
	Category:   enums.ExtractorCategorySocial,
	URLPattern: regexp.MustCompile(`https?:\/\/(vx)?(twitter|x)\.com\/([^\/]+)\/status\/(?P<id>\d+)`),
	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
		list, err := MediaListFromAPI(ctx)
		if err != nil {
			return nil, fmt.Errorf("failed to get media: %w", err)
		}
		response := &models.ExtractorResponse{MediaList: list}
		return response, nil
	},
}
// MediaListFromAPI fetches the tweet identified by ctx.MatchedContentID and
// converts each of its media entities into a models.Media.
//
// Entities are taken from the tweet's extended entities when present and
// non-empty, otherwise from its plain entities; if neither has media an error
// is returned. Videos and animated GIFs get one format per result of
// ExtractVideoFormats, photos get a single "photo" format. Entities that end
// up without any format (including unknown types) are left out of the result.
func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
	tweet, err := GetTweetAPI(ctx.MatchedContentID)
	if err != nil {
		return nil, fmt.Errorf("failed to get tweet data: %w", err)
	}

	var entities []MediaEntity
	switch {
	case tweet.ExtendedEntities != nil && len(tweet.ExtendedEntities.Media) > 0:
		entities = tweet.ExtendedEntities.Media
	case tweet.Entities != nil && len(tweet.Entities.Media) > 0:
		entities = tweet.Entities.Media
	default:
		return nil, fmt.Errorf("no media found in tweet")
	}

	// Every media item of the tweet shares the same cleaned caption.
	caption := CleanCaption(tweet.FullText)

	var result []*models.Media
	for _, entity := range entities {
		item := ctx.Extractor.NewMedia(
			ctx.MatchedContentID,
			ctx.MatchedContentURL,
		)
		item.SetCaption(caption)

		switch entity.Type {
		case "photo":
			item.AddFormat(&models.MediaFormat{
				Type:     enums.MediaTypePhoto,
				FormatID: "photo",
				URL:      []string{entity.MediaURLHTTPS},
			})
		case "video", "animated_gif":
			videoFormats, err := ExtractVideoFormats(&entity)
			if err != nil {
				return nil, err
			}
			for _, f := range videoFormats {
				item.AddFormat(f)
			}
		}

		if len(item.Formats) == 0 {
			continue
		}
		result = append(result, item)
	}
	return result, nil
}
// GetTweetAPI requests the TweetDetail endpoint for tweetID and returns the
// matching tweet.
//
// Cookies are read from "twitter.txt"; BuildAPIHeaders derives the request
// headers from them and returns nil when they are unusable, which is reported
// as an error. Any response status other than 200 OK is an error. The JSON
// body is decoded into an APIResponse and searched with FindTweetData.
func GetTweetAPI(tweetID string) (*Tweet, error) {
	cookies, err := util.ParseCookieFile("twitter.txt")
	if err != nil {
		return nil, fmt.Errorf("failed to get cookies: %w", err)
	}

	headers := BuildAPIHeaders(cookies)
	if headers == nil {
		return nil, fmt.Errorf("failed to build headers. check cookies")
	}
	params := BuildAPIQuery(tweetID)

	request, err := http.NewRequest("GET", apiEndpoint, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create req: %w", err)
	}
	for name, val := range headers {
		request.Header.Set(name, val)
	}
	for i := range cookies {
		request.AddCookie(cookies[i])
	}

	// Attach the GraphQL variables/features as URL query parameters.
	values := request.URL.Query()
	for name, val := range params {
		values.Add(name, val)
	}
	request.URL.RawQuery = values.Encode()

	response, err := HTTPClient.Do(request)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("invalid response code: %s", response.Status)
	}

	raw, err := io.ReadAll(response.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read body: %w", err)
	}

	var decoded APIResponse
	if err := json.Unmarshal(raw, &decoded); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}

	found, err := FindTweetData(&decoded, tweetID)
	if err != nil {
		return nil, fmt.Errorf("failed to get tweet data: %w", err)
	}
	return found, nil
}

72
ext/twitter/models.go Normal file
View file

@ -0,0 +1,72 @@
package twitter
// APIResponse mirrors the part of the TweetDetail JSON response that this
// package reads: data -> threaded_conversation_with_injections_v2 ->
// instructions -> entries. GetTweetAPI decodes into it and FindTweetData
// walks it.
type APIResponse struct {
Data struct {
ThreadedConversationWithInjectionsV2 struct {
Instructions []struct {
Entries []struct {
// EntryID is compared against "tweet-<id>" by FindTweetData.
EntryID string `json:"entryId"`
Content struct {
ItemContent struct {
TweetResults struct {
Result TweetResult `json:"result"`
} `json:"tweet_results"`
} `json:"itemContent"`
} `json:"content"`
} `json:"entries"`
} `json:"instructions"`
} `json:"threaded_conversation_with_injections_v2"`
} `json:"data"`
}
// TweetResult is the "result" object of a timeline entry. The tweet payload
// may be decoded from either the "tweet" or the "legacy" key; FindTweetData
// prefers Tweet and falls back to Legacy.
type TweetResult struct {
Tweet *Tweet `json:"tweet,omitempty"`
Legacy *Tweet `json:"legacy,omitempty"`
RestID string `json:"rest_id,omitempty"`
Core *Core `json:"core,omitempty"`
}
// Core models the "core" object of a tweet result, which nests the author
// data under user_results -> result -> legacy. It is not read by the code
// visible in this package.
type Core struct {
UserResults struct {
Result struct {
Legacy *UserLegacy `json:"legacy,omitempty"`
} `json:"result"`
} `json:"user_results"`
}
// UserLegacy holds the user fields decoded from the "screen_name" and "name"
// JSON keys.
type UserLegacy struct {
ScreenName string `json:"screen_name"`
Name string `json:"name"`
}
// Tweet holds the tweet fields this package decodes.
type Tweet struct {
// FullText is the tweet text; MediaListFromAPI passes it through
// CleanCaption to build the caption.
FullText string `json:"full_text"`
// ExtendedEntities is the preferred source of media entities.
ExtendedEntities *ExtendedEntities `json:"extended_entities,omitempty"`
// Entities is used for media only when ExtendedEntities has none. It reuses
// the ExtendedEntities type, so only its "media" key is decoded.
Entities *ExtendedEntities `json:"entities,omitempty"`
CreatedAt string `json:"created_at"`
ID string `json:"id_str"`
}
// ExtendedEntities wraps the "media" array of a tweet's entities object.
type ExtendedEntities struct {
Media []MediaEntity `json:"media,omitempty"`
}
// MediaEntity describes one media attachment of a tweet.
type MediaEntity struct {
// Type selects the handling in MediaListFromAPI: "video" and "animated_gif"
// are treated as videos, "photo" as a photo; other values are ignored.
Type string `json:"type"`
// MediaURLHTTPS is used as the photo URL and as the video thumbnail.
MediaURLHTTPS string `json:"media_url_https"`
ExpandedURL string `json:"expanded_url"`
URL string `json:"url"`
// VideoInfo may be nil; ExtractVideoFormats then yields no formats.
VideoInfo *VideoInfo `json:"video_info,omitempty"`
}
// VideoInfo carries the video-specific data of a media entity.
type VideoInfo struct {
// DurationMillis is divided by 1000 (integer division) by
// ExtractVideoFormats to obtain the format duration.
DurationMillis int `json:"duration_millis"`
Variants []Variant `json:"variants"`
AspectRatio []int `json:"aspect_ratio"`
}
// Variant is one downloadable rendition of a video. ExtractVideoFormats only
// uses variants whose ContentType is "video/mp4".
type Variant struct {
Bitrate int `json:"bitrate,omitempty"`
ContentType string `json:"content_type"`
URL string `json:"url"`
}

162
ext/twitter/util.go Normal file
View file

@ -0,0 +1,162 @@
package twitter
import (
"encoding/json"
"fmt"
"govd/enums"
"govd/models"
"govd/util"
"net/http"
"regexp"
"strconv"
"strings"
)
// authToken is the bearer token that BuildAPIHeaders sends in the
// "authorization" header.
// NOTE(review): this is a hard-coded credential; presumably the public
// web-client token rather than a private secret — confirm.
const authToken = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
// resolutionRegex matches the first "<digits>x<digits>" sequence in a string;
// extractResolution uses its two capture groups as width and height.
var resolutionRegex = regexp.MustCompile(`(\d+)x(\d+)`)
// BuildAPIHeaders builds the header set for an authenticated API request.
//
// The CSRF token is taken from the first cookie named "ct0". If there is no
// such cookie, or its value is empty, nil is returned so the caller can
// report unusable cookies. Otherwise the returned map contains the bearer
// authorization, the Chrome user agent, the fixed x-twitter-* headers and the
// x-csrf-token header.
func BuildAPIHeaders(cookies []*http.Cookie) map[string]string {
	token := ""
	for i := range cookies {
		if c := cookies[i]; c.Name == "ct0" {
			token = c.Value
			break
		}
	}
	if token == "" {
		return nil
	}

	// The token is known to be non-empty here, so it is always included.
	return map[string]string{
		"authorization":             fmt.Sprintf("Bearer %s", authToken),
		"user-agent":                util.ChromeUA,
		"x-twitter-auth-type":       "OAuth2Session",
		"x-twitter-client-language": "en",
		"x-twitter-active-user":     "yes",
		"x-csrf-token":              token,
	}
}
// BuildAPIQuery returns the query parameters for a TweetDetail request.
//
// The result has two keys, "variables" and "features", each holding a
// JSON-encoded object. tweetID is placed in the "focalTweetId" variable; all
// other entries are fixed boolean flags.
func BuildAPIQuery(tweetID string) map[string]string {
	// Both objects contain only strings and booleans, so encoding cannot
	// fail; the error is deliberately discarded.
	encode := func(obj map[string]interface{}) string {
		raw, _ := json.Marshal(obj)
		return string(raw)
	}

	return map[string]string{
		"variables": encode(map[string]interface{}{
			"focalTweetId":                           tweetID,
			"includePromotedContent":                 true,
			"with_rux_injections":                    false,
			"withBirdwatchNotes":                     true,
			"withCommunity":                          true,
			"withDownvotePerspective":                false,
			"withQuickPromoteEligibilityTweetFields": true,
			"withReactionsMetadata":                  false,
			"withReactionsPerspective":               false,
			"withSuperFollowsTweetFields":            true,
			"withSuperFollowsUserFields":             true,
			"withV2Timeline":                         true,
			"withVoice":                              true,
		}),
		"features": encode(map[string]interface{}{
			"graphql_is_translatable_rweb_tweet_is_translatable_enabled":              false,
			"interactive_text_enabled":                                                true,
			"responsive_web_edit_tweet_api_enabled":                                   true,
			"responsive_web_enhance_cards_enabled":                                    true,
			"responsive_web_graphql_timeline_navigation_enabled":                      false,
			"responsive_web_text_conversations_enabled":                               false,
			"responsive_web_uc_gql_enabled":                                           true,
			"standardized_nudges_misinfo":                                             true,
			"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": false,
			"tweetypie_unmention_optimization_enabled":                                true,
			"unified_cards_ad_metadata_container_dynamic_card_content_query_enabled":  true,
			"verified_phone_label_enabled":                                            false,
			"vibe_api_enabled":                                                        true,
		}),
	}
}
// shortLinkRegex matches t.co short links inside tweet text. It is compiled
// once at package initialisation so that CleanCaption does not pay for a
// regex compilation on every call.
var shortLinkRegex = regexp.MustCompile(`https?://t\.co/\S+`)

// CleanCaption removes every t.co short link from caption and trims leading
// and trailing whitespace from what remains. An empty caption yields an empty
// string.
func CleanCaption(caption string) string {
	if caption == "" {
		return ""
	}
	return strings.TrimSpace(shortLinkRegex.ReplaceAllString(caption, ""))
}
// ExtractVideoFormats converts the "video/mp4" variants of media into media
// formats.
//
// When media has no VideoInfo the result is empty. Each produced format is
// tagged as AVC video with AAC audio, carries the variant's bitrate, the
// duration derived from DurationMillis (integer division by 1000), the
// entity's MediaURLHTTPS as thumbnail, and the width/height that
// extractResolution parses from the variant URL. The returned error is always
// nil.
func ExtractVideoFormats(media *MediaEntity) ([]*models.MediaFormat, error) {
	var formats []*models.MediaFormat

	info := media.VideoInfo
	if info == nil {
		return formats, nil
	}

	seconds := int64(info.DurationMillis / 1000)
	for _, v := range info.Variants {
		// Skip every variant that is not a plain MP4 file.
		if v.ContentType != "video/mp4" {
			continue
		}
		w, h := extractResolution(v.URL)
		format := &models.MediaFormat{
			Type:       enums.MediaTypeVideo,
			FormatID:   fmt.Sprintf("mp4_%d", v.Bitrate),
			URL:        []string{v.URL},
			VideoCodec: enums.MediaCodecAVC,
			AudioCodec: enums.MediaCodecAAC,
			Duration:   seconds,
			Thumbnail:  []string{media.MediaURLHTTPS},
			Width:      w,
			Height:     h,
			Bitrate:    int64(v.Bitrate),
		}
		formats = append(formats, format)
	}
	return formats, nil
}
// extractResolution returns the width and height taken from the first
// "<digits>x<digits>" sequence in url, or 0, 0 when there is none. Number
// parsing errors are ignored, as the matched groups contain only digits.
func extractResolution(url string) (int64, int64) {
	groups := resolutionRegex.FindStringSubmatch(url)
	if len(groups) < 3 {
		return 0, 0
	}
	w, _ := strconv.ParseInt(groups[1], 10, 64)
	h, _ := strconv.ParseInt(groups[2], 10, 64)
	return w, h
}
// FindTweetData locates the tweet with the given ID inside resp.
//
// Only the entries of the first instruction are searched, for an entry whose
// ID is "tweet-<tweetID>". For that entry the "tweet" payload is returned if
// present, otherwise the "legacy" payload. An error is returned when the
// response has no instructions, when the entry has neither payload, or when
// no entry matches. (The error messages are in Italian.)
func FindTweetData(resp *APIResponse, tweetID string) (*Tweet, error) {
	instructions := resp.Data.ThreadedConversationWithInjectionsV2.Instructions
	if len(instructions) == 0 {
		// "no instruction found in the response"
		return nil, fmt.Errorf("nessuna istruzione trovata nella risposta")
	}

	wanted := fmt.Sprintf("tweet-%s", tweetID)
	candidates := instructions[0].Entries
	for i := range candidates {
		if candidates[i].EntryID != wanted {
			continue
		}
		result := candidates[i].Content.ItemContent.TweetResults.Result
		switch {
		case result.Tweet != nil:
			return result.Tweet, nil
		case result.Legacy != nil:
			return result.Legacy, nil
		default:
			// "invalid tweet structure"
			return nil, fmt.Errorf("struttura del tweet non valida")
		}
	}
	// "tweet not found in the response"
	return nil, fmt.Errorf("tweet non trovato nella risposta")
}