Init
This commit is contained in:
parent
264c97183e
commit
3faede7b1c
74 changed files with 6228 additions and 1 deletions
181
ext/twitter/main.go
Normal file
181
ext/twitter/main.go
Normal file
|
@ -0,0 +1,181 @@
|
|||
package twitter
|
||||
|
||||
import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"regexp"
	"time"

	"govd/enums"
	"govd/models"
	"govd/util"
)
|
||||
|
||||
// Locations of the web API queried by this package.
const (
	// apiHostname is the host name of the API.
	apiHostname = "x.com"

	// apiEndpoint is the full URL of the GraphQL "TweetDetail" operation
	// requested by GetTweetAPI.
	apiEndpoint = "https://x.com/i/api/graphql/zZXycP0V6H7m-2r0mOnFcA/TweetDetail"
)
|
||||
|
||||
// HTTPClient is the shared client used for every request made by this
// package (short-link resolution and API calls).
//
// It carries an overall per-request timeout: a zero-value http.Client never
// times out, so a stalled connection would otherwise block the extractor
// forever.
var HTTPClient = &http.Client{Timeout: 30 * time.Second}
|
||||
|
||||
var ShortExtractor = &models.Extractor{
|
||||
Name: "Twitter (Short)",
|
||||
CodeName: "twitter:short",
|
||||
Type: enums.ExtractorTypeSingle,
|
||||
Category: enums.ExtractorCategorySocial,
|
||||
URLPattern: regexp.MustCompile(`https?://t\.co/(?P<id>\w+)`),
|
||||
IsRedirect: true,
|
||||
|
||||
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
|
||||
req, err := http.NewRequest("GET", ctx.MatchedContentURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create req: %w", err)
|
||||
}
|
||||
req.Header.Set("User-Agent", util.ChromeUA)
|
||||
res, err := HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
body, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read body: %w", err)
|
||||
}
|
||||
matchedURL := Extractor.URLPattern.FindStringSubmatch(string(body))
|
||||
if matchedURL == nil {
|
||||
return nil, fmt.Errorf("failed to find url in body")
|
||||
}
|
||||
return &models.ExtractorResponse{
|
||||
URL: matchedURL[0],
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
var Extractor = &models.Extractor{
|
||||
Name: "Twitter",
|
||||
CodeName: "twitter",
|
||||
Type: enums.ExtractorTypeSingle,
|
||||
Category: enums.ExtractorCategorySocial,
|
||||
URLPattern: regexp.MustCompile(`https?:\/\/(vx)?(twitter|x)\.com\/([^\/]+)\/status\/(?P<id>\d+)`),
|
||||
|
||||
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
|
||||
mediaList, err := MediaListFromAPI(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get media: %w", err)
|
||||
}
|
||||
return &models.ExtractorResponse{
|
||||
MediaList: mediaList,
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
|
||||
var mediaList []*models.Media
|
||||
|
||||
tweetData, err := GetTweetAPI(ctx.MatchedContentID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get tweet data: %w", err)
|
||||
}
|
||||
|
||||
caption := CleanCaption(tweetData.FullText)
|
||||
|
||||
var mediaEntities []MediaEntity
|
||||
if tweetData.ExtendedEntities != nil && len(tweetData.ExtendedEntities.Media) > 0 {
|
||||
mediaEntities = tweetData.ExtendedEntities.Media
|
||||
} else if tweetData.Entities != nil && len(tweetData.Entities.Media) > 0 {
|
||||
mediaEntities = tweetData.Entities.Media
|
||||
} else {
|
||||
return nil, fmt.Errorf("no media found in tweet")
|
||||
}
|
||||
|
||||
for _, mediaEntity := range mediaEntities {
|
||||
media := ctx.Extractor.NewMedia(
|
||||
ctx.MatchedContentID,
|
||||
ctx.MatchedContentURL,
|
||||
)
|
||||
media.SetCaption(caption)
|
||||
|
||||
switch mediaEntity.Type {
|
||||
case "video", "animated_gif":
|
||||
formats, err := ExtractVideoFormats(&mediaEntity)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, format := range formats {
|
||||
media.AddFormat(format)
|
||||
}
|
||||
case "photo":
|
||||
media.AddFormat(&models.MediaFormat{
|
||||
Type: enums.MediaTypePhoto,
|
||||
FormatID: "photo",
|
||||
URL: []string{mediaEntity.MediaURLHTTPS},
|
||||
})
|
||||
}
|
||||
|
||||
if len(media.Formats) > 0 {
|
||||
mediaList = append(mediaList, media)
|
||||
}
|
||||
}
|
||||
|
||||
return mediaList, nil
|
||||
}
|
||||
|
||||
func GetTweetAPI(tweetID string) (*Tweet, error) {
|
||||
cookies, err := util.ParseCookieFile("twitter.txt")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get cookies: %w", err)
|
||||
}
|
||||
headers := BuildAPIHeaders(cookies)
|
||||
if headers == nil {
|
||||
return nil, fmt.Errorf("failed to build headers. check cookies")
|
||||
}
|
||||
query := BuildAPIQuery(tweetID)
|
||||
|
||||
req, err := http.NewRequest("GET", apiEndpoint, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create req: %w", err)
|
||||
}
|
||||
|
||||
for key, value := range headers {
|
||||
req.Header.Set(key, value)
|
||||
}
|
||||
|
||||
for _, cookie := range cookies {
|
||||
req.AddCookie(cookie)
|
||||
}
|
||||
|
||||
q := req.URL.Query()
|
||||
for key, value := range query {
|
||||
q.Add(key, value)
|
||||
}
|
||||
req.URL.RawQuery = q.Encode()
|
||||
|
||||
resp, err := HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("invalid response code: %s", resp.Status)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read body: %w", err)
|
||||
}
|
||||
|
||||
var apiResponse APIResponse
|
||||
err = json.Unmarshal(body, &apiResponse)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
|
||||
tweet, err := FindTweetData(&apiResponse, tweetID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get tweet data: %w", err)
|
||||
}
|
||||
|
||||
return tweet, nil
|
||||
}
|
72
ext/twitter/models.go
Normal file
72
ext/twitter/models.go
Normal file
|
@ -0,0 +1,72 @@
|
|||
package twitter
|
||||
|
||||
type APIResponse struct {
|
||||
Data struct {
|
||||
ThreadedConversationWithInjectionsV2 struct {
|
||||
Instructions []struct {
|
||||
Entries []struct {
|
||||
EntryID string `json:"entryId"`
|
||||
Content struct {
|
||||
ItemContent struct {
|
||||
TweetResults struct {
|
||||
Result TweetResult `json:"result"`
|
||||
} `json:"tweet_results"`
|
||||
} `json:"itemContent"`
|
||||
} `json:"content"`
|
||||
} `json:"entries"`
|
||||
} `json:"instructions"`
|
||||
} `json:"threaded_conversation_with_injections_v2"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
type TweetResult struct {
|
||||
Tweet *Tweet `json:"tweet,omitempty"`
|
||||
Legacy *Tweet `json:"legacy,omitempty"`
|
||||
RestID string `json:"rest_id,omitempty"`
|
||||
Core *Core `json:"core,omitempty"`
|
||||
}
|
||||
|
||||
type Core struct {
|
||||
UserResults struct {
|
||||
Result struct {
|
||||
Legacy *UserLegacy `json:"legacy,omitempty"`
|
||||
} `json:"result"`
|
||||
} `json:"user_results"`
|
||||
}
|
||||
|
||||
// UserLegacy holds the user fields decoded from a "legacy" user object:
// the screen name and the name.
type UserLegacy struct {
	ScreenName string `json:"screen_name"`
	Name       string `json:"name"`
}
|
||||
|
||||
type Tweet struct {
|
||||
FullText string `json:"full_text"`
|
||||
ExtendedEntities *ExtendedEntities `json:"extended_entities,omitempty"`
|
||||
Entities *ExtendedEntities `json:"entities,omitempty"`
|
||||
CreatedAt string `json:"created_at"`
|
||||
ID string `json:"id_str"`
|
||||
}
|
||||
|
||||
type ExtendedEntities struct {
|
||||
Media []MediaEntity `json:"media,omitempty"`
|
||||
}
|
||||
|
||||
type MediaEntity struct {
|
||||
Type string `json:"type"`
|
||||
MediaURLHTTPS string `json:"media_url_https"`
|
||||
ExpandedURL string `json:"expanded_url"`
|
||||
URL string `json:"url"`
|
||||
VideoInfo *VideoInfo `json:"video_info,omitempty"`
|
||||
}
|
||||
|
||||
type VideoInfo struct {
|
||||
DurationMillis int `json:"duration_millis"`
|
||||
Variants []Variant `json:"variants"`
|
||||
AspectRatio []int `json:"aspect_ratio"`
|
||||
}
|
||||
|
||||
// Variant is one rendition of a video. ExtractVideoFormats keeps only the
// variants whose ContentType is "video/mp4".
type Variant struct {
	Bitrate     int    `json:"bitrate,omitempty"`
	ContentType string `json:"content_type"`
	URL         string `json:"url"`
}
|
162
ext/twitter/util.go
Normal file
162
ext/twitter/util.go
Normal file
|
@ -0,0 +1,162 @@
|
|||
package twitter
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"govd/enums"
|
||||
"govd/models"
|
||||
"govd/util"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// authToken is the bearer token that BuildAPIHeaders places in the
// "authorization" header of API requests. The value is stored with "%3D"
// already percent-encoded and is sent as-is.
// NOTE(review): this credential is hard-coded in source; confirm it is a
// public web-client token rather than a private secret.
const authToken = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
|
||||
|
||||
// resolutionRegex matches a "<digits>x<digits>" pair and captures both
// numbers; extractResolution applies it to variant URLs to read the width
// (group 1) and height (group 2).
var resolutionRegex = regexp.MustCompile(`(\d+)x(\d+)`)
|
||||
|
||||
func BuildAPIHeaders(cookies []*http.Cookie) map[string]string {
|
||||
var csrfToken string
|
||||
for _, cookie := range cookies {
|
||||
if cookie.Name == "ct0" {
|
||||
csrfToken = cookie.Value
|
||||
break
|
||||
}
|
||||
}
|
||||
if csrfToken == "" {
|
||||
return nil
|
||||
}
|
||||
headers := map[string]string{
|
||||
"authorization": fmt.Sprintf("Bearer %s", authToken),
|
||||
"user-agent": util.ChromeUA,
|
||||
"x-twitter-auth-type": "OAuth2Session",
|
||||
"x-twitter-client-language": "en",
|
||||
"x-twitter-active-user": "yes",
|
||||
}
|
||||
|
||||
if csrfToken != "" {
|
||||
headers["x-csrf-token"] = csrfToken
|
||||
}
|
||||
|
||||
return headers
|
||||
}
|
||||
|
||||
// BuildAPIQuery returns the query parameters for a TweetDetail request.
//
// The result has two keys, "variables" and "features", each holding a
// JSON-encoded object. tweetID is sent as the "focalTweetId" variable; all
// other values are fixed flags.
func BuildAPIQuery(tweetID string) map[string]string {
	// encode serialises one parameter object. Marshalling a map of strings
	// and booleans cannot fail, so the error is intentionally dropped.
	encode := func(object map[string]interface{}) string {
		raw, _ := json.Marshal(object)
		return string(raw)
	}

	return map[string]string{
		"variables": encode(map[string]interface{}{
			"focalTweetId":                           tweetID,
			"includePromotedContent":                 true,
			"with_rux_injections":                    false,
			"withBirdwatchNotes":                     true,
			"withCommunity":                          true,
			"withDownvotePerspective":                false,
			"withQuickPromoteEligibilityTweetFields": true,
			"withReactionsMetadata":                  false,
			"withReactionsPerspective":               false,
			"withSuperFollowsTweetFields":            true,
			"withSuperFollowsUserFields":             true,
			"withV2Timeline":                         true,
			"withVoice":                              true,
		}),
		"features": encode(map[string]interface{}{
			"graphql_is_translatable_rweb_tweet_is_translatable_enabled":             false,
			"interactive_text_enabled":                                               true,
			"responsive_web_edit_tweet_api_enabled":                                  true,
			"responsive_web_enhance_cards_enabled":                                   true,
			"responsive_web_graphql_timeline_navigation_enabled":                     false,
			"responsive_web_text_conversations_enabled":                              false,
			"responsive_web_uc_gql_enabled":                                          true,
			"standardized_nudges_misinfo":                                            true,
			"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": false,
			"tweetypie_unmention_optimization_enabled":                               true,
			"unified_cards_ad_metadata_container_dynamic_card_content_query_enabled": true,
			"verified_phone_label_enabled":                                           false,
			"vibe_api_enabled":                                                       true,
		}),
	}
}
|
||||
|
||||
// shortLinkRegex matches t.co short links up to the next whitespace. It is
// compiled once at package initialisation instead of on every call.
var shortLinkRegex = regexp.MustCompile(`https?://t\.co/\S+`)

// CleanCaption removes every t.co short link from caption and trims the
// surrounding whitespace. An empty caption yields an empty string.
func CleanCaption(caption string) string {
	if caption == "" {
		return ""
	}
	return strings.TrimSpace(shortLinkRegex.ReplaceAllString(caption, ""))
}
|
||||
|
||||
func ExtractVideoFormats(media *MediaEntity) ([]*models.MediaFormat, error) {
|
||||
var formats []*models.MediaFormat
|
||||
|
||||
if media.VideoInfo == nil {
|
||||
return formats, nil
|
||||
}
|
||||
|
||||
duration := int64(media.VideoInfo.DurationMillis / 1000)
|
||||
|
||||
for _, variant := range media.VideoInfo.Variants {
|
||||
if variant.ContentType == "video/mp4" {
|
||||
width, height := extractResolution(variant.URL)
|
||||
|
||||
formats = append(formats, &models.MediaFormat{
|
||||
Type: enums.MediaTypeVideo,
|
||||
FormatID: fmt.Sprintf("mp4_%d", variant.Bitrate),
|
||||
URL: []string{variant.URL},
|
||||
VideoCodec: enums.MediaCodecAVC,
|
||||
AudioCodec: enums.MediaCodecAAC,
|
||||
Duration: duration,
|
||||
Thumbnail: []string{media.MediaURLHTTPS},
|
||||
Width: width,
|
||||
Height: height,
|
||||
Bitrate: int64(variant.Bitrate),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return formats, nil
|
||||
}
|
||||
|
||||
func extractResolution(url string) (int64, int64) {
|
||||
matches := resolutionRegex.FindStringSubmatch(url)
|
||||
if len(matches) >= 3 {
|
||||
width, _ := strconv.ParseInt(matches[1], 10, 64)
|
||||
height, _ := strconv.ParseInt(matches[2], 10, 64)
|
||||
return width, height
|
||||
}
|
||||
return 0, 0
|
||||
}
|
||||
|
||||
func FindTweetData(resp *APIResponse, tweetID string) (*Tweet, error) {
|
||||
instructions := resp.Data.ThreadedConversationWithInjectionsV2.Instructions
|
||||
if len(instructions) == 0 {
|
||||
return nil, fmt.Errorf("nessuna istruzione trovata nella risposta")
|
||||
}
|
||||
|
||||
entries := instructions[0].Entries
|
||||
entryID := fmt.Sprintf("tweet-%s", tweetID)
|
||||
|
||||
for _, entry := range entries {
|
||||
if entry.EntryID == entryID {
|
||||
result := entry.Content.ItemContent.TweetResults.Result
|
||||
|
||||
if result.Tweet != nil {
|
||||
return result.Tweet, nil
|
||||
}
|
||||
|
||||
if result.Legacy != nil {
|
||||
return result.Legacy, nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("struttura del tweet non valida")
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("tweet non trovato nella risposta")
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue