govd/ext/twitter/main.go
stefanodvx 3e351e7e43 use host matching instead of regex (desc)
why? using regex on every single message the bot receives, even simple patterns, can be very harmful for your cpu lol
2025-04-16 14:11:55 +02:00

188 lines
4.6 KiB
Go

package twitter
import (
"encoding/json"
"fmt"
"io"
"net/http"
"regexp"
"govd/enums"
"govd/models"
"govd/util"
)
const (
apiHostname = "x.com"
apiEndpoint = "https://x.com/i/api/graphql/zZXycP0V6H7m-2r0mOnFcA/TweetDetail"
)
var httpSession = util.GetHTTPSession()
var ShortExtractor = &models.Extractor{
Name: "Twitter (Short)",
CodeName: "twitter:short",
Type: enums.ExtractorTypeSingle,
Category: enums.ExtractorCategorySocial,
URLPattern: regexp.MustCompile(`https?://t\.co/(?P<id>\w+)`),
Host: []string{"t.co"},
IsRedirect: true,
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
req, err := http.NewRequest("GET", ctx.MatchedContentURL, nil)
if err != nil {
return nil, fmt.Errorf("failed to create req: %w", err)
}
req.Header.Set("User-Agent", util.ChromeUA)
res, err := httpSession.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}
defer res.Body.Close()
body, err := io.ReadAll(res.Body)
if err != nil {
return nil, fmt.Errorf("failed to read body: %w", err)
}
matchedURL := Extractor.URLPattern.FindStringSubmatch(string(body))
if matchedURL == nil {
return nil, fmt.Errorf("failed to find url in body")
}
return &models.ExtractorResponse{
URL: matchedURL[0],
}, nil
},
}
var Extractor = &models.Extractor{
Name: "Twitter",
CodeName: "twitter",
Type: enums.ExtractorTypeSingle,
Category: enums.ExtractorCategorySocial,
URLPattern: regexp.MustCompile(`https?:\/\/(vx)?(twitter|x)\.com\/([^\/]+)\/status\/(?P<id>\d+)`),
Host: []string{
"twitter.com",
"x.com",
"vxx.com",
"vxtwitter.com",
},
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
mediaList, err := MediaListFromAPI(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get media: %w", err)
}
return &models.ExtractorResponse{
MediaList: mediaList,
}, nil
},
}
func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
var mediaList []*models.Media
tweetData, err := GetTweetAPI(ctx.MatchedContentID)
if err != nil {
return nil, fmt.Errorf("failed to get tweet data: %w", err)
}
caption := CleanCaption(tweetData.FullText)
var mediaEntities []MediaEntity
if tweetData.ExtendedEntities != nil && len(tweetData.ExtendedEntities.Media) > 0 {
mediaEntities = tweetData.ExtendedEntities.Media
} else if tweetData.Entities != nil && len(tweetData.Entities.Media) > 0 {
mediaEntities = tweetData.Entities.Media
} else {
return nil, fmt.Errorf("no media found in tweet")
}
for _, mediaEntity := range mediaEntities {
media := ctx.Extractor.NewMedia(
ctx.MatchedContentID,
ctx.MatchedContentURL,
)
media.SetCaption(caption)
switch mediaEntity.Type {
case "video", "animated_gif":
formats, err := ExtractVideoFormats(&mediaEntity)
if err != nil {
return nil, err
}
for _, format := range formats {
media.AddFormat(format)
}
case "photo":
media.AddFormat(&models.MediaFormat{
Type: enums.MediaTypePhoto,
FormatID: "photo",
URL: []string{mediaEntity.MediaURLHTTPS},
})
}
if len(media.Formats) > 0 {
mediaList = append(mediaList, media)
}
}
return mediaList, nil
}
func GetTweetAPI(tweetID string) (*Tweet, error) {
cookies, err := util.ParseCookieFile("twitter.txt")
if err != nil {
return nil, fmt.Errorf("failed to get cookies: %w", err)
}
headers := BuildAPIHeaders(cookies)
if headers == nil {
return nil, fmt.Errorf("failed to build headers. check cookies")
}
query := BuildAPIQuery(tweetID)
req, err := http.NewRequest("GET", apiEndpoint, nil)
if err != nil {
return nil, fmt.Errorf("failed to create req: %w", err)
}
for key, value := range headers {
req.Header.Set(key, value)
}
for _, cookie := range cookies {
req.AddCookie(cookie)
}
q := req.URL.Query()
for key, value := range query {
q.Add(key, value)
}
req.URL.RawQuery = q.Encode()
resp, err := httpSession.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("invalid response code: %s", resp.Status)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to read body: %w", err)
}
var apiResponse APIResponse
err = json.Unmarshal(body, &apiResponse)
if err != nil {
return nil, fmt.Errorf("failed to parse response: %w", err)
}
tweet, err := FindTweetData(&apiResponse, tweetID)
if err != nil {
return nil, fmt.Errorf("failed to get tweet data: %w", err)
}
return tweet, nil
}