govd/ext/twitter/main.go
2025-04-28 17:14:23 +02:00

203 lines
4.9 KiB
Go

package twitter
import (
"fmt"
"io"
"net/http"
"regexp"
"govd/enums"
"govd/models"
"govd/util"
"github.com/bytedance/sonic"
"github.com/pkg/errors"
)
// Endpoint constants for the GraphQL API this extractor queries.
const (
	// apiHostname is the host that serves the API.
	apiHostname = "x.com"

	// apiBase is the URL prefix shared by the GraphQL operations.
	apiBase = "https://" + apiHostname + "/i/api/graphql/"

	// apiEndpoint is the full URL of the TweetResultByRestId operation.
	// NOTE(review): the opaque path segment before the operation name is
	// presumably a server-assigned query ID — confirm before changing it.
	apiEndpoint = apiBase + "2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId"
)
// ShortExtractor resolves t.co short links.
//
// Run fetches the short-link URL with util.ChromeUA as the User-Agent and
// returns the first substring of the response body that matches the URL
// pattern of the main Twitter Extractor. It fails when the body contains no
// such URL.
var ShortExtractor = &models.Extractor{
	Name:       "Twitter (Short)",
	CodeName:   "twitter_short",
	Type:       enums.ExtractorTypeSingle,
	Category:   enums.ExtractorCategorySocial,
	URLPattern: regexp.MustCompile(`https?://t\.co/(?P<id>\w+)`),
	Host:       []string{"t.co"},
	IsRedirect: true,

	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
		httpClient := util.GetHTTPClient(ctx.Extractor.CodeName)

		request, err := http.NewRequest(http.MethodGet, ctx.MatchedContentURL, nil)
		if err != nil {
			return nil, fmt.Errorf("failed to create req: %w", err)
		}
		request.Header.Set("User-Agent", util.ChromeUA)

		response, err := httpClient.Do(request)
		if err != nil {
			return nil, fmt.Errorf("failed to send request: %w", err)
		}
		defer response.Body.Close()

		page, err := io.ReadAll(response.Body)
		if err != nil {
			return nil, fmt.Errorf("failed to read body: %w", err)
		}

		// The tweet pattern can never match an empty string, so an empty
		// result means the page holds no tweet URL.
		target := Extractor.URLPattern.FindString(string(page))
		if target == "" {
			return nil, errors.New("failed to find url in body")
		}
		return &models.ExtractorResponse{URL: target}, nil
	},
}
// Extractor handles tweet status URLs (".../<user>/status/<id>") on the
// twitter.com and x.com domains, optionally prefixed with "vx".
//
// Run delegates to MediaListFromAPI and returns the resulting media list.
var Extractor = &models.Extractor{
	Name:       "Twitter",
	CodeName:   "twitter",
	Type:       enums.ExtractorTypeSingle,
	Category:   enums.ExtractorCategorySocial,
	URLPattern: regexp.MustCompile(`https?:\/\/(vx)?(twitter|x)\.com\/([^\/]+)\/status\/(?P<id>\d+)`),
	Host:       []string{"twitter.com", "x.com", "vxtwitter.com"},

	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
		tweetMedia, err := MediaListFromAPI(ctx)
		if err != nil {
			return nil, fmt.Errorf("failed to get media: %w", err)
		}
		response := &models.ExtractorResponse{MediaList: tweetMedia}
		return response, nil
	},
}
// MediaListFromAPI fetches the tweet identified by ctx.MatchedContentID and
// converts its media entities into a list of downloadable media.
//
// ExtendedEntities is preferred; Entities is consulted only when the former
// holds no media. A tweet without any media yields (nil, nil). Every returned
// media carries the cleaned tweet text as its caption. Entities whose type is
// not "video", "animated_gif" or "photo", or that end up with no formats, are
// left out of the result.
//
// An error is returned when the tweet cannot be fetched or when the formats
// of a video/animated GIF entity cannot be extracted.
func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
	client := util.GetHTTPClient(ctx.Extractor.CodeName)
	tweetData, err := GetTweetAPI(client, ctx.MatchedContentID)
	if err != nil {
		return nil, fmt.Errorf("failed to get tweet data: %w", err)
	}
	caption := CleanCaption(tweetData.FullText)

	var mediaEntities []MediaEntity
	switch {
	case tweetData.ExtendedEntities != nil && len(tweetData.ExtendedEntities.Media) > 0:
		mediaEntities = tweetData.ExtendedEntities.Media
	case tweetData.Entities != nil && len(tweetData.Entities.Media) > 0:
		mediaEntities = tweetData.Entities.Media
	default:
		// Text-only tweet: nothing to download, but not an error either.
		return nil, nil
	}

	var mediaList []*models.Media
	for _, mediaEntity := range mediaEntities {
		media := ctx.Extractor.NewMedia(
			ctx.MatchedContentID,
			ctx.MatchedContentURL,
		)
		media.SetCaption(caption)

		switch mediaEntity.Type {
		case "video", "animated_gif":
			formats, err := ExtractVideoFormats(&mediaEntity)
			if err != nil {
				// Wrap like every other error path in this file so the
				// failure carries context up the chain.
				return nil, fmt.Errorf("failed to extract video formats: %w", err)
			}
			for _, format := range formats {
				media.AddFormat(format)
			}
		case "photo":
			media.AddFormat(&models.MediaFormat{
				Type:     enums.MediaTypePhoto,
				FormatID: "photo",
				URL:      []string{mediaEntity.MediaURLHTTPS},
			})
		}

		// Skip entities that produced no usable format (e.g. unknown types).
		if len(media.Formats) > 0 {
			mediaList = append(mediaList, media)
		}
	}
	return mediaList, nil
}
// GetTweetAPI requests a single tweet from the TweetResultByRestId endpoint
// and returns its decoded payload.
//
// Cookies are read from "twitter.txt" via util.ParseCookieFile; they feed
// BuildAPIHeaders and are also attached to the request. The query parameters
// come from BuildAPIQuery(tweetID).
//
// An error is returned when the cookies cannot be loaded, the headers cannot
// be built, the request fails, the status is not 200 OK, the body cannot be
// read or decoded, or the response contains no tweet data.
func GetTweetAPI(
	client models.HTTPClient,
	tweetID string,
) (*Tweet, error) {
	jar, err := util.ParseCookieFile("twitter.txt")
	if err != nil {
		return nil, fmt.Errorf("failed to get cookies: %w", err)
	}

	apiHeaders := BuildAPIHeaders(jar)
	if apiHeaders == nil {
		return nil, errors.New("failed to build headers. check cookies")
	}
	apiQuery := BuildAPIQuery(tweetID)

	request, err := http.NewRequest(http.MethodGet, apiEndpoint, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create req: %w", err)
	}
	for name, val := range apiHeaders {
		request.Header.Set(name, val)
	}
	for _, c := range jar {
		request.AddCookie(c)
	}

	// Merge the API parameters into the (initially empty) query string.
	params := request.URL.Query()
	for name, val := range apiQuery {
		params.Add(name, val)
	}
	request.URL.RawQuery = params.Encode()

	response, err := client.Do(request)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("invalid response code: %s", response.Status)
	}

	payload, err := io.ReadAll(response.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read body: %w", err)
	}

	var decoded APIResponse
	if err := sonic.ConfigFastest.Unmarshal(payload, &decoded); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}

	tweetResult := decoded.Data.TweetResult.Result
	if tweetResult == nil {
		return nil, errors.New("failed to get tweet result")
	}

	// The payload may sit under either field; Tweet takes precedence.
	if tweetResult.Tweet != nil {
		return tweetResult.Tweet, nil
	}
	if tweetResult.Legacy != nil {
		return tweetResult.Legacy, nil
	}
	return nil, errors.New("failed to get tweet data")
}