set configuration for each extractor

This commit is contained in:
stefanodvx 2025-04-20 12:19:14 +02:00
parent 6baa965534
commit 0a63df9ce6
19 changed files with 337 additions and 175 deletions

View file

@ -16,8 +16,6 @@ import (
// feel free to open a PR if you want to
// add support for the official Instagram API
var httpSession = util.GetHTTPSession()
const (
apiHostname = "api.igram.world"
apiKey = "aaeaf2805cea6abef3f9d2b6a666fce62fd9d612a43ab772bb50ce81455112e0"
@ -39,6 +37,7 @@ var Extractor = &models.Extractor{
URLPattern: regexp.MustCompile(`https:\/\/(www\.)?instagram\.com\/(reel|p|tv)\/(?P<id>[a-zA-Z0-9_-]+)`),
Host: instagramHost,
IsRedirect: false,
Client: util.GetHTTPSession("instagram"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
mediaList, err := MediaListFromAPI(ctx, false)
@ -50,12 +49,13 @@ var Extractor = &models.Extractor{
var StoriesExtractor = &models.Extractor{
Name: "Instagram Stories",
CodeName: "instagram:stories",
CodeName: "instagram_stories",
Type: enums.ExtractorTypeSingle,
Category: enums.ExtractorCategorySocial,
URLPattern: regexp.MustCompile(`https:\/\/(www\.)?instagram\.com\/stories\/[a-zA-Z0-9._]+\/(?P<id>\d+)`),
Host: instagramHost,
IsRedirect: false,
Client: util.GetHTTPSession("instagram_stories"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
mediaList, err := MediaListFromAPI(ctx, true)
@ -67,16 +67,15 @@ var StoriesExtractor = &models.Extractor{
var ShareURLExtractor = &models.Extractor{
Name: "Instagram Share URL",
CodeName: "instagram:share",
CodeName: "instagram_share",
Type: enums.ExtractorTypeSingle,
Category: enums.ExtractorCategorySocial,
URLPattern: regexp.MustCompile(`https?:\/\/(www\.)?instagram\.com\/share\/((reels?|video|s|p)\/)?(?P<id>[^\/\?]+)`),
Host: instagramHost,
IsRedirect: true,
Client: util.GetHTTPSession("instagram_share"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
// temporary fix for public instances
edgeProxyClient := util.GetEdgeProxyClient()
req, err := http.NewRequest(
http.MethodGet,
ctx.MatchedContentURL,
@ -85,7 +84,7 @@ var ShareURLExtractor = &models.Extractor{
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
resp, err := edgeProxyClient.Do(req)
resp, err := ctx.Extractor.Client.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}
@ -103,13 +102,13 @@ func MediaListFromAPI(
) ([]*models.Media, error) {
var mediaList []*models.Media
postURL := ctx.MatchedContentURL
details, err := GetVideoAPI(postURL)
details, err := GetVideoAPI(ctx, postURL)
if err != nil {
return nil, fmt.Errorf("failed to get post: %w", err)
}
var caption string
if !stories {
caption, err = GetPostCaption(postURL)
caption, err = GetPostCaption(ctx, postURL)
if err != nil {
return nil, fmt.Errorf("failed to get caption: %w", err)
}
@ -157,7 +156,10 @@ func MediaListFromAPI(
return mediaList, nil
}
func GetVideoAPI(contentURL string) (*IGramResponse, error) {
func GetVideoAPI(
ctx *models.DownloadContext,
contentURL string,
) (*IGramResponse, error) {
apiURL := fmt.Sprintf(
"https://%s/api/convert",
apiHostname,
@ -173,7 +175,7 @@ func GetVideoAPI(contentURL string) (*IGramResponse, error) {
req.Header.Set("Content-Type", "application/json")
req.Header.Set("User-Agent", util.ChromeUA)
resp, err := httpSession.Do(req)
resp, err := ctx.Extractor.Client.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}

View file

@ -4,6 +4,7 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"govd/models"
"govd/util"
"html"
"io"
@ -95,9 +96,9 @@ func GetCDNURL(contentURL string) (string, error) {
}
func GetPostCaption(
ctx *models.DownloadContext,
postURL string,
) (string, error) {
edgeProxyClient := util.GetEdgeProxyClient()
req, err := http.NewRequest(
http.MethodGet,
postURL,
@ -121,7 +122,7 @@ func GetPostCaption(
req.Header.Set("Cache-Control", "no-cache")
req.Header.Set("TE", "trailers")
resp, err := edgeProxyClient.Do(req)
resp, err := ctx.Extractor.Client.Do(req)
if err != nil {
return "", fmt.Errorf("failed to send request: %w", err)
}

View file

@ -19,8 +19,7 @@ const (
)
var (
httpSession = util.GetHTTPSession()
validHost = []string{
validHost = []string{
"com", "fr", "de", "ch", "jp", "cl", "ca", "it", "co\\.uk", "nz", "ru", "com\\.au",
"at", "pt", "co\\.kr", "es", "com\\.mx", "dk", "ph", "th", "com\\.uy", "co", "nl",
"info", "kr", "ie", "vn", "com\\.vn", "ec", "mx", "in", "pe", "co\\.at", "hu",
@ -33,7 +32,7 @@ var (
var ShortExtractor = &models.Extractor{
Name: "Pinterest (Short)",
CodeName: "pinterest:short",
CodeName: "pinterest_short",
Type: enums.ExtractorTypeSingle,
Category: enums.ExtractorCategorySocial,
URLPattern: regexp.MustCompile(pinValidURLPattern),
@ -45,6 +44,7 @@ var ShortExtractor = &models.Extractor{
return domains
}(),
IsRedirect: true,
Client: util.GetHTTPSession("pinterest_short"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
shortURL := fmt.Sprintf(shortenerAPIFormat, ctx.MatchedContentID)
@ -72,6 +72,7 @@ var Extractor = &models.Extractor{
}
return domains
}(),
Client: util.GetHTTPSession("pinterest"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
media, err := ExtractPinMedia(ctx)
@ -88,7 +89,7 @@ func ExtractPinMedia(ctx *models.DownloadContext) ([]*models.Media, error) {
pinID := ctx.MatchedContentID
contentURL := ctx.MatchedContentURL
pinData, err := GetPinData(pinID)
pinData, err := GetPinData(ctx, pinID)
if err != nil {
return nil, err
}
@ -158,7 +159,10 @@ func ExtractPinMedia(ctx *models.DownloadContext) ([]*models.Media, error) {
return nil, fmt.Errorf("no media found for pin ID: %s", pinID)
}
func GetPinData(pinID string) (*PinData, error) {
func GetPinData(
ctx *models.DownloadContext,
pinID string,
) (*PinData, error) {
params := BuildPinRequestParams(pinID)
req, err := http.NewRequest(http.MethodGet, pinResourceEndpoint, nil)
@ -175,7 +179,7 @@ func GetPinData(pinID string) (*PinData, error) {
// this header is set to fix a 403 error from the endpoint
req.Header.Set("X-Pinterest-PWS-Handler", "www/[username].js")
resp, err := httpSession.Do(req)
resp, err := ctx.Extractor.Client.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}

View file

@ -13,8 +13,7 @@ import (
)
var (
httpSession = util.GetHTTPSession()
baseHost = []string{
baseHost = []string{
"reddit.com",
"redditmedia.com",
"old.reddit.com",
@ -24,12 +23,13 @@ var (
var ShortExtractor = &models.Extractor{
Name: "Reddit (Short)",
CodeName: "reddit:short",
CodeName: "reddit_short",
Type: enums.ExtractorTypeSingle,
Category: enums.ExtractorCategorySocial,
URLPattern: regexp.MustCompile(`https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?s/(?P<id>[^/?#&]+))`),
Host: baseHost,
IsRedirect: true,
Client: util.GetHTTPSession("reddit_short"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
req, err := http.NewRequest(http.MethodGet, ctx.MatchedContentURL, nil)
@ -46,7 +46,7 @@ var ShortExtractor = &models.Extractor{
req.AddCookie(cookie)
}
res, err := httpSession.Do(req)
res, err := ctx.Extractor.Client.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}
@ -67,6 +67,7 @@ var Extractor = &models.Extractor{
Category: enums.ExtractorCategorySocial,
URLPattern: regexp.MustCompile(`https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))`),
Host: baseHost,
Client: util.GetHTTPSession("reddit"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
mediaList, err := MediaListFromAPI(ctx)
@ -86,7 +87,7 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
contentID := ctx.MatchedContentID
contentURL := ctx.MatchedContentURL
manifest, err := GetRedditData(host, slug)
manifest, err := GetRedditData(ctx, host, slug)
if err != nil {
return nil, err
}
@ -222,7 +223,11 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
return mediaList, nil
}
func GetRedditData(host string, slug string) (RedditResponse, error) {
func GetRedditData(
ctx *models.DownloadContext,
host string,
slug string,
) (RedditResponse, error) {
url := fmt.Sprintf("https://%s/%s/.json", host, slug)
req, err := http.NewRequest(http.MethodGet, url, nil)
@ -239,7 +244,7 @@ func GetRedditData(host string, slug string) (RedditResponse, error) {
req.AddCookie(cookie)
}
res, err := httpSession.Do(req)
res, err := ctx.Extractor.Client.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}
@ -252,7 +257,7 @@ func GetRedditData(host string, slug string) (RedditResponse, error) {
altHost = "www.reddit.com"
}
return GetRedditData(altHost, slug)
return GetRedditData(ctx, altHost, slug)
}
var response RedditResponse

View file

@ -18,8 +18,6 @@ const (
)
var (
session = util.GetHTTPSession()
baseApiHeaders = map[string]string{
"referer": "https://www.redgifs.com/",
"origin": "https://www.redgifs.com",
@ -37,6 +35,7 @@ var Extractor = &models.Extractor{
"redgifs.com",
"thumbs2.redgifs.com",
},
Client: util.GetHTTPSession("redgifs"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
mediaList, err := MediaListFromAPI(ctx)
@ -52,7 +51,7 @@ var Extractor = &models.Extractor{
func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
var mediaList []*models.Media
response, err := GetVideo(ctx.MatchedContentID)
response, err := GetVideo(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get from api: %w", err)
}
@ -116,13 +115,14 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
return mediaList, nil
}
func GetVideo(videoID string) (*Response, error) {
func GetVideo(ctx *models.DownloadContext) (*Response, error) {
videoID := ctx.MatchedContentID
url := videoEndpoint + videoID + "?views=true"
req, err := http.NewRequest(http.MethodGet, url, nil)
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
token, err := GetAccessToken()
token, err := GetAccessToken(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get access token: %w", err)
}
@ -132,7 +132,7 @@ func GetVideo(videoID string) (*Response, error) {
for k, v := range baseApiHeaders {
req.Header.Set(k, v)
}
res, err := session.Do(req)
res, err := ctx.Extractor.Client.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}

View file

@ -2,6 +2,7 @@ package redgifs
import (
"fmt"
"govd/models"
"govd/util"
"net/http"
"time"
@ -11,22 +12,22 @@ import (
var accessToken *Token
func GetAccessToken() (*Token, error) {
func GetAccessToken(ctx *models.DownloadContext) (*Token, error) {
if accessToken == nil || time.Now().Unix() >= accessToken.ExpiresIn {
if err := RefreshAccessToken(); err != nil {
if err := RefreshAccessToken(ctx); err != nil {
return nil, err
}
}
return accessToken, nil
}
func RefreshAccessToken() error {
func RefreshAccessToken(ctx *models.DownloadContext) error {
req, err := http.NewRequest(http.MethodGet, tokenEndpoint, nil)
if err != nil {
return fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("User-Agent", util.ChromeUA)
res, err := session.Do(req)
res, err := ctx.Extractor.Client.Do(req)
if err != nil {
return fmt.Errorf("failed to send request: %w", err)
}

View file

@ -24,8 +24,7 @@ const (
)
var (
httpSession = util.GetHTTPSession()
baseHost = []string{
baseHost = []string{
"tiktok.com",
"vxtiktok.com",
"vm.tiktok.com",
@ -45,6 +44,7 @@ var VMExtractor = &models.Extractor{
URLPattern: regexp.MustCompile(`https:\/\/((?:vm|vt|www)\.)?(vx)?tiktok\.com\/(?:t\/)?(?P<id>[a-zA-Z0-9]+)`),
Host: baseHost,
IsRedirect: true,
Client: util.GetHTTPSession("tiktokvm"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
location, err := util.GetLocationURL(ctx.MatchedContentURL, "")
@ -64,6 +64,7 @@ var Extractor = &models.Extractor{
Category: enums.ExtractorCategorySocial,
URLPattern: regexp.MustCompile(`https?:\/\/((www|m)\.)?(vx)?tiktok\.com\/((?:embed|@[\w\.-]+)\/)?(v(ideo)?|p(hoto)?)\/(?P<id>[0-9]+)`),
Host: baseHost,
Client: util.GetHTTPSession("tiktok"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
mediaList, err := MediaListFromAPI(ctx)
@ -79,7 +80,7 @@ var Extractor = &models.Extractor{
func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
var mediaList []*models.Media
details, err := GetVideoAPI(ctx.MatchedContentID)
details, err := GetVideoAPI(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get from api: %w", err)
}
@ -137,7 +138,8 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
return mediaList, nil
}
func GetVideoAPI(awemeID string) (*AwemeDetails, error) {
func GetVideoAPI(ctx *models.DownloadContext) (*AwemeDetails, error) {
awemeID := ctx.MatchedContentID
apiURL := fmt.Sprintf(
"https://%s/aweme/v1/multi/aweme/detail/",
apiHostname,
@ -161,7 +163,7 @@ func GetVideoAPI(awemeID string) (*AwemeDetails, error) {
req.Header.Set("Accept", "application/json")
req.Header.Set("X-Argus", "")
resp, err := httpSession.Do(req)
resp, err := ctx.Extractor.Client.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}

View file

@ -18,16 +18,15 @@ const (
apiEndpoint = "https://x.com/i/api/graphql/zZXycP0V6H7m-2r0mOnFcA/TweetDetail"
)
var httpSession = util.GetHTTPSession()
var ShortExtractor = &models.Extractor{
Name: "Twitter (Short)",
CodeName: "twitter:short",
CodeName: "twitter_short",
Type: enums.ExtractorTypeSingle,
Category: enums.ExtractorCategorySocial,
URLPattern: regexp.MustCompile(`https?://t\.co/(?P<id>\w+)`),
Host: []string{"t.co"},
IsRedirect: true,
Client: util.GetHTTPSession("twitter_short"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
req, err := http.NewRequest(http.MethodGet, ctx.MatchedContentURL, nil)
@ -35,7 +34,7 @@ var ShortExtractor = &models.Extractor{
return nil, fmt.Errorf("failed to create req: %w", err)
}
req.Header.Set("User-Agent", util.ChromeUA)
res, err := httpSession.Do(req)
res, err := ctx.Extractor.Client.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}
@ -66,6 +65,7 @@ var Extractor = &models.Extractor{
"vxx.com",
"vxtwitter.com",
},
Client: util.GetHTTPSession("twitter"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
mediaList, err := MediaListFromAPI(ctx)
@ -81,7 +81,7 @@ var Extractor = &models.Extractor{
func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
var mediaList []*models.Media
tweetData, err := GetTweetAPI(ctx.MatchedContentID)
tweetData, err := GetTweetAPI(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get tweet data: %w", err)
}
@ -129,7 +129,8 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
return mediaList, nil
}
func GetTweetAPI(tweetID string) (*Tweet, error) {
func GetTweetAPI(ctx *models.DownloadContext) (*Tweet, error) {
tweetID := ctx.MatchedContentID
cookies, err := util.ParseCookieFile("twitter.txt")
if err != nil {
return nil, fmt.Errorf("failed to get cookies: %w", err)
@ -159,7 +160,7 @@ func GetTweetAPI(tweetID string) (*Tweet, error) {
}
req.URL.RawQuery = q.Encode()
resp, err := httpSession.Do(req)
resp, err := ctx.Extractor.Client.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}