set configuration for each extractor

This commit is contained in:
stefanodvx 2025-04-20 12:19:14 +02:00
parent 6baa965534
commit 0a63df9ce6
19 changed files with 337 additions and 175 deletions

2
.gitignore vendored
View file

@ -7,6 +7,8 @@
old/ old/
.env .env
ext-cfg.yaml
.idea/ .idea/
downloads downloads

44
config/main.go Normal file
View file

@ -0,0 +1,44 @@
package config
import (
"fmt"
"os"
"govd/models"
"gopkg.in/yaml.v3"
)
// extractorConfigs holds the per-extractor settings parsed from ext-cfg.yaml,
// keyed by extractor code name. It stays nil until LoadExtractorConfigs has
// run at least once.
var extractorConfigs map[string]*models.ExtractorConfig

// LoadExtractorConfigs reads ext-cfg.yaml from the current working directory
// and replaces the in-memory extractor configuration with its contents.
//
// A missing file is not an error: the configuration is simply left empty.
// Read failures and YAML decoding failures are returned wrapped.
func LoadExtractorConfigs() error {
	// Reset first so a failed (re)load never leaves stale entries behind.
	extractorConfigs = make(map[string]*models.ExtractorConfig)

	const configPath = "ext-cfg.yaml"

	// Read the file directly instead of Stat-then-read: the separate existence
	// check was redundant and left a window in which the file could disappear
	// between the two calls and turn an optional file into a hard error.
	data, err := os.ReadFile(configPath)
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return fmt.Errorf("errore nella lettura del file di configurazione: %w", err)
	}

	var rawConfig map[string]*models.ExtractorConfig
	if err := yaml.Unmarshal(data, &rawConfig); err != nil {
		return fmt.Errorf("errore nella decodifica del file YAML: %w", err)
	}

	for codeName, cfg := range rawConfig {
		extractorConfigs[codeName] = cfg
	}
	return nil
}

// GetExtractorConfig returns the configuration registered for the extractor
// with the given code name, or nil when none is configured.
//
// Extractors request their HTTP client from package-level variable
// initializers, which run before main has had a chance to call
// LoadExtractorConfigs. To make those early lookups see the file, the
// configuration is loaded on first use when it has not been loaded yet.
func GetExtractorConfig(codeName string) *models.ExtractorConfig {
	if extractorConfigs == nil {
		// The error is deliberately dropped here: LoadExtractorConfigs always
		// leaves a non-nil (possibly empty) map behind, and main calls it
		// explicitly and reports any failure itself.
		_ = LoadExtractorConfigs()
	}
	// A missing key yields the zero value of the pointer type, i.e. nil.
	return extractorConfigs[codeName]
}

5
ext-cfg-example.yaml Normal file
View file

@ -0,0 +1,5 @@
instagram_share:
edge_proxy_url: https://example.com
reddit:
https_proxy: https://example.com

View file

@ -16,8 +16,6 @@ import (
// feel free to open PR, if you want to // feel free to open PR, if you want to
// add support for the official Instagram API // add support for the official Instagram API
var httpSession = util.GetHTTPSession()
const ( const (
apiHostname = "api.igram.world" apiHostname = "api.igram.world"
apiKey = "aaeaf2805cea6abef3f9d2b6a666fce62fd9d612a43ab772bb50ce81455112e0" apiKey = "aaeaf2805cea6abef3f9d2b6a666fce62fd9d612a43ab772bb50ce81455112e0"
@ -39,6 +37,7 @@ var Extractor = &models.Extractor{
URLPattern: regexp.MustCompile(`https:\/\/(www\.)?instagram\.com\/(reel|p|tv)\/(?P<id>[a-zA-Z0-9_-]+)`), URLPattern: regexp.MustCompile(`https:\/\/(www\.)?instagram\.com\/(reel|p|tv)\/(?P<id>[a-zA-Z0-9_-]+)`),
Host: instagramHost, Host: instagramHost,
IsRedirect: false, IsRedirect: false,
Client: util.GetHTTPSession("instagram"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
mediaList, err := MediaListFromAPI(ctx, false) mediaList, err := MediaListFromAPI(ctx, false)
@ -50,12 +49,13 @@ var Extractor = &models.Extractor{
var StoriesExtractor = &models.Extractor{ var StoriesExtractor = &models.Extractor{
Name: "Instagram Stories", Name: "Instagram Stories",
CodeName: "instagram:stories", CodeName: "instagram_stories",
Type: enums.ExtractorTypeSingle, Type: enums.ExtractorTypeSingle,
Category: enums.ExtractorCategorySocial, Category: enums.ExtractorCategorySocial,
URLPattern: regexp.MustCompile(`https:\/\/(www\.)?instagram\.com\/stories\/[a-zA-Z0-9._]+\/(?P<id>\d+)`), URLPattern: regexp.MustCompile(`https:\/\/(www\.)?instagram\.com\/stories\/[a-zA-Z0-9._]+\/(?P<id>\d+)`),
Host: instagramHost, Host: instagramHost,
IsRedirect: false, IsRedirect: false,
Client: util.GetHTTPSession("instagram_stories"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
mediaList, err := MediaListFromAPI(ctx, true) mediaList, err := MediaListFromAPI(ctx, true)
@ -67,16 +67,15 @@ var StoriesExtractor = &models.Extractor{
var ShareURLExtractor = &models.Extractor{ var ShareURLExtractor = &models.Extractor{
Name: "Instagram Share URL", Name: "Instagram Share URL",
CodeName: "instagram:share", CodeName: "instagram_share",
Type: enums.ExtractorTypeSingle, Type: enums.ExtractorTypeSingle,
Category: enums.ExtractorCategorySocial, Category: enums.ExtractorCategorySocial,
URLPattern: regexp.MustCompile(`https?:\/\/(www\.)?instagram\.com\/share\/((reels?|video|s|p)\/)?(?P<id>[^\/\?]+)`), URLPattern: regexp.MustCompile(`https?:\/\/(www\.)?instagram\.com\/share\/((reels?|video|s|p)\/)?(?P<id>[^\/\?]+)`),
Host: instagramHost, Host: instagramHost,
IsRedirect: true, IsRedirect: true,
Client: util.GetHTTPSession("instagram_share"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
// temporary fix for public instances
edgeProxyClient := util.GetEdgeProxyClient()
req, err := http.NewRequest( req, err := http.NewRequest(
http.MethodGet, http.MethodGet,
ctx.MatchedContentURL, ctx.MatchedContentURL,
@ -85,7 +84,7 @@ var ShareURLExtractor = &models.Extractor{
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err) return nil, fmt.Errorf("failed to create request: %w", err)
} }
resp, err := edgeProxyClient.Do(req) resp, err := ctx.Extractor.Client.Do(req)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err) return nil, fmt.Errorf("failed to send request: %w", err)
} }
@ -103,13 +102,13 @@ func MediaListFromAPI(
) ([]*models.Media, error) { ) ([]*models.Media, error) {
var mediaList []*models.Media var mediaList []*models.Media
postURL := ctx.MatchedContentURL postURL := ctx.MatchedContentURL
details, err := GetVideoAPI(postURL) details, err := GetVideoAPI(ctx, postURL)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get post: %w", err) return nil, fmt.Errorf("failed to get post: %w", err)
} }
var caption string var caption string
if !stories { if !stories {
caption, err = GetPostCaption(postURL) caption, err = GetPostCaption(ctx, postURL)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get caption: %w", err) return nil, fmt.Errorf("failed to get caption: %w", err)
} }
@ -157,7 +156,10 @@ func MediaListFromAPI(
return mediaList, nil return mediaList, nil
} }
func GetVideoAPI(contentURL string) (*IGramResponse, error) { func GetVideoAPI(
ctx *models.DownloadContext,
contentURL string,
) (*IGramResponse, error) {
apiURL := fmt.Sprintf( apiURL := fmt.Sprintf(
"https://%s/api/convert", "https://%s/api/convert",
apiHostname, apiHostname,
@ -173,7 +175,7 @@ func GetVideoAPI(contentURL string) (*IGramResponse, error) {
req.Header.Set("Content-Type", "application/json") req.Header.Set("Content-Type", "application/json")
req.Header.Set("User-Agent", util.ChromeUA) req.Header.Set("User-Agent", util.ChromeUA)
resp, err := httpSession.Do(req) resp, err := ctx.Extractor.Client.Do(req)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err) return nil, fmt.Errorf("failed to send request: %w", err)
} }

View file

@ -4,6 +4,7 @@ import (
"crypto/sha256" "crypto/sha256"
"encoding/hex" "encoding/hex"
"fmt" "fmt"
"govd/models"
"govd/util" "govd/util"
"html" "html"
"io" "io"
@ -95,9 +96,9 @@ func GetCDNURL(contentURL string) (string, error) {
} }
func GetPostCaption( func GetPostCaption(
ctx *models.DownloadContext,
postURL string, postURL string,
) (string, error) { ) (string, error) {
edgeProxyClient := util.GetEdgeProxyClient()
req, err := http.NewRequest( req, err := http.NewRequest(
http.MethodGet, http.MethodGet,
postURL, postURL,
@ -121,7 +122,7 @@ func GetPostCaption(
req.Header.Set("Cache-Control", "no-cache") req.Header.Set("Cache-Control", "no-cache")
req.Header.Set("TE", "trailers") req.Header.Set("TE", "trailers")
resp, err := edgeProxyClient.Do(req) resp, err := ctx.Extractor.Client.Do(req)
if err != nil { if err != nil {
return "", fmt.Errorf("failed to send request: %w", err) return "", fmt.Errorf("failed to send request: %w", err)
} }

View file

@ -19,7 +19,6 @@ const (
) )
var ( var (
httpSession = util.GetHTTPSession()
validHost = []string{ validHost = []string{
"com", "fr", "de", "ch", "jp", "cl", "ca", "it", "co\\.uk", "nz", "ru", "com\\.au", "com", "fr", "de", "ch", "jp", "cl", "ca", "it", "co\\.uk", "nz", "ru", "com\\.au",
"at", "pt", "co\\.kr", "es", "com\\.mx", "dk", "ph", "th", "com\\.uy", "co", "nl", "at", "pt", "co\\.kr", "es", "com\\.mx", "dk", "ph", "th", "com\\.uy", "co", "nl",
@ -33,7 +32,7 @@ var (
var ShortExtractor = &models.Extractor{ var ShortExtractor = &models.Extractor{
Name: "Pinterest (Short)", Name: "Pinterest (Short)",
CodeName: "pinterest:short", CodeName: "pinterest_short",
Type: enums.ExtractorTypeSingle, Type: enums.ExtractorTypeSingle,
Category: enums.ExtractorCategorySocial, Category: enums.ExtractorCategorySocial,
URLPattern: regexp.MustCompile(pinValidURLPattern), URLPattern: regexp.MustCompile(pinValidURLPattern),
@ -45,6 +44,7 @@ var ShortExtractor = &models.Extractor{
return domains return domains
}(), }(),
IsRedirect: true, IsRedirect: true,
Client: util.GetHTTPSession("pinterest_short"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
shortURL := fmt.Sprintf(shortenerAPIFormat, ctx.MatchedContentID) shortURL := fmt.Sprintf(shortenerAPIFormat, ctx.MatchedContentID)
@ -72,6 +72,7 @@ var Extractor = &models.Extractor{
} }
return domains return domains
}(), }(),
Client: util.GetHTTPSession("pinterest"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
media, err := ExtractPinMedia(ctx) media, err := ExtractPinMedia(ctx)
@ -88,7 +89,7 @@ func ExtractPinMedia(ctx *models.DownloadContext) ([]*models.Media, error) {
pinID := ctx.MatchedContentID pinID := ctx.MatchedContentID
contentURL := ctx.MatchedContentURL contentURL := ctx.MatchedContentURL
pinData, err := GetPinData(pinID) pinData, err := GetPinData(ctx, pinID)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -158,7 +159,10 @@ func ExtractPinMedia(ctx *models.DownloadContext) ([]*models.Media, error) {
return nil, fmt.Errorf("no media found for pin ID: %s", pinID) return nil, fmt.Errorf("no media found for pin ID: %s", pinID)
} }
func GetPinData(pinID string) (*PinData, error) { func GetPinData(
ctx *models.DownloadContext,
pinID string,
) (*PinData, error) {
params := BuildPinRequestParams(pinID) params := BuildPinRequestParams(pinID)
req, err := http.NewRequest(http.MethodGet, pinResourceEndpoint, nil) req, err := http.NewRequest(http.MethodGet, pinResourceEndpoint, nil)
@ -175,7 +179,7 @@ func GetPinData(pinID string) (*PinData, error) {
// fix 403 error // fix 403 error
req.Header.Set("X-Pinterest-PWS-Handler", "www/[username].js") req.Header.Set("X-Pinterest-PWS-Handler", "www/[username].js")
resp, err := httpSession.Do(req) resp, err := ctx.Extractor.Client.Do(req)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err) return nil, fmt.Errorf("failed to send request: %w", err)
} }

View file

@ -13,7 +13,6 @@ import (
) )
var ( var (
httpSession = util.GetHTTPSession()
baseHost = []string{ baseHost = []string{
"reddit.com", "reddit.com",
"redditmedia.com", "redditmedia.com",
@ -24,12 +23,13 @@ var (
var ShortExtractor = &models.Extractor{ var ShortExtractor = &models.Extractor{
Name: "Reddit (Short)", Name: "Reddit (Short)",
CodeName: "reddit:short", CodeName: "reddit_short",
Type: enums.ExtractorTypeSingle, Type: enums.ExtractorTypeSingle,
Category: enums.ExtractorCategorySocial, Category: enums.ExtractorCategorySocial,
URLPattern: regexp.MustCompile(`https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?s/(?P<id>[^/?#&]+))`), URLPattern: regexp.MustCompile(`https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?s/(?P<id>[^/?#&]+))`),
Host: baseHost, Host: baseHost,
IsRedirect: true, IsRedirect: true,
Client: util.GetHTTPSession("reddit_short"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
req, err := http.NewRequest(http.MethodGet, ctx.MatchedContentURL, nil) req, err := http.NewRequest(http.MethodGet, ctx.MatchedContentURL, nil)
@ -46,7 +46,7 @@ var ShortExtractor = &models.Extractor{
req.AddCookie(cookie) req.AddCookie(cookie)
} }
res, err := httpSession.Do(req) res, err := ctx.Extractor.Client.Do(req)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err) return nil, fmt.Errorf("failed to send request: %w", err)
} }
@ -67,6 +67,7 @@ var Extractor = &models.Extractor{
Category: enums.ExtractorCategorySocial, Category: enums.ExtractorCategorySocial,
URLPattern: regexp.MustCompile(`https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))`), URLPattern: regexp.MustCompile(`https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))`),
Host: baseHost, Host: baseHost,
Client: util.GetHTTPSession("reddit"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
mediaList, err := MediaListFromAPI(ctx) mediaList, err := MediaListFromAPI(ctx)
@ -86,7 +87,7 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
contentID := ctx.MatchedContentID contentID := ctx.MatchedContentID
contentURL := ctx.MatchedContentURL contentURL := ctx.MatchedContentURL
manifest, err := GetRedditData(host, slug) manifest, err := GetRedditData(ctx, host, slug)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -222,7 +223,11 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
return mediaList, nil return mediaList, nil
} }
func GetRedditData(host string, slug string) (RedditResponse, error) { func GetRedditData(
ctx *models.DownloadContext,
host string,
slug string,
) (RedditResponse, error) {
url := fmt.Sprintf("https://%s/%s/.json", host, slug) url := fmt.Sprintf("https://%s/%s/.json", host, slug)
req, err := http.NewRequest(http.MethodGet, url, nil) req, err := http.NewRequest(http.MethodGet, url, nil)
@ -239,7 +244,7 @@ func GetRedditData(host string, slug string) (RedditResponse, error) {
req.AddCookie(cookie) req.AddCookie(cookie)
} }
res, err := httpSession.Do(req) res, err := ctx.Extractor.Client.Do(req)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err) return nil, fmt.Errorf("failed to send request: %w", err)
} }
@ -252,7 +257,7 @@ func GetRedditData(host string, slug string) (RedditResponse, error) {
altHost = "www.reddit.com" altHost = "www.reddit.com"
} }
return GetRedditData(altHost, slug) return GetRedditData(ctx, altHost, slug)
} }
var response RedditResponse var response RedditResponse

View file

@ -18,8 +18,6 @@ const (
) )
var ( var (
session = util.GetHTTPSession()
baseApiHeaders = map[string]string{ baseApiHeaders = map[string]string{
"referer": "https://www.redgifs.com/", "referer": "https://www.redgifs.com/",
"origin": "https://www.redgifs.com", "origin": "https://www.redgifs.com",
@ -37,6 +35,7 @@ var Extractor = &models.Extractor{
"redgifs.com", "redgifs.com",
"thumbs2.redgifs.com", "thumbs2.redgifs.com",
}, },
Client: util.GetHTTPSession("redgifs"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
mediaList, err := MediaListFromAPI(ctx) mediaList, err := MediaListFromAPI(ctx)
@ -52,7 +51,7 @@ var Extractor = &models.Extractor{
func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) { func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
var mediaList []*models.Media var mediaList []*models.Media
response, err := GetVideo(ctx.MatchedContentID) response, err := GetVideo(ctx)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get from api: %w", err) return nil, fmt.Errorf("failed to get from api: %w", err)
} }
@ -116,13 +115,14 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
return mediaList, nil return mediaList, nil
} }
func GetVideo(videoID string) (*Response, error) { func GetVideo(ctx *models.DownloadContext) (*Response, error) {
videoID := ctx.MatchedContentID
url := videoEndpoint + videoID + "?views=true" url := videoEndpoint + videoID + "?views=true"
req, err := http.NewRequest(http.MethodGet, url, nil) req, err := http.NewRequest(http.MethodGet, url, nil)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err) return nil, fmt.Errorf("failed to create request: %w", err)
} }
token, err := GetAccessToken() token, err := GetAccessToken(ctx)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get access token: %w", err) return nil, fmt.Errorf("failed to get access token: %w", err)
} }
@ -132,7 +132,7 @@ func GetVideo(videoID string) (*Response, error) {
for k, v := range baseApiHeaders { for k, v := range baseApiHeaders {
req.Header.Set(k, v) req.Header.Set(k, v)
} }
res, err := session.Do(req) res, err := ctx.Extractor.Client.Do(req)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err) return nil, fmt.Errorf("failed to send request: %w", err)
} }

View file

@ -2,6 +2,7 @@ package redgifs
import ( import (
"fmt" "fmt"
"govd/models"
"govd/util" "govd/util"
"net/http" "net/http"
"time" "time"
@ -11,22 +12,22 @@ import (
var accessToken *Token var accessToken *Token
func GetAccessToken() (*Token, error) { func GetAccessToken(ctx *models.DownloadContext) (*Token, error) {
if accessToken == nil || time.Now().Unix() >= accessToken.ExpiresIn { if accessToken == nil || time.Now().Unix() >= accessToken.ExpiresIn {
if err := RefreshAccessToken(); err != nil { if err := RefreshAccessToken(ctx); err != nil {
return nil, err return nil, err
} }
} }
return accessToken, nil return accessToken, nil
} }
func RefreshAccessToken() error { func RefreshAccessToken(ctx *models.DownloadContext) error {
req, err := http.NewRequest(http.MethodGet, tokenEndpoint, nil) req, err := http.NewRequest(http.MethodGet, tokenEndpoint, nil)
if err != nil { if err != nil {
return fmt.Errorf("failed to create request: %w", err) return fmt.Errorf("failed to create request: %w", err)
} }
req.Header.Set("User-Agent", util.ChromeUA) req.Header.Set("User-Agent", util.ChromeUA)
res, err := session.Do(req) res, err := ctx.Extractor.Client.Do(req)
if err != nil { if err != nil {
return fmt.Errorf("failed to send request: %w", err) return fmt.Errorf("failed to send request: %w", err)
} }

View file

@ -24,7 +24,6 @@ const (
) )
var ( var (
httpSession = util.GetHTTPSession()
baseHost = []string{ baseHost = []string{
"tiktok.com", "tiktok.com",
"vxtiktok.com", "vxtiktok.com",
@ -45,6 +44,7 @@ var VMExtractor = &models.Extractor{
URLPattern: regexp.MustCompile(`https:\/\/((?:vm|vt|www)\.)?(vx)?tiktok\.com\/(?:t\/)?(?P<id>[a-zA-Z0-9]+)`), URLPattern: regexp.MustCompile(`https:\/\/((?:vm|vt|www)\.)?(vx)?tiktok\.com\/(?:t\/)?(?P<id>[a-zA-Z0-9]+)`),
Host: baseHost, Host: baseHost,
IsRedirect: true, IsRedirect: true,
Client: util.GetHTTPSession("tiktokvm"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
location, err := util.GetLocationURL(ctx.MatchedContentURL, "") location, err := util.GetLocationURL(ctx.MatchedContentURL, "")
@ -64,6 +64,7 @@ var Extractor = &models.Extractor{
Category: enums.ExtractorCategorySocial, Category: enums.ExtractorCategorySocial,
URLPattern: regexp.MustCompile(`https?:\/\/((www|m)\.)?(vx)?tiktok\.com\/((?:embed|@[\w\.-]+)\/)?(v(ideo)?|p(hoto)?)\/(?P<id>[0-9]+)`), URLPattern: regexp.MustCompile(`https?:\/\/((www|m)\.)?(vx)?tiktok\.com\/((?:embed|@[\w\.-]+)\/)?(v(ideo)?|p(hoto)?)\/(?P<id>[0-9]+)`),
Host: baseHost, Host: baseHost,
Client: util.GetHTTPSession("tiktok"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
mediaList, err := MediaListFromAPI(ctx) mediaList, err := MediaListFromAPI(ctx)
@ -79,7 +80,7 @@ var Extractor = &models.Extractor{
func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) { func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
var mediaList []*models.Media var mediaList []*models.Media
details, err := GetVideoAPI(ctx.MatchedContentID) details, err := GetVideoAPI(ctx)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get from api: %w", err) return nil, fmt.Errorf("failed to get from api: %w", err)
} }
@ -137,7 +138,8 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
return mediaList, nil return mediaList, nil
} }
func GetVideoAPI(awemeID string) (*AwemeDetails, error) { func GetVideoAPI(ctx *models.DownloadContext) (*AwemeDetails, error) {
awemeID := ctx.MatchedContentID
apiURL := fmt.Sprintf( apiURL := fmt.Sprintf(
"https://%s/aweme/v1/multi/aweme/detail/", "https://%s/aweme/v1/multi/aweme/detail/",
apiHostname, apiHostname,
@ -161,7 +163,7 @@ func GetVideoAPI(awemeID string) (*AwemeDetails, error) {
req.Header.Set("Accept", "application/json") req.Header.Set("Accept", "application/json")
req.Header.Set("X-Argus", "") req.Header.Set("X-Argus", "")
resp, err := httpSession.Do(req) resp, err := ctx.Extractor.Client.Do(req)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err) return nil, fmt.Errorf("failed to send request: %w", err)
} }

View file

@ -18,16 +18,15 @@ const (
apiEndpoint = "https://x.com/i/api/graphql/zZXycP0V6H7m-2r0mOnFcA/TweetDetail" apiEndpoint = "https://x.com/i/api/graphql/zZXycP0V6H7m-2r0mOnFcA/TweetDetail"
) )
var httpSession = util.GetHTTPSession()
var ShortExtractor = &models.Extractor{ var ShortExtractor = &models.Extractor{
Name: "Twitter (Short)", Name: "Twitter (Short)",
CodeName: "twitter:short", CodeName: "twitter_short",
Type: enums.ExtractorTypeSingle, Type: enums.ExtractorTypeSingle,
Category: enums.ExtractorCategorySocial, Category: enums.ExtractorCategorySocial,
URLPattern: regexp.MustCompile(`https?://t\.co/(?P<id>\w+)`), URLPattern: regexp.MustCompile(`https?://t\.co/(?P<id>\w+)`),
Host: []string{"t.co"}, Host: []string{"t.co"},
IsRedirect: true, IsRedirect: true,
Client: util.GetHTTPSession("twitter_short"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
req, err := http.NewRequest(http.MethodGet, ctx.MatchedContentURL, nil) req, err := http.NewRequest(http.MethodGet, ctx.MatchedContentURL, nil)
@ -35,7 +34,7 @@ var ShortExtractor = &models.Extractor{
return nil, fmt.Errorf("failed to create req: %w", err) return nil, fmt.Errorf("failed to create req: %w", err)
} }
req.Header.Set("User-Agent", util.ChromeUA) req.Header.Set("User-Agent", util.ChromeUA)
res, err := httpSession.Do(req) res, err := ctx.Extractor.Client.Do(req)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err) return nil, fmt.Errorf("failed to send request: %w", err)
} }
@ -66,6 +65,7 @@ var Extractor = &models.Extractor{
"vxx.com", "vxx.com",
"vxtwitter.com", "vxtwitter.com",
}, },
Client: util.GetHTTPSession("twitter"),
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
mediaList, err := MediaListFromAPI(ctx) mediaList, err := MediaListFromAPI(ctx)
@ -81,7 +81,7 @@ var Extractor = &models.Extractor{
func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) { func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
var mediaList []*models.Media var mediaList []*models.Media
tweetData, err := GetTweetAPI(ctx.MatchedContentID) tweetData, err := GetTweetAPI(ctx)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get tweet data: %w", err) return nil, fmt.Errorf("failed to get tweet data: %w", err)
} }
@ -129,7 +129,8 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
return mediaList, nil return mediaList, nil
} }
func GetTweetAPI(tweetID string) (*Tweet, error) { func GetTweetAPI(ctx *models.DownloadContext) (*Tweet, error) {
tweetID := ctx.MatchedContentID
cookies, err := util.ParseCookieFile("twitter.txt") cookies, err := util.ParseCookieFile("twitter.txt")
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get cookies: %w", err) return nil, fmt.Errorf("failed to get cookies: %w", err)
@ -159,7 +160,7 @@ func GetTweetAPI(tweetID string) (*Tweet, error) {
} }
req.URL.RawQuery = q.Encode() req.URL.RawQuery = q.Encode()
resp, err := httpSession.Do(req) resp, err := ctx.Extractor.Client.Do(req)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err) return nil, fmt.Errorf("failed to send request: %w", err)
} }

1
go.mod
View file

@ -26,6 +26,7 @@ require (
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
) )
require ( require (

View file

@ -3,6 +3,7 @@ package main
import ( import (
"fmt" "fmt"
"govd/bot" "govd/bot"
"govd/config"
"govd/database" "govd/database"
"govd/util" "govd/util"
"log" "log"
@ -20,6 +21,10 @@ func main() {
if err != nil { if err != nil {
log.Fatal("error loading .env file") log.Fatal("error loading .env file")
} }
err = config.LoadExtractorConfigs()
if err != nil {
log.Fatalf("error loading extractor configs: %v", err)
}
profilerPort, err := strconv.Atoi(os.Getenv("PROFILER_PORT")) profilerPort, err := strconv.Atoi(os.Getenv("PROFILER_PORT"))
if err == nil && profilerPort > 0 { if err == nil && profilerPort > 0 {

View file

@ -14,6 +14,7 @@ type Extractor struct {
Host []string Host []string
IsDRM bool IsDRM bool
IsRedirect bool IsRedirect bool
Client HTTPClient
Run func(*DownloadContext) (*ExtractorResponse, error) Run func(*DownloadContext) (*ExtractorResponse, error)
} }
@ -33,3 +34,10 @@ func (extractor *Extractor) NewMedia(
ExtractorCodeName: extractor.CodeName, ExtractorCodeName: extractor.CodeName,
} }
} }
// ExtractorConfig is the per-extractor section of the YAML configuration
// file. Each field maps to the YAML key named in its struct tag; all fields
// are optional.
type ExtractorConfig struct {
// HTTPProxy is the proxy URL applied to requests whose scheme is "http".
HTTPProxy string `yaml:"http_proxy"`
// HTTPSProxy is the proxy URL applied to requests whose scheme is "https".
HTTPSProxy string `yaml:"https_proxy"`
// NoProxy is a comma-separated list of hosts that must bypass the proxies.
NoProxy string `yaml:"no_proxy"`
// EdgeProxyURL, when non-empty, makes the extractor use an edge proxy
// client instead of a plain HTTP client.
EdgeProxyURL string `yaml:"edge_proxy_url"`
}

7
models/http.go Normal file
View file

@ -0,0 +1,7 @@
package models
import "net/http"
// HTTPClient is the minimal client abstraction extractors use to send
// requests. It is satisfied by *http.Client and by any wrapper that exposes
// the same Do method.
type HTTPClient interface {
// Do sends req and returns the response, or an error if the request
// could not be completed.
Do(req *http.Request) (*http.Response, error)
}

View file

@ -21,6 +21,8 @@ import (
"github.com/google/uuid" "github.com/google/uuid"
) )
var downloadHTTPSession = GetDefaultHTTPSession()
func DefaultConfig() *models.DownloadConfig { func DefaultConfig() *models.DownloadConfig {
downloadsDir := os.Getenv("DOWNLOADS_DIR") downloadsDir := os.Getenv("DOWNLOADS_DIR")
if downloadsDir == "" { if downloadsDir == "" {
@ -171,7 +173,7 @@ func downloadInMemory(
return nil, fmt.Errorf("failed to create request: %w", err) return nil, fmt.Errorf("failed to create request: %w", err)
} }
resp, err := httpSession.Do(req) resp, err := downloadHTTPSession.Do(req)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to download file: %w", err) return nil, fmt.Errorf("failed to download file: %w", err)
} }
@ -362,7 +364,7 @@ func getFileSize(ctx context.Context, fileURL string, timeout time.Duration) (in
return 0, fmt.Errorf("failed to create request: %w", err) return 0, fmt.Errorf("failed to create request: %w", err)
} }
resp, err := httpSession.Do(req) resp, err := downloadHTTPSession.Do(req)
if err != nil { if err != nil {
return 0, fmt.Errorf("failed to get file size: %w", err) return 0, fmt.Errorf("failed to get file size: %w", err)
} }
@ -419,7 +421,7 @@ func downloadChunk(
} }
req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", chunk[0], chunk[1])) req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", chunk[0], chunk[1]))
resp, err := httpSession.Do(req) resp, err := downloadHTTPSession.Do(req)
if err != nil { if err != nil {
return nil, fmt.Errorf("download failed: %w", err) return nil, fmt.Errorf("download failed: %w", err)
} }

View file

@ -1,111 +0,0 @@
package util
import (
"bytes"
"encoding/json"
"fmt"
"govd/models"
"io"
"net/http"
"net/url"
"os"
"sync"
"time"
)
// edgeProxyClient is the shared EdgeProxyClient instance handed out by
// GetEdgeProxyClient; edgeProxyClientOnce guards its one-time construction.
var (
edgeProxyClient *EdgeProxyClient
edgeProxyClientOnce sync.Once
)
// EdgeProxyClient wraps an *http.Client whose Do method forwards requests
// through the edge proxy named by the EDGE_PROXY_URL environment variable.
type EdgeProxyClient struct {
*http.Client
}
// GetEdgeProxyClient returns the shared edge proxy client, building it on the
// first call. The underlying HTTP client uses the package's base transport
// and a 60 second overall timeout.
func GetEdgeProxyClient() *EdgeProxyClient {
	build := func() {
		inner := &http.Client{
			Timeout:   60 * time.Second,
			Transport: baseTransport,
		}
		edgeProxyClient = &EdgeProxyClient{Client: inner}
	}
	edgeProxyClientOnce.Do(build)
	return edgeProxyClient
}
// Do sends req through the edge proxy instead of contacting the target host
// directly.
//
// The target URL is passed to the proxy in the "url" query parameter of the
// address taken from the EDGE_PROXY_URL environment variable; the method,
// headers and body of req are forwarded unchanged. The proxy's JSON reply is
// decoded into a models.ProxyResponse and unpacked into a synthetic
// *http.Response carrying the reported status code, body text, headers and
// cookies (as Set-Cookie headers). The response's Request is a copy of req
// whose URL is the URL reported by the proxy.
//
// An error is returned when EDGE_PROXY_URL is unset, when the request body
// cannot be read, when the proxy request fails, or when the proxy reply
// cannot be read or parsed.
func (c *EdgeProxyClient) Do(req *http.Request) (*http.Response, error) {
	proxyURL := os.Getenv("EDGE_PROXY_URL")
	if proxyURL == "" {
		return nil, fmt.Errorf("EDGE_PROXY_URL environment variable is not set")
	}
	proxyURLWithParam := proxyURL + "?url=" + url.QueryEscape(req.URL.String())

	var bodyBytes []byte
	if req.Body != nil {
		payload, err := io.ReadAll(req.Body)
		// Close on every path, including a failed read, so the body is not leaked.
		req.Body.Close()
		if err != nil {
			return nil, fmt.Errorf("error reading request body: %w", err)
		}
		bodyBytes = payload
		// Restore a readable body so the caller can still inspect or resend it.
		req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
	}

	// Carry the caller's context so cancellation and deadlines set on req
	// also abort the request made to the proxy.
	proxyReq, err := http.NewRequestWithContext(
		req.Context(),
		req.Method,
		proxyURLWithParam,
		bytes.NewBuffer(bodyBytes),
	)
	if err != nil {
		return nil, fmt.Errorf("error creating proxy request: %w", err)
	}
	for name, values := range req.Header {
		for _, value := range values {
			proxyReq.Header.Add(name, value)
		}
	}

	proxyResp, err := c.Client.Do(proxyReq)
	if err != nil {
		return nil, fmt.Errorf("proxy request failed: %w", err)
	}
	defer proxyResp.Body.Close()

	body, err := io.ReadAll(proxyResp.Body)
	if err != nil {
		return nil, fmt.Errorf("error reading proxy response: %w", err)
	}

	var response models.ProxyResponse
	if err := json.Unmarshal(body, &response); err != nil {
		return nil, fmt.Errorf("error parsing proxy response: %w", err)
	}

	parsedResponseURL, err := url.Parse(response.URL)
	if err != nil {
		return nil, fmt.Errorf("error parsing response URL: %w", err)
	}

	// Report the final URL on a copy of the request: overwriting req.URL
	// would silently change the caller's own request object.
	finalReq := req.Clone(req.Context())
	finalReq.URL = parsedResponseURL

	resp := &http.Response{
		StatusCode: response.StatusCode,
		Status:     fmt.Sprintf("%d %s", response.StatusCode, http.StatusText(response.StatusCode)),
		Body:       io.NopCloser(bytes.NewBufferString(response.Text)),
		Header:     make(http.Header),
		Request:    finalReq,
	}
	for name, value := range response.Headers {
		resp.Header.Set(name, value)
	}
	for _, cookie := range response.Cookies {
		resp.Header.Add("Set-Cookie", cookie)
	}
	return resp, nil
}

View file

@ -1,16 +1,46 @@
package util package util
import ( import (
"bytes"
"encoding/json"
"fmt"
"govd/config"
"govd/models"
"io"
"log"
"net" "net"
"net/http" "net/http"
"net/url"
"strings"
"sync" "sync"
"time" "time"
) )
// EdgeProxyClient is an HTTP client bound to a specific edge proxy.
type EdgeProxyClient struct {
// Client is the embedded HTTP client.
*http.Client
// proxyURL is the edge proxy address this client was created with
// (presumably the endpoint requests are relayed through — confirm
// against the Do implementation).
proxyURL string
}
var ( var (
httpSession *http.Client httpSession *http.Client
httpSessionOnce sync.Once httpSessionOnce sync.Once
baseTransport = &http.Transport{
extractorsHttpSession = make(map[string]models.HTTPClient)
)
func GetDefaultHTTPSession() *http.Client {
httpSessionOnce.Do(func() {
httpSession = &http.Client{
Transport: GetBaseTransport(),
Timeout: 60 * time.Second,
}
})
return httpSession
}
func GetBaseTransport() *http.Transport {
return &http.Transport{
Proxy: http.ProxyFromEnvironment, Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{ DialContext: (&net.Dialer{
Timeout: 30 * time.Second, Timeout: 30 * time.Second,
@ -26,14 +56,167 @@ var (
ResponseHeaderTimeout: 10 * time.Second, ResponseHeaderTimeout: 10 * time.Second,
DisableCompression: false, DisableCompression: false,
} }
) }
func GetHTTPSession() *http.Client { func GetHTTPSession(extractor string) models.HTTPClient {
httpSessionOnce.Do(func() { if client, ok := extractorsHttpSession[extractor]; ok {
httpSession = &http.Client{ return client
Transport: baseTransport, }
cfg := config.GetExtractorConfig(extractor)
if cfg == nil {
return GetDefaultHTTPSession()
}
if cfg.EdgeProxyURL != "" {
client := GetEdgeProxyClient(cfg.EdgeProxyURL)
extractorsHttpSession[extractor] = client
return client
}
transport := GetBaseTransport()
client := &http.Client{
Transport: transport,
Timeout: 60 * time.Second, Timeout: 60 * time.Second,
} }
})
return httpSession if cfg.HTTPProxy == "" && cfg.HTTPSProxy == "" {
extractorsHttpSession[extractor] = client
return client
}
var httpProxyURL, httpsProxyURL *url.URL
var err error
if cfg.HTTPProxy != "" {
if httpProxyURL, err = url.Parse(cfg.HTTPProxy); err != nil {
log.Printf("warning: invalid HTTP proxy URL '%s': %v\n", cfg.HTTPProxy, err)
}
}
if cfg.HTTPSProxy != "" {
if httpsProxyURL, err = url.Parse(cfg.HTTPSProxy); err != nil {
log.Printf("warning: invalid HTTPS proxy URL '%s': %v\n", cfg.HTTPSProxy, err)
}
}
if httpProxyURL != nil || httpsProxyURL != nil {
noProxyList := strings.Split(cfg.NoProxy, ",")
for i := range noProxyList {
noProxyList[i] = strings.TrimSpace(noProxyList[i])
}
transport.Proxy = func(req *http.Request) (*url.URL, error) {
if cfg.NoProxy != "" {
host := req.URL.Hostname()
for _, p := range noProxyList {
if p == "" {
continue
}
if p == host || (strings.HasPrefix(p, ".") && strings.HasSuffix(host, p)) {
return nil, nil
}
}
}
if req.URL.Scheme == "https" && httpsProxyURL != nil {
return httpsProxyURL, nil
}
if req.URL.Scheme == "http" && httpProxyURL != nil {
return httpProxyURL, nil
}
if httpsProxyURL != nil {
return httpsProxyURL, nil
}
return httpProxyURL, nil
}
}
extractorsHttpSession[extractor] = client
return client
}
// GetEdgeProxyClient builds a new EdgeProxyClient that routes requests through
// the edge proxy at proxyURL. The wrapped HTTP client uses a fresh base
// transport and a 60 second timeout.
func GetEdgeProxyClient(proxyURL string) *EdgeProxyClient {
	inner := &http.Client{
		Transport: GetBaseTransport(),
		Timeout:   60 * time.Second,
	}
	return &EdgeProxyClient{
		Client:   inner,
		proxyURL: proxyURL,
	}
}
func (c *EdgeProxyClient) Do(req *http.Request) (*http.Response, error) {
if c.proxyURL == "" {
return nil, fmt.Errorf("proxy URL is not set")
}
targetURL := req.URL.String()
encodedURL := url.QueryEscape(targetURL)
proxyURLWithParam := c.proxyURL + "?url=" + encodedURL
var bodyBytes []byte
var err error
if req.Body != nil {
bodyBytes, err = io.ReadAll(req.Body)
if err != nil {
return nil, fmt.Errorf("error reading request body: %w", err)
}
req.Body.Close()
req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
}
proxyReq, err := http.NewRequest(
req.Method,
proxyURLWithParam,
bytes.NewBuffer(bodyBytes),
)
if err != nil {
return nil, fmt.Errorf("error creating proxy request: %w", err)
}
for name, values := range req.Header {
for _, value := range values {
proxyReq.Header.Add(name, value)
}
}
proxyResp, err := c.Client.Do(proxyReq)
if err != nil {
return nil, fmt.Errorf("proxy request failed: %w", err)
}
defer proxyResp.Body.Close()
body, err := io.ReadAll(proxyResp.Body)
if err != nil {
return nil, fmt.Errorf("error reading proxy response: %w", err)
}
var response models.ProxyResponse
if err := json.Unmarshal(body, &response); err != nil {
return nil, fmt.Errorf("error parsing proxy response: %w", err)
}
resp := &http.Response{
StatusCode: response.StatusCode,
Status: fmt.Sprintf("%d %s", response.StatusCode, http.StatusText(response.StatusCode)),
Body: io.NopCloser(bytes.NewBufferString(response.Text)),
Header: make(http.Header),
Request: req,
}
parsedResponseURL, err := url.Parse(response.URL)
if err != nil {
return nil, fmt.Errorf("error parsing response URL: %w", err)
}
resp.Request.URL = parsedResponseURL
for name, value := range response.Headers {
resp.Header.Set(name, value)
}
for _, cookie := range response.Cookies {
resp.Header.Add("Set-Cookie", cookie)
}
return resp, nil
} }

View file

@ -29,7 +29,7 @@ func GetLocationURL(
userAgent = ChromeUA userAgent = ChromeUA
} }
req.Header.Set("User-Agent", userAgent) req.Header.Set("User-Agent", userAgent)
session := GetHTTPSession() session := GetDefaultHTTPSession()
resp, err := session.Do(req) resp, err := session.Do(req)
if err != nil { if err != nil {
return "", fmt.Errorf("failed to send request: %w", err) return "", fmt.Errorf("failed to send request: %w", err)