set configuration for each extractor
parent 6baa965534
commit 0a63df9ce6
19 changed files with 337 additions and 175 deletions

.gitignore (vendored, 2 changes)
@@ -7,6 +7,8 @@
 old/

 .env
+ext-cfg.yaml

 .idea/

 downloads

config/main.go (new file, 44 lines)
@@ -0,0 +1,44 @@
package config

import (
    "fmt"
    "os"

    "govd/models"

    "gopkg.in/yaml.v3"
)

var extractorConfigs map[string]*models.ExtractorConfig

func LoadExtractorConfigs() error {
    extractorConfigs = make(map[string]*models.ExtractorConfig)
    configPath := "ext-cfg.yaml"

    _, err := os.Stat(configPath)
    if os.IsNotExist(err) {
        return nil
    }
    data, err := os.ReadFile(configPath)
    if err != nil {
        return fmt.Errorf("failed to read the configuration file: %w", err)
    }

    var rawConfig map[string]*models.ExtractorConfig

    if err := yaml.Unmarshal(data, &rawConfig); err != nil {
        return fmt.Errorf("failed to decode the YAML file: %w", err)
    }
    for codeName, config := range rawConfig {
        extractorConfigs[codeName] = config
    }

    return nil
}

func GetExtractorConfig(codeName string) *models.ExtractorConfig {
    if config, exists := extractorConfigs[codeName]; exists {
        return config
    }
    return nil
}
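
A minimal usage sketch (not part of the commit): once LoadExtractorConfigs has run, any package can look up a per-extractor section by its code name. The "reddit" key and the printed field are placeholders matching the example file below.

package main

import (
    "fmt"
    "log"

    "govd/config"
)

func main() {
    // Loads ext-cfg.yaml from the working directory; a missing file is not an error.
    if err := config.LoadExtractorConfigs(); err != nil {
        log.Fatalf("error loading extractor configs: %v", err)
    }
    // Returns nil when the extractor has no section in ext-cfg.yaml.
    if cfg := config.GetExtractorConfig("reddit"); cfg != nil {
        fmt.Println("https_proxy for reddit:", cfg.HTTPSProxy)
    }
}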

ext-cfg-example.yaml (new file, 5 lines)
@@ -0,0 +1,5 @@
instagram_share:
  edge_proxy_url: https://example.com

reddit:
  https_proxy: https://example.com

@@ -16,8 +16,6 @@ import (
 // feel free to open PR, if you want to
 // add support for the official Instagram API

-var httpSession = util.GetHTTPSession()
-
 const (
     apiHostname = "api.igram.world"
     apiKey      = "aaeaf2805cea6abef3f9d2b6a666fce62fd9d612a43ab772bb50ce81455112e0"
@@ -39,6 +37,7 @@ var Extractor = &models.Extractor{
     URLPattern: regexp.MustCompile(`https:\/\/(www\.)?instagram\.com\/(reel|p|tv)\/(?P<id>[a-zA-Z0-9_-]+)`),
     Host:       instagramHost,
     IsRedirect: false,
+    Client:     util.GetHTTPSession("instagram"),

     Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
         mediaList, err := MediaListFromAPI(ctx, false)
@@ -50,12 +49,13 @@

 var StoriesExtractor = &models.Extractor{
     Name:       "Instagram Stories",
-    CodeName:   "instagram:stories",
+    CodeName:   "instagram_stories",
     Type:       enums.ExtractorTypeSingle,
     Category:   enums.ExtractorCategorySocial,
     URLPattern: regexp.MustCompile(`https:\/\/(www\.)?instagram\.com\/stories\/[a-zA-Z0-9._]+\/(?P<id>\d+)`),
     Host:       instagramHost,
     IsRedirect: false,
+    Client:     util.GetHTTPSession("instagram_stories"),

     Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
         mediaList, err := MediaListFromAPI(ctx, true)
@@ -67,16 +67,15 @@ var StoriesExtractor = &models.Extractor{

 var ShareURLExtractor = &models.Extractor{
     Name:       "Instagram Share URL",
-    CodeName:   "instagram:share",
+    CodeName:   "instagram_share",
     Type:       enums.ExtractorTypeSingle,
     Category:   enums.ExtractorCategorySocial,
     URLPattern: regexp.MustCompile(`https?:\/\/(www\.)?instagram\.com\/share\/((reels?|video|s|p)\/)?(?P<id>[^\/\?]+)`),
     Host:       instagramHost,
     IsRedirect: true,
+    Client:     util.GetHTTPSession("instagram_share"),

     Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
-        // temporary fix for public instances
-        edgeProxyClient := util.GetEdgeProxyClient()
         req, err := http.NewRequest(
             http.MethodGet,
             ctx.MatchedContentURL,
@@ -85,7 +84,7 @@ var ShareURLExtractor = &models.Extractor{
         if err != nil {
             return nil, fmt.Errorf("failed to create request: %w", err)
         }
-        resp, err := edgeProxyClient.Do(req)
+        resp, err := ctx.Extractor.Client.Do(req)
         if err != nil {
             return nil, fmt.Errorf("failed to send request: %w", err)
         }
@@ -103,13 +102,13 @@ func MediaListFromAPI(
 ) ([]*models.Media, error) {
     var mediaList []*models.Media
     postURL := ctx.MatchedContentURL
-    details, err := GetVideoAPI(postURL)
+    details, err := GetVideoAPI(ctx, postURL)
     if err != nil {
         return nil, fmt.Errorf("failed to get post: %w", err)
     }
     var caption string
     if !stories {
-        caption, err = GetPostCaption(postURL)
+        caption, err = GetPostCaption(ctx, postURL)
         if err != nil {
             return nil, fmt.Errorf("failed to get caption: %w", err)
         }
@@ -157,7 +156,10 @@ func MediaListFromAPI(
     return mediaList, nil
 }

-func GetVideoAPI(contentURL string) (*IGramResponse, error) {
+func GetVideoAPI(
+    ctx *models.DownloadContext,
+    contentURL string,
+) (*IGramResponse, error) {
     apiURL := fmt.Sprintf(
         "https://%s/api/convert",
         apiHostname,
@@ -173,7 +175,7 @@ func GetVideoAPI(contentURL string) (*IGramResponse, error) {
     req.Header.Set("Content-Type", "application/json")
     req.Header.Set("User-Agent", util.ChromeUA)

-    resp, err := httpSession.Do(req)
+    resp, err := ctx.Extractor.Client.Do(req)
     if err != nil {
         return nil, fmt.Errorf("failed to send request: %w", err)
     }

@@ -4,6 +4,7 @@ import (
     "crypto/sha256"
     "encoding/hex"
     "fmt"
+    "govd/models"
     "govd/util"
     "html"
     "io"
@@ -95,9 +96,9 @@ func GetCDNURL(contentURL string) (string, error) {
 }

 func GetPostCaption(
+    ctx *models.DownloadContext,
     postURL string,
 ) (string, error) {
-    edgeProxyClient := util.GetEdgeProxyClient()
     req, err := http.NewRequest(
         http.MethodGet,
         postURL,
@@ -121,7 +122,7 @@ func GetPostCaption(
     req.Header.Set("Cache-Control", "no-cache")
     req.Header.Set("TE", "trailers")

-    resp, err := edgeProxyClient.Do(req)
+    resp, err := ctx.Extractor.Client.Do(req)
     if err != nil {
         return "", fmt.Errorf("failed to send request: %w", err)
     }

@@ -19,8 +19,7 @@ const (
 )

 var (
-    httpSession = util.GetHTTPSession()
-    validHost   = []string{
+    validHost = []string{
         "com", "fr", "de", "ch", "jp", "cl", "ca", "it", "co\\.uk", "nz", "ru", "com\\.au",
         "at", "pt", "co\\.kr", "es", "com\\.mx", "dk", "ph", "th", "com\\.uy", "co", "nl",
         "info", "kr", "ie", "vn", "com\\.vn", "ec", "mx", "in", "pe", "co\\.at", "hu",
@@ -33,7 +32,7 @@ var (

 var ShortExtractor = &models.Extractor{
     Name:       "Pinterest (Short)",
-    CodeName:   "pinterest:short",
+    CodeName:   "pinterest_short",
     Type:       enums.ExtractorTypeSingle,
     Category:   enums.ExtractorCategorySocial,
     URLPattern: regexp.MustCompile(pinValidURLPattern),
@@ -45,6 +44,7 @@ var ShortExtractor = &models.Extractor{
         return domains
     }(),
     IsRedirect: true,
+    Client:     util.GetHTTPSession("pinterest_short"),

     Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
         shortURL := fmt.Sprintf(shortenerAPIFormat, ctx.MatchedContentID)
@@ -72,6 +72,7 @@ var Extractor = &models.Extractor{
         }
         return domains
     }(),
+    Client: util.GetHTTPSession("pinterest"),

     Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
         media, err := ExtractPinMedia(ctx)
@@ -88,7 +89,7 @@ func ExtractPinMedia(ctx *models.DownloadContext) ([]*models.Media, error) {
     pinID := ctx.MatchedContentID
     contentURL := ctx.MatchedContentURL

-    pinData, err := GetPinData(pinID)
+    pinData, err := GetPinData(ctx, pinID)
     if err != nil {
         return nil, err
     }
@@ -158,7 +159,10 @@ func ExtractPinMedia(ctx *models.DownloadContext) ([]*models.Media, error) {
     return nil, fmt.Errorf("no media found for pin ID: %s", pinID)
 }

-func GetPinData(pinID string) (*PinData, error) {
+func GetPinData(
+    ctx *models.DownloadContext,
+    pinID string,
+) (*PinData, error) {
     params := BuildPinRequestParams(pinID)

     req, err := http.NewRequest(http.MethodGet, pinResourceEndpoint, nil)
@@ -175,7 +179,7 @@ func GetPinData(pinID string) (*PinData, error) {
     // fix 403 error
     req.Header.Set("X-Pinterest-PWS-Handler", "www/[username].js")

-    resp, err := httpSession.Do(req)
+    resp, err := ctx.Extractor.Client.Do(req)
     if err != nil {
         return nil, fmt.Errorf("failed to send request: %w", err)
     }

@@ -13,8 +13,7 @@ import (
 )

 var (
-    httpSession = util.GetHTTPSession()
-    baseHost    = []string{
+    baseHost = []string{
         "reddit.com",
         "redditmedia.com",
         "old.reddit.com",
@@ -24,12 +23,13 @@ var (

 var ShortExtractor = &models.Extractor{
     Name:       "Reddit (Short)",
-    CodeName:   "reddit:short",
+    CodeName:   "reddit_short",
     Type:       enums.ExtractorTypeSingle,
     Category:   enums.ExtractorCategorySocial,
     URLPattern: regexp.MustCompile(`https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?s/(?P<id>[^/?#&]+))`),
     Host:       baseHost,
     IsRedirect: true,
+    Client:     util.GetHTTPSession("reddit_short"),

     Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
         req, err := http.NewRequest(http.MethodGet, ctx.MatchedContentURL, nil)
@@ -46,7 +46,7 @@ var ShortExtractor = &models.Extractor{
             req.AddCookie(cookie)
         }

-        res, err := httpSession.Do(req)
+        res, err := ctx.Extractor.Client.Do(req)
         if err != nil {
             return nil, fmt.Errorf("failed to send request: %w", err)
         }
@@ -67,6 +67,7 @@ var Extractor = &models.Extractor{
     Category:   enums.ExtractorCategorySocial,
     URLPattern: regexp.MustCompile(`https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))`),
     Host:       baseHost,
+    Client:     util.GetHTTPSession("reddit"),

     Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
         mediaList, err := MediaListFromAPI(ctx)
@@ -86,7 +87,7 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
     contentID := ctx.MatchedContentID
     contentURL := ctx.MatchedContentURL

-    manifest, err := GetRedditData(host, slug)
+    manifest, err := GetRedditData(ctx, host, slug)
     if err != nil {
         return nil, err
     }
@@ -222,7 +223,11 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
     return mediaList, nil
 }

-func GetRedditData(host string, slug string) (RedditResponse, error) {
+func GetRedditData(
+    ctx *models.DownloadContext,
+    host string,
+    slug string,
+) (RedditResponse, error) {
     url := fmt.Sprintf("https://%s/%s/.json", host, slug)

     req, err := http.NewRequest(http.MethodGet, url, nil)
@@ -239,7 +244,7 @@ func GetRedditData(host string, slug string) (RedditResponse, error) {
         req.AddCookie(cookie)
     }

-    res, err := httpSession.Do(req)
+    res, err := ctx.Extractor.Client.Do(req)
     if err != nil {
         return nil, fmt.Errorf("failed to send request: %w", err)
     }
@@ -252,7 +257,7 @@ func GetRedditData(host string, slug string) (RedditResponse, error) {
             altHost = "www.reddit.com"
         }

-        return GetRedditData(altHost, slug)
+        return GetRedditData(ctx, altHost, slug)
     }

     var response RedditResponse

@@ -18,8 +18,6 @@ const (
 )

 var (
-    session = util.GetHTTPSession()
-
     baseApiHeaders = map[string]string{
         "referer": "https://www.redgifs.com/",
         "origin":  "https://www.redgifs.com",
@@ -37,6 +35,7 @@ var Extractor = &models.Extractor{
         "redgifs.com",
         "thumbs2.redgifs.com",
     },
+    Client: util.GetHTTPSession("redgifs"),

     Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
         mediaList, err := MediaListFromAPI(ctx)
@@ -52,7 +51,7 @@ var Extractor = &models.Extractor{
 func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
     var mediaList []*models.Media

-    response, err := GetVideo(ctx.MatchedContentID)
+    response, err := GetVideo(ctx)
     if err != nil {
         return nil, fmt.Errorf("failed to get from api: %w", err)
     }
@@ -116,13 +115,14 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
     return mediaList, nil
 }

-func GetVideo(videoID string) (*Response, error) {
+func GetVideo(ctx *models.DownloadContext) (*Response, error) {
+    videoID := ctx.MatchedContentID
     url := videoEndpoint + videoID + "?views=true"
     req, err := http.NewRequest(http.MethodGet, url, nil)
     if err != nil {
         return nil, fmt.Errorf("failed to create request: %w", err)
     }
-    token, err := GetAccessToken()
+    token, err := GetAccessToken(ctx)
     if err != nil {
         return nil, fmt.Errorf("failed to get access token: %w", err)
     }
@@ -132,7 +132,7 @@ func GetVideo(videoID string) (*Response, error) {
     for k, v := range baseApiHeaders {
         req.Header.Set(k, v)
     }
-    res, err := session.Do(req)
+    res, err := ctx.Extractor.Client.Do(req)
     if err != nil {
         return nil, fmt.Errorf("failed to send request: %w", err)
     }

@@ -2,6 +2,7 @@ package redgifs

 import (
     "fmt"
+    "govd/models"
     "govd/util"
     "net/http"
     "time"
@@ -11,22 +12,22 @@ import (

 var accessToken *Token

-func GetAccessToken() (*Token, error) {
+func GetAccessToken(ctx *models.DownloadContext) (*Token, error) {
     if accessToken == nil || time.Now().Unix() >= accessToken.ExpiresIn {
-        if err := RefreshAccessToken(); err != nil {
+        if err := RefreshAccessToken(ctx); err != nil {
             return nil, err
         }
     }
     return accessToken, nil
 }

-func RefreshAccessToken() error {
+func RefreshAccessToken(ctx *models.DownloadContext) error {
     req, err := http.NewRequest(http.MethodGet, tokenEndpoint, nil)
     if err != nil {
         return fmt.Errorf("failed to create request: %w", err)
     }
     req.Header.Set("User-Agent", util.ChromeUA)
-    res, err := session.Do(req)
+    res, err := ctx.Extractor.Client.Do(req)
     if err != nil {
         return fmt.Errorf("failed to send request: %w", err)
     }

@@ -24,8 +24,7 @@ const (
 )

 var (
-    httpSession = util.GetHTTPSession()
-    baseHost    = []string{
+    baseHost = []string{
         "tiktok.com",
         "vxtiktok.com",
         "vm.tiktok.com",
@@ -45,6 +44,7 @@ var VMExtractor = &models.Extractor{
     URLPattern: regexp.MustCompile(`https:\/\/((?:vm|vt|www)\.)?(vx)?tiktok\.com\/(?:t\/)?(?P<id>[a-zA-Z0-9]+)`),
     Host:       baseHost,
     IsRedirect: true,
+    Client:     util.GetHTTPSession("tiktokvm"),

     Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
         location, err := util.GetLocationURL(ctx.MatchedContentURL, "")
@@ -64,6 +64,7 @@ var Extractor = &models.Extractor{
     Category:   enums.ExtractorCategorySocial,
     URLPattern: regexp.MustCompile(`https?:\/\/((www|m)\.)?(vx)?tiktok\.com\/((?:embed|@[\w\.-]+)\/)?(v(ideo)?|p(hoto)?)\/(?P<id>[0-9]+)`),
     Host:       baseHost,
+    Client:     util.GetHTTPSession("tiktok"),

     Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
         mediaList, err := MediaListFromAPI(ctx)
@@ -79,7 +80,7 @@ var Extractor = &models.Extractor{
 func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
     var mediaList []*models.Media

-    details, err := GetVideoAPI(ctx.MatchedContentID)
+    details, err := GetVideoAPI(ctx)
     if err != nil {
         return nil, fmt.Errorf("failed to get from api: %w", err)
     }
@@ -137,7 +138,8 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
     return mediaList, nil
 }

-func GetVideoAPI(awemeID string) (*AwemeDetails, error) {
+func GetVideoAPI(ctx *models.DownloadContext) (*AwemeDetails, error) {
+    awemeID := ctx.MatchedContentID
     apiURL := fmt.Sprintf(
         "https://%s/aweme/v1/multi/aweme/detail/",
         apiHostname,
@@ -161,7 +163,7 @@ func GetVideoAPI(awemeID string) (*AwemeDetails, error) {
     req.Header.Set("Accept", "application/json")
     req.Header.Set("X-Argus", "")

-    resp, err := httpSession.Do(req)
+    resp, err := ctx.Extractor.Client.Do(req)
     if err != nil {
         return nil, fmt.Errorf("failed to send request: %w", err)
     }

@@ -18,16 +18,15 @@ const (
     apiEndpoint = "https://x.com/i/api/graphql/zZXycP0V6H7m-2r0mOnFcA/TweetDetail"
 )

-var httpSession = util.GetHTTPSession()
-
 var ShortExtractor = &models.Extractor{
     Name:       "Twitter (Short)",
-    CodeName:   "twitter:short",
+    CodeName:   "twitter_short",
     Type:       enums.ExtractorTypeSingle,
     Category:   enums.ExtractorCategorySocial,
     URLPattern: regexp.MustCompile(`https?://t\.co/(?P<id>\w+)`),
     Host:       []string{"t.co"},
     IsRedirect: true,
+    Client:     util.GetHTTPSession("twitter_short"),

     Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
         req, err := http.NewRequest(http.MethodGet, ctx.MatchedContentURL, nil)
@@ -35,7 +34,7 @@ var ShortExtractor = &models.Extractor{
             return nil, fmt.Errorf("failed to create req: %w", err)
         }
         req.Header.Set("User-Agent", util.ChromeUA)
-        res, err := httpSession.Do(req)
+        res, err := ctx.Extractor.Client.Do(req)
         if err != nil {
             return nil, fmt.Errorf("failed to send request: %w", err)
         }
@@ -66,6 +65,7 @@ var Extractor = &models.Extractor{
         "vxx.com",
         "vxtwitter.com",
     },
+    Client: util.GetHTTPSession("twitter"),

     Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
         mediaList, err := MediaListFromAPI(ctx)
@@ -81,7 +81,7 @@ var Extractor = &models.Extractor{
 func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
     var mediaList []*models.Media

-    tweetData, err := GetTweetAPI(ctx.MatchedContentID)
+    tweetData, err := GetTweetAPI(ctx)
     if err != nil {
         return nil, fmt.Errorf("failed to get tweet data: %w", err)
     }
@@ -129,7 +129,8 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
     return mediaList, nil
 }

-func GetTweetAPI(tweetID string) (*Tweet, error) {
+func GetTweetAPI(ctx *models.DownloadContext) (*Tweet, error) {
+    tweetID := ctx.MatchedContentID
     cookies, err := util.ParseCookieFile("twitter.txt")
     if err != nil {
         return nil, fmt.Errorf("failed to get cookies: %w", err)
@@ -159,7 +160,7 @@ func GetTweetAPI(tweetID string) (*Tweet, error) {
     }
     req.URL.RawQuery = q.Encode()

-    resp, err := httpSession.Do(req)
+    resp, err := ctx.Extractor.Client.Do(req)
     if err != nil {
         return nil, fmt.Errorf("failed to send request: %w", err)
     }

go.mod (1 change)
@@ -26,6 +26,7 @@ require (
     github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
     golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect
     gopkg.in/yaml.v2 v2.4.0 // indirect
+    gopkg.in/yaml.v3 v3.0.1 // indirect
 )

 require (

main.go (5 changes)
@@ -3,6 +3,7 @@ package main
 import (
     "fmt"
     "govd/bot"
+    "govd/config"
     "govd/database"
     "govd/util"
     "log"
@@ -20,6 +21,10 @@ func main() {
     if err != nil {
         log.Fatal("error loading .env file")
     }
+    err = config.LoadExtractorConfigs()
+    if err != nil {
+        log.Fatalf("error loading extractor configs: %v", err)
+    }

     profilerPort, err := strconv.Atoi(os.Getenv("PROFILER_PORT"))
     if err == nil && profilerPort > 0 {

@@ -14,6 +14,7 @@ type Extractor struct {
     Host       []string
     IsDRM      bool
     IsRedirect bool
+    Client     HTTPClient

     Run func(*DownloadContext) (*ExtractorResponse, error)
 }
@@ -33,3 +34,10 @@ func (extractor *Extractor) NewMedia(
         ExtractorCodeName: extractor.CodeName,
     }
 }
+
+type ExtractorConfig struct {
+    HTTPProxy    string `yaml:"http_proxy"`
+    HTTPSProxy   string `yaml:"https_proxy"`
+    NoProxy      string `yaml:"no_proxy"`
+    EdgeProxyURL string `yaml:"edge_proxy_url"`
+}
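
For reference, a hedged sketch (not part of the commit) of how a section of ext-cfg.yaml decodes into this struct through LoadExtractorConfigs; the inline document and its values are placeholders, and the key names follow the yaml tags above.

package main

import (
    "fmt"

    "govd/models"

    "gopkg.in/yaml.v3"
)

func main() {
    // One top-level key per extractor code name, mirroring LoadExtractorConfigs.
    raw := []byte("reddit:\n  https_proxy: https://example.com\n  edge_proxy_url: https://edge.example.com\n")
    cfgs := make(map[string]*models.ExtractorConfig)
    if err := yaml.Unmarshal(raw, &cfgs); err != nil {
        panic(err)
    }
    fmt.Println(cfgs["reddit"].HTTPSProxy, cfgs["reddit"].EdgeProxyURL)
}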

models/http.go (new file, 7 lines)
@@ -0,0 +1,7 @@
package models

import "net/http"

type HTTPClient interface {
    Do(req *http.Request) (*http.Response, error)
}
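
The interface is satisfied by anything with a matching Do method. A minimal sketch (not part of the commit) showing that the standard *http.Client qualifies, which is what allows Extractor.Client to hold either a plain client or the edge-proxy client from util/http.go; the fetch helper is illustrative only.

package main

import (
    "fmt"
    "net/http"

    "govd/models"
)

// Compile-time check: *http.Client implements models.HTTPClient.
var _ models.HTTPClient = (*http.Client)(nil)

// fetch works against any client hidden behind the interface.
func fetch(client models.HTTPClient, url string) (int, error) {
    req, err := http.NewRequest(http.MethodGet, url, nil)
    if err != nil {
        return 0, err
    }
    resp, err := client.Do(req)
    if err != nil {
        return 0, err
    }
    defer resp.Body.Close()
    return resp.StatusCode, nil
}

func main() {
    status, err := fetch(http.DefaultClient, "https://example.com")
    fmt.Println(status, err)
}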

@@ -21,6 +21,8 @@ import (
     "github.com/google/uuid"
 )

+var downloadHTTPSession = GetDefaultHTTPSession()
+
 func DefaultConfig() *models.DownloadConfig {
     downloadsDir := os.Getenv("DOWNLOADS_DIR")
     if downloadsDir == "" {
@@ -171,7 +173,7 @@ func downloadInMemory(
         return nil, fmt.Errorf("failed to create request: %w", err)
     }

-    resp, err := httpSession.Do(req)
+    resp, err := downloadHTTPSession.Do(req)
     if err != nil {
         return nil, fmt.Errorf("failed to download file: %w", err)
     }
@@ -362,7 +364,7 @@ func getFileSize(ctx context.Context, fileURL string, timeout time.Duration) (in
         return 0, fmt.Errorf("failed to create request: %w", err)
     }

-    resp, err := httpSession.Do(req)
+    resp, err := downloadHTTPSession.Do(req)
     if err != nil {
         return 0, fmt.Errorf("failed to get file size: %w", err)
     }
@@ -419,7 +421,7 @@ func downloadChunk(
     }
     req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", chunk[0], chunk[1]))

-    resp, err := httpSession.Do(req)
+    resp, err := downloadHTTPSession.Do(req)
     if err != nil {
         return nil, fmt.Errorf("download failed: %w", err)
     }

(deleted file, 111 lines)
@@ -1,111 +0,0 @@
package util

import (
    "bytes"
    "encoding/json"
    "fmt"
    "govd/models"
    "io"
    "net/http"
    "net/url"
    "os"
    "sync"
    "time"
)

var (
    edgeProxyClient     *EdgeProxyClient
    edgeProxyClientOnce sync.Once
)

type EdgeProxyClient struct {
    *http.Client
}

func GetEdgeProxyClient() *EdgeProxyClient {
    edgeProxyClientOnce.Do(func() {
        edgeProxyClient = &EdgeProxyClient{
            Client: &http.Client{
                Transport: baseTransport,
                Timeout:   60 * time.Second,
            },
        }
    })
    return edgeProxyClient
}

func (c *EdgeProxyClient) Do(req *http.Request) (*http.Response, error) {
    proxyURL := os.Getenv("EDGE_PROXY_URL")
    if proxyURL == "" {
        return nil, fmt.Errorf("EDGE_PROXY_URL environment variable is not set")
    }
    targetURL := req.URL.String()
    encodedURL := url.QueryEscape(targetURL)
    proxyURLWithParam := proxyURL + "?url=" + encodedURL

    var bodyBytes []byte
    var err error

    if req.Body != nil {
        bodyBytes, err = io.ReadAll(req.Body)
        if err != nil {
            return nil, fmt.Errorf("error reading request body: %w", err)
        }
        req.Body.Close()
        req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
    }

    proxyReq, err := http.NewRequest(
        req.Method,
        proxyURLWithParam,
        bytes.NewBuffer(bodyBytes),
    )
    if err != nil {
        return nil, fmt.Errorf("error creating proxy request: %w", err)
    }

    for name, values := range req.Header {
        for _, value := range values {
            proxyReq.Header.Add(name, value)
        }
    }

    proxyResp, err := c.Client.Do(proxyReq)
    if err != nil {
        return nil, fmt.Errorf("proxy request failed: %w", err)
    }
    defer proxyResp.Body.Close()

    body, err := io.ReadAll(proxyResp.Body)
    if err != nil {
        return nil, fmt.Errorf("error reading proxy response: %w", err)
    }

    var response models.ProxyResponse
    if err := json.Unmarshal(body, &response); err != nil {
        return nil, fmt.Errorf("error parsing proxy response: %w", err)
    }

    resp := &http.Response{
        StatusCode: response.StatusCode,
        Status:     fmt.Sprintf("%d %s", response.StatusCode, http.StatusText(response.StatusCode)),
        Body:       io.NopCloser(bytes.NewBufferString(response.Text)),
        Header:     make(http.Header),
        Request:    req,
    }
    parsedResponseURL, err := url.Parse(response.URL)
    if err != nil {
        return nil, fmt.Errorf("error parsing response URL: %w", err)
    }
    resp.Request.URL = parsedResponseURL

    for name, value := range response.Headers {
        resp.Header.Set(name, value)
    }

    for _, cookie := range response.Cookies {
        resp.Header.Add("Set-Cookie", cookie)
    }

    return resp, nil
}

util/http.go (205 changes)
@@ -1,16 +1,46 @@
 package util
 
 import (
+    "bytes"
+    "encoding/json"
+    "fmt"
+    "govd/config"
+    "govd/models"
+    "io"
+    "log"
     "net"
     "net/http"
+    "net/url"
+    "strings"
     "sync"
     "time"
 )
 
+type EdgeProxyClient struct {
+    *http.Client
+
+    proxyURL string
+}
+
 var (
     httpSession     *http.Client
     httpSessionOnce sync.Once
-    baseTransport   = &http.Transport{
+
+    extractorsHttpSession = make(map[string]models.HTTPClient)
+)
+
+func GetDefaultHTTPSession() *http.Client {
+    httpSessionOnce.Do(func() {
+        httpSession = &http.Client{
+            Transport: GetBaseTransport(),
+            Timeout:   60 * time.Second,
+        }
+    })
+    return httpSession
+}
+
+func GetBaseTransport() *http.Transport {
+    return &http.Transport{
         Proxy: http.ProxyFromEnvironment,
         DialContext: (&net.Dialer{
             Timeout: 30 * time.Second,
@@ -26,14 +56,167 @@
         ResponseHeaderTimeout: 10 * time.Second,
         DisableCompression:    false,
     }
-)
-
-func GetHTTPSession() *http.Client {
-    httpSessionOnce.Do(func() {
-        httpSession = &http.Client{
-            Transport: baseTransport,
-            Timeout:   60 * time.Second,
-        }
-    })
-    return httpSession
-}
+}
+
+func GetHTTPSession(extractor string) models.HTTPClient {
+    if client, ok := extractorsHttpSession[extractor]; ok {
+        return client
+    }
+
+    cfg := config.GetExtractorConfig(extractor)
+    if cfg == nil {
+        return GetDefaultHTTPSession()
+    }
+
+    if cfg.EdgeProxyURL != "" {
+        client := GetEdgeProxyClient(cfg.EdgeProxyURL)
+        extractorsHttpSession[extractor] = client
+        return client
+    }
+
+    transport := GetBaseTransport()
+    client := &http.Client{
+        Transport: transport,
+        Timeout:   60 * time.Second,
+    }
+
+    if cfg.HTTPProxy == "" && cfg.HTTPSProxy == "" {
+        extractorsHttpSession[extractor] = client
+        return client
+    }
+
+    var httpProxyURL, httpsProxyURL *url.URL
+    var err error
+
+    if cfg.HTTPProxy != "" {
+        if httpProxyURL, err = url.Parse(cfg.HTTPProxy); err != nil {
+            log.Printf("warning: invalid HTTP proxy URL '%s': %v\n", cfg.HTTPProxy, err)
+        }
+    }
+
+    if cfg.HTTPSProxy != "" {
+        if httpsProxyURL, err = url.Parse(cfg.HTTPSProxy); err != nil {
+            log.Printf("warning: invalid HTTPS proxy URL '%s': %v\n", cfg.HTTPSProxy, err)
+        }
+    }
+
+    if httpProxyURL != nil || httpsProxyURL != nil {
+        noProxyList := strings.Split(cfg.NoProxy, ",")
+        for i := range noProxyList {
+            noProxyList[i] = strings.TrimSpace(noProxyList[i])
+        }
+
+        transport.Proxy = func(req *http.Request) (*url.URL, error) {
+            if cfg.NoProxy != "" {
+                host := req.URL.Hostname()
+                for _, p := range noProxyList {
+                    if p == "" {
+                        continue
+                    }
+                    if p == host || (strings.HasPrefix(p, ".") && strings.HasSuffix(host, p)) {
+                        return nil, nil
+                    }
+                }
+            }
+            if req.URL.Scheme == "https" && httpsProxyURL != nil {
+                return httpsProxyURL, nil
+            }
+            if req.URL.Scheme == "http" && httpProxyURL != nil {
+                return httpProxyURL, nil
+            }
+            if httpsProxyURL != nil {
+                return httpsProxyURL, nil
+            }
+            return httpProxyURL, nil
+        }
+    }
+
+    extractorsHttpSession[extractor] = client
+    return client
+}
+
+func GetEdgeProxyClient(proxyURL string) *EdgeProxyClient {
+    edgeProxyClient := &EdgeProxyClient{
+        Client: &http.Client{
+            Transport: GetBaseTransport(),
+            Timeout:   60 * time.Second,
+        },
+        proxyURL: proxyURL,
+    }
+    return edgeProxyClient
+}
+
+func (c *EdgeProxyClient) Do(req *http.Request) (*http.Response, error) {
+    if c.proxyURL == "" {
+        return nil, fmt.Errorf("proxy URL is not set")
+    }
+    targetURL := req.URL.String()
+    encodedURL := url.QueryEscape(targetURL)
+    proxyURLWithParam := c.proxyURL + "?url=" + encodedURL
+
+    var bodyBytes []byte
+    var err error
+
+    if req.Body != nil {
+        bodyBytes, err = io.ReadAll(req.Body)
+        if err != nil {
+            return nil, fmt.Errorf("error reading request body: %w", err)
+        }
+        req.Body.Close()
+        req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
+    }
+
+    proxyReq, err := http.NewRequest(
+        req.Method,
+        proxyURLWithParam,
+        bytes.NewBuffer(bodyBytes),
+    )
+    if err != nil {
+        return nil, fmt.Errorf("error creating proxy request: %w", err)
+    }
+
+    for name, values := range req.Header {
+        for _, value := range values {
+            proxyReq.Header.Add(name, value)
+        }
+    }
+
+    proxyResp, err := c.Client.Do(proxyReq)
+    if err != nil {
+        return nil, fmt.Errorf("proxy request failed: %w", err)
+    }
+    defer proxyResp.Body.Close()
+
+    body, err := io.ReadAll(proxyResp.Body)
+    if err != nil {
+        return nil, fmt.Errorf("error reading proxy response: %w", err)
+    }
+
+    var response models.ProxyResponse
+    if err := json.Unmarshal(body, &response); err != nil {
+        return nil, fmt.Errorf("error parsing proxy response: %w", err)
+    }
+
+    resp := &http.Response{
+        StatusCode: response.StatusCode,
+        Status:     fmt.Sprintf("%d %s", response.StatusCode, http.StatusText(response.StatusCode)),
+        Body:       io.NopCloser(bytes.NewBufferString(response.Text)),
+        Header:     make(http.Header),
+        Request:    req,
+    }
+    parsedResponseURL, err := url.Parse(response.URL)
+    if err != nil {
+        return nil, fmt.Errorf("error parsing response URL: %w", err)
+    }
+    resp.Request.URL = parsedResponseURL
+
+    for name, value := range response.Headers {
+        resp.Header.Set(name, value)
+    }
+
+    for _, cookie := range response.Cookies {
+        resp.Header.Add("Set-Cookie", cookie)
+    }
+
+    return resp, nil
+}
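
A hedged sketch (not part of the commit) of the no_proxy matching used by the transport's Proxy callback above: an exact host match, or a dot-prefixed entry matching the end of the host, bypasses the configured proxy. The bypassesProxy helper and sample hosts are illustrative only.

package main

import (
    "fmt"
    "strings"
)

// bypassesProxy mirrors the check inside GetHTTPSession's Proxy function.
func bypassesProxy(host, noProxy string) bool {
    for _, p := range strings.Split(noProxy, ",") {
        p = strings.TrimSpace(p)
        if p == "" {
            continue
        }
        if p == host || (strings.HasPrefix(p, ".") && strings.HasSuffix(host, p)) {
            return true
        }
    }
    return false
}

func main() {
    fmt.Println(bypassesProxy("i.redditmedia.com", "reddit.com,.redditmedia.com")) // true: suffix match on .redditmedia.com
    fmt.Println(bypassesProxy("www.reddit.com", "reddit.com,.redditmedia.com"))    // false: neither exact nor suffix match
}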

@@ -29,7 +29,7 @@ func GetLocationURL(
         userAgent = ChromeUA
     }
     req.Header.Set("User-Agent", userAgent)
-    session := GetHTTPSession()
+    session := GetDefaultHTTPSession()
     resp, err := session.Do(req)
     if err != nil {
         return "", fmt.Errorf("failed to send request: %w", err)