From 0a63df9ce6db5e8b2e2ef8d1faf3cdb6c3744d58 Mon Sep 17 00:00:00 2001 From: stefanodvx <69367859+stefanodvx@users.noreply.github.com> Date: Sun, 20 Apr 2025 12:19:14 +0200 Subject: [PATCH] set configuration for each extractor --- .gitignore | 2 + config/main.go | 44 +++++++++ ext-cfg-example.yaml | 5 ++ ext/instagram/main.go | 24 ++--- ext/instagram/util.go | 5 +- ext/pinterest/main.go | 16 ++-- ext/reddit/main.go | 21 +++-- ext/redgifs/main.go | 12 +-- ext/redgifs/util.go | 9 +- ext/tiktok/main.go | 12 +-- ext/twitter/main.go | 15 ++-- go.mod | 1 + main.go | 5 ++ models/ext.go | 8 ++ models/http.go | 7 ++ util/download.go | 8 +- util/edgeproxy.go | 111 ----------------------- util/http.go | 205 +++++++++++++++++++++++++++++++++++++++--- util/misc.go | 2 +- 19 files changed, 337 insertions(+), 175 deletions(-) create mode 100644 config/main.go create mode 100644 ext-cfg-example.yaml create mode 100644 models/http.go delete mode 100644 util/edgeproxy.go diff --git a/.gitignore b/.gitignore index 2397ca0..1403f8b 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,8 @@ old/ .env +ext-cfg.yaml + .idea/ downloads diff --git a/config/main.go b/config/main.go new file mode 100644 index 0000000..fe3a129 --- /dev/null +++ b/config/main.go @@ -0,0 +1,44 @@ +package config + +import ( + "fmt" + "os" + + "govd/models" + + "gopkg.in/yaml.v3" +) + +var extractorConfigs map[string]*models.ExtractorConfig + +func LoadExtractorConfigs() error { + extractorConfigs = make(map[string]*models.ExtractorConfig) + configPath := "ext-cfg.yaml" + + _, err := os.Stat(configPath) + if os.IsNotExist(err) { + return nil + } + data, err := os.ReadFile(configPath) + if err != nil { + return fmt.Errorf("errore nella lettura del file di configurazione: %w", err) + } + + var rawConfig map[string]*models.ExtractorConfig + + if err := yaml.Unmarshal(data, &rawConfig); err != nil { + return fmt.Errorf("errore nella decodifica del file YAML: %w", err) + } + for codeName, config := range rawConfig { + 
extractorConfigs[codeName] = config + } + + return nil +} + +func GetExtractorConfig(codeName string) *models.ExtractorConfig { + if config, exists := extractorConfigs[codeName]; exists { + return config + } + return nil +} diff --git a/ext-cfg-example.yaml b/ext-cfg-example.yaml new file mode 100644 index 0000000..b50c21d --- /dev/null +++ b/ext-cfg-example.yaml @@ -0,0 +1,5 @@ +instagram_share: + edge_proxy_url: https://example.com + +reddit: + https_proxy: https://example.com \ No newline at end of file diff --git a/ext/instagram/main.go b/ext/instagram/main.go index 6e86a27..cecfbbc 100644 --- a/ext/instagram/main.go +++ b/ext/instagram/main.go @@ -16,8 +16,6 @@ import ( // feel free to open PR, if you want to // add support for the official Instagram API -var httpSession = util.GetHTTPSession() - const ( apiHostname = "api.igram.world" apiKey = "aaeaf2805cea6abef3f9d2b6a666fce62fd9d612a43ab772bb50ce81455112e0" @@ -39,6 +37,7 @@ var Extractor = &models.Extractor{ URLPattern: regexp.MustCompile(`https:\/\/(www\.)?instagram\.com\/(reel|p|tv)\/(?P[a-zA-Z0-9_-]+)`), Host: instagramHost, IsRedirect: false, + Client: util.GetHTTPSession("instagram"), Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { mediaList, err := MediaListFromAPI(ctx, false) @@ -50,12 +49,13 @@ var Extractor = &models.Extractor{ var StoriesExtractor = &models.Extractor{ Name: "Instagram Stories", - CodeName: "instagram:stories", + CodeName: "instagram_stories", Type: enums.ExtractorTypeSingle, Category: enums.ExtractorCategorySocial, URLPattern: regexp.MustCompile(`https:\/\/(www\.)?instagram\.com\/stories\/[a-zA-Z0-9._]+\/(?P\d+)`), Host: instagramHost, IsRedirect: false, + Client: util.GetHTTPSession("instagram_stories"), Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { mediaList, err := MediaListFromAPI(ctx, true) @@ -67,16 +67,15 @@ var StoriesExtractor = &models.Extractor{ var ShareURLExtractor = &models.Extractor{ Name: "Instagram Share URL", 
- CodeName: "instagram:share", + CodeName: "instagram_share", Type: enums.ExtractorTypeSingle, Category: enums.ExtractorCategorySocial, URLPattern: regexp.MustCompile(`https?:\/\/(www\.)?instagram\.com\/share\/((reels?|video|s|p)\/)?(?P[^\/\?]+)`), Host: instagramHost, IsRedirect: true, + Client: util.GetHTTPSession("instagram_share"), Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { - // temporary fix for public instances - edgeProxyClient := util.GetEdgeProxyClient() req, err := http.NewRequest( http.MethodGet, ctx.MatchedContentURL, @@ -85,7 +84,7 @@ var ShareURLExtractor = &models.Extractor{ if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) } - resp, err := edgeProxyClient.Do(req) + resp, err := ctx.Extractor.Client.Do(req) if err != nil { return nil, fmt.Errorf("failed to send request: %w", err) } @@ -103,13 +102,13 @@ func MediaListFromAPI( ) ([]*models.Media, error) { var mediaList []*models.Media postURL := ctx.MatchedContentURL - details, err := GetVideoAPI(postURL) + details, err := GetVideoAPI(ctx, postURL) if err != nil { return nil, fmt.Errorf("failed to get post: %w", err) } var caption string if !stories { - caption, err = GetPostCaption(postURL) + caption, err = GetPostCaption(ctx, postURL) if err != nil { return nil, fmt.Errorf("failed to get caption: %w", err) } @@ -157,7 +156,10 @@ func MediaListFromAPI( return mediaList, nil } -func GetVideoAPI(contentURL string) (*IGramResponse, error) { +func GetVideoAPI( + ctx *models.DownloadContext, + contentURL string, +) (*IGramResponse, error) { apiURL := fmt.Sprintf( "https://%s/api/convert", apiHostname, @@ -173,7 +175,7 @@ func GetVideoAPI(contentURL string) (*IGramResponse, error) { req.Header.Set("Content-Type", "application/json") req.Header.Set("User-Agent", util.ChromeUA) - resp, err := httpSession.Do(req) + resp, err := ctx.Extractor.Client.Do(req) if err != nil { return nil, fmt.Errorf("failed to send request: %w", err) } diff --git 
a/ext/instagram/util.go b/ext/instagram/util.go index b059b3a..fce58dd 100644 --- a/ext/instagram/util.go +++ b/ext/instagram/util.go @@ -4,6 +4,7 @@ import ( "crypto/sha256" "encoding/hex" "fmt" + "govd/models" "govd/util" "html" "io" @@ -95,9 +96,9 @@ func GetCDNURL(contentURL string) (string, error) { } func GetPostCaption( + ctx *models.DownloadContext, postURL string, ) (string, error) { - edgeProxyClient := util.GetEdgeProxyClient() req, err := http.NewRequest( http.MethodGet, postURL, @@ -121,7 +122,7 @@ func GetPostCaption( req.Header.Set("Cache-Control", "no-cache") req.Header.Set("TE", "trailers") - resp, err := edgeProxyClient.Do(req) + resp, err := ctx.Extractor.Client.Do(req) if err != nil { return "", fmt.Errorf("failed to send request: %w", err) } diff --git a/ext/pinterest/main.go b/ext/pinterest/main.go index d50dfe9..c68158d 100644 --- a/ext/pinterest/main.go +++ b/ext/pinterest/main.go @@ -19,8 +19,7 @@ const ( ) var ( - httpSession = util.GetHTTPSession() - validHost = []string{ + validHost = []string{ "com", "fr", "de", "ch", "jp", "cl", "ca", "it", "co\\.uk", "nz", "ru", "com\\.au", "at", "pt", "co\\.kr", "es", "com\\.mx", "dk", "ph", "th", "com\\.uy", "co", "nl", "info", "kr", "ie", "vn", "com\\.vn", "ec", "mx", "in", "pe", "co\\.at", "hu", @@ -33,7 +32,7 @@ var ( var ShortExtractor = &models.Extractor{ Name: "Pinterest (Short)", - CodeName: "pinterest:short", + CodeName: "pinterest_short", Type: enums.ExtractorTypeSingle, Category: enums.ExtractorCategorySocial, URLPattern: regexp.MustCompile(pinValidURLPattern), @@ -45,6 +44,7 @@ var ShortExtractor = &models.Extractor{ return domains }(), IsRedirect: true, + Client: util.GetHTTPSession("pinterest_short"), Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { shortURL := fmt.Sprintf(shortenerAPIFormat, ctx.MatchedContentID) @@ -72,6 +72,7 @@ var Extractor = &models.Extractor{ } return domains }(), + Client: util.GetHTTPSession("pinterest"), Run: func(ctx 
*models.DownloadContext) (*models.ExtractorResponse, error) { media, err := ExtractPinMedia(ctx) @@ -88,7 +89,7 @@ func ExtractPinMedia(ctx *models.DownloadContext) ([]*models.Media, error) { pinID := ctx.MatchedContentID contentURL := ctx.MatchedContentURL - pinData, err := GetPinData(pinID) + pinData, err := GetPinData(ctx, pinID) if err != nil { return nil, err } @@ -158,7 +159,10 @@ func ExtractPinMedia(ctx *models.DownloadContext) ([]*models.Media, error) { return nil, fmt.Errorf("no media found for pin ID: %s", pinID) } -func GetPinData(pinID string) (*PinData, error) { +func GetPinData( + ctx *models.DownloadContext, + pinID string, +) (*PinData, error) { params := BuildPinRequestParams(pinID) req, err := http.NewRequest(http.MethodGet, pinResourceEndpoint, nil) @@ -175,7 +179,7 @@ func GetPinData(pinID string) (*PinData, error) { // fix 403 error req.Header.Set("X-Pinterest-PWS-Handler", "www/[username].js") - resp, err := httpSession.Do(req) + resp, err := ctx.Extractor.Client.Do(req) if err != nil { return nil, fmt.Errorf("failed to send request: %w", err) } diff --git a/ext/reddit/main.go b/ext/reddit/main.go index 1cf111f..6c305f5 100644 --- a/ext/reddit/main.go +++ b/ext/reddit/main.go @@ -13,8 +13,7 @@ import ( ) var ( - httpSession = util.GetHTTPSession() - baseHost = []string{ + baseHost = []string{ "reddit.com", "redditmedia.com", "old.reddit.com", @@ -24,12 +23,13 @@ var ( var ShortExtractor = &models.Extractor{ Name: "Reddit (Short)", - CodeName: "reddit:short", + CodeName: "reddit_short", Type: enums.ExtractorTypeSingle, Category: enums.ExtractorCategorySocial, URLPattern: regexp.MustCompile(`https?://(?P(?:\w+\.)?reddit(?:media)?\.com)/(?P(?:(?:r|user)/[^/]+/)?s/(?P[^/?#&]+))`), Host: baseHost, IsRedirect: true, + Client: util.GetHTTPSession("reddit_short"), Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { req, err := http.NewRequest(http.MethodGet, ctx.MatchedContentURL, nil) @@ -46,7 +46,7 @@ var ShortExtractor = 
&models.Extractor{ req.AddCookie(cookie) } - res, err := httpSession.Do(req) + res, err := ctx.Extractor.Client.Do(req) if err != nil { return nil, fmt.Errorf("failed to send request: %w", err) } @@ -67,6 +67,7 @@ var Extractor = &models.Extractor{ Category: enums.ExtractorCategorySocial, URLPattern: regexp.MustCompile(`https?://(?P(?:\w+\.)?reddit(?:media)?\.com)/(?P(?:(?:r|user)/[^/]+/)?comments/(?P[^/?#&]+))`), Host: baseHost, + Client: util.GetHTTPSession("reddit"), Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { mediaList, err := MediaListFromAPI(ctx) @@ -86,7 +87,7 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) { contentID := ctx.MatchedContentID contentURL := ctx.MatchedContentURL - manifest, err := GetRedditData(host, slug) + manifest, err := GetRedditData(ctx, host, slug) if err != nil { return nil, err } @@ -222,7 +223,11 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) { return mediaList, nil } -func GetRedditData(host string, slug string) (RedditResponse, error) { +func GetRedditData( + ctx *models.DownloadContext, + host string, + slug string, +) (RedditResponse, error) { url := fmt.Sprintf("https://%s/%s/.json", host, slug) req, err := http.NewRequest(http.MethodGet, url, nil) @@ -239,7 +244,7 @@ func GetRedditData(host string, slug string) (RedditResponse, error) { req.AddCookie(cookie) } - res, err := httpSession.Do(req) + res, err := ctx.Extractor.Client.Do(req) if err != nil { return nil, fmt.Errorf("failed to send request: %w", err) } @@ -252,7 +257,7 @@ func GetRedditData(host string, slug string) (RedditResponse, error) { altHost = "www.reddit.com" } - return GetRedditData(altHost, slug) + return GetRedditData(ctx, altHost, slug) } var response RedditResponse diff --git a/ext/redgifs/main.go b/ext/redgifs/main.go index 6b22b0b..780ed39 100644 --- a/ext/redgifs/main.go +++ b/ext/redgifs/main.go @@ -18,8 +18,6 @@ const ( ) var ( - session = 
util.GetHTTPSession() - baseApiHeaders = map[string]string{ "referer": "https://www.redgifs.com/", "origin": "https://www.redgifs.com", @@ -37,6 +35,7 @@ var Extractor = &models.Extractor{ "redgifs.com", "thumbs2.redgifs.com", }, + Client: util.GetHTTPSession("redgifs"), Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { mediaList, err := MediaListFromAPI(ctx) @@ -52,7 +51,7 @@ var Extractor = &models.Extractor{ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) { var mediaList []*models.Media - response, err := GetVideo(ctx.MatchedContentID) + response, err := GetVideo(ctx) if err != nil { return nil, fmt.Errorf("failed to get from api: %w", err) } @@ -116,13 +115,14 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) { return mediaList, nil } -func GetVideo(videoID string) (*Response, error) { +func GetVideo(ctx *models.DownloadContext) (*Response, error) { + videoID := ctx.MatchedContentID url := videoEndpoint + videoID + "?views=true" req, err := http.NewRequest(http.MethodGet, url, nil) if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) } - token, err := GetAccessToken() + token, err := GetAccessToken(ctx) if err != nil { return nil, fmt.Errorf("failed to get access token: %w", err) } @@ -132,7 +132,7 @@ func GetVideo(videoID string) (*Response, error) { for k, v := range baseApiHeaders { req.Header.Set(k, v) } - res, err := session.Do(req) + res, err := ctx.Extractor.Client.Do(req) if err != nil { return nil, fmt.Errorf("failed to send request: %w", err) } diff --git a/ext/redgifs/util.go b/ext/redgifs/util.go index a065e8f..7765e3d 100644 --- a/ext/redgifs/util.go +++ b/ext/redgifs/util.go @@ -2,6 +2,7 @@ package redgifs import ( "fmt" + "govd/models" "govd/util" "net/http" "time" @@ -11,22 +12,22 @@ import ( var accessToken *Token -func GetAccessToken() (*Token, error) { +func GetAccessToken(ctx *models.DownloadContext) (*Token, error) { if accessToken == 
nil || time.Now().Unix() >= accessToken.ExpiresIn { - if err := RefreshAccessToken(); err != nil { + if err := RefreshAccessToken(ctx); err != nil { return nil, err } } return accessToken, nil } -func RefreshAccessToken() error { +func RefreshAccessToken(ctx *models.DownloadContext) error { req, err := http.NewRequest(http.MethodGet, tokenEndpoint, nil) if err != nil { return fmt.Errorf("failed to create request: %w", err) } req.Header.Set("User-Agent", util.ChromeUA) - res, err := session.Do(req) + res, err := ctx.Extractor.Client.Do(req) if err != nil { return fmt.Errorf("failed to send request: %w", err) } diff --git a/ext/tiktok/main.go b/ext/tiktok/main.go index 03049af..29b0d9d 100644 --- a/ext/tiktok/main.go +++ b/ext/tiktok/main.go @@ -24,8 +24,7 @@ const ( ) var ( - httpSession = util.GetHTTPSession() - baseHost = []string{ + baseHost = []string{ "tiktok.com", "vxtiktok.com", "vm.tiktok.com", @@ -45,6 +44,7 @@ var VMExtractor = &models.Extractor{ URLPattern: regexp.MustCompile(`https:\/\/((?:vm|vt|www)\.)?(vx)?tiktok\.com\/(?:t\/)?(?P[a-zA-Z0-9]+)`), Host: baseHost, IsRedirect: true, + Client: util.GetHTTPSession("tiktokvm"), Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { location, err := util.GetLocationURL(ctx.MatchedContentURL, "") @@ -64,6 +64,7 @@ var Extractor = &models.Extractor{ Category: enums.ExtractorCategorySocial, URLPattern: regexp.MustCompile(`https?:\/\/((www|m)\.)?(vx)?tiktok\.com\/((?:embed|@[\w\.-]+)\/)?(v(ideo)?|p(hoto)?)\/(?P[0-9]+)`), Host: baseHost, + Client: util.GetHTTPSession("tiktok"), Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { mediaList, err := MediaListFromAPI(ctx) @@ -79,7 +80,7 @@ var Extractor = &models.Extractor{ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) { var mediaList []*models.Media - details, err := GetVideoAPI(ctx.MatchedContentID) + details, err := GetVideoAPI(ctx) if err != nil { return nil, fmt.Errorf("failed to get from 
api: %w", err) } @@ -137,7 +138,8 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) { return mediaList, nil } -func GetVideoAPI(awemeID string) (*AwemeDetails, error) { +func GetVideoAPI(ctx *models.DownloadContext) (*AwemeDetails, error) { + awemeID := ctx.MatchedContentID apiURL := fmt.Sprintf( "https://%s/aweme/v1/multi/aweme/detail/", apiHostname, @@ -161,7 +163,7 @@ func GetVideoAPI(awemeID string) (*AwemeDetails, error) { req.Header.Set("Accept", "application/json") req.Header.Set("X-Argus", "") - resp, err := httpSession.Do(req) + resp, err := ctx.Extractor.Client.Do(req) if err != nil { return nil, fmt.Errorf("failed to send request: %w", err) } diff --git a/ext/twitter/main.go b/ext/twitter/main.go index 70c871b..b794cc7 100644 --- a/ext/twitter/main.go +++ b/ext/twitter/main.go @@ -18,16 +18,15 @@ const ( apiEndpoint = "https://x.com/i/api/graphql/zZXycP0V6H7m-2r0mOnFcA/TweetDetail" ) -var httpSession = util.GetHTTPSession() - var ShortExtractor = &models.Extractor{ Name: "Twitter (Short)", - CodeName: "twitter:short", + CodeName: "twitter_short", Type: enums.ExtractorTypeSingle, Category: enums.ExtractorCategorySocial, URLPattern: regexp.MustCompile(`https?://t\.co/(?P\w+)`), Host: []string{"t.co"}, IsRedirect: true, + Client: util.GetHTTPSession("twitter_short"), Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { req, err := http.NewRequest(http.MethodGet, ctx.MatchedContentURL, nil) @@ -35,7 +34,7 @@ var ShortExtractor = &models.Extractor{ return nil, fmt.Errorf("failed to create req: %w", err) } req.Header.Set("User-Agent", util.ChromeUA) - res, err := httpSession.Do(req) + res, err := ctx.Extractor.Client.Do(req) if err != nil { return nil, fmt.Errorf("failed to send request: %w", err) } @@ -66,6 +65,7 @@ var Extractor = &models.Extractor{ "vxx.com", "vxtwitter.com", }, + Client: util.GetHTTPSession("twitter"), Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { mediaList, 
err := MediaListFromAPI(ctx) @@ -81,7 +81,7 @@ var Extractor = &models.Extractor{ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) { var mediaList []*models.Media - tweetData, err := GetTweetAPI(ctx.MatchedContentID) + tweetData, err := GetTweetAPI(ctx) if err != nil { return nil, fmt.Errorf("failed to get tweet data: %w", err) } @@ -129,7 +129,8 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) { return mediaList, nil } -func GetTweetAPI(tweetID string) (*Tweet, error) { +func GetTweetAPI(ctx *models.DownloadContext) (*Tweet, error) { + tweetID := ctx.MatchedContentID cookies, err := util.ParseCookieFile("twitter.txt") if err != nil { return nil, fmt.Errorf("failed to get cookies: %w", err) @@ -159,7 +160,7 @@ func GetTweetAPI(tweetID string) (*Tweet, error) { } req.URL.RawQuery = q.Encode() - resp, err := httpSession.Do(req) + resp, err := ctx.Extractor.Client.Do(req) if err != nil { return nil, fmt.Errorf("failed to send request: %w", err) } diff --git a/go.mod b/go.mod index 57ad268..0e6d174 100644 --- a/go.mod +++ b/go.mod @@ -26,6 +26,7 @@ require ( github.com/twitchyliquid64/golang-asm v0.15.1 // indirect golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) require ( diff --git a/main.go b/main.go index 03d7e09..dd90de6 100644 --- a/main.go +++ b/main.go @@ -3,6 +3,7 @@ package main import ( "fmt" "govd/bot" + "govd/config" "govd/database" "govd/util" "log" @@ -20,6 +21,10 @@ func main() { if err != nil { log.Fatal("error loading .env file") } + err = config.LoadExtractorConfigs() + if err != nil { + log.Fatalf("error loading extractor configs: %v", err) + } profilerPort, err := strconv.Atoi(os.Getenv("PROFILER_PORT")) if err == nil && profilerPort > 0 { diff --git a/models/ext.go b/models/ext.go index 3e905c7..81984f5 100644 --- a/models/ext.go +++ b/models/ext.go @@ -14,6 +14,7 @@ type Extractor struct { Host 
[]string IsDRM bool IsRedirect bool + Client HTTPClient Run func(*DownloadContext) (*ExtractorResponse, error) } @@ -33,3 +34,10 @@ func (extractor *Extractor) NewMedia( ExtractorCodeName: extractor.CodeName, } } + +type ExtractorConfig struct { + HTTPProxy string `yaml:"http_proxy"` + HTTPSProxy string `yaml:"https_proxy"` + NoProxy string `yaml:"no_proxy"` + EdgeProxyURL string `yaml:"edge_proxy_url"` +} diff --git a/models/http.go b/models/http.go new file mode 100644 index 0000000..a41deb8 --- /dev/null +++ b/models/http.go @@ -0,0 +1,7 @@ +package models + +import "net/http" + +type HTTPClient interface { + Do(req *http.Request) (*http.Response, error) +} diff --git a/util/download.go b/util/download.go index b414782..091cf93 100644 --- a/util/download.go +++ b/util/download.go @@ -21,6 +21,8 @@ import ( "github.com/google/uuid" ) +var downloadHTTPSession = GetDefaultHTTPSession() + func DefaultConfig() *models.DownloadConfig { downloadsDir := os.Getenv("DOWNLOADS_DIR") if downloadsDir == "" { @@ -171,7 +173,7 @@ func downloadInMemory( return nil, fmt.Errorf("failed to create request: %w", err) } - resp, err := httpSession.Do(req) + resp, err := downloadHTTPSession.Do(req) if err != nil { return nil, fmt.Errorf("failed to download file: %w", err) } @@ -362,7 +364,7 @@ func getFileSize(ctx context.Context, fileURL string, timeout time.Duration) (in return 0, fmt.Errorf("failed to create request: %w", err) } - resp, err := httpSession.Do(req) + resp, err := downloadHTTPSession.Do(req) if err != nil { return 0, fmt.Errorf("failed to get file size: %w", err) } @@ -419,7 +421,7 @@ func downloadChunk( } req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", chunk[0], chunk[1])) - resp, err := httpSession.Do(req) + resp, err := downloadHTTPSession.Do(req) if err != nil { return nil, fmt.Errorf("download failed: %w", err) } diff --git a/util/edgeproxy.go b/util/edgeproxy.go deleted file mode 100644 index d6641db..0000000 --- a/util/edgeproxy.go +++ /dev/null @@ -1,111 
+0,0 @@ -package util - -import ( - "bytes" - "encoding/json" - "fmt" - "govd/models" - "io" - "net/http" - "net/url" - "os" - "sync" - "time" -) - -var ( - edgeProxyClient *EdgeProxyClient - edgeProxyClientOnce sync.Once -) - -type EdgeProxyClient struct { - *http.Client -} - -func GetEdgeProxyClient() *EdgeProxyClient { - edgeProxyClientOnce.Do(func() { - edgeProxyClient = &EdgeProxyClient{ - Client: &http.Client{ - Transport: baseTransport, - Timeout: 60 * time.Second, - }, - } - }) - return edgeProxyClient -} - -func (c *EdgeProxyClient) Do(req *http.Request) (*http.Response, error) { - proxyURL := os.Getenv("EDGE_PROXY_URL") - if proxyURL == "" { - return nil, fmt.Errorf("EDGE_PROXY_URL environment variable is not set") - } - targetURL := req.URL.String() - encodedURL := url.QueryEscape(targetURL) - proxyURLWithParam := proxyURL + "?url=" + encodedURL - - var bodyBytes []byte - var err error - - if req.Body != nil { - bodyBytes, err = io.ReadAll(req.Body) - if err != nil { - return nil, fmt.Errorf("error reading request body: %w", err) - } - req.Body.Close() - req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) - } - - proxyReq, err := http.NewRequest( - req.Method, - proxyURLWithParam, - bytes.NewBuffer(bodyBytes), - ) - if err != nil { - return nil, fmt.Errorf("error creating proxy request: %w", err) - } - - for name, values := range req.Header { - for _, value := range values { - proxyReq.Header.Add(name, value) - } - } - - proxyResp, err := c.Client.Do(proxyReq) - if err != nil { - return nil, fmt.Errorf("proxy request failed: %w", err) - } - defer proxyResp.Body.Close() - - body, err := io.ReadAll(proxyResp.Body) - if err != nil { - return nil, fmt.Errorf("error reading proxy response: %w", err) - } - - var response models.ProxyResponse - if err := json.Unmarshal(body, &response); err != nil { - return nil, fmt.Errorf("error parsing proxy response: %w", err) - } - - resp := &http.Response{ - StatusCode: response.StatusCode, - Status: fmt.Sprintf("%d %s", 
response.StatusCode, http.StatusText(response.StatusCode)), - Body: io.NopCloser(bytes.NewBufferString(response.Text)), - Header: make(http.Header), - Request: req, - } - parsedResponseURL, err := url.Parse(response.URL) - if err != nil { - return nil, fmt.Errorf("error parsing response URL: %w", err) - } - resp.Request.URL = parsedResponseURL - - for name, value := range response.Headers { - resp.Header.Set(name, value) - } - - for _, cookie := range response.Cookies { - resp.Header.Add("Set-Cookie", cookie) - } - - return resp, nil -} diff --git a/util/http.go b/util/http.go index e88e207..e15fd57 100644 --- a/util/http.go +++ b/util/http.go @@ -1,16 +1,46 @@ package util import ( + "bytes" + "encoding/json" + "fmt" + "govd/config" + "govd/models" + "io" + "log" "net" "net/http" + "net/url" + "strings" "sync" "time" ) +type EdgeProxyClient struct { + *http.Client + + proxyURL string +} + var ( httpSession *http.Client httpSessionOnce sync.Once - baseTransport = &http.Transport{ + + extractorsHttpSession = make(map[string]models.HTTPClient) +) + +func GetDefaultHTTPSession() *http.Client { + httpSessionOnce.Do(func() { + httpSession = &http.Client{ + Transport: GetBaseTransport(), + Timeout: 60 * time.Second, + } + }) + return httpSession +} + +func GetBaseTransport() *http.Transport { + return &http.Transport{ Proxy: http.ProxyFromEnvironment, DialContext: (&net.Dialer{ Timeout: 30 * time.Second, @@ -26,14 +56,167 @@ var ( ResponseHeaderTimeout: 10 * time.Second, DisableCompression: false, } -) - -func GetHTTPSession() *http.Client { - httpSessionOnce.Do(func() { - httpSession = &http.Client{ - Transport: baseTransport, - Timeout: 60 * time.Second, - } - }) - return httpSession +} + +func GetHTTPSession(extractor string) models.HTTPClient { + if client, ok := extractorsHttpSession[extractor]; ok { + return client + } + + cfg := config.GetExtractorConfig(extractor) + if cfg == nil { + return GetDefaultHTTPSession() + } + + if cfg.EdgeProxyURL != "" { + client := 
GetEdgeProxyClient(cfg.EdgeProxyURL) + extractorsHttpSession[extractor] = client + return client + } + + transport := GetBaseTransport() + client := &http.Client{ + Transport: transport, + Timeout: 60 * time.Second, + } + + if cfg.HTTPProxy == "" && cfg.HTTPSProxy == "" { + extractorsHttpSession[extractor] = client + return client + } + + var httpProxyURL, httpsProxyURL *url.URL + var err error + + if cfg.HTTPProxy != "" { + if httpProxyURL, err = url.Parse(cfg.HTTPProxy); err != nil { + log.Printf("warning: invalid HTTP proxy URL '%s': %v\n", cfg.HTTPProxy, err) + } + } + + if cfg.HTTPSProxy != "" { + if httpsProxyURL, err = url.Parse(cfg.HTTPSProxy); err != nil { + log.Printf("warning: invalid HTTPS proxy URL '%s': %v\n", cfg.HTTPSProxy, err) + } + } + + if httpProxyURL != nil || httpsProxyURL != nil { + noProxyList := strings.Split(cfg.NoProxy, ",") + for i := range noProxyList { + noProxyList[i] = strings.TrimSpace(noProxyList[i]) + } + + transport.Proxy = func(req *http.Request) (*url.URL, error) { + if cfg.NoProxy != "" { + host := req.URL.Hostname() + for _, p := range noProxyList { + if p == "" { + continue + } + if p == host || (strings.HasPrefix(p, ".") && strings.HasSuffix(host, p)) { + return nil, nil + } + } + } + if req.URL.Scheme == "https" && httpsProxyURL != nil { + return httpsProxyURL, nil + } + if req.URL.Scheme == "http" && httpProxyURL != nil { + return httpProxyURL, nil + } + if httpsProxyURL != nil { + return httpsProxyURL, nil + } + return httpProxyURL, nil + } + } + + extractorsHttpSession[extractor] = client + return client +} + +func GetEdgeProxyClient(proxyURL string) *EdgeProxyClient { + edgeProxyClient := &EdgeProxyClient{ + Client: &http.Client{ + Transport: GetBaseTransport(), + Timeout: 60 * time.Second, + }, + proxyURL: proxyURL, + } + return edgeProxyClient +} + +func (c *EdgeProxyClient) Do(req *http.Request) (*http.Response, error) { + if c.proxyURL == "" { + return nil, fmt.Errorf("proxy URL is not set") + } + targetURL := 
req.URL.String() + encodedURL := url.QueryEscape(targetURL) + proxyURLWithParam := c.proxyURL + "?url=" + encodedURL + + var bodyBytes []byte + var err error + + if req.Body != nil { + bodyBytes, err = io.ReadAll(req.Body) + if err != nil { + return nil, fmt.Errorf("error reading request body: %w", err) + } + req.Body.Close() + req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) + } + + proxyReq, err := http.NewRequest( + req.Method, + proxyURLWithParam, + bytes.NewBuffer(bodyBytes), + ) + if err != nil { + return nil, fmt.Errorf("error creating proxy request: %w", err) + } + + for name, values := range req.Header { + for _, value := range values { + proxyReq.Header.Add(name, value) + } + } + + proxyResp, err := c.Client.Do(proxyReq) + if err != nil { + return nil, fmt.Errorf("proxy request failed: %w", err) + } + defer proxyResp.Body.Close() + + body, err := io.ReadAll(proxyResp.Body) + if err != nil { + return nil, fmt.Errorf("error reading proxy response: %w", err) + } + + var response models.ProxyResponse + if err := json.Unmarshal(body, &response); err != nil { + return nil, fmt.Errorf("error parsing proxy response: %w", err) + } + + resp := &http.Response{ + StatusCode: response.StatusCode, + Status: fmt.Sprintf("%d %s", response.StatusCode, http.StatusText(response.StatusCode)), + Body: io.NopCloser(bytes.NewBufferString(response.Text)), + Header: make(http.Header), + Request: req, + } + parsedResponseURL, err := url.Parse(response.URL) + if err != nil { + return nil, fmt.Errorf("error parsing response URL: %w", err) + } + resp.Request.URL = parsedResponseURL + + for name, value := range response.Headers { + resp.Header.Set(name, value) + } + + for _, cookie := range response.Cookies { + resp.Header.Add("Set-Cookie", cookie) + } + + return resp, nil } diff --git a/util/misc.go b/util/misc.go index 0abb4c6..5fdbb92 100644 --- a/util/misc.go +++ b/util/misc.go @@ -29,7 +29,7 @@ func GetLocationURL( userAgent = ChromeUA } req.Header.Set("User-Agent", userAgent) 
- session := GetHTTPSession() + session := GetDefaultHTTPSession() resp, err := session.Do(req) if err != nil { return "", fmt.Errorf("failed to send request: %w", err)