set configuration for each extractor

parent 6baa965534
commit 0a63df9ce6

19 changed files with 337 additions and 175 deletions
.gitignore (vendored, 2 changes)
@@ -7,6 +7,8 @@
 old/
 
 .env
 
+ext-cfg.yaml
+
 .idea/
 
 downloads

config/main.go (new file, 44 lines)
@@ -0,0 +1,44 @@
package config

import (
	"fmt"
	"os"

	"govd/models"

	"gopkg.in/yaml.v3"
)

var extractorConfigs map[string]*models.ExtractorConfig

func LoadExtractorConfigs() error {
	extractorConfigs = make(map[string]*models.ExtractorConfig)
	configPath := "ext-cfg.yaml"

	_, err := os.Stat(configPath)
	if os.IsNotExist(err) {
		return nil
	}
	data, err := os.ReadFile(configPath)
	if err != nil {
		return fmt.Errorf("failed to read the configuration file: %w", err)
	}

	var rawConfig map[string]*models.ExtractorConfig

	if err := yaml.Unmarshal(data, &rawConfig); err != nil {
		return fmt.Errorf("failed to decode the YAML file: %w", err)
	}
	for codeName, config := range rawConfig {
		extractorConfigs[codeName] = config
	}

	return nil
}

func GetExtractorConfig(codeName string) *models.ExtractorConfig {
	if config, exists := extractorConfigs[codeName]; exists {
		return config
	}
	return nil
}

ext-cfg-example.yaml (new file, 5 lines)
@@ -0,0 +1,5 @@
instagram_share:
  edge_proxy_url: https://example.com

reddit:
  https_proxy: https://example.com

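Each top-level key in the example matches an extractor code name (normalized in this commit to underscore form, e.g. instagram_share), and each nested key maps onto a models.ExtractorConfig field through its yaml tag. A minimal usage sketch of the loader above, with a hypothetical call site that is not part of the commit:

package main

import (
	"fmt"

	"govd/config"
)

func main() {
	// Reads ext-cfg.yaml from the working directory; a missing file is not an error.
	if err := config.LoadExtractorConfigs(); err != nil {
		panic(err)
	}

	// Returns nil when the code name has no entry, in which case the
	// HTTP helpers fall back to the default shared client.
	if cfg := config.GetExtractorConfig("reddit"); cfg != nil {
		fmt.Println("https_proxy for reddit:", cfg.HTTPSProxy)
	}
}
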
Instagram extractor (main)
@@ -16,8 +16,6 @@ import (
 // feel free to open PR, if you want to
 // add support for the official Instagram API
 
-var httpSession = util.GetHTTPSession()
-
 const (
 	apiHostname = "api.igram.world"
 	apiKey      = "aaeaf2805cea6abef3f9d2b6a666fce62fd9d612a43ab772bb50ce81455112e0"
@@ -39,6 +37,7 @@ var Extractor = &models.Extractor{
 	URLPattern: regexp.MustCompile(`https:\/\/(www\.)?instagram\.com\/(reel|p|tv)\/(?P<id>[a-zA-Z0-9_-]+)`),
 	Host:       instagramHost,
 	IsRedirect: false,
+	Client:     util.GetHTTPSession("instagram"),
 
 	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
 		mediaList, err := MediaListFromAPI(ctx, false)
@@ -50,12 +49,13 @@ var Extractor = &models.Extractor{
 
 var StoriesExtractor = &models.Extractor{
 	Name:     "Instagram Stories",
-	CodeName: "instagram:stories",
+	CodeName: "instagram_stories",
 	Type:     enums.ExtractorTypeSingle,
 	Category: enums.ExtractorCategorySocial,
 	URLPattern: regexp.MustCompile(`https:\/\/(www\.)?instagram\.com\/stories\/[a-zA-Z0-9._]+\/(?P<id>\d+)`),
 	Host:       instagramHost,
 	IsRedirect: false,
+	Client:     util.GetHTTPSession("instagram_stories"),
 
 	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
 		mediaList, err := MediaListFromAPI(ctx, true)
@@ -67,16 +67,15 @@ var StoriesExtractor = &models.Extractor{
 
 var ShareURLExtractor = &models.Extractor{
 	Name:     "Instagram Share URL",
-	CodeName: "instagram:share",
+	CodeName: "instagram_share",
 	Type:     enums.ExtractorTypeSingle,
 	Category: enums.ExtractorCategorySocial,
 	URLPattern: regexp.MustCompile(`https?:\/\/(www\.)?instagram\.com\/share\/((reels?|video|s|p)\/)?(?P<id>[^\/\?]+)`),
 	Host:       instagramHost,
 	IsRedirect: true,
+	Client:     util.GetHTTPSession("instagram_share"),
 
 	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
-		// temporary fix for public instances
-		edgeProxyClient := util.GetEdgeProxyClient()
 		req, err := http.NewRequest(
 			http.MethodGet,
 			ctx.MatchedContentURL,
@@ -85,7 +84,7 @@ var ShareURLExtractor = &models.Extractor{
 		if err != nil {
 			return nil, fmt.Errorf("failed to create request: %w", err)
 		}
-		resp, err := edgeProxyClient.Do(req)
+		resp, err := ctx.Extractor.Client.Do(req)
 		if err != nil {
 			return nil, fmt.Errorf("failed to send request: %w", err)
 		}
@@ -103,13 +102,13 @@ func MediaListFromAPI(
 ) ([]*models.Media, error) {
 	var mediaList []*models.Media
 	postURL := ctx.MatchedContentURL
-	details, err := GetVideoAPI(postURL)
+	details, err := GetVideoAPI(ctx, postURL)
 	if err != nil {
 		return nil, fmt.Errorf("failed to get post: %w", err)
 	}
 	var caption string
 	if !stories {
-		caption, err = GetPostCaption(postURL)
+		caption, err = GetPostCaption(ctx, postURL)
 		if err != nil {
 			return nil, fmt.Errorf("failed to get caption: %w", err)
 		}
@@ -157,7 +156,10 @@ func MediaListFromAPI(
 	return mediaList, nil
 }
 
-func GetVideoAPI(contentURL string) (*IGramResponse, error) {
+func GetVideoAPI(
+	ctx *models.DownloadContext,
+	contentURL string,
+) (*IGramResponse, error) {
 	apiURL := fmt.Sprintf(
 		"https://%s/api/convert",
 		apiHostname,
@@ -173,7 +175,7 @@ func GetVideoAPI(contentURL string) (*IGramResponse, error) {
 	req.Header.Set("Content-Type", "application/json")
 	req.Header.Set("User-Agent", util.ChromeUA)
 
-	resp, err := httpSession.Do(req)
+	resp, err := ctx.Extractor.Client.Do(req)
 	if err != nil {
 		return nil, fmt.Errorf("failed to send request: %w", err)
 	}

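The same refactoring repeats in every extractor below: the package-level httpSession and edge-proxy globals are removed, each Extractor declares its own Client via util.GetHTTPSession with its code name, and helper functions take the DownloadContext so requests go through ctx.Extractor.Client. A compressed sketch of the resulting shape, using a hypothetical "example" extractor that is not part of the commit and assuming the same imports as the files above (govd/models, govd/util, net/http, fmt):

var ExampleExtractor = &models.Extractor{
	Name:     "Example",
	CodeName: "example",
	// The client is resolved once from ext-cfg.yaml: direct, proxied, or edge-proxied.
	Client: util.GetHTTPSession("example"),

	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
		req, err := http.NewRequest(http.MethodGet, ctx.MatchedContentURL, nil)
		if err != nil {
			return nil, fmt.Errorf("failed to create request: %w", err)
		}
		// All outgoing requests use the per-extractor client.
		resp, err := ctx.Extractor.Client.Do(req)
		if err != nil {
			return nil, fmt.Errorf("failed to send request: %w", err)
		}
		defer resp.Body.Close()
		return &models.ExtractorResponse{}, nil
	},
}
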
Instagram extractor (caption and CDN helpers)
@@ -4,6 +4,7 @@ import (
 	"crypto/sha256"
 	"encoding/hex"
 	"fmt"
+	"govd/models"
 	"govd/util"
 	"html"
 	"io"
@@ -95,9 +96,9 @@ func GetCDNURL(contentURL string) (string, error) {
 }
 
 func GetPostCaption(
+	ctx *models.DownloadContext,
 	postURL string,
 ) (string, error) {
-	edgeProxyClient := util.GetEdgeProxyClient()
 	req, err := http.NewRequest(
 		http.MethodGet,
 		postURL,
@@ -121,7 +122,7 @@ func GetPostCaption(
 	req.Header.Set("Cache-Control", "no-cache")
 	req.Header.Set("TE", "trailers")
 
-	resp, err := edgeProxyClient.Do(req)
+	resp, err := ctx.Extractor.Client.Do(req)
 	if err != nil {
 		return "", fmt.Errorf("failed to send request: %w", err)
 	}

Pinterest extractor
@@ -19,8 +19,7 @@ const (
 )
 
 var (
-	httpSession = util.GetHTTPSession()
-	validHost   = []string{
+	validHost = []string{
 		"com", "fr", "de", "ch", "jp", "cl", "ca", "it", "co\\.uk", "nz", "ru", "com\\.au",
 		"at", "pt", "co\\.kr", "es", "com\\.mx", "dk", "ph", "th", "com\\.uy", "co", "nl",
 		"info", "kr", "ie", "vn", "com\\.vn", "ec", "mx", "in", "pe", "co\\.at", "hu",
@@ -33,7 +32,7 @@ var (
 
 var ShortExtractor = &models.Extractor{
 	Name:     "Pinterest (Short)",
-	CodeName: "pinterest:short",
+	CodeName: "pinterest_short",
 	Type:     enums.ExtractorTypeSingle,
 	Category: enums.ExtractorCategorySocial,
 	URLPattern: regexp.MustCompile(pinValidURLPattern),
@@ -45,6 +44,7 @@ var ShortExtractor = &models.Extractor{
 		return domains
 	}(),
 	IsRedirect: true,
+	Client:     util.GetHTTPSession("pinterest_short"),
 
 	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
 		shortURL := fmt.Sprintf(shortenerAPIFormat, ctx.MatchedContentID)
@@ -72,6 +72,7 @@ var Extractor = &models.Extractor{
 		}
 		return domains
 	}(),
+	Client: util.GetHTTPSession("pinterest"),
 
 	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
 		media, err := ExtractPinMedia(ctx)
@@ -88,7 +89,7 @@ func ExtractPinMedia(ctx *models.DownloadContext) ([]*models.Media, error) {
 	pinID := ctx.MatchedContentID
 	contentURL := ctx.MatchedContentURL
 
-	pinData, err := GetPinData(pinID)
+	pinData, err := GetPinData(ctx, pinID)
 	if err != nil {
 		return nil, err
 	}
@@ -158,7 +159,10 @@ func ExtractPinMedia(ctx *models.DownloadContext) ([]*models.Media, error) {
 	return nil, fmt.Errorf("no media found for pin ID: %s", pinID)
 }
 
-func GetPinData(pinID string) (*PinData, error) {
+func GetPinData(
+	ctx *models.DownloadContext,
+	pinID string,
+) (*PinData, error) {
 	params := BuildPinRequestParams(pinID)
 
 	req, err := http.NewRequest(http.MethodGet, pinResourceEndpoint, nil)
@@ -175,7 +179,7 @@ func GetPinData(pinID string) (*PinData, error) {
 	// fix 403 error
 	req.Header.Set("X-Pinterest-PWS-Handler", "www/[username].js")
 
-	resp, err := httpSession.Do(req)
+	resp, err := ctx.Extractor.Client.Do(req)
 	if err != nil {
 		return nil, fmt.Errorf("failed to send request: %w", err)
 	}

Reddit extractor
@@ -13,8 +13,7 @@ import (
 )
 
 var (
-	httpSession = util.GetHTTPSession()
-	baseHost    = []string{
+	baseHost = []string{
 		"reddit.com",
 		"redditmedia.com",
 		"old.reddit.com",
@@ -24,12 +23,13 @@ var (
 
 var ShortExtractor = &models.Extractor{
 	Name:     "Reddit (Short)",
-	CodeName: "reddit:short",
+	CodeName: "reddit_short",
 	Type:     enums.ExtractorTypeSingle,
 	Category: enums.ExtractorCategorySocial,
 	URLPattern: regexp.MustCompile(`https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?s/(?P<id>[^/?#&]+))`),
 	Host:       baseHost,
 	IsRedirect: true,
+	Client:     util.GetHTTPSession("reddit_short"),
 
 	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
 		req, err := http.NewRequest(http.MethodGet, ctx.MatchedContentURL, nil)
@@ -46,7 +46,7 @@ var ShortExtractor = &models.Extractor{
 			req.AddCookie(cookie)
 		}
 
-		res, err := httpSession.Do(req)
+		res, err := ctx.Extractor.Client.Do(req)
 		if err != nil {
 			return nil, fmt.Errorf("failed to send request: %w", err)
 		}
@@ -67,6 +67,7 @@ var Extractor = &models.Extractor{
 	Category: enums.ExtractorCategorySocial,
 	URLPattern: regexp.MustCompile(`https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))`),
 	Host:       baseHost,
+	Client:     util.GetHTTPSession("reddit"),
 
 	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
 		mediaList, err := MediaListFromAPI(ctx)
@@ -86,7 +87,7 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
 	contentID := ctx.MatchedContentID
 	contentURL := ctx.MatchedContentURL
 
-	manifest, err := GetRedditData(host, slug)
+	manifest, err := GetRedditData(ctx, host, slug)
 	if err != nil {
 		return nil, err
 	}
@@ -222,7 +223,11 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
 	return mediaList, nil
 }
 
-func GetRedditData(host string, slug string) (RedditResponse, error) {
+func GetRedditData(
+	ctx *models.DownloadContext,
+	host string,
+	slug string,
+) (RedditResponse, error) {
 	url := fmt.Sprintf("https://%s/%s/.json", host, slug)
 
 	req, err := http.NewRequest(http.MethodGet, url, nil)
@@ -239,7 +244,7 @@ func GetRedditData(host string, slug string) (RedditResponse, error) {
 		req.AddCookie(cookie)
 	}
 
-	res, err := httpSession.Do(req)
+	res, err := ctx.Extractor.Client.Do(req)
 	if err != nil {
 		return nil, fmt.Errorf("failed to send request: %w", err)
 	}
@@ -252,7 +257,7 @@ func GetRedditData(host string, slug string) (RedditResponse, error) {
 			altHost = "www.reddit.com"
 		}
 
-		return GetRedditData(altHost, slug)
+		return GetRedditData(ctx, altHost, slug)
 	}
 
 	var response RedditResponse

Redgifs extractor
@@ -18,8 +18,6 @@ const (
 )
 
 var (
-	session = util.GetHTTPSession()
-
 	baseApiHeaders = map[string]string{
 		"referer": "https://www.redgifs.com/",
 		"origin":  "https://www.redgifs.com",
@@ -37,6 +35,7 @@ var Extractor = &models.Extractor{
 		"redgifs.com",
 		"thumbs2.redgifs.com",
 	},
+	Client: util.GetHTTPSession("redgifs"),
 
 	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
 		mediaList, err := MediaListFromAPI(ctx)
@@ -52,7 +51,7 @@ var Extractor = &models.Extractor{
 func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
 	var mediaList []*models.Media
 
-	response, err := GetVideo(ctx.MatchedContentID)
+	response, err := GetVideo(ctx)
 	if err != nil {
 		return nil, fmt.Errorf("failed to get from api: %w", err)
 	}
@@ -116,13 +115,14 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
 	return mediaList, nil
 }
 
-func GetVideo(videoID string) (*Response, error) {
+func GetVideo(ctx *models.DownloadContext) (*Response, error) {
+	videoID := ctx.MatchedContentID
 	url := videoEndpoint + videoID + "?views=true"
 	req, err := http.NewRequest(http.MethodGet, url, nil)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create request: %w", err)
 	}
-	token, err := GetAccessToken()
+	token, err := GetAccessToken(ctx)
 	if err != nil {
 		return nil, fmt.Errorf("failed to get access token: %w", err)
 	}
@@ -132,7 +132,7 @@ func GetVideo(videoID string) (*Response, error) {
 	for k, v := range baseApiHeaders {
 		req.Header.Set(k, v)
 	}
-	res, err := session.Do(req)
+	res, err := ctx.Extractor.Client.Do(req)
 	if err != nil {
 		return nil, fmt.Errorf("failed to send request: %w", err)
 	}

Redgifs extractor (access token helpers)
@@ -2,6 +2,7 @@ package redgifs
 
 import (
 	"fmt"
+	"govd/models"
 	"govd/util"
 	"net/http"
 	"time"
@@ -11,22 +12,22 @@ import (
 
 var accessToken *Token
 
-func GetAccessToken() (*Token, error) {
+func GetAccessToken(ctx *models.DownloadContext) (*Token, error) {
 	if accessToken == nil || time.Now().Unix() >= accessToken.ExpiresIn {
-		if err := RefreshAccessToken(); err != nil {
+		if err := RefreshAccessToken(ctx); err != nil {
 			return nil, err
 		}
 	}
 	return accessToken, nil
 }
 
-func RefreshAccessToken() error {
+func RefreshAccessToken(ctx *models.DownloadContext) error {
 	req, err := http.NewRequest(http.MethodGet, tokenEndpoint, nil)
 	if err != nil {
 		return fmt.Errorf("failed to create request: %w", err)
 	}
 	req.Header.Set("User-Agent", util.ChromeUA)
-	res, err := session.Do(req)
+	res, err := ctx.Extractor.Client.Do(req)
 	if err != nil {
 		return fmt.Errorf("failed to send request: %w", err)
 	}

TikTok extractor
@@ -24,8 +24,7 @@ const (
 )
 
 var (
-	httpSession = util.GetHTTPSession()
-	baseHost    = []string{
+	baseHost = []string{
 		"tiktok.com",
 		"vxtiktok.com",
 		"vm.tiktok.com",
@@ -45,6 +44,7 @@ var VMExtractor = &models.Extractor{
 	URLPattern: regexp.MustCompile(`https:\/\/((?:vm|vt|www)\.)?(vx)?tiktok\.com\/(?:t\/)?(?P<id>[a-zA-Z0-9]+)`),
 	Host:       baseHost,
 	IsRedirect: true,
+	Client:     util.GetHTTPSession("tiktokvm"),
 
 	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
 		location, err := util.GetLocationURL(ctx.MatchedContentURL, "")
@@ -64,6 +64,7 @@ var Extractor = &models.Extractor{
 	Category: enums.ExtractorCategorySocial,
 	URLPattern: regexp.MustCompile(`https?:\/\/((www|m)\.)?(vx)?tiktok\.com\/((?:embed|@[\w\.-]+)\/)?(v(ideo)?|p(hoto)?)\/(?P<id>[0-9]+)`),
 	Host:       baseHost,
+	Client:     util.GetHTTPSession("tiktok"),
 
 	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
 		mediaList, err := MediaListFromAPI(ctx)
@@ -79,7 +80,7 @@ var Extractor = &models.Extractor{
 func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
 	var mediaList []*models.Media
 
-	details, err := GetVideoAPI(ctx.MatchedContentID)
+	details, err := GetVideoAPI(ctx)
 	if err != nil {
 		return nil, fmt.Errorf("failed to get from api: %w", err)
 	}
@@ -137,7 +138,8 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
 	return mediaList, nil
 }
 
-func GetVideoAPI(awemeID string) (*AwemeDetails, error) {
+func GetVideoAPI(ctx *models.DownloadContext) (*AwemeDetails, error) {
+	awemeID := ctx.MatchedContentID
 	apiURL := fmt.Sprintf(
 		"https://%s/aweme/v1/multi/aweme/detail/",
 		apiHostname,
@@ -161,7 +163,7 @@ func GetVideoAPI(awemeID string) (*AwemeDetails, error) {
 	req.Header.Set("Accept", "application/json")
 	req.Header.Set("X-Argus", "")
 
-	resp, err := httpSession.Do(req)
+	resp, err := ctx.Extractor.Client.Do(req)
 	if err != nil {
 		return nil, fmt.Errorf("failed to send request: %w", err)
 	}

Twitter extractor
@@ -18,16 +18,15 @@ const (
 	apiEndpoint = "https://x.com/i/api/graphql/zZXycP0V6H7m-2r0mOnFcA/TweetDetail"
 )
 
-var httpSession = util.GetHTTPSession()
-
 var ShortExtractor = &models.Extractor{
 	Name:     "Twitter (Short)",
-	CodeName: "twitter:short",
+	CodeName: "twitter_short",
 	Type:     enums.ExtractorTypeSingle,
 	Category: enums.ExtractorCategorySocial,
 	URLPattern: regexp.MustCompile(`https?://t\.co/(?P<id>\w+)`),
 	Host:       []string{"t.co"},
 	IsRedirect: true,
+	Client:     util.GetHTTPSession("twitter_short"),
 
 	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
 		req, err := http.NewRequest(http.MethodGet, ctx.MatchedContentURL, nil)
@@ -35,7 +34,7 @@ var ShortExtractor = &models.Extractor{
 			return nil, fmt.Errorf("failed to create req: %w", err)
 		}
 		req.Header.Set("User-Agent", util.ChromeUA)
-		res, err := httpSession.Do(req)
+		res, err := ctx.Extractor.Client.Do(req)
 		if err != nil {
 			return nil, fmt.Errorf("failed to send request: %w", err)
 		}
@@ -66,6 +65,7 @@ var Extractor = &models.Extractor{
 		"vxx.com",
 		"vxtwitter.com",
 	},
+	Client: util.GetHTTPSession("twitter"),
 
 	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
 		mediaList, err := MediaListFromAPI(ctx)
@@ -81,7 +81,7 @@
 func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
 	var mediaList []*models.Media
 
-	tweetData, err := GetTweetAPI(ctx.MatchedContentID)
+	tweetData, err := GetTweetAPI(ctx)
 	if err != nil {
 		return nil, fmt.Errorf("failed to get tweet data: %w", err)
 	}
@@ -129,7 +129,8 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
 	return mediaList, nil
 }
 
-func GetTweetAPI(tweetID string) (*Tweet, error) {
+func GetTweetAPI(ctx *models.DownloadContext) (*Tweet, error) {
+	tweetID := ctx.MatchedContentID
 	cookies, err := util.ParseCookieFile("twitter.txt")
 	if err != nil {
 		return nil, fmt.Errorf("failed to get cookies: %w", err)
@@ -159,7 +160,7 @@ func GetTweetAPI(tweetID string) (*Tweet, error) {
 	}
 	req.URL.RawQuery = q.Encode()
 
-	resp, err := httpSession.Do(req)
+	resp, err := ctx.Extractor.Client.Do(req)
 	if err != nil {
 		return nil, fmt.Errorf("failed to send request: %w", err)
 	}

go.mod (1 change)
@@ -26,6 +26,7 @@ require (
 	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
 	golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
 
 require (

main.go (5 changes)
@@ -3,6 +3,7 @@ package main
 import (
 	"fmt"
 	"govd/bot"
+	"govd/config"
 	"govd/database"
 	"govd/util"
 	"log"
@@ -20,6 +21,10 @@ func main() {
 	if err != nil {
 		log.Fatal("error loading .env file")
 	}
+	err = config.LoadExtractorConfigs()
+	if err != nil {
+		log.Fatalf("error loading extractor configs: %v", err)
+	}
 
 	profilerPort, err := strconv.Atoi(os.Getenv("PROFILER_PORT"))
 	if err == nil && profilerPort > 0 {

Extractor model (models package)
@@ -14,6 +14,7 @@ type Extractor struct {
 	Host       []string
 	IsDRM      bool
 	IsRedirect bool
+	Client     HTTPClient
 
 	Run func(*DownloadContext) (*ExtractorResponse, error)
 }
@@ -33,3 +34,10 @@ func (extractor *Extractor) NewMedia(
 		ExtractorCodeName: extractor.CodeName,
 	}
 }
+
+type ExtractorConfig struct {
+	HTTPProxy    string `yaml:"http_proxy"`
+	HTTPSProxy   string `yaml:"https_proxy"`
+	NoProxy      string `yaml:"no_proxy"`
+	EdgeProxyURL string `yaml:"edge_proxy_url"`
+}

models/http.go (new file, 7 lines)
@@ -0,0 +1,7 @@
package models

import "net/http"

type HTTPClient interface {
	Do(req *http.Request) (*http.Response, error)
}

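Extractor.Client is typed against this small interface rather than *http.Client, so any value with a Do(*http.Request) (*http.Response, error) method can back an extractor; both the default client and the EdgeProxyClient in util/http.go below qualify. A standalone compile-time sketch of that property (not part of the commit):

package main

import (
	"net/http"

	"govd/models"
	"govd/util"
)

// Compile-time assertions: both client types implement models.HTTPClient,
// so either can be assigned to Extractor.Client.
var (
	_ models.HTTPClient = (*http.Client)(nil)
	_ models.HTTPClient = (*util.EdgeProxyClient)(nil)
)

func main() {}
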
Download helpers (util package)
@@ -21,6 +21,8 @@ import (
 	"github.com/google/uuid"
 )
 
+var downloadHTTPSession = GetDefaultHTTPSession()
+
 func DefaultConfig() *models.DownloadConfig {
 	downloadsDir := os.Getenv("DOWNLOADS_DIR")
 	if downloadsDir == "" {
@@ -171,7 +173,7 @@ func downloadInMemory(
 		return nil, fmt.Errorf("failed to create request: %w", err)
 	}
 
-	resp, err := httpSession.Do(req)
+	resp, err := downloadHTTPSession.Do(req)
 	if err != nil {
 		return nil, fmt.Errorf("failed to download file: %w", err)
 	}
@@ -362,7 +364,7 @@ func getFileSize(ctx context.Context, fileURL string, timeout time.Duration) (in
 		return 0, fmt.Errorf("failed to create request: %w", err)
 	}
 
-	resp, err := httpSession.Do(req)
+	resp, err := downloadHTTPSession.Do(req)
 	if err != nil {
 		return 0, fmt.Errorf("failed to get file size: %w", err)
 	}
@@ -419,7 +421,7 @@ func downloadChunk(
 	}
 	req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", chunk[0], chunk[1]))
 
-	resp, err := httpSession.Do(req)
+	resp, err := downloadHTTPSession.Do(req)
 	if err != nil {
 		return nil, fmt.Errorf("download failed: %w", err)
 	}

Deleted file (the old package-level EdgeProxyClient in package util, 111 lines; a per-extractor version now lives in util/http.go below)
@@ -1,111 +0,0 @@
package util

import (
	"bytes"
	"encoding/json"
	"fmt"
	"govd/models"
	"io"
	"net/http"
	"net/url"
	"os"
	"sync"
	"time"
)

var (
	edgeProxyClient     *EdgeProxyClient
	edgeProxyClientOnce sync.Once
)

type EdgeProxyClient struct {
	*http.Client
}

func GetEdgeProxyClient() *EdgeProxyClient {
	edgeProxyClientOnce.Do(func() {
		edgeProxyClient = &EdgeProxyClient{
			Client: &http.Client{
				Transport: baseTransport,
				Timeout:   60 * time.Second,
			},
		}
	})
	return edgeProxyClient
}

func (c *EdgeProxyClient) Do(req *http.Request) (*http.Response, error) {
	proxyURL := os.Getenv("EDGE_PROXY_URL")
	if proxyURL == "" {
		return nil, fmt.Errorf("EDGE_PROXY_URL environment variable is not set")
	}
	targetURL := req.URL.String()
	encodedURL := url.QueryEscape(targetURL)
	proxyURLWithParam := proxyURL + "?url=" + encodedURL

	var bodyBytes []byte
	var err error

	if req.Body != nil {
		bodyBytes, err = io.ReadAll(req.Body)
		if err != nil {
			return nil, fmt.Errorf("error reading request body: %w", err)
		}
		req.Body.Close()
		req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
	}

	proxyReq, err := http.NewRequest(
		req.Method,
		proxyURLWithParam,
		bytes.NewBuffer(bodyBytes),
	)
	if err != nil {
		return nil, fmt.Errorf("error creating proxy request: %w", err)
	}

	for name, values := range req.Header {
		for _, value := range values {
			proxyReq.Header.Add(name, value)
		}
	}

	proxyResp, err := c.Client.Do(proxyReq)
	if err != nil {
		return nil, fmt.Errorf("proxy request failed: %w", err)
	}
	defer proxyResp.Body.Close()

	body, err := io.ReadAll(proxyResp.Body)
	if err != nil {
		return nil, fmt.Errorf("error reading proxy response: %w", err)
	}

	var response models.ProxyResponse
	if err := json.Unmarshal(body, &response); err != nil {
		return nil, fmt.Errorf("error parsing proxy response: %w", err)
	}

	resp := &http.Response{
		StatusCode: response.StatusCode,
		Status:     fmt.Sprintf("%d %s", response.StatusCode, http.StatusText(response.StatusCode)),
		Body:       io.NopCloser(bytes.NewBufferString(response.Text)),
		Header:     make(http.Header),
		Request:    req,
	}
	parsedResponseURL, err := url.Parse(response.URL)
	if err != nil {
		return nil, fmt.Errorf("error parsing response URL: %w", err)
	}
	resp.Request.URL = parsedResponseURL

	for name, value := range response.Headers {
		resp.Header.Set(name, value)
	}

	for _, cookie := range response.Cookies {
		resp.Header.Add("Set-Cookie", cookie)
	}

	return resp, nil
}

util/http.go (205 changes)
@@ -1,16 +1,46 @@
 package util
 
 import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"govd/config"
+	"govd/models"
+	"io"
+	"log"
 	"net"
 	"net/http"
+	"net/url"
+	"strings"
 	"sync"
 	"time"
 )
 
+type EdgeProxyClient struct {
+	*http.Client
+
+	proxyURL string
+}
+
 var (
 	httpSession     *http.Client
 	httpSessionOnce sync.Once
-	baseTransport   = &http.Transport{
+
+	extractorsHttpSession = make(map[string]models.HTTPClient)
+)
+
+func GetDefaultHTTPSession() *http.Client {
+	httpSessionOnce.Do(func() {
+		httpSession = &http.Client{
+			Transport: GetBaseTransport(),
+			Timeout:   60 * time.Second,
+		}
+	})
+	return httpSession
+}
+
+func GetBaseTransport() *http.Transport {
+	return &http.Transport{
 		Proxy: http.ProxyFromEnvironment,
 		DialContext: (&net.Dialer{
 			Timeout: 30 * time.Second,
@@ -26,14 +56,167 @@
 		ResponseHeaderTimeout: 10 * time.Second,
 		DisableCompression:    false,
 	}
-)
+}
 
-func GetHTTPSession() *http.Client {
-	httpSessionOnce.Do(func() {
-		httpSession = &http.Client{
-			Transport: baseTransport,
-			Timeout:   60 * time.Second,
-		}
-	})
-	return httpSession
+func GetHTTPSession(extractor string) models.HTTPClient {
+	if client, ok := extractorsHttpSession[extractor]; ok {
+		return client
+	}
+
+	cfg := config.GetExtractorConfig(extractor)
+	if cfg == nil {
+		return GetDefaultHTTPSession()
+	}
+
+	if cfg.EdgeProxyURL != "" {
+		client := GetEdgeProxyClient(cfg.EdgeProxyURL)
+		extractorsHttpSession[extractor] = client
+		return client
+	}
+
+	transport := GetBaseTransport()
+	client := &http.Client{
+		Transport: transport,
+		Timeout:   60 * time.Second,
+	}
+
+	if cfg.HTTPProxy == "" && cfg.HTTPSProxy == "" {
+		extractorsHttpSession[extractor] = client
+		return client
+	}
+
+	var httpProxyURL, httpsProxyURL *url.URL
+	var err error
+
+	if cfg.HTTPProxy != "" {
+		if httpProxyURL, err = url.Parse(cfg.HTTPProxy); err != nil {
+			log.Printf("warning: invalid HTTP proxy URL '%s': %v\n", cfg.HTTPProxy, err)
+		}
+	}
+
+	if cfg.HTTPSProxy != "" {
+		if httpsProxyURL, err = url.Parse(cfg.HTTPSProxy); err != nil {
+			log.Printf("warning: invalid HTTPS proxy URL '%s': %v\n", cfg.HTTPSProxy, err)
+		}
+	}
+
+	if httpProxyURL != nil || httpsProxyURL != nil {
+		noProxyList := strings.Split(cfg.NoProxy, ",")
+		for i := range noProxyList {
+			noProxyList[i] = strings.TrimSpace(noProxyList[i])
+		}
+
+		transport.Proxy = func(req *http.Request) (*url.URL, error) {
+			if cfg.NoProxy != "" {
+				host := req.URL.Hostname()
+				for _, p := range noProxyList {
+					if p == "" {
+						continue
+					}
+					if p == host || (strings.HasPrefix(p, ".") && strings.HasSuffix(host, p)) {
+						return nil, nil
+					}
+				}
+			}
+			if req.URL.Scheme == "https" && httpsProxyURL != nil {
+				return httpsProxyURL, nil
+			}
+			if req.URL.Scheme == "http" && httpProxyURL != nil {
+				return httpProxyURL, nil
+			}
+			if httpsProxyURL != nil {
+				return httpsProxyURL, nil
+			}
+			return httpProxyURL, nil
+		}
+	}
+
+	extractorsHttpSession[extractor] = client
+	return client
+}
+
+func GetEdgeProxyClient(proxyURL string) *EdgeProxyClient {
+	edgeProxyClient := &EdgeProxyClient{
+		Client: &http.Client{
+			Transport: GetBaseTransport(),
+			Timeout:   60 * time.Second,
+		},
+		proxyURL: proxyURL,
+	}
+	return edgeProxyClient
+}
+
+func (c *EdgeProxyClient) Do(req *http.Request) (*http.Response, error) {
+	if c.proxyURL == "" {
+		return nil, fmt.Errorf("proxy URL is not set")
+	}
+	targetURL := req.URL.String()
+	encodedURL := url.QueryEscape(targetURL)
+	proxyURLWithParam := c.proxyURL + "?url=" + encodedURL
+
+	var bodyBytes []byte
+	var err error
+
+	if req.Body != nil {
+		bodyBytes, err = io.ReadAll(req.Body)
+		if err != nil {
+			return nil, fmt.Errorf("error reading request body: %w", err)
+		}
+		req.Body.Close()
+		req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
+	}
+
+	proxyReq, err := http.NewRequest(
+		req.Method,
+		proxyURLWithParam,
+		bytes.NewBuffer(bodyBytes),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("error creating proxy request: %w", err)
+	}
+
+	for name, values := range req.Header {
+		for _, value := range values {
+			proxyReq.Header.Add(name, value)
+		}
+	}
+
+	proxyResp, err := c.Client.Do(proxyReq)
+	if err != nil {
+		return nil, fmt.Errorf("proxy request failed: %w", err)
+	}
+	defer proxyResp.Body.Close()
+
+	body, err := io.ReadAll(proxyResp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("error reading proxy response: %w", err)
+	}
+
+	var response models.ProxyResponse
+	if err := json.Unmarshal(body, &response); err != nil {
+		return nil, fmt.Errorf("error parsing proxy response: %w", err)
+	}
+
+	resp := &http.Response{
+		StatusCode: response.StatusCode,
+		Status:     fmt.Sprintf("%d %s", response.StatusCode, http.StatusText(response.StatusCode)),
+		Body:       io.NopCloser(bytes.NewBufferString(response.Text)),
+		Header:     make(http.Header),
+		Request:    req,
+	}
+	parsedResponseURL, err := url.Parse(response.URL)
+	if err != nil {
+		return nil, fmt.Errorf("error parsing response URL: %w", err)
+	}
+	resp.Request.URL = parsedResponseURL
+
+	for name, value := range response.Headers {
+		resp.Header.Set(name, value)
+	}
+
+	for _, cookie := range response.Cookies {
+		resp.Header.Add("Set-Cookie", cookie)
+	}
+
+	return resp, nil
 }

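GetHTTPSession resolves one client per code name and caches it in extractorsHttpSession: an edge_proxy_url entry wins and yields an EdgeProxyClient, otherwise http_proxy/https_proxy are applied per request scheme with no_proxy exceptions (exact host or ".suffix" match), and extractors with no ext-cfg.yaml entry share the default session. A minimal usage sketch with a hypothetical call site, not part of the commit:

package main

import (
	"net/http"

	"govd/config"
	"govd/util"
)

func main() {
	// Populate the per-extractor config map from ext-cfg.yaml, if present.
	_ = config.LoadExtractorConfigs()

	// Resolved once for "reddit", then cached for later calls.
	client := util.GetHTTPSession("reddit")

	// With an entry like ext-cfg-example.yaml above, requests go through the
	// configured proxy; no_proxy hosts bypass it; with edge_proxy_url set an
	// EdgeProxyClient is returned; with no entry this is GetDefaultHTTPSession().
	req, _ := http.NewRequest(http.MethodGet, "https://www.reddit.com/", nil)
	if resp, err := client.Do(req); err == nil {
		resp.Body.Close()
	}
}
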
GetLocationURL helper (util package)
@@ -29,7 +29,7 @@ func GetLocationURL(
 		userAgent = ChromeUA
 	}
 	req.Header.Set("User-Agent", userAgent)
-	session := GetHTTPSession()
+	session := GetDefaultHTTPSession()
 	resp, err := session.Do(req)
 	if err != nil {
 		return "", fmt.Errorf("failed to send request: %w", err)