Init
This commit is contained in:
parent
264c97183e
commit
3faede7b1c
74 changed files with 6228 additions and 1 deletions
169
ext/instagram/main.go
Normal file
169
ext/instagram/main.go
Normal file
|
@ -0,0 +1,169 @@
|
|||
package instagram
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"govd/enums"
|
||||
"govd/models"
|
||||
"govd/util"
|
||||
"io"
|
||||
"net/http"
|
||||
"regexp"
|
||||
|
||||
"github.com/quic-go/quic-go"
|
||||
"github.com/quic-go/quic-go/http3"
|
||||
)
|
||||
|
||||
// as a public service, we can't use the official API
|
||||
// so we use igram.world API, a third-party service
|
||||
// that provides a similar functionality
|
||||
// feel free to open PR, if you want to
|
||||
// add support for the official Instagram API
|
||||
|
||||
// Parameters of the igram.world conversion API.
//
// todo: implement a proper way to obtain the API key and timestamp
// instead of hard-coding them here.
const (
	// apiHostname is the host GetVideoAPI sends its requests to.
	apiHostname = "api.igram.world"
	// apiKey is the suffix of the string hashed by BuildSignedPayload.
	apiKey = "aaeaf2805cea6abef3f9d2b6a666fce62fd9d612a43ab772bb50ce81455112e0"
	// apiTimestamp is sent unchanged as the "_ts" field of the payload.
	apiTimestamp = "1742201548873"
)
|
||||
|
||||
var HTTPClient = &http.Client{
|
||||
Transport: &http3.Transport{
|
||||
TLSClientConfig: &tls.Config{
|
||||
InsecureSkipVerify: true,
|
||||
},
|
||||
QUICConfig: &quic.Config{
|
||||
MaxIncomingStreams: -1,
|
||||
EnableDatagrams: true,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
var Extractor = &models.Extractor{
|
||||
Name: "Instagram",
|
||||
CodeName: "instagram",
|
||||
Type: enums.ExtractorTypeSingle,
|
||||
Category: enums.ExtractorCategorySocial,
|
||||
URLPattern: regexp.MustCompile(`https:\/\/www\.instagram\.com\/(reel|p|tv)\/(?P<id>[a-zA-Z0-9_-]+)`),
|
||||
IsRedirect: false,
|
||||
|
||||
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
|
||||
mediaList, err := MediaListFromAPI(ctx, false)
|
||||
return &models.ExtractorResponse{
|
||||
MediaList: mediaList,
|
||||
}, err
|
||||
},
|
||||
}
|
||||
|
||||
var StoriesExtractor = &models.Extractor{
|
||||
Name: "Instagram Stories",
|
||||
CodeName: "instagram:stories",
|
||||
Type: enums.ExtractorTypeSingle,
|
||||
Category: enums.ExtractorCategorySocial,
|
||||
URLPattern: regexp.MustCompile(`https:\/\/www\.instagram\.com\/stories\/[a-zA-Z0-9._]+\/(?P<id>\d+)`),
|
||||
IsRedirect: false,
|
||||
|
||||
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
|
||||
mediaList, err := MediaListFromAPI(ctx, true)
|
||||
return &models.ExtractorResponse{
|
||||
MediaList: mediaList,
|
||||
}, err
|
||||
},
|
||||
}
|
||||
|
||||
func MediaListFromAPI(
|
||||
ctx *models.DownloadContext,
|
||||
stories bool,
|
||||
) ([]*models.Media, error) {
|
||||
var mediaList []*models.Media
|
||||
postURL := ctx.MatchedContentURL
|
||||
details, err := GetVideoAPI(postURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get post: %w", err)
|
||||
}
|
||||
var caption string
|
||||
if !stories {
|
||||
caption, err = GetPostCaption(postURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get caption: %w", err)
|
||||
}
|
||||
}
|
||||
for _, item := range details.Items {
|
||||
media := ctx.Extractor.NewMedia(
|
||||
ctx.MatchedContentID,
|
||||
ctx.MatchedContentURL,
|
||||
)
|
||||
media.SetCaption(caption)
|
||||
urlObj := item.URL[0]
|
||||
contentURL, err := GetCDNURL(urlObj.URL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
thumbnailURL, err := GetCDNURL(item.Thumb)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fileExt := urlObj.Ext
|
||||
formatID := urlObj.Type
|
||||
switch fileExt {
|
||||
case "mp4":
|
||||
media.AddFormat(&models.MediaFormat{
|
||||
Type: enums.MediaTypeVideo,
|
||||
FormatID: formatID,
|
||||
URL: []string{contentURL},
|
||||
VideoCodec: enums.MediaCodecAVC,
|
||||
AudioCodec: enums.MediaCodecAAC,
|
||||
Thumbnail: []string{thumbnailURL},
|
||||
},
|
||||
)
|
||||
case "jpg", "webp", "heic", "jpeg":
|
||||
media.AddFormat(&models.MediaFormat{
|
||||
Type: enums.MediaTypePhoto,
|
||||
FormatID: formatID,
|
||||
URL: []string{contentURL},
|
||||
})
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown format: %s", fileExt)
|
||||
}
|
||||
mediaList = append(mediaList, media)
|
||||
}
|
||||
|
||||
return mediaList, nil
|
||||
}
|
||||
|
||||
func GetVideoAPI(contentURL string) (*IGramResponse, error) {
|
||||
apiURL := fmt.Sprintf(
|
||||
"https://%s/api/convert",
|
||||
apiHostname,
|
||||
)
|
||||
payload, err := BuildSignedPayload(contentURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to build signed payload: %w", err)
|
||||
}
|
||||
req, err := http.NewRequest("POST", apiURL, payload)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("User-Agent", util.ChromeUA)
|
||||
|
||||
resp, err := HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("failed to get response: %s", resp.Status)
|
||||
}
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response body: %w", err)
|
||||
}
|
||||
response, err := ParseIGramResponse(body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
return response, nil
|
||||
}
|
19
ext/instagram/models.go
Normal file
19
ext/instagram/models.go
Normal file
|
@ -0,0 +1,19 @@
|
|||
package instagram
|
||||
|
||||
// IGramResponse is the normalized form of an igram API reply: a list of
// media items.
type IGramResponse struct {
	Items []*IGramMedia `json:"items"`
}

// IGramMedia is one media item of an igram API reply.
type IGramMedia struct {
	// URL lists the download entries of the item.
	URL []*MediaURL `json:"url"`
	// Thumb is the item's thumbnail URL as delivered by the API.
	Thumb string `json:"thumb"`
	// Hosting and Timestamp are decoded from the reply as-is.
	// NOTE(review): their exact meaning is not visible here - confirm.
	Hosting   string `json:"hosting"`
	Timestamp int    `json:"timestamp"`
}

// MediaURL is a single download entry of an IGramMedia.
type MediaURL struct {
	URL  string `json:"url"`
	Name string `json:"name"`
	Type string `json:"type"`
	// Ext is the file extension, e.g. "mp4" or "jpg".
	Ext string `json:"ext"`
}
|
139
ext/instagram/util.go
Normal file
139
ext/instagram/util.go
Normal file
|
@ -0,0 +1,139 @@
|
|||
package instagram
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"govd/util"
|
||||
"html"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// captionPattern captures, as group 1, the quoted text that follows the
// first ": " inside the content attribute of the og:title meta tag. The (?s)
// flag lets the caption span multiple lines.
//
// NOTE(review): the inner quotes may have been HTML entities (&quot;) in the
// real page markup - confirm against a live response.
var captionPattern = regexp.MustCompile(`(?s)<meta property="og:title" content=".*?: "(.*?)""`)
|
||||
|
||||
func BuildSignedPayload(contentURL string) (io.Reader, error) {
|
||||
timestamp := fmt.Sprintf("%d", time.Now().UnixMilli())
|
||||
hash := sha256.New()
|
||||
_, err := io.WriteString(
|
||||
hash,
|
||||
contentURL+timestamp+apiKey,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error writing to SHA256 hash: %w", err)
|
||||
}
|
||||
secretBytes := hash.Sum(nil)
|
||||
secretString := fmt.Sprintf("%x", secretBytes)
|
||||
secretString = strings.ToLower(secretString)
|
||||
payload := map[string]string{
|
||||
"url": contentURL,
|
||||
"ts": timestamp,
|
||||
"_ts": apiTimestamp,
|
||||
"_tsc": "0", // ?
|
||||
"_s": secretString,
|
||||
}
|
||||
parsedPayload, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error marshalling payload: %w", err)
|
||||
}
|
||||
reader := strings.NewReader(string(parsedPayload))
|
||||
return reader, nil
|
||||
}
|
||||
|
||||
func ParseIGramResponse(body []byte) (*IGramResponse, error) {
|
||||
var rawResponse interface{}
|
||||
if err := json.Unmarshal(body, &rawResponse); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode response: %w", err)
|
||||
}
|
||||
switch rawResponse.(type) {
|
||||
case []interface{}:
|
||||
// array of IGramMedia
|
||||
var media []*IGramMedia
|
||||
if err := json.Unmarshal(body, &media); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode response: %w", err)
|
||||
}
|
||||
return &IGramResponse{
|
||||
Items: media,
|
||||
}, nil
|
||||
case map[string]interface{}:
|
||||
// single IGramMedia
|
||||
var media IGramMedia
|
||||
if err := json.Unmarshal(body, &media); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode response: %w", err)
|
||||
}
|
||||
return &IGramResponse{
|
||||
Items: []*IGramMedia{&media},
|
||||
}, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected response type: %T", rawResponse)
|
||||
}
|
||||
}
|
||||
|
||||
// GetCDNURL extracts the real CDN address from an igram proxy URL, which
// carries it in the "uri" query parameter.
//
// It returns an empty string and a nil error when the parameter is absent.
// Parse failures are wrapped with %w so callers can inspect the underlying
// error with errors.Is / errors.As.
func GetCDNURL(contentURL string) (string, error) {
	parsedURL, err := url.Parse(contentURL)
	if err != nil {
		return "", fmt.Errorf("can't parse igram URL: %w", err)
	}
	// ParseQuery (unlike URL.Query) reports malformed escapes.
	queryParams, err := url.ParseQuery(parsedURL.RawQuery)
	if err != nil {
		return "", fmt.Errorf("can't unescape igram URL: %w", err)
	}
	return queryParams.Get("uri"), nil
}
|
||||
|
||||
func GetPostCaption(
|
||||
postURL string,
|
||||
) (string, error) {
|
||||
req, err := http.NewRequest(
|
||||
http.MethodGet,
|
||||
postURL,
|
||||
nil,
|
||||
)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
req.Header.Set("User-Agent", util.ChromeUA)
|
||||
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
|
||||
req.Header.Set("Accept-Language", "it-IT,it;q=0.8,en-US;q=0.5,en;q=0.3")
|
||||
req.Header.Set("Referer", "https://www.instagram.com/accounts/onetap/?next=%2F")
|
||||
req.Header.Set("Alt-Used", "www.instagram.com")
|
||||
req.Header.Set("Connection", "keep-alive")
|
||||
req.Header.Set("Cookie", `csrftoken=Ib2Zuvf1y9HkDwXFxkdang; sessionid=8569455296%3AIFQiov2eYfTdSd%3A19%3AAYfVHnaxecWGWhyzxvz60vu5qLn05DyKgN_tTZUXTA; ds_user_id=8569455296; mid=Z_j1vQAEAAGVUE3KuxMR7vBonGBw; ig_did=BC48C8B7-D71B-49EF-8195-F9DE37A57B49; rur="CLN\0548569455296\0541775905137:01f7ebda5b896815e9279bb86a572db6bdc8ebccf3e1f8d5327e2bc5ca187fd5cd932b66"; wd=513x594; datr=x_X4Z_CHqpwtjaRKq7PtCNu3`)
|
||||
req.Header.Set("Upgrade-Insecure-Requests", "1")
|
||||
req.Header.Set("Sec-Fetch-Dest", "document")
|
||||
req.Header.Set("Sec-Fetch-Mode", "navigate")
|
||||
req.Header.Set("Sec-Fetch-Site", "same-origin")
|
||||
req.Header.Set("Priority", "u=0, i")
|
||||
req.Header.Set("Pragma", "no-cache")
|
||||
req.Header.Set("Cache-Control", "no-cache")
|
||||
req.Header.Set("TE", "trailers")
|
||||
|
||||
resp, err := HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("failed to get response: %s", resp.Status)
|
||||
}
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to read response body: %w", err)
|
||||
}
|
||||
|
||||
matches := captionPattern.FindStringSubmatch(string(body))
|
||||
if len(matches) < 2 {
|
||||
// post has no caption most likely
|
||||
return "", nil
|
||||
} else {
|
||||
return html.UnescapeString(matches[1]), nil
|
||||
}
|
||||
}
|
24
ext/main.go
Normal file
24
ext/main.go
Normal file
|
@ -0,0 +1,24 @@
|
|||
package ext
|
||||
|
||||
import (
|
||||
"govd/ext/instagram"
|
||||
"govd/ext/pinterest"
|
||||
"govd/ext/reddit"
|
||||
"govd/ext/tiktok"
|
||||
"govd/ext/twitter"
|
||||
"govd/models"
|
||||
)
|
||||
|
||||
var List = []*models.Extractor{
|
||||
tiktok.Extractor,
|
||||
tiktok.VMExtractor,
|
||||
instagram.Extractor,
|
||||
instagram.StoriesExtractor,
|
||||
twitter.Extractor,
|
||||
twitter.ShortExtractor,
|
||||
pinterest.Extractor,
|
||||
pinterest.ShortExtractor,
|
||||
reddit.Extractor,
|
||||
reddit.ShortExtractor,
|
||||
// todo: add every ext lol
|
||||
}
|
172
ext/pinterest/main.go
Normal file
172
ext/pinterest/main.go
Normal file
|
@ -0,0 +1,172 @@
|
|||
package pinterest
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"regexp"
|
||||
|
||||
"govd/enums"
|
||||
"govd/models"
|
||||
"govd/util"
|
||||
)
|
||||
|
||||
// Pinterest endpoints used by this package.
const (
	// pinResourceEndpoint is queried by GetPinData for a pin's metadata.
	pinResourceEndpoint = "https://www.pinterest.com/resource/PinResource/get/"
	// shortenerAPIFormat takes the short-link id as its only %s argument.
	shortenerAPIFormat = "https://api.pinterest.com/url_shortener/%s/redirect/"
)
|
||||
|
||||
var ShortExtractor = &models.Extractor{
|
||||
Name: "Pinterest (Short)",
|
||||
CodeName: "pinterest:short",
|
||||
Type: enums.ExtractorTypeSingle,
|
||||
Category: enums.ExtractorCategorySocial,
|
||||
URLPattern: regexp.MustCompile(`https?://(\w+\.)?pin\.\w+/(?P<id>\w+)`),
|
||||
IsRedirect: true,
|
||||
|
||||
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
|
||||
shortURL := fmt.Sprintf(shortenerAPIFormat, ctx.MatchedContentID)
|
||||
location, err := util.GetLocationURL(shortURL, "")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get real url: %w", err)
|
||||
}
|
||||
return &models.ExtractorResponse{
|
||||
URL: location,
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
var Extractor = &models.Extractor{
|
||||
Name: "Pinterest",
|
||||
CodeName: "pinterest",
|
||||
Type: enums.ExtractorTypeSingle,
|
||||
Category: enums.ExtractorCategorySocial,
|
||||
URLPattern: regexp.MustCompile(`https?://(\w+\.)?pinterest[\.\w]+/pin/(?P<id>\d+)`),
|
||||
|
||||
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
|
||||
media, err := ExtractPinMedia(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &models.ExtractorResponse{
|
||||
MediaList: media,
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
func ExtractPinMedia(ctx *models.DownloadContext) ([]*models.Media, error) {
|
||||
pinID := ctx.MatchedContentID
|
||||
contentURL := ctx.MatchedContentURL
|
||||
|
||||
pinData, err := GetPinData(pinID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
media := ctx.Extractor.NewMedia(pinID, contentURL)
|
||||
media.SetCaption(pinData.Title)
|
||||
|
||||
if pinData.Videos != nil && pinData.Videos.VideoList != nil {
|
||||
formats, err := ParseVideoObject(pinData.Videos)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, format := range formats {
|
||||
media.AddFormat(format)
|
||||
}
|
||||
return []*models.Media{media}, nil
|
||||
}
|
||||
|
||||
if pinData.StoryPinData != nil && len(pinData.StoryPinData.Pages) > 0 {
|
||||
for _, page := range pinData.StoryPinData.Pages {
|
||||
for _, block := range page.Blocks {
|
||||
if block.BlockType == 3 && block.Video != nil { // blockType 3 = Video
|
||||
formats, err := ParseVideoObject(block.Video)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, format := range formats {
|
||||
media.AddFormat(format)
|
||||
}
|
||||
return []*models.Media{media}, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if pinData.Images != nil && pinData.Images.Orig != nil {
|
||||
imageURL := pinData.Images.Orig.URL
|
||||
media.AddFormat(&models.MediaFormat{
|
||||
FormatID: "photo",
|
||||
Type: enums.MediaTypePhoto,
|
||||
URL: []string{imageURL},
|
||||
})
|
||||
return []*models.Media{media}, nil
|
||||
} else if pinData.StoryPinData != nil && len(pinData.StoryPinData.Pages) > 0 {
|
||||
for _, page := range pinData.StoryPinData.Pages {
|
||||
if page.Image != nil && page.Image.Images.Originals != nil {
|
||||
media.AddFormat(&models.MediaFormat{
|
||||
FormatID: "photo",
|
||||
Type: enums.MediaTypePhoto,
|
||||
URL: []string{page.Image.Images.Originals.URL},
|
||||
})
|
||||
return []*models.Media{media}, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if pinData.Embed != nil && pinData.Embed.Type == "gif" {
|
||||
media.AddFormat(&models.MediaFormat{
|
||||
FormatID: "gif",
|
||||
Type: enums.MediaTypeVideo,
|
||||
VideoCodec: enums.MediaCodecAVC,
|
||||
URL: []string{pinData.Embed.Src},
|
||||
})
|
||||
return []*models.Media{media}, nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("no media found for pin ID: %s", pinID)
|
||||
}
|
||||
|
||||
func GetPinData(pinID string) (*PinData, error) {
|
||||
params := BuildPinRequestParams(pinID)
|
||||
|
||||
req, err := http.NewRequest("GET", pinResourceEndpoint, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
q := req.URL.Query()
|
||||
for key, value := range params {
|
||||
q.Add(key, value)
|
||||
}
|
||||
req.URL.RawQuery = q.Encode()
|
||||
req.Header.Set("User-Agent", util.ChromeUA)
|
||||
|
||||
// fix 403 error
|
||||
req.Header.Set("X-Pinterest-PWS-Handler", "www/[username].js")
|
||||
|
||||
client := &http.Client{}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("bad response: %s", resp.Status)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response body: %w", err)
|
||||
}
|
||||
|
||||
var pinResponse PinResponse
|
||||
err = json.Unmarshal(body, &pinResponse)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
|
||||
return &pinResponse.ResourceResponse.Data, nil
|
||||
}
|
62
ext/pinterest/models.go
Normal file
62
ext/pinterest/models.go
Normal file
|
@ -0,0 +1,62 @@
|
|||
package pinterest
|
||||
|
||||
// PinResponse is the envelope returned by the Pinterest pin resource
// endpoint; the pin itself sits under resource_response.data.
type PinResponse struct {
	ResourceResponse struct {
		Data PinData `json:"data"`
	} `json:"resource_response"`
}

// PinData describes one pin. The media-bearing fields are pointers so that
// an absent section decodes to nil.
type PinData struct {
	ID           string    `json:"id"`
	Title        string    `json:"title"`
	Description  string    `json:"description"`
	Images       *Images   `json:"images,omitempty"`
	Videos       *Videos   `json:"videos,omitempty"`
	StoryPinData *StoryPin `json:"story_pin_data,omitempty"`
	Embed        *Embed    `json:"embed,omitempty"`
}

// Images holds the image renditions of a pin; only the original is decoded.
type Images struct {
	Orig *ImageObject `json:"orig"`
}

// ImageObject is a single image rendition.
type ImageObject struct {
	URL    string `json:"url"`
	Width  int    `json:"width"`
	Height int    `json:"height"`
}

// Videos maps a rendition key (for example "HLS") to its video description.
type Videos struct {
	VideoList map[string]*VideoObject `json:"video_list"`
}

// VideoObject is a single video rendition.
type VideoObject struct {
	URL    string `json:"url"`
	Width  int64  `json:"width"`
	Height int64  `json:"height"`
	// Duration is divided by 1000 by ParseVideoObject.
	// NOTE(review): presumably milliseconds - confirm.
	Duration  int64  `json:"duration"`
	Thumbnail string `json:"thumbnail"`
}

// StoryPin is the multi-page content of a story pin.
type StoryPin struct {
	Pages []Page `json:"pages"`
}

// Page is one page of a story pin: its blocks plus an optional image.
type Page struct {
	Blocks []Block `json:"blocks"`
	Image  *struct {
		Images struct {
			Originals *ImageObject `json:"originals"`
		} `json:"images"`
	} `json:"image,omitempty"`
}

// Block is one content block of a story-pin page. ExtractPinMedia treats
// BlockType 3 as a video block.
type Block struct {
	BlockType int     `json:"block_type"`
	Video     *Videos `json:"video,omitempty"`
}

// Embed describes embedded content; ExtractPinMedia uses it when Type is
// "gif".
type Embed struct {
	Type string `json:"type"`
	Src  string `json:"src"`
}
|
55
ext/pinterest/util.go
Normal file
55
ext/pinterest/util.go
Normal file
|
@ -0,0 +1,55 @@
|
|||
package pinterest
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"govd/enums"
|
||||
"govd/models"
|
||||
"govd/util/parser"
|
||||
)
|
||||
|
||||
func ParseVideoObject(videoObj *Videos) ([]*models.MediaFormat, error) {
|
||||
var formats []*models.MediaFormat
|
||||
|
||||
for key, video := range videoObj.VideoList {
|
||||
if key != "HLS" {
|
||||
formats = append(formats, &models.MediaFormat{
|
||||
FormatID: key,
|
||||
URL: []string{video.URL},
|
||||
Type: enums.MediaTypeVideo,
|
||||
VideoCodec: enums.MediaCodecAVC,
|
||||
AudioCodec: enums.MediaCodecAAC,
|
||||
Width: video.Width,
|
||||
Height: video.Height,
|
||||
Duration: video.Duration / 1000,
|
||||
Thumbnail: []string{video.Thumbnail},
|
||||
})
|
||||
} else {
|
||||
hlsFormats, err := parser.ParseM3U8FromURL(video.URL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to extract hls formats: %w", err)
|
||||
}
|
||||
for _, hlsFormat := range hlsFormats {
|
||||
hlsFormat.Duration = video.Duration / 1000
|
||||
hlsFormat.Thumbnail = []string{video.Thumbnail}
|
||||
formats = append(formats, hlsFormat)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return formats, nil
|
||||
}
|
||||
|
||||
// BuildPinRequestParams returns the query parameters for a pin resource
// request: a single "data" parameter holding the JSON-encoded options with
// the field set key and the pin id.
func BuildPinRequestParams(pinID string) map[string]string {
	request := map[string]map[string]string{
		"options": {
			"field_set_key": "unauth_react_main_pin",
			"id":            pinID,
		},
	}

	// Marshalling a map of plain strings cannot fail, so the error is
	// deliberately ignored.
	encoded, _ := json.Marshal(request)

	params := make(map[string]string, 1)
	params["data"] = string(encoded)
	return params
}
|
267
ext/reddit/main.go
Normal file
267
ext/reddit/main.go
Normal file
|
@ -0,0 +1,267 @@
|
|||
package reddit
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"regexp"
|
||||
|
||||
"govd/enums"
|
||||
"govd/models"
|
||||
"govd/util"
|
||||
)
|
||||
|
||||
// HTTPClient is the client used for all Reddit requests. It follows
// redirects and gives up once ten requests have already been made.
var HTTPClient = &http.Client{
	CheckRedirect: func(req *http.Request, via []*http.Request) error {
		if len(via) < 10 {
			return nil
		}
		return fmt.Errorf("stopped after 10 redirects")
	},
}
|
||||
|
||||
var ShortExtractor = &models.Extractor{
|
||||
Name: "Reddit (Short)",
|
||||
CodeName: "reddit:short",
|
||||
Type: enums.ExtractorTypeSingle,
|
||||
Category: enums.ExtractorCategorySocial,
|
||||
URLPattern: regexp.MustCompile(`https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?s/(?P<id>[^/?#&]+))`),
|
||||
IsRedirect: true,
|
||||
|
||||
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
|
||||
req, err := http.NewRequest("GET", ctx.MatchedContentURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", util.ChromeUA)
|
||||
cookies, err := util.ParseCookieFile("reddit.txt")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get cookies: %w", err)
|
||||
}
|
||||
for _, cookie := range cookies {
|
||||
req.AddCookie(cookie)
|
||||
}
|
||||
|
||||
res, err := HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
location := res.Request.URL.String()
|
||||
|
||||
return &models.ExtractorResponse{
|
||||
URL: location,
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
var Extractor = &models.Extractor{
|
||||
Name: "Reddit",
|
||||
CodeName: "reddit",
|
||||
Type: enums.ExtractorTypeSingle,
|
||||
Category: enums.ExtractorCategorySocial,
|
||||
URLPattern: regexp.MustCompile(`https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))`),
|
||||
|
||||
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
|
||||
mediaList, err := MediaListFromAPI(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get media: %w", err)
|
||||
}
|
||||
return &models.ExtractorResponse{
|
||||
MediaList: mediaList,
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
|
||||
host := ctx.MatchedGroups["host"]
|
||||
slug := ctx.MatchedGroups["slug"]
|
||||
|
||||
contentID := ctx.MatchedContentID
|
||||
contentURL := ctx.MatchedContentURL
|
||||
|
||||
manifest, err := GetRedditData(host, slug)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(manifest) == 0 || len(manifest[0].Data.Children) == 0 {
|
||||
return nil, fmt.Errorf("no data found in response")
|
||||
}
|
||||
|
||||
data := manifest[0].Data.Children[0].Data
|
||||
title := data.Title
|
||||
isNsfw := data.Over18
|
||||
var mediaList []*models.Media
|
||||
|
||||
if !data.IsVideo {
|
||||
// check for single photo
|
||||
if data.Preview != nil && len(data.Preview.Images) > 0 {
|
||||
media := ctx.Extractor.NewMedia(contentID, contentURL)
|
||||
media.SetCaption(title)
|
||||
if isNsfw {
|
||||
media.NSFW = true
|
||||
}
|
||||
|
||||
image := data.Preview.Images[0]
|
||||
|
||||
// check for video preview (GIF)
|
||||
if data.Preview.RedditVideoPreview != nil {
|
||||
formats, err := GetHLSFormats(
|
||||
data.Preview.RedditVideoPreview.FallbackURL,
|
||||
image.Source.URL,
|
||||
data.Preview.RedditVideoPreview.Duration,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, format := range formats {
|
||||
media.AddFormat(format)
|
||||
}
|
||||
|
||||
mediaList = append(mediaList, media)
|
||||
return mediaList, nil
|
||||
}
|
||||
|
||||
// check for MP4 variant (animated GIF)
|
||||
if image.Variants.MP4 != nil {
|
||||
media.AddFormat(&models.MediaFormat{
|
||||
FormatID: "gif",
|
||||
Type: enums.MediaTypeVideo,
|
||||
VideoCodec: enums.MediaCodecAVC,
|
||||
AudioCodec: enums.MediaCodecAAC,
|
||||
URL: []string{util.FixURL(image.Variants.MP4.Source.URL)},
|
||||
Thumbnail: []string{util.FixURL(image.Source.URL)},
|
||||
})
|
||||
|
||||
mediaList = append(mediaList, media)
|
||||
return mediaList, nil
|
||||
}
|
||||
|
||||
// regular photo
|
||||
media.AddFormat(&models.MediaFormat{
|
||||
FormatID: "photo",
|
||||
Type: enums.MediaTypePhoto,
|
||||
URL: []string{util.FixURL(image.Source.URL)},
|
||||
})
|
||||
|
||||
mediaList = append(mediaList, media)
|
||||
return mediaList, nil
|
||||
}
|
||||
|
||||
// check for gallery/collection
|
||||
if len(data.MediaMetadata) > 0 {
|
||||
for key, obj := range data.MediaMetadata {
|
||||
if obj.E == "Image" {
|
||||
media := ctx.Extractor.NewMedia(key, contentURL)
|
||||
media.SetCaption(title)
|
||||
if isNsfw {
|
||||
media.NSFW = true
|
||||
}
|
||||
|
||||
media.AddFormat(&models.MediaFormat{
|
||||
FormatID: "photo",
|
||||
Type: enums.MediaTypePhoto,
|
||||
URL: []string{util.FixURL(obj.S.U)},
|
||||
})
|
||||
|
||||
mediaList = append(mediaList, media)
|
||||
}
|
||||
}
|
||||
|
||||
return mediaList, nil
|
||||
}
|
||||
} else {
|
||||
// video
|
||||
media := ctx.Extractor.NewMedia(contentID, contentURL)
|
||||
media.SetCaption(title)
|
||||
if isNsfw {
|
||||
media.NSFW = true
|
||||
}
|
||||
|
||||
var redditVideo *RedditVideo
|
||||
|
||||
if data.Media != nil && data.Media.RedditVideo != nil {
|
||||
redditVideo = data.Media.RedditVideo
|
||||
} else if data.SecureMedia != nil && data.SecureMedia.RedditVideo != nil {
|
||||
redditVideo = data.SecureMedia.RedditVideo
|
||||
}
|
||||
|
||||
if redditVideo != nil {
|
||||
thumbnail := data.Thumbnail
|
||||
|
||||
if (thumbnail == "nsfw" || thumbnail == "spoiler") && data.Preview != nil && len(data.Preview.Images) > 0 {
|
||||
thumbnail = data.Preview.Images[0].Source.URL
|
||||
}
|
||||
|
||||
formats, err := GetHLSFormats(
|
||||
redditVideo.FallbackURL,
|
||||
thumbnail,
|
||||
redditVideo.Duration,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, format := range formats {
|
||||
media.AddFormat(format)
|
||||
}
|
||||
|
||||
mediaList = append(mediaList, media)
|
||||
return mediaList, nil
|
||||
}
|
||||
}
|
||||
|
||||
return mediaList, nil
|
||||
}
|
||||
|
||||
func GetRedditData(host string, slug string) (RedditResponse, error) {
|
||||
url := fmt.Sprintf("https://%s/%s/.json", host, slug)
|
||||
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", util.ChromeUA)
|
||||
cookies, err := util.ParseCookieFile("reddit.txt")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get cookies: %w", err)
|
||||
}
|
||||
for _, cookie := range cookies {
|
||||
req.AddCookie(cookie)
|
||||
}
|
||||
|
||||
res, err := HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
if res.StatusCode != http.StatusOK {
|
||||
// try with alternative domain
|
||||
altHost := "old.reddit.com"
|
||||
if host == "old.reddit.com" {
|
||||
altHost = "www.reddit.com"
|
||||
}
|
||||
|
||||
return GetRedditData(altHost, slug)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response body: %w", err)
|
||||
}
|
||||
|
||||
var response RedditResponse
|
||||
err = json.Unmarshal(body, &response)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
|
||||
return response, nil
|
||||
}
|
74
ext/reddit/models.go
Normal file
74
ext/reddit/models.go
Normal file
|
@ -0,0 +1,74 @@
|
|||
package reddit
|
||||
|
||||
// RedditResponse is the top-level JSON array returned for a post: a list of
// listings, each holding child entries that wrap a PostData.
type RedditResponse []struct {
	Data struct {
		Children []struct {
			Data PostData `json:"data"`
		} `json:"children"`
	} `json:"data"`
}

// PostData is the subset of a Reddit post that the extractor reads.
type PostData struct {
	ID      string `json:"id"`
	Title   string `json:"title"`
	IsVideo bool   `json:"is_video"`
	// Thumbnail may hold the placeholders "nsfw" or "spoiler", which
	// MediaListFromAPI replaces with the preview image.
	Thumbnail     string                   `json:"thumbnail"`
	Media         *Media                   `json:"media"`
	Preview       *Preview                 `json:"preview"`
	MediaMetadata map[string]MediaMetadata `json:"media_metadata"`
	SecureMedia   *Media                   `json:"secure_media"`
	Over18        bool                     `json:"over_18"`
}

// Media wraps the reddit-hosted video of a post, when there is one.
type Media struct {
	RedditVideo *RedditVideo `json:"reddit_video"`
}

// RedditVideo describes a reddit-hosted video and its stream URLs.
type RedditVideo struct {
	FallbackURL      string `json:"fallback_url"`
	HLSURL           string `json:"hls_url"`
	DashURL          string `json:"dash_url"`
	Duration         int64  `json:"duration"`
	Height           int64  `json:"height"`
	Width            int64  `json:"width"`
	ScrubberMediaURL string `json:"scrubber_media_url"`
}

// Preview holds the preview images of a post and an optional video preview.
type Preview struct {
	Images             []Image             `json:"images"`
	RedditVideoPreview *RedditVideoPreview `json:"reddit_video_preview"`
}

// Image is a preview image together with its alternative variants.
type Image struct {
	Source   ImageSource   `json:"source"`
	Variants ImageVariants `json:"variants"`
}

// ImageSource is a concrete image (or variant) location with dimensions.
type ImageSource struct {
	URL    string `json:"url"`
	Width  int64  `json:"width"`
	Height int64  `json:"height"`
}

// ImageVariants lists the alternative encodings of a preview image; only
// the MP4 variant is decoded.
type ImageVariants struct {
	MP4 *MP4Variant `json:"mp4"`
}

// MP4Variant is the MP4 encoding of a preview image.
type MP4Variant struct {
	Source ImageSource `json:"source"`
}

// RedditVideoPreview is the video preview attached to a non-video post.
type RedditVideoPreview struct {
	FallbackURL string `json:"fallback_url"`
	Duration    int64  `json:"duration"`
}

// MediaMetadata is one gallery entry. MediaListFromAPI keeps entries whose
// E is "Image" and downloads S.U.
type MediaMetadata struct {
	Status string `json:"status"`
	E      string `json:"e"`
	S      struct {
		U string `json:"u"`
		X int64  `json:"x"`
		Y int64  `json:"y"`
	} `json:"s"`
}
|
39
ext/reddit/util.go
Normal file
39
ext/reddit/util.go
Normal file
|
@ -0,0 +1,39 @@
|
|||
package reddit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"govd/models"
|
||||
"govd/util"
|
||||
"govd/util/parser"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
// hlsURLFormat builds the HLS playlist URL of a v.redd.it video; its only
// %s argument is the video id.
const hlsURLFormat = "https://v.redd.it/%s/HLSPlaylist.m3u8"

// videoURLPattern captures, as group 1, the video id from a v.redd.it URL.
var videoURLPattern = regexp.MustCompile(`https?://v\.redd\.it/([^/]+)`)
|
||||
|
||||
func GetHLSFormats(videoURL string, thumbnail string, duration int64) ([]*models.MediaFormat, error) {
|
||||
matches := videoURLPattern.FindStringSubmatch(videoURL)
|
||||
if len(matches) < 2 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
videoID := matches[1]
|
||||
hlsURL := fmt.Sprintf(hlsURLFormat, videoID)
|
||||
|
||||
formats, err := parser.ParseM3U8FromURL(hlsURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, format := range formats {
|
||||
format.Duration = duration
|
||||
if thumbnail != "" {
|
||||
format.Thumbnail = []string{util.FixURL(thumbnail)}
|
||||
}
|
||||
}
|
||||
|
||||
return formats, nil
|
||||
}
|
184
ext/tiktok/main.go
Normal file
184
ext/tiktok/main.go
Normal file
|
@ -0,0 +1,184 @@
|
|||
package tiktok
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"regexp"
|
||||
|
||||
"github.com/quic-go/quic-go"
|
||||
"github.com/quic-go/quic-go/http3"
|
||||
|
||||
"govd/enums"
|
||||
"govd/models"
|
||||
"govd/util"
|
||||
)
|
||||
|
||||
// Fixed identity values sent to the TikTok mobile API: the API host, the
// installation and application identifiers, and the Android user agent
// derived from them.
const (
	apiHostname        = "api16-normal-c-useast1a.tiktokv.com"
	installationID     = "7127307272354596614"
	appName            = "musical_ly"
	appID              = "1233"
	appVersion         = "37.1.4"
	manifestAppVersion = "2023508030"
	// packageID is the Android package name followed by the manifest version.
	packageID = "com.zhiliaoapp.musically/" + manifestAppVersion
	// appUserAgent is sent as the User-Agent header of API requests.
	appUserAgent = packageID + " (Linux; U; Android 13; en_US; Pixel 7; Build/TD1A.220804.031; Cronet/58.0.2991.0)"
)
|
||||
|
||||
var HTTPClient = &http.Client{
|
||||
Transport: &http3.Transport{
|
||||
TLSClientConfig: &tls.Config{
|
||||
InsecureSkipVerify: true,
|
||||
},
|
||||
QUICConfig: &quic.Config{
|
||||
MaxIncomingStreams: -1,
|
||||
EnableDatagrams: true,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
var VMExtractor = &models.Extractor{
|
||||
Name: "TikTok VM",
|
||||
CodeName: "tiktokvm",
|
||||
Type: enums.ExtractorTypeSingle,
|
||||
Category: enums.ExtractorCategorySocial,
|
||||
URLPattern: regexp.MustCompile(`https:\/\/((?:vm|vt|www)\.)?(vx)?tiktok\.com\/(?:t\/)?(?P<id>[a-zA-Z0-9]+)`),
|
||||
IsRedirect: true,
|
||||
|
||||
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
|
||||
location, err := util.GetLocationURL(ctx.MatchedContentURL, "")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get url location: %w", err)
|
||||
}
|
||||
return &models.ExtractorResponse{
|
||||
URL: location,
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
var Extractor = &models.Extractor{
|
||||
Name: "TikTok",
|
||||
CodeName: "tiktok",
|
||||
Type: enums.ExtractorTypeSingle,
|
||||
Category: enums.ExtractorCategorySocial,
|
||||
URLPattern: regexp.MustCompile(`https?:\/\/((www|m)\.)?(vx)?tiktok\.com\/((?:embed|@[\w\.-]+)\/)?(v(ideo)?|p(hoto)?)\/(?P<id>[0-9]+)`),
|
||||
|
||||
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
|
||||
mediaList, err := MediaListFromAPI(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get media: %w", err)
|
||||
}
|
||||
return &models.ExtractorResponse{
|
||||
MediaList: mediaList,
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
|
||||
var mediaList []*models.Media
|
||||
|
||||
details, err := GetVideoAPI(ctx.MatchedContentID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get from api: %w", err)
|
||||
}
|
||||
caption := details.Desc
|
||||
isImageSlide := details.ImagePostInfo != nil
|
||||
if !isImageSlide {
|
||||
media := ctx.Extractor.NewMedia(
|
||||
ctx.MatchedContentID,
|
||||
ctx.MatchedContentURL,
|
||||
)
|
||||
media.SetCaption(caption)
|
||||
video := details.Video
|
||||
|
||||
// generic PlayAddr
|
||||
if video.PlayAddr != nil {
|
||||
format, err := ParsePlayAddr(video, video.PlayAddr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse playaddr: %w", err)
|
||||
}
|
||||
media.AddFormat(format)
|
||||
}
|
||||
// hevc PlayAddr
|
||||
if video.PlayAddrBytevc1 != nil {
|
||||
format, err := ParsePlayAddr(video, video.PlayAddrBytevc1)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse playaddr: %w", err)
|
||||
}
|
||||
media.AddFormat(format)
|
||||
}
|
||||
// h264 PlayAddr
|
||||
if video.PlayAddrH264 != nil {
|
||||
format, err := ParsePlayAddr(video, video.PlayAddrH264)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse playaddr: %w", err)
|
||||
}
|
||||
media.AddFormat(format)
|
||||
}
|
||||
mediaList = append(mediaList, media)
|
||||
} else {
|
||||
images := details.ImagePostInfo.Images
|
||||
for _, image := range images {
|
||||
media := ctx.Extractor.NewMedia(
|
||||
ctx.MatchedContentID,
|
||||
ctx.MatchedContentURL,
|
||||
)
|
||||
media.SetCaption(caption)
|
||||
media.AddFormat(&models.MediaFormat{
|
||||
FormatID: "image",
|
||||
Type: enums.MediaTypePhoto,
|
||||
URL: image.DisplayImage.URLList,
|
||||
})
|
||||
mediaList = append(mediaList, media)
|
||||
}
|
||||
}
|
||||
return mediaList, nil
|
||||
}
|
||||
|
||||
func GetVideoAPI(awemeID string) (*AwemeDetails, error) {
|
||||
apiURL := fmt.Sprintf(
|
||||
"https://%s/aweme/v1/multi/aweme/detail/",
|
||||
apiHostname,
|
||||
)
|
||||
queryParams, err := BuildAPIQuery()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to build api query: %w", err)
|
||||
}
|
||||
postData := BuildPostData(awemeID)
|
||||
|
||||
req, err := http.NewRequest(
|
||||
http.MethodPost,
|
||||
apiURL,
|
||||
postData,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
req.URL.RawQuery = queryParams.Encode()
|
||||
req.Header.Set("User-Agent", appUserAgent)
|
||||
req.Header.Set("Accept", "application/json")
|
||||
req.Header.Set("X-Argus", "")
|
||||
|
||||
resp, err := HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response body: %w", err)
|
||||
}
|
||||
|
||||
var data *Response
|
||||
err = json.Unmarshal(body, &data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal response: %w", err)
|
||||
}
|
||||
videoData, err := FindVideoData(data, awemeID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to find video data: %w", err)
|
||||
}
|
||||
return videoData, nil
|
||||
}
|
65
ext/tiktok/models.go
Normal file
65
ext/tiktok/models.go
Normal file
|
@ -0,0 +1,65 @@
|
|||
package tiktok
|
||||
|
||||
type Response struct {
|
||||
AwemeDetails []AwemeDetails `json:"aweme_details"`
|
||||
StatusCode int `json:"status_code"`
|
||||
StatusMsg string `json:"status_msg"`
|
||||
}
|
||||
|
||||
// Cover describes a video cover image as found under the "cover" key.
type Cover struct {
	Height    int64    `json:"height"`
	URI       string   `json:"uri"`
	URLList   []string `json:"url_list"`
	URLPrefix any      `json:"url_prefix"` // decoded untyped; shape not visible here
	Width     int64    `json:"width"`
}
|
||||
|
||||
// PlayAddr is one playable rendition of a video: its candidate URLs, size,
// dimensions and the "url_key" later used as the format identifier.
type PlayAddr struct {
	DataSize int64    `json:"data_size"`
	FileCs   string   `json:"file_cs"`
	FileHash string   `json:"file_hash"`
	Height   int64    `json:"height"`
	URI      string   `json:"uri"`
	URLKey   string   `json:"url_key"`
	URLList  []string `json:"url_list"`
	Width    int64    `json:"width"`
}
|
||||
|
||||
type Image struct {
|
||||
DisplayImage *DisplayImage `json:"display_image"`
|
||||
}
|
||||
|
||||
// DisplayImage describes a displayable image of an image post: candidate
// URLs and dimensions.
type DisplayImage struct {
	Height    int      `json:"height"`
	URI       string   `json:"uri"`
	URLList   []string `json:"url_list"`
	URLPrefix any      `json:"url_prefix"` // decoded untyped; shape not visible here
	Width     int      `json:"width"`
}
|
||||
|
||||
type ImagePostInfo struct {
|
||||
Images []Image `json:"images"`
|
||||
MusicVolume float64 `json:"music_volume"`
|
||||
PostExtra string `json:"post_extra"`
|
||||
Title string `json:"title"`
|
||||
}
|
||||
|
||||
type Video struct {
|
||||
CdnURLExpired int64 `json:"cdn_url_expired"`
|
||||
Cover Cover `json:"cover"`
|
||||
Duration int64 `json:"duration"`
|
||||
HasWatermark bool `json:"has_watermark"`
|
||||
Height int64 `json:"height"`
|
||||
PlayAddr *PlayAddr `json:"play_addr"`
|
||||
PlayAddrBytevc1 *PlayAddr `json:"play_addr_bytevc1"`
|
||||
PlayAddrH264 *PlayAddr `json:"play_addr_h264"`
|
||||
Width int64 `json:"width"`
|
||||
}
|
||||
|
||||
type AwemeDetails struct {
|
||||
AwemeID string `json:"aweme_id"`
|
||||
AwemeType int `json:"aweme_type"`
|
||||
Desc string `json:"desc"`
|
||||
Video *Video `json:"video"`
|
||||
ImagePostInfo *ImagePostInfo `json:"image_post_info"`
|
||||
}
|
177
ext/tiktok/util.go
Normal file
177
ext/tiktok/util.go
Normal file
|
@ -0,0 +1,177 @@
|
|||
package tiktok
|
||||
|
||||
import (
|
||||
"crypto/rand"
|
||||
"fmt"
|
||||
"math/big"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"govd/enums"
|
||||
"govd/models"
|
||||
"govd/util"
|
||||
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
func BuildAPIQuery() (url.Values, error) {
|
||||
requestTicket := strconv.Itoa(int(time.Now().Unix()) * 1000)
|
||||
clientDeviceID := uuid.New().String()
|
||||
versionCode, err := GetAppVersionCode(appVersion)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get app version code: %w", err)
|
||||
}
|
||||
return url.Values{
|
||||
"device_platform": []string{"android"},
|
||||
"os": []string{"android"},
|
||||
"ssmix": []string{"0"}, // what is this?
|
||||
"_rticket": []string{requestTicket},
|
||||
"cdid": []string{clientDeviceID},
|
||||
"channel": []string{"googleplay"},
|
||||
"aid": []string{appID},
|
||||
"app_name": []string{appName},
|
||||
"version_code": []string{versionCode},
|
||||
"version_name": []string{appVersion},
|
||||
"manifest_version_code": []string{manifestAppVersion},
|
||||
"update_version_code": []string{manifestAppVersion},
|
||||
"ab_version": []string{appVersion},
|
||||
"resolution": []string{"1080*2400"},
|
||||
"dpi": []string{"420"},
|
||||
"device_type": []string{"Pixel 7"},
|
||||
"device_brand": []string{"Google"},
|
||||
"language": []string{"en"},
|
||||
"os_api": []string{"29"},
|
||||
"os_version": []string{"13"},
|
||||
"ac": []string{"wifi"},
|
||||
"is_pad": []string{"0"},
|
||||
"current_region": []string{"US"},
|
||||
"app_type": []string{"normal"},
|
||||
"last_install_time": []string{GetRandomInstallTime()},
|
||||
"timezone_name": []string{"America/New_York"},
|
||||
"residence": []string{"US"},
|
||||
"app_language": []string{"en"},
|
||||
"timezone_offset": []string{"-14400"},
|
||||
"host_abi": []string{"armeabi-v7a"},
|
||||
"locale": []string{"en"},
|
||||
"ac2": []string{"wifi5g"},
|
||||
"uoo": []string{"1"}, // what is this?
|
||||
"carrier_region": []string{"US"},
|
||||
"build_number": []string{appVersion},
|
||||
"region": []string{"US"},
|
||||
"ts": []string{strconv.Itoa(int(time.Now().Unix()))},
|
||||
"iid": []string{installationID},
|
||||
"device_id": []string{GetRandomDeviceID()},
|
||||
"openudid": []string{GetRandomUdid()},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func ParsePlayAddr(
|
||||
video *Video,
|
||||
playAddr *PlayAddr,
|
||||
) (*models.MediaFormat, error) {
|
||||
formatID := playAddr.URLKey
|
||||
if formatID == "" {
|
||||
return nil, errors.New("url_key not found")
|
||||
}
|
||||
videoCodec := enums.MediaCodecHEVC
|
||||
if strings.Contains(formatID, "h264") {
|
||||
videoCodec = enums.MediaCodecAVC
|
||||
}
|
||||
videoURL := playAddr.URLList
|
||||
videoDuration := video.Duration / 1000
|
||||
videoWidth := playAddr.Width
|
||||
videoHeight := playAddr.Height
|
||||
videoCover := &video.Cover
|
||||
videoThumbnailURLs := videoCover.URLList
|
||||
|
||||
return &models.MediaFormat{
|
||||
Type: enums.MediaTypeVideo,
|
||||
FormatID: formatID,
|
||||
URL: videoURL,
|
||||
VideoCodec: videoCodec,
|
||||
AudioCodec: enums.MediaCodecAAC,
|
||||
Duration: videoDuration,
|
||||
Thumbnail: videoThumbnailURLs,
|
||||
Width: videoWidth,
|
||||
Height: videoHeight,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// GetRandomInstallTime returns a Unix timestamp in seconds, as a decimal
// string, lying between 86400 (inclusive) and 1123200 (exclusive) seconds
// before now.
//
// If the system random source fails, the minimum offset is used instead of
// dereferencing the nil result of rand.Int.
func GetRandomInstallTime() string {
	const (
		minOffset = 86400   // 1 day, in seconds
		maxOffset = 1123200 // 13 days, in seconds
	)

	offset := int64(minOffset)
	extra, err := rand.Int(rand.Reader, big.NewInt(maxOffset-minOffset))
	if err == nil {
		offset += extra.Int64()
	}

	// int64 arithmetic keeps the result correct on 32-bit platforms too
	return strconv.FormatInt(time.Now().Unix()-offset, 10)
}
|
||||
|
||||
// GetRandomUdid returns a random identifier of 16 lowercase hexadecimal
// characters.
func GetRandomUdid() string {
	const charset = "0123456789abcdef"

	result := make([]byte, 16)
	// One read supplies all the randomness. The function has no error return,
	// so on a failed read the bytes left at zero simply map to '0' below
	// instead of crashing the caller.
	_, _ = rand.Read(result)

	for i, b := range result {
		// the low nibble is uniform over the 16 charset entries
		result[i] = charset[b&0x0f]
	}
	return string(result)
}
|
||||
|
||||
// GetRandomDeviceID returns a random decimal device identifier in the range
// [7250000000000000000, 7351147085025500000).
//
// If the system random source fails, the lower bound is returned instead of
// dereferencing the nil result of rand.Int.
func GetRandomDeviceID() string {
	minNum := big.NewInt(7250000000000000000)
	maxNum := big.NewInt(7351147085025500000)

	span := new(big.Int).Sub(maxNum, minNum)
	offset, err := rand.Int(rand.Reader, span)
	if err != nil {
		return minNum.String()
	}
	return offset.Add(offset, minNum).String()
}
|
||||
|
||||
// BuildPostData returns a reader over the URL-encoded request body: the
// requested ID wrapped in brackets under "aweme_ids", and "request_source"
// set to "0".
func BuildPostData(awemeID string) *strings.Reader {
	form := make(url.Values, 2)
	form.Set("aweme_ids", fmt.Sprintf("[%s]", awemeID))
	form.Set("request_source", "0")

	encoded := form.Encode()
	return strings.NewReader(encoded)
}
|
||||
|
||||
// GetAppVersionCode turns a dotted version such as "37.1.4" into its version
// code by zero-padding every numeric part to at least two digits and
// concatenating them ("370104").
//
// It returns an error when a part is not an integer.
func GetAppVersionCode(version string) (string, error) {
	var code strings.Builder

	for _, segment := range strings.Split(version, ".") {
		value, parseErr := strconv.Atoi(segment)
		if parseErr != nil {
			return "", fmt.Errorf("failed to parse version part: %w", parseErr)
		}
		if _, writeErr := fmt.Fprintf(&code, "%02d", value); writeErr != nil {
			return "", fmt.Errorf("failed to format version part: %w", writeErr)
		}
	}

	return code.String(), nil
}
|
||||
|
||||
func FindVideoData(
|
||||
resp *Response,
|
||||
expectedAwemeID string,
|
||||
) (*AwemeDetails, error) {
|
||||
if resp.StatusCode == 2053 {
|
||||
return nil, util.ErrUnavailable
|
||||
}
|
||||
if resp.AwemeDetails == nil {
|
||||
return nil, errors.New("aweme_details is nil")
|
||||
}
|
||||
for _, item := range resp.AwemeDetails {
|
||||
if item.AwemeID == expectedAwemeID {
|
||||
return &item, nil
|
||||
}
|
||||
}
|
||||
return nil, errors.New("matching aweme_id not found")
|
||||
}
|
181
ext/twitter/main.go
Normal file
181
ext/twitter/main.go
Normal file
|
@ -0,0 +1,181 @@
|
|||
package twitter
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"regexp"
|
||||
|
||||
"govd/enums"
|
||||
"govd/models"
|
||||
"govd/util"
|
||||
)
|
||||
|
||||
const (
	// apiHostname is the host serving the Twitter/X web API.
	apiHostname = "x.com"
	// apiEndpoint is the GraphQL TweetDetail endpoint queried for tweet data.
	apiEndpoint = "https://x.com/i/api/graphql/zZXycP0V6H7m-2r0mOnFcA/TweetDetail"
)

// HTTPClient is the client used for all Twitter requests in this package.
// NOTE(review): it is a zero-value client, so no request timeout is set.
var HTTPClient = &http.Client{}
|
||||
|
||||
var ShortExtractor = &models.Extractor{
|
||||
Name: "Twitter (Short)",
|
||||
CodeName: "twitter:short",
|
||||
Type: enums.ExtractorTypeSingle,
|
||||
Category: enums.ExtractorCategorySocial,
|
||||
URLPattern: regexp.MustCompile(`https?://t\.co/(?P<id>\w+)`),
|
||||
IsRedirect: true,
|
||||
|
||||
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
|
||||
req, err := http.NewRequest("GET", ctx.MatchedContentURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create req: %w", err)
|
||||
}
|
||||
req.Header.Set("User-Agent", util.ChromeUA)
|
||||
res, err := HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
body, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read body: %w", err)
|
||||
}
|
||||
matchedURL := Extractor.URLPattern.FindStringSubmatch(string(body))
|
||||
if matchedURL == nil {
|
||||
return nil, fmt.Errorf("failed to find url in body")
|
||||
}
|
||||
return &models.ExtractorResponse{
|
||||
URL: matchedURL[0],
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
var Extractor = &models.Extractor{
|
||||
Name: "Twitter",
|
||||
CodeName: "twitter",
|
||||
Type: enums.ExtractorTypeSingle,
|
||||
Category: enums.ExtractorCategorySocial,
|
||||
URLPattern: regexp.MustCompile(`https?:\/\/(vx)?(twitter|x)\.com\/([^\/]+)\/status\/(?P<id>\d+)`),
|
||||
|
||||
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
|
||||
mediaList, err := MediaListFromAPI(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get media: %w", err)
|
||||
}
|
||||
return &models.ExtractorResponse{
|
||||
MediaList: mediaList,
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
|
||||
var mediaList []*models.Media
|
||||
|
||||
tweetData, err := GetTweetAPI(ctx.MatchedContentID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get tweet data: %w", err)
|
||||
}
|
||||
|
||||
caption := CleanCaption(tweetData.FullText)
|
||||
|
||||
var mediaEntities []MediaEntity
|
||||
if tweetData.ExtendedEntities != nil && len(tweetData.ExtendedEntities.Media) > 0 {
|
||||
mediaEntities = tweetData.ExtendedEntities.Media
|
||||
} else if tweetData.Entities != nil && len(tweetData.Entities.Media) > 0 {
|
||||
mediaEntities = tweetData.Entities.Media
|
||||
} else {
|
||||
return nil, fmt.Errorf("no media found in tweet")
|
||||
}
|
||||
|
||||
for _, mediaEntity := range mediaEntities {
|
||||
media := ctx.Extractor.NewMedia(
|
||||
ctx.MatchedContentID,
|
||||
ctx.MatchedContentURL,
|
||||
)
|
||||
media.SetCaption(caption)
|
||||
|
||||
switch mediaEntity.Type {
|
||||
case "video", "animated_gif":
|
||||
formats, err := ExtractVideoFormats(&mediaEntity)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, format := range formats {
|
||||
media.AddFormat(format)
|
||||
}
|
||||
case "photo":
|
||||
media.AddFormat(&models.MediaFormat{
|
||||
Type: enums.MediaTypePhoto,
|
||||
FormatID: "photo",
|
||||
URL: []string{mediaEntity.MediaURLHTTPS},
|
||||
})
|
||||
}
|
||||
|
||||
if len(media.Formats) > 0 {
|
||||
mediaList = append(mediaList, media)
|
||||
}
|
||||
}
|
||||
|
||||
return mediaList, nil
|
||||
}
|
||||
|
||||
func GetTweetAPI(tweetID string) (*Tweet, error) {
|
||||
cookies, err := util.ParseCookieFile("twitter.txt")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get cookies: %w", err)
|
||||
}
|
||||
headers := BuildAPIHeaders(cookies)
|
||||
if headers == nil {
|
||||
return nil, fmt.Errorf("failed to build headers. check cookies")
|
||||
}
|
||||
query := BuildAPIQuery(tweetID)
|
||||
|
||||
req, err := http.NewRequest("GET", apiEndpoint, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create req: %w", err)
|
||||
}
|
||||
|
||||
for key, value := range headers {
|
||||
req.Header.Set(key, value)
|
||||
}
|
||||
|
||||
for _, cookie := range cookies {
|
||||
req.AddCookie(cookie)
|
||||
}
|
||||
|
||||
q := req.URL.Query()
|
||||
for key, value := range query {
|
||||
q.Add(key, value)
|
||||
}
|
||||
req.URL.RawQuery = q.Encode()
|
||||
|
||||
resp, err := HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("invalid response code: %s", resp.Status)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read body: %w", err)
|
||||
}
|
||||
|
||||
var apiResponse APIResponse
|
||||
err = json.Unmarshal(body, &apiResponse)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
|
||||
tweet, err := FindTweetData(&apiResponse, tweetID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get tweet data: %w", err)
|
||||
}
|
||||
|
||||
return tweet, nil
|
||||
}
|
72
ext/twitter/models.go
Normal file
72
ext/twitter/models.go
Normal file
|
@ -0,0 +1,72 @@
|
|||
package twitter
|
||||
|
||||
type APIResponse struct {
|
||||
Data struct {
|
||||
ThreadedConversationWithInjectionsV2 struct {
|
||||
Instructions []struct {
|
||||
Entries []struct {
|
||||
EntryID string `json:"entryId"`
|
||||
Content struct {
|
||||
ItemContent struct {
|
||||
TweetResults struct {
|
||||
Result TweetResult `json:"result"`
|
||||
} `json:"tweet_results"`
|
||||
} `json:"itemContent"`
|
||||
} `json:"content"`
|
||||
} `json:"entries"`
|
||||
} `json:"instructions"`
|
||||
} `json:"threaded_conversation_with_injections_v2"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
type TweetResult struct {
|
||||
Tweet *Tweet `json:"tweet,omitempty"`
|
||||
Legacy *Tweet `json:"legacy,omitempty"`
|
||||
RestID string `json:"rest_id,omitempty"`
|
||||
Core *Core `json:"core,omitempty"`
|
||||
}
|
||||
|
||||
type Core struct {
|
||||
UserResults struct {
|
||||
Result struct {
|
||||
Legacy *UserLegacy `json:"legacy,omitempty"`
|
||||
} `json:"result"`
|
||||
} `json:"user_results"`
|
||||
}
|
||||
|
||||
// UserLegacy holds the "screen_name" and "name" keys of a user object.
type UserLegacy struct {
	ScreenName string `json:"screen_name"`
	Name       string `json:"name"`
}
|
||||
|
||||
type Tweet struct {
|
||||
FullText string `json:"full_text"`
|
||||
ExtendedEntities *ExtendedEntities `json:"extended_entities,omitempty"`
|
||||
Entities *ExtendedEntities `json:"entities,omitempty"`
|
||||
CreatedAt string `json:"created_at"`
|
||||
ID string `json:"id_str"`
|
||||
}
|
||||
|
||||
type ExtendedEntities struct {
|
||||
Media []MediaEntity `json:"media,omitempty"`
|
||||
}
|
||||
|
||||
type MediaEntity struct {
|
||||
Type string `json:"type"`
|
||||
MediaURLHTTPS string `json:"media_url_https"`
|
||||
ExpandedURL string `json:"expanded_url"`
|
||||
URL string `json:"url"`
|
||||
VideoInfo *VideoInfo `json:"video_info,omitempty"`
|
||||
}
|
||||
|
||||
type VideoInfo struct {
|
||||
DurationMillis int `json:"duration_millis"`
|
||||
Variants []Variant `json:"variants"`
|
||||
AspectRatio []int `json:"aspect_ratio"`
|
||||
}
|
||||
|
||||
// Variant is one downloadable encoding of a video: bitrate, content type
// and URL.
type Variant struct {
	Bitrate     int    `json:"bitrate,omitempty"`
	ContentType string `json:"content_type"`
	URL         string `json:"url"`
}
|
162
ext/twitter/util.go
Normal file
162
ext/twitter/util.go
Normal file
|
@ -0,0 +1,162 @@
|
|||
package twitter
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"govd/enums"
|
||||
"govd/models"
|
||||
"govd/util"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// authToken is the bearer token sent in the "authorization" header of every
// API request.
// NOTE(review): hard-coded credential — presumably the public web-client
// token; confirm it is safe to ship in source.
const authToken = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"

// resolutionRegex captures a "<width>x<height>" pair as submatches 1 and 2.
var resolutionRegex = regexp.MustCompile(`(\d+)x(\d+)`)
|
||||
|
||||
func BuildAPIHeaders(cookies []*http.Cookie) map[string]string {
|
||||
var csrfToken string
|
||||
for _, cookie := range cookies {
|
||||
if cookie.Name == "ct0" {
|
||||
csrfToken = cookie.Value
|
||||
break
|
||||
}
|
||||
}
|
||||
if csrfToken == "" {
|
||||
return nil
|
||||
}
|
||||
headers := map[string]string{
|
||||
"authorization": fmt.Sprintf("Bearer %s", authToken),
|
||||
"user-agent": util.ChromeUA,
|
||||
"x-twitter-auth-type": "OAuth2Session",
|
||||
"x-twitter-client-language": "en",
|
||||
"x-twitter-active-user": "yes",
|
||||
}
|
||||
|
||||
if csrfToken != "" {
|
||||
headers["x-csrf-token"] = csrfToken
|
||||
}
|
||||
|
||||
return headers
|
||||
}
|
||||
|
||||
// BuildAPIQuery returns the two query parameters of a TweetDetail request,
// "variables" and "features", each holding a JSON-encoded object. tweetID is
// placed in variables as "focalTweetId".
func BuildAPIQuery(tweetID string) map[string]string {
	// encode serializes one parameter object; json.Marshal cannot fail for
	// maps that hold only strings and booleans, so the error is discarded.
	encode := func(payload map[string]any) string {
		raw, _ := json.Marshal(payload)
		return string(raw)
	}

	return map[string]string{
		"variables": encode(map[string]any{
			"focalTweetId":                           tweetID,
			"includePromotedContent":                 true,
			"with_rux_injections":                    false,
			"withBirdwatchNotes":                     true,
			"withCommunity":                          true,
			"withDownvotePerspective":                false,
			"withQuickPromoteEligibilityTweetFields": true,
			"withReactionsMetadata":                  false,
			"withReactionsPerspective":               false,
			"withSuperFollowsTweetFields":            true,
			"withSuperFollowsUserFields":             true,
			"withV2Timeline":                         true,
			"withVoice":                              true,
		}),
		"features": encode(map[string]any{
			"graphql_is_translatable_rweb_tweet_is_translatable_enabled":             false,
			"interactive_text_enabled":                                               true,
			"responsive_web_edit_tweet_api_enabled":                                  true,
			"responsive_web_enhance_cards_enabled":                                   true,
			"responsive_web_graphql_timeline_navigation_enabled":                     false,
			"responsive_web_text_conversations_enabled":                              false,
			"responsive_web_uc_gql_enabled":                                          true,
			"standardized_nudges_misinfo":                                            true,
			"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": false,
			"tweetypie_unmention_optimization_enabled":                               true,
			"unified_cards_ad_metadata_container_dynamic_card_content_query_enabled": true,
			"verified_phone_label_enabled":                                           false,
			"vibe_api_enabled":                                                       true,
		}),
	}
}
|
||||
|
||||
// tcoLinkPattern matches t.co short links up to the next whitespace. It is
// compiled once at package scope instead of on every CleanCaption call.
var tcoLinkPattern = regexp.MustCompile(`https?://t\.co/\S+`)

// CleanCaption removes every t.co short link from caption and trims the
// surrounding whitespace. An empty caption is returned unchanged.
func CleanCaption(caption string) string {
	if caption == "" {
		return ""
	}
	return strings.TrimSpace(tcoLinkPattern.ReplaceAllString(caption, ""))
}
|
||||
|
||||
func ExtractVideoFormats(media *MediaEntity) ([]*models.MediaFormat, error) {
|
||||
var formats []*models.MediaFormat
|
||||
|
||||
if media.VideoInfo == nil {
|
||||
return formats, nil
|
||||
}
|
||||
|
||||
duration := int64(media.VideoInfo.DurationMillis / 1000)
|
||||
|
||||
for _, variant := range media.VideoInfo.Variants {
|
||||
if variant.ContentType == "video/mp4" {
|
||||
width, height := extractResolution(variant.URL)
|
||||
|
||||
formats = append(formats, &models.MediaFormat{
|
||||
Type: enums.MediaTypeVideo,
|
||||
FormatID: fmt.Sprintf("mp4_%d", variant.Bitrate),
|
||||
URL: []string{variant.URL},
|
||||
VideoCodec: enums.MediaCodecAVC,
|
||||
AudioCodec: enums.MediaCodecAAC,
|
||||
Duration: duration,
|
||||
Thumbnail: []string{media.MediaURLHTTPS},
|
||||
Width: width,
|
||||
Height: height,
|
||||
Bitrate: int64(variant.Bitrate),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return formats, nil
|
||||
}
|
||||
|
||||
func extractResolution(url string) (int64, int64) {
|
||||
matches := resolutionRegex.FindStringSubmatch(url)
|
||||
if len(matches) >= 3 {
|
||||
width, _ := strconv.ParseInt(matches[1], 10, 64)
|
||||
height, _ := strconv.ParseInt(matches[2], 10, 64)
|
||||
return width, height
|
||||
}
|
||||
return 0, 0
|
||||
}
|
||||
|
||||
func FindTweetData(resp *APIResponse, tweetID string) (*Tweet, error) {
|
||||
instructions := resp.Data.ThreadedConversationWithInjectionsV2.Instructions
|
||||
if len(instructions) == 0 {
|
||||
return nil, fmt.Errorf("nessuna istruzione trovata nella risposta")
|
||||
}
|
||||
|
||||
entries := instructions[0].Entries
|
||||
entryID := fmt.Sprintf("tweet-%s", tweetID)
|
||||
|
||||
for _, entry := range entries {
|
||||
if entry.EntryID == entryID {
|
||||
result := entry.Content.ItemContent.TweetResults.Result
|
||||
|
||||
if result.Tweet != nil {
|
||||
return result.Tweet, nil
|
||||
}
|
||||
|
||||
if result.Legacy != nil {
|
||||
return result.Legacy, nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("struttura del tweet non valida")
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("tweet non trovato nella risposta")
|
||||
}
|
74
ext/util.go
Normal file
74
ext/util.go
Normal file
|
@ -0,0 +1,74 @@
|
|||
package ext
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"govd/models"
|
||||
)
|
||||
|
||||
var maxRedirects = 5
|
||||
|
||||
func CtxByURL(url string) (*models.DownloadContext, error) {
|
||||
var redirectCount int
|
||||
|
||||
currentURL := url
|
||||
|
||||
for redirectCount <= maxRedirects {
|
||||
for _, extractor := range List {
|
||||
matches := extractor.URLPattern.FindStringSubmatch(currentURL)
|
||||
if matches == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
groupNames := extractor.URLPattern.SubexpNames()
|
||||
if len(matches) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
groups := make(map[string]string)
|
||||
for i, name := range groupNames {
|
||||
if name != "" {
|
||||
groups[name] = matches[i]
|
||||
}
|
||||
}
|
||||
groups["match"] = matches[0]
|
||||
|
||||
ctx := &models.DownloadContext{
|
||||
MatchedContentID: groups["id"],
|
||||
MatchedContentURL: groups["match"],
|
||||
MatchedGroups: groups,
|
||||
Extractor: extractor,
|
||||
}
|
||||
|
||||
if !extractor.IsRedirect {
|
||||
return ctx, nil
|
||||
}
|
||||
|
||||
response, err := extractor.Run(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if response.URL == "" {
|
||||
return nil, fmt.Errorf("no URL found in response")
|
||||
}
|
||||
|
||||
currentURL = response.URL
|
||||
redirectCount++
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
if redirectCount > maxRedirects {
|
||||
return nil, fmt.Errorf("exceeded maximum number of redirects (%d)", maxRedirects)
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func ByCodeName(codeName string) *models.Extractor {
|
||||
for _, extractor := range List {
|
||||
if extractor.CodeName == codeName {
|
||||
return extractor
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue