Init
parent 264c97183e
commit 3faede7b1c
74 changed files with 6228 additions and 1 deletion
267
ext/reddit/main.go
Normal file
@@ -0,0 +1,267 @@
package reddit

import (
    "encoding/json"
    "fmt"
    "io"
    "net/http"
    "regexp"

    "govd/enums"
    "govd/models"
    "govd/util"
)

var HTTPClient = &http.Client{
    CheckRedirect: func(req *http.Request, via []*http.Request) error {
        if len(via) >= 10 {
            return fmt.Errorf("stopped after 10 redirects")
        }
        return nil
    },
}

var ShortExtractor = &models.Extractor{
    Name:       "Reddit (Short)",
    CodeName:   "reddit:short",
    Type:       enums.ExtractorTypeSingle,
    Category:   enums.ExtractorCategorySocial,
    URLPattern: regexp.MustCompile(`https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?s/(?P<id>[^/?#&]+))`),
    IsRedirect: true,

    Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
        req, err := http.NewRequest("GET", ctx.MatchedContentURL, nil)
        if err != nil {
            return nil, fmt.Errorf("failed to create request: %w", err)
        }

        req.Header.Set("User-Agent", util.ChromeUA)
        cookies, err := util.ParseCookieFile("reddit.txt")
        if err != nil {
            return nil, fmt.Errorf("failed to get cookies: %w", err)
        }
        for _, cookie := range cookies {
            req.AddCookie(cookie)
        }

        res, err := HTTPClient.Do(req)
        if err != nil {
            return nil, fmt.Errorf("failed to send request: %w", err)
        }
        defer res.Body.Close()

        location := res.Request.URL.String()

        return &models.ExtractorResponse{
            URL: location,
        }, nil
    },
}

var Extractor = &models.Extractor{
    Name:       "Reddit",
    CodeName:   "reddit",
    Type:       enums.ExtractorTypeSingle,
    Category:   enums.ExtractorCategorySocial,
    URLPattern: regexp.MustCompile(`https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))`),

    Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
        mediaList, err := MediaListFromAPI(ctx)
        if err != nil {
            return nil, fmt.Errorf("failed to get media: %w", err)
        }
        return &models.ExtractorResponse{
            MediaList: mediaList,
        }, nil
    },
}

func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
    host := ctx.MatchedGroups["host"]
    slug := ctx.MatchedGroups["slug"]

    contentID := ctx.MatchedContentID
    contentURL := ctx.MatchedContentURL

    manifest, err := GetRedditData(host, slug)
    if err != nil {
        return nil, err
    }

    if len(manifest) == 0 || len(manifest[0].Data.Children) == 0 {
        return nil, fmt.Errorf("no data found in response")
    }

    data := manifest[0].Data.Children[0].Data
    title := data.Title
    isNsfw := data.Over18
    var mediaList []*models.Media

    if !data.IsVideo {
        // check for single photo
        if data.Preview != nil && len(data.Preview.Images) > 0 {
            media := ctx.Extractor.NewMedia(contentID, contentURL)
            media.SetCaption(title)
            if isNsfw {
                media.NSFW = true
            }

            image := data.Preview.Images[0]

            // check for video preview (GIF)
            if data.Preview.RedditVideoPreview != nil {
                formats, err := GetHLSFormats(
                    data.Preview.RedditVideoPreview.FallbackURL,
                    image.Source.URL,
                    data.Preview.RedditVideoPreview.Duration,
                )
                if err != nil {
                    return nil, err
                }

                for _, format := range formats {
                    media.AddFormat(format)
                }

                mediaList = append(mediaList, media)
                return mediaList, nil
            }

            // check for MP4 variant (animated GIF)
            if image.Variants.MP4 != nil {
                media.AddFormat(&models.MediaFormat{
                    FormatID:   "gif",
                    Type:       enums.MediaTypeVideo,
                    VideoCodec: enums.MediaCodecAVC,
                    AudioCodec: enums.MediaCodecAAC,
                    URL:        []string{util.FixURL(image.Variants.MP4.Source.URL)},
                    Thumbnail:  []string{util.FixURL(image.Source.URL)},
                })

                mediaList = append(mediaList, media)
                return mediaList, nil
            }

            // regular photo
            media.AddFormat(&models.MediaFormat{
                FormatID: "photo",
                Type:     enums.MediaTypePhoto,
                URL:      []string{util.FixURL(image.Source.URL)},
            })

            mediaList = append(mediaList, media)
            return mediaList, nil
        }

        // check for gallery/collection
        if len(data.MediaMetadata) > 0 {
            for key, obj := range data.MediaMetadata {
                if obj.E == "Image" {
                    media := ctx.Extractor.NewMedia(key, contentURL)
                    media.SetCaption(title)
                    if isNsfw {
                        media.NSFW = true
                    }

                    media.AddFormat(&models.MediaFormat{
                        FormatID: "photo",
                        Type:     enums.MediaTypePhoto,
                        URL:      []string{util.FixURL(obj.S.U)},
                    })

                    mediaList = append(mediaList, media)
                }
            }

            return mediaList, nil
        }
    } else {
        // video
        media := ctx.Extractor.NewMedia(contentID, contentURL)
        media.SetCaption(title)
        if isNsfw {
            media.NSFW = true
        }

        var redditVideo *RedditVideo

        if data.Media != nil && data.Media.RedditVideo != nil {
            redditVideo = data.Media.RedditVideo
        } else if data.SecureMedia != nil && data.SecureMedia.RedditVideo != nil {
            redditVideo = data.SecureMedia.RedditVideo
        }

        if redditVideo != nil {
            thumbnail := data.Thumbnail

            if (thumbnail == "nsfw" || thumbnail == "spoiler") && data.Preview != nil && len(data.Preview.Images) > 0 {
                thumbnail = data.Preview.Images[0].Source.URL
            }

            formats, err := GetHLSFormats(
                redditVideo.FallbackURL,
                thumbnail,
                redditVideo.Duration,
            )
            if err != nil {
                return nil, err
            }

            for _, format := range formats {
                media.AddFormat(format)
            }

            mediaList = append(mediaList, media)
            return mediaList, nil
        }
    }

    return mediaList, nil
}

func GetRedditData(host string, slug string) (RedditResponse, error) {
    url := fmt.Sprintf("https://%s/%s/.json", host, slug)

    req, err := http.NewRequest("GET", url, nil)
    if err != nil {
        return nil, fmt.Errorf("failed to create request: %w", err)
    }

    req.Header.Set("User-Agent", util.ChromeUA)
    cookies, err := util.ParseCookieFile("reddit.txt")
    if err != nil {
        return nil, fmt.Errorf("failed to get cookies: %w", err)
    }
    for _, cookie := range cookies {
        req.AddCookie(cookie)
    }

    res, err := HTTPClient.Do(req)
    if err != nil {
        return nil, fmt.Errorf("failed to send request: %w", err)
    }
    defer res.Body.Close()

    if res.StatusCode != http.StatusOK {
        // try with alternative domain
        altHost := "old.reddit.com"
        if host == "old.reddit.com" {
            altHost = "www.reddit.com"
        }

        return GetRedditData(altHost, slug)
    }

    body, err := io.ReadAll(res.Body)
    if err != nil {
        return nil, fmt.Errorf("failed to read response body: %w", err)
    }

    var response RedditResponse
    err = json.Unmarshal(body, &response)
    if err != nil {
        return nil, fmt.Errorf("failed to parse response: %w", err)
    }

    return response, nil
}
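For reference only (not part of the commit): a minimal, self-contained sketch of how the named groups in Extractor.URLPattern yield the host, slug, and id values that MediaListFromAPI later reads from ctx.MatchedGroups. The sample URL is invented for illustration.

package main

import (
    "fmt"
    "regexp"
)

// same pattern as Extractor.URLPattern in ext/reddit/main.go
var pattern = regexp.MustCompile(`https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))`)

func main() {
    // URL invented for illustration only
    url := "https://www.reddit.com/r/golang/comments/abc123/some_title/"

    match := pattern.FindStringSubmatch(url)
    if match == nil {
        fmt.Println("no match")
        return
    }

    // map submatches to their group names
    groups := make(map[string]string)
    for i, name := range pattern.SubexpNames() {
        if name != "" && i < len(match) {
            groups[name] = match[i]
        }
    }

    fmt.Println(groups["host"]) // www.reddit.com
    fmt.Println(groups["slug"]) // r/golang/comments/abc123
    fmt.Println(groups["id"])   // abc123
}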
74
ext/reddit/models.go
Normal file
@@ -0,0 +1,74 @@
package reddit

type RedditResponse []struct {
    Data struct {
        Children []struct {
            Data PostData `json:"data"`
        } `json:"children"`
    } `json:"data"`
}

type PostData struct {
    ID            string                   `json:"id"`
    Title         string                   `json:"title"`
    IsVideo       bool                     `json:"is_video"`
    Thumbnail     string                   `json:"thumbnail"`
    Media         *Media                   `json:"media"`
    Preview       *Preview                 `json:"preview"`
    MediaMetadata map[string]MediaMetadata `json:"media_metadata"`
    SecureMedia   *Media                   `json:"secure_media"`
    Over18        bool                     `json:"over_18"`
}

type Media struct {
    RedditVideo *RedditVideo `json:"reddit_video"`
}

type RedditVideo struct {
    FallbackURL      string `json:"fallback_url"`
    HLSURL           string `json:"hls_url"`
    DashURL          string `json:"dash_url"`
    Duration         int64  `json:"duration"`
    Height           int64  `json:"height"`
    Width            int64  `json:"width"`
    ScrubberMediaURL string `json:"scrubber_media_url"`
}

type Preview struct {
    Images             []Image             `json:"images"`
    RedditVideoPreview *RedditVideoPreview `json:"reddit_video_preview"`
}

type Image struct {
    Source   ImageSource   `json:"source"`
    Variants ImageVariants `json:"variants"`
}

type ImageSource struct {
    URL    string `json:"url"`
    Width  int64  `json:"width"`
    Height int64  `json:"height"`
}

type ImageVariants struct {
    MP4 *MP4Variant `json:"mp4"`
}

type MP4Variant struct {
    Source ImageSource `json:"source"`
}

type RedditVideoPreview struct {
    FallbackURL string `json:"fallback_url"`
    Duration    int64  `json:"duration"`
}

type MediaMetadata struct {
    Status string `json:"status"`
    E      string `json:"e"`
    S      struct {
        U string `json:"u"`
        X int64  `json:"x"`
        Y int64  `json:"y"`
    } `json:"s"`
}
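RedditResponse is declared as a slice because the post's .json endpoint returns an array of listings (the post itself plus its comment tree), which is why main.go indexes manifest[0]. A minimal decoding sketch, not part of the commit, using a local trimmed copy of the struct shape and a made-up payload:

package main

import (
    "encoding/json"
    "fmt"
)

// local, trimmed copy of the RedditResponse shape from ext/reddit/models.go,
// kept inline so the sketch compiles on its own
type redditResponse []struct {
    Data struct {
        Children []struct {
            Data struct {
                Title   string `json:"title"`
                IsVideo bool   `json:"is_video"`
                Over18  bool   `json:"over_18"`
            } `json:"data"`
        } `json:"children"`
    } `json:"data"`
}

func main() {
    // heavily trimmed stand-in for a /comments/<id>/.json response
    payload := []byte(`[{"data":{"children":[{"data":{"title":"example","is_video":false,"over_18":false}}]}}]`)

    var resp redditResponse
    if err := json.Unmarshal(payload, &resp); err != nil {
        panic(err)
    }

    post := resp[0].Data.Children[0].Data
    fmt.Println(post.Title, post.IsVideo, post.Over18) // example false false
}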
39
ext/reddit/util.go
Normal file
@@ -0,0 +1,39 @@
package reddit

import (
    "fmt"
    "govd/models"
    "govd/util"
    "govd/util/parser"
    "regexp"
)

const (
    hlsURLFormat = "https://v.redd.it/%s/HLSPlaylist.m3u8"
)

var videoURLPattern = regexp.MustCompile(`https?://v\.redd\.it/([^/]+)`)

func GetHLSFormats(videoURL string, thumbnail string, duration int64) ([]*models.MediaFormat, error) {
    matches := videoURLPattern.FindStringSubmatch(videoURL)
    if len(matches) < 2 {
        return nil, nil
    }

    videoID := matches[1]
    hlsURL := fmt.Sprintf(hlsURLFormat, videoID)

    formats, err := parser.ParseM3U8FromURL(hlsURL)
    if err != nil {
        return nil, err
    }

    for _, format := range formats {
        format.Duration = duration
        if thumbnail != "" {
            format.Thumbnail = []string{util.FixURL(thumbnail)}
        }
    }

    return formats, nil
}
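GetHLSFormats only derives the HLS playlist URL from a v.redd.it fallback URL; the playlist parsing itself is delegated to parser.ParseM3U8FromURL. A standalone sketch (not part of the commit) of just that ID-extraction and URL-construction step, with an invented fallback_url:

package main

import (
    "fmt"
    "regexp"
)

// same pattern and URL template as ext/reddit/util.go
var videoURLPattern = regexp.MustCompile(`https?://v\.redd\.it/([^/]+)`)

const hlsURLFormat = "https://v.redd.it/%s/HLSPlaylist.m3u8"

func main() {
    // fallback_url value invented for illustration
    fallbackURL := "https://v.redd.it/abcdef123456/DASH_720.mp4?source=fallback"

    matches := videoURLPattern.FindStringSubmatch(fallbackURL)
    if len(matches) < 2 {
        fmt.Println("not a v.redd.it URL")
        return
    }

    hlsURL := fmt.Sprintf(hlsURLFormat, matches[1])
    fmt.Println(hlsURL) // https://v.redd.it/abcdef123456/HLSPlaylist.m3u8
}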