This commit is contained in:
stefanodvx 2025-04-14 13:05:43 +02:00
parent 264c97183e
commit 3faede7b1c
74 changed files with 6228 additions and 1 deletions

267
ext/reddit/main.go Normal file
View file

@ -0,0 +1,267 @@
package reddit
import (
"encoding/json"
"fmt"
"io"
"net/http"
"regexp"
"govd/enums"
"govd/models"
"govd/util"
)
// HTTPClient is the shared client used for every request this package sends
// to Reddit. It follows redirects, but gives up once ten requests have
// already been made for a single call.
var HTTPClient = &http.Client{
	CheckRedirect: func(_ *http.Request, previous []*http.Request) error {
		// previous holds the requests made so far, oldest first.
		if len(previous) < 10 {
			return nil
		}
		return fmt.Errorf("stopped after 10 redirects")
	},
}
// ShortExtractor handles Reddit share links, i.e. URLs whose path contains
// "s/<id>". It does not produce media itself: Run requests the matched URL
// with the Chrome user agent and the cookies from "reddit.txt", lets
// HTTPClient follow the redirects and reports the URL of the final request.
var ShortExtractor = &models.Extractor{
	Name:       "Reddit (Short)",
	CodeName:   "reddit:short",
	Type:       enums.ExtractorTypeSingle,
	Category:   enums.ExtractorCategorySocial,
	URLPattern: regexp.MustCompile(`https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?s/(?P<id>[^/?#&]+))`),
	IsRedirect: true,
	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
		request, err := http.NewRequest("GET", ctx.MatchedContentURL, nil)
		if err != nil {
			return nil, fmt.Errorf("failed to create request: %w", err)
		}
		request.Header.Set("User-Agent", util.ChromeUA)

		storedCookies, err := util.ParseCookieFile("reddit.txt")
		if err != nil {
			return nil, fmt.Errorf("failed to get cookies: %w", err)
		}
		for _, c := range storedCookies {
			request.AddCookie(c)
		}

		response, err := HTTPClient.Do(request)
		if err != nil {
			return nil, fmt.Errorf("failed to send request: %w", err)
		}
		defer response.Body.Close()

		// response.Request is the last request sent, so its URL is the
		// address the share link finally resolved to.
		resolved := &models.ExtractorResponse{
			URL: response.Request.URL.String(),
		}
		return resolved, nil
	},
}
// Extractor handles Reddit post URLs, i.e. URLs whose path contains
// "comments/<id>". Run delegates to MediaListFromAPI and wraps the resulting
// media list in an extractor response.
var Extractor = &models.Extractor{
	Name:       "Reddit",
	CodeName:   "reddit",
	Type:       enums.ExtractorTypeSingle,
	Category:   enums.ExtractorCategorySocial,
	URLPattern: regexp.MustCompile(`https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))`),
	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
		items, err := MediaListFromAPI(ctx)
		if err != nil {
			return nil, fmt.Errorf("failed to get media: %w", err)
		}
		result := &models.ExtractorResponse{MediaList: items}
		return result, nil
	},
}
// MediaListFromAPI fetches the post matched by ctx through GetRedditData and
// converts the first post of the response into a list of media.
//
// It reads the "host" and "slug" capture groups as well as the matched
// content ID and URL from ctx. Depending on the post it returns:
//   - one media for a post with a preview image (HLS formats of the video
//     preview, otherwise the MP4 variant, otherwise the plain photo),
//   - one media per "Image" entry of the media metadata for a gallery,
//   - one media carrying HLS formats for a native video post.
//
// The returned list is nil (with a nil error) when the post contains none of
// the above. An error is returned when the request fails, when the response
// holds no post data, or when the HLS formats cannot be retrieved.
func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
	manifest, err := GetRedditData(ctx.MatchedGroups["host"], ctx.MatchedGroups["slug"])
	if err != nil {
		return nil, err
	}
	if len(manifest) == 0 || len(manifest[0].Data.Children) == 0 {
		return nil, fmt.Errorf("no data found in response")
	}
	data := &manifest[0].Data.Children[0].Data

	switch {
	case data.IsVideo:
		return videoPostMedia(ctx, data)
	case data.Preview != nil && len(data.Preview.Images) > 0:
		return previewPostMedia(ctx, data)
	default:
		return galleryPostMedia(ctx, data), nil
	}
}

// newPostMedia creates a media entry with the given ID for the matched URL,
// captions it with the post title and flags it as NSFW when the post is.
func newPostMedia(ctx *models.DownloadContext, id string, data *PostData) *models.Media {
	media := ctx.Extractor.NewMedia(id, ctx.MatchedContentURL)
	media.SetCaption(data.Title)
	if data.Over18 {
		media.NSFW = true
	}
	return media
}

// previewPostMedia builds the single media of a non-video post that has at
// least one preview image; the caller guarantees data.Preview.Images is not
// empty.
func previewPostMedia(ctx *models.DownloadContext, data *PostData) ([]*models.Media, error) {
	media := newPostMedia(ctx, ctx.MatchedContentID, data)
	image := data.Preview.Images[0]

	// video preview (GIF)
	if preview := data.Preview.RedditVideoPreview; preview != nil {
		formats, err := GetHLSFormats(preview.FallbackURL, image.Source.URL, preview.Duration)
		if err != nil {
			return nil, err
		}
		if len(formats) > 0 {
			for _, format := range formats {
				media.AddFormat(format)
			}
			return []*models.Media{media}, nil
		}
		// GetHLSFormats reports no formats (and no error) when the fallback
		// URL is not a v.redd.it link. Returning a media without formats
		// would be useless, so try the remaining variants instead.
	}

	// MP4 variant (animated GIF)
	if mp4 := image.Variants.MP4; mp4 != nil {
		media.AddFormat(&models.MediaFormat{
			FormatID:   "gif",
			Type:       enums.MediaTypeVideo,
			VideoCodec: enums.MediaCodecAVC,
			AudioCodec: enums.MediaCodecAAC,
			URL:        []string{util.FixURL(mp4.Source.URL)},
			Thumbnail:  []string{util.FixURL(image.Source.URL)},
		})
		return []*models.Media{media}, nil
	}

	// regular photo
	media.AddFormat(&models.MediaFormat{
		FormatID: "photo",
		Type:     enums.MediaTypePhoto,
		URL:      []string{util.FixURL(image.Source.URL)},
	})
	return []*models.Media{media}, nil
}

// galleryPostMedia builds one photo media per "Image" entry of the post's
// media metadata, using the metadata key as media ID. It returns nil when
// there is no such entry.
func galleryPostMedia(ctx *models.DownloadContext, data *PostData) []*models.Media {
	var mediaList []*models.Media
	// NOTE(review): map iteration order is unspecified in Go, so the order of
	// the gallery items is not stable between calls.
	for key, obj := range data.MediaMetadata {
		if obj.E != "Image" {
			continue
		}
		media := newPostMedia(ctx, key, data)
		media.AddFormat(&models.MediaFormat{
			FormatID: "photo",
			Type:     enums.MediaTypePhoto,
			URL:      []string{util.FixURL(obj.S.U)},
		})
		mediaList = append(mediaList, media)
	}
	return mediaList
}

// videoPostMedia builds the single media of a native video post. The video
// description is taken from "media" and, failing that, from "secure_media";
// when neither carries a Reddit video the result is nil.
func videoPostMedia(ctx *models.DownloadContext, data *PostData) ([]*models.Media, error) {
	media := newPostMedia(ctx, ctx.MatchedContentID, data)

	var video *RedditVideo
	switch {
	case data.Media != nil && data.Media.RedditVideo != nil:
		video = data.Media.RedditVideo
	case data.SecureMedia != nil && data.SecureMedia.RedditVideo != nil:
		video = data.SecureMedia.RedditVideo
	default:
		return nil, nil
	}

	// "nsfw" and "spoiler" are placeholders rather than URLs; use the first
	// preview image in their place when one is available.
	thumbnail := data.Thumbnail
	if (thumbnail == "nsfw" || thumbnail == "spoiler") && data.Preview != nil && len(data.Preview.Images) > 0 {
		thumbnail = data.Preview.Images[0].Source.URL
	}

	formats, err := GetHLSFormats(video.FallbackURL, thumbnail, video.Duration)
	if err != nil {
		return nil, err
	}
	for _, format := range formats {
		media.AddFormat(format)
	}
	return []*models.Media{media}, nil
}
// GetRedditData downloads and decodes the JSON document served at
// https://<host>/<slug>/.json.
//
// When a host answers with a status other than 200 the request is repeated
// against the alternative domains: old.reddit.com first, then
// www.reddit.com. Every distinct host is tried at most once, so a post that
// is rejected everywhere (deleted, forbidden, rate limited) results in an
// error and can no longer cause unbounded recursion.
//
// Transport, cookie and decoding errors are returned immediately without
// trying another host.
func GetRedditData(host string, slug string) (RedditResponse, error) {
	hosts := []string{host}
	for _, alt := range []string{"old.reddit.com", "www.reddit.com"} {
		if alt != host {
			hosts = append(hosts, alt)
		}
	}

	status := 0
	for _, candidate := range hosts {
		response, code, err := fetchRedditData(candidate, slug)
		if err != nil {
			return nil, err
		}
		if code == http.StatusOK {
			return response, nil
		}
		status = code
	}
	return nil, fmt.Errorf("failed to get reddit data: unexpected status code %d", status)
}

// fetchRedditData performs a single request for the post JSON on host and
// returns the decoded response together with the HTTP status code. For a
// status other than 200 the response is nil and the error is nil, leaving it
// to the caller to decide whether to try another host. The response body is
// always closed before returning.
func fetchRedditData(host string, slug string) (RedditResponse, int, error) {
	endpoint := fmt.Sprintf("https://%s/%s/.json", host, slug)
	req, err := http.NewRequest("GET", endpoint, nil)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("User-Agent", util.ChromeUA)

	cookies, err := util.ParseCookieFile("reddit.txt")
	if err != nil {
		return nil, 0, fmt.Errorf("failed to get cookies: %w", err)
	}
	for _, cookie := range cookies {
		req.AddCookie(cookie)
	}

	res, err := HTTPClient.Do(req)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to send request: %w", err)
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return nil, res.StatusCode, nil
	}

	body, err := io.ReadAll(res.Body)
	if err != nil {
		return nil, res.StatusCode, fmt.Errorf("failed to read response body: %w", err)
	}
	var response RedditResponse
	if err := json.Unmarshal(body, &response); err != nil {
		return nil, res.StatusCode, fmt.Errorf("failed to parse response: %w", err)
	}
	return response, res.StatusCode, nil
}

74
ext/reddit/models.go Normal file
View file

@ -0,0 +1,74 @@
package reddit
// RedditResponse is the JSON array that GetRedditData decodes from a post's
// ".json" endpoint. Each element wraps a list of children, and each child
// wraps the data of one post.
type RedditResponse []struct {
	Data struct {
		// Children lists the entries of this element; MediaListFromAPI only
		// reads the first child of the first element.
		Children []struct {
			Data PostData `json:"data"`
		} `json:"children"`
	} `json:"data"`
}
// PostData holds the fields of a Reddit post that this package decodes.
type PostData struct {
	ID string `json:"id"`
	// Title is used as the caption of every media built from the post.
	Title string `json:"title"`
	// IsVideo selects between the video and the photo/gallery handling.
	IsVideo bool `json:"is_video"`
	// Thumbnail is passed on as the video thumbnail; the values "nsfw" and
	// "spoiler" are treated as placeholders and replaced by a preview image.
	Thumbnail string `json:"thumbnail"`
	// Media is consulted first for the Reddit video description.
	Media *Media `json:"media"`
	// Preview is nil when the JSON has no "preview" object.
	Preview *Preview `json:"preview"`
	// MediaMetadata describes gallery items; the map keys are used as the
	// media IDs of the individual items.
	MediaMetadata map[string]MediaMetadata `json:"media_metadata"`
	// SecureMedia is the fallback source for the Reddit video description.
	SecureMedia *Media `json:"secure_media"`
	// Over18 marks the resulting media as NSFW.
	Over18 bool `json:"over_18"`
}
// Media is the shape shared by the "media" and "secure_media" objects of a
// post. RedditVideo is nil when the object has no "reddit_video" entry.
type Media struct {
	RedditVideo *RedditVideo `json:"reddit_video"`
}
// RedditVideo describes a video hosted by Reddit. Within this package only
// FallbackURL and Duration are read; the remaining fields are decoded but
// not used by the code shown here.
type RedditVideo struct {
	// FallbackURL is matched against the v.redd.it pattern to obtain the
	// video ID.
	FallbackURL string `json:"fallback_url"`
	HLSURL string `json:"hls_url"`
	DashURL string `json:"dash_url"`
	// Duration is copied onto every format; presumably seconds — TODO confirm.
	Duration int64 `json:"duration"`
	Height int64 `json:"height"`
	Width int64 `json:"width"`
	ScrubberMediaURL string `json:"scrubber_media_url"`
}
// Preview is the "preview" object of a post.
type Preview struct {
	// Images lists the preview images; only the first one is used.
	Images []Image `json:"images"`
	// RedditVideoPreview is nil when the post has no video preview.
	RedditVideoPreview *RedditVideoPreview `json:"reddit_video_preview"`
}
// Image is one entry of a post's preview images: the source image plus its
// alternative renditions.
type Image struct {
	Source ImageSource `json:"source"`
	Variants ImageVariants `json:"variants"`
}
// ImageSource is a media URL together with its dimensions. The URL is passed
// through util.FixURL before it is used.
type ImageSource struct {
	URL string `json:"url"`
	Width int64 `json:"width"`
	Height int64 `json:"height"`
}
// ImageVariants holds the alternative renditions of a preview image. MP4 is
// nil when the image has no "mp4" variant.
type ImageVariants struct {
	MP4 *MP4Variant `json:"mp4"`
}
// MP4Variant is the MP4 rendition of a preview image; its source URL is used
// for the "gif" format.
type MP4Variant struct {
	Source ImageSource `json:"source"`
}
// RedditVideoPreview is the video preview attached to a non-video post.
type RedditVideoPreview struct {
	// FallbackURL is matched against the v.redd.it pattern to obtain the
	// video ID.
	FallbackURL string `json:"fallback_url"`
	// Duration is copied onto every format; presumably seconds — TODO confirm.
	Duration int64 `json:"duration"`
}
// MediaMetadata is one entry of a post's "media_metadata" map, i.e. one item
// of a gallery.
type MediaMetadata struct {
	Status string `json:"status"`
	// E is the kind of the entry; only entries equal to "Image" are turned
	// into media.
	E string `json:"e"`
	// S is the source rendition of the entry.
	S struct {
		// U is the URL used for the photo format.
		U string `json:"u"`
		// X and Y are presumably the width and height — TODO confirm.
		X int64 `json:"x"`
		Y int64 `json:"y"`
	} `json:"s"`
}

39
ext/reddit/util.go Normal file
View file

@ -0,0 +1,39 @@
package reddit
import (
"fmt"
"govd/models"
"govd/util"
"govd/util/parser"
"regexp"
)
const (
	// hlsURLFormat is the template of a video's HLS playlist URL; %s is
	// replaced by the video ID.
	hlsURLFormat = "https://v.redd.it/%s/HLSPlaylist.m3u8"
)

// videoURLPattern matches v.redd.it URLs and captures the first path segment,
// which GetHLSFormats uses as the video ID.
var videoURLPattern = regexp.MustCompile(`https?://v\.redd\.it/([^/]+)`)
// GetHLSFormats returns the formats listed in the HLS playlist of a Reddit
// video.
//
// The video ID is taken from videoURL, which has to be a v.redd.it link; for
// any other URL the function returns nil formats and a nil error. The
// playlist URL built from that ID is handed to parser.ParseM3U8FromURL, whose
// error is returned unchanged. Every resulting format gets duration assigned
// and, when thumbnail is not empty, the fixed-up thumbnail URL as its only
// thumbnail.
func GetHLSFormats(videoURL string, thumbnail string, duration int64) ([]*models.MediaFormat, error) {
	groups := videoURLPattern.FindStringSubmatch(videoURL)
	if len(groups) < 2 {
		return nil, nil
	}

	playlistURL := fmt.Sprintf(hlsURLFormat, groups[1])
	formats, err := parser.ParseM3U8FromURL(playlistURL)
	if err != nil {
		return nil, err
	}

	withThumbnail := thumbnail != ""
	for i := range formats {
		entry := formats[i]
		entry.Duration = duration
		if !withThumbnail {
			continue
		}
		entry.Thumbnail = []string{util.FixURL(thumbnail)}
	}
	return formats, nil
}