9gag: new extractor

This commit is contained in:
stefanodvx 2025-04-18 12:48:37 +02:00
parent 3b625c8b0a
commit 60365973d8
6 changed files with 286 additions and 1 deletions

View file

@ -2,6 +2,7 @@ package ext
import (
"govd/ext/instagram"
"govd/ext/ninegag"
"govd/ext/pinterest"
"govd/ext/reddit"
"govd/ext/tiktok"
@ -21,5 +22,6 @@ var List = []*models.Extractor{
pinterest.ShortExtractor,
reddit.Extractor,
reddit.ShortExtractor,
ninegag.Extractor,
// todo: add every ext lol
}

132
ext/ninegag/main.go Normal file
View file

@ -0,0 +1,132 @@
package ninegag
import (
"fmt"
"net/http"
"regexp"
"govd/enums"
"govd/models"
"govd/util"
"github.com/bytedance/sonic"
)
// apiEndpoint is the 9GAG URL that GetPostData queries; the post ID is
// appended as the "id" query parameter.
const apiEndpoint = "https://9gag.com/v1/post"

// postNotFound is the response-metadata error message that GetPostData
// translates into util.ErrUnavailable.
const postNotFound = "Post not found"
// httpSession is the HTTP client used for requests to the 9GAG API.
// 9GAG answers with 403 unless the request carries a real browser
// TLS fingerprint, so a Chrome-like client is used.
var httpSession = util.NewChromeClient()
// Extractor matches 9GAG post URLs (https://9gag.com/gag/<id>, with an
// optional "www." prefix) and captures the post ID in the "id" group.
// Its Run callback resolves the post through MediaListFromAPI and wraps
// the resulting media list in an ExtractorResponse.
var Extractor = &models.Extractor{
	Name:       "9GAG",
	CodeName:   "ninegag",
	Type:       enums.ExtractorTypeSingle,
	Category:   enums.ExtractorCategorySocial,
	URLPattern: regexp.MustCompile(`https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)`),
	Host:       []string{"9gag.com"},

	Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
		items, err := MediaListFromAPI(ctx)
		if err != nil {
			return nil, fmt.Errorf("failed to get media: %w", err)
		}
		result := &models.ExtractorResponse{MediaList: items}
		return result, nil
	},
}
// MediaListFromAPI fetches the post identified by ctx.MatchedContentID and
// converts it into a media list.
//
// "Photo" posts yield a single photo format chosen by FindBestPhoto;
// "Animated" posts yield the video formats returned by ParseVideoFormats.
// Any other post type produces no formats, in which case a nil list and a
// nil error are returned. Errors from GetPostData are wrapped; errors from
// the format helpers are returned unchanged.
func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
	id := ctx.MatchedContentID
	link := ctx.MatchedContentURL

	post, err := GetPostData(id)
	if err != nil {
		return nil, fmt.Errorf("failed to get post data: %w", err)
	}

	item := ctx.Extractor.NewMedia(id, link)
	item.SetCaption(post.Title)
	// The API reports the NSFW flag as an integer; only 1 marks the media.
	if post.Nsfw == 1 {
		item.NSFW = true
	}

	if post.Type == "Photo" {
		photo, err := FindBestPhoto(post.Images)
		if err != nil {
			return nil, err
		}
		item.AddFormat(&models.MediaFormat{
			FormatID: "photo",
			Type:     enums.MediaTypePhoto,
			URL:      []string{photo.URL},
			Width:    int64(photo.Width),
			Height:   int64(photo.Height),
		})
	} else if post.Type == "Animated" {
		variants, err := ParseVideoFormats(post.Images)
		if err != nil {
			return nil, err
		}
		for i := range variants {
			item.AddFormat(variants[i])
		}
	}

	// Nothing usable was extracted (e.g. unhandled post type).
	if len(item.Formats) == 0 {
		return nil, nil
	}
	return []*models.Media{item}, nil
}
// GetPostData requests the post with the given ID from the 9GAG API and
// returns the decoded post.
//
// It returns util.ErrUnavailable when the response metadata carries the
// "Post not found" error message, and a descriptive error when the request
// fails, the HTTP status is not 200, the body cannot be decoded, the API
// reports a non-success status, or the response contains no post.
func GetPostData(postID string) (*Post, error) {
	url := apiEndpoint + "?id=" + postID
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("User-Agent", util.ChromeUA)

	resp, err := httpSession.Do(req)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("invalid status code: %d", resp.StatusCode)
	}

	var response Response
	decoder := sonic.ConfigFastest.NewDecoder(resp.Body)
	if err := decoder.Decode(&response); err != nil {
		return nil, fmt.Errorf("failed to decode response: %w", err)
	}

	if meta := response.Meta; meta != nil {
		// Test the not-found message before the generic status check:
		// otherwise a not-found reply that also carries a non-success
		// status is reported as a plain API error and the sentinel
		// util.ErrUnavailable is never returned.
		if meta.ErrorMessage == postNotFound {
			return nil, util.ErrUnavailable
		}
		if meta.Status != "Success" {
			return nil, fmt.Errorf("API error: %s", meta.Status)
		}
	}

	if response.Data == nil || response.Data.Post == nil {
		return nil, fmt.Errorf("no post data found")
	}
	return response.Data.Post, nil
}

43
ext/ninegag/models.go Normal file
View file

@ -0,0 +1,43 @@
package ninegag
type (
	// Response is the top-level JSON envelope decoded by GetPostData.
	// Either pointer may be nil when the corresponding key is absent.
	Response struct {
		Meta *Meta `json:"meta"`
		Data *Data `json:"data"`
	}

	// Meta is the metadata section of a Response. GetPostData compares
	// Status against "Success" and ErrorMessage against the not-found
	// message; the remaining fields are decoded but not read here.
	Meta struct {
		Timestamp    int    `json:"timestamp"`
		Status       string `json:"status"`
		Sid          string `json:"sid"`
		ErrorMessage string `json:"errorMessage"`
	}

	// Media is one entry of Post.Images. Entries whose URL ends in ".jpg"
	// are treated as still images, and an entry with Duration > 0 is
	// treated as the video. For a video, URL and the codec-specific
	// fields (H265URL, Vp8URL, Vp9URL, Av1URL) hold the alternative
	// encodings; empty fields are skipped when formats are built.
	// NOTE(review): HasAudio looks like a 0/1 flag - confirm against the API.
	Media struct {
		Width    int    `json:"width"`
		Height   int    `json:"height"`
		URL      string `json:"url"`
		HasAudio int    `json:"hasAudio"`
		Duration int    `json:"duration"`
		Vp8URL   string `json:"vp8Url"`
		H265URL  string `json:"h265Url"`
		Vp9URL   string `json:"vp9Url"`
		Av1URL   string `json:"av1Url"`
	}

	// Post is a single 9GAG post. Title becomes the media caption, Nsfw
	// equal to 1 marks the media as NSFW, and Type selects the handling
	// ("Photo" or "Animated"). Images maps a variant name to its Media.
	Post struct {
		ID               string            `json:"id"`
		URL              string            `json:"url"`
		Title            string            `json:"title"`
		Description      string            `json:"description"`
		Type             string            `json:"type"`
		Nsfw             int               `json:"nsfw"`
		CreationTs       int               `json:"creationTs"`
		GamFlagged       bool              `json:"gamFlagged"`
		IsVoteMasked     int               `json:"isVoteMasked"`
		HasLongPostCover int               `json:"hasLongPostCover"`
		Images           map[string]*Media `json:"images"`
	}

	// Data is the payload section of a Response; it wraps the Post.
	Data struct {
		Post *Post `json:"post"`
	}
)

106
ext/ninegag/util.go Normal file
View file

@ -0,0 +1,106 @@
package ninegag
import (
"fmt"
"strings"
"govd/enums"
"govd/models"
)
// FindBestPhoto returns the widest entry of images whose URL ends in
// ".jpg". It returns an error when no such entry exists.
//
// Nil entries are skipped. A candidate is accepted even when its width is
// zero, so a post whose only JPEG lacks size metadata still yields a photo.
// When several candidates share the greatest width, the one with the
// lexicographically smallest map key wins, which keeps the result
// independent of Go's randomized map iteration order.
func FindBestPhoto(
	images map[string]*Media,
) (*Media, error) {
	var (
		bestPhoto *Media
		bestKey   string
	)
	for key, photo := range images {
		if photo == nil || !strings.HasSuffix(photo.URL, ".jpg") {
			continue
		}
		switch {
		case bestPhoto == nil,
			photo.Width > bestPhoto.Width,
			photo.Width == bestPhoto.Width && key < bestKey:
			bestPhoto, bestKey = photo, key
		}
	}
	if bestPhoto == nil {
		return nil, fmt.Errorf("no photo found in post")
	}
	return bestPhoto, nil
}
// ParseVideoFormats builds one video format per codec variant of the
// video found in images.
//
// The video is the entry with Duration > 0; the thumbnail, if any, is an
// entry whose URL ends in ".jpg". When several entries qualify, the widest
// one is used (ties broken by map key) so the choice does not depend on
// Go's randomized map iteration order. Nil entries are skipped.
//
// Formats are emitted in a fixed order (AVC, HEVC, VP8, VP9, AV1) and
// variants with an empty URL are omitted. An error is returned when the
// post contains no video entry.
func ParseVideoFormats(
	images map[string]*Media,
) ([]*models.MediaFormat, error) {
	video := widestMedia(images, func(m *Media) bool {
		return m.Duration > 0
	})
	if video == nil {
		return nil, fmt.Errorf("no video found in post")
	}

	var thumbnailURL string
	thumbnail := widestMedia(images, func(m *Media) bool {
		return strings.HasSuffix(m.URL, ".jpg")
	})
	if thumbnail != nil {
		thumbnailURL = thumbnail.URL
	}

	// A slice rather than a map: the order of the returned formats must
	// be stable across calls.
	codecs := []struct {
		Field string
		Codec enums.MediaCodec
	}{
		{"URL", enums.MediaCodecAVC},
		{"H265URL", enums.MediaCodecHEVC},
		{"Vp8URL", enums.MediaCodecVP8},
		{"Vp9URL", enums.MediaCodecVP9},
		{"Av1URL", enums.MediaCodecAV1},
	}

	var formats []*models.MediaFormat
	for _, mapping := range codecs {
		url := getField(video, mapping.Field)
		if url == "" {
			continue
		}
		// NOTE(review): AudioCodec is set to AAC for every variant and
		// video.HasAudio is not consulted - confirm this is intended.
		format := &models.MediaFormat{
			FormatID:   fmt.Sprintf("video_%s", mapping.Codec),
			Type:       enums.MediaTypeVideo,
			VideoCodec: mapping.Codec,
			AudioCodec: enums.MediaCodecAAC,
			URL:        []string{url},
			Width:      int64(video.Width),
			Height:     int64(video.Height),
			Duration:   int64(video.Duration),
		}
		if thumbnailURL != "" {
			format.Thumbnail = []string{thumbnailURL}
		}
		formats = append(formats, format)
	}
	return formats, nil
}

// widestMedia returns the non-nil entry of images accepted by accept that
// has the greatest Width, preferring the smallest map key on ties. It
// returns nil when no entry qualifies.
func widestMedia(images map[string]*Media, accept func(*Media) bool) *Media {
	var (
		best    *Media
		bestKey string
	)
	for key, media := range images {
		if media == nil || !accept(media) {
			continue
		}
		if best == nil || media.Width > best.Width ||
			(media.Width == best.Width && key < bestKey) {
			best, bestKey = media, key
		}
	}
	return best
}
// getField returns the URL stored in the Media field called fieldName.
// Recognized names are "URL", "H265URL", "Vp8URL", "Vp9URL" and "Av1URL";
// any other name yields the empty string.
func getField(media *Media, fieldName string) string {
	if fieldName == "URL" {
		return media.URL
	}
	if fieldName == "H265URL" {
		return media.H265URL
	}
	if fieldName == "Vp8URL" {
		return media.Vp8URL
	}
	if fieldName == "Vp9URL" {
		return media.Vp9URL
	}
	if fieldName == "Av1URL" {
		return media.Av1URL
	}
	return ""
}