9gag: new extractor
This commit is contained in:
parent
3b625c8b0a
commit
60365973d8
6 changed files with 286 additions and 1 deletions
|
@ -2,6 +2,7 @@ package ext
|
|||
|
||||
import (
|
||||
"govd/ext/instagram"
|
||||
"govd/ext/ninegag"
|
||||
"govd/ext/pinterest"
|
||||
"govd/ext/reddit"
|
||||
"govd/ext/tiktok"
|
||||
|
@ -21,5 +22,6 @@ var List = []*models.Extractor{
|
|||
pinterest.ShortExtractor,
|
||||
reddit.Extractor,
|
||||
reddit.ShortExtractor,
|
||||
ninegag.Extractor,
|
||||
// todo: add every ext lol
|
||||
}
|
||||
|
|
132
ext/ninegag/main.go
Normal file
132
ext/ninegag/main.go
Normal file
|
@ -0,0 +1,132 @@
|
|||
package ninegag
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"regexp"
|
||||
|
||||
"govd/enums"
|
||||
"govd/models"
|
||||
"govd/util"
|
||||
|
||||
"github.com/bytedance/sonic"
|
||||
)
|
||||
|
||||
// apiEndpoint is the 9gag API URL used for post lookups; GetPostData
// appends the post ID to it as the "id" query parameter.
const apiEndpoint = "https://9gag.com/v1/post"

// postNotFound is the text compared against the API's meta.errorMessage
// field to recognise a missing post.
const postNotFound = "Post not found"
|
||||
|
||||
// 9gag responds with 403 unless the request
// carries a real browser TLS fingerprint
|
||||
var httpSession = util.NewChromeClient()
|
||||
|
||||
var Extractor = &models.Extractor{
|
||||
Name: "9GAG",
|
||||
CodeName: "ninegag",
|
||||
Type: enums.ExtractorTypeSingle,
|
||||
Category: enums.ExtractorCategorySocial,
|
||||
URLPattern: regexp.MustCompile(`https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)`),
|
||||
Host: []string{"9gag.com"},
|
||||
|
||||
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
|
||||
mediaList, err := MediaListFromAPI(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get media: %w", err)
|
||||
}
|
||||
return &models.ExtractorResponse{
|
||||
MediaList: mediaList,
|
||||
}, nil
|
||||
},
|
||||
}
|
||||
|
||||
func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
|
||||
var mediaList []*models.Media
|
||||
|
||||
contentID := ctx.MatchedContentID
|
||||
contentURL := ctx.MatchedContentURL
|
||||
|
||||
postData, err := GetPostData(contentID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get post data: %w", err)
|
||||
}
|
||||
|
||||
media := ctx.Extractor.NewMedia(contentID, contentURL)
|
||||
media.SetCaption(postData.Title)
|
||||
|
||||
if postData.Nsfw == 1 {
|
||||
media.NSFW = true
|
||||
}
|
||||
|
||||
switch postData.Type {
|
||||
case "Photo":
|
||||
bestPhoto, err := FindBestPhoto(postData.Images)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
media.AddFormat(&models.MediaFormat{
|
||||
FormatID: "photo",
|
||||
Type: enums.MediaTypePhoto,
|
||||
URL: []string{bestPhoto.URL},
|
||||
Width: int64(bestPhoto.Width),
|
||||
Height: int64(bestPhoto.Height),
|
||||
})
|
||||
|
||||
case "Animated":
|
||||
videoFormats, err := ParseVideoFormats(postData.Images)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, format := range videoFormats {
|
||||
media.AddFormat(format)
|
||||
}
|
||||
}
|
||||
|
||||
if len(media.Formats) > 0 {
|
||||
mediaList = append(mediaList, media)
|
||||
}
|
||||
|
||||
return mediaList, nil
|
||||
}
|
||||
|
||||
func GetPostData(postID string) (*Post, error) {
|
||||
url := apiEndpoint + "?id=" + postID
|
||||
req, err := http.NewRequest(http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
req.Header.Set("User-Agent", util.ChromeUA)
|
||||
|
||||
resp, err := httpSession.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("invalid status code: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var response Response
|
||||
decoder := sonic.ConfigFastest.NewDecoder(resp.Body)
|
||||
err = decoder.Decode(&response)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to decode response: %w", err)
|
||||
}
|
||||
|
||||
if response.Meta != nil && response.Meta.Status != "Success" {
|
||||
return nil, fmt.Errorf("API error: %s", response.Meta.Status)
|
||||
}
|
||||
|
||||
if response.Meta != nil && response.Meta.ErrorMessage == postNotFound {
|
||||
return nil, util.ErrUnavailable
|
||||
}
|
||||
|
||||
if response.Data == nil || response.Data.Post == nil {
|
||||
return nil, fmt.Errorf("no post data found")
|
||||
}
|
||||
|
||||
return response.Data.Post, nil
|
||||
}
|
43
ext/ninegag/models.go
Normal file
43
ext/ninegag/models.go
Normal file
|
@ -0,0 +1,43 @@
|
|||
package ninegag
|
||||
|
||||
type Response struct {
|
||||
Meta *Meta `json:"meta"`
|
||||
Data *Data `json:"data"`
|
||||
}
|
||||
|
||||
// Meta is the status section of an API response.
type Meta struct {
	Timestamp int `json:"timestamp"`
	// Status is compared against "Success" by GetPostData; any other
	// value is treated as an API error.
	Status string `json:"status"`
	Sid    string `json:"sid"`
	// ErrorMessage is compared against postNotFound by GetPostData to
	// detect a missing post.
	ErrorMessage string `json:"errorMessage"`
}
|
||||
|
||||
// Media is one entry of a post's "images" map. The same shape is used for
// still images and for video variants.
type Media struct {
	Width  int    `json:"width"`
	Height int    `json:"height"`
	URL    string `json:"url"`
	// HasAudio is an integer flag in the API payload.
	// NOTE(review): presumably 1 means an audio track exists; confirm.
	HasAudio int `json:"hasAudio"`
	// Duration is greater than zero for video entries; ParseVideoFormats
	// relies on that to tell videos from images.
	Duration int `json:"duration"`
	// Alternative encodings of the same video; empty when not provided.
	Vp8URL  string `json:"vp8Url"`
	H265URL string `json:"h265Url"`
	Vp9URL  string `json:"vp9Url"`
	Av1URL  string `json:"av1Url"`
}
|
||||
|
||||
type Post struct {
|
||||
ID string `json:"id"`
|
||||
URL string `json:"url"`
|
||||
Title string `json:"title"`
|
||||
Description string `json:"description"`
|
||||
Type string `json:"type"`
|
||||
Nsfw int `json:"nsfw"`
|
||||
CreationTs int `json:"creationTs"`
|
||||
GamFlagged bool `json:"gamFlagged"`
|
||||
IsVoteMasked int `json:"isVoteMasked"`
|
||||
HasLongPostCover int `json:"hasLongPostCover"`
|
||||
Images map[string]*Media `json:"images"`
|
||||
}
|
||||
|
||||
type Data struct {
|
||||
Post *Post `json:"post"`
|
||||
}
|
106
ext/ninegag/util.go
Normal file
106
ext/ninegag/util.go
Normal file
|
@ -0,0 +1,106 @@
|
|||
package ninegag
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"govd/enums"
|
||||
"govd/models"
|
||||
)
|
||||
|
||||
func FindBestPhoto(
|
||||
images map[string]*Media,
|
||||
) (*Media, error) {
|
||||
var bestPhoto *Media
|
||||
var maxWidth int
|
||||
|
||||
for _, photo := range images {
|
||||
if !strings.HasSuffix(photo.URL, ".jpg") {
|
||||
continue
|
||||
}
|
||||
if photo.Width > maxWidth {
|
||||
maxWidth = photo.Width
|
||||
bestPhoto = photo
|
||||
}
|
||||
}
|
||||
|
||||
if bestPhoto == nil {
|
||||
return nil, fmt.Errorf("no photo found in post")
|
||||
}
|
||||
|
||||
return bestPhoto, nil
|
||||
}
|
||||
|
||||
func ParseVideoFormats(
|
||||
images map[string]*Media,
|
||||
) ([]*models.MediaFormat, error) {
|
||||
var formats []*models.MediaFormat
|
||||
var video *Media
|
||||
var thumbnailURL string
|
||||
|
||||
for _, media := range images {
|
||||
if media.Duration > 0 {
|
||||
video = media
|
||||
}
|
||||
if strings.HasSuffix(media.URL, ".jpg") {
|
||||
thumbnailURL = media.URL
|
||||
}
|
||||
}
|
||||
if video == nil {
|
||||
return nil, fmt.Errorf("no video found in post")
|
||||
}
|
||||
|
||||
codecMapping := map[string]struct {
|
||||
Field string
|
||||
Codec enums.MediaCodec
|
||||
}{
|
||||
"url": {"URL", enums.MediaCodecAVC},
|
||||
"h265Url": {"H265URL", enums.MediaCodecHEVC},
|
||||
"vp8Url": {"Vp8URL", enums.MediaCodecVP8},
|
||||
"vp9Url": {"Vp9URL", enums.MediaCodecVP9},
|
||||
"av1Url": {"Av1URL", enums.MediaCodecAV1},
|
||||
}
|
||||
|
||||
for _, mapping := range codecMapping {
|
||||
url := getField(video, mapping.Field)
|
||||
if url == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
format := &models.MediaFormat{
|
||||
FormatID: fmt.Sprintf("video_%s", mapping.Codec),
|
||||
Type: enums.MediaTypeVideo,
|
||||
VideoCodec: mapping.Codec,
|
||||
AudioCodec: enums.MediaCodecAAC,
|
||||
URL: []string{url},
|
||||
Width: int64(video.Width),
|
||||
Height: int64(video.Height),
|
||||
Duration: int64(video.Duration),
|
||||
}
|
||||
|
||||
if thumbnailURL != "" {
|
||||
format.Thumbnail = []string{thumbnailURL}
|
||||
}
|
||||
|
||||
formats = append(formats, format)
|
||||
}
|
||||
|
||||
return formats, nil
|
||||
}
|
||||
|
||||
func getField(media *Media, fieldName string) string {
|
||||
switch fieldName {
|
||||
case "URL":
|
||||
return media.URL
|
||||
case "H265URL":
|
||||
return media.H265URL
|
||||
case "Vp8URL":
|
||||
return media.Vp8URL
|
||||
case "Vp9URL":
|
||||
return media.Vp9URL
|
||||
case "Av1URL":
|
||||
return media.Av1URL
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
1
go.mod
1
go.mod
|
@ -25,6 +25,7 @@ require (
|
|||
github.com/stretchr/testify v1.10.0 // indirect
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||
golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
)
|
||||
|
||||
require (
|
||||
|
|
3
go.sum
3
go.sum
|
@ -107,8 +107,9 @@ golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGm
|
|||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
|
||||
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue