Compare commits

...

4 commits

Author SHA1 Message Date
root
9a53e5c89c Merge branch 'main' of https://github.com/govdbot/govd
All checks were successful
Build and deploy / build-and-push-image (push) Successful in 8m37s
2025-04-28 16:00:02 +00:00
dogghi no
c7a2612056
clean code, add golangci config, preallocate some slices and avoid copying body before parsing json (#9) 2025-04-28 17:23:40 +02:00
Michele
0a146c515c
Update google chrome extension URL (#8) 2025-04-28 17:23:29 +02:00
stefanodvx
68d578e642 more code cleanup 👍 💢 2025-04-28 17:14:23 +02:00
14 changed files with 75 additions and 65 deletions

1
.gitignore vendored
View file

@@ -6,6 +6,7 @@
main_test.go
old/
bin/
.env
ext-cfg.yaml

19
.golangci.yml Normal file
View file

@@ -0,0 +1,19 @@
run:
timeout: 5m
linters:
enable:
- bodyclose
- gocritic
- unconvert
- ineffassign
- staticcheck
- prealloc
- nilerr
- gosimple
- asasalint
disable:
- errcheck
issues:
exclude-use-default: false

View file

@@ -1,5 +1,5 @@
# authentication
some extractors require cookies to access the content. you can export them from your browser in netscape format and place the file in `cookies` folder (e.g. `cookies/reddit.txt`). you can easily export cookies using _Get cookies.txt LOCALLY_ extension for your browser ([chrome](https://chrome.google.com/webstore/detail/get-cookies-txt-locally/nhdogjmejiglipccpnnnanhbledajbpd) - [firefox](https://addons.mozilla.org/en-US/firefox/addon/get-cookies-txt-locally/)).
some extractors require cookies to access the content. you can export them from your browser in netscape format and place the file in `cookies` folder (e.g. `cookies/reddit.txt`). you can easily export cookies using _Get cookies.txt LOCALLY_ extension for your browser ([chrome](https://chromewebstore.google.com/detail/cclelndahbckbenkjhflpdbgdldlbecc?utm_source=item-share-cb) - [firefox](https://addons.mozilla.org/en-US/firefox/addon/get-cookies-txt-locally/)).
extractors that **need** authentication:
- reddit

View file

@@ -114,7 +114,7 @@ func HandleDefaultStoredFormatDownload(
storedMedias[0],
isCaptionEnabled,
)
var medias []*models.DownloadedMedia
medias := make([]*models.DownloadedMedia, 0, len(storedMedias))
for _, media := range storedMedias {
medias = append(medias, &models.DownloadedMedia{
FilePath: "",

View file

@@ -103,12 +103,12 @@ func StoreMedias(
msgs []gotgbot.Message,
medias []*models.DownloadedMedia,
) error {
var storedMedias []*models.Media
if len(medias) == 0 {
return errors.New("no media to store")
}
storedMedias := make([]*models.Media, 0, len(medias))
for idx, msg := range msgs {
fileID := GetMessageFileID(&msg)
if len(fileID) == 0 {

View file

@@ -137,12 +137,12 @@ func GetEmbedMediaList(
}
func GetIGramMediaList(ctx *models.DownloadContext) ([]*models.Media, error) {
var mediaList []*models.Media
postURL := ctx.MatchedContentURL
details, err := GetFromIGram(ctx, postURL)
if err != nil {
return nil, fmt.Errorf("failed to get post: %w", err)
}
mediaList := make([]*models.Media, 0, len(details.Items))
for _, item := range details.Items {
media := ctx.Extractor.NewMedia(
ctx.MatchedContentID,

View file

@@ -1,6 +1,7 @@
package instagram
import (
"bytes"
"crypto/rand"
"crypto/sha256"
"encoding/hex"
@@ -137,14 +138,14 @@ func ParseGQLMedia(
func ParseEmbedGQL(
body []byte,
) (*Media, error) {
match := embedPattern.FindStringSubmatch(string(body))
match := embedPattern.FindSubmatch(body)
if len(match) < 2 {
return nil, errors.New("failed to find JSON in response")
}
jsonData := match[1]
var data map[string]any
if err := json5.Unmarshal([]byte(jsonData), &data); err != nil {
if err := json5.Unmarshal(jsonData, &data); err != nil {
return nil, fmt.Errorf("failed to unmarshal JSON: %w", err)
}
igCtx := util.TraverseJSON(data, "contextJSON")
@@ -193,39 +194,27 @@ func BuildIGramPayload(contentURL string) (io.Reader, error) {
if err != nil {
return nil, fmt.Errorf("error marshalling payload: %w", err)
}
reader := strings.NewReader(string(parsedPayload))
reader := bytes.NewReader(parsedPayload)
return reader, nil
}
func ParseIGramResponse(body []byte) (*IGramResponse, error) {
var rawResponse any
// try to unmarshal as a single IGramMedia and then as a slice
var media IGramMedia
if err := sonic.ConfigFastest.Unmarshal(body, &rawResponse); err != nil {
return nil, fmt.Errorf("failed to decode response1: %w", err)
}
switch rawResponse.(type) {
case []any:
// array of IGramMedia
var media []*IGramMedia
if err := sonic.ConfigFastest.Unmarshal(body, &media); err != nil {
return nil, fmt.Errorf("failed to decode response2: %w", err)
// try with slice
var mediaList []*IGramMedia
if err := sonic.ConfigFastest.Unmarshal(body, &mediaList); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
}
return &IGramResponse{
Items: media,
Items: mediaList,
}, nil
case map[string]any:
// single IGramMedia
var media IGramMedia
if err := sonic.ConfigFastest.Unmarshal(body, &media); err != nil {
return nil, fmt.Errorf("failed to decode response3: %w", err)
}
return &IGramResponse{
Items: []*IGramMedia{&media},
}, nil
default:
return nil, fmt.Errorf("unexpected response type: %T", rawResponse)
}
}
func GetCDNURL(contentURL string) (string, error) {

View file

@@ -36,7 +36,6 @@ func FindBestPhoto(
func ParseVideoFormats(
images map[string]*Media,
) ([]*models.MediaFormat, error) {
var formats []*models.MediaFormat
var video *Media
var thumbnailURL string
@@ -63,6 +62,8 @@ func ParseVideoFormats(
"av1Url": {"Av1URL", enums.MediaCodecAV1},
}
formats := make([]*models.MediaFormat, 0, len(codecMapping))
for _, mapping := range codecMapping {
url := getField(video, mapping.Field)
if url == "" {

View file

@@ -31,6 +31,7 @@ func ParseVideoObject(videoObj *Videos) ([]*models.MediaFormat, error) {
if err != nil {
return nil, fmt.Errorf("failed to extract hls formats: %w", err)
}
formats = make([]*models.MediaFormat, 0, len(hlsFormats))
for _, hlsFormat := range hlsFormats {
hlsFormat.Duration = video.Duration / 1000
hlsFormat.Thumbnail = []string{video.Thumbnail}

View file

@@ -176,7 +176,7 @@ func GetVideoAPI(
decoder := sonic.ConfigFastest.NewDecoder(resp.Body)
err = decoder.Decode(&data)
if err != nil {
return nil, fmt.Errorf("failed to unmarshal response: %w", err)
return nil, fmt.Errorf("failed to decode response: %w", err)
}
videoData, err := FindVideoData(data, awemeID)
if err != nil {

View file

@@ -45,12 +45,12 @@ var ShortExtractor = &models.Extractor{
if err != nil {
return nil, fmt.Errorf("failed to read body: %w", err)
}
matchedURL := Extractor.URLPattern.FindStringSubmatch(string(body))
matchedURL := Extractor.URLPattern.FindSubmatch(body)
if matchedURL == nil {
return nil, errors.New("failed to find url in body")
}
return &models.ExtractorResponse{
URL: matchedURL[0],
URL: string(matchedURL[0]),
}, nil
},
}
@@ -91,11 +91,12 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
caption := CleanCaption(tweetData.FullText)
var mediaEntities []MediaEntity
if tweetData.ExtendedEntities != nil && len(tweetData.ExtendedEntities.Media) > 0 {
switch {
case tweetData.ExtendedEntities != nil && len(tweetData.ExtendedEntities.Media) > 0:
mediaEntities = tweetData.ExtendedEntities.Media
} else if tweetData.Entities != nil && len(tweetData.Entities.Media) > 0 {
case tweetData.Entities != nil && len(tweetData.Entities.Media) > 0:
mediaEntities = tweetData.Entities.Media
} else {
default:
return nil, nil
}
@@ -173,13 +174,9 @@ func GetTweetAPI(
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("invalid response code: %s", resp.Status)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to read body: %w", err)
}
var apiResponse APIResponse
err = sonic.ConfigFastest.Unmarshal(body, &apiResponse)
err = sonic.ConfigFastest.NewDecoder(resp.Body).Decode(&apiResponse)
if err != nil {
return nil, fmt.Errorf("failed to parse response: %w", err)
}
@@ -190,11 +187,12 @@ func GetTweetAPI(
}
var tweet *Tweet
if result.Tweet != nil {
switch {
case result.Tweet != nil:
tweet = result.Tweet
} else if result.Legacy != nil {
case result.Legacy != nil:
tweet = result.Legacy
} else {
default:
return nil, errors.New("failed to get tweet data")
}
return tweet, nil

View file

@@ -246,7 +246,7 @@ func (media *Media) GetSortedFormats() []*MediaFormat {
}
// combine the best video and audio into a final list
var finalSortedList []*MediaFormat
finalSortedList := make([]*MediaFormat, 0, len(groupedVideos)+len(groupedAudios)+len(media.Formats))
for _, best := range groupedVideos {
finalSortedList = append(finalSortedList, best)
}

View file

@@ -107,13 +107,10 @@ func copyHeaders(source, destination http.Header) {
}
func parseProxyResponse(proxyResp *http.Response, originalReq *http.Request) (*http.Response, error) {
body, err := io.ReadAll(proxyResp.Body)
if err != nil {
return nil, fmt.Errorf("error reading proxy response: %w", err)
}
var response models.EdgeProxyResponse
if err := sonic.ConfigFastest.Unmarshal(body, &response); err != nil {
decoder := sonic.ConfigFastest.NewDecoder(proxyResp.Body)
if err := decoder.Decode(&response); err != nil {
return nil, fmt.Errorf("error parsing proxy response: %w", err)
}

View file

@@ -246,33 +246,37 @@ func parseVariantType(
}
func getVideoCodec(codecs string) enums.MediaCodec {
if strings.Contains(codecs, "avc") || strings.Contains(codecs, "h264") {
switch {
case strings.Contains(codecs, "avc"), strings.Contains(codecs, "h264"):
return enums.MediaCodecAVC
} else if strings.Contains(codecs, "hvc") || strings.Contains(codecs, "h265") {
case strings.Contains(codecs, "hvc"), strings.Contains(codecs, "h265"):
return enums.MediaCodecHEVC
} else if strings.Contains(codecs, "av01") {
case strings.Contains(codecs, "av01"):
return enums.MediaCodecAV1
} else if strings.Contains(codecs, "vp9") {
case strings.Contains(codecs, "vp9"):
return enums.MediaCodecVP9
} else if strings.Contains(codecs, "vp8") {
case strings.Contains(codecs, "vp8"):
return enums.MediaCodecVP8
}
default:
return ""
}
}
func getAudioCodec(codecs string) enums.MediaCodec {
if strings.Contains(codecs, "mp4a") {
switch {
case strings.Contains(codecs, "mp4a"):
return enums.MediaCodecAAC
} else if strings.Contains(codecs, "opus") {
case strings.Contains(codecs, "opus"):
return enums.MediaCodecOpus
} else if strings.Contains(codecs, "mp3") {
case strings.Contains(codecs, "mp3"):
return enums.MediaCodecMP3
} else if strings.Contains(codecs, "flac") {
case strings.Contains(codecs, "flac"):
return enums.MediaCodecFLAC
} else if strings.Contains(codecs, "vorbis") {
case strings.Contains(codecs, "vorbis"):
return enums.MediaCodecVorbis
}
default:
return ""
}
}
func resolveURL(base *url.URL, uri string) string {