diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..706da5e --- /dev/null +++ b/.env.example @@ -0,0 +1,10 @@ +BOT_API_URL=https://api.telegram.org +BOT_TOKEN=12345678:ABC-DEF1234ghIkl-zyx57W2P0s + +DB_HOST=localhost +DB_PORT=3306 +DB_NAME=govd +DB_USER=govd +DB_PASSWORD=password + +REPO_URL=https://github.com/govdbot/govd \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2397ca0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,16 @@ +*.exe +*.json +*.txt +*.py +*.html + +old/ + +.env +.idea/ + +downloads + +govd + +.DS_Store \ No newline at end of file diff --git a/LICENSE b/LICENSE index fed9d8e..d1021ee 100644 --- a/LICENSE +++ b/LICENSE @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..82c30e9 --- /dev/null +++ b/README.md @@ -0,0 +1,53 @@ +# govd + +a telegram bot for downloading media from various platforms + +this project was born after the discontinuation of a highly popular bot known as UVD, and draws significant inspiration from [yt-dlp](https://github.com/yt-dlp/yt-dlp) + +- official instance: [@govd_bot](https://t.me/govd_bot) +- support group: [govdsupport](https://t.me/govdsupport) + +## features + +- download media from various platforms +- download videos, photos, and audio +- inline mode support +- group chat support with customizable settings +- media caption support + +## dependencies + +- ffmpeg >= 6.1.1 +- libheif >= 1.19.7 +- pkg-config +- mysql db + + +## botapi + +to avoid limits on files, you should host your own telegram botapi. 
public bot instance is currently running under a botapi fork, [tdlight-telegram-bot-api](https://github.com/tdlight-team/tdlight-telegram-bot-api) + +## installation + +```bash +git clone https://github.com/govdbot/govd.git +cd govd +# edit .env file with your bot token and database credentials +sh build.sh +``` + +## cookies + +some extractors require cookies for download. to add your cookies, just insert a txt file in cookies folder (netscape format) + +## todo + +- [ ] add more extractors +- [ ] switch to sonic json parser +- [ ] switch to native libav +- [ ] add tests +- [ ] add dockerfile and compose +- [ ] improve error handling +- [ ] add support for telegram webhooks +- [ ] switch to pgsql (?) +- [ ] better API (?) \ No newline at end of file diff --git a/bot/core/default.go b/bot/core/default.go new file mode 100644 index 0000000..883a125 --- /dev/null +++ b/bot/core/default.go @@ -0,0 +1,134 @@ +package core + +import ( + "fmt" + "govd/database" + "govd/models" + + "github.com/PaulSonOfLars/gotgbot/v2" + "github.com/PaulSonOfLars/gotgbot/v2/ext" +) + +func HandleDefaultFormatDownload( + bot *gotgbot.Bot, + ctx *ext.Context, + dlCtx *models.DownloadContext, +) error { + storedMedias, err := database.GetDefaultMedias( + dlCtx.Extractor.CodeName, + dlCtx.MatchedContentID, + ) + if err != nil { + return fmt.Errorf("failed to get default medias: %w", err) + } + + if len(storedMedias) > 0 { + return HandleDefaultStoredFormatDownload( + bot, ctx, dlCtx, storedMedias, + ) + } + + response, err := dlCtx.Extractor.Run(dlCtx) + if err != nil { + return fmt.Errorf("extractor fetch run failed: %w", err) + } + + mediaList := response.MediaList + if len(mediaList) == 0 { + return fmt.Errorf("no media found for content ID: %s", dlCtx.MatchedContentID) + } + + for i := range mediaList { + defaultFormat := mediaList[i].GetDefaultFormat() + if defaultFormat == nil { + return fmt.Errorf("no default format found for media at index %d", i) + } + if len(defaultFormat.URL) == 0 
{ + return fmt.Errorf("media format at index %d has no URL", i) + } + // ensure we can merge video and audio formats + ensureMergeFormats(mediaList[i], defaultFormat) + mediaList[i].Format = defaultFormat + } + + medias, err := DownloadMedias(mediaList, nil) + if err != nil { + return fmt.Errorf("failed to download media list: %w", err) + } + + if len(medias) == 0 { + return fmt.Errorf("no formats downloaded") + } + + isCaptionEnabled := true + if dlCtx.GroupSettings != nil && !*dlCtx.GroupSettings.Captions { + isCaptionEnabled = false + } + messageCaption := FormatCaption( + mediaList[0], + isCaptionEnabled, + ) + + // plugins act as post-processing for the media. + // they are run after the media is downloaded + // and before it is sent to the user + // this allows for things like merging audio and video, etc. + for _, media := range medias { + for _, plugin := range media.Media.Format.Plugins { + err = plugin(media) + if err != nil { + return fmt.Errorf("failed to run plugin: %w", err) + } + } + } + + _, err = SendMedias( + bot, ctx, dlCtx, + medias, + &models.SendMediaFormatsOptions{ + Caption: messageCaption, + IsStored: false, + }, + ) + if err != nil { + return fmt.Errorf("failed to send formats: %w", err) + } + + return nil +} + +func HandleDefaultStoredFormatDownload( + bot *gotgbot.Bot, + ctx *ext.Context, + dlCtx *models.DownloadContext, + storedMedias []*models.Media, +) error { + isCaptionEnabled := true + if dlCtx.GroupSettings != nil && !*dlCtx.GroupSettings.Captions { + isCaptionEnabled = false + } + messageCaption := FormatCaption( + storedMedias[0], + isCaptionEnabled, + ) + var formats []*models.DownloadedMedia + for _, media := range storedMedias { + formats = append(formats, &models.DownloadedMedia{ + FilePath: "", + ThumbnailFilePath: "", + Media: media, + }) + } + _, err := SendMedias( + bot, ctx, dlCtx, + formats, + &models.SendMediaFormatsOptions{ + Caption: messageCaption, + IsStored: true, + }, + ) + if err != nil { + return 
fmt.Errorf("failed to send media: %w", err) + } + return nil +} diff --git a/bot/core/download.go b/bot/core/download.go new file mode 100644 index 0000000..f84d749 --- /dev/null +++ b/bot/core/download.go @@ -0,0 +1,187 @@ +package core + +import ( + "context" + "fmt" + "path/filepath" + "sort" + "sync" + + "govd/enums" + "govd/models" + "govd/util" +) + +func downloadMediaItem( + ctx context.Context, + media *models.Media, + config *models.DownloadConfig, + idx int, +) (*models.DownloadedMedia, error) { + if config == nil { + config = util.DefaultConfig() + } + + format := media.Format + if format == nil { + return nil, fmt.Errorf("media format is nil") + } + + fileName := format.GetFileName() + var filePath string + var thumbnailFilePath string + + if format.Type != enums.MediaTypePhoto { + if len(format.Segments) == 0 { + path, err := util.DownloadFile( + ctx, format.URL, + fileName, config, + ) + if err != nil { + return nil, fmt.Errorf("failed to download file: %w", err) + } + filePath = path + } else { + path, err := util.DownloadFileWithSegments( + ctx, format.Segments, + fileName, config, + ) + if err != nil { + return nil, fmt.Errorf("failed to download segments: %w", err) + } + filePath = path + } + + if format.Type == enums.MediaTypeVideo || format.Type == enums.MediaTypeAudio { + path, err := getFileThumbnail(format, filePath) + if err != nil { + return nil, fmt.Errorf("failed to get thumbnail: %w", err) + } + thumbnailFilePath = path + } + + if format.Type == enums.MediaTypeVideo { + if format.Width == 0 || format.Height == 0 || format.Duration == 0 { + insertVideoInfo(format, filePath) + } + } + } else { + file, err := util.DownloadFileInMemory(ctx, format.URL, config) + if err != nil { + return nil, fmt.Errorf("failed to download image: %w", err) + } + path := filepath.Join(config.DownloadDir, fileName) + if err := util.ImgToJPEG(file, path); err != nil { + return nil, fmt.Errorf("failed to convert image: %w", err) + } + filePath = path + } + + 
return &models.DownloadedMedia{ + FilePath: filePath, + ThumbnailFilePath: thumbnailFilePath, + Media: media, + Index: idx, + }, nil +} + +func StartDownloadTask( + media *models.Media, + idx int, + config *models.DownloadConfig, +) (*models.DownloadedMedia, error) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + return downloadMediaItem(ctx, media, config, idx) +} + +func StartConcurrentDownload( + media *models.Media, + resultsChan chan<- models.DownloadedMedia, + config *models.DownloadConfig, + errChan chan<- error, + wg *sync.WaitGroup, + idx int, +) { + defer wg.Done() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + result, err := downloadMediaItem(ctx, media, config, idx) + if err != nil { + errChan <- err + return + } + + resultsChan <- *result +} + +func DownloadMedia( + media *models.Media, + config *models.DownloadConfig, +) (*models.DownloadedMedia, error) { + return StartDownloadTask(media, 0, config) +} +
+// DownloadMedias downloads every entry of medias and returns the downloaded
+// items sorted by their index in the input slice.
+//
+// An empty input yields an empty, non-nil slice. A single media is downloaded
+// synchronously through DownloadMedia. Two or more medias are downloaded
+// concurrently, one goroutine per media.
+//
+// If any concurrent download fails, the first error received is returned
+// together with the (unsorted) items that did succeed; callers must check the
+// error before using the slice.
+func DownloadMedias(
+	medias []*models.Media,
+	config *models.DownloadConfig,
+) ([]*models.DownloadedMedia, error) {
+	if len(medias) == 0 {
+		return []*models.DownloadedMedia{}, nil
+	}
+
+	if len(medias) == 1 {
+		result, err := DownloadMedia(medias[0], config)
+		if err != nil {
+			return nil, err
+		}
+		return []*models.DownloadedMedia{result}, nil
+	}
+
+	// both channels are buffered to len(medias), so a worker never blocks on send
+	resultsChan := make(chan models.DownloadedMedia, len(medias))
+	errChan := make(chan error, len(medias))
+	var wg sync.WaitGroup
+
+	for idx, media := range medias {
+		wg.Add(1)
+		go StartConcurrentDownload(media, resultsChan, config, errChan, &wg, idx)
+	}
+
+	go func() {
+		wg.Wait()
+		close(resultsChan)
+		close(errChan)
+	}()
+
+	var results []*models.DownloadedMedia
+
+	// resultsChan is closed only after every worker has returned,
+	// so this loop doubles as the wait for all downloads
+	for result := range resultsChan {
+		resultCopy := result // create a copy to avoid pointer issues
+		results = append(results, &resultCopy)
+	}
+
+	// the error channel must be read after the workers are done: checking it
+	// up front (before any download could have failed) silently dropped errors.
+	// the receive yields nil once errChan is closed without a pending error
+	if firstError := <-errChan; firstError != nil {
+		return results, firstError
+	}
+
+	if len(results) > 1 {
+		sort.SliceStable(results, func(i, j int) bool {
+			return results[i].Index < results[j].Index
+		})
+	}
+
+	return results, nil
+}
diff --git a/bot/core/inline.go b/bot/core/inline.go new file mode 100644 index 0000000..b81eae5 --- /dev/null +++ b/bot/core/inline.go @@ -0,0 +1,251 @@ +package core + +import ( + "fmt" + "log" + + "github.com/google/uuid" + "github.com/pkg/errors" + + "govd/database" + "govd/enums" + "govd/models" + "govd/util" + + "github.com/PaulSonOfLars/gotgbot/v2" + "github.com/PaulSonOfLars/gotgbot/v2/ext" +) + +var InlineTasks = make(map[string]*models.DownloadContext) + +func HandleInline( + bot *gotgbot.Bot, + ctx *ext.Context, + dlCtx *models.DownloadContext, +) error { + if dlCtx.Extractor.Type != enums.ExtractorTypeSingle { + return util.ErrNotImplemented + } + contentID := dlCtx.MatchedContentID + cached, err := database.GetDefaultMedias( + dlCtx.Extractor.CodeName, + contentID, + ) + if err != nil { + return err + } + if len(cached) > 0 { + if len(cached) > 1 { + return util.ErrInlineMediaGroup + } + err = HandleInlineCached( + bot, ctx, + dlCtx, cached[0], + ) + if err != nil { + return err + } + return nil + } + err = StartInlineTask(bot, ctx, dlCtx) + if err != nil { + return err + } + return nil +} + +func HandleInlineCached( + bot *gotgbot.Bot, + ctx *ext.Context, + dlCtx *models.DownloadContext, + media *models.Media, +) error { + var result gotgbot.InlineQueryResult + + format := media.Format + resultID := fmt.Sprintf("%d:%s", ctx.EffectiveUser.Id, format.FormatID) + resultTitle := "share" + mediaCaption := FormatCaption(media, true) + _, inputFileType := format.GetFormatInfo() + + switch inputFileType { + case "photo": + result = &gotgbot.InlineQueryResultCachedPhoto{ + Id: resultID, + PhotoFileId: format.FileID, + Title: resultTitle, + Caption: mediaCaption, + ParseMode: "HTML", + } + case "video": + result = 
&gotgbot.InlineQueryResultCachedVideo{ + Id: resultID, + VideoFileId: format.FileID, + Title: resultTitle, + Caption: mediaCaption, + ParseMode: "HTML", + } + case "audio": + result = &gotgbot.InlineQueryResultCachedAudio{ + Id: resultID, + AudioFileId: format.FileID, + Caption: mediaCaption, + ParseMode: "HTML", + } + case "document": + result = &gotgbot.InlineQueryResultCachedDocument{ + Id: resultID, + DocumentFileId: format.FileID, + Title: resultTitle, + Caption: mediaCaption, + ParseMode: "HTML", + } + default: + return errors.New("unsupported input file type") + } + ctx.InlineQuery.Answer( + bot, []gotgbot.InlineQueryResult{result}, + &gotgbot.AnswerInlineQueryOpts{ + CacheTime: 1, + IsPersonal: true, + }, + ) + return nil +} + +func HandleInlineCachedResult( + bot *gotgbot.Bot, + ctx *ext.Context, + dlCtx *models.DownloadContext, + media *models.Media, +) error { + format := media.Format + messageCaption := FormatCaption(media, true) + inputMedia, err := format.GetInputMediaWithFileID(messageCaption) + if err != nil { + return err + } + + _, _, err = bot.EditMessageMedia( + inputMedia, + &gotgbot.EditMessageMediaOpts{ + InlineMessageId: ctx.ChosenInlineResult.InlineMessageId, + }, + ) + if err != nil { + return err + } + return nil +} + +func StartInlineTask( + bot *gotgbot.Bot, + ctx *ext.Context, + dlCtx *models.DownloadContext, +) error { + randomID, err := uuid.NewUUID() + if err != nil { + return errors.New("could not generate task ID") + } + taskID := randomID.String() + inlineResult := &gotgbot.InlineQueryResultArticle{ + Id: taskID, + Title: "share", + InputMessageContent: &gotgbot.InputTextMessageContent{ + MessageText: "loading media plese wait...", + ParseMode: "HTML", + LinkPreviewOptions: &gotgbot.LinkPreviewOptions{ + IsDisabled: true, + }, + }, + ReplyMarkup: &gotgbot.InlineKeyboardMarkup{ + InlineKeyboard: [][]gotgbot.InlineKeyboardButton{ + { + { + Text: "...", + CallbackData: "inline:loading", + }, + }, + }, + }, + } + ok, err := 
ctx.InlineQuery.Answer( + bot, []gotgbot.InlineQueryResult{inlineResult}, + &gotgbot.AnswerInlineQueryOpts{ + CacheTime: 1, + IsPersonal: true, + }, + ) + if err != nil { + log.Println("failed to answer inline query:", err) + } + if !ok { + log.Println("failed to answer inline query") + return nil + } + InlineTasks[taskID] = dlCtx + return nil +} +
+// GetInlineFormat runs the extractor for dlCtx, downloads the single media it
+// yields, sends it with SendMedias, deletes the first sent message, stores
+// the media with StoreMedias and finally delivers it on mediaChan.
+//
+// Exactly one value is sent per call: the media on mediaChan on success, or
+// an error on errChan on failure. Media groups (more than one item) are
+// rejected with util.ErrInlineMediaGroup.
+func GetInlineFormat(
+	bot *gotgbot.Bot,
+	ctx *ext.Context,
+	dlCtx *models.DownloadContext,
+	mediaChan chan<- *models.Media,
+	errChan chan<- error,
+) {
+	response, err := dlCtx.Extractor.Run(dlCtx)
+	if err != nil {
+		errChan <- fmt.Errorf("failed to get media: %w", err)
+		return
+	}
+	mediaList := response.MediaList
+	if len(mediaList) == 0 {
+		errChan <- fmt.Errorf("no media found for content ID: %s", dlCtx.MatchedContentID)
+		// without this return the code below indexes mediaList[0] and panics
+		return
+	}
+	if len(mediaList) > 1 {
+		errChan <- util.ErrInlineMediaGroup
+		return
+	}
+	for i := range mediaList {
+		defaultFormat := mediaList[i].GetDefaultFormat()
+		if defaultFormat == nil {
+			errChan <- fmt.Errorf("no default format found for media at index %d", i)
+			return
+		}
+		if len(defaultFormat.URL) == 0 {
+			errChan <- fmt.Errorf("media format at index %d has no URL", i)
+			return
+		}
+		// ensure we can merge video and audio formats
+		ensureMergeFormats(mediaList[i], defaultFormat)
+		mediaList[i].Format = defaultFormat
+	}
+	messageCaption := FormatCaption(mediaList[0], true)
+	medias, err := DownloadMedias(mediaList, nil)
+	if err != nil {
+		errChan <- fmt.Errorf("failed to download medias: %w", err)
+		return
+	}
+	msgs, err := SendMedias(
+		bot, ctx, dlCtx,
+		medias, &models.SendMediaFormatsOptions{
+			Caption: messageCaption,
+		},
+	)
+	if err != nil {
+		errChan <- fmt.Errorf("failed to send media: %w", err)
+		return
+	}
+	// NOTE(review): presumably the upload only serves to obtain a file ID for
+	// the inline message edit, hence the immediate delete — confirm
+	msg := &msgs[0]
+	msg.Delete(bot, nil)
+	err = StoreMedias(
+		dlCtx, msgs,
+		medias,
+	)
+	if err != nil {
+		errChan <- fmt.Errorf("failed to store media: %w", err)
+		return
+	}
+	mediaChan <- medias[0].Media
+}
diff --git a/bot/core/main.go b/bot/core/main.go new 
file mode 100644 index 0000000..5cafc17 --- /dev/null +++ b/bot/core/main.go @@ -0,0 +1,147 @@ +package core + +import ( + "fmt" + "os" + "slices" + "time" + + "github.com/pkg/errors" + + "govd/enums" + "govd/models" + "govd/util" + + "github.com/PaulSonOfLars/gotgbot/v2" + "github.com/PaulSonOfLars/gotgbot/v2/ext" +) + +func HandleDownloadRequest( + bot *gotgbot.Bot, + ctx *ext.Context, + dlCtx *models.DownloadContext, +) error { + chatID := ctx.EffectiveMessage.Chat.Id + if dlCtx.Extractor.Type == enums.ExtractorTypeSingle { + TypingEffect(bot, ctx, chatID) + err := HandleDefaultFormatDownload(bot, ctx, dlCtx) + if err != nil { + return err + } + return nil + } + return util.ErrUnsupportedExtractorType +} + +func SendMedias( + bot *gotgbot.Bot, + ctx *ext.Context, + dlCtx *models.DownloadContext, + medias []*models.DownloadedMedia, + options *models.SendMediaFormatsOptions, +) ([]gotgbot.Message, error) { + var chatID int64 + var messageOptions *gotgbot.SendMediaGroupOpts + + if dlCtx.GroupSettings != nil { + if len(medias) > dlCtx.GroupSettings.MediaGroupLimit { + return nil, util.ErrMediaGroupLimitExceeded + } + if !*dlCtx.GroupSettings.NSFW { + for _, media := range medias { + if media.Media.NSFW { + return nil, util.ErrNSFWNotAllowed + } + } + } + } + + switch { + case ctx.Message != nil: + chatID = ctx.EffectiveMessage.Chat.Id + messageOptions = &gotgbot.SendMediaGroupOpts{ + ReplyParameters: &gotgbot.ReplyParameters{ + MessageId: ctx.EffectiveMessage.MessageId, + }, + } + case ctx.CallbackQuery != nil: + chatID = ctx.CallbackQuery.Message.GetChat().Id + messageOptions = nil + case ctx.InlineQuery != nil: + chatID = ctx.InlineQuery.From.Id + messageOptions = nil + case ctx.ChosenInlineResult != nil: + chatID = ctx.ChosenInlineResult.From.Id + messageOptions = &gotgbot.SendMediaGroupOpts{ + DisableNotification: true, + } + default: + return nil, errors.New("failed to get chat id") + } + + var sentMessages []gotgbot.Message + + mediaGroupChunks := 
slices.Collect( + slices.Chunk(medias, 10), + ) + + for _, chunk := range mediaGroupChunks { + var inputMediaList []gotgbot.InputMedia + for idx, media := range chunk { + var caption string + + if idx == 0 { + caption = options.Caption + } + inputMedia, err := media.Media.Format.GetInputMedia( + media.FilePath, + media.ThumbnailFilePath, + caption, + ) + if err != nil { + return nil, fmt.Errorf("failed to get input media: %w", err) + } + inputMediaList = append(inputMediaList, inputMedia) + } + mediaType := chunk[0].Media.Format.Type + SendingEffect(bot, ctx, chatID, mediaType) + msgs, err := bot.SendMediaGroup( + chatID, + inputMediaList, + messageOptions, + ) + if err != nil { + return nil, err + } + + for _, media := range chunk { + if media.FilePath != "" { + os.Remove(media.FilePath) + } + if media.ThumbnailFilePath != "" { + os.Remove(media.ThumbnailFilePath) + } + } + + sentMessages = append(sentMessages, msgs...) + if sentMessages[0].Chat.Type != "private" { + if len(mediaGroupChunks) > 1 { + time.Sleep(3 * time.Second) + } // avoid floodwait? 
+ } + } + if len(sentMessages) == 0 { + return nil, errors.New("no messages sent") + } + if !options.IsStored { + err := StoreMedias( + dlCtx, + sentMessages, + medias, + ) + if err != nil { + return nil, fmt.Errorf("failed to cache formats: %w", err) + } + } + return sentMessages, nil +} diff --git a/bot/core/util.go b/bot/core/util.go new file mode 100644 index 0000000..1a66d2b --- /dev/null +++ b/bot/core/util.go @@ -0,0 +1,280 @@ +package core + +import ( + "context" + "fmt" + "log" + "path/filepath" + "strings" + + "github.com/pkg/errors" + + "govd/database" + "govd/enums" + "govd/models" + "govd/plugins" + "govd/util" + "govd/util/av" + + "github.com/PaulSonOfLars/gotgbot/v2" + "github.com/PaulSonOfLars/gotgbot/v2/ext" +) + +func getFileThumbnail( + format *models.MediaFormat, + filePath string, +) (string, error) { + fileDir := filepath.Dir(filePath) + fileName := filepath.Base(filePath) + fileExt := filepath.Ext(fileName) + fileBaseName := fileName[:len(fileName)-len(fileExt)] + thumbnailFilePath := filepath.Join(fileDir, fileBaseName+".thumb.jpeg") + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + if len(format.Thumbnail) > 0 { + file, err := util.DownloadFileInMemory(ctx, format.Thumbnail, nil) + if err != nil { + return "", fmt.Errorf("failed to download file in memory: %w", err) + } + err = util.ImgToJPEG(file, thumbnailFilePath) + if err != nil { + return "", fmt.Errorf("failed to convert to JPEG: %w", err) + } + return thumbnailFilePath, nil + } + if format.Type == enums.MediaTypeVideo { + err := av.ExtractVideoThumbnail(filePath, thumbnailFilePath) + if err != nil { + return "", fmt.Errorf("failed to extract video thumbnail: %w", err) + } + return thumbnailFilePath, nil + } + return "", nil +} + +func insertVideoInfo( + format *models.MediaFormat, + filePath string, +) { + width, height, duration := av.GetVideoInfo(filePath) + format.Width = width + format.Height = height + format.Duration = duration +} + +func 
GetMessageFileID(msg *gotgbot.Message) string { + switch { + case msg.Video != nil: + return msg.Video.FileId + case msg.Animation != nil: + return msg.Animation.FileId + case msg.Photo != nil: + return msg.Photo[len(msg.Photo)-1].FileId + case msg.Document != nil: + return msg.Document.FileId + case msg.Audio != nil: + return msg.Audio.FileId + case msg.Voice != nil: + return msg.Voice.FileId + default: + return "" + } +} + +func GetMessageFileSize(msg *gotgbot.Message) int64 { + switch { + case msg.Video != nil: + return msg.Video.FileSize + case msg.Animation != nil: + return msg.Animation.FileSize + case msg.Photo != nil: + return msg.Photo[len(msg.Photo)-1].FileSize + case msg.Document != nil: + return msg.Document.FileSize + case msg.Audio != nil: + return msg.Audio.FileSize + case msg.Voice != nil: + return msg.Voice.FileSize + default: + return 0 + } +} + +func StoreMedias( + dlCtx *models.DownloadContext, + msgs []gotgbot.Message, + medias []*models.DownloadedMedia, +) error { + var storedMedias []*models.Media + if len(medias) == 0 { + return fmt.Errorf("no media to store") + } + for idx, msg := range msgs { + fileID := GetMessageFileID(&msg) + if len(fileID) == 0 { + return fmt.Errorf("no file ID found for media at index %d", idx) + } + fileSize := GetMessageFileSize(&msg) + medias[idx].Media.Format.FileID = fileID + medias[idx].Media.Format.FileSize = fileSize + storedMedias = append( + storedMedias, + medias[idx].Media, + ) + } + for _, media := range storedMedias { + err := database.StoreMedia( + dlCtx.Extractor.CodeName, + media.ContentID, + media, + ) + if err != nil { + return fmt.Errorf("failed to store media: %w", err) + } + } + return nil +} +
+// FormatCaption builds the caption attached to a sent media.
+//
+// The caption always starts with a "source" link to media.ContentURL. When
+// isEnabled is true and the media carries a valid caption, that text is
+// appended as a quote, cut to at most 600 bytes (plus "...") and escaped with
+// util.EscapeCaption.
+//
+// NOTE(review): the markup of both format strings was missing from the
+// reviewed text (the first Sprintf had an argument but no verb, the second
+// literal was split by raw line breaks). The tags below are reconstructed
+// assuming Telegram HTML parse mode — confirm against the upstream repository.
+func FormatCaption(
+	media *models.Media,
+	isEnabled bool,
+) string {
+	newCaption := fmt.Sprintf(
+		"<a href='%s'>source</a> - @govd_bot\n",
+		media.ContentURL,
+	)
+	if isEnabled && media.Caption.Valid {
+		text := media.Caption.String
+		if len(text) > 600 {
+			// a byte cut can land inside a multi-byte rune; ToValidUTF8
+			// drops the dangling bytes (and any other invalid sequence)
+			text = strings.ToValidUTF8(text[:600], "") + "..."
+		}
+		newCaption += fmt.Sprintf(
+			"<blockquote>%s</blockquote>\n",
+			util.EscapeCaption(text),
+		)
+	}
+	return newCaption
+}
+
+func TypingEffect( + bot *gotgbot.Bot, + ctx *ext.Context, + chatID int64, +) { + bot.SendChatAction( + chatID, + "typing", + nil, + ) +} + +func SendingEffect( + bot *gotgbot.Bot, + ctx *ext.Context, + chatID int64, + mediaType enums.MediaType, +) { + action := "upload_document" + if mediaType == enums.MediaTypeVideo { + action = "upload_video" + } + if mediaType == enums.MediaTypeAudio { + action = "upload_audio" + } + if mediaType == enums.MediaTypePhoto { + action = "upload_photo" + } + bot.SendChatAction( + chatID, + action, + nil, + ) +} + +func HandleErrorMessage( + bot *gotgbot.Bot, + ctx *ext.Context, + err error, +) { + currentError := err + for currentError != nil { + var botError *util.Error + if errors.As(currentError, &botError) { + SendErrorMessage(bot, ctx, fmt.Sprintf( + "error occurred when downloading: %s", + currentError.Error(), + )) + return + } + currentError = errors.Unwrap(currentError) + } + + lastError := util.GetLastError(err) + errorMessage := fmt.Sprintf( + "error occurred when downloading: %s", + lastError.Error(), + ) + + if strings.Contains(errorMessage, bot.Token) { + errorMessage = "telegram related error, probably connection issue" + } + + SendErrorMessage(bot, ctx, errorMessage) + +} + +func SendErrorMessage( + bot *gotgbot.Bot, + ctx *ext.Context, + errorMessage string, +) { + log.Println(errorMessage) + + switch { + case ctx.Update.Message != nil: + ctx.EffectiveMessage.Reply( + bot, + errorMessage, + nil, + ) + case ctx.Update.InlineQuery != nil: + ctx.InlineQuery.Answer( + bot, + nil, + &gotgbot.AnswerInlineQueryOpts{ + CacheTime: 1, + Button: &gotgbot.InlineQueryResultsButton{ + Text: errorMessage, + StartParameter: "start", + }, + }, + ) + case ctx.ChosenInlineResult != nil: + bot.EditMessageText( + errorMessage, + &gotgbot.EditMessageTextOpts{ + InlineMessageId: ctx.ChosenInlineResult.InlineMessageId, + }) + } +} + +func ensureMergeFormats( + media 
*models.Media, + videoFormat *models.MediaFormat, +) { + if videoFormat.Type != enums.MediaTypeVideo { + return + } + if videoFormat.AudioCodec != "" { + return + } + // video with no audio + audioFormat := media.GetDefaultAudioFormat() + if audioFormat == nil { + return + } + videoFormat.AudioCodec = audioFormat.AudioCodec + videoFormat.Plugins = append(videoFormat.Plugins, plugins.MergeAudio) +} diff --git a/bot/handlers/ext.go b/bot/handlers/ext.go new file mode 100644 index 0000000..c973e94 --- /dev/null +++ b/bot/handlers/ext.go @@ -0,0 +1,44 @@ +package handlers + +import ( + extractors "govd/ext" + "strings" + + "github.com/PaulSonOfLars/gotgbot/v2" + "github.com/PaulSonOfLars/gotgbot/v2/ext" +) + +func ExtractorsHandler(bot *gotgbot.Bot, ctx *ext.Context) error { + ctx.CallbackQuery.Answer(bot, nil) + + messageText := "available extractors:\n" + extractorNames := make([]string, 0, len(extractors.List)) + for _, extractor := range extractors.List { + if extractor.IsRedirect { + continue + } + extractorNames = append(extractorNames, extractor.Name) + } + messageText += strings.Join(extractorNames, ", ") + + ctx.EffectiveMessage.EditText( + bot, + messageText, + &gotgbot.EditMessageTextOpts{ + LinkPreviewOptions: &gotgbot.LinkPreviewOptions{ + IsDisabled: true, + }, + ReplyMarkup: gotgbot.InlineKeyboardMarkup{ + InlineKeyboard: [][]gotgbot.InlineKeyboardButton{ + { + { + Text: "back", + CallbackData: "start", + }, + }, + }, + }, + }, + ) + return nil +} diff --git a/bot/handlers/help.go b/bot/handlers/help.go new file mode 100644 index 0000000..4e88583 --- /dev/null +++ b/bot/handlers/help.go @@ -0,0 +1,45 @@ +package handlers + +import ( + "github.com/PaulSonOfLars/gotgbot/v2" + "github.com/PaulSonOfLars/gotgbot/v2/ext" +) + +var helpMessage = "usage:\n" + + "- you can add the bot to a group " + + "to start catching sent links\n" + + "- you can send a link to the bot privately " + + "to download the media too\n\n" + + "group commands:\n" + + "- /settings = 
show current settings\n" + + "- /captions (true|false) = enable/disable descriptions\n" + + "- /nsfw (true|false) = enable/disable nsfw content\n" + + "- /limit (int) = set max items in media groups\n\n" + + "note: the bot is still in beta, " + + "so expect some bugs and missing features.\n" + +var helpKeyboard = gotgbot.InlineKeyboardMarkup{ + InlineKeyboard: [][]gotgbot.InlineKeyboardButton{ + { + { + Text: "back", + CallbackData: "start", + }, + }, + }, +} + +func HelpHandler(bot *gotgbot.Bot, ctx *ext.Context) error { + ctx.CallbackQuery.Answer(bot, nil) + ctx.EffectiveMessage.EditText( + bot, + helpMessage, + &gotgbot.EditMessageTextOpts{ + LinkPreviewOptions: &gotgbot.LinkPreviewOptions{ + IsDisabled: true, + }, + ReplyMarkup: helpKeyboard, + }, + ) + return nil +} diff --git a/bot/handlers/inline.go b/bot/handlers/inline.go new file mode 100644 index 0000000..3140491 --- /dev/null +++ b/bot/handlers/inline.go @@ -0,0 +1,91 @@ +package handlers + +import ( + "context" + "govd/bot/core" + "govd/models" + "govd/util" + "strings" + "time" + + extractors "govd/ext" + + "github.com/PaulSonOfLars/gotgbot/v2" + "github.com/PaulSonOfLars/gotgbot/v2/ext" +) + +func InlineDownloadHandler( + bot *gotgbot.Bot, + ctx *ext.Context, +) error { + url := strings.TrimSpace(ctx.InlineQuery.Query) + if url == "" { + ctx.InlineQuery.Answer(bot, []gotgbot.InlineQueryResult{}, &gotgbot.AnswerInlineQueryOpts{ + CacheTime: 1, + IsPersonal: true, + }) + return nil + } + dlCtx, err := extractors.CtxByURL(url) + if err != nil || dlCtx == nil || dlCtx.Extractor == nil { + ctx.InlineQuery.Answer(bot, []gotgbot.InlineQueryResult{}, &gotgbot.AnswerInlineQueryOpts{ + CacheTime: 1, + IsPersonal: true, + }) + return nil + } + return core.HandleInline(bot, ctx, dlCtx) +} + +func InlineDownloadResultHandler( + bot *gotgbot.Bot, + ctx *ext.Context, +) error { + dlCtx, ok := core.InlineTasks[ctx.ChosenInlineResult.ResultId] + if !ok { + return nil + } + defer delete(core.InlineTasks, 
ctx.ChosenInlineResult.ResultId) + + mediaChan := make(chan *models.Media, 1) + errChan := make(chan error, 1) + timeout, cancel := context.WithTimeout( + context.Background(), + 5*time.Minute, + ) + defer cancel() + + go core.GetInlineFormat( + bot, ctx, dlCtx, + mediaChan, errChan, + ) + select { + case media := <-mediaChan: + err := core.HandleInlineCachedResult( + bot, ctx, + dlCtx, media, + ) + if err != nil { + core.HandleErrorMessage(bot, ctx, err) + return nil + } + case err := <-errChan: + core.HandleErrorMessage(bot, ctx, err) + return nil + case <-timeout.Done(): + core.HandleErrorMessage(bot, ctx, util.ErrTimeout) + return nil + } + return nil +} + +func InlineLoadingHandler( + bot *gotgbot.Bot, + ctx *ext.Context, +) error { + ctx.CallbackQuery.Answer(bot, &gotgbot.AnswerCallbackQueryOpts{ + Text: "wait !", + ShowAlert: true, + }) + return nil +} diff --git a/bot/handlers/instances.go b/bot/handlers/instances.go new file mode 100644 index 0000000..241658c --- /dev/null +++ b/bot/handlers/instances.go @@ -0,0 +1,72 @@ +package handlers + +import ( + "fmt" + "os" + "runtime" + "strings" + + "github.com/PaulSonOfLars/gotgbot/v2" + "github.com/PaulSonOfLars/gotgbot/v2/ext" +) + +var buildHash = "unknown" +var branchName = "unknown" + +func getInstanceMessage() string { + return "current instance\n" + + "go version: %s\n" + + "build: %s\n" + + "branch: %s\n\n" + + "public instances\n" + + "- @govd_bot | main official instance\n" + + "\nwant to add your own instance? 
reach us on @govdsupport" +} + +func InstancesHandler(bot *gotgbot.Bot, ctx *ext.Context) error { + var commitURL string + var branchURL string + + repoURL := os.Getenv("REPO_URL") + if repoURL != "" { + commitURL = fmt.Sprintf( + "%s/tree/%s", + repoURL, + buildHash, + ) + branchURL = fmt.Sprintf( + "%s/tree/%s", + repoURL, + branchName, + ) + } + messageText := fmt.Sprintf( + getInstanceMessage(), + strings.TrimPrefix(runtime.Version(), "go"), + commitURL, + buildHash, + branchURL, + branchName, + ) + ctx.CallbackQuery.Answer(bot, nil) + ctx.EffectiveMessage.EditText( + bot, + messageText, + &gotgbot.EditMessageTextOpts{ + LinkPreviewOptions: &gotgbot.LinkPreviewOptions{ + IsDisabled: true, + }, + ReplyMarkup: gotgbot.InlineKeyboardMarkup{ + InlineKeyboard: [][]gotgbot.InlineKeyboardButton{ + { + { + Text: "back", + CallbackData: "start", + }, + }, + }, + }, + }, + ) + return nil +} diff --git a/bot/handlers/settings.go b/bot/handlers/settings.go new file mode 100644 index 0000000..db665bb --- /dev/null +++ b/bot/handlers/settings.go @@ -0,0 +1,213 @@ +package handlers + +import ( + "fmt" + "govd/database" + "govd/util" + "strconv" + "strings" + + "github.com/PaulSonOfLars/gotgbot/v2" + "github.com/PaulSonOfLars/gotgbot/v2/ext" +) + +func SettingsHandler(bot *gotgbot.Bot, ctx *ext.Context) error { + if ctx.EffectiveMessage.Chat.Type == "private" { + ctx.EffectiveMessage.Reply( + bot, + "use this command in group chats only", + nil, + ) + return nil + } + settings, err := database.GetGroupSettings(ctx.EffectiveMessage.Chat.Id) + if err != nil { + return err + } + ctx.EffectiveMessage.Reply( + bot, + fmt.Sprintf( + "settings for this group\n\ncaptions: %s\nnsfw: %s\nmedia group limit: %d", + strconv.FormatBool(*settings.Captions), + strconv.FormatBool(*settings.NSFW), + settings.MediaGroupLimit, + ), + nil, + ) + return nil +} + +func CaptionsHandler(bot *gotgbot.Bot, ctx *ext.Context) error { + if ctx.EffectiveMessage.Chat.Type == "private" { + return nil + } + + 
chatID := ctx.EffectiveMessage.Chat.Id + userID := ctx.EffectiveMessage.From.Id + + args := ctx.Args() + if len(args) != 2 { + ctx.EffectiveMessage.Reply( + bot, + "usage: /captions (true|false)", + nil, + ) + return nil + } + if !util.IsUserAdmin(bot, chatID, userID) { + ctx.EffectiveMessage.Reply( + bot, + "you don't have permission to change settings", + nil, + ) + return nil + } + userInput := strings.ToLower(args[1]) + value, err := strconv.ParseBool(userInput) + if err != nil { + ctx.EffectiveMessage.Reply( + bot, + fmt.Sprintf("invalid value (%s), use true or false", userInput), + nil, + ) + return nil + } + settings, err := database.GetGroupSettings(chatID) + if err != nil { + return err + } + settings.Captions = &value + err = database.UpdateGroupSettings(chatID, settings) + if err != nil { + return err + } + var message string + if value { + message = "captions enabled" + } else { + message = "captions disabled" + } + ctx.EffectiveMessage.Reply( + bot, + message, + nil, + ) + return nil +} + +func NSFWHandler(bot *gotgbot.Bot, ctx *ext.Context) error { + if ctx.EffectiveMessage.Chat.Type == "private" { + return nil + } + + chatID := ctx.EffectiveMessage.Chat.Id + userID := ctx.EffectiveMessage.From.Id + + args := ctx.Args() + if len(args) != 2 { + ctx.EffectiveMessage.Reply( + bot, + "usage: /nsfw (true|false)", + nil, + ) + return nil + } + if !util.IsUserAdmin(bot, chatID, userID) { + ctx.EffectiveMessage.Reply( + bot, + "you don't have permission to change settings", + nil, + ) + return nil + } + userInput := strings.ToLower(args[1]) + value, err := strconv.ParseBool(userInput) + if err != nil { + ctx.EffectiveMessage.Reply( + bot, + fmt.Sprintf("invalid value (%s), use true or false", userInput), + nil, + ) + return nil + } + settings, err := database.GetGroupSettings(chatID) + if err != nil { + return err + } + settings.NSFW = &value + err = database.UpdateGroupSettings(chatID, settings) + if err != nil { + return err + } + var message string + if 
value { + message = "nsfw enabled" + } else { + message = "nsfw disabled" + } + ctx.EffectiveMessage.Reply( + bot, + message, + nil, + ) + return nil +} + +func MediaGroupLimitHandler(bot *gotgbot.Bot, ctx *ext.Context) error { + if ctx.EffectiveMessage.Chat.Type == "private" { + return nil + } + + chatID := ctx.EffectiveMessage.Chat.Id + userID := ctx.EffectiveMessage.From.Id + + args := ctx.Args() + if len(args) != 2 { + ctx.EffectiveMessage.Reply( + bot, + "usage: /limit (int)", + nil, + ) + return nil + } + if !util.IsUserAdmin(bot, chatID, userID) { + ctx.EffectiveMessage.Reply( + bot, + "you don't have permission to change settings", + nil, + ) + return nil + } + value, err := strconv.Atoi(args[1]) + if err != nil { + ctx.EffectiveMessage.Reply( + bot, + fmt.Sprintf("invalid value (%s), use a number", args[1]), + nil, + ) + return nil + } + if value < 1 || value > 20 { + ctx.EffectiveMessage.Reply( + bot, + "media group limit must be between 1 and 20", + nil, + ) + return nil + } + settings, err := database.GetGroupSettings(chatID) + if err != nil { + return err + } + settings.MediaGroupLimit = value + err = database.UpdateGroupSettings(chatID, settings) + if err != nil { + return err + } + ctx.EffectiveMessage.Reply( + bot, + fmt.Sprintf("media group limit set to %d", value), + nil, + ) + return nil +} diff --git a/bot/handlers/start.go b/bot/handlers/start.go new file mode 100644 index 0000000..bbe9de0 --- /dev/null +++ b/bot/handlers/start.go @@ -0,0 +1,88 @@ +package handlers + +import ( + "fmt" + "os" + + "github.com/PaulSonOfLars/gotgbot/v2" + "github.com/PaulSonOfLars/gotgbot/v2/ext" +) + +var startMessage = "govd is an open-source telegram bot " + + "that allows you to download medias from " + + "various platforms. the project born after " + + "the discontinuation of an " + + "highly popular bot, known as UVD." 
+ +func getStartKeyboard(bot *gotgbot.Bot) gotgbot.InlineKeyboardMarkup { + return gotgbot.InlineKeyboardMarkup{ + InlineKeyboard: [][]gotgbot.InlineKeyboardButton{ + { + { + Text: "add to group", + Url: fmt.Sprintf( + "https://t.me/%s?startgroup=true", + bot.Username, + ), + }, + }, + { + { + Text: "usage", + CallbackData: "help", + }, + { + Text: "stats", + CallbackData: "stats", + }, + }, + { + { + Text: "extractors", + CallbackData: "extractors", + }, + { + Text: "support", + Url: "https://t.me/govdsupport", + }, + }, + { + { + Text: "instances", + CallbackData: "instances", + }, + { + Text: "github", + Url: os.Getenv("REPO_URL"), + }, + }, + }, + } +} + +func StartHandler(bot *gotgbot.Bot, ctx *ext.Context) error { + if ctx.EffectiveMessage.Chat.Type != "private" { + return nil + } + keyboard := getStartKeyboard(bot) + if ctx.Update.Message != nil { + ctx.EffectiveMessage.Reply( + bot, + startMessage, + &gotgbot.SendMessageOpts{ + ReplyMarkup: &keyboard, + }, + ) + } else if ctx.Update.CallbackQuery != nil { + ctx.CallbackQuery.Answer(bot, nil) + ctx.EffectiveMessage.EditText( + bot, + startMessage, + &gotgbot.EditMessageTextOpts{ + ReplyMarkup: keyboard, + }, + ) + + } + return nil +} diff --git a/bot/handlers/stats.go b/bot/handlers/stats.go new file mode 100644 index 0000000..24abfe8 --- /dev/null +++ b/bot/handlers/stats.go @@ -0,0 +1,89 @@ +package handlers + +import ( + "fmt" + "govd/database" + "time" + + "github.com/PaulSonOfLars/gotgbot/v2" + "github.com/PaulSonOfLars/gotgbot/v2/ext" +) + +type Stats struct { + TotalUsers int64 + TotalGroups int64 + TotalDailyUsers int64 + TotalMedia int64 + UpdatedAt time.Time +} + +var lastSavedStats *Stats + +var statsMessage = "users: %d\nusers today: %d\ngroups: %d\ndownloads: %d\n\nupdates every 10 minutes" + +func StatsHandler(bot *gotgbot.Bot, ctx *ext.Context) error { + if ctx.EffectiveMessage.Chat.Type != "private" { + return nil + } + ctx.CallbackQuery.Answer(bot, nil) + stats := GetStats() + 
ctx.EffectiveMessage.EditText( + bot, + fmt.Sprintf( + statsMessage, + stats.TotalUsers, + stats.TotalDailyUsers, + stats.TotalGroups, + stats.TotalMedia, + ), + &gotgbot.EditMessageTextOpts{ + ReplyMarkup: gotgbot.InlineKeyboardMarkup{ + InlineKeyboard: [][]gotgbot.InlineKeyboardButton{ + { + { + Text: "back", + CallbackData: "start", + }, + }, + }, + }, + }, + ) + return nil +} + +func UpdateStats() { + totalUsers, err := database.GetUsersCount() + if err != nil { + return + } + totalGroups, err := database.GetGroupsCount() + if err != nil { + return + } + totalDailyUsers, err := database.GetDailyUserCount() + if err != nil { + return + } + totalMedia, err := database.GetMediaCount() + if err != nil { + return + } + lastSavedStats = &Stats{ + TotalUsers: totalUsers, + TotalGroups: totalGroups, + TotalDailyUsers: totalDailyUsers, + TotalMedia: totalMedia, + UpdatedAt: time.Now(), + } +} + +func GetStats() *Stats { + if lastSavedStats == nil { + UpdateStats() + } + if lastSavedStats.UpdatedAt.Add(10 * time.Minute).Before(time.Now()) { + UpdateStats() + } + return lastSavedStats +} diff --git a/bot/handlers/url.go b/bot/handlers/url.go new file mode 100644 index 0000000..775fb0b --- /dev/null +++ b/bot/handlers/url.go @@ -0,0 +1,65 @@ +package handlers + +import ( + "govd/bot/core" + "govd/database" + extractors "govd/ext" + + "github.com/PaulSonOfLars/gotgbot/v2" + "github.com/PaulSonOfLars/gotgbot/v2/ext" + "github.com/PaulSonOfLars/gotgbot/v2/ext/handlers/filters/message" +) + +func URLHandler(bot *gotgbot.Bot, ctx *ext.Context) error { + messageURL := getMessageURL(ctx.EffectiveMessage) + if messageURL == "" { + return nil + } + dlCtx, err := extractors.CtxByURL(messageURL) + if err != nil { + core.HandleErrorMessage( + bot, ctx, err) + return nil + } + if dlCtx == nil || dlCtx.Extractor == nil { + return nil + } + userID := ctx.EffectiveMessage.From.Id + if ctx.EffectiveMessage.Chat.Type != "private" { + settings, err := 
database.GetGroupSettings(ctx.EffectiveMessage.Chat.Id) + if err != nil { + return err + } + dlCtx.GroupSettings = settings + } + if userID != 1087968824 { + // groupAnonymousBot + _, err = database.GetUser(userID) + if err != nil { + return err + } + } + err = core.HandleDownloadRequest(bot, ctx, dlCtx) + if err != nil { + core.HandleErrorMessage( + bot, ctx, err) + } + return nil +} + +func URLFilter(msg *gotgbot.Message) bool { + return message.Text(msg) && !message.Command(msg) && containsURL(msg) +} + +func containsURL(msg *gotgbot.Message) bool { + return message.Entity("url")(msg) +} + +func getMessageURL(msg *gotgbot.Message) string { + for _, entity := range msg.Entities { + if entity.Type == "url" { + return msg.Text[entity.Offset : entity.Offset+entity.Length] + } + } + return "" +} diff --git a/bot/main.go b/bot/main.go new file mode 100644 index 0000000..6ae9a35 --- /dev/null +++ b/bot/main.go @@ -0,0 +1,118 @@ +package bot + +import ( + "log" + "os" + "time" + + botHandlers "govd/bot/handlers" + + "github.com/PaulSonOfLars/gotgbot/v2" + "github.com/PaulSonOfLars/gotgbot/v2/ext" + "github.com/PaulSonOfLars/gotgbot/v2/ext/handlers" + "github.com/PaulSonOfLars/gotgbot/v2/ext/handlers/filters/callbackquery" + "github.com/PaulSonOfLars/gotgbot/v2/ext/handlers/filters/choseninlineresult" + "github.com/PaulSonOfLars/gotgbot/v2/ext/handlers/filters/inlinequery" +) + +var AllowedUpdates = []string{ + "message", + "callback_query", + "inline_query", + "chosen_inline_result", +} + +func Start() { + token := os.Getenv("BOT_TOKEN") + if token == "" { + log.Fatalf("BOT_TOKEN is not provided") + } + b, err := gotgbot.NewBot(token, &gotgbot.BotOpts{ + BotClient: NewBotClient(), + }) + if err != nil { + log.Fatalf("failed to create bot: %v", err) + } + dispatcher := ext.NewDispatcher(&ext.DispatcherOpts{ + Error: func(b *gotgbot.Bot, ctx *ext.Context, err error) ext.DispatcherAction { + log.Println("an error occurred while handling update:", err.Error()) + return 
ext.DispatcherActionNoop + }, + MaxRoutines: ext.DefaultMaxRoutines, + }) + updater := ext.NewUpdater(dispatcher, nil) + registerHandlers(dispatcher) + err = updater.StartPolling(b, &ext.PollingOpts{ + DropPendingUpdates: true, + GetUpdatesOpts: &gotgbot.GetUpdatesOpts{ + Timeout: 9 * 60, + RequestOpts: &gotgbot.RequestOpts{ + Timeout: time.Minute * 10, + }, + AllowedUpdates: AllowedUpdates, + }, + }) + if err != nil { + log.Fatalf("failed to start polling: %v", err) + } + log.Printf("bot started on: %s\n", b.User.Username) +} + +func registerHandlers(dispatcher *ext.Dispatcher) { + dispatcher.AddHandler(handlers.NewMessage( + botHandlers.URLFilter, + botHandlers.URLHandler, + )) + dispatcher.AddHandler(handlers.NewCommand( + "start", + botHandlers.StartHandler, + )) + dispatcher.AddHandler(handlers.NewCallback( + callbackquery.Equal("start"), + botHandlers.StartHandler, + )) + dispatcher.AddHandler(handlers.NewCallback( + callbackquery.Equal("help"), + botHandlers.HelpHandler, + )) + dispatcher.AddHandler(handlers.NewCommand( + "settings", + botHandlers.SettingsHandler, + )) + dispatcher.AddHandler(handlers.NewCommand( + "captions", + botHandlers.CaptionsHandler, + )) + dispatcher.AddHandler(handlers.NewCommand( + "nsfw", + botHandlers.NSFWHandler, + )) + dispatcher.AddHandler(handlers.NewCommand( + "limit", + botHandlers.MediaGroupLimitHandler, + )) + dispatcher.AddHandler(handlers.NewCallback( + callbackquery.Equal("stats"), + botHandlers.StatsHandler, + )) + dispatcher.AddHandler(handlers.NewCallback( + callbackquery.Equal("extractors"), + botHandlers.ExtractorsHandler, + )) + dispatcher.AddHandler(handlers.NewCallback( + callbackquery.Equal("instances"), + botHandlers.InstancesHandler, + )) + dispatcher.AddHandler(handlers.NewInlineQuery( + inlinequery.All, + botHandlers.InlineDownloadHandler, + )) + dispatcher.AddHandler(handlers.NewChosenInlineResult( + choseninlineresult.All, + botHandlers.InlineDownloadResultHandler, + )) + 
dispatcher.AddHandler(handlers.NewCallback( + callbackquery.Equal("inline:loading"), + botHandlers.InlineLoadingHandler, + )) +} diff --git a/bot/middleware.go b/bot/middleware.go new file mode 100644 index 0000000..ce4ea5d --- /dev/null +++ b/bot/middleware.go @@ -0,0 +1,56 @@ +package bot + +import ( + "context" + "encoding/json" + "log" + "net/http" + "os" + "strings" + "time" + + "github.com/PaulSonOfLars/gotgbot/v2" +) + +type BotClient struct { + gotgbot.BotClient +} + +func (b BotClient) RequestWithContext( + ctx context.Context, + token string, + method string, + params map[string]string, + data map[string]gotgbot.FileReader, + opts *gotgbot.RequestOpts, +) (json.RawMessage, error) { + if strings.HasPrefix(method, "send") || method == "copyMessage" { + params["allow_sending_without_reply"] = "true" + } + if strings.HasPrefix(method, "send") || strings.HasPrefix(method, "edit") { + params["parse_mode"] = "HTML" + } + val, err := b.BotClient.RequestWithContext(ctx, token, method, params, data, opts) + if err != nil { + return nil, err + } + return val, err +} + +func NewBotClient() BotClient { + botAPIURL := os.Getenv("BOT_API_URL") + if botAPIURL == "" { + log.Println("BOT_API_URL is not provided, using default") + botAPIURL = gotgbot.DefaultAPIURL + } + return BotClient{ + BotClient: &gotgbot.BaseBotClient{ + Client: http.Client{}, + UseTestEnvironment: false, + DefaultRequestOpts: &gotgbot.RequestOpts{ + Timeout: 10 * time.Minute, + APIURL: botAPIURL, + }, + }, + } +} diff --git a/build.sh b/build.sh new file mode 100644 index 0000000..76c8294 --- /dev/null +++ b/build.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +COMMIT_HASH=$(git rev-parse --short HEAD) +BRANCH_NAME=$(git branch --show-current) + +PACKAGE_PATH="govd/bot/handlers" + +echo "Building with commit hash: ${COMMIT_HASH}" +echo "Branch name: ${BRANCH_NAME}" +go build -ldflags="-X '${PACKAGE_PATH}.buildHash=${COMMIT_HASH}' -X '${PACKAGE_PATH}.branchName=${BRANCH_NAME}'" + +if [ $? 
-eq 0 ]; then + echo "Build completed successfully" +else + echo "Build failed" + exit 1 +fi \ No newline at end of file diff --git a/cookies/.gitignore b/cookies/.gitignore new file mode 100644 index 0000000..314f02b --- /dev/null +++ b/cookies/.gitignore @@ -0,0 +1 @@ +*.txt \ No newline at end of file diff --git a/database/main.go b/database/main.go new file mode 100644 index 0000000..dab7caa --- /dev/null +++ b/database/main.go @@ -0,0 +1,65 @@ +package database + +import ( + "govd/models" + + "fmt" + "log" + "os" + "time" + + "gorm.io/driver/mysql" + "gorm.io/gorm" + "gorm.io/gorm/logger" +) + +var DB *gorm.DB + +func Start() { + host := os.Getenv("DB_HOST") + port := os.Getenv("DB_PORT") + user := os.Getenv("DB_USER") + password := os.Getenv("DB_PASSWORD") + dbname := os.Getenv("DB_NAME") + + connectionString := fmt.Sprintf( + "%s:%s@tcp(%s:%s)/%s?charset=utf8mb4&parseTime=True", + user, password, host, port, dbname, + ) + db, err := gorm.Open(mysql.Open(connectionString), &gorm.Config{ + Logger: logger.Default.LogMode(logger.Silent), + NowFunc: func() time.Time { + utc, _ := time.LoadLocation("Europe/Rome") + return time.Now().In(utc) + }, + }) + if err != nil { + log.Fatalf("failed to connect to database: %v", err) + } + DB = db + sqlDB, err := DB.DB() + if err != nil { + log.Fatalf("failed to get database connection: %v", err) + } + err = sqlDB.Ping() + if err != nil { + log.Fatalf("failed to ping database: %v", err) + } + err = migrateDatabase() + if err != nil { + log.Fatalf("failed to migrate database: %v", err) + } +} + +func migrateDatabase() error { + err := DB.AutoMigrate( + &models.Media{}, + &models.MediaFormat{}, + &models.GroupSettings{}, + &models.User{}, + ) + if err != nil { + return err + } + return nil +} diff --git a/database/media.go b/database/media.go new file mode 100644 index 0000000..9553bbd --- /dev/null +++ b/database/media.go @@ -0,0 +1,60 @@ +package database + +import ( + "fmt" + + "govd/models" + + "gorm.io/gorm" +) + +func 
GetDefaultMedias( + extractorCodeName string, + contentID string, +) ([]*models.Media, error) { + var mediaList []*models.Media + + err := DB. + Where(&models.Media{ + ExtractorCodeName: extractorCodeName, + ContentID: contentID, + }). + Preload("Format", "is_default = ?", true). + Find(&mediaList). + Error + + if err != nil { + return nil, fmt.Errorf("failed to get stored media list: %w", err) + } + + return mediaList, nil +} + +func StoreMedia( + extractorCodeName string, + contentID string, + media *models.Media, +) error { + return DB.Transaction(func(tx *gorm.DB) error { + if err := tx.Where(models.Media{ + ExtractorCodeName: extractorCodeName, + ContentID: contentID, + }).FirstOrCreate(&media).Error; err != nil { + return fmt.Errorf("failed to get or create media: %w", err) + } + if media.Format != nil { + format := media.Format + format.MediaID = media.ID + + if err := tx.Where(models.MediaFormat{ + MediaID: format.MediaID, + FormatID: format.FormatID, + Type: format.Type, + }).FirstOrCreate(format).Error; err != nil { + return fmt.Errorf("failed to get or create format: %w", err) + } + } + + return nil + }) +} diff --git a/database/settings.go b/database/settings.go new file mode 100644 index 0000000..1697079 --- /dev/null +++ b/database/settings.go @@ -0,0 +1,37 @@ +package database + +import ( + "govd/models" +) + +func GetGroupSettings( + chatID int64, +) (*models.GroupSettings, error) { + var groupSettings models.GroupSettings + err := DB. + Where(&models.GroupSettings{ + ChatID: chatID, + }). + FirstOrCreate(&groupSettings). + Error + if err != nil { + return nil, err + } + return &groupSettings, nil +} + +func UpdateGroupSettings( + chatID int64, + settings *models.GroupSettings, +) error { + err := DB. + Where(&models.GroupSettings{ + ChatID: chatID, + }). + Updates(settings). 
+ Error + if err != nil { + return err + } + return nil +} diff --git a/database/stats.go b/database/stats.go new file mode 100644 index 0000000..d1da140 --- /dev/null +++ b/database/stats.go @@ -0,0 +1,52 @@ +package database + +import "govd/models" + +func GetMediaCount() (int64, error) { + var count int64 + err := DB. + Model(&models.Media{}). + Count(&count). + Error + if err != nil { + return 0, err + } + return count, nil +} + +func GetUsersCount() (int64, error) { + var count int64 + err := DB. + Model(&models.User{}). + Count(&count). + Error + if err != nil { + return 0, err + } + return count, nil +} + +func GetGroupsCount() (int64, error) { + var count int64 + err := DB. + Model(&models.GroupSettings{}). + Count(&count). + Error + if err != nil { + return 0, err + } + return count, nil +} + +func GetDailyUserCount() (int64, error) { + var count int64 + err := DB. + Model(&models.User{}). + Where("DATE(last_used) = DATE(NOW())"). + Count(&count). + Error + if err != nil { + return 0, err + } + return count, nil +} diff --git a/database/user.go b/database/user.go new file mode 100644 index 0000000..84e8cd9 --- /dev/null +++ b/database/user.go @@ -0,0 +1,38 @@ +package database + +import "govd/models" + +func GetUser( + userID int64, +) (*models.User, error) { + var user models.User + err := DB. + Where(&models.User{ + UserID: userID, + }). + FirstOrCreate(&user). + Error + if err != nil { + return nil, err + } + go UpdateUserStatus(userID) + return &user, nil +} + +func UpdateUserStatus( + userID int64, +) error { + err := DB. + Model(&models.User{}). + Where(&models.User{ + UserID: userID, + }). + Updates(&models.User{ + LastUsed: DB.NowFunc(), + }). 
+ Error + if err != nil { + return err + } + return nil +} diff --git a/enums/chat_type.go b/enums/chat_type.go new file mode 100644 index 0000000..7865731 --- /dev/null +++ b/enums/chat_type.go @@ -0,0 +1,8 @@ +package enums + +type ChatType string + +const ( + ChatTypePrivate ChatType = "private" + ChatTypeGroup ChatType = "group" +) diff --git a/enums/extractor_category.go b/enums/extractor_category.go new file mode 100644 index 0000000..cb16f75 --- /dev/null +++ b/enums/extractor_category.go @@ -0,0 +1,9 @@ +package enums + +type ExtractorCategory string + +const ( + ExtractorCategorySocial ExtractorCategory = "social" + ExtractorCategoryStreaming ExtractorCategory = "streaming" + ExtractorCategoryMusic ExtractorCategory = "music" +) diff --git a/enums/extractor_type.go b/enums/extractor_type.go new file mode 100644 index 0000000..54b0600 --- /dev/null +++ b/enums/extractor_type.go @@ -0,0 +1,7 @@ +package enums + +type ExtractorType string + +const ( + ExtractorTypeSingle ExtractorType = "single" +) diff --git a/enums/media_codec.go b/enums/media_codec.go new file mode 100644 index 0000000..0b7173a --- /dev/null +++ b/enums/media_codec.go @@ -0,0 +1,17 @@ +package enums + +type MediaCodec string + +const ( + MediaCodecAVC MediaCodec = "avc" + MediaCodecHEVC MediaCodec = "hevc" + MediaCodecVP9 MediaCodec = "vp9" + MediaCodecVP8 MediaCodec = "vp8" + MediaCodecAV1 MediaCodec = "av1" + MediaCodecAAC MediaCodec = "aac" + MediaCodecOpus MediaCodec = "opus" + MediaCodecVorbis MediaCodec = "vorbis" + MediaCodecMP3 MediaCodec = "mp3" + MediaCodecFLAC MediaCodec = "flac" + MediaCodecWebP MediaCodec = "webp" +) diff --git a/enums/media_type.go b/enums/media_type.go new file mode 100644 index 0000000..3f1be0a --- /dev/null +++ b/enums/media_type.go @@ -0,0 +1,9 @@ +package enums + +type MediaType string + +const ( + MediaTypeVideo MediaType = "video" + MediaTypeAudio MediaType = "audio" + MediaTypePhoto MediaType = "photo" +) diff --git a/ext/instagram/main.go 
b/ext/instagram/main.go new file mode 100644 index 0000000..9107046 --- /dev/null +++ b/ext/instagram/main.go @@ -0,0 +1,169 @@ +package instagram + +import ( + "crypto/tls" + "fmt" + "govd/enums" + "govd/models" + "govd/util" + "io" + "net/http" + "regexp" + + "github.com/quic-go/quic-go" + "github.com/quic-go/quic-go/http3" +) + +// as a public service, we can't use the official API +// so we use igram.world API, a third-party service +// that provides a similar functionality +// feel free to open PR, if you want to +// add support for the official Instagram API + +const ( + apiHostname = "api.igram.world" + apiKey = "aaeaf2805cea6abef3f9d2b6a666fce62fd9d612a43ab772bb50ce81455112e0" + apiTimestamp = "1742201548873" + + // todo: Implement a proper way + // to get the API key and timestamp +) + +var HTTPClient = &http.Client{ + Transport: &http3.Transport{ + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: true, + }, + QUICConfig: &quic.Config{ + MaxIncomingStreams: -1, + EnableDatagrams: true, + }, + }, +} + +var Extractor = &models.Extractor{ + Name: "Instagram", + CodeName: "instagram", + Type: enums.ExtractorTypeSingle, + Category: enums.ExtractorCategorySocial, + URLPattern: regexp.MustCompile(`https:\/\/www\.instagram\.com\/(reel|p|tv)\/(?P[a-zA-Z0-9_-]+)`), + IsRedirect: false, + + Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { + mediaList, err := MediaListFromAPI(ctx, false) + return &models.ExtractorResponse{ + MediaList: mediaList, + }, err + }, +} + +var StoriesExtractor = &models.Extractor{ + Name: "Instagram Stories", + CodeName: "instagram:stories", + Type: enums.ExtractorTypeSingle, + Category: enums.ExtractorCategorySocial, + URLPattern: regexp.MustCompile(`https:\/\/www\.instagram\.com\/stories\/[a-zA-Z0-9._]+\/(?P\d+)`), + IsRedirect: false, + + Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { + mediaList, err := MediaListFromAPI(ctx, true) + return &models.ExtractorResponse{ + MediaList: 
mediaList, + }, err + }, +} + +func MediaListFromAPI( + ctx *models.DownloadContext, + stories bool, +) ([]*models.Media, error) { + var mediaList []*models.Media + postURL := ctx.MatchedContentURL + details, err := GetVideoAPI(postURL) + if err != nil { + return nil, fmt.Errorf("failed to get post: %w", err) + } + var caption string + if !stories { + caption, err = GetPostCaption(postURL) + if err != nil { + return nil, fmt.Errorf("failed to get caption: %w", err) + } + } + for _, item := range details.Items { + media := ctx.Extractor.NewMedia( + ctx.MatchedContentID, + ctx.MatchedContentURL, + ) + media.SetCaption(caption) + urlObj := item.URL[0] + contentURL, err := GetCDNURL(urlObj.URL) + if err != nil { + return nil, err + } + thumbnailURL, err := GetCDNURL(item.Thumb) + if err != nil { + return nil, err + } + fileExt := urlObj.Ext + formatID := urlObj.Type + switch fileExt { + case "mp4": + media.AddFormat(&models.MediaFormat{ + Type: enums.MediaTypeVideo, + FormatID: formatID, + URL: []string{contentURL}, + VideoCodec: enums.MediaCodecAVC, + AudioCodec: enums.MediaCodecAAC, + Thumbnail: []string{thumbnailURL}, + }, + ) + case "jpg", "webp", "heic", "jpeg": + media.AddFormat(&models.MediaFormat{ + Type: enums.MediaTypePhoto, + FormatID: formatID, + URL: []string{contentURL}, + }) + default: + return nil, fmt.Errorf("unknown format: %s", fileExt) + } + mediaList = append(mediaList, media) + } + + return mediaList, nil +} + +func GetVideoAPI(contentURL string) (*IGramResponse, error) { + apiURL := fmt.Sprintf( + "https://%s/api/convert", + apiHostname, + ) + payload, err := BuildSignedPayload(contentURL) + if err != nil { + return nil, fmt.Errorf("failed to build signed payload: %w", err) + } + req, err := http.NewRequest("POST", apiURL, payload) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("User-Agent", util.ChromeUA) + + resp, err := HTTPClient.Do(req) 
+ if err != nil { + return nil, fmt.Errorf("failed to send request: %w", err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("failed to get response: %s", resp.Status) + } + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + response, err := ParseIGramResponse(body) + if err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + return response, nil +} diff --git a/ext/instagram/models.go b/ext/instagram/models.go new file mode 100644 index 0000000..7e12bd6 --- /dev/null +++ b/ext/instagram/models.go @@ -0,0 +1,19 @@ +package instagram + +type IGramResponse struct { + Items []*IGramMedia `json:"items"` +} + +type IGramMedia struct { + URL []*MediaURL `json:"url"` + Thumb string `json:"thumb"` + Hosting string `json:"hosting"` + Timestamp int `json:"timestamp"` +} + +type MediaURL struct { + URL string `json:"url"` + Name string `json:"name"` + Type string `json:"type"` + Ext string `json:"ext"` +} diff --git a/ext/instagram/util.go b/ext/instagram/util.go new file mode 100644 index 0000000..21cd9a9 --- /dev/null +++ b/ext/instagram/util.go @@ -0,0 +1,139 @@ +package instagram + +import ( + "crypto/sha256" + "encoding/json" + "fmt" + "govd/util" + "html" + "io" + "net/http" + "net/url" + "regexp" + "strings" + "time" +) + +var captionPattern = regexp.MustCompile( + `(?s)\w+)`), + IsRedirect: true, + + Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { + shortURL := fmt.Sprintf(shortenerAPIFormat, ctx.MatchedContentID) + location, err := util.GetLocationURL(shortURL, "") + if err != nil { + return nil, fmt.Errorf("failed to get real url: %w", err) + } + return &models.ExtractorResponse{ + URL: location, + }, nil + }, +} + +var Extractor = &models.Extractor{ + Name: "Pinterest", + CodeName: "pinterest", + Type: enums.ExtractorTypeSingle, + Category: enums.ExtractorCategorySocial, + URLPattern: 
regexp.MustCompile(`https?://(\w+\.)?pinterest[\.\w]+/pin/(?P\d+)`), + + Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { + media, err := ExtractPinMedia(ctx) + if err != nil { + return nil, err + } + return &models.ExtractorResponse{ + MediaList: media, + }, nil + }, +} + +func ExtractPinMedia(ctx *models.DownloadContext) ([]*models.Media, error) { + pinID := ctx.MatchedContentID + contentURL := ctx.MatchedContentURL + + pinData, err := GetPinData(pinID) + if err != nil { + return nil, err + } + + media := ctx.Extractor.NewMedia(pinID, contentURL) + media.SetCaption(pinData.Title) + + if pinData.Videos != nil && pinData.Videos.VideoList != nil { + formats, err := ParseVideoObject(pinData.Videos) + if err != nil { + return nil, err + } + for _, format := range formats { + media.AddFormat(format) + } + return []*models.Media{media}, nil + } + + if pinData.StoryPinData != nil && len(pinData.StoryPinData.Pages) > 0 { + for _, page := range pinData.StoryPinData.Pages { + for _, block := range page.Blocks { + if block.BlockType == 3 && block.Video != nil { // blockType 3 = Video + formats, err := ParseVideoObject(block.Video) + if err != nil { + return nil, err + } + for _, format := range formats { + media.AddFormat(format) + } + return []*models.Media{media}, nil + } + } + } + } + + if pinData.Images != nil && pinData.Images.Orig != nil { + imageURL := pinData.Images.Orig.URL + media.AddFormat(&models.MediaFormat{ + FormatID: "photo", + Type: enums.MediaTypePhoto, + URL: []string{imageURL}, + }) + return []*models.Media{media}, nil + } else if pinData.StoryPinData != nil && len(pinData.StoryPinData.Pages) > 0 { + for _, page := range pinData.StoryPinData.Pages { + if page.Image != nil && page.Image.Images.Originals != nil { + media.AddFormat(&models.MediaFormat{ + FormatID: "photo", + Type: enums.MediaTypePhoto, + URL: []string{page.Image.Images.Originals.URL}, + }) + return []*models.Media{media}, nil + } + } + } + + if pinData.Embed != nil 
&& pinData.Embed.Type == "gif" { + media.AddFormat(&models.MediaFormat{ + FormatID: "gif", + Type: enums.MediaTypeVideo, + VideoCodec: enums.MediaCodecAVC, + URL: []string{pinData.Embed.Src}, + }) + return []*models.Media{media}, nil + } + + return nil, fmt.Errorf("no media found for pin ID: %s", pinID) +} + +func GetPinData(pinID string) (*PinData, error) { + params := BuildPinRequestParams(pinID) + + req, err := http.NewRequest("GET", pinResourceEndpoint, nil) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + q := req.URL.Query() + for key, value := range params { + q.Add(key, value) + } + req.URL.RawQuery = q.Encode() + req.Header.Set("User-Agent", util.ChromeUA) + + // fix 403 error + req.Header.Set("X-Pinterest-PWS-Handler", "www/[username].js") + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to send request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("bad response: %s", resp.Status) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + + var pinResponse PinResponse + err = json.Unmarshal(body, &pinResponse) + if err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + + return &pinResponse.ResourceResponse.Data, nil +} diff --git a/ext/pinterest/models.go b/ext/pinterest/models.go new file mode 100644 index 0000000..68a918f --- /dev/null +++ b/ext/pinterest/models.go @@ -0,0 +1,62 @@ +package pinterest + +type PinResponse struct { + ResourceResponse struct { + Data PinData `json:"data"` + } `json:"resource_response"` +} + +type PinData struct { + ID string `json:"id"` + Title string `json:"title"` + Description string `json:"description"` + Images *Images `json:"images,omitempty"` + Videos *Videos `json:"videos,omitempty"` + StoryPinData *StoryPin `json:"story_pin_data,omitempty"` + Embed *Embed 
`json:"embed,omitempty"` +} + +type Images struct { + Orig *ImageObject `json:"orig"` +} + +type ImageObject struct { + URL string `json:"url"` + Width int `json:"width"` + Height int `json:"height"` +} + +type Videos struct { + VideoList map[string]*VideoObject `json:"video_list"` +} + +type VideoObject struct { + URL string `json:"url"` + Width int64 `json:"width"` + Height int64 `json:"height"` + Duration int64 `json:"duration"` + Thumbnail string `json:"thumbnail"` +} + +type StoryPin struct { + Pages []Page `json:"pages"` +} + +type Page struct { + Blocks []Block `json:"blocks"` + Image *struct { + Images struct { + Originals *ImageObject `json:"originals"` + } `json:"images"` + } `json:"image,omitempty"` +} + +type Block struct { + BlockType int `json:"block_type"` + Video *Videos `json:"video,omitempty"` +} + +type Embed struct { + Type string `json:"type"` + Src string `json:"src"` +} diff --git a/ext/pinterest/util.go b/ext/pinterest/util.go new file mode 100644 index 0000000..cff5aae --- /dev/null +++ b/ext/pinterest/util.go @@ -0,0 +1,55 @@ +package pinterest + +import ( + "encoding/json" + "fmt" + "govd/enums" + "govd/models" + "govd/util/parser" +) + +func ParseVideoObject(videoObj *Videos) ([]*models.MediaFormat, error) { + var formats []*models.MediaFormat + + for key, video := range videoObj.VideoList { + if key != "HLS" { + formats = append(formats, &models.MediaFormat{ + FormatID: key, + URL: []string{video.URL}, + Type: enums.MediaTypeVideo, + VideoCodec: enums.MediaCodecAVC, + AudioCodec: enums.MediaCodecAAC, + Width: video.Width, + Height: video.Height, + Duration: video.Duration / 1000, + Thumbnail: []string{video.Thumbnail}, + }) + } else { + hlsFormats, err := parser.ParseM3U8FromURL(video.URL) + if err != nil { + return nil, fmt.Errorf("failed to extract hls formats: %w", err) + } + for _, hlsFormat := range hlsFormats { + hlsFormat.Duration = video.Duration / 1000 + hlsFormat.Thumbnail = []string{video.Thumbnail} + formats = 
// BuildPinRequestParams returns the query parameters for a pin resource
// request: a single "data" entry holding the JSON-encoded options object
// (field_set_key "unauth_react_main_pin" and the requested pin id).
func BuildPinRequestParams(pinID string) map[string]string {
	options := map[string]any{
		"options": map[string]any{
			"field_set_key": "unauth_react_main_pin",
			"id":            pinID,
		},
	}

	// The payload consists solely of string-keyed maps with string values,
	// which encoding/json can always encode, so the error is safely ignored.
	jsonData, _ := json.Marshal(options)
	return map[string]string{
		"data": string(jsonData),
	}
}
enums.ExtractorTypeSingle, + Category: enums.ExtractorCategorySocial, + URLPattern: regexp.MustCompile(`https?://(?P(?:\w+\.)?reddit(?:media)?\.com)/(?P(?:(?:r|user)/[^/]+/)?comments/(?P[^/?#&]+))`), + + Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { + mediaList, err := MediaListFromAPI(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get media: %w", err) + } + return &models.ExtractorResponse{ + MediaList: mediaList, + }, nil + }, +} + +func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) { + host := ctx.MatchedGroups["host"] + slug := ctx.MatchedGroups["slug"] + + contentID := ctx.MatchedContentID + contentURL := ctx.MatchedContentURL + + manifest, err := GetRedditData(host, slug) + if err != nil { + return nil, err + } + + if len(manifest) == 0 || len(manifest[0].Data.Children) == 0 { + return nil, fmt.Errorf("no data found in response") + } + + data := manifest[0].Data.Children[0].Data + title := data.Title + isNsfw := data.Over18 + var mediaList []*models.Media + + if !data.IsVideo { + // check for single photo + if data.Preview != nil && len(data.Preview.Images) > 0 { + media := ctx.Extractor.NewMedia(contentID, contentURL) + media.SetCaption(title) + if isNsfw { + media.NSFW = true + } + + image := data.Preview.Images[0] + + // check for video preview (GIF) + if data.Preview.RedditVideoPreview != nil { + formats, err := GetHLSFormats( + data.Preview.RedditVideoPreview.FallbackURL, + image.Source.URL, + data.Preview.RedditVideoPreview.Duration, + ) + if err != nil { + return nil, err + } + + for _, format := range formats { + media.AddFormat(format) + } + + mediaList = append(mediaList, media) + return mediaList, nil + } + + // check for MP4 variant (animated GIF) + if image.Variants.MP4 != nil { + media.AddFormat(&models.MediaFormat{ + FormatID: "gif", + Type: enums.MediaTypeVideo, + VideoCodec: enums.MediaCodecAVC, + AudioCodec: enums.MediaCodecAAC, + URL: 
[]string{util.FixURL(image.Variants.MP4.Source.URL)}, + Thumbnail: []string{util.FixURL(image.Source.URL)}, + }) + + mediaList = append(mediaList, media) + return mediaList, nil + } + + // regular photo + media.AddFormat(&models.MediaFormat{ + FormatID: "photo", + Type: enums.MediaTypePhoto, + URL: []string{util.FixURL(image.Source.URL)}, + }) + + mediaList = append(mediaList, media) + return mediaList, nil + } + + // check for gallery/collection + if len(data.MediaMetadata) > 0 { + for key, obj := range data.MediaMetadata { + if obj.E == "Image" { + media := ctx.Extractor.NewMedia(key, contentURL) + media.SetCaption(title) + if isNsfw { + media.NSFW = true + } + + media.AddFormat(&models.MediaFormat{ + FormatID: "photo", + Type: enums.MediaTypePhoto, + URL: []string{util.FixURL(obj.S.U)}, + }) + + mediaList = append(mediaList, media) + } + } + + return mediaList, nil + } + } else { + // video + media := ctx.Extractor.NewMedia(contentID, contentURL) + media.SetCaption(title) + if isNsfw { + media.NSFW = true + } + + var redditVideo *RedditVideo + + if data.Media != nil && data.Media.RedditVideo != nil { + redditVideo = data.Media.RedditVideo + } else if data.SecureMedia != nil && data.SecureMedia.RedditVideo != nil { + redditVideo = data.SecureMedia.RedditVideo + } + + if redditVideo != nil { + thumbnail := data.Thumbnail + + if (thumbnail == "nsfw" || thumbnail == "spoiler") && data.Preview != nil && len(data.Preview.Images) > 0 { + thumbnail = data.Preview.Images[0].Source.URL + } + + formats, err := GetHLSFormats( + redditVideo.FallbackURL, + thumbnail, + redditVideo.Duration, + ) + if err != nil { + return nil, err + } + + for _, format := range formats { + media.AddFormat(format) + } + + mediaList = append(mediaList, media) + return mediaList, nil + } + } + + return mediaList, nil +} + +func GetRedditData(host string, slug string) (RedditResponse, error) { + url := fmt.Sprintf("https://%s/%s/.json", host, slug) + + req, err := http.NewRequest("GET", url, nil) 
+ if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("User-Agent", util.ChromeUA) + cookies, err := util.ParseCookieFile("reddit.txt") + if err != nil { + return nil, fmt.Errorf("failed to get cookies: %w", err) + } + for _, cookie := range cookies { + req.AddCookie(cookie) + } + + res, err := HTTPClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to send request: %w", err) + } + defer res.Body.Close() + + if res.StatusCode != http.StatusOK { + // try with alternative domain + altHost := "old.reddit.com" + if host == "old.reddit.com" { + altHost = "www.reddit.com" + } + + return GetRedditData(altHost, slug) + } + + body, err := io.ReadAll(res.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + + var response RedditResponse + err = json.Unmarshal(body, &response) + if err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + + return response, nil +} diff --git a/ext/reddit/models.go b/ext/reddit/models.go new file mode 100644 index 0000000..b074ce0 --- /dev/null +++ b/ext/reddit/models.go @@ -0,0 +1,74 @@ +package reddit + +type RedditResponse []struct { + Data struct { + Children []struct { + Data PostData `json:"data"` + } `json:"children"` + } `json:"data"` +} + +type PostData struct { + ID string `json:"id"` + Title string `json:"title"` + IsVideo bool `json:"is_video"` + Thumbnail string `json:"thumbnail"` + Media *Media `json:"media"` + Preview *Preview `json:"preview"` + MediaMetadata map[string]MediaMetadata `json:"media_metadata"` + SecureMedia *Media `json:"secure_media"` + Over18 bool `json:"over_18"` +} + +type Media struct { + RedditVideo *RedditVideo `json:"reddit_video"` +} + +type RedditVideo struct { + FallbackURL string `json:"fallback_url"` + HLSURL string `json:"hls_url"` + DashURL string `json:"dash_url"` + Duration int64 `json:"duration"` + Height int64 `json:"height"` + Width int64 `json:"width"` + 
ScrubberMediaURL string `json:"scrubber_media_url"` +} + +type Preview struct { + Images []Image `json:"images"` + RedditVideoPreview *RedditVideoPreview `json:"reddit_video_preview"` +} + +type Image struct { + Source ImageSource `json:"source"` + Variants ImageVariants `json:"variants"` +} + +type ImageSource struct { + URL string `json:"url"` + Width int64 `json:"width"` + Height int64 `json:"height"` +} + +type ImageVariants struct { + MP4 *MP4Variant `json:"mp4"` +} + +type MP4Variant struct { + Source ImageSource `json:"source"` +} + +type RedditVideoPreview struct { + FallbackURL string `json:"fallback_url"` + Duration int64 `json:"duration"` +} + +type MediaMetadata struct { + Status string `json:"status"` + E string `json:"e"` + S struct { + U string `json:"u"` + X int64 `json:"x"` + Y int64 `json:"y"` + } `json:"s"` +} diff --git a/ext/reddit/util.go b/ext/reddit/util.go new file mode 100644 index 0000000..cc6d352 --- /dev/null +++ b/ext/reddit/util.go @@ -0,0 +1,39 @@ +package reddit + +import ( + "fmt" + "govd/models" + "govd/util" + "govd/util/parser" + "regexp" +) + +const ( + hlsURLFormat = "https://v.redd.it/%s/HLSPlaylist.m3u8" +) + +var videoURLPattern = regexp.MustCompile(`https?://v\.redd\.it/([^/]+)`) + +func GetHLSFormats(videoURL string, thumbnail string, duration int64) ([]*models.MediaFormat, error) { + matches := videoURLPattern.FindStringSubmatch(videoURL) + if len(matches) < 2 { + return nil, nil + } + + videoID := matches[1] + hlsURL := fmt.Sprintf(hlsURLFormat, videoID) + + formats, err := parser.ParseM3U8FromURL(hlsURL) + if err != nil { + return nil, err + } + + for _, format := range formats { + format.Duration = duration + if thumbnail != "" { + format.Thumbnail = []string{util.FixURL(thumbnail)} + } + } + + return formats, nil +} diff --git a/ext/tiktok/main.go b/ext/tiktok/main.go new file mode 100644 index 0000000..5045f5b --- /dev/null +++ b/ext/tiktok/main.go @@ -0,0 +1,184 @@ +package tiktok + +import ( + "crypto/tls" + 
"encoding/json" + "fmt" + "io" + "net/http" + "regexp" + + "github.com/quic-go/quic-go" + "github.com/quic-go/quic-go/http3" + + "govd/enums" + "govd/models" + "govd/util" +) + +const ( + apiHostname = "api16-normal-c-useast1a.tiktokv.com" + installationID = "7127307272354596614" + appName = "musical_ly" + appID = "1233" + appVersion = "37.1.4" + manifestAppVersion = "2023508030" + packageID = "com.zhiliaoapp.musically/" + manifestAppVersion + appUserAgent = packageID + " (Linux; U; Android 13; en_US; Pixel 7; Build/TD1A.220804.031; Cronet/58.0.2991.0)" +) + +var HTTPClient = &http.Client{ + Transport: &http3.Transport{ + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: true, + }, + QUICConfig: &quic.Config{ + MaxIncomingStreams: -1, + EnableDatagrams: true, + }, + }, +} + +var VMExtractor = &models.Extractor{ + Name: "TikTok VM", + CodeName: "tiktokvm", + Type: enums.ExtractorTypeSingle, + Category: enums.ExtractorCategorySocial, + URLPattern: regexp.MustCompile(`https:\/\/((?:vm|vt|www)\.)?(vx)?tiktok\.com\/(?:t\/)?(?P[a-zA-Z0-9]+)`), + IsRedirect: true, + + Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { + location, err := util.GetLocationURL(ctx.MatchedContentURL, "") + if err != nil { + return nil, fmt.Errorf("failed to get url location: %w", err) + } + return &models.ExtractorResponse{ + URL: location, + }, nil + }, +} + +var Extractor = &models.Extractor{ + Name: "TikTok", + CodeName: "tiktok", + Type: enums.ExtractorTypeSingle, + Category: enums.ExtractorCategorySocial, + URLPattern: regexp.MustCompile(`https?:\/\/((www|m)\.)?(vx)?tiktok\.com\/((?:embed|@[\w\.-]+)\/)?(v(ideo)?|p(hoto)?)\/(?P[0-9]+)`), + + Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { + mediaList, err := MediaListFromAPI(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get media: %w", err) + } + return &models.ExtractorResponse{ + MediaList: mediaList, + }, nil + }, +} + +func MediaListFromAPI(ctx *models.DownloadContext) 
([]*models.Media, error) { + var mediaList []*models.Media + + details, err := GetVideoAPI(ctx.MatchedContentID) + if err != nil { + return nil, fmt.Errorf("failed to get from api: %w", err) + } + caption := details.Desc + isImageSlide := details.ImagePostInfo != nil + if !isImageSlide { + media := ctx.Extractor.NewMedia( + ctx.MatchedContentID, + ctx.MatchedContentURL, + ) + media.SetCaption(caption) + video := details.Video + + // generic PlayAddr + if video.PlayAddr != nil { + format, err := ParsePlayAddr(video, video.PlayAddr) + if err != nil { + return nil, fmt.Errorf("failed to parse playaddr: %w", err) + } + media.AddFormat(format) + } + // hevc PlayAddr + if video.PlayAddrBytevc1 != nil { + format, err := ParsePlayAddr(video, video.PlayAddrBytevc1) + if err != nil { + return nil, fmt.Errorf("failed to parse playaddr: %w", err) + } + media.AddFormat(format) + } + // h264 PlayAddr + if video.PlayAddrH264 != nil { + format, err := ParsePlayAddr(video, video.PlayAddrH264) + if err != nil { + return nil, fmt.Errorf("failed to parse playaddr: %w", err) + } + media.AddFormat(format) + } + mediaList = append(mediaList, media) + } else { + images := details.ImagePostInfo.Images + for _, image := range images { + media := ctx.Extractor.NewMedia( + ctx.MatchedContentID, + ctx.MatchedContentURL, + ) + media.SetCaption(caption) + media.AddFormat(&models.MediaFormat{ + FormatID: "image", + Type: enums.MediaTypePhoto, + URL: image.DisplayImage.URLList, + }) + mediaList = append(mediaList, media) + } + } + return mediaList, nil +} + +func GetVideoAPI(awemeID string) (*AwemeDetails, error) { + apiURL := fmt.Sprintf( + "https://%s/aweme/v1/multi/aweme/detail/", + apiHostname, + ) + queryParams, err := BuildAPIQuery() + if err != nil { + return nil, fmt.Errorf("failed to build api query: %w", err) + } + postData := BuildPostData(awemeID) + + req, err := http.NewRequest( + http.MethodPost, + apiURL, + postData, + ) + if err != nil { + return nil, fmt.Errorf("failed to create 
request: %w", err) + } + req.URL.RawQuery = queryParams.Encode() + req.Header.Set("User-Agent", appUserAgent) + req.Header.Set("Accept", "application/json") + req.Header.Set("X-Argus", "") + + resp, err := HTTPClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to send request: %w", err) + } + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + + var data *Response + err = json.Unmarshal(body, &data) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal response: %w", err) + } + videoData, err := FindVideoData(data, awemeID) + if err != nil { + return nil, fmt.Errorf("failed to find video data: %w", err) + } + return videoData, nil +} diff --git a/ext/tiktok/models.go b/ext/tiktok/models.go new file mode 100644 index 0000000..d04d36c --- /dev/null +++ b/ext/tiktok/models.go @@ -0,0 +1,65 @@ +package tiktok + +type Response struct { + AwemeDetails []AwemeDetails `json:"aweme_details"` + StatusCode int `json:"status_code"` + StatusMsg string `json:"status_msg"` +} + +type Cover struct { + Height int64 `json:"height"` + URI string `json:"uri"` + URLList []string `json:"url_list"` + URLPrefix any `json:"url_prefix"` + Width int64 `json:"width"` +} + +type PlayAddr struct { + DataSize int64 `json:"data_size"` + FileCs string `json:"file_cs"` + FileHash string `json:"file_hash"` + Height int64 `json:"height"` + URI string `json:"uri"` + URLKey string `json:"url_key"` + URLList []string `json:"url_list"` + Width int64 `json:"width"` +} + +type Image struct { + DisplayImage *DisplayImage `json:"display_image"` +} + +type DisplayImage struct { + Height int `json:"height"` + URI string `json:"uri"` + URLList []string `json:"url_list"` + URLPrefix any `json:"url_prefix"` + Width int `json:"width"` +} + +type ImagePostInfo struct { + Images []Image `json:"images"` + MusicVolume float64 `json:"music_volume"` + PostExtra string `json:"post_extra"` + Title 
string `json:"title"` +} + +type Video struct { + CdnURLExpired int64 `json:"cdn_url_expired"` + Cover Cover `json:"cover"` + Duration int64 `json:"duration"` + HasWatermark bool `json:"has_watermark"` + Height int64 `json:"height"` + PlayAddr *PlayAddr `json:"play_addr"` + PlayAddrBytevc1 *PlayAddr `json:"play_addr_bytevc1"` + PlayAddrH264 *PlayAddr `json:"play_addr_h264"` + Width int64 `json:"width"` +} + +type AwemeDetails struct { + AwemeID string `json:"aweme_id"` + AwemeType int `json:"aweme_type"` + Desc string `json:"desc"` + Video *Video `json:"video"` + ImagePostInfo *ImagePostInfo `json:"image_post_info"` +} diff --git a/ext/tiktok/util.go b/ext/tiktok/util.go new file mode 100644 index 0000000..2e85154 --- /dev/null +++ b/ext/tiktok/util.go @@ -0,0 +1,177 @@ +package tiktok + +import ( + "crypto/rand" + "fmt" + "math/big" + "net/url" + "strconv" + "strings" + "time" + + "github.com/pkg/errors" + + "govd/enums" + "govd/models" + "govd/util" + + "github.com/google/uuid" +) + +func BuildAPIQuery() (url.Values, error) { + requestTicket := strconv.Itoa(int(time.Now().Unix()) * 1000) + clientDeviceID := uuid.New().String() + versionCode, err := GetAppVersionCode(appVersion) + if err != nil { + return nil, fmt.Errorf("failed to get app version code: %w", err) + } + return url.Values{ + "device_platform": []string{"android"}, + "os": []string{"android"}, + "ssmix": []string{"0"}, // what is this? 
+ "_rticket": []string{requestTicket}, + "cdid": []string{clientDeviceID}, + "channel": []string{"googleplay"}, + "aid": []string{appID}, + "app_name": []string{appName}, + "version_code": []string{versionCode}, + "version_name": []string{appVersion}, + "manifest_version_code": []string{manifestAppVersion}, + "update_version_code": []string{manifestAppVersion}, + "ab_version": []string{appVersion}, + "resolution": []string{"1080*2400"}, + "dpi": []string{"420"}, + "device_type": []string{"Pixel 7"}, + "device_brand": []string{"Google"}, + "language": []string{"en"}, + "os_api": []string{"29"}, + "os_version": []string{"13"}, + "ac": []string{"wifi"}, + "is_pad": []string{"0"}, + "current_region": []string{"US"}, + "app_type": []string{"normal"}, + "last_install_time": []string{GetRandomInstallTime()}, + "timezone_name": []string{"America/New_York"}, + "residence": []string{"US"}, + "app_language": []string{"en"}, + "timezone_offset": []string{"-14400"}, + "host_abi": []string{"armeabi-v7a"}, + "locale": []string{"en"}, + "ac2": []string{"wifi5g"}, + "uoo": []string{"1"}, // what is this? 
+ "carrier_region": []string{"US"}, + "build_number": []string{appVersion}, + "region": []string{"US"}, + "ts": []string{strconv.Itoa(int(time.Now().Unix()))}, + "iid": []string{installationID}, + "device_id": []string{GetRandomDeviceID()}, + "openudid": []string{GetRandomUdid()}, + }, nil +} + +func ParsePlayAddr( + video *Video, + playAddr *PlayAddr, +) (*models.MediaFormat, error) { + formatID := playAddr.URLKey + if formatID == "" { + return nil, errors.New("url_key not found") + } + videoCodec := enums.MediaCodecHEVC + if strings.Contains(formatID, "h264") { + videoCodec = enums.MediaCodecAVC + } + videoURL := playAddr.URLList + videoDuration := video.Duration / 1000 + videoWidth := playAddr.Width + videoHeight := playAddr.Height + videoCover := &video.Cover + videoThumbnailURLs := videoCover.URLList + + return &models.MediaFormat{ + Type: enums.MediaTypeVideo, + FormatID: formatID, + URL: videoURL, + VideoCodec: videoCodec, + AudioCodec: enums.MediaCodecAAC, + Duration: videoDuration, + Thumbnail: videoThumbnailURLs, + Width: videoWidth, + Height: videoHeight, + }, nil +} + +func GetRandomInstallTime() string { + currentTime := int(time.Now().Unix()) + minOffset := big.NewInt(86400) + maxOffset := big.NewInt(1123200) + diff := new(big.Int).Sub(maxOffset, minOffset) + randomOffset, _ := rand.Int(rand.Reader, diff) + randomOffset.Add(randomOffset, minOffset) + result := currentTime - int(randomOffset.Int64()) + return strconv.Itoa(result) +} + +func GetRandomUdid() string { + const charset = "0123456789abcdef" + result := make([]byte, 16) + + for i := range result { + index, _ := rand.Int(rand.Reader, big.NewInt(int64(len(charset)))) + result[i] = charset[index.Int64()] + } + return string(result) +} + +func GetRandomDeviceID() string { + minNum := big.NewInt(7250000000000000000) + maxNum := big.NewInt(7351147085025500000) + diff := new(big.Int).Sub(maxNum, minNum) + randNum, _ := rand.Int(rand.Reader, diff) + result := new(big.Int).Add(randNum, minNum) + 
return result.String() +} + +func BuildPostData(awemeID string) *strings.Reader { + data := url.Values{ + "aweme_ids": []string{fmt.Sprintf("[%s]", awemeID)}, + "request_source": []string{"0"}, + } + return strings.NewReader(data.Encode()) + +} + +func GetAppVersionCode(version string) (string, error) { + parts := strings.Split(version, ".") + + var result strings.Builder + for _, part := range parts { + num, err := strconv.Atoi(part) + if err != nil { + return "", fmt.Errorf("failed to parse version part: %w", err) + } + _, err = fmt.Fprintf(&result, "%02d", num) + if err != nil { + return "", fmt.Errorf("failed to format version part: %w", err) + } + } + return result.String(), nil +} + +func FindVideoData( + resp *Response, + expectedAwemeID string, +) (*AwemeDetails, error) { + if resp.StatusCode == 2053 { + return nil, util.ErrUnavailable + } + if resp.AwemeDetails == nil { + return nil, errors.New("aweme_details is nil") + } + for _, item := range resp.AwemeDetails { + if item.AwemeID == expectedAwemeID { + return &item, nil + } + } + return nil, errors.New("matching aweme_id not found") +} diff --git a/ext/twitter/main.go b/ext/twitter/main.go new file mode 100644 index 0000000..fca8923 --- /dev/null +++ b/ext/twitter/main.go @@ -0,0 +1,181 @@ +package twitter + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "regexp" + + "govd/enums" + "govd/models" + "govd/util" +) + +const ( + apiHostname = "x.com" + apiEndpoint = "https://x.com/i/api/graphql/zZXycP0V6H7m-2r0mOnFcA/TweetDetail" +) + +var HTTPClient = &http.Client{} + +var ShortExtractor = &models.Extractor{ + Name: "Twitter (Short)", + CodeName: "twitter:short", + Type: enums.ExtractorTypeSingle, + Category: enums.ExtractorCategorySocial, + URLPattern: regexp.MustCompile(`https?://t\.co/(?P\w+)`), + IsRedirect: true, + + Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { + req, err := http.NewRequest("GET", ctx.MatchedContentURL, nil) + if err != nil { + return nil, 
fmt.Errorf("failed to create req: %w", err) + } + req.Header.Set("User-Agent", util.ChromeUA) + res, err := HTTPClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to send request: %w", err) + } + defer res.Body.Close() + body, err := io.ReadAll(res.Body) + if err != nil { + return nil, fmt.Errorf("failed to read body: %w", err) + } + matchedURL := Extractor.URLPattern.FindStringSubmatch(string(body)) + if matchedURL == nil { + return nil, fmt.Errorf("failed to find url in body") + } + return &models.ExtractorResponse{ + URL: matchedURL[0], + }, nil + }, +} + +var Extractor = &models.Extractor{ + Name: "Twitter", + CodeName: "twitter", + Type: enums.ExtractorTypeSingle, + Category: enums.ExtractorCategorySocial, + URLPattern: regexp.MustCompile(`https?:\/\/(vx)?(twitter|x)\.com\/([^\/]+)\/status\/(?P\d+)`), + + Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { + mediaList, err := MediaListFromAPI(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get media: %w", err) + } + return &models.ExtractorResponse{ + MediaList: mediaList, + }, nil + }, +} + +func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) { + var mediaList []*models.Media + + tweetData, err := GetTweetAPI(ctx.MatchedContentID) + if err != nil { + return nil, fmt.Errorf("failed to get tweet data: %w", err) + } + + caption := CleanCaption(tweetData.FullText) + + var mediaEntities []MediaEntity + if tweetData.ExtendedEntities != nil && len(tweetData.ExtendedEntities.Media) > 0 { + mediaEntities = tweetData.ExtendedEntities.Media + } else if tweetData.Entities != nil && len(tweetData.Entities.Media) > 0 { + mediaEntities = tweetData.Entities.Media + } else { + return nil, fmt.Errorf("no media found in tweet") + } + + for _, mediaEntity := range mediaEntities { + media := ctx.Extractor.NewMedia( + ctx.MatchedContentID, + ctx.MatchedContentURL, + ) + media.SetCaption(caption) + + switch mediaEntity.Type { + case "video", "animated_gif": + 
formats, err := ExtractVideoFormats(&mediaEntity) + if err != nil { + return nil, err + } + for _, format := range formats { + media.AddFormat(format) + } + case "photo": + media.AddFormat(&models.MediaFormat{ + Type: enums.MediaTypePhoto, + FormatID: "photo", + URL: []string{mediaEntity.MediaURLHTTPS}, + }) + } + + if len(media.Formats) > 0 { + mediaList = append(mediaList, media) + } + } + + return mediaList, nil +} + +func GetTweetAPI(tweetID string) (*Tweet, error) { + cookies, err := util.ParseCookieFile("twitter.txt") + if err != nil { + return nil, fmt.Errorf("failed to get cookies: %w", err) + } + headers := BuildAPIHeaders(cookies) + if headers == nil { + return nil, fmt.Errorf("failed to build headers. check cookies") + } + query := BuildAPIQuery(tweetID) + + req, err := http.NewRequest("GET", apiEndpoint, nil) + if err != nil { + return nil, fmt.Errorf("failed to create req: %w", err) + } + + for key, value := range headers { + req.Header.Set(key, value) + } + + for _, cookie := range cookies { + req.AddCookie(cookie) + } + + q := req.URL.Query() + for key, value := range query { + q.Add(key, value) + } + req.URL.RawQuery = q.Encode() + + resp, err := HTTPClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to send request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("invalid response code: %s", resp.Status) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read body: %w", err) + } + + var apiResponse APIResponse + err = json.Unmarshal(body, &apiResponse) + if err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + + tweet, err := FindTweetData(&apiResponse, tweetID) + if err != nil { + return nil, fmt.Errorf("failed to get tweet data: %w", err) + } + + return tweet, nil +} diff --git a/ext/twitter/models.go b/ext/twitter/models.go new file mode 100644 index 0000000..e0385a8 --- /dev/null +++ 
b/ext/twitter/models.go @@ -0,0 +1,72 @@ +package twitter + +type APIResponse struct { + Data struct { + ThreadedConversationWithInjectionsV2 struct { + Instructions []struct { + Entries []struct { + EntryID string `json:"entryId"` + Content struct { + ItemContent struct { + TweetResults struct { + Result TweetResult `json:"result"` + } `json:"tweet_results"` + } `json:"itemContent"` + } `json:"content"` + } `json:"entries"` + } `json:"instructions"` + } `json:"threaded_conversation_with_injections_v2"` + } `json:"data"` +} + +type TweetResult struct { + Tweet *Tweet `json:"tweet,omitempty"` + Legacy *Tweet `json:"legacy,omitempty"` + RestID string `json:"rest_id,omitempty"` + Core *Core `json:"core,omitempty"` +} + +type Core struct { + UserResults struct { + Result struct { + Legacy *UserLegacy `json:"legacy,omitempty"` + } `json:"result"` + } `json:"user_results"` +} + +type UserLegacy struct { + ScreenName string `json:"screen_name"` + Name string `json:"name"` +} + +type Tweet struct { + FullText string `json:"full_text"` + ExtendedEntities *ExtendedEntities `json:"extended_entities,omitempty"` + Entities *ExtendedEntities `json:"entities,omitempty"` + CreatedAt string `json:"created_at"` + ID string `json:"id_str"` +} + +type ExtendedEntities struct { + Media []MediaEntity `json:"media,omitempty"` +} + +type MediaEntity struct { + Type string `json:"type"` + MediaURLHTTPS string `json:"media_url_https"` + ExpandedURL string `json:"expanded_url"` + URL string `json:"url"` + VideoInfo *VideoInfo `json:"video_info,omitempty"` +} + +type VideoInfo struct { + DurationMillis int `json:"duration_millis"` + Variants []Variant `json:"variants"` + AspectRatio []int `json:"aspect_ratio"` +} + +type Variant struct { + Bitrate int `json:"bitrate,omitempty"` + ContentType string `json:"content_type"` + URL string `json:"url"` +} diff --git a/ext/twitter/util.go b/ext/twitter/util.go new file mode 100644 index 0000000..f99df0c --- /dev/null +++ b/ext/twitter/util.go @@ -0,0 
// BuildAPIQuery returns the query parameters for a TweetDetail GraphQL
// request: "variables" (which carries tweetID as focalTweetId) and "features",
// each serialized as a JSON object.
func BuildAPIQuery(tweetID string) map[string]string {
	variables := map[string]any{
		"focalTweetId":                           tweetID,
		"includePromotedContent":                 true,
		"with_rux_injections":                    false,
		"withBirdwatchNotes":                     true,
		"withCommunity":                          true,
		"withDownvotePerspective":                false,
		"withQuickPromoteEligibilityTweetFields": true,
		"withReactionsMetadata":                  false,
		"withReactionsPerspective":               false,
		"withSuperFollowsTweetFields":            true,
		"withSuperFollowsUserFields":             true,
		"withV2Timeline":                         true,
		"withVoice":                              true,
	}

	features := map[string]any{
		"graphql_is_translatable_rweb_tweet_is_translatable_enabled":              false,
		"interactive_text_enabled":                                                true,
		"responsive_web_edit_tweet_api_enabled":                                   true,
		"responsive_web_enhance_cards_enabled":                                    true,
		"responsive_web_graphql_timeline_navigation_enabled":                      false,
		"responsive_web_text_conversations_enabled":                               false,
		"responsive_web_uc_gql_enabled":                                           true,
		"standardized_nudges_misinfo":                                             true,
		"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": false,
		"tweetypie_unmention_optimization_enabled":                                true,
		"unified_cards_ad_metadata_container_dynamic_card_content_query_enabled":  true,
		"verified_phone_label_enabled":                                            false,
		"vibe_api_enabled":                                                        true,
	}

	// Both maps contain only strings and booleans, which encoding/json can
	// always encode, so the Marshal errors are safely ignored.
	variablesJSON, _ := json.Marshal(variables)
	featuresJSON, _ := json.Marshal(features)

	return map[string]string{
		"variables": string(variablesJSON),
		"features":  string(featuresJSON),
	}
}
fmt.Sprintf("tweet-%s", tweetID) + + for _, entry := range entries { + if entry.EntryID == entryID { + result := entry.Content.ItemContent.TweetResults.Result + + if result.Tweet != nil { + return result.Tweet, nil + } + + if result.Legacy != nil { + return result.Legacy, nil + } + + return nil, fmt.Errorf("struttura del tweet non valida") + } + } + + return nil, fmt.Errorf("tweet non trovato nella risposta") +} diff --git a/ext/util.go b/ext/util.go new file mode 100644 index 0000000..3234b3a --- /dev/null +++ b/ext/util.go @@ -0,0 +1,74 @@ +package ext + +import ( + "fmt" + "govd/models" +) + +var maxRedirects = 5 + +func CtxByURL(url string) (*models.DownloadContext, error) { + var redirectCount int + + currentURL := url + + for redirectCount <= maxRedirects { + for _, extractor := range List { + matches := extractor.URLPattern.FindStringSubmatch(currentURL) + if matches == nil { + continue + } + + groupNames := extractor.URLPattern.SubexpNames() + if len(matches) == 0 { + continue + } + + groups := make(map[string]string) + for i, name := range groupNames { + if name != "" { + groups[name] = matches[i] + } + } + groups["match"] = matches[0] + + ctx := &models.DownloadContext{ + MatchedContentID: groups["id"], + MatchedContentURL: groups["match"], + MatchedGroups: groups, + Extractor: extractor, + } + + if !extractor.IsRedirect { + return ctx, nil + } + + response, err := extractor.Run(ctx) + if err != nil { + return nil, err + } + if response.URL == "" { + return nil, fmt.Errorf("no URL found in response") + } + + currentURL = response.URL + redirectCount++ + + break + } + + if redirectCount > maxRedirects { + return nil, fmt.Errorf("exceeded maximum number of redirects (%d)", maxRedirects) + } + } + return nil, nil +} + +func ByCodeName(codeName string) *models.Extractor { + for _, extractor := range List { + if extractor.CodeName == codeName { + return extractor + } + } + return nil +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..30bd434 
--- /dev/null +++ b/go.mod @@ -0,0 +1,52 @@ +module govd + +go 1.24.0 + +require ( + github.com/PaulSonOfLars/gotgbot/v2 v2.0.0-rc.31 + github.com/google/uuid v1.6.0 + github.com/guregu/null/v6 v6.0.0 + github.com/joho/godotenv v1.5.1 + github.com/quic-go/quic-go v0.50.1 + github.com/strukturag/libheif v1.19.7 + github.com/tidwall/gjson v1.18.0 + github.com/u2takey/ffmpeg-go v0.5.0 + golang.org/x/image v0.26.0 + gorm.io/gorm v1.25.12 +) + +require ( + github.com/Eyevinn/dash-mpd v0.12.0 // indirect + github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df // indirect + github.com/go-sql-driver/mysql v1.7.0 // indirect + github.com/unki2aut/go-xsd-types v0.0.0-20200220223938-30e5405398f8 // indirect +) + +require ( + github.com/aki237/nscjar v0.0.0-20210417074043-bbb606196143 + github.com/aws/aws-sdk-go v1.55.6 // indirect + github.com/etherlabsio/go-m3u8 v1.0.0 + github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect + github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect + github.com/grafov/m3u8 v0.12.1 + github.com/jinzhu/inflection v1.0.0 // indirect + github.com/jinzhu/now v1.1.5 // indirect + github.com/jmespath/go-jmespath v0.4.0 // indirect + github.com/onsi/ginkgo/v2 v2.9.5 // indirect + github.com/pkg/errors v0.9.1 + github.com/quic-go/qpack v0.5.1 // indirect + github.com/tidwall/match v1.1.1 // indirect + github.com/tidwall/pretty v1.2.1 // indirect + github.com/u2takey/go-utils v0.3.1 // indirect + github.com/unki2aut/go-mpd v0.0.0-20250218132413-c6a2d2d492f4 + go.uber.org/mock v0.5.0 // indirect + golang.org/x/crypto v0.31.0 // indirect + golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect + golang.org/x/mod v0.18.0 // indirect + golang.org/x/net v0.33.0 // indirect + golang.org/x/sync v0.13.0 // indirect + golang.org/x/sys v0.32.0 // indirect + golang.org/x/text v0.24.0 // indirect + golang.org/x/tools v0.22.0 // indirect + gorm.io/driver/mysql v1.5.7 +) diff --git a/go.sum b/go.sum new 
file mode 100644 index 0000000..f75457b --- /dev/null +++ b/go.sum @@ -0,0 +1,162 @@ +github.com/AlekSi/pointer v1.0.0/go.mod h1:1kjywbfcPFCmncIxtk6fIEub6LKrfMz3gc5QKVOSOA8= +github.com/Eyevinn/dash-mpd v0.12.0 h1:fFNE9KPLqe4OG79fYyT/KalmFbQT2vG4Z01ppmEC4Aw= +github.com/Eyevinn/dash-mpd v0.12.0/go.mod h1:yym2itvB74evfJFDZB99p700LQddQFsN1YCbk9t6mAA= +github.com/PaulSonOfLars/gotgbot/v2 v2.0.0-rc.31 h1:SIkzqC6Nv+znY4NGbWlJceWdns8QVmf9cwAYXd7Cg8k= +github.com/PaulSonOfLars/gotgbot/v2 v2.0.0-rc.31/go.mod h1:kL1v4iIjlalwm3gCYGvF4NLa3hs+aKEfRkNJvj4aoDU= +github.com/aki237/nscjar v0.0.0-20210417074043-bbb606196143 h1:PqRkQZW8lAlK2DnH9iSBfISmDxSChaoNJHwP0p7SD2Y= +github.com/aki237/nscjar v0.0.0-20210417074043-bbb606196143/go.mod h1:l0r3UsMujHR1bAYL7R0+6NXkHo/vIe+ja3xLZbUZNb8= +github.com/aws/aws-sdk-go v1.38.20/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro= +github.com/aws/aws-sdk-go v1.55.6 h1:cSg4pvZ3m8dgYcgqB97MrcdjUmZ1BeMYKUxMMB89IPk= +github.com/aws/aws-sdk-go v1.55.6/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU= +github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df h1:GSoSVRLoBaFpOOds6QyY1L8AX7uoY+Ln3BHc22W40X0= +github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df/go.mod h1:hiVxq5OP2bUGBRNS3Z/bt/reCLFNbdcST6gISi1fiOM= +github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/disintegration/imaging v1.6.2/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4= +github.com/etherlabsio/go-m3u8 
v1.0.0 h1:d3HJVr8wlbvJO20ksKEyvDYf4bcM7v8YV3W83fHswL0= +github.com/etherlabsio/go-m3u8 v1.0.0/go.mod h1:RzDiaXgaYnIEzZUmVUD/xMRFR7bY7U5JaCnp8XYLmXU= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= +github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= +github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-sql-driver/mysql v1.7.0 h1:ueSltNNllEqE3qcWBTD0iQd3IpL/6U+mJxLkazJ7YPc= +github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= +github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE= +github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grafov/m3u8 v0.12.1 h1:DuP1uA1kvRRmGNAZ0m+ObLv1dvrfNO0TPx0c/enNk0s= 
+github.com/grafov/m3u8 v0.12.1/go.mod h1:nqzOkfBiZJENr52zTVd/Dcl03yzphIMbJqkXGu+u080= +github.com/guregu/null/v6 v6.0.0 h1:N14VRS+4di81i1PXRiprbQJ9EM9gqBa0+KVMeS/QSjQ= +github.com/guregu/null/v6 v6.0.0/go.mod h1:hrMIhIfrOZeLPZhROSn149tpw2gHkidAqxoXNyeX3iQ= +github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= +github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= +github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= +github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= +github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= +github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= +github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= +github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= +github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= +github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= +github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= +github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod 
h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= +github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= +github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE= +github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg= +github.com/panjf2000/ants/v2 v2.4.2/go.mod h1:f6F0NZVFsGCp5A7QW/Zj/m92atWwOkY0OIhFxRNFr4A= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/quic-go/qpack v0.5.1 h1:giqksBPnT/HDtZ6VhtFKgoLOWmlyo9Ei6u9PqzIMbhI= +github.com/quic-go/qpack v0.5.1/go.mod h1:+PC4XFrEskIVkcLzpEkbLqq1uCoxPhQuvK5rH1ZgaEg= +github.com/quic-go/quic-go v0.50.1 h1:unsgjFIUqW8a2oopkY7YNONpV1gYND6Nt9hnt1PN94Q= +github.com/quic-go/quic-go v0.50.1/go.mod h1:Vim6OmUvlYdwBhXP9ZVrtGmCMWa3wEqhq3NgYrI8b4E= +github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify 
v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/strukturag/libheif v1.19.7 h1:XMfSJvmnucTbiS6CSxxZmpx5XSPjdqkpA3wiL6+I2Iw= +github.com/strukturag/libheif v1.19.7/go.mod h1:E/PNRlmVtrtj9j2AvBZlrO4dsBDu6KfwDZn7X1Ce8Ks= +github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= +github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= +github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/u2takey/ffmpeg-go v0.5.0 h1:r7d86XuL7uLWJ5mzSeQ03uvjfIhiJYvsRAJFCW4uklU= +github.com/u2takey/ffmpeg-go v0.5.0/go.mod h1:ruZWkvC1FEiUNjmROowOAps3ZcWxEiOpFoHCvk97kGc= +github.com/u2takey/go-utils v0.3.1 h1:TaQTgmEZZeDHQFYfd+AdUT1cT4QJgJn/XVPELhHw4ys= +github.com/u2takey/go-utils v0.3.1/go.mod h1:6e+v5vEZ/6gu12w/DC2ixZdZtCrNokVxD0JUklcqdCs= +github.com/unki2aut/go-mpd v0.0.0-20250218132413-c6a2d2d492f4 h1:yPsATZRcBhrBQkxK9hsGo1cPHroobmw7Bptt+UJV0D8= +github.com/unki2aut/go-mpd v0.0.0-20250218132413-c6a2d2d492f4/go.mod h1:LITqXLCxxmcoHtOMgZh5NbcfS4RCrrADQXPVkYwF/cc= +github.com/unki2aut/go-xsd-types v0.0.0-20200220223938-30e5405398f8 h1:u0Bi6Mf8BKPQnxGJ7QubdMyhb0SJjnQU7kX0BA9eASk= +github.com/unki2aut/go-xsd-types v0.0.0-20200220223938-30e5405398f8/go.mod h1:uIeMfpmWIZ8SGp+fTfwDBWiiRn3aJm4b7rFSro9s++Q= +go.uber.org/mock v0.5.0 h1:KAMbZvZPyBPWgD14IrIQ38QCyjwpvVVV6K/bHl1IwQU= +go.uber.org/mock v0.5.0/go.mod h1:ge71pBPLYDk7QIi1LupWxdAykm7KIEFchiOqd6z7qMM= +gocv.io/x/gocv v0.25.0/go.mod h1:Rar2PS6DV+T4FL+PM535EImD/h13hGVaHhnCu1xarBs= 
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= +golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= +golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= +golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM= +golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc= +golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.26.0 h1:4XjIFEZWQmCZi6Wv8BoxsDhRU3RVnLX04dToTDAEPlY= +golang.org/x/image v0.26.0/go.mod h1:lcxbMFAovzpnJxzXS3nyL83K27tmqtKzIJpctK8YO5c= +golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0= +golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= +golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= +golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610= +golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys 
v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200602225109-6fdc65e7d980/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= +golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0= +golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU= +golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA= +golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 
v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gorm.io/driver/mysql v1.5.7 h1:MndhOPYOfEp2rHKgkZIhJ16eVUIRf2HmzgoPmh7FCWo= +gorm.io/driver/mysql v1.5.7/go.mod h1:sEtPWMiqiN1N1cMXoXmBbd8C6/l+TESwriotuRRpkDM= +gorm.io/gorm v1.25.7/go.mod h1:hbnx/Oo0ChWMn1BIhpy1oYozzpM15i4YPuHDmfYtwg8= +gorm.io/gorm v1.25.12 h1:I0u8i2hWQItBq1WfE0o2+WuL9+8L21K9e2HHSTE/0f8= +gorm.io/gorm v1.25.12/go.mod h1:xh7N7RHfYlNc5EmcI/El95gXusucDrQnHXe0+CgWcLQ= +sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= diff --git a/main.go b/main.go new file mode 100644 index 0000000..138075c --- /dev/null +++ b/main.go @@ -0,0 +1,25 @@ +package main + +import ( + "govd/bot" + "govd/database" + "govd/util" + "log" + + "github.com/joho/godotenv" +) + +func main() { + err := godotenv.Load() + if err != nil { + log.Fatal("error loading .env file") + } + ok := util.CheckFFmpeg() + if !ok { + log.Fatal("ffmpeg executable not found. 
please install it or add it to your PATH") + } + database.Start() + go bot.Start() + + select {} // keep the main goroutine alive +} diff --git a/models/ctx.go b/models/ctx.go new file mode 100644 index 0000000..7b2d164 --- /dev/null +++ b/models/ctx.go @@ -0,0 +1,9 @@ +package models + +type DownloadContext struct { + MatchedContentID string + MatchedContentURL string + MatchedGroups map[string]string + GroupSettings *GroupSettings + Extractor *Extractor +} diff --git a/models/download.go b/models/download.go new file mode 100644 index 0000000..1c5f772 --- /dev/null +++ b/models/download.go @@ -0,0 +1,14 @@ +package models + +import "time" + +type DownloadConfig struct { + ChunkSize int // size of each chunk in bytes + Concurrency int // maximum number of concurrent downloads + Timeout time.Duration // timeout for individual HTTP requests + DownloadDir string // directory to save downloaded files + RetryAttempts int // number of retry attempts per chunk + RetryDelay time.Duration // delay between retries + Remux bool // whether to remux the downloaded file with ffmpeg + ProgressUpdater func(float64) // optional function to report download progress +} diff --git a/models/ext.go b/models/ext.go new file mode 100644 index 0000000..7ec2326 --- /dev/null +++ b/models/ext.go @@ -0,0 +1,34 @@ +package models + +import ( + "govd/enums" + "regexp" +) + +type Extractor struct { + Name string + CodeName string + Type enums.ExtractorType + Category enums.ExtractorCategory + URLPattern *regexp.Regexp + IsDRM bool + IsRedirect bool + + Run func(*DownloadContext) (*ExtractorResponse, error) +} + +type ExtractorResponse struct { + MediaList []*Media + URL string // redirected URL +} + +func (extractor *Extractor) NewMedia( + contentID string, + contentURL string, +) *Media { + return &Media{ + ContentID: contentID, + ContentURL: contentURL, + ExtractorCodeName: extractor.CodeName, + } +} diff --git a/models/media.go b/models/media.go new file mode 100644 index 0000000..9fdcfff 
// MediaFormat describes one concrete rendition (video, audio or photo) of a
// Media item. Fields tagged `gorm:"-"` are excluded from the database model;
// fields tagged `json:"-"` are excluded from JSON output.
type MediaFormat struct {
	ID uint `json:"-"`
	// MediaID is the foreign key referenced by the Media field below.
	MediaID uint            `gorm:"index:idx_media_format,priority:1;not null" json:"-"`
	Type    enums.MediaType `gorm:"not null;index:idx_media_type" json:"type"`
	// FormatID is the identifier looked up by Media.GetFormat.
	FormatID string `gorm:"not null;index" json:"format_id"`
	// FileID, when non-empty, makes GetInputMedia send the media by ID
	// instead of opening a local file.
	FileID string `gorm:"not null;index" json:"-"`
	// VideoCodec and AudioCodec drive the extension / InputMedia type chosen
	// by GetFormatInfo; an empty codec means that stream is absent.
	VideoCodec enums.MediaCodec `json:"video_codec"`
	AudioCodec enums.MediaCodec `json:"audio_codec"`
	// Duration is passed to the InputMedia Duration field.
	// NOTE(review): presumably seconds — confirm against all extractors.
	Duration int64 `json:"duration"`
	Width    int64 `json:"width"`
	Height   int64 `json:"height"`
	// Bitrate is used to pick the default format (highest wins) and as a
	// sort key in GetSortedFormats.
	Bitrate int64 `json:"bitrate"`
	// Title and Artist are sent as audio title/performer and, for audio
	// formats, are used by GetFileName to build the file name.
	Title  string `json:"title"`
	Artist string `json:"artist"`
	// IsDefault is set to true by the GetDefault*Format methods on the
	// format they select.
	IsDefault bool     `gorm:"default:false;index" json:"is_default"`
	Segments  []string `gorm:"-" json:"segments"`
	FileSize  int64    `json:"-"`
	Plugins   []Plugin `gorm:"-" json:"-"`

	CreatedAt time.Time      `json:"-"`
	UpdatedAt time.Time      `json:"-"`
	DeletedAt gorm.DeletedAt `gorm:"index" json:"-"`

	// api use only, not stored in database
	URL       []string `gorm:"-" json:"url"`
	Thumbnail []string `gorm:"-" json:"thumbnail"`

	// Media is the owning Media record, linked through MediaID.
	Media *Media `gorm:"foreignKey:MediaID" json:"-"`
}
Index int +} + +func (media *Media) GetFormat(formatID string) *MediaFormat { + for _, format := range media.Formats { + if format.FormatID == formatID { + return format + } + } + return nil +} + +func (media *Media) GetDefaultFormat() *MediaFormat { + format := media.GetDefaultVideoFormat() + if format != nil { + return format + } + format = media.GetDefaultAudioFormat() + if format != nil { + return format + } + format = media.GetDefaultPhotoFormat() + if format != nil { + return format + } + return nil +} + +func (media *Media) GetDefaultVideoFormat() *MediaFormat { + filtered := filterFormats(media.Formats, func(format *MediaFormat) bool { + return format.VideoCodec == enums.MediaCodecAVC + }) + if len(filtered) == 0 { + filtered = filterFormats(media.Formats, func(format *MediaFormat) bool { + return format.VideoCodec != "" + }) + } + if len(filtered) == 0 { + return nil + } + bestFormat := filtered[0] + for _, format := range filtered { + if format.Bitrate > bestFormat.Bitrate { + bestFormat = format + } + } + bestFormat.IsDefault = true + return bestFormat +} + +func (media *Media) GetDefaultAudioFormat() *MediaFormat { + filtered := filterFormats(media.Formats, func(format *MediaFormat) bool { + return format.VideoCodec == "" && + (format.AudioCodec == enums.MediaCodecAAC || + format.AudioCodec == enums.MediaCodecMP3) + }) + if len(filtered) == 0 { + filtered = filterFormats(media.Formats, func(format *MediaFormat) bool { + return format.VideoCodec == "" && format.AudioCodec != "" + }) + } + if len(filtered) == 0 { + return nil + } + bestFormat := filtered[0] + for _, format := range filtered { + if format.Bitrate > bestFormat.Bitrate { + bestFormat = format + } + } + bestFormat.IsDefault = true + return bestFormat +} + +func (media *Media) GetDefaultPhotoFormat() *MediaFormat { + filtered := filterFormats(media.Formats, func(format *MediaFormat) bool { + return format.Type == enums.MediaTypePhoto + }) + if len(filtered) == 0 { + return nil + } + 
filtered[0].IsDefault = true + return filtered[0] +} + +func (media *Media) GetAudioFromVideoFormat() *MediaFormat { + videoFormat := media.GetDefaultVideoFormat() + + if videoFormat == nil { + return nil + } + + return &MediaFormat{ + Type: enums.MediaTypeAudio, + FormatID: "AudioFromVideo", + URL: videoFormat.URL, + AudioCodec: enums.MediaCodecAAC, + Thumbnail: videoFormat.Thumbnail, + Duration: videoFormat.Duration, + Title: videoFormat.Title, + Artist: videoFormat.Artist, + } +} + +func (media *Media) SetCaption(caption string) { + if len(caption) == 0 { + return + } + media.Caption = zero.StringFrom(caption) +} + +func (media *Media) AddFormat(fmt *MediaFormat) { + media.Formats = append(media.Formats, fmt) +} + +func (media *Media) GetSortedFormats() []*MediaFormat { + // group by video format (codec, width, height) + groupedVideos := make(map[[3]int64]*MediaFormat) + for _, format := range media.Formats { + if format.Type == enums.MediaTypeVideo { + key := [3]int64{ + getCodecPriority(format.VideoCodec), + format.Width, + format.Height, + } + existing, ok := groupedVideos[key] + if !ok || format.Bitrate > existing.Bitrate { + groupedVideos[key] = format + } + } + } + + // group by audio format (codec, bitrate) + groupedAudios := make(map[[2]int64]*MediaFormat) + for _, format := range media.Formats { + if format.Type == enums.MediaTypeAudio { + key := [2]int64{ + getCodecPriority(format.AudioCodec), + format.Bitrate, + } + _, exists := groupedAudios[key] + if !exists { + groupedAudios[key] = format + } + } + } + + // combine the best video and audio into a final list + var finalSortedList []*MediaFormat + for _, best := range groupedVideos { + finalSortedList = append(finalSortedList, best) + } + for _, best := range groupedAudios { + finalSortedList = append(finalSortedList, best) + } + + for _, format := range media.Formats { + if format.Type != enums.MediaTypeVideo && format.Type != enums.MediaTypeAudio { + finalSortedList = append(finalSortedList, 
format) // for non-video and non-audio formats + } + } + + // sort the final list + sort.Slice(finalSortedList, func(i, j int) bool { + a, b := finalSortedList[i], finalSortedList[j] + // compare by type priority (video, audio, photo, etc.) + if cmp := getTypePriority(a.Type) - getTypePriority(b.Type); cmp != 0 { + return cmp < 0 + } + // compare by codec priority (for both video and audio) + if a.Type == enums.MediaTypeVideo { + if cmp := getCodecPriority(a.VideoCodec) - getCodecPriority(b.VideoCodec); cmp != 0 { + return cmp < 0 + } + } else if a.Type == enums.MediaTypeAudio { + if cmp := getCodecPriority(a.AudioCodec) - getCodecPriority(b.AudioCodec); cmp != 0 { + return cmp < 0 + } + } + // compare by width for videos + if cmp := a.Width - b.Width; cmp != 0 { + return cmp < 0 + } + // compare by height for videos + if cmp := a.Height - b.Height; cmp != 0 { + return cmp < 0 + } + // compare by bitrate (lower bitrate first) + return a.Bitrate-b.Bitrate < 0 + }) + + return finalSortedList +} + +func filterFormats( + formats []*MediaFormat, + condition func(*MediaFormat) bool, +) []*MediaFormat { + var filtered []*MediaFormat + for _, format := range formats { + if condition(format) { + filtered = append(filtered, format) + } + } + return filtered +} + +func getCodecPriority(codec enums.MediaCodec) int64 { + codecPriority := map[enums.MediaCodec]int64{ + enums.MediaCodecAVC: 1, + enums.MediaCodecHEVC: 2, + enums.MediaCodecMP3: 3, + enums.MediaCodecAAC: 4, + } + return codecPriority[codec] +} + +func getTypePriority(mediaType enums.MediaType) int64 { + typePriority := map[enums.MediaType]int64{ + enums.MediaTypeVideo: 1, + enums.MediaTypeAudio: 2, + enums.MediaTypePhoto: 3, + } + return typePriority[mediaType] +} + +// getFormatInfo returns the file extension and the InputMedia type. 
+func (format *MediaFormat) GetFormatInfo() (string, string) { + if format.Type == enums.MediaTypePhoto { + return "jpeg", "photo" + } + + videoCodec := format.VideoCodec + audioCodec := format.AudioCodec + + switch { + case videoCodec == enums.MediaCodecAVC && audioCodec == enums.MediaCodecAAC: + return "mp4", "video" + case videoCodec == enums.MediaCodecAVC && audioCodec == enums.MediaCodecMP3: + return "mp4", "video" + case videoCodec == enums.MediaCodecHEVC && audioCodec == enums.MediaCodecAAC: + return "mp4", "document" + case videoCodec == enums.MediaCodecHEVC && audioCodec == enums.MediaCodecMP3: + return "mp4", "document" + case videoCodec == enums.MediaCodecAV1 && audioCodec == enums.MediaCodecOpus: + return "webm", "document" + case videoCodec == enums.MediaCodecAV1 && audioCodec == enums.MediaCodecFLAC: + return "webm", "document" + case videoCodec == enums.MediaCodecVP9 && audioCodec == enums.MediaCodecOpus: + return "webm", "document" + case videoCodec == enums.MediaCodecVP9 && audioCodec == enums.MediaCodecFLAC: + return "webm", "document" + case videoCodec == enums.MediaCodecAVC && audioCodec == "": + return "mp4", "video" + case videoCodec == enums.MediaCodecHEVC && audioCodec == "": + return "mp4", "document" + case videoCodec == enums.MediaCodecAV1 && audioCodec == "": + return "webm", "document" + case videoCodec == enums.MediaCodecVP9 && audioCodec == "": + return "webm", "document" + case videoCodec == enums.MediaCodecVP8 && audioCodec == "": + return "webm", "document" + case videoCodec == enums.MediaCodecWebP && audioCodec == "": + return "webp", "video" + case videoCodec == "" && audioCodec == enums.MediaCodecMP3: + return "mp3", "audio" + case videoCodec == "" && audioCodec == enums.MediaCodecAAC: + return "m4a", "audio" + case videoCodec == "" && audioCodec == enums.MediaCodecOpus: + return "webm", "document" + case videoCodec == "" && audioCodec == enums.MediaCodecFLAC: + return "flac", "document" + case videoCodec == "" && audioCodec == 
enums.MediaCodecVorbis: + return "oga", "document" + default: + return "webm", "document" + } +} + +func (format *MediaFormat) GetInputMedia( + filePath string, + thumbnailFilePath string, + messageCaption string, +) (gotgbot.InputMedia, error) { + if format.FileID != "" { + return format.GetInputMediaWithFileID(messageCaption) + } + + _, inputMediaType := format.GetFormatInfo() + + fileObj, err := os.Open(filePath) + if err != nil { + return nil, fmt.Errorf("failed to open file: %w", err) + } + fileInputMedia := gotgbot.InputFileByReader( + filepath.Base(filePath), + fileObj, + ) + + var thumbnailFileInputMedia gotgbot.InputFile + if thumbnailFilePath != "" { + thumbnailFileObj, err := os.Open(thumbnailFilePath) + if err != nil { + return nil, fmt.Errorf("failed to open file: %w", err) + } + thumbnailFileInputMedia = gotgbot.InputFileByReader( + filepath.Base(thumbnailFilePath), + thumbnailFileObj, + ) + } + + if inputMediaType == "video" { + return &gotgbot.InputMediaVideo{ + Media: fileInputMedia, + Thumbnail: thumbnailFileInputMedia, + Width: format.Width, + Height: format.Height, + Duration: format.Duration, + Caption: messageCaption, + SupportsStreaming: true, + ParseMode: "HTML", + }, nil + } + if inputMediaType == "audio" { + return &gotgbot.InputMediaAudio{ + Media: fileInputMedia, + Thumbnail: thumbnailFileInputMedia, + Duration: format.Duration, + Performer: format.Artist, + Title: format.Title, + Caption: messageCaption, + ParseMode: "HTML", + }, nil + } + if inputMediaType == "photo" { + return &gotgbot.InputMediaPhoto{ + Media: fileInputMedia, + Caption: messageCaption, + ParseMode: "HTML", + }, nil + } + if inputMediaType == "document" { + return &gotgbot.InputMediaDocument{ + Media: fileInputMedia, + Thumbnail: thumbnailFileInputMedia, + Caption: messageCaption, + ParseMode: "HTML", + }, nil + } + return nil, fmt.Errorf("unknown input type: %s", inputMediaType) +} + +func (format *MediaFormat) GetInputMediaWithFileID( + messageCaption string, +) 
(gotgbot.InputMedia, error) { + _, inputMediaType := format.GetFormatInfo() + fileInputMedia := gotgbot.InputFileByID(format.FileID) + if inputMediaType == "video" { + return &gotgbot.InputMediaVideo{ + Media: fileInputMedia, + Caption: messageCaption, + ParseMode: "HTML", + }, nil + } + if inputMediaType == "audio" { + return &gotgbot.InputMediaAudio{ + Media: fileInputMedia, + Caption: messageCaption, + ParseMode: "HTML", + }, nil + } + if inputMediaType == "photo" { + return &gotgbot.InputMediaPhoto{ + Media: fileInputMedia, + Caption: messageCaption, + ParseMode: "HTML", + }, nil + } + if inputMediaType == "document" { + return &gotgbot.InputMediaDocument{ + Media: fileInputMedia, + Caption: messageCaption, + ParseMode: "HTML", + }, nil + } + return nil, fmt.Errorf("unknown input type: %s", inputMediaType) +} + +func (format *MediaFormat) GetFileName() string { + extension, _ := format.GetFormatInfo() + if format.Type == enums.MediaTypeAudio && format.Title != "" && format.Artist != "" { + return fmt.Sprintf("%s - %s.%s", format.Artist, format.Title, extension) + } else { + name := uuid.New().String() + name = strings.ReplaceAll(name, "-", "") + return fmt.Sprintf("%s.%s", name, extension) + } +} + +func (media *Media) HasVideo() bool { + for _, format := range media.Formats { + if format.Type == enums.MediaTypeVideo { + return true + } + } + return false +} + +func (media *Media) HasAudio() bool { + for _, format := range media.Formats { + if format.Type == enums.MediaTypeAudio { + return true + } + } + return false +} + +func (media *Media) HasPhoto() bool { + for _, format := range media.Formats { + if format.Type == enums.MediaTypePhoto { + return true + } + } + return false +} + +func (media *Media) SupportsAudio() bool { + for _, format := range media.Formats { + if format.AudioCodec != "" { + return true + } + } + return false +} + +func (media *Media) SupportsAudioFromVideo() bool { + return !media.HasAudio() && media.HasVideo() && media.SupportsAudio() 
// GroupSettings is the gorm model that stores the per-chat options of a
// group. The error messages in util/errors.go refer to these options as
// being changeable through /settings.
type GroupSettings struct {
	// embeds gorm's standard model columns
	gorm.Model

	// ChatID identifies the chat these settings belong to; it is tagged as a
	// primary key in addition to the key that gorm.Model already provides.
	// NOTE(review): presumably the Telegram chat id — confirm against callers.
	ChatID int64 `gorm:"primaryKey"`
	// NSFW and Captions are optional booleans that default to false in the
	// database. NOTE(review): pointers presumably so an explicit false is
	// still written by gorm — confirm.
	NSFW     *bool `gorm:"default:false"`
	Captions *bool `gorm:"default:false"`
	// MediaGroupLimit defaults to 10 in the database.
	MediaGroupLimit int `gorm:"default:10"`
}
context.WithCancel(context.Background()) + defer cancel() + + audioFile, err := util.DownloadFile( + ctx, audioFormat.URL, + audioFormat.GetFileName(), nil, + ) + if err != nil { + return fmt.Errorf("failed to download audio file: %w", err) + } + + err = av.MergeVideoWithAudio( + media.FilePath, + audioFile, + ) + if err != nil { + return fmt.Errorf("failed to merge video with audio: %w", err) + } + + return nil +} diff --git a/util/av/audio.go b/util/av/audio.go new file mode 100644 index 0000000..8e58da7 --- /dev/null +++ b/util/av/audio.go @@ -0,0 +1,23 @@ +package av + +import ( + ffmpeg "github.com/u2takey/ffmpeg-go" +) + +func AudioFromVideo(videoPath string, audioPath string) error { + err := ffmpeg. + Input(videoPath). + Output(audioPath, ffmpeg.KwArgs{ + "map": "a", + "vn": nil, + "f": "mp3", + "ab": "128k", + }). + Silent(true). + OverWriteOutput(). + Run() + if err != nil { + return err + } + return nil +} diff --git a/util/av/merge_audio.go b/util/av/merge_audio.go new file mode 100644 index 0000000..aa30298 --- /dev/null +++ b/util/av/merge_audio.go @@ -0,0 +1,46 @@ +package av + +import ( + "fmt" + "os" + + ffmpeg "github.com/u2takey/ffmpeg-go" +) + +func MergeVideoWithAudio( + videoFile string, + audioFile string, +) error { + tempFileName := videoFile + ".temp" + outputFile := videoFile + + err := os.Rename(videoFile, tempFileName) + if err != nil { + return fmt.Errorf("failed to rename file: %w", err) + } + + defer os.Remove(tempFileName) + defer os.Remove(audioFile) + + videoStream := ffmpeg.Input(tempFileName) + audioStream := ffmpeg.Input(audioFile) + + err = ffmpeg.Output( + []*ffmpeg.Stream{videoStream, audioStream}, + outputFile, + ffmpeg.KwArgs{ + "map": []string{"0:v:0", "1:a:0"}, + "movflags": "+faststart", + "c:v": "copy", + "c:a": "copy", + }). + Silent(true). + OverWriteOutput(). 
+ Run() + + if err != nil { + return fmt.Errorf("failed to merge files: %w", err) + } + + return nil +} diff --git a/util/av/remux.go b/util/av/remux.go new file mode 100644 index 0000000..efd24fe --- /dev/null +++ b/util/av/remux.go @@ -0,0 +1,35 @@ +package av + +import ( + "fmt" + "os" + + ffmpeg "github.com/u2takey/ffmpeg-go" +) + +func RemuxFile( + inputFile string, +) error { + tempFileName := inputFile + ".temp" + outputFile := inputFile + err := os.Rename(inputFile, tempFileName) + if err != nil { + return fmt.Errorf("failed to rename file: %v", err) + } + err = ffmpeg. + Input(tempFileName). + Output(outputFile, ffmpeg.KwArgs{ + "c": "copy", + }). + Silent(true). + OverWriteOutput(). + Run() + if err != nil { + return fmt.Errorf("failed to remux file: %v", err) + } + err = os.Remove(tempFileName) + if err != nil { + return fmt.Errorf("failed to remove temp file: %v", err) + } + return nil +} diff --git a/util/av/thumbnail.go b/util/av/thumbnail.go new file mode 100644 index 0000000..555f2be --- /dev/null +++ b/util/av/thumbnail.go @@ -0,0 +1,27 @@ +package av + +import ( + ffmpeg "github.com/u2takey/ffmpeg-go" +) + +func ExtractVideoThumbnail( + videoPath string, + thumbnailPath string, +) error { + err := ffmpeg. + Input(videoPath). + Output(thumbnailPath, ffmpeg.KwArgs{ + "vframes": 1, + "f": "image2", + "ss": "00:00:01", + "c:v": "mjpeg", + "q:v": 10, // not sure + }). + Silent(true). + OverWriteOutput(). 
+ Run() + if err != nil { + return err + } + return nil +} diff --git a/util/av/videoinfo.go b/util/av/videoinfo.go new file mode 100644 index 0000000..638d6e6 --- /dev/null +++ b/util/av/videoinfo.go @@ -0,0 +1,18 @@ +package av + +import ( + "github.com/tidwall/gjson" + ffmpeg "github.com/u2takey/ffmpeg-go" +) + +func GetVideoInfo(filePath string) (int64, int64, int64) { + probeData, err := ffmpeg.Probe(filePath) + if err != nil { + return 0, 0, 0 + } + duration := gjson.Get(probeData, "format.duration").Int() + width := gjson.Get(probeData, "streams.0.width").Int() + height := gjson.Get(probeData, "streams.0.height").Int() + + return duration, width, height +} diff --git a/util/consts.go b/util/consts.go new file mode 100644 index 0000000..f8428fe --- /dev/null +++ b/util/consts.go @@ -0,0 +1,5 @@ +package util + +const ( + ChromeUA = "Mozilla/5.0 (Linux; Android 10; SM-G960U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.181 Mobile Safari/537.36" +) diff --git a/util/download.go b/util/download.go new file mode 100644 index 0000000..80a6c02 --- /dev/null +++ b/util/download.go @@ -0,0 +1,512 @@ +package util + +import ( + "bytes" + "context" + "fmt" + "io" + "math" + "net/http" + "os" + "path/filepath" + "sync" + "time" + + "govd/models" + "govd/util/av" +) + +func DefaultConfig() *models.DownloadConfig { + return &models.DownloadConfig{ + ChunkSize: 10 * 1024 * 1024, // 10MB + Concurrency: 4, + Timeout: 30 * time.Second, + DownloadDir: "downloads", + RetryAttempts: 3, + RetryDelay: 2 * time.Second, + Remux: true, + } +} + +func DownloadFile( + ctx context.Context, + URLList []string, + fileName string, + config *models.DownloadConfig, +) (string, error) { + if config == nil { + config = DefaultConfig() + } + + var errs []error + for _, fileURL := range URLList { + select { + case <-ctx.Done(): + return "", ctx.Err() + default: + // create the download directory if it doesn't exist + if err := ensureDownloadDir(config.DownloadDir); err != nil { + 
return "", err + } + + filePath := filepath.Join(config.DownloadDir, fileName) + err := runChunkedDownload(ctx, fileURL, filePath, config) + if err != nil { + errs = append(errs, err) + continue + } + + if config.Remux { + err := av.RemuxFile(filePath) + if err != nil { + return "", fmt.Errorf("remuxing failed: %w", err) + } + } + return filePath, nil + } + } + + return "", fmt.Errorf("%w: %v", ErrDownloadFailed, errs) +} + +func DownloadFileWithSegments( + ctx context.Context, + segmentURLs []string, + fileName string, + config *models.DownloadConfig, +) (string, error) { + if config == nil { + config = DefaultConfig() + } + if err := ensureDownloadDir(config.DownloadDir); err != nil { + return "", err + } + tempDir := filepath.Join(config.DownloadDir, "segments_"+time.Now().Format("20060102_150405")) + if err := os.MkdirAll(tempDir, 0755); err != nil { + return "", fmt.Errorf("failed to create temporary directory: %w", err) + } + downloadedFiles, err := DownloadSegments(ctx, segmentURLs, config) + if err != nil { + os.RemoveAll(tempDir) + return "", fmt.Errorf("failed to download segments: %w", err) + } + mergedFilePath, err := MergeSegmentFiles(ctx, downloadedFiles, fileName, config) + if err != nil { + os.RemoveAll(tempDir) + return "", fmt.Errorf("failed to merge segments: %w", err) + } + if err := os.RemoveAll(tempDir); err != nil { + return "", fmt.Errorf("failed to remove temporary directory: %w", err) + } + return mergedFilePath, nil +} + +func DownloadFileInMemory( + ctx context.Context, + URLList []string, + config *models.DownloadConfig, +) (*bytes.Reader, error) { + if config == nil { + config = DefaultConfig() + } + + var errs []error + for _, fileURL := range URLList { + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + data, err := downloadInMemory(ctx, fileURL, config.Timeout) + if err != nil { + errs = append(errs, err) + continue + } + return bytes.NewReader(data), nil + } + } + + return nil, fmt.Errorf("%w: %v", 
// ensureDownloadDir makes sure dir exists as a directory, creating it and
// any missing parents with mode 0755.
//
// os.MkdirAll already succeeds when the directory exists, so no prior Stat
// is needed. Calling it directly also reports the cases the Stat-based check
// used to let through: a path that exists but is not a directory, and stat
// failures other than "not exist".
func ensureDownloadDir(dir string) error {
	if err := os.MkdirAll(dir, 0755); err != nil {
		return fmt.Errorf("failed to create downloads directory: %w", err)
	}
	return nil
}
concurrency limit + select { + case semaphore <- struct{}{}: + defer func() { <-semaphore }() + case <-downloadCtx.Done(): + return + } + + chunkData, err := downloadChunkWithRetry(downloadCtx, fileURL, chunk, config) + if err != nil { + errOnce.Do(func() { + downloadErr = fmt.Errorf("chunk %d: %w", idx, err) + cancelDownload() // cancel all other downloads + errChan <- downloadErr + }) + return + } + + if err := writeChunkToFile(file, chunkData, chunk[0]); err != nil { + errOnce.Do(func() { + downloadErr = fmt.Errorf("failed to write chunk %d: %w", idx, err) + cancelDownload() + errChan <- downloadErr + }) + return + } + + // update progress + chunkSize := chunk[1] - chunk[0] + 1 + progressMutex.Lock() + completedChunks++ + completedBytes += int64(chunkSize) + progress := float64(completedBytes) / float64(fileSize) + progressMutex.Unlock() + + // report progress if handler exists + if config.ProgressUpdater != nil { + config.ProgressUpdater(progress) + } + }(idx, chunk) + } + + go func() { + wg.Wait() + close(errChan) + }() + + select { + case err := <-errChan: + if err != nil { + // clean up partial download + os.Remove(filePath) + return err + } + case <-ctx.Done(): + cancelDownload() + os.Remove(filePath) + return ctx.Err() + } + + return nil +} + +func getFileSize(ctx context.Context, fileURL string, timeout time.Duration) (int, error) { + reqCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + req, err := http.NewRequestWithContext(reqCtx, http.MethodHead, fileURL, nil) + if err != nil { + return 0, fmt.Errorf("failed to create request: %w", err) + } + + session := GetHTTPSession() + resp, err := session.Do(req) + if err != nil { + return 0, fmt.Errorf("failed to get file size: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return 0, fmt.Errorf("failed to get file info: status code %d", resp.StatusCode) + } + + return int(resp.ContentLength), nil +} + +func downloadChunkWithRetry( + ctx context.Context, + 
// createChunks splits a file of fileSize bytes into consecutive byte ranges
// of at most chunkSize bytes. Every element is an inclusive {start, end}
// pair, the shape used for an HTTP Range header.
//
// A non-positive fileSize (size unknown) yields the single placeholder range
// {0, 0}. A non-positive chunkSize yields one range covering the whole file
// instead of dividing by zero.
//
// NOTE(review): the {0, 0} placeholder is indistinguishable from the range
// of a one-byte file; a caller that turns it into "bytes=0-0" will request
// only the first byte — worth confirming how unknown sizes are meant to be
// downloaded.
func createChunks(fileSize int, chunkSize int) [][2]int {
	if fileSize <= 0 {
		return [][2]int{{0, 0}}
	}
	if chunkSize <= 0 {
		return [][2]int{{0, fileSize - 1}}
	}

	numChunks := int(math.Ceil(float64(fileSize) / float64(chunkSize)))
	chunks := make([][2]int, numChunks)

	for i := range chunks {
		start := i * chunkSize
		end := start + chunkSize - 1
		if end >= fileSize {
			// the last chunk may be shorter than chunkSize
			end = fileSize - 1
		}
		chunks[i] = [2]int{start, end}
	}

	return chunks
}
*models.DownloadConfig, +) ([]string, error) { + if config == nil { + config = DefaultConfig() + } + + tempDir := filepath.Join(config.DownloadDir, "segments_"+time.Now().Format("20060102_150405")) + if err := os.MkdirAll(tempDir, 0755); err != nil { + return nil, fmt.Errorf("failed to create temporary directory: %w", err) + } + + semaphore := make(chan struct{}, config.Concurrency) + var wg sync.WaitGroup + + errChan := make(chan error, len(segmentURLs)) + + downloadedFiles := make([]string, len(segmentURLs)) + + for i, segmentURL := range segmentURLs { + wg.Add(1) + go func(idx int, url string) { + defer wg.Done() + + // acquire semaphore slot + semaphore <- struct{}{} + defer func() { <-semaphore }() + + segmentFileName := fmt.Sprintf("segment_%05d", idx) + segmentPath := filepath.Join(tempDir, segmentFileName) + + _, err := DownloadFile(ctx, []string{url}, segmentFileName, &models.DownloadConfig{ + ChunkSize: config.ChunkSize, + Concurrency: 3, // segments are typically small + Timeout: config.Timeout, + DownloadDir: tempDir, + RetryAttempts: config.RetryAttempts, + RetryDelay: config.RetryDelay, + Remux: false, // don't remux individual segments + ProgressUpdater: nil, // no progress updates for individual segments + }) + + if err != nil { + errChan <- fmt.Errorf("failed to download segment %d: %w", idx, err) + return + } + + downloadedFiles[idx] = segmentPath + }(i, segmentURL) + } + + go func() { + wg.Wait() + close(errChan) + }() + + for err := range errChan { + if err != nil { + os.RemoveAll(tempDir) + return nil, err + } + } + + return downloadedFiles, nil +} + +func MergeSegmentFiles( + ctx context.Context, + segmentPaths []string, + outputFileName string, + config *models.DownloadConfig, +) (string, error) { + if config == nil { + config = DefaultConfig() + } + + if err := ensureDownloadDir(config.DownloadDir); err != nil { + return "", err + } + + outputPath := filepath.Join(config.DownloadDir, outputFileName) + outputFile, err := os.Create(outputPath) 
+ if err != nil { + return "", fmt.Errorf("failed to create output file: %w", err) + } + defer outputFile.Close() + + var totalBytes int64 + var processedBytes int64 + + if config.ProgressUpdater != nil { + for _, segmentPath := range segmentPaths { + fileInfo, err := os.Stat(segmentPath) + if err == nil { + totalBytes += fileInfo.Size() + } + } + } + + for i, segmentPath := range segmentPaths { + select { + case <-ctx.Done(): + return "", ctx.Err() + default: + segmentFile, err := os.Open(segmentPath) + if err != nil { + return "", fmt.Errorf("failed to open segment %d: %w", i, err) + } + + written, err := io.Copy(outputFile, segmentFile) + segmentFile.Close() + + if err != nil { + return "", fmt.Errorf("failed to copy segment %d: %w", i, err) + } + + if config.ProgressUpdater != nil && totalBytes > 0 { + processedBytes += written + progress := float64(processedBytes) / float64(totalBytes) + config.ProgressUpdater(progress) + } + } + } + + if config.Remux { + err := av.RemuxFile(outputPath) + if err != nil { + return "", fmt.Errorf("remuxing failed: %w", err) + } + } + + return outputPath, nil +} diff --git a/util/errors.go b/util/errors.go new file mode 100644 index 0000000..819e5cb --- /dev/null +++ b/util/errors.go @@ -0,0 +1,23 @@ +package util + +type Error struct { + Message string +} + +func (err *Error) Error() string { + return err.Message +} + +var ( + ErrUnavailable = &Error{Message: "this content is unavailable"} + ErrNotImplemented = &Error{Message: "this feature is not implemented"} + ErrTimeout = &Error{Message: "timeout error when downloading. 
try again"} + ErrUnknownRIFF = &Error{Message: "uknown RIFF format"} + ErrUnsupportedImageFormat = &Error{Message: "unsupported image format"} + ErrFileTooShort = &Error{Message: "file too short"} + ErrDownloadFailed = &Error{Message: "download failed"} + ErrUnsupportedExtractorType = &Error{Message: "unsupported extractor type"} + ErrMediaGroupLimitExceeded = &Error{Message: "media group limit exceeded for this group. try changing /settings"} + ErrNSFWNotAllowed = &Error{Message: "this content is marked as nsfw and can't be downloaded in this group. try changing /settings or use me privately"} + ErrInlineMediaGroup = &Error{Message: "you can't download media groups in inline mode. try using me in a private chat"} +) diff --git a/util/http.go b/util/http.go new file mode 100644 index 0000000..fbfee9b --- /dev/null +++ b/util/http.go @@ -0,0 +1,14 @@ +package util + +import ( + "net/http" + "time" +) + +var httpSession = &http.Client{ + Timeout: 20 * time.Second, +} + +func GetHTTPSession() *http.Client { + return httpSession +} diff --git a/util/img.go b/util/img.go new file mode 100644 index 0000000..69db8b1 --- /dev/null +++ b/util/img.go @@ -0,0 +1,126 @@ +package util + +import ( + "bytes" + "fmt" + "image" + "image/jpeg" + "io" + "os" + + _ "image/gif" + _ "image/png" + + _ "github.com/strukturag/libheif/go/heif" + _ "golang.org/x/image/webp" +) + +var ( + jpegMagic = []byte{0xFF, 0xD8, 0xFF} + pngMagic = []byte{0x89, 0x50, 0x4E, 0x47} + gifMagic = []byte{0x47, 0x49, 0x46} + riffMagic = []byte{0x52, 0x49, 0x46, 0x46} + webpMagic = []byte{0x57, 0x45, 0x42, 0x50} +) + +func ImgToJPEG(file io.ReadSeeker, outputPath string) error { + format, err := DetectImageFormat(file) + if err != nil { + return fmt.Errorf("failed to detect image format: %w", err) + } + + outputFile, err := os.Create(outputPath) + if err != nil { + return fmt.Errorf("failed to create output file: %w", err) + } + defer outputFile.Close() + + if format == "jpeg" { + if _, err = file.Seek(0, 
io.SeekStart); err != nil { + return fmt.Errorf("failed to reset file position: %w", err) + } + + if _, err = io.Copy(outputFile, file); err != nil { + os.Remove(outputPath) + return fmt.Errorf("failed to copy image: %w", err) + } + return nil + } + + if _, err = file.Seek(0, io.SeekStart); err != nil { + return fmt.Errorf("failed to reset file position: %w", err) + } + + img, _, err := image.Decode(file) + if err != nil { + return fmt.Errorf("failed to decode image: %w", err) + } + + err = jpeg.Encode(outputFile, img, nil) + if err != nil { + os.Remove(outputPath) + return fmt.Errorf("failed to encode image: %w", err) + } + + return nil +} + +func DetectImageFormat(file io.ReadSeeker) (string, error) { + header := make([]byte, 12) + + _, err := file.Read(header) + if err != nil { + return "", fmt.Errorf("failed to read file header: %w", err) + } + if _, err = file.Seek(0, io.SeekStart); err != nil { + return "", fmt.Errorf("failed to reset file position: %w", err) + } + if len(header) < 12 { + return "", ErrFileTooShort + } + if bytes.HasPrefix(header, jpegMagic) { + return "jpeg", nil + } + + if bytes.HasPrefix(header, pngMagic) { + return "png", nil + } + + if bytes.HasPrefix(header, gifMagic) { + return "gif", nil + } + + if isHEIF(header) { + return "heif", nil + } + + if bytes.HasPrefix(header, riffMagic) { + if bytes.Equal(header[8:12], webpMagic) { + return "webp", nil + } + return "", ErrUnknownRIFF + } + + return "", ErrUnsupportedImageFormat +} + +func isHEIF(header []byte) bool { + if len(header) < 12 { + return false + } + isHeifHeader := header[0] == 0x00 && header[1] == 0x00 && + header[2] == 0x00 && (header[3] == 0x18 || header[3] == 0x1C) && + bytes.Equal(header[4:8], []byte("ftyp")) + if !isHeifHeader { + return false + } + heifBrands := []string{"heic", "heix", "mif1", "msf1"} + brand := string(header[8:12]) + + for _, b := range heifBrands { + if brand == b { + return true + } + } + return false +} diff --git a/util/misc.go b/util/misc.go new 
file mode 100644 index 0000000..86cdf94 --- /dev/null +++ b/util/misc.go @@ -0,0 +1,112 @@ +package util + +import ( + "fmt" + "net/http" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/pkg/errors" + + "github.com/PaulSonOfLars/gotgbot/v2" + "github.com/aki237/nscjar" +) + +func GetLocationURL( + url string, + userAgent string, +) (string, error) { + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return "", fmt.Errorf("failed to create request: %w", err) + } + if userAgent == "" { + userAgent = ChromeUA + } + req.Header.Set("User-Agent", ChromeUA) + session := GetHTTPSession() + resp, err := session.Do(req) + if err != nil { + return "", fmt.Errorf("failed to send request: %w", err) + } + defer resp.Body.Close() + return resp.Request.URL.String(), nil +} + +func IsUserAdmin( + bot *gotgbot.Bot, + chatID int64, + userID int64, +) bool { + chatMember, err := bot.GetChatMember(chatID, userID, nil) + if err != nil { + return false + } + if chatMember == nil { + return false + } + status := chatMember.GetStatus() + switch status { + case "creator": + return true + case "administrator": + if chatMember.MergeChatMember().CanChangeInfo { + return true + } + return false + } + return false +} + +func EscapeCaption(str string) string { + // we wont use html.EscapeString + // cuz it will escape all the characters + // and we only need to escape < and > + chars := map[string]string{ + "<": "<", + ">": ">", + } + for k, v := range chars { + str = strings.ReplaceAll(str, k, v) + } + return str +} + +func GetLastError(err error) error { + var lastErr error = err + for { + unwrapped := errors.Unwrap(lastErr) + if unwrapped == nil { + break + } + lastErr = unwrapped + } + return lastErr +} + +func ParseCookieFile(fileName string) ([]*http.Cookie, error) { + cookiePath := filepath.Join("cookies", fileName) + cookieFile, err := os.Open(cookiePath) + if err != nil { + return nil, fmt.Errorf("failed to open cookie file: %w", err) + } + defer 
// FixURL undoes HTML escaping of ampersands in a URL, turning every "&amp;"
// back into "&" so that query parameters copied out of HTML are separated
// correctly again. URLs without the entity are returned unchanged.
func FixURL(url string) string {
	return strings.ReplaceAll(url, "&amp;", "&")
}
[]string{variantURL} + + variantContent, err := fetchContent(variantURL) + if err == nil { + variantFormats, err := ParseM3U8Content(variantContent, variantURL) + if err == nil && len(variantFormats) > 0 { + format.Segments = variantFormats[0].Segments + if variantFormats[0].Duration > 0 { + format.Duration = variantFormats[0].Duration + } + } + } + + formats = append(formats, format) + } + + return formats, nil + } + + if listType == m3u8.MEDIA { + mediapl := playlist.(*m3u8.MediaPlaylist) + + var segments []string + var totalDuration float64 + + for _, segment := range mediapl.Segments { + if segment != nil && segment.URI != "" { + segmentURL := segment.URI + if !strings.HasPrefix(segmentURL, "http://") && !strings.HasPrefix(segmentURL, "https://") { + segmentURL = resolveURL(baseURLObj, segmentURL) + } + + segments = append(segments, segmentURL) + totalDuration += segment.Duration + } + } + + format := &models.MediaFormat{ + Type: enums.MediaTypeVideo, + FormatID: "hls", + VideoCodec: enums.MediaCodecAVC, + AudioCodec: enums.MediaCodecAAC, + Duration: int64(totalDuration), + URL: []string{baseURL}, + Segments: segments, + } + + return []*models.MediaFormat{format}, nil + } + + return nil, errors.New("unsupported m3u8 playlist type") +} + +func ParseM3U8FromURL(url string) ([]*models.MediaFormat, error) { + body, err := fetchContent(url) + if err != nil { + return nil, fmt.Errorf("failed to fetch m3u8 content: %w", err) + } + return ParseM3U8Content(body, url) +} + +func fetchContent(url string) ([]byte, error) { + resp, err := httpClient.Get(url) + if err != nil { + return nil, fmt.Errorf("failed to fetch content: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("server returned status code: %d", resp.StatusCode) + } + + return io.ReadAll(resp.Body) +} + +func getCodecFromCodecs(codecs string) enums.MediaCodec { + if strings.Contains(codecs, "avc") || strings.Contains(codecs, "h264") { + return 
enums.MediaCodecAVC + } else if strings.Contains(codecs, "hvc") || strings.Contains(codecs, "h265") { + return enums.MediaCodecHEVC + } else if strings.Contains(codecs, "av01") { + return enums.MediaCodecAV1 + } else if strings.Contains(codecs, "vp9") { + return enums.MediaCodecVP9 + } else if strings.Contains(codecs, "vp8") { + return enums.MediaCodecVP8 + } + return enums.MediaCodecAVC +} + +func getAudioCodecFromCodecs(codecs string) enums.MediaCodec { + if strings.Contains(codecs, "mp4a") { + return enums.MediaCodecAAC + } else if strings.Contains(codecs, "opus") { + return enums.MediaCodecOpus + } else if strings.Contains(codecs, "mp3") { + return enums.MediaCodecMP3 + } else if strings.Contains(codecs, "flac") { + return enums.MediaCodecFLAC + } else if strings.Contains(codecs, "vorbis") { + return enums.MediaCodecVorbis + } + return enums.MediaCodecAAC +} + +func resolveURL(base *url.URL, uri string) string { + if strings.HasPrefix(uri, "http://") || strings.HasPrefix(uri, "https://") { + return uri + } + ref, err := url.Parse(uri) + if err != nil { + return uri + } + return base.ResolveReference(ref).String() +} diff --git a/util/parser/mpd.go b/util/parser/mpd.go new file mode 100644 index 0000000..0bfe2c2 --- /dev/null +++ b/util/parser/mpd.go @@ -0,0 +1 @@ +package parser