Compare commits

...

10 commits

Author SHA1 Message Date
stefanodvx
c8d0666d1d fixes (desc)
Some checks failed
telegram message / notify (push) Has been cancelled
- m3u8 parser
- segments download
- video-audio merge
2025-04-23 01:44:25 +02:00
stefanodvx
330cc39583 trying to fix segmented download 2025-04-22 13:00:53 +02:00
HappyLoLTroll
1f618201f9
fix docker build (#7) 2025-04-22 12:14:39 +02:00
stefanodvx
4b1c801370 fixed typo 2025-04-22 12:12:42 +02:00
stefanodvx
c0c2e42436 added new instance 2025-04-22 12:11:20 +02:00
stefanodvx
34827fe852 reddit: prevent infinite loop on error 2025-04-22 12:06:13 +02:00
stefanodvx
7e69320d2c db: connection retry logic 2025-04-22 00:22:27 +02:00
stefanodvx
3e307658fa instagram: fixes share urls 2025-04-21 22:30:52 +02:00
stefanodvx
34219a848e instagram: fixes 2025-04-20 13:17:30 +02:00
stefanodvx
7dab9207b7 Update README.md 2025-04-20 13:10:17 +02:00
17 changed files with 502 additions and 294 deletions

View file

@ -1,19 +1,70 @@
FROM golang:alpine
FROM golang:bookworm
RUN apk update && \
apk upgrade && \
apk add --no-cache --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community \
ffmpeg \
libheif \
libheif-dev \
ARG FFMPEG_VERSION=7.1
ARG LIBHEIF_VERSION=1.19.7
RUN apt-get update && \
apt-get upgrade -y && \
apt-get install -y --no-install-recommends \
bash \
git \
pkgconfig \
build-base
pkg-config \
build-essential \
tar \
wget \
xz-utils \
gcc \
cmake \
libde265-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# libheif
ENV LIBHEIF_BUILD="https://github.com/strukturag/libheif/releases/download/v${LIBHEIF_VERSION}/libheif-${LIBHEIF_VERSION}.tar.gz"
RUN wget -O libheif.tar.gz ${LIBHEIF_BUILD} && \
mkdir -p libheif && \
tar -xzvf libheif.tar.gz -C libheif --strip-components=1 && \
rm libheif.tar.gz && \
cd libheif && \
mkdir build && \
cd build && \
cmake --preset=release .. && \
make && \
make install
# ffmpeg
RUN mkdir -p \
/usr/local/bin \
/usr/local/lib/pkgconfig/ \
/usr/local/lib/ \
/usr/local/include
RUN ARCH=$(uname -m) && \
if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \
echo "detected ARM architecture" && \
export FFMPEG_BUILD="https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-n${FFMPEG_VERSION}-latest-linuxarm64-gpl-shared-${FFMPEG_VERSION}.tar.xz"; \
else \
echo "detected x86_64 architecture" && \
export FFMPEG_BUILD="https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-n${FFMPEG_VERSION}-latest-linux64-gpl-shared-${FFMPEG_VERSION}.tar.xz"; \
fi && \
wget -O ffmpeg.tar.xz ${FFMPEG_BUILD} && \
mkdir -p ffmpeg && \
tar -xf ffmpeg.tar.xz -C ffmpeg --strip-components=1 && \
rm ffmpeg.tar.xz && \
cp -rv ffmpeg/bin/* /usr/local/bin/ && \
cp -rv ffmpeg/lib/* /usr/local/lib/ && \
cp -rv ffmpeg/include/* /usr/local/include/ && \
cp -rv ffmpeg/lib/pkgconfig/* /usr/local/lib/pkgconfig/ && \
ldconfig /usr/local
# env for building
ENV CGO_CFLAGS="-I/usr/local/include"
ENV CGO_LDFLAGS="-L/usr/local/lib"
ENV PKG_CONFIG_PATH="/usr/local/lib/pkgconfig"
WORKDIR /bot
RUN mkdir downloads
RUN mkdir -p downloads
COPY . .

View file

@ -27,7 +27,7 @@ this project was born after the discontinuation of a highly popular bot known as
# installation
## build
_this method only works on linux and macos, if you want to build the bot on windows, check [docker installation](#installation-with-docker) instead._
_this method only works on linux and macos, if you want to build the bot on windows, check [docker installation](#docker-recommended) instead._
1. clone the repository
```bash
@ -46,8 +46,6 @@ _this method only works on linux and macos, if you want to build the bot on wind
```
## docker (recommended)
> [!WARNING]
> this method is currently not working due to a wrong version of the libav (ffmpeg) library in the docker image. feel free to open a PR if you can fix it.
1. build the image using the dockerfile:
@ -71,31 +69,33 @@ _this method only works on linux and macos, if you want to build the bot on wind
```
# options
| variable | description | default |
|------------------------|----------------------------------------------|---------------------------------------|
| DB_HOST | database host | localhost |
| DB_PORT | database port | 3306 |
| DB_NAME | database name | govd |
| DB_USER | database user | govd |
| DB_PASSWORD | database password | password |
| BOT_API_URL | telegram bot api url | https://api.telegram.org |
| BOT_TOKEN | telegram bot token | 12345678:ABC-DEF1234ghIkl-zyx57W2P0s |
| CONCURRENT_UPDATES | max concurrent updates handled | 50 |
| LOG_DISPATCHER_ERRORS | log dispatcher errors | 0 |
| DOWNLOADS_DIR | directory for downloaded files | downloads |
| HTTP_PROXY [(?)](#proxying) | http proxy (optional) | |
| HTTPS_PROXY [(?)](#proxying) | https proxy (optional) | |
| NO_PROXY [(?)](#proxying) | no proxy domains (optional) | |
| EDGE_PROXY_URL [(?)](#proxying) | url of your edge proxy (optional) | |
| REPO_URL | project repository url | https://github.com/govdbot/govd |
| PROFILER_PORT | port for profiler http server (pprof) | 0 _(disabled)_ |
| variable | description | default |
|-------------------------------|----------------------------------------------|---------------------------------------|
| DB_HOST | database host | localhost |
| DB_PORT | database port | 3306 |
| DB_NAME | database name | govd |
| DB_USER | database user | govd |
| DB_PASSWORD | database password | password |
| BOT_API_URL | telegram bot api url | https://api.telegram.org |
| BOT_TOKEN | telegram bot token | 12345678:ABC-DEF1234ghIkl-zyx57W2P0s |
| CONCURRENT_UPDATES | max concurrent updates handled | 50 |
| LOG_DISPATCHER_ERRORS | log dispatcher errors | 0 |
| DOWNLOADS_DIR | directory for downloaded files | downloads |
| HTTP_PROXY [(?)](#proxying) | http proxy (optional) | |
| HTTPS_PROXY [(?)](#proxying) | https proxy (optional) | |
| NO_PROXY [(?)](#proxying) | no proxy domains (optional) | |
| REPO_URL | project repository url | https://github.com/govdbot/govd |
| PROFILER_PORT | port for profiler http server (pprof) | 0 _(disabled)_ |
**note:** to avoid limits on files, you should host your own telegram botapi. public bot instance is currently running under a botapi fork, [tdlight-telegram-bot-api](https://github.com/tdlight-team/tdlight-telegram-bot-api), but you can use the official botapi client too.
you can configure specific extractors options with `ext-cfg.yaml` file. documentation is not available yet, but you can check the source code for more information.
> [!IMPORTANT]
> to avoid limits on files, you should host your own telegram botapi and set the `BOT_API_URL` variable accordingly. the public bot instance is currently running under a botapi fork, [tdlight-telegram-bot-api](https://github.com/tdlight-team/tdlight-telegram-bot-api), but you can use the official botapi client too.
# proxying
there are two types of proxying available: http and edge.
- **http proxy**: this is a standard http proxy that can be used to route requests through a proxy server. you can set the `HTTP_PROXY` and `HTTPS_PROXY` environment variables to use this feature. (SOCKS5 is supported too)
- **edge proxy**: this is a custom proxy that is used to route requests through a specific url. you can set the `EDGE_PROXY_URL` environment variable to use this feature. this is useful for routing requests through a specific server or service. however, this feature is not totally implemented yet.
- **edge proxy**: this is a custom proxy that is used to route requests through a specific url. currently, you can only set this proxy with the `ext-cfg.yaml` file. this is useful for routing requests through a specific server or service. however, this feature is not totally implemented yet.
**note:** by setting the `NO_PROXY` environment variable, you can specify domains that should not be proxied.

View file

@ -21,6 +21,7 @@ func getInstanceMessage() string {
"public instances\n" +
"- @govd_bot | main official instance\n" +
"- @govd_pingu_bot | pingu instance\n" +
"- @sbrugnadlbot | sbrugna instance\n" +
"\nwant to add your own instance? reach us on @govdsupport"
}

View file

@ -16,27 +16,7 @@ import (
var DB *gorm.DB
func Start() {
host := os.Getenv("DB_HOST")
port := os.Getenv("DB_PORT")
user := os.Getenv("DB_USER")
password := os.Getenv("DB_PASSWORD")
dbname := os.Getenv("DB_NAME")
connectionString := fmt.Sprintf(
"%s:%s@tcp(%s:%s)/%s?charset=utf8mb4&parseTime=True",
user, password, host, port, dbname,
)
db, err := gorm.Open(mysql.Open(connectionString), &gorm.Config{
Logger: logger.Default.LogMode(logger.Silent),
NowFunc: func() time.Time {
utc, _ := time.LoadLocation("Europe/Rome")
return time.Now().In(utc)
},
})
if err != nil {
log.Fatalf("failed to connect to database: %v", err)
}
DB = db
DB = connect()
sqlDB, err := DB.DB()
if err != nil {
log.Fatalf("failed to get database connection: %v", err)
@ -54,6 +34,47 @@ func Start() {
}
}
// connect opens a gorm MySQL connection built from the DB_* environment
// variables, retrying up to maxRetries times with a fixed 2s delay.
// It terminates the process via log.Fatalf when every attempt fails.
func connect() *gorm.DB {
	host := os.Getenv("DB_HOST")
	port := os.Getenv("DB_PORT")
	user := os.Getenv("DB_USER")
	password := os.Getenv("DB_PASSWORD")
	dbname := os.Getenv("DB_NAME")
	connectionString := fmt.Sprintf(
		"%s:%s@tcp(%s:%s)/%s?charset=utf8mb4&parseTime=True",
		user, password, host, port, dbname,
	)
	// load the timezone once instead of on every NowFunc call; fall back
	// to UTC so a missing tzdata entry cannot produce a nil *time.Location
	// (time.Time.In panics on nil).
	loc, err := time.LoadLocation("Europe/Rome")
	if err != nil {
		loc = time.UTC
	}
	var conn *gorm.DB
	maxRetries := 10
	for attempt := 1; attempt <= maxRetries; attempt++ {
		conn, err = gorm.Open(mysql.Open(connectionString), &gorm.Config{
			Logger: logger.Default.LogMode(logger.Silent),
			NowFunc: func() time.Time {
				return time.Now().In(loc)
			},
		})
		if err == nil {
			return conn
		}
		log.Printf("failed to connect to database (attempt %d/%d)", attempt, maxRetries)
		if attempt < maxRetries {
			time.Sleep(2 * time.Second)
		}
	}
	log.Fatalf("failed to connect to database after %d attempts: %v", maxRetries, err)
	return nil
}
func migrateDatabase() error {
err := DB.AutoMigrate(
&models.Media{},

View file

@ -1,22 +1,24 @@
services:
govd-bot:
image: govd-bot
container_name: govd-bot
restart: unless-stopped
networks:
- govd-network
env_file:
- .env
depends_on:
- db
- db
db:
image: mysql
image: mariadb:latest
container_name: mariadb
restart: unless-stopped
environment:
MYSQL_DATABASE: govd
MYSQL_USER: govd
MYSQL_PASSWORD: password
MYSQL_ROOT_PASSWORD: example
MARIADB_DATABASE: govd
MARIADB_USER: govd
MARIADB_PASSWORD: password
MARIADB_ROOT_PASSWORD: example
networks:
- govd-network

View file

@ -74,22 +74,16 @@ var ShareURLExtractor = &models.Extractor{
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
client := util.GetHTTPClient(ctx.Extractor.CodeName)
req, err := http.NewRequest(
http.MethodGet,
redirectURL, err := util.GetLocationURL(
client,
ctx.MatchedContentURL,
nil,
igHeaders,
)
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
return nil, fmt.Errorf("failed to get url location: %w", err)
}
resp, err := client.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}
defer resp.Body.Close()
return &models.ExtractorResponse{
URL: resp.Request.URL.String(),
URL: redirectURL,
}, nil
},
}

View file

@ -18,8 +18,26 @@ import (
"github.com/bytedance/sonic"
)
var captionPattern = regexp.MustCompile(
`(?s)<meta property="og:title" content=".*?: &quot;(.*?)&quot;"`,
var (
captionPattern = regexp.MustCompile(
`(?s)<meta property="og:title" content=".*?: &quot;(.*?)&quot;"`)
igHeaders = map[string]string{
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Accept-Language": "en-GB,en;q=0.9",
"Cache-Control": "max-age=0",
"Dnt": "1",
"Priority": "u=0, i",
"Sec-Ch-Ua": `Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99`,
"Sec-Ch-Ua-Mobile": "?0",
"Sec-Ch-Ua-Platform": "macOS",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "none",
"Sec-Fetch-User": "?1",
"Upgrade-Insecure-Requests": "1",
"User-Agent": util.ChromeUA,
}
)
func BuildSignedPayload(contentURL string) (io.Reader, error) {

View file

@ -46,8 +46,9 @@ var ShortExtractor = &models.Extractor{
IsRedirect: true,
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
client := util.GetHTTPClient(ctx.Extractor.CodeName)
shortURL := fmt.Sprintf(shortenerAPIFormat, ctx.MatchedContentID)
location, err := util.GetLocationURL(shortURL, "")
location, err := util.GetLocationURL(client, shortURL, nil)
if err != nil {
return nil, fmt.Errorf("failed to get real url: %w", err)
}

View file

@ -88,7 +88,7 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
contentID := ctx.MatchedContentID
contentURL := ctx.MatchedContentURL
manifest, err := GetRedditData(client, host, slug)
manifest, err := GetRedditData(client, host, slug, false)
if err != nil {
return nil, err
}
@ -100,6 +100,7 @@ func MediaListFromAPI(ctx *models.DownloadContext) ([]*models.Media, error) {
data := manifest[0].Data.Children[0].Data
title := data.Title
isNsfw := data.Over18
var mediaList []*models.Media
if !data.IsVideo {
@ -228,6 +229,7 @@ func GetRedditData(
client models.HTTPClient,
host string,
slug string,
raise bool,
) (RedditResponse, error) {
url := fmt.Sprintf("https://%s/%s/.json", host, slug)
@ -252,13 +254,16 @@ func GetRedditData(
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
if raise {
return nil, fmt.Errorf("failed to get reddit data: %s", res.Status)
}
// try with alternative domain
altHost := "old.reddit.com"
if host == "old.reddit.com" {
altHost = "www.reddit.com"
}
return GetRedditData(client, altHost, slug)
return GetRedditData(client, altHost, slug, true)
}
var response RedditResponse

View file

@ -1,13 +1,19 @@
package reddit
type RedditResponse []struct {
Data struct {
Children []struct {
Data PostData `json:"data"`
} `json:"children"`
} `json:"data"`
type Child struct {
Data *PostData `json:"data"`
}
type Data struct {
Children []*Child `json:"children"`
}
type ResponseItem struct {
Data *Data `json:"data"`
}
type RedditResponse []*ResponseItem
type PostData struct {
ID string `json:"id"`
Title string `json:"title"`

View file

@ -46,12 +46,13 @@ var VMExtractor = &models.Extractor{
IsRedirect: true,
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
location, err := util.GetLocationURL(ctx.MatchedContentURL, "")
client := util.GetHTTPClient(ctx.Extractor.CodeName)
redirectURL, err := util.GetLocationURL(client, ctx.MatchedContentURL, nil)
if err != nil {
return nil, fmt.Errorf("failed to get url location: %w", err)
}
return &models.ExtractorResponse{
URL: location,
URL: redirectURL,
}, nil
},
}

View file

@ -20,10 +20,22 @@ func MergeAudio(media *models.DownloadedMedia) error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
audioFile, err := util.DownloadFile(
ctx, audioFormat.URL,
audioFormat.GetFileName(), nil,
)
var audioFile string
var err error
if len(audioFormat.Segments) == 0 {
audioFile, err = util.DownloadFile(
ctx, audioFormat.URL,
audioFormat.GetFileName(),
nil,
)
} else {
audioFile, err = util.DownloadFileWithSegments(
ctx, audioFormat.Segments,
audioFormat.GetFileName(),
nil,
)
}
if err != nil {
return fmt.Errorf("failed to download audio file: %w", err)
}

View file

@ -18,7 +18,6 @@ func MergeVideoWithAudio(
if err != nil {
return fmt.Errorf("failed to rename file: %w", err)
}
defer os.Remove(tempFileName)
defer os.Remove(audioFile)
@ -39,6 +38,7 @@ func MergeVideoWithAudio(
Run()
if err != nil {
os.Remove(outputFile)
return fmt.Errorf("failed to merge files: %w", err)
}

46
util/av/merge_segments.go Normal file
View file

@ -0,0 +1,46 @@
package av
import (
"fmt"
"os"
ffmpeg "github.com/u2takey/ffmpeg-go"
)
// MergeSegments concatenates the given media segment files into a single
// output file using ffmpeg's concat demuxer with stream copy (no
// re-encoding). The temporary segment list file is always removed; on
// ffmpeg failure the partial output file is removed as well. Returns the
// output path on success.
func MergeSegments(
	segmentPaths []string,
	outputPath string,
) (string, error) {
	if len(segmentPaths) == 0 {
		return "", fmt.Errorf("no segments to merge")
	}
	listFilePath := outputPath + ".segments.txt"
	listFile, err := os.Create(listFilePath)
	if err != nil {
		return "", fmt.Errorf("failed to create segment list file: %w", err)
	}
	defer os.Remove(listFilePath)
	for _, segmentPath := range segmentPaths {
		// a short write here would silently truncate the concat list,
		// so check every write
		if _, err := fmt.Fprintf(listFile, "file '%s'\n", segmentPath); err != nil {
			listFile.Close()
			return "", fmt.Errorf("failed to write segment list file: %w", err)
		}
	}
	// close before invoking ffmpeg so the list is fully flushed and not
	// held open while another process reads it (matters on Windows)
	if err := listFile.Close(); err != nil {
		return "", fmt.Errorf("failed to close segment list file: %w", err)
	}
	err = ffmpeg.
		Input(listFilePath, ffmpeg.KwArgs{
			"f":                  "concat",
			"safe":               "0",
			"protocol_whitelist": "file,pipe",
		}).
		Output(outputPath, ffmpeg.KwArgs{
			"c":        "copy",
			"movflags": "+faststart",
		}).
		Silent(true).
		OverWriteOutput().
		Run()
	if err != nil {
		os.Remove(outputPath)
		return "", fmt.Errorf("failed to merge segments: %w", err)
	}
	return outputPath, nil
}

View file

@ -1,7 +1,6 @@
package util
import (
"bufio"
"bytes"
"context"
"fmt"
@ -102,14 +101,12 @@ func DownloadFileWithSegments(
return "", fmt.Errorf("failed to create temporary directory: %w", err)
}
defer os.RemoveAll(tempDir)
downloadedFiles, err := downloadSegments(ctx, segmentURLs, config)
downloadedFiles, err := downloadSegments(ctx, tempDir, segmentURLs, config)
if err != nil {
os.RemoveAll(tempDir)
return "", fmt.Errorf("failed to download segments: %w", err)
}
mergedFilePath, err := MergeSegmentFiles(ctx, downloadedFiles, fileName, config)
mergedFilePath, err := av.MergeSegments(downloadedFiles, fileName)
if err != nil {
os.RemoveAll(tempDir)
return "", fmt.Errorf("failed to merge segments: %w", err)
@ -406,6 +403,39 @@ func downloadChunkWithRetry(
return nil, fmt.Errorf("all %d attempts failed: %w", config.RetryAttempts+1, lastErr)
}
// downloadFile fetches fileURL into filePath using the shared download
// HTTP session, bounded by the given timeout. It returns the written
// path on success; any non-200 status is treated as an error.
func downloadFile(
	ctx context.Context,
	fileURL string,
	filePath string,
	timeout time.Duration,
) (string, error) {
	dlCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()
	req, err := http.NewRequestWithContext(dlCtx, http.MethodGet, fileURL, nil)
	if err != nil {
		return "", fmt.Errorf("failed to create request: %w", err)
	}
	resp, err := downloadHTTPSession.Do(req)
	if err != nil {
		return "", fmt.Errorf("failed to download file: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("unexpected status code: %d", resp.StatusCode)
	}
	out, err := os.Create(filePath)
	if err != nil {
		return "", fmt.Errorf("failed to create file: %w", err)
	}
	defer out.Close()
	if _, err := io.Copy(out, resp.Body); err != nil {
		return "", fmt.Errorf("failed to write file: %w", err)
	}
	return filePath, nil
}
func downloadChunk(
ctx context.Context,
fileURL string,
@ -472,6 +502,7 @@ func createChunks(fileSize int, chunkSize int) [][2]int {
func downloadSegments(
ctx context.Context,
path string,
segmentURLs []string,
config *models.DownloadConfig,
) ([]string, error) {
@ -479,20 +510,9 @@ func downloadSegments(
config = DefaultConfig()
}
tempDir := filepath.Join(
config.DownloadDir,
"segments"+uuid.NewString(),
)
if err := os.MkdirAll(tempDir, 0755); err != nil {
return nil, fmt.Errorf("failed to create temporary directory: %w", err)
}
defer os.RemoveAll(tempDir)
semaphore := make(chan struct{}, config.Concurrency)
var wg sync.WaitGroup
errChan := make(chan error, len(segmentURLs))
var firstErr atomic.Value
downloadedFiles := make([]string, len(segmentURLs))
@ -526,120 +546,38 @@ func downloadSegments(
defer func() { <-semaphore }()
segmentFileName := fmt.Sprintf("segment_%05d", idx)
segmentPath := filepath.Join(tempDir, segmentFileName)
segmentPath := filepath.Join(path, segmentFileName)
_, err := DownloadFile(ctx, []string{url}, segmentFileName, &models.DownloadConfig{
ChunkSize: config.ChunkSize,
Concurrency: 3, // segments are typically small
Timeout: config.Timeout,
DownloadDir: tempDir,
RetryAttempts: config.RetryAttempts,
RetryDelay: config.RetryDelay,
Remux: false, // don't remux individual segments
ProgressUpdater: nil, // no progress updates for individual segments
})
filePath, err := downloadFile(
ctx, url, segmentPath,
config.Timeout,
)
if err != nil {
if firstErr.Load() == nil {
firstErr.Store(fmt.Errorf("failed to download segment %d: %w", idx, err))
cancelDownload() // Cancella tutte le altre download
cancelDownload()
}
return
}
downloadedFiles[idx] = segmentPath
downloadedFiles[idx] = filePath
}(i, segmentURL)
}
wg.Wait()
go func() {
wg.Wait()
close(errChan)
}()
if err := firstErr.Load(); err != nil {
return nil, err.(error)
}
for i, file := range downloadedFiles {
if file == "" {
return nil, fmt.Errorf("segment %d was not downloaded", i)
}
if _, err := os.Stat(file); os.IsNotExist(err) {
return nil, fmt.Errorf("segment %d file does not exist: %w", i, err)
}
}
return downloadedFiles, nil
}
func MergeSegmentFiles(
ctx context.Context,
segmentPaths []string,
outputFileName string,
config *models.DownloadConfig,
) (string, error) {
if config == nil {
config = DefaultConfig()
}
if err := EnsureDownloadDir(config.DownloadDir); err != nil {
return "", err
}
outputPath := filepath.Join(config.DownloadDir, outputFileName)
outputFile, err := os.Create(outputPath)
if err != nil {
return "", fmt.Errorf("failed to create output file: %w", err)
}
defer func() {
outputFile.Close()
if err != nil {
os.Remove(outputPath)
}
}()
bufferedWriter := bufio.NewWriterSize(
outputFile,
1024*1024,
) // 1MB buffer
var totalBytes int64
var processedBytes int64
if config.ProgressUpdater != nil {
for _, segmentPath := range segmentPaths {
fileInfo, err := os.Stat(segmentPath)
if err == nil {
totalBytes += fileInfo.Size()
}
}
}
for i, segmentPath := range segmentPaths {
select {
case <-ctx.Done():
bufferedWriter.Flush()
outputFile.Close()
os.Remove(outputPath)
return "", ctx.Err()
default:
segmentFile, err := os.Open(segmentPath)
if err != nil {
return "", fmt.Errorf("failed to open segment %d: %w", i, err)
}
written, err := io.Copy(bufferedWriter, segmentFile)
segmentFile.Close()
if err != nil {
return "", fmt.Errorf("failed to copy segment %d: %w", i, err)
}
if config.ProgressUpdater != nil && totalBytes > 0 {
processedBytes += written
progress := float64(processedBytes) / float64(totalBytes)
config.ProgressUpdater(progress)
}
}
}
if err := bufferedWriter.Flush(); err != nil {
return "", fmt.Errorf("failed to flush data: %w", err)
}
if config.Remux {
err := av.RemuxFile(outputPath)
if err != nil {
return "", fmt.Errorf("remuxing failed: %w", err)
}
}
return outputPath, nil
}

View file

@ -2,6 +2,7 @@ package util
import (
"fmt"
"govd/models"
"net/http"
"os"
"os/exec"
@ -18,19 +19,24 @@ import (
var cookiesCache = make(map[string][]*http.Cookie)
func GetLocationURL(
client models.HTTPClient,
url string,
userAgent string,
headers map[string]string,
) (string, error) {
if client == nil {
client = GetDefaultHTTPClient()
}
req, err := http.NewRequest(http.MethodGet, url, nil)
if err != nil {
return "", fmt.Errorf("failed to create request: %w", err)
}
if userAgent == "" {
userAgent = ChromeUA
for k, v := range headers {
req.Header.Set(k, v)
}
req.Header.Set("User-Agent", userAgent)
session := GetDefaultHTTPClient()
resp, err := session.Do(req)
if req.Header.Get("User-Agent") == "" {
req.Header.Set("User-Agent", ChromeUA)
}
resp, err := client.Do(req)
if err != nil {
return "", fmt.Errorf("failed to send request: %w", err)
}

View file

@ -36,88 +36,166 @@ func ParseM3U8Content(
return nil, fmt.Errorf("failed parsing m3u8: %w", err)
}
var formats []*models.MediaFormat
if listType == m3u8.MASTER {
masterpl := playlist.(*m3u8.MasterPlaylist)
for _, variant := range masterpl.Variants {
if variant == nil || variant.URI == "" {
continue
}
width, height := int64(0), int64(0)
if variant.Resolution != "" {
var w, h int
if _, err := fmt.Sscanf(variant.Resolution, "%dx%d", &w, &h); err == nil {
width, height = int64(w), int64(h)
}
}
format := &models.MediaFormat{
Type: enums.MediaTypeVideo,
FormatID: fmt.Sprintf("hls-%d", variant.Bandwidth/1000),
VideoCodec: getCodecFromCodecs(variant.Codecs),
AudioCodec: getAudioCodecFromCodecs(variant.Codecs),
Bitrate: int64(variant.Bandwidth),
Width: width,
Height: height,
}
variantURL := resolveURL(baseURLObj, variant.URI)
format.URL = []string{variantURL}
variantContent, err := fetchContent(variantURL)
if err == nil {
variantFormats, err := ParseM3U8Content(variantContent, variantURL)
if err == nil && len(variantFormats) > 0 {
format.Segments = variantFormats[0].Segments
if variantFormats[0].Duration > 0 {
format.Duration = variantFormats[0].Duration
}
}
}
formats = append(formats, format)
}
return formats, nil
}
if listType == m3u8.MEDIA {
mediapl := playlist.(*m3u8.MediaPlaylist)
var segments []string
var totalDuration float64
for _, segment := range mediapl.Segments {
if segment != nil && segment.URI != "" {
segmentURL := segment.URI
if !strings.HasPrefix(segmentURL, "http://") && !strings.HasPrefix(segmentURL, "https://") {
segmentURL = resolveURL(baseURLObj, segmentURL)
}
segments = append(segments, segmentURL)
totalDuration += segment.Duration
}
}
format := &models.MediaFormat{
Type: enums.MediaTypeVideo,
FormatID: "hls",
VideoCodec: enums.MediaCodecAVC,
AudioCodec: enums.MediaCodecAAC,
Duration: int64(totalDuration),
URL: []string{baseURL},
Segments: segments,
}
return []*models.MediaFormat{format}, nil
switch listType {
case m3u8.MASTER:
return parseMasterPlaylist(
playlist.(*m3u8.MasterPlaylist),
baseURLObj,
)
case m3u8.MEDIA:
return parseMediaPlaylist(
playlist.(*m3u8.MediaPlaylist),
baseURLObj,
)
}
return nil, errors.New("unsupported m3u8 playlist type")
}
// parseMasterPlaylist converts an HLS master playlist into media formats:
// one format per variant stream, plus one per distinct alternative
// (rendition) group referenced by the variants.
func parseMasterPlaylist(
	playlist *m3u8.MasterPlaylist,
	baseURL *url.URL,
) ([]*models.MediaFormat, error) {
	var formats []*models.MediaFormat
	handledGroups := make(map[string]bool)
	for _, variant := range playlist.Variants {
		if variant == nil || variant.URI == "" {
			continue
		}
		// emit each rendition group (e.g. separate audio tracks) only once
		for _, alt := range variant.Alternatives {
			if handledGroups[alt.GroupId] {
				continue
			}
			handledGroups[alt.GroupId] = true
			altFormat := parseAlternative(playlist.Variants, alt, baseURL)
			if altFormat != nil {
				formats = append(formats, altFormat)
			}
		}
		width, height := getResolution(variant.Resolution)
		mediaType, videoCodec, audioCodec := parseVariantType(variant)
		streamURL := resolveURL(baseURL, variant.URI)
		// when the variant points at an audio group, its own stream
		// carries no audio track
		if variant.Audio != "" {
			audioCodec = ""
		}
		format := &models.MediaFormat{
			FormatID:   fmt.Sprintf("hls-%d", variant.Bandwidth/1000),
			Type:       mediaType,
			VideoCodec: videoCodec,
			AudioCodec: audioCodec,
			Bitrate:    int64(variant.Bandwidth),
			Width:      int64(width),
			Height:     int64(height),
			URL:        []string{streamURL},
		}
		// best-effort: resolve segments (and duration) from the
		// variant's media playlist; errors simply leave them unset
		if content, err := fetchContent(streamURL); err == nil {
			subFormats, err := ParseM3U8Content(content, streamURL)
			if err == nil && len(subFormats) > 0 {
				format.Segments = subFormats[0].Segments
				if subFormats[0].Duration > 0 {
					format.Duration = subFormats[0].Duration
				}
			}
		}
		formats = append(formats, format)
	}
	return formats, nil
}
// parseMediaPlaylist converts an HLS media playlist into a single
// format whose Segments hold every resolved segment URL (init segment
// first when present) and whose Duration is the summed segment length.
func parseMediaPlaylist(
	playlist *m3u8.MediaPlaylist,
	baseURL *url.URL,
) ([]*models.MediaFormat, error) {
	var segmentURLs []string
	var totalSeconds float64
	// the init segment (fMP4 header), when present, must come first
	if initSeg := playlist.Map; initSeg != nil && initSeg.URI != "" {
		segmentURLs = append(segmentURLs, resolveURL(baseURL, initSeg.URI))
	}
	for _, seg := range playlist.Segments {
		if seg == nil || seg.URI == "" {
			continue
		}
		segmentURLs = append(segmentURLs, resolveURL(baseURL, seg.URI))
		totalSeconds += seg.Duration
		if seg.Limit > 0 {
			// byterange not supported
			break
		}
	}
	format := &models.MediaFormat{
		FormatID: "hls",
		Duration: int64(totalSeconds),
		URL:      []string{baseURL.String()},
		Segments: segmentURLs,
	}
	return []*models.MediaFormat{format}, nil
}
// parseAlternative builds an audio media format from an HLS alternative
// rendition. It returns nil for nil/empty or non-AUDIO alternatives;
// segments and duration are filled in best-effort from the rendition's
// own playlist.
func parseAlternative(
	variants []*m3u8.Variant,
	alternative *m3u8.Alternative,
	baseURL *url.URL,
) *models.MediaFormat {
	if alternative == nil || alternative.URI == "" || alternative.Type != "AUDIO" {
		return nil
	}
	mediaURL := resolveURL(baseURL, alternative.URI)
	format := &models.MediaFormat{
		FormatID:   fmt.Sprintf("hls-%s", alternative.GroupId),
		Type:       enums.MediaTypeAudio,
		AudioCodec: getAudioAlternativeCodec(variants, alternative),
		URL:        []string{mediaURL},
	}
	// fetch the rendition playlist to resolve its segments; failures
	// simply leave Segments/Duration unset
	if content, err := fetchContent(mediaURL); err == nil {
		subFormats, err := ParseM3U8Content(content, mediaURL)
		if err == nil && len(subFormats) > 0 {
			format.Segments = subFormats[0].Segments
			if subFormats[0].Duration > 0 {
				format.Duration = subFormats[0].Duration
			}
		}
	}
	return format
}
// getAudioAlternativeCodec resolves the audio codec of a rendition by
// scanning the variants that reference its group and returning the
// first audio codec found in their CODECS attribute; empty when none
// can be determined.
func getAudioAlternativeCodec(
	variants []*m3u8.Variant,
	alt *m3u8.Alternative,
) enums.MediaCodec {
	if alt == nil || alt.URI == "" || alt.Type != "AUDIO" {
		return ""
	}
	for _, v := range variants {
		if v == nil || v.URI == "" || v.Audio != alt.GroupId {
			continue
		}
		if codec := getAudioCodec(v.Codecs); codec != "" {
			return codec
		}
	}
	return ""
}
func ParseM3U8FromURL(url string) ([]*models.MediaFormat, error) {
body, err := fetchContent(url)
if err != nil {
@ -140,7 +218,35 @@ func fetchContent(url string) ([]byte, error) {
return io.ReadAll(resp.Body)
}
func getCodecFromCodecs(codecs string) enums.MediaCodec {
// getResolution parses a "WIDTHxHEIGHT" string (e.g. "1920x1080") and
// returns the dimensions, or (0, 0) when the string does not match.
func getResolution(
	resolution string,
) (int64, int64) {
	var w, h int
	if _, err := fmt.Sscanf(resolution, "%dx%d", &w, &h); err != nil {
		return 0, 0
	}
	return int64(w), int64(h)
}
// parseVariantType derives the media type and codecs from a variant's
// CODECS attribute: video when a video codec is present, audio when only
// an audio codec is present, zero-value type otherwise.
func parseVariantType(
	variant *m3u8.Variant,
) (enums.MediaType, enums.MediaCodec, enums.MediaCodec) {
	videoCodec := getVideoCodec(variant.Codecs)
	audioCodec := getAudioCodec(variant.Codecs)
	switch {
	case videoCodec != "":
		return enums.MediaTypeVideo, videoCodec, audioCodec
	case audioCodec != "":
		return enums.MediaTypeAudio, videoCodec, audioCodec
	default:
		var none enums.MediaType
		return none, videoCodec, audioCodec
	}
}
func getVideoCodec(codecs string) enums.MediaCodec {
if strings.Contains(codecs, "avc") || strings.Contains(codecs, "h264") {
return enums.MediaCodecAVC
} else if strings.Contains(codecs, "hvc") || strings.Contains(codecs, "h265") {
@ -152,10 +258,10 @@ func getCodecFromCodecs(codecs string) enums.MediaCodec {
} else if strings.Contains(codecs, "vp8") {
return enums.MediaCodecVP8
}
return enums.MediaCodecAVC
return ""
}
func getAudioCodecFromCodecs(codecs string) enums.MediaCodec {
func getAudioCodec(codecs string) enums.MediaCodec {
if strings.Contains(codecs, "mp4a") {
return enums.MediaCodecAAC
} else if strings.Contains(codecs, "opus") {
@ -167,7 +273,7 @@ func getAudioCodecFromCodecs(codecs string) enums.MediaCodec {
} else if strings.Contains(codecs, "vorbis") {
return enums.MediaCodecVorbis
}
return enums.MediaCodecAAC
return ""
}
func resolveURL(base *url.URL, uri string) string {