main: changes, fixes

- refactors HTTP session usage across modules to share a single pooled client
- fixes video info extraction: duration is parsed as a float and returned first
- improves temporary directory handling, concurrency limits, buffering, and error handling in download routines
stefanodvx 2025-04-14 23:45:54 +02:00
parent 10c113f400
commit 58bd5827b3
12 changed files with 171 additions and 51 deletions

View file

@@ -57,10 +57,10 @@ func insertVideoInfo(
     format *models.MediaFormat,
     filePath string,
 ) {
-    width, height, duration := av.GetVideoInfo(filePath)
+    duration, width, height := av.GetVideoInfo(filePath)
+    format.Duration = duration
     format.Width = width
     format.Height = height
-    format.Duration = duration
 }
 
 func GetMessageFileID(msg *gotgbot.Message) string {

View file

@@ -41,6 +41,9 @@ func Start() {
     if err != nil {
         log.Fatalf("failed to get database connection: %v", err)
     }
+    sqlDB.SetMaxIdleConns(10)
+    sqlDB.SetMaxOpenConns(100)
+    sqlDB.SetConnMaxLifetime(time.Hour)
     err = sqlDB.Ping()
     if err != nil {
         log.Fatalf("failed to ping database: %v", err)

View file

@@ -16,6 +16,8 @@ import (
 // feel free to open PR, if you want to
 // add support for the official Instagram API
 
+var httpSession = util.GetHTTPSession()
+
 const (
     apiHostname = "api.igram.world"
     apiKey      = "aaeaf2805cea6abef3f9d2b6a666fce62fd9d612a43ab772bb50ce81455112e0"
@@ -42,8 +44,6 @@ var igHeaders = map[string]string{
     "User-Agent": util.ChromeUA,
 }
 
-var HTTPSession = util.NewHTTPSession()
-
 var Extractor = &models.Extractor{
     Name:     "Instagram",
     CodeName: "instagram",
@@ -96,7 +96,7 @@ var ShareURLExtractor = &models.Extractor{
         for k, v := range igHeaders {
             req.Header.Set(k, v)
         }
-        resp, err := HTTPSession.Do(req)
+        resp, err := httpSession.Do(req)
         if err != nil {
             return nil, fmt.Errorf("failed to send request: %w", err)
         }
@@ -183,7 +183,7 @@ func GetVideoAPI(contentURL string) (*IGramResponse, error) {
     req.Header.Set("Content-Type", "application/json")
     req.Header.Set("User-Agent", util.ChromeUA)
 
-    resp, err := HTTPSession.Do(req)
+    resp, err := httpSession.Do(req)
     if err != nil {
         return nil, fmt.Errorf("failed to send request: %w", err)
     }

View file

@@ -115,7 +115,7 @@ func GetPostCaption(
     req.Header.Set("Cache-Control", "no-cache")
     req.Header.Set("TE", "trailers")
 
-    resp, err := HTTPSession.Do(req)
+    resp, err := httpSession.Do(req)
     if err != nil {
         return "", fmt.Errorf("failed to send request: %w", err)
     }

View file

@@ -17,7 +17,7 @@ const (
     shortenerAPIFormat = "https://api.pinterest.com/url_shortener/%s/redirect/"
 )
 
-var HTTPSession = util.NewHTTPSession()
+var httpSession = util.GetHTTPSession()
 
 var ShortExtractor = &models.Extractor{
     Name: "Pinterest (Short)",
@@ -148,7 +148,7 @@ func GetPinData(pinID string) (*PinData, error) {
     // fix 403 error
     req.Header.Set("X-Pinterest-PWS-Handler", "www/[username].js")
 
-    resp, err := HTTPSession.Do(req)
+    resp, err := httpSession.Do(req)
     if err != nil {
         return nil, fmt.Errorf("failed to send request: %w", err)
     }

View file

@@ -12,7 +12,7 @@ import (
     "govd/util"
 )
 
-var HTTPSession = util.NewHTTPSession()
+var httpSession = util.GetHTTPSession()
 
 var ShortExtractor = &models.Extractor{
     Name: "Reddit (Short)",
@@ -37,7 +37,7 @@ var ShortExtractor = &models.Extractor{
             req.AddCookie(cookie)
         }
 
-        res, err := HTTPSession.Do(req)
+        res, err := httpSession.Do(req)
         if err != nil {
             return nil, fmt.Errorf("failed to send request: %w", err)
         }
@@ -229,7 +229,7 @@ func GetRedditData(host string, slug string) (RedditResponse, error) {
         req.AddCookie(cookie)
     }
 
-    res, err := HTTPSession.Do(req)
+    res, err := httpSession.Do(req)
     if err != nil {
         return nil, fmt.Errorf("failed to send request: %w", err)
     }

View file

@@ -23,7 +23,7 @@ const (
     appUserAgent = packageID + " (Linux; U; Android 13; en_US; Pixel 7; Build/TD1A.220804.031; Cronet/58.0.2991.0)"
 )
 
-var HTTPSession = util.NewHTTPSession()
+var httpSession = util.GetHTTPSession()
 
 var VMExtractor = &models.Extractor{
     Name: "TikTok VM",
@@ -147,7 +147,7 @@ func GetVideoAPI(awemeID string) (*AwemeDetails, error) {
     req.Header.Set("Accept", "application/json")
     req.Header.Set("X-Argus", "")
 
-    resp, err := HTTPSession.Do(req)
+    resp, err := httpSession.Do(req)
     if err != nil {
         return nil, fmt.Errorf("failed to send request: %w", err)
     }

View file

@@ -17,7 +17,7 @@ const (
     apiEndpoint = "https://x.com/i/api/graphql/zZXycP0V6H7m-2r0mOnFcA/TweetDetail"
 )
 
-var HTTPSession = util.NewHTTPSession()
+var httpSession = util.GetHTTPSession()
 
 var ShortExtractor = &models.Extractor{
     Name: "Twitter (Short)",
@@ -33,7 +33,7 @@ var ShortExtractor = &models.Extractor{
             return nil, fmt.Errorf("failed to create req: %w", err)
         }
         req.Header.Set("User-Agent", util.ChromeUA)
-        res, err := HTTPSession.Do(req)
+        res, err := httpSession.Do(req)
         if err != nil {
             return nil, fmt.Errorf("failed to send request: %w", err)
         }
@@ -151,7 +151,7 @@ func GetTweetAPI(tweetID string) (*Tweet, error) {
     }
     req.URL.RawQuery = q.Encode()
 
-    resp, err := HTTPSession.Do(req)
+    resp, err := httpSession.Do(req)
     if err != nil {
         return nil, fmt.Errorf("failed to send request: %w", err)
     }

View file

@@ -12,10 +12,7 @@ func ExtractVideoThumbnail(
         Input(videoPath).
         Output(thumbnailPath, ffmpeg.KwArgs{
             "vframes": 1,
-            "f":       "image2",
             "ss":      "00:00:01",
-            "c:v":     "mjpeg",
-            "q:v":     10, // not sure
         }).
         Silent(true).
         OverWriteOutput().

View file

@@ -10,9 +10,9 @@ func GetVideoInfo(filePath string) (int64, int64, int64) {
     if err != nil {
         return 0, 0, 0
     }
-    duration := gjson.Get(probeData, "format.duration").Int()
+    duration := gjson.Get(probeData, "format.duration").Float()
     width := gjson.Get(probeData, "streams.0.width").Int()
     height := gjson.Get(probeData, "streams.0.height").Int()
-    return duration, width, height
+    return int64(duration), width, height
 }
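
A note on the duration change above: ffprobe reports format.duration as a decimal string (e.g. "12.345678"), so reading it with gjson's .Int() can silently collapse it to 0; parsing it as a float and truncating keeps whole seconds. A standalone sketch with an illustrative probe payload:

package main

import (
    "fmt"

    "github.com/tidwall/gjson"
)

func main() {
    // ffprobe-style JSON: duration is a decimal string, dimensions are numbers
    probe := `{"format":{"duration":"12.345678"},"streams":[{"width":1280,"height":720}]}`

    duration := gjson.Get(probe, "format.duration").Float() // 12.345678
    width := gjson.Get(probe, "streams.0.width").Int()
    height := gjson.Get(probe, "streams.0.height").Int()

    fmt.Println(int64(duration), width, height) // 12 1280 720
}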

View file

@@ -1,19 +1,24 @@
 package util
 
 import (
+    "bufio"
     "bytes"
     "context"
     "fmt"
     "io"
+    "log"
     "math"
     "net/http"
     "os"
     "path/filepath"
+    "runtime"
     "sync"
     "time"
 
     "govd/models"
     "govd/util/av"
+
+    "github.com/google/uuid"
 )
 
 func DefaultConfig() *models.DownloadConfig {
@@ -81,10 +86,19 @@ func DownloadFileWithSegments(
     if err := EnsureDownloadDir(config.DownloadDir); err != nil {
         return "", err
     }
-    tempDir := filepath.Join(config.DownloadDir, "segments_"+time.Now().Format("20060102_150405"))
+    tempDir := filepath.Join(
+        config.DownloadDir,
+        "segments"+uuid.NewString(),
+    )
     if err := os.MkdirAll(tempDir, 0755); err != nil {
         return "", fmt.Errorf("failed to create temporary directory: %w", err)
     }
+    var cleanupErr error
+    defer func() {
+        if cleanupErr = os.RemoveAll(tempDir); cleanupErr != nil {
+            log.Printf("warning: failed to clean up temp directory %s: %v\n", tempDir, cleanupErr)
+        }
+    }()
     downloadedFiles, err := DownloadSegments(ctx, segmentURLs, config)
     if err != nil {
         os.RemoveAll(tempDir)
@@ -128,17 +142,27 @@ func DownloadFileInMemory(
     return nil, fmt.Errorf("%w: %v", ErrDownloadFailed, errs)
 }
 
-func downloadInMemory(ctx context.Context, fileURL string, timeout time.Duration) ([]byte, error) {
+func downloadInMemory(
+    ctx context.Context,
+    fileURL string,
+    timeout time.Duration,
+) ([]byte, error) {
     reqCtx, cancel := context.WithTimeout(ctx, timeout)
     defer cancel()
 
+    select {
+    case <-ctx.Done():
+        return nil, ctx.Err()
+    default:
+        // continue with the request
+    }
+
     req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, fileURL, nil)
     if err != nil {
         return nil, fmt.Errorf("failed to create request: %w", err)
     }
 
-    session := GetHTTPSession()
-    resp, err := session.Do(req)
+    resp, err := httpSession.Do(req)
     if err != nil {
         return nil, fmt.Errorf("failed to download file: %w", err)
     }
@@ -148,7 +172,17 @@ func downloadInMemory(ctx context.Context, fileURL string, timeout time.Duration
         return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
     }
 
-    return io.ReadAll(resp.Body)
+    var buf bytes.Buffer
+    if resp.ContentLength > 0 {
+        buf.Grow(int(resp.ContentLength))
+    }
+
+    _, err = io.Copy(&buf, resp.Body)
+    if err != nil {
+        return nil, fmt.Errorf("failed to read response body: %w", err)
+    }
+
+    return buf.Bytes(), nil
 }
 
 func EnsureDownloadDir(dir string) error {
@@ -170,6 +204,12 @@ func runChunkedDownload(
     filePath string,
     config *models.DownloadConfig,
 ) error {
+    // reduce concurrency if it's greater
+    // than the number of available CPUs
+    if runtime.NumCPU() < config.Concurrency && runtime.GOMAXPROCS(0) < config.Concurrency {
+        config.Concurrency = runtime.NumCPU()
+    }
+
     fileSize, err := getFileSize(ctx, fileURL, config.Timeout)
     if err != nil {
         return err
@@ -193,7 +233,7 @@ func runChunkedDownload(
     semaphore := make(chan struct{}, config.Concurrency)
 
     var wg sync.WaitGroup
-    errChan := make(chan error, 1)
+    errChan := make(chan error, len(chunks))
     var downloadErr error
     var errOnce sync.Once
 
@@ -252,22 +292,40 @@ func runChunkedDownload(
         }(idx, chunk)
     }
 
+    done := make(chan struct{})
+
     go func() {
         wg.Wait()
         close(errChan)
+        close(done)
     }()
 
+    var multiErr []error
+
     select {
     case err := <-errChan:
         if err != nil {
-            // clean up partial download
-            os.Remove(filePath)
-            return err
+            multiErr = append(multiErr, err)
+            // collect all errors
+            for e := range errChan {
+                if e != nil {
+                    multiErr = append(multiErr, e)
+                }
+            }
         }
+        <-done
     case <-ctx.Done():
         cancelDownload()
+        <-done // wait for all goroutines to finish
        os.Remove(filePath)
         return ctx.Err()
+    case <-done:
+        // no errors
+    }
+
+    if len(multiErr) > 0 {
+        os.Remove(filePath)
+        return fmt.Errorf("multiple download errors: %v", multiErr)
     }
 
     return nil
@@ -282,8 +340,7 @@ func getFileSize(ctx context.Context, fileURL string, timeout time.Duration) (in
         return 0, fmt.Errorf("failed to create request: %w", err)
     }
 
-    session := GetHTTPSession()
-    resp, err := session.Do(req)
+    resp, err := httpSession.Do(req)
     if err != nil {
         return 0, fmt.Errorf("failed to get file size: %w", err)
     }
@@ -340,8 +397,7 @@ func downloadChunk(
     }
 
     req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", chunk[0], chunk[1]))
-    session := GetHTTPSession()
-    resp, err := session.Do(req)
+    resp, err := httpSession.Do(req)
     if err != nil {
         return nil, fmt.Errorf("download failed: %w", err)
     }
@@ -351,7 +407,18 @@ func downloadChunk(
         return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
     }
 
-    return io.ReadAll(resp.Body)
+    var buf bytes.Buffer
+    if resp.ContentLength > 0 {
+        buf.Grow(int(resp.ContentLength))
+    } else {
+        buf.Grow(chunk[1] - chunk[0] + 1)
+    }
+
+    _, err = io.Copy(&buf, resp.Body)
+    if err != nil {
+        return nil, fmt.Errorf("failed to read chunk data: %w", err)
+    }
+    return buf.Bytes(), nil
 }
 
 func writeChunkToFile(file *os.File, data []byte, offset int) error {
@@ -388,7 +455,10 @@ func DownloadSegments(
         config = DefaultConfig()
     }
 
-    tempDir := filepath.Join(config.DownloadDir, "segments_"+time.Now().Format("20060102_150405"))
+    tempDir := filepath.Join(
+        config.DownloadDir,
+        "segments"+uuid.NewString(),
+    )
     if err := os.MkdirAll(tempDir, 0755); err != nil {
         return nil, fmt.Errorf("failed to create temporary directory: %w", err)
     }
@@ -397,14 +467,26 @@ func DownloadSegments(
     var wg sync.WaitGroup
     errChan := make(chan error, len(segmentURLs))
+    var errMutex sync.Mutex
+    var firstErr error
 
     downloadedFiles := make([]string, len(segmentURLs))
 
+    downloadCtx, cancelDownload := context.WithCancel(ctx)
+    defer cancelDownload()
+
     for i, segmentURL := range segmentURLs {
         wg.Add(1)
         go func(idx int, url string) {
             defer wg.Done()
 
+            select {
+            case <-downloadCtx.Done():
+                return
+            default:
+                // continue with the download
+            }
+
             // acquire semaphore slot
             semaphore <- struct{}{}
             defer func() { <-semaphore }()
@@ -424,7 +506,12 @@ func DownloadSegments(
             })
 
             if err != nil {
-                errChan <- fmt.Errorf("failed to download segment %d: %w", idx, err)
+                errMutex.Lock()
+                if firstErr == nil {
+                    firstErr = fmt.Errorf("failed to download segment %d: %w", idx, err)
+                    cancelDownload() // cancel all other downloads
+                }
+                errMutex.Unlock()
                 return
             }
@@ -466,7 +553,17 @@ func MergeSegmentFiles(
     if err != nil {
         return "", fmt.Errorf("failed to create output file: %w", err)
     }
-    defer outputFile.Close()
+    defer func() {
+        outputFile.Close()
+        if err != nil {
+            os.Remove(outputPath)
+        }
+    }()
+
+    bufferedWriter := bufio.NewWriterSize(
+        outputFile,
+        1024*1024,
+    ) // 1MB buffer
 
     var totalBytes int64
     var processedBytes int64
@@ -483,6 +580,9 @@ func MergeSegmentFiles(
     for i, segmentPath := range segmentPaths {
         select {
         case <-ctx.Done():
+            bufferedWriter.Flush()
+            outputFile.Close()
+            os.Remove(outputPath)
             return "", ctx.Err()
         default:
             segmentFile, err := os.Open(segmentPath)
@@ -490,7 +590,7 @@ func MergeSegmentFiles(
                 return "", fmt.Errorf("failed to open segment %d: %w", i, err)
             }
 
-            written, err := io.Copy(outputFile, segmentFile)
+            written, err := io.Copy(bufferedWriter, segmentFile)
             segmentFile.Close()
 
             if err != nil {
@@ -505,6 +605,10 @@ func MergeSegmentFiles(
         }
     }
 
+    if err := bufferedWriter.Flush(); err != nil {
+        return "", fmt.Errorf("failed to flush data: %w", err)
+    }
+
     if config.Remux {
         err := av.RemuxFile(outputPath)
         if err != nil {
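
The DownloadSegments changes above pair a fixed-size semaphore with a cancel-on-first-error context. A minimal, self-contained sketch of that pattern; the package name and the fetch helper are illustrative, not the repository's API:

package downloader

import (
    "context"
    "fmt"
    "sync"
)

// downloadAll runs one goroutine per URL, bounded by a semaphore of size limit;
// the first failure cancels the shared context so remaining workers bail out early.
func downloadAll(ctx context.Context, urls []string, limit int) error {
    ctx, cancel := context.WithCancel(ctx)
    defer cancel()

    sem := make(chan struct{}, limit)
    var wg sync.WaitGroup
    var mu sync.Mutex
    var firstErr error

    for i, u := range urls {
        wg.Add(1)
        go func(idx int, url string) {
            defer wg.Done()
            select {
            case <-ctx.Done():
                return // another download already failed
            case sem <- struct{}{}: // acquire a concurrency slot
            }
            defer func() { <-sem }()

            if err := fetch(ctx, url); err != nil {
                mu.Lock()
                if firstErr == nil {
                    firstErr = fmt.Errorf("segment %d: %w", idx, err)
                    cancel() // stop the remaining downloads
                }
                mu.Unlock()
            }
        }(i, u)
    }

    wg.Wait()
    return firstErr
}

// fetch stands in for the real chunk/segment download.
func fetch(ctx context.Context, url string) error { return nil }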

View file

@@ -1,22 +1,38 @@
 package util
 
 import (
+    "net"
     "net/http"
+    "sync"
     "time"
 )
 
-var httpSession = NewHTTPSession()
-
-func NewHTTPSession() *http.Client {
-    session := &http.Client{
-        Timeout: 20 * time.Second,
-        Transport: &http.Transport{
-            Proxy: http.ProxyFromEnvironment,
-        },
-    }
-    return session
-}
+var (
+    httpSession     *http.Client
+    httpSessionOnce sync.Once
+)
 
 func GetHTTPSession() *http.Client {
+    httpSessionOnce.Do(func() {
+        transport := &http.Transport{
+            Proxy: http.ProxyFromEnvironment,
+            DialContext: (&net.Dialer{
+                Timeout:   30 * time.Second,
+                KeepAlive: 30 * time.Second,
+            }).DialContext,
+            ForceAttemptHTTP2:     true,
+            MaxIdleConns:          100,
+            IdleConnTimeout:       90 * time.Second,
+            TLSHandshakeTimeout:   10 * time.Second,
+            ExpectContinueTimeout: 1 * time.Second,
+            MaxIdleConnsPerHost:   10,
+            MaxConnsPerHost:       10,
+        }
+
+        httpSession = &http.Client{
+            Transport: transport,
+            Timeout:   30 * time.Second,
+        }
+    })
     return httpSession
 }
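
With the shared client in place, each extractor package grabs the singleton once and reuses the pooled transport for every request. A usage sketch; fetchJSON and the package name are illustrative, while util.GetHTTPSession and util.ChromeUA are the identifiers from the diff:

package extractor

import (
    "fmt"
    "io"
    "net/http"

    "govd/util"
)

// shared, pooled client from the util singleton above
var httpSession = util.GetHTTPSession()

func fetchJSON(url string) ([]byte, error) {
    req, err := http.NewRequest(http.MethodGet, url, nil)
    if err != nil {
        return nil, fmt.Errorf("failed to create request: %w", err)
    }
    req.Header.Set("User-Agent", util.ChromeUA)

    resp, err := httpSession.Do(req)
    if err != nil {
        return nil, fmt.Errorf("failed to send request: %w", err)
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
    }
    return io.ReadAll(resp.Body)
}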