govd/ext/util.go
stefanodvx 3e351e7e43 use host matching instead of regex (desc)
why? using regex on every single message the bot receives, even simple patterns, can be very harmful for your cpu lol
2025-04-16 14:11:55 +02:00

111 lines
2.2 KiB
Go

package ext
import (
"fmt"
"govd/models"
"net/url"
"strings"
"sync"
)
// Package-level state shared by the extractor lookup helpers in this file.
var (
// maxRedirects bounds how many redirect hops CtxByURL follows before it
// gives up with an error.
maxRedirects = 5
// extractorsByHost indexes the extractors from List by each entry of their
// Host field. It is nil until initExtractorMap has run.
extractorsByHost map[string][]*models.Extractor
// extractorMapOnce ensures extractorsByHost is built only once.
extractorMapOnce sync.Once
)
func initExtractorMap() {
extractorMapOnce.Do(func() {
extractorsByHost = make(map[string][]*models.Extractor)
for _, extractor := range List {
if len(extractor.Host) > 0 {
for _, domain := range extractor.Host {
extractorsByHost[domain] = append(extractorsByHost[domain], extractor)
}
}
}
})
}
// CtxByURL resolves urlStr to the extractor that handles it and returns a
// download context filled in from the extractor's URL pattern match.
//
// Candidate extractors are narrowed down by host first (a map lookup), so the
// URL patterns are only evaluated for hosts that some extractor registered.
// The host is compared without its port, in lower case, and without a leading
// "www." prefix.
//
// If the matching extractor has IsRedirect set, it is run to obtain the URL it
// points at and resolution starts over with that URL. A chain containing more
// than maxRedirects redirect extractors results in an error.
//
// Return values:
//   - (ctx, nil) when a non-redirect extractor matched;
//   - (nil, nil) when no extractor is registered for the host or none of the
//     registered patterns match the URL;
//   - (nil, err) when the URL cannot be parsed, a redirect extractor fails or
//     yields an empty URL, or the redirect limit is exceeded.
func CtxByURL(urlStr string) (*models.DownloadContext, error) {
	initExtractorMap()

	currentURL := urlStr
	for redirectCount := 0; redirectCount <= maxRedirects; redirectCount++ {
		parsedURL, err := url.Parse(currentURL)
		if err != nil {
			return nil, fmt.Errorf("invalid URL: %w", err)
		}

		// Hostname (unlike Host) drops any ":port" suffix, and host names are
		// case-insensitive, so normalise before consulting the index.
		host := strings.TrimPrefix(strings.ToLower(parsedURL.Hostname()), "www.")

		var (
			extractor *models.Extractor
			groups    map[string]string
		)
		// The first extractor whose pattern matches wins.
		for _, candidate := range extractorsByHost[host] {
			matches := candidate.URLPattern.FindStringSubmatch(currentURL)
			if matches == nil {
				continue
			}
			extractor = candidate
			groups = make(map[string]string, len(matches))
			for i, name := range candidate.URLPattern.SubexpNames() {
				if name != "" && i < len(matches) {
					groups[name] = matches[i]
				}
			}
			// "match" always holds the full matched text, taking precedence
			// over a capture group that happens to share the name.
			groups["match"] = matches[0]
			break
		}
		if extractor == nil {
			return nil, nil
		}

		ctx := &models.DownloadContext{
			MatchedContentID:  groups["id"],
			MatchedContentURL: groups["match"],
			MatchedGroups:     groups,
			Extractor:         extractor,
		}
		if !extractor.IsRedirect {
			return ctx, nil
		}

		// Redirect extractor: ask it for the target URL and resolve again.
		response, err := extractor.Run(ctx)
		if err != nil {
			return nil, err
		}
		if response.URL == "" {
			return nil, fmt.Errorf("no URL found in response")
		}
		currentURL = response.URL
	}

	return nil, fmt.Errorf("exceeded maximum number of redirects (%d)", maxRedirects)
}
// ByCodeName looks up an extractor in List by its CodeName. It returns the
// first extractor whose CodeName equals codeName, or nil if there is none.
func ByCodeName(codeName string) *models.Extractor {
	var found *models.Extractor
	for _, candidate := range List {
		if candidate.CodeName != codeName {
			continue
		}
		found = candidate
		break
	}
	return found
}