Why? Running a regex against every single message the bot receives, even with simple patterns, can be very costly for your CPU, lol.
111 lines
2.2 KiB
Go
111 lines
2.2 KiB
Go
package ext
|
|
|
|
import (
|
|
"fmt"
|
|
"govd/models"
|
|
"net/url"
|
|
"strings"
|
|
"sync"
|
|
)
|
|
|
|
var (
|
|
maxRedirects = 5
|
|
|
|
extractorsByHost map[string][]*models.Extractor
|
|
extractorMapOnce sync.Once
|
|
)
|
|
|
|
func initExtractorMap() {
|
|
extractorMapOnce.Do(func() {
|
|
extractorsByHost = make(map[string][]*models.Extractor)
|
|
for _, extractor := range List {
|
|
if len(extractor.Host) > 0 {
|
|
for _, domain := range extractor.Host {
|
|
extractorsByHost[domain] = append(extractorsByHost[domain], extractor)
|
|
}
|
|
}
|
|
}
|
|
})
|
|
}
|
|
|
|
func CtxByURL(urlStr string) (*models.DownloadContext, error) {
|
|
initExtractorMap()
|
|
|
|
var redirectCount int
|
|
currentURL := urlStr
|
|
|
|
for redirectCount <= maxRedirects {
|
|
parsedURL, err := url.Parse(currentURL)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid URL: %v", err)
|
|
}
|
|
|
|
host := strings.TrimPrefix(parsedURL.Host, "www.")
|
|
|
|
extractors := extractorsByHost[host]
|
|
if len(extractors) == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
var extractor *models.Extractor
|
|
var matches []string
|
|
var groups map[string]string
|
|
|
|
for _, ext := range extractors {
|
|
matches = ext.URLPattern.FindStringSubmatch(currentURL)
|
|
if matches != nil {
|
|
extractor = ext
|
|
groupNames := ext.URLPattern.SubexpNames()
|
|
groups = make(map[string]string)
|
|
for i, name := range groupNames {
|
|
if name != "" && i < len(matches) {
|
|
groups[name] = matches[i]
|
|
}
|
|
}
|
|
groups["match"] = matches[0]
|
|
break
|
|
}
|
|
}
|
|
|
|
if extractor == nil || matches == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
ctx := &models.DownloadContext{
|
|
MatchedContentID: groups["id"],
|
|
MatchedContentURL: groups["match"],
|
|
MatchedGroups: groups,
|
|
Extractor: extractor,
|
|
}
|
|
|
|
if !extractor.IsRedirect {
|
|
return ctx, nil
|
|
}
|
|
|
|
response, err := extractor.Run(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if response.URL == "" {
|
|
return nil, fmt.Errorf("no URL found in response")
|
|
}
|
|
|
|
currentURL = response.URL
|
|
redirectCount++
|
|
|
|
if redirectCount > maxRedirects {
|
|
return nil, fmt.Errorf("exceeded maximum number of redirects (%d)", maxRedirects)
|
|
}
|
|
}
|
|
|
|
return nil, fmt.Errorf("failed to extract from URL: %s", urlStr)
|
|
}
|
|
|
|
func ByCodeName(codeName string) *models.Extractor {
|
|
for _, extractor := range List {
|
|
if extractor.CodeName == codeName {
|
|
return extractor
|
|
}
|
|
}
|
|
return nil
|
|
}
|