This commit is contained in:
stefanodvx 2025-04-14 13:05:43 +02:00
parent 264c97183e
commit 3faede7b1c
74 changed files with 6228 additions and 1 deletions

169
ext/instagram/main.go Normal file
View file

@ -0,0 +1,169 @@
package instagram
import (
"crypto/tls"
"fmt"
"govd/enums"
"govd/models"
"govd/util"
"io"
"net/http"
"regexp"
"github.com/quic-go/quic-go"
"github.com/quic-go/quic-go/http3"
)
// as a public service, we can't use the official API
// so we use igram.world API, a third-party service
// that provides a similar functionality
// feel free to open PR, if you want to
// add support for the official Instagram API
const (
apiHostname = "api.igram.world"
apiKey = "aaeaf2805cea6abef3f9d2b6a666fce62fd9d612a43ab772bb50ce81455112e0"
apiTimestamp = "1742201548873"
// todo: Implement a proper way
// to get the API key and timestamp
)
var HTTPClient = &http.Client{
Transport: &http3.Transport{
TLSClientConfig: &tls.Config{
InsecureSkipVerify: true,
},
QUICConfig: &quic.Config{
MaxIncomingStreams: -1,
EnableDatagrams: true,
},
},
}
var Extractor = &models.Extractor{
Name: "Instagram",
CodeName: "instagram",
Type: enums.ExtractorTypeSingle,
Category: enums.ExtractorCategorySocial,
URLPattern: regexp.MustCompile(`https:\/\/www\.instagram\.com\/(reel|p|tv)\/(?P<id>[a-zA-Z0-9_-]+)`),
IsRedirect: false,
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
mediaList, err := MediaListFromAPI(ctx, false)
return &models.ExtractorResponse{
MediaList: mediaList,
}, err
},
}
var StoriesExtractor = &models.Extractor{
Name: "Instagram Stories",
CodeName: "instagram:stories",
Type: enums.ExtractorTypeSingle,
Category: enums.ExtractorCategorySocial,
URLPattern: regexp.MustCompile(`https:\/\/www\.instagram\.com\/stories\/[a-zA-Z0-9._]+\/(?P<id>\d+)`),
IsRedirect: false,
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
mediaList, err := MediaListFromAPI(ctx, true)
return &models.ExtractorResponse{
MediaList: mediaList,
}, err
},
}
func MediaListFromAPI(
ctx *models.DownloadContext,
stories bool,
) ([]*models.Media, error) {
var mediaList []*models.Media
postURL := ctx.MatchedContentURL
details, err := GetVideoAPI(postURL)
if err != nil {
return nil, fmt.Errorf("failed to get post: %w", err)
}
var caption string
if !stories {
caption, err = GetPostCaption(postURL)
if err != nil {
return nil, fmt.Errorf("failed to get caption: %w", err)
}
}
for _, item := range details.Items {
media := ctx.Extractor.NewMedia(
ctx.MatchedContentID,
ctx.MatchedContentURL,
)
media.SetCaption(caption)
urlObj := item.URL[0]
contentURL, err := GetCDNURL(urlObj.URL)
if err != nil {
return nil, err
}
thumbnailURL, err := GetCDNURL(item.Thumb)
if err != nil {
return nil, err
}
fileExt := urlObj.Ext
formatID := urlObj.Type
switch fileExt {
case "mp4":
media.AddFormat(&models.MediaFormat{
Type: enums.MediaTypeVideo,
FormatID: formatID,
URL: []string{contentURL},
VideoCodec: enums.MediaCodecAVC,
AudioCodec: enums.MediaCodecAAC,
Thumbnail: []string{thumbnailURL},
},
)
case "jpg", "webp", "heic", "jpeg":
media.AddFormat(&models.MediaFormat{
Type: enums.MediaTypePhoto,
FormatID: formatID,
URL: []string{contentURL},
})
default:
return nil, fmt.Errorf("unknown format: %s", fileExt)
}
mediaList = append(mediaList, media)
}
return mediaList, nil
}
func GetVideoAPI(contentURL string) (*IGramResponse, error) {
apiURL := fmt.Sprintf(
"https://%s/api/convert",
apiHostname,
)
payload, err := BuildSignedPayload(contentURL)
if err != nil {
return nil, fmt.Errorf("failed to build signed payload: %w", err)
}
req, err := http.NewRequest("POST", apiURL, payload)
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("User-Agent", util.ChromeUA)
resp, err := HTTPClient.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to send request: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("failed to get response: %s", resp.Status)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to read response body: %w", err)
}
response, err := ParseIGramResponse(body)
if err != nil {
return nil, fmt.Errorf("failed to parse response: %w", err)
}
return response, nil
}

19
ext/instagram/models.go Normal file
View file

@ -0,0 +1,19 @@
package instagram
type IGramResponse struct {
Items []*IGramMedia `json:"items"`
}
type IGramMedia struct {
URL []*MediaURL `json:"url"`
Thumb string `json:"thumb"`
Hosting string `json:"hosting"`
Timestamp int `json:"timestamp"`
}
type MediaURL struct {
URL string `json:"url"`
Name string `json:"name"`
Type string `json:"type"`
Ext string `json:"ext"`
}

139
ext/instagram/util.go Normal file
View file

@ -0,0 +1,139 @@
package instagram
import (
"crypto/sha256"
"encoding/json"
"fmt"
"govd/util"
"html"
"io"
"net/http"
"net/url"
"regexp"
"strings"
"time"
)
var captionPattern = regexp.MustCompile(
`(?s)<meta property="og:title" content=".*?: &quot;(.*?)&quot;"`,
)
func BuildSignedPayload(contentURL string) (io.Reader, error) {
timestamp := fmt.Sprintf("%d", time.Now().UnixMilli())
hash := sha256.New()
_, err := io.WriteString(
hash,
contentURL+timestamp+apiKey,
)
if err != nil {
return nil, fmt.Errorf("error writing to SHA256 hash: %w", err)
}
secretBytes := hash.Sum(nil)
secretString := fmt.Sprintf("%x", secretBytes)
secretString = strings.ToLower(secretString)
payload := map[string]string{
"url": contentURL,
"ts": timestamp,
"_ts": apiTimestamp,
"_tsc": "0", // ?
"_s": secretString,
}
parsedPayload, err := json.Marshal(payload)
if err != nil {
return nil, fmt.Errorf("error marshalling payload: %w", err)
}
reader := strings.NewReader(string(parsedPayload))
return reader, nil
}
func ParseIGramResponse(body []byte) (*IGramResponse, error) {
var rawResponse interface{}
if err := json.Unmarshal(body, &rawResponse); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
}
switch rawResponse.(type) {
case []interface{}:
// array of IGramMedia
var media []*IGramMedia
if err := json.Unmarshal(body, &media); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
}
return &IGramResponse{
Items: media,
}, nil
case map[string]interface{}:
// single IGramMedia
var media IGramMedia
if err := json.Unmarshal(body, &media); err != nil {
return nil, fmt.Errorf("failed to decode response: %w", err)
}
return &IGramResponse{
Items: []*IGramMedia{&media},
}, nil
default:
return nil, fmt.Errorf("unexpected response type: %T", rawResponse)
}
}
func GetCDNURL(contentURL string) (string, error) {
parsedUrl, err := url.Parse(contentURL)
if err != nil {
return "", fmt.Errorf("can't parse igram URL: %v", err)
}
queryParams, err := url.ParseQuery(parsedUrl.RawQuery)
if err != nil {
return "", fmt.Errorf("can't unescape igram URL: %v", err)
}
cdnURL := queryParams.Get("uri")
return cdnURL, nil
}
func GetPostCaption(
postURL string,
) (string, error) {
req, err := http.NewRequest(
http.MethodGet,
postURL,
nil,
)
if err != nil {
return "", fmt.Errorf("failed to create request: %w", err)
}
req.Header.Set("User-Agent", util.ChromeUA)
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
req.Header.Set("Accept-Language", "it-IT,it;q=0.8,en-US;q=0.5,en;q=0.3")
req.Header.Set("Referer", "https://www.instagram.com/accounts/onetap/?next=%2F")
req.Header.Set("Alt-Used", "www.instagram.com")
req.Header.Set("Connection", "keep-alive")
req.Header.Set("Cookie", `csrftoken=Ib2Zuvf1y9HkDwXFxkdang; sessionid=8569455296%3AIFQiov2eYfTdSd%3A19%3AAYfVHnaxecWGWhyzxvz60vu5qLn05DyKgN_tTZUXTA; ds_user_id=8569455296; mid=Z_j1vQAEAAGVUE3KuxMR7vBonGBw; ig_did=BC48C8B7-D71B-49EF-8195-F9DE37A57B49; rur="CLN\0548569455296\0541775905137:01f7ebda5b896815e9279bb86a572db6bdc8ebccf3e1f8d5327e2bc5ca187fd5cd932b66"; wd=513x594; datr=x_X4Z_CHqpwtjaRKq7PtCNu3`)
req.Header.Set("Upgrade-Insecure-Requests", "1")
req.Header.Set("Sec-Fetch-Dest", "document")
req.Header.Set("Sec-Fetch-Mode", "navigate")
req.Header.Set("Sec-Fetch-Site", "same-origin")
req.Header.Set("Priority", "u=0, i")
req.Header.Set("Pragma", "no-cache")
req.Header.Set("Cache-Control", "no-cache")
req.Header.Set("TE", "trailers")
resp, err := HTTPClient.Do(req)
if err != nil {
return "", fmt.Errorf("failed to send request: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("failed to get response: %s", resp.Status)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("failed to read response body: %w", err)
}
matches := captionPattern.FindStringSubmatch(string(body))
if len(matches) < 2 {
// post has no caption most likely
return "", nil
} else {
return html.UnescapeString(matches[1]), nil
}
}