Init
This commit is contained in:
parent
264c97183e
commit
3faede7b1c
74 changed files with 6228 additions and 1 deletions
169
ext/instagram/main.go
Normal file
169
ext/instagram/main.go
Normal file
|
@ -0,0 +1,169 @@
|
|||
package instagram
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"govd/enums"
|
||||
"govd/models"
|
||||
"govd/util"
|
||||
"io"
|
||||
"net/http"
|
||||
"regexp"
|
||||
|
||||
"github.com/quic-go/quic-go"
|
||||
"github.com/quic-go/quic-go/http3"
|
||||
)
|
||||
|
||||
// as a public service, we can't use the official API
|
||||
// so we use igram.world API, a third-party service
|
||||
// that provides a similar functionality
|
||||
// feel free to open PR, if you want to
|
||||
// add support for the official Instagram API
|
||||
|
||||
const (
|
||||
apiHostname = "api.igram.world"
|
||||
apiKey = "aaeaf2805cea6abef3f9d2b6a666fce62fd9d612a43ab772bb50ce81455112e0"
|
||||
apiTimestamp = "1742201548873"
|
||||
|
||||
// todo: Implement a proper way
|
||||
// to get the API key and timestamp
|
||||
)
|
||||
|
||||
var HTTPClient = &http.Client{
|
||||
Transport: &http3.Transport{
|
||||
TLSClientConfig: &tls.Config{
|
||||
InsecureSkipVerify: true,
|
||||
},
|
||||
QUICConfig: &quic.Config{
|
||||
MaxIncomingStreams: -1,
|
||||
EnableDatagrams: true,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
var Extractor = &models.Extractor{
|
||||
Name: "Instagram",
|
||||
CodeName: "instagram",
|
||||
Type: enums.ExtractorTypeSingle,
|
||||
Category: enums.ExtractorCategorySocial,
|
||||
URLPattern: regexp.MustCompile(`https:\/\/www\.instagram\.com\/(reel|p|tv)\/(?P<id>[a-zA-Z0-9_-]+)`),
|
||||
IsRedirect: false,
|
||||
|
||||
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
|
||||
mediaList, err := MediaListFromAPI(ctx, false)
|
||||
return &models.ExtractorResponse{
|
||||
MediaList: mediaList,
|
||||
}, err
|
||||
},
|
||||
}
|
||||
|
||||
var StoriesExtractor = &models.Extractor{
|
||||
Name: "Instagram Stories",
|
||||
CodeName: "instagram:stories",
|
||||
Type: enums.ExtractorTypeSingle,
|
||||
Category: enums.ExtractorCategorySocial,
|
||||
URLPattern: regexp.MustCompile(`https:\/\/www\.instagram\.com\/stories\/[a-zA-Z0-9._]+\/(?P<id>\d+)`),
|
||||
IsRedirect: false,
|
||||
|
||||
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
|
||||
mediaList, err := MediaListFromAPI(ctx, true)
|
||||
return &models.ExtractorResponse{
|
||||
MediaList: mediaList,
|
||||
}, err
|
||||
},
|
||||
}
|
||||
|
||||
func MediaListFromAPI(
|
||||
ctx *models.DownloadContext,
|
||||
stories bool,
|
||||
) ([]*models.Media, error) {
|
||||
var mediaList []*models.Media
|
||||
postURL := ctx.MatchedContentURL
|
||||
details, err := GetVideoAPI(postURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get post: %w", err)
|
||||
}
|
||||
var caption string
|
||||
if !stories {
|
||||
caption, err = GetPostCaption(postURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get caption: %w", err)
|
||||
}
|
||||
}
|
||||
for _, item := range details.Items {
|
||||
media := ctx.Extractor.NewMedia(
|
||||
ctx.MatchedContentID,
|
||||
ctx.MatchedContentURL,
|
||||
)
|
||||
media.SetCaption(caption)
|
||||
urlObj := item.URL[0]
|
||||
contentURL, err := GetCDNURL(urlObj.URL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
thumbnailURL, err := GetCDNURL(item.Thumb)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fileExt := urlObj.Ext
|
||||
formatID := urlObj.Type
|
||||
switch fileExt {
|
||||
case "mp4":
|
||||
media.AddFormat(&models.MediaFormat{
|
||||
Type: enums.MediaTypeVideo,
|
||||
FormatID: formatID,
|
||||
URL: []string{contentURL},
|
||||
VideoCodec: enums.MediaCodecAVC,
|
||||
AudioCodec: enums.MediaCodecAAC,
|
||||
Thumbnail: []string{thumbnailURL},
|
||||
},
|
||||
)
|
||||
case "jpg", "webp", "heic", "jpeg":
|
||||
media.AddFormat(&models.MediaFormat{
|
||||
Type: enums.MediaTypePhoto,
|
||||
FormatID: formatID,
|
||||
URL: []string{contentURL},
|
||||
})
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown format: %s", fileExt)
|
||||
}
|
||||
mediaList = append(mediaList, media)
|
||||
}
|
||||
|
||||
return mediaList, nil
|
||||
}
|
||||
|
||||
func GetVideoAPI(contentURL string) (*IGramResponse, error) {
|
||||
apiURL := fmt.Sprintf(
|
||||
"https://%s/api/convert",
|
||||
apiHostname,
|
||||
)
|
||||
payload, err := BuildSignedPayload(contentURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to build signed payload: %w", err)
|
||||
}
|
||||
req, err := http.NewRequest("POST", apiURL, payload)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("User-Agent", util.ChromeUA)
|
||||
|
||||
resp, err := HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("failed to get response: %s", resp.Status)
|
||||
}
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response body: %w", err)
|
||||
}
|
||||
response, err := ParseIGramResponse(body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
return response, nil
|
||||
}
|
19
ext/instagram/models.go
Normal file
19
ext/instagram/models.go
Normal file
|
@ -0,0 +1,19 @@
|
|||
package instagram
|
||||
|
||||
type IGramResponse struct {
|
||||
Items []*IGramMedia `json:"items"`
|
||||
}
|
||||
|
||||
type IGramMedia struct {
|
||||
URL []*MediaURL `json:"url"`
|
||||
Thumb string `json:"thumb"`
|
||||
Hosting string `json:"hosting"`
|
||||
Timestamp int `json:"timestamp"`
|
||||
}
|
||||
|
||||
type MediaURL struct {
|
||||
URL string `json:"url"`
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
Ext string `json:"ext"`
|
||||
}
|
139
ext/instagram/util.go
Normal file
139
ext/instagram/util.go
Normal file
|
@ -0,0 +1,139 @@
|
|||
package instagram
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"govd/util"
|
||||
"html"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
var captionPattern = regexp.MustCompile(
|
||||
`(?s)<meta property="og:title" content=".*?: "(.*?)""`,
|
||||
)
|
||||
|
||||
func BuildSignedPayload(contentURL string) (io.Reader, error) {
|
||||
timestamp := fmt.Sprintf("%d", time.Now().UnixMilli())
|
||||
hash := sha256.New()
|
||||
_, err := io.WriteString(
|
||||
hash,
|
||||
contentURL+timestamp+apiKey,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error writing to SHA256 hash: %w", err)
|
||||
}
|
||||
secretBytes := hash.Sum(nil)
|
||||
secretString := fmt.Sprintf("%x", secretBytes)
|
||||
secretString = strings.ToLower(secretString)
|
||||
payload := map[string]string{
|
||||
"url": contentURL,
|
||||
"ts": timestamp,
|
||||
"_ts": apiTimestamp,
|
||||
"_tsc": "0", // ?
|
||||
"_s": secretString,
|
||||
}
|
||||
parsedPayload, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error marshalling payload: %w", err)
|
||||
}
|
||||
reader := strings.NewReader(string(parsedPayload))
|
||||
return reader, nil
|
||||
}
|
||||
|
||||
func ParseIGramResponse(body []byte) (*IGramResponse, error) {
|
||||
var rawResponse interface{}
|
||||
if err := json.Unmarshal(body, &rawResponse); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode response: %w", err)
|
||||
}
|
||||
switch rawResponse.(type) {
|
||||
case []interface{}:
|
||||
// array of IGramMedia
|
||||
var media []*IGramMedia
|
||||
if err := json.Unmarshal(body, &media); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode response: %w", err)
|
||||
}
|
||||
return &IGramResponse{
|
||||
Items: media,
|
||||
}, nil
|
||||
case map[string]interface{}:
|
||||
// single IGramMedia
|
||||
var media IGramMedia
|
||||
if err := json.Unmarshal(body, &media); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode response: %w", err)
|
||||
}
|
||||
return &IGramResponse{
|
||||
Items: []*IGramMedia{&media},
|
||||
}, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected response type: %T", rawResponse)
|
||||
}
|
||||
}
|
||||
|
||||
func GetCDNURL(contentURL string) (string, error) {
|
||||
parsedUrl, err := url.Parse(contentURL)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("can't parse igram URL: %v", err)
|
||||
}
|
||||
queryParams, err := url.ParseQuery(parsedUrl.RawQuery)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("can't unescape igram URL: %v", err)
|
||||
}
|
||||
cdnURL := queryParams.Get("uri")
|
||||
return cdnURL, nil
|
||||
}
|
||||
|
||||
func GetPostCaption(
|
||||
postURL string,
|
||||
) (string, error) {
|
||||
req, err := http.NewRequest(
|
||||
http.MethodGet,
|
||||
postURL,
|
||||
nil,
|
||||
)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
req.Header.Set("User-Agent", util.ChromeUA)
|
||||
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
|
||||
req.Header.Set("Accept-Language", "it-IT,it;q=0.8,en-US;q=0.5,en;q=0.3")
|
||||
req.Header.Set("Referer", "https://www.instagram.com/accounts/onetap/?next=%2F")
|
||||
req.Header.Set("Alt-Used", "www.instagram.com")
|
||||
req.Header.Set("Connection", "keep-alive")
|
||||
req.Header.Set("Cookie", `csrftoken=Ib2Zuvf1y9HkDwXFxkdang; sessionid=8569455296%3AIFQiov2eYfTdSd%3A19%3AAYfVHnaxecWGWhyzxvz60vu5qLn05DyKgN_tTZUXTA; ds_user_id=8569455296; mid=Z_j1vQAEAAGVUE3KuxMR7vBonGBw; ig_did=BC48C8B7-D71B-49EF-8195-F9DE37A57B49; rur="CLN\0548569455296\0541775905137:01f7ebda5b896815e9279bb86a572db6bdc8ebccf3e1f8d5327e2bc5ca187fd5cd932b66"; wd=513x594; datr=x_X4Z_CHqpwtjaRKq7PtCNu3`)
|
||||
req.Header.Set("Upgrade-Insecure-Requests", "1")
|
||||
req.Header.Set("Sec-Fetch-Dest", "document")
|
||||
req.Header.Set("Sec-Fetch-Mode", "navigate")
|
||||
req.Header.Set("Sec-Fetch-Site", "same-origin")
|
||||
req.Header.Set("Priority", "u=0, i")
|
||||
req.Header.Set("Pragma", "no-cache")
|
||||
req.Header.Set("Cache-Control", "no-cache")
|
||||
req.Header.Set("TE", "trailers")
|
||||
|
||||
resp, err := HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("failed to get response: %s", resp.Status)
|
||||
}
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to read response body: %w", err)
|
||||
}
|
||||
|
||||
matches := captionPattern.FindStringSubmatch(string(body))
|
||||
if len(matches) < 2 {
|
||||
// post has no caption most likely
|
||||
return "", nil
|
||||
} else {
|
||||
return html.UnescapeString(matches[1]), nil
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue