instagram: new extraction method
the new extraction method first tries to fetch content directly from the instagram graphql API, then falls back to the html embed page. if both of these methods fail, it relies on a 3rd-party service
This commit is contained in:
parent
1b3c426808
commit
93e964a28b
10 changed files with 494 additions and 110 deletions
|
@ -1,7 +1,7 @@
|
|||
# govd
|
||||
a telegram bot for downloading media from various platforms
|
||||
|
||||
this project was born after the discontinuation of a highly popular bot known as uvd, and draws significant inspiration from [yt-dlp](https://github.com/yt-dlp/yt-dlp)
|
||||
this project draws significant inspiration from [yt-dlp](https://github.com/yt-dlp/yt-dlp)
|
||||
|
||||
- official instance: [@govd_bot](https://t.me/govd_bot)
|
||||
- support group: [govdsupport](https://t.me/govdsupport)
|
||||
|
@ -28,7 +28,7 @@ this project was born after the discontinuation of a highly popular bot known as
|
|||
# installation
|
||||
## build
|
||||
> [!NOTE]
|
||||
> there's no official support for windows yet. if you want to run the bot on it, please follow [docker installation](#docker-recommended)
|
||||
> there's no official support for windows yet. if you want to run the bot on it, please follow [docker installation](#docker-recommended).
|
||||
|
||||
1. clone the repository
|
||||
```bash
|
||||
|
@ -54,7 +54,7 @@ this project was born after the discontinuation of a highly popular bot known as
|
|||
```
|
||||
|
||||
2. update the `.env` file to ensure the database properties match the environment variables defined for the mariadb service in the `docker-compose.yml` file.
|
||||
for enhanced security, it is recommended to change the `MYSQL_PASSWORD` property in `docker-compose.yaml` and ensure `DB_PASSWORD` in `.env` matches it.
|
||||
for enhanced security, it is recommended to change the `MARIADB_PASSWORD` property in `docker-compose.yml` and ensure `DB_PASSWORD` in `.env` matches it.
|
||||
|
||||
the following line in the `.env` file **must** be set as:
|
||||
|
||||
|
|
|
@ -3,7 +3,6 @@ package core
|
|||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
|
@ -238,11 +237,7 @@ func StartInlineTask(
|
|||
IsPersonal: true,
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
log.Println("failed to answer inline query:", err)
|
||||
}
|
||||
if !ok {
|
||||
log.Println("failed to answer inline query")
|
||||
if err != nil || !ok {
|
||||
return nil
|
||||
}
|
||||
SetTask(taskID, dlCtx)
|
||||
|
|
|
@ -10,9 +10,7 @@ import (
|
|||
|
||||
var startMessage = "govd is an open-source telegram bot " +
|
||||
"that allows you to download medias from " +
|
||||
"various platforms. the project born after " +
|
||||
"the discontinuation of an " +
|
||||
"highly popular bot, known as UVD."
|
||||
"various platforms."
|
||||
|
||||
func getStartKeyboard(bot *gotgbot.Bot) gotgbot.InlineKeyboardMarkup {
|
||||
return gotgbot.InlineKeyboardMarkup{
|
||||
|
|
|
@ -10,24 +10,7 @@ import (
|
|||
"regexp"
|
||||
)
|
||||
|
||||
// as a public service, we can't use the official API
|
||||
// so we use igram.world API, a third-party service
|
||||
// that provides a similar functionality
|
||||
// feel free to open PR, if you want to
|
||||
// add support for the official Instagram API
|
||||
|
||||
const (
|
||||
apiHostname = "api.igram.world"
|
||||
apiKey = "aaeaf2805cea6abef3f9d2b6a666fce62fd9d612a43ab772bb50ce81455112e0"
|
||||
apiTimestamp = "1742201548873"
|
||||
|
||||
// todo: Implement a proper way
|
||||
// to get the API key and timestamp
|
||||
)
|
||||
|
||||
var instagramHost = []string{
|
||||
"instagram.com",
|
||||
}
|
||||
var instagramHost = []string{"instagram.com"}
|
||||
|
||||
var Extractor = &models.Extractor{
|
||||
Name: "Instagram",
|
||||
|
@ -39,10 +22,28 @@ var Extractor = &models.Extractor{
|
|||
IsRedirect: false,
|
||||
|
||||
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
|
||||
mediaList, err := MediaListFromAPI(ctx, false)
|
||||
// method 1: get media from GQL web API
|
||||
mediaList, err := GetGQLMediaList(ctx)
|
||||
if err == nil && len(mediaList) > 0 {
|
||||
return &models.ExtractorResponse{
|
||||
MediaList: mediaList,
|
||||
}, err
|
||||
}, nil
|
||||
}
|
||||
// method 2: get media from embed page
|
||||
mediaList, err = GetEmbedMediaList(ctx)
|
||||
if err == nil && len(mediaList) > 0 {
|
||||
return &models.ExtractorResponse{
|
||||
MediaList: mediaList,
|
||||
}, nil
|
||||
}
|
||||
// method 3: get media from 3rd party service (unlikely)
|
||||
mediaList, err = GetIGramMediaList(ctx)
|
||||
if err == nil && len(mediaList) > 0 {
|
||||
return &models.ExtractorResponse{
|
||||
MediaList: mediaList,
|
||||
}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("failed to extract media: all methods failed")
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -56,7 +57,7 @@ var StoriesExtractor = &models.Extractor{
|
|||
IsRedirect: false,
|
||||
|
||||
Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) {
|
||||
mediaList, err := MediaListFromAPI(ctx, true)
|
||||
mediaList, err := GetIGramMediaList(ctx)
|
||||
return &models.ExtractorResponse{
|
||||
MediaList: mediaList,
|
||||
}, err
|
||||
|
@ -88,31 +89,63 @@ var ShareURLExtractor = &models.Extractor{
|
|||
},
|
||||
}
|
||||
|
||||
func MediaListFromAPI(
|
||||
func GetGQLMediaList(
|
||||
ctx *models.DownloadContext,
|
||||
stories bool,
|
||||
) ([]*models.Media, error) {
|
||||
client := util.GetHTTPClient(ctx.Extractor.CodeName)
|
||||
graphData, err := GetGQLData(ctx, ctx.MatchedContentID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get graph data: %w", err)
|
||||
}
|
||||
return ParseGQLMedia(ctx, graphData.ShortcodeMedia)
|
||||
}
|
||||
|
||||
func GetEmbedMediaList(
|
||||
ctx *models.DownloadContext,
|
||||
) ([]*models.Media, error) {
|
||||
session := util.GetHTTPClient(ctx.Extractor.CodeName)
|
||||
embedURL := fmt.Sprintf("https://www.instagram.com/p/%s/embed/captioned", ctx.MatchedContentID)
|
||||
req, err := http.NewRequest(
|
||||
http.MethodGet,
|
||||
embedURL,
|
||||
nil,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
for key, value := range igHeaders {
|
||||
req.Header.Set(key, value)
|
||||
}
|
||||
resp, err := session.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("failed to get embed page: %s", resp.Status)
|
||||
}
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response body: %w", err)
|
||||
}
|
||||
graphData, err := ParseEmbedGQL(body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse embed page: %w", err)
|
||||
}
|
||||
return ParseGQLMedia(ctx, graphData)
|
||||
}
|
||||
|
||||
func GetIGramMediaList(ctx *models.DownloadContext) ([]*models.Media, error) {
|
||||
var mediaList []*models.Media
|
||||
postURL := ctx.MatchedContentURL
|
||||
details, err := GetVideoAPI(client, postURL)
|
||||
details, err := GetFromIGram(ctx, postURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get post: %w", err)
|
||||
}
|
||||
var caption string
|
||||
if !stories {
|
||||
caption, err = GetPostCaption(client, postURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get caption: %w", err)
|
||||
}
|
||||
}
|
||||
for _, item := range details.Items {
|
||||
media := ctx.Extractor.NewMedia(
|
||||
ctx.MatchedContentID,
|
||||
ctx.MatchedContentURL,
|
||||
)
|
||||
media.SetCaption(caption)
|
||||
urlObj := item.URL[0]
|
||||
contentURL, err := GetCDNURL(urlObj.URL)
|
||||
if err != nil {
|
||||
|
@ -150,26 +183,27 @@ func MediaListFromAPI(
|
|||
return mediaList, nil
|
||||
}
|
||||
|
||||
func GetVideoAPI(
|
||||
client models.HTTPClient,
|
||||
func GetFromIGram(
|
||||
ctx *models.DownloadContext,
|
||||
contentURL string,
|
||||
) (*IGramResponse, error) {
|
||||
session := util.GetHTTPClient(ctx.Extractor.CodeName)
|
||||
apiURL := fmt.Sprintf(
|
||||
"https://%s/api/convert",
|
||||
apiHostname,
|
||||
igramHostname,
|
||||
)
|
||||
payload, err := BuildSignedPayload(contentURL)
|
||||
payload, err := BuildIGramPayload(contentURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to build signed payload: %w", err)
|
||||
}
|
||||
req, err := http.NewRequest(http.MethodPost, apiURL, payload)
|
||||
req, err := http.NewRequest("POST", apiURL, payload)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("User-Agent", util.ChromeUA)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
resp, err := session.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
|
|
|
@ -1,17 +1,107 @@
|
|||
package instagram
|
||||
|
||||
// Models for the JSON returned by Instagram's web GraphQL endpoint and for
// the JSON embedded in the post embed page.
type (
	// GraphQLResponse is the top-level envelope of a GraphQL query response.
	GraphQLResponse struct {
		Data   *GraphQLData `json:"data"`
		Status string       `json:"status"`
	}

	// GraphQLData is the "data" object of a GraphQLResponse.
	GraphQLData struct {
		ShortcodeMedia *Media `json:"xdt_shortcode_media"`
	}

	// ContextJSON is the object decoded from the embed page's contextJSON
	// string.
	ContextJSON struct {
		Context *Context `json:"context"`
		GqlData *GqlData `json:"gql_data"`
	}

	// GqlData is the "gql_data" object of a ContextJSON.
	GqlData struct {
		ShortcodeMedia *Media `json:"shortcode_media"`
	}

	// Media is a single media node. Typename tells whether the node is a
	// video, an image or a sidecar whose children are listed in
	// EdgeSidecarToChildren.
	Media struct {
		Typename              string                 `json:"__typename"`
		CommenterCount        int                    `json:"commenter_count"`
		Dimensions            *Dimensions            `json:"dimensions"`
		DisplayResources      []*DisplayResources    `json:"display_resources"`
		EdgeMediaToCaption    *EdgeMediaToCaption    `json:"edge_media_to_caption"`
		EdgeSidecarToChildren *EdgeSidecarToChildren `json:"edge_sidecar_to_children"`
		DisplayURL            string                 `json:"display_url"`
		ID                    string                 `json:"id"`
		IsVideo               bool                   `json:"is_video"`
		MediaPreview          string                 `json:"media_preview"`
		Shortcode             string                 `json:"shortcode"`
		TakenAtTimestamp      int                    `json:"taken_at_timestamp"`
		Title                 string                 `json:"title"`
		VideoURL              string                 `json:"video_url"`
		VideoViewCount        int                    `json:"video_view_count"`
	}

	// Dimensions is the pixel size of a media node.
	Dimensions struct {
		Height int `json:"height"`
		Width  int `json:"width"`
	}

	// DisplayResources is one entry of a media node's "display_resources"
	// list: a source URL with its configured width and height.
	DisplayResources struct {
		ConfigHeight int    `json:"config_height"`
		ConfigWidth  int    `json:"config_width"`
		Src          string `json:"src"`
	}

	// EdgeMediaToCaption lists the caption edges of a media node.
	EdgeMediaToCaption struct {
		Edges []*Edges `json:"edges"`
	}

	// Edges is a caption edge wrapping a text Node.
	Edges struct {
		Node *Node `json:"node"`
	}

	// Node carries the text of a caption edge.
	Node struct {
		Text string `json:"text"`
	}

	// EdgeSidecarToChildren lists the child edges of a sidecar media node.
	EdgeSidecarToChildren struct {
		Edges []*EdgeNode `json:"edges"`
	}

	// EdgeNode is a sidecar child edge wrapping a Media node.
	EdgeNode struct {
		Node *Media `json:"node"`
	}

	// Posts holds a "src" / "srcset" pair.
	Posts struct {
		Src    string `json:"src"`
		Srcset string `json:"srcset"`
	}

	// Context is the "context" object of a ContextJSON.
	Context struct {
		AltText               string `json:"alt_text"`
		Caption               string `json:"caption"`
		CaptionTitleLinkified string `json:"caption_title_linkified"`
		DisplaySrc            string `json:"display_src"`
		DisplaySrcset         string `json:"display_srcset"`
		IsIgtv                bool   `json:"is_igtv"`
		LikesCount            int    `json:"likes_count"`
		Media                 *Media `json:"media"`
		MediaPermalink        string `json:"media_permalink"`
		RequestID             string `json:"request_id"`
		Shortcode             string `json:"shortcode"`
		Title                 string `json:"title"`
		Type                  string `json:"type"`
		Username              string `json:"username"`
		Verified              bool   `json:"verified"`
		VideoViews            int    `json:"video_views"`
	}
)
|
||||
|
||||
type IGramResponse struct {
|
||||
Items []*IGramMedia `json:"items"`
|
||||
}
|
||||
|
||||
type IGramMedia struct {
|
||||
URL []*MediaURL `json:"url"`
|
||||
URL []*IGramMediaURL `json:"url"`
|
||||
Thumb string `json:"thumb"`
|
||||
Hosting string `json:"hosting"`
|
||||
Timestamp int `json:"timestamp"`
|
||||
}
|
||||
|
||||
type MediaURL struct {
|
||||
type IGramMediaURL struct {
|
||||
URL string `json:"url"`
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
|
|
|
@ -1,13 +1,15 @@
|
|||
package instagram
|
||||
|
||||
import (
|
||||
"crypto/rand"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"govd/enums"
|
||||
"govd/models"
|
||||
"govd/util"
|
||||
"html"
|
||||
"io"
|
||||
"math/big"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
|
@ -16,11 +18,21 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/bytedance/sonic"
|
||||
"github.com/titanous/json5"
|
||||
)
|
||||
|
||||
// Instagram web GraphQL settings.
const (
	// graphQLEndpoint is the URL the GraphQL query is POSTed to.
	graphQLEndpoint = "https://www.instagram.com/graphql/query/"
	// polarisAction is sent as the request's friendly name, both as a form
	// field and as a header.
	polarisAction = "PolarisPostActionLoadPostQueryQuery"
)

// igram.world (3rd-party service) settings.
const (
	// igramHostname is the host of the igram convert API.
	igramHostname = "api.igram.world"
	// igramKey is appended to the content URL and timestamp before hashing
	// to sign the igram payload.
	igramKey = "aaeaf2805cea6abef3f9d2b6a666fce62fd9d612a43ab772bb50ce81455112e0"
	// igramTimestamp is the fixed value sent as "_ts" in the igram payload.
	igramTimestamp = "1742201548873"
)
|
||||
|
||||
var (
|
||||
captionPattern = regexp.MustCompile(
|
||||
`(?s)<meta property="og:title" content=".*?: "(.*?)""`)
|
||||
embedPattern = regexp.MustCompile(
|
||||
`new ServerJS\(\)\);s\.handle\(({.*})\);requireLazy`)
|
||||
|
||||
igHeaders = map[string]string{
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
||||
|
@ -40,12 +52,129 @@ var (
|
|||
}
|
||||
)
|
||||
|
||||
func BuildSignedPayload(contentURL string) (io.Reader, error) {
|
||||
func ParseGQLMedia(
|
||||
ctx *models.DownloadContext,
|
||||
data *Media,
|
||||
) ([]*models.Media, error) {
|
||||
var mediaList []*models.Media
|
||||
|
||||
var caption string
|
||||
if data.EdgeMediaToCaption != nil && len(data.EdgeMediaToCaption.Edges) > 0 {
|
||||
caption = data.EdgeMediaToCaption.Edges[0].Node.Text
|
||||
}
|
||||
|
||||
mediaType := data.Typename
|
||||
contentID := ctx.MatchedContentID
|
||||
contentURL := ctx.MatchedContentURL
|
||||
|
||||
switch mediaType {
|
||||
case "GraphVideo", "XDTGraphVideo":
|
||||
media := ctx.Extractor.NewMedia(contentID, contentURL)
|
||||
media.SetCaption(caption)
|
||||
|
||||
media.AddFormat(&models.MediaFormat{
|
||||
FormatID: "video",
|
||||
Type: enums.MediaTypeVideo,
|
||||
VideoCodec: enums.MediaCodecAVC,
|
||||
AudioCodec: enums.MediaCodecAAC,
|
||||
URL: []string{data.VideoURL},
|
||||
Thumbnail: []string{data.DisplayURL},
|
||||
Width: int64(data.Dimensions.Width),
|
||||
Height: int64(data.Dimensions.Height),
|
||||
})
|
||||
|
||||
mediaList = append(mediaList, media)
|
||||
|
||||
case "GraphImage", "XDTGraphImage":
|
||||
media := ctx.Extractor.NewMedia(contentID, contentURL)
|
||||
media.SetCaption(caption)
|
||||
|
||||
media.AddFormat(&models.MediaFormat{
|
||||
FormatID: "image",
|
||||
Type: enums.MediaTypePhoto,
|
||||
URL: []string{data.DisplayURL},
|
||||
})
|
||||
|
||||
mediaList = append(mediaList, media)
|
||||
|
||||
case "GraphSidecar", "XDTGraphSidecar":
|
||||
if data.EdgeSidecarToChildren != nil && len(data.EdgeSidecarToChildren.Edges) > 0 {
|
||||
for _, edge := range data.EdgeSidecarToChildren.Edges {
|
||||
node := edge.Node
|
||||
media := ctx.Extractor.NewMedia(contentID, contentURL)
|
||||
media.SetCaption(caption)
|
||||
|
||||
switch node.Typename {
|
||||
case "GraphVideo", "XDTGraphVideo":
|
||||
media.AddFormat(&models.MediaFormat{
|
||||
FormatID: "video",
|
||||
Type: enums.MediaTypeVideo,
|
||||
VideoCodec: enums.MediaCodecAVC,
|
||||
AudioCodec: enums.MediaCodecAAC,
|
||||
URL: []string{node.VideoURL},
|
||||
Thumbnail: []string{node.DisplayURL},
|
||||
Width: int64(node.Dimensions.Width),
|
||||
Height: int64(node.Dimensions.Height),
|
||||
})
|
||||
|
||||
case "GraphImage", "XDTGraphImage":
|
||||
|
||||
media.AddFormat(&models.MediaFormat{
|
||||
FormatID: "image",
|
||||
Type: enums.MediaTypePhoto,
|
||||
URL: []string{node.DisplayURL},
|
||||
})
|
||||
}
|
||||
|
||||
mediaList = append(mediaList, media)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return mediaList, nil
|
||||
}
|
||||
|
||||
func ParseEmbedGQL(
|
||||
body []byte,
|
||||
) (*Media, error) {
|
||||
match := embedPattern.FindStringSubmatch(string(body))
|
||||
if len(match) < 2 {
|
||||
return nil, fmt.Errorf("failed to find JSON in response")
|
||||
}
|
||||
jsonData := match[1]
|
||||
|
||||
var data map[string]interface{}
|
||||
if err := json5.Unmarshal([]byte(jsonData), &data); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal JSON: %w", err)
|
||||
}
|
||||
igCtx := util.TraverseJSON(data, "contextJSON")
|
||||
if igCtx == nil {
|
||||
return nil, fmt.Errorf("contextJSON not found in data")
|
||||
}
|
||||
var ctxJSON ContextJSON
|
||||
switch v := igCtx.(type) {
|
||||
case string:
|
||||
if err := json5.Unmarshal([]byte(v), &ctxJSON); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal contextJSON: %w", err)
|
||||
}
|
||||
default:
|
||||
return nil, fmt.Errorf("contextJSON is not a string")
|
||||
}
|
||||
if ctxJSON.GqlData == nil {
|
||||
return nil, fmt.Errorf("gql_data is nil")
|
||||
}
|
||||
if ctxJSON.GqlData.ShortcodeMedia == nil {
|
||||
return nil, fmt.Errorf("media is nil")
|
||||
}
|
||||
return ctxJSON.GqlData.ShortcodeMedia, nil
|
||||
}
|
||||
|
||||
func BuildIGramPayload(contentURL string) (io.Reader, error) {
|
||||
timestamp := strconv.FormatInt(time.Now().UnixMilli(), 10)
|
||||
hash := sha256.New()
|
||||
_, err := io.WriteString(
|
||||
hash,
|
||||
contentURL+timestamp+apiKey,
|
||||
contentURL+timestamp+igramKey,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error writing to SHA256 hash: %w", err)
|
||||
|
@ -56,7 +185,7 @@ func BuildSignedPayload(contentURL string) (io.Reader, error) {
|
|||
payload := map[string]string{
|
||||
"url": contentURL,
|
||||
"ts": timestamp,
|
||||
"_ts": apiTimestamp,
|
||||
"_ts": igramTimestamp,
|
||||
"_tsc": "0", // ?
|
||||
"_s": secretString,
|
||||
}
|
||||
|
@ -69,15 +198,14 @@ func BuildSignedPayload(contentURL string) (io.Reader, error) {
|
|||
}
|
||||
|
||||
func ParseIGramResponse(body []byte) (*IGramResponse, error) {
|
||||
var rawResponse interface{}
|
||||
//move to the start of the body
|
||||
// Use sonic's decoder to unmarshal the raw response
|
||||
var rawResponse any
|
||||
|
||||
if err := sonic.ConfigFastest.Unmarshal(body, &rawResponse); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode response1: %w", err)
|
||||
}
|
||||
|
||||
switch rawResponse.(type) {
|
||||
case []interface{}:
|
||||
case []any:
|
||||
// array of IGramMedia
|
||||
var media []*IGramMedia
|
||||
if err := sonic.ConfigFastest.Unmarshal(body, &media); err != nil {
|
||||
|
@ -86,7 +214,7 @@ func ParseIGramResponse(body []byte) (*IGramResponse, error) {
|
|||
return &IGramResponse{
|
||||
Items: media,
|
||||
}, nil
|
||||
case map[string]interface{}:
|
||||
case map[string]any:
|
||||
// single IGramMedia
|
||||
var media IGramMedia
|
||||
if err := sonic.ConfigFastest.Unmarshal(body, &media); err != nil {
|
||||
|
@ -113,53 +241,142 @@ func GetCDNURL(contentURL string) (string, error) {
|
|||
return cdnURL, nil
|
||||
}
|
||||
|
||||
func GetPostCaption(
|
||||
client models.HTTPClient,
|
||||
postURL string,
|
||||
) (string, error) {
|
||||
func GetGQLData(
|
||||
ctx *models.DownloadContext,
|
||||
shortcode string,
|
||||
) (*GraphQLData, error) {
|
||||
session := util.GetHTTPClient(ctx.Extractor.CodeName)
|
||||
graphHeaders, body, err := BuildGQLData()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to build GQL data: %w", err)
|
||||
}
|
||||
formData := url.Values{}
|
||||
for key, value := range body {
|
||||
formData.Set(key, value)
|
||||
}
|
||||
formData.Set("fb_api_caller_class", "RelayModern")
|
||||
formData.Set("fb_api_req_friendly_name", polarisAction)
|
||||
variables := map[string]any{
|
||||
"shortcode": shortcode,
|
||||
"fetch_tagged_user_count": nil,
|
||||
"hoisted_comment_id": nil,
|
||||
"hoisted_reply_id": nil,
|
||||
}
|
||||
variablesJSON, err := sonic.ConfigFastest.Marshal(variables)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal variables: %w", err)
|
||||
}
|
||||
formData.Set("variables", string(variablesJSON))
|
||||
formData.Set("server_timestamps", "true")
|
||||
formData.Set("doc_id", "8845758582119845") // idk what this is
|
||||
req, err := http.NewRequest(
|
||||
http.MethodGet,
|
||||
postURL,
|
||||
nil,
|
||||
http.MethodPost,
|
||||
graphQLEndpoint,
|
||||
strings.NewReader(formData.Encode()),
|
||||
)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to create request: %w", err)
|
||||
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
req.Header.Set("User-Agent", util.ChromeUA)
|
||||
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
|
||||
req.Header.Set("Accept-Language", "it-IT,it;q=0.8,en-US;q=0.5,en;q=0.3")
|
||||
req.Header.Set("Referer", "https://www.instagram.com/accounts/onetap/?next=%2F")
|
||||
req.Header.Set("Alt-Used", "www.instagram.com")
|
||||
req.Header.Set("Connection", "keep-alive")
|
||||
req.Header.Set("Upgrade-Insecure-Requests", "1")
|
||||
req.Header.Set("Sec-Fetch-Dest", "document")
|
||||
req.Header.Set("Sec-Fetch-Mode", "navigate")
|
||||
req.Header.Set("Sec-Fetch-Site", "same-origin")
|
||||
req.Header.Set("Priority", "u=0, i")
|
||||
req.Header.Set("Pragma", "no-cache")
|
||||
req.Header.Set("Cache-Control", "no-cache")
|
||||
req.Header.Set("TE", "trailers")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
for key, value := range igHeaders {
|
||||
req.Header.Set(key, value)
|
||||
}
|
||||
for key, value := range graphHeaders {
|
||||
req.Header.Set(key, value)
|
||||
}
|
||||
resp, err := session.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to send request: %w", err)
|
||||
return nil, fmt.Errorf("failed to send request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
// return an empty caption
|
||||
// probably 429 error
|
||||
return "", nil
|
||||
return nil, fmt.Errorf("invalid response code: %s", resp.Status)
|
||||
}
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to read response body: %w", err)
|
||||
var response GraphQLResponse
|
||||
decoder := sonic.ConfigFastest.NewDecoder(resp.Body)
|
||||
if err := decoder.Decode(&response); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
|
||||
matches := captionPattern.FindStringSubmatch(string(body))
|
||||
if len(matches) < 2 {
|
||||
// post has no caption most likely
|
||||
return "", nil
|
||||
if response.Data == nil {
|
||||
return nil, fmt.Errorf("data is nil")
|
||||
}
|
||||
return html.UnescapeString(matches[1]), nil
|
||||
if response.Status != "ok" {
|
||||
return nil, fmt.Errorf("status is not ok: %s", response.Status)
|
||||
}
|
||||
if response.Data.ShortcodeMedia == nil {
|
||||
return nil, fmt.Errorf("media is nil")
|
||||
}
|
||||
return response.Data, nil
|
||||
}
|
||||
|
||||
func BuildGQLData() (map[string]string, map[string]string, error) {
|
||||
const (
|
||||
domain = "www"
|
||||
requestID = "b"
|
||||
clientCapabilityGrade = "EXCELLENT"
|
||||
sessionInternalID = "7436540909012459023"
|
||||
apiVersion = "1"
|
||||
rolloutHash = "1019933358"
|
||||
appID = "936619743392459"
|
||||
bloksVersionID = "6309c8d03d8a3f47a1658ba38b304a3f837142ef5f637ebf1f8f52d4b802951e"
|
||||
asbdID = "129477"
|
||||
hiddenState = "20126.HYP:instagram_web_pkg.2.1...0"
|
||||
loggedIn = "0"
|
||||
cometRequestID = "7"
|
||||
appVersion = "0"
|
||||
pixelRatio = "2"
|
||||
buildType = "trunk"
|
||||
)
|
||||
session := "::" + util.RandomAlphaString(6)
|
||||
sessionData := util.RandomBase64(8)
|
||||
csrfToken := util.RandomBase64(32)
|
||||
deviceID := util.RandomBase64(24)
|
||||
machineID := util.RandomBase64(24)
|
||||
dynamicFlags := util.RandomBase64(154)
|
||||
clientSessionRnd := util.RandomBase64(154)
|
||||
jazoestBig, err := rand.Int(rand.Reader, big.NewInt(10000))
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("failed to generate jazoest: %w", err)
|
||||
}
|
||||
jazoest := strconv.FormatInt(jazoestBig.Int64()+1, 10)
|
||||
timestamp := strconv.FormatInt(time.Now().Unix(), 10)
|
||||
cookies := []string{
|
||||
"csrftoken=" + csrfToken,
|
||||
"ig_did=" + deviceID,
|
||||
"wd=1280x720",
|
||||
"dpr=2",
|
||||
"mid=" + machineID,
|
||||
"ig_nrcb=1",
|
||||
}
|
||||
headers := map[string]string{
|
||||
"x-ig-app-id": appID,
|
||||
"X-FB-LSD": sessionData,
|
||||
"X-CSRFToken": csrfToken,
|
||||
"X-Bloks-Version-Id": bloksVersionID,
|
||||
"x-asbd-id": asbdID,
|
||||
"cookie": strings.Join(cookies, "; "),
|
||||
"Content-Type": "application/x-www-form-urlencoded",
|
||||
"X-FB-Friendly-Name": polarisAction,
|
||||
}
|
||||
body := map[string]string{
|
||||
"__d": domain,
|
||||
"__a": apiVersion,
|
||||
"__s": session,
|
||||
"__hs": hiddenState,
|
||||
"__req": requestID,
|
||||
"__ccg": clientCapabilityGrade,
|
||||
"__rev": rolloutHash,
|
||||
"__hsi": sessionInternalID,
|
||||
"__dyn": dynamicFlags,
|
||||
"__csr": clientSessionRnd,
|
||||
"__user": loggedIn,
|
||||
"__comet_req": cometRequestID,
|
||||
"av": appVersion,
|
||||
"dpr": pixelRatio,
|
||||
"lsd": sessionData,
|
||||
"jazoest": jazoest,
|
||||
"__spin_r": rolloutHash,
|
||||
"__spin_b": buildType,
|
||||
"__spin_t": timestamp,
|
||||
}
|
||||
return headers, body, nil
|
||||
}
|
||||
|
|
3
go.mod
3
go.mod
|
@ -13,6 +13,7 @@ require (
|
|||
github.com/strukturag/libheif v1.19.7
|
||||
github.com/u2takey/ffmpeg-go v0.5.0
|
||||
golang.org/x/image v0.26.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
gorm.io/gorm v1.25.12
|
||||
)
|
||||
|
||||
|
@ -26,7 +27,6 @@ require (
|
|||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||
golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
||||
|
||||
require (
|
||||
|
@ -38,6 +38,7 @@ require (
|
|||
github.com/jinzhu/now v1.1.5 // indirect
|
||||
github.com/jmespath/go-jmespath v0.4.0 // indirect
|
||||
github.com/pkg/errors v0.9.1
|
||||
github.com/titanous/json5 v1.0.0
|
||||
github.com/u2takey/go-utils v0.3.1 // indirect
|
||||
golang.org/x/text v0.24.0 // indirect
|
||||
gorm.io/driver/mysql v1.5.7
|
||||
|
|
9
go.sum
9
go.sum
|
@ -51,6 +51,8 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o
|
|||
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
|
||||
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
||||
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
||||
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
|
||||
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||
|
@ -61,6 +63,8 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
|||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/robertkrimen/otto v0.2.1 h1:FVP0PJ0AHIjC+N4pKCG9yCDz6LHNPCwi/GKID5pGGF0=
|
||||
github.com/robertkrimen/otto v0.2.1/go.mod h1:UPwtJ1Xu7JrLcZjNWN8orJaM5n5YEtqL//farB5FlRY=
|
||||
github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
|
@ -76,6 +80,8 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf
|
|||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/strukturag/libheif v1.19.7 h1:XMfSJvmnucTbiS6CSxxZmpx5XSPjdqkpA3wiL6+I2Iw=
|
||||
github.com/strukturag/libheif v1.19.7/go.mod h1:E/PNRlmVtrtj9j2AvBZlrO4dsBDu6KfwDZn7X1Ce8Ks=
|
||||
github.com/titanous/json5 v1.0.0 h1:hJf8Su1d9NuI/ffpxgxQfxh/UiBFZX7bMPid0rIL/7s=
|
||||
github.com/titanous/json5 v1.0.0/go.mod h1:7JH1M8/LHKc6cyP5o5g3CSaRj+mBrIimTxzpvmckH8c=
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
||||
github.com/u2takey/ffmpeg-go v0.5.0 h1:r7d86XuL7uLWJ5mzSeQ03uvjfIhiJYvsRAJFCW4uklU=
|
||||
|
@ -104,7 +110,10 @@ golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
|
|||
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/sourcemap.v1 v1.0.5 h1:inv58fC9f9J3TK2Y2R1NPntXEn3/wjWHkonhIUODNTI=
|
||||
gopkg.in/sourcemap.v1 v1.0.5/go.mod h1:2RlvNNSMglmRrcvhfuzp4hQHwOtjxlbjX7UPY/GXb78=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
|
|
|
@ -2,7 +2,6 @@ package util
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"govd/config"
|
||||
"govd/models"
|
||||
|
@ -14,6 +13,8 @@ import (
|
|||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/bytedance/sonic"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -235,7 +236,7 @@ func parseProxyResponse(proxyResp *http.Response, originalReq *http.Request) (*h
|
|||
}
|
||||
|
||||
var response models.ProxyResponse
|
||||
if err := json.Unmarshal(body, &response); err != nil {
|
||||
if err := sonic.ConfigFastest.Unmarshal(body, &response); err != nil {
|
||||
return nil, fmt.Errorf("error parsing proxy response: %w", err)
|
||||
}
|
||||
|
||||
|
|
39
util/misc.go
39
util/misc.go
|
@ -1,6 +1,7 @@
|
|||
package util
|
||||
|
||||
import (
|
||||
"crypto/rand"
|
||||
"fmt"
|
||||
"govd/models"
|
||||
"net/http"
|
||||
|
@ -95,6 +96,44 @@ func GetLastError(err error) error {
|
|||
return lastErr
|
||||
}
|
||||
|
||||
// RandomBase64 returns a random string of the given length drawn from the
// URL-safe base64 alphabet (A-Z, a-z, 0-9, '-' and '_').
//
// Random bytes come from crypto/rand. The alphabet has exactly 64 symbols, so
// keeping the low 6 bits of each byte picks every symbol with equal
// probability. A length of zero or less yields the empty string. If the
// random source fails, the result is length copies of 'A'.
func RandomBase64(length int) string {
	const letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
	const mask = len(letters) - 1 // 63: the low 6 bits

	if length <= 0 {
		return ""
	}

	buf := make([]byte, length)
	if _, err := rand.Read(buf); err != nil {
		return strings.Repeat("A", length)
	}
	// Map each random byte to a symbol in place; no second buffer is needed.
	for i, b := range buf {
		buf[i] = letters[int(b)&mask]
	}
	return string(buf)
}
|
||||
|
||||
// RandomAlphaString returns a random string of the given length made of
// ASCII letters (A-Z, a-z).
//
// Random bytes come from crypto/rand. Bytes at or above the largest multiple
// of the alphabet size that fits in a byte's range (208 = 4 * 52) are
// discarded, so the remaining values 0..207 map onto each letter exactly four
// times and the result is unbiased. A length of zero or less yields the empty
// string. If the random source fails, the result is length copies of 'a'.
func RandomAlphaString(length int) string {
	const letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
	const limit = 256 - 256%len(letters) // 208: first byte value that must be rejected

	if length <= 0 {
		return ""
	}

	result := make([]byte, 0, length)
	scratch := make([]byte, length)
	for len(result) < length {
		// Draw only as many bytes as are still missing, so result can never
		// grow past length; rejected bytes are made up in the next round.
		chunk := scratch[:length-len(result)]
		if _, err := rand.Read(chunk); err != nil {
			return strings.Repeat("a", length)
		}
		for _, b := range chunk {
			if int(b) >= limit {
				continue // avoid modulo bias
			}
			result = append(result, letters[int(b)%len(letters)])
		}
	}
	return string(result)
}
|
||||
|
||||
func ParseCookieFile(fileName string) ([]*http.Cookie, error) {
|
||||
cachedCookies, ok := cookiesCache[fileName]
|
||||
if ok {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue