diff --git a/README.md b/README.md index ce86e22..33ef7dd 100644 --- a/README.md +++ b/README.md @@ -1,93 +1,115 @@ # govd - a telegram bot for downloading media from various platforms -this project was born after the discontinuation of a highly popular bot known as UVD, and draws significant inspiration from [yt-dlp](https://github.com/yt-dlp/yt-dlp) +this project was born after the discontinuation of a highly popular bot known as uvd, and draws significant inspiration from [yt-dlp](https://github.com/yt-dlp/yt-dlp) - official instance: [@govd_bot](https://t.me/govd_bot) - support group: [govdsupport](https://t.me/govdsupport) -## features +--- -- download media from various platforms -- download videos, photos, and audio -- inline mode support -- group chat support with customizable settings -- media caption support +* [dependencies](#dependencies) +* [installation](#installation) + * [build](#build) + * [docker](#docker-recommended) +* [options](#options) +* [authentication](#authentication) +* [proxying](#proxying) +* [todo](#todo) -## dependencies - -- ffmpeg >= 6.1.1 +# dependencies +- ffmpeg >= 7.x **(*)** - libheif >= 1.19.7 - pkg-config - mysql or mariadb -## installation +**note:** libav shared libraries must be installed on the system in order to build the bot. -```bash -git clone https://github.com/govdbot/govd.git -cd govd -# edit .env file with your bot token and database credentials -sh build.sh -``` +# installation +## build +_this method only works on linux and macos; if you want to build the bot on windows, check [docker installation](#docker-recommended) instead._ -## installation with docker +1. clone the repository + ```bash + git clone https://github.com/govdbot/govd.git && cd govd + ``` -first build the image using the dockerfile +2. edit the `.env` file to set the database properties. + for enhanced security, it is recommended to change the `DB_PASSWORD` property in the `.env` file. -```bash -docker build -t govd-bot . -``` +3. 
make sure your database is up and running. -next, update the .env file to ensure the database properties match the environment variables defined for the MariaDB service in the docker-compose.yml file -(while the default environment variables defined for the MariaDB service are acceptable, it is recommended to change the `MYSQL_PASSWORD` property in the docker-compose.yaml file for enhanced security and ensure that you also modify the the `DB_PASSWORD` property in the .env file to reflect this change) +4. build and run the bot: -the following line in the .env file MUST be set as shown below + ```bash + sh build.sh && ./govd + ``` -```env -DB_HOST=db -``` +## docker (recommended) +> [!WARNING] +> this method is currently not working due to a wrong version of the libav (ffmpeg) library in the docker image. feel free to open a PR if you can fix it. -finally run the compose to start all services +1. build the image using the dockerfile: -```bash -docker compose up -d -``` + ```bash + docker build -t govd-bot . + ``` -## env variables +2. update the `.env` file to ensure the database properties match the environment variables defined for the mariadb service in the `docker-compose.yml` file. + for enhanced security, it is recommended to change the `MYSQL_PASSWORD` property in `docker-compose.yaml` and ensure `DB_PASSWORD` in `.env` matches it. 
-| variable | description | default | -|-----------------------|--------------------------------------------------|----------------------------------------| -| `DB_HOST` | database host | `localhost` | -| `DB_PORT` | database port | `3306` | -| `DB_NAME` | database name | `govd` | -| `DB_USER` | database user | `govd` | -| `DB_PASSWORD` | database password | `password` | -| `BOT_API_URL`* | telegram bot api url | `https://api.telegram.org` | -| `BOT_TOKEN` | telegram bot token | `12345678:ABC-DEF1234ghIkl-zyx57W2P0s` | -| `CONCURRENT_UPDATES` | max concurrent updates handled by the bot | `50` | -| `LOG_DISPATCHER_ERRORS` | log dispatcher errors | `0` | -| `DOWNLOADS_DIR` | directory for downloaded files | `downloads` | -| `HTTP_PROXY` | http proxy (optional) | | -| `HTTPS_PROXY` | http proxy (optional) | | -| `NO_PROXY` | no proxy domains (optional) | | -| `REPO_URL` | project repository url | `https://github.com/govdbot/govd` | -| `PROFILER_PORT` | port for profiler http server (pprof) | `0` _(disabled)_ | + the following line in the `.env` file **must** be set as: -**note:** -to avoid limits on files, you should host your own telegram botapi. public bot instance is currently running under a botapi fork, [tdlight-telegram-bot-api](https://github.com/tdlight-team/tdlight-telegram-bot-api), but you can use the official botapi client too. + ``` + DB_HOST=db + ``` -## cookies +3. run the compose to start all services: -some extractors require cookies for download. 
to add your cookies, just insert a txt file in cookies folder (netscape format) + ```bash + docker compose up -d + ``` -## todo +# options +| variable | description | default | +|------------------------|----------------------------------------------|---------------------------------------| +| DB_HOST | database host | localhost | +| DB_PORT | database port | 3306 | +| DB_NAME | database name | govd | +| DB_USER | database user | govd | +| DB_PASSWORD | database password | password | +| BOT_API_URL | telegram bot api url | https://api.telegram.org | +| BOT_TOKEN | telegram bot token | 12345678:ABC-DEF1234ghIkl-zyx57W2P0s | +| CONCURRENT_UPDATES | max concurrent updates handled | 50 | +| LOG_DISPATCHER_ERRORS | log dispatcher errors | 0 | +| DOWNLOADS_DIR | directory for downloaded files | downloads | +| HTTP_PROXY [(?)](#proxying) | http proxy (optional) | | +| HTTPS_PROXY [(?)](#proxying) | https proxy (optional) | | +| NO_PROXY [(?)](#proxying) | no proxy domains (optional) | | +| EDGE_PROXY_URL [(?)](#proxying) | url of your edge proxy (optional) | | +| REPO_URL | project repository url | https://github.com/govdbot/govd | +| PROFILER_PORT | port for profiler http server (pprof) | 0 _(disabled)_ | +**note:** to avoid limits on files, you should host your own telegram botapi. public bot instance is currently running under a botapi fork, [tdlight-telegram-bot-api](https://github.com/tdlight-team/tdlight-telegram-bot-api), but you can use the official botapi client too. + +# proxying +there are two types of proxying available: http and edge. +- **http proxy**: this is a standard http proxy that can be used to route requests through a proxy server. you can set the `HTTP_PROXY` and `HTTPS_PROXY` environment variables to use this feature. (SOCKS5 is supported too) +- **edge proxy**: this is a custom proxy that is used to route requests through a specific url. you can set the `EDGE_PROXY_URL` environment variable to use this feature. 
this is useful for routing requests through a specific server or service. however, this feature is not fully implemented yet. + +**note:** by setting the `NO_PROXY` environment variable, you can specify domains that should not be proxied. + +# authentication +some extractors require authentication to access the content. you can easily use cookies for that; simply export cookies from your browser in netscape format and place them in the `cookies` folder (e.g. `cookies/reddit.txt`). you can easily export cookies using the _Get cookies.txt LOCALLY_ extension for your browser. + +# todo - [ ] add more extractors - [ ] switch to native libav - [ ] add tests - [ ] improve error handling -- [ ] add support for telegram wehbhooks +- [ ] add support for telegram webhooks - [ ] switch to pgsql (?) -- [ ] better API (?) -- [ ] better docs with multiple README +- [ ] better api (?) +- [ ] better docs with multiple readme + +--- \ No newline at end of file diff --git a/ext/instagram/main.go b/ext/instagram/main.go index 81d06cc..0fb3add 100644 --- a/ext/instagram/main.go +++ b/ext/instagram/main.go @@ -92,6 +92,8 @@ var ShareURLExtractor = &models.Extractor{ IsRedirect: true, Run: func(ctx *models.DownloadContext) (*models.ExtractorResponse, error) { + // temporary fix for public instances + edgeProxyClient := util.GetEdgeProxyClient() req, err := http.NewRequest( http.MethodGet, ctx.MatchedContentURL, @@ -100,14 +102,12 @@ var ShareURLExtractor = &models.Extractor{ if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) } - for k, v := range igHeaders { - req.Header.Set(k, v) - } - resp, err := httpSession.Do(req) + resp, err := edgeProxyClient.Do(req) if err != nil { return nil, fmt.Errorf("failed to send request: %w", err) } defer resp.Body.Close() + return &models.ExtractorResponse{ URL: resp.Request.URL.String(), }, nil diff --git a/models/edgeproxy.go b/models/edgeproxy.go new file mode 100644 index 0000000..6f139df --- /dev/null +++ b/models/edgeproxy.go @@ -0,0 
+1,11 @@
+package models
+
+// ProxyResponse is the JSON reply of the edge proxy; EdgeProxyClient.Do
+// decodes it and turns it back into an *http.Response.
+type ProxyResponse struct {
+	URL        string            `json:"url"`
+	StatusCode int               `json:"status_code"`
+	Text       string            `json:"text"`
+	Headers    map[string]string `json:"headers"`
+	Cookies    []string          `json:"cookies"`
+}
diff --git a/util/edgeproxy.go b/util/edgeproxy.go
new file mode 100644
index 0000000..d6641db
--- /dev/null
+++ b/util/edgeproxy.go
@@ -0,0 +1,133 @@
+package util
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"govd/models"
+	"io"
+	"net/http"
+	"net/url"
+	"os"
+	"sync"
+	"time"
+)
+
+var (
+	edgeProxyClient     *EdgeProxyClient
+	edgeProxyClientOnce sync.Once
+)
+
+// EdgeProxyClient is an HTTP client that sends requests through the edge
+// proxy configured via the EDGE_PROXY_URL environment variable.
+type EdgeProxyClient struct {
+	*http.Client
+}
+
+// GetEdgeProxyClient returns the shared EdgeProxyClient, creating it on
+// first use.
+func GetEdgeProxyClient() *EdgeProxyClient {
+	edgeProxyClientOnce.Do(func() {
+		edgeProxyClient = &EdgeProxyClient{
+			Client: &http.Client{
+				Transport: baseTransport,
+				Timeout:   60 * time.Second,
+			},
+		}
+	})
+	return edgeProxyClient
+}
+
+// Do forwards req to the edge proxy and rebuilds the upstream response from
+// the proxy's JSON reply.
+//
+// The target URL is sent in the "url" query parameter; the method, headers
+// and body of req are copied onto the proxy request. Request.URL of the
+// returned response is the URL reported by the proxy, while req keeps its
+// original URL.
+func (c *EdgeProxyClient) Do(req *http.Request) (*http.Response, error) {
+	proxyURL := os.Getenv("EDGE_PROXY_URL")
+	if proxyURL == "" {
+		return nil, fmt.Errorf("EDGE_PROXY_URL environment variable is not set")
+	}
+
+	// build the query with net/url so that a query string already present in
+	// EDGE_PROXY_URL is kept instead of ending up with a second "?"
+	endpoint, err := url.Parse(proxyURL)
+	if err != nil {
+		return nil, fmt.Errorf("error parsing EDGE_PROXY_URL: %w", err)
+	}
+	query := endpoint.Query()
+	query.Set("url", req.URL.String())
+	endpoint.RawQuery = query.Encode()
+
+	var bodyBytes []byte
+	if req.Body != nil {
+		bodyBytes, err = io.ReadAll(req.Body)
+		// close before checking the error so the body is released on failure too
+		req.Body.Close()
+		if err != nil {
+			return nil, fmt.Errorf("error reading request body: %w", err)
+		}
+		req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
+	}
+
+	// reuse the caller's context so cancellation and deadlines still apply
+	proxyReq, err := http.NewRequestWithContext(
+		req.Context(),
+		req.Method,
+		endpoint.String(),
+		bytes.NewBuffer(bodyBytes),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("error creating proxy request: %w", err)
+	}
+
+	for name, values := range req.Header {
+		for _, value := range values {
+			proxyReq.Header.Add(name, value)
+		}
+	}
+
+	proxyResp, err := c.Client.Do(proxyReq)
+	if err != nil {
+		return nil, fmt.Errorf("proxy request failed: %w", err)
+	}
+	defer proxyResp.Body.Close()
+
+	body, err := io.ReadAll(proxyResp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("error reading proxy response: %w", err)
+	}
+
+	var response models.ProxyResponse
+	if err := json.Unmarshal(body, &response); err != nil {
+		return nil, fmt.Errorf("error parsing proxy response: %w", err)
+	}
+
+	parsedResponseURL, err := url.Parse(response.URL)
+	if err != nil {
+		return nil, fmt.Errorf("error parsing response URL: %w", err)
+	}
+
+	// hand the final URL back on a copy of the request; writing it into req
+	// would silently change the caller's own request
+	finalReq := req.Clone(req.Context())
+	finalReq.URL = parsedResponseURL
+
+	resp := &http.Response{
+		StatusCode: response.StatusCode,
+		Status:     fmt.Sprintf("%d %s", response.StatusCode, http.StatusText(response.StatusCode)),
+		Body:       io.NopCloser(bytes.NewBufferString(response.Text)),
+		Header:     make(http.Header),
+		Request:    finalReq,
+	}
+	for name, value := range response.Headers {
+		resp.Header.Set(name, value)
+	}
+	for _, cookie := range response.Cookies {
+		resp.Header.Add("Set-Cookie", cookie)
+	}
+
+	return resp, nil
+}
diff --git a/util/http.go b/util/http.go index f03a723..e88e207 100644 --- a/util/http.go +++ b/util/http.go @@ -10,34 +10,28 @@ import ( var ( httpSession *http.Client httpSessionOnce sync.Once + baseTransport = &http.Transport{ + Proxy: http.ProxyFromEnvironment, + DialContext: (&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + }).DialContext, + ForceAttemptHTTP2: true, + MaxIdleConns: 100, + IdleConnTimeout: 90 * time.Second, + TLSHandshakeTimeout: 5 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + MaxIdleConnsPerHost: 100, + MaxConnsPerHost: 100, + ResponseHeaderTimeout: 10 * time.Second, + DisableCompression: false, + } ) func GetHTTPSession() *http.Client { httpSessionOnce.Do(func() { - transport := &http.Transport{ - Proxy: http.ProxyFromEnvironment, - DialContext: (&net.Dialer{ - Timeout: 30 * time.Second, - KeepAlive: 30 * time.Second, - }).DialContext, - ForceAttemptHTTP2: true, - - MaxIdleConns: 100, - IdleConnTimeout: 90 * time.Second, - - TLSHandshakeTimeout: 5 * time.Second, - ExpectContinueTimeout: 1 * time.Second, 
- - MaxIdleConnsPerHost: 100, - MaxConnsPerHost: 100, - - ResponseHeaderTimeout: 10 * time.Second, - - DisableCompression: false, - } - httpSession = &http.Client{ - Transport: transport, + Transport: baseTransport, Timeout: 60 * time.Second, } })