gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

client.go (10762B)


      1 // GoToSocial
      2 // Copyright (C) GoToSocial Authors admin@gotosocial.org
      3 // SPDX-License-Identifier: AGPL-3.0-or-later
      4 //
      5 // This program is free software: you can redistribute it and/or modify
      6 // it under the terms of the GNU Affero General Public License as published by
      7 // the Free Software Foundation, either version 3 of the License, or
      8 // (at your option) any later version.
      9 //
     10 // This program is distributed in the hope that it will be useful,
     11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13 // GNU Affero General Public License for more details.
     14 //
     15 // You should have received a copy of the GNU Affero General Public License
     16 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
     17 
     18 package httpclient
     19 
     20 import (
     21 	"context"
     22 	"errors"
     23 	"fmt"
     24 	"io"
     25 	"net"
     26 	"net/http"
     27 	"net/netip"
     28 	"runtime"
     29 	"strconv"
     30 	"strings"
     31 	"time"
     32 
     33 	"codeberg.org/gruf/go-bytesize"
     34 	"codeberg.org/gruf/go-byteutil"
     35 	"codeberg.org/gruf/go-cache/v3"
     36 	errorsv2 "codeberg.org/gruf/go-errors/v2"
     37 	"codeberg.org/gruf/go-iotools"
     38 	"codeberg.org/gruf/go-kv"
     39 	"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
     40 	"github.com/superseriousbusiness/gotosocial/internal/gtserror"
     41 	"github.com/superseriousbusiness/gotosocial/internal/log"
     42 )
     43 
     44 var (
     45 	// ErrInvalidRequest is returned if a given HTTP request is invalid and cannot be performed.
     46 	ErrInvalidRequest = errors.New("invalid http request")
     47 
     48 	// ErrInvalidNetwork is returned if the request would not be performed over TCP
     49 	ErrInvalidNetwork = errors.New("invalid network type")
     50 
     51 	// ErrReservedAddr is returned if a dialed address resolves to an IP within a blocked or reserved net.
     52 	ErrReservedAddr = errors.New("dial within blocked / reserved IP range")
     53 
     54 	// ErrBodyTooLarge is returned when a received response body is above predefined limit (default 40MB).
     55 	ErrBodyTooLarge = errors.New("body size too large")
     56 )
     57 
     58 // Config provides configuration details for setting up a new
     59 // instance of httpclient.Client{}. Within are a subset of the
     60 // configuration values passed to initialized http.Transport{}
     61 // and http.Client{}, along with httpclient.Client{} specific.
     62 type Config struct {
     63 	// MaxOpenConnsPerHost limits the max number of open connections to a host.
     64 	MaxOpenConnsPerHost int
     65 
     66 	// MaxIdleConns: see http.Transport{}.MaxIdleConns.
     67 	MaxIdleConns int
     68 
     69 	// ReadBufferSize: see http.Transport{}.ReadBufferSize.
     70 	ReadBufferSize int
     71 
     72 	// WriteBufferSize: see http.Transport{}.WriteBufferSize.
     73 	WriteBufferSize int
     74 
     75 	// MaxBodySize determines the maximum fetchable body size.
     76 	MaxBodySize int64
     77 
     78 	// Timeout: see http.Client{}.Timeout.
     79 	Timeout time.Duration
     80 
     81 	// DisableCompression: see http.Transport{}.DisableCompression.
     82 	DisableCompression bool
     83 
     84 	// AllowRanges allows outgoing communications to given IP nets.
     85 	AllowRanges []netip.Prefix
     86 
     87 	// BlockRanges blocks outgoing communiciations to given IP nets.
     88 	BlockRanges []netip.Prefix
     89 }
     90 
     91 // Client wraps an underlying http.Client{} to provide the following:
     92 //   - setting a maximum received request body size, returning error on
     93 //     large content lengths, and using a limited reader in all other
     94 //     cases to protect against forged / unknown content-lengths
     95 //   - protection from server side request forgery (SSRF) by only dialing
     96 //     out to known public IP prefixes, configurable with allows/blocks
     97 //   - retry-backoff logic for error temporary HTTP error responses
     98 //   - optional request signing
     99 //   - request logging
    100 type Client struct {
    101 	client   http.Client
    102 	badHosts cache.Cache[string, struct{}]
    103 	bodyMax  int64
    104 }
    105 
    106 // New returns a new instance of Client initialized using configuration.
    107 func New(cfg Config) *Client {
    108 	var c Client
    109 
    110 	d := &net.Dialer{
    111 		Timeout:   15 * time.Second,
    112 		KeepAlive: 30 * time.Second,
    113 		Resolver:  &net.Resolver{},
    114 	}
    115 
    116 	if cfg.MaxOpenConnsPerHost <= 0 {
    117 		// By default base this value on GOMAXPROCS.
    118 		maxprocs := runtime.GOMAXPROCS(0)
    119 		cfg.MaxOpenConnsPerHost = maxprocs * 20
    120 	}
    121 
    122 	if cfg.MaxIdleConns <= 0 {
    123 		// By default base this value on MaxOpenConns.
    124 		cfg.MaxIdleConns = cfg.MaxOpenConnsPerHost * 10
    125 	}
    126 
    127 	if cfg.MaxBodySize <= 0 {
    128 		// By default set this to a reasonable 40MB.
    129 		cfg.MaxBodySize = int64(40 * bytesize.MiB)
    130 	}
    131 
    132 	// Protect dialer with IP range sanitizer.
    133 	d.Control = (&sanitizer{
    134 		allow: cfg.AllowRanges,
    135 		block: cfg.BlockRanges,
    136 	}).Sanitize
    137 
    138 	// Prepare client fields.
    139 	c.client.Timeout = cfg.Timeout
    140 	c.bodyMax = cfg.MaxBodySize
    141 
    142 	// Set underlying HTTP client roundtripper.
    143 	c.client.Transport = &http.Transport{
    144 		Proxy:                 http.ProxyFromEnvironment,
    145 		ForceAttemptHTTP2:     true,
    146 		DialContext:           d.DialContext,
    147 		MaxIdleConns:          cfg.MaxIdleConns,
    148 		IdleConnTimeout:       90 * time.Second,
    149 		TLSHandshakeTimeout:   10 * time.Second,
    150 		ExpectContinueTimeout: 1 * time.Second,
    151 		ReadBufferSize:        cfg.ReadBufferSize,
    152 		WriteBufferSize:       cfg.WriteBufferSize,
    153 		DisableCompression:    cfg.DisableCompression,
    154 	}
    155 
    156 	// Initiate outgoing bad hosts lookup cache.
    157 	c.badHosts = cache.New[string, struct{}](0, 1000, 0)
    158 	c.badHosts.SetTTL(time.Hour, false)
    159 	if !c.badHosts.Start(time.Minute) {
    160 		log.Panic(nil, "failed to start transport controller cache")
    161 	}
    162 
    163 	return &c
    164 }
    165 
    166 // Do will essentially perform http.Client{}.Do() with retry-backoff functionality.
    167 func (c *Client) Do(r *http.Request) (*http.Response, error) {
    168 	return c.DoSigned(r, func(r *http.Request) error {
    169 		return nil // no request signing
    170 	})
    171 }
    172 
    173 // DoSigned will essentially perform http.Client{}.Do() with retry-backoff functionality and requesting signing..
    174 func (c *Client) DoSigned(r *http.Request, sign SignFunc) (rsp *http.Response, err error) {
    175 	const (
    176 		// max no. attempts.
    177 		maxRetries = 5
    178 
    179 		// starting backoff duration.
    180 		baseBackoff = 2 * time.Second
    181 	)
    182 
    183 	// First validate incoming request.
    184 	if err := ValidateRequest(r); err != nil {
    185 		return nil, err
    186 	}
    187 
    188 	// Get request hostname.
    189 	host := r.URL.Hostname()
    190 
    191 	// Check whether request should fast fail.
    192 	fastFail := gtscontext.IsFastfail(r.Context())
    193 	if !fastFail {
    194 		// Check if recently reached max retries for this host
    195 		// so we don't bother with a retry-backoff loop. The only
    196 		// errors that are retried upon are server failure, TLS
    197 		// and domain resolution type errors, so this cached result
    198 		// indicates this server is likely having issues.
    199 		fastFail = c.badHosts.Has(host)
    200 		defer func() {
    201 			if err != nil {
    202 				// On error return mark as bad-host.
    203 				c.badHosts.Set(host, struct{}{})
    204 			}
    205 		}()
    206 	}
    207 
    208 	// Start a log entry for this request
    209 	l := log.WithContext(r.Context()).
    210 		WithFields(kv.Fields{
    211 			{"method", r.Method},
    212 			{"url", r.URL.String()},
    213 		}...)
    214 
    215 	for i := 0; i < maxRetries; i++ {
    216 		var backoff time.Duration
    217 
    218 		// Reset signing header fields
    219 		now := time.Now().UTC()
    220 		r.Header.Set("Date", now.Format("Mon, 02 Jan 2006 15:04:05")+" GMT")
    221 		r.Header.Del("Signature")
    222 		r.Header.Del("Digest")
    223 
    224 		// Rewind body reader and content-length if set.
    225 		if rc, ok := r.Body.(*byteutil.ReadNopCloser); ok {
    226 			r.ContentLength = int64(rc.Len())
    227 			rc.Rewind()
    228 		}
    229 
    230 		// Sign the outgoing request.
    231 		if err := sign(r); err != nil {
    232 			return nil, err
    233 		}
    234 
    235 		l.Infof("performing request")
    236 
    237 		// Perform the request.
    238 		rsp, err = c.do(r)
    239 		if err == nil { //nolint:gocritic
    240 
    241 			// TooManyRequest means we need to slow
    242 			// down and retry our request. Codes over
    243 			// 500 generally indicate temp. outages.
    244 			if code := rsp.StatusCode; code < 500 &&
    245 				code != http.StatusTooManyRequests {
    246 				return rsp, nil
    247 			}
    248 
    249 			// Create loggable error from response status code.
    250 			err = fmt.Errorf(`http response: %s`, rsp.Status)
    251 
    252 			// Search for a provided "Retry-After" header value.
    253 			if after := rsp.Header.Get("Retry-After"); after != "" {
    254 
    255 				if u, _ := strconv.ParseUint(after, 10, 32); u != 0 {
    256 					// An integer number of backoff seconds was provided.
    257 					backoff = time.Duration(u) * time.Second
    258 				} else if at, _ := http.ParseTime(after); !at.Before(now) {
    259 					// An HTTP formatted future date-time was provided.
    260 					backoff = at.Sub(now)
    261 				}
    262 
    263 				// Don't let their provided backoff exceed our max.
    264 				if max := baseBackoff * maxRetries; backoff > max {
    265 					backoff = max
    266 				}
    267 			}
    268 
    269 			// Close + unset rsp.
    270 			_ = rsp.Body.Close()
    271 			rsp = nil
    272 
    273 		} else if errorsv2.Comparable(err,
    274 			context.DeadlineExceeded,
    275 			context.Canceled,
    276 			ErrBodyTooLarge,
    277 			ErrReservedAddr,
    278 		) {
    279 			// Non-retryable errors.
    280 			return nil, err
    281 		} else if errstr := err.Error(); // nocollapse
    282 		strings.Contains(errstr, "stopped after 10 redirects") ||
    283 			strings.Contains(errstr, "tls: ") ||
    284 			strings.Contains(errstr, "x509: ") {
    285 			// These error types aren't wrapped
    286 			// so we have to check the error string.
    287 			// All are unrecoverable!
    288 			return nil, err
    289 		} else if dnserr := (*net.DNSError)(nil); // nocollapse
    290 		errors.As(err, &dnserr) && dnserr.IsNotFound {
    291 			// DNS lookup failure, this domain does not exist
    292 			return nil, gtserror.SetNotFound(err)
    293 		}
    294 
    295 		if fastFail {
    296 			// on fast-fail, don't bother backoff/retry
    297 			return nil, fmt.Errorf("%w (fast fail)", err)
    298 		}
    299 
    300 		if backoff == 0 {
    301 			// No retry-after found, set our predefined
    302 			// backoff according to a multiplier of 2^n.
    303 			backoff = baseBackoff * 1 << (i + 1)
    304 		}
    305 
    306 		l.Errorf("backing off for %s after http request error: %v", backoff, err)
    307 
    308 		select {
    309 		// Request ctx cancelled
    310 		case <-r.Context().Done():
    311 			return nil, r.Context().Err()
    312 
    313 		// Backoff for some time
    314 		case <-time.After(backoff):
    315 		}
    316 	}
    317 
    318 	// Set error return to trigger setting "bad host".
    319 	err = errors.New("transport reached max retries")
    320 	return
    321 }
    322 
    323 // do wraps http.Client{}.Do() to provide safely limited response bodies.
    324 func (c *Client) do(req *http.Request) (*http.Response, error) {
    325 	// Perform the HTTP request.
    326 	rsp, err := c.client.Do(req)
    327 	if err != nil {
    328 		return nil, err
    329 	}
    330 
    331 	// Seperate the body implementers.
    332 	rbody := (io.Reader)(rsp.Body)
    333 	cbody := (io.Closer)(rsp.Body)
    334 
    335 	var limit int64
    336 
    337 	if limit = rsp.ContentLength; limit < 0 {
    338 		// If unknown, use max as reader limit.
    339 		limit = c.bodyMax
    340 	}
    341 
    342 	// Don't trust them, limit body reads.
    343 	rbody = io.LimitReader(rbody, limit)
    344 
    345 	// Wrap closer to ensure entire body drained BEFORE close.
    346 	cbody = iotools.CloserAfterCallback(cbody, func() {
    347 		_, _ = discard.ReadFrom(rbody)
    348 	})
    349 
    350 	// Wrap body with limit.
    351 	rsp.Body = &struct {
    352 		io.Reader
    353 		io.Closer
    354 	}{rbody, cbody}
    355 
    356 	// Check response body not too large.
    357 	if rsp.ContentLength > c.bodyMax {
    358 		_ = rsp.Body.Close()
    359 		return nil, ErrBodyTooLarge
    360 	}
    361 
    362 	return rsp, nil
    363 }
    364 
    365 // cast discard writer to full interface it supports.
    366 var discard = io.Discard.(interface { //nolint
    367 	io.Writer
    368 	io.StringWriter
    369 	io.ReaderFrom
    370 })