gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

status.go (16794B)


      1 // GoToSocial
      2 // Copyright (C) GoToSocial Authors admin@gotosocial.org
      3 // SPDX-License-Identifier: AGPL-3.0-or-later
      4 //
      5 // This program is free software: you can redistribute it and/or modify
      6 // it under the terms of the GNU Affero General Public License as published by
      7 // the Free Software Foundation, either version 3 of the License, or
      8 // (at your option) any later version.
      9 //
     10 // This program is distributed in the hope that it will be useful,
     11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13 // GNU Affero General Public License for more details.
     14 //
     15 // You should have received a copy of the GNU Affero General Public License
     16 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
     17 
     18 package dereferencing
     19 
     20 import (
     21 	"context"
     22 	"errors"
     23 	"io"
     24 	"net/url"
     25 	"time"
     26 
     27 	"github.com/superseriousbusiness/gotosocial/internal/ap"
     28 	"github.com/superseriousbusiness/gotosocial/internal/config"
     29 	"github.com/superseriousbusiness/gotosocial/internal/db"
     30 	"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
     31 	"github.com/superseriousbusiness/gotosocial/internal/gtserror"
     32 	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
     33 	"github.com/superseriousbusiness/gotosocial/internal/id"
     34 	"github.com/superseriousbusiness/gotosocial/internal/log"
     35 	"github.com/superseriousbusiness/gotosocial/internal/media"
     36 	"github.com/superseriousbusiness/gotosocial/internal/transport"
     37 )
     38 
     39 // statusUpToDate returns whether the given status model is both updateable
     40 // (i.e. remote status) and whether it needs an update based on `fetched_at`.
     41 func statusUpToDate(status *gtsmodel.Status) bool {
     42 	if *status.Local {
     43 		// Can't update local statuses.
     44 		return true
     45 	}
     46 
     47 	// If this status was updated recently (last interval), we return as-is.
     48 	if next := status.FetchedAt.Add(2 * time.Hour); time.Now().Before(next) {
     49 		return true
     50 	}
     51 
     52 	return false
     53 }
     54 
     55 // GetStatus: implements Dereferencer{}.GetStatus().
     56 func (d *deref) GetStatusByURI(ctx context.Context, requestUser string, uri *url.URL) (*gtsmodel.Status, ap.Statusable, error) {
     57 	// Fetch and dereference status if necessary.
     58 	status, apubStatus, err := d.getStatusByURI(ctx,
     59 		requestUser,
     60 		uri,
     61 	)
     62 	if err != nil {
     63 		return nil, nil, err
     64 	}
     65 
     66 	if apubStatus != nil {
     67 		// This status was updated, enqueue re-dereferencing the whole thread.
     68 		d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
     69 			d.dereferenceThread(ctx, requestUser, uri, status, apubStatus)
     70 		})
     71 	}
     72 
     73 	return status, apubStatus, nil
     74 }
     75 
     76 // getStatusByURI is a package internal form of .GetStatusByURI() that doesn't bother dereferencing the whole thread on update.
     77 func (d *deref) getStatusByURI(ctx context.Context, requestUser string, uri *url.URL) (*gtsmodel.Status, ap.Statusable, error) {
     78 	var (
     79 		status *gtsmodel.Status
     80 		uriStr = uri.String()
     81 		err    error
     82 	)
     83 
     84 	// Search the database for existing status with URI.
     85 	status, err = d.state.DB.GetStatusByURI(
     86 		// request a barebones object, it may be in the
     87 		// db but with related models not yet dereferenced.
     88 		gtscontext.SetBarebones(ctx),
     89 		uriStr,
     90 	)
     91 	if err != nil && !errors.Is(err, db.ErrNoEntries) {
     92 		return nil, nil, gtserror.Newf("error checking database for status %s by uri: %w", uriStr, err)
     93 	}
     94 
     95 	if status == nil {
     96 		// Else, search the database for existing by URL.
     97 		status, err = d.state.DB.GetStatusByURL(
     98 			gtscontext.SetBarebones(ctx),
     99 			uriStr,
    100 		)
    101 		if err != nil && !errors.Is(err, db.ErrNoEntries) {
    102 			return nil, nil, gtserror.Newf("error checking database for status %s by url: %w", uriStr, err)
    103 		}
    104 	}
    105 
    106 	if status == nil {
    107 		// Ensure that this is isn't a search for a local status.
    108 		if uri.Host == config.GetHost() || uri.Host == config.GetAccountDomain() {
    109 			return nil, nil, NewErrNotRetrievable(err) // this will be db.ErrNoEntries
    110 		}
    111 
    112 		// Create and pass-through a new bare-bones model for deref.
    113 		return d.enrichStatus(ctx, requestUser, uri, &gtsmodel.Status{
    114 			Local: func() *bool { var false bool; return &false }(),
    115 			URI:   uriStr,
    116 		}, nil)
    117 	}
    118 
    119 	// Check whether needs update.
    120 	if statusUpToDate(status) {
    121 		// This is existing up-to-date status, ensure it is populated.
    122 		if err := d.state.DB.PopulateStatus(ctx, status); err != nil {
    123 			log.Errorf(ctx, "error populating existing status: %v", err)
    124 		}
    125 		return status, nil, nil
    126 	}
    127 
    128 	// Try to update + deref existing status model.
    129 	latest, apubStatus, err := d.enrichStatus(ctx,
    130 		requestUser,
    131 		uri,
    132 		status,
    133 		nil,
    134 	)
    135 	if err != nil {
    136 		log.Errorf(ctx, "error enriching remote status: %v", err)
    137 
    138 		// Update fetch-at to slow re-attempts.
    139 		status.FetchedAt = time.Now()
    140 		_ = d.state.DB.UpdateStatus(ctx, status, "fetched_at")
    141 
    142 		// Fallback to existing.
    143 		return status, nil, nil
    144 	}
    145 
    146 	return latest, apubStatus, nil
    147 }
    148 
    149 // RefreshStatus: implements Dereferencer{}.RefreshStatus().
    150 func (d *deref) RefreshStatus(ctx context.Context, requestUser string, status *gtsmodel.Status, apubStatus ap.Statusable, force bool) (*gtsmodel.Status, ap.Statusable, error) {
    151 	// Check whether needs update.
    152 	if statusUpToDate(status) {
    153 		return status, nil, nil
    154 	}
    155 
    156 	// Parse the URI from status.
    157 	uri, err := url.Parse(status.URI)
    158 	if err != nil {
    159 		return nil, nil, gtserror.Newf("invalid status uri %q: %w", status.URI, err)
    160 	}
    161 
    162 	// Try to update + deref existing status model.
    163 	latest, apubStatus, err := d.enrichStatus(ctx,
    164 		requestUser,
    165 		uri,
    166 		status,
    167 		apubStatus,
    168 	)
    169 	if err != nil {
    170 		return nil, nil, err
    171 	}
    172 
    173 	// This status was updated, enqueue re-dereferencing the whole thread.
    174 	d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
    175 		d.dereferenceThread(ctx, requestUser, uri, latest, apubStatus)
    176 	})
    177 
    178 	return latest, apubStatus, nil
    179 }
    180 
    181 // RefreshStatusAsync: implements Dereferencer{}.RefreshStatusAsync().
    182 func (d *deref) RefreshStatusAsync(ctx context.Context, requestUser string, status *gtsmodel.Status, apubStatus ap.Statusable, force bool) {
    183 	// Check whether needs update.
    184 	if statusUpToDate(status) {
    185 		return
    186 	}
    187 
    188 	// Parse the URI from status.
    189 	uri, err := url.Parse(status.URI)
    190 	if err != nil {
    191 		log.Errorf(ctx, "invalid status uri %q: %v", status.URI, err)
    192 		return
    193 	}
    194 
    195 	// Enqueue a worker function to re-fetch this status async.
    196 	d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
    197 		latest, apubStatus, err := d.enrichStatus(ctx, requestUser, uri, status, apubStatus)
    198 		if err != nil {
    199 			log.Errorf(ctx, "error enriching remote status: %v", err)
    200 			return
    201 		}
    202 
    203 		// This status was updated, re-dereference the whole thread.
    204 		d.dereferenceThread(ctx, requestUser, uri, latest, apubStatus)
    205 	})
    206 }
    207 
    208 // enrichStatus will enrich the given status, whether a new barebones model, or existing model from the database. It handles necessary dereferencing etc.
    209 func (d *deref) enrichStatus(ctx context.Context, requestUser string, uri *url.URL, status *gtsmodel.Status, apubStatus ap.Statusable) (*gtsmodel.Status, ap.Statusable, error) {
    210 	// Pre-fetch a transport for requesting username, used by later dereferencing.
    211 	tsport, err := d.transportController.NewTransportForUsername(ctx, requestUser)
    212 	if err != nil {
    213 		return nil, nil, gtserror.Newf("couldn't create transport: %w", err)
    214 	}
    215 
    216 	// Check whether this account URI is a blocked domain / subdomain.
    217 	if blocked, err := d.state.DB.IsDomainBlocked(ctx, uri.Host); err != nil {
    218 		return nil, nil, gtserror.Newf("error checking blocked domain: %w", err)
    219 	} else if blocked {
    220 		return nil, nil, gtserror.Newf("%s is blocked", uri.Host)
    221 	}
    222 
    223 	var derefd bool
    224 
    225 	if apubStatus == nil {
    226 		// Dereference latest version of the status.
    227 		b, err := tsport.Dereference(ctx, uri)
    228 		if err != nil {
    229 			return nil, nil, &ErrNotRetrievable{gtserror.Newf("error deferencing %s: %w", uri, err)}
    230 		}
    231 
    232 		// Attempt to resolve ActivityPub status from data.
    233 		apubStatus, err = ap.ResolveStatusable(ctx, b)
    234 		if err != nil {
    235 			return nil, nil, gtserror.Newf("error resolving statusable from data for account %s: %w", uri, err)
    236 		}
    237 
    238 		// Mark as deref'd.
    239 		derefd = true
    240 	}
    241 
    242 	// Get the attributed-to account in order to fetch profile.
    243 	attributedTo, err := ap.ExtractAttributedToURI(apubStatus)
    244 	if err != nil {
    245 		return nil, nil, gtserror.New("attributedTo was empty")
    246 	}
    247 
    248 	// Ensure we have the author account of the status dereferenced (+ up-to-date).
    249 	if author, _, err := d.getAccountByURI(ctx, requestUser, attributedTo); err != nil {
    250 		if status.AccountID == "" {
    251 			// Provided status account is nil, i.e. this is a new status / author, so a deref fail is unrecoverable.
    252 			return nil, nil, gtserror.Newf("failed to dereference status author %s: %w", uri, err)
    253 		}
    254 	} else if status.AccountID != "" && status.AccountID != author.ID {
    255 		// There already existed an account for this status author, but account ID changed. This shouldn't happen!
    256 		log.Warnf(ctx, "status author account ID changed: old=%s new=%s", status.AccountID, author.ID)
    257 	}
    258 
    259 	// By default we assume that apubStatus has been passed,
    260 	// indicating that the given status is already latest.
    261 	latestStatus := status
    262 
    263 	if derefd {
    264 		// ActivityPub model was recently dereferenced, so assume that passed status
    265 		// may contain out-of-date information, convert AP model to our GTS model.
    266 		latestStatus, err = d.typeConverter.ASStatusToStatus(ctx, apubStatus)
    267 		if err != nil {
    268 			return nil, nil, gtserror.Newf("error converting statusable to gts model for status %s: %w", uri, err)
    269 		}
    270 	}
    271 
    272 	// Use existing status ID.
    273 	latestStatus.ID = status.ID
    274 
    275 	if latestStatus.ID == "" {
    276 		// Generate new status ID from the provided creation date.
    277 		latestStatus.ID, err = id.NewULIDFromTime(latestStatus.CreatedAt)
    278 		if err != nil {
    279 			return nil, nil, gtserror.Newf("invalid created at date: %w", err)
    280 		}
    281 	}
    282 
    283 	// Carry-over values and set fetch time.
    284 	latestStatus.FetchedAt = time.Now()
    285 	latestStatus.Local = status.Local
    286 
    287 	// Ensure the status' mentions are populated, and pass in existing to check for changes.
    288 	if err := d.fetchStatusMentions(ctx, requestUser, status, latestStatus); err != nil {
    289 		return nil, nil, gtserror.Newf("error populating mentions for status %s: %w", uri, err)
    290 	}
    291 
    292 	// TODO: populateStatusTags()
    293 
    294 	// Ensure the status' media attachments are populated, passing in existing to check for changes.
    295 	if err := d.fetchStatusAttachments(ctx, tsport, status, latestStatus); err != nil {
    296 		return nil, nil, gtserror.Newf("error populating attachments for status %s: %w", uri, err)
    297 	}
    298 
    299 	// Ensure the status' emoji attachments are populated, passing in existing to check for changes.
    300 	if err := d.fetchStatusEmojis(ctx, requestUser, status, latestStatus); err != nil {
    301 		return nil, nil, gtserror.Newf("error populating emojis for status %s: %w", uri, err)
    302 	}
    303 
    304 	if status.CreatedAt.IsZero() {
    305 		// CreatedAt will be zero if no local copy was
    306 		// found in one of the GetStatusBy___() functions.
    307 		//
    308 		// This is new, put the status in the database.
    309 		err := d.state.DB.PutStatus(ctx, latestStatus)
    310 
    311 		if errors.Is(err, db.ErrAlreadyExists) {
    312 			// TODO: replace this quick fix with per-URI deref locks.
    313 			latestStatus, err = d.state.DB.GetStatusByURI(ctx, latestStatus.URI)
    314 			return latestStatus, nil, err
    315 		}
    316 
    317 		if err != nil {
    318 			return nil, nil, gtserror.Newf("error putting in database: %w", err)
    319 		}
    320 	} else {
    321 		// This is an existing status, update the model in the database.
    322 		if err := d.state.DB.UpdateStatus(ctx, latestStatus); err != nil {
    323 			return nil, nil, gtserror.Newf("error updating database: %w", err)
    324 		}
    325 	}
    326 
    327 	return latestStatus, apubStatus, nil
    328 }
    329 
    330 func (d *deref) fetchStatusMentions(ctx context.Context, requestUser string, existing *gtsmodel.Status, status *gtsmodel.Status) error {
    331 	// Allocate new slice to take the yet-to-be created mention IDs.
    332 	status.MentionIDs = make([]string, len(status.Mentions))
    333 
    334 	for i := range status.Mentions {
    335 		mention := status.Mentions[i]
    336 
    337 		// Look for existing mention with target account URI first.
    338 		existing, ok := existing.GetMentionByTargetURI(mention.TargetAccountURI)
    339 		if ok && existing.ID != "" {
    340 			status.Mentions[i] = existing
    341 			status.MentionIDs[i] = existing.ID
    342 			continue
    343 		}
    344 
    345 		// Ensure that mention account URI is parseable.
    346 		accountURI, err := url.Parse(mention.TargetAccountURI)
    347 		if err != nil {
    348 			log.Errorf(ctx, "invalid account uri %q: %v", mention.TargetAccountURI, err)
    349 			continue
    350 		}
    351 
    352 		// Ensure we have the account of the mention target dereferenced.
    353 		mention.TargetAccount, _, err = d.getAccountByURI(ctx, requestUser, accountURI)
    354 		if err != nil {
    355 			log.Errorf(ctx, "failed to dereference account %s: %v", accountURI, err)
    356 			continue
    357 		}
    358 
    359 		// Generate new ID according to status creation.
    360 		mention.ID, err = id.NewULIDFromTime(status.CreatedAt)
    361 		if err != nil {
    362 			log.Errorf(ctx, "invalid created at date: %v", err)
    363 			mention.ID = id.NewULID() // just use "now"
    364 		}
    365 
    366 		// Set known further mention details.
    367 		mention.CreatedAt = status.CreatedAt
    368 		mention.UpdatedAt = status.UpdatedAt
    369 		mention.OriginAccount = status.Account
    370 		mention.OriginAccountID = status.AccountID
    371 		mention.OriginAccountURI = status.AccountURI
    372 		mention.TargetAccountID = mention.TargetAccount.ID
    373 		mention.TargetAccountURI = mention.TargetAccount.URI
    374 		mention.TargetAccountURL = mention.TargetAccount.URL
    375 		mention.StatusID = status.ID
    376 		mention.Status = status
    377 
    378 		// Place the new mention into the database.
    379 		if err := d.state.DB.PutMention(ctx, mention); err != nil {
    380 			return gtserror.Newf("error putting mention in database: %w", err)
    381 		}
    382 
    383 		// Set the *new* mention and ID.
    384 		status.Mentions[i] = mention
    385 		status.MentionIDs[i] = mention.ID
    386 	}
    387 
    388 	for i := 0; i < len(status.MentionIDs); i++ {
    389 		if status.MentionIDs[i] == "" {
    390 			// This is a failed mention population, likely due
    391 			// to invalid incoming data / now-deleted accounts.
    392 			copy(status.Mentions[i:], status.Mentions[i+1:])
    393 			copy(status.MentionIDs[i:], status.MentionIDs[i+1:])
    394 			status.Mentions = status.Mentions[:len(status.Mentions)-1]
    395 			status.MentionIDs = status.MentionIDs[:len(status.MentionIDs)-1]
    396 		}
    397 	}
    398 
    399 	return nil
    400 }
    401 
    402 func (d *deref) fetchStatusAttachments(ctx context.Context, tsport transport.Transport, existing *gtsmodel.Status, status *gtsmodel.Status) error {
    403 	// Allocate new slice to take the yet-to-be fetched attachment IDs.
    404 	status.AttachmentIDs = make([]string, len(status.Attachments))
    405 
    406 	for i := range status.Attachments {
    407 		placeholder := status.Attachments[i]
    408 
    409 		// Look for existing media attachment with remoet URL first.
    410 		existing, ok := existing.GetAttachmentByRemoteURL(placeholder.RemoteURL)
    411 		if ok && existing.ID != "" {
    412 			status.Attachments[i] = existing
    413 			status.AttachmentIDs[i] = existing.ID
    414 			continue
    415 		}
    416 
    417 		// Ensure a valid media attachment remote URL.
    418 		remoteURL, err := url.Parse(placeholder.RemoteURL)
    419 		if err != nil {
    420 			log.Errorf(ctx, "invalid remote media url %q: %v", placeholder.RemoteURL, err)
    421 			continue
    422 		}
    423 
    424 		// Start pre-processing remote media at remote URL.
    425 		processing, err := d.mediaManager.PreProcessMedia(ctx, func(ctx context.Context) (io.ReadCloser, int64, error) {
    426 			return tsport.DereferenceMedia(ctx, remoteURL)
    427 		}, status.AccountID, &media.AdditionalMediaInfo{
    428 			StatusID:    &status.ID,
    429 			RemoteURL:   &placeholder.RemoteURL,
    430 			Description: &placeholder.Description,
    431 			Blurhash:    &placeholder.Blurhash,
    432 		})
    433 		if err != nil {
    434 			log.Errorf(ctx, "error processing attachment: %v", err)
    435 			continue
    436 		}
    437 
    438 		// Force attachment loading *right now*.
    439 		media, err := processing.LoadAttachment(ctx)
    440 		if err != nil {
    441 			log.Errorf(ctx, "error loading attachment: %v", err)
    442 			continue
    443 		}
    444 
    445 		// Set the *new* attachment and ID.
    446 		status.Attachments[i] = media
    447 		status.AttachmentIDs[i] = media.ID
    448 	}
    449 
    450 	for i := 0; i < len(status.AttachmentIDs); i++ {
    451 		if status.AttachmentIDs[i] == "" {
    452 			// This is a failed attachment population, this may
    453 			// be due to us not currently supporting a media type.
    454 			copy(status.Attachments[i:], status.Attachments[i+1:])
    455 			copy(status.AttachmentIDs[i:], status.AttachmentIDs[i+1:])
    456 			status.Attachments = status.Attachments[:len(status.Attachments)-1]
    457 			status.AttachmentIDs = status.AttachmentIDs[:len(status.AttachmentIDs)-1]
    458 		}
    459 	}
    460 
    461 	return nil
    462 }
    463 
    464 func (d *deref) fetchStatusEmojis(ctx context.Context, requestUser string, existing *gtsmodel.Status, status *gtsmodel.Status) error {
    465 	// Fetch the full-fleshed-out emoji objects for our status.
    466 	emojis, err := d.populateEmojis(ctx, status.Emojis, requestUser)
    467 	if err != nil {
    468 		return gtserror.Newf("failed to populate emojis: %w", err)
    469 	}
    470 
    471 	// Iterate over and get their IDs.
    472 	emojiIDs := make([]string, 0, len(emojis))
    473 	for _, e := range emojis {
    474 		emojiIDs = append(emojiIDs, e.ID)
    475 	}
    476 
    477 	// Set known emoji details.
    478 	status.Emojis = emojis
    479 	status.EmojiIDs = emojiIDs
    480 
    481 	return nil
    482 }