status.go (16794B)
1 // GoToSocial 2 // Copyright (C) GoToSocial Authors admin@gotosocial.org 3 // SPDX-License-Identifier: AGPL-3.0-or-later 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package dereferencing 19 20 import ( 21 "context" 22 "errors" 23 "io" 24 "net/url" 25 "time" 26 27 "github.com/superseriousbusiness/gotosocial/internal/ap" 28 "github.com/superseriousbusiness/gotosocial/internal/config" 29 "github.com/superseriousbusiness/gotosocial/internal/db" 30 "github.com/superseriousbusiness/gotosocial/internal/gtscontext" 31 "github.com/superseriousbusiness/gotosocial/internal/gtserror" 32 "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" 33 "github.com/superseriousbusiness/gotosocial/internal/id" 34 "github.com/superseriousbusiness/gotosocial/internal/log" 35 "github.com/superseriousbusiness/gotosocial/internal/media" 36 "github.com/superseriousbusiness/gotosocial/internal/transport" 37 ) 38 39 // statusUpToDate returns whether the given status model is both updateable 40 // (i.e. remote status) and whether it needs an update based on `fetched_at`. 41 func statusUpToDate(status *gtsmodel.Status) bool { 42 if *status.Local { 43 // Can't update local statuses. 44 return true 45 } 46 47 // If this status was updated recently (last interval), we return as-is. 48 if next := status.FetchedAt.Add(2 * time.Hour); time.Now().Before(next) { 49 return true 50 } 51 52 return false 53 } 54 55 // GetStatus: implements Dereferencer{}.GetStatus(). 56 func (d *deref) GetStatusByURI(ctx context.Context, requestUser string, uri *url.URL) (*gtsmodel.Status, ap.Statusable, error) { 57 // Fetch and dereference status if necessary. 58 status, apubStatus, err := d.getStatusByURI(ctx, 59 requestUser, 60 uri, 61 ) 62 if err != nil { 63 return nil, nil, err 64 } 65 66 if apubStatus != nil { 67 // This status was updated, enqueue re-dereferencing the whole thread. 68 d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) { 69 d.dereferenceThread(ctx, requestUser, uri, status, apubStatus) 70 }) 71 } 72 73 return status, apubStatus, nil 74 } 75 76 // getStatusByURI is a package internal form of .GetStatusByURI() that doesn't bother dereferencing the whole thread on update. 77 func (d *deref) getStatusByURI(ctx context.Context, requestUser string, uri *url.URL) (*gtsmodel.Status, ap.Statusable, error) { 78 var ( 79 status *gtsmodel.Status 80 uriStr = uri.String() 81 err error 82 ) 83 84 // Search the database for existing status with URI. 85 status, err = d.state.DB.GetStatusByURI( 86 // request a barebones object, it may be in the 87 // db but with related models not yet dereferenced. 88 gtscontext.SetBarebones(ctx), 89 uriStr, 90 ) 91 if err != nil && !errors.Is(err, db.ErrNoEntries) { 92 return nil, nil, gtserror.Newf("error checking database for status %s by uri: %w", uriStr, err) 93 } 94 95 if status == nil { 96 // Else, search the database for existing by URL. 97 status, err = d.state.DB.GetStatusByURL( 98 gtscontext.SetBarebones(ctx), 99 uriStr, 100 ) 101 if err != nil && !errors.Is(err, db.ErrNoEntries) { 102 return nil, nil, gtserror.Newf("error checking database for status %s by url: %w", uriStr, err) 103 } 104 } 105 106 if status == nil { 107 // Ensure that this is isn't a search for a local status. 108 if uri.Host == config.GetHost() || uri.Host == config.GetAccountDomain() { 109 return nil, nil, NewErrNotRetrievable(err) // this will be db.ErrNoEntries 110 } 111 112 // Create and pass-through a new bare-bones model for deref. 113 return d.enrichStatus(ctx, requestUser, uri, >smodel.Status{ 114 Local: func() *bool { var false bool; return &false }(), 115 URI: uriStr, 116 }, nil) 117 } 118 119 // Check whether needs update. 120 if statusUpToDate(status) { 121 // This is existing up-to-date status, ensure it is populated. 122 if err := d.state.DB.PopulateStatus(ctx, status); err != nil { 123 log.Errorf(ctx, "error populating existing status: %v", err) 124 } 125 return status, nil, nil 126 } 127 128 // Try to update + deref existing status model. 129 latest, apubStatus, err := d.enrichStatus(ctx, 130 requestUser, 131 uri, 132 status, 133 nil, 134 ) 135 if err != nil { 136 log.Errorf(ctx, "error enriching remote status: %v", err) 137 138 // Update fetch-at to slow re-attempts. 139 status.FetchedAt = time.Now() 140 _ = d.state.DB.UpdateStatus(ctx, status, "fetched_at") 141 142 // Fallback to existing. 143 return status, nil, nil 144 } 145 146 return latest, apubStatus, nil 147 } 148 149 // RefreshStatus: implements Dereferencer{}.RefreshStatus(). 150 func (d *deref) RefreshStatus(ctx context.Context, requestUser string, status *gtsmodel.Status, apubStatus ap.Statusable, force bool) (*gtsmodel.Status, ap.Statusable, error) { 151 // Check whether needs update. 152 if statusUpToDate(status) { 153 return status, nil, nil 154 } 155 156 // Parse the URI from status. 157 uri, err := url.Parse(status.URI) 158 if err != nil { 159 return nil, nil, gtserror.Newf("invalid status uri %q: %w", status.URI, err) 160 } 161 162 // Try to update + deref existing status model. 163 latest, apubStatus, err := d.enrichStatus(ctx, 164 requestUser, 165 uri, 166 status, 167 apubStatus, 168 ) 169 if err != nil { 170 return nil, nil, err 171 } 172 173 // This status was updated, enqueue re-dereferencing the whole thread. 174 d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) { 175 d.dereferenceThread(ctx, requestUser, uri, latest, apubStatus) 176 }) 177 178 return latest, apubStatus, nil 179 } 180 181 // RefreshStatusAsync: implements Dereferencer{}.RefreshStatusAsync(). 182 func (d *deref) RefreshStatusAsync(ctx context.Context, requestUser string, status *gtsmodel.Status, apubStatus ap.Statusable, force bool) { 183 // Check whether needs update. 184 if statusUpToDate(status) { 185 return 186 } 187 188 // Parse the URI from status. 189 uri, err := url.Parse(status.URI) 190 if err != nil { 191 log.Errorf(ctx, "invalid status uri %q: %v", status.URI, err) 192 return 193 } 194 195 // Enqueue a worker function to re-fetch this status async. 196 d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) { 197 latest, apubStatus, err := d.enrichStatus(ctx, requestUser, uri, status, apubStatus) 198 if err != nil { 199 log.Errorf(ctx, "error enriching remote status: %v", err) 200 return 201 } 202 203 // This status was updated, re-dereference the whole thread. 204 d.dereferenceThread(ctx, requestUser, uri, latest, apubStatus) 205 }) 206 } 207 208 // enrichStatus will enrich the given status, whether a new barebones model, or existing model from the database. It handles necessary dereferencing etc. 209 func (d *deref) enrichStatus(ctx context.Context, requestUser string, uri *url.URL, status *gtsmodel.Status, apubStatus ap.Statusable) (*gtsmodel.Status, ap.Statusable, error) { 210 // Pre-fetch a transport for requesting username, used by later dereferencing. 211 tsport, err := d.transportController.NewTransportForUsername(ctx, requestUser) 212 if err != nil { 213 return nil, nil, gtserror.Newf("couldn't create transport: %w", err) 214 } 215 216 // Check whether this account URI is a blocked domain / subdomain. 217 if blocked, err := d.state.DB.IsDomainBlocked(ctx, uri.Host); err != nil { 218 return nil, nil, gtserror.Newf("error checking blocked domain: %w", err) 219 } else if blocked { 220 return nil, nil, gtserror.Newf("%s is blocked", uri.Host) 221 } 222 223 var derefd bool 224 225 if apubStatus == nil { 226 // Dereference latest version of the status. 227 b, err := tsport.Dereference(ctx, uri) 228 if err != nil { 229 return nil, nil, &ErrNotRetrievable{gtserror.Newf("error deferencing %s: %w", uri, err)} 230 } 231 232 // Attempt to resolve ActivityPub status from data. 233 apubStatus, err = ap.ResolveStatusable(ctx, b) 234 if err != nil { 235 return nil, nil, gtserror.Newf("error resolving statusable from data for account %s: %w", uri, err) 236 } 237 238 // Mark as deref'd. 239 derefd = true 240 } 241 242 // Get the attributed-to account in order to fetch profile. 243 attributedTo, err := ap.ExtractAttributedToURI(apubStatus) 244 if err != nil { 245 return nil, nil, gtserror.New("attributedTo was empty") 246 } 247 248 // Ensure we have the author account of the status dereferenced (+ up-to-date). 249 if author, _, err := d.getAccountByURI(ctx, requestUser, attributedTo); err != nil { 250 if status.AccountID == "" { 251 // Provided status account is nil, i.e. this is a new status / author, so a deref fail is unrecoverable. 252 return nil, nil, gtserror.Newf("failed to dereference status author %s: %w", uri, err) 253 } 254 } else if status.AccountID != "" && status.AccountID != author.ID { 255 // There already existed an account for this status author, but account ID changed. This shouldn't happen! 256 log.Warnf(ctx, "status author account ID changed: old=%s new=%s", status.AccountID, author.ID) 257 } 258 259 // By default we assume that apubStatus has been passed, 260 // indicating that the given status is already latest. 261 latestStatus := status 262 263 if derefd { 264 // ActivityPub model was recently dereferenced, so assume that passed status 265 // may contain out-of-date information, convert AP model to our GTS model. 266 latestStatus, err = d.typeConverter.ASStatusToStatus(ctx, apubStatus) 267 if err != nil { 268 return nil, nil, gtserror.Newf("error converting statusable to gts model for status %s: %w", uri, err) 269 } 270 } 271 272 // Use existing status ID. 273 latestStatus.ID = status.ID 274 275 if latestStatus.ID == "" { 276 // Generate new status ID from the provided creation date. 277 latestStatus.ID, err = id.NewULIDFromTime(latestStatus.CreatedAt) 278 if err != nil { 279 return nil, nil, gtserror.Newf("invalid created at date: %w", err) 280 } 281 } 282 283 // Carry-over values and set fetch time. 284 latestStatus.FetchedAt = time.Now() 285 latestStatus.Local = status.Local 286 287 // Ensure the status' mentions are populated, and pass in existing to check for changes. 288 if err := d.fetchStatusMentions(ctx, requestUser, status, latestStatus); err != nil { 289 return nil, nil, gtserror.Newf("error populating mentions for status %s: %w", uri, err) 290 } 291 292 // TODO: populateStatusTags() 293 294 // Ensure the status' media attachments are populated, passing in existing to check for changes. 295 if err := d.fetchStatusAttachments(ctx, tsport, status, latestStatus); err != nil { 296 return nil, nil, gtserror.Newf("error populating attachments for status %s: %w", uri, err) 297 } 298 299 // Ensure the status' emoji attachments are populated, passing in existing to check for changes. 300 if err := d.fetchStatusEmojis(ctx, requestUser, status, latestStatus); err != nil { 301 return nil, nil, gtserror.Newf("error populating emojis for status %s: %w", uri, err) 302 } 303 304 if status.CreatedAt.IsZero() { 305 // CreatedAt will be zero if no local copy was 306 // found in one of the GetStatusBy___() functions. 307 // 308 // This is new, put the status in the database. 309 err := d.state.DB.PutStatus(ctx, latestStatus) 310 311 if errors.Is(err, db.ErrAlreadyExists) { 312 // TODO: replace this quick fix with per-URI deref locks. 313 latestStatus, err = d.state.DB.GetStatusByURI(ctx, latestStatus.URI) 314 return latestStatus, nil, err 315 } 316 317 if err != nil { 318 return nil, nil, gtserror.Newf("error putting in database: %w", err) 319 } 320 } else { 321 // This is an existing status, update the model in the database. 322 if err := d.state.DB.UpdateStatus(ctx, latestStatus); err != nil { 323 return nil, nil, gtserror.Newf("error updating database: %w", err) 324 } 325 } 326 327 return latestStatus, apubStatus, nil 328 } 329 330 func (d *deref) fetchStatusMentions(ctx context.Context, requestUser string, existing *gtsmodel.Status, status *gtsmodel.Status) error { 331 // Allocate new slice to take the yet-to-be created mention IDs. 332 status.MentionIDs = make([]string, len(status.Mentions)) 333 334 for i := range status.Mentions { 335 mention := status.Mentions[i] 336 337 // Look for existing mention with target account URI first. 338 existing, ok := existing.GetMentionByTargetURI(mention.TargetAccountURI) 339 if ok && existing.ID != "" { 340 status.Mentions[i] = existing 341 status.MentionIDs[i] = existing.ID 342 continue 343 } 344 345 // Ensure that mention account URI is parseable. 346 accountURI, err := url.Parse(mention.TargetAccountURI) 347 if err != nil { 348 log.Errorf(ctx, "invalid account uri %q: %v", mention.TargetAccountURI, err) 349 continue 350 } 351 352 // Ensure we have the account of the mention target dereferenced. 353 mention.TargetAccount, _, err = d.getAccountByURI(ctx, requestUser, accountURI) 354 if err != nil { 355 log.Errorf(ctx, "failed to dereference account %s: %v", accountURI, err) 356 continue 357 } 358 359 // Generate new ID according to status creation. 360 mention.ID, err = id.NewULIDFromTime(status.CreatedAt) 361 if err != nil { 362 log.Errorf(ctx, "invalid created at date: %v", err) 363 mention.ID = id.NewULID() // just use "now" 364 } 365 366 // Set known further mention details. 367 mention.CreatedAt = status.CreatedAt 368 mention.UpdatedAt = status.UpdatedAt 369 mention.OriginAccount = status.Account 370 mention.OriginAccountID = status.AccountID 371 mention.OriginAccountURI = status.AccountURI 372 mention.TargetAccountID = mention.TargetAccount.ID 373 mention.TargetAccountURI = mention.TargetAccount.URI 374 mention.TargetAccountURL = mention.TargetAccount.URL 375 mention.StatusID = status.ID 376 mention.Status = status 377 378 // Place the new mention into the database. 379 if err := d.state.DB.PutMention(ctx, mention); err != nil { 380 return gtserror.Newf("error putting mention in database: %w", err) 381 } 382 383 // Set the *new* mention and ID. 384 status.Mentions[i] = mention 385 status.MentionIDs[i] = mention.ID 386 } 387 388 for i := 0; i < len(status.MentionIDs); i++ { 389 if status.MentionIDs[i] == "" { 390 // This is a failed mention population, likely due 391 // to invalid incoming data / now-deleted accounts. 392 copy(status.Mentions[i:], status.Mentions[i+1:]) 393 copy(status.MentionIDs[i:], status.MentionIDs[i+1:]) 394 status.Mentions = status.Mentions[:len(status.Mentions)-1] 395 status.MentionIDs = status.MentionIDs[:len(status.MentionIDs)-1] 396 } 397 } 398 399 return nil 400 } 401 402 func (d *deref) fetchStatusAttachments(ctx context.Context, tsport transport.Transport, existing *gtsmodel.Status, status *gtsmodel.Status) error { 403 // Allocate new slice to take the yet-to-be fetched attachment IDs. 404 status.AttachmentIDs = make([]string, len(status.Attachments)) 405 406 for i := range status.Attachments { 407 placeholder := status.Attachments[i] 408 409 // Look for existing media attachment with remoet URL first. 410 existing, ok := existing.GetAttachmentByRemoteURL(placeholder.RemoteURL) 411 if ok && existing.ID != "" { 412 status.Attachments[i] = existing 413 status.AttachmentIDs[i] = existing.ID 414 continue 415 } 416 417 // Ensure a valid media attachment remote URL. 418 remoteURL, err := url.Parse(placeholder.RemoteURL) 419 if err != nil { 420 log.Errorf(ctx, "invalid remote media url %q: %v", placeholder.RemoteURL, err) 421 continue 422 } 423 424 // Start pre-processing remote media at remote URL. 425 processing, err := d.mediaManager.PreProcessMedia(ctx, func(ctx context.Context) (io.ReadCloser, int64, error) { 426 return tsport.DereferenceMedia(ctx, remoteURL) 427 }, status.AccountID, &media.AdditionalMediaInfo{ 428 StatusID: &status.ID, 429 RemoteURL: &placeholder.RemoteURL, 430 Description: &placeholder.Description, 431 Blurhash: &placeholder.Blurhash, 432 }) 433 if err != nil { 434 log.Errorf(ctx, "error processing attachment: %v", err) 435 continue 436 } 437 438 // Force attachment loading *right now*. 439 media, err := processing.LoadAttachment(ctx) 440 if err != nil { 441 log.Errorf(ctx, "error loading attachment: %v", err) 442 continue 443 } 444 445 // Set the *new* attachment and ID. 446 status.Attachments[i] = media 447 status.AttachmentIDs[i] = media.ID 448 } 449 450 for i := 0; i < len(status.AttachmentIDs); i++ { 451 if status.AttachmentIDs[i] == "" { 452 // This is a failed attachment population, this may 453 // be due to us not currently supporting a media type. 454 copy(status.Attachments[i:], status.Attachments[i+1:]) 455 copy(status.AttachmentIDs[i:], status.AttachmentIDs[i+1:]) 456 status.Attachments = status.Attachments[:len(status.Attachments)-1] 457 status.AttachmentIDs = status.AttachmentIDs[:len(status.AttachmentIDs)-1] 458 } 459 } 460 461 return nil 462 } 463 464 func (d *deref) fetchStatusEmojis(ctx context.Context, requestUser string, existing *gtsmodel.Status, status *gtsmodel.Status) error { 465 // Fetch the full-fleshed-out emoji objects for our status. 466 emojis, err := d.populateEmojis(ctx, status.Emojis, requestUser) 467 if err != nil { 468 return gtserror.Newf("failed to populate emojis: %w", err) 469 } 470 471 // Iterate over and get their IDs. 472 emojiIDs := make([]string, 0, len(emojis)) 473 for _, e := range emojis { 474 emojiIDs = append(emojiIDs, e.ID) 475 } 476 477 // Set known emoji details. 478 status.Emojis = emojis 479 status.EmojiIDs = emojiIDs 480 481 return nil 482 }