get.go (20395B)
1 // GoToSocial 2 // Copyright (C) GoToSocial Authors admin@gotosocial.org 3 // SPDX-License-Identifier: AGPL-3.0-or-later 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package search 19 20 import ( 21 "context" 22 "errors" 23 "fmt" 24 "net/mail" 25 "net/url" 26 "strings" 27 28 "codeberg.org/gruf/go-kv" 29 "github.com/superseriousbusiness/gotosocial/internal/ap" 30 apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model" 31 "github.com/superseriousbusiness/gotosocial/internal/config" 32 "github.com/superseriousbusiness/gotosocial/internal/db" 33 "github.com/superseriousbusiness/gotosocial/internal/federation/dereferencing" 34 "github.com/superseriousbusiness/gotosocial/internal/gtscontext" 35 "github.com/superseriousbusiness/gotosocial/internal/gtserror" 36 "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" 37 "github.com/superseriousbusiness/gotosocial/internal/log" 38 "github.com/superseriousbusiness/gotosocial/internal/util" 39 ) 40 41 const ( 42 queryTypeAny = "" 43 queryTypeAccounts = "accounts" 44 queryTypeStatuses = "statuses" 45 queryTypeHashtags = "hashtags" 46 ) 47 48 // Get performs a search for accounts and/or statuses using the 49 // provided request parameters. 50 // 51 // Implementation note: in this function, we try to only return 52 // an error to the caller they've submitted a bad request, or when 53 // a serious error has occurred. This is because the search has a 54 // sort of fallthrough logic: if we can't get a result with one 55 // type of search, we should proceed with y search rather than 56 // returning an early error. 57 // 58 // If we get to the end and still haven't found anything, even 59 // then we shouldn't return an error, just return an empty result. 60 func (p *Processor) Get( 61 ctx context.Context, 62 account *gtsmodel.Account, 63 req *apimodel.SearchRequest, 64 ) (*apimodel.SearchResult, gtserror.WithCode) { 65 66 var ( 67 maxID = req.MaxID 68 minID = req.MinID 69 limit = req.Limit 70 offset = req.Offset 71 query = strings.TrimSpace(req.Query) // Trim trailing/leading whitespace. 72 queryType = strings.TrimSpace(strings.ToLower(req.QueryType)) // Trim trailing/leading whitespace; convert to lowercase. 73 resolve = req.Resolve 74 following = req.Following 75 ) 76 77 // Validate query. 78 if query == "" { 79 err := errors.New("search query was empty string after trimming space") 80 return nil, gtserror.NewErrorBadRequest(err, err.Error()) 81 } 82 83 // Validate query type. 84 switch queryType { 85 case queryTypeAny, queryTypeAccounts, queryTypeStatuses, queryTypeHashtags: 86 // No problem. 87 default: 88 err := fmt.Errorf( 89 "search query type %s was not recognized, valid options are ['%s', '%s', '%s', '%s']", 90 queryType, queryTypeAny, queryTypeAccounts, queryTypeStatuses, queryTypeHashtags, 91 ) 92 return nil, gtserror.NewErrorBadRequest(err, err.Error()) 93 } 94 95 log. 96 WithContext(ctx). 97 WithFields(kv.Fields{ 98 {"maxID", maxID}, 99 {"minID", minID}, 100 {"limit", limit}, 101 {"offset", offset}, 102 {"query", query}, 103 {"queryType", queryType}, 104 {"resolve", resolve}, 105 {"following", following}, 106 }...). 107 Debugf("beginning search") 108 109 // todo: Currently we don't support offset for paging; 110 // a caller can page using maxID or minID, but if they 111 // supply an offset greater than 0, return nothing as 112 // though there were no additional results. 113 if req.Offset > 0 { 114 return p.packageSearchResult(ctx, account, nil, nil) 115 } 116 117 var ( 118 foundStatuses = make([]*gtsmodel.Status, 0, limit) 119 foundAccounts = make([]*gtsmodel.Account, 0, limit) 120 appendStatus = func(foundStatus *gtsmodel.Status) { foundStatuses = append(foundStatuses, foundStatus) } 121 appendAccount = func(foundAccount *gtsmodel.Account) { foundAccounts = append(foundAccounts, foundAccount) } 122 keepLooking bool 123 err error 124 ) 125 126 // Only try to search by namestring if search type includes 127 // accounts, since this is all namestring search can return. 128 if includeAccounts(queryType) { 129 // Copy query to avoid altering original. 130 var queryC = query 131 132 // If query looks vaguely like an email address, ie. it doesn't 133 // start with '@' but it has '@' in it somewhere, it's probably 134 // a poorly-formed namestring. Be generous and correct for this. 135 if strings.Contains(queryC, "@") && queryC[0] != '@' { 136 if _, err := mail.ParseAddress(queryC); err == nil { 137 // Yep, really does look like 138 // an email address! Be nice. 139 queryC = "@" + queryC 140 } 141 } 142 143 // Search using what may or may not be a namestring. 144 keepLooking, err = p.accountsByNamestring( 145 ctx, 146 account, 147 maxID, 148 minID, 149 limit, 150 offset, 151 queryC, 152 resolve, 153 following, 154 appendAccount, 155 ) 156 if err != nil && !errors.Is(err, db.ErrNoEntries) { 157 err = gtserror.Newf("error searching by namestring: %w", err) 158 return nil, gtserror.NewErrorInternalError(err) 159 } 160 161 if !keepLooking { 162 // Return whatever we have. 163 return p.packageSearchResult( 164 ctx, 165 account, 166 foundAccounts, 167 foundStatuses, 168 ) 169 } 170 } 171 172 // Check if the query is a URI with a recognizable 173 // scheme and use it to look for accounts or statuses. 174 keepLooking, err = p.byURI( 175 ctx, 176 account, 177 query, 178 queryType, 179 resolve, 180 appendAccount, 181 appendStatus, 182 ) 183 if err != nil && !errors.Is(err, db.ErrNoEntries) { 184 err = gtserror.Newf("error searching by URI: %w", err) 185 return nil, gtserror.NewErrorInternalError(err) 186 } 187 188 if !keepLooking { 189 // Return whatever we have. 190 return p.packageSearchResult( 191 ctx, 192 account, 193 foundAccounts, 194 foundStatuses, 195 ) 196 } 197 198 // As a last resort, search for accounts and 199 // statuses using the query as arbitrary text. 200 if err := p.byText( 201 ctx, 202 account, 203 maxID, 204 minID, 205 limit, 206 offset, 207 query, 208 queryType, 209 following, 210 appendAccount, 211 appendStatus, 212 ); err != nil && !errors.Is(err, db.ErrNoEntries) { 213 err = gtserror.Newf("error searching by text: %w", err) 214 return nil, gtserror.NewErrorInternalError(err) 215 } 216 217 // Return whatever we ended 218 // up with (could be nothing). 219 return p.packageSearchResult( 220 ctx, 221 account, 222 foundAccounts, 223 foundStatuses, 224 ) 225 } 226 227 // accountsByNamestring searches for accounts using the 228 // provided namestring query. If domain is not set in 229 // the namestring, it may return more than one result 230 // by doing a text search in the database for accounts 231 // matching the query. Otherwise, it tries to return an 232 // exact match. 233 func (p *Processor) accountsByNamestring( 234 ctx context.Context, 235 requestingAccount *gtsmodel.Account, 236 maxID string, 237 minID string, 238 limit int, 239 offset int, 240 query string, 241 resolve bool, 242 following bool, 243 appendAccount func(*gtsmodel.Account), 244 ) (bool, error) { 245 // See if we have something that looks like a namestring. 246 username, domain, err := util.ExtractNamestringParts(query) 247 if err != nil { 248 // No need to return error; just not a namestring 249 // we can search with. Caller should keep looking 250 // with another search method. 251 return true, nil //nolint:nilerr 252 } 253 254 if domain == "" { 255 // No error, but no domain set. That means the query 256 // looked like '@someone' which is not an exact search. 257 // Try to search for any accounts that match the query 258 // string, and let the caller know they should stop. 259 return false, p.accountsByText( 260 ctx, 261 requestingAccount.ID, 262 maxID, 263 minID, 264 limit, 265 offset, 266 // OK to assume username is set now. Use 267 // it instead of query to omit leading '@'. 268 username, 269 following, 270 appendAccount, 271 ) 272 } 273 274 // No error, and domain and username were both set. 275 // Caller is likely trying to search for an exact 276 // match, from either a remote instance or local. 277 foundAccount, err := p.accountByUsernameDomain( 278 ctx, 279 requestingAccount, 280 username, 281 domain, 282 resolve, 283 ) 284 if err != nil { 285 // Check for semi-expected error types. 286 // On one of these, we can continue. 287 var ( 288 errNotRetrievable = new(*dereferencing.ErrNotRetrievable) // Item can't be dereferenced. 289 errWrongType = new(*ap.ErrWrongType) // Item was dereferenced, but wasn't an account. 290 ) 291 292 if !errors.As(err, errNotRetrievable) && !errors.As(err, errWrongType) { 293 err = gtserror.Newf("error looking up %s as account: %w", query, err) 294 return false, gtserror.NewErrorInternalError(err) 295 } 296 } else { 297 appendAccount(foundAccount) 298 } 299 300 // Regardless of whether we have a hit at this point, 301 // return false to indicate caller should stop looking; 302 // namestrings are a very specific format so it's unlikely 303 // the caller was looking for something other than an account. 304 return false, nil 305 } 306 307 // accountByUsernameDomain looks for one account with the given 308 // username and domain. If domain is empty, or equal to our domain, 309 // search will be confined to local accounts. 310 // 311 // Will return either a hit, an ErrNotRetrievable, an ErrWrongType, 312 // or a real error that the caller should handle. 313 func (p *Processor) accountByUsernameDomain( 314 ctx context.Context, 315 requestingAccount *gtsmodel.Account, 316 username string, 317 domain string, 318 resolve bool, 319 ) (*gtsmodel.Account, error) { 320 var usernameDomain string 321 if domain == "" || domain == config.GetHost() || domain == config.GetAccountDomain() { 322 // Local lookup, normalize domain. 323 domain = "" 324 usernameDomain = username 325 } else { 326 // Remote lookup. 327 usernameDomain = username + "@" + domain 328 329 // Ensure domain not blocked. 330 blocked, err := p.state.DB.IsDomainBlocked(ctx, domain) 331 if err != nil { 332 err = gtserror.Newf("error checking domain block: %w", err) 333 return nil, gtserror.NewErrorInternalError(err) 334 } 335 336 if blocked { 337 // Don't search on blocked domain. 338 return nil, dereferencing.NewErrNotRetrievable(err) 339 } 340 } 341 342 if resolve { 343 // We're allowed to resolve, leave the 344 // rest up to the dereferencer functions. 345 account, _, err := p.federator.GetAccountByUsernameDomain( 346 gtscontext.SetFastFail(ctx), 347 requestingAccount.Username, 348 username, domain, 349 ) 350 351 return account, err 352 } 353 354 // We're not allowed to resolve. Search the database 355 // for existing account with given username + domain. 356 account, err := p.state.DB.GetAccountByUsernameDomain(ctx, username, domain) 357 if err != nil && !errors.Is(err, db.ErrNoEntries) { 358 err = gtserror.Newf("error checking database for account %s: %w", usernameDomain, err) 359 return nil, err 360 } 361 362 if account != nil { 363 // We got a hit! No need to continue. 364 return account, nil 365 } 366 367 err = fmt.Errorf("account %s could not be retrieved locally and we cannot resolve", usernameDomain) 368 return nil, dereferencing.NewErrNotRetrievable(err) 369 } 370 371 // byURI looks for account(s) or a status with the given URI 372 // set as either its URL or ActivityPub URI. If it gets hits, it 373 // will call the provided append functions to return results. 374 // 375 // The boolean return value indicates to the caller whether the 376 // search should continue (true) or stop (false). False will be 377 // returned in cases where a hit has been found, the domain of the 378 // searched URI is blocked, or an unrecoverable error has occurred. 379 func (p *Processor) byURI( 380 ctx context.Context, 381 requestingAccount *gtsmodel.Account, 382 query string, 383 queryType string, 384 resolve bool, 385 appendAccount func(*gtsmodel.Account), 386 appendStatus func(*gtsmodel.Status), 387 ) (bool, error) { 388 uri, err := url.Parse(query) 389 if err != nil { 390 // No need to return error; just not a URI 391 // we can search with. Caller should keep 392 // looking with another search method. 393 return true, nil //nolint:nilerr 394 } 395 396 if !(uri.Scheme == "https" || uri.Scheme == "http") { 397 // This might just be a weirdly-parsed URI, 398 // since Go's url package tends to be a bit 399 // trigger-happy when deciding things are URIs. 400 // Indicate caller should keep looking. 401 return true, nil 402 } 403 404 blocked, err := p.state.DB.IsURIBlocked(ctx, uri) 405 if err != nil { 406 err = gtserror.Newf("error checking domain block: %w", err) 407 return false, gtserror.NewErrorInternalError(err) 408 } 409 410 if blocked { 411 // Don't search for blocked domains. 412 // Caller should stop looking. 413 return false, nil 414 } 415 416 if includeAccounts(queryType) { 417 // Check if URI points to an account. 418 foundAccount, err := p.accountByURI(ctx, requestingAccount, uri, resolve) 419 if err != nil { 420 // Check for semi-expected error types. 421 // On one of these, we can continue. 422 var ( 423 errNotRetrievable = new(*dereferencing.ErrNotRetrievable) // Item can't be dereferenced. 424 errWrongType = new(*ap.ErrWrongType) // Item was dereferenced, but wasn't an account. 425 ) 426 427 if !errors.As(err, errNotRetrievable) && !errors.As(err, errWrongType) { 428 err = gtserror.Newf("error looking up %s as account: %w", uri, err) 429 return false, gtserror.NewErrorInternalError(err) 430 } 431 } else { 432 // Hit; return false to indicate caller should 433 // stop looking, since it's extremely unlikely 434 // a status and an account will have the same URL. 435 appendAccount(foundAccount) 436 return false, nil 437 } 438 } 439 440 if includeStatuses(queryType) { 441 // Check if URI points to a status. 442 foundStatus, err := p.statusByURI(ctx, requestingAccount, uri, resolve) 443 if err != nil { 444 // Check for semi-expected error types. 445 // On one of these, we can continue. 446 var ( 447 errNotRetrievable = new(*dereferencing.ErrNotRetrievable) // Item can't be dereferenced. 448 errWrongType = new(*ap.ErrWrongType) // Item was dereferenced, but wasn't a status. 449 ) 450 451 if !errors.As(err, errNotRetrievable) && !errors.As(err, errWrongType) { 452 err = gtserror.Newf("error looking up %s as status: %w", uri, err) 453 return false, gtserror.NewErrorInternalError(err) 454 } 455 } else { 456 // Hit; return false to indicate caller should 457 // stop looking, since it's extremely unlikely 458 // a status and an account will have the same URL. 459 appendStatus(foundStatus) 460 return false, nil 461 } 462 } 463 464 // No errors, but no hits either; since this 465 // was a URI, caller should stop looking. 466 return false, nil 467 } 468 469 // accountByURI looks for one account with the given URI. 470 // If resolve is false, it will only look in the database. 471 // If resolve is true, it will try to resolve the account 472 // from remote using the URI, if necessary. 473 // 474 // Will return either a hit, ErrNotRetrievable, ErrWrongType, 475 // or a real error that the caller should handle. 476 func (p *Processor) accountByURI( 477 ctx context.Context, 478 requestingAccount *gtsmodel.Account, 479 uri *url.URL, 480 resolve bool, 481 ) (*gtsmodel.Account, error) { 482 if resolve { 483 // We're allowed to resolve, leave the 484 // rest up to the dereferencer functions. 485 account, _, err := p.federator.GetAccountByURI( 486 gtscontext.SetFastFail(ctx), 487 requestingAccount.Username, 488 uri, 489 ) 490 491 return account, err 492 } 493 494 // We're not allowed to resolve; search database only. 495 uriStr := uri.String() // stringify uri just once 496 497 // Search by ActivityPub URI. 498 account, err := p.state.DB.GetAccountByURI(ctx, uriStr) 499 if err != nil && !errors.Is(err, db.ErrNoEntries) { 500 err = gtserror.Newf("error checking database for account using URI %s: %w", uriStr, err) 501 return nil, err 502 } 503 504 if account != nil { 505 // We got a hit! No need to continue. 506 return account, nil 507 } 508 509 // No hit yet. Fallback to try by URL. 510 account, err = p.state.DB.GetAccountByURL(ctx, uriStr) 511 if err != nil && !errors.Is(err, db.ErrNoEntries) { 512 err = gtserror.Newf("error checking database for account using URL %s: %w", uriStr, err) 513 return nil, err 514 } 515 516 if account != nil { 517 // We got a hit! No need to continue. 518 return account, nil 519 } 520 521 err = fmt.Errorf("account %s could not be retrieved locally and we cannot resolve", uriStr) 522 return nil, dereferencing.NewErrNotRetrievable(err) 523 } 524 525 // statusByURI looks for one status with the given URI. 526 // If resolve is false, it will only look in the database. 527 // If resolve is true, it will try to resolve the status 528 // from remote using the URI, if necessary. 529 // 530 // Will return either a hit, ErrNotRetrievable, ErrWrongType, 531 // or a real error that the caller should handle. 532 func (p *Processor) statusByURI( 533 ctx context.Context, 534 requestingAccount *gtsmodel.Account, 535 uri *url.URL, 536 resolve bool, 537 ) (*gtsmodel.Status, error) { 538 if resolve { 539 // We're allowed to resolve, leave the 540 // rest up to the dereferencer functions. 541 status, _, err := p.federator.GetStatusByURI( 542 gtscontext.SetFastFail(ctx), 543 requestingAccount.Username, 544 uri, 545 ) 546 547 return status, err 548 } 549 550 // We're not allowed to resolve; search database only. 551 uriStr := uri.String() // stringify uri just once 552 553 // Search by ActivityPub URI. 554 status, err := p.state.DB.GetStatusByURI(ctx, uriStr) 555 if err != nil && !errors.Is(err, db.ErrNoEntries) { 556 err = gtserror.Newf("error checking database for status using URI %s: %w", uriStr, err) 557 return nil, err 558 } 559 560 if status != nil { 561 // We got a hit! No need to continue. 562 return status, nil 563 } 564 565 // No hit yet. Fallback to try by URL. 566 status, err = p.state.DB.GetStatusByURL(ctx, uriStr) 567 if err != nil && !errors.Is(err, db.ErrNoEntries) { 568 err = gtserror.Newf("error checking database for status using URL %s: %w", uriStr, err) 569 return nil, err 570 } 571 572 if status != nil { 573 // We got a hit! No need to continue. 574 return status, nil 575 } 576 577 err = fmt.Errorf("status %s could not be retrieved locally and we cannot resolve", uriStr) 578 return nil, dereferencing.NewErrNotRetrievable(err) 579 } 580 581 // byText searches in the database for accounts and/or 582 // statuses containing the given query string, using 583 // the provided parameters. 584 // 585 // If queryType is any (empty string), both accounts 586 // and statuses will be searched, else only the given 587 // queryType of item will be returned. 588 func (p *Processor) byText( 589 ctx context.Context, 590 requestingAccount *gtsmodel.Account, 591 maxID string, 592 minID string, 593 limit int, 594 offset int, 595 query string, 596 queryType string, 597 following bool, 598 appendAccount func(*gtsmodel.Account), 599 appendStatus func(*gtsmodel.Status), 600 ) error { 601 if queryType == queryTypeAny { 602 // If search type is any, ignore maxID and minID 603 // parameters, since we can't use them to page 604 // on both accounts and statuses simultaneously. 605 maxID = "" 606 minID = "" 607 } 608 609 if includeAccounts(queryType) { 610 // Search for accounts using the given text. 611 if err := p.accountsByText(ctx, 612 requestingAccount.ID, 613 maxID, 614 minID, 615 limit, 616 offset, 617 query, 618 following, 619 appendAccount, 620 ); err != nil { 621 return err 622 } 623 } 624 625 if includeStatuses(queryType) { 626 // Search for statuses using the given text. 627 if err := p.statusesByText(ctx, 628 requestingAccount.ID, 629 maxID, 630 minID, 631 limit, 632 offset, 633 query, 634 appendStatus, 635 ); err != nil { 636 return err 637 } 638 } 639 640 return nil 641 } 642 643 // accountsByText searches in the database for limit 644 // number of accounts using the given query text. 645 func (p *Processor) accountsByText( 646 ctx context.Context, 647 requestingAccountID string, 648 maxID string, 649 minID string, 650 limit int, 651 offset int, 652 query string, 653 following bool, 654 appendAccount func(*gtsmodel.Account), 655 ) error { 656 accounts, err := p.state.DB.SearchForAccounts( 657 ctx, 658 requestingAccountID, 659 query, maxID, minID, limit, following, offset) 660 if err != nil && !errors.Is(err, db.ErrNoEntries) { 661 return gtserror.Newf("error checking database for accounts using text %s: %w", query, err) 662 } 663 664 for _, account := range accounts { 665 appendAccount(account) 666 } 667 668 return nil 669 } 670 671 // statusesByText searches in the database for limit 672 // number of statuses using the given query text. 673 func (p *Processor) statusesByText( 674 ctx context.Context, 675 requestingAccountID string, 676 maxID string, 677 minID string, 678 limit int, 679 offset int, 680 query string, 681 appendStatus func(*gtsmodel.Status), 682 ) error { 683 statuses, err := p.state.DB.SearchForStatuses( 684 ctx, 685 requestingAccountID, 686 query, maxID, minID, limit, offset) 687 if err != nil && !errors.Is(err, db.ErrNoEntries) { 688 return gtserror.Newf("error checking database for statuses using text %s: %w", query, err) 689 } 690 691 for _, status := range statuses { 692 appendStatus(status) 693 } 694 695 return nil 696 }