search.go (12924B)
1 // GoToSocial 2 // Copyright (C) GoToSocial Authors admin@gotosocial.org 3 // SPDX-License-Identifier: AGPL-3.0-or-later 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package bundb 19 20 import ( 21 "context" 22 "strings" 23 24 "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" 25 "github.com/superseriousbusiness/gotosocial/internal/id" 26 "github.com/superseriousbusiness/gotosocial/internal/log" 27 "github.com/superseriousbusiness/gotosocial/internal/state" 28 "github.com/uptrace/bun" 29 "github.com/uptrace/bun/dialect" 30 ) 31 32 // todo: currently we pass an 'offset' parameter into functions owned by this struct, 33 // which is ignored. 34 // 35 // The idea of 'offset' is to allow callers to page through results without supplying 36 // maxID or minID params; they simply use the offset as more or less a 'page number'. 37 // This works fine when you're dealing with something like Elasticsearch, but for 38 // SQLite or Postgres 'LIKE' queries it doesn't really, because for each higher offset 39 // you have to calculate the value of all the previous offsets as well *within the 40 // execution time of the query*. It's MUCH more efficient to page using maxID and 41 // minID for queries like this. For now, then, we just ignore the offset and hope that 42 // the caller will page using maxID and minID instead. 43 // 44 // In future, however, it would be good to support offset in a way that doesn't totally 45 // destroy database queries. One option would be to cache previous offsets when paging 46 // down (which is the most common use case). 47 // 48 // For example, say a caller makes a call with offset 0: we run the query as normal, 49 // and in a 10 minute cache or something, store the next maxID value as it would be for 50 // offset 1, for the supplied query, limit, following, etc. Then when they call for 51 // offset 1, instead of supplying 'offset' in the query and causing slowdown, we check 52 // the cache to see if we have the next maxID value stored for that query, and use that 53 // instead. If a caller out of the blue requests offset 4 or something, on an empty cache, 54 // we could run the previous 4 queries and store the offsets for those before making the 55 // 5th call for page 4. 56 // 57 // This isn't ideal, of course, but at least we could cover the most common use case of 58 // a caller paging down through results. 59 type searchDB struct { 60 conn *DBConn 61 state *state.State 62 } 63 64 // replacer is a thread-safe string replacer which escapes 65 // common SQLite + Postgres `LIKE` wildcard chars using the 66 // escape character `\`. Initialized as a var in this package 67 // so it can be reused. 68 var replacer = strings.NewReplacer( 69 `\`, `\\`, // Escape char. 70 `%`, `\%`, // Zero or more char. 71 `_`, `\_`, // Exactly one char. 72 ) 73 74 // whereSubqueryLike appends a WHERE clause to the 75 // given SelectQuery q, which searches for matches 76 // of searchQuery in the given subQuery using LIKE. 77 func whereSubqueryLike( 78 q *bun.SelectQuery, 79 subQuery *bun.SelectQuery, 80 searchQuery string, 81 ) *bun.SelectQuery { 82 // Escape existing wildcard + escape 83 // chars in the search query string. 84 searchQuery = replacer.Replace(searchQuery) 85 86 // Add our own wildcards back in; search 87 // zero or more chars around the query. 88 searchQuery = `%` + searchQuery + `%` 89 90 // Append resulting WHERE 91 // clause to the main query. 92 return q.Where( 93 "(?) LIKE ? ESCAPE ?", 94 subQuery, searchQuery, `\`, 95 ) 96 } 97 98 // Query example (SQLite): 99 // 100 // SELECT "account"."id" FROM "accounts" AS "account" 101 // WHERE (("account"."domain" IS NULL) OR ("account"."domain" != "account"."username")) 102 // AND ("account"."id" < 'ZZZZZZZZZZZZZZZZZZZZZZZZZZ') 103 // AND ("account"."id" IN (SELECT "target_account_id" FROM "follows" WHERE ("account_id" = '016T5Q3SQKBT337DAKVSKNXXW1'))) 104 // AND ((SELECT LOWER("account"."username" || COALESCE("account"."display_name", '') || COALESCE("account"."note", '')) AS "account_text") LIKE '%turtle%' ESCAPE '\') 105 // ORDER BY "account"."id" DESC LIMIT 10 106 func (s *searchDB) SearchForAccounts( 107 ctx context.Context, 108 accountID string, 109 query string, 110 maxID string, 111 minID string, 112 limit int, 113 following bool, 114 offset int, 115 ) ([]*gtsmodel.Account, error) { 116 // Ensure reasonable 117 if limit < 0 { 118 limit = 0 119 } 120 121 // Make educated guess for slice size 122 var ( 123 accountIDs = make([]string, 0, limit) 124 frontToBack = true 125 ) 126 127 q := s.conn. 128 NewSelect(). 129 TableExpr("? AS ?", bun.Ident("accounts"), bun.Ident("account")). 130 // Select only IDs from table. 131 Column("account.id"). 132 // Try to ignore instance accounts. Account domain must 133 // be either nil or, if set, not equal to the account's 134 // username (which is commonly used to indicate it's an 135 // instance service account). 136 WhereGroup(" AND ", func(q *bun.SelectQuery) *bun.SelectQuery { 137 return q. 138 Where("? IS NULL", bun.Ident("account.domain")). 139 WhereOr("? != ?", bun.Ident("account.domain"), bun.Ident("account.username")) 140 }) 141 142 // Return only items with a LOWER id than maxID. 143 if maxID == "" { 144 maxID = id.Highest 145 } 146 q = q.Where("? < ?", bun.Ident("account.id"), maxID) 147 148 if minID != "" { 149 // Return only items with a HIGHER id than minID. 150 q = q.Where("? > ?", bun.Ident("account.id"), minID) 151 152 // page up 153 frontToBack = false 154 } 155 156 if following { 157 // Select only from accounts followed by accountID. 158 q = q.Where( 159 "? IN (?)", 160 bun.Ident("account.id"), 161 s.followedAccounts(accountID), 162 ) 163 } 164 165 // Select account text as subquery. 166 accountTextSubq := s.accountText(following) 167 168 // Search using LIKE for matches of query 169 // string within accountText subquery. 170 q = whereSubqueryLike(q, accountTextSubq, query) 171 172 if limit > 0 { 173 // Limit amount of accounts returned. 174 q = q.Limit(limit) 175 } 176 177 if frontToBack { 178 // Page down. 179 q = q.Order("account.id DESC") 180 } else { 181 // Page up. 182 q = q.Order("account.id ASC") 183 } 184 185 if err := q.Scan(ctx, &accountIDs); err != nil { 186 return nil, s.conn.ProcessError(err) 187 } 188 189 if len(accountIDs) == 0 { 190 return nil, nil 191 } 192 193 // If we're paging up, we still want accounts 194 // to be sorted by ID desc, so reverse ids slice. 195 // https://zchee.github.io/golang-wiki/SliceTricks/#reversing 196 if !frontToBack { 197 for l, r := 0, len(accountIDs)-1; l < r; l, r = l+1, r-1 { 198 accountIDs[l], accountIDs[r] = accountIDs[r], accountIDs[l] 199 } 200 } 201 202 accounts := make([]*gtsmodel.Account, 0, len(accountIDs)) 203 for _, id := range accountIDs { 204 // Fetch account from db for ID 205 account, err := s.state.DB.GetAccountByID(ctx, id) 206 if err != nil { 207 log.Errorf(ctx, "error fetching account %q: %v", id, err) 208 continue 209 } 210 211 // Append account to slice 212 accounts = append(accounts, account) 213 } 214 215 return accounts, nil 216 } 217 218 // followedAccounts returns a subquery that selects only IDs 219 // of accounts that are followed by the given accountID. 220 func (s *searchDB) followedAccounts(accountID string) *bun.SelectQuery { 221 return s.conn. 222 NewSelect(). 223 TableExpr("? AS ?", bun.Ident("follows"), bun.Ident("follow")). 224 Column("follow.target_account_id"). 225 Where("? = ?", bun.Ident("follow.account_id"), accountID) 226 } 227 228 // statusText returns a subquery that selects a concatenation 229 // of account username and display name as "account_text". If 230 // `following` is true, then account note will also be included 231 // in the concatenation. 232 func (s *searchDB) accountText(following bool) *bun.SelectQuery { 233 var ( 234 accountText = s.conn.NewSelect() 235 query string 236 args []interface{} 237 ) 238 239 if following { 240 // If querying for accounts we follow, 241 // include note in text search params. 242 args = []interface{}{ 243 bun.Ident("account.username"), 244 bun.Ident("account.display_name"), "", 245 bun.Ident("account.note"), "", 246 bun.Ident("account_text"), 247 } 248 } else { 249 // If querying for accounts we're not following, 250 // don't include note in text search params. 251 args = []interface{}{ 252 bun.Ident("account.username"), 253 bun.Ident("account.display_name"), "", 254 bun.Ident("account_text"), 255 } 256 } 257 258 // SQLite and Postgres use different syntaxes for 259 // concatenation, and we also need to use a 260 // different number of placeholders depending on 261 // following/not following. COALESCE calls ensure 262 // that we're not trying to concatenate null values. 263 d := s.conn.Dialect().Name() 264 switch { 265 266 case d == dialect.SQLite && following: 267 query = "LOWER(? || COALESCE(?, ?) || COALESCE(?, ?)) AS ?" 268 269 case d == dialect.SQLite && !following: 270 query = "LOWER(? || COALESCE(?, ?)) AS ?" 271 272 case d == dialect.PG && following: 273 query = "LOWER(CONCAT(?, COALESCE(?, ?), COALESCE(?, ?))) AS ?" 274 275 case d == dialect.PG && !following: 276 query = "LOWER(CONCAT(?, COALESCE(?, ?))) AS ?" 277 278 default: 279 panic("db conn was neither pg not sqlite") 280 } 281 282 return accountText.ColumnExpr(query, args...) 283 } 284 285 // Query example (SQLite): 286 // 287 // SELECT "status"."id" 288 // FROM "statuses" AS "status" 289 // WHERE ("status"."boost_of_id" IS NULL) 290 // AND (("status"."account_id" = '01F8MH1H7YV1Z7D2C8K2730QBF') OR ("status"."in_reply_to_account_id" = '01F8MH1H7YV1Z7D2C8K2730QBF')) 291 // AND ("status"."id" < 'ZZZZZZZZZZZZZZZZZZZZZZZZZZ') 292 // AND ((SELECT LOWER("status"."content" || COALESCE("status"."content_warning", '')) AS "status_text") LIKE '%hello%' ESCAPE '\') 293 // ORDER BY "status"."id" DESC LIMIT 10 294 func (s *searchDB) SearchForStatuses( 295 ctx context.Context, 296 accountID string, 297 query string, 298 maxID string, 299 minID string, 300 limit int, 301 offset int, 302 ) ([]*gtsmodel.Status, error) { 303 // Ensure reasonable 304 if limit < 0 { 305 limit = 0 306 } 307 308 // Make educated guess for slice size 309 var ( 310 statusIDs = make([]string, 0, limit) 311 frontToBack = true 312 ) 313 314 q := s.conn. 315 NewSelect(). 316 TableExpr("? AS ?", bun.Ident("statuses"), bun.Ident("status")). 317 // Select only IDs from table 318 Column("status.id"). 319 // Ignore boosts. 320 Where("? IS NULL", bun.Ident("status.boost_of_id")). 321 // Select only statuses created by 322 // accountID or replying to accountID. 323 WhereGroup(" AND ", func(q *bun.SelectQuery) *bun.SelectQuery { 324 return q. 325 Where("? = ?", bun.Ident("status.account_id"), accountID). 326 WhereOr("? = ?", bun.Ident("status.in_reply_to_account_id"), accountID) 327 }) 328 329 // Return only items with a LOWER id than maxID. 330 if maxID == "" { 331 maxID = id.Highest 332 } 333 q = q.Where("? < ?", bun.Ident("status.id"), maxID) 334 335 if minID != "" { 336 // return only statuses HIGHER (ie., newer) than minID 337 q = q.Where("? > ?", bun.Ident("status.id"), minID) 338 339 // page up 340 frontToBack = false 341 } 342 343 // Select status text as subquery. 344 statusTextSubq := s.statusText() 345 346 // Search using LIKE for matches of query 347 // string within statusText subquery. 348 q = whereSubqueryLike(q, statusTextSubq, query) 349 350 if limit > 0 { 351 // Limit amount of statuses returned. 352 q = q.Limit(limit) 353 } 354 355 if frontToBack { 356 // Page down. 357 q = q.Order("status.id DESC") 358 } else { 359 // Page up. 360 q = q.Order("status.id ASC") 361 } 362 363 if err := q.Scan(ctx, &statusIDs); err != nil { 364 return nil, s.conn.ProcessError(err) 365 } 366 367 if len(statusIDs) == 0 { 368 return nil, nil 369 } 370 371 // If we're paging up, we still want statuses 372 // to be sorted by ID desc, so reverse ids slice. 373 // https://zchee.github.io/golang-wiki/SliceTricks/#reversing 374 if !frontToBack { 375 for l, r := 0, len(statusIDs)-1; l < r; l, r = l+1, r-1 { 376 statusIDs[l], statusIDs[r] = statusIDs[r], statusIDs[l] 377 } 378 } 379 380 statuses := make([]*gtsmodel.Status, 0, len(statusIDs)) 381 for _, id := range statusIDs { 382 // Fetch status from db for ID 383 status, err := s.state.DB.GetStatusByID(ctx, id) 384 if err != nil { 385 log.Errorf(ctx, "error fetching status %q: %v", id, err) 386 continue 387 } 388 389 // Append status to slice 390 statuses = append(statuses, status) 391 } 392 393 return statuses, nil 394 } 395 396 // statusText returns a subquery that selects a concatenation 397 // of status content and content warning as "status_text". 398 func (s *searchDB) statusText() *bun.SelectQuery { 399 statusText := s.conn.NewSelect() 400 401 // SQLite and Postgres use different 402 // syntaxes for concatenation. 403 switch s.conn.Dialect().Name() { 404 405 case dialect.SQLite: 406 statusText = statusText.ColumnExpr( 407 "LOWER(? || COALESCE(?, ?)) AS ?", 408 bun.Ident("status.content"), bun.Ident("status.content_warning"), "", 409 bun.Ident("status_text")) 410 411 case dialect.PG: 412 statusText = statusText.ColumnExpr( 413 "LOWER(CONCAT(?, COALESCE(?, ?))) AS ?", 414 bun.Ident("status.content"), bun.Ident("status.content_warning"), "", 415 bun.Ident("status_text")) 416 417 default: 418 panic("db conn was neither pg not sqlite") 419 } 420 421 return statusText 422 }