gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

search.go (12924B)


      1 // GoToSocial
      2 // Copyright (C) GoToSocial Authors admin@gotosocial.org
      3 // SPDX-License-Identifier: AGPL-3.0-or-later
      4 //
      5 // This program is free software: you can redistribute it and/or modify
      6 // it under the terms of the GNU Affero General Public License as published by
      7 // the Free Software Foundation, either version 3 of the License, or
      8 // (at your option) any later version.
      9 //
     10 // This program is distributed in the hope that it will be useful,
     11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13 // GNU Affero General Public License for more details.
     14 //
     15 // You should have received a copy of the GNU Affero General Public License
     16 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
     17 
     18 package bundb
     19 
     20 import (
     21 	"context"
     22 	"strings"
     23 
     24 	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
     25 	"github.com/superseriousbusiness/gotosocial/internal/id"
     26 	"github.com/superseriousbusiness/gotosocial/internal/log"
     27 	"github.com/superseriousbusiness/gotosocial/internal/state"
     28 	"github.com/uptrace/bun"
     29 	"github.com/uptrace/bun/dialect"
     30 )
     31 
     32 // todo: currently we pass an 'offset' parameter into functions owned by this struct,
     33 // which is ignored.
     34 //
     35 // The idea of 'offset' is to allow callers to page through results without supplying
     36 // maxID or minID params; they simply use the offset as more or less a 'page number'.
     37 // This works fine when you're dealing with something like Elasticsearch, but for
     38 // SQLite or Postgres 'LIKE' queries it doesn't really, because for each higher offset
     39 // you have to calculate the value of all the previous offsets as well *within the
     40 // execution time of the query*. It's MUCH more efficient to page using maxID and
     41 // minID for queries like this. For now, then, we just ignore the offset and hope that
     42 // the caller will page using maxID and minID instead.
     43 //
     44 // In future, however, it would be good to support offset in a way that doesn't totally
     45 // destroy database queries. One option would be to cache previous offsets when paging
     46 // down (which is the most common use case).
     47 //
     48 // For example, say a caller makes a call with offset 0: we run the query as normal,
     49 // and in a 10 minute cache or something, store the next maxID value as it would be for
     50 // offset 1, for the supplied query, limit, following, etc. Then when they call for
     51 // offset 1, instead of supplying 'offset' in the query and causing slowdown, we check
     52 // the cache to see if we have the next maxID value stored for that query, and use that
     53 // instead. If a caller out of the blue requests offset 4 or something, on an empty cache,
     54 // we could run the previous 4 queries and store the offsets for those before making the
     55 // 5th call for page 4.
     56 //
     57 // This isn't ideal, of course, but at least we could cover the most common use case of
     58 // a caller paging down through results.
     59 type searchDB struct {
     60 	conn  *DBConn
     61 	state *state.State
     62 }
     63 
     64 // replacer is a thread-safe string replacer which escapes
     65 // common SQLite + Postgres `LIKE` wildcard chars using the
     66 // escape character `\`. Initialized as a var in this package
     67 // so it can be reused.
     68 var replacer = strings.NewReplacer(
     69 	`\`, `\\`, // Escape char.
     70 	`%`, `\%`, // Zero or more char.
     71 	`_`, `\_`, // Exactly one char.
     72 )
     73 
     74 // whereSubqueryLike appends a WHERE clause to the
     75 // given SelectQuery q, which searches for matches
     76 // of searchQuery in the given subQuery using LIKE.
     77 func whereSubqueryLike(
     78 	q *bun.SelectQuery,
     79 	subQuery *bun.SelectQuery,
     80 	searchQuery string,
     81 ) *bun.SelectQuery {
     82 	// Escape existing wildcard + escape
     83 	// chars in the search query string.
     84 	searchQuery = replacer.Replace(searchQuery)
     85 
     86 	// Add our own wildcards back in; search
     87 	// zero or more chars around the query.
     88 	searchQuery = `%` + searchQuery + `%`
     89 
     90 	// Append resulting WHERE
     91 	// clause to the main query.
     92 	return q.Where(
     93 		"(?) LIKE ? ESCAPE ?",
     94 		subQuery, searchQuery, `\`,
     95 	)
     96 }
     97 
     98 // Query example (SQLite):
     99 //
    100 //	SELECT "account"."id" FROM "accounts" AS "account"
    101 //	WHERE (("account"."domain" IS NULL) OR ("account"."domain" != "account"."username"))
    102 //	AND ("account"."id" < 'ZZZZZZZZZZZZZZZZZZZZZZZZZZ')
    103 //	AND ("account"."id" IN (SELECT "target_account_id" FROM "follows" WHERE ("account_id" = '016T5Q3SQKBT337DAKVSKNXXW1')))
    104 //	AND ((SELECT LOWER("account"."username" || COALESCE("account"."display_name", '') || COALESCE("account"."note", '')) AS "account_text") LIKE '%turtle%' ESCAPE '\')
    105 //	ORDER BY "account"."id" DESC LIMIT 10
    106 func (s *searchDB) SearchForAccounts(
    107 	ctx context.Context,
    108 	accountID string,
    109 	query string,
    110 	maxID string,
    111 	minID string,
    112 	limit int,
    113 	following bool,
    114 	offset int,
    115 ) ([]*gtsmodel.Account, error) {
    116 	// Ensure reasonable
    117 	if limit < 0 {
    118 		limit = 0
    119 	}
    120 
    121 	// Make educated guess for slice size
    122 	var (
    123 		accountIDs  = make([]string, 0, limit)
    124 		frontToBack = true
    125 	)
    126 
    127 	q := s.conn.
    128 		NewSelect().
    129 		TableExpr("? AS ?", bun.Ident("accounts"), bun.Ident("account")).
    130 		// Select only IDs from table.
    131 		Column("account.id").
    132 		// Try to ignore instance accounts. Account domain must
    133 		// be either nil or, if set, not equal to the account's
    134 		// username (which is commonly used to indicate it's an
    135 		// instance service account).
    136 		WhereGroup(" AND ", func(q *bun.SelectQuery) *bun.SelectQuery {
    137 			return q.
    138 				Where("? IS NULL", bun.Ident("account.domain")).
    139 				WhereOr("? != ?", bun.Ident("account.domain"), bun.Ident("account.username"))
    140 		})
    141 
    142 	// Return only items with a LOWER id than maxID.
    143 	if maxID == "" {
    144 		maxID = id.Highest
    145 	}
    146 	q = q.Where("? < ?", bun.Ident("account.id"), maxID)
    147 
    148 	if minID != "" {
    149 		// Return only items with a HIGHER id than minID.
    150 		q = q.Where("? > ?", bun.Ident("account.id"), minID)
    151 
    152 		// page up
    153 		frontToBack = false
    154 	}
    155 
    156 	if following {
    157 		// Select only from accounts followed by accountID.
    158 		q = q.Where(
    159 			"? IN (?)",
    160 			bun.Ident("account.id"),
    161 			s.followedAccounts(accountID),
    162 		)
    163 	}
    164 
    165 	// Select account text as subquery.
    166 	accountTextSubq := s.accountText(following)
    167 
    168 	// Search using LIKE for matches of query
    169 	// string within accountText subquery.
    170 	q = whereSubqueryLike(q, accountTextSubq, query)
    171 
    172 	if limit > 0 {
    173 		// Limit amount of accounts returned.
    174 		q = q.Limit(limit)
    175 	}
    176 
    177 	if frontToBack {
    178 		// Page down.
    179 		q = q.Order("account.id DESC")
    180 	} else {
    181 		// Page up.
    182 		q = q.Order("account.id ASC")
    183 	}
    184 
    185 	if err := q.Scan(ctx, &accountIDs); err != nil {
    186 		return nil, s.conn.ProcessError(err)
    187 	}
    188 
    189 	if len(accountIDs) == 0 {
    190 		return nil, nil
    191 	}
    192 
    193 	// If we're paging up, we still want accounts
    194 	// to be sorted by ID desc, so reverse ids slice.
    195 	// https://zchee.github.io/golang-wiki/SliceTricks/#reversing
    196 	if !frontToBack {
    197 		for l, r := 0, len(accountIDs)-1; l < r; l, r = l+1, r-1 {
    198 			accountIDs[l], accountIDs[r] = accountIDs[r], accountIDs[l]
    199 		}
    200 	}
    201 
    202 	accounts := make([]*gtsmodel.Account, 0, len(accountIDs))
    203 	for _, id := range accountIDs {
    204 		// Fetch account from db for ID
    205 		account, err := s.state.DB.GetAccountByID(ctx, id)
    206 		if err != nil {
    207 			log.Errorf(ctx, "error fetching account %q: %v", id, err)
    208 			continue
    209 		}
    210 
    211 		// Append account to slice
    212 		accounts = append(accounts, account)
    213 	}
    214 
    215 	return accounts, nil
    216 }
    217 
    218 // followedAccounts returns a subquery that selects only IDs
    219 // of accounts that are followed by the given accountID.
    220 func (s *searchDB) followedAccounts(accountID string) *bun.SelectQuery {
    221 	return s.conn.
    222 		NewSelect().
    223 		TableExpr("? AS ?", bun.Ident("follows"), bun.Ident("follow")).
    224 		Column("follow.target_account_id").
    225 		Where("? = ?", bun.Ident("follow.account_id"), accountID)
    226 }
    227 
    228 // statusText returns a subquery that selects a concatenation
    229 // of account username and display name as "account_text". If
    230 // `following` is true, then account note will also be included
    231 // in the concatenation.
    232 func (s *searchDB) accountText(following bool) *bun.SelectQuery {
    233 	var (
    234 		accountText = s.conn.NewSelect()
    235 		query       string
    236 		args        []interface{}
    237 	)
    238 
    239 	if following {
    240 		// If querying for accounts we follow,
    241 		// include note in text search params.
    242 		args = []interface{}{
    243 			bun.Ident("account.username"),
    244 			bun.Ident("account.display_name"), "",
    245 			bun.Ident("account.note"), "",
    246 			bun.Ident("account_text"),
    247 		}
    248 	} else {
    249 		// If querying for accounts we're not following,
    250 		// don't include note in text search params.
    251 		args = []interface{}{
    252 			bun.Ident("account.username"),
    253 			bun.Ident("account.display_name"), "",
    254 			bun.Ident("account_text"),
    255 		}
    256 	}
    257 
    258 	// SQLite and Postgres use different syntaxes for
    259 	// concatenation, and we also need to use a
    260 	// different number of placeholders depending on
    261 	// following/not following. COALESCE calls ensure
    262 	// that we're not trying to concatenate null values.
    263 	d := s.conn.Dialect().Name()
    264 	switch {
    265 
    266 	case d == dialect.SQLite && following:
    267 		query = "LOWER(? || COALESCE(?, ?) || COALESCE(?, ?)) AS ?"
    268 
    269 	case d == dialect.SQLite && !following:
    270 		query = "LOWER(? || COALESCE(?, ?)) AS ?"
    271 
    272 	case d == dialect.PG && following:
    273 		query = "LOWER(CONCAT(?, COALESCE(?, ?), COALESCE(?, ?))) AS ?"
    274 
    275 	case d == dialect.PG && !following:
    276 		query = "LOWER(CONCAT(?, COALESCE(?, ?))) AS ?"
    277 
    278 	default:
    279 		panic("db conn was neither pg not sqlite")
    280 	}
    281 
    282 	return accountText.ColumnExpr(query, args...)
    283 }
    284 
    285 // Query example (SQLite):
    286 //
    287 //	SELECT "status"."id"
    288 //	FROM "statuses" AS "status"
    289 //	WHERE ("status"."boost_of_id" IS NULL)
    290 //	AND (("status"."account_id" = '01F8MH1H7YV1Z7D2C8K2730QBF') OR ("status"."in_reply_to_account_id" = '01F8MH1H7YV1Z7D2C8K2730QBF'))
    291 //	AND ("status"."id" < 'ZZZZZZZZZZZZZZZZZZZZZZZZZZ')
    292 //	AND ((SELECT LOWER("status"."content" || COALESCE("status"."content_warning", '')) AS "status_text") LIKE '%hello%' ESCAPE '\')
    293 //	ORDER BY "status"."id" DESC LIMIT 10
    294 func (s *searchDB) SearchForStatuses(
    295 	ctx context.Context,
    296 	accountID string,
    297 	query string,
    298 	maxID string,
    299 	minID string,
    300 	limit int,
    301 	offset int,
    302 ) ([]*gtsmodel.Status, error) {
    303 	// Ensure reasonable
    304 	if limit < 0 {
    305 		limit = 0
    306 	}
    307 
    308 	// Make educated guess for slice size
    309 	var (
    310 		statusIDs   = make([]string, 0, limit)
    311 		frontToBack = true
    312 	)
    313 
    314 	q := s.conn.
    315 		NewSelect().
    316 		TableExpr("? AS ?", bun.Ident("statuses"), bun.Ident("status")).
    317 		// Select only IDs from table
    318 		Column("status.id").
    319 		// Ignore boosts.
    320 		Where("? IS NULL", bun.Ident("status.boost_of_id")).
    321 		// Select only statuses created by
    322 		// accountID or replying to accountID.
    323 		WhereGroup(" AND ", func(q *bun.SelectQuery) *bun.SelectQuery {
    324 			return q.
    325 				Where("? = ?", bun.Ident("status.account_id"), accountID).
    326 				WhereOr("? = ?", bun.Ident("status.in_reply_to_account_id"), accountID)
    327 		})
    328 
    329 	// Return only items with a LOWER id than maxID.
    330 	if maxID == "" {
    331 		maxID = id.Highest
    332 	}
    333 	q = q.Where("? < ?", bun.Ident("status.id"), maxID)
    334 
    335 	if minID != "" {
    336 		// return only statuses HIGHER (ie., newer) than minID
    337 		q = q.Where("? > ?", bun.Ident("status.id"), minID)
    338 
    339 		// page up
    340 		frontToBack = false
    341 	}
    342 
    343 	// Select status text as subquery.
    344 	statusTextSubq := s.statusText()
    345 
    346 	// Search using LIKE for matches of query
    347 	// string within statusText subquery.
    348 	q = whereSubqueryLike(q, statusTextSubq, query)
    349 
    350 	if limit > 0 {
    351 		// Limit amount of statuses returned.
    352 		q = q.Limit(limit)
    353 	}
    354 
    355 	if frontToBack {
    356 		// Page down.
    357 		q = q.Order("status.id DESC")
    358 	} else {
    359 		// Page up.
    360 		q = q.Order("status.id ASC")
    361 	}
    362 
    363 	if err := q.Scan(ctx, &statusIDs); err != nil {
    364 		return nil, s.conn.ProcessError(err)
    365 	}
    366 
    367 	if len(statusIDs) == 0 {
    368 		return nil, nil
    369 	}
    370 
    371 	// If we're paging up, we still want statuses
    372 	// to be sorted by ID desc, so reverse ids slice.
    373 	// https://zchee.github.io/golang-wiki/SliceTricks/#reversing
    374 	if !frontToBack {
    375 		for l, r := 0, len(statusIDs)-1; l < r; l, r = l+1, r-1 {
    376 			statusIDs[l], statusIDs[r] = statusIDs[r], statusIDs[l]
    377 		}
    378 	}
    379 
    380 	statuses := make([]*gtsmodel.Status, 0, len(statusIDs))
    381 	for _, id := range statusIDs {
    382 		// Fetch status from db for ID
    383 		status, err := s.state.DB.GetStatusByID(ctx, id)
    384 		if err != nil {
    385 			log.Errorf(ctx, "error fetching status %q: %v", id, err)
    386 			continue
    387 		}
    388 
    389 		// Append status to slice
    390 		statuses = append(statuses, status)
    391 	}
    392 
    393 	return statuses, nil
    394 }
    395 
    396 // statusText returns a subquery that selects a concatenation
    397 // of status content and content warning as "status_text".
    398 func (s *searchDB) statusText() *bun.SelectQuery {
    399 	statusText := s.conn.NewSelect()
    400 
    401 	// SQLite and Postgres use different
    402 	// syntaxes for concatenation.
    403 	switch s.conn.Dialect().Name() {
    404 
    405 	case dialect.SQLite:
    406 		statusText = statusText.ColumnExpr(
    407 			"LOWER(? || COALESCE(?, ?)) AS ?",
    408 			bun.Ident("status.content"), bun.Ident("status.content_warning"), "",
    409 			bun.Ident("status_text"))
    410 
    411 	case dialect.PG:
    412 		statusText = statusText.ColumnExpr(
    413 			"LOWER(CONCAT(?, COALESCE(?, ?))) AS ?",
    414 			bun.Ident("status.content"), bun.Ident("status.content_warning"), "",
    415 			bun.Ident("status_text"))
    416 
    417 	default:
    418 		panic("db conn was neither pg not sqlite")
    419 	}
    420 
    421 	return statusText
    422 }