commit 8de928b5e9441d3968e5f79860f4de08d52f24ed
parent 07727753b96d209406783e5e539725bcdafebdc7
Author: tobi <31960611+tsmethurst@users.noreply.github.com>
Date: Mon, 7 Mar 2022 11:33:18 +0100
[performance] Database optimizations (#419)
* create first index on notifications
* tidy up + add tests
* log queries for trace, ops for debug
* index commonly used fields
* rearrange query
* add a few more indexes
* remove schema-breaking index
(add this back in later)
* re-add cleanup query index
Diffstat:
5 files changed, 427 insertions(+), 8 deletions(-)
diff --git a/internal/db/bundb/bundb.go b/internal/db/bundb/bundb.go
@@ -137,8 +137,8 @@ func NewBunDBService(ctx context.Context) (db.DB, error) {
}
// add a hook to just log queries and the time they take
- // only do this for trace logging where performance isn't 1st concern
- if logrus.GetLevel() >= logrus.TraceLevel {
+ // only do this for logging where performance isn't 1st concern
+ if logrus.GetLevel() >= logrus.DebugLevel {
conn.DB.AddQueryHook(newDebugQueryHook())
}
diff --git a/internal/db/bundb/migrations/20220305130328_database_optimizations.go b/internal/db/bundb/migrations/20220305130328_database_optimizations.go
@@ -0,0 +1,291 @@
+/*
+ GoToSocial
+ Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package migrations
+
+import (
+ "context"
+
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+ "github.com/uptrace/bun"
+)
+
+func init() {
+ up := func(ctx context.Context, db *bun.DB) error {
+ return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
+ // ACCOUNTS are often selected by URI, username, or domain
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Account{}).
+ Index("accounts_uri_idx").
+ Column("uri").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Account{}).
+ Index("accounts_username_idx").
+ Column("username").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Account{}).
+ Index("accounts_domain_idx").
+ Column("domain").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Account{}).
+ Index("accounts_username_domain_idx"). // for selecting local accounts by username
+ Column("username", "domain").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ // NOTIFICATIONS are commonly selected by their target_account_id
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Notification{}).
+ Index("notifications_target_account_id_idx").
+ Column("target_account_id").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ // STATUSES are selected in many different ways, so we need quite few indexes
+ // to avoid queries becoming painfully slow as more statuses get stored
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Status{}).
+ Index("statuses_uri_idx").
+ Column("uri").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Status{}).
+ Index("statuses_account_id_idx").
+ Column("account_id").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Status{}).
+ Index("statuses_in_reply_to_account_id_idx").
+ Column("in_reply_to_account_id").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Status{}).
+ Index("statuses_boost_of_account_id_idx").
+ Column("boost_of_account_id").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Status{}).
+ Index("statuses_in_reply_to_id_idx").
+ Column("in_reply_to_id").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Status{}).
+ Index("statuses_boost_of_id_idx").
+ Column("boost_of_id").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Status{}).
+ Index("statuses_visibility_idx").
+ Column("visibility").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Status{}).
+ Index("statuses_local_idx").
+ Column("local").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ // DOMAIN BLOCKS are almost always selected by their domain
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.DomainBlock{}).
+ Index("domain_blocks_domain_idx").
+ Column("domain").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ // INSTANCES are usually selected by their domain
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Instance{}).
+ Index("instances_domain_idx").
+ Column("domain").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ // STATUS FAVES are almost always selected by their target status
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.StatusFave{}).
+ Index("status_faves_status_id_idx").
+ Column("status_id").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ // MENTIONS are almost always selected by their originating status
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Mention{}).
+ Index("mentions_status_id_idx").
+ Column("status_id").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ // FOLLOW_REQUESTS and FOLLOWS are usually selected by who they originate from, and who they target
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.FollowRequest{}).
+ Index("follow_requests_account_id_idx").
+ Column("account_id").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.FollowRequest{}).
+ Index("follow_requests_target_account_id_idx").
+ Column("target_account_id").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Follow{}).
+ Index("follows_account_id_idx").
+ Column("account_id").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Follow{}).
+ Index("follows_target_account_id_idx").
+ Column("target_account_id").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ // BLOCKS are usually selected simultaneously by who they originate from and who they target
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Block{}).
+ Index("blocks_account_id_target_account_id_idx").
+ Column("account_id", "target_account_id").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ // MEDIA ATTACHMENTS are often selected by status ID, the account that owns the attachment
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.MediaAttachment{}).
+ Index("media_attachments_status_id_idx").
+ Column("status_id").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.MediaAttachment{}).
+ Index("media_attachments_account_id_idx").
+ Column("account_id").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ // for media cleanup jobs, attachments will be selected on a bunch of fields so make an index of this...
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.MediaAttachment{}).
+ Index("media_attachments_cleanup_idx").
+ Column("cached", "avatar", "header", "created_at", "remote_url").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ // TOKENS are usually selected via Access field for user-level tokens
+ if _, err := tx.
+ NewCreateIndex().
+ Model(>smodel.Token{}).
+ Index("tokens_access_idx").
+ Column("access").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ return nil
+ })
+ }
+
+ down := func(ctx context.Context, db *bun.DB) error {
+ return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
+ return nil
+ })
+ }
+
+ if err := Migrations.Register(up, down); err != nil {
+ panic(err)
+ }
+}
diff --git a/internal/db/bundb/notification.go b/internal/db/bundb/notification.go
@@ -66,9 +66,7 @@ func (n *notificationDB) GetNotifications(ctx context.Context, accountID string,
q := n.conn.
NewSelect().
Model(¬ifications).
- Column("id").
- Where("target_account_id = ?", accountID).
- Order("id DESC")
+ Column("id")
if maxID != "" {
q = q.Where("id < ?", maxID)
@@ -78,6 +76,10 @@ func (n *notificationDB) GetNotifications(ctx context.Context, accountID string,
q = q.Where("id > ?", sinceID)
}
+ q = q.
+ Where("target_account_id = ?", accountID).
+ Order("id DESC")
+
if limit != 0 {
q = q.Limit(limit)
}
@@ -120,8 +122,7 @@ func (n *notificationDB) putNotificationCache(notif *gtsmodel.Notification) {
}
func (n *notificationDB) getNotificationDB(ctx context.Context, id string, dst *gtsmodel.Notification) error {
- q := n.newNotificationQ(dst).
- Where("notification.id = ?", id)
+ q := n.newNotificationQ(dst).WherePK()
if err := q.Scan(ctx); err != nil {
return n.conn.ProcessError(err)
diff --git a/internal/db/bundb/notification_test.go b/internal/db/bundb/notification_test.go
@@ -0,0 +1,123 @@
+/*
+ GoToSocial
+ Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package bundb_test
+
+import (
+ "context"
+ "fmt"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/suite"
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+ "github.com/superseriousbusiness/gotosocial/internal/id"
+)
+
+func (suite *NotificationTestSuite) spamNotifs() {
+ // spam a shit ton of notifs into the database
+ // half of them will be for zork, the other half
+ // will be for random accounts
+ notifCount := 10000
+
+ zork := suite.testAccounts["local_account_1"]
+
+ for i := 0; i < notifCount; i++ {
+ notifID, err := id.NewULID()
+ if err != nil {
+ panic(err)
+ }
+
+ var targetAccountID string
+ if i%2 == 0 {
+ targetAccountID = zork.ID
+ } else {
+ randomAssID, err := id.NewRandomULID()
+ if err != nil {
+ panic(err)
+ }
+ targetAccountID = randomAssID
+ }
+
+ statusID, err := id.NewRandomULID()
+ if err != nil {
+ panic(err)
+ }
+
+ originAccountID, err := id.NewRandomULID()
+ if err != nil {
+ panic(err)
+ }
+
+ notif := >smodel.Notification{
+ ID: notifID,
+ NotificationType: gtsmodel.NotificationFave,
+ CreatedAt: time.Now(),
+ TargetAccountID: targetAccountID,
+ OriginAccountID: originAccountID,
+ StatusID: statusID,
+ Read: false,
+ }
+
+ if err := suite.db.Put(context.Background(), notif); err != nil {
+ panic(err)
+ }
+ }
+
+ fmt.Printf("\n\n\nput %d notifs in the db\n\n\n", notifCount)
+}
+
+type NotificationTestSuite struct {
+ BunDBStandardTestSuite
+}
+
+func (suite *NotificationTestSuite) TestGetNotificationsWithSpam() {
+ suite.spamNotifs()
+ testAccount := suite.testAccounts["local_account_1"]
+ before := time.Now()
+ notifications, err := suite.db.GetNotifications(context.Background(), testAccount.ID, 20, "ZZZZZZZZZZZZZZZZZZZZZZZZZZ", "00000000000000000000000000")
+ suite.NoError(err)
+ timeTaken := time.Since(before)
+ fmt.Printf("\n\n\n withSpam: got %d notifications in %s\n\n\n", len(notifications), timeTaken)
+
+ suite.NotNil(notifications)
+ for _, n := range notifications {
+ suite.Equal(testAccount.ID, n.TargetAccountID)
+ }
+}
+
+func (suite *NotificationTestSuite) TestGetNotificationsWithoutSpam() {
+ testAccount := suite.testAccounts["local_account_1"]
+ before := time.Now()
+ notifications, err := suite.db.GetNotifications(context.Background(), testAccount.ID, 20, "ZZZZZZZZZZZZZZZZZZZZZZZZZZ", "00000000000000000000000000")
+ suite.NoError(err)
+ timeTaken := time.Since(before)
+ fmt.Printf("\n\n\n withoutSpam: got %d notifications in %s\n\n\n", len(notifications), timeTaken)
+
+ suite.NotNil(notifications)
+ for _, n := range notifications {
+ suite.Equal(testAccount.ID, n.TargetAccountID)
+ suite.NotNil(n.OriginAccount)
+ suite.NotNil(n.TargetAccount)
+ suite.NotNil(n.Status)
+ }
+}
+
+func TestNotificationTestSuite(t *testing.T) {
+ suite.Run(t, new(NotificationTestSuite))
+}
diff --git a/internal/db/bundb/trace.go b/internal/db/bundb/trace.go
@@ -47,5 +47,9 @@ func (q *debugQueryHook) AfterQuery(_ context.Context, event *bun.QueryEvent) {
"operation": event.Operation(),
})
- l.Tracef("[%s] %s", dur, event.Operation())
+ if logrus.GetLevel() == logrus.TraceLevel {
+ l.Tracef("[%s] %s", dur, event.Query)
+ } else {
+ l.Debugf("[%s] %s", dur, event.Operation())
+ }
}