20220612091800_duplicated_media_cleanup.go (5415B)
1 // GoToSocial 2 // Copyright (C) GoToSocial Authors admin@gotosocial.org 3 // SPDX-License-Identifier: AGPL-3.0-or-later 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package migrations 19 20 import ( 21 "context" 22 "database/sql" 23 "fmt" 24 "path" 25 26 "codeberg.org/gruf/go-store/v2/kv" 27 "codeberg.org/gruf/go-store/v2/storage" 28 "github.com/superseriousbusiness/gotosocial/internal/config" 29 "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" 30 "github.com/superseriousbusiness/gotosocial/internal/log" 31 "github.com/uptrace/bun" 32 ) 33 34 func init() { 35 deleteAttachment := func(ctx context.Context, l log.Entry, a *gtsmodel.MediaAttachment, s *kv.KVStore, tx bun.Tx) { 36 if err := s.Delete(ctx, a.File.Path); err != nil && err != storage.ErrNotFound { 37 l.Errorf("error removing file %s: %s", a.File.Path, err) 38 } else { 39 l.Debugf("deleted %s", a.File.Path) 40 } 41 42 if err := s.Delete(ctx, a.Thumbnail.Path); err != nil && err != storage.ErrNotFound { 43 l.Errorf("error removing file %s: %s", a.Thumbnail.Path, err) 44 } else { 45 l.Debugf("deleted %s", a.Thumbnail.Path) 46 } 47 48 if _, err := tx.NewDelete(). 49 TableExpr("? AS ?", bun.Ident("media_attachments"), bun.Ident("media_attachment")). 50 Where("? = ?", bun.Ident("media_attachment.id"), a.ID). 51 Exec(ctx); err != nil { 52 l.Errorf("error deleting attachment with id %s: %s", a.ID, err) 53 } else { 54 l.Debugf("deleted attachment with id %s", a.ID) 55 } 56 } 57 58 up := func(ctx context.Context, db *bun.DB) error { 59 l := log.WithField("migration", "20220612091800_duplicated_media_cleanup") 60 61 if config.GetStorageBackend() != "local" { 62 // this migration only affects versions which only supported local storage 63 return nil 64 } 65 66 storageBasePath := config.GetStorageLocalBasePath() 67 if storageBasePath == "" { 68 return fmt.Errorf("%s must be set to do storage migration", config.StorageLocalBasePathFlag()) 69 } 70 71 return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { 72 s, err := kv.OpenDisk(storageBasePath, &storage.DiskConfig{ 73 LockFile: path.Join(storageBasePath, "store.lock"), 74 }) 75 if err != nil { 76 return fmt.Errorf("error creating storage backend: %s", err) 77 } 78 defer s.Close() 79 80 // step 1. select all media attachment remote URLs that have duplicates 81 var dupes int 82 dupedRemoteURLs := []*gtsmodel.MediaAttachment{} 83 if err := tx.NewSelect(). 84 Model(&dupedRemoteURLs). 85 ColumnExpr("remote_url", "count(*)"). 86 Where("remote_url IS NOT NULL"). 87 Group("remote_url"). 88 Having("count(*) > 1"). 89 Scan(ctx); err != nil { 90 return err 91 } 92 dupes = len(dupedRemoteURLs) 93 l.Infof("found %d attachments with duplicate remote URLs", dupes) 94 95 for i, dupedRemoteURL := range dupedRemoteURLs { 96 if i%10 == 0 { 97 l.Infof("cleaning %d of %d", i, dupes) 98 } 99 100 // step 2: select all media attachments associated with this url 101 dupedAttachments := []*gtsmodel.MediaAttachment{} 102 if err := tx.NewSelect(). 103 Model(&dupedAttachments). 104 Where("remote_url = ?", dupedRemoteURL.RemoteURL). 105 Scan(ctx); err != nil { 106 l.Errorf("error running same attachments query: %s", err) 107 continue 108 } 109 l.Debugf("found %d duplicates of attachment with remote url %s", len(dupedAttachments), dupedRemoteURL.RemoteURL) 110 111 var statusID string 112 statusIDLoop: 113 for _, dupe := range dupedAttachments { 114 if dupe.StatusID != "" { 115 statusID = dupe.StatusID 116 break statusIDLoop 117 } 118 } 119 120 if statusID == "" { 121 l.Debugf("%s not associated with a status, moving on", dupedRemoteURL.RemoteURL) 122 continue 123 } 124 l.Debugf("%s is associated with status %s", dupedRemoteURL.RemoteURL, statusID) 125 126 // step 3: get the status that these attachments are supposedly associated with, bail if we can't get it 127 status := >smodel.Status{} 128 if err := tx.NewSelect(). 129 Model(status). 130 Where("id = ?", statusID). 131 Scan(ctx); err != nil { 132 if err != sql.ErrNoRows { 133 l.Errorf("error selecting status with id %s: %s", statusID, err) 134 } 135 continue 136 } 137 138 // step 4: for each attachment, check if it's actually one that the status is currently set to use, and delete if not 139 for _, dupe := range dupedAttachments { 140 var currentlyUsed bool 141 currentlyUsedLoop: 142 for _, attachmentID := range status.AttachmentIDs { 143 if attachmentID == dupe.ID { 144 currentlyUsed = true 145 break currentlyUsedLoop 146 } 147 } 148 149 if currentlyUsed { 150 l.Debugf("attachment with id %s is a correct current attachment, leaving it alone!", dupe.ID) 151 continue 152 } 153 154 deleteAttachment(ctx, l, dupe, s, tx) 155 } 156 } 157 return nil 158 }) 159 } 160 161 down := func(ctx context.Context, db *bun.DB) error { 162 return nil 163 } 164 165 if err := Migrations.Register(up, down); err != nil { 166 panic(err) 167 } 168 }