gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

20220612091800_duplicated_media_cleanup.go (5415B)


      1 // GoToSocial
      2 // Copyright (C) GoToSocial Authors admin@gotosocial.org
      3 // SPDX-License-Identifier: AGPL-3.0-or-later
      4 //
      5 // This program is free software: you can redistribute it and/or modify
      6 // it under the terms of the GNU Affero General Public License as published by
      7 // the Free Software Foundation, either version 3 of the License, or
      8 // (at your option) any later version.
      9 //
     10 // This program is distributed in the hope that it will be useful,
     11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13 // GNU Affero General Public License for more details.
     14 //
     15 // You should have received a copy of the GNU Affero General Public License
     16 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
     17 
     18 package migrations
     19 
import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"path"

	"codeberg.org/gruf/go-store/v2/kv"
	"codeberg.org/gruf/go-store/v2/storage"
	"github.com/superseriousbusiness/gotosocial/internal/config"
	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
	"github.com/superseriousbusiness/gotosocial/internal/log"
	"github.com/uptrace/bun"
)
     33 
     34 func init() {
     35 	deleteAttachment := func(ctx context.Context, l log.Entry, a *gtsmodel.MediaAttachment, s *kv.KVStore, tx bun.Tx) {
     36 		if err := s.Delete(ctx, a.File.Path); err != nil && err != storage.ErrNotFound {
     37 			l.Errorf("error removing file %s: %s", a.File.Path, err)
     38 		} else {
     39 			l.Debugf("deleted %s", a.File.Path)
     40 		}
     41 
     42 		if err := s.Delete(ctx, a.Thumbnail.Path); err != nil && err != storage.ErrNotFound {
     43 			l.Errorf("error removing file %s: %s", a.Thumbnail.Path, err)
     44 		} else {
     45 			l.Debugf("deleted %s", a.Thumbnail.Path)
     46 		}
     47 
     48 		if _, err := tx.NewDelete().
     49 			TableExpr("? AS ?", bun.Ident("media_attachments"), bun.Ident("media_attachment")).
     50 			Where("? = ?", bun.Ident("media_attachment.id"), a.ID).
     51 			Exec(ctx); err != nil {
     52 			l.Errorf("error deleting attachment with id %s: %s", a.ID, err)
     53 		} else {
     54 			l.Debugf("deleted attachment with id %s", a.ID)
     55 		}
     56 	}
     57 
     58 	up := func(ctx context.Context, db *bun.DB) error {
     59 		l := log.WithField("migration", "20220612091800_duplicated_media_cleanup")
     60 
     61 		if config.GetStorageBackend() != "local" {
     62 			// this migration only affects versions which only supported local storage
     63 			return nil
     64 		}
     65 
     66 		storageBasePath := config.GetStorageLocalBasePath()
     67 		if storageBasePath == "" {
     68 			return fmt.Errorf("%s must be set to do storage migration", config.StorageLocalBasePathFlag())
     69 		}
     70 
     71 		return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
     72 			s, err := kv.OpenDisk(storageBasePath, &storage.DiskConfig{
     73 				LockFile: path.Join(storageBasePath, "store.lock"),
     74 			})
     75 			if err != nil {
     76 				return fmt.Errorf("error creating storage backend: %s", err)
     77 			}
     78 			defer s.Close()
     79 
     80 			// step 1. select all media attachment remote URLs that have duplicates
     81 			var dupes int
     82 			dupedRemoteURLs := []*gtsmodel.MediaAttachment{}
     83 			if err := tx.NewSelect().
     84 				Model(&dupedRemoteURLs).
     85 				ColumnExpr("remote_url", "count(*)").
     86 				Where("remote_url IS NOT NULL").
     87 				Group("remote_url").
     88 				Having("count(*) > 1").
     89 				Scan(ctx); err != nil {
     90 				return err
     91 			}
     92 			dupes = len(dupedRemoteURLs)
     93 			l.Infof("found %d attachments with duplicate remote URLs", dupes)
     94 
     95 			for i, dupedRemoteURL := range dupedRemoteURLs {
     96 				if i%10 == 0 {
     97 					l.Infof("cleaning %d of %d", i, dupes)
     98 				}
     99 
    100 				// step 2: select all media attachments associated with this url
    101 				dupedAttachments := []*gtsmodel.MediaAttachment{}
    102 				if err := tx.NewSelect().
    103 					Model(&dupedAttachments).
    104 					Where("remote_url = ?", dupedRemoteURL.RemoteURL).
    105 					Scan(ctx); err != nil {
    106 					l.Errorf("error running same attachments query: %s", err)
    107 					continue
    108 				}
    109 				l.Debugf("found %d duplicates of attachment with remote url %s", len(dupedAttachments), dupedRemoteURL.RemoteURL)
    110 
    111 				var statusID string
    112 			statusIDLoop:
    113 				for _, dupe := range dupedAttachments {
    114 					if dupe.StatusID != "" {
    115 						statusID = dupe.StatusID
    116 						break statusIDLoop
    117 					}
    118 				}
    119 
    120 				if statusID == "" {
    121 					l.Debugf("%s not associated with a status, moving on", dupedRemoteURL.RemoteURL)
    122 					continue
    123 				}
    124 				l.Debugf("%s is associated with status %s", dupedRemoteURL.RemoteURL, statusID)
    125 
    126 				// step 3: get the status that these attachments are supposedly associated with, bail if we can't get it
    127 				status := &gtsmodel.Status{}
    128 				if err := tx.NewSelect().
    129 					Model(status).
    130 					Where("id = ?", statusID).
    131 					Scan(ctx); err != nil {
    132 					if err != sql.ErrNoRows {
    133 						l.Errorf("error selecting status with id %s: %s", statusID, err)
    134 					}
    135 					continue
    136 				}
    137 
    138 				// step 4: for each attachment, check if it's actually one that the status is currently set to use, and delete if not
    139 				for _, dupe := range dupedAttachments {
    140 					var currentlyUsed bool
    141 				currentlyUsedLoop:
    142 					for _, attachmentID := range status.AttachmentIDs {
    143 						if attachmentID == dupe.ID {
    144 							currentlyUsed = true
    145 							break currentlyUsedLoop
    146 						}
    147 					}
    148 
    149 					if currentlyUsed {
    150 						l.Debugf("attachment with id %s is a correct current attachment, leaving it alone!", dupe.ID)
    151 						continue
    152 					}
    153 
    154 					deleteAttachment(ctx, l, dupe, s, tx)
    155 				}
    156 			}
    157 			return nil
    158 		})
    159 	}
    160 
    161 	down := func(ctx context.Context, db *bun.DB) error {
    162 		return nil
    163 	}
    164 
    165 	if err := Migrations.Register(up, down); err != nil {
    166 		panic(err)
    167 	}
    168 }