gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

prune.go (12994B)


      1 // GoToSocial
      2 // Copyright (C) GoToSocial Authors admin@gotosocial.org
      3 // SPDX-License-Identifier: AGPL-3.0-or-later
      4 //
      5 // This program is free software: you can redistribute it and/or modify
      6 // it under the terms of the GNU Affero General Public License as published by
      7 // the Free Software Foundation, either version 3 of the License, or
      8 // (at your option) any later version.
      9 //
     10 // This program is distributed in the hope that it will be useful,
     11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13 // GNU Affero General Public License for more details.
     14 //
     15 // You should have received a copy of the GNU Affero General Public License
     16 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
     17 
     18 package media
     19 
     20 import (
     21 	"context"
     22 	"errors"
     23 	"fmt"
     24 	"time"
     25 
     26 	"codeberg.org/gruf/go-store/v2/storage"
     27 	"github.com/superseriousbusiness/gotosocial/internal/db"
     28 	"github.com/superseriousbusiness/gotosocial/internal/gtserror"
     29 	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
     30 	"github.com/superseriousbusiness/gotosocial/internal/log"
     31 	"github.com/superseriousbusiness/gotosocial/internal/regexes"
     32 	"github.com/superseriousbusiness/gotosocial/internal/uris"
     33 )
     34 
     35 const (
     36 	selectPruneLimit          = 50 // Amount of media entries to select at a time from the db when pruning.
     37 	unusedLocalAttachmentDays = 3  // Number of days to keep local media in storage if not attached to a status.
     38 )
     39 
     40 // PruneAll runs all of the below pruning/uncacheing functions, and then cleans up any resulting
     41 // empty directories from the storage driver. It can be called as a shortcut for calling the below
     42 // pruning functions one by one.
     43 //
     44 // If blocking is true, then any errors encountered during the prune will be combined + returned to
     45 // the caller. If blocking is false, the prune is run in the background and errors are just logged
     46 // instead.
     47 func (m *Manager) PruneAll(ctx context.Context, mediaCacheRemoteDays int, blocking bool) error {
     48 	const dry = false
     49 
     50 	f := func(innerCtx context.Context) error {
     51 		errs := gtserror.MultiError{}
     52 
     53 		pruned, err := m.PruneUnusedLocal(innerCtx, dry)
     54 		if err != nil {
     55 			errs = append(errs, fmt.Sprintf("error pruning unused local media (%s)", err))
     56 		} else {
     57 			log.Infof(ctx, "pruned %d unused local media", pruned)
     58 		}
     59 
     60 		pruned, err = m.PruneUnusedRemote(innerCtx, dry)
     61 		if err != nil {
     62 			errs = append(errs, fmt.Sprintf("error pruning unused remote media: (%s)", err))
     63 		} else {
     64 			log.Infof(ctx, "pruned %d unused remote media", pruned)
     65 		}
     66 
     67 		pruned, err = m.UncacheRemote(innerCtx, mediaCacheRemoteDays, dry)
     68 		if err != nil {
     69 			errs = append(errs, fmt.Sprintf("error uncacheing remote media older than %d day(s): (%s)", mediaCacheRemoteDays, err))
     70 		} else {
     71 			log.Infof(ctx, "uncached %d remote media older than %d day(s)", pruned, mediaCacheRemoteDays)
     72 		}
     73 
     74 		pruned, err = m.PruneOrphaned(innerCtx, dry)
     75 		if err != nil {
     76 			errs = append(errs, fmt.Sprintf("error pruning orphaned media: (%s)", err))
     77 		} else {
     78 			log.Infof(ctx, "pruned %d orphaned media", pruned)
     79 		}
     80 
     81 		if err := m.state.Storage.Storage.Clean(innerCtx); err != nil {
     82 			errs = append(errs, fmt.Sprintf("error cleaning storage: (%s)", err))
     83 		} else {
     84 			log.Info(ctx, "cleaned storage")
     85 		}
     86 
     87 		return errs.Combine()
     88 	}
     89 
     90 	if blocking {
     91 		return f(ctx)
     92 	}
     93 
     94 	go func() {
     95 		if err := f(context.Background()); err != nil {
     96 			log.Error(ctx, err)
     97 		}
     98 	}()
     99 
    100 	return nil
    101 }
    102 
    103 // PruneUnusedRemote prunes unused/out of date headers and avatars cached on this instance.
    104 //
    105 // The returned int is the amount of media that was pruned by this function.
    106 func (m *Manager) PruneUnusedRemote(ctx context.Context, dry bool) (int, error) {
    107 	var (
    108 		totalPruned int
    109 		maxID       string
    110 		attachments []*gtsmodel.MediaAttachment
    111 		err         error
    112 	)
    113 
    114 	// We don't know in advance how many remote attachments will meet
    115 	// our criteria for being 'unused'. So a dry run in this case just
    116 	// means we iterate through as normal, but do nothing with each entry
    117 	// instead of removing it. Define this here so we don't do the 'if dry'
    118 	// check inside the loop a million times.
    119 	var f func(ctx context.Context, attachment *gtsmodel.MediaAttachment) error
    120 	if !dry {
    121 		f = m.deleteAttachment
    122 	} else {
    123 		f = func(_ context.Context, _ *gtsmodel.MediaAttachment) error {
    124 			return nil // noop
    125 		}
    126 	}
    127 
    128 	for attachments, err = m.state.DB.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit) {
    129 		maxID = attachments[len(attachments)-1].ID // use the id of the last attachment in the slice as the next 'maxID' value
    130 
    131 		for _, attachment := range attachments {
    132 			// Retrieve owning account if possible.
    133 			var account *gtsmodel.Account
    134 			if accountID := attachment.AccountID; accountID != "" {
    135 				account, err = m.state.DB.GetAccountByID(ctx, attachment.AccountID)
    136 				if err != nil && !errors.Is(err, db.ErrNoEntries) {
    137 					// Only return on a real error.
    138 					return 0, fmt.Errorf("PruneUnusedRemote: error fetching account with id %s: %w", accountID, err)
    139 				}
    140 			}
    141 
    142 			// Prune each attachment that meets one of the following criteria:
    143 			// - Has no owning account in the database.
    144 			// - Is a header but isn't the owning account's current header.
    145 			// - Is an avatar but isn't the owning account's current avatar.
    146 			if account == nil ||
    147 				(*attachment.Header && attachment.ID != account.HeaderMediaAttachmentID) ||
    148 				(*attachment.Avatar && attachment.ID != account.AvatarMediaAttachmentID) {
    149 				if err := f(ctx, attachment); err != nil {
    150 					return totalPruned, err
    151 				}
    152 				totalPruned++
    153 			}
    154 		}
    155 	}
    156 
    157 	// Make sure we don't have a real error when we leave the loop.
    158 	if err != nil && !errors.Is(err, db.ErrNoEntries) {
    159 		return totalPruned, err
    160 	}
    161 
    162 	return totalPruned, nil
    163 }
    164 
    165 // PruneOrphaned prunes files that exist in storage but which do not have a corresponding
    166 // entry in the database.
    167 //
    168 // If dry is true, then nothing will be changed, only the amount that *would* be removed
    169 // is returned to the caller.
    170 func (m *Manager) PruneOrphaned(ctx context.Context, dry bool) (int, error) {
    171 	// Emojis are stored under the instance account, so we
    172 	// need the ID of the instance account for the next part.
    173 	instanceAccount, err := m.state.DB.GetInstanceAccount(ctx, "")
    174 	if err != nil {
    175 		return 0, fmt.Errorf("PruneOrphaned: error getting instance account: %w", err)
    176 	}
    177 
    178 	instanceAccountID := instanceAccount.ID
    179 
    180 	var orphanedKeys []string
    181 
    182 	// Keys in storage will look like the following format:
    183 	// `[ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[MEDIA_ID].[EXTENSION]`
    184 	// We can filter out keys we're not interested in by matching through a regex.
    185 	if err := m.state.Storage.WalkKeys(ctx, func(ctx context.Context, key string) error {
    186 		if !regexes.FilePath.MatchString(key) {
    187 			// This is not our expected key format.
    188 			return nil
    189 		}
    190 
    191 		// Check whether this storage entry is orphaned.
    192 		orphaned, err := m.orphaned(ctx, key, instanceAccountID)
    193 		if err != nil {
    194 			return fmt.Errorf("error checking orphaned status: %w", err)
    195 		}
    196 
    197 		if orphaned {
    198 			// Add this orphaned entry to list of keys.
    199 			orphanedKeys = append(orphanedKeys, key)
    200 		}
    201 
    202 		return nil
    203 	}); err != nil {
    204 		return 0, fmt.Errorf("PruneOrphaned: error walking keys: %w", err)
    205 	}
    206 
    207 	totalPruned := len(orphanedKeys)
    208 
    209 	if dry {
    210 		// Dry run: don't remove anything.
    211 		return totalPruned, nil
    212 	}
    213 
    214 	// This is not a drill! We have to delete stuff!
    215 	return m.removeFiles(ctx, orphanedKeys...)
    216 }
    217 
    218 func (m *Manager) orphaned(ctx context.Context, key string, instanceAccountID string) (bool, error) {
    219 	pathParts := regexes.FilePath.FindStringSubmatch(key)
    220 	if len(pathParts) != 6 {
    221 		// This doesn't match our expectations so
    222 		// it wasn't created by gts; ignore it.
    223 		return false, nil
    224 	}
    225 
    226 	var (
    227 		mediaType = pathParts[2]
    228 		mediaID   = pathParts[4]
    229 		orphaned  = false
    230 	)
    231 
    232 	// Look for keys in storage that we don't have an attachment for.
    233 	switch Type(mediaType) {
    234 	case TypeAttachment, TypeHeader, TypeAvatar:
    235 		if _, err := m.state.DB.GetAttachmentByID(ctx, mediaID); err != nil {
    236 			if !errors.Is(err, db.ErrNoEntries) {
    237 				return false, fmt.Errorf("error calling GetAttachmentByID: %w", err)
    238 			}
    239 			orphaned = true
    240 		}
    241 	case TypeEmoji:
    242 		// Look using the static URL for the emoji. Emoji images can change, so
    243 		// the MEDIA_ID part of the key for emojis will not necessarily correspond
    244 		// to the file that's currently being used as the emoji image.
    245 		staticURL := uris.GenerateURIForAttachment(instanceAccountID, string(TypeEmoji), string(SizeStatic), mediaID, mimePng)
    246 		if _, err := m.state.DB.GetEmojiByStaticURL(ctx, staticURL); err != nil {
    247 			if !errors.Is(err, db.ErrNoEntries) {
    248 				return false, fmt.Errorf("error calling GetEmojiByStaticURL: %w", err)
    249 			}
    250 			orphaned = true
    251 		}
    252 	}
    253 
    254 	return orphaned, nil
    255 }
    256 
    257 // UncacheRemote uncaches all remote media attachments older than the given amount of days.
    258 //
    259 // In this context, uncacheing means deleting media files from storage and marking the attachment
    260 // as cached=false in the database.
    261 //
    262 // If 'dry' is true, then only a dry run will be performed: nothing will actually be changed.
    263 //
    264 // The returned int is the amount of media that was/would be uncached by this function.
    265 func (m *Manager) UncacheRemote(ctx context.Context, olderThanDays int, dry bool) (int, error) {
    266 	if olderThanDays < 0 {
    267 		return 0, nil
    268 	}
    269 
    270 	olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(olderThanDays))
    271 
    272 	if dry {
    273 		// Dry run, just count eligible entries without removing them.
    274 		return m.state.DB.CountRemoteOlderThan(ctx, olderThan)
    275 	}
    276 
    277 	var (
    278 		totalPruned int
    279 		attachments []*gtsmodel.MediaAttachment
    280 		err         error
    281 	)
    282 
    283 	for attachments, err = m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit) {
    284 		olderThan = attachments[len(attachments)-1].CreatedAt // use the created time of the last attachment in the slice as the next 'olderThan' value
    285 
    286 		for _, attachment := range attachments {
    287 			if err := m.uncacheAttachment(ctx, attachment); err != nil {
    288 				return totalPruned, err
    289 			}
    290 			totalPruned++
    291 		}
    292 	}
    293 
    294 	// Make sure we don't have a real error when we leave the loop.
    295 	if err != nil && !errors.Is(err, db.ErrNoEntries) {
    296 		return totalPruned, err
    297 	}
    298 
    299 	return totalPruned, nil
    300 }
    301 
    302 // PruneUnusedLocal prunes unused media attachments that were uploaded by
    303 // a user on this instance, but never actually attached to a status, or attached but
    304 // later detached.
    305 //
    306 // The returned int is the amount of media that was pruned by this function.
    307 func (m *Manager) PruneUnusedLocal(ctx context.Context, dry bool) (int, error) {
    308 	olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(unusedLocalAttachmentDays))
    309 
    310 	if dry {
    311 		// Dry run, just count eligible entries without removing them.
    312 		return m.state.DB.CountLocalUnattachedOlderThan(ctx, olderThan)
    313 	}
    314 
    315 	var (
    316 		totalPruned int
    317 		attachments []*gtsmodel.MediaAttachment
    318 		err         error
    319 	)
    320 
    321 	for attachments, err = m.state.DB.GetLocalUnattachedOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetLocalUnattachedOlderThan(ctx, olderThan, selectPruneLimit) {
    322 		olderThan = attachments[len(attachments)-1].CreatedAt // use the created time of the last attachment in the slice as the next 'olderThan' value
    323 
    324 		for _, attachment := range attachments {
    325 			if err := m.deleteAttachment(ctx, attachment); err != nil {
    326 				return totalPruned, err
    327 			}
    328 			totalPruned++
    329 		}
    330 	}
    331 
    332 	// Make sure we don't have a real error when we leave the loop.
    333 	if err != nil && !errors.Is(err, db.ErrNoEntries) {
    334 		return totalPruned, err
    335 	}
    336 
    337 	return totalPruned, nil
    338 }
    339 
    340 /*
    341 	Handy little helpers
    342 */
    343 
    344 func (m *Manager) deleteAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error {
    345 	if _, err := m.removeFiles(ctx, attachment.File.Path, attachment.Thumbnail.Path); err != nil {
    346 		return err
    347 	}
    348 
    349 	// Delete attachment completely.
    350 	return m.state.DB.DeleteAttachment(ctx, attachment.ID)
    351 }
    352 
    353 func (m *Manager) uncacheAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error {
    354 	if _, err := m.removeFiles(ctx, attachment.File.Path, attachment.Thumbnail.Path); err != nil {
    355 		return err
    356 	}
    357 
    358 	// Update attachment to reflect that we no longer have it cached.
    359 	attachment.Cached = func() *bool { i := false; return &i }()
    360 	return m.state.DB.UpdateAttachment(ctx, attachment, "cached")
    361 }
    362 
    363 func (m *Manager) removeFiles(ctx context.Context, keys ...string) (int, error) {
    364 	errs := make(gtserror.MultiError, 0, len(keys))
    365 
    366 	for _, key := range keys {
    367 		if err := m.state.Storage.Delete(ctx, key); err != nil && !errors.Is(err, storage.ErrNotFound) {
    368 			errs = append(errs, "storage error removing "+key+": "+err.Error())
    369 		}
    370 	}
    371 
    372 	return len(keys) - len(errs), errs.Combine()
    373 }