prune.go (12994B)
1 // GoToSocial 2 // Copyright (C) GoToSocial Authors admin@gotosocial.org 3 // SPDX-License-Identifier: AGPL-3.0-or-later 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package media 19 20 import ( 21 "context" 22 "errors" 23 "fmt" 24 "time" 25 26 "codeberg.org/gruf/go-store/v2/storage" 27 "github.com/superseriousbusiness/gotosocial/internal/db" 28 "github.com/superseriousbusiness/gotosocial/internal/gtserror" 29 "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" 30 "github.com/superseriousbusiness/gotosocial/internal/log" 31 "github.com/superseriousbusiness/gotosocial/internal/regexes" 32 "github.com/superseriousbusiness/gotosocial/internal/uris" 33 ) 34 35 const ( 36 selectPruneLimit = 50 // Amount of media entries to select at a time from the db when pruning. 37 unusedLocalAttachmentDays = 3 // Number of days to keep local media in storage if not attached to a status. 38 ) 39 40 // PruneAll runs all of the below pruning/uncacheing functions, and then cleans up any resulting 41 // empty directories from the storage driver. It can be called as a shortcut for calling the below 42 // pruning functions one by one. 43 // 44 // If blocking is true, then any errors encountered during the prune will be combined + returned to 45 // the caller. If blocking is false, the prune is run in the background and errors are just logged 46 // instead. 47 func (m *Manager) PruneAll(ctx context.Context, mediaCacheRemoteDays int, blocking bool) error { 48 const dry = false 49 50 f := func(innerCtx context.Context) error { 51 errs := gtserror.MultiError{} 52 53 pruned, err := m.PruneUnusedLocal(innerCtx, dry) 54 if err != nil { 55 errs = append(errs, fmt.Sprintf("error pruning unused local media (%s)", err)) 56 } else { 57 log.Infof(ctx, "pruned %d unused local media", pruned) 58 } 59 60 pruned, err = m.PruneUnusedRemote(innerCtx, dry) 61 if err != nil { 62 errs = append(errs, fmt.Sprintf("error pruning unused remote media: (%s)", err)) 63 } else { 64 log.Infof(ctx, "pruned %d unused remote media", pruned) 65 } 66 67 pruned, err = m.UncacheRemote(innerCtx, mediaCacheRemoteDays, dry) 68 if err != nil { 69 errs = append(errs, fmt.Sprintf("error uncacheing remote media older than %d day(s): (%s)", mediaCacheRemoteDays, err)) 70 } else { 71 log.Infof(ctx, "uncached %d remote media older than %d day(s)", pruned, mediaCacheRemoteDays) 72 } 73 74 pruned, err = m.PruneOrphaned(innerCtx, dry) 75 if err != nil { 76 errs = append(errs, fmt.Sprintf("error pruning orphaned media: (%s)", err)) 77 } else { 78 log.Infof(ctx, "pruned %d orphaned media", pruned) 79 } 80 81 if err := m.state.Storage.Storage.Clean(innerCtx); err != nil { 82 errs = append(errs, fmt.Sprintf("error cleaning storage: (%s)", err)) 83 } else { 84 log.Info(ctx, "cleaned storage") 85 } 86 87 return errs.Combine() 88 } 89 90 if blocking { 91 return f(ctx) 92 } 93 94 go func() { 95 if err := f(context.Background()); err != nil { 96 log.Error(ctx, err) 97 } 98 }() 99 100 return nil 101 } 102 103 // PruneUnusedRemote prunes unused/out of date headers and avatars cached on this instance. 104 // 105 // The returned int is the amount of media that was pruned by this function. 106 func (m *Manager) PruneUnusedRemote(ctx context.Context, dry bool) (int, error) { 107 var ( 108 totalPruned int 109 maxID string 110 attachments []*gtsmodel.MediaAttachment 111 err error 112 ) 113 114 // We don't know in advance how many remote attachments will meet 115 // our criteria for being 'unused'. So a dry run in this case just 116 // means we iterate through as normal, but do nothing with each entry 117 // instead of removing it. Define this here so we don't do the 'if dry' 118 // check inside the loop a million times. 119 var f func(ctx context.Context, attachment *gtsmodel.MediaAttachment) error 120 if !dry { 121 f = m.deleteAttachment 122 } else { 123 f = func(_ context.Context, _ *gtsmodel.MediaAttachment) error { 124 return nil // noop 125 } 126 } 127 128 for attachments, err = m.state.DB.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit) { 129 maxID = attachments[len(attachments)-1].ID // use the id of the last attachment in the slice as the next 'maxID' value 130 131 for _, attachment := range attachments { 132 // Retrieve owning account if possible. 133 var account *gtsmodel.Account 134 if accountID := attachment.AccountID; accountID != "" { 135 account, err = m.state.DB.GetAccountByID(ctx, attachment.AccountID) 136 if err != nil && !errors.Is(err, db.ErrNoEntries) { 137 // Only return on a real error. 138 return 0, fmt.Errorf("PruneUnusedRemote: error fetching account with id %s: %w", accountID, err) 139 } 140 } 141 142 // Prune each attachment that meets one of the following criteria: 143 // - Has no owning account in the database. 144 // - Is a header but isn't the owning account's current header. 145 // - Is an avatar but isn't the owning account's current avatar. 146 if account == nil || 147 (*attachment.Header && attachment.ID != account.HeaderMediaAttachmentID) || 148 (*attachment.Avatar && attachment.ID != account.AvatarMediaAttachmentID) { 149 if err := f(ctx, attachment); err != nil { 150 return totalPruned, err 151 } 152 totalPruned++ 153 } 154 } 155 } 156 157 // Make sure we don't have a real error when we leave the loop. 158 if err != nil && !errors.Is(err, db.ErrNoEntries) { 159 return totalPruned, err 160 } 161 162 return totalPruned, nil 163 } 164 165 // PruneOrphaned prunes files that exist in storage but which do not have a corresponding 166 // entry in the database. 167 // 168 // If dry is true, then nothing will be changed, only the amount that *would* be removed 169 // is returned to the caller. 170 func (m *Manager) PruneOrphaned(ctx context.Context, dry bool) (int, error) { 171 // Emojis are stored under the instance account, so we 172 // need the ID of the instance account for the next part. 173 instanceAccount, err := m.state.DB.GetInstanceAccount(ctx, "") 174 if err != nil { 175 return 0, fmt.Errorf("PruneOrphaned: error getting instance account: %w", err) 176 } 177 178 instanceAccountID := instanceAccount.ID 179 180 var orphanedKeys []string 181 182 // Keys in storage will look like the following format: 183 // `[ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[MEDIA_ID].[EXTENSION]` 184 // We can filter out keys we're not interested in by matching through a regex. 185 if err := m.state.Storage.WalkKeys(ctx, func(ctx context.Context, key string) error { 186 if !regexes.FilePath.MatchString(key) { 187 // This is not our expected key format. 188 return nil 189 } 190 191 // Check whether this storage entry is orphaned. 192 orphaned, err := m.orphaned(ctx, key, instanceAccountID) 193 if err != nil { 194 return fmt.Errorf("error checking orphaned status: %w", err) 195 } 196 197 if orphaned { 198 // Add this orphaned entry to list of keys. 199 orphanedKeys = append(orphanedKeys, key) 200 } 201 202 return nil 203 }); err != nil { 204 return 0, fmt.Errorf("PruneOrphaned: error walking keys: %w", err) 205 } 206 207 totalPruned := len(orphanedKeys) 208 209 if dry { 210 // Dry run: don't remove anything. 211 return totalPruned, nil 212 } 213 214 // This is not a drill! We have to delete stuff! 215 return m.removeFiles(ctx, orphanedKeys...) 216 } 217 218 func (m *Manager) orphaned(ctx context.Context, key string, instanceAccountID string) (bool, error) { 219 pathParts := regexes.FilePath.FindStringSubmatch(key) 220 if len(pathParts) != 6 { 221 // This doesn't match our expectations so 222 // it wasn't created by gts; ignore it. 223 return false, nil 224 } 225 226 var ( 227 mediaType = pathParts[2] 228 mediaID = pathParts[4] 229 orphaned = false 230 ) 231 232 // Look for keys in storage that we don't have an attachment for. 233 switch Type(mediaType) { 234 case TypeAttachment, TypeHeader, TypeAvatar: 235 if _, err := m.state.DB.GetAttachmentByID(ctx, mediaID); err != nil { 236 if !errors.Is(err, db.ErrNoEntries) { 237 return false, fmt.Errorf("error calling GetAttachmentByID: %w", err) 238 } 239 orphaned = true 240 } 241 case TypeEmoji: 242 // Look using the static URL for the emoji. Emoji images can change, so 243 // the MEDIA_ID part of the key for emojis will not necessarily correspond 244 // to the file that's currently being used as the emoji image. 245 staticURL := uris.GenerateURIForAttachment(instanceAccountID, string(TypeEmoji), string(SizeStatic), mediaID, mimePng) 246 if _, err := m.state.DB.GetEmojiByStaticURL(ctx, staticURL); err != nil { 247 if !errors.Is(err, db.ErrNoEntries) { 248 return false, fmt.Errorf("error calling GetEmojiByStaticURL: %w", err) 249 } 250 orphaned = true 251 } 252 } 253 254 return orphaned, nil 255 } 256 257 // UncacheRemote uncaches all remote media attachments older than the given amount of days. 258 // 259 // In this context, uncacheing means deleting media files from storage and marking the attachment 260 // as cached=false in the database. 261 // 262 // If 'dry' is true, then only a dry run will be performed: nothing will actually be changed. 263 // 264 // The returned int is the amount of media that was/would be uncached by this function. 265 func (m *Manager) UncacheRemote(ctx context.Context, olderThanDays int, dry bool) (int, error) { 266 if olderThanDays < 0 { 267 return 0, nil 268 } 269 270 olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(olderThanDays)) 271 272 if dry { 273 // Dry run, just count eligible entries without removing them. 274 return m.state.DB.CountRemoteOlderThan(ctx, olderThan) 275 } 276 277 var ( 278 totalPruned int 279 attachments []*gtsmodel.MediaAttachment 280 err error 281 ) 282 283 for attachments, err = m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit) { 284 olderThan = attachments[len(attachments)-1].CreatedAt // use the created time of the last attachment in the slice as the next 'olderThan' value 285 286 for _, attachment := range attachments { 287 if err := m.uncacheAttachment(ctx, attachment); err != nil { 288 return totalPruned, err 289 } 290 totalPruned++ 291 } 292 } 293 294 // Make sure we don't have a real error when we leave the loop. 295 if err != nil && !errors.Is(err, db.ErrNoEntries) { 296 return totalPruned, err 297 } 298 299 return totalPruned, nil 300 } 301 302 // PruneUnusedLocal prunes unused media attachments that were uploaded by 303 // a user on this instance, but never actually attached to a status, or attached but 304 // later detached. 305 // 306 // The returned int is the amount of media that was pruned by this function. 307 func (m *Manager) PruneUnusedLocal(ctx context.Context, dry bool) (int, error) { 308 olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(unusedLocalAttachmentDays)) 309 310 if dry { 311 // Dry run, just count eligible entries without removing them. 312 return m.state.DB.CountLocalUnattachedOlderThan(ctx, olderThan) 313 } 314 315 var ( 316 totalPruned int 317 attachments []*gtsmodel.MediaAttachment 318 err error 319 ) 320 321 for attachments, err = m.state.DB.GetLocalUnattachedOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetLocalUnattachedOlderThan(ctx, olderThan, selectPruneLimit) { 322 olderThan = attachments[len(attachments)-1].CreatedAt // use the created time of the last attachment in the slice as the next 'olderThan' value 323 324 for _, attachment := range attachments { 325 if err := m.deleteAttachment(ctx, attachment); err != nil { 326 return totalPruned, err 327 } 328 totalPruned++ 329 } 330 } 331 332 // Make sure we don't have a real error when we leave the loop. 333 if err != nil && !errors.Is(err, db.ErrNoEntries) { 334 return totalPruned, err 335 } 336 337 return totalPruned, nil 338 } 339 340 /* 341 Handy little helpers 342 */ 343 344 func (m *Manager) deleteAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error { 345 if _, err := m.removeFiles(ctx, attachment.File.Path, attachment.Thumbnail.Path); err != nil { 346 return err 347 } 348 349 // Delete attachment completely. 350 return m.state.DB.DeleteAttachment(ctx, attachment.ID) 351 } 352 353 func (m *Manager) uncacheAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error { 354 if _, err := m.removeFiles(ctx, attachment.File.Path, attachment.Thumbnail.Path); err != nil { 355 return err 356 } 357 358 // Update attachment to reflect that we no longer have it cached. 359 attachment.Cached = func() *bool { i := false; return &i }() 360 return m.state.DB.UpdateAttachment(ctx, attachment, "cached") 361 } 362 363 func (m *Manager) removeFiles(ctx context.Context, keys ...string) (int, error) { 364 errs := make(gtserror.MultiError, 0, len(keys)) 365 366 for _, key := range keys { 367 if err := m.state.Storage.Delete(ctx, key); err != nil && !errors.Is(err, storage.ErrNotFound) { 368 errs = append(errs, "storage error removing "+key+": "+err.Error()) 369 } 370 } 371 372 return len(keys) - len(errs), errs.Combine() 373 }