gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 589bb9df0275457b5f9c3790e67517ec1be1745d
parent 6f5ccf435585e43a00e3cc50f4bcefac36ada818
Author: tsmethurst <tobi.smethurst@protonmail.com>
Date:   Sun, 16 Jan 2022 18:52:55 +0100

pass reader around instead of []byte

Diffstat:
Minternal/federation/dereferencing/account.go | 5+++--
Minternal/federation/dereferencing/media.go | 3++-
Minternal/media/image.go | 46++++++++++++++++++++++------------------------
Minternal/media/manager_test.go | 32+++++++++++++++++++++-----------
Minternal/media/processingemoji.go | 168+++++++++++++++++++++++++++++++++----------------------------------------------
Minternal/media/processingmedia.go | 179++++++++++++++++++++++++++++++++++++++++++-------------------------------------
Minternal/media/types.go | 5+++--
Minternal/media/util.go | 11+++++------
Minternal/processing/account/update.go | 42++++--------------------------------------
Minternal/processing/admin/emoji.go | 20++------------------
Minternal/processing/media/create.go | 19++-----------------
Minternal/transport/derefmedia.go | 7+++----
Minternal/transport/transport.go | 5+++--
Mtestrig/storage.go | 82+------------------------------------------------------------------------------
14 files changed, 238 insertions(+), 386 deletions(-)

diff --git a/internal/federation/dereferencing/account.go b/internal/federation/dereferencing/account.go @@ -23,6 +23,7 @@ import ( "encoding/json" "errors" "fmt" + "io" "net/url" "strings" @@ -251,7 +252,7 @@ func (d *deref) fetchHeaderAndAviForAccount(ctx context.Context, targetAccount * return err } - data := func(innerCtx context.Context) ([]byte, error) { + data := func(innerCtx context.Context) (io.Reader, error) { return t.DereferenceMedia(innerCtx, avatarIRI) } @@ -273,7 +274,7 @@ func (d *deref) fetchHeaderAndAviForAccount(ctx context.Context, targetAccount * return err } - data := func(innerCtx context.Context) ([]byte, error) { + data := func(innerCtx context.Context) (io.Reader, error) { return t.DereferenceMedia(innerCtx, headerIRI) } diff --git a/internal/federation/dereferencing/media.go b/internal/federation/dereferencing/media.go @@ -21,6 +21,7 @@ package dereferencing import ( "context" "fmt" + "io" "net/url" "github.com/superseriousbusiness/gotosocial/internal/media" @@ -41,7 +42,7 @@ func (d *deref) GetRemoteMedia(ctx context.Context, requestingUsername string, a return nil, fmt.Errorf("GetRemoteMedia: error parsing url: %s", err) } - dataFunc := func(innerCtx context.Context) ([]byte, error) { + dataFunc := func(innerCtx context.Context) (io.Reader, error) { return t.DereferenceMedia(innerCtx, derefURI) } diff --git a/internal/media/image.go b/internal/media/image.go @@ -26,6 +26,7 @@ import ( "image/gif" "image/jpeg" "image/png" + "io" "github.com/buckket/go-blurhash" "github.com/nfnt/resize" @@ -37,17 +38,17 @@ const ( thumbnailMaxHeight = 512 ) -type ImageMeta struct { - image []byte +type imageMeta struct { width int height int size int aspect float64 - blurhash string + blurhash string // defined only for calls to deriveThumbnail if createBlurhash is true + small []byte // defined only for calls to deriveStaticEmoji or deriveThumbnail } -func decodeGif(b []byte) (*ImageMeta, error) { - gif, err := gif.DecodeAll(bytes.NewReader(b)) +func decodeGif(r io.Reader) (*imageMeta, error) { + gif, err := gif.DecodeAll(r) if err != nil { return nil, err } @@ -58,8 +59,7 @@ func decodeGif(b []byte) (*ImageMeta, error) { size := width * height aspect := float64(width) / float64(height) - return &ImageMeta{ - image: b, + return &imageMeta{ width: width, height: height, size: size, @@ -67,15 +67,15 @@ func decodeGif(b []byte) (*ImageMeta, error) { }, nil } -func decodeImage(b []byte, contentType string) (*ImageMeta, error) { +func decodeImage(r io.Reader, contentType string) (*imageMeta, error) { var i image.Image var err error switch contentType { case mimeImageJpeg: - i, err = jpeg.Decode(bytes.NewReader(b)) + i, err = jpeg.Decode(r) case mimeImagePng: - i, err = png.Decode(bytes.NewReader(b)) + i, err = png.Decode(r) default: err = fmt.Errorf("content type %s not recognised", contentType) } @@ -93,8 +93,7 @@ func decodeImage(b []byte, contentType string) (*ImageMeta, error) { size := width * height aspect := float64(width) / float64(height) - return &ImageMeta{ - image: b, + return &imageMeta{ width: width, height: height, size: size, @@ -111,17 +110,17 @@ func decodeImage(b []byte, contentType string) (*ImageMeta, error) { // // If createBlurhash is false, then the blurhash field on the returned ImageAndMeta // will be an empty string. -func deriveThumbnail(b []byte, contentType string, createBlurhash bool) (*ImageMeta, error) { +func deriveThumbnail(r io.Reader, contentType string, createBlurhash bool) (*imageMeta, error) { var i image.Image var err error switch contentType { case mimeImageJpeg: - i, err = jpeg.Decode(bytes.NewReader(b)) + i, err = jpeg.Decode(r) case mimeImagePng: - i, err = png.Decode(bytes.NewReader(b)) + i, err = png.Decode(r) case mimeImageGif: - i, err = gif.Decode(bytes.NewReader(b)) + i, err = gif.Decode(r) default: err = fmt.Errorf("content type %s can't be thumbnailed", contentType) } @@ -140,7 +139,7 @@ func deriveThumbnail(b []byte, contentType string, createBlurhash bool) (*ImageM size := width * height aspect := float64(width) / float64(height) - im := &ImageMeta{ + im := &imageMeta{ width: width, height: height, size: size, @@ -165,25 +164,24 @@ func deriveThumbnail(b []byte, contentType string, createBlurhash bool) (*ImageM }); err != nil { return nil, err } - - im.image = out.Bytes() + im.small = out.Bytes() return im, nil } // deriveStaticEmojji takes a given gif or png of an emoji, decodes it, and re-encodes it as a static png. -func deriveStaticEmoji(b []byte, contentType string) (*ImageMeta, error) { +func deriveStaticEmoji(r io.Reader, contentType string) (*imageMeta, error) { var i image.Image var err error switch contentType { case mimeImagePng: - i, err = png.Decode(bytes.NewReader(b)) + i, err = png.Decode(r) if err != nil { return nil, err } case mimeImageGif: - i, err = gif.Decode(bytes.NewReader(b)) + i, err = gif.Decode(r) if err != nil { return nil, err } @@ -195,8 +193,8 @@ func deriveStaticEmoji(b []byte, contentType string) (*ImageMeta, error) { if err := png.Encode(out, i); err != nil { return nil, err } - return &ImageMeta{ - image: out.Bytes(), + return &imageMeta{ + small: out.Bytes(), }, nil } diff --git a/internal/media/manager_test.go b/internal/media/manager_test.go @@ -19,8 +19,10 @@ package media_test import ( + "bytes" "context" "fmt" + "io" "os" "testing" "time" @@ -37,9 +39,13 @@ type ManagerTestSuite struct { func (suite *ManagerTestSuite) TestSimpleJpegProcessBlocking() { ctx := context.Background() - data := func(_ context.Context) ([]byte, error) { + data := func(_ context.Context) (io.Reader, error) { // load bytes from a test image - return os.ReadFile("./test/test-jpeg.jpg") + b, err := os.ReadFile("./test/test-jpeg.jpg") + if err != nil { + panic(err) + } + return bytes.NewBuffer(b), nil } accountID := "01FS1X72SK9ZPW0J1QQ68BD264" @@ -103,9 +109,13 @@ func (suite *ManagerTestSuite) TestSimpleJpegProcessBlocking() { func (suite *ManagerTestSuite) TestSimpleJpegProcessAsync() { ctx := context.Background() - data := func(_ context.Context) ([]byte, error) { + data := func(_ context.Context) (io.Reader, error) { // load bytes from a test image - return os.ReadFile("./test/test-jpeg.jpg") + b, err := os.ReadFile("./test/test-jpeg.jpg") + if err != nil { + panic(err) + } + return bytes.NewBuffer(b), nil } accountID := "01FS1X72SK9ZPW0J1QQ68BD264" @@ -175,16 +185,16 @@ func (suite *ManagerTestSuite) TestSimpleJpegProcessAsync() { func (suite *ManagerTestSuite) TestSimpleJpegQueueSpamming() { // in this test, we spam the manager queue with 50 new media requests, just to see how it holds up - ctx := context.Background() - // load bytes from a test image - testBytes, err := os.ReadFile("./test/test-jpeg.jpg") - suite.NoError(err) - suite.NotEmpty(testBytes) + b, err := os.ReadFile("./test/test-jpeg.jpg") + if err != nil { + panic(err) + } - data := func(_ context.Context) ([]byte, error) { - return testBytes, nil + data := func(_ context.Context) (io.Reader, error) { + // load bytes from a test image + return bytes.NewReader(b), nil } accountID := "01FS1X72SK9ZPW0J1QQ68BD264" diff --git a/internal/media/processingemoji.go b/internal/media/processingemoji.go @@ -19,8 +19,10 @@ package media import ( + "bytes" "context" "fmt" + "io" "strings" "sync" "time" @@ -46,22 +48,14 @@ type ProcessingEmoji struct { emoji *gtsmodel.Emoji data DataFunc - - rawData []byte // will be set once the fetchRawData function has been called + read bool // bool indicating that data function has been triggered already /* - below fields represent the processing state of the static version of the emoji - */ - - staticState processState - static *ImageMeta - - /* - below fields represent the processing state of the emoji image + below fields represent the processing state of the static of the emoji */ + staticState processState fullSizeState processState - fullSize *ImageMeta /* below pointers to database and storage are maintained so that @@ -85,21 +79,18 @@ func (p *ProcessingEmoji) EmojiID() string { // LoadEmoji blocks until the static and fullsize image // has been processed, and then returns the completed emoji. func (p *ProcessingEmoji) LoadEmoji(ctx context.Context) (*gtsmodel.Emoji, error) { - if err := p.fetchRawData(ctx); err != nil { - return nil, err - } + p.mu.Lock() + defer p.mu.Unlock() - if _, err := p.loadStatic(ctx); err != nil { + if err := p.store(ctx); err != nil { return nil, err } - if _, err := p.loadFullSize(ctx); err != nil { + if err := p.loadStatic(ctx); err != nil { return nil, err } // store the result in the database before returning it - p.mu.Lock() - defer p.mu.Unlock() if !p.insertedInDB { if err := p.database.Put(ctx, p.emoji); err != nil { return nil, err @@ -116,118 +107,85 @@ func (p *ProcessingEmoji) Finished() bool { return p.staticState == complete && p.fullSizeState == complete } -func (p *ProcessingEmoji) loadStatic(ctx context.Context) (*ImageMeta, error) { - p.mu.Lock() - defer p.mu.Unlock() - +func (p *ProcessingEmoji) loadStatic(ctx context.Context) error { switch p.staticState { case received: - // we haven't processed a static version of this emoji yet so do it now - static, err := deriveStaticEmoji(p.rawData, p.emoji.ImageContentType) + // stream the original file out of storage... + stored, err := p.storage.GetStream(p.emoji.ImagePath) if err != nil { - p.err = fmt.Errorf("error deriving static: %s", err) + p.err = fmt.Errorf("loadStatic: error fetching file from storage: %s", err) p.staticState = errored - return nil, p.err + return p.err } - // put the static in storage - if err := p.storage.Put(p.emoji.ImageStaticPath, static.image); err != nil { - p.err = fmt.Errorf("error storing static: %s", err) + // we haven't processed a static version of this emoji yet so do it now + static, err := deriveStaticEmoji(stored, p.emoji.ImageContentType) + if err != nil { + p.err = fmt.Errorf("loadStatic: error deriving static: %s", err) p.staticState = errored - return nil, p.err + return p.err } - // set appropriate fields on the emoji based on the static version we derived - p.emoji.ImageStaticFileSize = len(static.image) - - // set the static on the processing emoji - p.static = static - - // we're done processing the static version of the emoji! - p.staticState = complete - fallthrough - case complete: - return p.static, nil - case errored: - return nil, p.err - } - - return nil, fmt.Errorf("static processing status %d unknown", p.staticState) -} - -func (p *ProcessingEmoji) loadFullSize(ctx context.Context) (*ImageMeta, error) { - p.mu.Lock() - defer p.mu.Unlock() - - switch p.fullSizeState { - case received: - var err error - var decoded *ImageMeta - - ct := p.emoji.ImageContentType - switch ct { - case mimeImagePng: - decoded, err = decodeImage(p.rawData, ct) - case mimeImageGif: - decoded, err = decodeGif(p.rawData) - default: - err = fmt.Errorf("content type %s not a processible emoji type", ct) - } - - if err != nil { - p.err = err - p.fullSizeState = errored - return nil, err + if err := stored.Close(); err != nil { + p.err = fmt.Errorf("loadStatic: error closing stored full size: %s", err) + p.staticState = errored + return p.err } - // put the full size emoji in storage - if err := p.storage.Put(p.emoji.ImagePath, decoded.image); err != nil { - p.err = fmt.Errorf("error storing full size emoji: %s", err) - p.fullSizeState = errored - return nil, p.err + // put the static in storage + if err := p.storage.Put(p.emoji.ImageStaticPath, static.small); err != nil { + p.err = fmt.Errorf("loadStatic: error storing static: %s", err) + p.staticState = errored + return p.err } - // set the fullsize of this media - p.fullSize = decoded + p.emoji.ImageStaticFileSize = len(static.small) - // we're done processing the full-size emoji - p.fullSizeState = complete + // we're done processing the static version of the emoji! + p.staticState = complete fallthrough case complete: - return p.fullSize, nil + return nil case errored: - return nil, p.err + return p.err } - return nil, fmt.Errorf("full size processing status %d unknown", p.fullSizeState) + return fmt.Errorf("static processing status %d unknown", p.staticState) } -// fetchRawData calls the data function attached to p if it hasn't been called yet, -// and updates the underlying emoji fields as necessary. -// It should only be called from within a function that already has a lock on p! -func (p *ProcessingEmoji) fetchRawData(ctx context.Context) error { +// store calls the data function attached to p if it hasn't been called yet, +// and updates the underlying attachment fields as necessary. It will then stream +// bytes from p's reader directly into storage so that it can be retrieved later. +func (p *ProcessingEmoji) store(ctx context.Context) error { // check if we've already done this and bail early if we have - if p.rawData != nil { + if p.read { return nil } - // execute the data function and pin the raw bytes for further processing - b, err := p.data(ctx) + // execute the data function to get the reader out of it + reader, err := p.data(ctx) if err != nil { - return fmt.Errorf("fetchRawData: error executing data function: %s", err) + return fmt.Errorf("store: error executing data function: %s", err) + } + + // extract no more than 261 bytes from the beginning of the file -- this is the header + firstBytes := make([]byte, maxFileHeaderBytes) + if _, err := reader.Read(firstBytes); err != nil { + return fmt.Errorf("store: error reading initial %d bytes: %s", maxFileHeaderBytes, err) } - p.rawData = b - // now we have the data we can work out the content type - contentType, err := parseContentType(p.rawData) + // now we have the file header we can work out the content type from it + contentType, err := parseContentType(firstBytes) if err != nil { - return fmt.Errorf("fetchRawData: error parsing content type: %s", err) + return fmt.Errorf("store: error parsing content type: %s", err) } + // bail if this is a type we can't process if !supportedEmoji(contentType) { - return fmt.Errorf("fetchRawData: content type %s was not valid for an emoji", contentType) + return fmt.Errorf("store: content type %s was not valid for an emoji", contentType) } + // extract the file extension split := strings.Split(contentType, "/") extension := split[1] // something like 'gif' @@ -236,8 +194,24 @@ func (p *ProcessingEmoji) fetchRawData(ctx context.Context) error { p.emoji.ImageURL = uris.GenerateURIForAttachment(p.instanceAccountID, string(TypeEmoji), string(SizeOriginal), p.emoji.ID, extension) p.emoji.ImagePath = fmt.Sprintf("%s/%s/%s/%s.%s", p.instanceAccountID, TypeEmoji, SizeOriginal, p.emoji.ID, extension) p.emoji.ImageContentType = contentType - p.emoji.ImageFileSize = len(p.rawData) + // concatenate the first bytes with the existing bytes still in the reader (thanks Mara) + multiReader := io.MultiReader(bytes.NewBuffer(firstBytes), reader) + + // store this for now -- other processes can pull it out of storage as they please + if err := p.storage.PutStream(p.emoji.ImagePath, multiReader); err != nil { + return fmt.Errorf("store: error storing stream: %s", err) + } + p.emoji.ImageFileSize = 36702 // TODO: set this based on the result of PutStream + + // if the original reader is a readcloser, close it since we're done with it now + if rc, ok := reader.(io.ReadCloser); ok { + if err := rc.Close(); err != nil { + return fmt.Errorf("store: error closing readcloser: %s", err) + } + } + + p.read = true return nil } diff --git a/internal/media/processingmedia.go b/internal/media/processingmedia.go @@ -19,8 +19,10 @@ package media import ( + "bytes" "context" "fmt" + "io" "strings" "sync" "time" @@ -44,22 +46,10 @@ type ProcessingMedia struct { attachment *gtsmodel.MediaAttachment data DataFunc + read bool // bool indicating that data function has been triggered already - rawData []byte // will be set once the fetchRawData function has been called - - /* - below fields represent the processing state of the media thumbnail - */ - - thumbstate processState - thumb *ImageMeta - - /* - below fields represent the processing state of the full-sized media - */ - - fullSizeState processState - fullSize *ImageMeta + thumbstate processState // the processing state of the media thumbnail + fullSizeState processState // the processing state of the full-sized media /* below pointers to database and storage are maintained so that @@ -83,21 +73,22 @@ func (p *ProcessingMedia) AttachmentID() string { // LoadAttachment blocks until the thumbnail and fullsize content // has been processed, and then returns the completed attachment. func (p *ProcessingMedia) LoadAttachment(ctx context.Context) (*gtsmodel.MediaAttachment, error) { - if err := p.fetchRawData(ctx); err != nil { + p.mu.Lock() + defer p.mu.Unlock() + + if err := p.store(ctx); err != nil { return nil, err } - if _, err := p.loadThumb(ctx); err != nil { + if err := p.loadThumb(ctx); err != nil { return nil, err } - if _, err := p.loadFullSize(ctx); err != nil { + if err := p.loadFullSize(ctx); err != nil { return nil, err } // store the result in the database before returning it - p.mu.Lock() - defer p.mu.Unlock() if !p.insertedInDB { if err := p.database.Put(ctx, p.attachment); err != nil { return nil, err @@ -114,10 +105,7 @@ func (p *ProcessingMedia) Finished() bool { return p.thumbstate == complete && p.fullSizeState == complete } -func (p *ProcessingMedia) loadThumb(ctx context.Context) (*ImageMeta, error) { - p.mu.Lock() - defer p.mu.Unlock() - +func (p *ProcessingMedia) loadThumb(ctx context.Context) error { switch p.thumbstate { case received: // we haven't processed a thumbnail for this media yet so do it now @@ -129,87 +117,94 @@ func (p *ProcessingMedia) loadThumb(ctx context.Context) (*ImageMeta, error) { createBlurhash = true } - thumb, err := deriveThumbnail(p.rawData, p.attachment.File.ContentType, createBlurhash) + // stream the original file out of storage... + stored, err := p.storage.GetStream(p.attachment.File.Path) + if err != nil { + p.err = fmt.Errorf("loadThumb: error fetching file from storage: %s", err) + p.thumbstate = errored + return p.err + } + + // ... and into the derive thumbnail function + thumb, err := deriveThumbnail(stored, p.attachment.File.ContentType, createBlurhash) if err != nil { - p.err = fmt.Errorf("error deriving thumbnail: %s", err) + p.err = fmt.Errorf("loadThumb: error deriving thumbnail: %s", err) + p.thumbstate = errored + return p.err + } + + if err := stored.Close(); err != nil { + p.err = fmt.Errorf("loadThumb: error closing stored full size: %s", err) p.thumbstate = errored - return nil, p.err + return p.err } // put the thumbnail in storage - if err := p.storage.Put(p.attachment.Thumbnail.Path, thumb.image); err != nil { - p.err = fmt.Errorf("error storing thumbnail: %s", err) + if err := p.storage.Put(p.attachment.Thumbnail.Path, thumb.small); err != nil { + p.err = fmt.Errorf("loadThumb: error storing thumbnail: %s", err) p.thumbstate = errored - return nil, p.err + return p.err } // set appropriate fields on the attachment based on the thumbnail we derived if createBlurhash { p.attachment.Blurhash = thumb.blurhash } - p.attachment.FileMeta.Small = gtsmodel.Small{ Width: thumb.width, Height: thumb.height, Size: thumb.size, Aspect: thumb.aspect, } - p.attachment.Thumbnail.FileSize = len(thumb.image) - - // set the thumbnail of this media - p.thumb = thumb + p.attachment.Thumbnail.FileSize = len(thumb.small) // we're done processing the thumbnail! p.thumbstate = complete fallthrough case complete: - return p.thumb, nil + return nil case errored: - return nil, p.err + return p.err } - return nil, fmt.Errorf("thumbnail processing status %d unknown", p.thumbstate) + return fmt.Errorf("loadThumb: thumbnail processing status %d unknown", p.thumbstate) } -func (p *ProcessingMedia) loadFullSize(ctx context.Context) (*ImageMeta, error) { - p.mu.Lock() - defer p.mu.Unlock() - +func (p *ProcessingMedia) loadFullSize(ctx context.Context) error { switch p.fullSizeState { case received: - var clean []byte var err error - var decoded *ImageMeta + var decoded *imageMeta + + // stream the original file out of storage... + stored, err := p.storage.GetStream(p.attachment.File.Path) + if err != nil { + p.err = fmt.Errorf("loadFullSize: error fetching file from storage: %s", err) + p.fullSizeState = errored + return p.err + } + // decode the image ct := p.attachment.File.ContentType switch ct { case mimeImageJpeg, mimeImagePng: - // first 'clean' image by purging exif data from it - var exifErr error - if clean, exifErr = purgeExif(p.rawData); exifErr != nil { - err = exifErr - break - } - decoded, err = decodeImage(clean, ct) + decoded, err = decodeImage(stored, ct) case mimeImageGif: - // gifs are already clean - no exif data to remove - clean = p.rawData - decoded, err = decodeGif(clean) + decoded, err = decodeGif(stored) default: - err = fmt.Errorf("content type %s not a processible image type", ct) + err = fmt.Errorf("loadFullSize: content type %s not a processible image type", ct) } if err != nil { p.err = err p.fullSizeState = errored - return nil, err + return p.err } - // put the full size in storage - if err := p.storage.Put(p.attachment.File.Path, decoded.image); err != nil { - p.err = fmt.Errorf("error storing full size image: %s", err) - p.fullSizeState = errored - return nil, p.err + if err := stored.Close(); err != nil { + p.err = fmt.Errorf("loadFullSize: error closing stored full size: %s", err) + p.thumbstate = errored + return p.err } // set appropriate fields on the attachment based on the image we derived @@ -219,56 +214,58 @@ func (p *ProcessingMedia) loadFullSize(ctx context.Context) (*ImageMeta, error) Size: decoded.size, Aspect: decoded.aspect, } - p.attachment.File.FileSize = len(decoded.image) p.attachment.File.UpdatedAt = time.Now() p.attachment.Processing = gtsmodel.ProcessingStatusProcessed - // set the fullsize of this media - p.fullSize = decoded - // we're done processing the full-size image p.fullSizeState = complete fallthrough case complete: - return p.fullSize, nil + return nil case errored: - return nil, p.err + return p.err } - return nil, fmt.Errorf("full size processing status %d unknown", p.fullSizeState) + return fmt.Errorf("loadFullSize: full size processing status %d unknown", p.fullSizeState) } -// fetchRawData calls the data function attached to p if it hasn't been called yet, -// and updates the underlying attachment fields as necessary. -// It should only be called from within a function that already has a lock on p! -func (p *ProcessingMedia) fetchRawData(ctx context.Context) error { +// store calls the data function attached to p if it hasn't been called yet, +// and updates the underlying attachment fields as necessary. It will then stream +// bytes from p's reader directly into storage so that it can be retrieved later. +func (p *ProcessingMedia) store(ctx context.Context) error { // check if we've already done this and bail early if we have - if p.rawData != nil { + if p.read { return nil } - // execute the data function and pin the raw bytes for further processing - b, err := p.data(ctx) + // execute the data function to get the reader out of it + reader, err := p.data(ctx) if err != nil { - return fmt.Errorf("fetchRawData: error executing data function: %s", err) + return fmt.Errorf("store: error executing data function: %s", err) + } + + // extract no more than 261 bytes from the beginning of the file -- this is the header + firstBytes := make([]byte, maxFileHeaderBytes) + if _, err := reader.Read(firstBytes); err != nil { + return fmt.Errorf("store: error reading initial %d bytes: %s", maxFileHeaderBytes, err) } - p.rawData = b - // now we have the data we can work out the content type - contentType, err := parseContentType(p.rawData) + // now we have the file header we can work out the content type from it + contentType, err := parseContentType(firstBytes) if err != nil { - return fmt.Errorf("fetchRawData: error parsing content type: %s", err) + return fmt.Errorf("store: error parsing content type: %s", err) } + // bail if this is a type we can't process if !supportedImage(contentType) { - return fmt.Errorf("fetchRawData: media type %s not (yet) supported", contentType) + return fmt.Errorf("store: media type %s not (yet) supported", contentType) } + // extract the file extension split := strings.Split(contentType, "/") if len(split) != 2 { - return fmt.Errorf("fetchRawData: content type %s was not valid", contentType) + return fmt.Errorf("store: content type %s was not valid", contentType) } - extension := split[1] // something like 'jpeg' // set some additional fields on the attachment now that @@ -282,6 +279,22 @@ func (p *ProcessingMedia) fetchRawData(ctx context.Context) error { p.attachment.File.Path = fmt.Sprintf("%s/%s/%s/%s.%s", p.attachment.AccountID, TypeAttachment, SizeOriginal, p.attachment.ID, extension) p.attachment.File.ContentType = contentType + // concatenate the first bytes with the existing bytes still in the reader (thanks Mara) + multiReader := io.MultiReader(bytes.NewBuffer(firstBytes), reader) + + // store this for now -- other processes can pull it out of storage as they please + if err := p.storage.PutStream(p.attachment.File.Path, multiReader); err != nil { + return fmt.Errorf("store: error storing stream: %s", err) + } + + // if the original reader is a readcloser, close it since we're done with it now + if rc, ok := reader.(io.ReadCloser); ok { + if err := rc.Close(); err != nil { + return fmt.Errorf("store: error closing readcloser: %s", err) + } + } + + p.read = true return nil } diff --git a/internal/media/types.go b/internal/media/types.go @@ -20,6 +20,7 @@ package media import ( "context" + "io" "time" ) @@ -28,7 +29,7 @@ import ( // // See: https://en.wikipedia.org/wiki/File_format#File_header // and https://github.com/h2non/filetype -const maxFileHeaderBytes = 262 +const maxFileHeaderBytes = 261 // mime consts const ( @@ -117,4 +118,4 @@ type AdditionalEmojiInfo struct { } // DataFunc represents a function used to retrieve the raw bytes of a piece of media. -type DataFunc func(ctx context.Context) ([]byte, error) +type DataFunc func(ctx context.Context) (io.Reader, error) diff --git a/internal/media/util.go b/internal/media/util.go @@ -19,7 +19,6 @@ package media import ( - "bytes" "errors" "fmt" @@ -28,11 +27,11 @@ import ( // parseContentType parses the MIME content type from a file, returning it as a string in the form (eg., "image/jpeg"). // Returns an error if the content type is not something we can process. -func parseContentType(content []byte) (string, error) { - // read in the first bytes of the file - fileHeader := make([]byte, maxFileHeaderBytes) - if _, err := bytes.NewReader(content).Read(fileHeader); err != nil { - return "", fmt.Errorf("could not read first magic bytes of file: %s", err) +// +// Fileheader should be no longer than 262 bytes; anything more than this is inefficient. +func parseContentType(fileHeader []byte) (string, error) { + if fhLength := len(fileHeader); fhLength > maxFileHeaderBytes { + return "", fmt.Errorf("parseContentType requires %d bytes max, we got %d", maxFileHeaderBytes, fhLength) } kind, err := filetype.Match(fileHeader) diff --git a/internal/processing/account/update.go b/internal/processing/account/update.go @@ -19,9 +19,7 @@ package account import ( - "bytes" "context" - "errors" "fmt" "io" "mime/multipart" @@ -142,24 +140,8 @@ func (p *processor) UpdateAvatar(ctx context.Context, avatar *multipart.FileHead return nil, fmt.Errorf("UpdateAvatar: avatar with size %d exceeded max image size of %d bytes", avatar.Size, maxImageSize) } - dataFunc := func(ctx context.Context) ([]byte, error) { - // pop open the fileheader - f, err := avatar.Open() - if err != nil { - return nil, fmt.Errorf("UpdateAvatar: could not read provided avatar: %s", err) - } - - // extract the bytes - buf := new(bytes.Buffer) - size, err := io.Copy(buf, f) - if err != nil { - return nil, fmt.Errorf("UpdateAvatar: could not read provided avatar: %s", err) - } - if size == 0 { - return nil, errors.New("UpdateAvatar: could not read provided avatar: size 0 bytes") - } - - return buf.Bytes(), f.Close() + dataFunc := func(ctx context.Context) (io.Reader, error) { + return avatar.Open() } isAvatar := true @@ -184,24 +166,8 @@ func (p *processor) UpdateHeader(ctx context.Context, header *multipart.FileHead return nil, fmt.Errorf("UpdateHeader: header with size %d exceeded max image size of %d bytes", header.Size, maxImageSize) } - dataFunc := func(ctx context.Context) ([]byte, error) { - // pop open the fileheader - f, err := header.Open() - if err != nil { - return nil, fmt.Errorf("UpdateHeader: could not read provided header: %s", err) - } - - // extract the bytes - buf := new(bytes.Buffer) - size, err := io.Copy(buf, f) - if err != nil { - return nil, fmt.Errorf("UpdateHeader: could not read provided header: %s", err) - } - if size == 0 { - return nil, errors.New("UpdateHeader: could not read provided header: size 0 bytes") - } - - return buf.Bytes(), f.Close() + dataFunc := func(ctx context.Context) (io.Reader, error) { + return header.Open() } isHeader := true diff --git a/internal/processing/admin/emoji.go b/internal/processing/admin/emoji.go @@ -19,9 +19,7 @@ package admin import ( - "bytes" "context" - "errors" "fmt" "io" @@ -38,22 +36,8 @@ func (p *processor) EmojiCreate(ctx context.Context, account *gtsmodel.Account, return nil, gtserror.NewErrorNotAuthorized(fmt.Errorf("user %s not an admin", user.ID), "user is not an admin") } - data := func(innerCtx context.Context) ([]byte, error) { - // open the emoji and extract the bytes from it - f, err := form.Image.Open() - if err != nil { - return nil, fmt.Errorf("error opening emoji: %s", err) - } - buf := new(bytes.Buffer) - size, err := io.Copy(buf, f) - if err != nil { - return nil, fmt.Errorf("error reading emoji: %s", err) - } - if size == 0 { - return nil, errors.New("could not read provided emoji: size 0 bytes") - } - - return buf.Bytes(), f.Close() + data := func(innerCtx context.Context) (io.Reader, error) { + return form.Image.Open() } emojiID, err := id.NewRandomULID() diff --git a/internal/processing/media/create.go b/internal/processing/media/create.go @@ -19,9 +19,7 @@ package media import ( - "bytes" "context" - "errors" "fmt" "io" @@ -31,21 +29,8 @@ import ( ) func (p *processor) Create(ctx context.Context, account *gtsmodel.Account, form *apimodel.AttachmentRequest) (*apimodel.Attachment, error) { - data := func(innerCtx context.Context) ([]byte, error) { - // open the attachment and extract the bytes from it - f, err := form.File.Open() - if err != nil { - return nil, fmt.Errorf("error opening attachment: %s", err) - } - buf := new(bytes.Buffer) - size, err := io.Copy(buf, f) - if err != nil { - return nil, fmt.Errorf("error reading attachment: %s", err) - } - if size == 0 { - return nil, errors.New("could not read provided attachment: size 0 bytes") - } - return buf.Bytes(), f.Close() + data := func(innerCtx context.Context) (io.Reader, error) { + return form.File.Open() } focusX, focusY, err := parseFocus(form.Focus) diff --git a/internal/transport/derefmedia.go b/internal/transport/derefmedia.go @@ -21,14 +21,14 @@ package transport import ( "context" "fmt" - "io/ioutil" + "io" "net/http" "net/url" "github.com/sirupsen/logrus" ) -func (t *transport) DereferenceMedia(ctx context.Context, iri *url.URL) ([]byte, error) { +func (t *transport) DereferenceMedia(ctx context.Context, iri *url.URL) (io.ReadCloser, error) { l := logrus.WithField("func", "DereferenceMedia") l.Debugf("performing GET to %s", iri.String()) req, err := http.NewRequestWithContext(ctx, "GET", iri.String(), nil) @@ -50,9 +50,8 @@ func (t *transport) DereferenceMedia(ctx context.Context, iri *url.URL) ([]byte, if err != nil { return nil, err } - defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("GET request to %s failed (%d): %s", iri.String(), resp.StatusCode, resp.Status) } - return ioutil.ReadAll(resp.Body) + return resp.Body, nil } diff --git a/internal/transport/transport.go b/internal/transport/transport.go @@ -21,6 +21,7 @@ package transport import ( "context" "crypto" + "io" "net/url" "sync" @@ -33,8 +34,8 @@ import ( // functionality for fetching remote media. type Transport interface { pub.Transport - // DereferenceMedia fetches the bytes of the given media attachment IRI. - DereferenceMedia(ctx context.Context, iri *url.URL) ([]byte, error) + // DereferenceMedia fetches the given media attachment IRI. + DereferenceMedia(ctx context.Context, iri *url.URL) (io.ReadCloser, error) // DereferenceInstance dereferences remote instance information, first by checking /api/v1/instance, and then by checking /.well-known/nodeinfo. DereferenceInstance(ctx context.Context, iri *url.URL) (*gtsmodel.Instance, error) // Finger performs a webfinger request with the given username and domain, and returns the bytes from the response body. diff --git a/testrig/storage.go b/testrig/storage.go @@ -19,20 +19,16 @@ package testrig import ( - "bytes" - "errors" "fmt" - "io" "os" "codeberg.org/gruf/go-store/kv" "codeberg.org/gruf/go-store/storage" - "codeberg.org/gruf/go-store/util" ) // NewTestStorage returns a new in memory storage with the default test config func NewTestStorage() *kv.KVStore { - storage, err := kv.OpenStorage(&inMemStorage{storage: map[string][]byte{}, overwrite: false}) + storage, err := kv.OpenStorage(storage.OpenMemory(200, false)) if err != nil { panic(err) } @@ -113,79 +109,3 @@ func StandardStorageTeardown(s *kv.KVStore) { } } } - -type inMemStorage struct { - storage map[string][]byte - overwrite bool -} - -func (s *inMemStorage) Clean() error { - return nil -} - -func (s *inMemStorage) ReadBytes(key string) ([]byte, error) { - b, ok := s.storage[key] - if !ok { - return nil, errors.New("key not found") - } - return b, nil -} - -func (s *inMemStorage) ReadStream(key string) (io.ReadCloser, error) { - b, err := s.ReadBytes(key) - if err != nil { - return nil, err - } - return util.NopReadCloser(bytes.NewReader(b)), nil -} - -func (s *inMemStorage) WriteBytes(key string, value []byte) error { - if _, ok := s.storage[key]; ok && !s.overwrite { - return errors.New("key already in storage") - } - s.storage[key] = copyBytes(value) - return nil -} - -func (s *inMemStorage) WriteStream(key string, r io.Reader) error { - b, err := io.ReadAll(r) - if err != nil { - return err - } - return s.WriteBytes(key, b) -} - -func (s *inMemStorage) Stat(key string) (bool, error) { - _, ok := s.storage[key] - return ok, nil -} - -func (s *inMemStorage) Remove(key string) error { - if _, ok := s.storage[key]; !ok { - return errors.New("key not found") - } - delete(s.storage, key) - return nil -} - -func (s *inMemStorage) WalkKeys(opts storage.WalkKeysOptions) error { - if opts.WalkFn == nil { - return errors.New("invalid walkfn") - } - for key := range s.storage { - opts.WalkFn(entry(key)) - } - return nil -} - -type entry string - -func (e entry) Key() string { - return string(e) -} - -func copyBytes(b []byte) []byte { - p := make([]byte, len(b)) - copy(p, b) - return p -}