gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit eeb78bd141ea69a59c4361fac2cfe6fc36df59b3
parent d5fe729cf43ce73ff7475323cbae121cb363e14a
Author: tobi <31960611+tsmethurst@users.noreply.github.com>
Date:   Mon, 25 Apr 2022 14:45:44 +0200

[bugfix] Allow processing of .png files where checksum is not correct (#487)

* add png stripping code from google/wuffs

* experiment with stripping data from pngs

* add test images

* use StrippedPngDecode for pngs

* add StrippedPngDecode func

* update tests for (no)alphachannel pngs

* nolint on copied function
Diffstat:
MREADME.md | 1+
Minternal/media/image.go | 6+++---
Minternal/media/manager_test.go | 144+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainternal/media/png-stripper.go | 204+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ainternal/media/test/test-png-alphachannel-processed.png | 0
Ainternal/media/test/test-png-alphachannel-thumbnail.jpg | 0
Ainternal/media/test/test-png-noalphachannel-processed.png | 0
Ainternal/media/test/test-png-noalphachannel-thumbnail.jpg | 0
8 files changed, 352 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md @@ -196,6 +196,7 @@ The following libraries and frameworks are used by GoToSocial, with gratitude - [gin-contrib/static](https://github.com/gin-contrib/static); Gin static page middleware. [MIT License](https://spdx.org/licenses/MIT.html) - [go-fed/httpsig](https://github.com/go-fed/httpsig); secure HTTP signature library. [BSD-3-Clause License](https://spdx.org/licenses/BSD-3-Clause.html). - [google/uuid](https://github.com/google/uuid); UUID generation. [BSD-3-Clause License](https://spdx.org/licenses/BSD-3-Clause.html) +- [google/wuffs](https://github.com/google/wuffs); png-stripping code. [Apache-2.0 License](https://spdx.org/licenses/Apache-2.0.html). - [go-playground/validator](https://github.com/go-playground/validator); struct validation. [MIT License](https://spdx.org/licenses/MIT.html) - [gorilla/websocket](https://github.com/gorilla/websocket); Websocket connectivity. [BSD-2-Clause License](https://spdx.org/licenses/BSD-2-Clause.html). - [gruf/go-runners](https://codeberg.org/gruf/go-runners); worker pool library. [MIT License](https://spdx.org/licenses/MIT.html). diff --git a/internal/media/image.go b/internal/media/image.go @@ -74,7 +74,7 @@ func decodeImage(r io.Reader, contentType string) (*imageMeta, error) { case mimeImageJpeg: i, err = jpeg.Decode(r) case mimeImagePng: - i, err = png.Decode(r) + i, err = StrippedPngDecode(r) default: err = fmt.Errorf("content type %s not recognised", contentType) } @@ -117,7 +117,7 @@ func deriveThumbnail(r io.Reader, contentType string, createBlurhash bool) (*ima case mimeImageJpeg: i, err = jpeg.Decode(r) case mimeImagePng: - i, err = png.Decode(r) + i, err = StrippedPngDecode(r) case mimeImageGif: i, err = gif.Decode(r) default: @@ -175,7 +175,7 @@ func deriveStaticEmoji(r io.Reader, contentType string) (*imageMeta, error) { switch contentType { case mimeImagePng: - i, err = png.Decode(r) + i, err = StrippedPngDecode(r) if err != nil { return nil, err } diff --git a/internal/media/manager_test.go b/internal/media/manager_test.go @@ -111,6 +111,150 @@ func (suite *ManagerTestSuite) TestSimpleJpegProcessBlocking() { suite.Equal(processedThumbnailBytesExpected, processedThumbnailBytes) } +func (suite *ManagerTestSuite) TestPngNoAlphaChannelProcessBlocking() { + ctx := context.Background() + + data := func(_ context.Context) (io.Reader, int, error) { + // load bytes from a test image + b, err := os.ReadFile("./test/test-png-noalphachannel.png") + if err != nil { + panic(err) + } + return bytes.NewBuffer(b), len(b), nil + } + + accountID := "01FS1X72SK9ZPW0J1QQ68BD264" + + // process the media with no additional info provided + processingMedia, err := suite.manager.ProcessMedia(ctx, data, nil, accountID, nil) + suite.NoError(err) + // fetch the attachment id from the processing media + attachmentID := processingMedia.AttachmentID() + + // do a blocking call to fetch the attachment + attachment, err := processingMedia.LoadAttachment(ctx) + suite.NoError(err) + suite.NotNil(attachment) + + // make sure it's got the stuff set on it that we expect + // the attachment ID and accountID we expect + suite.Equal(attachmentID, attachment.ID) + suite.Equal(accountID, attachment.AccountID) + + // file meta should be correctly derived from the image + suite.EqualValues(gtsmodel.Original{ + Width: 186, Height: 187, Size: 34782, Aspect: 0.9946524064171123, + }, attachment.FileMeta.Original) + suite.EqualValues(gtsmodel.Small{ + Width: 186, Height: 187, Size: 34782, Aspect: 0.9946524064171123, + }, attachment.FileMeta.Small) + suite.Equal("image/png", attachment.File.ContentType) + suite.Equal("image/jpeg", attachment.Thumbnail.ContentType) + suite.Equal(17471, attachment.File.FileSize) + suite.Equal("LFP?{^.A-?xd.9o#RVRQ~oj:_0xW", attachment.Blurhash) + + // now make sure the attachment is in the database + dbAttachment, err := suite.db.GetAttachmentByID(ctx, attachmentID) + suite.NoError(err) + suite.NotNil(dbAttachment) + + // make sure the processed file is in storage + processedFullBytes, err := suite.storage.Get(attachment.File.Path) + suite.NoError(err) + suite.NotEmpty(processedFullBytes) + + // load the processed bytes from our test folder, to compare + processedFullBytesExpected, err := os.ReadFile("./test/test-png-noalphachannel-processed.png") + suite.NoError(err) + suite.NotEmpty(processedFullBytesExpected) + + // the bytes in storage should be what we expected + suite.Equal(processedFullBytesExpected, processedFullBytes) + + // now do the same for the thumbnail and make sure it's what we expected + processedThumbnailBytes, err := suite.storage.Get(attachment.Thumbnail.Path) + suite.NoError(err) + suite.NotEmpty(processedThumbnailBytes) + + processedThumbnailBytesExpected, err := os.ReadFile("./test/test-png-noalphachannel-thumbnail.jpg") + suite.NoError(err) + suite.NotEmpty(processedThumbnailBytesExpected) + + suite.Equal(processedThumbnailBytesExpected, processedThumbnailBytes) +} + +func (suite *ManagerTestSuite) TestPngAlphaChannelProcessBlocking() { + ctx := context.Background() + + data := func(_ context.Context) (io.Reader, int, error) { + // load bytes from a test image + b, err := os.ReadFile("./test/test-png-alphachannel.png") + if err != nil { + panic(err) + } + return bytes.NewBuffer(b), len(b), nil + } + + accountID := "01FS1X72SK9ZPW0J1QQ68BD264" + + // process the media with no additional info provided + processingMedia, err := suite.manager.ProcessMedia(ctx, data, nil, accountID, nil) + suite.NoError(err) + // fetch the attachment id from the processing media + attachmentID := processingMedia.AttachmentID() + + // do a blocking call to fetch the attachment + attachment, err := processingMedia.LoadAttachment(ctx) + suite.NoError(err) + suite.NotNil(attachment) + + // make sure it's got the stuff set on it that we expect + // the attachment ID and accountID we expect + suite.Equal(attachmentID, attachment.ID) + suite.Equal(accountID, attachment.AccountID) + + // file meta should be correctly derived from the image + suite.EqualValues(gtsmodel.Original{ + Width: 186, Height: 187, Size: 34782, Aspect: 0.9946524064171123, + }, attachment.FileMeta.Original) + suite.EqualValues(gtsmodel.Small{ + Width: 186, Height: 187, Size: 34782, Aspect: 0.9946524064171123, + }, attachment.FileMeta.Small) + suite.Equal("image/png", attachment.File.ContentType) + suite.Equal("image/jpeg", attachment.Thumbnail.ContentType) + suite.Equal(18904, attachment.File.FileSize) + suite.Equal("LFP?{^.A-?xd.9o#RVRQ~oj:_0xW", attachment.Blurhash) + + // now make sure the attachment is in the database + dbAttachment, err := suite.db.GetAttachmentByID(ctx, attachmentID) + suite.NoError(err) + suite.NotNil(dbAttachment) + + // make sure the processed file is in storage + processedFullBytes, err := suite.storage.Get(attachment.File.Path) + suite.NoError(err) + suite.NotEmpty(processedFullBytes) + + // load the processed bytes from our test folder, to compare + processedFullBytesExpected, err := os.ReadFile("./test/test-png-alphachannel-processed.png") + suite.NoError(err) + suite.NotEmpty(processedFullBytesExpected) + + // the bytes in storage should be what we expected + suite.Equal(processedFullBytesExpected, processedFullBytes) + + // now do the same for the thumbnail and make sure it's what we expected + processedThumbnailBytes, err := suite.storage.Get(attachment.Thumbnail.Path) + suite.NoError(err) + suite.NotEmpty(processedThumbnailBytes) + + processedThumbnailBytesExpected, err := os.ReadFile("./test/test-png-alphachannel-thumbnail.jpg") + suite.NoError(err) + suite.NotEmpty(processedThumbnailBytesExpected) + + suite.Equal(processedThumbnailBytesExpected, processedThumbnailBytes) +} + func (suite *ManagerTestSuite) TestSimpleJpegProcessBlockingWithCallback() { ctx := context.Background() diff --git a/internal/media/png-stripper.go b/internal/media/png-stripper.go @@ -0,0 +1,204 @@ +/* + GoToSocial + Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +package media + +/* + The code in this file is taken from the following source: + https://github.com/google/wuffs/blob/414a011491ff513b86d8694c5d71800f3cb5a715/script/strip-png-ancillary-chunks.go + + It presents a workaround for this issue: https://github.com/golang/go/issues/43382 + + The license for the copied code is reproduced below: + + Copyright 2021 The Wuffs Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// strip-png-ancillary-chunks.go copies PNG data from stdin to stdout, removing +// any ancillary chunks. +// +// Specification-compliant PNG decoders are required to honor critical chunks +// but may ignore ancillary (non-critical) chunks. Stripping out ancillary +// chunks before decoding should mean that different PNG decoders will agree on +// the decoded output regardless of which ancillary chunk types they choose to +// honor. Specifically, some PNG decoders may implement color and gamma +// correction but not all do. +// +// This program will strip out all ancillary chunks, but it should be +// straightforward to copy-paste-and-modify it to strip out only certain chunk +// types (e.g. only "tRNS" transparency chunks). +// +// -------- +// +// A PNG file consists of an 8-byte magic identifier and then a series of +// chunks. Each chunk is: +// +// - a 4-byte uint32 payload length N. +// - a 4-byte chunk type (e.g. "gAMA" for gamma correction metadata). +// - an N-byte payload. +// - a 4-byte CRC-32 checksum of the previous (N + 4) bytes, including the +// chunk type but excluding the payload length. +// +// Chunk types consist of 4 ASCII letters. The upper-case / lower-case bit of +// the first letter denote critical or ancillary chunks: "IDAT" and "PLTE" are +// critical, "gAMA" and "tEXt" are ancillary. See +// https://www.w3.org/TR/2003/REC-PNG-20031110/#5Chunk-naming-conventions +// +// -------- + +import ( + "encoding/binary" + "image" + "image/png" + "io" +) + +// chunkTypeAncillaryBit is whether the first byte of a big-endian uint32 chunk +// type (the first of four ASCII letters) is lower-case. +const chunkTypeAncillaryBit = 0x20000000 + +// PNGAncillaryChunkStripper wraps another io.Reader to strip ancillary chunks, +// if the data is in the PNG file format. If the data isn't PNG, it is passed +// through unmodified. +type PNGAncillaryChunkStripper struct { + // Reader is the wrapped io.Reader. + Reader io.Reader + + // stickyErr is the first error returned from the wrapped io.Reader. + stickyErr error + + // buffer[rIndex:wIndex] holds data read from the wrapped io.Reader that + // wasn't passed through yet. + buffer [8]byte + rIndex int + wIndex int + + // pending and discard is the number of remaining bytes for (and whether to + // discard or pass through) the current chunk-in-progress. + pending int64 + discard bool + + // notPNG is set true if the data stream doesn't start with the 8-byte PNG + // magic identifier. If true, the wrapped io.Reader's data (including the + // first up-to-8 bytes) is passed through without modification. + notPNG bool + + // seenMagic is whether we've seen the 8-byte PNG magic identifier. + seenMagic bool +} + +// Read implements io.Reader. +func (r *PNGAncillaryChunkStripper) Read(p []byte) (int, error) { + for { + // If the wrapped io.Reader returned a non-nil error, drain r.buffer + // (what data we have) and return that error (if fully drained). + if r.stickyErr != nil { + n := copy(p, r.buffer[r.rIndex:r.wIndex]) + r.rIndex += n + if r.rIndex < r.wIndex { + return n, nil + } + return n, r.stickyErr + } + + // Handle trivial requests, including draining our buffer. + if len(p) == 0 { + return 0, nil + } else if r.rIndex < r.wIndex { + n := copy(p, r.buffer[r.rIndex:r.wIndex]) + r.rIndex += n + return n, nil + } + + // From here onwards, our buffer is drained: r.rIndex == r.wIndex. + + // Handle non-PNG input. + if r.notPNG { + return r.Reader.Read(p) + } + + // Continue processing any PNG chunk that's in progress, whether + // discarding it or passing it through. + for r.pending > 0 { + if int64(len(p)) > r.pending { + p = p[:r.pending] + } + n, err := r.Reader.Read(p) + r.pending -= int64(n) + r.stickyErr = err + if r.discard { + continue + } + return n, err + } + + // We're either expecting the 8-byte PNG magic identifier or the 4-byte + // PNG chunk length + 4-byte PNG chunk type. Either way, read 8 bytes. + r.rIndex = 0 + r.wIndex, r.stickyErr = io.ReadFull(r.Reader, r.buffer[:8]) + if r.stickyErr != nil { + // Undo io.ReadFull converting io.EOF to io.ErrUnexpectedEOF. + if r.stickyErr == io.ErrUnexpectedEOF { + r.stickyErr = io.EOF + } + continue + } + + // Process those 8 bytes, either: + // - a PNG chunk (if we've already seen the PNG magic identifier), + // - the PNG magic identifier itself (if the input is a PNG) or + // - something else (if it's not a PNG). + //nolint:gocritic + if r.seenMagic { + // The number of pending bytes is equal to (N + 4) because of the 4 + // byte trailer, a checksum. + r.pending = int64(binary.BigEndian.Uint32(r.buffer[:4])) + 4 + chunkType := binary.BigEndian.Uint32(r.buffer[4:]) + r.discard = (chunkType & chunkTypeAncillaryBit) != 0 + if r.discard { + r.rIndex = r.wIndex + } + } else if string(r.buffer[:8]) == "\x89PNG\x0D\x0A\x1A\x0A" { + r.seenMagic = true + } else { + r.notPNG = true + } + } +} + +// StrippedPngDecode strips ancillary data from png to allow more lenient decoding of pngs +// see: https://github.com/golang/go/issues/43382 +// and: https://github.com/google/wuffs/blob/414a011491ff513b86d8694c5d71800f3cb5a715/script/strip-png-ancillary-chunks.go +func StrippedPngDecode(r io.Reader) (image.Image, error) { + strippedPngReader := io.Reader(&PNGAncillaryChunkStripper{ + Reader: r, + }) + return png.Decode(strippedPngReader) +} diff --git a/internal/media/test/test-png-alphachannel-processed.png b/internal/media/test/test-png-alphachannel-processed.png Binary files differ. diff --git a/internal/media/test/test-png-alphachannel-thumbnail.jpg b/internal/media/test/test-png-alphachannel-thumbnail.jpg Binary files differ. diff --git a/internal/media/test/test-png-noalphachannel-processed.png b/internal/media/test/test-png-noalphachannel-processed.png Binary files differ. diff --git a/internal/media/test/test-png-noalphachannel-thumbnail.jpg b/internal/media/test/test-png-noalphachannel-thumbnail.jpg Binary files differ.