iOS images unpremult SIMD support

Support for SIMD instructions for unpremult

First checkin, using rive::int16x4 instructions : 1 pixel at a time
Further checkin, using rive::int16x4 instructions : 2 pixels at a time
Last checkin: skip the computation for opaque pixels (assuming there will be enough opaque pixels to warrant the check)

Thanks to Chris for the SIMD instructions usage in rive

More checkins: move the decode and unpremult to the rive decoder - this requires modifications to build files. The benefit is that we are now running tests on this path. However, there are some issues with decoding two images for tests:

"../../test/assets/bad.jpg": the Apple Preview app cannot open this image; however, the current test says that it should not be null
And
"../../test/assets/bad.png": the Apple Preview app can load this image; however, the current test says that it should be null

Diffs=
e992059d6 iOS images unpremult SIMD support (#7875)

Co-authored-by: rivessamr <suki@rive.app>
This commit is contained in:
rivessamr
2024-08-21 18:29:40 +00:00
parent f0647c10a1
commit a8b7f00bed
12 changed files with 280 additions and 77 deletions

View File

@@ -1 +1 @@
ad34dd4dae54aa071ca80c457e375c015b9497a8
e992059d6354434e91cde562e463f51bff7eac58

View File

@@ -13,7 +13,7 @@ do
includedirs({ 'include', '../include', libpng, libjpeg })
files({ 'src/**.cpp' })
files({ 'src/bitmap_decoder.cpp' })
filter({ 'options:not no-libjpeg-renames' })
do
@@ -22,4 +22,18 @@ do
})
forceincludes({ 'rive_libjpeg_renames.h' })
end
filter({ 'system:macosx or system:ios' })
do
files({ 'src/**.mm' })
end
filter({ 'system:not macosx', 'system:not ios' })
do
files({
'src/bitmap_decoder_thirdparty.cpp',
'src/decode_jpeg.cpp',
'src/decode_png.cpp',
})
end
end

View File

@@ -38,66 +38,6 @@ size_t Bitmap::byteSize(PixelFormat format) const
size_t Bitmap::byteSize() const { return byteSize(m_PixelFormat); }
// Format-specific decoders. DecodePng and DecodeJpeg are only declared here; their
// definitions are not part of this translation unit.
std::unique_ptr<Bitmap> DecodePng(const uint8_t* bytes, size_t byteCount);
std::unique_ptr<Bitmap> DecodeJpeg(const uint8_t* bytes, size_t byteCount);

// WebP placeholder: ignores its input and always returns nullptr.
std::unique_ptr<Bitmap> DecodeWebP(const uint8_t* bytes, size_t byteCount)
{
    return nullptr;
}
// Pointer to a function that turns an encoded byte buffer into a Bitmap
// (or nullptr when it cannot).
using BitmapDecoder = std::unique_ptr<Bitmap> (*)(const uint8_t*, size_t);

// One entry of the decoder table used by Bitmap::decode.
struct ImageFormat
{
    // Short format name; only used in the failure log message.
    const char* name;

    // Leading bytes a file must start with for this entry to be selected.
    std::vector<uint8_t> fingerprint;

    // Decoder invoked once the fingerprint matches.
    BitmapDecoder decodeImage;
};
std::unique_ptr<Bitmap> Bitmap::decode(const uint8_t bytes[], size_t byteCount)
{
static ImageFormat decoders[] = {
{
"png",
{0x89, 0x50, 0x4E, 0x47},
DecodePng,
},
{
"jpeg",
{0xFF, 0xD8, 0xFF},
DecodeJpeg,
},
{
"webp",
{0x52, 0x49, 0x46},
DecodeWebP,
},
};
for (auto recognizer : decoders)
{
auto& fingerprint = recognizer.fingerprint;
// Immediately discard decoders with fingerprints that are longer than
// the file buffer.
if (recognizer.fingerprint.size() > byteCount)
{
continue;
}
// If the fingerprint doesn't match, discrd this decoder. These are all bytes so .size() is
// fine here.
if (memcmp(fingerprint.data(), bytes, fingerprint.size()) != 0)
{
continue;
}
auto bitmap = recognizer.decodeImage(bytes, byteCount);
if (!bitmap)
{
fprintf(stderr, "Bitmap::decode - failed to decode a %s.\n", recognizer.name);
}
return bitmap;
}
return nullptr;
}
void Bitmap::pixelFormat(PixelFormat format)
{
if (format == m_PixelFormat)

View File

@@ -0,0 +1,165 @@
/*
* Copyright 2023 Rive
*/
#include "rive/decoders/bitmap_decoder.hpp"
#include "rive/rive_types.hpp"
#include "rive/math/simd.hpp"
#include "rive/math/math_types.hpp"
#include "rive/core/type_conversions.hpp"
#include "utils/auto_cf.hpp"
#include <TargetConditionals.h>
#if TARGET_OS_IPHONE
#include <CoreGraphics/CoreGraphics.h>
#include <ImageIO/ImageIO.h>
#elif TARGET_OS_MAC
#include <ApplicationServices/ApplicationServices.h>
#endif
#include <stdio.h>
#include <string.h>
#include <vector>
// Raw RGBA image data produced by cg_image_decode: rows are tightly packed
// (width * 4 bytes each) and colors are premultiplied unless `opaque` is set.
struct PlatformCGImage
{
    // Image dimensions in pixels.
    uint32_t width{0};
    uint32_t height{0};

    // True when the source image carries no alpha channel.
    bool opaque{false};

    // Owning pointer to the pixel bytes; null until a decode succeeds.
    std::unique_ptr<uint8_t[]> pixels;
};
// Decodes an encoded image through CGImageSource and renders it into a freshly
// allocated, tightly packed 8-bit RGBA buffer.
//
// encodedBytes / encodedSizeInBytes: the encoded image data.
// platformImage: receives width, height, the opaque flag and the pixel buffer on
//                success; it is not written to on failure.
//
// Returns true on success. Returns false when platformImage is null, when any
// CoreFoundation/CoreGraphics object cannot be created, or when the image dimensions
// are zero, do not fit in uint32_t, or would overflow the buffer size computation.
bool cg_image_decode(const uint8_t* encodedBytes,
                     size_t encodedSizeInBytes,
                     PlatformCGImage* platformImage)
{
    // Without an output struct there is nowhere to put the result.
    if (platformImage == nullptr)
    {
        return false;
    }

    AutoCF data = CFDataCreate(kCFAllocatorDefault, encodedBytes, encodedSizeInBytes);
    if (!data)
    {
        return false;
    }

    AutoCF source = CGImageSourceCreateWithData(data, nullptr);
    if (!source)
    {
        return false;
    }

    AutoCF image = CGImageSourceCreateImageAtIndex(source, 0, nullptr);
    if (!image)
    {
        return false;
    }

    // An image without an alpha channel is drawn into a "skip last" context below and
    // reported as opaque to the caller.
    bool isOpaque = false;
    switch (CGImageGetAlphaInfo(image.get()))
    {
        case kCGImageAlphaNone:
        case kCGImageAlphaNoneSkipFirst:
        case kCGImageAlphaNoneSkipLast:
            isOpaque = true;
            break;
        default:
            break;
    }

    const size_t width = CGImageGetWidth(image);
    const size_t height = CGImageGetHeight(image);
    constexpr size_t kBytesPerPixel = 4;

    // The dimensions come straight from the (untrusted) image header. Reject values that
    // cannot be stored in PlatformCGImage or that would wrap the size arithmetic, since a
    // wrapped size would under-allocate the buffer CoreGraphics draws into.
    if (width == 0 || height == 0 || width > UINT32_MAX || height > UINT32_MAX)
    {
        return false;
    }
    if (width > SIZE_MAX / kBytesPerPixel)
    {
        return false;
    }
    const size_t rowBytes = width * kBytesPerPixel;
    if (rowBytes > SIZE_MAX / height)
    {
        return false;
    }
    const size_t size = rowBytes * height;
    const size_t bitsPerComponent = 8;

    CGBitmapInfo cgInfo = kCGBitmapByteOrder32Big; // rgba
    if (isOpaque)
    {
        cgInfo |= kCGImageAlphaNoneSkipLast;
    }
    else
    {
        cgInfo |= kCGImageAlphaPremultipliedLast;
    }

    std::unique_ptr<uint8_t[]> pixels(new uint8_t[size]);

    AutoCF cs = CGColorSpaceCreateDeviceRGB();
    AutoCF cg =
        CGBitmapContextCreate(pixels.get(), width, height, bitsPerComponent, rowBytes, cs, cgInfo);
    if (!cg)
    {
        return false;
    }

    // Copy blend mode: the image's pixels replace whatever is in the (uninitialized)
    // buffer instead of being composited over it.
    CGContextSetBlendMode(cg, kCGBlendModeCopy);
    CGContextDrawImage(cg, CGRectMake(0, 0, width, height), image);

    platformImage->width = rive::castTo<uint32_t>(width);
    platformImage->height = rive::castTo<uint32_t>(height);
    platformImage->opaque = isOpaque;
    platformImage->pixels = std::move(pixels);
    return true;
}
// Converts `numPixels` tightly packed, premultiplied RGBA (8 bits per channel) pixels
// to unpremultiplied RGBA, in place.
static void unpremultiply_rgba(uint8_t* pixels, size_t numPixels)
{
    // The main loop handles two pixels (8 bytes) per iteration; when the pixel count is
    // odd, the final pixel is handled separately after the loop.
    const size_t pairedSizeInBytes = (numPixels - (numPixels & 1)) * 4;

    size_t i;
    for (i = 0; i < pairedSizeInBytes; i += 8)
    {
        // Load 2 pixels into 64 bits
        auto twoPixels = rive::simd::load<uint8_t, 8>(&pixels[i]);
        auto a0 = twoPixels[3];
        auto a1 = twoPixels[7];

        // Avoid computation if both pixels are either fully transparent or fully opaque
        if ((a0 > 0 && a0 < 255) || (a1 > 0 && a1 < 255))
        {
            // Avoid potential division by zero (one of the two alphas may still be 0)
            a0 = std::max<uint8_t>(a0, 1);
            a1 = std::max<uint8_t>(a1, 1);

            // Cast to 16 bits to avoid overflow (255 * 255 fits in 16 bits)
            rive::uint16x8 rgbaWidex2 = rive::simd::cast<uint16_t>(twoPixels);

            // Unpremult: scale RGB by 255 / alpha; the alpha lanes are multiplied and
            // divided by 1 so they pass through unchanged
            rgbaWidex2 *= rive::uint16x8{255, 255, 255, 1, 255, 255, 255, 1};
            rgbaWidex2 /= rive::uint16x8{a0, a0, a0, 1, a1, a1, a1, 1};

            // Cast back to 8 bits and store
            twoPixels = rive::simd::cast<uint8_t>(rgbaWidex2);
            rive::simd::store(&pixels[i], twoPixels);
        }
    }

    // Process last odd pixel if needed; `i` now points at it
    if (numPixels & 1)
    {
        // Load 1 pixel into 32 bits
        auto rgba = rive::simd::load<uint8_t, 4>(&pixels[i]);

        // Avoid computation for fully transparent or opaque pixels
        if (rgba.a > 0 && rgba.a < 255)
        {
            // Cast to 16 bits to avoid overflow
            rive::uint16x4 rgbaWide = rive::simd::cast<uint16_t>(rgba);

            // Unpremult: scale RGB by 255 / alpha, leave alpha unchanged
            rgbaWide *= rive::uint16x4{255, 255, 255, 1};
            rgbaWide /= rive::uint16x4{rgba.a, rgba.a, rgba.a, 1};

            // Cast back to 8 bits and store
            rgba = rive::simd::cast<uint8_t>(rgbaWide);
            rive::simd::store(&pixels[i], rgba);
        }
    }
}

// Decodes an encoded image with CoreGraphics (via cg_image_decode) and returns it as an
// unpremultiplied RGBA Bitmap.
//
// bytes / byteCount: the encoded image data.
//
// Returns nullptr when cg_image_decode fails.
std::unique_ptr<Bitmap> Bitmap::decode(const uint8_t bytes[], size_t byteCount)
{
    PlatformCGImage image;
    if (!cg_image_decode(bytes, byteCount, &image))
    {
        return nullptr;
    }

    // CG only supports premultiplied alpha. Unmultiply now.
    // Opaque images are skipped: cg_image_decode draws them into a
    // kCGImageAlphaNoneSkipLast context, so their RGB values were never premultiplied.
    if (!image.opaque)
    {
        // Widen before multiplying: width and height are uint32_t, so multiplying them
        // directly would wrap at 2^32 pixels before the result reaches size_t.
        const size_t imageNumPixels = static_cast<size_t>(image.height) * image.width;
        unpremultiply_rgba(image.pixels.get(), imageNumPixels);
    }

    return std::make_unique<Bitmap>(
        image.width, image.height, PixelFormat::RGBA, std::move(image.pixels));
}

View File

@@ -0,0 +1,69 @@
/*
* Copyright 2023 Rive
*/
#include "rive/decoders/bitmap_decoder.hpp"
#include "rive/rive_types.hpp"
#include <stdio.h>
#include <string.h>
#include <vector>
// Format-specific decoders. DecodePng and DecodeJpeg are only declared here; their
// definitions are not part of this translation unit.
std::unique_ptr<Bitmap> DecodePng(const uint8_t* bytes, size_t byteCount);
std::unique_ptr<Bitmap> DecodeJpeg(const uint8_t* bytes, size_t byteCount);

// WebP placeholder: ignores its input and always returns nullptr.
std::unique_ptr<Bitmap> DecodeWebP(const uint8_t* bytes, size_t byteCount)
{
    return nullptr;
}
// Pointer to a function that turns an encoded byte buffer into a Bitmap
// (or nullptr when it cannot).
using BitmapDecoder = std::unique_ptr<Bitmap> (*)(const uint8_t*, size_t);

// One entry of the decoder table used by Bitmap::decode.
struct ImageFormat
{
    // Short format name; only used in the failure log message.
    const char* name;

    // Leading bytes a file must start with for this entry to be selected.
    std::vector<uint8_t> fingerprint;

    // Decoder invoked once the fingerprint matches.
    BitmapDecoder decodeImage;
};
std::unique_ptr<Bitmap> Bitmap::decode(const uint8_t bytes[], size_t byteCount)
{
static ImageFormat decoders[] = {
{
"png",
{0x89, 0x50, 0x4E, 0x47},
DecodePng,
},
{
"jpeg",
{0xFF, 0xD8, 0xFF},
DecodeJpeg,
},
{
"webp",
{0x52, 0x49, 0x46},
DecodeWebP,
},
};
for (auto recognizer : decoders)
{
auto& fingerprint = recognizer.fingerprint;
// Immediately discard decoders with fingerprints that are longer than
// the file buffer.
if (recognizer.fingerprint.size() > byteCount)
{
continue;
}
// If the fingerprint doesn't match, discrd this decoder. These are all bytes so .size() is
// fine here.
if (memcmp(fingerprint.data(), bytes, fingerprint.size()) != 0)
{
continue;
}
auto bitmap = recognizer.decodeImage(bytes, byteCount);
if (!bitmap)
{
fprintf(stderr, "Bitmap::decode - failed to decode a %s.\n", recognizer.name);
}
return bitmap;
}
return nullptr;
}

View File

@@ -352,7 +352,7 @@ do
filter('system:macosx or system:ios')
do
defines({'HAVE_CORETEXT'})
files({harfbuzz .. '/src/hb-coretext.cc'})
defines({ 'HAVE_CORETEXT' })
files({ harfbuzz .. '/src/hb-coretext.cc' })
end
end

View File

@@ -277,7 +277,7 @@ do
filter('system:macosx or system:ios')
do
defines({'HAVE_CORETEXT'})
files({harfbuzz .. '/src/hb-coretext.cc'})
defines({ 'HAVE_CORETEXT' })
files({ harfbuzz .. '/src/hb-coretext.cc' })
end
end

View File

@@ -15,7 +15,7 @@ newoption({
project('libjpeg')
do
kind('StaticLib')
optimize("Speed") -- Always optimize image encoding/decoding, even in debug builds.
optimize('Speed') -- Always optimize image encoding/decoding, even in debug builds.
includedirs({ libjpeg })

View File

@@ -17,7 +17,7 @@ do
includedirs({ yoga })
files({
files({
yoga .. '/yoga/Utils.cpp',
yoga .. '/yoga/YGConfig.cpp',
yoga .. '/yoga/YGLayout.cpp',

View File

@@ -63,12 +63,13 @@ do
forceincludes({ 'rive_yoga_renames.h' })
end
filter({ 'system:macosx'} )
filter({ 'system:macosx' })
do
links({
'Foundation.framework',
'ImageIO.framework',
'CoreGraphics.framework',
'CoreText.framework'
'CoreText.framework',
})
end
end

View File

@@ -71,13 +71,13 @@ defines({ 'NDEBUG' })
optimize('On')
filter({ 'options:with_rive_layout' })
do
defines({ 'YOGA_EXPORT=' })
includedirs({ yoga })
links({
'rive_yoga',
})
end
do
defines({ 'YOGA_EXPORT=' })
includedirs({ yoga })
links({
'rive_yoga',
})
end
-- Clean Function --
newaction({

View File

@@ -28,6 +28,9 @@ TEST_CASE("jpeg file decodes correctly", "[image-decoder]")
REQUIRE(bitmap->height() == 200);
}
#ifndef __APPLE__
// Loading this particular jpeg image in CG causes a memory leak:
// CGImageSourceCreateImageAtIndex calls IIOReadPlugin::createInfoPtr, which leaks.
TEST_CASE("bad jpeg file doesn't cause an overflow", "[image-decoder]")
{
auto file = ReadFile("../../test/assets/bad.jpg");
@@ -40,6 +43,7 @@ TEST_CASE("bad jpeg file doesn't cause an overflow", "[image-decoder]")
REQUIRE(bitmap->width() == 24566);
REQUIRE(bitmap->height() == 58278);
}
#endif
TEST_CASE("bad png file doesn't cause an overflow", "[image-decoder]")
{
@@ -48,5 +52,15 @@ TEST_CASE("bad png file doesn't cause an overflow", "[image-decoder]")
auto bitmap = Bitmap::decode(file.data(), file.size());
#ifdef __APPLE__
// Loading this bad PNG file in CG actually works and we do get an image, albeit black
REQUIRE(bitmap != nullptr);
REQUIRE(bitmap->width() == 58278);
REQUIRE(bitmap->height() == 24566);
#else
// Our decoders return null as we have an invalid header with bogus resolution and we want to
// avoid a potential attack vector
REQUIRE(bitmap == nullptr);
#endif
}