mirror of
https://github.com/libjpeg-turbo/libjpeg-turbo.git
synced 2026-01-18 13:31:21 +01:00
Fix issues with Windows Arm64EC builds
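Arm64EC wraps native Arm64 functions with an emulated Windows/x64 ABI, which can improve performance for Windows/x64 applications running under the x64 emulator on Windows/Arm. When building for Arm64EC, the compiler defines _M_X64 and _M_ARM64EC but not _M_ARM64. Consequently, preprocessor checks that test only _M_ARM64 do not select the AArch64 code paths in Arm64EC builds, and checks that treat _M_X64 as meaning a native x86-64 target match Arm64EC builds incorrectly. This commit adds _M_ARM64EC to the former checks and excludes it from the latter.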
This commit is contained in:
@@ -20,6 +20,11 @@ used. Thus, this issue did not cause a segfault or other user-visible errant
|
||||
behavior (it was only detectable with ASan), and it did not likely pose a
|
||||
security risk.
|
||||
|
||||
4. The AArch64 (Arm 64-bit) Neon SIMD extensions and accelerated Huffman codec
|
||||
now support the Arm64EC ABI on Windows, which allows Windows/x64 applications
|
||||
to call native Arm64 functions when running under the Windows/x64 emulator on
|
||||
Windows/Arm.
|
||||
|
||||
|
||||
3.1.1
|
||||
=====
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* jccolor-neon.c - colorspace conversion (Arm Neon)
|
||||
*
|
||||
* Copyright (C) 2020, Arm Limited. All Rights Reserved.
|
||||
* Copyright (C) 2020, 2024, D. R. Commander. All Rights Reserved.
|
||||
* Copyright (C) 2020, 2024-2025, D. R. Commander. All Rights Reserved.
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
@@ -53,7 +53,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = {
|
||||
|
||||
/* Include inline routines for colorspace extensions. */
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
#include "aarch64/jccolext-neon.c"
|
||||
#else
|
||||
#include "aarch32/jccolext-neon.c"
|
||||
@@ -68,7 +68,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = {
|
||||
#define RGB_BLUE EXT_RGB_BLUE
|
||||
#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
|
||||
#define jsimd_rgb_ycc_convert_neon jsimd_extrgb_ycc_convert_neon
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
#include "aarch64/jccolext-neon.c"
|
||||
#else
|
||||
#include "aarch32/jccolext-neon.c"
|
||||
@@ -84,7 +84,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = {
|
||||
#define RGB_BLUE EXT_RGBX_BLUE
|
||||
#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
|
||||
#define jsimd_rgb_ycc_convert_neon jsimd_extrgbx_ycc_convert_neon
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
#include "aarch64/jccolext-neon.c"
|
||||
#else
|
||||
#include "aarch32/jccolext-neon.c"
|
||||
@@ -100,7 +100,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = {
|
||||
#define RGB_BLUE EXT_BGR_BLUE
|
||||
#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
|
||||
#define jsimd_rgb_ycc_convert_neon jsimd_extbgr_ycc_convert_neon
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
#include "aarch64/jccolext-neon.c"
|
||||
#else
|
||||
#include "aarch32/jccolext-neon.c"
|
||||
@@ -116,7 +116,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = {
|
||||
#define RGB_BLUE EXT_BGRX_BLUE
|
||||
#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
|
||||
#define jsimd_rgb_ycc_convert_neon jsimd_extbgrx_ycc_convert_neon
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
#include "aarch64/jccolext-neon.c"
|
||||
#else
|
||||
#include "aarch32/jccolext-neon.c"
|
||||
@@ -132,7 +132,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = {
|
||||
#define RGB_BLUE EXT_XBGR_BLUE
|
||||
#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
|
||||
#define jsimd_rgb_ycc_convert_neon jsimd_extxbgr_ycc_convert_neon
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
#include "aarch64/jccolext-neon.c"
|
||||
#else
|
||||
#include "aarch32/jccolext-neon.c"
|
||||
@@ -148,7 +148,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = {
|
||||
#define RGB_BLUE EXT_XRGB_BLUE
|
||||
#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
|
||||
#define jsimd_rgb_ycc_convert_neon jsimd_extxrgb_ycc_convert_neon
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
#include "aarch64/jccolext-neon.c"
|
||||
#else
|
||||
#include "aarch32/jccolext-neon.c"
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
* This file was part of the Independent JPEG Group's software:
|
||||
* Copyright (C) 1991-1997, Thomas G. Lane.
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright (C) 2009, 2018, 2021, D. R. Commander.
|
||||
* Copyright (C) 2009, 2018, 2021, 2025, D. R. Commander.
|
||||
* Copyright (C) 2018, Matthias Räncker.
|
||||
* Copyright (C) 2020-2021, Arm Limited.
|
||||
* For conditions of distribution and use, see the accompanying README.ijg
|
||||
@@ -17,7 +17,7 @@
|
||||
* but must not be updated permanently until we complete the MCU.
|
||||
*/
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
#define BIT_BUF_SIZE 64
|
||||
#else
|
||||
#define BIT_BUF_SIZE 32
|
||||
@@ -54,7 +54,7 @@ typedef struct {
|
||||
* directly to the output buffer. Otherwise, use the EMIT_BYTE() macro to
|
||||
* encode 0xFF as 0xFF 0x00.
|
||||
*/
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
|
||||
#define FLUSH() { \
|
||||
if (put_buffer & 0x8080808080808080 & ~(put_buffer + 0x0101010101010101)) { \
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
*
|
||||
* Copyright (C) 2020-2021, Arm Limited. All Rights Reserved.
|
||||
* Copyright (C) 2022, Matthieu Darbois. All Rights Reserved.
|
||||
* Copyright (C) 2022, 2024, D. R. Commander. All Rights Reserved.
|
||||
* Copyright (C) 2022, 2024-2025, D. R. Commander. All Rights Reserved.
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
@@ -251,7 +251,7 @@ void jsimd_encode_mcu_AC_first_prepare_neon
|
||||
uint8x8_t bitmap_rows_4567 = vpadd_u8(bitmap_rows_45, bitmap_rows_67);
|
||||
uint8x8_t bitmap_all = vpadd_u8(bitmap_rows_0123, bitmap_rows_4567);
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
/* Move bitmap to a 64-bit scalar register. */
|
||||
uint64_t bitmap = vget_lane_u64(vreinterpret_u64_u8(bitmap_all), 0);
|
||||
/* Store zerobits bitmap. */
|
||||
@@ -511,7 +511,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
|
||||
uint8x8_t bitmap_rows_4567 = vpadd_u8(bitmap_rows_45, bitmap_rows_67);
|
||||
uint8x8_t bitmap_all = vpadd_u8(bitmap_rows_0123, bitmap_rows_4567);
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
/* Move bitmap to a 64-bit scalar register. */
|
||||
uint64_t bitmap = vget_lane_u64(vreinterpret_u64_u8(bitmap_all), 0);
|
||||
/* Store zerobits bitmap. */
|
||||
@@ -552,7 +552,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
|
||||
bitmap_rows_4567 = vpadd_u8(bitmap_rows_45, bitmap_rows_67);
|
||||
bitmap_all = vpadd_u8(bitmap_rows_0123, bitmap_rows_4567);
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
/* Move bitmap to a 64-bit scalar register. */
|
||||
bitmap = vget_lane_u64(vreinterpret_u64_u8(bitmap_all), 0);
|
||||
/* Store signbits bitmap. */
|
||||
@@ -595,7 +595,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
|
||||
bitmap_rows_4567 = vpadd_u8(bitmap_rows_45, bitmap_rows_67);
|
||||
bitmap_all = vpadd_u8(bitmap_rows_0123, bitmap_rows_4567);
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
/* Move bitmap to a 64-bit scalar register. */
|
||||
bitmap = vget_lane_u64(vreinterpret_u64_u8(bitmap_all), 0);
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* jcsample-neon.c - downsampling (Arm Neon)
|
||||
*
|
||||
* Copyright (C) 2020, Arm Limited. All Rights Reserved.
|
||||
* Copyright (C) 2024, D. R. Commander. All Rights Reserved.
|
||||
* Copyright (C) 2024-2025, D. R. Commander. All Rights Reserved.
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
@@ -107,7 +107,7 @@ void jsimd_h2v1_downsample_neon(JDIMENSION image_width, int max_v_samp_factor,
|
||||
|
||||
/* Load pixels in last DCT block into a table. */
|
||||
uint8x16_t pixels = vld1q_u8(inptr + (width_in_blocks - 1) * 2 * DCTSIZE);
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
/* Pad the empty elements with the value of the last pixel. */
|
||||
pixels = vqtbl1q_u8(pixels, expand_mask);
|
||||
#else
|
||||
@@ -169,7 +169,7 @@ void jsimd_h2v2_downsample_neon(JDIMENSION image_width, int max_v_samp_factor,
|
||||
vld1q_u8(inptr0 + (width_in_blocks - 1) * 2 * DCTSIZE);
|
||||
uint8x16_t pixels_r1 =
|
||||
vld1q_u8(inptr1 + (width_in_blocks - 1) * 2 * DCTSIZE);
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
/* Pad the empty elements with the value of the last pixel. */
|
||||
pixels_r0 = vqtbl1q_u8(pixels_r0, expand_mask);
|
||||
pixels_r1 = vqtbl1q_u8(pixels_r1, expand_mask);
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* jquanti-neon.c - sample data conversion and quantization (Arm Neon)
|
||||
*
|
||||
* Copyright (C) 2020-2021, Arm Limited. All Rights Reserved.
|
||||
* Copyright (C) 2024, D. R. Commander. All Rights Reserved.
|
||||
* Copyright (C) 2024-2025, D. R. Commander. All Rights Reserved.
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
@@ -102,7 +102,8 @@ void jsimd_quantize_neon(JCOEFPTR coef_block, DCTELEM *divisors,
|
||||
DCTELEM *shift_ptr = divisors + 3 * DCTSIZE2;
|
||||
int i;
|
||||
|
||||
#if defined(__clang__) && (defined(__aarch64__) || defined(_M_ARM64))
|
||||
#if defined(__clang__) && (defined(__aarch64__) || defined(_M_ARM64) || \
|
||||
defined(_M_ARM64EC))
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (i = 0; i < DCTSIZE; i += DCTSIZE / 2) {
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
* Lossless JPEG Modifications:
|
||||
* Copyright (C) 1999, Ken Murchison.
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright (C) 2009-2011, 2014-2016, 2018-2024, D. R. Commander.
|
||||
* Copyright (C) 2009-2011, 2014-2016, 2018-2025, D. R. Commander.
|
||||
* Copyright (C) 2015, Matthieu Darbois.
|
||||
* Copyright (C) 2018, Matthias Räncker.
|
||||
* Copyright (C) 2020, Arm Limited.
|
||||
@@ -55,7 +55,8 @@ typedef size_t bit_buf_type;
|
||||
* retain the old Huffman encoder behavior when using the GAS implementation.
|
||||
*/
|
||||
#if defined(WITH_SIMD) && !(defined(__arm__) || defined(__aarch64__) || \
|
||||
defined(_M_ARM) || defined(_M_ARM64))
|
||||
defined(_M_ARM) || defined(_M_ARM64) || \
|
||||
defined(_M_ARM64EC))
|
||||
typedef unsigned long long simd_bit_buf_type;
|
||||
#else
|
||||
typedef bit_buf_type simd_bit_buf_type;
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
* Copyright (C) 1991-1996, Thomas G. Lane.
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
* Copyright (C) 2010, 2015-2016, 2022, 2024, D. R. Commander.
|
||||
* Copyright (C) 2010, 2015-2016, 2022, 2024-2025, D. R. Commander.
|
||||
* Copyright (C) 2014, MIPS Technologies, Inc., California.
|
||||
* Copyright (C) 2015, Google, Inc.
|
||||
* Copyright (C) 2019-2020, Arm Limited.
|
||||
@@ -501,7 +501,8 @@ _jinit_upsampler(j_decompress_ptr cinfo)
|
||||
v_in_group * 2 == v_out_group && do_fancy) {
|
||||
/* Non-fancy upsampling is handled by the generic method */
|
||||
#if defined(WITH_SIMD) && (defined(__arm__) || defined(__aarch64__) || \
|
||||
defined(_M_ARM) || defined(_M_ARM64))
|
||||
defined(_M_ARM) || defined(_M_ARM64) || \
|
||||
defined(_M_ARM64EC))
|
||||
if (jsimd_can_h1v2_fancy_upsample())
|
||||
upsample->methods[ci] = jsimd_h1v2_fancy_upsample;
|
||||
else
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2024, D. R. Commander.
|
||||
* Copyright (C) 2024-2025, D. R. Commander.
|
||||
*
|
||||
* For conditions of distribution and use, see the accompanying README.ijg
|
||||
* file.
|
||||
@@ -17,7 +17,7 @@
|
||||
* encoders can reuse jpeg_nbits_table from the SSE2 baseline Huffman encoder.
|
||||
*/
|
||||
#if (defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || \
|
||||
defined(_M_X64)) && defined(WITH_SIMD)
|
||||
(defined(_M_X64) && !defined(_M_ARM64EC))) && defined(WITH_SIMD)
|
||||
#undef INCLUDE_JPEG_NBITS_TABLE
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2014, 2021, 2024, D. R. Commander.
|
||||
* Copyright (C) 2014, 2021, 2024-2025, D. R. Commander.
|
||||
* Copyright (C) 2014, Olle Liljenzin.
|
||||
* Copyright (C) 2020, Arm Limited.
|
||||
*
|
||||
@@ -23,7 +23,7 @@
|
||||
|
||||
/* NOTE: Both GCC and Clang define __GNUC__ */
|
||||
#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
|
||||
defined(_M_ARM) || defined(_M_ARM64)
|
||||
defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
#if !defined(__thumb__) || defined(__thumb2__)
|
||||
#define USE_CLZ_INTRINSIC
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user