Fix issues with Windows Arm64EC builds

Arm64EC basically wraps native Arm64 functions with an emulated
Windows/x64 ABI, which can improve performance for Windows/x64
applications running under the x64 emulator on Windows/Arm.  When
building for Arm64EC, the compiler defines _M_X64 and _M_ARM64EC but not
_M_ARM64.
This commit is contained in:
DRC
2025-08-21 11:22:51 -04:00
parent f2d1f1cbd4
commit 98c458381f
10 changed files with 37 additions and 29 deletions

View File

@@ -20,6 +20,11 @@ used. Thus, this issue did not cause a segfault or other user-visible errant
behavior (it was only detectable with ASan), and it did not likely pose a
security risk.
4. The AArch64 (Arm 64-bit) Neon SIMD extensions and accelerated Huffman codec
now support the Arm64EC ABI on Windows, which allows Windows/x64 applications
to call native Arm64 functions when running under the Windows/x64 emulator on
Windows/Arm.
3.1.1
=====

View File

@@ -2,7 +2,7 @@
* jccolor-neon.c - colorspace conversion (Arm Neon)
*
* Copyright (C) 2020, Arm Limited. All Rights Reserved.
* Copyright (C) 2020, 2024, D. R. Commander. All Rights Reserved.
* Copyright (C) 2020, 2024-2025, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -53,7 +53,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = {
/* Include inline routines for colorspace extensions. */
#if defined(__aarch64__) || defined(_M_ARM64)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
#include "aarch64/jccolext-neon.c"
#else
#include "aarch32/jccolext-neon.c"
@@ -68,7 +68,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = {
#define RGB_BLUE EXT_RGB_BLUE
#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
#define jsimd_rgb_ycc_convert_neon jsimd_extrgb_ycc_convert_neon
#if defined(__aarch64__) || defined(_M_ARM64)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
#include "aarch64/jccolext-neon.c"
#else
#include "aarch32/jccolext-neon.c"
@@ -84,7 +84,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = {
#define RGB_BLUE EXT_RGBX_BLUE
#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
#define jsimd_rgb_ycc_convert_neon jsimd_extrgbx_ycc_convert_neon
#if defined(__aarch64__) || defined(_M_ARM64)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
#include "aarch64/jccolext-neon.c"
#else
#include "aarch32/jccolext-neon.c"
@@ -100,7 +100,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = {
#define RGB_BLUE EXT_BGR_BLUE
#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
#define jsimd_rgb_ycc_convert_neon jsimd_extbgr_ycc_convert_neon
#if defined(__aarch64__) || defined(_M_ARM64)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
#include "aarch64/jccolext-neon.c"
#else
#include "aarch32/jccolext-neon.c"
@@ -116,7 +116,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = {
#define RGB_BLUE EXT_BGRX_BLUE
#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
#define jsimd_rgb_ycc_convert_neon jsimd_extbgrx_ycc_convert_neon
#if defined(__aarch64__) || defined(_M_ARM64)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
#include "aarch64/jccolext-neon.c"
#else
#include "aarch32/jccolext-neon.c"
@@ -132,7 +132,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = {
#define RGB_BLUE EXT_XBGR_BLUE
#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
#define jsimd_rgb_ycc_convert_neon jsimd_extxbgr_ycc_convert_neon
#if defined(__aarch64__) || defined(_M_ARM64)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
#include "aarch64/jccolext-neon.c"
#else
#include "aarch32/jccolext-neon.c"
@@ -148,7 +148,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = {
#define RGB_BLUE EXT_XRGB_BLUE
#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
#define jsimd_rgb_ycc_convert_neon jsimd_extxrgb_ycc_convert_neon
#if defined(__aarch64__) || defined(_M_ARM64)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
#include "aarch64/jccolext-neon.c"
#else
#include "aarch32/jccolext-neon.c"

View File

@@ -4,7 +4,7 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2009, 2018, 2021, D. R. Commander.
* Copyright (C) 2009, 2018, 2021, 2025, D. R. Commander.
* Copyright (C) 2018, Matthias Räncker.
* Copyright (C) 2020-2021, Arm Limited.
* For conditions of distribution and use, see the accompanying README.ijg
@@ -17,7 +17,7 @@
* but must not be updated permanently until we complete the MCU.
*/
#if defined(__aarch64__) || defined(_M_ARM64)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
#define BIT_BUF_SIZE 64
#else
#define BIT_BUF_SIZE 32
@@ -54,7 +54,7 @@ typedef struct {
* directly to the output buffer. Otherwise, use the EMIT_BYTE() macro to
* encode 0xFF as 0xFF 0x00.
*/
#if defined(__aarch64__) || defined(_M_ARM64)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
#define FLUSH() { \
if (put_buffer & 0x8080808080808080 & ~(put_buffer + 0x0101010101010101)) { \

View File

@@ -3,7 +3,7 @@
*
* Copyright (C) 2020-2021, Arm Limited. All Rights Reserved.
* Copyright (C) 2022, Matthieu Darbois. All Rights Reserved.
* Copyright (C) 2022, 2024, D. R. Commander. All Rights Reserved.
* Copyright (C) 2022, 2024-2025, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -251,7 +251,7 @@ void jsimd_encode_mcu_AC_first_prepare_neon
uint8x8_t bitmap_rows_4567 = vpadd_u8(bitmap_rows_45, bitmap_rows_67);
uint8x8_t bitmap_all = vpadd_u8(bitmap_rows_0123, bitmap_rows_4567);
#if defined(__aarch64__) || defined(_M_ARM64)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
/* Move bitmap to a 64-bit scalar register. */
uint64_t bitmap = vget_lane_u64(vreinterpret_u64_u8(bitmap_all), 0);
/* Store zerobits bitmap. */
@@ -511,7 +511,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
uint8x8_t bitmap_rows_4567 = vpadd_u8(bitmap_rows_45, bitmap_rows_67);
uint8x8_t bitmap_all = vpadd_u8(bitmap_rows_0123, bitmap_rows_4567);
#if defined(__aarch64__) || defined(_M_ARM64)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
/* Move bitmap to a 64-bit scalar register. */
uint64_t bitmap = vget_lane_u64(vreinterpret_u64_u8(bitmap_all), 0);
/* Store zerobits bitmap. */
@@ -552,7 +552,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
bitmap_rows_4567 = vpadd_u8(bitmap_rows_45, bitmap_rows_67);
bitmap_all = vpadd_u8(bitmap_rows_0123, bitmap_rows_4567);
#if defined(__aarch64__) || defined(_M_ARM64)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
/* Move bitmap to a 64-bit scalar register. */
bitmap = vget_lane_u64(vreinterpret_u64_u8(bitmap_all), 0);
/* Store signbits bitmap. */
@@ -595,7 +595,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
bitmap_rows_4567 = vpadd_u8(bitmap_rows_45, bitmap_rows_67);
bitmap_all = vpadd_u8(bitmap_rows_0123, bitmap_rows_4567);
#if defined(__aarch64__) || defined(_M_ARM64)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
/* Move bitmap to a 64-bit scalar register. */
bitmap = vget_lane_u64(vreinterpret_u64_u8(bitmap_all), 0);

View File

@@ -2,7 +2,7 @@
* jcsample-neon.c - downsampling (Arm Neon)
*
* Copyright (C) 2020, Arm Limited. All Rights Reserved.
* Copyright (C) 2024, D. R. Commander. All Rights Reserved.
* Copyright (C) 2024-2025, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -107,7 +107,7 @@ void jsimd_h2v1_downsample_neon(JDIMENSION image_width, int max_v_samp_factor,
/* Load pixels in last DCT block into a table. */
uint8x16_t pixels = vld1q_u8(inptr + (width_in_blocks - 1) * 2 * DCTSIZE);
#if defined(__aarch64__) || defined(_M_ARM64)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
/* Pad the empty elements with the value of the last pixel. */
pixels = vqtbl1q_u8(pixels, expand_mask);
#else
@@ -169,7 +169,7 @@ void jsimd_h2v2_downsample_neon(JDIMENSION image_width, int max_v_samp_factor,
vld1q_u8(inptr0 + (width_in_blocks - 1) * 2 * DCTSIZE);
uint8x16_t pixels_r1 =
vld1q_u8(inptr1 + (width_in_blocks - 1) * 2 * DCTSIZE);
#if defined(__aarch64__) || defined(_M_ARM64)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
/* Pad the empty elements with the value of the last pixel. */
pixels_r0 = vqtbl1q_u8(pixels_r0, expand_mask);
pixels_r1 = vqtbl1q_u8(pixels_r1, expand_mask);

View File

@@ -2,7 +2,7 @@
* jquanti-neon.c - sample data conversion and quantization (Arm Neon)
*
* Copyright (C) 2020-2021, Arm Limited. All Rights Reserved.
* Copyright (C) 2024, D. R. Commander. All Rights Reserved.
* Copyright (C) 2024-2025, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -102,7 +102,8 @@ void jsimd_quantize_neon(JCOEFPTR coef_block, DCTELEM *divisors,
DCTELEM *shift_ptr = divisors + 3 * DCTSIZE2;
int i;
#if defined(__clang__) && (defined(__aarch64__) || defined(_M_ARM64))
#if defined(__clang__) && (defined(__aarch64__) || defined(_M_ARM64) || \
defined(_M_ARM64EC))
#pragma unroll
#endif
for (i = 0; i < DCTSIZE; i += DCTSIZE / 2) {

View File

@@ -6,7 +6,7 @@
* Lossless JPEG Modifications:
* Copyright (C) 1999, Ken Murchison.
* libjpeg-turbo Modifications:
* Copyright (C) 2009-2011, 2014-2016, 2018-2024, D. R. Commander.
* Copyright (C) 2009-2011, 2014-2016, 2018-2025, D. R. Commander.
* Copyright (C) 2015, Matthieu Darbois.
* Copyright (C) 2018, Matthias Räncker.
* Copyright (C) 2020, Arm Limited.
@@ -55,7 +55,8 @@ typedef size_t bit_buf_type;
* retain the old Huffman encoder behavior when using the GAS implementation.
*/
#if defined(WITH_SIMD) && !(defined(__arm__) || defined(__aarch64__) || \
defined(_M_ARM) || defined(_M_ARM64))
defined(_M_ARM) || defined(_M_ARM64) || \
defined(_M_ARM64EC))
typedef unsigned long long simd_bit_buf_type;
#else
typedef bit_buf_type simd_bit_buf_type;

View File

@@ -5,7 +5,7 @@
* Copyright (C) 1991-1996, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2010, 2015-2016, 2022, 2024, D. R. Commander.
* Copyright (C) 2010, 2015-2016, 2022, 2024-2025, D. R. Commander.
* Copyright (C) 2014, MIPS Technologies, Inc., California.
* Copyright (C) 2015, Google, Inc.
* Copyright (C) 2019-2020, Arm Limited.
@@ -501,7 +501,8 @@ _jinit_upsampler(j_decompress_ptr cinfo)
v_in_group * 2 == v_out_group && do_fancy) {
/* Non-fancy upsampling is handled by the generic method */
#if defined(WITH_SIMD) && (defined(__arm__) || defined(__aarch64__) || \
defined(_M_ARM) || defined(_M_ARM64))
defined(_M_ARM) || defined(_M_ARM64) || \
defined(_M_ARM64EC))
if (jsimd_can_h1v2_fancy_upsample())
upsample->methods[ci] = jsimd_h1v2_fancy_upsample;
else

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2024, D. R. Commander.
* Copyright (C) 2024-2025, D. R. Commander.
*
* For conditions of distribution and use, see the accompanying README.ijg
* file.
@@ -17,7 +17,7 @@
* encoders can reuse jpeg_nbits_table from the SSE2 baseline Huffman encoder.
*/
#if (defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || \
defined(_M_X64)) && defined(WITH_SIMD)
(defined(_M_X64) && !defined(_M_ARM64EC))) && defined(WITH_SIMD)
#undef INCLUDE_JPEG_NBITS_TABLE
#endif

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2014, 2021, 2024, D. R. Commander.
* Copyright (C) 2014, 2021, 2024-2025, D. R. Commander.
* Copyright (C) 2014, Olle Liljenzin.
* Copyright (C) 2020, Arm Limited.
*
@@ -23,7 +23,7 @@
/* NOTE: Both GCC and Clang define __GNUC__ */
#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
defined(_M_ARM) || defined(_M_ARM64)
defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC)
#if !defined(__thumb__) || defined(__thumb2__)
#define USE_CLZ_INTRINSIC
#endif