diff --git a/ChangeLog.md b/ChangeLog.md index 700d5e8f..4bdbf53d 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -20,6 +20,11 @@ used. Thus, this issue did not cause a segfault or other user-visible errant behavior (it was only detectable with ASan), and it did not likely pose a security risk. +4. The AArch64 (Arm 64-bit) Neon SIMD extensions and accelerated Huffman codec +now support the Arm64EC ABI on Windows, which allows Windows/x64 applications +to call native Arm64 functions when running under the Windows/x64 emulator on +Windows/Arm. + 3.1.1 ===== diff --git a/simd/arm/jccolor-neon.c b/simd/arm/jccolor-neon.c index d14a7bf5..8e40bc1e 100644 --- a/simd/arm/jccolor-neon.c +++ b/simd/arm/jccolor-neon.c @@ -2,7 +2,7 @@ * jccolor-neon.c - colorspace conversion (Arm Neon) * * Copyright (C) 2020, Arm Limited. All Rights Reserved. - * Copyright (C) 2020, 2024, D. R. Commander. All Rights Reserved. + * Copyright (C) 2020, 2024-2025, D. R. Commander. All Rights Reserved. * * This software is provided 'as-is', without any express or implied * warranty. In no event will the authors be held liable for any damages @@ -53,7 +53,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = { /* Include inline routines for colorspace extensions. */ -#if defined(__aarch64__) || defined(_M_ARM64) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #include "aarch64/jccolext-neon.c" #else #include "aarch32/jccolext-neon.c" @@ -68,7 +68,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = { #define RGB_BLUE EXT_RGB_BLUE #define RGB_PIXELSIZE EXT_RGB_PIXELSIZE #define jsimd_rgb_ycc_convert_neon jsimd_extrgb_ycc_convert_neon -#if defined(__aarch64__) || defined(_M_ARM64) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #include "aarch64/jccolext-neon.c" #else #include "aarch32/jccolext-neon.c" @@ -84,7 +84,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = { #define RGB_BLUE EXT_RGBX_BLUE #define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE #define jsimd_rgb_ycc_convert_neon jsimd_extrgbx_ycc_convert_neon -#if defined(__aarch64__) || defined(_M_ARM64) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #include "aarch64/jccolext-neon.c" #else #include "aarch32/jccolext-neon.c" @@ -100,7 +100,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = { #define RGB_BLUE EXT_BGR_BLUE #define RGB_PIXELSIZE EXT_BGR_PIXELSIZE #define jsimd_rgb_ycc_convert_neon jsimd_extbgr_ycc_convert_neon -#if defined(__aarch64__) || defined(_M_ARM64) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #include "aarch64/jccolext-neon.c" #else #include "aarch32/jccolext-neon.c" @@ -116,7 +116,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = { #define RGB_BLUE EXT_BGRX_BLUE #define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE #define jsimd_rgb_ycc_convert_neon jsimd_extbgrx_ycc_convert_neon -#if defined(__aarch64__) || defined(_M_ARM64) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #include "aarch64/jccolext-neon.c" #else #include "aarch32/jccolext-neon.c" @@ -132,7 +132,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = { #define RGB_BLUE EXT_XBGR_BLUE #define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE #define jsimd_rgb_ycc_convert_neon jsimd_extxbgr_ycc_convert_neon -#if defined(__aarch64__) || defined(_M_ARM64) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #include "aarch64/jccolext-neon.c" #else #include "aarch32/jccolext-neon.c" @@ -148,7 +148,7 @@ ALIGN(16) static const uint16_t jsimd_rgb_ycc_neon_consts[] = { #define RGB_BLUE EXT_XRGB_BLUE #define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE #define jsimd_rgb_ycc_convert_neon jsimd_extxrgb_ycc_convert_neon -#if defined(__aarch64__) || defined(_M_ARM64) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #include "aarch64/jccolext-neon.c" #else #include "aarch32/jccolext-neon.c" diff --git a/simd/arm/jchuff.h b/simd/arm/jchuff.h index 2fbd252b..5ca96293 100644 --- a/simd/arm/jchuff.h +++ b/simd/arm/jchuff.h @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2009, 2018, 2021, D. R. Commander. + * Copyright (C) 2009, 2018, 2021, 2025, D. R. Commander. * Copyright (C) 2018, Matthias Räncker. * Copyright (C) 2020-2021, Arm Limited. * For conditions of distribution and use, see the accompanying README.ijg @@ -17,7 +17,7 @@ * but must not be updated permanently until we complete the MCU. */ -#if defined(__aarch64__) || defined(_M_ARM64) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #define BIT_BUF_SIZE 64 #else #define BIT_BUF_SIZE 32 @@ -54,7 +54,7 @@ typedef struct { * directly to the output buffer. Otherwise, use the EMIT_BYTE() macro to * encode 0xFF as 0xFF 0x00. */ -#if defined(__aarch64__) || defined(_M_ARM64) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #define FLUSH() { \ if (put_buffer & 0x8080808080808080 & ~(put_buffer + 0x0101010101010101)) { \ diff --git a/simd/arm/jcphuff-neon.c b/simd/arm/jcphuff-neon.c index 435f96ee..cfc4d914 100644 --- a/simd/arm/jcphuff-neon.c +++ b/simd/arm/jcphuff-neon.c @@ -3,7 +3,7 @@ * * Copyright (C) 2020-2021, Arm Limited. All Rights Reserved. * Copyright (C) 2022, Matthieu Darbois. All Rights Reserved. - * Copyright (C) 2022, 2024, D. R. Commander. All Rights Reserved. + * Copyright (C) 2022, 2024-2025, D. R. Commander. All Rights Reserved. * * This software is provided 'as-is', without any express or implied * warranty. In no event will the authors be held liable for any damages @@ -251,7 +251,7 @@ void jsimd_encode_mcu_AC_first_prepare_neon uint8x8_t bitmap_rows_4567 = vpadd_u8(bitmap_rows_45, bitmap_rows_67); uint8x8_t bitmap_all = vpadd_u8(bitmap_rows_0123, bitmap_rows_4567); -#if defined(__aarch64__) || defined(_M_ARM64) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) /* Move bitmap to a 64-bit scalar register. */ uint64_t bitmap = vget_lane_u64(vreinterpret_u64_u8(bitmap_all), 0); /* Store zerobits bitmap. */ @@ -511,7 +511,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon uint8x8_t bitmap_rows_4567 = vpadd_u8(bitmap_rows_45, bitmap_rows_67); uint8x8_t bitmap_all = vpadd_u8(bitmap_rows_0123, bitmap_rows_4567); -#if defined(__aarch64__) || defined(_M_ARM64) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) /* Move bitmap to a 64-bit scalar register. */ uint64_t bitmap = vget_lane_u64(vreinterpret_u64_u8(bitmap_all), 0); /* Store zerobits bitmap. */ @@ -552,7 +552,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon bitmap_rows_4567 = vpadd_u8(bitmap_rows_45, bitmap_rows_67); bitmap_all = vpadd_u8(bitmap_rows_0123, bitmap_rows_4567); -#if defined(__aarch64__) || defined(_M_ARM64) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) /* Move bitmap to a 64-bit scalar register. */ bitmap = vget_lane_u64(vreinterpret_u64_u8(bitmap_all), 0); /* Store signbits bitmap. */ @@ -595,7 +595,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon bitmap_rows_4567 = vpadd_u8(bitmap_rows_45, bitmap_rows_67); bitmap_all = vpadd_u8(bitmap_rows_0123, bitmap_rows_4567); -#if defined(__aarch64__) || defined(_M_ARM64) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) /* Move bitmap to a 64-bit scalar register. */ bitmap = vget_lane_u64(vreinterpret_u64_u8(bitmap_all), 0); diff --git a/simd/arm/jcsample-neon.c b/simd/arm/jcsample-neon.c index fd8a93e5..9ad1acf5 100644 --- a/simd/arm/jcsample-neon.c +++ b/simd/arm/jcsample-neon.c @@ -2,7 +2,7 @@ * jcsample-neon.c - downsampling (Arm Neon) * * Copyright (C) 2020, Arm Limited. All Rights Reserved. - * Copyright (C) 2024, D. R. Commander. All Rights Reserved. + * Copyright (C) 2024-2025, D. R. Commander. All Rights Reserved. * * This software is provided 'as-is', without any express or implied * warranty. In no event will the authors be held liable for any damages @@ -107,7 +107,7 @@ void jsimd_h2v1_downsample_neon(JDIMENSION image_width, int max_v_samp_factor, /* Load pixels in last DCT block into a table. */ uint8x16_t pixels = vld1q_u8(inptr + (width_in_blocks - 1) * 2 * DCTSIZE); -#if defined(__aarch64__) || defined(_M_ARM64) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) /* Pad the empty elements with the value of the last pixel. */ pixels = vqtbl1q_u8(pixels, expand_mask); #else @@ -169,7 +169,7 @@ void jsimd_h2v2_downsample_neon(JDIMENSION image_width, int max_v_samp_factor, vld1q_u8(inptr0 + (width_in_blocks - 1) * 2 * DCTSIZE); uint8x16_t pixels_r1 = vld1q_u8(inptr1 + (width_in_blocks - 1) * 2 * DCTSIZE); -#if defined(__aarch64__) || defined(_M_ARM64) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) /* Pad the empty elements with the value of the last pixel. */ pixels_r0 = vqtbl1q_u8(pixels_r0, expand_mask); pixels_r1 = vqtbl1q_u8(pixels_r1, expand_mask); diff --git a/simd/arm/jquanti-neon.c b/simd/arm/jquanti-neon.c index e44fb3d4..48594d77 100644 --- a/simd/arm/jquanti-neon.c +++ b/simd/arm/jquanti-neon.c @@ -2,7 +2,7 @@ * jquanti-neon.c - sample data conversion and quantization (Arm Neon) * * Copyright (C) 2020-2021, Arm Limited. All Rights Reserved. - * Copyright (C) 2024, D. R. Commander. All Rights Reserved. + * Copyright (C) 2024-2025, D. R. Commander. All Rights Reserved. * * This software is provided 'as-is', without any express or implied * warranty. In no event will the authors be held liable for any damages @@ -102,7 +102,8 @@ void jsimd_quantize_neon(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *shift_ptr = divisors + 3 * DCTSIZE2; int i; -#if defined(__clang__) && (defined(__aarch64__) || defined(_M_ARM64)) +#if defined(__clang__) && (defined(__aarch64__) || defined(_M_ARM64) || \ + defined(_M_ARM64EC)) #pragma unroll #endif for (i = 0; i < DCTSIZE; i += DCTSIZE / 2) { diff --git a/src/jchuff.c b/src/jchuff.c index 8cdd5bd3..781e1ca7 100644 --- a/src/jchuff.c +++ b/src/jchuff.c @@ -6,7 +6,7 @@ * Lossless JPEG Modifications: * Copyright (C) 1999, Ken Murchison. * libjpeg-turbo Modifications: - * Copyright (C) 2009-2011, 2014-2016, 2018-2024, D. R. Commander. + * Copyright (C) 2009-2011, 2014-2016, 2018-2025, D. R. Commander. * Copyright (C) 2015, Matthieu Darbois. * Copyright (C) 2018, Matthias Räncker. * Copyright (C) 2020, Arm Limited. @@ -55,7 +55,8 @@ typedef size_t bit_buf_type; * retain the old Huffman encoder behavior when using the GAS implementation. */ #if defined(WITH_SIMD) && !(defined(__arm__) || defined(__aarch64__) || \ - defined(_M_ARM) || defined(_M_ARM64)) + defined(_M_ARM) || defined(_M_ARM64) || \ + defined(_M_ARM64EC)) typedef unsigned long long simd_bit_buf_type; #else typedef bit_buf_type simd_bit_buf_type; diff --git a/src/jdsample.c b/src/jdsample.c index e5a127de..022471dc 100644 --- a/src/jdsample.c +++ b/src/jdsample.c @@ -5,7 +5,7 @@ * Copyright (C) 1991-1996, Thomas G. Lane. * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2010, 2015-2016, 2022, 2024, D. R. Commander. + * Copyright (C) 2010, 2015-2016, 2022, 2024-2025, D. R. Commander. * Copyright (C) 2014, MIPS Technologies, Inc., California. * Copyright (C) 2015, Google, Inc. * Copyright (C) 2019-2020, Arm Limited. @@ -501,7 +501,8 @@ _jinit_upsampler(j_decompress_ptr cinfo) v_in_group * 2 == v_out_group && do_fancy) { /* Non-fancy upsampling is handled by the generic method */ #if defined(WITH_SIMD) && (defined(__arm__) || defined(__aarch64__) || \ - defined(_M_ARM) || defined(_M_ARM64)) + defined(_M_ARM) || defined(_M_ARM64) || \ + defined(_M_ARM64EC)) if (jsimd_can_h1v2_fancy_upsample()) upsample->methods[ci] = jsimd_h1v2_fancy_upsample; else diff --git a/src/jpeg_nbits.c b/src/jpeg_nbits.c index c8ee6b05..752a1636 100644 --- a/src/jpeg_nbits.c +++ b/src/jpeg_nbits.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024, D. R. Commander. + * Copyright (C) 2024-2025, D. R. Commander. * * For conditions of distribution and use, see the accompanying README.ijg * file. @@ -17,7 +17,7 @@ * encoders can reuse jpeg_nbits_table from the SSE2 baseline Huffman encoder. */ #if (defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || \ - defined(_M_X64)) && defined(WITH_SIMD) + (defined(_M_X64) && !defined(_M_ARM64EC))) && defined(WITH_SIMD) #undef INCLUDE_JPEG_NBITS_TABLE #endif diff --git a/src/jpeg_nbits.h b/src/jpeg_nbits.h index 6481a122..61666fd5 100644 --- a/src/jpeg_nbits.h +++ b/src/jpeg_nbits.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014, 2021, 2024, D. R. Commander. + * Copyright (C) 2014, 2021, 2024-2025, D. R. Commander. * Copyright (C) 2014, Olle Liljenzin. * Copyright (C) 2020, Arm Limited. * @@ -23,7 +23,7 @@ /* NOTE: Both GCC and Clang define __GNUC__ */ #if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \ - defined(_M_ARM) || defined(_M_ARM64) + defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) #if !defined(__thumb__) || defined(__thumb2__) #define USE_CLZ_INTRINSIC #endif