Add x32 ABI support on Linux

The x32 ABI is similar to the x86-64 ABI but uses 32-bit pointers.
(Refer to https://sites.google.com/site/x32abi)

Based on:
8da8fc5213
1e33dfea80
24ffea78da
dedcf76753
d04228a7b5
b4ad38316a

Closes #274
This commit is contained in:
DRC
2018-09-04 16:56:22 -05:00
parent 995eb29dc3
commit 133e4af070
29 changed files with 293 additions and 186 deletions

View File

@@ -46,7 +46,7 @@ if(CMAKE_SYSTEM_PROCESSOR_LC MATCHES "x86_64" OR
CMAKE_SYSTEM_PROCESSOR_LC MATCHES "i[0-9]86" OR
CMAKE_SYSTEM_PROCESSOR_LC MATCHES "x86" OR
CMAKE_SYSTEM_PROCESSOR_LC MATCHES "ia32")
if(BITS EQUAL 64)
if(BITS EQUAL 64 OR CMAKE_C_COMPILER_ABI MATCHES "ELF X32")
set(CPU_TYPE x86_64)
else()
set(CPU_TYPE i386)
@@ -101,6 +101,8 @@ if(CMAKE_INSTALL_PREFIX STREQUAL "${CMAKE_INSTALL_DEFAULT_PREFIX}")
if(UNIX AND NOT APPLE)
if(BITS EQUAL 64)
set(CMAKE_INSTALL_DEFAULT_LIBDIR "lib64")
elseif(CMAKE_C_COMPILER_ABI MATCHES "ELF X32")
set(CMAKE_INSTALL_DEFAULT_LIBDIR "libx32")
else()
set(CMAKE_INSTALL_DEFAULT_LIBDIR "lib32")
endif()
@@ -153,8 +155,12 @@ option(WITH_ARITH_DEC "Include arithmetic decoding support when emulating the li
boolean_number(WITH_ARITH_DEC)
option(WITH_ARITH_ENC "Include arithmetic encoding support when emulating the libjpeg v6b API/ABI" TRUE)
boolean_number(WITH_ARITH_ENC)
option(WITH_JAVA "Build Java wrapper for the TurboJPEG API library (implies ENABLE_SHARED=1)" FALSE)
boolean_number(WITH_JAVA)
if(CMAKE_C_COMPILER_ABI MATCHES "ELF X32")
set(WITH_JAVA 0)
else()
option(WITH_JAVA "Build Java wrapper for the TurboJPEG API library (implies ENABLE_SHARED=1)" FALSE)
boolean_number(WITH_JAVA)
endif()
option(WITH_JPEG7 "Emulate libjpeg v7 API/ABI (this makes ${CMAKE_PROJECT_NAME} backward-incompatible with libjpeg v6b)" FALSE)
boolean_number(WITH_JPEG7)
option(WITH_JPEG8 "Emulate libjpeg v8 API/ABI (this makes ${CMAKE_PROJECT_NAME} backward-incompatible with libjpeg v6b)" FALSE)

View File

@@ -1,3 +1,22 @@
2.1 pre-beta
============
### Significant changes relative to 2.0.1:
1. The build system, x86-64 SIMD extensions, and accelerated Huffman codec now
support the x32 ABI on Linux, which allows for using x86-64 instructions with
32-bit pointers. The x32 ABI is generally enabled by adding `-mx32` to the
compiler flags.
Caveats:
- CMake 3.9.0 or later is required in order for the build system to
automatically detect an x32 build.
- Java does not support the x32 ABI, and thus the TurboJPEG Java API will
automatically be disabled with x32 builds.
- SIMD acceleration for progressive Huffman encoding does not (currently)
work with the x32 ABI and will be disabled in x32 builds.
2.0.1
=====

View File

@@ -118,6 +118,7 @@
# absolute paths where necessary, using the same logic.
#=============================================================================
# Copyright 2018 Matthias Räncker
# Copyright 2016 D. R. Commander
# Copyright 2016 Dmitry Marakasov
# Copyright 2016 Roger Leigh
@@ -259,6 +260,8 @@ if(NOT DEFINED CMAKE_INSTALL_DEFAULT_LIBDIR)
else()
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
set(CMAKE_INSTALL_DEFAULT_LIBDIR "lib64")
elseif(CMAKE_C_COMPILER_ABI MATCHES "ELF X32")
set(CMAKE_INSTALL_DEFAULT_LIBDIR "libx32")
endif()
endif()
endif()

View File

@@ -6,6 +6,7 @@
* libjpeg-turbo Modifications:
* Copyright (C) 2009-2011, 2014-2016, 2018, D. R. Commander.
* Copyright (C) 2015, Matthieu Darbois.
* Copyright (C) 2018, Matthias Räncker.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@@ -65,8 +66,14 @@
* but must not be updated permanently until we complete the MCU.
*/
#if defined(__x86_64__) && defined(__ILP32__)
typedef unsigned long long bit_buf_type;
#else
typedef size_t bit_buf_type;
#endif
typedef struct {
size_t put_buffer; /* current bit-accumulation buffer */
bit_buf_type put_buffer; /* current bit-accumulation buffer */
int put_bits; /* # of bits now in it */
int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
} savable_state;
@@ -387,7 +394,7 @@ dump_buffer(working_state *state)
#error Cannot determine word size
#endif
#if SIZEOF_SIZE_T == 8 || defined(_WIN64)
#if SIZEOF_SIZE_T == 8 || defined(_WIN64) || (defined(__x86_64__) && defined(__ILP32__))
#define EMIT_BITS(code, size) { \
CHECKBUF47() \
@@ -463,7 +470,7 @@ LOCAL(boolean)
flush_bits(working_state *state)
{
JOCTET _buffer[BUFSIZE], *buffer;
size_t put_buffer; int put_bits;
bit_buf_type put_buffer; int put_bits;
size_t bytes, bytestocopy; int localbuf = 0;
put_buffer = state->cur.put_buffer;
@@ -509,7 +516,7 @@ encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,
int nbits;
int r, code, size;
JOCTET _buffer[BUFSIZE], *buffer;
size_t put_buffer; int put_bits;
bit_buf_type put_buffer; int put_bits;
int code_0xf0 = actbl->ehufco[0xf0], size_0xf0 = actbl->ehufsi[0xf0];
size_t bytes, bytestocopy; int localbuf = 0;

View File

@@ -5,6 +5,7 @@
* Copyright (C) 1991-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2009-2011, 2016, 2018, D. R. Commander.
* Copyright (C) 2018, Matthias Räncker.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@@ -423,7 +424,7 @@ no_more_bytes:
} \
}
#if SIZEOF_SIZE_T == 8 || defined(_WIN64)
#if SIZEOF_SIZE_T == 8 || defined(_WIN64) || (defined(__x86_64__) && defined(__ILP32__))
/* Pre-fetch 48 bytes, because the holding register is 64-bit */
#define FILL_BIT_BUFFER_FAST \

View File

@@ -5,6 +5,7 @@
* Copyright (C) 1991-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2010-2011, 2015-2016, D. R. Commander.
* Copyright (C) 2018, Matthias Räncker.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@@ -78,6 +79,11 @@ EXTERN(void) jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC,
typedef size_t bit_buf_type; /* type of bit-extraction buffer */
#define BIT_BUF_SIZE 64 /* size of buffer in bits */
#elif defined(__x86_64__) && defined(__ILP32__)
typedef unsigned long long bit_buf_type; /* type of bit-extraction buffer */
#define BIT_BUF_SIZE 64 /* size of buffer in bits */
#else
typedef unsigned long bit_buf_type; /* type of bit-extraction buffer */

View File

@@ -30,6 +30,9 @@ if(CPU_TYPE STREQUAL "x86_64")
if(CYGWIN)
set(CMAKE_ASM_NASM_OBJECT_FORMAT win64)
endif()
if(CMAKE_C_COMPILER_ABI MATCHES "ELF X32")
set(CMAKE_ASM_NASM_OBJECT_FORMAT elfx32)
endif()
elseif(CPU_TYPE STREQUAL "i386")
if(BORLAND)
set(CMAKE_ASM_NASM_OBJECT_FORMAT obj)

View File

@@ -2,8 +2,9 @@
; jsimdext.inc - common declarations
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2010, 2016, D. R. Commander.
; Copyright (C) 2010, 2016, 2018, D. R. Commander.
; Copyright (C) 2018, Matthieu Darbois.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library - version 1.02
;
@@ -132,13 +133,49 @@ section .note.GNU-stack noalloc noexec nowrite progbits
; Common types
;
%ifdef __x86_64__
%ifnidn __OUTPUT_FORMAT__, elfx32
%define POINTER qword ; general pointer type
%define SIZEOF_POINTER SIZEOF_QWORD ; sizeof(POINTER)
%define POINTER_BIT QWORD_BIT ; sizeof(POINTER)*BYTE_BIT
%else
%define raxp rax
%define rbxp rbx
%define rcxp rcx
%define rdxp rdx
%define rsip rsi
%define rdip rdi
%define rbpp rbp
%define rspp rsp
%define r8p r8
%define r9p r9
%define r10p r10
%define r11p r11
%define r12p r12
%define r13p r13
%define r14p r14
%define r15p r15
%endif
%endif
%ifndef raxp
%define POINTER dword ; general pointer type
%define SIZEOF_POINTER SIZEOF_DWORD ; sizeof(POINTER)
%define POINTER_BIT DWORD_BIT ; sizeof(POINTER)*BYTE_BIT
; x86_64 ILP32 ABI (x32)
%define raxp eax
%define rbxp ebx
%define rcxp ecx
%define rdxp edx
%define rsip esi
%define rdip edi
%define rbpp ebp
%define rspp esp
%define r8p r8d
%define r9p r9d
%define r10p r10d
%define r11p r11d
%define r12p r12d
%define r13p r13d
%define r14p r14d
%define r15p r15d
%endif
%define INT dword ; signed integer type

View File

@@ -3,6 +3,7 @@
;
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -59,9 +60,9 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
mov rsi, r12
mov ecx, r13d
mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
mov rbx, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY]
mov rdx, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY]
mov rdip, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
mov rbxp, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY]
mov rdxp, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY]
lea rdi, [rdi+rcx*SIZEOF_JSAMPROW]
lea rbx, [rbx+rcx*SIZEOF_JSAMPROW]
lea rdx, [rdx+rcx*SIZEOF_JSAMPROW]
@@ -79,10 +80,10 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
push rsi
push rcx ; col
mov rsi, JSAMPROW [rsi] ; inptr
mov rdi, JSAMPROW [rdi] ; outptr0
mov rbx, JSAMPROW [rbx] ; outptr1
mov rdx, JSAMPROW [rdx] ; outptr2
mov rsip, JSAMPROW [rsi] ; inptr
mov rdip, JSAMPROW [rdi] ; outptr0
mov rbxp, JSAMPROW [rbx] ; outptr1
mov rdxp, JSAMPROW [rdx] ; outptr2
cmp rcx, byte SIZEOF_YMMWORD
jae near .columnloop

View File

@@ -2,6 +2,7 @@
; jccolext.asm - colorspace conversion (64-bit SSE2)
;
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -58,9 +59,9 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
mov rsi, r12
mov ecx, r13d
mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
mov rbx, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY]
mov rdx, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY]
mov rdip, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
mov rbxp, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY]
mov rdxp, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY]
lea rdi, [rdi+rcx*SIZEOF_JSAMPROW]
lea rbx, [rbx+rcx*SIZEOF_JSAMPROW]
lea rdx, [rdx+rcx*SIZEOF_JSAMPROW]
@@ -78,10 +79,10 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
push rsi
push rcx ; col
mov rsi, JSAMPROW [rsi] ; inptr
mov rdi, JSAMPROW [rdi] ; outptr0
mov rbx, JSAMPROW [rbx] ; outptr1
mov rdx, JSAMPROW [rdx] ; outptr2
mov rsip, JSAMPROW [rsi] ; inptr
mov rdip, JSAMPROW [rdi] ; outptr0
mov rbxp, JSAMPROW [rbx] ; outptr1
mov rdxp, JSAMPROW [rdx] ; outptr2
cmp rcx, byte SIZEOF_XMMWORD
jae near .columnloop

View File

@@ -3,6 +3,7 @@
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -59,7 +60,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
mov rsi, r12
mov ecx, r13d
mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
mov rdip, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
lea rdi, [rdi+rcx*SIZEOF_JSAMPROW]
pop rcx
@@ -73,8 +74,8 @@ EXTN(jsimd_rgb_gray_convert_avx2):
push rsi
push rcx ; col
mov rsi, JSAMPROW [rsi] ; inptr
mov rdi, JSAMPROW [rdi] ; outptr0
mov rsip, JSAMPROW [rsi] ; inptr
mov rdip, JSAMPROW [rdi] ; outptr0
cmp rcx, byte SIZEOF_YMMWORD
jae near .columnloop

View File

@@ -2,6 +2,7 @@
; jcgryext.asm - grayscale colorspace conversion (64-bit SSE2)
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -58,7 +59,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
mov rsi, r12
mov ecx, r13d
mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
mov rdip, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
lea rdi, [rdi+rcx*SIZEOF_JSAMPROW]
pop rcx
@@ -72,8 +73,8 @@ EXTN(jsimd_rgb_gray_convert_sse2):
push rsi
push rcx ; col
mov rsi, JSAMPROW [rsi] ; inptr
mov rdi, JSAMPROW [rdi] ; outptr0
mov rsip, JSAMPROW [rsi] ; inptr
mov rdip, JSAMPROW [rdi] ; outptr0
cmp rcx, byte SIZEOF_XMMWORD
jae near .columnloop

View File

@@ -3,6 +3,7 @@
;
; Copyright (C) 2009-2011, 2014-2016, D. R. Commander.
; Copyright (C) 2015, Matthieu Darbois.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -199,8 +200,8 @@ EXTN(jsimd_huff_encode_one_block_sse2):
mov buffer, r11 ; r11 is now scratch
mov put_buffer, MMWORD [r10+16] ; put_buffer = state->cur.put_buffer;
mov put_bits, DWORD [r10+24] ; put_bits = state->cur.put_bits;
mov put_buffer, MMWORD [r10+SIZEOF_POINTER*2] ; put_buffer = state->cur.put_buffer;
mov put_bits, DWORD [r10+SIZEOF_POINTER*2+8] ; put_bits = state->cur.put_bits;
push r10 ; r10 is now scratch
; Encode the DC coefficient difference per section F.1.2.1
@@ -332,8 +333,8 @@ EXTN(jsimd_huff_encode_one_block_sse2):
.EFN:
pop r10
; Save put_buffer & put_bits
mov MMWORD [r10+16], put_buffer ; state->cur.put_buffer = put_buffer;
mov DWORD [r10+24], put_bits ; state->cur.put_bits = put_bits;
mov MMWORD [r10+SIZEOF_POINTER*2], put_buffer ; state->cur.put_buffer = put_buffer;
mov DWORD [r10+SIZEOF_POINTER*2+8], put_bits ; state->cur.put_bits = put_bits;
pop rbx
uncollect_args 6

View File

@@ -4,6 +4,7 @@
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -73,7 +74,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
push rax
push rcx
mov rdi, JSAMPROW [rsi]
mov rdip, JSAMPROW [rsi]
add rdi, rdx
mov al, JSAMPLE [rdi-1]
@@ -109,8 +110,8 @@ EXTN(jsimd_h2v1_downsample_avx2):
push rdi
push rsi
mov rsi, JSAMPROW [rsi] ; inptr
mov rdi, JSAMPROW [rdi] ; outptr
mov rsip, JSAMPROW [rsi] ; inptr
mov rdip, JSAMPROW [rdi] ; outptr
cmp rcx, byte SIZEOF_YMMWORD
jae short .columnloop
@@ -235,7 +236,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
push rax
push rcx
mov rdi, JSAMPROW [rsi]
mov rdip, JSAMPROW [rsi]
add rdi, rdx
mov al, JSAMPLE [rdi-1]
@@ -271,9 +272,9 @@ EXTN(jsimd_h2v2_downsample_avx2):
push rdi
push rsi
mov rdx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1
mov rdi, JSAMPROW [rdi] ; outptr
mov rdxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
mov rsip, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1
mov rdip, JSAMPROW [rdi] ; outptr
cmp rcx, byte SIZEOF_YMMWORD
jae short .columnloop

View File

@@ -3,6 +3,7 @@
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -72,7 +73,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
push rax
push rcx
mov rdi, JSAMPROW [rsi]
mov rdip, JSAMPROW [rsi]
add rdi, rdx
mov al, JSAMPLE [rdi-1]
@@ -107,8 +108,8 @@ EXTN(jsimd_h2v1_downsample_sse2):
push rdi
push rsi
mov rsi, JSAMPROW [rsi] ; inptr
mov rdi, JSAMPROW [rdi] ; outptr
mov rsip, JSAMPROW [rsi] ; inptr
mov rdip, JSAMPROW [rdi] ; outptr
cmp rcx, byte SIZEOF_XMMWORD
jae short .columnloop
@@ -217,7 +218,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
push rax
push rcx
mov rdi, JSAMPROW [rsi]
mov rdip, JSAMPROW [rsi]
add rdi, rdx
mov al, JSAMPLE [rdi-1]
@@ -252,9 +253,9 @@ EXTN(jsimd_h2v2_downsample_sse2):
push rdi
push rsi
mov rdx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1
mov rdi, JSAMPROW [rdi] ; outptr
mov rdxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
mov rsip, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1
mov rdip, JSAMPROW [rdi] ; outptr
cmp rcx, byte SIZEOF_XMMWORD
jae short .columnloop

View File

@@ -4,6 +4,7 @@
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -60,9 +61,9 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
mov rdi, r11
mov ecx, r12d
mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
lea rsi, [rsi+rcx*SIZEOF_JSAMPROW]
lea rbx, [rbx+rcx*SIZEOF_JSAMPROW]
lea rdx, [rdx+rcx*SIZEOF_JSAMPROW]
@@ -81,10 +82,10 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
push rsi
push rcx ; col
mov rsi, JSAMPROW [rsi] ; inptr0
mov rbx, JSAMPROW [rbx] ; inptr1
mov rdx, JSAMPROW [rdx] ; inptr2
mov rdi, JSAMPROW [rdi] ; outptr
mov rsip, JSAMPROW [rsi] ; inptr0
mov rbxp, JSAMPROW [rbx] ; inptr1
mov rdxp, JSAMPROW [rdx] ; inptr2
mov rdip, JSAMPROW [rdi] ; outptr
.columnloop:
vmovdqu ymm5, YMMWORD [rbx] ; ymm5=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV)

View File

@@ -3,6 +3,7 @@
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -59,9 +60,9 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
mov rdi, r11
mov ecx, r12d
mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
lea rsi, [rsi+rcx*SIZEOF_JSAMPROW]
lea rbx, [rbx+rcx*SIZEOF_JSAMPROW]
lea rdx, [rdx+rcx*SIZEOF_JSAMPROW]
@@ -80,10 +81,10 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
push rsi
push rcx ; col
mov rsi, JSAMPROW [rsi] ; inptr0
mov rbx, JSAMPROW [rbx] ; inptr1
mov rdx, JSAMPROW [rdx] ; inptr2
mov rdi, JSAMPROW [rdi] ; outptr
mov rsip, JSAMPROW [rsi] ; inptr0
mov rbxp, JSAMPROW [rbx] ; inptr1
mov rdxp, JSAMPROW [rdx] ; inptr2
mov rdip, JSAMPROW [rdi] ; outptr
.columnloop:
movdqa xmm5, XMMWORD [rbx] ; xmm5=Cb(0123456789ABCDEF)

View File

@@ -4,6 +4,7 @@
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -60,14 +61,14 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
mov rdi, r11
mov ecx, r12d
mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
mov rdi, r13
mov rsi, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0
mov rbx, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1
mov rdx, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2
mov rdi, JSAMPROW [rdi] ; outptr
mov rsip, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0
mov rbxp, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1
mov rdxp, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2
mov rdip, JSAMPROW [rdi] ; outptr
pop rcx ; col
@@ -516,15 +517,16 @@ EXTN(jsimd_h2v2_merged_upsample_avx2):
mov rdi, r11
mov ecx, r12d
mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
mov rdi, r13
lea rsi, [rsi+rcx*SIZEOF_JSAMPROW]
push rdx ; inptr2
push rbx ; inptr1
push rsi ; inptr00
sub rsp, SIZEOF_JSAMPARRAY*4
mov JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY], rsip ; inptr00
mov JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY], rbxp ; inptr1
mov JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY], rdxp ; inptr2
mov rbx, rsp
push rdi
@@ -548,16 +550,16 @@ EXTN(jsimd_h2v2_merged_upsample_avx2):
pop rax
pop rcx
pop rdi
pop rsi
pop rbx
pop rdx
mov rsip, JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY]
mov rbxp, JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY]
mov rdxp, JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY]
add rdi, byte SIZEOF_JSAMPROW ; outptr1
add rsi, byte SIZEOF_JSAMPROW ; inptr01
push rdx ; inptr2
push rbx ; inptr1
push rsi ; inptr00
mov JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY], rsip ; inptr00
mov JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY], rbxp ; inptr1
mov JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY], rdxp ; inptr2
mov rbx, rsp
push rdi
@@ -581,9 +583,10 @@ EXTN(jsimd_h2v2_merged_upsample_avx2):
pop rax
pop rcx
pop rdi
pop rsi
pop rbx
pop rdx
mov rsip, JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY]
mov rbxp, JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY]
mov rdxp, JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY]
add rsp, SIZEOF_JSAMPARRAY*4
pop rbx
uncollect_args 4

View File

@@ -3,6 +3,7 @@
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -59,14 +60,14 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
mov rdi, r11
mov ecx, r12d
mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
mov rdi, r13
mov rsi, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0
mov rbx, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1
mov rdx, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2
mov rdi, JSAMPROW [rdi] ; outptr
mov rsip, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0
mov rbxp, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1
mov rdxp, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2
mov rdip, JSAMPROW [rdi] ; outptr
pop rcx ; col
@@ -458,15 +459,16 @@ EXTN(jsimd_h2v2_merged_upsample_sse2):
mov rdi, r11
mov ecx, r12d
mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
mov rdi, r13
lea rsi, [rsi+rcx*SIZEOF_JSAMPROW]
push rdx ; inptr2
push rbx ; inptr1
push rsi ; inptr00
sub rsp, SIZEOF_JSAMPARRAY*4
mov JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY], rsip ; inptr00
mov JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY], rbxp ; inptr1
mov JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY], rdxp ; inptr2
mov rbx, rsp
push rdi
@@ -490,16 +492,16 @@ EXTN(jsimd_h2v2_merged_upsample_sse2):
pop rax
pop rcx
pop rdi
pop rsi
pop rbx
pop rdx
mov rsip, JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY]
mov rbxp, JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY]
mov rdxp, JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY]
add rdi, byte SIZEOF_JSAMPROW ; outptr1
add rsi, byte SIZEOF_JSAMPROW ; inptr01
push rdx ; inptr2
push rbx ; inptr1
push rsi ; inptr00
mov JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY], rsip ; inptr00
mov JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY], rbxp ; inptr1
mov JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY], rdxp ; inptr2
mov rbx, rsp
push rdi
@@ -523,9 +525,10 @@ EXTN(jsimd_h2v2_merged_upsample_sse2):
pop rax
pop rcx
pop rdi
pop rsi
pop rbx
pop rdx
mov rsip, JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY]
mov rbxp, JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY]
mov rdxp, JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY]
add rsp, SIZEOF_JSAMPARRAY*4
pop rbx
uncollect_args 4

View File

@@ -4,6 +4,7 @@
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -78,7 +79,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
mov rsi, r12 ; input_data
mov rdi, r13
mov rdi, JSAMPARRAY [rdi] ; output_data
mov rdip, JSAMPARRAY [rdi] ; output_data
vpxor ymm0, ymm0, ymm0 ; ymm0=(all 0's)
vpcmpeqb xmm9, xmm9, xmm9
@@ -92,8 +93,8 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
push rdi
push rsi
mov rsi, JSAMPROW [rsi] ; inptr
mov rdi, JSAMPROW [rdi] ; outptr
mov rsip, JSAMPROW [rsi] ; inptr
mov rdip, JSAMPROW [rdi] ; outptr
test rax, SIZEOF_YMMWORD-1
jz short .skip
@@ -237,18 +238,18 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
mov rsi, r12 ; input_data
mov rdi, r13
mov rdi, JSAMPARRAY [rdi] ; output_data
mov rdip, JSAMPARRAY [rdi] ; output_data
.rowloop:
push rax ; colctr
push rcx
push rdi
push rsi
mov rcx, JSAMPROW [rsi-1*SIZEOF_JSAMPROW] ; inptr1(above)
mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1(below)
mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
mov rcxp, JSAMPROW [rsi-1*SIZEOF_JSAMPROW] ; inptr1(above)
mov rbxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
mov rsip, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1(below)
mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
mov rdip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
vpxor ymm8, ymm8, ymm8 ; ymm8=(all 0's)
vpcmpeqb xmm9, xmm9, xmm9
@@ -541,13 +542,13 @@ EXTN(jsimd_h2v1_upsample_avx2):
mov rsi, r12 ; input_data
mov rdi, r13
mov rdi, JSAMPARRAY [rdi] ; output_data
mov rdip, JSAMPARRAY [rdi] ; output_data
.rowloop:
push rdi
push rsi
mov rsi, JSAMPROW [rsi] ; inptr
mov rdi, JSAMPROW [rdi] ; outptr
mov rsip, JSAMPROW [rsi] ; inptr
mov rdip, JSAMPROW [rdi] ; outptr
mov rax, rdx ; colctr
.columnloop:
@@ -631,14 +632,14 @@ EXTN(jsimd_h2v2_upsample_avx2):
mov rsi, r12 ; input_data
mov rdi, r13
mov rdi, JSAMPARRAY [rdi] ; output_data
mov rdip, JSAMPARRAY [rdi] ; output_data
.rowloop:
push rdi
push rsi
mov rsi, JSAMPROW [rsi] ; inptr
mov rbx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
mov rsip, JSAMPROW [rsi] ; inptr
mov rbxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
mov rdip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
mov rax, rdx ; colctr
.columnloop:

View File

@@ -3,6 +3,7 @@
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -76,14 +77,14 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
mov rsi, r12 ; input_data
mov rdi, r13
mov rdi, JSAMPARRAY [rdi] ; output_data
mov rdip, JSAMPARRAY [rdi] ; output_data
.rowloop:
push rax ; colctr
push rdi
push rsi
mov rsi, JSAMPROW [rsi] ; inptr
mov rdi, JSAMPROW [rdi] ; outptr
mov rsip, JSAMPROW [rsi] ; inptr
mov rdip, JSAMPROW [rdi] ; outptr
test rax, SIZEOF_XMMWORD-1
jz short .skip
@@ -223,18 +224,18 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
mov rsi, r12 ; input_data
mov rdi, r13
mov rdi, JSAMPARRAY [rdi] ; output_data
mov rdip, JSAMPARRAY [rdi] ; output_data
.rowloop:
push rax ; colctr
push rcx
push rdi
push rsi
mov rcx, JSAMPROW [rsi-1*SIZEOF_JSAMPROW] ; inptr1(above)
mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1(below)
mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
mov rcxp, JSAMPROW [rsi-1*SIZEOF_JSAMPROW] ; inptr1(above)
mov rbxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
mov rsip, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1(below)
mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
mov rdip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
test rax, SIZEOF_XMMWORD-1
jz short .skip
@@ -514,13 +515,13 @@ EXTN(jsimd_h2v1_upsample_sse2):
mov rsi, r12 ; input_data
mov rdi, r13
mov rdi, JSAMPARRAY [rdi] ; output_data
mov rdip, JSAMPARRAY [rdi] ; output_data
.rowloop:
push rdi
push rsi
mov rsi, JSAMPROW [rsi] ; inptr
mov rdi, JSAMPROW [rdi] ; outptr
mov rsip, JSAMPROW [rsi] ; inptr
mov rdip, JSAMPROW [rdi] ; outptr
mov rax, rdx ; colctr
.columnloop:
@@ -602,14 +603,14 @@ EXTN(jsimd_h2v2_upsample_sse2):
mov rsi, r12 ; input_data
mov rdi, r13
mov rdi, JSAMPARRAY [rdi] ; output_data
mov rdip, JSAMPARRAY [rdi] ; output_data
.rowloop:
push rdi
push rsi
mov rsi, JSAMPROW [rsi] ; inptr
mov rbx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
mov rsip, JSAMPROW [rsi] ; inptr
mov rbxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
mov rdip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
mov rax, rdx ; colctr
.columnloop:

View File

@@ -3,6 +3,7 @@
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -457,12 +458,12 @@ EXTN(jsimd_idct_float_sse2):
pshufd xmm5, xmm6, 0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
pshufd xmm3, xmm7, 0x4E ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
mov rbx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
mov rbxp, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
movq XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm7
mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
mov rbx, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
mov rdxp, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
mov rbxp, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5
movq XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm3

View File

@@ -3,6 +3,7 @@
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -462,21 +463,21 @@ EXTN(jsimd_idct_ifast_sse2):
pshufd xmm6, xmm4, 0x4E ; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
pshufd xmm2, xmm7, 0x4E ; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
mov rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
mov rsip, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
mov rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
mov rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
mov rdxp, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
mov rsip, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7
mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
mov rdxp, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
mov rsip, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0
mov rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
mov rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
mov rdxp, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
mov rsip, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2

View File

@@ -3,6 +3,7 @@
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, 2018, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -389,23 +390,23 @@ EXTN(jsimd_idct_islow_avx2):
mov eax, r13d
mov rdx, JSAMPROW [r12+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rsi, JSAMPROW [r12+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdxp, JSAMPROW [r12+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rsip, JSAMPROW [r12+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm0
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm1
mov rdx, JSAMPROW [r12+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rsi, JSAMPROW [r12+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdxp, JSAMPROW [r12+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rsip, JSAMPROW [r12+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
mov rdx, JSAMPROW [r12+4*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rsi, JSAMPROW [r12+5*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdxp, JSAMPROW [r12+4*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rsip, JSAMPROW [r12+5*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5
mov rdx, JSAMPROW [r12+6*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rsi, JSAMPROW [r12+7*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdxp, JSAMPROW [r12+6*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rsip, JSAMPROW [r12+7*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7

View File

@@ -3,6 +3,7 @@
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -819,21 +820,21 @@ EXTN(jsimd_idct_islow_sse2):
pshufd xmm2, xmm4, 0x4E ; xmm2=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
pshufd xmm5, xmm3, 0x4E ; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
mov rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
mov rsip, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm7
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm1
mov rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
mov rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
mov rdxp, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
mov rsip, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
mov rdxp, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
mov rsip, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0
mov rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
mov rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
mov rdxp, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
mov rsip, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5

View File

@@ -3,6 +3,7 @@
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -381,12 +382,12 @@ EXTN(jsimd_idct_4x4_sse2):
pshufd xmm1, xmm4, 0x4E ; xmm1=(20 21 22 23 30 31 32 33 00 ..)
pshufd xmm3, xmm4, 0x93 ; xmm3=(30 31 32 33 00 01 02 03 10 ..)
mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
mov rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
mov rsip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
mov rdx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
mov rdxp, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
mov rsip, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
@@ -560,8 +561,8 @@ EXTN(jsimd_idct_2x2_sse2):
pextrw ebx, xmm6, 0x00 ; ebx=(C0 D0 -- --)
pextrw ecx, xmm6, 0x01 ; ecx=(C1 D1 -- --)
mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
mov rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
mov rsip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
mov WORD [rdx+rax*SIZEOF_JSAMPLE], bx
mov WORD [rsi+rax*SIZEOF_JSAMPLE], cx

View File

@@ -3,6 +3,7 @@
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -53,8 +54,8 @@ EXTN(jsimd_convsamp_float_sse2):
mov rdi, r12
mov rcx, DCTSIZE/2
.convloop:
mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rbxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdxp, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE]
movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE]

View File

@@ -4,6 +4,7 @@
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, 2018, D. R. Commander.
; Copyright (C) 2016, Matthieu Darbois.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -46,23 +47,23 @@ EXTN(jsimd_convsamp_avx2):
mov eax, r11d
mov rsi, JSAMPROW [r10+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdi, JSAMPROW [r10+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rsip, JSAMPROW [r10+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdip, JSAMPROW [r10+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm0, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE]
pinsrq xmm0, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1
mov rsi, JSAMPROW [r10+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdi, JSAMPROW [r10+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rsip, JSAMPROW [r10+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdip, JSAMPROW [r10+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm1, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE]
pinsrq xmm1, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1
mov rsi, JSAMPROW [r10+4*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdi, JSAMPROW [r10+5*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rsip, JSAMPROW [r10+4*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdip, JSAMPROW [r10+5*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm2, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE]
pinsrq xmm2, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1
mov rsi, JSAMPROW [r10+6*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdi, JSAMPROW [r10+7*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rsip, JSAMPROW [r10+6*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdip, JSAMPROW [r10+7*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm3, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE]
pinsrq xmm3, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1

View File

@@ -3,6 +3,7 @@
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -53,14 +54,14 @@ EXTN(jsimd_convsamp_sse2):
mov rdi, r12
mov rcx, DCTSIZE/4
.convloop:
mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rbxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdxp, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm0=(01234567)
movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF)
mov rbx, JSAMPROW [rsi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdx, JSAMPROW [rsi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rbxp, JSAMPROW [rsi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov rdxp, JSAMPROW [rsi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
movq xmm2, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm2=(GHIJKLMN)
movq xmm3, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm3=(OPQRSTUV)