mirror of
https://github.com/libjpeg-turbo/libjpeg-turbo.git
synced 2026-01-18 21:41:20 +01:00
Add x32 ABI support on Linux
The x32 ABI is similar to the x86-64 ABI but uses 32-bit pointers. (Refer to https://sites.google.com/site/x32abi) Based on: 8da8fc52131e33dfea8024ffea78dadedcf76753, d04228a7b5b4ad38316a. Closes #274
This commit is contained in:
@@ -46,7 +46,7 @@ if(CMAKE_SYSTEM_PROCESSOR_LC MATCHES "x86_64" OR
|
||||
CMAKE_SYSTEM_PROCESSOR_LC MATCHES "i[0-9]86" OR
|
||||
CMAKE_SYSTEM_PROCESSOR_LC MATCHES "x86" OR
|
||||
CMAKE_SYSTEM_PROCESSOR_LC MATCHES "ia32")
|
||||
if(BITS EQUAL 64)
|
||||
if(BITS EQUAL 64 OR CMAKE_C_COMPILER_ABI MATCHES "ELF X32")
|
||||
set(CPU_TYPE x86_64)
|
||||
else()
|
||||
set(CPU_TYPE i386)
|
||||
@@ -101,6 +101,8 @@ if(CMAKE_INSTALL_PREFIX STREQUAL "${CMAKE_INSTALL_DEFAULT_PREFIX}")
|
||||
if(UNIX AND NOT APPLE)
|
||||
if(BITS EQUAL 64)
|
||||
set(CMAKE_INSTALL_DEFAULT_LIBDIR "lib64")
|
||||
elseif(CMAKE_C_COMPILER_ABI MATCHES "ELF X32")
|
||||
set(CMAKE_INSTALL_DEFAULT_LIBDIR "libx32")
|
||||
else()
|
||||
set(CMAKE_INSTALL_DEFAULT_LIBDIR "lib32")
|
||||
endif()
|
||||
@@ -153,8 +155,12 @@ option(WITH_ARITH_DEC "Include arithmetic decoding support when emulating the li
|
||||
boolean_number(WITH_ARITH_DEC)
|
||||
option(WITH_ARITH_ENC "Include arithmetic encoding support when emulating the libjpeg v6b API/ABI" TRUE)
|
||||
boolean_number(WITH_ARITH_ENC)
|
||||
option(WITH_JAVA "Build Java wrapper for the TurboJPEG API library (implies ENABLE_SHARED=1)" FALSE)
|
||||
boolean_number(WITH_JAVA)
|
||||
if(CMAKE_C_COMPILER_ABI MATCHES "ELF X32")
|
||||
set(WITH_JAVA 0)
|
||||
else()
|
||||
option(WITH_JAVA "Build Java wrapper for the TurboJPEG API library (implies ENABLE_SHARED=1)" FALSE)
|
||||
boolean_number(WITH_JAVA)
|
||||
endif()
|
||||
option(WITH_JPEG7 "Emulate libjpeg v7 API/ABI (this makes ${CMAKE_PROJECT_NAME} backward-incompatible with libjpeg v6b)" FALSE)
|
||||
boolean_number(WITH_JPEG7)
|
||||
option(WITH_JPEG8 "Emulate libjpeg v8 API/ABI (this makes ${CMAKE_PROJECT_NAME} backward-incompatible with libjpeg v6b)" FALSE)
|
||||
|
||||
19
ChangeLog.md
19
ChangeLog.md
@@ -1,3 +1,22 @@
|
||||
2.1 pre-beta
|
||||
============
|
||||
|
||||
### Significant changes relative to 2.0.1:
|
||||
|
||||
1. The build system, x86-64 SIMD extensions, and accelerated Huffman codec now
|
||||
support the x32 ABI on Linux, which allows for using x86-64 instructions with
|
||||
32-bit pointers. The x32 ABI is generally enabled by adding `-mx32` to the
|
||||
compiler flags.
|
||||
|
||||
Caveats:
|
||||
- CMake 3.9.0 or later is required in order for the build system to
|
||||
automatically detect an x32 build.
|
||||
- Java does not support the x32 ABI, and thus the TurboJPEG Java API will
|
||||
automatically be disabled with x32 builds.
|
||||
- SIMD acceleration for progressive Huffman encoding does not (currently)
|
||||
work with the x32 ABI and will be disabled in x32 builds.
|
||||
|
||||
|
||||
2.0.1
|
||||
=====
|
||||
|
||||
|
||||
@@ -118,6 +118,7 @@
|
||||
# absolute paths where necessary, using the same logic.
|
||||
|
||||
#=============================================================================
|
||||
# Copyright 2018 Matthias Räncker
|
||||
# Copyright 2016 D. R. Commander
|
||||
# Copyright 2016 Dmitry Marakasov
|
||||
# Copyright 2016 Roger Leigh
|
||||
@@ -259,6 +260,8 @@ if(NOT DEFINED CMAKE_INSTALL_DEFAULT_LIBDIR)
|
||||
else()
|
||||
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
|
||||
set(CMAKE_INSTALL_DEFAULT_LIBDIR "lib64")
|
||||
elseif(CMAKE_C_COMPILER_ABI MATCHES "ELF X32")
|
||||
set(CMAKE_INSTALL_DEFAULT_LIBDIR "libx32")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
15
jchuff.c
15
jchuff.c
@@ -6,6 +6,7 @@
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright (C) 2009-2011, 2014-2016, 2018, D. R. Commander.
|
||||
* Copyright (C) 2015, Matthieu Darbois.
|
||||
* Copyright (C) 2018, Matthias Räncker.
|
||||
* For conditions of distribution and use, see the accompanying README.ijg
|
||||
* file.
|
||||
*
|
||||
@@ -65,8 +66,14 @@
|
||||
* but must not be updated permanently until we complete the MCU.
|
||||
*/
|
||||
|
||||
#if defined(__x86_64__) && defined(__ILP32__)
|
||||
typedef unsigned long long bit_buf_type;
|
||||
#else
|
||||
typedef size_t bit_buf_type;
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
size_t put_buffer; /* current bit-accumulation buffer */
|
||||
bit_buf_type put_buffer; /* current bit-accumulation buffer */
|
||||
int put_bits; /* # of bits now in it */
|
||||
int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
|
||||
} savable_state;
|
||||
@@ -387,7 +394,7 @@ dump_buffer(working_state *state)
|
||||
#error Cannot determine word size
|
||||
#endif
|
||||
|
||||
#if SIZEOF_SIZE_T == 8 || defined(_WIN64)
|
||||
#if SIZEOF_SIZE_T == 8 || defined(_WIN64) || (defined(__x86_64__) && defined(__ILP32__))
|
||||
|
||||
#define EMIT_BITS(code, size) { \
|
||||
CHECKBUF47() \
|
||||
@@ -463,7 +470,7 @@ LOCAL(boolean)
|
||||
flush_bits(working_state *state)
|
||||
{
|
||||
JOCTET _buffer[BUFSIZE], *buffer;
|
||||
size_t put_buffer; int put_bits;
|
||||
bit_buf_type put_buffer; int put_bits;
|
||||
size_t bytes, bytestocopy; int localbuf = 0;
|
||||
|
||||
put_buffer = state->cur.put_buffer;
|
||||
@@ -509,7 +516,7 @@ encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,
|
||||
int nbits;
|
||||
int r, code, size;
|
||||
JOCTET _buffer[BUFSIZE], *buffer;
|
||||
size_t put_buffer; int put_bits;
|
||||
bit_buf_type put_buffer; int put_bits;
|
||||
int code_0xf0 = actbl->ehufco[0xf0], size_0xf0 = actbl->ehufsi[0xf0];
|
||||
size_t bytes, bytestocopy; int localbuf = 0;
|
||||
|
||||
|
||||
3
jdhuff.c
3
jdhuff.c
@@ -5,6 +5,7 @@
|
||||
* Copyright (C) 1991-1997, Thomas G. Lane.
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright (C) 2009-2011, 2016, 2018, D. R. Commander.
|
||||
* Copyright (C) 2018, Matthias Räncker.
|
||||
* For conditions of distribution and use, see the accompanying README.ijg
|
||||
* file.
|
||||
*
|
||||
@@ -423,7 +424,7 @@ no_more_bytes:
|
||||
} \
|
||||
}
|
||||
|
||||
#if SIZEOF_SIZE_T == 8 || defined(_WIN64)
|
||||
#if SIZEOF_SIZE_T == 8 || defined(_WIN64) || (defined(__x86_64__) && defined(__ILP32__))
|
||||
|
||||
/* Pre-fetch 48 bytes, because the holding register is 64-bit */
|
||||
#define FILL_BIT_BUFFER_FAST \
|
||||
|
||||
6
jdhuff.h
6
jdhuff.h
@@ -5,6 +5,7 @@
|
||||
* Copyright (C) 1991-1997, Thomas G. Lane.
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright (C) 2010-2011, 2015-2016, D. R. Commander.
|
||||
* Copyright (C) 2018, Matthias Räncker.
|
||||
* For conditions of distribution and use, see the accompanying README.ijg
|
||||
* file.
|
||||
*
|
||||
@@ -78,6 +79,11 @@ EXTERN(void) jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC,
|
||||
typedef size_t bit_buf_type; /* type of bit-extraction buffer */
|
||||
#define BIT_BUF_SIZE 64 /* size of buffer in bits */
|
||||
|
||||
#elif defined(__x86_64__) && defined(__ILP32__)
|
||||
|
||||
typedef unsigned long long bit_buf_type; /* type of bit-extraction buffer */
|
||||
#define BIT_BUF_SIZE 64 /* size of buffer in bits */
|
||||
|
||||
#else
|
||||
|
||||
typedef unsigned long bit_buf_type; /* type of bit-extraction buffer */
|
||||
|
||||
@@ -30,6 +30,9 @@ if(CPU_TYPE STREQUAL "x86_64")
|
||||
if(CYGWIN)
|
||||
set(CMAKE_ASM_NASM_OBJECT_FORMAT win64)
|
||||
endif()
|
||||
if(CMAKE_C_COMPILER_ABI MATCHES "ELF X32")
|
||||
set(CMAKE_ASM_NASM_OBJECT_FORMAT elfx32)
|
||||
endif()
|
||||
elseif(CPU_TYPE STREQUAL "i386")
|
||||
if(BORLAND)
|
||||
set(CMAKE_ASM_NASM_OBJECT_FORMAT obj)
|
||||
|
||||
@@ -2,8 +2,9 @@
|
||||
; jsimdext.inc - common declarations
|
||||
;
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2010, 2016, D. R. Commander.
|
||||
; Copyright (C) 2010, 2016, 2018, D. R. Commander.
|
||||
; Copyright (C) 2018, Matthieu Darbois.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library - version 1.02
|
||||
;
|
||||
@@ -132,13 +133,49 @@ section .note.GNU-stack noalloc noexec nowrite progbits
|
||||
; Common types
|
||||
;
|
||||
%ifdef __x86_64__
|
||||
%ifnidn __OUTPUT_FORMAT__, elfx32
|
||||
%define POINTER qword ; general pointer type
|
||||
%define SIZEOF_POINTER SIZEOF_QWORD ; sizeof(POINTER)
|
||||
%define POINTER_BIT QWORD_BIT ; sizeof(POINTER)*BYTE_BIT
|
||||
%else
|
||||
%define raxp rax
|
||||
%define rbxp rbx
|
||||
%define rcxp rcx
|
||||
%define rdxp rdx
|
||||
%define rsip rsi
|
||||
%define rdip rdi
|
||||
%define rbpp rbp
|
||||
%define rspp rsp
|
||||
%define r8p r8
|
||||
%define r9p r9
|
||||
%define r10p r10
|
||||
%define r11p r11
|
||||
%define r12p r12
|
||||
%define r13p r13
|
||||
%define r14p r14
|
||||
%define r15p r15
|
||||
%endif
|
||||
%endif
|
||||
%ifndef raxp
|
||||
%define POINTER dword ; general pointer type
|
||||
%define SIZEOF_POINTER SIZEOF_DWORD ; sizeof(POINTER)
|
||||
%define POINTER_BIT DWORD_BIT ; sizeof(POINTER)*BYTE_BIT
|
||||
; x86_64 ILP32 ABI (x32)
|
||||
%define raxp eax
|
||||
%define rbxp ebx
|
||||
%define rcxp ecx
|
||||
%define rdxp edx
|
||||
%define rsip esi
|
||||
%define rdip edi
|
||||
%define rbpp ebp
|
||||
%define rspp esp
|
||||
%define r8p r8d
|
||||
%define r9p r9d
|
||||
%define r10p r10d
|
||||
%define r11p r11d
|
||||
%define r12p r12d
|
||||
%define r13p r13d
|
||||
%define r14p r14d
|
||||
%define r15p r15d
|
||||
%endif
|
||||
|
||||
%define INT dword ; signed integer type
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
;
|
||||
; Copyright (C) 2009, 2016, D. R. Commander.
|
||||
; Copyright (C) 2015, Intel Corporation.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -59,9 +60,9 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
|
||||
|
||||
mov rsi, r12
|
||||
mov ecx, r13d
|
||||
mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbx, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdx, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY]
|
||||
mov rdip, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbxp, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdxp, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY]
|
||||
lea rdi, [rdi+rcx*SIZEOF_JSAMPROW]
|
||||
lea rbx, [rbx+rcx*SIZEOF_JSAMPROW]
|
||||
lea rdx, [rdx+rcx*SIZEOF_JSAMPROW]
|
||||
@@ -79,10 +80,10 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
|
||||
push rsi
|
||||
push rcx ; col
|
||||
|
||||
mov rsi, JSAMPROW [rsi] ; inptr
|
||||
mov rdi, JSAMPROW [rdi] ; outptr0
|
||||
mov rbx, JSAMPROW [rbx] ; outptr1
|
||||
mov rdx, JSAMPROW [rdx] ; outptr2
|
||||
mov rsip, JSAMPROW [rsi] ; inptr
|
||||
mov rdip, JSAMPROW [rdi] ; outptr0
|
||||
mov rbxp, JSAMPROW [rbx] ; outptr1
|
||||
mov rdxp, JSAMPROW [rdx] ; outptr2
|
||||
|
||||
cmp rcx, byte SIZEOF_YMMWORD
|
||||
jae near .columnloop
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
; jccolext.asm - colorspace conversion (64-bit SSE2)
|
||||
;
|
||||
; Copyright (C) 2009, 2016, D. R. Commander.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -58,9 +59,9 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
|
||||
|
||||
mov rsi, r12
|
||||
mov ecx, r13d
|
||||
mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbx, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdx, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY]
|
||||
mov rdip, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbxp, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdxp, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY]
|
||||
lea rdi, [rdi+rcx*SIZEOF_JSAMPROW]
|
||||
lea rbx, [rbx+rcx*SIZEOF_JSAMPROW]
|
||||
lea rdx, [rdx+rcx*SIZEOF_JSAMPROW]
|
||||
@@ -78,10 +79,10 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
|
||||
push rsi
|
||||
push rcx ; col
|
||||
|
||||
mov rsi, JSAMPROW [rsi] ; inptr
|
||||
mov rdi, JSAMPROW [rdi] ; outptr0
|
||||
mov rbx, JSAMPROW [rbx] ; outptr1
|
||||
mov rdx, JSAMPROW [rdx] ; outptr2
|
||||
mov rsip, JSAMPROW [rsi] ; inptr
|
||||
mov rdip, JSAMPROW [rdi] ; outptr0
|
||||
mov rbxp, JSAMPROW [rbx] ; outptr1
|
||||
mov rdxp, JSAMPROW [rdx] ; outptr2
|
||||
|
||||
cmp rcx, byte SIZEOF_XMMWORD
|
||||
jae near .columnloop
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
;
|
||||
; Copyright (C) 2011, 2016, D. R. Commander.
|
||||
; Copyright (C) 2015, Intel Corporation.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -59,7 +60,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
|
||||
|
||||
mov rsi, r12
|
||||
mov ecx, r13d
|
||||
mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
|
||||
mov rdip, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
|
||||
lea rdi, [rdi+rcx*SIZEOF_JSAMPROW]
|
||||
|
||||
pop rcx
|
||||
@@ -73,8 +74,8 @@ EXTN(jsimd_rgb_gray_convert_avx2):
|
||||
push rsi
|
||||
push rcx ; col
|
||||
|
||||
mov rsi, JSAMPROW [rsi] ; inptr
|
||||
mov rdi, JSAMPROW [rdi] ; outptr0
|
||||
mov rsip, JSAMPROW [rsi] ; inptr
|
||||
mov rdip, JSAMPROW [rdi] ; outptr0
|
||||
|
||||
cmp rcx, byte SIZEOF_YMMWORD
|
||||
jae near .columnloop
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
; jcgryext.asm - grayscale colorspace conversion (64-bit SSE2)
|
||||
;
|
||||
; Copyright (C) 2011, 2016, D. R. Commander.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -58,7 +59,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
|
||||
|
||||
mov rsi, r12
|
||||
mov ecx, r13d
|
||||
mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
|
||||
mov rdip, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
|
||||
lea rdi, [rdi+rcx*SIZEOF_JSAMPROW]
|
||||
|
||||
pop rcx
|
||||
@@ -72,8 +73,8 @@ EXTN(jsimd_rgb_gray_convert_sse2):
|
||||
push rsi
|
||||
push rcx ; col
|
||||
|
||||
mov rsi, JSAMPROW [rsi] ; inptr
|
||||
mov rdi, JSAMPROW [rdi] ; outptr0
|
||||
mov rsip, JSAMPROW [rsi] ; inptr
|
||||
mov rdip, JSAMPROW [rdi] ; outptr0
|
||||
|
||||
cmp rcx, byte SIZEOF_XMMWORD
|
||||
jae near .columnloop
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
;
|
||||
; Copyright (C) 2009-2011, 2014-2016, D. R. Commander.
|
||||
; Copyright (C) 2015, Matthieu Darbois.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -199,8 +200,8 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
|
||||
mov buffer, r11 ; r11 is now scratch
|
||||
|
||||
mov put_buffer, MMWORD [r10+16] ; put_buffer = state->cur.put_buffer;
|
||||
mov put_bits, DWORD [r10+24] ; put_bits = state->cur.put_bits;
|
||||
mov put_buffer, MMWORD [r10+SIZEOF_POINTER*2] ; put_buffer = state->cur.put_buffer;
|
||||
mov put_bits, DWORD [r10+SIZEOF_POINTER*2+8] ; put_bits = state->cur.put_bits;
|
||||
push r10 ; r10 is now scratch
|
||||
|
||||
; Encode the DC coefficient difference per section F.1.2.1
|
||||
@@ -332,8 +333,8 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
.EFN:
|
||||
pop r10
|
||||
; Save put_buffer & put_bits
|
||||
mov MMWORD [r10+16], put_buffer ; state->cur.put_buffer = put_buffer;
|
||||
mov DWORD [r10+24], put_bits ; state->cur.put_bits = put_bits;
|
||||
mov MMWORD [r10+SIZEOF_POINTER*2], put_buffer ; state->cur.put_buffer = put_buffer;
|
||||
mov DWORD [r10+SIZEOF_POINTER*2+8], put_bits ; state->cur.put_bits = put_bits;
|
||||
|
||||
pop rbx
|
||||
uncollect_args 6
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2009, 2016, D. R. Commander.
|
||||
; Copyright (C) 2015, Intel Corporation.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -73,7 +74,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
|
||||
push rax
|
||||
push rcx
|
||||
|
||||
mov rdi, JSAMPROW [rsi]
|
||||
mov rdip, JSAMPROW [rsi]
|
||||
add rdi, rdx
|
||||
mov al, JSAMPLE [rdi-1]
|
||||
|
||||
@@ -109,8 +110,8 @@ EXTN(jsimd_h2v1_downsample_avx2):
|
||||
push rdi
|
||||
push rsi
|
||||
|
||||
mov rsi, JSAMPROW [rsi] ; inptr
|
||||
mov rdi, JSAMPROW [rdi] ; outptr
|
||||
mov rsip, JSAMPROW [rsi] ; inptr
|
||||
mov rdip, JSAMPROW [rdi] ; outptr
|
||||
|
||||
cmp rcx, byte SIZEOF_YMMWORD
|
||||
jae short .columnloop
|
||||
@@ -235,7 +236,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
|
||||
push rax
|
||||
push rcx
|
||||
|
||||
mov rdi, JSAMPROW [rsi]
|
||||
mov rdip, JSAMPROW [rsi]
|
||||
add rdi, rdx
|
||||
mov al, JSAMPLE [rdi-1]
|
||||
|
||||
@@ -271,9 +272,9 @@ EXTN(jsimd_h2v2_downsample_avx2):
|
||||
push rdi
|
||||
push rsi
|
||||
|
||||
mov rdx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
|
||||
mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1
|
||||
mov rdi, JSAMPROW [rdi] ; outptr
|
||||
mov rdxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
|
||||
mov rsip, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1
|
||||
mov rdip, JSAMPROW [rdi] ; outptr
|
||||
|
||||
cmp rcx, byte SIZEOF_YMMWORD
|
||||
jae short .columnloop
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
;
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2009, 2016, D. R. Commander.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -72,7 +73,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
|
||||
push rax
|
||||
push rcx
|
||||
|
||||
mov rdi, JSAMPROW [rsi]
|
||||
mov rdip, JSAMPROW [rsi]
|
||||
add rdi, rdx
|
||||
mov al, JSAMPLE [rdi-1]
|
||||
|
||||
@@ -107,8 +108,8 @@ EXTN(jsimd_h2v1_downsample_sse2):
|
||||
push rdi
|
||||
push rsi
|
||||
|
||||
mov rsi, JSAMPROW [rsi] ; inptr
|
||||
mov rdi, JSAMPROW [rdi] ; outptr
|
||||
mov rsip, JSAMPROW [rsi] ; inptr
|
||||
mov rdip, JSAMPROW [rdi] ; outptr
|
||||
|
||||
cmp rcx, byte SIZEOF_XMMWORD
|
||||
jae short .columnloop
|
||||
@@ -217,7 +218,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
|
||||
push rax
|
||||
push rcx
|
||||
|
||||
mov rdi, JSAMPROW [rsi]
|
||||
mov rdip, JSAMPROW [rsi]
|
||||
add rdi, rdx
|
||||
mov al, JSAMPLE [rdi-1]
|
||||
|
||||
@@ -252,9 +253,9 @@ EXTN(jsimd_h2v2_downsample_sse2):
|
||||
push rdi
|
||||
push rsi
|
||||
|
||||
mov rdx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
|
||||
mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1
|
||||
mov rdi, JSAMPROW [rdi] ; outptr
|
||||
mov rdxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
|
||||
mov rsip, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1
|
||||
mov rdip, JSAMPROW [rdi] ; outptr
|
||||
|
||||
cmp rcx, byte SIZEOF_XMMWORD
|
||||
jae short .columnloop
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
|
||||
; Copyright (C) 2015, Intel Corporation.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -60,9 +61,9 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
|
||||
|
||||
mov rdi, r11
|
||||
mov ecx, r12d
|
||||
mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
|
||||
mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
|
||||
lea rsi, [rsi+rcx*SIZEOF_JSAMPROW]
|
||||
lea rbx, [rbx+rcx*SIZEOF_JSAMPROW]
|
||||
lea rdx, [rdx+rcx*SIZEOF_JSAMPROW]
|
||||
@@ -81,10 +82,10 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
|
||||
push rsi
|
||||
push rcx ; col
|
||||
|
||||
mov rsi, JSAMPROW [rsi] ; inptr0
|
||||
mov rbx, JSAMPROW [rbx] ; inptr1
|
||||
mov rdx, JSAMPROW [rdx] ; inptr2
|
||||
mov rdi, JSAMPROW [rdi] ; outptr
|
||||
mov rsip, JSAMPROW [rsi] ; inptr0
|
||||
mov rbxp, JSAMPROW [rbx] ; inptr1
|
||||
mov rdxp, JSAMPROW [rdx] ; inptr2
|
||||
mov rdip, JSAMPROW [rdi] ; outptr
|
||||
.columnloop:
|
||||
|
||||
vmovdqu ymm5, YMMWORD [rbx] ; ymm5=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV)
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
;
|
||||
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -59,9 +60,9 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
|
||||
|
||||
mov rdi, r11
|
||||
mov ecx, r12d
|
||||
mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
|
||||
mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
|
||||
lea rsi, [rsi+rcx*SIZEOF_JSAMPROW]
|
||||
lea rbx, [rbx+rcx*SIZEOF_JSAMPROW]
|
||||
lea rdx, [rdx+rcx*SIZEOF_JSAMPROW]
|
||||
@@ -80,10 +81,10 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
|
||||
push rsi
|
||||
push rcx ; col
|
||||
|
||||
mov rsi, JSAMPROW [rsi] ; inptr0
|
||||
mov rbx, JSAMPROW [rbx] ; inptr1
|
||||
mov rdx, JSAMPROW [rdx] ; inptr2
|
||||
mov rdi, JSAMPROW [rdi] ; outptr
|
||||
mov rsip, JSAMPROW [rsi] ; inptr0
|
||||
mov rbxp, JSAMPROW [rbx] ; inptr1
|
||||
mov rdxp, JSAMPROW [rdx] ; inptr2
|
||||
mov rdip, JSAMPROW [rdi] ; outptr
|
||||
.columnloop:
|
||||
|
||||
movdqa xmm5, XMMWORD [rbx] ; xmm5=Cb(0123456789ABCDEF)
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
|
||||
; Copyright (C) 2015, Intel Corporation.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -60,14 +61,14 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
||||
|
||||
mov rdi, r11
|
||||
mov ecx, r12d
|
||||
mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
|
||||
mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
|
||||
mov rdi, r13
|
||||
mov rsi, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0
|
||||
mov rbx, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1
|
||||
mov rdx, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2
|
||||
mov rdi, JSAMPROW [rdi] ; outptr
|
||||
mov rsip, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0
|
||||
mov rbxp, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1
|
||||
mov rdxp, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2
|
||||
mov rdip, JSAMPROW [rdi] ; outptr
|
||||
|
||||
pop rcx ; col
|
||||
|
||||
@@ -516,15 +517,16 @@ EXTN(jsimd_h2v2_merged_upsample_avx2):
|
||||
|
||||
mov rdi, r11
|
||||
mov ecx, r12d
|
||||
mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
|
||||
mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
|
||||
mov rdi, r13
|
||||
lea rsi, [rsi+rcx*SIZEOF_JSAMPROW]
|
||||
|
||||
push rdx ; inptr2
|
||||
push rbx ; inptr1
|
||||
push rsi ; inptr00
|
||||
sub rsp, SIZEOF_JSAMPARRAY*4
|
||||
mov JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY], rsip ; inptr00
|
||||
mov JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY], rbxp ; inptr1
|
||||
mov JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY], rdxp ; inptr2
|
||||
mov rbx, rsp
|
||||
|
||||
push rdi
|
||||
@@ -548,16 +550,16 @@ EXTN(jsimd_h2v2_merged_upsample_avx2):
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbx
|
||||
pop rdx
|
||||
mov rsip, JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbxp, JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdxp, JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY]
|
||||
|
||||
add rdi, byte SIZEOF_JSAMPROW ; outptr1
|
||||
add rsi, byte SIZEOF_JSAMPROW ; inptr01
|
||||
|
||||
push rdx ; inptr2
|
||||
push rbx ; inptr1
|
||||
push rsi ; inptr00
|
||||
mov JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY], rsip ; inptr00
|
||||
mov JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY], rbxp ; inptr1
|
||||
mov JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY], rdxp ; inptr2
|
||||
mov rbx, rsp
|
||||
|
||||
push rdi
|
||||
@@ -581,9 +583,10 @@ EXTN(jsimd_h2v2_merged_upsample_avx2):
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbx
|
||||
pop rdx
|
||||
mov rsip, JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbxp, JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdxp, JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY]
|
||||
add rsp, SIZEOF_JSAMPARRAY*4
|
||||
|
||||
pop rbx
|
||||
uncollect_args 4
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
;
|
||||
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -59,14 +60,14 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
||||
|
||||
mov rdi, r11
|
||||
mov ecx, r12d
|
||||
mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
|
||||
mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
|
||||
mov rdi, r13
|
||||
mov rsi, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0
|
||||
mov rbx, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1
|
||||
mov rdx, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2
|
||||
mov rdi, JSAMPROW [rdi] ; outptr
|
||||
mov rsip, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0
|
||||
mov rbxp, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1
|
||||
mov rdxp, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2
|
||||
mov rdip, JSAMPROW [rdi] ; outptr
|
||||
|
||||
pop rcx ; col
|
||||
|
||||
@@ -458,15 +459,16 @@ EXTN(jsimd_h2v2_merged_upsample_sse2):
|
||||
|
||||
mov rdi, r11
|
||||
mov ecx, r12d
|
||||
mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
|
||||
mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdxp, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
|
||||
mov rdi, r13
|
||||
lea rsi, [rsi+rcx*SIZEOF_JSAMPROW]
|
||||
|
||||
push rdx ; inptr2
|
||||
push rbx ; inptr1
|
||||
push rsi ; inptr00
|
||||
sub rsp, SIZEOF_JSAMPARRAY*4
|
||||
mov JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY], rsip ; inptr00
|
||||
mov JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY], rbxp ; inptr1
|
||||
mov JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY], rdxp ; inptr2
|
||||
mov rbx, rsp
|
||||
|
||||
push rdi
|
||||
@@ -490,16 +492,16 @@ EXTN(jsimd_h2v2_merged_upsample_sse2):
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbx
|
||||
pop rdx
|
||||
mov rsip, JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbxp, JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdxp, JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY]
|
||||
|
||||
add rdi, byte SIZEOF_JSAMPROW ; outptr1
|
||||
add rsi, byte SIZEOF_JSAMPROW ; inptr01
|
||||
|
||||
push rdx ; inptr2
|
||||
push rbx ; inptr1
|
||||
push rsi ; inptr00
|
||||
mov JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY], rsip ; inptr00
|
||||
mov JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY], rbxp ; inptr1
|
||||
mov JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY], rdxp ; inptr2
|
||||
mov rbx, rsp
|
||||
|
||||
push rdi
|
||||
@@ -523,9 +525,10 @@ EXTN(jsimd_h2v2_merged_upsample_sse2):
|
||||
pop rax
|
||||
pop rcx
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbx
|
||||
pop rdx
|
||||
mov rsip, JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY]
|
||||
mov rbxp, JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY]
|
||||
mov rdxp, JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY]
|
||||
add rsp, SIZEOF_JSAMPARRAY*4
|
||||
|
||||
pop rbx
|
||||
uncollect_args 4
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2009, 2016, D. R. Commander.
|
||||
; Copyright (C) 2015, Intel Corporation.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -78,7 +79,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
|
||||
|
||||
mov rsi, r12 ; input_data
|
||||
mov rdi, r13
|
||||
mov rdi, JSAMPARRAY [rdi] ; output_data
|
||||
mov rdip, JSAMPARRAY [rdi] ; output_data
|
||||
|
||||
vpxor ymm0, ymm0, ymm0 ; ymm0=(all 0's)
|
||||
vpcmpeqb xmm9, xmm9, xmm9
|
||||
@@ -92,8 +93,8 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
|
||||
push rdi
|
||||
push rsi
|
||||
|
||||
mov rsi, JSAMPROW [rsi] ; inptr
|
||||
mov rdi, JSAMPROW [rdi] ; outptr
|
||||
mov rsip, JSAMPROW [rsi] ; inptr
|
||||
mov rdip, JSAMPROW [rdi] ; outptr
|
||||
|
||||
test rax, SIZEOF_YMMWORD-1
|
||||
jz short .skip
|
||||
@@ -237,18 +238,18 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
|
||||
|
||||
mov rsi, r12 ; input_data
|
||||
mov rdi, r13
|
||||
mov rdi, JSAMPARRAY [rdi] ; output_data
|
||||
mov rdip, JSAMPARRAY [rdi] ; output_data
|
||||
.rowloop:
|
||||
push rax ; colctr
|
||||
push rcx
|
||||
push rdi
|
||||
push rsi
|
||||
|
||||
mov rcx, JSAMPROW [rsi-1*SIZEOF_JSAMPROW] ; inptr1(above)
|
||||
mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
|
||||
mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1(below)
|
||||
mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
|
||||
mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
|
||||
mov rcxp, JSAMPROW [rsi-1*SIZEOF_JSAMPROW] ; inptr1(above)
|
||||
mov rbxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
|
||||
mov rsip, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1(below)
|
||||
mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
|
||||
mov rdip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
|
||||
|
||||
vpxor ymm8, ymm8, ymm8 ; ymm8=(all 0's)
|
||||
vpcmpeqb xmm9, xmm9, xmm9
|
||||
@@ -541,13 +542,13 @@ EXTN(jsimd_h2v1_upsample_avx2):
|
||||
|
||||
mov rsi, r12 ; input_data
|
||||
mov rdi, r13
|
||||
mov rdi, JSAMPARRAY [rdi] ; output_data
|
||||
mov rdip, JSAMPARRAY [rdi] ; output_data
|
||||
.rowloop:
|
||||
push rdi
|
||||
push rsi
|
||||
|
||||
mov rsi, JSAMPROW [rsi] ; inptr
|
||||
mov rdi, JSAMPROW [rdi] ; outptr
|
||||
mov rsip, JSAMPROW [rsi] ; inptr
|
||||
mov rdip, JSAMPROW [rdi] ; outptr
|
||||
mov rax, rdx ; colctr
|
||||
.columnloop:
|
||||
|
||||
@@ -631,14 +632,14 @@ EXTN(jsimd_h2v2_upsample_avx2):
|
||||
|
||||
mov rsi, r12 ; input_data
|
||||
mov rdi, r13
|
||||
mov rdi, JSAMPARRAY [rdi] ; output_data
|
||||
mov rdip, JSAMPARRAY [rdi] ; output_data
|
||||
.rowloop:
|
||||
push rdi
|
||||
push rsi
|
||||
|
||||
mov rsi, JSAMPROW [rsi] ; inptr
|
||||
mov rbx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
|
||||
mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
|
||||
mov rsip, JSAMPROW [rsi] ; inptr
|
||||
mov rbxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
|
||||
mov rdip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
|
||||
mov rax, rdx ; colctr
|
||||
.columnloop:
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
;
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2009, 2016, D. R. Commander.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -76,14 +77,14 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
|
||||
|
||||
mov rsi, r12 ; input_data
|
||||
mov rdi, r13
|
||||
mov rdi, JSAMPARRAY [rdi] ; output_data
|
||||
mov rdip, JSAMPARRAY [rdi] ; output_data
|
||||
.rowloop:
|
||||
push rax ; colctr
|
||||
push rdi
|
||||
push rsi
|
||||
|
||||
mov rsi, JSAMPROW [rsi] ; inptr
|
||||
mov rdi, JSAMPROW [rdi] ; outptr
|
||||
mov rsip, JSAMPROW [rsi] ; inptr
|
||||
mov rdip, JSAMPROW [rdi] ; outptr
|
||||
|
||||
test rax, SIZEOF_XMMWORD-1
|
||||
jz short .skip
|
||||
@@ -223,18 +224,18 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
|
||||
|
||||
mov rsi, r12 ; input_data
|
||||
mov rdi, r13
|
||||
mov rdi, JSAMPARRAY [rdi] ; output_data
|
||||
mov rdip, JSAMPARRAY [rdi] ; output_data
|
||||
.rowloop:
|
||||
push rax ; colctr
|
||||
push rcx
|
||||
push rdi
|
||||
push rsi
|
||||
|
||||
mov rcx, JSAMPROW [rsi-1*SIZEOF_JSAMPROW] ; inptr1(above)
|
||||
mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
|
||||
mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1(below)
|
||||
mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
|
||||
mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
|
||||
mov rcxp, JSAMPROW [rsi-1*SIZEOF_JSAMPROW] ; inptr1(above)
|
||||
mov rbxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0
|
||||
mov rsip, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1(below)
|
||||
mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
|
||||
mov rdip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
|
||||
|
||||
test rax, SIZEOF_XMMWORD-1
|
||||
jz short .skip
|
||||
@@ -514,13 +515,13 @@ EXTN(jsimd_h2v1_upsample_sse2):
|
||||
|
||||
mov rsi, r12 ; input_data
|
||||
mov rdi, r13
|
||||
mov rdi, JSAMPARRAY [rdi] ; output_data
|
||||
mov rdip, JSAMPARRAY [rdi] ; output_data
|
||||
.rowloop:
|
||||
push rdi
|
||||
push rsi
|
||||
|
||||
mov rsi, JSAMPROW [rsi] ; inptr
|
||||
mov rdi, JSAMPROW [rdi] ; outptr
|
||||
mov rsip, JSAMPROW [rsi] ; inptr
|
||||
mov rdip, JSAMPROW [rdi] ; outptr
|
||||
mov rax, rdx ; colctr
|
||||
.columnloop:
|
||||
|
||||
@@ -602,14 +603,14 @@ EXTN(jsimd_h2v2_upsample_sse2):
|
||||
|
||||
mov rsi, r12 ; input_data
|
||||
mov rdi, r13
|
||||
mov rdi, JSAMPARRAY [rdi] ; output_data
|
||||
mov rdip, JSAMPARRAY [rdi] ; output_data
|
||||
.rowloop:
|
||||
push rdi
|
||||
push rsi
|
||||
|
||||
mov rsi, JSAMPROW [rsi] ; inptr
|
||||
mov rbx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
|
||||
mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
|
||||
mov rsip, JSAMPROW [rsi] ; inptr
|
||||
mov rbxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0
|
||||
mov rdip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1
|
||||
mov rax, rdx ; colctr
|
||||
.columnloop:
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
;
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2009, 2016, D. R. Commander.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -457,12 +458,12 @@ EXTN(jsimd_idct_float_sse2):
|
||||
pshufd xmm5, xmm6, 0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
|
||||
pshufd xmm3, xmm7, 0x4E ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
|
||||
|
||||
mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
|
||||
mov rbx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
|
||||
mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
|
||||
mov rbxp, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
|
||||
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
|
||||
movq XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm7
|
||||
mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
|
||||
mov rbx, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
|
||||
mov rdxp, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
|
||||
mov rbxp, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
|
||||
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5
|
||||
movq XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm3
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
;
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2009, 2016, D. R. Commander.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -462,21 +463,21 @@ EXTN(jsimd_idct_ifast_sse2):
|
||||
pshufd xmm6, xmm4, 0x4E ; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
|
||||
pshufd xmm2, xmm7, 0x4E ; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
|
||||
|
||||
mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
|
||||
mov rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
|
||||
mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
|
||||
mov rsip, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
|
||||
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
|
||||
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
|
||||
mov rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
|
||||
mov rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
|
||||
mov rdxp, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
|
||||
mov rsip, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
|
||||
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
|
||||
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7
|
||||
|
||||
mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
|
||||
mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
|
||||
mov rdxp, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
|
||||
mov rsip, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
|
||||
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5
|
||||
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0
|
||||
mov rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
|
||||
mov rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
|
||||
mov rdxp, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
|
||||
mov rsip, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
|
||||
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
|
||||
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
;
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2009, 2016, 2018, D. R. Commander.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -389,23 +390,23 @@ EXTN(jsimd_idct_islow_avx2):
|
||||
|
||||
mov eax, r13d
|
||||
|
||||
mov rdx, JSAMPROW [r12+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rsi, JSAMPROW [r12+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rdxp, JSAMPROW [r12+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rsip, JSAMPROW [r12+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm0
|
||||
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm1
|
||||
|
||||
mov rdx, JSAMPROW [r12+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rsi, JSAMPROW [r12+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rdxp, JSAMPROW [r12+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rsip, JSAMPROW [r12+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2
|
||||
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
|
||||
|
||||
mov rdx, JSAMPROW [r12+4*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rsi, JSAMPROW [r12+5*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rdxp, JSAMPROW [r12+4*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rsip, JSAMPROW [r12+5*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
|
||||
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5
|
||||
|
||||
mov rdx, JSAMPROW [r12+6*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rsi, JSAMPROW [r12+7*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rdxp, JSAMPROW [r12+6*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rsip, JSAMPROW [r12+7*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
|
||||
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
;
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2009, 2016, D. R. Commander.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -819,21 +820,21 @@ EXTN(jsimd_idct_islow_sse2):
|
||||
pshufd xmm2, xmm4, 0x4E ; xmm2=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
|
||||
pshufd xmm5, xmm3, 0x4E ; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
|
||||
|
||||
mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
|
||||
mov rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
|
||||
mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
|
||||
mov rsip, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
|
||||
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm7
|
||||
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm1
|
||||
mov rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
|
||||
mov rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
|
||||
mov rdxp, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
|
||||
mov rsip, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
|
||||
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
|
||||
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
|
||||
|
||||
mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
|
||||
mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
|
||||
mov rdxp, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
|
||||
mov rsip, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
|
||||
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
|
||||
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0
|
||||
mov rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
|
||||
mov rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
|
||||
mov rdxp, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
|
||||
mov rsip, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
|
||||
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2
|
||||
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
;
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2009, 2016, D. R. Commander.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -381,12 +382,12 @@ EXTN(jsimd_idct_4x4_sse2):
|
||||
pshufd xmm1, xmm4, 0x4E ; xmm1=(20 21 22 23 30 31 32 33 00 ..)
|
||||
pshufd xmm3, xmm4, 0x93 ; xmm3=(30 31 32 33 00 01 02 03 10 ..)
|
||||
|
||||
mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
|
||||
mov rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
|
||||
mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
|
||||
mov rsip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
|
||||
movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
|
||||
movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
|
||||
mov rdx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
|
||||
mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
|
||||
mov rdxp, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
|
||||
mov rsip, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
|
||||
movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
|
||||
movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
|
||||
|
||||
@@ -560,8 +561,8 @@ EXTN(jsimd_idct_2x2_sse2):
|
||||
pextrw ebx, xmm6, 0x00 ; ebx=(C0 D0 -- --)
|
||||
pextrw ecx, xmm6, 0x01 ; ecx=(C1 D1 -- --)
|
||||
|
||||
mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
|
||||
mov rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
|
||||
mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
|
||||
mov rsip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
|
||||
mov WORD [rdx+rax*SIZEOF_JSAMPLE], bx
|
||||
mov WORD [rsi+rax*SIZEOF_JSAMPLE], cx
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
;
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2009, 2016, D. R. Commander.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -53,8 +54,8 @@ EXTN(jsimd_convsamp_float_sse2):
|
||||
mov rdi, r12
|
||||
mov rcx, DCTSIZE/2
|
||||
.convloop:
|
||||
mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rbxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rdxp, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
|
||||
movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE]
|
||||
movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE]
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2009, 2016, 2018, D. R. Commander.
|
||||
; Copyright (C) 2016, Matthieu Darbois.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -46,23 +47,23 @@ EXTN(jsimd_convsamp_avx2):
|
||||
|
||||
mov eax, r11d
|
||||
|
||||
mov rsi, JSAMPROW [r10+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rdi, JSAMPROW [r10+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rsip, JSAMPROW [r10+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rdip, JSAMPROW [r10+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
movq xmm0, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE]
|
||||
pinsrq xmm0, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1
|
||||
|
||||
mov rsi, JSAMPROW [r10+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rdi, JSAMPROW [r10+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rsip, JSAMPROW [r10+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rdip, JSAMPROW [r10+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
movq xmm1, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE]
|
||||
pinsrq xmm1, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1
|
||||
|
||||
mov rsi, JSAMPROW [r10+4*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rdi, JSAMPROW [r10+5*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rsip, JSAMPROW [r10+4*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rdip, JSAMPROW [r10+5*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
movq xmm2, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE]
|
||||
pinsrq xmm2, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1
|
||||
|
||||
mov rsi, JSAMPROW [r10+6*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rdi, JSAMPROW [r10+7*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rsip, JSAMPROW [r10+6*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rdip, JSAMPROW [r10+7*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
movq xmm3, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE]
|
||||
pinsrq xmm3, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
;
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2009, 2016, D. R. Commander.
|
||||
; Copyright (C) 2018, Matthias Räncker.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -53,14 +54,14 @@ EXTN(jsimd_convsamp_sse2):
|
||||
mov rdi, r12
|
||||
mov rcx, DCTSIZE/4
|
||||
.convloop:
|
||||
mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rbxp, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rdxp, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
|
||||
movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm0=(01234567)
|
||||
movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF)
|
||||
|
||||
mov rbx, JSAMPROW [rsi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rdx, JSAMPROW [rsi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rbxp, JSAMPROW [rsi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
mov rdxp, JSAMPROW [rsi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
||||
|
||||
movq xmm2, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm2=(GHIJKLMN)
|
||||
movq xmm3, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm3=(OPQRSTUV)
|
||||
|
||||
Reference in New Issue
Block a user