0
0
mirror of https://github.com/facebook/zstd synced 2026-01-18 17:21:18 +01:00

Merge pull request #4509 from arpadpanyik-arm/revert_branch_opt

AArch64: Revert previous branch optimization
This commit is contained in:
Yann Collet
2025-10-18 10:10:55 -07:00
committed by GitHub

View File

@@ -1261,6 +1261,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c
const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
# endif
(void)longOffsets;
seq.matchLength = mlDInfo->baseValue;
seq.litLength = llDInfo->baseValue;
{ U32 const ofBase = ofDInfo->baseValue;
@@ -1290,18 +1291,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
/* Always read extra bits, this keeps the logic simple,
* avoids branches, and avoids accidentally reading 0 bits.
*/
U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
BIT_reloadDStream(&seqState->DStream);
offset += BIT_readBitsFast(&seqState->DStream, extraBits);
} else {
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
}
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
prevOffset2 = prevOffset1;
prevOffset1 = prevOffset0;
prevOffset0 = offset;
@@ -1332,14 +1322,11 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c
seq.offset = offset;
}
if (mlBits > 0) {
if (mlBits > 0)
seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
BIT_reloadDStream(&seqState->DStream);
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
BIT_reloadDStream(&seqState->DStream);
}
if (UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
BIT_reloadDStream(&seqState->DStream);
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
@@ -1347,9 +1334,6 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c
if (llBits > 0)
seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
if (MEM_32bits())
BIT_reloadDStream(&seqState->DStream);
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
@@ -1357,7 +1341,6 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c
/* Don't update FSE state for last sequence. */
ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
BIT_reloadDStream(&seqState->DStream);
}