0
0
mirror of https://github.com/lz4/lz4 synced 2026-01-18 17:21:30 +01:00

improved lorem ipsum generator speed by a factor > x8

This commit is contained in:
Yann Collet
2024-07-20 14:29:58 -07:00
parent 2a1de07855
commit 8ce9e94cd5
2 changed files with 62 additions and 17 deletions

View File

@@ -48,11 +48,11 @@
#include "lorem.h"
#include <assert.h>
#include <limits.h> /* INT_MAX */
#include <stdlib.h> /* malloc, abort */
#include <string.h> /* memcpy */
#define WORD_MAX_SIZE 20
/* Define the word pool */
/* Define the word pool
* Note: all words must have a len <= 16 */
static const char* kWords[] = {
"lorem", "ipsum", "dolor", "sit", "amet",
"consectetur", "adipiscing", "elit", "sed", "do",
@@ -109,7 +109,9 @@ static const char* kWords[] = {
#define KNBWORDS (sizeof(kWords) / sizeof(kWords[0]))
static const unsigned kNbWords = KNBWORDS;
static unsigned char g_wordLen[KNBWORDS] = {0};
static const char* g_words[KNBWORDS] = { NULL };
static unsigned g_wordLen[KNBWORDS] = {0};
static char* g_wordBuffer = NULL;
/* simple 1-dimension distribution, based on word's length, favors small words
*/
@@ -121,7 +123,7 @@ static int g_distrib[DISTRIB_SIZE_MAX] = { 0 };
static unsigned g_distribCount = 0;
static void countFreqs(
const unsigned char wordLen[],
const unsigned wordLen[],
size_t nbWords,
const int* weights,
unsigned long nbWeights)
@@ -142,8 +144,7 @@ static void countFreqs(
static void init_word_len(
const char* words[],
size_t nbWords
)
size_t nbWords)
{
size_t n;
assert(words != NULL);
@@ -155,8 +156,35 @@ static void init_word_len(
}
/* sumLen() :
 * Adds up the first `s` entries of `sizes` and returns the total.
 * `sizes` must not be NULL (checked by assert). */
static size_t sumLen(const unsigned* sizes, size_t s)
{
    size_t acc = 0;
    size_t remaining = s;
    assert(sizes != NULL);
    /* walk the array from its last entry down to the first;
     * the order of accumulation does not affect the sum */
    while (remaining > 0) {
        remaining--;
        acc += sizes[remaining];
    }
    return acc;
}
/* init_word_buffer() :
 * Copies every entry of kWords[] back-to-back (no terminators) into a single
 * zero-filled heap buffer, and makes g_words[] point at the packed copies.
 * Word lengths are read from g_wordLen[].
 * The buffer is allocated 16 bytes larger than the packed words, so that
 * a fixed 16-byte read starting at any packed word stays inside the allocation.
 * Must run only once (asserts g_wordBuffer is still NULL);
 * aborts the program when the allocation fails. */
static void init_word_buffer(void)
{
    size_t offset = 0;
    size_t wordID;
    size_t const packedSize = sumLen(g_wordLen, kNbWords);

    assert(g_wordBuffer == NULL);
    g_wordBuffer = calloc(1, packedSize + 16);
    if (g_wordBuffer == NULL) abort();

    for (wordID = 0; wordID < kNbWords; wordID++) {
        size_t const len = g_wordLen[wordID];
        memcpy(g_wordBuffer + offset, kWords[wordID], len);
        g_words[wordID] = g_wordBuffer + offset;
        offset += len;
    }
}
static void init_word_distrib(
const unsigned char wordLen[],
const unsigned wordLen[],
size_t nbWords,
const int* weights,
unsigned long nbWeights)
@@ -211,22 +239,38 @@ static void writeLastCharacters(void)
g_nbChars = g_maxChars;
}
static void generateWord(const char* word, size_t wordLen, const char* separator, size_t sepLen, int upCase)
/* generateLastWord() :
 * Writes `word` (`wordLen` bytes, no separator) at the current output position,
 * then calls writeLastCharacters() to finish the output.
 * If `wordLen + 2` more bytes would go past g_maxChars, the word is skipped
 * and only writeLastCharacters() runs.
 * A non-zero `upCase` shifts the first written byte by ('A' - 'a'). */
static void generateLastWord(const char* word, size_t wordLen, int upCase)
{
/* NOTE(review): `sepLen` is not a parameter of this function; the next two lines
 * look like leftovers of the previous generateWord() body shown by the diff view
 * -- confirm against the actual file. */
size_t const len = wordLen + sepLen;
if (g_nbChars + len > g_maxChars) {
/* not enough room for the word plus 2 extra bytes : finish the output now */
if (g_nbChars + wordLen + 2 > g_maxChars) {
writeLastCharacters();
return;
}
assert(wordLen <= 16);
/* exact-length copy */
memcpy(g_ptr + g_nbChars, word, wordLen);
if (upCase) {
/* offset added to the first byte of the word just written */
static const char toUp = 'A' - 'a';
g_ptr[g_nbChars] = (char)(g_ptr[g_nbChars] + toUp);
}
g_nbChars += wordLen;
writeLastCharacters();
}
#define MAX(a,b) ((a)<(b)?(b):(a))
/* generateWord() :
 * Appends `word` followed by `separator` to the output at g_ptr + g_nbChars.
 * `wordLen` must be <= 16 and `sepLen` must be <= 2 (both checked by assert).
 * When fewer than MAX(16, wordLen + 2) bytes remain before g_maxChars,
 * the work is delegated to generateLastWord() instead.
 * A non-zero `upCase` shifts the first written byte by ('A' - 'a').
 * Note : the copies below use fixed sizes (16 and 2), so `word` must be
 * readable for 16 bytes and `separator` for 2 bytes, whatever their lengths. */
static void generateWord(const char* word, size_t wordLen, const char* separator, size_t sepLen, int upCase)
{
/* room required by the fixed-size copies below */
size_t const wlen = MAX(16, wordLen + 2);
if (g_nbChars + wlen > g_maxChars) {
generateLastWord(word, wordLen, upCase);
return;
}
assert(wordLen <= 16);
/* always copy 16 bytes; only the first `wordLen` are kept,
 * since g_nbChars advances by `wordLen` and later writes start there */
memcpy(g_ptr + g_nbChars, word, 16);
if (upCase) {
/* offset added to the first byte of the word just written */
static const char toUp = 'A' - 'a';
g_ptr[g_nbChars] = (char)(g_ptr[g_nbChars] + toUp);
}
g_nbChars += wordLen;
assert(sepLen <= 2);
/* NOTE(review): the two copies below target the same destination; the `sepLen`
 * variant looks like the pre-change line kept by the diff view -- confirm. */
memcpy(g_ptr + g_nbChars, separator, sepLen);
/* always copy 2 bytes; only `sepLen` of them are accounted for */
memcpy(g_ptr + g_nbChars, separator, 2);
g_nbChars += sepLen;
}
@@ -253,7 +297,7 @@ static void generateSentence(int nbWords)
sep = ", ", sepLen=2;
if (i == nbWords - 1)
sep = endSep, sepLen=2;
generateWord(kWords[wordID], g_wordLen[wordID], sep, sepLen, i == 0);
generateWord(g_words[wordID], g_wordLen[wordID], sep, sepLen, i == 0);
}
}
@@ -284,9 +328,9 @@ static void generateFirstSentence(void)
separator = ", ", sepLen=2;
if (i == 7)
separator = ", ", sepLen=2;
generateWord(kWords[i], g_wordLen[i], separator, sepLen, i == 0);
generateWord(g_words[i], g_wordLen[i], separator, sepLen, i == 0);
}
generateWord(kWords[18], g_wordLen[18], ". ", 2, 0);
generateWord(g_words[18], g_wordLen[18], ". ", 2, 0);
}
size_t
@@ -299,6 +343,7 @@ LOREM_genBlock(void* buffer, size_t size, unsigned seed, int first, int fill)
g_randRoot = seed;
if (g_distribCount == 0) {
init_word_len(kWords, kNbWords);
init_word_buffer();
init_word_distrib(g_wordLen, kNbWords, kWeights, kNbWeights);
}

View File

@@ -38,7 +38,7 @@
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define LOREM_BLOCKSIZE (1 << 10)
#define LOREM_BLOCKSIZE (2 << 10)
void LOREM_genOut(unsigned long long size, unsigned seed)
{
char buff[LOREM_BLOCKSIZE] = {0};