[Markdown]
Linux hints: compressors test script
small bash script to test performance of various linux compression utilities
general notes:
Some compressors include very high, very slow, very resource-hungry presets. Some settings can be tuned via variables at the beginning of the script (not all compressors support these settings). PARAMS is for internal use; do not touch it, it will be overwritten anyway. Set MEMORY_LIMIT_BYTES to the amount of free RAM. Set THREADS to the number of free CPU cores.
not included compressors:
- lzip - very slow compression <2mb/s on ryzen9, no multithreading
- 7zip - does not work properly with pipes in 7z/lzma mode
notes on options:
- gzip - does support a block size option; this option is not used in the test because it makes no difference at all to the compressed file size and only increases compression time
- xz - playing with the advanced lzma2 options does not offer a noticeable compression ratio benefit but increases compression time drastically, so these options are avoided; the only meaningful "advanced" option for xz is block size, which is used
- lrzip - the "--maxram" option is not used for RAM limiting: it looks like it is handled incorrectly, or I am somehow passing the variable incorrectly. lrzip option handling is also a bit buggy; for example, when the input is a file from the filesystem rather than stdin, the option "--outfile -" sends the compressed data to stderr and the status information to stdout
small review of compressors
- notes on 7zip, the most popular compressor for windows (and linux): first of all, 7zip has a poor and limited linux port that lacks many wanted features such as pipes and acls; more importantly, 7zip loses in both compression ratio and speed. The manual recommends "-t7z -m0=lzma -mx=9 -mfb=64 -md=32m -ms=on" as "ultra settings", which compress the test dataset into "81M" and take 3:58 minutes to do so. xz, on the other hand, reaches 80,8MiB in 0:00:48 with -9 and 128mb block size, 80,1MiB in 0:01:02 with -e -9e and 128mb block size, 78,9MiB in 0:01:24 with -9 and 256mb block size, and 78,1MiB in 0:01:53 with -e -9e: a noticeably smaller size, twice as fast. lrzip in zpaq mode is the absolute winner in all important tests (speed, compression ratio): 65,1MiB in 0:01:38 for the same dataset, options -z -L9
#!/usr/bin/env bash
# Compressor benchmark: configuration and shared constants.
#
# Usage: <script> FILE
#
# The script relies on bash arrays and `local`, so it has to run under bash
# rather than a plain POSIX sh.
#
# REQUIREMENTS:
# pv
# zstd
# gzip (pigz)
# bzip2 (pbzip2)
# xz
# lrzip

# Extra per-run compressor options; every test function overwrites this.
PARAMS=( )

# Thread count handed to the compressors.
THREADS=30

# Memory budget in bytes, and the same budget in whole MiB / GiB
# (integer division, rounded down).
MEMORY_LIMIT_BYTES=98719476736
MEMORY_LIMIT_MBYTES=$(( MEMORY_LIMIT_BYTES / 1024 / 1024 ))
MEMORY_LIMIT_GBYTES=$(( MEMORY_LIMIT_BYTES / 1024 / 1024 / 1024 ))
#MEMORY_LIMIT=64G

# File to compress: first command-line argument (empty when none was given).
FILE="${1-}"

# Quoted and preceded by `--` so a path with spaces or a leading dash is
# passed through intact, and so an empty FILE makes du fail instead of
# silently sizing the current directory.
echo "target file size $(du -hs -- "${FILE}")"
printf "\n\n"

# Block sizes in bytes.
BLOCKSIZE128KB=131072
BLOCKSIZE256KB=262144
BLOCKSIZE512KB=524288
BLOCKSIZE1MB=1048576
BLOCKSIZE2MB=2097152
BLOCKSIZE16MB=16777216
BLOCKSIZE128MB=134217728
BLOCKSIZE256MB=268435456
BLOCKSIZE512MB=536870912

# Currently selected block size; reassigned before each xz run.
BLOCKSIZE=$BLOCKSIZE128KB
# Benchmark zstd on ${FILE} with three option sets: defaults, "--long -10"
# and "--long -18".
# Globals:   FILE, THREADS, MEMORY_LIMIT_MBYTES (read); PARAMS (written)
# Outputs:   progress banners on stdout; compressed data is discarded,
#            pv reports the in/out throughput.
test_zstd()
{
  PARAMS=( )
  local MEM_LIM=""

  # Numeric comparison: the value given to zstd is capped at 4095.
  # NOTE(review): the value is a MiB count passed to -M without a unit
  # suffix - confirm that zstd interprets it as intended.
  if (( MEMORY_LIMIT_MBYTES > 4095 )); then
    MEM_LIM=4095
  else
    MEM_LIM=$MEMORY_LIMIT_MBYTES
  fi

  echo "$(date) testing zstd with defaults..."
  pv "${FILE}" -abt -N in \
    | zstd --stdout -T"$THREADS" -M"$MEM_LIM" "${PARAMS[@]}" \
    | pv -abt -N out > /dev/null
  echo "$(date) done"

  PARAMS=( "--long" "-10" )
  echo "$(date) testing zstd with ( ${PARAMS[*]} ) medium..."
  pv "${FILE}" -abt -N in \
    | zstd --stdout -T"$THREADS" -M"$MEM_LIM" "${PARAMS[@]}" \
    | pv -abt -N out > /dev/null
  echo "$(date) done"

  PARAMS=( "--long" "-18" )
  echo "$(date) testing zstd with ( ${PARAMS[*]} ) ultra..."
  pv "${FILE}" -abt -N in \
    | zstd --stdout -T"$THREADS" -M"$MEM_LIM" "${PARAMS[@]}" \
    | pv -abt -N out > /dev/null
  echo "$(date) done"

  printf "\n\n\n\n"
}
# Benchmark gzip-format compression on ${FILE}: defaults, --fast and --best.
# Globals:   FILE, THREADS (read); PARAMS (written)
# Outputs:   progress banners on stdout; compressed data is discarded,
#            pv reports the in/out throughput.
test_gzip()
{
  # -p (thread count) is a pigz option that stock gzip does not have, so use
  # pigz when it is installed. Otherwise call `gzip` exactly as before, which
  # only works where `gzip` itself resolves to pigz.
  local gz_bin=gzip
  if command -v pigz > /dev/null 2>&1; then
    gz_bin=pigz
  fi

  PARAMS=( )
  echo "$(date) testing gzip with defaults..."
  pv "${FILE}" -abt -N in \
    | "$gz_bin" --stdout -p"$THREADS" "${PARAMS[@]}" \
    | pv -abt -N out > /dev/null
  echo "$(date) done"

  PARAMS=( "--fast" )
  echo "$(date) testing gzip ( ${PARAMS[*]} ) fast mode..."
  pv "${FILE}" -abt -N in \
    | "$gz_bin" --stdout -p"$THREADS" "${PARAMS[@]}" \
    | pv -abt -N out > /dev/null
  echo "$(date) done"

  PARAMS=( "--best" )
  echo "$(date) testing gzip ( ${PARAMS[*]} ) best mode..."
  pv "${FILE}" -abt -N in \
    | "$gz_bin" --stdout -p"$THREADS" "${PARAMS[@]}" \
    | pv -abt -N out > /dev/null
  echo "$(date) done"

  printf "\n\n\n\n"
}
# Benchmark bzip2-format compression on ${FILE}: defaults, --fast, --best and
# "--best -b20k".
# Globals:   FILE, THREADS, MEMORY_LIMIT_MBYTES (read); PARAMS (written)
# Outputs:   progress banners on stdout; compressed data is discarded,
#            pv reports the in/out throughput.
test_bz2()
{
  # -p / -m / -b<N>k look like pbzip2 options rather than plain bzip2 ones
  # (NOTE(review): confirm against the pbzip2 manual), so use pbzip2 when it
  # is installed. Otherwise call `bzip2` exactly as before.
  local bz_bin=bzip2
  if command -v pbzip2 > /dev/null 2>&1; then
    bz_bin=pbzip2
  fi

  PARAMS=( )
  local MEM_LIM=""

  # Numeric comparison: the value given to -m is capped at 2000.
  if (( MEMORY_LIMIT_MBYTES > 2000 )); then
    MEM_LIM=2000
  else
    MEM_LIM=$MEMORY_LIMIT_MBYTES
  fi

  echo "$(date) testing bzip2 with defaults"
  pv "${FILE}" -abt -N in \
    | "$bz_bin" --stdout -p"$THREADS" -m"$MEM_LIM" "${PARAMS[@]}" \
    | pv -abt -N out > /dev/null
  echo "$(date) done"

  PARAMS=( "--fast" )
  echo "$(date) testing bzip2 ( ${PARAMS[*]} ) fast preset"
  pv "${FILE}" -abt -N in \
    | "$bz_bin" --stdout -p"$THREADS" -m"$MEM_LIM" "${PARAMS[@]}" \
    | pv -abt -N out > /dev/null
  echo "$(date) done"

  PARAMS=( "--best" )
  echo "$(date) testing bzip2 ( ${PARAMS[*]} ) best preset"
  pv "${FILE}" -abt -N in \
    | "$bz_bin" --stdout -p"$THREADS" -m"$MEM_LIM" "${PARAMS[@]}" \
    | pv -abt -N out > /dev/null
  echo "$(date) done"

  PARAMS=( "--best" "-b20k" )
  echo "$(date) testing bzip2 ( ${PARAMS[*]} ) exterme preset"
  pv "${FILE}" -abt -N in \
    | "$bz_bin" --stdout -p"$THREADS" -m"$MEM_LIM" "${PARAMS[@]}" \
    | pv -abt -N out > /dev/null
  echo "$(date) done"

  printf "\n\n\n\n"
}
# Benchmark xz on ${FILE} at the currently selected BLOCKSIZE: defaults, -0,
# -9 and "-e -9e".
# Globals:   FILE, THREADS, MEMORY_LIMIT_BYTES, BLOCKSIZE (read);
#            PARAMS (written)
# Outputs:   progress banners on stdout; compressed data is discarded,
#            pv reports the in/out throughput.
test_xz()
{
  PARAMS=( )
  echo "$(date) testing xz with defaults..."
  pv "${FILE}" -abt -N in \
    | xz --stdout -T"$THREADS" "${PARAMS[@]}" -M"$MEMORY_LIMIT_BYTES" --block-size="$BLOCKSIZE" \
    | pv -abt -N out > /dev/null
  echo "$(date) done"

  PARAMS=( "-0" )
  echo "$(date) testing xz ( ${PARAMS[*]} ) fast preset"
  pv "${FILE}" -abt -N in \
    | xz --stdout -T"$THREADS" "${PARAMS[@]}" -M"$MEMORY_LIMIT_BYTES" --block-size="$BLOCKSIZE" \
    | pv -abt -N out > /dev/null
  echo "$(date) done"

  PARAMS=( "-9" )
  echo "$(date) testing xz ( ${PARAMS[*]} ) best preset"
  pv "${FILE}" -abt -N in \
    | xz --stdout -T"$THREADS" "${PARAMS[@]}" -M"$MEMORY_LIMIT_BYTES" --block-size="$BLOCKSIZE" \
    | pv -abt -N out > /dev/null
  echo "$(date) done"

  PARAMS=( "-e" "-9e" )
  echo "$(date) testing xz ( ${PARAMS[*]} ) extreme preset"
  pv "${FILE}" -abt -N in \
    | xz --stdout -T"$THREADS" "${PARAMS[@]}" -M"$MEMORY_LIMIT_BYTES" --block-size="$BLOCKSIZE" \
    | pv -abt -N out > /dev/null
  echo "$(date) done"

  printf "\n\n\n\n"
}
# Benchmark lrzip on ${FILE} with six option sets: defaults, -L1, -L9, and the
# same three again with -z (announced as "ZPAQ mode").
# Globals:   FILE, THREADS (read); PARAMS (written, left at the last set)
# Outputs:   progress banners on stdout; compressed data is discarded,
#            pv reports the in/out throughput.
test_lrzip()
{
  # Option sets in run order (space-separated) and the banner text that
  # follows "testing lrzip " for each of them.
  local -a option_sets=( "" "-L1" "-L9" "-z" "-z -L1" "-z -L9" )
  local -a banners=(
    "with default settings"
    "( -L1 )"
    "( -L9 )"
    "with default settings ( ZPAQ mode )"
    "( -z -L1 ) ( ZPAQ mode )"
    "( -z -L9 ) ( ZPAQ mode )"
  )
  local idx

  for idx in "${!option_sets[@]}"; do
    # Split the option string into the global PARAMS array (default IFS);
    # an empty string yields an empty array.
    read -r -a PARAMS <<< "${option_sets[idx]}"

    printf '%s testing lrzip %s\n' "$(date)" "${banners[idx]}"
    # THREADS is assumed to be a plain integer; left unquoted as before.
    pv "${FILE}" -abt -N in \
      | lrzip -q "${PARAMS[@]}" --threads $THREADS \
      | pv -abt -N out > /dev/null
    printf '%s done\n' "$(date)"
  done

  printf "\n\n\n\n"
}
# Benchmark the pipeline "lrzip -n | zstd" on ${FILE} with three zstd option
# sets: defaults, "--long -10" and "--long -18".
# Globals:   FILE, THREADS, MEMORY_LIMIT_MBYTES (read); PARAMS (written)
# Outputs:   progress banners on stdout; compressed data is discarded,
#            pv reports the in/out throughput.
test_lrzip_with_zst()
{
  PARAMS=( )
  local MEM_LIM=""

  # Numeric comparison: the value given to zstd is capped at 4095.
  # NOTE(review): the value is a MiB count passed to -M without a unit
  # suffix - confirm that zstd interprets it as intended.
  if (( MEMORY_LIMIT_MBYTES > 4095 )); then
    MEM_LIM=4095
  else
    MEM_LIM=$MEMORY_LIMIT_MBYTES
  fi

  echo "$(date) testing lrzip with zstd"
  pv "${FILE}" -abt -N in \
    | lrzip -q -n --threads "$THREADS" \
    | zstd --stdout -T"$THREADS" -M"$MEM_LIM" "${PARAMS[@]}" \
    | pv -abt -N out > /dev/null
  echo "$(date) done"

  PARAMS=( "--long" "-10" )
  echo "$(date) testing lrzip with zstd with ( ${PARAMS[*]} ) medium"
  pv "${FILE}" -abt -N in \
    | lrzip -q -n --threads "$THREADS" \
    | zstd --stdout -T"$THREADS" -M"$MEM_LIM" "${PARAMS[@]}" \
    | pv -abt -N out > /dev/null
  echo "$(date) done"

  PARAMS=( "--long" "-18" )
  echo "$(date) testing lrzip with zstd with ( ${PARAMS[*]} ) ultra"
  pv "${FILE}" -abt -N in \
    | lrzip -q -n --threads "$THREADS" \
    | zstd --stdout -T"$THREADS" -M"$MEM_LIM" "${PARAMS[@]}" \
    | pv -abt -N out > /dev/null
  echo "$(date) done"

  printf "\n\n\n\n"
}
# Run every benchmark in turn; each test_* function prints its own banners.
echo "$(date) testing gzip"
test_gzip

echo "$(date) testing bzip2"
test_bz2

echo "$(date) testing zstd"
test_zstd

# xz is repeated with growing block sizes; test_xz reads the global BLOCKSIZE.
BLOCKSIZE=$BLOCKSIZE1MB
echo "$(date) testing xz with 1mb blocksize"
test_xz

BLOCKSIZE=$BLOCKSIZE16MB
echo "$(date) testing xz with 16mb block size"
test_xz

BLOCKSIZE=$BLOCKSIZE128MB
echo "$(date) testing xz with 128mb block size"
test_xz

BLOCKSIZE=$BLOCKSIZE256MB
echo "$(date) testing xz with 256mb block size"
test_xz

echo "$(date) testing lrzip"
test_lrzip

# $(date), not ${date}: the latter expands an unset variable and drops the
# timestamp from this banner.
echo "$(date) testing lrzip with zst"
test_lrzip_with_zst