[Markdown] 

Linux hints: compressors test script

small bash script to test performance of various linux compression utilities

general notes:

Some compressors include very high, very slow, very resource-hungry presets. Some settings can be tuned via variables at the beginning of the script (not all compressors support these settings). PARAMS is for internal use: do not touch it, it will be overwritten anyway. Set MEMORY_LIMIT_BYTES to the amount of free RAM. Set THREADS to the number of free CPU cores.

not included compressors:

  1. lzip - very slow compression (<2 MB/s on a Ryzen 9), no multithreading
  2. 7zip - does not work properly with pipes in 7z/lzma mode

notes on options:

  1. gzip - does support a block size option; this option is not used in the test because it makes no difference at all to the compressed file size and only increases compression time
  2. xz - playing with the advanced lzma2 options does not offer a noticeable compression ratio benefit but increases compression time drastically, so these options are avoided; the only meaningful "advanced" option for xz is the block size, which is used
  3. lrzip - the "--maxram" option is not used for RAM limiting; it looks like it is handled incorrectly, or I am somehow passing the variable incorrectly. lrzip's option handling is also a bit buggy: for example, when the input file comes from the filesystem rather than stdin, the option "--outfile -" sends the compressed data to stderr and the status information to stdout

small review of compressors

  1. notes on 7zip, the most popular compressor for windows (and linux): first of all, 7zip has a poor and limited linux port that lacks many wanted features such as pipes and ACLs; more importantly, 7zip loses in both compression ratio and speed. The "ultra settings" recommended by the manual, "-t7z -m0=lzma -mx=9 -mfb=64 -md=32m -ms=on", compress the test dataset into "81M" and take 3:58 minutes to do so. xz, on the other hand, reaches 80,8MiB in 0:00:48 with -9 and a 128mb block size, 80,1MiB in 0:01:02 with -e -9e and a 128mb block size, 78,9MiB in 0:01:24 with -9 and a 256mb block size, and 78,1MiB in 0:01:53 with -e -9e: a noticeably smaller size, twice as fast. lrzip in zpaq mode is the absolute winner in all important tests (speed, compression ratio): 65,1MiB in 0:01:38 for the same dataset with options -z -L9
#!/usr/bin/env bash
#
# Benchmark several Linux compressors against one input file.
# Usage: pass the file to compress as the first argument.
# The script relies on arrays and `local`, so it has to run under bash
# rather than a plain POSIX sh.

# REQUIREMENTS:
# pv
# zstd
# gzip (pigz)
# bzip2 (pbzip2)
# xz
# lrzip

# Scratch array with the options of the test that is currently running;
# every test function overwrites it.
PARAMS=( )
# Thread count handed to each compressor.
THREADS=30
# Memory budget in bytes, plus the same budget truncated to MiB and GiB.
MEMORY_LIMIT_BYTES=98719476736
MEMORY_LIMIT_MBYTES=$(( MEMORY_LIMIT_BYTES/1024/1024 ))
MEMORY_LIMIT_GBYTES=$(( MEMORY_LIMIT_BYTES/1024/1024/1024 ))
#MEMORY_LIMIT=64G
# File to benchmark (first positional argument).
FILE="$1"

# Quoted and terminated with `--` so paths with spaces or a leading dash work.
echo "target file size $(du -hs -- "${FILE}")"
printf "\n\n"

# Block sizes in bytes, selectable through BLOCKSIZE.
BLOCKSIZE128KB=131072
BLOCKSIZE256KB=262144
BLOCKSIZE512KB=524288
BLOCKSIZE1MB=1048576
BLOCKSIZE2MB=2097152
BLOCKSIZE16MB=16777216
BLOCKSIZE128MB=134217728
BLOCKSIZE256MB=268435456
BLOCKSIZE512MB=536870912

# Initial block size; the run sequence at the end of the script reassigns it
# before each xz run.
BLOCKSIZE=$BLOCKSIZE128KB

# Benchmark zstd on ${FILE} with three presets: defaults, "--long -10" and
# "--long -18". The input is streamed through pv and the compressed stream is
# discarded, so only the pv counters ("in"/"out") are reported.
# Globals: FILE, THREADS, MEMORY_LIMIT_MBYTES (read); PARAMS (overwritten).
test_zstd()
{
    PARAMS=( )
    local MEM_LIM=""
    # Numeric comparison: `>` inside [[ ]] compares strings, so e.g. 10000
    # would have been treated as smaller than 4095.
    # NOTE(review): the 4095 MiB cap presumably keeps the value inside zstd's
    # 32-bit memory-limit field - confirm.
    if (( MEMORY_LIMIT_MBYTES > 4095 )); then
        MEM_LIM=4095
    else
        MEM_LIM=$MEMORY_LIMIT_MBYTES
    fi
    echo "$(date) testing zstd with defaults..."
    # MEM_LIM is in MiB, so it is passed with an explicit unit suffix; a bare
    # number would be read by zstd as bytes.
    pv "${FILE}" -abt -N in | zstd --stdout -T"${THREADS}" -M"${MEM_LIM}MiB" "${PARAMS[@]}" | pv -abt -N out > /dev/null
    echo "$(date) done"
    PARAMS=( "--long" "-10" )
    echo "$(date) testing zstd with ( ${PARAMS[*]} ) medium..."
    pv "${FILE}" -abt -N in | zstd --stdout -T"${THREADS}" -M"${MEM_LIM}MiB" "${PARAMS[@]}" | pv -abt -N out > /dev/null
    echo "$(date) done"
    PARAMS=( "--long" "-18" )
    echo "$(date) testing zstd with ( ${PARAMS[*]} ) ultra..."
    pv "${FILE}" -abt -N in | zstd --stdout -T"${THREADS}" -M"${MEM_LIM}MiB" "${PARAMS[@]}" | pv -abt -N out > /dev/null
    echo "$(date) done"
    printf "\n\n\n\n"
}

# Benchmark gzip on ${FILE} with the default, --fast and --best presets.
# The input is streamed through pv and the compressed stream is discarded,
# so only the pv counters ("in"/"out") are reported.
# Globals: FILE, THREADS (read); PARAMS (overwritten).
# NOTE(review): -p (thread count) is a pigz option; this presumably relies on
# `gzip` resolving to pigz, as hinted by "gzip (pigz)" in the requirements
# list - confirm on the target machine.
test_gzip()
{
    PARAMS=( )
    echo "$(date) testing gzip with defaults..."
    pv "${FILE}" -abt -N in | gzip --stdout -p"${THREADS}" "${PARAMS[@]}" | pv -abt -N out > /dev/null
    echo "$(date) done"
    PARAMS=( "--fast" )
    echo "$(date) testing gzip ( ${PARAMS[*]} ) fast mode..."
    pv "${FILE}" -abt -N in | gzip --stdout -p"${THREADS}" "${PARAMS[@]}" | pv -abt -N out > /dev/null
    echo "$(date) done"
    PARAMS=( "--best" )
    echo "$(date) testing gzip ( ${PARAMS[*]} ) best mode..."
    pv "${FILE}" -abt -N in | gzip --stdout -p"${THREADS}" "${PARAMS[@]}" | pv -abt -N out > /dev/null
    echo "$(date) done"
    printf "\n\n\n\n"
}

# Benchmark bzip2 on ${FILE} with four presets: defaults, --fast, --best and
# "--best -b20k". The input is streamed through pv and the compressed stream
# is discarded, so only the pv counters ("in"/"out") are reported.
# Globals: FILE, THREADS, MEMORY_LIMIT_MBYTES (read); PARAMS (overwritten).
# NOTE(review): -p and -m are pbzip2 options; this presumably relies on
# `bzip2` resolving to pbzip2, as hinted by "bzip2 (pbzip2)" in the
# requirements list - confirm on the target machine.
test_bz2()
{
    PARAMS=( )
    local MEM_LIM=""
    # Numeric comparison: `>` inside [[ ]] compares strings, so e.g. 10000
    # would have been treated as smaller than 2000 and passed through uncapped.
    if (( MEMORY_LIMIT_MBYTES > 2000 )); then
        MEM_LIM=2000
    else
        MEM_LIM=$MEMORY_LIMIT_MBYTES
    fi
    echo "$(date) testing bzip2 with defaults"
    pv "${FILE}" -abt -N in | bzip2 --stdout -p"${THREADS}" -m"${MEM_LIM}" "${PARAMS[@]}" | pv -abt -N out > /dev/null
    echo "$(date) done"
    PARAMS=( "--fast" )
    echo "$(date) testing bzip2 ( ${PARAMS[*]} ) fast preset"
    pv "${FILE}" -abt -N in | bzip2 --stdout -p"${THREADS}" -m"${MEM_LIM}" "${PARAMS[@]}" | pv -abt -N out > /dev/null
    echo "$(date) done"
    PARAMS=( "--best" )
    echo "$(date) testing bzip2 ( ${PARAMS[*]} ) best preset"
    pv "${FILE}" -abt -N in | bzip2 --stdout -p"${THREADS}" -m"${MEM_LIM}" "${PARAMS[@]}" | pv -abt -N out > /dev/null
    echo "$(date) done"
    PARAMS=( "--best" "-b20k" )
    echo "$(date) testing bzip2 ( ${PARAMS[*]} ) exterme preset"
    pv "${FILE}" -abt -N in | bzip2 --stdout -p"${THREADS}" -m"${MEM_LIM}" "${PARAMS[@]}" | pv -abt -N out > /dev/null
    echo "$(date) done"
    printf "\n\n\n\n"
}

# Benchmark xz on ${FILE} with four presets: defaults, -0, -9 and "-e -9e",
# all using the block size currently stored in BLOCKSIZE. The input is
# streamed through pv and the compressed stream is discarded, so only the pv
# counters ("in"/"out") are reported.
# Globals: FILE, THREADS, MEMORY_LIMIT_BYTES, BLOCKSIZE (read);
#          PARAMS (overwritten).
test_xz()
{
    PARAMS=( )
    echo "$(date) testing xz with defaults..."
    pv "${FILE}" -abt -N in | xz --stdout -T"${THREADS}" "${PARAMS[@]}" -M"${MEMORY_LIMIT_BYTES}" --block-size="${BLOCKSIZE}" | pv -abt -N out > /dev/null
    echo "$(date) done"
    PARAMS=( "-0" )
    echo "$(date) testing xz ( ${PARAMS[*]} ) fast preset"
    pv "${FILE}" -abt -N in | xz --stdout -T"${THREADS}" "${PARAMS[@]}" -M"${MEMORY_LIMIT_BYTES}" --block-size="${BLOCKSIZE}" | pv -abt -N out > /dev/null
    echo "$(date) done"
    PARAMS=( "-9" )
    echo "$(date) testing xz ( ${PARAMS[*]} ) best preset"
    pv "${FILE}" -abt -N in | xz --stdout -T"${THREADS}" "${PARAMS[@]}" -M"${MEMORY_LIMIT_BYTES}" --block-size="${BLOCKSIZE}" | pv -abt -N out > /dev/null
    echo "$(date) done"
    PARAMS=( "-e" "-9e" )
    echo "$(date) testing xz ( ${PARAMS[*]} ) extreme preset"
    pv "${FILE}" -abt -N in | xz --stdout -T"${THREADS}" "${PARAMS[@]}" -M"${MEMORY_LIMIT_BYTES}" --block-size="${BLOCKSIZE}" | pv -abt -N out > /dev/null
    echo "$(date) done"
    printf "\n\n\n\n"
}

# Benchmark lrzip on ${FILE} in six rounds: default settings, -L1 and -L9,
# then the same three again with -z (labelled "ZPAQ mode" in the log).
# Each round streams the file through pv into lrzip and throws the result
# away, leaving only the pv "in"/"out" counters as the measurement.
# Globals: FILE, THREADS (read); PARAMS (overwritten, left at the last round).
test_lrzip()
{
    local round headline
    for round in 1 2 3 4 5 6; do
        # Pick the option set and the matching log line for this round.
        case "${round}" in
            1)
                PARAMS=( )
                headline="testing lrzip with default settings"
                ;;
            2)
                PARAMS=( "-L1" )
                headline="testing lrzip ( ${PARAMS[*]} )"
                ;;
            3)
                PARAMS=( "-L9" )
                headline="testing lrzip ( ${PARAMS[*]} )"
                ;;
            4)
                PARAMS=( "-z" )
                headline="testing lrzip with default settings ( ZPAQ mode )"
                ;;
            5)
                PARAMS=( "-z" "-L1" )
                headline="testing lrzip ( ${PARAMS[*]} ) ( ZPAQ mode )"
                ;;
            6)
                PARAMS=( "-z" "-L9" )
                headline="testing lrzip ( ${PARAMS[*]} ) ( ZPAQ mode )"
                ;;
        esac
        echo "$(date) ${headline}"
        pv "${FILE}" -abt -N in \
            | lrzip -q "${PARAMS[@]}" --threads $THREADS \
            | pv -abt -N out > /dev/null
        echo "$(date) done"
    done
    printf "\n\n\n\n"
}

# Benchmark lrzip (run with -n) feeding zstd, on ${FILE}, with three zstd
# presets: defaults, "--long -10" and "--long -18". The input is streamed
# through pv and the final stream is discarded, so only the pv counters
# ("in"/"out") are reported.
# Globals: FILE, THREADS, MEMORY_LIMIT_MBYTES (read); PARAMS (overwritten).
test_lrzip_with_zst()
{
    PARAMS=( )
    local MEM_LIM=""
    # Numeric comparison: `>` inside [[ ]] compares strings, so e.g. 10000
    # would have been treated as smaller than 4095.
    # NOTE(review): the 4095 MiB cap presumably keeps the value inside zstd's
    # 32-bit memory-limit field - confirm.
    if (( MEMORY_LIMIT_MBYTES > 4095 )); then
        MEM_LIM=4095
    else
        MEM_LIM=$MEMORY_LIMIT_MBYTES
    fi
    echo "$(date) testing lrzip with zstd"
    # MEM_LIM is in MiB, so it is passed with an explicit unit suffix; a bare
    # number would be read by zstd as bytes.
    pv "${FILE}" -abt -N in | lrzip -q -n --threads "${THREADS}" | zstd --stdout -T"${THREADS}" -M"${MEM_LIM}MiB" "${PARAMS[@]}" | pv -abt -N out > /dev/null
    echo "$(date) done"
    PARAMS=( "--long" "-10" )
    echo "$(date) testing lrzip with zstd with ( ${PARAMS[*]} ) medium"
    pv "${FILE}" -abt -N in | lrzip -q -n --threads "${THREADS}" | zstd --stdout -T"${THREADS}" -M"${MEM_LIM}MiB" "${PARAMS[@]}" | pv -abt -N out > /dev/null
    echo "$(date) done"
    PARAMS=( "--long" "-18" )
    echo "$(date) testing lrzip with zstd with ( ${PARAMS[*]} ) ultra"
    pv "${FILE}" -abt -N in | lrzip -q -n --threads "${THREADS}" | zstd --stdout -T"${THREADS}" -M"${MEM_LIM}MiB" "${PARAMS[@]}" | pv -abt -N out > /dev/null
    echo "$(date) done"
    printf "\n\n\n\n"
}


# Run every benchmark in sequence. xz is run once per block size by
# reassigning the global BLOCKSIZE that test_xz reads.
echo "$(date) testing gzip"
test_gzip
echo "$(date) testing bzip2"
test_bz2
echo "$(date) testing zstd"
test_zstd
BLOCKSIZE=$BLOCKSIZE1MB
echo "$(date) testing xz with 1mb blocksize"
test_xz
BLOCKSIZE=$BLOCKSIZE16MB
echo "$(date) testing xz with 16mb block size"
test_xz
BLOCKSIZE=$BLOCKSIZE128MB
echo "$(date) testing xz with 128mb block size"
test_xz
BLOCKSIZE=$BLOCKSIZE256MB
echo "$(date) testing xz with 256mb block size"
test_xz
echo "$(date) testing lrzip"
test_lrzip
# $(date) runs the command; the previous ${date} expanded an unset variable
# and printed an empty timestamp.
echo "$(date) testing lrzip with zst"
test_lrzip_with_zst