aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorConrad Meyer <cem@FreeBSD.org>2020-05-23 20:37:33 +0000
committerConrad Meyer <cem@FreeBSD.org>2020-05-23 20:37:33 +0000
commitbc64b5ce191d48b503e4fad8c0cefb774a2fa969 (patch)
tree9b41925d7159f1f57c1b59a1a5f887c80a57e999
parentea68403922c3b53b00fc999fcb3eaef1feb50177 (diff)
downloadsrc-vendor/zstd.tar.gz
src-vendor/zstd.zip
Notes
Notes: svn path=/vendor/zstd/dist/; revision=361423 svn path=/vendor/zstd/1.4.5/; revision=361424; tag=vendor/zstd/1.4.5
-rw-r--r--CHANGELOG29
-rw-r--r--CONTRIBUTING.md352
-rw-r--r--Makefile40
-rw-r--r--README.md40
-rw-r--r--TESTING.md4
-rw-r--r--appveyor.yml27
-rwxr-xr-xcontrib/cleanTabs2
-rw-r--r--contrib/docker/Dockerfile20
-rw-r--r--contrib/docker/README.md20
-rw-r--r--contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile44
-rw-r--r--contrib/experimental_dict_builders/benchmarkDictBuilder/README.md849
-rw-r--r--contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c442
-rw-r--r--contrib/experimental_dict_builders/benchmarkDictBuilder/dictBuilder.h6
-rwxr-xr-xcontrib/experimental_dict_builders/benchmarkDictBuilder/test.sh2
-rw-r--r--contrib/experimental_dict_builders/fastCover/Makefile54
-rw-r--r--contrib/experimental_dict_builders/fastCover/README.md24
-rw-r--r--contrib/experimental_dict_builders/fastCover/fastCover.c809
-rw-r--r--contrib/experimental_dict_builders/fastCover/fastCover.h57
-rw-r--r--contrib/experimental_dict_builders/fastCover/main.c183
-rwxr-xr-xcontrib/experimental_dict_builders/fastCover/test.sh15
-rw-r--r--contrib/experimental_dict_builders/randomDictBuilder/Makefile52
-rw-r--r--contrib/experimental_dict_builders/randomDictBuilder/README.md20
-rw-r--r--contrib/experimental_dict_builders/randomDictBuilder/io.c284
-rw-r--r--contrib/experimental_dict_builders/randomDictBuilder/io.h60
-rw-r--r--contrib/experimental_dict_builders/randomDictBuilder/main.c161
-rw-r--r--contrib/experimental_dict_builders/randomDictBuilder/random.c163
-rw-r--r--contrib/experimental_dict_builders/randomDictBuilder/random.h29
-rwxr-xr-xcontrib/experimental_dict_builders/randomDictBuilder/test.sh14
-rw-r--r--contrib/gen_html/Makefile51
-rw-r--r--contrib/gen_html/README.md31
-rwxr-xr-xcontrib/gen_html/gen-zstd-manual.sh9
-rw-r--r--contrib/gen_html/gen_html.cpp224
-rw-r--r--contrib/largeNbDicts/Makefile58
-rw-r--r--contrib/largeNbDicts/README.md25
-rw-r--r--contrib/largeNbDicts/largeNbDicts.c817
-rw-r--r--contrib/premake/premake4.lua6
-rw-r--r--contrib/premake/zstd.lua80
-rw-r--r--contrib/pzstd/BUCK72
-rw-r--r--contrib/pzstd/ErrorHolder.h54
-rw-r--r--contrib/pzstd/Logging.h72
-rw-r--r--contrib/pzstd/Makefile271
-rw-r--r--contrib/pzstd/Options.cpp428
-rw-r--r--contrib/pzstd/Options.h68
-rw-r--r--contrib/pzstd/Pzstd.cpp611
-rw-r--r--contrib/pzstd/Pzstd.h150
-rw-r--r--contrib/pzstd/README.md56
-rw-r--r--contrib/pzstd/SkippableFrame.cpp30
-rw-r--r--contrib/pzstd/SkippableFrame.h64
-rw-r--r--contrib/pzstd/images/Cspeed.pngbin69804 -> 0 bytes
-rw-r--r--contrib/pzstd/images/Dspeed.pngbin26335 -> 0 bytes
-rw-r--r--contrib/pzstd/main.cpp27
-rw-r--r--contrib/pzstd/test/BUCK37
-rw-r--r--contrib/pzstd/test/OptionsTest.cpp536
-rw-r--r--contrib/pzstd/test/PzstdTest.cpp149
-rw-r--r--contrib/pzstd/test/RoundTrip.h86
-rw-r--r--contrib/pzstd/test/RoundTripTest.cpp86
-rw-r--r--contrib/pzstd/utils/BUCK75
-rw-r--r--contrib/pzstd/utils/Buffer.h99
-rw-r--r--contrib/pzstd/utils/FileSystem.h94
-rw-r--r--contrib/pzstd/utils/Likely.h28
-rw-r--r--contrib/pzstd/utils/Range.h131
-rw-r--r--contrib/pzstd/utils/ResourcePool.h96
-rw-r--r--contrib/pzstd/utils/ScopeGuard.h50
-rw-r--r--contrib/pzstd/utils/ThreadPool.h58
-rw-r--r--contrib/pzstd/utils/WorkQueue.h181
-rw-r--r--contrib/pzstd/utils/test/BUCK35
-rw-r--r--contrib/pzstd/utils/test/BufferTest.cpp89
-rw-r--r--contrib/pzstd/utils/test/RangeTest.cpp82
-rw-r--r--contrib/pzstd/utils/test/ResourcePoolTest.cpp72
-rw-r--r--contrib/pzstd/utils/test/ScopeGuardTest.cpp28
-rw-r--r--contrib/pzstd/utils/test/ThreadPoolTest.cpp71
-rw-r--r--contrib/pzstd/utils/test/WorkQueueTest.cpp282
-rw-r--r--contrib/seekable_format/examples/Makefile53
-rw-r--r--contrib/seekable_format/examples/parallel_compression.c215
-rw-r--r--contrib/seekable_format/examples/parallel_processing.c194
-rw-r--r--contrib/seekable_format/examples/seekable_compression.c133
-rw-r--r--contrib/seekable_format/examples/seekable_decompression.c138
-rw-r--r--contrib/seekable_format/examples/seekable_decompression_mem.c144
-rw-r--r--contrib/seekable_format/zstd_seekable.h186
-rw-r--r--contrib/seekable_format/zstd_seekable_compression_format.md116
-rw-r--r--contrib/seekable_format/zstdseek_compress.c369
-rw-r--r--contrib/seekable_format/zstdseek_decompress.c467
-rw-r--r--contrib/snap/snapcraft.yaml28
-rw-r--r--doc/educational_decoder/Makefile6
-rw-r--r--doc/educational_decoder/README.md7
-rw-r--r--doc/educational_decoder/harness.c99
-rw-r--r--doc/educational_decoder/zstd_decompress.c255
-rw-r--r--doc/educational_decoder/zstd_decompress.h3
-rw-r--r--doc/zstd_compression_format.md34
-rw-r--r--doc/zstd_manual.html54
-rw-r--r--examples/Makefile3
-rw-r--r--examples/common.h2
-rw-r--r--examples/dictionary_compression.c2
-rw-r--r--examples/dictionary_decompression.c2
-rw-r--r--examples/multiple_simple_compression.c2
-rw-r--r--examples/multiple_streaming_compression.c2
-rw-r--r--examples/simple_compression.c2
-rw-r--r--examples/simple_decompression.c2
-rw-r--r--examples/streaming_compression.c2
-rw-r--r--examples/streaming_decompression.c2
-rw-r--r--examples/streaming_memory_usage.c2
-rw-r--r--lib/Makefile181
-rw-r--r--lib/README.md48
-rw-r--r--lib/common/bitstream.h68
-rw-r--r--lib/common/compiler.h22
-rw-r--r--lib/common/cpu.h2
-rw-r--r--lib/common/debug.c42
-rw-r--r--lib/common/debug.h42
-rw-r--r--lib/common/entropy_common.c46
-rw-r--r--lib/common/error_private.c3
-rw-r--r--lib/common/error_private.h8
-rw-r--r--lib/common/fse.h42
-rw-r--r--lib/common/fse_decompress.c49
-rw-r--r--lib/common/huf.h48
-rw-r--r--lib/common/mem.h2
-rw-r--r--lib/common/pool.c2
-rw-r--r--lib/common/pool.h4
-rw-r--r--lib/common/threading.c7
-rw-r--r--lib/common/threading.h7
-rw-r--r--lib/common/xxhash.c48
-rw-r--r--lib/common/xxhash.h42
-rw-r--r--lib/common/zstd_common.c2
-rw-r--r--lib/common/zstd_errors.h3
-rw-r--r--lib/common/zstd_internal.h127
-rw-r--r--lib/compress/fse_compress.c57
-rw-r--r--lib/compress/hist.c50
-rw-r--r--lib/compress/hist.h44
-rw-r--r--lib/compress/huf_compress.c184
-rw-r--r--lib/compress/zstd_compress.c725
-rw-r--r--lib/compress/zstd_compress_internal.h150
-rw-r--r--lib/compress/zstd_compress_literals.c16
-rw-r--r--lib/compress/zstd_compress_literals.h2
-rw-r--r--lib/compress/zstd_compress_sequences.c44
-rw-r--r--lib/compress/zstd_compress_sequences.h13
-rw-r--r--lib/compress/zstd_compress_superblock.c845
-rw-r--r--lib/compress/zstd_compress_superblock.h32
-rw-r--r--lib/compress/zstd_cwksp.h16
-rw-r--r--lib/compress/zstd_double_fast.c19
-rw-r--r--lib/compress/zstd_double_fast.h4
-rw-r--r--lib/compress/zstd_fast.c60
-rw-r--r--lib/compress/zstd_fast.h4
-rw-r--r--lib/compress/zstd_lazy.c45
-rw-r--r--lib/compress/zstd_lazy.h2
-rw-r--r--lib/compress/zstd_ldm.c32
-rw-r--r--lib/compress/zstd_ldm.h9
-rw-r--r--lib/compress/zstd_opt.c122
-rw-r--r--lib/compress/zstd_opt.h2
-rw-r--r--lib/compress/zstdmt_compress.c69
-rw-r--r--lib/compress/zstdmt_compress.h4
-rw-r--r--lib/decompress/huf_decompress.c138
-rw-r--r--lib/decompress/zstd_ddict.c20
-rw-r--r--lib/decompress/zstd_ddict.h4
-rw-r--r--lib/decompress/zstd_decompress.c412
-rw-r--r--lib/decompress/zstd_decompress_block.c515
-rw-r--r--lib/decompress/zstd_decompress_block.h6
-rw-r--r--lib/decompress/zstd_decompress_internal.h22
-rw-r--r--lib/deprecated/zbuff.h6
-rw-r--r--lib/deprecated/zbuff_common.c4
-rw-r--r--lib/deprecated/zbuff_compress.c2
-rw-r--r--lib/deprecated/zbuff_decompress.c2
-rw-r--r--lib/dictBuilder/cover.c10
-rw-r--r--lib/dictBuilder/cover.h18
-rw-r--r--lib/dictBuilder/fastcover.c18
-rw-r--r--lib/dictBuilder/zdict.c42
-rw-r--r--lib/dictBuilder/zdict.h79
-rw-r--r--lib/legacy/zstd_legacy.h8
-rw-r--r--lib/legacy/zstd_v01.c30
-rw-r--r--lib/legacy/zstd_v01.h2
-rw-r--r--lib/legacy/zstd_v02.c16
-rw-r--r--lib/legacy/zstd_v02.h2
-rw-r--r--lib/legacy/zstd_v03.c16
-rw-r--r--lib/legacy/zstd_v03.h2
-rw-r--r--lib/legacy/zstd_v04.c20
-rw-r--r--lib/legacy/zstd_v04.h2
-rw-r--r--lib/legacy/zstd_v05.c30
-rw-r--r--lib/legacy/zstd_v05.h4
-rw-r--r--lib/legacy/zstd_v06.c30
-rw-r--r--lib/legacy/zstd_v06.h2
-rw-r--r--lib/legacy/zstd_v07.c36
-rw-r--r--lib/legacy/zstd_v07.h2
-rw-r--r--lib/libzstd.pc.in4
-rw-r--r--lib/zstd.h83
-rw-r--r--programs/Makefile93
-rw-r--r--programs/README.md78
-rw-r--r--programs/benchfn.c2
-rw-r--r--programs/benchfn.h2
-rw-r--r--programs/benchzstd.c13
-rw-r--r--programs/benchzstd.h4
-rw-r--r--programs/datagen.c4
-rw-r--r--programs/datagen.h2
-rw-r--r--programs/dibio.c6
-rw-r--r--programs/dibio.h4
-rw-r--r--programs/fileio.c397
-rw-r--r--programs/fileio.h6
-rw-r--r--programs/platform.h19
-rw-r--r--programs/timefn.c7
-rw-r--r--programs/timefn.h8
-rw-r--r--programs/util.c472
-rw-r--r--programs/util.h179
-rw-r--r--programs/zstd.1184
-rw-r--r--programs/zstd.1.md58
-rw-r--r--programs/zstdcli.c1055
-rwxr-xr-xprograms/zstdgrep6
-rw-r--r--programs/zstdgrep.12
-rw-r--r--programs/zstdless.12
-rw-r--r--tests/Makefile484
-rw-r--r--tests/README.md143
-rw-r--r--tests/bigdict.c128
-rw-r--r--tests/checkTag.c65
-rw-r--r--tests/datagencli.c130
-rw-r--r--tests/decodecorpus.c1932
-rw-r--r--tests/fullbench.c843
-rw-r--r--tests/fuzz/Makefile147
-rw-r--r--tests/fuzz/README.md101
-rw-r--r--tests/fuzz/block_decompress.c49
-rw-r--r--tests/fuzz/block_round_trip.c97
-rw-r--r--tests/fuzz/dictionary_decompress.c68
-rw-r--r--tests/fuzz/dictionary_loader.c93
-rw-r--r--tests/fuzz/dictionary_round_trip.c109
-rw-r--r--tests/fuzz/fuzz.h52
-rwxr-xr-xtests/fuzz/fuzz.py884
-rw-r--r--tests/fuzz/fuzz_data_producer.c85
-rw-r--r--tests/fuzz/fuzz_data_producer.h60
-rw-r--r--tests/fuzz/fuzz_helpers.h62
-rw-r--r--tests/fuzz/regression_driver.c77
-rw-r--r--tests/fuzz/simple_compress.c52
-rw-r--r--tests/fuzz/simple_decompress.c49
-rw-r--r--tests/fuzz/simple_round_trip.c93
-rw-r--r--tests/fuzz/stream_decompress.c89
-rw-r--r--tests/fuzz/stream_round_trip.c170
-rw-r--r--tests/fuzz/zstd_frame_info.c39
-rw-r--r--tests/fuzz/zstd_helpers.c143
-rw-r--r--tests/fuzz/zstd_helpers.h51
-rw-r--r--tests/fuzzer.c2839
-rw-r--r--tests/golden-compression/huffman-compressed-largerbin143 -> 0 bytes
-rw-r--r--tests/golden-decompression/rle-first-block.zstbin45 -> 0 bytes
-rw-r--r--tests/gzip/Makefile44
-rwxr-xr-xtests/gzip/gzip-env.sh46
-rwxr-xr-xtests/gzip/helin-segv.sh31
-rwxr-xr-xtests/gzip/help-version.sh270
-rw-r--r--tests/gzip/hufts-segv.gzbin425 -> 0 bytes
-rwxr-xr-xtests/gzip/hufts.sh34
-rw-r--r--tests/gzip/init.cfg5
-rwxr-xr-xtests/gzip/init.sh616
-rwxr-xr-xtests/gzip/keep.sh51
-rwxr-xr-xtests/gzip/list.sh31
-rwxr-xr-xtests/gzip/memcpy-abuse.sh34
-rwxr-xr-xtests/gzip/mixed.sh68
-rwxr-xr-xtests/gzip/null-suffix-clobber.sh35
-rwxr-xr-xtests/gzip/stdin.sh31
-rwxr-xr-xtests/gzip/test-driver.sh150
-rwxr-xr-xtests/gzip/trailing-nul.sh37
-rwxr-xr-xtests/gzip/unpack-invalid.sh36
-rwxr-xr-xtests/gzip/z-suffix.sh30
-rwxr-xr-xtests/gzip/zdiff.sh48
-rwxr-xr-xtests/gzip/zgrep-context.sh47
-rwxr-xr-xtests/gzip/zgrep-f.sh43
-rwxr-xr-xtests/gzip/zgrep-signal.sh64
-rwxr-xr-xtests/gzip/znew-k.sh40
-rw-r--r--tests/invalidDictionaries.c61
-rw-r--r--tests/legacy.c260
-rwxr-xr-xtests/libzstd_partial_builds.sh89
-rw-r--r--tests/longmatch.c101
-rw-r--r--tests/paramgrill.c2966
-rwxr-xr-xtests/playTests.sh1187
-rw-r--r--tests/poolTests.c271
-rwxr-xr-xtests/rateLimiter.py40
-rw-r--r--tests/regression/Makefile58
-rw-r--r--tests/regression/config.c278
-rw-r--r--tests/regression/config.h86
-rw-r--r--tests/regression/data.c617
-rw-r--r--tests/regression/data.h140
-rw-r--r--tests/regression/levels.h44
-rw-r--r--tests/regression/method.c688
-rw-r--r--tests/regression/method.h65
-rw-r--r--tests/regression/result.c28
-rw-r--r--tests/regression/result.h103
-rw-r--r--tests/regression/results.csv636
-rw-r--r--tests/regression/test.c362
-rw-r--r--tests/roundTripCrash.c241
-rw-r--r--tests/seqgen.c260
-rw-r--r--tests/seqgen.h58
-rw-r--r--tests/symbols.c164
-rwxr-xr-xtests/test-zstd-speed.py376
-rwxr-xr-xtests/test-zstd-versions.py276
-rw-r--r--tests/zbufftest.c619
-rw-r--r--tests/zstreamtest.c2477
-rw-r--r--zlibWrapper/Makefile4
-rw-r--r--zlibWrapper/examples/zwrapbench.c42
-rw-r--r--zlibWrapper/gzcompatibility.h2
-rw-r--r--zlibWrapper/zstd_zlibwrapper.c2
-rw-r--r--zlibWrapper/zstd_zlibwrapper.h2
292 files changed, 5668 insertions, 37340 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 3b882d4cda51..0ed939a5bbb1 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,29 @@
+v1.4.5
+fix : Compression ratio regression on huge files (> 3 GB) using high levels (--ultra) and multithreading, by @terrelln
+perf: Improved decompression speed: x64 : +10% (clang) / +5% (gcc); ARM : from +15% to +50%, depending on SoC, by @terrelln
+perf: Automatically downsizes ZSTD_DCtx when too large for too long (#2069, by @bimbashreshta)
+perf: Improved fast compression speed on aarch64 (#2040, ~+3%, by @caoyzh)
+perf: Small level 1 compression speed gains (depending on compiler)
+cli : New --patch-from command, create and apply patches from files, by @bimbashreshta
+cli : New --filelist= : Provide a list of files to operate upon from a file
+cli : -b -d command can now benchmark decompression on multiple files
+cli : New --no-content-size command
+cli : New --show-default-cparams information command
+api : ZDICT_finalizeDictionary() is promoted to stable (#2111)
+api : new experimental parameter ZSTD_d_stableOutBuffer (#2094)
+build: Generate a single-file libzstd library (#2065, by @cwoffenden)
+build: Relative includes no longer require -I compiler flags for zstd lib subdirs (#2103, by @felixhandte)
+build: zstd now compiles cleanly under -pedantic (#2099)
+build: zstd now compiles with make-4.3
+build: Support mingw cross-compilation from Linux, by @Ericson2314
+build: Meson multi-thread build fix on windows
+build: Some misc icc fixes backed by new ci test on travis
+misc: bitflip analyzer tool, by @felixhandte
+misc: Extend largeNbDicts benchmark to compression
+misc: Edit-distance match finder in contrib/
+doc : Improved beginner CONTRIBUTING.md docs
+doc : New issue templates for zstd
+
v1.4.4
perf: Improved decompression speed, by > 10%, by @terrelln
perf: Better compression speed when re-using a context, by @felixhandte
@@ -14,7 +40,8 @@ cli: commands --stream-size=# and --size-hint=#, by @nmagerko
cli: command --exclude-compressed, by @shashank0791
cli: faster `-t` test mode
cli: improved some error messages, by @vangyzen
-cli: rare deadlock condition within dictionary builder, by @terrelln
+cli: fix command `-D dictionary` on Windows, reported by @artyompetrov
+cli: fix rare deadlock condition within dictionary builder, by @terrelln
build: single-file decoder with emscripten compilation script, by @cwoffenden
build: fixed zlibWrapper compilation on Visual Studio, reported by @bluenlive
build: fixed deprecation warning for certain gcc version, reported by @jasonma163
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index dd013f8084fa..637e37188550 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -26,6 +26,356 @@ to do this once to work on any of Facebook's open source projects.
Complete your CLA here: <https://code.facebook.com/cla>
+## Workflow
+Zstd uses a branch-based workflow for making changes to the codebase. Typically, zstd
+will use a new branch per sizable topic. For smaller changes, it is okay to lump multiple
+related changes into a branch.
+
+Our contribution process works in three main stages:
+1. Local development
+ * Update:
+ * Checkout your fork of zstd if you have not already
+ ```
+ git clone https://github.com/<username>/zstd
+ cd zstd
+ ```
+ * Update your local dev branch
+ ```
+ git pull https://github.com/facebook/zstd dev
+ git push origin dev
+ ```
+ * Topic and development:
+ * Make a new branch on your fork about the topic you're developing for
+ ```
+ # branch names should be concise but sufficiently informative
+ git checkout -b <branch-name>
+ git push origin <branch-name>
+ ```
+ * Make commits and push
+ ```
+ # make some changes
+ git add -u && git commit -m <message>
+ git push origin <branch-name>
+ ```
+ * Note: run local tests to ensure that your changes didn't break existing functionality
+ * Quick check
+ ```
+ make shortest
+ ```
+ * Longer check
+ ```
+ make test
+ ```
+2. Code Review and CI tests
+ * Ensure CI tests pass:
+ * Before sharing anything to the community, make sure that all CI tests pass on your local fork.
+ See our section on setting up your CI environment for more information on how to do this.
+ * Ensure that static analysis passes on your development machine. See the Static Analysis section
+ below to see how to do this.
+ * Create a pull request:
+ * When you are ready to share your changes with the community, create a pull request from your branch
+ to facebook:dev. You can do this very easily by clicking 'Create Pull Request' on your fork's home
+ page.
+ * From there, select the branch where you made changes as your source branch and facebook:dev
+ as the destination.
+ * Examine the diff presented between the two branches to make sure there is nothing unexpected.
+ * Write a good pull request description:
+ * While there is no strict template that our contributors follow, we would like them to
+ sufficiently summarize and motivate the changes they are proposing. We recommend all pull requests,
+ at least indirectly, address the following points.
+ * Is this pull request important and why?
+ * Is it addressing an issue? If so, what issue? (provide links for convenience please)
+ * Is this a new feature? If so, why is it useful and/or necessary?
+ * Are there background references and documents that reviewers should be aware of to properly assess this change?
+ * Note: make sure to point out any design and architectural decisions that you made and the rationale behind them.
+ * Note: if you have been working with a specific user and would like them to review your work, make sure you mention them using (@<username>)
+ * Submit the pull request and iterate with feedback.
+3. Merge and Release
+ * Getting approval:
+ * You will have to iterate on your changes with feedback from other collaborators to reach a point
+ where your pull request can be safely merged.
+ * To avoid too many comments on style and convention, make sure that you have a
+ look at our style section below before creating a pull request.
+ * Eventually, someone from the zstd team will approve your pull request and not long after merge it into
+ the dev branch.
+ * Housekeeping:
+ * Most PRs are linked with one or more Github issues. If this is the case for your PR, make sure
+ the corresponding issue is mentioned. If your change 'fixes' or completely addresses the
+ issue at hand, then please indicate this by requesting that an issue be closed by commenting.
+ * Just because your changes have been merged does not mean the topic or larger issue is complete. Remember
+ that the change must make it to an official zstd release for it to be meaningful. We recommend
+ that contributors track the activity on their pull request and corresponding issue(s) page(s) until
+ their change makes it to the next release of zstd. Users will often discover bugs in your code or
+ suggest ways to refine and improve your initial changes even after the pull request is merged.
+
+## Static Analysis
+Static analysis is a process for examining the correctness or validity of a program without actually
+executing it. It usually helps us find many simple bugs. Zstd uses clang's `scan-build` tool for
+static analysis. You can install it by following the instructions for your OS on https://clang-analyzer.llvm.org/scan-build.
+
+Once installed, you can ensure that our static analysis tests pass on your local development machine
+by running:
+```
+make staticAnalyze
+```
+
+In general, you can use `scan-build` to static analyze any build script. For example, to static analyze
+just `contrib/largeNbDicts` and nothing else, you can run:
+
+```
+scan-build make -C contrib/largeNbDicts largeNbDicts
+```
+
+## Performance
+Performance is extremely important for zstd and we only merge pull requests whose performance
+landscape and corresponding trade-offs have been adequately analyzed, reproduced, and presented.
+This high bar for performance means that every PR which has the potential to
+impact performance takes a very long time for us to properly review. That being said, we
+always welcome contributions to improve performance (or worsen performance for the trade-off of
+something else). Please keep the following in mind before submitting a performance related PR:
+
+1. Zstd isn't as old as gzip but it has been around for some time now and its evolution is
+very well documented via past Github issues and pull requests. It may be the case that your
+particular performance optimization has already been considered in the past. Please take some
+time to search through old issues and pull requests using keywords specific to your
+would-be PR. Of course, just because a topic has already been discussed (and perhaps rejected
+on some grounds) in the past, doesn't mean it isn't worth bringing up again. But even in that case,
+it will be helpful for you to have context from that topic's history before contributing.
+2. The distinction between noise and actual performance gains can unfortunately be very subtle
+especially when microbenchmarking extremely small wins or losses. The only remedy to getting
+something subtle merged is extensive benchmarking. You will be doing us a great favor if you
+take the time to run extensive, long-duration, and potentially cross-(os, platform, process, etc)
+benchmarks on your end before submitting a PR. Of course, you will not be able to benchmark
+your changes on every single processor and os out there (and neither will we) but do the best
+you can :) We've added some things to think about when benchmarking below in the Benchmarking
+Performance section which might be helpful for you.
+3. Optimizing performance for a certain OS, processor vendor, compiler, or network system is a perfectly
+legitimate thing to do as long as it does not harm the overall performance health of Zstd.
+This is a hard balance to strike but please keep in mind other aspects of Zstd when
+submitting changes that are clang-specific, windows-specific, etc.
+
+## Benchmarking Performance
+Performance microbenchmarking is a tricky subject but also essential for Zstd. We value empirical
+testing over theoretical speculation. This guide is not perfect but for most scenarios, it
+is a good place to start.
+
+### Stability
+Unfortunately, the most important aspect in being able to benchmark reliably is to have a stable
+benchmarking machine. A virtual machine, a machine with shared resources, or your laptop
+will typically not be stable enough to obtain reliable benchmark results. If you can get your
+hands on a desktop, this is usually a better scenario.
+
+Of course, benchmarking can be done on non-hyper-stable machines as well. You will just have to
+do a little more work to ensure that you are in fact measuring the changes you've made and not
+noise. Here are some things you can do to make your benchmarks more stable:
+
+1. The most simple thing you can do to drastically improve the stability of your benchmark is
+to run it multiple times and then aggregate the results of those runs. As a general rule of
+thumb, the smaller the change you are trying to measure, the more samples of benchmark runs
+you will have to aggregate over to get reliable results. Here are some additional things to keep in
+mind when running multiple trials:
+ * How you aggregate your samples is important. You might be tempted to use the mean of your
+ results. While this is certainly going to be a more stable number than a raw single sample
+ benchmark number, you might have more luck by taking the median. The mean is not robust to
+ outliers whereas the median is. Better still, you could simply take the fastest speed your
+ benchmark achieved on each run since that is likely the fastest your process will be
+ capable of running your code. In our experience, this (aggregating by just taking the sample
+ with the fastest running time) has been the most stable approach.
+ * The more samples you have, the more stable your benchmarks should be. You can verify
+ your improved stability by looking at the size of your confidence intervals as you
+ increase your sample count. These should get smaller and smaller. Eventually hopefully
+ smaller than the performance win you are expecting.
+ * Most processors will take some time to get `hot` when running anything. The observations
+ you collect during that time period will be very different from the true performance number. Having
+ a very large number of samples will help alleviate this problem slightly but you can also
+ address it directly by simply not including the first `n` iterations of your benchmark in
+ your aggregations. You can determine `n` by simply looking at the results from each iteration
+ and then hand picking a good threshold after which the variance in results seems to stabilize.
+2. You cannot really get reliable benchmarks if your host machine is simultaneously running
+another cpu/memory-intensive application in the background. If you are running benchmarks on your
+personal laptop for instance, you should close all applications (including your code editor and
+browser) before running your benchmarks. You might also have invisible background applications
+running. You can see what these are by looking at either Activity Monitor on Mac or Task Manager
+on Windows. You will get more stable benchmark results if you end those processes as well.
+ * If you have multiple cores, you can even run your benchmark on a reserved core to prevent
+ pollution from other OS and user processes. There are a number of ways to do this depending
+ on your OS:
+ * On linux boxes, you can use https://github.com/lpechacek/cpuset.
+ * On Windows, you can "Set Processor Affinity" using https://www.thewindowsclub.com/processor-affinity-windows
+ * On Mac, you can try to use their dedicated affinity API https://developer.apple.com/library/archive/releasenotes/Performance/RN-AffinityAPI/#//apple_ref/doc/uid/TP40006635-CH1-DontLinkElementID_2
+3. To benchmark, you will likely end up writing a separate c/c++ program that will link libzstd.
+Dynamically linking your library will introduce some added variation (not a large amount but
+definitely some). Statically linking libzstd will be more stable. Static libraries should
+be enabled by default when building zstd.
+4. Use a profiler with a good high resolution timer. See the section below on profiling for
+details on this.
+5. Disable frequency scaling, turbo boost and address space randomization (this will vary by OS)
+6. Try to avoid storage. On some systems you can use tmpfs. Putting the program, inputs and outputs on
+tmpfs avoids touching a real storage system, which can have a pretty big variability.
+
+Also check out LLVM's guide on benchmarking here: https://llvm.org/docs/Benchmarking.html
+
+### Zstd benchmark
+The fastest signal you can get regarding your performance changes is via the built-in zstd cli
+bench option. You can run Zstd as you typically would for your scenario using some set of options
+and then additionally also specify the `-b#` option. Doing this will run our benchmarking pipeline
+for the options you have just provided. If you want to look at the internals of how this
+benchmarking script works, you can check out programs/benchzstd.c
+
+For example: say you have made a change that you believe improves the speed of zstd level 1. The
+very first thing you should use to assess whether you actually achieved any sort of improvement
+is `zstd -b`. You might try to do something like this. Note: you can use the `-i` option to
+specify a running time for your benchmark in seconds (default is 3 seconds).
+Usually, the longer the running time, the more stable your results will be.
+
+```
+$ git checkout <commit-before-your-change>
+$ make && cp zstd zstd-old
+$ git checkout <commit-after-your-change>
+$ make && cp zstd zstd-new
+$ zstd-old -i5 -b1 <your-test-data>
+ 1<your-test-data> : 8990 -> 3992 (2.252), 302.6 MB/s , 626.4 MB/s
+$ zstd-new -i5 -b1 <your-test-data>
+ 1<your-test-data> : 8990 -> 3992 (2.252), 302.8 MB/s , 628.4 MB/s
+```
+
+Unless your performance win is large enough to be visible despite the intrinsic noise
+on your computer, benchzstd alone will likely not be enough to validate the impact of your
+changes. For example, the results of the example above indicate that effectively nothing
+changed but there could be a small <3% improvement that the noise on the host machine
+obscured. So unless you see a large performance win (10-15% consistently) using just
+this method of evaluation will not be sufficient.
+
+### Profiling
+There are a number of great profilers out there. We're going to briefly mention how you can
+profile your code using `instruments` on mac, `perf` on linux and `visual studio profiler`
+on windows.
+
+Say you have an idea for a change that you think will provide some good performance gains
+for level 1 compression on Zstd. Typically this means, you have identified a section of
+code that you think can be made to run faster.
+
+The first thing you will want to do is make sure that the piece of code is actually taking up
+a notable amount of time to run. It is usually not worth optimizing something which accounts for less than
+0.0001% of the total running time. Luckily, there are tools to help with this.
+Profilers will let you see how much time your code spends inside a particular function.
+If your target code snippet is only part of a function, it might be worth trying to
+isolate that snippet by moving it to its own function (this is usually not necessary but
+might be).
+
+Most profilers (including the profilers discussed below) will generate a call graph of
+functions for you. Your goal will be to find your function of interest in this call graph
+and then inspect the time spent inside of it. You might also want to look at the
+annotated assembly which most profilers will provide you with.
+
+#### Instruments
+We will once again consider the scenario where you think you've identified a piece of code
+whose performance can be improved upon. Follow these steps to profile your code using
+Instruments.
+
+1. Open Instruments
+2. Select `Time Profiler` from the list of standard templates
+3. Close all other applications except for your instruments window and your terminal
+4. Run your benchmarking script from your terminal window
+ * You will want a benchmark that runs for at least a few seconds (5 seconds will
+ usually be long enough). This way the profiler will have something to work with
+ and you will have ample time to attach your profiler to this process:)
+ * I will just use benchzstd as my benchmarking script for this example:
+```
+$ zstd -b1 -i5 <my-data> # this will run for 5 seconds
+```
+5. Once you run your benchmarking script, switch back over to instruments and attach your
+process to the time profiler. You can do this by:
+ * Clicking on the `All Processes` drop down in the top left of the toolbar.
+ * Selecting your process from the dropdown. In my case, it is just going to be labeled
+ `zstd`
+ * Hitting the bright red record circle button on the top left of the toolbar
+6. Your profiler will now start collecting metrics from your benchmarking script. Once
+you think you have collected enough samples (usually this is the case after 3 seconds of
+recording), stop your profiler.
+7. Make sure that in the toolbar of the bottom window, `profile` is selected.
+8. You should be able to see your call graph.
+ * If you don't see the call graph or see an incomplete call graph, make sure you have compiled
+ zstd and your benchmarking script using debug flags. On mac and linux, this just means
+ you will have to supply the `-g` flag along with your build script. You might also
+ have to provide the `-fno-omit-frame-pointer` flag
+9. Dig down the graph to find your function call and then inspect it by double clicking
+the list item. You will be able to see the annotated source code and the assembly side by
+side.
+
+#### Perf
+
+This wiki has a pretty detailed tutorial on getting started working with perf so we'll
+leave you to check that out if you're getting started:
+
+https://perf.wiki.kernel.org/index.php/Tutorial
+
+Some general notes on perf:
+* Use `perf stat -r # <bench-program>` to quickly get some relevant timing and
+counter statistics. Perf uses a high resolution timer and this is likely one
+of the first things your team will run when assessing your PR.
+* Perf has a long list of hardware counters that can be viewed with `perf list`.
+When measuring optimizations, something worth trying is to make sure the hardware
+counters you expect to be impacted by your change are in fact being so. For example,
+if you expect the L1 cache misses to decrease with your change, you can look at the
+counter `L1-dcache-load-misses`
+* Perf hardware counters will not work on a virtual machine.
+
+#### Visual Studio
+
+TODO
+
+
+## Setting up continuous integration (CI) on your fork
+Zstd uses a number of different continuous integration (CI) tools to ensure that new changes
+are well tested before they make it to an official release. Specifically, we use the platforms
+travis-ci, circle-ci, and appveyor.
+
+Changes cannot be merged into the main dev branch unless they pass all of our CI tests.
+The easiest way to run these CI tests on your own before submitting a PR to our dev branch
+is to configure your personal fork of zstd with each of the CI platforms. Below, you'll find
+instructions for doing this.
+
+### travis-ci
+Follow these steps to link travis-ci with your github fork of zstd
+
+1. Make sure you are logged into your github account
+2. Go to https://travis-ci.org/
+3. Click 'Sign in with Github' on the top right
+4. Click 'Authorize travis-ci'
+5. Click 'Activate all repositories using Github Apps'
+6. Select 'Only select repositories' and select your fork of zstd from the drop down
+7. Click 'Approve and Install'
+8. Click 'Sign in with Github' again. This time, it will be for travis-pro (which will let you view your tests on the web dashboard)
+9. Click 'Authorize travis-pro'
+10. You should have travis set up on your fork now.
+
+### circle-ci
+TODO
+
+### appveyor
+Follow these steps to link appveyor with your github fork of zstd
+
+1. Make sure you are logged into your github account
+2. Go to https://www.appveyor.com/
+3. Click 'Sign in' on the top right
+4. Select 'Github' on the left panel
+5. Click 'Authorize appveyor'
+6. You might be asked to select which repositories you want to give appveyor permission to. Select your fork of zstd if you're prompted
+7. You should have appveyor set up on your fork now.
+
+### General notes on CI
+CI tests run every time a pull request (PR) is created or updated. The exact tests
+that get run will depend on the destination branch you specify. Some tests take
+longer to run than others. Currently, our CI is set up to run a short
+series of tests when creating a PR to the dev branch and a longer series of tests
+when creating a PR to the master branch. You can look in the configuration files
+of the respective CI platform for more information on what gets run when.
+
+Most people will just want to create a PR with the destination set to their local dev
+branch of zstd. You can then find the status of the tests on the PR's page. You can also
+re-run tests and cancel running tests from the PR page or from the respective CI's dashboard.
+
## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.
@@ -34,7 +384,7 @@ Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.
-## Coding Style
+## Coding Style
* 4 spaces for indentation rather than tabs
## License
diff --git a/Makefile b/Makefile
index efb555c35b39..2c1d34604fe9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,11 @@
# ################################################################
-# Copyright (c) 2015-present, Yann Collet, Facebook, Inc.
+# Copyright (c) 2015-2020, Yann Collet, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
+# You may select, at your option, one of the above-listed licenses.
# ################################################################
PRGDIR = programs
@@ -17,7 +18,16 @@ FUZZDIR = $(TESTDIR)/fuzz
# Define nul output
VOID = /dev/null
-ifneq (,$(filter Windows%,$(OS)))
+# When cross-compiling from linux to windows, you might
+# need to specify this as "Windows." Fedora build fails
+# without it.
+#
+# Note: mingw-w64 build from linux to windows does not
+# fail on other tested distros (ubuntu, debian) even
+# without manually specifying the TARGET_SYSTEM.
+TARGET_SYSTEM ?= $(OS)
+
+ifneq (,$(filter Windows%,$(TARGET_SYSTEM)))
EXT =.exe
else
EXT =
@@ -35,7 +45,7 @@ allmost: allzstd zlibwrapper
# skip zwrapper, can't build that on alternate architectures without the proper zlib installed
.PHONY: allzstd
-allzstd: lib
+allzstd: lib-all
$(MAKE) -C $(PRGDIR) all
$(MAKE) -C $(TESTDIR) all
@@ -45,7 +55,7 @@ all32:
$(MAKE) -C $(TESTDIR) all32
.PHONY: lib lib-release libzstd.a
-lib lib-release :
+lib lib-release lib-all :
@$(MAKE) -C $(ZSTDDIR) $@
.PHONY: zstd zstd-release
@@ -80,6 +90,13 @@ shortest:
.PHONY: check
check: shortest
+.PHONY: automated_benchmarking
+automated_benchmarking:
+ $(MAKE) -C $(TESTDIR) $@
+
+.PHONY: benchmarking
+benchmarking: automated_benchmarking
+
## examples: build all examples in `/examples` directory
.PHONY: examples
examples: lib
@@ -101,7 +118,8 @@ contrib: lib
$(MAKE) -C contrib/pzstd all
$(MAKE) -C contrib/seekable_format/examples all
$(MAKE) -C contrib/largeNbDicts all
- cd contrib/single_file_decoder/ ; ./build_test.sh
+ cd contrib/single_file_libs/ ; ./build_decoder_test.sh
+ cd contrib/single_file_libs/ ; ./build_library_test.sh
.PHONY: cleanTabs
cleanTabs:
@@ -337,7 +355,7 @@ endif
ifneq (,$(filter MSYS%,$(shell uname)))
HOST_OS = MSYS
-CMAKE_PARAMS = -G"MSYS Makefiles" -DZSTD_MULTITHREAD_SUPPORT:BOOL=OFF -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON
+CMAKE_PARAMS = -G"MSYS Makefiles" -DCMAKE_BUILD_TYPE=Debug -DZSTD_MULTITHREAD_SUPPORT:BOOL=OFF -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON
endif
@@ -349,11 +367,15 @@ cmakebuild:
cmake --version
$(RM) -r $(BUILDIR)/cmake/build
mkdir $(BUILDIR)/cmake/build
- cd $(BUILDIR)/cmake/build ; cmake -DCMAKE_INSTALL_PREFIX:PATH=~/install_test_dir $(CMAKE_PARAMS) .. ; $(MAKE) install ; $(MAKE) uninstall
+ cd $(BUILDIR)/cmake/build; cmake -DCMAKE_INSTALL_PREFIX:PATH=~/install_test_dir $(CMAKE_PARAMS) ..
+ $(MAKE) -C $(BUILDIR)/cmake/build -j4;
+ $(MAKE) -C $(BUILDIR)/cmake/build install;
+ $(MAKE) -C $(BUILDIR)/cmake/build uninstall;
+ cd $(BUILDIR)/cmake/build; ctest -V -L Medium
-c90build: clean
+c89build: clean
$(CC) -v
- CFLAGS="-std=c90 -Werror" $(MAKE) allmost # will fail, due to missing support for `long long`
+ CFLAGS="-std=c89 -Werror" $(MAKE) allmost # will fail, due to missing support for `long long`
gnu90build: clean
$(CC) -v
diff --git a/README.md b/README.md
index 9c5f9201307c..5c300fdc49aa 100644
--- a/README.md
+++ b/README.md
@@ -31,10 +31,10 @@ a list of known ports and bindings is provided on [Zstandard homepage](http://ww
## Benchmarks
For reference, several fast compression algorithms were tested and compared
-on a server running Arch Linux (`Linux version 5.0.5-arch1-1`),
+on a server running Arch Linux (`Linux version 5.5.11-arch1-1`),
with a Core i9-9900K CPU @ 5.0GHz,
using [lzbench], an open-source in-memory benchmark by @inikep
-compiled with [gcc] 8.2.1,
+compiled with [gcc] 9.3.0,
on the [Silesia compression corpus].
[lzbench]: https://github.com/inikep/lzbench
@@ -43,18 +43,26 @@ on the [Silesia compression corpus].
| Compressor name | Ratio | Compression| Decompress.|
| --------------- | ------| -----------| ---------- |
-| **zstd 1.4.0 -1** | 2.884 | 530 MB/s | 1360 MB/s |
-| zlib 1.2.11 -1 | 2.743 | 110 MB/s | 440 MB/s |
-| brotli 1.0.7 -0 | 2.701 | 430 MB/s | 470 MB/s |
-| quicklz 1.5.0 -1 | 2.238 | 600 MB/s | 800 MB/s |
-| lzo1x 2.09 -1 | 2.106 | 680 MB/s | 950 MB/s |
-| lz4 1.8.3 | 2.101 | 800 MB/s | 4220 MB/s |
-| snappy 1.1.4 | 2.073 | 580 MB/s | 2020 MB/s |
-| lzf 3.6 -1 | 2.077 | 440 MB/s | 930 MB/s |
+| **zstd 1.4.5 -1** | 2.884 | 500 MB/s | 1660 MB/s |
+| zlib 1.2.11 -1 | 2.743 | 90 MB/s | 400 MB/s |
+| brotli 1.0.7 -0 | 2.703 | 400 MB/s | 450 MB/s |
+| **zstd 1.4.5 --fast=1** | 2.434 | 570 MB/s | 2200 MB/s |
+| **zstd 1.4.5 --fast=3** | 2.312 | 640 MB/s | 2300 MB/s |
+| quicklz 1.5.0 -1 | 2.238 | 560 MB/s | 710 MB/s |
+| **zstd 1.4.5 --fast=5** | 2.178 | 700 MB/s | 2420 MB/s |
+| lzo1x 2.10 -1 | 2.106 | 690 MB/s | 820 MB/s |
+| lz4 1.9.2 | 2.101 | 740 MB/s | 4530 MB/s |
+| **zstd 1.4.5 --fast=7** | 2.096 | 750 MB/s | 2480 MB/s |
+| lzf 3.6 -1 | 2.077 | 410 MB/s | 860 MB/s |
+| snappy 1.1.8 | 2.073 | 560 MB/s | 1790 MB/s |
[zlib]: http://www.zlib.net/
[LZ4]: http://www.lz4.org/
+The negative compression levels, specified with `--fast=#`,
+offer faster compression and decompression speed in exchange for some loss in
+compression ratio compared to level 1, as seen in the table above.
+
Zstd can also offer stronger compression ratios at the cost of compression speed.
Speed vs Compression trade-off is configurable by small increments.
Decompression speed is preserved and remains roughly the same at all settings,
@@ -143,6 +151,18 @@ example about how Meson is used to build this project.
Note that default build type is **release**.
+### VCPKG
+You can build and install zstd using the [vcpkg](https://github.com/Microsoft/vcpkg/) dependency manager:
+
+ git clone https://github.com/Microsoft/vcpkg.git
+ cd vcpkg
+ ./bootstrap-vcpkg.sh
+ ./vcpkg integrate install
+ ./vcpkg install zstd
+
+The zstd port in vcpkg is kept up to date by Microsoft team members and community contributors.
+If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
+
### Visual Studio (Windows)
Going into `build` directory, you will find additional possibilities:
diff --git a/TESTING.md b/TESTING.md
index 551981b14053..7e5305178b97 100644
--- a/TESTING.md
+++ b/TESTING.md
@@ -11,7 +11,7 @@ They consist of the following tests:
- Compilation on all supported targets (x86, x86_64, ARM, AArch64, PowerPC, and PowerPC64)
- Compilation on various versions of gcc, clang, and g++
- `tests/playTests.sh` on x86_64, without the tests on long data (CLI tests)
-- Small tests (`tests/legacy.c`, `tests/longmatch.c`, `tests/symbols.c`) on x64_64
+- Small tests (`tests/legacy.c`, `tests/longmatch.c`) on x86_64
Medium Tests
------------
@@ -19,7 +19,7 @@ Medium tests run on every commit and pull request to `dev` branch, on TravisCI.
They consist of the following tests:
- The following tests run with UBsan and Asan on x86_64 and x86, as well as with
Msan on x86_64
- - `tests/playTests.sh --test-long-data`
+ - `tests/playTests.sh --test-large-data`
- Fuzzer tests: `tests/fuzzer.c`, `tests/zstreamtest.c`, and `tests/decodecorpus.c`
- `tests/zstreamtest.c` under Tsan (streaming mode, including multithreaded mode)
- Valgrind Test (`make -C tests valgrindTest`) (testing CLI and fuzzer under valgrind)
diff --git a/appveyor.yml b/appveyor.yml
index dd2c02ac4826..5d77b3103481 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -14,7 +14,7 @@
- COMPILER: "gcc"
HOST: "mingw"
PLATFORM: "x64"
- SCRIPT: "make allzstd MOREFLAGS=-static && make -C tests test-symbols fullbench-lib"
+ SCRIPT: "make allzstd MOREFLAGS=-static && make -C tests fullbench-lib"
ARTIFACT: "true"
BUILD: "true"
- COMPILER: "gcc"
@@ -169,7 +169,8 @@
- SET "FUZZERTEST=-T30s"
- if [%HOST%]==[visual] if [%CONFIGURATION%]==[Release] (
CD tests &&
- SET ZSTD=./zstd.exe &&
+ SET ZSTD_BIN=./zstd.exe&&
+ SET DATAGEN_BIN=./datagen.exe&&
sh -e playTests.sh --test-large-data &&
fullbench.exe -i1 &&
fullbench.exe -i1 -P0 &&
@@ -188,6 +189,9 @@
environment:
matrix:
- COMPILER: "gcc"
+ HOST: "cygwin"
+ PLATFORM: "x64"
+ - COMPILER: "gcc"
HOST: "mingw"
PLATFORM: "x64"
SCRIPT: "CPPFLAGS=-DDEBUGLEVEL=2 CFLAGS=-Werror make -j allzstd DEBUGLEVEL=2"
@@ -220,6 +224,14 @@
install:
- ECHO Installing %COMPILER% %PLATFORM% %CONFIGURATION%
- SET PATH_ORIGINAL=%PATH%
+ - if [%HOST%]==[cygwin] (
+ ECHO Installing Cygwin Packages &&
+ C:\cygwin64\setup-x86_64.exe -qnNdO -R "C:\cygwin64" -g -P ^
+ gcc-g++,^
+ gcc,^
+ cmake,^
+ make
+ )
- if [%HOST%]==[mingw] (
SET "PATH_MINGW32=C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin" &&
SET "PATH_MINGW64=C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin" &&
@@ -232,6 +244,17 @@
build_script:
- ECHO Building %COMPILER% %PLATFORM% %CONFIGURATION%
+ - if [%HOST%]==[cygwin] (
+ set CHERE_INVOKING=yes &&
+ set CC=%COMPILER% &&
+ C:\cygwin64\bin\bash --login -c "
+ set -e;
+ cd build/cmake;
+ CFLAGS='-Werror' cmake -G 'Unix Makefiles' -DCMAKE_BUILD_TYPE=Debug -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_FUZZER_FLAGS=-T30s -DZSTD_ZSTREAM_FLAGS=-T30s .;
+ make -j4;
+ ctest -V -L Medium;
+ "
+ )
- if [%HOST%]==[mingw] (
( if [%PLATFORM%]==[x64] (
SET "PATH=%PATH_MINGW64%;%PATH_ORIGINAL%"
diff --git a/contrib/cleanTabs b/contrib/cleanTabs
deleted file mode 100755
index 215913a90ace..000000000000
--- a/contrib/cleanTabs
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/sh
-sed -i '' $'s/\t/ /g' ../lib/**/*.{h,c} ../programs/*.{h,c} ../tests/*.c ./**/*.{h,cpp} ../examples/*.c ../zlibWrapper/*.{h,c}
diff --git a/contrib/docker/Dockerfile b/contrib/docker/Dockerfile
deleted file mode 100644
index e06a32c0dac7..000000000000
--- a/contrib/docker/Dockerfile
+++ /dev/null
@@ -1,20 +0,0 @@
-# Dockerfile
-# First image to build the binary
-FROM alpine as builder
-
-RUN apk --no-cache add make gcc libc-dev
-COPY . /src
-RUN mkdir /pkg && cd /src && make && make DESTDIR=/pkg install
-
-# Second minimal image to only keep the built binary
-FROM alpine
-
-# Copy the built files
-COPY --from=builder /pkg /
-
-# Copy the license as well
-RUN mkdir -p /usr/local/share/licenses/zstd
-COPY --from=builder /src/LICENSE /usr/local/share/licences/zstd/
-
-# Just run `zstd` if no other command is given
-CMD ["/usr/local/bin/zstd"]
diff --git a/contrib/docker/README.md b/contrib/docker/README.md
deleted file mode 100644
index 43f6d7a1ae1a..000000000000
--- a/contrib/docker/README.md
+++ /dev/null
@@ -1,20 +0,0 @@
-
-## Requirement
-
-The `Dockerfile` script requires a version of `docker` >= 17.05
-
-## Installing docker
-
-The official docker install docs use a ppa with a modern version available:
-https://docs.docker.com/install/linux/docker-ce/ubuntu/
-
-## How to run
-
-`docker build -t zstd .`
-
-## test
-
-```
-echo foo | docker run -i --rm zstd | docker run -i --rm zstd zstdcat
-foo
-```
diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile b/contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile
deleted file mode 100644
index 72ce04f2a56b..000000000000
--- a/contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile
+++ /dev/null
@@ -1,44 +0,0 @@
-ARG :=
-
-CC ?= gcc
-CFLAGS ?= -O3
-INCLUDES := -I ../randomDictBuilder -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
-
-RANDOM_FILE := ../randomDictBuilder/random.c
-IO_FILE := ../randomDictBuilder/io.c
-
-all: run clean
-
-.PHONY: run
-run: benchmark
- echo "Benchmarking with $(ARG)"
- ./benchmark $(ARG)
-
-.PHONY: test
-test: benchmarkTest clean
-
-.PHONY: benchmarkTest
-benchmarkTest: benchmark test.sh
- sh test.sh
-
-benchmark: benchmark.o io.o random.o libzstd.a
- $(CC) $(CFLAGS) benchmark.o io.o random.o libzstd.a -o benchmark
-
-benchmark.o: benchmark.c
- $(CC) $(CFLAGS) $(INCLUDES) -c benchmark.c
-
-random.o: $(RANDOM_FILE)
- $(CC) $(CFLAGS) $(INCLUDES) -c $(RANDOM_FILE)
-
-io.o: $(IO_FILE)
- $(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE)
-
-libzstd.a:
- $(MAKE) -C ../../../lib libzstd.a
- mv ../../../lib/libzstd.a .
-
-.PHONY: clean
-clean:
- rm -f *.o benchmark libzstd.a
- $(MAKE) -C ../../../lib clean
- echo "Cleaning is completed"
diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md b/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md
deleted file mode 100644
index 6a6c7f1d2169..000000000000
--- a/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md
+++ /dev/null
@@ -1,849 +0,0 @@
-Benchmarking Dictionary Builder
-
-### Permitted Argument:
-Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
-
-###Running Test:
-make test
-
-###Usage:
-Benchmark given input files: make ARG= followed by permitted arguments
-
-### Examples:
-make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
-
-###Benchmarking Result:
-- First Cover is optimize cover, second Cover uses optimized d and k from first one.
-- For every f value of fastCover, the first one is optimize fastCover and the second one uses optimized d and k from first one. This is run for accel values from 1 to 10.
-- Fourth column is chosen d and fifth column is chosen k
-
-github:
-NODICT 0.000004 2.999642
-RANDOM 0.024560 8.791189
-LEGACY 0.727109 8.173529
-COVER 40.565676 10.652243 8 1298
-COVER 3.608284 10.652243 8 1298
-FAST f=15 a=1 4.181024 10.570882 8 1154
-FAST f=15 a=1 0.040788 10.570882 8 1154
-FAST f=15 a=2 3.548352 10.574287 6 1970
-FAST f=15 a=2 0.035535 10.574287 6 1970
-FAST f=15 a=3 3.287364 10.613950 6 1010
-FAST f=15 a=3 0.032182 10.613950 6 1010
-FAST f=15 a=4 3.184976 10.573883 6 1058
-FAST f=15 a=4 0.029878 10.573883 6 1058
-FAST f=15 a=5 3.045513 10.580640 8 1154
-FAST f=15 a=5 0.022162 10.580640 8 1154
-FAST f=15 a=6 3.003296 10.583677 6 1010
-FAST f=15 a=6 0.028091 10.583677 6 1010
-FAST f=15 a=7 2.952655 10.622551 6 1106
-FAST f=15 a=7 0.02724 10.622551 6 1106
-FAST f=15 a=8 2.945674 10.614657 6 1010
-FAST f=15 a=8 0.027264 10.614657 6 1010
-FAST f=15 a=9 3.153439 10.564018 8 1154
-FAST f=15 a=9 0.020635 10.564018 8 1154
-FAST f=15 a=10 2.950416 10.511454 6 1010
-FAST f=15 a=10 0.026606 10.511454 6 1010
-FAST f=16 a=1 3.970029 10.681035 8 1154
-FAST f=16 a=1 0.038188 10.681035 8 1154
-FAST f=16 a=2 3.422892 10.484978 6 1874
-FAST f=16 a=2 0.034702 10.484978 6 1874
-FAST f=16 a=3 3.215836 10.632631 8 1154
-FAST f=16 a=3 0.026084 10.632631 8 1154
-FAST f=16 a=4 3.081353 10.626533 6 1106
-FAST f=16 a=4 0.030032 10.626533 6 1106
-FAST f=16 a=5 3.041241 10.545027 8 1922
-FAST f=16 a=5 0.022882 10.545027 8 1922
-FAST f=16 a=6 2.989390 10.638284 6 1874
-FAST f=16 a=6 0.028308 10.638284 6 1874
-FAST f=16 a=7 3.001581 10.797136 6 1106
-FAST f=16 a=7 0.027479 10.797136 6 1106
-FAST f=16 a=8 2.984107 10.658356 8 1058
-FAST f=16 a=8 0.021099 10.658356 8 1058
-FAST f=16 a=9 2.925788 10.523869 6 1010
-FAST f=16 a=9 0.026905 10.523869 6 1010
-FAST f=16 a=10 2.889605 10.745841 6 1874
-FAST f=16 a=10 0.026846 10.745841 6 1874
-FAST f=17 a=1 4.031953 10.672080 8 1202
-FAST f=17 a=1 0.040658 10.672080 8 1202
-FAST f=17 a=2 3.458107 10.589352 8 1106
-FAST f=17 a=2 0.02926 10.589352 8 1106
-FAST f=17 a=3 3.291189 10.662714 8 1154
-FAST f=17 a=3 0.026531 10.662714 8 1154
-FAST f=17 a=4 3.154950 10.549456 8 1346
-FAST f=17 a=4 0.024991 10.549456 8 1346
-FAST f=17 a=5 3.092271 10.541670 6 1202
-FAST f=17 a=5 0.038285 10.541670 6 1202
-FAST f=17 a=6 3.166146 10.729112 6 1874
-FAST f=17 a=6 0.038217 10.729112 6 1874
-FAST f=17 a=7 3.035467 10.810485 6 1106
-FAST f=17 a=7 0.036655 10.810485 6 1106
-FAST f=17 a=8 3.035668 10.530532 6 1058
-FAST f=17 a=8 0.037715 10.530532 6 1058
-FAST f=17 a=9 2.987917 10.589802 8 1922
-FAST f=17 a=9 0.02217 10.589802 8 1922
-FAST f=17 a=10 2.981647 10.722579 8 1106
-FAST f=17 a=10 0.021948 10.722579 8 1106
-FAST f=18 a=1 4.067144 10.634943 8 1154
-FAST f=18 a=1 0.041386 10.634943 8 1154
-FAST f=18 a=2 3.507377 10.546230 6 1970
-FAST f=18 a=2 0.037572 10.546230 6 1970
-FAST f=18 a=3 3.323015 10.648061 8 1154
-FAST f=18 a=3 0.028306 10.648061 8 1154
-FAST f=18 a=4 3.216735 10.705402 6 1010
-FAST f=18 a=4 0.030755 10.705402 6 1010
-FAST f=18 a=5 3.175794 10.588154 8 1874
-FAST f=18 a=5 0.025315 10.588154 8 1874
-FAST f=18 a=6 3.127459 10.751104 8 1106
-FAST f=18 a=6 0.023897 10.751104 8 1106
-FAST f=18 a=7 3.083017 10.780402 6 1106
-FAST f=18 a=7 0.029158 10.780402 6 1106
-FAST f=18 a=8 3.069700 10.547226 8 1346
-FAST f=18 a=8 0.024046 10.547226 8 1346
-FAST f=18 a=9 3.056591 10.674759 6 1010
-FAST f=18 a=9 0.028496 10.674759 6 1010
-FAST f=18 a=10 3.063588 10.737578 8 1106
-FAST f=18 a=10 0.023033 10.737578 8 1106
-FAST f=19 a=1 4.164041 10.650333 8 1154
-FAST f=19 a=1 0.042906 10.650333 8 1154
-FAST f=19 a=2 3.585409 10.577066 6 1058
-FAST f=19 a=2 0.038994 10.577066 6 1058
-FAST f=19 a=3 3.439643 10.639403 8 1154
-FAST f=19 a=3 0.028427 10.639403 8 1154
-FAST f=19 a=4 3.268869 10.554410 8 1298
-FAST f=19 a=4 0.026866 10.554410 8 1298
-FAST f=19 a=5 3.238225 10.615109 6 1010
-FAST f=19 a=5 0.03078 10.615109 6 1010
-FAST f=19 a=6 3.199558 10.609782 6 1874
-FAST f=19 a=6 0.030099 10.609782 6 1874
-FAST f=19 a=7 3.132395 10.794753 6 1106
-FAST f=19 a=7 0.028964 10.794753 6 1106
-FAST f=19 a=8 3.148446 10.554842 8 1298
-FAST f=19 a=8 0.024277 10.554842 8 1298
-FAST f=19 a=9 3.108324 10.668763 6 1010
-FAST f=19 a=9 0.02896 10.668763 6 1010
-FAST f=19 a=10 3.159863 10.757347 8 1106
-FAST f=19 a=10 0.023351 10.757347 8 1106
-FAST f=20 a=1 4.462698 10.661788 8 1154
-FAST f=20 a=1 0.047174 10.661788 8 1154
-FAST f=20 a=2 3.820269 10.678612 6 1106
-FAST f=20 a=2 0.040807 10.678612 6 1106
-FAST f=20 a=3 3.644955 10.648424 8 1154
-FAST f=20 a=3 0.031398 10.648424 8 1154
-FAST f=20 a=4 3.546257 10.559756 8 1298
-FAST f=20 a=4 0.029856 10.559756 8 1298
-FAST f=20 a=5 3.485248 10.646637 6 1010
-FAST f=20 a=5 0.033756 10.646637 6 1010
-FAST f=20 a=6 3.490438 10.775824 8 1106
-FAST f=20 a=6 0.028338 10.775824 8 1106
-FAST f=20 a=7 3.631289 10.801795 6 1106
-FAST f=20 a=7 0.035228 10.801795 6 1106
-FAST f=20 a=8 3.758936 10.545116 8 1346
-FAST f=20 a=8 0.027495 10.545116 8 1346
-FAST f=20 a=9 3.707024 10.677454 6 1010
-FAST f=20 a=9 0.031326 10.677454 6 1010
-FAST f=20 a=10 3.586593 10.756017 8 1106
-FAST f=20 a=10 0.027122 10.756017 8 1106
-FAST f=21 a=1 5.701396 10.655398 8 1154
-FAST f=21 a=1 0.067744 10.655398 8 1154
-FAST f=21 a=2 5.270542 10.650743 6 1106
-FAST f=21 a=2 0.052999 10.650743 6 1106
-FAST f=21 a=3 4.945294 10.652380 8 1154
-FAST f=21 a=3 0.052678 10.652380 8 1154
-FAST f=21 a=4 4.894079 10.543185 8 1298
-FAST f=21 a=4 0.04997 10.543185 8 1298
-FAST f=21 a=5 4.785417 10.630321 6 1010
-FAST f=21 a=5 0.045294 10.630321 6 1010
-FAST f=21 a=6 4.789381 10.664477 6 1874
-FAST f=21 a=6 0.046578 10.664477 6 1874
-FAST f=21 a=7 4.302955 10.805179 6 1106
-FAST f=21 a=7 0.041205 10.805179 6 1106
-FAST f=21 a=8 4.034630 10.551211 8 1298
-FAST f=21 a=8 0.040121 10.551211 8 1298
-FAST f=21 a=9 4.523868 10.799114 6 1010
-FAST f=21 a=9 0.043592 10.799114 6 1010
-FAST f=21 a=10 4.760736 10.750255 8 1106
-FAST f=21 a=10 0.043483 10.750255 8 1106
-FAST f=22 a=1 6.743064 10.640537 8 1154
-FAST f=22 a=1 0.086967 10.640537 8 1154
-FAST f=22 a=2 6.121739 10.626638 6 1970
-FAST f=22 a=2 0.066337 10.626638 6 1970
-FAST f=22 a=3 5.248851 10.640688 8 1154
-FAST f=22 a=3 0.054935 10.640688 8 1154
-FAST f=22 a=4 5.436579 10.588333 8 1298
-FAST f=22 a=4 0.064113 10.588333 8 1298
-FAST f=22 a=5 5.812815 10.652653 6 1010
-FAST f=22 a=5 0.058189 10.652653 6 1010
-FAST f=22 a=6 5.745472 10.666437 6 1874
-FAST f=22 a=6 0.057188 10.666437 6 1874
-FAST f=22 a=7 5.716393 10.806911 6 1106
-FAST f=22 a=7 0.056 10.806911 6 1106
-FAST f=22 a=8 5.698799 10.530784 8 1298
-FAST f=22 a=8 0.0583 10.530784 8 1298
-FAST f=22 a=9 5.710533 10.777391 6 1010
-FAST f=22 a=9 0.054945 10.777391 6 1010
-FAST f=22 a=10 5.685395 10.745023 8 1106
-FAST f=22 a=10 0.056526 10.745023 8 1106
-FAST f=23 a=1 7.836923 10.638828 8 1154
-FAST f=23 a=1 0.099522 10.638828 8 1154
-FAST f=23 a=2 6.627834 10.631061 6 1970
-FAST f=23 a=2 0.066769 10.631061 6 1970
-FAST f=23 a=3 5.602533 10.647288 8 1154
-FAST f=23 a=3 0.064513 10.647288 8 1154
-FAST f=23 a=4 6.005580 10.568747 8 1298
-FAST f=23 a=4 0.062022 10.568747 8 1298
-FAST f=23 a=5 5.481816 10.676921 6 1010
-FAST f=23 a=5 0.058959 10.676921 6 1010
-FAST f=23 a=6 5.460444 10.666194 6 1874
-FAST f=23 a=6 0.057687 10.666194 6 1874
-FAST f=23 a=7 5.659822 10.800377 6 1106
-FAST f=23 a=7 0.06783 10.800377 6 1106
-FAST f=23 a=8 6.826940 10.522167 8 1298
-FAST f=23 a=8 0.070533 10.522167 8 1298
-FAST f=23 a=9 6.804757 10.577799 8 1682
-FAST f=23 a=9 0.069949 10.577799 8 1682
-FAST f=23 a=10 6.774933 10.742093 8 1106
-FAST f=23 a=10 0.068395 10.742093 8 1106
-FAST f=24 a=1 8.444110 10.632783 8 1154
-FAST f=24 a=1 0.094357 10.632783 8 1154
-FAST f=24 a=2 7.289578 10.631061 6 1970
-FAST f=24 a=2 0.098515 10.631061 6 1970
-FAST f=24 a=3 8.619780 10.646289 8 1154
-FAST f=24 a=3 0.098041 10.646289 8 1154
-FAST f=24 a=4 8.508455 10.555199 8 1298
-FAST f=24 a=4 0.093885 10.555199 8 1298
-FAST f=24 a=5 8.471145 10.674363 6 1010
-FAST f=24 a=5 0.088676 10.674363 6 1010
-FAST f=24 a=6 8.426727 10.667228 6 1874
-FAST f=24 a=6 0.087247 10.667228 6 1874
-FAST f=24 a=7 8.356826 10.803027 6 1106
-FAST f=24 a=7 0.085835 10.803027 6 1106
-FAST f=24 a=8 6.756811 10.522049 8 1298
-FAST f=24 a=8 0.07107 10.522049 8 1298
-FAST f=24 a=9 6.548169 10.571882 8 1682
-FAST f=24 a=9 0.0713 10.571882 8 1682
-FAST f=24 a=10 8.238079 10.736453 8 1106
-FAST f=24 a=10 0.07004 10.736453 8 1106
-
-
-hg-commands:
-NODICT 0.000005 2.425276
-RANDOM 0.046332 3.490331
-LEGACY 0.720351 3.911682
-COVER 45.507731 4.132653 8 386
-COVER 1.868810 4.132653 8 386
-FAST f=15 a=1 4.561427 3.866894 8 1202
-FAST f=15 a=1 0.048946 3.866894 8 1202
-FAST f=15 a=2 3.574462 3.892119 8 1538
-FAST f=15 a=2 0.033677 3.892119 8 1538
-FAST f=15 a=3 3.230227 3.888791 6 1346
-FAST f=15 a=3 0.034312 3.888791 6 1346
-FAST f=15 a=4 3.042388 3.899739 8 1010
-FAST f=15 a=4 0.024307 3.899739 8 1010
-FAST f=15 a=5 2.800148 3.896220 8 818
-FAST f=15 a=5 0.022331 3.896220 8 818
-FAST f=15 a=6 2.706518 3.882039 8 578
-FAST f=15 a=6 0.020955 3.882039 8 578
-FAST f=15 a=7 2.701820 3.885430 6 866
-FAST f=15 a=7 0.026074 3.885430 6 866
-FAST f=15 a=8 2.604445 3.906932 8 1826
-FAST f=15 a=8 0.021789 3.906932 8 1826
-FAST f=15 a=9 2.598568 3.870324 6 1682
-FAST f=15 a=9 0.026004 3.870324 6 1682
-FAST f=15 a=10 2.575920 3.920783 8 1442
-FAST f=15 a=10 0.020228 3.920783 8 1442
-FAST f=16 a=1 4.630623 4.001430 8 770
-FAST f=16 a=1 0.047497 4.001430 8 770
-FAST f=16 a=2 3.674721 3.974431 8 1874
-FAST f=16 a=2 0.035761 3.974431 8 1874
-FAST f=16 a=3 3.338384 3.978703 8 1010
-FAST f=16 a=3 0.029436 3.978703 8 1010
-FAST f=16 a=4 3.004412 3.983035 8 1010
-FAST f=16 a=4 0.025744 3.983035 8 1010
-FAST f=16 a=5 2.881892 3.987710 8 770
-FAST f=16 a=5 0.023211 3.987710 8 770
-FAST f=16 a=6 2.807410 3.952717 8 1298
-FAST f=16 a=6 0.023199 3.952717 8 1298
-FAST f=16 a=7 2.819623 3.994627 8 770
-FAST f=16 a=7 0.021806 3.994627 8 770
-FAST f=16 a=8 2.740092 3.954032 8 1826
-FAST f=16 a=8 0.0226 3.954032 8 1826
-FAST f=16 a=9 2.682564 3.969879 6 1442
-FAST f=16 a=9 0.026324 3.969879 6 1442
-FAST f=16 a=10 2.657959 3.969755 8 674
-FAST f=16 a=10 0.020413 3.969755 8 674
-FAST f=17 a=1 4.729228 4.046000 8 530
-FAST f=17 a=1 0.049703 4.046000 8 530
-FAST f=17 a=2 3.764510 3.991519 8 1970
-FAST f=17 a=2 0.038195 3.991519 8 1970
-FAST f=17 a=3 3.416992 4.006296 6 914
-FAST f=17 a=3 0.036244 4.006296 6 914
-FAST f=17 a=4 3.145626 3.979182 8 1970
-FAST f=17 a=4 0.028676 3.979182 8 1970
-FAST f=17 a=5 2.995070 4.050070 8 770
-FAST f=17 a=5 0.025707 4.050070 8 770
-FAST f=17 a=6 2.911833 4.040024 8 770
-FAST f=17 a=6 0.02453 4.040024 8 770
-FAST f=17 a=7 2.894796 4.015884 8 818
-FAST f=17 a=7 0.023956 4.015884 8 818
-FAST f=17 a=8 2.789962 4.039303 8 530
-FAST f=17 a=8 0.023219 4.039303 8 530
-FAST f=17 a=9 2.787625 3.996762 8 1634
-FAST f=17 a=9 0.023651 3.996762 8 1634
-FAST f=17 a=10 2.754796 4.005059 8 1058
-FAST f=17 a=10 0.022537 4.005059 8 1058
-FAST f=18 a=1 4.779117 4.038214 8 242
-FAST f=18 a=1 0.048814 4.038214 8 242
-FAST f=18 a=2 3.829753 4.045768 8 722
-FAST f=18 a=2 0.036541 4.045768 8 722
-FAST f=18 a=3 3.495053 4.021497 8 770
-FAST f=18 a=3 0.032648 4.021497 8 770
-FAST f=18 a=4 3.221395 4.039623 8 770
-FAST f=18 a=4 0.027818 4.039623 8 770
-FAST f=18 a=5 3.059369 4.050414 8 530
-FAST f=18 a=5 0.026296 4.050414 8 530
-FAST f=18 a=6 3.019292 4.010714 6 962
-FAST f=18 a=6 0.031104 4.010714 6 962
-FAST f=18 a=7 2.949322 4.031439 6 770
-FAST f=18 a=7 0.030745 4.031439 6 770
-FAST f=18 a=8 2.876425 4.032088 6 386
-FAST f=18 a=8 0.027407 4.032088 6 386
-FAST f=18 a=9 2.850958 4.053372 8 674
-FAST f=18 a=9 0.023799 4.053372 8 674
-FAST f=18 a=10 2.884352 4.020148 8 1730
-FAST f=18 a=10 0.024401 4.020148 8 1730
-FAST f=19 a=1 4.815669 4.061203 8 674
-FAST f=19 a=1 0.051425 4.061203 8 674
-FAST f=19 a=2 3.951356 4.013822 8 1442
-FAST f=19 a=2 0.039968 4.013822 8 1442
-FAST f=19 a=3 3.554682 4.050425 8 722
-FAST f=19 a=3 0.032725 4.050425 8 722
-FAST f=19 a=4 3.242585 4.054677 8 722
-FAST f=19 a=4 0.028194 4.054677 8 722
-FAST f=19 a=5 3.105909 4.064524 8 818
-FAST f=19 a=5 0.02675 4.064524 8 818
-FAST f=19 a=6 3.059901 4.036857 8 1250
-FAST f=19 a=6 0.026396 4.036857 8 1250
-FAST f=19 a=7 3.016151 4.068234 6 770
-FAST f=19 a=7 0.031501 4.068234 6 770
-FAST f=19 a=8 2.962902 4.077509 8 530
-FAST f=19 a=8 0.023333 4.077509 8 530
-FAST f=19 a=9 2.899607 4.067328 8 530
-FAST f=19 a=9 0.024553 4.067328 8 530
-FAST f=19 a=10 2.950978 4.059901 8 434
-FAST f=19 a=10 0.023852 4.059901 8 434
-FAST f=20 a=1 5.259834 4.027579 8 1634
-FAST f=20 a=1 0.061123 4.027579 8 1634
-FAST f=20 a=2 4.382150 4.025093 8 1634
-FAST f=20 a=2 0.048009 4.025093 8 1634
-FAST f=20 a=3 4.104323 4.060842 8 530
-FAST f=20 a=3 0.040965 4.060842 8 530
-FAST f=20 a=4 3.853340 4.023504 6 914
-FAST f=20 a=4 0.041072 4.023504 6 914
-FAST f=20 a=5 3.728841 4.018089 6 1634
-FAST f=20 a=5 0.037469 4.018089 6 1634
-FAST f=20 a=6 3.683045 4.069138 8 578
-FAST f=20 a=6 0.028011 4.069138 8 578
-FAST f=20 a=7 3.726973 4.063160 8 722
-FAST f=20 a=7 0.028437 4.063160 8 722
-FAST f=20 a=8 3.555073 4.057690 8 386
-FAST f=20 a=8 0.027588 4.057690 8 386
-FAST f=20 a=9 3.551095 4.067253 8 482
-FAST f=20 a=9 0.025976 4.067253 8 482
-FAST f=20 a=10 3.490127 4.068518 8 530
-FAST f=20 a=10 0.025971 4.068518 8 530
-FAST f=21 a=1 7.343816 4.064945 8 770
-FAST f=21 a=1 0.085035 4.064945 8 770
-FAST f=21 a=2 5.930894 4.048206 8 386
-FAST f=21 a=2 0.067349 4.048206 8 386
-FAST f=21 a=3 6.770775 4.063417 8 578
-FAST f=21 a=3 0.077104 4.063417 8 578
-FAST f=21 a=4 6.889409 4.066761 8 626
-FAST f=21 a=4 0.0717 4.066761 8 626
-FAST f=21 a=5 6.714896 4.051813 8 914
-FAST f=21 a=5 0.071026 4.051813 8 914
-FAST f=21 a=6 6.539890 4.047263 8 1922
-FAST f=21 a=6 0.07127 4.047263 8 1922
-FAST f=21 a=7 6.511052 4.068373 8 482
-FAST f=21 a=7 0.065467 4.068373 8 482
-FAST f=21 a=8 6.458788 4.071597 8 482
-FAST f=21 a=8 0.063817 4.071597 8 482
-FAST f=21 a=9 6.377591 4.052905 8 434
-FAST f=21 a=9 0.063112 4.052905 8 434
-FAST f=21 a=10 6.360752 4.047773 8 530
-FAST f=21 a=10 0.063606 4.047773 8 530
-FAST f=22 a=1 10.523471 4.040812 8 962
-FAST f=22 a=1 0.14214 4.040812 8 962
-FAST f=22 a=2 9.454758 4.059396 8 914
-FAST f=22 a=2 0.118343 4.059396 8 914
-FAST f=22 a=3 9.043197 4.043019 8 1922
-FAST f=22 a=3 0.109798 4.043019 8 1922
-FAST f=22 a=4 8.716261 4.044819 8 770
-FAST f=22 a=4 0.099687 4.044819 8 770
-FAST f=22 a=5 8.529472 4.070576 8 530
-FAST f=22 a=5 0.093127 4.070576 8 530
-FAST f=22 a=6 8.424241 4.070565 8 722
-FAST f=22 a=6 0.093703 4.070565 8 722
-FAST f=22 a=7 8.403391 4.070591 8 578
-FAST f=22 a=7 0.089763 4.070591 8 578
-FAST f=22 a=8 8.285221 4.089171 8 530
-FAST f=22 a=8 0.087716 4.089171 8 530
-FAST f=22 a=9 8.282506 4.047470 8 722
-FAST f=22 a=9 0.089773 4.047470 8 722
-FAST f=22 a=10 8.241809 4.064151 8 818
-FAST f=22 a=10 0.090413 4.064151 8 818
-FAST f=23 a=1 12.389208 4.051635 6 530
-FAST f=23 a=1 0.147796 4.051635 6 530
-FAST f=23 a=2 11.300910 4.042835 6 914
-FAST f=23 a=2 0.133178 4.042835 6 914
-FAST f=23 a=3 10.879455 4.047415 8 626
-FAST f=23 a=3 0.129571 4.047415 8 626
-FAST f=23 a=4 10.522718 4.038269 6 914
-FAST f=23 a=4 0.118121 4.038269 6 914
-FAST f=23 a=5 10.348043 4.066884 8 434
-FAST f=23 a=5 0.112098 4.066884 8 434
-FAST f=23 a=6 10.238630 4.048635 8 1010
-FAST f=23 a=6 0.120281 4.048635 8 1010
-FAST f=23 a=7 10.213255 4.061809 8 530
-FAST f=23 a=7 0.1121 4.061809 8 530
-FAST f=23 a=8 10.107879 4.074104 8 818
-FAST f=23 a=8 0.116544 4.074104 8 818
-FAST f=23 a=9 10.063424 4.064811 8 674
-FAST f=23 a=9 0.109045 4.064811 8 674
-FAST f=23 a=10 10.035801 4.054918 8 530
-FAST f=23 a=10 0.108735 4.054918 8 530
-FAST f=24 a=1 14.963878 4.073490 8 722
-FAST f=24 a=1 0.206344 4.073490 8 722
-FAST f=24 a=2 13.833472 4.036100 8 962
-FAST f=24 a=2 0.17486 4.036100 8 962
-FAST f=24 a=3 13.404631 4.026281 6 1106
-FAST f=24 a=3 0.153961 4.026281 6 1106
-FAST f=24 a=4 13.041164 4.065448 8 674
-FAST f=24 a=4 0.155509 4.065448 8 674
-FAST f=24 a=5 12.879412 4.054636 8 674
-FAST f=24 a=5 0.148282 4.054636 8 674
-FAST f=24 a=6 12.773736 4.081376 8 530
-FAST f=24 a=6 0.142563 4.081376 8 530
-FAST f=24 a=7 12.711310 4.059834 8 770
-FAST f=24 a=7 0.149321 4.059834 8 770
-FAST f=24 a=8 12.635459 4.052050 8 1298
-FAST f=24 a=8 0.15095 4.052050 8 1298
-FAST f=24 a=9 12.558104 4.076516 8 722
-FAST f=24 a=9 0.144361 4.076516 8 722
-FAST f=24 a=10 10.661348 4.062137 8 818
-FAST f=24 a=10 0.108232 4.062137 8 818
-
-
-hg-changelog:
-NODICT 0.000017 1.377590
-RANDOM 0.186171 2.097487
-LEGACY 1.670867 2.058907
-COVER 173.561948 2.189685 8 98
-COVER 4.811180 2.189685 8 98
-FAST f=15 a=1 18.685906 2.129682 8 434
-FAST f=15 a=1 0.173376 2.129682 8 434
-FAST f=15 a=2 12.928259 2.131890 8 482
-FAST f=15 a=2 0.102582 2.131890 8 482
-FAST f=15 a=3 11.132343 2.128027 8 386
-FAST f=15 a=3 0.077122 2.128027 8 386
-FAST f=15 a=4 10.120683 2.125797 8 434
-FAST f=15 a=4 0.065175 2.125797 8 434
-FAST f=15 a=5 9.479092 2.127697 8 386
-FAST f=15 a=5 0.057905 2.127697 8 386
-FAST f=15 a=6 9.159523 2.127132 8 1682
-FAST f=15 a=6 0.058604 2.127132 8 1682
-FAST f=15 a=7 8.724003 2.129914 8 434
-FAST f=15 a=7 0.0493 2.129914 8 434
-FAST f=15 a=8 8.595001 2.127137 8 338
-FAST f=15 a=8 0.0474 2.127137 8 338
-FAST f=15 a=9 8.356405 2.125512 8 482
-FAST f=15 a=9 0.046126 2.125512 8 482
-FAST f=15 a=10 8.207111 2.126066 8 338
-FAST f=15 a=10 0.043292 2.126066 8 338
-FAST f=16 a=1 18.464436 2.144040 8 242
-FAST f=16 a=1 0.172156 2.144040 8 242
-FAST f=16 a=2 12.844825 2.148171 8 194
-FAST f=16 a=2 0.099619 2.148171 8 194
-FAST f=16 a=3 11.082568 2.140837 8 290
-FAST f=16 a=3 0.079165 2.140837 8 290
-FAST f=16 a=4 10.066749 2.144405 8 386
-FAST f=16 a=4 0.068411 2.144405 8 386
-FAST f=16 a=5 9.501121 2.140720 8 386
-FAST f=16 a=5 0.061316 2.140720 8 386
-FAST f=16 a=6 9.179332 2.139478 8 386
-FAST f=16 a=6 0.056322 2.139478 8 386
-FAST f=16 a=7 8.849438 2.142412 8 194
-FAST f=16 a=7 0.050493 2.142412 8 194
-FAST f=16 a=8 8.810919 2.143454 8 434
-FAST f=16 a=8 0.051304 2.143454 8 434
-FAST f=16 a=9 8.553900 2.140339 8 194
-FAST f=16 a=9 0.047285 2.140339 8 194
-FAST f=16 a=10 8.398027 2.143130 8 386
-FAST f=16 a=10 0.046386 2.143130 8 386
-FAST f=17 a=1 18.644657 2.157192 8 98
-FAST f=17 a=1 0.173884 2.157192 8 98
-FAST f=17 a=2 13.071242 2.159830 8 146
-FAST f=17 a=2 0.10388 2.159830 8 146
-FAST f=17 a=3 11.332366 2.153654 6 194
-FAST f=17 a=3 0.08983 2.153654 6 194
-FAST f=17 a=4 10.362413 2.156813 8 242
-FAST f=17 a=4 0.070389 2.156813 8 242
-FAST f=17 a=5 9.808159 2.155098 6 338
-FAST f=17 a=5 0.072661 2.155098 6 338
-FAST f=17 a=6 9.451165 2.153845 6 146
-FAST f=17 a=6 0.064959 2.153845 6 146
-FAST f=17 a=7 9.163097 2.155424 6 242
-FAST f=17 a=7 0.064323 2.155424 6 242
-FAST f=17 a=8 9.047276 2.156640 8 242
-FAST f=17 a=8 0.053382 2.156640 8 242
-FAST f=17 a=9 8.807671 2.152396 8 146
-FAST f=17 a=9 0.049617 2.152396 8 146
-FAST f=17 a=10 8.649827 2.152370 8 146
-FAST f=17 a=10 0.047849 2.152370 8 146
-FAST f=18 a=1 18.809502 2.168116 8 98
-FAST f=18 a=1 0.175226 2.168116 8 98
-FAST f=18 a=2 13.756502 2.170870 6 242
-FAST f=18 a=2 0.119507 2.170870 6 242
-FAST f=18 a=3 12.059748 2.163094 6 98
-FAST f=18 a=3 0.093912 2.163094 6 98
-FAST f=18 a=4 11.410294 2.172372 8 98
-FAST f=18 a=4 0.073048 2.172372 8 98
-FAST f=18 a=5 10.560297 2.166388 8 98
-FAST f=18 a=5 0.065136 2.166388 8 98
-FAST f=18 a=6 10.071390 2.162672 8 98
-FAST f=18 a=6 0.059402 2.162672 8 98
-FAST f=18 a=7 10.084214 2.166624 6 194
-FAST f=18 a=7 0.073276 2.166624 6 194
-FAST f=18 a=8 9.953226 2.167454 8 98
-FAST f=18 a=8 0.053659 2.167454 8 98
-FAST f=18 a=9 8.982461 2.161593 6 146
-FAST f=18 a=9 0.05955 2.161593 6 146
-FAST f=18 a=10 8.986092 2.164373 6 242
-FAST f=18 a=10 0.059135 2.164373 6 242
-FAST f=19 a=1 18.908277 2.176021 8 98
-FAST f=19 a=1 0.177316 2.176021 8 98
-FAST f=19 a=2 13.471313 2.176103 8 98
-FAST f=19 a=2 0.106344 2.176103 8 98
-FAST f=19 a=3 11.571406 2.172812 8 98
-FAST f=19 a=3 0.083293 2.172812 8 98
-FAST f=19 a=4 10.632775 2.177770 6 146
-FAST f=19 a=4 0.079864 2.177770 6 146
-FAST f=19 a=5 10.030190 2.175574 6 146
-FAST f=19 a=5 0.07223 2.175574 6 146
-FAST f=19 a=6 9.717818 2.169997 8 98
-FAST f=19 a=6 0.060049 2.169997 8 98
-FAST f=19 a=7 9.397531 2.172770 8 146
-FAST f=19 a=7 0.057188 2.172770 8 146
-FAST f=19 a=8 9.281061 2.175822 8 98
-FAST f=19 a=8 0.053711 2.175822 8 98
-FAST f=19 a=9 9.165242 2.169849 6 146
-FAST f=19 a=9 0.059898 2.169849 6 146
-FAST f=19 a=10 9.048763 2.173394 8 98
-FAST f=19 a=10 0.049757 2.173394 8 98
-FAST f=20 a=1 21.166917 2.183923 6 98
-FAST f=20 a=1 0.205425 2.183923 6 98
-FAST f=20 a=2 15.642753 2.182349 6 98
-FAST f=20 a=2 0.135957 2.182349 6 98
-FAST f=20 a=3 14.053730 2.173544 6 98
-FAST f=20 a=3 0.11266 2.173544 6 98
-FAST f=20 a=4 15.270019 2.183656 8 98
-FAST f=20 a=4 0.107892 2.183656 8 98
-FAST f=20 a=5 15.497927 2.174661 6 98
-FAST f=20 a=5 0.100305 2.174661 6 98
-FAST f=20 a=6 13.973505 2.172391 8 98
-FAST f=20 a=6 0.087565 2.172391 8 98
-FAST f=20 a=7 14.083296 2.172443 8 98
-FAST f=20 a=7 0.078062 2.172443 8 98
-FAST f=20 a=8 12.560048 2.175581 8 98
-FAST f=20 a=8 0.070282 2.175581 8 98
-FAST f=20 a=9 13.078645 2.173975 6 146
-FAST f=20 a=9 0.081041 2.173975 6 146
-FAST f=20 a=10 12.823328 2.177778 8 98
-FAST f=20 a=10 0.074522 2.177778 8 98
-FAST f=21 a=1 29.825370 2.183057 6 98
-FAST f=21 a=1 0.334453 2.183057 6 98
-FAST f=21 a=2 29.476474 2.182752 8 98
-FAST f=21 a=2 0.286602 2.182752 8 98
-FAST f=21 a=3 25.937186 2.175867 8 98
-FAST f=21 a=3 0.17626 2.175867 8 98
-FAST f=21 a=4 20.413865 2.179780 8 98
-FAST f=21 a=4 0.206085 2.179780 8 98
-FAST f=21 a=5 20.541889 2.178328 6 146
-FAST f=21 a=5 0.199157 2.178328 6 146
-FAST f=21 a=6 21.090670 2.174443 6 146
-FAST f=21 a=6 0.190645 2.174443 6 146
-FAST f=21 a=7 20.221569 2.177384 6 146
-FAST f=21 a=7 0.184278 2.177384 6 146
-FAST f=21 a=8 20.322357 2.179456 6 98
-FAST f=21 a=8 0.178458 2.179456 6 98
-FAST f=21 a=9 20.683912 2.174396 6 146
-FAST f=21 a=9 0.190829 2.174396 6 146
-FAST f=21 a=10 20.840865 2.174905 8 98
-FAST f=21 a=10 0.172515 2.174905 8 98
-FAST f=22 a=1 36.822827 2.181612 6 98
-FAST f=22 a=1 0.437389 2.181612 6 98
-FAST f=22 a=2 30.616902 2.183142 8 98
-FAST f=22 a=2 0.324284 2.183142 8 98
-FAST f=22 a=3 28.472482 2.178130 8 98
-FAST f=22 a=3 0.236538 2.178130 8 98
-FAST f=22 a=4 25.847028 2.181878 8 98
-FAST f=22 a=4 0.263744 2.181878 8 98
-FAST f=22 a=5 27.095881 2.180775 8 98
-FAST f=22 a=5 0.24988 2.180775 8 98
-FAST f=22 a=6 25.939172 2.170916 8 98
-FAST f=22 a=6 0.240033 2.170916 8 98
-FAST f=22 a=7 27.064194 2.177849 8 98
-FAST f=22 a=7 0.242383 2.177849 8 98
-FAST f=22 a=8 25.140221 2.178216 8 98
-FAST f=22 a=8 0.237601 2.178216 8 98
-FAST f=22 a=9 25.505283 2.177455 6 146
-FAST f=22 a=9 0.223217 2.177455 6 146
-FAST f=22 a=10 24.529362 2.176705 6 98
-FAST f=22 a=10 0.222876 2.176705 6 98
-FAST f=23 a=1 39.127310 2.183006 6 98
-FAST f=23 a=1 0.417338 2.183006 6 98
-FAST f=23 a=2 32.468161 2.183524 6 98
-FAST f=23 a=2 0.351645 2.183524 6 98
-FAST f=23 a=3 31.577620 2.172604 6 98
-FAST f=23 a=3 0.319659 2.172604 6 98
-FAST f=23 a=4 30.129247 2.183932 6 98
-FAST f=23 a=4 0.307239 2.183932 6 98
-FAST f=23 a=5 29.103376 2.183529 6 146
-FAST f=23 a=5 0.285533 2.183529 6 146
-FAST f=23 a=6 29.776045 2.174367 8 98
-FAST f=23 a=6 0.276846 2.174367 8 98
-FAST f=23 a=7 28.940407 2.178022 6 146
-FAST f=23 a=7 0.274082 2.178022 6 146
-FAST f=23 a=8 29.256009 2.179462 6 98
-FAST f=23 a=8 0.26949 2.179462 6 98
-FAST f=23 a=9 29.347312 2.170407 8 98
-FAST f=23 a=9 0.265034 2.170407 8 98
-FAST f=23 a=10 29.140081 2.171762 8 98
-FAST f=23 a=10 0.259183 2.171762 8 98
-FAST f=24 a=1 44.871179 2.182115 6 98
-FAST f=24 a=1 0.509433 2.182115 6 98
-FAST f=24 a=2 38.694867 2.180549 8 98
-FAST f=24 a=2 0.406695 2.180549 8 98
-FAST f=24 a=3 38.363769 2.172821 8 98
-FAST f=24 a=3 0.359581 2.172821 8 98
-FAST f=24 a=4 36.580797 2.184142 8 98
-FAST f=24 a=4 0.340614 2.184142 8 98
-FAST f=24 a=5 33.125701 2.183301 8 98
-FAST f=24 a=5 0.324874 2.183301 8 98
-FAST f=24 a=6 34.776068 2.173019 6 146
-FAST f=24 a=6 0.340397 2.173019 6 146
-FAST f=24 a=7 34.417625 2.176561 6 146
-FAST f=24 a=7 0.308223 2.176561 6 146
-FAST f=24 a=8 35.470291 2.182161 6 98
-FAST f=24 a=8 0.307724 2.182161 6 98
-FAST f=24 a=9 34.927252 2.172682 6 146
-FAST f=24 a=9 0.300598 2.172682 6 146
-FAST f=24 a=10 33.238355 2.173395 6 98
-FAST f=24 a=10 0.249916 2.173395 6 98
-
-
-hg-manifest:
-NODICT 0.000004 1.866377
-RANDOM 0.696346 2.309436
-LEGACY 7.064527 2.506977
-COVER 876.312865 2.582528 8 434
-COVER 35.684533 2.582528 8 434
-FAST f=15 a=1 76.618201 2.404013 8 1202
-FAST f=15 a=1 0.700722 2.404013 8 1202
-FAST f=15 a=2 49.213058 2.409248 6 1826
-FAST f=15 a=2 0.473393 2.409248 6 1826
-FAST f=15 a=3 41.753197 2.409677 8 1490
-FAST f=15 a=3 0.336848 2.409677 8 1490
-FAST f=15 a=4 38.648295 2.407996 8 1538
-FAST f=15 a=4 0.283952 2.407996 8 1538
-FAST f=15 a=5 36.144936 2.402895 8 1874
-FAST f=15 a=5 0.270128 2.402895 8 1874
-FAST f=15 a=6 35.484675 2.394873 8 1586
-FAST f=15 a=6 0.251637 2.394873 8 1586
-FAST f=15 a=7 34.280599 2.397311 8 1778
-FAST f=15 a=7 0.23984 2.397311 8 1778
-FAST f=15 a=8 32.122572 2.396089 6 1490
-FAST f=15 a=8 0.251508 2.396089 6 1490
-FAST f=15 a=9 29.909842 2.390092 6 1970
-FAST f=15 a=9 0.251233 2.390092 6 1970
-FAST f=15 a=10 30.102938 2.400086 6 1682
-FAST f=15 a=10 0.23688 2.400086 6 1682
-FAST f=16 a=1 67.750401 2.475460 6 1346
-FAST f=16 a=1 0.796035 2.475460 6 1346
-FAST f=16 a=2 52.812027 2.480860 6 1730
-FAST f=16 a=2 0.480384 2.480860 6 1730
-FAST f=16 a=3 44.179259 2.469304 8 1970
-FAST f=16 a=3 0.332657 2.469304 8 1970
-FAST f=16 a=4 37.612728 2.478208 6 1970
-FAST f=16 a=4 0.32498 2.478208 6 1970
-FAST f=16 a=5 35.056222 2.475568 6 1298
-FAST f=16 a=5 0.302824 2.475568 6 1298
-FAST f=16 a=6 34.713012 2.486079 8 1730
-FAST f=16 a=6 0.24755 2.486079 8 1730
-FAST f=16 a=7 33.713687 2.477180 6 1682
-FAST f=16 a=7 0.280358 2.477180 6 1682
-FAST f=16 a=8 31.571412 2.475418 8 1538
-FAST f=16 a=8 0.241241 2.475418 8 1538
-FAST f=16 a=9 31.608069 2.478263 8 1922
-FAST f=16 a=9 0.241764 2.478263 8 1922
-FAST f=16 a=10 31.358002 2.472263 8 1442
-FAST f=16 a=10 0.221661 2.472263 8 1442
-FAST f=17 a=1 66.185775 2.536085 6 1346
-FAST f=17 a=1 0.713549 2.536085 6 1346
-FAST f=17 a=2 50.365000 2.546105 8 1298
-FAST f=17 a=2 0.467846 2.546105 8 1298
-FAST f=17 a=3 42.712843 2.536250 8 1298
-FAST f=17 a=3 0.34047 2.536250 8 1298
-FAST f=17 a=4 39.514227 2.535555 8 1442
-FAST f=17 a=4 0.302989 2.535555 8 1442
-FAST f=17 a=5 35.189292 2.524925 8 1202
-FAST f=17 a=5 0.273451 2.524925 8 1202
-FAST f=17 a=6 35.791683 2.523466 8 1202
-FAST f=17 a=6 0.268261 2.523466 8 1202
-FAST f=17 a=7 37.416136 2.526625 6 1010
-FAST f=17 a=7 0.277558 2.526625 6 1010
-FAST f=17 a=8 37.084707 2.533274 6 1250
-FAST f=17 a=8 0.285104 2.533274 6 1250
-FAST f=17 a=9 34.183814 2.532765 8 1298
-FAST f=17 a=9 0.235133 2.532765 8 1298
-FAST f=17 a=10 31.149235 2.528722 8 1346
-FAST f=17 a=10 0.232679 2.528722 8 1346
-FAST f=18 a=1 72.942176 2.559857 6 386
-FAST f=18 a=1 0.718618 2.559857 6 386
-FAST f=18 a=2 51.690440 2.559572 8 290
-FAST f=18 a=2 0.403978 2.559572 8 290
-FAST f=18 a=3 45.344908 2.561040 8 962
-FAST f=18 a=3 0.357205 2.561040 8 962
-FAST f=18 a=4 39.804522 2.558446 8 1010
-FAST f=18 a=4 0.310526 2.558446 8 1010
-FAST f=18 a=5 38.134888 2.561811 8 626
-FAST f=18 a=5 0.273743 2.561811 8 626
-FAST f=18 a=6 35.091890 2.555518 8 722
-FAST f=18 a=6 0.260135 2.555518 8 722
-FAST f=18 a=7 34.639523 2.562938 8 290
-FAST f=18 a=7 0.234294 2.562938 8 290
-FAST f=18 a=8 36.076431 2.563567 8 1586
-FAST f=18 a=8 0.274075 2.563567 8 1586
-FAST f=18 a=9 36.376433 2.560950 8 722
-FAST f=18 a=9 0.240106 2.560950 8 722
-FAST f=18 a=10 32.624790 2.559340 8 578
-FAST f=18 a=10 0.234704 2.559340 8 578
-FAST f=19 a=1 70.513761 2.572441 8 194
-FAST f=19 a=1 0.726112 2.572441 8 194
-FAST f=19 a=2 59.263032 2.574560 8 482
-FAST f=19 a=2 0.451554 2.574560 8 482
-FAST f=19 a=3 51.509594 2.571546 6 194
-FAST f=19 a=3 0.393014 2.571546 6 194
-FAST f=19 a=4 55.393906 2.573386 8 482
-FAST f=19 a=4 0.38819 2.573386 8 482
-FAST f=19 a=5 43.201736 2.567589 8 674
-FAST f=19 a=5 0.292155 2.567589 8 674
-FAST f=19 a=6 42.911687 2.572666 6 434
-FAST f=19 a=6 0.303988 2.572666 6 434
-FAST f=19 a=7 44.687591 2.573613 6 290
-FAST f=19 a=7 0.308721 2.573613 6 290
-FAST f=19 a=8 37.372868 2.571039 6 194
-FAST f=19 a=8 0.287137 2.571039 6 194
-FAST f=19 a=9 36.074230 2.566473 6 482
-FAST f=19 a=9 0.280721 2.566473 6 482
-FAST f=19 a=10 33.731720 2.570306 8 194
-FAST f=19 a=10 0.224073 2.570306 8 194
-FAST f=20 a=1 79.670634 2.581146 6 290
-FAST f=20 a=1 0.899986 2.581146 6 290
-FAST f=20 a=2 58.827141 2.579782 8 386
-FAST f=20 a=2 0.602288 2.579782 8 386
-FAST f=20 a=3 51.289004 2.579627 8 722
-FAST f=20 a=3 0.446091 2.579627 8 722
-FAST f=20 a=4 47.711068 2.581508 8 722
-FAST f=20 a=4 0.473007 2.581508 8 722
-FAST f=20 a=5 47.402929 2.578062 6 434
-FAST f=20 a=5 0.497131 2.578062 6 434
-FAST f=20 a=6 54.797102 2.577365 8 482
-FAST f=20 a=6 0.515061 2.577365 8 482
-FAST f=20 a=7 51.370877 2.583050 8 386
-FAST f=20 a=7 0.402878 2.583050 8 386
-FAST f=20 a=8 51.437931 2.574875 6 242
-FAST f=20 a=8 0.453094 2.574875 6 242
-FAST f=20 a=9 44.105456 2.576700 6 242
-FAST f=20 a=9 0.456633 2.576700 6 242
-FAST f=20 a=10 44.447580 2.578305 8 338
-FAST f=20 a=10 0.409121 2.578305 8 338
-FAST f=21 a=1 113.031686 2.582449 6 242
-FAST f=21 a=1 1.456971 2.582449 6 242
-FAST f=21 a=2 97.700932 2.582124 8 194
-FAST f=21 a=2 1.072078 2.582124 8 194
-FAST f=21 a=3 96.563648 2.585479 8 434
-FAST f=21 a=3 0.949528 2.585479 8 434
-FAST f=21 a=4 90.597813 2.582366 6 386
-FAST f=21 a=4 0.76944 2.582366 6 386
-FAST f=21 a=5 86.815980 2.579043 8 434
-FAST f=21 a=5 0.858167 2.579043 8 434
-FAST f=21 a=6 91.235820 2.578378 8 530
-FAST f=21 a=6 0.684274 2.578378 8 530
-FAST f=21 a=7 84.392788 2.581243 8 386
-FAST f=21 a=7 0.814386 2.581243 8 386
-FAST f=21 a=8 82.052310 2.582547 8 338
-FAST f=21 a=8 0.822633 2.582547 8 338
-FAST f=21 a=9 74.696074 2.579319 8 194
-FAST f=21 a=9 0.811028 2.579319 8 194
-FAST f=21 a=10 76.211170 2.578766 8 290
-FAST f=21 a=10 0.809715 2.578766 8 290
-FAST f=22 a=1 138.976871 2.580478 8 194
-FAST f=22 a=1 1.748932 2.580478 8 194
-FAST f=22 a=2 120.164097 2.583633 8 386
-FAST f=22 a=2 1.333239 2.583633 8 386
-FAST f=22 a=3 111.986474 2.582566 6 194
-FAST f=22 a=3 1.305734 2.582566 6 194
-FAST f=22 a=4 108.548148 2.583068 6 194
-FAST f=22 a=4 1.314026 2.583068 6 194
-FAST f=22 a=5 103.173017 2.583495 6 290
-FAST f=22 a=5 1.228664 2.583495 6 290
-FAST f=22 a=6 108.421262 2.582349 8 530
-FAST f=22 a=6 1.076773 2.582349 8 530
-FAST f=22 a=7 103.284127 2.581022 8 386
-FAST f=22 a=7 1.112117 2.581022 8 386
-FAST f=22 a=8 96.330279 2.581073 8 290
-FAST f=22 a=8 1.109303 2.581073 8 290
-FAST f=22 a=9 97.651348 2.580075 6 194
-FAST f=22 a=9 0.933032 2.580075 6 194
-FAST f=22 a=10 101.660621 2.584886 8 194
-FAST f=22 a=10 0.796823 2.584886 8 194
-FAST f=23 a=1 159.322978 2.581474 6 242
-FAST f=23 a=1 2.015878 2.581474 6 242
-FAST f=23 a=2 134.331775 2.581619 8 194
-FAST f=23 a=2 1.545845 2.581619 8 194
-FAST f=23 a=3 127.724552 2.579888 6 338
-FAST f=23 a=3 1.444496 2.579888 6 338
-FAST f=23 a=4 126.077675 2.578137 6 242
-FAST f=23 a=4 1.364394 2.578137 6 242
-FAST f=23 a=5 124.914027 2.580843 8 338
-FAST f=23 a=5 1.116059 2.580843 8 338
-FAST f=23 a=6 122.874153 2.577637 6 338
-FAST f=23 a=6 1.164584 2.577637 6 338
-FAST f=23 a=7 123.099257 2.582715 6 386
-FAST f=23 a=7 1.354042 2.582715 6 386
-FAST f=23 a=8 122.026753 2.577681 8 194
-FAST f=23 a=8 1.210966 2.577681 8 194
-FAST f=23 a=9 121.164312 2.584599 6 290
-FAST f=23 a=9 1.174859 2.584599 6 290
-FAST f=23 a=10 117.462222 2.580358 8 194
-FAST f=23 a=10 1.075258 2.580358 8 194
-FAST f=24 a=1 169.539659 2.581642 6 194
-FAST f=24 a=1 1.916804 2.581642 6 194
-FAST f=24 a=2 160.539270 2.580421 6 290
-FAST f=24 a=2 1.71087 2.580421 6 290
-FAST f=24 a=3 155.455874 2.580449 6 242
-FAST f=24 a=3 1.60307 2.580449 6 242
-FAST f=24 a=4 147.630320 2.582953 6 338
-FAST f=24 a=4 1.396364 2.582953 6 338
-FAST f=24 a=5 133.767428 2.580589 6 290
-FAST f=24 a=5 1.19933 2.580589 6 290
-FAST f=24 a=6 146.437535 2.579453 8 194
-FAST f=24 a=6 1.385405 2.579453 8 194
-FAST f=24 a=7 147.227507 2.584155 8 386
-FAST f=24 a=7 1.48942 2.584155 8 386
-FAST f=24 a=8 138.005773 2.584115 8 194
-FAST f=24 a=8 1.352 2.584115 8 194
-FAST f=24 a=9 141.442625 2.582902 8 290
-FAST f=24 a=9 1.39647 2.582902 8 290
-FAST f=24 a=10 142.157446 2.582701 8 434
-FAST f=24 a=10 1.498889 2.582701 8 434
diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c b/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c
deleted file mode 100644
index cd943797bdea..000000000000
--- a/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c
+++ /dev/null
@@ -1,442 +0,0 @@
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* strcmp, strlen */
-#include <errno.h> /* errno */
-#include <ctype.h>
-#include <time.h>
-#include "random.h"
-#include "dictBuilder.h"
-#include "zstd_internal.h" /* includes zstd.h */
-#include "io.h"
-#include "util.h"
-#include "zdict.h"
-
-
-
-/*-*************************************
-* Console display
-***************************************/
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-
-static const U64 g_refreshRate = SEC_TO_MICRO / 6;
-static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
-
-#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
- if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
- { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
- if (displayLevel>=4) fflush(stderr); } } }
-
-
-/*-*************************************
-* Exceptions
-***************************************/
-#ifndef DEBUG
-# define DEBUG 0
-#endif
-#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
-#define EXM_THROW(error, ...) \
-{ \
- DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
- DISPLAY("Error %i : ", error); \
- DISPLAY(__VA_ARGS__); \
- DISPLAY("\n"); \
- exit(error); \
-}
-
-
-/*-*************************************
-* Constants
-***************************************/
-static const unsigned g_defaultMaxDictSize = 110 KB;
-#define DEFAULT_CLEVEL 3
-#define DEFAULT_DISPLAYLEVEL 2
-
-
-/*-*************************************
-* Struct
-***************************************/
-typedef struct {
- const void* dictBuffer;
- size_t dictSize;
-} dictInfo;
-
-
-/*-*************************************
-* Dictionary related operations
-***************************************/
-/** createDictFromFiles() :
- * Based on type of param given, train dictionary using the corresponding algorithm
- * @return dictInfo containing dictionary buffer and dictionary size
- */
-dictInfo* createDictFromFiles(sampleInfo *info, unsigned maxDictSize,
- ZDICT_random_params_t *randomParams, ZDICT_cover_params_t *coverParams,
- ZDICT_legacy_params_t *legacyParams, ZDICT_fastCover_params_t *fastParams) {
- unsigned const displayLevel = randomParams ? randomParams->zParams.notificationLevel :
- coverParams ? coverParams->zParams.notificationLevel :
- legacyParams ? legacyParams->zParams.notificationLevel :
- fastParams ? fastParams->zParams.notificationLevel :
- DEFAULT_DISPLAYLEVEL; /* no dict */
- void* const dictBuffer = malloc(maxDictSize);
-
- dictInfo* dInfo = NULL;
-
- /* Checks */
- if (!dictBuffer)
- EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
-
- { size_t dictSize;
- if(randomParams) {
- dictSize = ZDICT_trainFromBuffer_random(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, *randomParams);
- }else if(coverParams) {
- /* Run the optimize version if either k or d is not provided */
- if (!coverParams->d || !coverParams->k){
- dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, coverParams);
- } else {
- dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, *coverParams);
- }
- } else if(legacyParams) {
- dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, *legacyParams);
- } else if(fastParams) {
- /* Run the optimize version if either k or d is not provided */
- if (!fastParams->d || !fastParams->k) {
- dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, fastParams);
- } else {
- dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, *fastParams);
- }
- } else {
- dictSize = 0;
- }
- if (ZDICT_isError(dictSize)) {
- DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
- free(dictBuffer);
- return dInfo;
- }
- dInfo = (dictInfo *)malloc(sizeof(dictInfo));
- dInfo->dictBuffer = dictBuffer;
- dInfo->dictSize = dictSize;
- }
- return dInfo;
-}
-
-
-/** compressWithDict() :
- * Compress samples from sample buffer given dictionary stored on dictionary buffer and compression level
- * @return compression ratio
- */
-double compressWithDict(sampleInfo *srcInfo, dictInfo* dInfo, int compressionLevel, int displayLevel) {
- /* Local variables */
- size_t totalCompressedSize = 0;
- size_t totalOriginalSize = 0;
- const unsigned hasDict = dInfo->dictSize > 0 ? 1 : 0;
- double cRatio;
- size_t dstCapacity;
- int i;
-
- /* Pointers */
- ZSTD_CDict *cdict = NULL;
- ZSTD_CCtx* cctx = NULL;
- size_t *offsets = NULL;
- void* dst = NULL;
-
- /* Allocate dst with enough space to compress the maximum sized sample */
- {
- size_t maxSampleSize = 0;
- for (i = 0; i < srcInfo->nbSamples; i++) {
- maxSampleSize = MAX(srcInfo->samplesSizes[i], maxSampleSize);
- }
- dstCapacity = ZSTD_compressBound(maxSampleSize);
- dst = malloc(dstCapacity);
- }
-
- /* Calculate offset for each sample */
- offsets = (size_t *)malloc((srcInfo->nbSamples + 1) * sizeof(size_t));
- offsets[0] = 0;
- for (i = 1; i <= srcInfo->nbSamples; i++) {
- offsets[i] = offsets[i - 1] + srcInfo->samplesSizes[i - 1];
- }
-
- /* Create the cctx */
- cctx = ZSTD_createCCtx();
- if(!cctx || !dst) {
- cRatio = -1;
- goto _cleanup;
- }
-
- /* Create CDict if there's a dictionary stored on buffer */
- if (hasDict) {
- cdict = ZSTD_createCDict(dInfo->dictBuffer, dInfo->dictSize, compressionLevel);
- if(!cdict) {
- cRatio = -1;
- goto _cleanup;
- }
- }
-
- /* Compress each sample and sum their sizes*/
- const BYTE *const samples = (const BYTE *)srcInfo->srcBuffer;
- for (i = 0; i < srcInfo->nbSamples; i++) {
- size_t compressedSize;
- if(hasDict) {
- compressedSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, samples + offsets[i], srcInfo->samplesSizes[i], cdict);
- } else {
- compressedSize = ZSTD_compressCCtx(cctx, dst, dstCapacity,samples + offsets[i], srcInfo->samplesSizes[i], compressionLevel);
- }
- if (ZSTD_isError(compressedSize)) {
- cRatio = -1;
- goto _cleanup;
- }
- totalCompressedSize += compressedSize;
- }
-
- /* Sum original sizes */
- for (i = 0; i<srcInfo->nbSamples; i++) {
- totalOriginalSize += srcInfo->samplesSizes[i];
- }
-
- /* Calculate compression ratio */
- DISPLAYLEVEL(2, "original size is %lu\n", totalOriginalSize);
- DISPLAYLEVEL(2, "compressed size is %lu\n", totalCompressedSize);
- cRatio = (double)totalOriginalSize/(double)totalCompressedSize;
-
-_cleanup:
- free(dst);
- free(offsets);
- ZSTD_freeCCtx(cctx);
- ZSTD_freeCDict(cdict);
- return cRatio;
-}
-
-
-/** FreeDictInfo() :
- * Free memory allocated for dictInfo
- */
-void freeDictInfo(dictInfo* info) {
- if (!info) return;
- if (info->dictBuffer) free((void*)(info->dictBuffer));
- free(info);
-}
-
-
-
-/*-********************************************************
- * Benchmarking functions
-**********************************************************/
-/** benchmarkDictBuilder() :
- * Measure how long a dictionary builder takes and compression ratio with the dictionary built
- * @return 0 if benchmark successfully, 1 otherwise
- */
-int benchmarkDictBuilder(sampleInfo *srcInfo, unsigned maxDictSize, ZDICT_random_params_t *randomParam,
- ZDICT_cover_params_t *coverParam, ZDICT_legacy_params_t *legacyParam,
- ZDICT_fastCover_params_t *fastParam) {
- /* Local variables */
- const unsigned displayLevel = randomParam ? randomParam->zParams.notificationLevel :
- coverParam ? coverParam->zParams.notificationLevel :
- legacyParam ? legacyParam->zParams.notificationLevel :
- fastParam ? fastParam->zParams.notificationLevel:
- DEFAULT_DISPLAYLEVEL; /* no dict */
- const char* name = randomParam ? "RANDOM" :
- coverParam ? "COVER" :
- legacyParam ? "LEGACY" :
- fastParam ? "FAST":
- "NODICT"; /* no dict */
- const unsigned cLevel = randomParam ? randomParam->zParams.compressionLevel :
- coverParam ? coverParam->zParams.compressionLevel :
- legacyParam ? legacyParam->zParams.compressionLevel :
- fastParam ? fastParam->zParams.compressionLevel:
- DEFAULT_CLEVEL; /* no dict */
- int result = 0;
-
- /* Calculate speed */
- const UTIL_time_t begin = UTIL_getTime();
- dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, randomParam, coverParam, legacyParam, fastParam);
- const U64 timeMicro = UTIL_clockSpanMicro(begin);
- const double timeSec = timeMicro / (double)SEC_TO_MICRO;
- if (!dInfo) {
- DISPLAYLEVEL(1, "%s does not train successfully\n", name);
- result = 1;
- goto _cleanup;
- }
- DISPLAYLEVEL(1, "%s took %f seconds to execute \n", name, timeSec);
-
- /* Calculate compression ratio */
- const double cRatio = compressWithDict(srcInfo, dInfo, cLevel, displayLevel);
- if (cRatio < 0) {
- DISPLAYLEVEL(1, "Compressing with %s dictionary does not work\n", name);
- result = 1;
- goto _cleanup;
-
- }
- DISPLAYLEVEL(1, "Compression ratio with %s dictionary is %f\n", name, cRatio);
-
-_cleanup:
- freeDictInfo(dInfo);
- return result;
-}
-
-
-
-int main(int argCount, const char* argv[])
-{
- const int displayLevel = DEFAULT_DISPLAYLEVEL;
- const char* programName = argv[0];
- int result = 0;
-
- /* Initialize arguments to default values */
- unsigned k = 200;
- unsigned d = 8;
- unsigned f;
- unsigned accel;
- unsigned i;
- const unsigned cLevel = DEFAULT_CLEVEL;
- const unsigned dictID = 0;
- const unsigned maxDictSize = g_defaultMaxDictSize;
-
- /* Initialize table to store input files */
- const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
- unsigned filenameIdx = 0;
-
- char* fileNamesBuf = NULL;
- unsigned fileNamesNb = filenameIdx;
- const int followLinks = 0;
- const char** extendedFileList = NULL;
-
- /* Parse arguments */
- for (i = 1; i < argCount; i++) {
- const char* argument = argv[i];
- if (longCommandWArg(&argument, "in=")) {
- filenameTable[filenameIdx] = argument;
- filenameIdx++;
- continue;
- }
- DISPLAYLEVEL(1, "benchmark: Incorrect parameters\n");
- return 1;
- }
-
- /* Get the list of all files recursively (because followLinks==0)*/
- extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
- &fileNamesNb, followLinks);
- if (extendedFileList) {
- unsigned u;
- for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
- free((void*)filenameTable);
- filenameTable = extendedFileList;
- filenameIdx = fileNamesNb;
- }
-
- /* get sampleInfo */
- size_t blockSize = 0;
- sampleInfo* srcInfo= getSampleInfo(filenameTable,
- filenameIdx, blockSize, maxDictSize, displayLevel);
-
- /* set up zParams */
- ZDICT_params_t zParams;
- zParams.compressionLevel = cLevel;
- zParams.notificationLevel = displayLevel;
- zParams.dictID = dictID;
-
- /* with no dict */
- {
- const int noDictResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, NULL);
- if(noDictResult) {
- result = 1;
- goto _cleanup;
- }
- }
-
- /* for random */
- {
- ZDICT_random_params_t randomParam;
- randomParam.zParams = zParams;
- randomParam.k = k;
- const int randomResult = benchmarkDictBuilder(srcInfo, maxDictSize, &randomParam, NULL, NULL, NULL);
- DISPLAYLEVEL(2, "k=%u\n", randomParam.k);
- if(randomResult) {
- result = 1;
- goto _cleanup;
- }
- }
-
- /* for legacy */
- {
- ZDICT_legacy_params_t legacyParam;
- legacyParam.zParams = zParams;
- legacyParam.selectivityLevel = 9;
- const int legacyResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, &legacyParam, NULL);
- DISPLAYLEVEL(2, "selectivityLevel=%u\n", legacyParam.selectivityLevel);
- if(legacyResult) {
- result = 1;
- goto _cleanup;
- }
- }
-
- /* for cover */
- {
- /* for cover (optimizing k and d) */
- ZDICT_cover_params_t coverParam;
- memset(&coverParam, 0, sizeof(coverParam));
- coverParam.zParams = zParams;
- coverParam.splitPoint = 1.0;
- coverParam.steps = 40;
- coverParam.nbThreads = 1;
- const int coverOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL);
- DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100));
- if(coverOptResult) {
- result = 1;
- goto _cleanup;
- }
-
- /* for cover (with k and d provided) */
- const int coverResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL);
- DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100));
- if(coverResult) {
- result = 1;
- goto _cleanup;
- }
-
- }
-
- /* for fastCover */
- for (f = 15; f < 25; f++){
- DISPLAYLEVEL(2, "current f is %u\n", f);
- for (accel = 1; accel < 11; accel++) {
- DISPLAYLEVEL(2, "current accel is %u\n", accel);
- /* for fastCover (optimizing k and d) */
- ZDICT_fastCover_params_t fastParam;
- memset(&fastParam, 0, sizeof(fastParam));
- fastParam.zParams = zParams;
- fastParam.f = f;
- fastParam.steps = 40;
- fastParam.nbThreads = 1;
- fastParam.accel = accel;
- const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
- DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100), fastParam.accel);
- if(fastOptResult) {
- result = 1;
- goto _cleanup;
- }
-
- /* for fastCover (with k and d provided) */
- for (i = 0; i < 5; i++) {
- const int fastResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
- DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100), fastParam.accel);
- if(fastResult) {
- result = 1;
- goto _cleanup;
- }
- }
- }
- }
-
-
- /* Free allocated memory */
-_cleanup:
- UTIL_freeFileList(extendedFileList, fileNamesBuf);
- freeSampleInfo(srcInfo);
- return result;
-}
diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/dictBuilder.h b/contrib/experimental_dict_builders/benchmarkDictBuilder/dictBuilder.h
deleted file mode 100644
index 781ec8c2f39e..000000000000
--- a/contrib/experimental_dict_builders/benchmarkDictBuilder/dictBuilder.h
+++ /dev/null
@@ -1,6 +0,0 @@
-/* ZDICT_trainFromBuffer_legacy() :
- * issue : samplesBuffer needs to be followed by a noisy guard band.
- * workaround : duplicate the buffer, and add the noise */
-size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
- ZDICT_legacy_params_t params);
diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/test.sh b/contrib/experimental_dict_builders/benchmarkDictBuilder/test.sh
deleted file mode 100755
index 5eaf5930a3c6..000000000000
--- a/contrib/experimental_dict_builders/benchmarkDictBuilder/test.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-# Run the dictionary-builder benchmark on the zstd lib/common sources.
-# The message below names the same path that is actually passed to ./benchmark.
-echo "Benchmark with in=../../../lib/common"
-./benchmark in=../../../lib/common
diff --git a/contrib/experimental_dict_builders/fastCover/Makefile b/contrib/experimental_dict_builders/fastCover/Makefile
deleted file mode 100644
index 3ba24790ce01..000000000000
--- a/contrib/experimental_dict_builders/fastCover/Makefile
+++ /dev/null
@@ -1,54 +0,0 @@
-ARG :=
-
-CC ?= gcc
-CFLAGS ?= -O3 -g
-INCLUDES := -I ../../../programs -I ../randomDictBuilder -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
-
-IO_FILE := ../randomDictBuilder/io.c
-
-TEST_INPUT := ../../../lib
-TEST_OUTPUT := fastCoverDict
-
-# Default target: build the tool, run it with $(ARG), then remove build products.
-# Declared phony (like test/run/clean) so a stray file named "all" cannot mask it.
-.PHONY: all
-all: main run clean
-
-.PHONY: test
-test: main testrun testshell clean
-
-.PHONY: run
-run:
- echo "Building a fastCover dictionary with given arguments"
- ./main $(ARG)
-
-main: main.o io.o fastCover.o libzstd.a
- $(CC) $(CFLAGS) main.o io.o fastCover.o libzstd.a -o main
-
-main.o: main.c
- $(CC) $(CFLAGS) $(INCLUDES) -c main.c
-
-fastCover.o: fastCover.c
- $(CC) $(CFLAGS) $(INCLUDES) -c fastCover.c
-
-io.o: $(IO_FILE)
- $(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE)
-
-libzstd.a:
- $(MAKE) MOREFLAGS=-g -C ../../../lib libzstd.a
- mv ../../../lib/libzstd.a .
-
-.PHONY: testrun
-testrun: main
- echo "Run with $(TEST_INPUT) and $(TEST_OUTPUT) "
- ./main in=$(TEST_INPUT) out=$(TEST_OUTPUT)
- zstd -be3 -D $(TEST_OUTPUT) -r $(TEST_INPUT) -q
- rm -f $(TEST_OUTPUT)
-
-.PHONY: testshell
-testshell: test.sh
- sh test.sh
- echo "Finish running test.sh"
-
-.PHONY: clean
-clean:
- rm -f *.o main libzstd.a
- $(MAKE) -C ../../../lib clean
- echo "Cleaning is completed"
diff --git a/contrib/experimental_dict_builders/fastCover/README.md b/contrib/experimental_dict_builders/fastCover/README.md
deleted file mode 100644
index ad377743f2a7..000000000000
--- a/contrib/experimental_dict_builders/fastCover/README.md
+++ /dev/null
@@ -1,24 +0,0 @@
-FastCover Dictionary Builder
-
-### Permitted Arguments:
-Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
-Output Dictionary (out=dictName): if not provided, default to fastCoverDict
-Dictionary ID (dictID=#): nonnegative number; if not provided, default to 0
-Maximum Dictionary Size (maxdict=#): positive number; in bytes, if not provided, default to 110KB
-Size of Selected Segment (k=#): positive number; in bytes; if not provided, default to 200
-Size of Dmer (d=#): either 6 or 8; if not provided, default to 8
-Number of steps (steps=#): positive number, if not provided, default to 32
-Percentage of samples used for training(split=#): positive number; if not provided, default to 100
-
-
-### Running Test:
-make test
-
-
-### Usage:
-To build a FASTCOVER dictionary with the provided arguments, run `make ARG=` followed by the arguments.
-If k or d is not provided, the optimized version of FASTCOVER is run.
-
-### Examples:
-make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520"
-make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
diff --git a/contrib/experimental_dict_builders/fastCover/fastCover.c b/contrib/experimental_dict_builders/fastCover/fastCover.c
deleted file mode 100644
index 0a338bde2b20..000000000000
--- a/contrib/experimental_dict_builders/fastCover/fastCover.c
+++ /dev/null
@@ -1,809 +0,0 @@
-/*-*************************************
-* Dependencies
-***************************************/
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memset */
-#include <time.h> /* clock */
-#include "mem.h" /* read */
-#include "pool.h"
-#include "threading.h"
-#include "fastCover.h"
-#include "zstd_internal.h" /* includes zstd.h */
-#include "zdict.h"
-
-
-/*-*************************************
-* Constants
-***************************************/
-#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
-#define FASTCOVER_MAX_F 32
-#define DEFAULT_SPLITPOINT 1.0
-
-/*-*************************************
-* Console display
-***************************************/
-static int g_displayLevel = 2;
-#define DISPLAY(...) \
- { \
- fprintf(stderr, __VA_ARGS__); \
- fflush(stderr); \
- }
-#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
- if (displayLevel >= l) { \
- DISPLAY(__VA_ARGS__); \
- } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
-#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
-
-#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
- if (displayLevel >= l) { \
- if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
- g_time = clock(); \
- DISPLAY(__VA_ARGS__); \
- } \
- }
-#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
-static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
-static clock_t g_time = 0;
-
-
-/*-*************************************
-* Hash Functions
-***************************************/
-static const U64 prime6bytes = 227718039650203ULL; /* multiplier used by ZSTD_hash6() */
-static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } /* the left shift by 16 discards the top 16 bits of u, so only its low 48 bits are hashed; returns the top h bits of the 64-bit product (h == 0 would shift by 64, which is undefined) */
-static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } /* hashes the 64-bit word MEM_readLE64() loads from p, so 8 bytes are read; presumably a little-endian load, making the low 48 bits the first 6 bytes at p -- confirm in mem.h */
-
-static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; /* multiplier used by ZSTD_hash8() */
-static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } /* returns the top h bits of u * prime8bytes (h == 0 would shift by 64, which is undefined) */
-static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } /* hashes the full 64-bit word MEM_readLE64() loads from p */
-
-
-/**
- * Hash the d-byte value pointed to by p and reduce it mod 2^h.
- *
- * p : start of the dmer; a full 64-bit word is loaded from it for both
- *     supported dmer sizes, so 8 bytes must be readable at p.
- * h : log2 of the table size (callers pass the `f` parameter, which
- *     FASTCOVER_checkParameters() limits to 1..FASTCOVER_MAX_F).
- * d : dmer size; 6 selects the 6-byte hash, any other value the 8-byte hash.
- *
- * Returns an index in [0, 2^h).
- */
-static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) {
-  /* Build the mask in 64 bits: `1 << h` is an int shift, which is undefined
-   * for h >= 31 even though FASTCOVER_MAX_F allows h up to 32. */
-  const size_t mask = (size_t)(((U64)1 << h) - 1);
-  if (d == 6) {
-    return ZSTD_hash6Ptr(p, h) & mask;
-  }
-  return ZSTD_hash8Ptr(p, h) & mask;
-}
-
-
-/*-*************************************
-* Context
-***************************************/
-typedef struct {
- const BYTE *samples; /* caller's concatenated sample buffer (not owned) */
- size_t *offsets; /* nbSamples + 1 entries: offsets[i] is where sample i starts in `samples`; allocated by FASTCOVER_ctx_init() */
- const size_t *samplesSizes; /* caller's array of per-sample sizes (not owned) */
- size_t nbSamples; /* total number of samples */
- size_t nbTrainSamples; /* number of leading samples used for training */
- size_t nbTestSamples; /* number of samples used for evaluation */
- size_t nbDmers; /* set from the training size and d in FASTCOVER_ctx_init(); bounds the positions scanned when building a dictionary */
- U32 *freqs; /* 2^f counters filled by FASTCOVER_computeFrequency(); allocated by FASTCOVER_ctx_init() */
- U16 *segmentFreqs; /* 2^f scratch counters used by FASTCOVER_selectSegment(); allocated by FASTCOVER_ctx_init() */
- unsigned d; /* dmer size this context was built for */
-} FASTCOVER_ctx_t;
-
-
-/*-*************************************
-* Helper functions
-***************************************/
-/**
- * Returns the sum of the sample sizes.
- *
- * samplesSizes : array holding at least nbSamples entries
- * nbSamples    : number of leading entries to add up; 0 yields 0
- *
- * The addition is done in size_t and is not checked for overflow.
- */
-static size_t FASTCOVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
- size_t sum = 0;
- unsigned i;
- for (i = 0; i < nbSamples; ++i) {
- sum += samplesSizes[i];
- }
- return sum;
-}
-
-
-/*-*************************************
-* fast functions
-***************************************/
-/**
- * A segment is a range in the source as well as the score of the segment.
- * `begin` and `end` are byte positions in ctx->samples; FASTCOVER_selectSegment()
- * iterates `pos` from begin while pos != end, so `end` is exclusive.
- */
-typedef struct {
- U32 begin;
- U32 end;
- U32 score;
-} FASTCOVER_segment_t;
-
-
-/**
- * Selects the best segment in an epoch.
- * Segments are scored according to the function:
- *
- * Let F(d) be the frequency of all dmers with hash value d.
- * Let S_i be hash value of the dmer at position i of segment S which has length k.
- *
- * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
- *
- * A hash value that occurs several times inside the window is only counted
- * once: its frequency is added when its first occurrence enters the window and
- * subtracted when its last occurrence leaves it.
- *
- * Once the dmer with hash value d is in the dictionary we set F(d) = 0.
- *
- * ctx        : context; its segmentFreqs scratch array is written through the
- *              const pointer and is decremented back for every position
- *              before returning.
- *              NOTE(review): ZDICT_optimizeTrainFromBuffer_fastCover() hands
- *              the same ctx to every queued job, so concurrent jobs share this
- *              scratch array -- confirm this is intended.
- * freqs      : per-hash frequencies; entries covered by the chosen segment
- *              are zeroed.
- * begin, end : positions in ctx->samples delimiting the epoch (end exclusive).
- * parameters : k, d and f are read.
- *
- * Returns the best window with zero-frequency positions trimmed from both
- * ends; its score is the one computed before trimming. If no window scored
- * above 0 the returned score is 0.
- */
-static FASTCOVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
- U32 *freqs, U32 begin,U32 end,
- ZDICT_fastCover_params_t parameters) {
- /* Constants */
- const U32 k = parameters.k;
- const U32 d = parameters.d;
- const U32 dmersInK = k - d + 1;
- /* Try each segment (activeSegment) and save the best (bestSegment) */
- FASTCOVER_segment_t bestSegment = {0, 0, 0};
- FASTCOVER_segment_t activeSegment;
- /* Reset the activeDmers in the segment */
- /* The activeSegment starts at the beginning of the epoch. */
- activeSegment.begin = begin;
- activeSegment.end = begin;
- activeSegment.score = 0;
- {
- /* Slide the activeSegment through the whole epoch.
- * Save the best segment in bestSegment.
- */
- while (activeSegment.end < end) {
- /* Get hash value of current dmer */
- const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, parameters.f, ctx->d);
- /* Add frequency of this index to score if this is the first occurrence of index in active segment */
- if (ctx->segmentFreqs[index] == 0) {
- activeSegment.score += freqs[index];
- }
- ctx->segmentFreqs[index] += 1;
- /* Increment end of segment */
- activeSegment.end += 1;
- /* If the window is now too large, drop the first position */
- if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
- /* Get hash value of the dmer to be eliminated from active segment */
- const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d);
- ctx->segmentFreqs[delIndex] -= 1;
- /* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */
- if (ctx->segmentFreqs[delIndex] == 0) {
- activeSegment.score -= freqs[delIndex];
- }
- /* Increment start of segment */
- activeSegment.begin += 1;
- }
- /* If this segment is the best so far save it */
- if (activeSegment.score > bestSegment.score) {
- bestSegment = activeSegment;
- }
- }
- /* Zero out rest of segmentFreqs array:
- * undo the increments of the positions still inside the final window */
- while (activeSegment.begin < end) {
- const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d);
- ctx->segmentFreqs[delIndex] -= 1;
- activeSegment.begin += 1;
- }
- }
- {
- /* Trim off the zero frequency head and tail from the segment.
- * newBegin/newEnd start out inverted so that a segment without any
- * non-zero position collapses to an empty range. */
- U32 newBegin = bestSegment.end;
- U32 newEnd = bestSegment.begin;
- U32 pos;
- for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
- const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + pos, parameters.f, ctx->d);
- U32 freq = freqs[index];
- if (freq != 0) {
- newBegin = MIN(newBegin, pos);
- newEnd = pos + 1;
- }
- }
- bestSegment.begin = newBegin;
- bestSegment.end = newEnd;
- }
- {
- /* Zero the frequency of hash value of each dmer covered by the chosen segment. */
- U32 pos;
- for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
- const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, parameters.f, ctx->d);
- freqs[i] = 0;
- }
- }
- return bestSegment;
-}
-
-/**
- * Check the validity of the parameters.
- * Returns non-zero if the parameters are valid and 0 otherwise.
- *
- * parameters  : k, d, f and splitPoint are inspected; steps, nbThreads and
- *               zParams are not.
- * maxDictSize : capacity of the dictionary buffer; k may not exceed it.
- */
-static int FASTCOVER_checkParameters(ZDICT_fastCover_params_t parameters,
- size_t maxDictSize) {
- /* k, d, and f are required parameters */
- if (parameters.d == 0 || parameters.k == 0 || parameters.f == 0) {
- return 0;
- }
- /* d has to be 6 or 8 */
- if (parameters.d != 6 && parameters.d != 8) {
- return 0;
- }
- /* 0 < f <= FASTCOVER_MAX_F */
- if (parameters.f > FASTCOVER_MAX_F) {
- return 0;
- }
- /* k <= maxDictSize */
- if (parameters.k > maxDictSize) {
- return 0;
- }
- /* d <= k */
- if (parameters.d > parameters.k) {
- return 0;
- }
- /* 0 < splitPoint <= 1 */
- if (parameters.splitPoint <= 0 || parameters.splitPoint > 1) {
- return 0;
- }
- return 1;
-}
-
-
-/**
- * Clean up a context initialized with `FASTCOVER_ctx_init()`.
- * Frees the segmentFreqs, freqs and offsets arrays and resets the pointers to
- * NULL, so calling it twice on the same context is harmless.
- * The context structure itself and the caller-provided sample buffers are not
- * freed. A NULL ctx is accepted and ignored.
- */
-static void FASTCOVER_ctx_destroy(FASTCOVER_ctx_t *ctx) {
- if (!ctx) {
- return;
- }
- if (ctx->segmentFreqs) {
- free(ctx->segmentFreqs);
- ctx->segmentFreqs = NULL;
- }
- if (ctx->freqs) {
- free(ctx->freqs);
- ctx->freqs = NULL;
- }
- if (ctx->offsets) {
- free(ctx->offsets);
- ctx->offsets = NULL;
- }
-}
-
-/**
- * Calculate the frequency of the hash value of each dmer in the training
- * samples of ctx->samples.
- *
- * freqs : table of 2^f counters; each counted dmer increments the entry
- *         selected by its hash. The table is not cleared here.
- * f     : log2 of the size of `freqs`.
- * ctx   : provides samples, offsets, nbTrainSamples and d.
- *
- * Only dmers that lie entirely inside one sample are counted. Hashing loads a
- * full 64-bit word even when d == 6, so a dmer is only counted when 8 bytes
- * are available before the end of its sample; otherwise the load would run
- * into the next sample or, for the last sample, past the end of the buffer.
- */
-static void FASTCOVER_computeFrequency(U32 *freqs, unsigned f, FASTCOVER_ctx_t *ctx){
-  /* Number of bytes the hash actually reads for one dmer */
-  const size_t readLength = MAX(ctx->d, 8);
-  size_t i;
-  for (i = 0; i < ctx->nbTrainSamples; i++) {
-    size_t start = ctx->offsets[i]; /* start of current dmer */
-    const size_t currSampleEnd = ctx->offsets[i + 1];
-    while (start + readLength <= currSampleEnd) {
-      const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, ctx->d);
-      freqs[dmerIndex]++;
-      start++;
-    }
-  }
-}
-
-/**
- * Prepare a context for dictionary building.
- * The context only depends on the parameters `d`, `f` and `splitPoint` and can
- * be used multiple times.
- *
- * ctx           : context to fill; it is zeroed once the input checks passed.
- * samplesBuffer : concatenated samples; referenced by the context, not copied.
- * samplesSizes  : size of each sample, in order; referenced, not copied.
- * nbSamples     : number of samples.
- * d             : dmer size.
- * splitPoint    : fraction of the samples used for training. Below 1.0 the
- *                 first nbSamples * splitPoint samples are the training set and
- *                 the remaining ones the test set; otherwise all samples are
- *                 used for both.
- * f             : log2 of the size of the frequency tables, 1..FASTCOVER_MAX_F.
- *
- * Returns 1 on success or zero on error. On error nothing is left allocated,
- * so the caller must not (and need not) destroy the context.
- * On success the context must be destroyed with `FASTCOVER_ctx_destroy()`.
- */
-static int FASTCOVER_ctx_init(FASTCOVER_ctx_t *ctx, const void *samplesBuffer,
-                              const size_t *samplesSizes, unsigned nbSamples,
-                              unsigned d, double splitPoint, unsigned f) {
-  const BYTE *const samples = (const BYTE *)samplesBuffer;
-  const size_t totalSamplesSize = FASTCOVER_sum(samplesSizes, nbSamples);
-  /* Split samples into testing and training sets */
-  const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
-  const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
-  const size_t trainingSamplesSize = splitPoint < 1.0 ? FASTCOVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
-  const size_t testSamplesSize = splitPoint < 1.0 ? FASTCOVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
-  /* Hashing a dmer always loads a full 64-bit word, even when d == 6 */
-  const size_t minReadSize = MAX(d, sizeof(U64));
-  /* Number of entries of the hash-indexed tables, computed in 64 bits because
-   * `1 << f` is an int shift that is undefined for f >= 31 */
-  const U64 tableSize = (f < 64) ? ((U64)1 << f) : 0;
-  /* Checks */
-  if (f == 0 || f > FASTCOVER_MAX_F || tableSize > (U64)((size_t)-1)) {
-    DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
-    return 0;
-  }
-  if (totalSamplesSize < minReadSize) {
-    DISPLAYLEVEL(1, "Total samples size is too small (%u bytes), minimum size is %u bytes\n",
-                 (U32)totalSamplesSize, (U32)minReadSize);
-    return 0;
-  }
-  if (totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
-    DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
-                 (U32)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
-    return 0;
-  }
-  /* Check if there are at least 5 training samples */
-  if (nbTrainSamples < 5) {
-    DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
-    return 0;
-  }
-  /* Check if there's testing sample */
-  if (nbTestSamples < 1) {
-    DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
-    return 0;
-  }
-  /* The training set alone must hold at least one hashable dmer, otherwise
-   * the nbDmers computation below would wrap around */
-  if (trainingSamplesSize < minReadSize) {
-    DISPLAYLEVEL(1, "Total samples size is too small (%u bytes), minimum size is %u bytes\n",
-                 (U32)trainingSamplesSize, (U32)minReadSize);
-    return 0;
-  }
-  /* Zero the context */
-  memset(ctx, 0, sizeof(*ctx));
-  DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
-               (U32)trainingSamplesSize);
-  DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
-               (U32)testSamplesSize);
-
-  ctx->samples = samples;
-  ctx->samplesSizes = samplesSizes;
-  ctx->nbSamples = nbSamples;
-  ctx->nbTrainSamples = nbTrainSamples;
-  ctx->nbTestSamples = nbTestSamples;
-  /* Positions from which a full 64-bit word can be read without leaving the
-   * training data; FASTCOVER_selectSegment() hashes every position below it */
-  ctx->nbDmers = trainingSamplesSize - minReadSize + 1;
-  ctx->d = d;
-
-  /* The offsets of each file */
-  ctx->offsets = (size_t *)malloc(((size_t)nbSamples + 1) * sizeof(size_t));
-  if (!ctx->offsets) {
-    DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
-    FASTCOVER_ctx_destroy(ctx);
-    return 0;
-  }
-
-  /* Fill offsets from the samplesSizes */
-  {
-    U32 i;
-    ctx->offsets[0] = 0;
-    for (i = 1; i <= nbSamples; ++i) {
-      ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
-    }
-  }
-
-  /* Initialize frequency arrays of size 2^f */
-  ctx->freqs = (U32 *)calloc((size_t)tableSize, sizeof(U32));
-  ctx->segmentFreqs = (U16 *)calloc((size_t)tableSize, sizeof(U16));
-  if (!ctx->freqs || !ctx->segmentFreqs) {
-    DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
-    FASTCOVER_ctx_destroy(ctx);
-    return 0;
-  }
-  DISPLAYLEVEL(2, "Computing frequencies\n");
-  FASTCOVER_computeFrequency(ctx->freqs, f, ctx);
-
-  return 1;
-}
-
-
-/**
- * Given the prepared context build the dictionary.
- *
- * ctx                : prepared context (samples, nbDmers, scratch arrays).
- * freqs              : per-hash frequencies; modified while segments are
- *                      selected, so pass a private copy if ctx->freqs must be
- *                      kept intact.
- * dictBuffer         : destination; filled from the back towards the front.
- * dictBufferCapacity : size of dictBuffer in bytes.
- * parameters         : k and d are read here; the whole structure is passed
- *                      on to FASTCOVER_selectSegment().
- *
- * Returns `tail`: the dictionary content occupies
- * dictBuffer[tail .. dictBufferCapacity).
- */
-static size_t FASTCOVER_buildDictionary(const FASTCOVER_ctx_t *ctx, U32 *freqs,
- void *dictBuffer,
- size_t dictBufferCapacity,
- ZDICT_fastCover_params_t parameters){
- BYTE *const dict = (BYTE *)dictBuffer;
- size_t tail = dictBufferCapacity;
- /* Divide the data up into epochs of equal size.
- * We will select at least one segment from each epoch.
- */
- const U32 epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k));
- const U32 epochSize = (U32)(ctx->nbDmers / epochs);
- size_t epoch;
- DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
- epochSize);
- /* Loop through the epochs until there are no more segments or the dictionary
- * is full. The epoch index wraps around, so epochs are revisited.
- */
- for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
- const U32 epochBegin = (U32)(epoch * epochSize);
- const U32 epochEnd = epochBegin + epochSize;
- size_t segmentSize;
- /* Select a segment */
- FASTCOVER_segment_t segment = FASTCOVER_selectSegment(
- ctx, freqs, epochBegin, epochEnd, parameters);
-
- /* If the segment covers no dmers, then we are out of content */
- if (segment.score == 0) {
- break;
- }
-
- /* Trim the segment if necessary and if it is too small then we are done.
- * A range of n dmer positions spans n + d - 1 bytes. */
- segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
- if (segmentSize < parameters.d) {
- break;
- }
-
- /* We fill the dictionary from the back to allow the best segments to be
- * referenced with the smallest offsets.
- */
- tail -= segmentSize;
- memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
- DISPLAYUPDATE(
- 2, "\r%u%% ",
- (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
- }
- DISPLAYLEVEL(2, "\r%79s\r", "");
- return tail;
-}
-
-
-/**
- * FASTCOVER_best_t is used for two purposes:
- * 1. Synchronizing threads.
- * 2. Saving the best parameters and dictionary.
- *
- * All of the methods except FASTCOVER_best_init() are thread safe if zstd is
- * compiled with multithreaded support.
- */
-typedef struct fast_best_s {
- ZSTD_pthread_mutex_t mutex; /* locked around every access to the fields below by the FASTCOVER_best_*() helpers */
- ZSTD_pthread_cond_t cond; /* broadcast when liveJobs drops to 0 */
- size_t liveJobs; /* jobs started but not yet finished */
- void *dict; /* copy of the best dictionary so far, NULL if none; freed by FASTCOVER_best_destroy() */
- size_t dictSize; /* size in bytes of the dictionary stored in `dict` */
- ZDICT_fastCover_params_t parameters; /* parameters that produced `dict` */
- size_t compressedSize; /* score of `dict` (lower is better); starts at (size_t)-1 */
-} FASTCOVER_best_t;
-
-/**
- * Initialize the `FASTCOVER_best_t`.
- * Sets up the mutex and condition variable (their return codes are ignored),
- * clears the job counter, dictionary and parameters, and sets compressedSize
- * to (size_t)-1 so that any real result compares as better.
- */
-static void FASTCOVER_best_init(FASTCOVER_best_t *best) {
- if (best==NULL) return; /* compatible with init on NULL */
- (void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
- (void)ZSTD_pthread_cond_init(&best->cond, NULL);
- best->liveJobs = 0;
- best->dict = NULL;
- best->dictSize = 0;
- best->compressedSize = (size_t)-1;
- memset(&best->parameters, 0, sizeof(best->parameters));
-}
-
-/**
- * Wait until liveJobs == 0.
- * Blocks on the condition variable while jobs are outstanding; the counter is
- * re-checked in a loop after every wake-up. A NULL `best` is ignored.
- */
-static void FASTCOVER_best_wait(FASTCOVER_best_t *best) {
- if (!best) {
- return;
- }
- ZSTD_pthread_mutex_lock(&best->mutex);
- while (best->liveJobs != 0) {
- ZSTD_pthread_cond_wait(&best->cond, &best->mutex);
- }
- ZSTD_pthread_mutex_unlock(&best->mutex);
-}
-
-/**
- * Call FASTCOVER_best_wait() and then destroy the FASTCOVER_best_t.
- * Frees the saved dictionary and destroys the mutex and condition variable;
- * the structure itself is not freed. A NULL `best` is ignored.
- */
-static void FASTCOVER_best_destroy(FASTCOVER_best_t *best) {
- if (!best) {
- return;
- }
- FASTCOVER_best_wait(best);
- if (best->dict) {
- free(best->dict);
- }
- ZSTD_pthread_mutex_destroy(&best->mutex);
- ZSTD_pthread_cond_destroy(&best->cond);
-}
-
-/**
- * Called when a thread is about to be launched.
- * Increments liveJobs.
- * Every call must be matched by one FASTCOVER_best_finish(), which decrements
- * the counter again. A NULL `best` is ignored.
- */
-static void FASTCOVER_best_start(FASTCOVER_best_t *best) {
- if (!best) {
- return;
- }
- ZSTD_pthread_mutex_lock(&best->mutex);
- ++best->liveJobs;
- ZSTD_pthread_mutex_unlock(&best->mutex);
-}
-
-/**
- * Called when a thread finishes executing, both on error or success.
- * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
- * If this dictionary is the best so far save it and its parameters.
- *
- * best           : shared result holder; NULL is ignored.
- * compressedSize : score of this job (lower is better); an error code compares
- *                  as a very large value and therefore never wins.
- * parameters     : parameters that produced `dict`.
- * dict, dictSize : dictionary built by this job; copied, so the caller keeps
- *                  ownership. `dict` is only read when the job wins.
- *
- * If the copy of a winning dictionary cannot be allocated, the stored result
- * is reset to ERROR(GENERIC) with no dictionary.
- */
-static void FASTCOVER_best_finish(FASTCOVER_best_t *best, size_t compressedSize,
-                                  ZDICT_fastCover_params_t parameters, void *dict,
-                                  size_t dictSize) {
-  if (!best) {
-    return;
-  }
-  ZSTD_pthread_mutex_lock(&best->mutex);
-  --best->liveJobs;
-  /* If the new dictionary is better */
-  if (compressedSize < best->compressedSize) {
-    /* Allocate space if necessary */
-    if (!best->dict || best->dictSize < dictSize) {
-      free(best->dict);
-      best->dict = malloc(dictSize);
-      if (!best->dict) {
-        /* Record the failure but fall through to the common exit: returning
-         * here would leave the mutex locked and the waiter never woken. */
-        best->compressedSize = ERROR(GENERIC);
-        best->dictSize = 0;
-      }
-    }
-    if (best->dict) {
-      /* Save the dictionary, parameters, and size */
-      memcpy(best->dict, dict, dictSize);
-      best->dictSize = dictSize;
-      best->parameters = parameters;
-      best->compressedSize = compressedSize;
-    }
-  }
-  /* Broadcast while still holding the mutex: once it is released with
-   * liveJobs == 0, the waiter may return and destroy `best`, so the condition
-   * variable must not be touched after the unlock. */
-  if (best->liveJobs == 0) {
-    ZSTD_pthread_cond_broadcast(&best->cond);
-  }
-  ZSTD_pthread_mutex_unlock(&best->mutex);
-}
-
-/**
- * Parameters for FASTCOVER_tryParameters().
- * One instance is heap-allocated per job and freed by the job itself.
- */
-typedef struct FASTCOVER_tryParameters_data_s {
- const FASTCOVER_ctx_t *ctx; /* context shared by all jobs for one value of d (not owned) */
- FASTCOVER_best_t *best; /* where the job reports its result (not owned) */
- size_t dictBufferCapacity; /* maximum dictionary size to build */
- ZDICT_fastCover_params_t parameters; /* parameter set to evaluate */
-} FASTCOVER_tryParameters_data_t;
-
-/**
- * Tries a set of parameters and updates the FASTCOVER_best_t with the results.
- * This function is thread safe if zstd is compiled with multithreaded support.
- * It takes its parameters as an *OWNING* opaque pointer to support threading.
- *
- * opaque : a heap-allocated FASTCOVER_tryParameters_data_t; freed here.
- *
- * The job builds a dictionary with the given parameters, compresses the
- * evaluation samples with it (the samples after the training set when
- * splitPoint < 1.0, all samples otherwise) and reports dictionary size plus
- * total compressed size through FASTCOVER_best_finish(). On any failure
- * ERROR(GENERIC) is reported instead; FASTCOVER_best_finish() is always called
- * exactly once.
- */
-static void FASTCOVER_tryParameters(void *opaque) {
-  /* Save parameters as local variables */
-  FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
-  const FASTCOVER_ctx_t *const ctx = data->ctx;
-  const ZDICT_fastCover_params_t parameters = data->parameters;
-  size_t dictBufferCapacity = data->dictBufferCapacity;
-  size_t totalCompressedSize = ERROR(GENERIC);
-  /* Number of entries in the hash-indexed tables. Computed in 64 bits because
-   * `1 << f` is an int shift that is undefined for f >= 31; 0 means the table
-   * cannot be represented and makes the allocations below fail cleanly. */
-  const U64 tableSize64 = (parameters.f < 64) ? ((U64)1 << parameters.f) : 0;
-  const size_t tableSize =
-      (tableSize64 <= (U64)((size_t)-1) / sizeof(U32)) ? (size_t)tableSize64 : 0;
-  /* Private view of the shared context: segment selection uses segmentFreqs
-   * as scratch space, so every job needs its own zeroed array instead of
-   * writing to the one that all concurrently running jobs share. */
-  FASTCOVER_ctx_t jobCtx = *ctx;
-  /* Allocate space for hash table, dict, and freqs */
-  BYTE *const dict = (BYTE *)malloc(dictBufferCapacity);
-  U32 *const freqs = tableSize ? (U32 *)malloc(tableSize * sizeof(U32)) : NULL;
-  U16 *const segmentFreqs = tableSize ? (U16 *)calloc(tableSize, sizeof(U16)) : NULL;
-  jobCtx.segmentFreqs = segmentFreqs;
-  if (!dict || !freqs || !segmentFreqs) {
-    DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
-    goto _cleanup;
-  }
-  /* Copy the frequencies because we need to modify them */
-  memcpy(freqs, ctx->freqs, tableSize * sizeof(U32));
-  /* Build the dictionary */
-  {
-    const size_t tail = FASTCOVER_buildDictionary(&jobCtx, freqs, dict,
-                                                  dictBufferCapacity, parameters);
-
-    dictBufferCapacity = ZDICT_finalizeDictionary(
-        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
-        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
-        parameters.zParams);
-    if (ZDICT_isError(dictBufferCapacity)) {
-      DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
-      goto _cleanup;
-    }
-  }
-  /* Check total compressed size */
-  {
-    /* Pointers */
-    ZSTD_CCtx *cctx;
-    ZSTD_CDict *cdict;
-    void *dst;
-    /* Local variables */
-    size_t dstCapacity;
-    size_t i;
-    /* Allocate dst with enough space to compress the maximum sized sample */
-    {
-      size_t maxSampleSize = 0;
-      i = parameters.splitPoint < 1.0 ? ctx->nbTrainSamples : 0;
-      for (; i < ctx->nbSamples; ++i) {
-        maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
-      }
-      dstCapacity = ZSTD_compressBound(maxSampleSize);
-      dst = malloc(dstCapacity);
-    }
-    /* Create the cctx and cdict */
-    cctx = ZSTD_createCCtx();
-    cdict = ZSTD_createCDict(dict, dictBufferCapacity,
-                             parameters.zParams.compressionLevel);
-    if (!dst || !cctx || !cdict) {
-      goto _compressCleanup;
-    }
-    /* Compress each sample and sum their sizes (or error) */
-    totalCompressedSize = dictBufferCapacity;
-    i = parameters.splitPoint < 1.0 ? ctx->nbTrainSamples : 0;
-    for (; i < ctx->nbSamples; ++i) {
-      const size_t size = ZSTD_compress_usingCDict(
-          cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
-          ctx->samplesSizes[i], cdict);
-      if (ZSTD_isError(size)) {
-        totalCompressedSize = ERROR(GENERIC);
-        goto _compressCleanup;
-      }
-      totalCompressedSize += size;
-    }
-  _compressCleanup:
-    ZSTD_freeCCtx(cctx);
-    ZSTD_freeCDict(cdict);
-    free(dst);
-  }
-
-_cleanup:
-  /* Always report, even on failure, so that the live-job count is decremented */
-  FASTCOVER_best_finish(data->best, totalCompressedSize, parameters, dict,
-                        dictBufferCapacity);
-  free(data);
-  free(dict);
-  free(freqs);
-  free(segmentFreqs);
-}
-
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover( /* Builds one dictionary with the given k, d and f (no parameter search).
- * Returns the size of the dictionary written to dictBuffer, or an error code:
- * ERROR(GENERIC) for invalid parameters, no samples or a context that could
- * not be initialized, ERROR(dstSize_tooSmall) when dictBufferCapacity is below
- * ZDICT_DICTSIZE_MIN, or whatever ZDICT_finalizeDictionary() returns. */
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
- const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters) {
- BYTE* const dict = (BYTE*)dictBuffer;
- FASTCOVER_ctx_t ctx;
- parameters.splitPoint = 1.0; /* any caller-supplied splitPoint is overridden: every sample is used for training */
- /* Initialize global data */
- g_displayLevel = parameters.zParams.notificationLevel;
- /* Checks */
- if (!FASTCOVER_checkParameters(parameters, dictBufferCapacity)) {
- DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
- return ERROR(GENERIC);
- }
- if (nbSamples == 0) {
- DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
- return ERROR(GENERIC);
- }
- if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
- DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
- ZDICT_DICTSIZE_MIN);
- return ERROR(dstSize_tooSmall);
- }
- /* Initialize context
- * (FASTCOVER_ctx_init() itself rejects inputs with fewer than 5 training samples) */
- if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
- parameters.d, parameters.splitPoint, parameters.f)) {
- DISPLAYLEVEL(1, "Failed to initialize context\n");
- return ERROR(GENERIC);
- }
- /* Build the dictionary */
- DISPLAYLEVEL(2, "Building dictionary\n");
- {
- /* ctx.freqs is passed directly, so building consumes the context's frequency table */
- const size_t tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer,
- dictBufferCapacity, parameters);
-
- const size_t dictionarySize = ZDICT_finalizeDictionary(
- dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
- samplesBuffer, samplesSizes, (unsigned)ctx.nbTrainSamples,
- parameters.zParams);
- if (!ZSTD_isError(dictionarySize)) {
- DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
- (U32)dictionarySize);
- }
- FASTCOVER_ctx_destroy(&ctx);
- return dictionarySize;
- }
-}
-
-
-
-/**
- * Tries several (k, d) combinations and keeps the dictionary that compresses
- * the evaluation samples best.
- *
- * dictBuffer, dictBufferCapacity : destination for the winning dictionary.
- * samplesBuffer, samplesSizes, nbSamples : concatenated samples and their sizes.
- * parameters : in/out. Fields left at 0 select the search range or default:
- *   d          : 0 tries 6 and 8, otherwise only the given value.
- *   k          : 0 tries values from 50 to 2000, otherwise only the given value.
- *   steps      : number of k values tried over that range; 0 means 40.
- *   f          : 0 means 23; must not exceed FASTCOVER_MAX_F.
- *   splitPoint : <= 0 means DEFAULT_SPLITPOINT (1.0).
- *   nbThreads  : a thread pool is used only when > 1.
- *   On success *parameters is overwritten with the winning parameter set.
- *
- * Returns the size of the dictionary written to dictBuffer, or an error code.
- */
-ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(
-    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
-    const size_t *samplesSizes, unsigned nbSamples,
-    ZDICT_fastCover_params_t *parameters) {
-  /* constants */
-  const unsigned nbThreads = parameters->nbThreads;
-  const double splitPoint =
-      parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
-  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
-  const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
-  const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
-  const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
-  const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
-  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
-  const unsigned kIterations =
-      (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
-  const unsigned f = parameters->f == 0 ? 23 : parameters->f;
-
-  /* Local variables */
-  const int displayLevel = parameters->zParams.notificationLevel;
-  unsigned iteration = 1;
-  unsigned d;
-  unsigned k;
-  FASTCOVER_best_t best;
-  POOL_ctx *pool = NULL;
-
-  /* Checks: they run before the global display level is set, so they report
-   * through the caller's displayLevel */
-  if (splitPoint <= 0 || splitPoint > 1) {
-    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
-    return ERROR(GENERIC);
-  }
-  /* f sizes the tables allocated by FASTCOVER_ctx_init(), which runs before
-   * the per-iteration parameter check, so it has to be validated up front */
-  if (f > FASTCOVER_MAX_F) {
-    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect f\n");
-    return ERROR(GENERIC);
-  }
-  if (kMinK < kMaxD || kMaxK < kMinK) {
-    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
-    return ERROR(GENERIC);
-  }
-  if (nbSamples == 0) {
-    LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
-    return ERROR(GENERIC);
-  }
-  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
-    LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",
-                      ZDICT_DICTSIZE_MIN);
-    return ERROR(dstSize_tooSmall);
-  }
-  if (nbThreads > 1) {
-    pool = POOL_create(nbThreads, 1);
-    if (!pool) {
-      return ERROR(memory_allocation);
-    }
-  }
-  /* Initialization */
-  FASTCOVER_best_init(&best);
-  /* Turn down global display level to clean up display at level 2 and below */
-  g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
-  /* Loop through d first because each new value needs a new context */
-  LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
-                    kIterations);
-  for (d = kMinD; d <= kMaxD; d += 2) {
-    /* Initialize the context for this value of d */
-    FASTCOVER_ctx_t ctx;
-    LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
-    if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f)) {
-      LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
-      FASTCOVER_best_destroy(&best);
-      POOL_free(pool);
-      return ERROR(GENERIC);
-    }
-    /* Loop through k reusing the same context */
-    for (k = kMinK; k <= kMaxK; k += kStepSize) {
-      /* Prepare the arguments */
-      FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc(
-          sizeof(FASTCOVER_tryParameters_data_t));
-      LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
-      if (!data) {
-        LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
-        /* best_destroy() waits for the jobs still using ctx before it is freed */
-        FASTCOVER_best_destroy(&best);
-        FASTCOVER_ctx_destroy(&ctx);
-        POOL_free(pool);
-        return ERROR(GENERIC);
-      }
-      data->ctx = &ctx;
-      data->best = &best;
-      data->dictBufferCapacity = dictBufferCapacity;
-      data->parameters = *parameters;
-      data->parameters.k = k;
-      data->parameters.d = d;
-      data->parameters.f = f;
-      data->parameters.splitPoint = splitPoint;
-      data->parameters.steps = kSteps;
-      data->parameters.zParams.notificationLevel = g_displayLevel;
-      /* Check the parameters */
-      if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity)) {
-        DISPLAYLEVEL(1, "fastCover parameters incorrect\n");
-        free(data);
-        continue;
-      }
-      /* Call the function and pass ownership of data to it */
-      FASTCOVER_best_start(&best);
-      if (pool) {
-        POOL_add(pool, &FASTCOVER_tryParameters, data);
-      } else {
-        FASTCOVER_tryParameters(data);
-      }
-      /* Print status */
-      LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
-                         (U32)((iteration * 100) / kIterations));
-      ++iteration;
-    }
-    /* All jobs reference ctx, so wait for them before tearing it down */
-    FASTCOVER_best_wait(&best);
-    FASTCOVER_ctx_destroy(&ctx);
-  }
-  LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
-  /* Fill the output buffer and parameters with output of the best parameters */
-  {
-    const size_t dictSize = best.dictSize;
-    /* Still holds its initial (size_t)-1 or an error when no job succeeded */
-    if (ZSTD_isError(best.compressedSize)) {
-      const size_t compressedSize = best.compressedSize;
-      FASTCOVER_best_destroy(&best);
-      POOL_free(pool);
-      return compressedSize;
-    }
-    *parameters = best.parameters;
-    memcpy(dictBuffer, best.dict, dictSize);
-    FASTCOVER_best_destroy(&best);
-    POOL_free(pool);
-    return dictSize;
-  }
-
-}
diff --git a/contrib/experimental_dict_builders/fastCover/fastCover.h b/contrib/experimental_dict_builders/fastCover/fastCover.h
deleted file mode 100644
index 958e9f423930..000000000000
--- a/contrib/experimental_dict_builders/fastCover/fastCover.h
+++ /dev/null
@@ -1,57 +0,0 @@
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memset */
-#include <time.h> /* clock */
-#include "mem.h" /* read */
-#include "pool.h"
-#include "threading.h"
-#include "zstd_internal.h" /* includes zstd.h */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
-
-
-typedef struct {
- unsigned k; /* Segment size : constraint: 0 < k <= dictionary capacity : the optimizer tries values in [50, 2000] when 0 */
- unsigned d; /* dmer size : constraint: must be 6 or 8, and d <= k : the optimizer tries both when 0 */
- unsigned f; /* log of size of frequency array : constraint: 0 < f <= 32 : the optimizer uses 23 when 0 */
- unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
- unsigned nbThreads; /* Number of threads : a thread pool is only created when nbThreads > 1 : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
- double splitPoint; /* Percentage of samples used for training: the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
- ZDICT_params_t zParams;
-} ZDICT_fastCover_params_t;
-
-
-/*! ZDICT_optimizeTrainFromBuffer_fastCover():
- * Train a dictionary from an array of samples using a modified version of the COVER algorithm.
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- * The resulting dictionary will be saved into `dictBuffer`.
- * All of the parameters are optional; a field left at 0 selects its default.
- * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
- * if steps is zero it defaults to its default value (40).
- * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
- * If f is zero it defaults to 23.
- *
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- * or an error code, which can be tested with ZDICT_isError().
- * On success `*parameters` contains the parameters selected.
- */
- ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
- const size_t *samplesSizes, unsigned nbSamples,
- ZDICT_fastCover_params_t *parameters);
-
-
-/*! ZDICT_trainFromBuffer_fastCover():
- * Train a dictionary from an array of samples using a modified version of the COVER algorithm.
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- * The resulting dictionary will be saved into `dictBuffer`.
- * d, k, and f are required.
- * splitPoint is ignored: the implementation always trains on all samples.
- * At least 5 samples are required, otherwise an error is returned.
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- * or an error code, which can be tested with ZDICT_isError().
- */
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
- const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters);
diff --git a/contrib/experimental_dict_builders/fastCover/main.c b/contrib/experimental_dict_builders/fastCover/main.c
deleted file mode 100644
index df7d91812e29..000000000000
--- a/contrib/experimental_dict_builders/fastCover/main.c
+++ /dev/null
@@ -1,183 +0,0 @@
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* strcmp, strlen */
-#include <errno.h> /* errno */
-#include <ctype.h>
-#include "fastCover.h"
-#include "io.h"
-#include "util.h"
-#include "zdict.h"
-
-
-/*-*************************************
-* Console display
-***************************************/
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-
-static const U64 g_refreshRate = SEC_TO_MICRO / 6;
-static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
-
-#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
- if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
- { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
- if (displayLevel>=4) fflush(stderr); } } }
-
-
-/*-*************************************
-* Exceptions
-***************************************/
-#ifndef DEBUG
-# define DEBUG 0
-#endif
-#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
-#define EXM_THROW(error, ...) \
-{ \
- DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
- DISPLAY("Error %i : ", error); \
- DISPLAY(__VA_ARGS__); \
- DISPLAY("\n"); \
- exit(error); \
-}
-
-
-/*-*************************************
-* Constants
-***************************************/
-static const unsigned g_defaultMaxDictSize = 110 KB;
-#define DEFAULT_CLEVEL 3
-
-
-/*-*************************************
-* FASTCOVER
-***************************************/
-/* FASTCOVER_trainFromFiles() :
- * Train a fastCover dictionary from the samples described by `info` and
- * write it to `dictFileName`.
- * When either k or d is zero the optimizing trainer is used (it may update
- * `*params`); otherwise the parameters are used exactly as given.
- * @return : 0 on success, 1 if training failed.
- * Exits the process (code 12) if the dictionary buffer cannot be allocated. */
-int FASTCOVER_trainFromFiles(const char* dictFileName, sampleInfo *info,
-                             unsigned maxDictSize,
-                             ZDICT_fastCover_params_t *params) {
-    unsigned const displayLevel = params->zParams.notificationLevel;
-    int const mustOptimize = (params->d == 0) || (params->k == 0);
-    void* const dictBuffer = malloc(maxDictSize);
-    size_t trainedSize;
-    int status;
-
-    if (dictBuffer == NULL) {
-        EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
-    }
-
-    if (mustOptimize) {
-        trainedSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
-                                                              info->samplesSizes, info->nbSamples, params);
-    } else {
-        trainedSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
-                                                      info->samplesSizes, info->nbSamples, *params);
-    }
-    /* parameters are reported after training, so selected values are shown */
-    DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint*100));
-
-    if (ZDICT_isError(trainedSize)) {
-        DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(trainedSize)); /* should not happen */
-        status = 1;
-    } else {
-        DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)trainedSize, dictFileName);
-        saveDict(dictFileName, dictBuffer, trainedSize);
-        status = 0;
-    }
-
-    free(dictBuffer);
-    return status;
-}
-
-
-
-/* main() :
- * Command-line driver for the fastCover dictionary builder.
- * Accepted arguments (each of the form name=value):
- *   k=, d=, f=, steps=, split=, dictID=, maxdict=, in= (repeatable), out=
- * Any other argument is rejected.
- * @return : 0 on success, 1 on incorrect parameters or training failure. */
-int main(int argCount, const char* argv[])
-{
-    int displayLevel = 2;
-    int operationResult = 0;
-
-    /* Initialize arguments to default values */
-    unsigned k = 0;
-    unsigned d = 0;
-    unsigned f = 23;
-    unsigned steps = 32;
-    unsigned nbThreads = 1;
-    unsigned split = 100;
-    const char* outputFile = "fastCoverDict";
-    unsigned dictID = 0;
-    unsigned maxDictSize = g_defaultMaxDictSize;
-
-    /* Table of input paths : at most one entry per command-line argument.
-     * +1 keeps the request non-zero sized. */
-    const char** filenameTable = (const char**)malloc(((size_t)argCount + 1) * sizeof(const char*));
-    unsigned filenameIdx = 0;
-
-    char* fileNamesBuf = NULL;
-    unsigned fileNamesNb = filenameIdx;
-    int followLinks = 0; /* follow directory recursively */
-    const char** extendedFileList = NULL;
-    int i;
-
-    if (filenameTable == NULL) {
-        EXM_THROW(12, "not enough memory for input file table");
-    }
-
-    /* Parse arguments */
-    for (i = 1; i < argCount; i++) {
-        const char* argument = argv[i];
-        if (longCommandWArg(&argument, "k=")) { k = readU32FromChar(&argument); continue; }
-        if (longCommandWArg(&argument, "d=")) { d = readU32FromChar(&argument); continue; }
-        if (longCommandWArg(&argument, "f=")) { f = readU32FromChar(&argument); continue; }
-        if (longCommandWArg(&argument, "steps=")) { steps = readU32FromChar(&argument); continue; }
-        if (longCommandWArg(&argument, "split=")) { split = readU32FromChar(&argument); continue; }
-        if (longCommandWArg(&argument, "dictID=")) { dictID = readU32FromChar(&argument); continue; }
-        if (longCommandWArg(&argument, "maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
-        if (longCommandWArg(&argument, "in=")) {
-            filenameTable[filenameIdx] = argument;
-            filenameIdx++;
-            continue;
-        }
-        if (longCommandWArg(&argument, "out=")) {
-            outputFile = argument;
-            continue;
-        }
-        DISPLAYLEVEL(1, "Incorrect parameters\n");
-        free((void*)filenameTable);   /* do not leak the table on early exit */
-        operationResult = 1;
-        return operationResult;
-    }
-
-    /* Expand the inputs into a flat list of files */
-    extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
-                                           &fileNamesNb, followLinks);
-    if (extendedFileList) {
-        unsigned u;
-        for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
-        /* the expanded list replaces the argument table from here on */
-        free((void*)filenameTable);
-        filenameTable = extendedFileList;
-        filenameIdx = fileNamesNb;
-    }
-
-    {   size_t const blockSize = 0;   /* 0 : one sample per file */
-        ZDICT_params_t zParams;
-        ZDICT_fastCover_params_t params;
-        sampleInfo* info;
-
-        /* Zero both structs first so any field not assigned below is defined */
-        memset(&zParams, 0, sizeof(zParams));
-        zParams.compressionLevel = DEFAULT_CLEVEL;
-        zParams.notificationLevel = displayLevel;
-        zParams.dictID = dictID;
-
-        memset(&params, 0, sizeof(params));
-        params.zParams = zParams;
-        params.k = k;
-        params.d = d;
-        params.f = f;
-        params.steps = steps;
-        params.nbThreads = nbThreads;
-        params.splitPoint = (double)split/100;
-
-        /* Build dictionary */
-        info = getSampleInfo(filenameTable, filenameIdx, blockSize, maxDictSize,
-                             zParams.notificationLevel);
-        operationResult = FASTCOVER_trainFromFiles(outputFile, info, maxDictSize, &params);
-
-        /* Free allocated memory */
-        freeSampleInfo(info);
-    }
-
-    /* When no expanded list was produced, filenameTable is still the original
-     * malloc'ed table and must be released here. */
-    if (extendedFileList == NULL) free((void*)filenameTable);
-    UTIL_freeFileList(extendedFileList, fileNamesBuf);
-
-    return operationResult;
-}
diff --git a/contrib/experimental_dict_builders/fastCover/test.sh b/contrib/experimental_dict_builders/fastCover/test.sh
deleted file mode 100755
index f86915b59fc5..000000000000
--- a/contrib/experimental_dict_builders/fastCover/test.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-# Smoke tests for the fastCover dictionary builder (run through `sh test.sh`).
-echo "Building fastCover dictionary with in=../../../lib/common f=20 out=dict1"
-./main in=../../../lib/common f=20 out=dict1
-zstd -be3 -D dict1 -r ../../../lib/common -q
-echo "Building fastCover dictionary with in=../../../lib/common k=500 d=6 f=24 out=dict2 dictID=100 maxdict=140000"
-./main in=../../../lib/common k=500 d=6 f=24 out=dict2 dictID=100 maxdict=140000
-zstd -be3 -D dict2 -r ../../../lib/common -q
-echo "Building fastCover dictionary with 2 sample sources"
-./main in=../../../lib/common in=../../../lib/compress out=dict3
-zstd -be3 -D dict3 -r ../../../lib/common -q
-echo "Removing dict1 dict2 dict3"
-rm -f dict1 dict2 dict3
-
-echo "Testing with invalid parameters, should fail"
-# Each call below is expected to exit non-zero; stop with an error if it does not.
-if ./main in=../../../lib/common r=10; then
-    echo "error: unknown parameter r=10 was accepted"
-    exit 1
-fi
-if ./main in=../../../lib/common d=10; then
-    echo "error: invalid parameter d=10 was accepted"
-    exit 1
-fi
diff --git a/contrib/experimental_dict_builders/randomDictBuilder/Makefile b/contrib/experimental_dict_builders/randomDictBuilder/Makefile
deleted file mode 100644
index bbd40e47c312..000000000000
--- a/contrib/experimental_dict_builders/randomDictBuilder/Makefile
+++ /dev/null
@@ -1,52 +0,0 @@
-# Build and test driver for the random dictionary builder.
-# ARG holds the command-line arguments forwarded to ./main by the `run` target,
-# e.g. make ARG="in=<dir> out=<dict>".
-ARG :=
-
-CC ?= gcc
-CFLAGS ?= -O3
-INCLUDES := -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
-
-# Input directory and output dictionary name used by the `testrun` target.
-TEST_INPUT := ../../../lib
-TEST_OUTPUT := randomDict
-
-# Default target: build ./main, run it with $(ARG), then clean up.
-all: main run clean
-
-# test: build, run the fixed test invocation, run test.sh, then clean up.
-.PHONY: test
-test: main testrun testshell clean
-
-.PHONY: run
-run:
- echo "Building a random dictionary with given arguments"
- ./main $(ARG)
-
-main: main.o io.o random.o libzstd.a
- $(CC) $(CFLAGS) main.o io.o random.o libzstd.a -o main
-
-main.o: main.c
- $(CC) $(CFLAGS) $(INCLUDES) -c main.c
-
-random.o: random.c
- $(CC) $(CFLAGS) $(INCLUDES) -c random.c
-
-io.o: io.c
- $(CC) $(CFLAGS) $(INCLUDES) -c io.c
-
-# libzstd.a is built in ../../../lib and then moved into this directory.
-libzstd.a:
- $(MAKE) -C ../../../lib libzstd.a
- mv ../../../lib/libzstd.a .
-
-# testrun: build a dictionary from $(TEST_INPUT), pass it to the zstd CLI
-# over the same input, then delete the dictionary.
-.PHONY: testrun
-testrun: main
- echo "Run with $(TEST_INPUT) and $(TEST_OUTPUT) "
- ./main in=$(TEST_INPUT) out=$(TEST_OUTPUT)
- zstd -be3 -D $(TEST_OUTPUT) -r $(TEST_INPUT) -q
- rm -f $(TEST_OUTPUT)
-
-.PHONY: testshell
-testshell: test.sh
- sh test.sh
- echo "Finish running test.sh"
-
-# clean: removes local build products and also runs `clean` in ../../../lib.
-.PHONY: clean
-clean:
- rm -f *.o main libzstd.a
- $(MAKE) -C ../../../lib clean
- echo "Cleaning is completed"
diff --git a/contrib/experimental_dict_builders/randomDictBuilder/README.md b/contrib/experimental_dict_builders/randomDictBuilder/README.md
deleted file mode 100644
index da12a4280541..000000000000
--- a/contrib/experimental_dict_builders/randomDictBuilder/README.md
+++ /dev/null
@@ -1,20 +0,0 @@
-Random Dictionary Builder
-
-### Permitted Arguments:
-Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
-Output Dictionary (out=dictName): if not provided, default to defaultDict
-Dictionary ID (dictID=#): nonnegative number; if not provided, default to 0
-Maximum Dictionary Size (maxdict=#): positive number; in bytes, if not provided, default to 110KB
-Size of Randomly Selected Segment (k=#): positive number; in bytes; if not provided, default to 200
-
-### Running Test:
-make test
-
-
-### Usage:
-To build a random dictionary with the provided arguments: make ARG= followed by arguments
-
-
-### Examples:
-make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520"
-make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
diff --git a/contrib/experimental_dict_builders/randomDictBuilder/io.c b/contrib/experimental_dict_builders/randomDictBuilder/io.c
deleted file mode 100644
index bfe39eaed6b1..000000000000
--- a/contrib/experimental_dict_builders/randomDictBuilder/io.c
+++ /dev/null
@@ -1,284 +0,0 @@
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* strcmp, strlen */
-#include <errno.h> /* errno */
-#include <ctype.h>
-#include "io.h"
-#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */
-#include "platform.h" /* Large Files support */
-#include "util.h"
-#include "zdict.h"
-
-/*-*************************************
-* Console display
-***************************************/
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-
-static const U64 g_refreshRate = SEC_TO_MICRO / 6;
-static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
-
-#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
- if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
- { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
- if (displayLevel>=4) fflush(stderr); } } }
-
-/*-*************************************
-* Exceptions
-***************************************/
-#ifndef DEBUG
-# define DEBUG 0
-#endif
-#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
-#define EXM_THROW(error, ...) \
-{ \
- DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
- DISPLAY("Error %i : ", error); \
- DISPLAY(__VA_ARGS__); \
- DISPLAY("\n"); \
- exit(error); \
-}
-
-
-/*-*************************************
-* Constants
-***************************************/
-
-#define SAMPLESIZE_MAX (128 KB)
-#define RANDOM_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
-#define RANDOM_MEMMULT 9
-static const size_t g_maxMemory = (sizeof(size_t) == 4) ?
- (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
-
-#define NOISELENGTH 32
-
-
-/*-*************************************
-* Commandline related functions
-***************************************/
-/** readU32FromChar() :
- * Parse an unsigned decimal number at *stringPtr, optionally followed by a
- * `K` or `M` multiplier (x1024 / x1024x1024) and an optional `i` and/or `B`
- * (so "4K", "4KB" and "4KiB" are all accepted).
- * *stringPtr is advanced past everything that was consumed.
- * @return : the parsed value (0 when no digit is present).
- * Prints an error message on stderr and exits with code 1 when the value
- * does not fit. */
-unsigned readU32FromChar(const char** stringPtr){
-    const char errorMsg[] = "error: numeric value too large";
-    unsigned result = 0;
-    while ((**stringPtr >='0') && (**stringPtr <='9')) {
-        /* conservative bound : result*10 + 9 cannot overflow below it */
-        unsigned const max = (((unsigned)(-1)) / 10) - 1;
-        if (result > max) { fprintf(stderr, "%s \n", errorMsg); exit(1); }
-        result *= 10;
-        result += (unsigned)(**stringPtr - '0');
-        (*stringPtr)++;
-    }
-    if ((**stringPtr=='K') || (**stringPtr=='M')) {
-        unsigned const maxK = ((unsigned)(-1)) >> 10;
-        if (result > maxK) { fprintf(stderr, "%s \n", errorMsg); exit(1); }
-        result <<= 10;
-        if (**stringPtr=='M') {
-            if (result > maxK) { fprintf(stderr, "%s \n", errorMsg); exit(1); }
-            result <<= 10;
-        }
-        (*stringPtr)++; /* skip `K` or `M` */
-        if (**stringPtr=='i') (*stringPtr)++;
-        if (**stringPtr=='B') (*stringPtr)++;
-    }
-    return result;
-}
-
-/* longCommandWArg() :
- * Test whether *stringPtr begins with `longCommand`.
- * On a match, advance *stringPtr just past the prefix and return 1;
- * otherwise leave *stringPtr untouched and return 0. */
-unsigned longCommandWArg(const char** stringPtr, const char* longCommand){
-    size_t const prefixLen = strlen(longCommand);
-    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) {
-        return 0;
-    }
-    *stringPtr += prefixLen;
-    return 1;
-}
-
-
-/* ********************************************************
-* File related operations
-**********************************************************/
-/** loadFiles() :
- * Load samples from the files listed in fileNamesTable into `buffer`.
- * Works even if buffer is too small to load all samples : loading of a file
- * stops as soon as its next sample no longer fits.
- * A file yields a single sample when targetChunkSize == 0, otherwise one sample
- * per targetChunkSize-sized chunk; each sample is capped at SAMPLESIZE_MAX bytes.
- * The size of each loaded sample is stored into the sampleSizes table, which
- * holds sstSize entries and must be sized correctly (see getFileStats()).
- * Exits through EXM_THROW if a file cannot be opened or a read comes up short.
- * @return : nb of samples effectively loaded into `buffer`
- * *bufferSizePtr is modified, it provides the amount data loaded within buffer.
- * sampleSizes is filled with the size of each sample.
- */
-static unsigned loadFiles(void* buffer, size_t* bufferSizePtr, size_t* sampleSizes,
- unsigned sstSize, const char** fileNamesTable, unsigned nbFiles,
- size_t targetChunkSize, unsigned displayLevel) {
- char* const buff = (char*)buffer;
- size_t pos = 0;
- unsigned nbLoadedChunks = 0, fileIndex;
-
- for (fileIndex=0; fileIndex<nbFiles; fileIndex++) {
- const char* const fileName = fileNamesTable[fileIndex];
- unsigned long long const fs64 = UTIL_getFileSize(fileName);
- /* a file of unknown size contributes no data */
- unsigned long long remainingToLoad = (fs64 == UTIL_FILESIZE_UNKNOWN) ? 0 : fs64;
- U32 const nbChunks = targetChunkSize ? (U32)((fs64 + (targetChunkSize-1)) / targetChunkSize) : 1;
- U64 const chunkSize = targetChunkSize ? MIN(targetChunkSize, fs64) : fs64;
- size_t const maxChunkSize = (size_t)MIN(chunkSize, SAMPLESIZE_MAX);
- U32 cnb;
- FILE* const f = fopen(fileName, "rb");
- if (f==NULL) EXM_THROW(10, "zstd: dictBuilder: %s %s ", fileName, strerror(errno));
- DISPLAYUPDATE(2, "Loading %s... \r", fileName);
- for (cnb=0; cnb<nbChunks; cnb++) {
- size_t const toLoad = (size_t)MIN(maxChunkSize, remainingToLoad);
- /* not enough room left in buffer : give up on the rest of this file */
- if (toLoad > *bufferSizePtr-pos) break;
- { size_t const readSize = fread(buff+pos, 1, toLoad, f);
- if (readSize != toLoad) EXM_THROW(11, "Pb reading %s", fileName);
- pos += readSize;
- sampleSizes[nbLoadedChunks++] = toLoad;
- /* a whole chunk is accounted for, even when only toLoad bytes were read */
- remainingToLoad -= targetChunkSize;
- if (nbLoadedChunks == sstSize) { /* no more space left in sampleSizes table */
- fileIndex = nbFiles; /* stop there */
- break;
- }
- /* skip the part of the chunk that was not loaded */
- if (toLoad < targetChunkSize) {
- fseek(f, (long)(targetChunkSize - toLoad), SEEK_CUR);
- } } }
- fclose(f);
- }
- DISPLAYLEVEL(2, "\r%79s\r", "");
- *bufferSizePtr = pos;
- DISPLAYLEVEL(4, "loaded : %u KB \n", (U32)(pos >> 10))
- return nbLoadedChunks;
-}
-
-#define rotl32(x,r) ((x << r) | (x >> (32 - r)))
-/* getRand() :
- * Advance the 32-bit generator state stored in *src (multiply, xor, rotate)
- * and return the new state shifted right by 5 bits. */
-static U32 getRand(U32* src)
-{
-    static const U32 prime1 = 2654435761U;
-    static const U32 prime2 = 2246822519U;
-    U32 const scrambled = (*src * prime1) ^ prime2;
-    U32 const nextState = rotl32(scrambled, 13);
-    *src = nextState;
-    return nextState >> 5;
-}
-
-/* shuffle() :
- * shuffle a table of file names in a semi-random way
- * It improves dictionary quality by reducing "locality" impact, so if sample set is very large,
- * it will load random elements from it, instead of just the first ones.
- * The seed is fixed, so the resulting order is the same on every run.
- * Tables with fewer than 2 entries are left untouched. */
-static void shuffle(const char** fileNamesTable, unsigned nbFiles) {
-    U32 seed = 0xFD2FB528;
-    unsigned i;
-    /* nothing to permute; also prevents `nbFiles - 1` from wrapping when nbFiles == 0 */
-    if (nbFiles < 2) return;
-    for (i = nbFiles - 1; i > 0; --i) {
-        unsigned const j = getRand(&seed) % (i + 1);
-        const char* const tmp = fileNamesTable[j];
-        fileNamesTable[j] = fileNamesTable[i];
-        fileNamesTable[i] = tmp;
-    }
-}
-
-
-/*-********************************************************
-* Dictionary training functions
-**********************************************************/
-/* findMaxMem() :
- * Probe how much memory can actually be allocated for `requiredMem` bytes.
- * The request is rounded up to the next 8 MB boundary, increased by one 8 MB
- * step and capped at g_maxMemory; allocation is then attempted with a size
- * shrinking by one step per failure.
- * @return : the size that could be allocated minus one step,
- *           or 0 if no attempt succeeded before the size ran out. */
-size_t findMaxMem(unsigned long long requiredMem) {
-    size_t const step = 8 MB;
-    void* testmem = NULL;
-
-    requiredMem = (((requiredMem >> 23) + 1) << 23);
-    requiredMem += step;
-    if (requiredMem > g_maxMemory) requiredMem = g_maxMemory;
-
-    while (!testmem) {
-        testmem = malloc((size_t)requiredMem);
-        if (requiredMem <= step) {
-            /* last possible attempt : do not let the unsigned size wrap around */
-            requiredMem = 0;
-            break;
-        }
-        requiredMem -= step;
-    }
-
-    free(testmem);
-    return (size_t)requiredMem;
-}
-
-/* saveDict() :
- * Write the `buffSize` bytes at `buff` into the file `dictFileName`.
- * Exits through EXM_THROW with code 3 (open), 4 (write) or 5 (close) on failure. */
-void saveDict(const char* dictFileName,
-              const void* buff, size_t buffSize) {
-    FILE* const out = fopen(dictFileName, "wb");
-    if (out == NULL) {
-        EXM_THROW(3, "cannot open %s ", dictFileName);
-    }
-
-    if (fwrite(buff, 1, buffSize, out) != buffSize) {
-        EXM_THROW(4, "%s : write error", dictFileName);
-    }
-
-    if (fclose(out) != 0) {
-        EXM_THROW(5, "%s : flush error", dictFileName);
-    }
-}
-
-/*! getFileStats() :
- * For a list of files and a chunkSize (0 == whole file is one sample),
- * compute how many samples will be produced and how many bytes will be loaded,
- * each sample being capped at SAMPLESIZE_MAX bytes.
- * Used to size the sample buffer and the sample sizes table.
- */
-static fileStats getFileStats(const char** fileNamesTable, unsigned nbFiles,
-                              size_t chunkSize, unsigned displayLevel) {
-    fileStats stats;
-    unsigned fileIdx;
-    memset(&stats, 0, sizeof(stats));
-    for (fileIdx = 0; fileIdx < nbFiles; fileIdx++) {
-        U64 const rawSize = UTIL_getFileSize(fileNamesTable[fileIdx]);
-        /* files of unknown size count as empty */
-        U64 const srcSize = (rawSize == UTIL_FILESIZE_UNKNOWN) ? 0 : rawSize;
-        U32 samplesInFile = 1;
-        U64 bytesPerSample = srcSize;
-        if (chunkSize) {
-            samplesInFile = (U32)((srcSize + (chunkSize-1)) / chunkSize);
-            bytesPerSample = MIN(chunkSize, srcSize);
-        }
-        {   size_t const cappedSize = (size_t)MIN(bytesPerSample, SAMPLESIZE_MAX);
-            stats.totalSizeToLoad += cappedSize * samplesInFile;
-        }
-        stats.oneSampleTooLarge |= (chunkSize > 2*SAMPLESIZE_MAX);
-        stats.nbSamples += samplesInFile;
-    }
-    DISPLAYLEVEL(4, "Preparing to load : %u KB \n", (U32)(stats.totalSizeToLoad >> 10));
-    return stats;
-}
-
-
-
-
-/* getSampleInfo() :
- * Load the samples found in `fileNamesTable` into a freshly allocated buffer.
- * `chunkSize` == 0 means one sample per file. The file table is shuffled in
- * place before loading. When less memory is available than the samples need,
- * only part of them is loaded.
- * @return : a heap-allocated sampleInfo (release with freeSampleInfo()) whose
- *           nbSamples is the number of samples actually loaded.
- * Exits through EXM_THROW on allocation failure (12) or when fewer than
- * 5 samples are available (14). */
-sampleInfo* getSampleInfo(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
-                          unsigned maxDictSize, const unsigned displayLevel) {
-    fileStats const fs = getFileStats(fileNamesTable, nbFiles, chunkSize, displayLevel);
-    size_t* const sampleSizes = (size_t*)malloc(fs.nbSamples * sizeof(size_t));
-    size_t const memMult = RANDOM_MEMMULT;
-    size_t const maxMem = findMaxMem(fs.totalSizeToLoad * memMult) / memMult;
-    size_t loadedSize = (size_t) MIN ((unsigned long long)maxMem, fs.totalSizeToLoad);
-    void* const srcBuffer = malloc(loadedSize+NOISELENGTH);
-    unsigned nbLoadedSamples;
-    sampleInfo* info;
-
-    /* Checks */
-    if ((!sampleSizes) || (!srcBuffer))
-        EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
-    if (fs.oneSampleTooLarge) {
-        DISPLAYLEVEL(2, "! Warning : some sample(s) are very large \n");
-        DISPLAYLEVEL(2, "! Note that dictionary is only useful for small samples. \n");
-        DISPLAYLEVEL(2, "! As a consequence, only the first %u bytes of each sample are loaded \n", (unsigned)SAMPLESIZE_MAX);
-    }
-    if (fs.nbSamples < 5) {
-        DISPLAYLEVEL(2, "! Warning : nb of samples too low for proper processing ! \n");
-        DISPLAYLEVEL(2, "! Please provide _one file per sample_. \n");
-        DISPLAYLEVEL(2, "! Alternatively, split files into fixed-size blocks representative of samples, with -B# \n");
-        EXM_THROW(14, "nb of samples too low"); /* we now clearly forbid this case */
-    }
-    if (fs.totalSizeToLoad < (unsigned long long)(8 * maxDictSize)) {
-        DISPLAYLEVEL(2, "! Warning : data size of samples too small for target dictionary size \n");
-        DISPLAYLEVEL(2, "! Samples should be about 100x larger than target dictionary size \n");
-    }
-
-    /* init */
-    if (loadedSize < fs.totalSizeToLoad)
-        DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(loadedSize >> 20));
-
-    /* Load input buffer */
-    DISPLAYLEVEL(3, "Shuffling input files\n");
-    shuffle(fileNamesTable, nbFiles);
-    nbLoadedSamples = loadFiles(srcBuffer, &loadedSize, sampleSizes, fs.nbSamples,
-                                fileNamesTable, nbFiles, chunkSize, displayLevel);
-
-    info = (sampleInfo *)malloc(sizeof(*info));
-    if (!info)
-        EXM_THROW(12, "not enough memory for trainFromFiles");
-
-    /* Report what was really loaded : loadFiles() stops early when the buffer
-     * is full, leaving the remaining sampleSizes entries unset. */
-    info->nbSamples = nbLoadedSamples;
-    info->samplesSizes = sampleSizes;
-    info->srcBuffer = srcBuffer;
-
-    return info;
-}
-
-
-/* freeSampleInfo() :
- * Release a sampleInfo together with the sample sizes table and the source
- * buffer it points to. Passing NULL is allowed and does nothing. */
-void freeSampleInfo(sampleInfo *info) {
-    if (info != NULL) {
-        /* the members are const-qualified, hence the casts */
-        free((void*)(info->samplesSizes));
-        free((void*)(info->srcBuffer));
-        free(info);
-    }
-}
diff --git a/contrib/experimental_dict_builders/randomDictBuilder/io.h b/contrib/experimental_dict_builders/randomDictBuilder/io.h
deleted file mode 100644
index 0ee24604eed2..000000000000
--- a/contrib/experimental_dict_builders/randomDictBuilder/io.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* strcmp, strlen */
-#include <errno.h> /* errno */
-#include <ctype.h>
-#include "zstd_internal.h" /* includes zstd.h */
-#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */
-#include "platform.h" /* Large Files support */
-#include "util.h"
-#include "zdict.h"
-
-
-/*-*************************************
-* Structs
-***************************************/
-typedef struct {
- U64 totalSizeToLoad;
- unsigned oneSampleTooLarge;
- unsigned nbSamples;
-} fileStats;
-
-typedef struct {
- const void* srcBuffer;
- const size_t *samplesSizes;
- size_t nbSamples;
-}sampleInfo;
-
-
-
-/*! getSampleInfo():
- * Load from input files and add samples to buffer
- * @return: a sampleInfo struct containing information about buffer where samples are stored,
- * size of each sample, and total number of samples
- */
-sampleInfo* getSampleInfo(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
- unsigned maxDictSize, const unsigned displayLevel);
-
-
-
-/*! freeSampleInfo():
- * Free memory allocated for info
- */
-void freeSampleInfo(sampleInfo *info);
-
-
-
-/*! saveDict():
- * Save data stored on buff to dictFileName
- */
-void saveDict(const char* dictFileName, const void* buff, size_t buffSize);
-
-
-unsigned readU32FromChar(const char** stringPtr);
-
-/** longCommandWArg() :
- * check if *stringPtr is the same as longCommand.
- * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
- * @return 0 and doesn't modify *stringPtr otherwise.
- */
-unsigned longCommandWArg(const char** stringPtr, const char* longCommand);
diff --git a/contrib/experimental_dict_builders/randomDictBuilder/main.c b/contrib/experimental_dict_builders/randomDictBuilder/main.c
deleted file mode 100644
index 3ad885746090..000000000000
--- a/contrib/experimental_dict_builders/randomDictBuilder/main.c
+++ /dev/null
@@ -1,161 +0,0 @@
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* strcmp, strlen */
-#include <errno.h> /* errno */
-#include <ctype.h>
-#include "random.h"
-#include "io.h"
-#include "util.h"
-#include "zdict.h"
-
-
-/*-*************************************
-* Console display
-***************************************/
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-
-static const U64 g_refreshRate = SEC_TO_MICRO / 6;
-static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
-
-#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
- if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
- { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
- if (displayLevel>=4) fflush(stderr); } } }
-
-
-/*-*************************************
-* Exceptions
-***************************************/
-#ifndef DEBUG
-# define DEBUG 0
-#endif
-#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
-#define EXM_THROW(error, ...) \
-{ \
- DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
- DISPLAY("Error %i : ", error); \
- DISPLAY(__VA_ARGS__); \
- DISPLAY("\n"); \
- exit(error); \
-}
-
-
-/*-*************************************
-* Constants
-***************************************/
-static const unsigned g_defaultMaxDictSize = 110 KB;
-#define DEFAULT_CLEVEL 3
-#define DEFAULT_k 200
-#define DEFAULT_OUTPUTFILE "defaultDict"
-#define DEFAULT_DICTID 0
-
-
-
-/*-*************************************
-* RANDOM
-***************************************/
-/* RANDOM_trainFromFiles() :
- * Train a random dictionary from the samples described by `info` and write
- * it to `dictFileName`.
- * @return : 0 on success, 1 if training failed.
- * Exits the process (code 12) if the dictionary buffer cannot be allocated. */
-int RANDOM_trainFromFiles(const char* dictFileName, sampleInfo *info,
-                          unsigned maxDictSize,
-                          ZDICT_random_params_t *params) {
-    unsigned const displayLevel = params->zParams.notificationLevel;
-    void* const dictBuffer = malloc(maxDictSize);
-    size_t trainedSize;
-    int status;
-
-    if (dictBuffer == NULL) {
-        EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
-    }
-
-    trainedSize = ZDICT_trainFromBuffer_random(dictBuffer, maxDictSize, info->srcBuffer,
-                                               info->samplesSizes, info->nbSamples, *params);
-    DISPLAYLEVEL(2, "k=%u\n", params->k);
-
-    if (ZDICT_isError(trainedSize)) {
-        DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(trainedSize)); /* should not happen */
-        status = 1;
-    } else {
-        DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)trainedSize, dictFileName);
-        saveDict(dictFileName, dictBuffer, trainedSize);
-        status = 0;
-    }
-
-    free(dictBuffer);
-    return status;
-}
-
-
-
-/* main() :
- * Command-line driver for the random dictionary builder.
- * Accepted arguments (each of the form name=value):
- *   k=, dictID=, maxdict=, in= (repeatable), out=
- * Any other argument is rejected.
- * @return : 0 on success, 1 on incorrect parameters or training failure. */
-int main(int argCount, const char* argv[])
-{
-    int displayLevel = 2;
-    int operationResult = 0;
-
-    /* Initialize arguments to default values */
-    unsigned k = DEFAULT_k;
-    const char* outputFile = DEFAULT_OUTPUTFILE;
-    unsigned dictID = DEFAULT_DICTID;
-    unsigned maxDictSize = g_defaultMaxDictSize;
-
-    /* Table of input paths : at most one entry per command-line argument.
-     * +1 keeps the request non-zero sized. */
-    const char** filenameTable = (const char**)malloc(((size_t)argCount + 1) * sizeof(const char*));
-    unsigned filenameIdx = 0;
-
-    char* fileNamesBuf = NULL;
-    unsigned fileNamesNb = filenameIdx;
-    int followLinks = 0; /* follow directory recursively */
-    const char** extendedFileList = NULL;
-    int i;
-
-    if (filenameTable == NULL) {
-        EXM_THROW(12, "not enough memory for input file table");
-    }
-
-    /* Parse arguments */
-    for (i = 1; i < argCount; i++) {
-        const char* argument = argv[i];
-        if (longCommandWArg(&argument, "k=")) { k = readU32FromChar(&argument); continue; }
-        if (longCommandWArg(&argument, "dictID=")) { dictID = readU32FromChar(&argument); continue; }
-        if (longCommandWArg(&argument, "maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
-        if (longCommandWArg(&argument, "in=")) {
-            filenameTable[filenameIdx] = argument;
-            filenameIdx++;
-            continue;
-        }
-        if (longCommandWArg(&argument, "out=")) {
-            outputFile = argument;
-            continue;
-        }
-        DISPLAYLEVEL(1, "Incorrect parameters\n");
-        free((void*)filenameTable);   /* do not leak the table on early exit */
-        operationResult = 1;
-        return operationResult;
-    }
-
-    /* Expand the inputs into a flat list of files */
-    extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
-                                           &fileNamesNb, followLinks);
-    if (extendedFileList) {
-        unsigned u;
-        for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
-        /* the expanded list replaces the argument table from here on */
-        free((void*)filenameTable);
-        filenameTable = extendedFileList;
-        filenameIdx = fileNamesNb;
-    }
-
-    {   size_t const blockSize = 0;   /* 0 : one sample per file */
-        ZDICT_random_params_t params;
-        ZDICT_params_t zParams;
-        sampleInfo* info;
-
-        /* Zero both structs first so any field not assigned below is defined */
-        memset(&zParams, 0, sizeof(zParams));
-        zParams.compressionLevel = DEFAULT_CLEVEL;
-        zParams.notificationLevel = displayLevel;
-        zParams.dictID = dictID;
-
-        memset(&params, 0, sizeof(params));
-        params.zParams = zParams;
-        params.k = k;
-
-        info = getSampleInfo(filenameTable, filenameIdx, blockSize, maxDictSize,
-                             zParams.notificationLevel);
-        operationResult = RANDOM_trainFromFiles(outputFile, info, maxDictSize, &params);
-
-        /* Free allocated memory */
-        freeSampleInfo(info);
-    }
-
-    /* When no expanded list was produced, filenameTable is still the original
-     * malloc'ed table and must be released here. */
-    if (extendedFileList == NULL) free((void*)filenameTable);
-    UTIL_freeFileList(extendedFileList, fileNamesBuf);
-
-    return operationResult;
-}
diff --git a/contrib/experimental_dict_builders/randomDictBuilder/random.c b/contrib/experimental_dict_builders/randomDictBuilder/random.c
deleted file mode 100644
index 5276bea96a56..000000000000
--- a/contrib/experimental_dict_builders/randomDictBuilder/random.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/*-*************************************
-* Dependencies
-***************************************/
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memset */
-#include <time.h> /* clock */
-#include "random.h"
-#include "util.h" /* UTIL_getFileSize, UTIL_getTotalFileSize */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
-
-/*-*************************************
-* Console display
-***************************************/
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-
-#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
- if (displayLevel >= l) { \
- if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
- g_time = clock(); \
- DISPLAY(__VA_ARGS__); \
- } \
- }
-#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(displayLevel, l, __VA_ARGS__)
-static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
-static clock_t g_time = 0;
-
-
-
-/* ********************************************************
-* Random Dictionary Builder
-**********************************************************/
-/**
- * Adds up the first `nbSamples` entries of `samplesSizes` and returns the total.
- */
-static size_t RANDOM_sum(const size_t *samplesSizes, unsigned nbSamples) {
-    size_t total = 0;
-    unsigned remaining;
-    /* walk the table from the back; the total does not depend on the order */
-    for (remaining = nbSamples; remaining > 0; --remaining) {
-        total += samplesSizes[remaining - 1];
-    }
-    return total;
-}
-
-
-/**
- * A segment is an inclusive range in the source.
- */
-typedef struct {
- U32 begin;
- U32 end;
-} RANDOM_segment_t;
-
-
-/**
- * Selects a random segment of k bytes from the totalSamplesSize - k + 1
- * possible segments of the source.
- * If the source is shorter than k, the segment starts at 0 and its end is
- * clamped to the last byte of a non-empty source, so the modulo below is
- * always well defined.
- * The returned range is inclusive on both ends.
- */
-static RANDOM_segment_t RANDOM_selectSegment(const size_t totalSamplesSize,
-                                             ZDICT_random_params_t parameters) {
-    const U32 k = parameters.k;
-    /* number of valid start offsets; never 0, never wrapped */
-    const size_t nbPositions = (totalSamplesSize >= k) ? (totalSamplesSize - k + 1) : 1;
-    RANDOM_segment_t segment;
-    unsigned index;
-
-    /* Randomly generate a number from 0 to totalSamplesSize - k */
-    index = (unsigned)((size_t)rand() % nbPositions);
-
-    /* inclusive */
-    segment.begin = index;
-    segment.end = index + k - 1;
-    if ((totalSamplesSize != 0) && ((size_t)segment.end >= totalSamplesSize)) {
-        segment.end = (U32)(totalSamplesSize - 1);
-    }
-
-    return segment;
-}
-
-
-/**
- * Validates the training parameters.
- * k must be provided (non-zero) and must not exceed maxDictSize.
- * Returns 1 when both conditions hold and 0 otherwise.
- */
-static int RANDOM_checkParameters(ZDICT_random_params_t parameters,
-                                  size_t maxDictSize) {
-    int const hasSegmentSize = (parameters.k != 0);
-    int const fitsInDictionary = (parameters.k <= maxDictSize);
-    return hasSegmentSize && fitsInDictionary;
-}
-
-
-/**
- * Fills dictBuffer from its end towards its start with randomly selected
- * segments of `samples`, until no free space is left.
- * Returns the offset at which the dictionary content begins in dictBuffer.
- */
-static size_t RANDOM_buildDictionary(const size_t totalSamplesSize, const BYTE *samples,
-                                     void *dictBuffer, size_t dictBufferCapacity,
-                                     ZDICT_random_params_t parameters) {
-    const int displayLevel = parameters.zParams.notificationLevel;
-    BYTE *const out = (BYTE *)dictBuffer;
-    size_t freeBytes = dictBufferCapacity;
-
-    for ( ; freeBytes > 0; ) {
-        RANDOM_segment_t const picked = RANDOM_selectSegment(totalSamplesSize, parameters);
-        /* the last segment is truncated to whatever room is left */
-        size_t const copySize = MIN(picked.end - picked.begin + 1, freeBytes);
-
-        freeBytes -= copySize;
-        memcpy(out + freeBytes, samples + picked.begin, copySize);
-        DISPLAYUPDATE(
-            2, "\r%u%% ",
-            (U32)(((dictBufferCapacity - freeBytes) * 100) / dictBufferCapacity));
-    }
-
-    return freeBytes;
-}
-
-
-
-
-/* ZDICT_trainFromBuffer_random() :
- * Build a dictionary out of randomly selected k-byte segments of the samples,
- * then finalize it with ZDICT_finalizeDictionary().
- * Requirements : 0 < k <= dictBufferCapacity, nbSamples > 0,
- * dictBufferCapacity >= ZDICT_DICTSIZE_MIN, and the samples must hold at
- * least k bytes in total.
- * @return : size of the dictionary stored into `dictBuffer`,
- *           or an error code, which can be tested with ZDICT_isError(). */
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_random(
-    void *dictBuffer, size_t dictBufferCapacity,
-    const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
-    ZDICT_random_params_t parameters) {
-    const int displayLevel = parameters.zParams.notificationLevel;
-    BYTE* const dict = (BYTE*)dictBuffer;
-    const BYTE *const samples = (const BYTE *)samplesBuffer;
-    size_t totalSamplesSize;
-
-    /* Checks */
-    if (!RANDOM_checkParameters(parameters, dictBufferCapacity)) {
-        DISPLAYLEVEL(1, "k is incorrect\n");
-        return ERROR(GENERIC);
-    }
-    if (nbSamples == 0) {
-        DISPLAYLEVEL(1, "Random must have at least one input file\n");
-        return ERROR(GENERIC);
-    }
-    if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
-        DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
-                     ZDICT_DICTSIZE_MIN);
-        return ERROR(dstSize_tooSmall);
-    }
-    totalSamplesSize = RANDOM_sum(samplesSizes, nbSamples);
-    /* a k-byte segment cannot be cut out of fewer than k bytes of samples */
-    if (totalSamplesSize < parameters.k) {
-        DISPLAYLEVEL(1, "Total size of samples must be at least k\n");
-        return ERROR(srcSize_wrong);
-    }
-
-    DISPLAYLEVEL(2, "Building dictionary\n");
-    {
-        const size_t tail = RANDOM_buildDictionary(totalSamplesSize, samples,
-                                                   dictBuffer, dictBufferCapacity, parameters);
-        const size_t dictSize = ZDICT_finalizeDictionary(
-            dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
-            samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
-        if (!ZDICT_isError(dictSize)) {
-            DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
-                         (U32)dictSize);
-        }
-        return dictSize;
-    }
-}
diff --git a/contrib/experimental_dict_builders/randomDictBuilder/random.h b/contrib/experimental_dict_builders/randomDictBuilder/random.h
deleted file mode 100644
index 352775f950c4..000000000000
--- a/contrib/experimental_dict_builders/randomDictBuilder/random.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memset */
-#include <time.h> /* clock */
-#include "zstd_internal.h" /* includes zstd.h */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
-
-
-
-typedef struct {
- unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+]; Default to 200 */
- ZDICT_params_t zParams;
-} ZDICT_random_params_t;
-
-
-/*! ZDICT_trainFromBuffer_random():
- * Train a dictionary from an array of samples.
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- * The resulting dictionary will be saved into `dictBuffer`.
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- * or an error code, which can be tested with ZDICT_isError().
- */
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_random( void *dictBuffer, size_t dictBufferCapacity,
- const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
- ZDICT_random_params_t parameters);
diff --git a/contrib/experimental_dict_builders/randomDictBuilder/test.sh b/contrib/experimental_dict_builders/randomDictBuilder/test.sh
deleted file mode 100755
index 1eb732e52a09..000000000000
--- a/contrib/experimental_dict_builders/randomDictBuilder/test.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-echo "Building random dictionary with in=../../lib/common k=200 out=dict1"
-./main in=../../../lib/common k=200 out=dict1
-zstd -be3 -D dict1 -r ../../../lib/common -q
-echo "Building random dictionary with in=../../lib/common k=500 out=dict2 dictID=100 maxdict=140000"
-./main in=../../../lib/common k=500 out=dict2 dictID=100 maxdict=140000
-zstd -be3 -D dict2 -r ../../../lib/common -q
-echo "Building random dictionary with 2 sample sources"
-./main in=../../../lib/common in=../../../lib/compress out=dict3
-zstd -be3 -D dict3 -r ../../../lib/common -q
-echo "Removing dict1 dict2 dict3"
-rm -f dict1 dict2 dict3
-
-echo "Testing with invalid parameters, should fail"
-! ./main r=10
diff --git a/contrib/gen_html/Makefile b/contrib/gen_html/Makefile
deleted file mode 100644
index 425f266c4e46..000000000000
--- a/contrib/gen_html/Makefile
+++ /dev/null
@@ -1,51 +0,0 @@
-# ################################################################
-# Copyright (c) 2016-present, Facebook, Inc.
-# All rights reserved.
-#
-# This source code is licensed under both the BSD-style license (found in the
-# LICENSE file in the root directory of this source tree) and the GPLv2 (found
-# in the COPYING file in the root directory of this source tree).
-# ################################################################
-
-CXXFLAGS ?= -O3
-CXXFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wno-comment
-CXXFLAGS += $(MOREFLAGS)
-FLAGS = $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS)
-
-ZSTDAPI = ../../lib/zstd.h
-ZSTDMANUAL = ../../doc/zstd_manual.html
-LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(ZSTDAPI)`
-LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(ZSTDAPI)`
-LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(ZSTDAPI)`
-LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT)
-LIBVER := $(shell echo $(LIBVER_SCRIPT))
-
-
-# Define *.exe as extension for Windows systems
-ifneq (,$(filter Windows%,$(OS)))
-EXT =.exe
-else
-EXT =
-endif
-
-
-.PHONY: default
-default: gen_html
-
-.PHONY: all
-all: manual
-
-gen_html: gen_html.cpp
- $(CXX) $(FLAGS) $^ -o $@$(EXT)
-
-$(ZSTDMANUAL): gen_html $(ZSTDAPI)
- echo "Update zstd manual in /doc"
- ./gen_html $(LIBVER) $(ZSTDAPI) $(ZSTDMANUAL)
-
-.PHONY: manual
-manual: gen_html $(ZSTDMANUAL)
-
-.PHONY: clean
-clean:
- @$(RM) gen_html$(EXT)
- @echo Cleaning completed
diff --git a/contrib/gen_html/README.md b/contrib/gen_html/README.md
deleted file mode 100644
index 63a4caa25061..000000000000
--- a/contrib/gen_html/README.md
+++ /dev/null
@@ -1,31 +0,0 @@
-gen_html - a program for automatic generation of zstd manual
-============================================================
-
-#### Introduction
-
-This simple C++ program generates a single-page HTML manual from `zstd.h`.
-
-The format of recognized comment blocks is following:
-- comments of type `/*!` mean: this is a function declaration; switch comments with declarations
-- comments of type `/**` and `/*-` mean: this is a comment; use a `<H2>` header for the first line
-- comments of type `/*=` and `/**=` mean: use a `<H3>` header and show also all functions until first empty line
-- comments of type `/*X` where `X` is different from above-mentioned are ignored
-
-Moreover:
-- `ZSTDLIB_API` is removed to improve readability
-- `typedef` are detected and included even if uncommented
-- comments of type `/**<` and `/*!<` are detected and only function declaration is highlighted (bold)
-
-
-#### Usage
-
-The program requires 3 parameters:
-```
-gen_html [zstd_version] [input_file] [output_html]
-```
-
-To compile program and generate zstd manual we have used:
-```
-make
-./gen_html.exe 1.1.1 ../../lib/zstd.h zstd_manual.html
-```
diff --git a/contrib/gen_html/gen-zstd-manual.sh b/contrib/gen_html/gen-zstd-manual.sh
deleted file mode 100755
index 57a8b6ea512a..000000000000
--- a/contrib/gen_html/gen-zstd-manual.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/sh
-
-LIBVER_MAJOR_SCRIPT=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../../lib/zstd.h`
-LIBVER_MINOR_SCRIPT=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../../lib/zstd.h`
-LIBVER_PATCH_SCRIPT=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../../lib/zstd.h`
-LIBVER_SCRIPT=$LIBVER_MAJOR_SCRIPT.$LIBVER_MINOR_SCRIPT.$LIBVER_PATCH_SCRIPT
-
-echo ZSTD_VERSION=$LIBVER_SCRIPT
-./gen_html $LIBVER_SCRIPT ../../lib/zstd.h ./zstd_manual.html
diff --git a/contrib/gen_html/gen_html.cpp b/contrib/gen_html/gen_html.cpp
deleted file mode 100644
index 90d5b21a3aa6..000000000000
--- a/contrib/gen_html/gen_html.cpp
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Copyright (c) 2016-present, Przemyslaw Skibinski, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <vector>
-using namespace std;
-
-
-/* trim string at the beginning and at the end */
-void trim(string& s, string characters)
-{
- size_t p = s.find_first_not_of(characters);
- s.erase(0, p);
-
- p = s.find_last_not_of(characters);
- if (string::npos != p)
- s.erase(p+1);
-}
-
-
-/* trim C++ style comments */
-void trim_comments(string &s)
-{
- size_t spos, epos;
-
- spos = s.find("/*");
- epos = s.find("*/");
- s = s.substr(spos+3, epos-(spos+3));
-}
-
-
-/* get lines until a given terminator */
-vector<string> get_lines(vector<string>& input, int& linenum, string terminator)
-{
- vector<string> out;
- string line;
- size_t epos;
-
- while ((size_t)linenum < input.size()) {
- line = input[linenum];
-
- if (terminator.empty() && line.empty()) { linenum--; break; }
-
- epos = line.find(terminator);
- if (!terminator.empty() && epos!=string::npos) {
- out.push_back(line);
- break;
- }
- out.push_back(line);
- linenum++;
- }
- return out;
-}
-
-
-/* print line with ZSTDLIB_API removed and C++ comments not bold */
-void print_line(stringstream &sout, string line)
-{
- size_t spos;
-
- if (line.substr(0,12) == "ZSTDLIB_API ") line = line.substr(12);
- spos = line.find("/*");
- if (spos!=string::npos) {
- sout << line.substr(0, spos);
- sout << "</b>" << line.substr(spos) << "<b>" << endl;
- } else {
- // fprintf(stderr, "lines=%s\n", line.c_str());
- sout << line << endl;
- }
-}
-
-
-int main(int argc, char *argv[]) {
- char exclam;
- int linenum, chapter = 1;
- vector<string> input, lines, comments, chapters;
- string line, version;
- size_t spos, l;
- stringstream sout;
- ifstream istream;
- ofstream ostream;
-
- if (argc < 4) {
- cout << "usage: " << argv[0] << " [zstd_version] [input_file] [output_html]" << endl;
- return 1;
- }
-
- version = "zstd " + string(argv[1]) + " Manual";
-
- istream.open(argv[2], ifstream::in);
- if (!istream.is_open()) {
- cout << "Error opening file " << argv[2] << endl;
- return 1;
- }
-
- ostream.open(argv[3], ifstream::out);
- if (!ostream.is_open()) {
- cout << "Error opening file " << argv[3] << endl;
- return 1;
- }
-
- while (getline(istream, line)) {
- input.push_back(line);
- }
-
- for (linenum=0; (size_t)linenum < input.size(); linenum++) {
- line = input[linenum];
-
- /* typedefs are detected and included even if uncommented */
- if (line.substr(0,7) == "typedef" && line.find("{")!=string::npos) {
- lines = get_lines(input, linenum, "}");
- sout << "<pre><b>";
- for (l=0; l<lines.size(); l++) {
- print_line(sout, lines[l]);
- }
- sout << "</b></pre><BR>" << endl;
- continue;
- }
-
- /* comments of type /**< and /*!< are detected and only function declaration is highlighted (bold) */
- if ((line.find("/**<")!=string::npos || line.find("/*!<")!=string::npos) && line.find("*/")!=string::npos) {
- sout << "<pre><b>";
- print_line(sout, line);
- sout << "</b></pre><BR>" << endl;
- continue;
- }
-
- spos = line.find("/**=");
- if (spos==string::npos) {
- spos = line.find("/*!");
- if (spos==string::npos)
- spos = line.find("/**");
- if (spos==string::npos)
- spos = line.find("/*-");
- if (spos==string::npos)
- spos = line.find("/*=");
- if (spos==string::npos)
- continue;
- exclam = line[spos+2];
- }
- else exclam = '=';
-
- comments = get_lines(input, linenum, "*/");
- if (!comments.empty()) comments[0] = line.substr(spos+3);
- if (!comments.empty()) comments[comments.size()-1] = comments[comments.size()-1].substr(0, comments[comments.size()-1].find("*/"));
- for (l=0; l<comments.size(); l++) {
- if (comments[l].find(" *")==0) comments[l] = comments[l].substr(2);
- else if (comments[l].find(" *")==0) comments[l] = comments[l].substr(3);
- trim(comments[l], "*-=");
- }
- while (!comments.empty() && comments[comments.size()-1].empty()) comments.pop_back(); // remove empty line at the end
- while (!comments.empty() && comments[0].empty()) comments.erase(comments.begin()); // remove empty line at the start
-
- /* comments of type /*! mean: this is a function declaration; switch comments with declarations */
- if (exclam == '!') {
- if (!comments.empty()) comments.erase(comments.begin()); /* remove first line like "ZSTD_XXX() :" */
- linenum++;
- lines = get_lines(input, linenum, "");
-
- sout << "<pre><b>";
- for (l=0; l<lines.size(); l++) {
- // fprintf(stderr, "line[%d]=%s\n", l, lines[l].c_str());
- string fline = lines[l];
- if (fline.substr(0, 12) == "ZSTDLIB_API " ||
- fline.substr(0, 12) == string(12, ' '))
- fline = fline.substr(12);
- print_line(sout, fline);
- }
- sout << "</b><p>";
- for (l=0; l<comments.size(); l++) {
- print_line(sout, comments[l]);
- }
- sout << "</p></pre><BR>" << endl << endl;
- } else if (exclam == '=') { /* comments of type /*= and /**= mean: use a <H3> header and show also all functions until first empty line */
- trim(comments[0], " ");
- sout << "<h3>" << comments[0] << "</h3><pre>";
- for (l=1; l<comments.size(); l++) {
- print_line(sout, comments[l]);
- }
- sout << "</pre><b><pre>";
- lines = get_lines(input, ++linenum, "");
- for (l=0; l<lines.size(); l++) {
- print_line(sout, lines[l]);
- }
- sout << "</pre></b><BR>" << endl;
- } else { /* comments of type /** and /*- mean: this is a comment; use a <H2> header for the first line */
- if (comments.empty()) continue;
-
- trim(comments[0], " ");
- sout << "<a name=\"Chapter" << chapter << "\"></a><h2>" << comments[0] << "</h2><pre>";
- chapters.push_back(comments[0]);
- chapter++;
-
- for (l=1; l<comments.size(); l++) {
- print_line(sout, comments[l]);
- }
- if (comments.size() > 1)
- sout << "<BR></pre>" << endl << endl;
- else
- sout << "</pre>" << endl << endl;
- }
- }
-
- ostream << "<html>\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\">\n<title>" << version << "</title>\n</head>\n<body>" << endl;
- ostream << "<h1>" << version << "</h1>\n";
-
- ostream << "<hr>\n<a name=\"Contents\"></a><h2>Contents</h2>\n<ol>\n";
- for (size_t i=0; i<chapters.size(); i++)
- ostream << "<li><a href=\"#Chapter" << i+1 << "\">" << chapters[i].c_str() << "</a></li>\n";
- ostream << "</ol>\n<hr>\n";
-
- ostream << sout.str();
- ostream << "</html>" << endl << "</body>" << endl;
-
- return 0;
-}
diff --git a/contrib/largeNbDicts/Makefile b/contrib/largeNbDicts/Makefile
deleted file mode 100644
index 4c055b0ed3fe..000000000000
--- a/contrib/largeNbDicts/Makefile
+++ /dev/null
@@ -1,58 +0,0 @@
-# ################################################################
-# Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
-# All rights reserved.
-#
-# This source code is licensed under both the BSD-style license (found in the
-# LICENSE file in the root directory of this source tree) and the GPLv2 (found
-# in the COPYING file in the root directory of this source tree).
-# ################################################################
-
-PROGDIR = ../../programs
-LIBDIR = ../../lib
-
-LIBZSTD = $(LIBDIR)/libzstd.a
-
-CPPFLAGS+= -I$(LIBDIR) -I$(LIBDIR)/common -I$(LIBDIR)/dictBuilder -I$(PROGDIR)
-
-CFLAGS ?= -O3
-CFLAGS += -std=gnu99
-DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
- -Wstrict-aliasing=1 -Wswitch-enum \
- -Wstrict-prototypes -Wundef -Wpointer-arith \
- -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
- -Wredundant-decls
-CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
-
-
-default: largeNbDicts
-
-all : largeNbDicts
-
-largeNbDicts: util.o timefn.o benchfn.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD)
- $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
-
-.PHONY: $(LIBZSTD)
-$(LIBZSTD):
- $(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)"
-
-benchfn.o: $(PROGDIR)/benchfn.c
- $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
-
-timefn.o: $(PROGDIR)/timefn.c
- $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
-
-datagen.o: $(PROGDIR)/datagen.c
- $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
-
-util.o: $(PROGDIR)/util.c
- $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
-
-
-xxhash.o : $(LIBDIR)/common/xxhash.c
- $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
-
-
-clean:
- $(RM) *.o
- $(MAKE) -C $(LIBDIR) clean > /dev/null
- $(RM) largeNbDicts
diff --git a/contrib/largeNbDicts/README.md b/contrib/largeNbDicts/README.md
deleted file mode 100644
index f29bcdfe8e37..000000000000
--- a/contrib/largeNbDicts/README.md
+++ /dev/null
@@ -1,25 +0,0 @@
-largeNbDicts
-=====================
-
-`largeNbDicts` is a benchmark test tool
-dedicated to the specific scenario of
-dictionary decompression using a very large number of dictionaries.
-When dictionaries are constantly changing, they are always "cold",
-suffering from increased latency due to cache misses.
-
-The tool is created in a bid to investigate performance for this scenario,
-and experiment mitigation techniques.
-
-Command line :
-```
-largeNbDicts [Options] filename(s)
-
-Options :
--r : recursively load all files in subdirectories (default: off)
--B# : split input into blocks of size # (default: no split)
--# : use compression level # (default: 3)
--D # : use # as a dictionary (default: create one)
--i# : nb benchmark rounds (default: 6)
---nbDicts=# : set nb of dictionaries to # (default: one per block)
--h : help (this text)
-```
diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c
deleted file mode 100644
index 627a6910576f..000000000000
--- a/contrib/largeNbDicts/largeNbDicts.c
+++ /dev/null
@@ -1,817 +0,0 @@
-/*
- * Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-/* largeNbDicts
- * This is a benchmark test tool
- * dedicated to the specific case of dictionary decompression
- * using a very large nb of dictionaries
- * thus suffering latency from lots of cache misses.
- * It's created in a bid to investigate performance and find optimizations. */
-
-
-/*--- Dependencies ---*/
-
-#include <stddef.h> /* size_t */
-#include <stdlib.h> /* malloc, free, abort */
-#include <stdio.h> /* fprintf */
-#include <limits.h> /* UINT_MAX */
-#include <assert.h> /* assert */
-
-#include "util.h"
-#include "benchfn.h"
-#define ZSTD_STATIC_LINKING_ONLY
-#include "zstd.h"
-#include "zdict.h"
-
-
-/*--- Constants --- */
-
-#define KB *(1<<10)
-#define MB *(1<<20)
-
-#define BLOCKSIZE_DEFAULT 0 /* no slicing into blocks */
-#define DICTSIZE (4 KB)
-#define CLEVEL_DEFAULT 3
-
-#define BENCH_TIME_DEFAULT_S 6
-#define RUN_TIME_DEFAULT_MS 1000
-#define BENCH_TIME_DEFAULT_MS (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS)
-
-#define DISPLAY_LEVEL_DEFAULT 3
-
-#define BENCH_SIZE_MAX (1200 MB)
-
-
-/*--- Macros ---*/
-
-#define CONTROL(c) { if (!(c)) abort(); }
-#undef MIN
-#define MIN(a,b) ((a) < (b) ? (a) : (b))
-
-
-/*--- Display Macros ---*/
-
-#define DISPLAY(...) fprintf(stdout, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
-static int g_displayLevel = DISPLAY_LEVEL_DEFAULT; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */
-
-
-/*--- buffer_t ---*/
-
-typedef struct {
- void* ptr;
- size_t size;
- size_t capacity;
-} buffer_t;
-
-static const buffer_t kBuffNull = { NULL, 0, 0 };
-
-/* @return : kBuffNull if any error */
-static buffer_t createBuffer(size_t capacity)
-{
- assert(capacity > 0);
- void* const ptr = malloc(capacity);
- if (ptr==NULL) return kBuffNull;
-
- buffer_t buffer;
- buffer.ptr = ptr;
- buffer.capacity = capacity;
- buffer.size = 0;
- return buffer;
-}
-
-static void freeBuffer(buffer_t buff)
-{
- free(buff.ptr);
-}
-
-
-static void fillBuffer_fromHandle(buffer_t* buff, FILE* f)
-{
- size_t const readSize = fread(buff->ptr, 1, buff->capacity, f);
- buff->size = readSize;
-}
-
-
-/* @return : kBuffNull if any error */
-static buffer_t createBuffer_fromFile(const char* fileName)
-{
- U64 const fileSize = UTIL_getFileSize(fileName);
- size_t const bufferSize = (size_t) fileSize;
-
- if (fileSize == UTIL_FILESIZE_UNKNOWN) return kBuffNull;
- assert((U64)bufferSize == fileSize); /* check overflow */
-
- { FILE* const f = fopen(fileName, "rb");
- if (f == NULL) return kBuffNull;
-
- buffer_t buff = createBuffer(bufferSize);
- CONTROL(buff.ptr != NULL);
-
- fillBuffer_fromHandle(&buff, f);
- CONTROL(buff.size == buff.capacity);
-
- fclose(f); /* do nothing specific if fclose() fails */
- return buff;
- }
-}
-
-
-/* @return : kBuffNull if any error */
-static buffer_t
-createDictionaryBuffer(const char* dictionaryName,
- const void* srcBuffer,
- const size_t* srcBlockSizes, size_t nbBlocks,
- size_t requestedDictSize)
-{
- if (dictionaryName) {
- DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName);
- return createBuffer_fromFile(dictionaryName); /* note : result might be kBuffNull */
-
- } else {
-
- DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n",
- (unsigned)requestedDictSize);
- void* const dictBuffer = malloc(requestedDictSize);
- CONTROL(dictBuffer != NULL);
-
- assert(nbBlocks <= UINT_MAX);
- size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, requestedDictSize,
- srcBuffer,
- srcBlockSizes, (unsigned)nbBlocks);
- CONTROL(!ZSTD_isError(dictSize));
-
- buffer_t result;
- result.ptr = dictBuffer;
- result.capacity = requestedDictSize;
- result.size = dictSize;
- return result;
- }
-}
-
-
-/*! BMK_loadFiles() :
- * Loads `buffer`, with content from files listed within `fileNamesTable`.
- * Fills `buffer` entirely.
- * @return : 0 on success, !=0 on error */
-static int loadFiles(void* buffer, size_t bufferSize,
- size_t* fileSizes,
- const char* const * fileNamesTable, unsigned nbFiles)
-{
- size_t pos = 0, totalSize = 0;
-
- for (unsigned n=0; n<nbFiles; n++) {
- U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
- if (UTIL_isDirectory(fileNamesTable[n])) {
- fileSizes[n] = 0;
- continue;
- }
- if (fileSize == UTIL_FILESIZE_UNKNOWN) {
- fileSizes[n] = 0;
- continue;
- }
-
- FILE* const f = fopen(fileNamesTable[n], "rb");
- assert(f!=NULL);
-
- assert(pos <= bufferSize);
- assert(fileSize <= bufferSize - pos);
-
- { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
- assert(readSize == fileSize);
- pos += readSize;
- }
- fileSizes[n] = (size_t)fileSize;
- totalSize += (size_t)fileSize;
- fclose(f);
- }
-
- assert(totalSize == bufferSize);
- return 0;
-}
-
-
-
-/*--- slice_collection_t ---*/
-
-typedef struct {
- void** slicePtrs;
- size_t* capacities;
- size_t nbSlices;
-} slice_collection_t;
-
-static const slice_collection_t kNullCollection = { NULL, NULL, 0 };
-
-static void freeSliceCollection(slice_collection_t collection)
-{
- free(collection.slicePtrs);
- free(collection.capacities);
-}
-
-/* shrinkSizes() :
- * downsizes sizes of slices within collection, according to `newSizes`.
- * every `newSizes` entry must be <= than its corresponding collection size */
-void shrinkSizes(slice_collection_t collection,
- const size_t* newSizes) /* presumed same size as collection */
-{
- size_t const nbSlices = collection.nbSlices;
- for (size_t blockNb = 0; blockNb < nbSlices; blockNb++) {
- assert(newSizes[blockNb] <= collection.capacities[blockNb]);
- collection.capacities[blockNb] = newSizes[blockNb];
- }
-}
-
-
-/* splitSlices() :
- * nbSlices : if == 0, nbSlices is automatically determined from srcSlices and blockSize.
- * otherwise, creates exactly nbSlices slices,
- * by either truncating input (when smaller)
- * or repeating input from beginning */
-static slice_collection_t
-splitSlices(slice_collection_t srcSlices, size_t blockSize, size_t nbSlices)
-{
- if (blockSize==0) blockSize = (size_t)(-1); /* means "do not cut" */
- size_t nbSrcBlocks = 0;
- for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) {
- size_t pos = 0;
- while (pos <= srcSlices.capacities[ssnb]) {
- nbSrcBlocks++;
- pos += blockSize;
- }
- }
-
- if (nbSlices == 0) nbSlices = nbSrcBlocks;
-
- void** const sliceTable = (void**)malloc(nbSlices * sizeof(*sliceTable));
- size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
- if (sliceTable == NULL || capacities == NULL) {
- free(sliceTable);
- free(capacities);
- return kNullCollection;
- }
-
- size_t ssnb = 0;
- for (size_t sliceNb=0; sliceNb < nbSlices; ) {
- ssnb = (ssnb + 1) % srcSlices.nbSlices;
- size_t pos = 0;
- char* const ptr = (char*)srcSlices.slicePtrs[ssnb];
- while (pos < srcSlices.capacities[ssnb] && sliceNb < nbSlices) {
- size_t const size = MIN(blockSize, srcSlices.capacities[ssnb] - pos);
- sliceTable[sliceNb] = ptr + pos;
- capacities[sliceNb] = size;
- sliceNb++;
- pos += blockSize;
- }
- }
-
- slice_collection_t result;
- result.nbSlices = nbSlices;
- result.slicePtrs = sliceTable;
- result.capacities = capacities;
- return result;
-}
-
-
-static size_t sliceCollection_totalCapacity(slice_collection_t sc)
-{
- size_t totalSize = 0;
- for (size_t n=0; n<sc.nbSlices; n++)
- totalSize += sc.capacities[n];
- return totalSize;
-}
-
-
-/* --- buffer collection --- */
-
-typedef struct {
- buffer_t buffer;
- slice_collection_t slices;
-} buffer_collection_t;
-
-
-static void freeBufferCollection(buffer_collection_t bc)
-{
- freeBuffer(bc.buffer);
- freeSliceCollection(bc.slices);
-}
-
-
-static buffer_collection_t
-createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)
-{
- size_t const bufferSize = sliceCollection_totalCapacity(sc);
-
- buffer_t buffer = createBuffer(bufferSize);
- CONTROL(buffer.ptr != NULL);
-
- size_t const nbSlices = sc.nbSlices;
- void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
- CONTROL(slices != NULL);
-
- size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
- CONTROL(capacities != NULL);
-
- char* const ptr = (char*)buffer.ptr;
- size_t pos = 0;
- for (size_t n=0; n < nbSlices; n++) {
- capacities[n] = sc.capacities[n];
- slices[n] = ptr + pos;
- pos += capacities[n];
- }
-
- buffer_collection_t result;
- result.buffer = buffer;
- result.slices.nbSlices = nbSlices;
- result.slices.capacities = capacities;
- result.slices.slicePtrs = slices;
- return result;
-}
-
-
-/* @return : kBuffNull if any error */
-static buffer_collection_t
-createBufferCollection_fromFiles(const char* const * fileNamesTable, unsigned nbFiles)
-{
- U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
- assert(totalSizeToLoad != UTIL_FILESIZE_UNKNOWN);
- assert(totalSizeToLoad <= BENCH_SIZE_MAX);
- size_t const loadedSize = (size_t)totalSizeToLoad;
- assert(loadedSize > 0);
- void* const srcBuffer = malloc(loadedSize);
- assert(srcBuffer != NULL);
-
- assert(nbFiles > 0);
- size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes));
- assert(fileSizes != NULL);
-
- /* Load input buffer */
- int const errorCode = loadFiles(srcBuffer, loadedSize,
- fileSizes,
- fileNamesTable, nbFiles);
- assert(errorCode == 0);
-
- void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable));
- assert(sliceTable != NULL);
-
- char* const ptr = (char*)srcBuffer;
- size_t pos = 0;
- unsigned fileNb = 0;
- for ( ; (pos < loadedSize) && (fileNb < nbFiles); fileNb++) {
- sliceTable[fileNb] = ptr + pos;
- pos += fileSizes[fileNb];
- }
- assert(pos == loadedSize);
- assert(fileNb == nbFiles);
-
-
- buffer_t buffer;
- buffer.ptr = srcBuffer;
- buffer.capacity = loadedSize;
- buffer.size = loadedSize;
-
- slice_collection_t slices;
- slices.slicePtrs = sliceTable;
- slices.capacities = fileSizes;
- slices.nbSlices = nbFiles;
-
- buffer_collection_t bc;
- bc.buffer = buffer;
- bc.slices = slices;
- return bc;
-}
-
-
-
-
-/*--- ddict_collection_t ---*/
-
-typedef struct {
- ZSTD_DDict** ddicts;
- size_t nbDDict;
-} ddict_collection_t;
-
-static const ddict_collection_t kNullDDictCollection = { NULL, 0 };
-
-static void freeDDictCollection(ddict_collection_t ddictc)
-{
- for (size_t dictNb=0; dictNb < ddictc.nbDDict; dictNb++) {
- ZSTD_freeDDict(ddictc.ddicts[dictNb]);
- }
- free(ddictc.ddicts);
-}
-
-/* returns .buffers=NULL if operation fails */
-static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t dictSize, size_t nbDDict)
-{
- ZSTD_DDict** const ddicts = malloc(nbDDict * sizeof(ZSTD_DDict*));
- assert(ddicts != NULL);
- if (ddicts==NULL) return kNullDDictCollection;
- for (size_t dictNb=0; dictNb < nbDDict; dictNb++) {
- ddicts[dictNb] = ZSTD_createDDict(dictBuffer, dictSize);
- assert(ddicts[dictNb] != NULL);
- }
- ddict_collection_t ddictc;
- ddictc.ddicts = ddicts;
- ddictc.nbDDict = nbDDict;
- return ddictc;
-}
-
-
-/* mess with addresses, so that linear scanning dictionaries != linear address scanning */
-void shuffleDictionaries(ddict_collection_t dicts)
-{
- size_t const nbDicts = dicts.nbDDict;
- for (size_t r=0; r<nbDicts; r++) {
- size_t const d = rand() % nbDicts;
- ZSTD_DDict* tmpd = dicts.ddicts[d];
- dicts.ddicts[d] = dicts.ddicts[r];
- dicts.ddicts[r] = tmpd;
- }
- for (size_t r=0; r<nbDicts; r++) {
- size_t const d1 = rand() % nbDicts;
- size_t const d2 = rand() % nbDicts;
- ZSTD_DDict* tmpd = dicts.ddicts[d1];
- dicts.ddicts[d1] = dicts.ddicts[d2];
- dicts.ddicts[d2] = tmpd;
- }
-}
-
-
-/* --- Compression --- */
-
-/* compressBlocks() :
- * @return : total compressed size of all blocks,
- * or 0 if error.
- */
-static size_t compressBlocks(size_t* cSizes, /* optional (can be NULL). If present, must contain at least nbBlocks fields */
- slice_collection_t dstBlockBuffers,
- slice_collection_t srcBlockBuffers,
- ZSTD_CDict* cdict, int cLevel)
-{
- size_t const nbBlocks = srcBlockBuffers.nbSlices;
- assert(dstBlockBuffers.nbSlices == srcBlockBuffers.nbSlices);
-
- ZSTD_CCtx* const cctx = ZSTD_createCCtx();
- assert(cctx != NULL);
-
- size_t totalCSize = 0;
- for (size_t blockNb=0; blockNb < nbBlocks; blockNb++) {
- size_t cBlockSize;
- if (cdict == NULL) {
- cBlockSize = ZSTD_compressCCtx(cctx,
- dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
- srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
- cLevel);
- } else {
- cBlockSize = ZSTD_compress_usingCDict(cctx,
- dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
- srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
- cdict);
- }
- CONTROL(!ZSTD_isError(cBlockSize));
- if (cSizes) cSizes[blockNb] = cBlockSize;
- totalCSize += cBlockSize;
- }
- return totalCSize;
-}
-
-
-/* --- Benchmark --- */
-
-typedef struct {
- ZSTD_DCtx* dctx;
- size_t nbDicts;
- size_t dictNb;
- ddict_collection_t dictionaries;
-} decompressInstructions;
-
-decompressInstructions createDecompressInstructions(ddict_collection_t dictionaries)
-{
- decompressInstructions di;
- di.dctx = ZSTD_createDCtx();
- assert(di.dctx != NULL);
- di.nbDicts = dictionaries.nbDDict;
- di.dictNb = 0;
- di.dictionaries = dictionaries;
- return di;
-}
-
-void freeDecompressInstructions(decompressInstructions di)
-{
- ZSTD_freeDCtx(di.dctx);
-}
-
-/* benched function */
-size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
-{
- decompressInstructions* const di = (decompressInstructions*) payload;
-
- size_t const result = ZSTD_decompress_usingDDict(di->dctx,
- dst, dstCapacity,
- src, srcSize,
- di->dictionaries.ddicts[di->dictNb]);
-
- di->dictNb = di->dictNb + 1;
- if (di->dictNb >= di->nbDicts) di->dictNb = 0;
-
- return result;
-}
-
-
-static int benchMem(slice_collection_t dstBlocks,
- slice_collection_t srcBlocks,
- ddict_collection_t dictionaries,
- int nbRounds)
-{
- assert(dstBlocks.nbSlices == srcBlocks.nbSlices);
-
- unsigned const ms_per_round = RUN_TIME_DEFAULT_MS;
- unsigned const total_time_ms = nbRounds * ms_per_round;
-
- double bestSpeed = 0.;
-
- BMK_timedFnState_t* const benchState =
- BMK_createTimedFnState(total_time_ms, ms_per_round);
- decompressInstructions di = createDecompressInstructions(dictionaries);
- BMK_benchParams_t const bp = {
- .benchFn = decompress,
- .benchPayload = &di,
- .initFn = NULL,
- .initPayload = NULL,
- .errorFn = ZSTD_isError,
- .blockCount = dstBlocks.nbSlices,
- .srcBuffers = (const void* const*) srcBlocks.slicePtrs,
- .srcSizes = srcBlocks.capacities,
- .dstBuffers = dstBlocks.slicePtrs,
- .dstCapacities = dstBlocks.capacities,
- .blockResults = NULL
- };
-
- for (;;) {
- BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, bp);
- CONTROL(BMK_isSuccessful_runOutcome(outcome));
-
- BMK_runTime_t const result = BMK_extract_runTime(outcome);
- double const dTime_ns = result.nanoSecPerRun;
- double const dTime_sec = (double)dTime_ns / 1000000000;
- size_t const srcSize = result.sumOfReturn;
- double const dSpeed_MBps = (double)srcSize / dTime_sec / (1 MB);
- if (dSpeed_MBps > bestSpeed) bestSpeed = dSpeed_MBps;
- DISPLAY("Decompression Speed : %.1f MB/s \r", bestSpeed);
- fflush(stdout);
- if (BMK_isCompleted_TimedFn(benchState)) break;
- }
- DISPLAY("\n");
-
- freeDecompressInstructions(di);
- BMK_freeTimedFnState(benchState);
-
- return 0; /* success */
-}
-
-
-/*! bench() :
- * fileName : file to load for benchmarking purpose
- * dictionary : optional (can be NULL), file to load as dictionary,
- * if none provided : will be calculated on the fly by the program.
- * @return : 0 is success, 1+ otherwise */
-int bench(const char** fileNameTable, unsigned nbFiles,
- const char* dictionary,
- size_t blockSize, int clevel,
- unsigned nbDictMax, unsigned nbBlocks,
- int nbRounds)
-{
- int result = 0;
-
- DISPLAYLEVEL(3, "loading %u files... \n", nbFiles);
- buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles);
- CONTROL(srcs.buffer.ptr != NULL);
- buffer_t srcBuffer = srcs.buffer;
- size_t const srcSize = srcBuffer.size;
- DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n",
- (double)srcSize / (1 MB));
-
- slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize, nbBlocks);
- nbBlocks = (unsigned)(srcSlices.nbSlices);
- DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks);
- if (blockSize)
- DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize);
- DISPLAYLEVEL(3, "\n");
- size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices);
-
-
- size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities));
- CONTROL(dstCapacities != NULL);
- size_t dstBufferCapacity = 0;
- for (size_t bnb=0; bnb<nbBlocks; bnb++) {
- dstCapacities[bnb] = ZSTD_compressBound(srcSlices.capacities[bnb]);
- dstBufferCapacity += dstCapacities[bnb];
- }
-
- buffer_t dstBuffer = createBuffer(dstBufferCapacity);
- CONTROL(dstBuffer.ptr != NULL);
-
- void** const sliceTable = malloc(nbBlocks * sizeof(*sliceTable));
- CONTROL(sliceTable != NULL);
-
- { char* const ptr = dstBuffer.ptr;
- size_t pos = 0;
- for (size_t snb=0; snb < nbBlocks; snb++) {
- sliceTable[snb] = ptr + pos;
- pos += dstCapacities[snb];
- } }
-
- slice_collection_t dstSlices;
- dstSlices.capacities = dstCapacities;
- dstSlices.slicePtrs = sliceTable;
- dstSlices.nbSlices = nbBlocks;
-
-
- /* dictionary determination */
- buffer_t const dictBuffer = createDictionaryBuffer(dictionary,
- srcs.buffer.ptr,
- srcs.slices.capacities, srcs.slices.nbSlices,
- DICTSIZE);
- CONTROL(dictBuffer.ptr != NULL);
-
- ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel);
- CONTROL(cdict != NULL);
-
- size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel);
- CONTROL(cTotalSizeNoDict != 0);
- DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n",
- clevel,
- (double)totalSrcSlicesSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
-
- size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
- CONTROL(cSizes != NULL);
-
- size_t const cTotalSize = compressBlocks(cSizes, dstSlices, srcSlices, cdict, clevel);
- CONTROL(cTotalSize != 0);
- DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n",
- (unsigned)dictBuffer.size,
- (double)totalSrcSlicesSize / cTotalSize, (unsigned)cTotalSize);
-
- /* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
- shrinkSizes(dstSlices, cSizes);
-
- size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, ZSTD_dlm_byCopy);
- unsigned const nbDicts = nbDictMax ? nbDictMax : nbBlocks;
- size_t const allDictMem = dictMem * nbDicts;
- DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
- nbDicts, (double)allDictMem / (1 MB));
-
- ddict_collection_t const dictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts);
- CONTROL(dictionaries.ddicts != NULL);
-
- shuffleDictionaries(dictionaries);
-
- buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices);
- CONTROL(resultCollection.buffer.ptr != NULL);
-
- result = benchMem(resultCollection.slices, dstSlices, dictionaries, nbRounds);
-
- /* free all heap objects in reverse order */
- freeBufferCollection(resultCollection);
- freeDDictCollection(dictionaries);
- free(cSizes);
- ZSTD_freeCDict(cdict);
- freeBuffer(dictBuffer);
- freeSliceCollection(dstSlices);
- freeBuffer(dstBuffer);
- freeSliceCollection(srcSlices);
- freeBufferCollection(srcs);
-
- return result;
-}
-
-
-
-/* --- Command Line --- */
-
-/*! readU32FromChar() :
- * @return : unsigned integer value read from input in `char` format.
- * allows and interprets K, KB, KiB, M, MB and MiB suffix.
- * Will also modify `*stringPtr`, advancing it to position where it stopped reading.
- * Note : function will exit() program if digit sequence overflows */
-static unsigned readU32FromChar(const char** stringPtr)
-{
- unsigned result = 0;
- while ((**stringPtr >='0') && (**stringPtr <='9')) {
- unsigned const max = (((unsigned)(-1)) / 10) - 1;
- assert(result <= max); /* check overflow */
- result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
- }
- if ((**stringPtr=='K') || (**stringPtr=='M')) {
- unsigned const maxK = ((unsigned)(-1)) >> 10;
- assert(result <= maxK); /* check overflow */
- result <<= 10;
- if (**stringPtr=='M') {
- assert(result <= maxK); /* check overflow */
- result <<= 10;
- }
- (*stringPtr)++; /* skip `K` or `M` */
- if (**stringPtr=='i') (*stringPtr)++;
- if (**stringPtr=='B') (*stringPtr)++;
- }
- return result;
-}
-
-/** longCommandWArg() :
- * check if *stringPtr is the same as longCommand.
- * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
- * @return 0 and doesn't modify *stringPtr otherwise.
- */
-static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
-{
- size_t const comSize = strlen(longCommand);
- int const result = !strncmp(*stringPtr, longCommand, comSize);
- if (result) *stringPtr += comSize;
- return result;
-}
-
-
-int usage(const char* exeName)
-{
- DISPLAY (" \n");
- DISPLAY (" %s [Options] filename(s) \n", exeName);
- DISPLAY (" \n");
- DISPLAY ("Options : \n");
- DISPLAY ("-r : recursively load all files in subdirectories (default: off) \n");
- DISPLAY ("-B# : split input into blocks of size # (default: no split) \n");
- DISPLAY ("-# : use compression level # (default: %u) \n", CLEVEL_DEFAULT);
- DISPLAY ("-D # : use # as a dictionary (default: create one) \n");
- DISPLAY ("-i# : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S);
- DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n");
- DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n");
- DISPLAY ("-h : help (this text) \n");
- return 0;
-}
-
-int bad_usage(const char* exeName)
-{
- DISPLAY (" bad usage : \n");
- usage(exeName);
- return 1;
-}
-
-int main (int argc, const char** argv)
-{
- int recursiveMode = 0;
- int nbRounds = BENCH_TIME_DEFAULT_S;
- const char* const exeName = argv[0];
-
- if (argc < 2) return bad_usage(exeName);
-
- const char** nameTable = (const char**)malloc(argc * sizeof(const char*));
- assert(nameTable != NULL);
- unsigned nameIdx = 0;
-
- const char* dictionary = NULL;
- int cLevel = CLEVEL_DEFAULT;
- size_t blockSize = BLOCKSIZE_DEFAULT;
- unsigned nbDicts = 0; /* determine nbDicts automatically: 1 dictionary per block */
- unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */
-
- for (int argNb = 1; argNb < argc ; argNb++) {
- const char* argument = argv[argNb];
- if (!strcmp(argument, "-h")) { free(nameTable); return usage(exeName); }
- if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; }
- if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; }
- if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; }
- if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "--clevel=")) { cLevel = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "-")) { cLevel = readU32FromChar(&argument); continue; }
- /* anything that's not a command is a filename */
- nameTable[nameIdx++] = argument;
- }
-
- const char** filenameTable = nameTable;
- unsigned nbFiles = nameIdx;
- char* buffer_containing_filenames = NULL;
-
- if (recursiveMode) {
-#ifndef UTIL_HAS_CREATEFILELIST
- assert(0); /* missing capability, do not run */
-#endif
- filenameTable = UTIL_createFileList(nameTable, nameIdx, &buffer_containing_filenames, &nbFiles, 1 /* follow_links */);
- }
-
- int result = bench(filenameTable, nbFiles, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds);
-
- free(buffer_containing_filenames);
- free(nameTable);
-
- return result;
-}
diff --git a/contrib/premake/premake4.lua b/contrib/premake/premake4.lua
deleted file mode 100644
index 6675e2e481c1..000000000000
--- a/contrib/premake/premake4.lua
+++ /dev/null
@@ -1,6 +0,0 @@
--- Include zstd.lua in your GENie or premake4 file, which exposes a project_zstd function
-dofile('zstd.lua')
-
-solution 'example'
- configurations { 'Debug', 'Release' }
- project_zstd('../../lib/')
diff --git a/contrib/premake/zstd.lua b/contrib/premake/zstd.lua
deleted file mode 100644
index df1ace3ee8ea..000000000000
--- a/contrib/premake/zstd.lua
+++ /dev/null
@@ -1,80 +0,0 @@
--- This GENie/premake file copies the behavior of the Makefile in the lib folder.
--- Basic usage: project_zstd(ZSTD_DIR)
-
-function project_zstd(dir, compression, decompression, deprecated, dictbuilder, legacy)
- if compression == nil then compression = true end
- if decompression == nil then decompression = true end
- if deprecated == nil then deprecated = false end
- if dictbuilder == nil then dictbuilder = false end
-
- if legacy == nil then legacy = 0 end
-
- if not compression then
- dictbuilder = false
- deprecated = false
- end
-
- if not decompression then
- legacy = 0
- deprecated = false
- end
-
- project 'zstd'
- kind 'StaticLib'
- language 'C'
-
- files {
- dir .. 'zstd.h',
- dir .. 'common/**.c',
- dir .. 'common/**.h'
- }
-
- if compression then
- files {
- dir .. 'compress/**.c',
- dir .. 'compress/**.h'
- }
- end
-
- if decompression then
- files {
- dir .. 'decompress/**.c',
- dir .. 'decompress/**.h'
- }
- end
-
- if dictbuilder then
- files {
- dir .. 'dictBuilder/**.c',
- dir .. 'dictBuilder/**.h'
- }
- end
-
- if deprecated then
- files {
- dir .. 'deprecated/**.c',
- dir .. 'deprecated/**.h'
- }
- end
-
- if legacy ~= 0 then
- if legacy >= 8 then
- files {
- dir .. 'legacy/zstd_v0' .. (legacy - 7) .. '.*'
- }
- end
- includedirs {
- dir .. 'legacy'
- }
- end
-
- includedirs {
- dir,
- dir .. 'common'
- }
-
- defines {
- 'XXH_NAMESPACE=ZSTD_',
- 'ZSTD_LEGACY_SUPPORT=' .. legacy
- }
-end
diff --git a/contrib/pzstd/BUCK b/contrib/pzstd/BUCK
deleted file mode 100644
index d04eeedd8a1a..000000000000
--- a/contrib/pzstd/BUCK
+++ /dev/null
@@ -1,72 +0,0 @@
-cxx_library(
- name='libpzstd',
- visibility=['PUBLIC'],
- header_namespace='',
- exported_headers=[
- 'ErrorHolder.h',
- 'Logging.h',
- 'Pzstd.h',
- ],
- headers=[
- 'SkippableFrame.h',
- ],
- srcs=[
- 'Pzstd.cpp',
- 'SkippableFrame.cpp',
- ],
- deps=[
- ':options',
- '//contrib/pzstd/utils:utils',
- '//lib:mem',
- '//lib:zstd',
- ],
-)
-
-cxx_library(
- name='options',
- visibility=['PUBLIC'],
- header_namespace='',
- exported_headers=['Options.h'],
- srcs=['Options.cpp'],
- deps=[
- '//contrib/pzstd/utils:scope_guard',
- '//lib:zstd',
- '//programs:util',
- ],
-)
-
-cxx_binary(
- name='pzstd',
- visibility=['PUBLIC'],
- srcs=['main.cpp'],
- deps=[
- ':libpzstd',
- ':options',
- ],
-)
-
-# Must run "make googletest" first
-cxx_library(
- name='gtest',
- srcs=glob([
- 'googletest/googletest/src/gtest-all.cc',
- 'googletest/googlemock/src/gmock-all.cc',
- 'googletest/googlemock/src/gmock_main.cc',
- ]),
- header_namespace='',
- exported_headers=subdir_glob([
- ('googletest/googletest/include', '**/*.h'),
- ('googletest/googlemock/include', '**/*.h'),
- ]),
- headers=subdir_glob([
- ('googletest/googletest', 'src/*.cc'),
- ('googletest/googletest', 'src/*.h'),
- ('googletest/googlemock', 'src/*.cc'),
- ('googletest/googlemock', 'src/*.h'),
- ]),
- platform_linker_flags=[
- ('android', []),
- ('', ['-lpthread']),
- ],
- visibility=['PUBLIC'],
-)
diff --git a/contrib/pzstd/ErrorHolder.h b/contrib/pzstd/ErrorHolder.h
deleted file mode 100644
index 829651c5961e..000000000000
--- a/contrib/pzstd/ErrorHolder.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#pragma once
-
-#include <atomic>
-#include <cassert>
-#include <stdexcept>
-#include <string>
-
-namespace pzstd {
-
-// Coordinates graceful shutdown of the pzstd pipeline
-class ErrorHolder {
- std::atomic<bool> error_;
- std::string message_;
-
- public:
- ErrorHolder() : error_(false) {}
-
- bool hasError() noexcept {
- return error_.load();
- }
-
- void setError(std::string message) noexcept {
- // Given multiple possibly concurrent calls, exactly one will ever succeed.
- bool expected = false;
- if (error_.compare_exchange_strong(expected, true)) {
- message_ = std::move(message);
- }
- }
-
- bool check(bool predicate, std::string message) noexcept {
- if (!predicate) {
- setError(std::move(message));
- }
- return !hasError();
- }
-
- std::string getError() noexcept {
- error_.store(false);
- return std::move(message_);
- }
-
- ~ErrorHolder() {
- assert(!hasError());
- }
-};
-}
diff --git a/contrib/pzstd/Logging.h b/contrib/pzstd/Logging.h
deleted file mode 100644
index 16a63932c0a3..000000000000
--- a/contrib/pzstd/Logging.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#pragma once
-
-#include <cstdio>
-#include <mutex>
-
-namespace pzstd {
-
-constexpr int ERROR = 1;
-constexpr int INFO = 2;
-constexpr int DEBUG = 3;
-constexpr int VERBOSE = 4;
-
-class Logger {
- std::mutex mutex_;
- FILE* out_;
- const int level_;
-
- using Clock = std::chrono::system_clock;
- Clock::time_point lastUpdate_;
- std::chrono::milliseconds refreshRate_;
-
- public:
- explicit Logger(int level, FILE* out = stderr)
- : out_(out), level_(level), lastUpdate_(Clock::now()),
- refreshRate_(150) {}
-
-
- bool logsAt(int level) {
- return level <= level_;
- }
-
- template <typename... Args>
- void operator()(int level, const char *fmt, Args... args) {
- if (level > level_) {
- return;
- }
- std::lock_guard<std::mutex> lock(mutex_);
- std::fprintf(out_, fmt, args...);
- }
-
- template <typename... Args>
- void update(int level, const char *fmt, Args... args) {
- if (level > level_) {
- return;
- }
- std::lock_guard<std::mutex> lock(mutex_);
- auto now = Clock::now();
- if (now - lastUpdate_ > refreshRate_) {
- lastUpdate_ = now;
- std::fprintf(out_, "\r");
- std::fprintf(out_, fmt, args...);
- }
- }
-
- void clear(int level) {
- if (level > level_) {
- return;
- }
- std::lock_guard<std::mutex> lock(mutex_);
- std::fprintf(out_, "\r%79s\r", "");
- }
-};
-
-}
diff --git a/contrib/pzstd/Makefile b/contrib/pzstd/Makefile
deleted file mode 100644
index 8d2b1932e91c..000000000000
--- a/contrib/pzstd/Makefile
+++ /dev/null
@@ -1,271 +0,0 @@
-# ################################################################
-# Copyright (c) 2016-present, Facebook, Inc.
-# All rights reserved.
-#
-# This source code is licensed under both the BSD-style license (found in the
-# LICENSE file in the root directory of this source tree) and the GPLv2 (found
-# in the COPYING file in the root directory of this source tree).
-# ################################################################
-
-# Standard variables for installation
-DESTDIR ?=
-PREFIX ?= /usr/local
-BINDIR := $(DESTDIR)$(PREFIX)/bin
-
-ZSTDDIR = ../../lib
-PROGDIR = ../../programs
-
-# External program to use to run tests, e.g. qemu or valgrind
-TESTPROG ?=
-# Flags to pass to the tests
-TESTFLAGS ?=
-
-# We use gcc/clang to generate the header dependencies of files
-DEPFLAGS = -MMD -MP -MF $*.Td
-POSTCOMPILE = mv -f $*.Td $*.d
-
-# CFLAGS, CXXFLAGS, CPPFLAGS, and LDFLAGS are for the users to override
-CFLAGS ?= -O3 -Wall -Wextra
-CXXFLAGS ?= -O3 -Wall -Wextra -pedantic
-CPPFLAGS ?=
-LDFLAGS ?=
-
-# Include flags
-PZSTD_INC = -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(PROGDIR) -I.
-GTEST_INC = -isystem googletest/googletest/include
-
-PZSTD_CPPFLAGS = $(PZSTD_INC)
-PZSTD_CCXXFLAGS =
-PZSTD_CFLAGS = $(PZSTD_CCXXFLAGS)
-PZSTD_CXXFLAGS = $(PZSTD_CCXXFLAGS) -std=c++11
-PZSTD_LDFLAGS =
-EXTRA_FLAGS =
-ALL_CFLAGS = $(EXTRA_FLAGS) $(CPPFLAGS) $(PZSTD_CPPFLAGS) $(CFLAGS) $(PZSTD_CFLAGS)
-ALL_CXXFLAGS = $(EXTRA_FLAGS) $(CPPFLAGS) $(PZSTD_CPPFLAGS) $(CXXFLAGS) $(PZSTD_CXXFLAGS)
-ALL_LDFLAGS = $(EXTRA_FLAGS) $(CXXFLAGS) $(LDFLAGS) $(PZSTD_LDFLAGS)
-
-
-# gtest libraries need to go before "-lpthread" because they depend on it.
-GTEST_LIB = -L googletest/build/googlemock/gtest
-LIBS =
-
-# Compilation commands
-LD_COMMAND = $(CXX) $^ $(ALL_LDFLAGS) $(LIBS) -pthread -o $@
-CC_COMMAND = $(CC) $(DEPFLAGS) $(ALL_CFLAGS) -c $< -o $@
-CXX_COMMAND = $(CXX) $(DEPFLAGS) $(ALL_CXXFLAGS) -c $< -o $@
-
-# Get a list of all zstd files so we rebuild the static library when we need to
-ZSTDCOMMON_FILES := $(wildcard $(ZSTDDIR)/common/*.c) \
- $(wildcard $(ZSTDDIR)/common/*.h)
-ZSTDCOMP_FILES := $(wildcard $(ZSTDDIR)/compress/*.c) \
- $(wildcard $(ZSTDDIR)/compress/*.h)
-ZSTDDECOMP_FILES := $(wildcard $(ZSTDDIR)/decompress/*.c) \
- $(wildcard $(ZSTDDIR)/decompress/*.h)
-ZSTDPROG_FILES := $(wildcard $(PROGDIR)/*.c) \
- $(wildcard $(PROGDIR)/*.h)
-ZSTD_FILES := $(wildcard $(ZSTDDIR)/*.h) \
- $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) \
- $(ZSTDPROG_FILES)
-
-# List all the pzstd source files so we can determine their dependencies
-PZSTD_SRCS := $(wildcard *.cpp)
-PZSTD_TESTS := $(wildcard test/*.cpp)
-UTILS_TESTS := $(wildcard utils/test/*.cpp)
-ALL_SRCS := $(PZSTD_SRCS) $(PZSTD_TESTS) $(UTILS_TESTS)
-
-
-# Define *.exe as extension for Windows systems
-ifneq (,$(filter Windows%,$(OS)))
-EXT =.exe
-else
-EXT =
-endif
-
-# Standard targets
-.PHONY: default
-default: all
-
-.PHONY: test-pzstd
-test-pzstd: TESTFLAGS=--gtest_filter=-*ExtremelyLarge*
-test-pzstd: clean googletest pzstd tests check
-
-.PHONY: test-pzstd32
-test-pzstd32: clean googletest32 all32 check
-
-.PHONY: test-pzstd-tsan
-test-pzstd-tsan: LDFLAGS=-fuse-ld=gold
-test-pzstd-tsan: TESTFLAGS=--gtest_filter=-*ExtremelyLarge*
-test-pzstd-tsan: clean googletest tsan check
-
-.PHONY: test-pzstd-asan
-test-pzstd-asan: LDFLAGS=-fuse-ld=gold
-test-pzstd-asan: TESTFLAGS=--gtest_filter=-*ExtremelyLarge*
-test-pzstd-asan: clean asan check
-
-.PHONY: check
-check:
- $(TESTPROG) ./utils/test/BufferTest$(EXT) $(TESTFLAGS)
- $(TESTPROG) ./utils/test/RangeTest$(EXT) $(TESTFLAGS)
- $(TESTPROG) ./utils/test/ResourcePoolTest$(EXT) $(TESTFLAGS)
- $(TESTPROG) ./utils/test/ScopeGuardTest$(EXT) $(TESTFLAGS)
- $(TESTPROG) ./utils/test/ThreadPoolTest$(EXT) $(TESTFLAGS)
- $(TESTPROG) ./utils/test/WorkQueueTest$(EXT) $(TESTFLAGS)
- $(TESTPROG) ./test/OptionsTest$(EXT) $(TESTFLAGS)
- $(TESTPROG) ./test/PzstdTest$(EXT) $(TESTFLAGS)
-
-.PHONY: install
-install: PZSTD_CPPFLAGS += -DNDEBUG
-install: pzstd$(EXT)
- install -d -m 755 $(BINDIR)/
- install -m 755 pzstd$(EXT) $(BINDIR)/pzstd$(EXT)
-
-.PHONY: uninstall
-uninstall:
- $(RM) $(BINDIR)/pzstd$(EXT)
-
-# Targets for many different builds
-.PHONY: all
-all: PZSTD_CPPFLAGS += -DNDEBUG
-all: pzstd$(EXT)
-
-.PHONY: debug
-debug: EXTRA_FLAGS += -g
-debug: pzstd$(EXT) tests roundtrip
-
-.PHONY: tsan
-tsan: PZSTD_CCXXFLAGS += -fsanitize=thread -fPIC
-tsan: PZSTD_LDFLAGS += -fsanitize=thread
-tsan: debug
-
-.PHONY: asan
-asan: EXTRA_FLAGS += -fsanitize=address
-asan: debug
-
-.PHONY: ubsan
-ubsan: EXTRA_FLAGS += -fsanitize=undefined
-ubsan: debug
-
-.PHONY: all32
-all32: EXTRA_FLAGS += -m32
-all32: all tests roundtrip
-
-.PHONY: debug32
-debug32: EXTRA_FLAGS += -m32
-debug32: debug
-
-.PHONY: asan32
-asan32: EXTRA_FLAGS += -m32
-asan32: asan
-
-.PHONY: tsan32
-tsan32: EXTRA_FLAGS += -m32
-tsan32: tsan
-
-.PHONY: ubsan32
-ubsan32: EXTRA_FLAGS += -m32
-ubsan32: ubsan
-
-# Run long round trip tests
-.PHONY: roundtripcheck
-roundtripcheck: roundtrip check
- $(TESTPROG) ./test/RoundTripTest$(EXT) $(TESTFLAGS)
-
-# Build the main binary
-pzstd$(EXT): main.o $(PROGDIR)/util.o Options.o Pzstd.o SkippableFrame.o $(ZSTDDIR)/libzstd.a
- $(LD_COMMAND)
-
-# Target that depends on all the tests
-.PHONY: tests
-tests: EXTRA_FLAGS += -Wno-deprecated-declarations
-tests: $(patsubst %,%$(EXT),$(basename $(PZSTD_TESTS) $(UTILS_TESTS)))
-
-# Build the round trip tests
-.PHONY: roundtrip
-roundtrip: EXTRA_FLAGS += -Wno-deprecated-declarations
-roundtrip: test/RoundTripTest$(EXT)
-
-# Use the static library that zstd builds for simplicity and
-# so we get the compiler options correct
-$(ZSTDDIR)/libzstd.a: $(ZSTD_FILES)
- CFLAGS="$(ALL_CFLAGS)" LDFLAGS="$(ALL_LDFLAGS)" $(MAKE) -C $(ZSTDDIR) libzstd.a
-
-# Rules to build the tests
-test/RoundTripTest$(EXT): test/RoundTripTest.o $(PROGDIR)/datagen.o \
- $(PROGDIR)/util.o Options.o \
- Pzstd.o SkippableFrame.o $(ZSTDDIR)/libzstd.a
- $(LD_COMMAND)
-
-test/%Test$(EXT): PZSTD_LDFLAGS += $(GTEST_LIB)
-test/%Test$(EXT): LIBS += -lgtest -lgtest_main
-test/%Test$(EXT): test/%Test.o $(PROGDIR)/datagen.o \
- $(PROGDIR)/util.o Options.o Pzstd.o \
- SkippableFrame.o $(ZSTDDIR)/libzstd.a
- $(LD_COMMAND)
-
-utils/test/%Test$(EXT): PZSTD_LDFLAGS += $(GTEST_LIB)
-utils/test/%Test$(EXT): LIBS += -lgtest -lgtest_main
-utils/test/%Test$(EXT): utils/test/%Test.o
- $(LD_COMMAND)
-
-
-GTEST_CMAKEFLAGS =
-
-# Install googletest
-.PHONY: googletest
-googletest: PZSTD_CCXXFLAGS += -fPIC
-googletest:
- @$(RM) -rf googletest
- @git clone https://github.com/google/googletest
- @mkdir -p googletest/build
- @cd googletest/build && cmake $(GTEST_CMAKEFLAGS) -DCMAKE_CXX_FLAGS="$(ALL_CXXFLAGS)" .. && $(MAKE)
-
-.PHONY: googletest32
-googletest32: PZSTD_CCXXFLAGS += -m32
-googletest32: googletest
-
-.PHONY: googletest-mingw64
-googletest-mingw64: GTEST_CMAKEFLAGS += -G "MSYS Makefiles"
-googletest-mingw64: googletest
-
-.PHONY: clean
-clean:
- $(RM) -f *.o pzstd$(EXT) *.Td *.d
- $(RM) -f test/*.o test/*Test$(EXT) test/*.Td test/*.d
- $(RM) -f utils/test/*.o utils/test/*Test$(EXT) utils/test/*.Td utils/test/*.d
- $(RM) -f $(PROGDIR)/*.o $(PROGDIR)/*.Td $(PROGDIR)/*.d
- $(MAKE) -C $(ZSTDDIR) clean
- @echo Cleaning completed
-
-
-# Cancel implicit rules
-%.o: %.c
-%.o: %.cpp
-
-# Object file rules
-%.o: %.c
- $(CC_COMMAND)
- $(POSTCOMPILE)
-
-$(PROGDIR)/%.o: $(PROGDIR)/%.c
- $(CC_COMMAND)
- $(POSTCOMPILE)
-
-%.o: %.cpp
- $(CXX_COMMAND)
- $(POSTCOMPILE)
-
-test/%.o: PZSTD_CPPFLAGS += $(GTEST_INC)
-test/%.o: test/%.cpp
- $(CXX_COMMAND)
- $(POSTCOMPILE)
-
-utils/test/%.o: PZSTD_CPPFLAGS += $(GTEST_INC)
-utils/test/%.o: utils/test/%.cpp
- $(CXX_COMMAND)
- $(POSTCOMPILE)
-
-# Dependency file stuff
-.PRECIOUS: %.d test/%.d utils/test/%.d
-
-# Include rules that specify header file dependencies
--include $(patsubst %,%.d,$(basename $(ALL_SRCS)))
diff --git a/contrib/pzstd/Options.cpp b/contrib/pzstd/Options.cpp
deleted file mode 100644
index 2123f8894c3e..000000000000
--- a/contrib/pzstd/Options.cpp
+++ /dev/null
@@ -1,428 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#include "Options.h"
-#include "util.h"
-#include "utils/ScopeGuard.h"
-
-#include <algorithm>
-#include <cassert>
-#include <cstdio>
-#include <cstring>
-#include <iterator>
-#include <thread>
-#include <vector>
-
-
-namespace pzstd {
-
-namespace {
-unsigned defaultNumThreads() {
-#ifdef PZSTD_NUM_THREADS
- return PZSTD_NUM_THREADS;
-#else
- return std::thread::hardware_concurrency();
-#endif
-}
-
-unsigned parseUnsigned(const char **arg) {
- unsigned result = 0;
- while (**arg >= '0' && **arg <= '9') {
- result *= 10;
- result += **arg - '0';
- ++(*arg);
- }
- return result;
-}
-
-const char *getArgument(const char *options, const char **argv, int &i,
- int argc) {
- if (options[1] != 0) {
- return options + 1;
- }
- ++i;
- if (i == argc) {
- std::fprintf(stderr, "Option -%c requires an argument, but none provided\n",
- *options);
- return nullptr;
- }
- return argv[i];
-}
-
-const std::string kZstdExtension = ".zst";
-constexpr char kStdIn[] = "-";
-constexpr char kStdOut[] = "-";
-constexpr unsigned kDefaultCompressionLevel = 3;
-constexpr unsigned kMaxNonUltraCompressionLevel = 19;
-
-#ifdef _WIN32
-const char nullOutput[] = "nul";
-#else
-const char nullOutput[] = "/dev/null";
-#endif
-
-void notSupported(const char *option) {
- std::fprintf(stderr, "Operation not supported: %s\n", option);
-}
-
-void usage() {
- std::fprintf(stderr, "Usage:\n");
- std::fprintf(stderr, " pzstd [args] [FILE(s)]\n");
- std::fprintf(stderr, "Parallel ZSTD options:\n");
- std::fprintf(stderr, " -p, --processes # : number of threads to use for (de)compression (default:<numcpus>)\n");
-
- std::fprintf(stderr, "ZSTD options:\n");
- std::fprintf(stderr, " -# : # compression level (1-%d, default:%d)\n", kMaxNonUltraCompressionLevel, kDefaultCompressionLevel);
- std::fprintf(stderr, " -d, --decompress : decompression\n");
- std::fprintf(stderr, " -o file : result stored into `file` (only if 1 input file)\n");
- std::fprintf(stderr, " -f, --force : overwrite output without prompting, (de)compress links\n");
- std::fprintf(stderr, " --rm : remove source file(s) after successful (de)compression\n");
- std::fprintf(stderr, " -k, --keep : preserve source file(s) (default)\n");
- std::fprintf(stderr, " -h, --help : display help and exit\n");
- std::fprintf(stderr, " -V, --version : display version number and exit\n");
- std::fprintf(stderr, " -v, --verbose : verbose mode; specify multiple times to increase log level (default:2)\n");
- std::fprintf(stderr, " -q, --quiet : suppress warnings; specify twice to suppress errors too\n");
- std::fprintf(stderr, " -c, --stdout : force write to standard output, even if it is the console\n");
-#ifdef UTIL_HAS_CREATEFILELIST
- std::fprintf(stderr, " -r : operate recursively on directories\n");
-#endif
- std::fprintf(stderr, " --ultra : enable levels beyond %i, up to %i (requires more memory)\n", kMaxNonUltraCompressionLevel, ZSTD_maxCLevel());
- std::fprintf(stderr, " -C, --check : integrity check (default)\n");
- std::fprintf(stderr, " --no-check : no integrity check\n");
- std::fprintf(stderr, " -t, --test : test compressed file integrity\n");
- std::fprintf(stderr, " -- : all arguments after \"--\" are treated as files\n");
-}
-} // anonymous namespace
-
-Options::Options()
- : numThreads(defaultNumThreads()), maxWindowLog(23),
- compressionLevel(kDefaultCompressionLevel), decompress(false),
- overwrite(false), keepSource(true), writeMode(WriteMode::Auto),
- checksum(true), verbosity(2) {}
-
-Options::Status Options::parse(int argc, const char **argv) {
- bool test = false;
- bool recursive = false;
- bool ultra = false;
- bool forceStdout = false;
- bool followLinks = false;
- // Local copy of input files, which are pointers into argv.
- std::vector<const char *> localInputFiles;
- for (int i = 1; i < argc; ++i) {
- const char *arg = argv[i];
- // Protect against empty arguments
- if (arg[0] == 0) {
- continue;
- }
- // Everything after "--" is an input file
- if (!std::strcmp(arg, "--")) {
- ++i;
- std::copy(argv + i, argv + argc, std::back_inserter(localInputFiles));
- break;
- }
- // Long arguments that don't have a short option
- {
- bool isLongOption = true;
- if (!std::strcmp(arg, "--rm")) {
- keepSource = false;
- } else if (!std::strcmp(arg, "--ultra")) {
- ultra = true;
- maxWindowLog = 0;
- } else if (!std::strcmp(arg, "--no-check")) {
- checksum = false;
- } else if (!std::strcmp(arg, "--sparse")) {
- writeMode = WriteMode::Sparse;
- notSupported("Sparse mode");
- return Status::Failure;
- } else if (!std::strcmp(arg, "--no-sparse")) {
- writeMode = WriteMode::Regular;
- notSupported("Sparse mode");
- return Status::Failure;
- } else if (!std::strcmp(arg, "--dictID")) {
- notSupported(arg);
- return Status::Failure;
- } else if (!std::strcmp(arg, "--no-dictID")) {
- notSupported(arg);
- return Status::Failure;
- } else {
- isLongOption = false;
- }
- if (isLongOption) {
- continue;
- }
- }
- // Arguments with a short option simply set their short option.
- const char *options = nullptr;
- if (!std::strcmp(arg, "--processes")) {
- options = "p";
- } else if (!std::strcmp(arg, "--version")) {
- options = "V";
- } else if (!std::strcmp(arg, "--help")) {
- options = "h";
- } else if (!std::strcmp(arg, "--decompress")) {
- options = "d";
- } else if (!std::strcmp(arg, "--force")) {
- options = "f";
- } else if (!std::strcmp(arg, "--stdout")) {
- options = "c";
- } else if (!std::strcmp(arg, "--keep")) {
- options = "k";
- } else if (!std::strcmp(arg, "--verbose")) {
- options = "v";
- } else if (!std::strcmp(arg, "--quiet")) {
- options = "q";
- } else if (!std::strcmp(arg, "--check")) {
- options = "C";
- } else if (!std::strcmp(arg, "--test")) {
- options = "t";
- } else if (arg[0] == '-' && arg[1] != 0) {
- options = arg + 1;
- } else {
- localInputFiles.emplace_back(arg);
- continue;
- }
- assert(options != nullptr);
-
- bool finished = false;
- while (!finished && *options != 0) {
- // Parse the compression level
- if (*options >= '0' && *options <= '9') {
- compressionLevel = parseUnsigned(&options);
- continue;
- }
-
- switch (*options) {
- case 'h':
- case 'H':
- usage();
- return Status::Message;
- case 'V':
- std::fprintf(stderr, "PZSTD version: %s.\n", ZSTD_VERSION_STRING);
- return Status::Message;
- case 'p': {
- finished = true;
- const char *optionArgument = getArgument(options, argv, i, argc);
- if (optionArgument == nullptr) {
- return Status::Failure;
- }
- if (*optionArgument < '0' || *optionArgument > '9') {
- std::fprintf(stderr, "Option -p expects a number, but %s provided\n",
- optionArgument);
- return Status::Failure;
- }
- numThreads = parseUnsigned(&optionArgument);
- if (*optionArgument != 0) {
- std::fprintf(stderr,
- "Option -p expects a number, but %u%s provided\n",
- numThreads, optionArgument);
- return Status::Failure;
- }
- break;
- }
- case 'o': {
- finished = true;
- const char *optionArgument = getArgument(options, argv, i, argc);
- if (optionArgument == nullptr) {
- return Status::Failure;
- }
- outputFile = optionArgument;
- break;
- }
- case 'C':
- checksum = true;
- break;
- case 'k':
- keepSource = true;
- break;
- case 'd':
- decompress = true;
- break;
- case 'f':
- overwrite = true;
- forceStdout = true;
- followLinks = true;
- break;
- case 't':
- test = true;
- decompress = true;
- break;
-#ifdef UTIL_HAS_CREATEFILELIST
- case 'r':
- recursive = true;
- break;
-#endif
- case 'c':
- outputFile = kStdOut;
- forceStdout = true;
- break;
- case 'v':
- ++verbosity;
- break;
- case 'q':
- --verbosity;
- // Ignore them for now
- break;
- // Unsupported options from Zstd
- case 'D':
- case 's':
- notSupported("Zstd dictionaries.");
- return Status::Failure;
- case 'b':
- case 'e':
- case 'i':
- case 'B':
- notSupported("Zstd benchmarking options.");
- return Status::Failure;
- default:
- std::fprintf(stderr, "Invalid argument: %s\n", arg);
- return Status::Failure;
- }
- if (!finished) {
- ++options;
- }
- } // while (*options != 0);
- } // for (int i = 1; i < argc; ++i);
-
- // Set options for test mode
- if (test) {
- outputFile = nullOutput;
- keepSource = true;
- }
-
- // Input file defaults to standard input if not provided.
- if (localInputFiles.empty()) {
- localInputFiles.emplace_back(kStdIn);
- }
-
- // Check validity of input files
- if (localInputFiles.size() > 1) {
- const auto it = std::find(localInputFiles.begin(), localInputFiles.end(),
- std::string{kStdIn});
- if (it != localInputFiles.end()) {
- std::fprintf(
- stderr,
- "Cannot specify standard input when handling multiple files\n");
- return Status::Failure;
- }
- }
- if (localInputFiles.size() > 1 || recursive) {
- if (!outputFile.empty() && outputFile != nullOutput) {
- std::fprintf(
- stderr,
- "Cannot specify an output file when handling multiple inputs\n");
- return Status::Failure;
- }
- }
-
- g_utilDisplayLevel = verbosity;
- // Remove local input files that are symbolic links
- if (!followLinks) {
- std::remove_if(localInputFiles.begin(), localInputFiles.end(),
- [&](const char *path) {
- bool isLink = UTIL_isLink(path);
- if (isLink && verbosity >= 2) {
- std::fprintf(
- stderr,
- "Warning : %s is symbolic link, ignoring\n",
- path);
- }
- return isLink;
- });
- }
-
- // Translate input files/directories into files to (de)compress
- if (recursive) {
- char *scratchBuffer = nullptr;
- unsigned numFiles = 0;
- const char **files =
- UTIL_createFileList(localInputFiles.data(), localInputFiles.size(),
- &scratchBuffer, &numFiles, followLinks);
- if (files == nullptr) {
- std::fprintf(stderr, "Error traversing directories\n");
- return Status::Failure;
- }
- auto guard =
- makeScopeGuard([&] { UTIL_freeFileList(files, scratchBuffer); });
- if (numFiles == 0) {
- std::fprintf(stderr, "No files found\n");
- return Status::Failure;
- }
- inputFiles.resize(numFiles);
- std::copy(files, files + numFiles, inputFiles.begin());
- } else {
- inputFiles.resize(localInputFiles.size());
- std::copy(localInputFiles.begin(), localInputFiles.end(),
- inputFiles.begin());
- }
- localInputFiles.clear();
- assert(!inputFiles.empty());
-
- // If reading from standard input, default to standard output
- if (inputFiles[0] == kStdIn && outputFile.empty()) {
- assert(inputFiles.size() == 1);
- outputFile = "-";
- }
-
- if (inputFiles[0] == kStdIn && IS_CONSOLE(stdin)) {
- assert(inputFiles.size() == 1);
- std::fprintf(stderr, "Cannot read input from interactive console\n");
- return Status::Failure;
- }
- if (outputFile == "-" && IS_CONSOLE(stdout) && !(forceStdout && decompress)) {
- std::fprintf(stderr, "Will not write to console stdout unless -c or -f is "
- "specified and decompressing\n");
- return Status::Failure;
- }
-
- // Check compression level
- {
- unsigned maxCLevel =
- ultra ? ZSTD_maxCLevel() : kMaxNonUltraCompressionLevel;
- if (compressionLevel > maxCLevel || compressionLevel == 0) {
- std::fprintf(stderr, "Invalid compression level %u.\n", compressionLevel);
- return Status::Failure;
- }
- }
-
- // Check that numThreads is set
- if (numThreads == 0) {
- std::fprintf(stderr, "Invalid arguments: # of threads not specified "
- "and unable to determine hardware concurrency.\n");
- return Status::Failure;
- }
-
- // Modify verbosity
- // If we are piping input and output, turn off interaction
- if (inputFiles[0] == kStdIn && outputFile == kStdOut && verbosity == 2) {
- verbosity = 1;
- }
- // If we are in multi-file mode, turn off interaction
- if (inputFiles.size() > 1 && verbosity == 2) {
- verbosity = 1;
- }
-
- return Status::Success;
-}
-
-std::string Options::getOutputFile(const std::string &inputFile) const {
- if (!outputFile.empty()) {
- return outputFile;
- }
- // Attempt to add/remove zstd extension from the input file
- if (decompress) {
- int stemSize = inputFile.size() - kZstdExtension.size();
- if (stemSize > 0 && inputFile.substr(stemSize) == kZstdExtension) {
- return inputFile.substr(0, stemSize);
- } else {
- return "";
- }
- } else {
- return inputFile + kZstdExtension;
- }
-}
-}
diff --git a/contrib/pzstd/Options.h b/contrib/pzstd/Options.h
deleted file mode 100644
index f4f2aaa499cb..000000000000
--- a/contrib/pzstd/Options.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#pragma once
-
-#define ZSTD_STATIC_LINKING_ONLY
-#include "zstd.h"
-#undef ZSTD_STATIC_LINKING_ONLY
-
-#include <cstdint>
-#include <string>
-#include <vector>
-
-namespace pzstd {
-
-struct Options {
- enum class WriteMode { Regular, Auto, Sparse };
-
- unsigned numThreads;
- unsigned maxWindowLog;
- unsigned compressionLevel;
- bool decompress;
- std::vector<std::string> inputFiles;
- std::string outputFile;
- bool overwrite;
- bool keepSource;
- WriteMode writeMode;
- bool checksum;
- int verbosity;
-
- enum class Status {
- Success, // Successfully parsed options
- Failure, // Failure to parse options
- Message // Options specified to print a message (e.g. "-h")
- };
-
- Options();
- Options(unsigned numThreads, unsigned maxWindowLog, unsigned compressionLevel,
- bool decompress, std::vector<std::string> inputFiles,
- std::string outputFile, bool overwrite, bool keepSource,
- WriteMode writeMode, bool checksum, int verbosity)
- : numThreads(numThreads), maxWindowLog(maxWindowLog),
- compressionLevel(compressionLevel), decompress(decompress),
- inputFiles(std::move(inputFiles)), outputFile(std::move(outputFile)),
- overwrite(overwrite), keepSource(keepSource), writeMode(writeMode),
- checksum(checksum), verbosity(verbosity) {}
-
- Status parse(int argc, const char **argv);
-
- ZSTD_parameters determineParameters() const {
- ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, 0);
- params.fParams.contentSizeFlag = 0;
- params.fParams.checksumFlag = checksum;
- if (maxWindowLog != 0 && params.cParams.windowLog > maxWindowLog) {
- params.cParams.windowLog = maxWindowLog;
- params.cParams = ZSTD_adjustCParams(params.cParams, 0, 0);
- }
- return params;
- }
-
- std::string getOutputFile(const std::string &inputFile) const;
-};
-}
diff --git a/contrib/pzstd/Pzstd.cpp b/contrib/pzstd/Pzstd.cpp
deleted file mode 100644
index 652187c3bd0e..000000000000
--- a/contrib/pzstd/Pzstd.cpp
+++ /dev/null
@@ -1,611 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#include "platform.h" /* Large Files support, SET_BINARY_MODE */
-#include "Pzstd.h"
-#include "SkippableFrame.h"
-#include "utils/FileSystem.h"
-#include "utils/Range.h"
-#include "utils/ScopeGuard.h"
-#include "utils/ThreadPool.h"
-#include "utils/WorkQueue.h"
-
-#include <chrono>
-#include <cinttypes>
-#include <cstddef>
-#include <cstdio>
-#include <memory>
-#include <string>
-
-
-namespace pzstd {
-
-namespace {
-#ifdef _WIN32
-const std::string nullOutput = "nul";
-#else
-const std::string nullOutput = "/dev/null";
-#endif
-}
-
-using std::size_t;
-
-static std::uintmax_t fileSizeOrZero(const std::string &file) {
- if (file == "-") {
- return 0;
- }
- std::error_code ec;
- auto size = file_size(file, ec);
- if (ec) {
- size = 0;
- }
- return size;
-}
-
-static std::uint64_t handleOneInput(const Options &options,
- const std::string &inputFile,
- FILE* inputFd,
- const std::string &outputFile,
- FILE* outputFd,
- SharedState& state) {
- auto inputSize = fileSizeOrZero(inputFile);
- // WorkQueue outlives ThreadPool so in the case of error we are certain
- // we don't accidentally try to call push() on it after it is destroyed
- WorkQueue<std::shared_ptr<BufferWorkQueue>> outs{options.numThreads + 1};
- std::uint64_t bytesRead;
- std::uint64_t bytesWritten;
- {
- // Initialize the (de)compression thread pool with numThreads
- ThreadPool executor(options.numThreads);
- // Run the reader thread on an extra thread
- ThreadPool readExecutor(1);
- if (!options.decompress) {
- // Add a job that reads the input and starts all the compression jobs
- readExecutor.add(
- [&state, &outs, &executor, inputFd, inputSize, &options, &bytesRead] {
- bytesRead = asyncCompressChunks(
- state,
- outs,
- executor,
- inputFd,
- inputSize,
- options.numThreads,
- options.determineParameters());
- });
- // Start writing
- bytesWritten = writeFile(state, outs, outputFd, options.decompress);
- } else {
- // Add a job that reads the input and starts all the decompression jobs
- readExecutor.add([&state, &outs, &executor, inputFd, &bytesRead] {
- bytesRead = asyncDecompressFrames(state, outs, executor, inputFd);
- });
- // Start writing
- bytesWritten = writeFile(state, outs, outputFd, options.decompress);
- }
- }
- if (!state.errorHolder.hasError()) {
- std::string inputFileName = inputFile == "-" ? "stdin" : inputFile;
- std::string outputFileName = outputFile == "-" ? "stdout" : outputFile;
- if (!options.decompress) {
- double ratio = static_cast<double>(bytesWritten) /
- static_cast<double>(bytesRead + !bytesRead);
- state.log(INFO, "%-20s :%6.2f%% (%6" PRIu64 " => %6" PRIu64
- " bytes, %s)\n",
- inputFileName.c_str(), ratio * 100, bytesRead, bytesWritten,
- outputFileName.c_str());
- } else {
- state.log(INFO, "%-20s: %" PRIu64 " bytes \n",
- inputFileName.c_str(),bytesWritten);
- }
- }
- return bytesWritten;
-}
-
-static FILE *openInputFile(const std::string &inputFile,
- ErrorHolder &errorHolder) {
- if (inputFile == "-") {
- SET_BINARY_MODE(stdin);
- return stdin;
- }
- // Check if input file is a directory
- {
- std::error_code ec;
- if (is_directory(inputFile, ec)) {
- errorHolder.setError("Output file is a directory -- ignored");
- return nullptr;
- }
- }
- auto inputFd = std::fopen(inputFile.c_str(), "rb");
- if (!errorHolder.check(inputFd != nullptr, "Failed to open input file")) {
- return nullptr;
- }
- return inputFd;
-}
-
-static FILE *openOutputFile(const Options &options,
- const std::string &outputFile,
- SharedState& state) {
- if (outputFile == "-") {
- SET_BINARY_MODE(stdout);
- return stdout;
- }
- // Check if the output file exists and then open it
- if (!options.overwrite && outputFile != nullOutput) {
- auto outputFd = std::fopen(outputFile.c_str(), "rb");
- if (outputFd != nullptr) {
- std::fclose(outputFd);
- if (!state.log.logsAt(INFO)) {
- state.errorHolder.setError("Output file exists");
- return nullptr;
- }
- state.log(
- INFO,
- "pzstd: %s already exists; do you wish to overwrite (y/n) ? ",
- outputFile.c_str());
- int c = getchar();
- if (c != 'y' && c != 'Y') {
- state.errorHolder.setError("Not overwritten");
- return nullptr;
- }
- }
- }
- auto outputFd = std::fopen(outputFile.c_str(), "wb");
- if (!state.errorHolder.check(
- outputFd != nullptr, "Failed to open output file")) {
- return nullptr;
- }
- return outputFd;
-}
-
-int pzstdMain(const Options &options) {
- int returnCode = 0;
- SharedState state(options);
- for (const auto& input : options.inputFiles) {
- // Setup the shared state
- auto printErrorGuard = makeScopeGuard([&] {
- if (state.errorHolder.hasError()) {
- returnCode = 1;
- state.log(ERROR, "pzstd: %s: %s.\n", input.c_str(),
- state.errorHolder.getError().c_str());
- }
- });
- // Open the input file
- auto inputFd = openInputFile(input, state.errorHolder);
- if (inputFd == nullptr) {
- continue;
- }
- auto closeInputGuard = makeScopeGuard([&] { std::fclose(inputFd); });
- // Open the output file
- auto outputFile = options.getOutputFile(input);
- if (!state.errorHolder.check(outputFile != "",
- "Input file does not have extension .zst")) {
- continue;
- }
- auto outputFd = openOutputFile(options, outputFile, state);
- if (outputFd == nullptr) {
- continue;
- }
- auto closeOutputGuard = makeScopeGuard([&] { std::fclose(outputFd); });
- // (de)compress the file
- handleOneInput(options, input, inputFd, outputFile, outputFd, state);
- if (state.errorHolder.hasError()) {
- continue;
- }
- // Delete the input file if necessary
- if (!options.keepSource) {
- // Be sure that we are done and have written everything before we delete
- if (!state.errorHolder.check(std::fclose(inputFd) == 0,
- "Failed to close input file")) {
- continue;
- }
- closeInputGuard.dismiss();
- if (!state.errorHolder.check(std::fclose(outputFd) == 0,
- "Failed to close output file")) {
- continue;
- }
- closeOutputGuard.dismiss();
- if (std::remove(input.c_str()) != 0) {
- state.errorHolder.setError("Failed to remove input file");
- continue;
- }
- }
- }
- // Returns 1 if any of the files failed to (de)compress.
- return returnCode;
-}
-
-/// Construct a `ZSTD_inBuffer` that points to the data in `buffer`.
-static ZSTD_inBuffer makeZstdInBuffer(const Buffer& buffer) {
- return ZSTD_inBuffer{buffer.data(), buffer.size(), 0};
-}
-
-/**
- * Advance `buffer` and `inBuffer` by the amount of data read, as indicated by
- * `inBuffer.pos`.
- */
-void advance(Buffer& buffer, ZSTD_inBuffer& inBuffer) {
- auto pos = inBuffer.pos;
- inBuffer.src = static_cast<const unsigned char*>(inBuffer.src) + pos;
- inBuffer.size -= pos;
- inBuffer.pos = 0;
- return buffer.advance(pos);
-}
-
-/// Construct a `ZSTD_outBuffer` that points to the data in `buffer`.
-static ZSTD_outBuffer makeZstdOutBuffer(Buffer& buffer) {
- return ZSTD_outBuffer{buffer.data(), buffer.size(), 0};
-}
-
-/**
- * Split `buffer` and advance `outBuffer` by the amount of data written, as
- * indicated by `outBuffer.pos`.
- */
-Buffer split(Buffer& buffer, ZSTD_outBuffer& outBuffer) {
- auto pos = outBuffer.pos;
- outBuffer.dst = static_cast<unsigned char*>(outBuffer.dst) + pos;
- outBuffer.size -= pos;
- outBuffer.pos = 0;
- return buffer.splitAt(pos);
-}
-
-/**
- * Stream chunks of input from `in`, compress it, and stream it out to `out`.
- *
- * @param state The shared state
- * @param in Queue that we `pop()` input buffers from
- * @param out Queue that we `push()` compressed output buffers to
- * @param maxInputSize An upper bound on the size of the input
- */
-static void compress(
- SharedState& state,
- std::shared_ptr<BufferWorkQueue> in,
- std::shared_ptr<BufferWorkQueue> out,
- size_t maxInputSize) {
- auto& errorHolder = state.errorHolder;
- auto guard = makeScopeGuard([&] { out->finish(); });
- // Initialize the CCtx
- auto ctx = state.cStreamPool->get();
- if (!errorHolder.check(ctx != nullptr, "Failed to allocate ZSTD_CStream")) {
- return;
- }
- {
- auto err = ZSTD_resetCStream(ctx.get(), 0);
- if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) {
- return;
- }
- }
-
- // Allocate space for the result
- auto outBuffer = Buffer(ZSTD_compressBound(maxInputSize));
- auto zstdOutBuffer = makeZstdOutBuffer(outBuffer);
- {
- Buffer inBuffer;
- // Read a buffer in from the input queue
- while (in->pop(inBuffer) && !errorHolder.hasError()) {
- auto zstdInBuffer = makeZstdInBuffer(inBuffer);
- // Compress the whole buffer and send it to the output queue
- while (!inBuffer.empty() && !errorHolder.hasError()) {
- if (!errorHolder.check(
- !outBuffer.empty(), "ZSTD_compressBound() was too small")) {
- return;
- }
- // Compress
- auto err =
- ZSTD_compressStream(ctx.get(), &zstdOutBuffer, &zstdInBuffer);
- if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) {
- return;
- }
- // Split the compressed data off outBuffer and pass to the output queue
- out->push(split(outBuffer, zstdOutBuffer));
- // Forget about the data we already compressed
- advance(inBuffer, zstdInBuffer);
- }
- }
- }
- // Write the epilog
- size_t bytesLeft;
- do {
- if (!errorHolder.check(
- !outBuffer.empty(), "ZSTD_compressBound() was too small")) {
- return;
- }
- bytesLeft = ZSTD_endStream(ctx.get(), &zstdOutBuffer);
- if (!errorHolder.check(
- !ZSTD_isError(bytesLeft), ZSTD_getErrorName(bytesLeft))) {
- return;
- }
- out->push(split(outBuffer, zstdOutBuffer));
- } while (bytesLeft != 0 && !errorHolder.hasError());
-}
-
-/**
- * Calculates how large each independently compressed frame should be.
- *
- * @param size The size of the source if known, 0 otherwise
- * @param numThreads The number of threads available to run compression jobs on
- * @param params The zstd parameters to be used for compression
- */
-static size_t calculateStep(
- std::uintmax_t size,
- size_t numThreads,
- const ZSTD_parameters &params) {
- (void)size;
- (void)numThreads;
- return size_t{1} << (params.cParams.windowLog + 2);
-}
-
-namespace {
-enum class FileStatus { Continue, Done, Error };
-/// Determines the status of the file descriptor `fd`.
-FileStatus fileStatus(FILE* fd) {
- if (std::feof(fd)) {
- return FileStatus::Done;
- } else if (std::ferror(fd)) {
- return FileStatus::Error;
- }
- return FileStatus::Continue;
-}
-} // anonymous namespace
-
-/**
- * Reads `size` data in chunks of `chunkSize` and puts it into `queue`.
- * Will read less if an error or EOF occurs.
- * Returns the status of the file after all of the reads have occurred.
- */
-static FileStatus
-readData(BufferWorkQueue& queue, size_t chunkSize, size_t size, FILE* fd,
- std::uint64_t *totalBytesRead) {
- Buffer buffer(size);
- while (!buffer.empty()) {
- auto bytesRead =
- std::fread(buffer.data(), 1, std::min(chunkSize, buffer.size()), fd);
- *totalBytesRead += bytesRead;
- queue.push(buffer.splitAt(bytesRead));
- auto status = fileStatus(fd);
- if (status != FileStatus::Continue) {
- return status;
- }
- }
- return FileStatus::Continue;
-}
-
-std::uint64_t asyncCompressChunks(
- SharedState& state,
- WorkQueue<std::shared_ptr<BufferWorkQueue>>& chunks,
- ThreadPool& executor,
- FILE* fd,
- std::uintmax_t size,
- size_t numThreads,
- ZSTD_parameters params) {
- auto chunksGuard = makeScopeGuard([&] { chunks.finish(); });
- std::uint64_t bytesRead = 0;
-
- // Break the input up into chunks of size `step` and compress each chunk
- // independently.
- size_t step = calculateStep(size, numThreads, params);
- state.log(DEBUG, "Chosen frame size: %zu\n", step);
- auto status = FileStatus::Continue;
- while (status == FileStatus::Continue && !state.errorHolder.hasError()) {
- // Make a new input queue that we will put the chunk's input data into.
- auto in = std::make_shared<BufferWorkQueue>();
- auto inGuard = makeScopeGuard([&] { in->finish(); });
- // Make a new output queue that compress will put the compressed data into.
- auto out = std::make_shared<BufferWorkQueue>();
- // Start compression in the thread pool
- executor.add([&state, in, out, step] {
- return compress(
- state, std::move(in), std::move(out), step);
- });
- // Pass the output queue to the writer thread.
- chunks.push(std::move(out));
- state.log(VERBOSE, "%s\n", "Starting a new frame");
- // Fill the input queue for the compression job we just started
- status = readData(*in, ZSTD_CStreamInSize(), step, fd, &bytesRead);
- }
- state.errorHolder.check(status != FileStatus::Error, "Error reading input");
- return bytesRead;
-}
-
-/**
- * Decompress a frame, whose data is streamed into `in`, and stream the output
- * to `out`.
- *
- * @param state The shared state
- * @param in Queue that we `pop()` input buffers from. It contains
- * exactly one compressed frame.
- * @param out Queue that we `push()` decompressed output buffers to
- */
-static void decompress(
- SharedState& state,
- std::shared_ptr<BufferWorkQueue> in,
- std::shared_ptr<BufferWorkQueue> out) {
- auto& errorHolder = state.errorHolder;
- auto guard = makeScopeGuard([&] { out->finish(); });
- // Initialize the DCtx
- auto ctx = state.dStreamPool->get();
- if (!errorHolder.check(ctx != nullptr, "Failed to allocate ZSTD_DStream")) {
- return;
- }
- {
- auto err = ZSTD_resetDStream(ctx.get());
- if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) {
- return;
- }
- }
-
- const size_t outSize = ZSTD_DStreamOutSize();
- Buffer inBuffer;
- size_t returnCode = 0;
- // Read a buffer in from the input queue
- while (in->pop(inBuffer) && !errorHolder.hasError()) {
- auto zstdInBuffer = makeZstdInBuffer(inBuffer);
- // Decompress the whole buffer and send it to the output queue
- while (!inBuffer.empty() && !errorHolder.hasError()) {
- // Allocate a buffer with at least outSize bytes.
- Buffer outBuffer(outSize);
- auto zstdOutBuffer = makeZstdOutBuffer(outBuffer);
- // Decompress
- returnCode =
- ZSTD_decompressStream(ctx.get(), &zstdOutBuffer, &zstdInBuffer);
- if (!errorHolder.check(
- !ZSTD_isError(returnCode), ZSTD_getErrorName(returnCode))) {
- return;
- }
- // Pass the buffer with the decompressed data to the output queue
- out->push(split(outBuffer, zstdOutBuffer));
- // Advance past the input we already read
- advance(inBuffer, zstdInBuffer);
- if (returnCode == 0) {
- // The frame is over, prepare to (maybe) start a new frame
- ZSTD_initDStream(ctx.get());
- }
- }
- }
- if (!errorHolder.check(returnCode <= 1, "Incomplete block")) {
- return;
- }
- // We've given ZSTD_decompressStream all of our data, but there may still
- // be data to read.
- while (returnCode == 1) {
- // Allocate a buffer with at least outSize bytes.
- Buffer outBuffer(outSize);
- auto zstdOutBuffer = makeZstdOutBuffer(outBuffer);
- // Pass in no input.
- ZSTD_inBuffer zstdInBuffer{nullptr, 0, 0};
- // Decompress
- returnCode =
- ZSTD_decompressStream(ctx.get(), &zstdOutBuffer, &zstdInBuffer);
- if (!errorHolder.check(
- !ZSTD_isError(returnCode), ZSTD_getErrorName(returnCode))) {
- return;
- }
- // Pass the buffer with the decompressed data to the output queue
- out->push(split(outBuffer, zstdOutBuffer));
- }
-}
-
-std::uint64_t asyncDecompressFrames(
- SharedState& state,
- WorkQueue<std::shared_ptr<BufferWorkQueue>>& frames,
- ThreadPool& executor,
- FILE* fd) {
- auto framesGuard = makeScopeGuard([&] { frames.finish(); });
- std::uint64_t totalBytesRead = 0;
-
- // Split the source up into its component frames.
- // If we find our recognized skippable frame we know the next frames size
- // which means that we can decompress each standard frame in independently.
- // Otherwise, we will decompress using only one decompression task.
- const size_t chunkSize = ZSTD_DStreamInSize();
- auto status = FileStatus::Continue;
- while (status == FileStatus::Continue && !state.errorHolder.hasError()) {
- // Make a new input queue that we will put the frames's bytes into.
- auto in = std::make_shared<BufferWorkQueue>();
- auto inGuard = makeScopeGuard([&] { in->finish(); });
- // Make a output queue that decompress will put the decompressed data into
- auto out = std::make_shared<BufferWorkQueue>();
-
- size_t frameSize;
- {
- // Calculate the size of the next frame.
- // frameSize is 0 if the frame info can't be decoded.
- Buffer buffer(SkippableFrame::kSize);
- auto bytesRead = std::fread(buffer.data(), 1, buffer.size(), fd);
- totalBytesRead += bytesRead;
- status = fileStatus(fd);
- if (bytesRead == 0 && status != FileStatus::Continue) {
- break;
- }
- buffer.subtract(buffer.size() - bytesRead);
- frameSize = SkippableFrame::tryRead(buffer.range());
- in->push(std::move(buffer));
- }
- if (frameSize == 0) {
- // We hit a non SkippableFrame, so this will be the last job.
- // Make sure that we don't use too much memory
- in->setMaxSize(64);
- out->setMaxSize(64);
- }
- // Start decompression in the thread pool
- executor.add([&state, in, out] {
- return decompress(state, std::move(in), std::move(out));
- });
- // Pass the output queue to the writer thread
- frames.push(std::move(out));
- if (frameSize == 0) {
- // We hit a non SkippableFrame ==> not compressed by pzstd or corrupted
- // Pass the rest of the source to this decompression task
- state.log(VERBOSE, "%s\n",
- "Input not in pzstd format, falling back to serial decompression");
- while (status == FileStatus::Continue && !state.errorHolder.hasError()) {
- status = readData(*in, chunkSize, chunkSize, fd, &totalBytesRead);
- }
- break;
- }
- state.log(VERBOSE, "Decompressing a frame of size %zu", frameSize);
- // Fill the input queue for the decompression job we just started
- status = readData(*in, chunkSize, frameSize, fd, &totalBytesRead);
- }
- state.errorHolder.check(status != FileStatus::Error, "Error reading input");
- return totalBytesRead;
-}
-
-/// Write `data` to `fd`, returns true iff success.
-static bool writeData(ByteRange data, FILE* fd) {
- while (!data.empty()) {
- data.advance(std::fwrite(data.begin(), 1, data.size(), fd));
- if (std::ferror(fd)) {
- return false;
- }
- }
- return true;
-}
-
-std::uint64_t writeFile(
- SharedState& state,
- WorkQueue<std::shared_ptr<BufferWorkQueue>>& outs,
- FILE* outputFd,
- bool decompress) {
- auto& errorHolder = state.errorHolder;
- auto lineClearGuard = makeScopeGuard([&state] {
- state.log.clear(INFO);
- });
- std::uint64_t bytesWritten = 0;
- std::shared_ptr<BufferWorkQueue> out;
- // Grab the output queue for each decompression job (in order).
- while (outs.pop(out)) {
- if (errorHolder.hasError()) {
- continue;
- }
- if (!decompress) {
- // If we are compressing and want to write skippable frames we can't
- // start writing before compression is done because we need to know the
- // compressed size.
- // Wait for the compressed size to be available and write skippable frame
- SkippableFrame frame(out->size());
- if (!writeData(frame.data(), outputFd)) {
- errorHolder.setError("Failed to write output");
- return bytesWritten;
- }
- bytesWritten += frame.kSize;
- }
- // For each chunk of the frame: Pop it from the queue and write it
- Buffer buffer;
- while (out->pop(buffer) && !errorHolder.hasError()) {
- if (!writeData(buffer.range(), outputFd)) {
- errorHolder.setError("Failed to write output");
- return bytesWritten;
- }
- bytesWritten += buffer.size();
- state.log.update(INFO, "Written: %u MB ",
- static_cast<std::uint32_t>(bytesWritten >> 20));
- }
- }
- return bytesWritten;
-}
-}
diff --git a/contrib/pzstd/Pzstd.h b/contrib/pzstd/Pzstd.h
deleted file mode 100644
index 79d1fcca2653..000000000000
--- a/contrib/pzstd/Pzstd.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#pragma once
-
-#include "ErrorHolder.h"
-#include "Logging.h"
-#include "Options.h"
-#include "utils/Buffer.h"
-#include "utils/Range.h"
-#include "utils/ResourcePool.h"
-#include "utils/ThreadPool.h"
-#include "utils/WorkQueue.h"
-#define ZSTD_STATIC_LINKING_ONLY
-#include "zstd.h"
-#undef ZSTD_STATIC_LINKING_ONLY
-
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-
-namespace pzstd {
-/**
- * Runs pzstd with `options` and returns the number of bytes written.
- * An error occurred if `errorHandler.hasError()`.
- *
- * @param options The pzstd options to use for (de)compression
- * @returns 0 upon success and non-zero on failure.
- */
-int pzstdMain(const Options& options);
-
-class SharedState {
- public:
- SharedState(const Options& options) : log(options.verbosity) {
- if (!options.decompress) {
- auto parameters = options.determineParameters();
- cStreamPool.reset(new ResourcePool<ZSTD_CStream>{
- [this, parameters]() -> ZSTD_CStream* {
- this->log(VERBOSE, "%s\n", "Creating new ZSTD_CStream");
- auto zcs = ZSTD_createCStream();
- if (zcs) {
- auto err = ZSTD_initCStream_advanced(
- zcs, nullptr, 0, parameters, 0);
- if (ZSTD_isError(err)) {
- ZSTD_freeCStream(zcs);
- return nullptr;
- }
- }
- return zcs;
- },
- [](ZSTD_CStream *zcs) {
- ZSTD_freeCStream(zcs);
- }});
- } else {
- dStreamPool.reset(new ResourcePool<ZSTD_DStream>{
- [this]() -> ZSTD_DStream* {
- this->log(VERBOSE, "%s\n", "Creating new ZSTD_DStream");
- auto zds = ZSTD_createDStream();
- if (zds) {
- auto err = ZSTD_initDStream(zds);
- if (ZSTD_isError(err)) {
- ZSTD_freeDStream(zds);
- return nullptr;
- }
- }
- return zds;
- },
- [](ZSTD_DStream *zds) {
- ZSTD_freeDStream(zds);
- }});
- }
- }
-
- ~SharedState() {
- // The resource pools have references to this, so destroy them first.
- cStreamPool.reset();
- dStreamPool.reset();
- }
-
- Logger log;
- ErrorHolder errorHolder;
- std::unique_ptr<ResourcePool<ZSTD_CStream>> cStreamPool;
- std::unique_ptr<ResourcePool<ZSTD_DStream>> dStreamPool;
-};
-
-/**
- * Streams input from `fd`, breaks input up into chunks, and compresses each
- * chunk independently. Output of each chunk gets streamed to a queue, and
- * the output queues get put into `chunks` in order.
- *
- * @param state The shared state
- * @param chunks Each compression jobs output queue gets `pushed()` here
- * as soon as it is available
- * @param executor The thread pool to run compression jobs in
- * @param fd The input file descriptor
- * @param size The size of the input file if known, 0 otherwise
- * @param numThreads The number of threads in the thread pool
- * @param parameters The zstd parameters to use for compression
- * @returns The number of bytes read from the file
- */
-std::uint64_t asyncCompressChunks(
- SharedState& state,
- WorkQueue<std::shared_ptr<BufferWorkQueue>>& chunks,
- ThreadPool& executor,
- FILE* fd,
- std::uintmax_t size,
- std::size_t numThreads,
- ZSTD_parameters parameters);
-
-/**
- * Streams input from `fd`. If pzstd headers are available it breaks the input
- * up into independent frames. It sends each frame to an independent
- * decompression job. Output of each frame gets streamed to a queue, and
- * the output queues get put into `frames` in order.
- *
- * @param state The shared state
- * @param frames Each decompression jobs output queue gets `pushed()` here
- * as soon as it is available
- * @param executor The thread pool to run compression jobs in
- * @param fd The input file descriptor
- * @returns The number of bytes read from the file
- */
-std::uint64_t asyncDecompressFrames(
- SharedState& state,
- WorkQueue<std::shared_ptr<BufferWorkQueue>>& frames,
- ThreadPool& executor,
- FILE* fd);
-
-/**
- * Streams input in from each queue in `outs` in order, and writes the data to
- * `outputFd`.
- *
- * @param state The shared state
- * @param outs A queue of output queues, one for each
- * (de)compression job.
- * @param outputFd The file descriptor to write to
- * @param decompress Are we decompressing?
- * @returns The number of bytes written
- */
-std::uint64_t writeFile(
- SharedState& state,
- WorkQueue<std::shared_ptr<BufferWorkQueue>>& outs,
- FILE* outputFd,
- bool decompress);
-}
diff --git a/contrib/pzstd/README.md b/contrib/pzstd/README.md
deleted file mode 100644
index 84d945815838..000000000000
--- a/contrib/pzstd/README.md
+++ /dev/null
@@ -1,56 +0,0 @@
-# Parallel Zstandard (PZstandard)
-
-Parallel Zstandard is a Pigz-like tool for Zstandard.
-It provides Zstandard format compatible compression and decompression that is able to utilize multiple cores.
-It breaks the input up into equal sized chunks and compresses each chunk independently into a Zstandard frame.
-It then concatenates the frames together to produce the final compressed output.
-Pzstandard will write a 12 byte header for each frame that is a skippable frame in the Zstandard format, which tells PZstandard the size of the next compressed frame.
-PZstandard supports parallel decompression of files compressed with PZstandard.
-When decompressing files compressed with Zstandard, PZstandard does IO in one thread, and decompression in another.
-
-## Usage
-
-PZstandard supports the same command line interface as Zstandard, but also provides the `-p` option to specify the number of threads.
-Dictionary mode is not currently supported.
-
-Basic usage
-
- pzstd input-file -o output-file -p num-threads -# # Compression
- pzstd -d input-file -o output-file -p num-threads # Decompression
-
-PZstandard also supports piping and fifo pipes
-
- cat input-file | pzstd -p num-threads -# -c > /dev/null
-
-For more options
-
- pzstd --help
-
-PZstandard tries to pick a smart default number of threads if not specified (displayed in `pzstd --help`).
-If this number is not suitable, during compilation you can define `PZSTD_NUM_THREADS` to the number of threads you prefer.
-
-## Benchmarks
-
-As a reference, PZstandard and Pigz were compared on an Intel Core i7 @ 3.1 GHz, each using 4 threads, with the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia).
-
-Compression Speed vs Ratio with 4 Threads | Decompression Speed with 4 Threads
-------------------------------------------|-----------------------------------
-![Compression Speed vs Ratio](images/Cspeed.png "Compression Speed vs Ratio") | ![Decompression Speed](images/Dspeed.png "Decompression Speed")
-
-The test procedure was to run each of the following commands 2 times for each compression level, and take the minimum time.
-
- time pzstd -# -p 4 -c silesia.tar > silesia.tar.zst
- time pzstd -d -p 4 -c silesia.tar.zst > /dev/null
-
- time pigz -# -p 4 -k -c silesia.tar > silesia.tar.gz
- time pigz -d -p 4 -k -c silesia.tar.gz > /dev/null
-
-PZstandard was tested using compression levels 1-19, and Pigz was tested using compression levels 1-9.
-Pigz cannot do parallel decompression, it simply does each of reading, decompression, and writing on separate threads.
-
-## Tests
-
-Tests require that you have [gtest](https://github.com/google/googletest) installed.
-Set `GTEST_INC` and `GTEST_LIB` in `Makefile` to specify the location of the gtest headers and libraries.
-Alternatively, run `make googletest`, which will clone googletest and build it.
-Run `make tests && make check` to run tests.
diff --git a/contrib/pzstd/SkippableFrame.cpp b/contrib/pzstd/SkippableFrame.cpp
deleted file mode 100644
index 769866dfc815..000000000000
--- a/contrib/pzstd/SkippableFrame.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#include "SkippableFrame.h"
-#include "mem.h"
-#include "utils/Range.h"
-
-#include <cstdio>
-
-using namespace pzstd;
-
-SkippableFrame::SkippableFrame(std::uint32_t size) : frameSize_(size) {
- MEM_writeLE32(data_.data(), kSkippableFrameMagicNumber);
- MEM_writeLE32(data_.data() + 4, kFrameContentsSize);
- MEM_writeLE32(data_.data() + 8, frameSize_);
-}
-
-/* static */ std::size_t SkippableFrame::tryRead(ByteRange bytes) {
- if (bytes.size() < SkippableFrame::kSize ||
- MEM_readLE32(bytes.begin()) != kSkippableFrameMagicNumber ||
- MEM_readLE32(bytes.begin() + 4) != kFrameContentsSize) {
- return 0;
- }
- return MEM_readLE32(bytes.begin() + 8);
-}
diff --git a/contrib/pzstd/SkippableFrame.h b/contrib/pzstd/SkippableFrame.h
deleted file mode 100644
index 60deed0405be..000000000000
--- a/contrib/pzstd/SkippableFrame.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#pragma once
-
-#include "utils/Range.h"
-
-#include <array>
-#include <cstddef>
-#include <cstdint>
-#include <cstdio>
-
-namespace pzstd {
-/**
- * We put a skippable frame before each frame.
- * It contains a skippable frame magic number, the size of the skippable frame,
- * and the size of the next frame.
- * Each skippable frame is exactly 12 bytes in little endian format.
- * The first 8 bytes are for compatibility with the ZSTD format.
- * If we have N threads, the output will look like
- *
- * [0x184D2A50|4|size1] [frame1 of size size1]
- * [0x184D2A50|4|size2] [frame2 of size size2]
- * ...
- * [0x184D2A50|4|sizeN] [frameN of size sizeN]
- *
- * Each sizeX is 4 bytes.
- *
- * These skippable frames should allow us to skip through the compressed file
- * and only load at most N pages.
- */
-class SkippableFrame {
- public:
- static constexpr std::size_t kSize = 12;
-
- private:
- std::uint32_t frameSize_;
- std::array<std::uint8_t, kSize> data_;
- static constexpr std::uint32_t kSkippableFrameMagicNumber = 0x184D2A50;
- // Could be improved if the size fits in less bytes
- static constexpr std::uint32_t kFrameContentsSize = kSize - 8;
-
- public:
- // Write the skippable frame to data_ in LE format.
- explicit SkippableFrame(std::uint32_t size);
-
- // Read the skippable frame from bytes in LE format.
- static std::size_t tryRead(ByteRange bytes);
-
- ByteRange data() const {
- return {data_.data(), data_.size()};
- }
-
- // Size of the next frame.
- std::size_t frameSize() const {
- return frameSize_;
- }
-};
-}
diff --git a/contrib/pzstd/images/Cspeed.png b/contrib/pzstd/images/Cspeed.png
deleted file mode 100644
index aca4f663ea2e..000000000000
--- a/contrib/pzstd/images/Cspeed.png
+++ /dev/null
Binary files differ
diff --git a/contrib/pzstd/images/Dspeed.png b/contrib/pzstd/images/Dspeed.png
deleted file mode 100644
index e48881bcd05b..000000000000
--- a/contrib/pzstd/images/Dspeed.png
+++ /dev/null
Binary files differ
diff --git a/contrib/pzstd/main.cpp b/contrib/pzstd/main.cpp
deleted file mode 100644
index b93f043b16b1..000000000000
--- a/contrib/pzstd/main.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#include "ErrorHolder.h"
-#include "Options.h"
-#include "Pzstd.h"
-
-using namespace pzstd;
-
-int main(int argc, const char** argv) {
- Options options;
- switch (options.parse(argc, argv)) {
- case Options::Status::Failure:
- return 1;
- case Options::Status::Message:
- return 0;
- default:
- break;
- }
-
- return pzstdMain(options);
-}
diff --git a/contrib/pzstd/test/BUCK b/contrib/pzstd/test/BUCK
deleted file mode 100644
index 6d3fdd3c269b..000000000000
--- a/contrib/pzstd/test/BUCK
+++ /dev/null
@@ -1,37 +0,0 @@
-cxx_test(
- name='options_test',
- srcs=['OptionsTest.cpp'],
- deps=['//contrib/pzstd:options'],
-)
-
-cxx_test(
- name='pzstd_test',
- srcs=['PzstdTest.cpp'],
- deps=[
- ':round_trip',
- '//contrib/pzstd:libpzstd',
- '//contrib/pzstd/utils:scope_guard',
- '//programs:datagen',
- ],
-)
-
-cxx_binary(
- name='round_trip_test',
- srcs=['RoundTripTest.cpp'],
- deps=[
- ':round_trip',
- '//contrib/pzstd/utils:scope_guard',
- '//programs:datagen',
- ]
-)
-
-cxx_library(
- name='round_trip',
- header_namespace='test',
- exported_headers=['RoundTrip.h'],
- deps=[
- '//contrib/pzstd:libpzstd',
- '//contrib/pzstd:options',
- '//contrib/pzstd/utils:scope_guard',
- ]
-)
diff --git a/contrib/pzstd/test/OptionsTest.cpp b/contrib/pzstd/test/OptionsTest.cpp
deleted file mode 100644
index e601148255d4..000000000000
--- a/contrib/pzstd/test/OptionsTest.cpp
+++ /dev/null
@@ -1,536 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#include "Options.h"
-
-#include <array>
-#include <gtest/gtest.h>
-
-using namespace pzstd;
-
-namespace pzstd {
-bool operator==(const Options &lhs, const Options &rhs) {
- return lhs.numThreads == rhs.numThreads &&
- lhs.maxWindowLog == rhs.maxWindowLog &&
- lhs.compressionLevel == rhs.compressionLevel &&
- lhs.decompress == rhs.decompress && lhs.inputFiles == rhs.inputFiles &&
- lhs.outputFile == rhs.outputFile && lhs.overwrite == rhs.overwrite &&
- lhs.keepSource == rhs.keepSource && lhs.writeMode == rhs.writeMode &&
- lhs.checksum == rhs.checksum && lhs.verbosity == rhs.verbosity;
-}
-
-std::ostream &operator<<(std::ostream &out, const Options &opt) {
- out << "{";
- {
- out << "\n\t"
- << "numThreads: " << opt.numThreads;
- out << ",\n\t"
- << "maxWindowLog: " << opt.maxWindowLog;
- out << ",\n\t"
- << "compressionLevel: " << opt.compressionLevel;
- out << ",\n\t"
- << "decompress: " << opt.decompress;
- out << ",\n\t"
- << "inputFiles: {";
- {
- bool first = true;
- for (const auto &file : opt.inputFiles) {
- if (!first) {
- out << ",";
- }
- first = false;
- out << "\n\t\t" << file;
- }
- }
- out << "\n\t}";
- out << ",\n\t"
- << "outputFile: " << opt.outputFile;
- out << ",\n\t"
- << "overwrite: " << opt.overwrite;
- out << ",\n\t"
- << "keepSource: " << opt.keepSource;
- out << ",\n\t"
- << "writeMode: " << static_cast<int>(opt.writeMode);
- out << ",\n\t"
- << "checksum: " << opt.checksum;
- out << ",\n\t"
- << "verbosity: " << opt.verbosity;
- }
- out << "\n}";
- return out;
-}
-}
-
-namespace {
-#ifdef _WIN32
-const char nullOutput[] = "nul";
-#else
-const char nullOutput[] = "/dev/null";
-#endif
-
-constexpr auto autoMode = Options::WriteMode::Auto;
-} // anonymous namespace
-
-#define EXPECT_SUCCESS(...) EXPECT_EQ(Options::Status::Success, __VA_ARGS__)
-#define EXPECT_FAILURE(...) EXPECT_EQ(Options::Status::Failure, __VA_ARGS__)
-#define EXPECT_MESSAGE(...) EXPECT_EQ(Options::Status::Message, __VA_ARGS__)
-
-template <typename... Args>
-std::array<const char *, sizeof...(Args) + 1> makeArray(Args... args) {
- return {{nullptr, args...}};
-}
-
-TEST(Options, ValidInputs) {
- {
- Options options;
- auto args = makeArray("--processes", "5", "-o", "x", "y", "-f");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- Options expected = {5, 23, 3, false, {"y"}, "x",
- true, true, autoMode, true, 2};
- EXPECT_EQ(expected, options);
- }
- {
- Options options;
- auto args = makeArray("-p", "1", "input", "-19");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- Options expected = {1, 23, 19, false, {"input"}, "",
- false, true, autoMode, true, 2};
- EXPECT_EQ(expected, options);
- }
- {
- Options options;
- auto args =
- makeArray("--ultra", "-22", "-p", "1", "-o", "x", "-d", "x.zst", "-f");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- Options expected = {1, 0, 22, true, {"x.zst"}, "x",
- true, true, autoMode, true, 2};
- EXPECT_EQ(expected, options);
- }
- {
- Options options;
- auto args = makeArray("--processes", "100", "hello.zst", "--decompress",
- "--force");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- Options expected = {100, 23, 3, true, {"hello.zst"}, "", true,
- true, autoMode, true, 2};
- EXPECT_EQ(expected, options);
- }
- {
- Options options;
- auto args = makeArray("x", "-dp", "1", "-c");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- Options expected = {1, 23, 3, true, {"x"}, "-",
- false, true, autoMode, true, 2};
- EXPECT_EQ(expected, options);
- }
- {
- Options options;
- auto args = makeArray("x", "-dp", "1", "--stdout");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- Options expected = {1, 23, 3, true, {"x"}, "-",
- false, true, autoMode, true, 2};
- EXPECT_EQ(expected, options);
- }
- {
- Options options;
- auto args = makeArray("-p", "1", "x", "-5", "-fo", "-", "--ultra", "-d");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- Options expected = {1, 0, 5, true, {"x"}, "-",
- true, true, autoMode, true, 2};
- EXPECT_EQ(expected, options);
- }
- {
- Options options;
- auto args = makeArray("silesia.tar", "-o", "silesia.tar.pzstd", "-p", "2");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- Options expected = {2,
- 23,
- 3,
- false,
- {"silesia.tar"},
- "silesia.tar.pzstd",
- false,
- true,
- autoMode,
- true,
- 2};
- EXPECT_EQ(expected, options);
- }
- {
- Options options;
- auto args = makeArray("x", "-p", "1");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- }
- {
- Options options;
- auto args = makeArray("x", "-p", "1");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- }
-}
-
-TEST(Options, GetOutputFile) {
- {
- Options options;
- auto args = makeArray("x");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ("x.zst", options.getOutputFile(options.inputFiles[0]));
- }
- {
- Options options;
- auto args = makeArray("x", "y", "-o", nullOutput);
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(nullOutput, options.getOutputFile(options.inputFiles[0]));
- }
- {
- Options options;
- auto args = makeArray("x.zst", "-do", nullOutput);
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(nullOutput, options.getOutputFile(options.inputFiles[0]));
- }
- {
- Options options;
- auto args = makeArray("x.zst", "-d");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ("x", options.getOutputFile(options.inputFiles[0]));
- }
- {
- Options options;
- auto args = makeArray("xzst", "-d");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ("", options.getOutputFile(options.inputFiles[0]));
- }
- {
- Options options;
- auto args = makeArray("xzst", "-doxx");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ("xx", options.getOutputFile(options.inputFiles[0]));
- }
-}
-
-TEST(Options, MultipleFiles) {
- {
- Options options;
- auto args = makeArray("x", "y", "z");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- Options expected;
- expected.inputFiles = {"x", "y", "z"};
- expected.verbosity = 1;
- EXPECT_EQ(expected, options);
- }
- {
- Options options;
- auto args = makeArray("x", "y", "z", "-o", nullOutput);
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- Options expected;
- expected.inputFiles = {"x", "y", "z"};
- expected.outputFile = nullOutput;
- expected.verbosity = 1;
- EXPECT_EQ(expected, options);
- }
- {
- Options options;
- auto args = makeArray("x", "y", "-o-");
- EXPECT_FAILURE(options.parse(args.size(), args.data()));
- }
- {
- Options options;
- auto args = makeArray("x", "y", "-o", "file");
- EXPECT_FAILURE(options.parse(args.size(), args.data()));
- }
- {
- Options options;
- auto args = makeArray("-qqvd12qp4", "-f", "x", "--", "--rm", "-c");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- Options expected = {4, 23, 12, true, {"x", "--rm", "-c"},
- "", true, true, autoMode, true,
- 0};
- EXPECT_EQ(expected, options);
- }
-}
-
-TEST(Options, NumThreads) {
- {
- Options options;
- auto args = makeArray("x", "-dfo", "-");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- }
- {
- Options options;
- auto args = makeArray("x", "-p", "0", "-fo", "-");
- EXPECT_FAILURE(options.parse(args.size(), args.data()));
- }
- {
- Options options;
- auto args = makeArray("-f", "-p", "-o", "-");
- EXPECT_FAILURE(options.parse(args.size(), args.data()));
- }
-}
-
-TEST(Options, BadCompressionLevel) {
- {
- Options options;
- auto args = makeArray("x", "-20");
- EXPECT_FAILURE(options.parse(args.size(), args.data()));
- }
- {
- Options options;
- auto args = makeArray("x", "--ultra", "-23");
- EXPECT_FAILURE(options.parse(args.size(), args.data()));
- }
- {
- Options options;
- auto args = makeArray("x", "--1"); // negative 1?
- EXPECT_FAILURE(options.parse(args.size(), args.data()));
- }
-}
-
-TEST(Options, InvalidOption) {
- {
- Options options;
- auto args = makeArray("x", "-x");
- EXPECT_FAILURE(options.parse(args.size(), args.data()));
- }
-}
-
-TEST(Options, BadOutputFile) {
- {
- Options options;
- auto args = makeArray("notzst", "-d", "-p", "1");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ("", options.getOutputFile(options.inputFiles.front()));
- }
-}
-
-TEST(Options, BadOptionsWithArguments) {
- {
- Options options;
- auto args = makeArray("x", "-pf");
- EXPECT_FAILURE(options.parse(args.size(), args.data()));
- }
- {
- Options options;
- auto args = makeArray("x", "-p", "10f");
- EXPECT_FAILURE(options.parse(args.size(), args.data()));
- }
- {
- Options options;
- auto args = makeArray("x", "-p");
- EXPECT_FAILURE(options.parse(args.size(), args.data()));
- }
- {
- Options options;
- auto args = makeArray("x", "-o");
- EXPECT_FAILURE(options.parse(args.size(), args.data()));
- }
- {
- Options options;
- auto args = makeArray("x", "-o");
- EXPECT_FAILURE(options.parse(args.size(), args.data()));
- }
-}
-
-TEST(Options, KeepSource) {
- {
- Options options;
- auto args = makeArray("x", "--rm", "-k");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(true, options.keepSource);
- }
- {
- Options options;
- auto args = makeArray("x", "--rm", "--keep");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(true, options.keepSource);
- }
- {
- Options options;
- auto args = makeArray("x");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(true, options.keepSource);
- }
- {
- Options options;
- auto args = makeArray("x", "--rm");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(false, options.keepSource);
- }
-}
-
-TEST(Options, Verbosity) {
- {
- Options options;
- auto args = makeArray("x");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(2, options.verbosity);
- }
- {
- Options options;
- auto args = makeArray("--quiet", "-qq", "x");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(-1, options.verbosity);
- }
- {
- Options options;
- auto args = makeArray("x", "y");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(1, options.verbosity);
- }
- {
- Options options;
- auto args = makeArray("--", "x", "y");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(1, options.verbosity);
- }
- {
- Options options;
- auto args = makeArray("-qv", "x", "y");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(1, options.verbosity);
- }
- {
- Options options;
- auto args = makeArray("-v", "x", "y");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(3, options.verbosity);
- }
- {
- Options options;
- auto args = makeArray("-v", "x");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(3, options.verbosity);
- }
-}
-
-TEST(Options, TestMode) {
- {
- Options options;
- auto args = makeArray("x", "-t");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(true, options.keepSource);
- EXPECT_EQ(true, options.decompress);
- EXPECT_EQ(nullOutput, options.outputFile);
- }
- {
- Options options;
- auto args = makeArray("x", "--test", "--rm", "-ohello");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(true, options.keepSource);
- EXPECT_EQ(true, options.decompress);
- EXPECT_EQ(nullOutput, options.outputFile);
- }
-}
-
-TEST(Options, Checksum) {
- {
- Options options;
- auto args = makeArray("x.zst", "--no-check", "-Cd");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(true, options.checksum);
- }
- {
- Options options;
- auto args = makeArray("x");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(true, options.checksum);
- }
- {
- Options options;
- auto args = makeArray("x", "--no-check", "--check");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(true, options.checksum);
- }
- {
- Options options;
- auto args = makeArray("x", "--no-check");
- EXPECT_SUCCESS(options.parse(args.size(), args.data()));
- EXPECT_EQ(false, options.checksum);
- }
-}
-
-TEST(Options, InputFiles) {
- {
- Options options;
- auto args = makeArray("-cd");
- options.parse(args.size(), args.data());
- EXPECT_EQ(1, options.inputFiles.size());
- EXPECT_EQ("-", options.inputFiles[0]);
- EXPECT_EQ("-", options.outputFile);
- }
- {
- Options options;
- auto args = makeArray();
- options.parse(args.size(), args.data());
- EXPECT_EQ(1, options.inputFiles.size());
- EXPECT_EQ("-", options.inputFiles[0]);
- EXPECT_EQ("-", options.outputFile);
- }
- {
- Options options;
- auto args = makeArray("-d");
- options.parse(args.size(), args.data());
- EXPECT_EQ(1, options.inputFiles.size());
- EXPECT_EQ("-", options.inputFiles[0]);
- EXPECT_EQ("-", options.outputFile);
- }
- {
- Options options;
- auto args = makeArray("x", "-");
- EXPECT_FAILURE(options.parse(args.size(), args.data()));
- }
-}
-
-TEST(Options, InvalidOptions) {
- {
- Options options;
- auto args = makeArray("-ibasdf");
- EXPECT_FAILURE(options.parse(args.size(), args.data()));
- }
- {
- Options options;
- auto args = makeArray("- ");
- EXPECT_FAILURE(options.parse(args.size(), args.data()));
- }
- {
- Options options;
- auto args = makeArray("-n15");
- EXPECT_FAILURE(options.parse(args.size(), args.data()));
- }
- {
- Options options;
- auto args = makeArray("-0", "x");
- EXPECT_FAILURE(options.parse(args.size(), args.data()));
- }
-}
-
-TEST(Options, Extras) {
- {
- Options options;
- auto args = makeArray("-h");
- EXPECT_MESSAGE(options.parse(args.size(), args.data()));
- }
- {
- Options options;
- auto args = makeArray("-H");
- EXPECT_MESSAGE(options.parse(args.size(), args.data()));
- }
- {
- Options options;
- auto args = makeArray("-V");
- EXPECT_MESSAGE(options.parse(args.size(), args.data()));
- }
- {
- Options options;
- auto args = makeArray("--help");
- EXPECT_MESSAGE(options.parse(args.size(), args.data()));
- }
- {
- Options options;
- auto args = makeArray("--version");
- EXPECT_MESSAGE(options.parse(args.size(), args.data()));
- }
-}
diff --git a/contrib/pzstd/test/PzstdTest.cpp b/contrib/pzstd/test/PzstdTest.cpp
deleted file mode 100644
index 5c7d66310805..000000000000
--- a/contrib/pzstd/test/PzstdTest.cpp
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#include "Pzstd.h"
-extern "C" {
-#include "datagen.h"
-}
-#include "test/RoundTrip.h"
-#include "utils/ScopeGuard.h"
-
-#include <cstddef>
-#include <cstdio>
-#include <gtest/gtest.h>
-#include <memory>
-#include <random>
-
-using namespace std;
-using namespace pzstd;
-
-TEST(Pzstd, SmallSizes) {
- unsigned seed = std::random_device{}();
- std::fprintf(stderr, "Pzstd.SmallSizes seed: %u\n", seed);
- std::mt19937 gen(seed);
-
- for (unsigned len = 1; len < 256; ++len) {
- if (len % 16 == 0) {
- std::fprintf(stderr, "%u / 16\n", len / 16);
- }
- std::string inputFile = std::tmpnam(nullptr);
- auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
- {
- static uint8_t buf[256];
- RDG_genBuffer(buf, len, 0.5, 0.0, gen());
- auto fd = std::fopen(inputFile.c_str(), "wb");
- auto written = std::fwrite(buf, 1, len, fd);
- std::fclose(fd);
- ASSERT_EQ(written, len);
- }
- for (unsigned numThreads = 1; numThreads <= 2; ++numThreads) {
- for (unsigned level = 1; level <= 4; level *= 4) {
- auto errorGuard = makeScopeGuard([&] {
- std::fprintf(stderr, "# threads: %u\n", numThreads);
- std::fprintf(stderr, "compression level: %u\n", level);
- });
- Options options;
- options.overwrite = true;
- options.inputFiles = {inputFile};
- options.numThreads = numThreads;
- options.compressionLevel = level;
- options.verbosity = 1;
- ASSERT_TRUE(roundTrip(options));
- errorGuard.dismiss();
- }
- }
- }
-}
-
-TEST(Pzstd, LargeSizes) {
- unsigned seed = std::random_device{}();
- std::fprintf(stderr, "Pzstd.LargeSizes seed: %u\n", seed);
- std::mt19937 gen(seed);
-
- for (unsigned len = 1 << 20; len <= (1 << 24); len *= 2) {
- std::string inputFile = std::tmpnam(nullptr);
- auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
- {
- std::unique_ptr<uint8_t[]> buf(new uint8_t[len]);
- RDG_genBuffer(buf.get(), len, 0.5, 0.0, gen());
- auto fd = std::fopen(inputFile.c_str(), "wb");
- auto written = std::fwrite(buf.get(), 1, len, fd);
- std::fclose(fd);
- ASSERT_EQ(written, len);
- }
- for (unsigned numThreads = 1; numThreads <= 16; numThreads *= 4) {
- for (unsigned level = 1; level <= 4; level *= 4) {
- auto errorGuard = makeScopeGuard([&] {
- std::fprintf(stderr, "# threads: %u\n", numThreads);
- std::fprintf(stderr, "compression level: %u\n", level);
- });
- Options options;
- options.overwrite = true;
- options.inputFiles = {inputFile};
- options.numThreads = std::min(numThreads, options.numThreads);
- options.compressionLevel = level;
- options.verbosity = 1;
- ASSERT_TRUE(roundTrip(options));
- errorGuard.dismiss();
- }
- }
- }
-}
-
-TEST(Pzstd, DISABLED_ExtremelyLargeSize) {
- unsigned seed = std::random_device{}();
- std::fprintf(stderr, "Pzstd.ExtremelyLargeSize seed: %u\n", seed);
- std::mt19937 gen(seed);
-
- std::string inputFile = std::tmpnam(nullptr);
- auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
-
- {
- // Write 4GB + 64 MB
- constexpr size_t kLength = 1 << 26;
- std::unique_ptr<uint8_t[]> buf(new uint8_t[kLength]);
- auto fd = std::fopen(inputFile.c_str(), "wb");
- auto closeGuard = makeScopeGuard([&] { std::fclose(fd); });
- for (size_t i = 0; i < (1 << 6) + 1; ++i) {
- RDG_genBuffer(buf.get(), kLength, 0.5, 0.0, gen());
- auto written = std::fwrite(buf.get(), 1, kLength, fd);
- if (written != kLength) {
- std::fprintf(stderr, "Failed to write file, skipping test\n");
- return;
- }
- }
- }
-
- Options options;
- options.overwrite = true;
- options.inputFiles = {inputFile};
- options.compressionLevel = 1;
- if (options.numThreads == 0) {
- options.numThreads = 1;
- }
- ASSERT_TRUE(roundTrip(options));
-}
-
-TEST(Pzstd, ExtremelyCompressible) {
- std::string inputFile = std::tmpnam(nullptr);
- auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
- {
- std::unique_ptr<uint8_t[]> buf(new uint8_t[10000]);
- std::memset(buf.get(), 'a', 10000);
- auto fd = std::fopen(inputFile.c_str(), "wb");
- auto written = std::fwrite(buf.get(), 1, 10000, fd);
- std::fclose(fd);
- ASSERT_EQ(written, 10000);
- }
- Options options;
- options.overwrite = true;
- options.inputFiles = {inputFile};
- options.numThreads = 1;
- options.compressionLevel = 1;
- ASSERT_TRUE(roundTrip(options));
-}
diff --git a/contrib/pzstd/test/RoundTrip.h b/contrib/pzstd/test/RoundTrip.h
deleted file mode 100644
index c6364ecb4227..000000000000
--- a/contrib/pzstd/test/RoundTrip.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#pragma once
-
-#include "Options.h"
-#include "Pzstd.h"
-#include "utils/ScopeGuard.h"
-
-#include <cstdio>
-#include <string>
-#include <cstdint>
-#include <memory>
-
-namespace pzstd {
-
-inline bool check(std::string source, std::string decompressed) {
- std::unique_ptr<std::uint8_t[]> sBuf(new std::uint8_t[1024]);
- std::unique_ptr<std::uint8_t[]> dBuf(new std::uint8_t[1024]);
-
- auto sFd = std::fopen(source.c_str(), "rb");
- auto dFd = std::fopen(decompressed.c_str(), "rb");
- auto guard = makeScopeGuard([&] {
- std::fclose(sFd);
- std::fclose(dFd);
- });
-
- size_t sRead, dRead;
-
- do {
- sRead = std::fread(sBuf.get(), 1, 1024, sFd);
- dRead = std::fread(dBuf.get(), 1, 1024, dFd);
- if (std::ferror(sFd) || std::ferror(dFd)) {
- return false;
- }
- if (sRead != dRead) {
- return false;
- }
-
- for (size_t i = 0; i < sRead; ++i) {
- if (sBuf.get()[i] != dBuf.get()[i]) {
- return false;
- }
- }
- } while (sRead == 1024);
- if (!std::feof(sFd) || !std::feof(dFd)) {
- return false;
- }
- return true;
-}
-
-inline bool roundTrip(Options& options) {
- if (options.inputFiles.size() != 1) {
- return false;
- }
- std::string source = options.inputFiles.front();
- std::string compressedFile = std::tmpnam(nullptr);
- std::string decompressedFile = std::tmpnam(nullptr);
- auto guard = makeScopeGuard([&] {
- std::remove(compressedFile.c_str());
- std::remove(decompressedFile.c_str());
- });
-
- {
- options.outputFile = compressedFile;
- options.decompress = false;
- if (pzstdMain(options) != 0) {
- return false;
- }
- }
- {
- options.decompress = true;
- options.inputFiles.front() = compressedFile;
- options.outputFile = decompressedFile;
- if (pzstdMain(options) != 0) {
- return false;
- }
- }
- return check(source, decompressedFile);
-}
-}
diff --git a/contrib/pzstd/test/RoundTripTest.cpp b/contrib/pzstd/test/RoundTripTest.cpp
deleted file mode 100644
index 36af0673ae6a..000000000000
--- a/contrib/pzstd/test/RoundTripTest.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-extern "C" {
-#include "datagen.h"
-}
-#include "Options.h"
-#include "test/RoundTrip.h"
-#include "utils/ScopeGuard.h"
-
-#include <cstddef>
-#include <cstdio>
-#include <cstdlib>
-#include <memory>
-#include <random>
-
-using namespace std;
-using namespace pzstd;
-
-namespace {
-string
-writeData(size_t size, double matchProba, double litProba, unsigned seed) {
- std::unique_ptr<uint8_t[]> buf(new uint8_t[size]);
- RDG_genBuffer(buf.get(), size, matchProba, litProba, seed);
- string file = tmpnam(nullptr);
- auto fd = std::fopen(file.c_str(), "wb");
- auto guard = makeScopeGuard([&] { std::fclose(fd); });
- auto bytesWritten = std::fwrite(buf.get(), 1, size, fd);
- if (bytesWritten != size) {
- std::abort();
- }
- return file;
-}
-
-template <typename Generator>
-string generateInputFile(Generator& gen) {
- // Use inputs ranging from 1 Byte to 2^16 Bytes
- std::uniform_int_distribution<size_t> size{1, 1 << 16};
- std::uniform_real_distribution<> prob{0, 1};
- return writeData(size(gen), prob(gen), prob(gen), gen());
-}
-
-template <typename Generator>
-Options generateOptions(Generator& gen, const string& inputFile) {
- Options options;
- options.inputFiles = {inputFile};
- options.overwrite = true;
-
- std::uniform_int_distribution<unsigned> numThreads{1, 32};
- std::uniform_int_distribution<unsigned> compressionLevel{1, 10};
-
- options.numThreads = numThreads(gen);
- options.compressionLevel = compressionLevel(gen);
-
- return options;
-}
-}
-
-int main() {
- std::mt19937 gen(std::random_device{}());
-
- auto newlineGuard = makeScopeGuard([] { std::fprintf(stderr, "\n"); });
- for (unsigned i = 0; i < 10000; ++i) {
- if (i % 100 == 0) {
- std::fprintf(stderr, "Progress: %u%%\r", i / 100);
- }
- auto inputFile = generateInputFile(gen);
- auto inputGuard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
- for (unsigned i = 0; i < 10; ++i) {
- auto options = generateOptions(gen, inputFile);
- if (!roundTrip(options)) {
- std::fprintf(stderr, "numThreads: %u\n", options.numThreads);
- std::fprintf(stderr, "level: %u\n", options.compressionLevel);
- std::fprintf(stderr, "decompress? %u\n", (unsigned)options.decompress);
- std::fprintf(stderr, "file: %s\n", inputFile.c_str());
- return 1;
- }
- }
- }
- return 0;
-}
diff --git a/contrib/pzstd/utils/BUCK b/contrib/pzstd/utils/BUCK
deleted file mode 100644
index e757f412070b..000000000000
--- a/contrib/pzstd/utils/BUCK
+++ /dev/null
@@ -1,75 +0,0 @@
-cxx_library(
- name='buffer',
- visibility=['PUBLIC'],
- header_namespace='utils',
- exported_headers=['Buffer.h'],
- deps=[':range'],
-)
-
-cxx_library(
- name='file_system',
- visibility=['PUBLIC'],
- header_namespace='utils',
- exported_headers=['FileSystem.h'],
- deps=[':range'],
-)
-
-cxx_library(
- name='likely',
- visibility=['PUBLIC'],
- header_namespace='utils',
- exported_headers=['Likely.h'],
-)
-
-cxx_library(
- name='range',
- visibility=['PUBLIC'],
- header_namespace='utils',
- exported_headers=['Range.h'],
- deps=[':likely'],
-)
-
-cxx_library(
- name='resource_pool',
- visibility=['PUBLIC'],
- header_namespace='utils',
- exported_headers=['ResourcePool.h'],
-)
-
-cxx_library(
- name='scope_guard',
- visibility=['PUBLIC'],
- header_namespace='utils',
- exported_headers=['ScopeGuard.h'],
-)
-
-cxx_library(
- name='thread_pool',
- visibility=['PUBLIC'],
- header_namespace='utils',
- exported_headers=['ThreadPool.h'],
- deps=[':work_queue'],
-)
-
-cxx_library(
- name='work_queue',
- visibility=['PUBLIC'],
- header_namespace='utils',
- exported_headers=['WorkQueue.h'],
- deps=[':buffer'],
-)
-
-cxx_library(
- name='utils',
- visibility=['PUBLIC'],
- deps=[
- ':buffer',
- ':file_system',
- ':likely',
- ':range',
- ':resource_pool',
- ':scope_guard',
- ':thread_pool',
- ':work_queue',
- ],
-)
diff --git a/contrib/pzstd/utils/Buffer.h b/contrib/pzstd/utils/Buffer.h
deleted file mode 100644
index f69c3b4d9f7a..000000000000
--- a/contrib/pzstd/utils/Buffer.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#pragma once
-
-#include "utils/Range.h"
-
-#include <array>
-#include <cstddef>
-#include <memory>
-
-namespace pzstd {
-
-/**
- * A `Buffer` has a pointer to a shared buffer, and a range of the buffer that
- * it owns.
- * The idea is that you can allocate one buffer, and write chunks into it
- * and break off those chunks.
- * The underlying buffer is reference counted, and will be destroyed when all
- * `Buffer`s that reference it are destroyed.
- */
-class Buffer {
- std::shared_ptr<unsigned char> buffer_;
- MutableByteRange range_;
-
- static void delete_buffer(unsigned char* buffer) {
- delete[] buffer;
- }
-
- public:
- /// Construct an empty buffer that owns no data.
- explicit Buffer() {}
-
- /// Construct a `Buffer` that owns a new underlying buffer of size `size`.
- explicit Buffer(std::size_t size)
- : buffer_(new unsigned char[size], delete_buffer),
- range_(buffer_.get(), buffer_.get() + size) {}
-
- explicit Buffer(std::shared_ptr<unsigned char> buffer, MutableByteRange data)
- : buffer_(buffer), range_(data) {}
-
- Buffer(Buffer&&) = default;
- Buffer& operator=(Buffer&&) & = default;
-
- /**
- * Splits the data into two pieces: [begin, begin + n), [begin + n, end).
- * Their data both points into the same underlying buffer.
- * Modifies the original `Buffer` to point to only [begin + n, end).
- *
- * @param n The offset to split at.
- * @returns A buffer that owns the data [begin, begin + n).
- */
- Buffer splitAt(std::size_t n) {
- auto firstPiece = range_.subpiece(0, n);
- range_.advance(n);
- return Buffer(buffer_, firstPiece);
- }
-
- /// Modifies the buffer to point to the range [begin + n, end).
- void advance(std::size_t n) {
- range_.advance(n);
- }
-
- /// Modifies the buffer to point to the range [begin, end - n).
- void subtract(std::size_t n) {
- range_.subtract(n);
- }
-
- /// Returns a read only `Range` pointing to the `Buffer`s data.
- ByteRange range() const {
- return range_;
- }
- /// Returns a mutable `Range` pointing to the `Buffer`s data.
- MutableByteRange range() {
- return range_;
- }
-
- const unsigned char* data() const {
- return range_.data();
- }
-
- unsigned char* data() {
- return range_.data();
- }
-
- std::size_t size() const {
- return range_.size();
- }
-
- bool empty() const {
- return range_.empty();
- }
-};
-}
diff --git a/contrib/pzstd/utils/FileSystem.h b/contrib/pzstd/utils/FileSystem.h
deleted file mode 100644
index 3cfbe86e507e..000000000000
--- a/contrib/pzstd/utils/FileSystem.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#pragma once
-
-#include "utils/Range.h"
-
-#include <sys/stat.h>
-#include <cerrno>
-#include <cstdint>
-#include <system_error>
-
-// A small subset of `std::filesystem`.
-// `std::filesystem` should be a drop in replacement.
-// See http://en.cppreference.com/w/cpp/filesystem for documentation.
-
-namespace pzstd {
-
-// using file_status = ... causes gcc to emit a false positive warning
-#if defined(_MSC_VER)
-typedef struct ::_stat64 file_status;
-#else
-typedef struct ::stat file_status;
-#endif
-
-/// http://en.cppreference.com/w/cpp/filesystem/status
-inline file_status status(StringPiece path, std::error_code& ec) noexcept {
- file_status status;
-#if defined(_MSC_VER)
- const auto error = ::_stat64(path.data(), &status);
-#else
- const auto error = ::stat(path.data(), &status);
-#endif
- if (error) {
- ec.assign(errno, std::generic_category());
- } else {
- ec.clear();
- }
- return status;
-}
-
-/// http://en.cppreference.com/w/cpp/filesystem/is_regular_file
-inline bool is_regular_file(file_status status) noexcept {
-#if defined(S_ISREG)
- return S_ISREG(status.st_mode);
-#elif !defined(S_ISREG) && defined(S_IFMT) && defined(S_IFREG)
- return (status.st_mode & S_IFMT) == S_IFREG;
-#else
- static_assert(false, "No POSIX stat() support.");
-#endif
-}
-
-/// http://en.cppreference.com/w/cpp/filesystem/is_regular_file
-inline bool is_regular_file(StringPiece path, std::error_code& ec) noexcept {
- return is_regular_file(status(path, ec));
-}
-
-/// http://en.cppreference.com/w/cpp/filesystem/is_directory
-inline bool is_directory(file_status status) noexcept {
-#if defined(S_ISDIR)
- return S_ISDIR(status.st_mode);
-#elif !defined(S_ISDIR) && defined(S_IFMT) && defined(S_IFDIR)
- return (status.st_mode & S_IFMT) == S_IFDIR;
-#else
- static_assert(false, "NO POSIX stat() support.");
-#endif
-}
-
-/// http://en.cppreference.com/w/cpp/filesystem/is_directory
-inline bool is_directory(StringPiece path, std::error_code& ec) noexcept {
- return is_directory(status(path, ec));
-}
-
-/// http://en.cppreference.com/w/cpp/filesystem/file_size
-inline std::uintmax_t file_size(
- StringPiece path,
- std::error_code& ec) noexcept {
- auto stat = status(path, ec);
- if (ec) {
- return -1;
- }
- if (!is_regular_file(stat)) {
- ec.assign(ENOTSUP, std::generic_category());
- return -1;
- }
- ec.clear();
- return stat.st_size;
-}
-}
diff --git a/contrib/pzstd/utils/Likely.h b/contrib/pzstd/utils/Likely.h
deleted file mode 100644
index 7cea8da2771f..000000000000
--- a/contrib/pzstd/utils/Likely.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-
-/**
- * Compiler hints to indicate the fast path of an "if" branch: whether
- * the if condition is likely to be true or false.
- *
- * @author Tudor Bosman (tudorb@fb.com)
- */
-
-#pragma once
-
-#undef LIKELY
-#undef UNLIKELY
-
-#if defined(__GNUC__) && __GNUC__ >= 4
-#define LIKELY(x) (__builtin_expect((x), 1))
-#define UNLIKELY(x) (__builtin_expect((x), 0))
-#else
-#define LIKELY(x) (x)
-#define UNLIKELY(x) (x)
-#endif
diff --git a/contrib/pzstd/utils/Range.h b/contrib/pzstd/utils/Range.h
deleted file mode 100644
index fedb5d786c68..000000000000
--- a/contrib/pzstd/utils/Range.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-
-/**
- * A subset of `folly/Range.h`.
- * All code copied verbatim modulo formatting
- */
-#pragma once
-
-#include "utils/Likely.h"
-
-#include <cstddef>
-#include <cstring>
-#include <stdexcept>
-#include <string>
-#include <type_traits>
-
-namespace pzstd {
-
-namespace detail {
-/*
- *Use IsCharPointer<T>::type to enable const char* or char*.
- *Use IsCharPointer<T>::const_type to enable only const char*.
-*/
-template <class T>
-struct IsCharPointer {};
-
-template <>
-struct IsCharPointer<char*> {
- typedef int type;
-};
-
-template <>
-struct IsCharPointer<const char*> {
- typedef int const_type;
- typedef int type;
-};
-
-} // namespace detail
-
-template <typename Iter>
-class Range {
- Iter b_;
- Iter e_;
-
- public:
- using size_type = std::size_t;
- using iterator = Iter;
- using const_iterator = Iter;
- using value_type = typename std::remove_reference<
- typename std::iterator_traits<Iter>::reference>::type;
- using reference = typename std::iterator_traits<Iter>::reference;
-
- constexpr Range() : b_(), e_() {}
- constexpr Range(Iter begin, Iter end) : b_(begin), e_(end) {}
-
- constexpr Range(Iter begin, size_type size) : b_(begin), e_(begin + size) {}
-
- template <class T = Iter, typename detail::IsCharPointer<T>::type = 0>
- /* implicit */ Range(Iter str) : b_(str), e_(str + std::strlen(str)) {}
-
- template <class T = Iter, typename detail::IsCharPointer<T>::const_type = 0>
- /* implicit */ Range(const std::string& str)
- : b_(str.data()), e_(b_ + str.size()) {}
-
- // Allow implicit conversion from Range<From> to Range<To> if From is
- // implicitly convertible to To.
- template <
- class OtherIter,
- typename std::enable_if<
- (!std::is_same<Iter, OtherIter>::value &&
- std::is_convertible<OtherIter, Iter>::value),
- int>::type = 0>
- constexpr /* implicit */ Range(const Range<OtherIter>& other)
- : b_(other.begin()), e_(other.end()) {}
-
- Range(const Range&) = default;
- Range(Range&&) = default;
-
- Range& operator=(const Range&) & = default;
- Range& operator=(Range&&) & = default;
-
- constexpr size_type size() const {
- return e_ - b_;
- }
- bool empty() const {
- return b_ == e_;
- }
- Iter data() const {
- return b_;
- }
- Iter begin() const {
- return b_;
- }
- Iter end() const {
- return e_;
- }
-
- void advance(size_type n) {
- if (UNLIKELY(n > size())) {
- throw std::out_of_range("index out of range");
- }
- b_ += n;
- }
-
- void subtract(size_type n) {
- if (UNLIKELY(n > size())) {
- throw std::out_of_range("index out of range");
- }
- e_ -= n;
- }
-
- Range subpiece(size_type first, size_type length = std::string::npos) const {
- if (UNLIKELY(first > size())) {
- throw std::out_of_range("index out of range");
- }
-
- return Range(b_ + first, std::min(length, size() - first));
- }
-};
-
-using ByteRange = Range<const unsigned char*>;
-using MutableByteRange = Range<unsigned char*>;
-using StringPiece = Range<const char*>;
-}
diff --git a/contrib/pzstd/utils/ResourcePool.h b/contrib/pzstd/utils/ResourcePool.h
deleted file mode 100644
index 8dfcdd765909..000000000000
--- a/contrib/pzstd/utils/ResourcePool.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#pragma once
-
-#include <cassert>
-#include <functional>
-#include <memory>
-#include <mutex>
-#include <vector>
-
-namespace pzstd {
-
-/**
- * An unbounded pool of resources.
- * A `ResourcePool<T>` requires a factory function that takes allocates `T*` and
- * a free function that frees a `T*`.
- * Calling `ResourcePool::get()` will give you a new `ResourcePool::UniquePtr`
- * to a `T`, and when it goes out of scope the resource will be returned to the
- * pool.
- * The `ResourcePool<T>` *must* survive longer than any resources it hands out.
- * Remember that `ResourcePool<T>` hands out mutable `T`s, so make sure to clean
- * up the resource before or after every use.
- */
-template <typename T>
-class ResourcePool {
- public:
- class Deleter;
- using Factory = std::function<T*()>;
- using Free = std::function<void(T*)>;
- using UniquePtr = std::unique_ptr<T, Deleter>;
-
- private:
- std::mutex mutex_;
- Factory factory_;
- Free free_;
- std::vector<T*> resources_;
- unsigned inUse_;
-
- public:
- /**
- * Creates a `ResourcePool`.
- *
- * @param factory The function to use to create new resources.
- * @param free The function to use to free resources created by `factory`.
- */
- ResourcePool(Factory factory, Free free)
- : factory_(std::move(factory)), free_(std::move(free)), inUse_(0) {}
-
- /**
- * @returns A unique pointer to a resource. The resource is null iff
- * there are no available resources and `factory()` returns null.
- */
- UniquePtr get() {
- std::lock_guard<std::mutex> lock(mutex_);
- if (!resources_.empty()) {
- UniquePtr resource{resources_.back(), Deleter{*this}};
- resources_.pop_back();
- ++inUse_;
- return resource;
- }
- UniquePtr resource{factory_(), Deleter{*this}};
- ++inUse_;
- return resource;
- }
-
- ~ResourcePool() noexcept {
- assert(inUse_ == 0);
- for (const auto resource : resources_) {
- free_(resource);
- }
- }
-
- class Deleter {
- ResourcePool *pool_;
- public:
- explicit Deleter(ResourcePool &pool) : pool_(&pool) {}
-
- void operator() (T *resource) {
- std::lock_guard<std::mutex> lock(pool_->mutex_);
- // Make sure we don't put null resources into the pool
- if (resource) {
- pool_->resources_.push_back(resource);
- }
- assert(pool_->inUse_ > 0);
- --pool_->inUse_;
- }
- };
-};
-
-}
diff --git a/contrib/pzstd/utils/ScopeGuard.h b/contrib/pzstd/utils/ScopeGuard.h
deleted file mode 100644
index 31768f43d22c..000000000000
--- a/contrib/pzstd/utils/ScopeGuard.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#pragma once
-
-#include <utility>
-
-namespace pzstd {
-
-/**
- * Dismissable scope guard.
- * `Function` must be callable and take no parameters.
- * Unless `dissmiss()` is called, the callable is executed upon destruction of
- * `ScopeGuard`.
- *
- * Example:
- *
- * auto guard = makeScopeGuard([&] { cleanup(); });
- */
-template <typename Function>
-class ScopeGuard {
- Function function;
- bool dismissed;
-
- public:
- explicit ScopeGuard(Function&& function)
- : function(std::move(function)), dismissed(false) {}
-
- void dismiss() {
- dismissed = true;
- }
-
- ~ScopeGuard() noexcept {
- if (!dismissed) {
- function();
- }
- }
-};
-
-/// Creates a scope guard from `function`.
-template <typename Function>
-ScopeGuard<Function> makeScopeGuard(Function&& function) {
- return ScopeGuard<Function>(std::forward<Function>(function));
-}
-}
diff --git a/contrib/pzstd/utils/ThreadPool.h b/contrib/pzstd/utils/ThreadPool.h
deleted file mode 100644
index 8ece8e0da4eb..000000000000
--- a/contrib/pzstd/utils/ThreadPool.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#pragma once
-
-#include "utils/WorkQueue.h"
-
-#include <cstddef>
-#include <functional>
-#include <thread>
-#include <vector>
-
-namespace pzstd {
-/// A simple thread pool that pulls tasks off its queue in FIFO order.
-class ThreadPool {
- std::vector<std::thread> threads_;
-
- WorkQueue<std::function<void()>> tasks_;
-
- public:
- /// Constructs a thread pool with `numThreads` threads.
- explicit ThreadPool(std::size_t numThreads) {
- threads_.reserve(numThreads);
- for (std::size_t i = 0; i < numThreads; ++i) {
- threads_.emplace_back([this] {
- std::function<void()> task;
- while (tasks_.pop(task)) {
- task();
- }
- });
- }
- }
-
- /// Finishes all tasks currently in the queue.
- ~ThreadPool() {
- tasks_.finish();
- for (auto& thread : threads_) {
- thread.join();
- }
- }
-
- /**
- * Adds `task` to the queue of tasks to execute. Since `task` is a
- * `std::function<>`, it cannot be a move only type. So any lambda passed must
- * not capture move only types (like `std::unique_ptr`).
- *
- * @param task The task to execute.
- */
- void add(std::function<void()> task) {
- tasks_.push(std::move(task));
- }
-};
-}
diff --git a/contrib/pzstd/utils/WorkQueue.h b/contrib/pzstd/utils/WorkQueue.h
deleted file mode 100644
index 1d14d922c648..000000000000
--- a/contrib/pzstd/utils/WorkQueue.h
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#pragma once
-
-#include "utils/Buffer.h"
-
-#include <atomic>
-#include <cassert>
-#include <cstddef>
-#include <condition_variable>
-#include <cstddef>
-#include <functional>
-#include <mutex>
-#include <queue>
-
-namespace pzstd {
-
-/// Unbounded thread-safe work queue.
-template <typename T>
-class WorkQueue {
- // Protects all member variable access
- std::mutex mutex_;
- std::condition_variable readerCv_;
- std::condition_variable writerCv_;
- std::condition_variable finishCv_;
-
- std::queue<T> queue_;
- bool done_;
- std::size_t maxSize_;
-
- // Must have lock to call this function
- bool full() const {
- if (maxSize_ == 0) {
- return false;
- }
- return queue_.size() >= maxSize_;
- }
-
- public:
- /**
- * Constructs an empty work queue with an optional max size.
- * If `maxSize == 0` the queue size is unbounded.
- *
- * @param maxSize The maximum allowed size of the work queue.
- */
- WorkQueue(std::size_t maxSize = 0) : done_(false), maxSize_(maxSize) {}
-
- /**
- * Push an item onto the work queue. Notify a single thread that work is
- * available. If `finish()` has been called, do nothing and return false.
- * If `push()` returns false, then `item` has not been moved from.
- *
- * @param item Item to push onto the queue.
- * @returns True upon success, false if `finish()` has been called. An
- * item was pushed iff `push()` returns true.
- */
- bool push(T&& item) {
- {
- std::unique_lock<std::mutex> lock(mutex_);
- while (full() && !done_) {
- writerCv_.wait(lock);
- }
- if (done_) {
- return false;
- }
- queue_.push(std::move(item));
- }
- readerCv_.notify_one();
- return true;
- }
-
- /**
- * Attempts to pop an item off the work queue. It will block until data is
- * available or `finish()` has been called.
- *
- * @param[out] item If `pop` returns `true`, it contains the popped item.
- * If `pop` returns `false`, it is unmodified.
- * @returns True upon success. False if the queue is empty and
- * `finish()` has been called.
- */
- bool pop(T& item) {
- {
- std::unique_lock<std::mutex> lock(mutex_);
- while (queue_.empty() && !done_) {
- readerCv_.wait(lock);
- }
- if (queue_.empty()) {
- assert(done_);
- return false;
- }
- item = std::move(queue_.front());
- queue_.pop();
- }
- writerCv_.notify_one();
- return true;
- }
-
- /**
- * Sets the maximum queue size. If `maxSize == 0` then it is unbounded.
- *
- * @param maxSize The new maximum queue size.
- */
- void setMaxSize(std::size_t maxSize) {
- {
- std::lock_guard<std::mutex> lock(mutex_);
- maxSize_ = maxSize;
- }
- writerCv_.notify_all();
- }
-
- /**
- * Promise that `push()` won't be called again, so once the queue is empty
- * there will never any more work.
- */
- void finish() {
- {
- std::lock_guard<std::mutex> lock(mutex_);
- assert(!done_);
- done_ = true;
- }
- readerCv_.notify_all();
- writerCv_.notify_all();
- finishCv_.notify_all();
- }
-
- /// Blocks until `finish()` has been called (but the queue may not be empty).
- void waitUntilFinished() {
- std::unique_lock<std::mutex> lock(mutex_);
- while (!done_) {
- finishCv_.wait(lock);
- }
- }
-};
-
-/// Work queue for `Buffer`s that knows the total number of bytes in the queue.
-class BufferWorkQueue {
- WorkQueue<Buffer> queue_;
- std::atomic<std::size_t> size_;
-
- public:
- BufferWorkQueue(std::size_t maxSize = 0) : queue_(maxSize), size_(0) {}
-
- void push(Buffer buffer) {
- size_.fetch_add(buffer.size());
- queue_.push(std::move(buffer));
- }
-
- bool pop(Buffer& buffer) {
- bool result = queue_.pop(buffer);
- if (result) {
- size_.fetch_sub(buffer.size());
- }
- return result;
- }
-
- void setMaxSize(std::size_t maxSize) {
- queue_.setMaxSize(maxSize);
- }
-
- void finish() {
- queue_.finish();
- }
-
- /**
- * Blocks until `finish()` has been called.
- *
- * @returns The total number of bytes of all the `Buffer`s currently in the
- * queue.
- */
- std::size_t size() {
- queue_.waitUntilFinished();
- return size_.load();
- }
-};
-}
diff --git a/contrib/pzstd/utils/test/BUCK b/contrib/pzstd/utils/test/BUCK
deleted file mode 100644
index a5113cab6b0e..000000000000
--- a/contrib/pzstd/utils/test/BUCK
+++ /dev/null
@@ -1,35 +0,0 @@
-cxx_test(
- name='buffer_test',
- srcs=['BufferTest.cpp'],
- deps=['//contrib/pzstd/utils:buffer'],
-)
-
-cxx_test(
- name='range_test',
- srcs=['RangeTest.cpp'],
- deps=['//contrib/pzstd/utils:range'],
-)
-
-cxx_test(
- name='resource_pool_test',
- srcs=['ResourcePoolTest.cpp'],
- deps=['//contrib/pzstd/utils:resource_pool'],
-)
-
-cxx_test(
- name='scope_guard_test',
- srcs=['ScopeGuardTest.cpp'],
- deps=['//contrib/pzstd/utils:scope_guard'],
-)
-
-cxx_test(
- name='thread_pool_test',
- srcs=['ThreadPoolTest.cpp'],
- deps=['//contrib/pzstd/utils:thread_pool'],
-)
-
-cxx_test(
- name='work_queue_test',
- srcs=['RangeTest.cpp'],
- deps=['//contrib/pzstd/utils:work_queue'],
-)
diff --git a/contrib/pzstd/utils/test/BufferTest.cpp b/contrib/pzstd/utils/test/BufferTest.cpp
deleted file mode 100644
index fbba74e82628..000000000000
--- a/contrib/pzstd/utils/test/BufferTest.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#include "utils/Buffer.h"
-#include "utils/Range.h"
-
-#include <gtest/gtest.h>
-#include <memory>
-
-using namespace pzstd;
-
-namespace {
-void deleter(const unsigned char* buf) {
- delete[] buf;
-}
-}
-
-TEST(Buffer, Constructors) {
- Buffer empty;
- EXPECT_TRUE(empty.empty());
- EXPECT_EQ(0, empty.size());
-
- Buffer sized(5);
- EXPECT_FALSE(sized.empty());
- EXPECT_EQ(5, sized.size());
-
- Buffer moved(std::move(sized));
- EXPECT_FALSE(sized.empty());
- EXPECT_EQ(5, sized.size());
-
- Buffer assigned;
- assigned = std::move(moved);
- EXPECT_FALSE(sized.empty());
- EXPECT_EQ(5, sized.size());
-}
-
-TEST(Buffer, BufferManagement) {
- std::shared_ptr<unsigned char> buf(new unsigned char[10], deleter);
- {
- Buffer acquired(buf, MutableByteRange(buf.get(), buf.get() + 10));
- EXPECT_EQ(2, buf.use_count());
- Buffer moved(std::move(acquired));
- EXPECT_EQ(2, buf.use_count());
- Buffer assigned;
- assigned = std::move(moved);
- EXPECT_EQ(2, buf.use_count());
-
- Buffer split = assigned.splitAt(5);
- EXPECT_EQ(3, buf.use_count());
-
- split.advance(1);
- assigned.subtract(1);
- EXPECT_EQ(3, buf.use_count());
- }
- EXPECT_EQ(1, buf.use_count());
-}
-
-TEST(Buffer, Modifiers) {
- Buffer buf(10);
- {
- unsigned char i = 0;
- for (auto& byte : buf.range()) {
- byte = i++;
- }
- }
-
- auto prefix = buf.splitAt(2);
-
- ASSERT_EQ(2, prefix.size());
- EXPECT_EQ(0, *prefix.data());
-
- ASSERT_EQ(8, buf.size());
- EXPECT_EQ(2, *buf.data());
-
- buf.advance(2);
- EXPECT_EQ(4, *buf.data());
-
- EXPECT_EQ(9, *(buf.range().end() - 1));
-
- buf.subtract(2);
- EXPECT_EQ(7, *(buf.range().end() - 1));
-
- EXPECT_EQ(4, buf.size());
-}
diff --git a/contrib/pzstd/utils/test/RangeTest.cpp b/contrib/pzstd/utils/test/RangeTest.cpp
deleted file mode 100644
index 755b50fa6e80..000000000000
--- a/contrib/pzstd/utils/test/RangeTest.cpp
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#include "utils/Range.h"
-
-#include <gtest/gtest.h>
-#include <string>
-
-using namespace pzstd;
-
-// Range is directly copied from folly.
-// Just some sanity tests to make sure everything seems to work.
-
-TEST(Range, Constructors) {
- StringPiece empty;
- EXPECT_TRUE(empty.empty());
- EXPECT_EQ(0, empty.size());
-
- std::string str = "hello";
- {
- Range<std::string::const_iterator> piece(str.begin(), str.end());
- EXPECT_EQ(5, piece.size());
- EXPECT_EQ('h', *piece.data());
- EXPECT_EQ('o', *(piece.end() - 1));
- }
-
- {
- StringPiece piece(str.data(), str.size());
- EXPECT_EQ(5, piece.size());
- EXPECT_EQ('h', *piece.data());
- EXPECT_EQ('o', *(piece.end() - 1));
- }
-
- {
- StringPiece piece(str);
- EXPECT_EQ(5, piece.size());
- EXPECT_EQ('h', *piece.data());
- EXPECT_EQ('o', *(piece.end() - 1));
- }
-
- {
- StringPiece piece(str.c_str());
- EXPECT_EQ(5, piece.size());
- EXPECT_EQ('h', *piece.data());
- EXPECT_EQ('o', *(piece.end() - 1));
- }
-}
-
-TEST(Range, Modifiers) {
- StringPiece range("hello world");
- ASSERT_EQ(11, range.size());
-
- {
- auto hello = range.subpiece(0, 5);
- EXPECT_EQ(5, hello.size());
- EXPECT_EQ('h', *hello.data());
- EXPECT_EQ('o', *(hello.end() - 1));
- }
- {
- auto hello = range;
- hello.subtract(6);
- EXPECT_EQ(5, hello.size());
- EXPECT_EQ('h', *hello.data());
- EXPECT_EQ('o', *(hello.end() - 1));
- }
- {
- auto world = range;
- world.advance(6);
- EXPECT_EQ(5, world.size());
- EXPECT_EQ('w', *world.data());
- EXPECT_EQ('d', *(world.end() - 1));
- }
-
- std::string expected = "hello world";
- EXPECT_EQ(expected, std::string(range.begin(), range.end()));
- EXPECT_EQ(expected, std::string(range.data(), range.size()));
-}
diff --git a/contrib/pzstd/utils/test/ResourcePoolTest.cpp b/contrib/pzstd/utils/test/ResourcePoolTest.cpp
deleted file mode 100644
index 6fe145180be9..000000000000
--- a/contrib/pzstd/utils/test/ResourcePoolTest.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#include "utils/ResourcePool.h"
-
-#include <gtest/gtest.h>
-#include <atomic>
-#include <thread>
-
-using namespace pzstd;
-
-TEST(ResourcePool, FullTest) {
- unsigned numCreated = 0;
- unsigned numDeleted = 0;
- {
- ResourcePool<int> pool(
- [&numCreated] { ++numCreated; return new int{5}; },
- [&numDeleted](int *x) { ++numDeleted; delete x; });
-
- {
- auto i = pool.get();
- EXPECT_EQ(5, *i);
- *i = 6;
- }
- {
- auto i = pool.get();
- EXPECT_EQ(6, *i);
- auto j = pool.get();
- EXPECT_EQ(5, *j);
- *j = 7;
- }
- {
- auto i = pool.get();
- EXPECT_EQ(6, *i);
- auto j = pool.get();
- EXPECT_EQ(7, *j);
- }
- }
- EXPECT_EQ(2, numCreated);
- EXPECT_EQ(numCreated, numDeleted);
-}
-
-TEST(ResourcePool, ThreadSafe) {
- std::atomic<unsigned> numCreated{0};
- std::atomic<unsigned> numDeleted{0};
- {
- ResourcePool<int> pool(
- [&numCreated] { ++numCreated; return new int{0}; },
- [&numDeleted](int *x) { ++numDeleted; delete x; });
- auto push = [&pool] {
- for (int i = 0; i < 100; ++i) {
- auto x = pool.get();
- ++*x;
- }
- };
- std::thread t1{push};
- std::thread t2{push};
- t1.join();
- t2.join();
-
- auto x = pool.get();
- auto y = pool.get();
- EXPECT_EQ(200, *x + *y);
- }
- EXPECT_GE(2, numCreated);
- EXPECT_EQ(numCreated, numDeleted);
-}
diff --git a/contrib/pzstd/utils/test/ScopeGuardTest.cpp b/contrib/pzstd/utils/test/ScopeGuardTest.cpp
deleted file mode 100644
index 7bc624da79b2..000000000000
--- a/contrib/pzstd/utils/test/ScopeGuardTest.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#include "utils/ScopeGuard.h"
-
-#include <gtest/gtest.h>
-
-using namespace pzstd;
-
-TEST(ScopeGuard, Dismiss) {
- {
- auto guard = makeScopeGuard([&] { EXPECT_TRUE(false); });
- guard.dismiss();
- }
-}
-
-TEST(ScopeGuard, Executes) {
- bool executed = false;
- {
- auto guard = makeScopeGuard([&] { executed = true; });
- }
- EXPECT_TRUE(executed);
-}
diff --git a/contrib/pzstd/utils/test/ThreadPoolTest.cpp b/contrib/pzstd/utils/test/ThreadPoolTest.cpp
deleted file mode 100644
index 703fd4c9ca17..000000000000
--- a/contrib/pzstd/utils/test/ThreadPoolTest.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#include "utils/ThreadPool.h"
-
-#include <gtest/gtest.h>
-#include <atomic>
-#include <iostream>
-#include <thread>
-#include <vector>
-
-using namespace pzstd;
-
-TEST(ThreadPool, Ordering) {
- std::vector<int> results;
-
- {
- ThreadPool executor(1);
- for (int i = 0; i < 10; ++i) {
- executor.add([ &results, i ] { results.push_back(i); });
- }
- }
-
- for (int i = 0; i < 10; ++i) {
- EXPECT_EQ(i, results[i]);
- }
-}
-
-TEST(ThreadPool, AllJobsFinished) {
- std::atomic<unsigned> numFinished{0};
- std::atomic<bool> start{false};
- {
- std::cerr << "Creating executor" << std::endl;
- ThreadPool executor(5);
- for (int i = 0; i < 10; ++i) {
- executor.add([ &numFinished, &start ] {
- while (!start.load()) {
- std::this_thread::yield();
- }
- ++numFinished;
- });
- }
- std::cerr << "Starting" << std::endl;
- start.store(true);
- std::cerr << "Finishing" << std::endl;
- }
- EXPECT_EQ(10, numFinished.load());
-}
-
-TEST(ThreadPool, AddJobWhileJoining) {
- std::atomic<bool> done{false};
- {
- ThreadPool executor(1);
- executor.add([&executor, &done] {
- while (!done.load()) {
- std::this_thread::yield();
- }
- // Sleep for a second to be sure that we are joining
- std::this_thread::sleep_for(std::chrono::seconds(1));
- executor.add([] {
- EXPECT_TRUE(false);
- });
- });
- done.store(true);
- }
-}
diff --git a/contrib/pzstd/utils/test/WorkQueueTest.cpp b/contrib/pzstd/utils/test/WorkQueueTest.cpp
deleted file mode 100644
index 14cf77304f21..000000000000
--- a/contrib/pzstd/utils/test/WorkQueueTest.cpp
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * Copyright (c) 2016-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-#include "utils/Buffer.h"
-#include "utils/WorkQueue.h"
-
-#include <gtest/gtest.h>
-#include <iostream>
-#include <memory>
-#include <mutex>
-#include <thread>
-#include <vector>
-
-using namespace pzstd;
-
-namespace {
-struct Popper {
- WorkQueue<int>* queue;
- int* results;
- std::mutex* mutex;
-
- void operator()() {
- int result;
- while (queue->pop(result)) {
- std::lock_guard<std::mutex> lock(*mutex);
- results[result] = result;
- }
- }
-};
-}
-
-TEST(WorkQueue, SingleThreaded) {
- WorkQueue<int> queue;
- int result;
-
- queue.push(5);
- EXPECT_TRUE(queue.pop(result));
- EXPECT_EQ(5, result);
-
- queue.push(1);
- queue.push(2);
- EXPECT_TRUE(queue.pop(result));
- EXPECT_EQ(1, result);
- EXPECT_TRUE(queue.pop(result));
- EXPECT_EQ(2, result);
-
- queue.push(1);
- queue.push(2);
- queue.finish();
- EXPECT_TRUE(queue.pop(result));
- EXPECT_EQ(1, result);
- EXPECT_TRUE(queue.pop(result));
- EXPECT_EQ(2, result);
- EXPECT_FALSE(queue.pop(result));
-
- queue.waitUntilFinished();
-}
-
-TEST(WorkQueue, SPSC) {
- WorkQueue<int> queue;
- const int max = 100;
-
- for (int i = 0; i < 10; ++i) {
- queue.push(int{i});
- }
-
- std::thread thread([ &queue, max ] {
- int result;
- for (int i = 0;; ++i) {
- if (!queue.pop(result)) {
- EXPECT_EQ(i, max);
- break;
- }
- EXPECT_EQ(i, result);
- }
- });
-
- std::this_thread::yield();
- for (int i = 10; i < max; ++i) {
- queue.push(int{i});
- }
- queue.finish();
-
- thread.join();
-}
-
-TEST(WorkQueue, SPMC) {
- WorkQueue<int> queue;
- std::vector<int> results(50, -1);
- std::mutex mutex;
- std::vector<std::thread> threads;
- for (int i = 0; i < 5; ++i) {
- threads.emplace_back(Popper{&queue, results.data(), &mutex});
- }
-
- for (int i = 0; i < 50; ++i) {
- queue.push(int{i});
- }
- queue.finish();
-
- for (auto& thread : threads) {
- thread.join();
- }
-
- for (int i = 0; i < 50; ++i) {
- EXPECT_EQ(i, results[i]);
- }
-}
-
-TEST(WorkQueue, MPMC) {
- WorkQueue<int> queue;
- std::vector<int> results(100, -1);
- std::mutex mutex;
- std::vector<std::thread> popperThreads;
- for (int i = 0; i < 4; ++i) {
- popperThreads.emplace_back(Popper{&queue, results.data(), &mutex});
- }
-
- std::vector<std::thread> pusherThreads;
- for (int i = 0; i < 2; ++i) {
- auto min = i * 50;
- auto max = (i + 1) * 50;
- pusherThreads.emplace_back(
- [ &queue, min, max ] {
- for (int i = min; i < max; ++i) {
- queue.push(int{i});
- }
- });
- }
-
- for (auto& thread : pusherThreads) {
- thread.join();
- }
- queue.finish();
-
- for (auto& thread : popperThreads) {
- thread.join();
- }
-
- for (int i = 0; i < 100; ++i) {
- EXPECT_EQ(i, results[i]);
- }
-}
-
-TEST(WorkQueue, BoundedSizeWorks) {
- WorkQueue<int> queue(1);
- int result;
- queue.push(5);
- queue.pop(result);
- queue.push(5);
- queue.pop(result);
- queue.push(5);
- queue.finish();
- queue.pop(result);
- EXPECT_EQ(5, result);
-}
-
-TEST(WorkQueue, BoundedSizePushAfterFinish) {
- WorkQueue<int> queue(1);
- int result;
- queue.push(5);
- std::thread pusher([&queue] {
- queue.push(6);
- });
- // Dirtily try and make sure that pusher has run.
- std::this_thread::sleep_for(std::chrono::seconds(1));
- queue.finish();
- EXPECT_TRUE(queue.pop(result));
- EXPECT_EQ(5, result);
- EXPECT_FALSE(queue.pop(result));
-
- pusher.join();
-}
-
-TEST(WorkQueue, SetMaxSize) {
- WorkQueue<int> queue(2);
- int result;
- queue.push(5);
- queue.push(6);
- queue.setMaxSize(1);
- std::thread pusher([&queue] {
- queue.push(7);
- });
- // Dirtily try and make sure that pusher has run.
- std::this_thread::sleep_for(std::chrono::seconds(1));
- queue.finish();
- EXPECT_TRUE(queue.pop(result));
- EXPECT_EQ(5, result);
- EXPECT_TRUE(queue.pop(result));
- EXPECT_EQ(6, result);
- EXPECT_FALSE(queue.pop(result));
-
- pusher.join();
-}
-
-TEST(WorkQueue, BoundedSizeMPMC) {
- WorkQueue<int> queue(10);
- std::vector<int> results(200, -1);
- std::mutex mutex;
- std::cerr << "Creating popperThreads" << std::endl;
- std::vector<std::thread> popperThreads;
- for (int i = 0; i < 4; ++i) {
- popperThreads.emplace_back(Popper{&queue, results.data(), &mutex});
- }
-
- std::cerr << "Creating pusherThreads" << std::endl;
- std::vector<std::thread> pusherThreads;
- for (int i = 0; i < 2; ++i) {
- auto min = i * 100;
- auto max = (i + 1) * 100;
- pusherThreads.emplace_back(
- [ &queue, min, max ] {
- for (int i = min; i < max; ++i) {
- queue.push(int{i});
- }
- });
- }
-
- std::cerr << "Joining pusherThreads" << std::endl;
- for (auto& thread : pusherThreads) {
- thread.join();
- }
- std::cerr << "Finishing queue" << std::endl;
- queue.finish();
-
- std::cerr << "Joining popperThreads" << std::endl;
- for (auto& thread : popperThreads) {
- thread.join();
- }
-
- std::cerr << "Inspecting results" << std::endl;
- for (int i = 0; i < 200; ++i) {
- EXPECT_EQ(i, results[i]);
- }
-}
-
-TEST(WorkQueue, FailedPush) {
- WorkQueue<std::unique_ptr<int>> queue;
- std::unique_ptr<int> x(new int{5});
- EXPECT_TRUE(queue.push(std::move(x)));
- EXPECT_EQ(nullptr, x);
- queue.finish();
- x.reset(new int{6});
- EXPECT_FALSE(queue.push(std::move(x)));
- EXPECT_NE(nullptr, x);
- EXPECT_EQ(6, *x);
-}
-
-TEST(BufferWorkQueue, SizeCalculatedCorrectly) {
- {
- BufferWorkQueue queue;
- queue.finish();
- EXPECT_EQ(0, queue.size());
- }
- {
- BufferWorkQueue queue;
- queue.push(Buffer(10));
- queue.finish();
- EXPECT_EQ(10, queue.size());
- }
- {
- BufferWorkQueue queue;
- queue.push(Buffer(10));
- queue.push(Buffer(5));
- queue.finish();
- EXPECT_EQ(15, queue.size());
- }
- {
- BufferWorkQueue queue;
- queue.push(Buffer(10));
- queue.push(Buffer(5));
- queue.finish();
- Buffer buffer;
- queue.pop(buffer);
- EXPECT_EQ(5, queue.size());
- }
-}
diff --git a/contrib/seekable_format/examples/Makefile b/contrib/seekable_format/examples/Makefile
deleted file mode 100644
index 543780f75d34..000000000000
--- a/contrib/seekable_format/examples/Makefile
+++ /dev/null
@@ -1,53 +0,0 @@
-# ################################################################
-# Copyright (c) 2017-present, Facebook, Inc.
-# All rights reserved.
-#
-# This source code is licensed under both the BSD-style license (found in the
-# LICENSE file in the root directory of this source tree) and the GPLv2 (found
-# in the COPYING file in the root directory of this source tree).
-# ################################################################
-
-# This Makefile presumes libzstd is built, using `make` in / or /lib/
-
-ZSTDLIB_PATH = ../../../lib
-ZSTDLIB_NAME = libzstd.a
-ZSTDLIB = $(ZSTDLIB_PATH)/$(ZSTDLIB_NAME)
-
-CPPFLAGS += -I../ -I../../../lib -I../../../lib/common
-
-CFLAGS ?= -O3
-CFLAGS += -g
-
-SEEKABLE_OBJS = ../zstdseek_compress.c ../zstdseek_decompress.c $(ZSTDLIB)
-
-.PHONY: default all clean test
-
-default: all
-
-all: seekable_compression seekable_decompression seekable_decompression_mem \
- parallel_processing
-
-$(ZSTDLIB):
- make -C $(ZSTDLIB_PATH) $(ZSTDLIB_NAME)
-
-seekable_compression : seekable_compression.c $(SEEKABLE_OBJS)
- $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
-
-seekable_decompression : seekable_decompression.c $(SEEKABLE_OBJS)
- $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
-
-seekable_decompression_mem : seekable_decompression_mem.c $(SEEKABLE_OBJS)
- $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
-
-parallel_processing : parallel_processing.c $(SEEKABLE_OBJS)
- $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ -pthread
-
-parallel_compression : parallel_compression.c $(SEEKABLE_OBJS)
- $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ -pthread
-
-clean:
- @rm -f core *.o tmp* result* *.zst \
- seekable_compression seekable_decompression \
- seekable_decompression_mem \
- parallel_processing parallel_compression
- @echo Cleaning completed
diff --git a/contrib/seekable_format/examples/parallel_compression.c b/contrib/seekable_format/examples/parallel_compression.c
deleted file mode 100644
index 69644d2b3c80..000000000000
--- a/contrib/seekable_format/examples/parallel_compression.c
+++ /dev/null
@@ -1,215 +0,0 @@
-/*
- * Copyright (c) 2017-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-
-#include <stdlib.h> // malloc, free, exit, atoi
-#include <stdio.h> // fprintf, perror, feof, fopen, etc.
-#include <string.h> // strlen, memset, strcat
-#define ZSTD_STATIC_LINKING_ONLY
-#include <zstd.h> // presumes zstd library is installed
-#include <zstd_errors.h>
-#if defined(WIN32) || defined(_WIN32)
-# include <windows.h>
-# define SLEEP(x) Sleep(x)
-#else
-# include <unistd.h>
-# define SLEEP(x) usleep(x * 1000)
-#endif
-
-#define XXH_NAMESPACE ZSTD_
-#include "xxhash.h"
-
-#include "pool.h" // use zstd thread pool for demo
-
-#include "zstd_seekable.h"
-
-static void* malloc_orDie(size_t size)
-{
- void* const buff = malloc(size);
- if (buff) return buff;
- /* error */
- perror("malloc:");
- exit(1);
-}
-
-static FILE* fopen_orDie(const char *filename, const char *instruction)
-{
- FILE* const inFile = fopen(filename, instruction);
- if (inFile) return inFile;
- /* error */
- perror(filename);
- exit(3);
-}
-
-static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
-{
- size_t const readSize = fread(buffer, 1, sizeToRead, file);
- if (readSize == sizeToRead) return readSize; /* good */
- if (feof(file)) return readSize; /* good, reached end of file */
- /* error */
- perror("fread");
- exit(4);
-}
-
-static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
-{
- size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file);
- if (writtenSize == sizeToWrite) return sizeToWrite; /* good */
- /* error */
- perror("fwrite");
- exit(5);
-}
-
-static size_t fclose_orDie(FILE* file)
-{
- if (!fclose(file)) return 0;
- /* error */
- perror("fclose");
- exit(6);
-}
-
-static void fseek_orDie(FILE* file, long int offset, int origin)
-{
- if (!fseek(file, offset, origin)) {
- if (!fflush(file)) return;
- }
- /* error */
- perror("fseek");
- exit(7);
-}
-
-static long int ftell_orDie(FILE* file)
-{
- long int off = ftell(file);
- if (off != -1) return off;
- /* error */
- perror("ftell");
- exit(8);
-}
-
-struct job {
- const void* src;
- size_t srcSize;
- void* dst;
- size_t dstSize;
-
- unsigned checksum;
-
- int compressionLevel;
- int done;
-};
-
-static void compressFrame(void* opaque)
-{
- struct job* job = opaque;
-
- job->checksum = XXH64(job->src, job->srcSize, 0);
-
- size_t ret = ZSTD_compress(job->dst, job->dstSize, job->src, job->srcSize, job->compressionLevel);
- if (ZSTD_isError(ret)) {
- fprintf(stderr, "ZSTD_compress() error : %s \n", ZSTD_getErrorName(ret));
- exit(20);
- }
-
- job->dstSize = ret;
- job->done = 1;
-}
-
-static void compressFile_orDie(const char* fname, const char* outName, int cLevel, unsigned frameSize, int nbThreads)
-{
- POOL_ctx* pool = POOL_create(nbThreads, nbThreads);
- if (pool == NULL) { fprintf(stderr, "POOL_create() error \n"); exit(9); }
-
- FILE* const fin = fopen_orDie(fname, "rb");
- FILE* const fout = fopen_orDie(outName, "wb");
-
- if (ZSTD_compressBound(frameSize) > 0xFFFFFFFFU) { fprintf(stderr, "Frame size too large \n"); exit(10); }
- unsigned dstSize = ZSTD_compressBound(frameSize);
-
-
- fseek_orDie(fin, 0, SEEK_END);
- long int length = ftell_orDie(fin);
- fseek_orDie(fin, 0, SEEK_SET);
-
- size_t numFrames = (length + frameSize - 1) / frameSize;
-
- struct job* jobs = malloc_orDie(sizeof(struct job) * numFrames);
-
- size_t i;
- for(i = 0; i < numFrames; i++) {
- void* in = malloc_orDie(frameSize);
- void* out = malloc_orDie(dstSize);
-
- size_t inSize = fread_orDie(in, frameSize, fin);
-
- jobs[i].src = in;
- jobs[i].srcSize = inSize;
- jobs[i].dst = out;
- jobs[i].dstSize = dstSize;
- jobs[i].compressionLevel = cLevel;
- jobs[i].done = 0;
- POOL_add(pool, compressFrame, &jobs[i]);
- }
-
- ZSTD_frameLog* fl = ZSTD_seekable_createFrameLog(1);
- if (fl == NULL) { fprintf(stderr, "ZSTD_seekable_createFrameLog() failed \n"); exit(11); }
- for (i = 0; i < numFrames; i++) {
- while (!jobs[i].done) SLEEP(5); /* wake up every 5 milliseconds to check */
- fwrite_orDie(jobs[i].dst, jobs[i].dstSize, fout);
- free((void*)jobs[i].src);
- free(jobs[i].dst);
-
- size_t ret = ZSTD_seekable_logFrame(fl, jobs[i].dstSize, jobs[i].srcSize, jobs[i].checksum);
- if (ZSTD_isError(ret)) { fprintf(stderr, "ZSTD_seekable_logFrame() error : %s \n", ZSTD_getErrorName(ret)); }
- }
-
- { unsigned char seekTableBuff[1024];
- ZSTD_outBuffer out = {seekTableBuff, 1024, 0};
- while (ZSTD_seekable_writeSeekTable(fl, &out) != 0) {
- fwrite_orDie(seekTableBuff, out.pos, fout);
- out.pos = 0;
- }
- fwrite_orDie(seekTableBuff, out.pos, fout);
- }
-
- ZSTD_seekable_freeFrameLog(fl);
- free(jobs);
- fclose_orDie(fout);
- fclose_orDie(fin);
-}
-
-static const char* createOutFilename_orDie(const char* filename)
-{
- size_t const inL = strlen(filename);
- size_t const outL = inL + 5;
- void* outSpace = malloc_orDie(outL);
- memset(outSpace, 0, outL);
- strcat(outSpace, filename);
- strcat(outSpace, ".zst");
- return (const char*)outSpace;
-}
-
-int main(int argc, const char** argv) {
- const char* const exeName = argv[0];
- if (argc!=4) {
- printf("wrong arguments\n");
- printf("usage:\n");
- printf("%s FILE FRAME_SIZE NB_THREADS\n", exeName);
- return 1;
- }
-
- { const char* const inFileName = argv[1];
- unsigned const frameSize = (unsigned)atoi(argv[2]);
- int const nbThreads = atoi(argv[3]);
-
- const char* const outFileName = createOutFilename_orDie(inFileName);
- compressFile_orDie(inFileName, outFileName, 5, frameSize, nbThreads);
- }
-
- return 0;
-}
diff --git a/contrib/seekable_format/examples/parallel_processing.c b/contrib/seekable_format/examples/parallel_processing.c
deleted file mode 100644
index 36226b49fd3c..000000000000
--- a/contrib/seekable_format/examples/parallel_processing.c
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * Copyright (c) 2017-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-
-/*
- * A simple demo that sums up all the bytes in the file in parallel using
- * seekable decompression and the zstd thread pool
- */
-
-#include <stdlib.h> // malloc, exit
-#include <stdio.h> // fprintf, perror, feof
-#include <string.h> // strerror
-#include <errno.h> // errno
-#define ZSTD_STATIC_LINKING_ONLY
-#include <zstd.h> // presumes zstd library is installed
-#include <zstd_errors.h>
-#if defined(WIN32) || defined(_WIN32)
-# include <windows.h>
-# define SLEEP(x) Sleep(x)
-#else
-# include <unistd.h>
-# define SLEEP(x) usleep(x * 1000)
-#endif
-
-#include "pool.h" // use zstd thread pool for demo
-
-#include "zstd_seekable.h"
-
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-
-static void* malloc_orDie(size_t size)
-{
- void* const buff = malloc(size);
- if (buff) return buff;
- /* error */
- perror("malloc");
- exit(1);
-}
-
-static void* realloc_orDie(void* ptr, size_t size)
-{
- ptr = realloc(ptr, size);
- if (ptr) return ptr;
- /* error */
- perror("realloc");
- exit(1);
-}
-
-static FILE* fopen_orDie(const char *filename, const char *instruction)
-{
- FILE* const inFile = fopen(filename, instruction);
- if (inFile) return inFile;
- /* error */
- perror(filename);
- exit(3);
-}
-
-static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
-{
- size_t const readSize = fread(buffer, 1, sizeToRead, file);
- if (readSize == sizeToRead) return readSize; /* good */
- if (feof(file)) return readSize; /* good, reached end of file */
- /* error */
- perror("fread");
- exit(4);
-}
-
-static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
-{
- size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file);
- if (writtenSize == sizeToWrite) return sizeToWrite; /* good */
- /* error */
- perror("fwrite");
- exit(5);
-}
-
-static size_t fclose_orDie(FILE* file)
-{
- if (!fclose(file)) return 0;
- /* error */
- perror("fclose");
- exit(6);
-}
-
-static void fseek_orDie(FILE* file, long int offset, int origin) {
- if (!fseek(file, offset, origin)) {
- if (!fflush(file)) return;
- }
- /* error */
- perror("fseek");
- exit(7);
-}
-
-struct sum_job {
- const char* fname;
- unsigned long long sum;
- unsigned frameNb;
- int done;
-};
-
-static void sumFrame(void* opaque)
-{
- struct sum_job* job = (struct sum_job*)opaque;
- job->done = 0;
-
- FILE* const fin = fopen_orDie(job->fname, "rb");
-
- ZSTD_seekable* const seekable = ZSTD_seekable_create();
- if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); }
-
- size_t const initResult = ZSTD_seekable_initFile(seekable, fin);
- if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
-
- size_t const frameSize = ZSTD_seekable_getFrameDecompressedSize(seekable, job->frameNb);
- unsigned char* data = malloc_orDie(frameSize);
-
- size_t result = ZSTD_seekable_decompressFrame(seekable, data, frameSize, job->frameNb);
- if (ZSTD_isError(result)) { fprintf(stderr, "ZSTD_seekable_decompressFrame() error : %s \n", ZSTD_getErrorName(result)); exit(12); }
-
- unsigned long long sum = 0;
- size_t i;
- for (i = 0; i < frameSize; i++) {
- sum += data[i];
- }
- job->sum = sum;
- job->done = 1;
-
- fclose(fin);
- ZSTD_seekable_free(seekable);
- free(data);
-}
-
-static void sumFile_orDie(const char* fname, int nbThreads)
-{
- POOL_ctx* pool = POOL_create(nbThreads, nbThreads);
- if (pool == NULL) { fprintf(stderr, "POOL_create() error \n"); exit(9); }
-
- FILE* const fin = fopen_orDie(fname, "rb");
-
- ZSTD_seekable* const seekable = ZSTD_seekable_create();
- if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); }
-
- size_t const initResult = ZSTD_seekable_initFile(seekable, fin);
- if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
-
- unsigned const numFrames = ZSTD_seekable_getNumFrames(seekable);
- struct sum_job* jobs = (struct sum_job*)malloc(numFrames * sizeof(struct sum_job));
-
- unsigned fnb;
- for (fnb = 0; fnb < numFrames; fnb++) {
- jobs[fnb] = (struct sum_job){ fname, 0, fnb, 0 };
- POOL_add(pool, sumFrame, &jobs[fnb]);
- }
-
- unsigned long long total = 0;
-
- for (fnb = 0; fnb < numFrames; fnb++) {
- while (!jobs[fnb].done) SLEEP(5); /* wake up every 5 milliseconds to check */
- total += jobs[fnb].sum;
- }
-
- printf("Sum: %llu\n", total);
-
- POOL_free(pool);
- ZSTD_seekable_free(seekable);
- fclose(fin);
- free(jobs);
-}
-
-
-int main(int argc, const char** argv)
-{
- const char* const exeName = argv[0];
-
- if (argc!=3) {
- fprintf(stderr, "wrong arguments\n");
- fprintf(stderr, "usage:\n");
- fprintf(stderr, "%s FILE NB_THREADS\n", exeName);
- return 1;
- }
-
- {
- const char* const inFilename = argv[1];
- int const nbThreads = atoi(argv[2]);
- sumFile_orDie(inFilename, nbThreads);
- }
-
- return 0;
-}
diff --git a/contrib/seekable_format/examples/seekable_compression.c b/contrib/seekable_format/examples/seekable_compression.c
deleted file mode 100644
index 9a331a89531e..000000000000
--- a/contrib/seekable_format/examples/seekable_compression.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 2017-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-
-#include <stdlib.h> // malloc, free, exit, atoi
-#include <stdio.h> // fprintf, perror, feof, fopen, etc.
-#include <string.h> // strlen, memset, strcat
-#define ZSTD_STATIC_LINKING_ONLY
-#include <zstd.h> // presumes zstd library is installed
-
-#include "zstd_seekable.h"
-
-static void* malloc_orDie(size_t size)
-{
- void* const buff = malloc(size);
- if (buff) return buff;
- /* error */
- perror("malloc:");
- exit(1);
-}
-
-static FILE* fopen_orDie(const char *filename, const char *instruction)
-{
- FILE* const inFile = fopen(filename, instruction);
- if (inFile) return inFile;
- /* error */
- perror(filename);
- exit(3);
-}
-
-static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
-{
- size_t const readSize = fread(buffer, 1, sizeToRead, file);
- if (readSize == sizeToRead) return readSize; /* good */
- if (feof(file)) return readSize; /* good, reached end of file */
- /* error */
- perror("fread");
- exit(4);
-}
-
-static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
-{
- size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file);
- if (writtenSize == sizeToWrite) return sizeToWrite; /* good */
- /* error */
- perror("fwrite");
- exit(5);
-}
-
-static size_t fclose_orDie(FILE* file)
-{
- if (!fclose(file)) return 0;
- /* error */
- perror("fclose");
- exit(6);
-}
-
-static void compressFile_orDie(const char* fname, const char* outName, int cLevel, unsigned frameSize)
-{
- FILE* const fin = fopen_orDie(fname, "rb");
- FILE* const fout = fopen_orDie(outName, "wb");
- size_t const buffInSize = ZSTD_CStreamInSize(); /* can always read one full block */
- void* const buffIn = malloc_orDie(buffInSize);
- size_t const buffOutSize = ZSTD_CStreamOutSize(); /* can always flush a full block */
- void* const buffOut = malloc_orDie(buffOutSize);
-
- ZSTD_seekable_CStream* const cstream = ZSTD_seekable_createCStream();
- if (cstream==NULL) { fprintf(stderr, "ZSTD_seekable_createCStream() error \n"); exit(10); }
- size_t const initResult = ZSTD_seekable_initCStream(cstream, cLevel, 1, frameSize);
- if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_initCStream() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
-
- size_t read, toRead = buffInSize;
- while( (read = fread_orDie(buffIn, toRead, fin)) ) {
- ZSTD_inBuffer input = { buffIn, read, 0 };
- while (input.pos < input.size) {
- ZSTD_outBuffer output = { buffOut, buffOutSize, 0 };
- toRead = ZSTD_seekable_compressStream(cstream, &output , &input); /* toRead is guaranteed to be <= ZSTD_CStreamInSize() */
- if (ZSTD_isError(toRead)) { fprintf(stderr, "ZSTD_seekable_compressStream() error : %s \n", ZSTD_getErrorName(toRead)); exit(12); }
- if (toRead > buffInSize) toRead = buffInSize; /* Safely handle case when `buffInSize` is manually changed to a value < ZSTD_CStreamInSize()*/
- fwrite_orDie(buffOut, output.pos, fout);
- }
- }
-
- while (1) {
- ZSTD_outBuffer output = { buffOut, buffOutSize, 0 };
- size_t const remainingToFlush = ZSTD_seekable_endStream(cstream, &output); /* close stream */
- if (ZSTD_isError(remainingToFlush)) { fprintf(stderr, "ZSTD_seekable_endStream() error : %s \n", ZSTD_getErrorName(remainingToFlush)); exit(13); }
- fwrite_orDie(buffOut, output.pos, fout);
- if (!remainingToFlush) break;
- }
-
- ZSTD_seekable_freeCStream(cstream);
- fclose_orDie(fout);
- fclose_orDie(fin);
- free(buffIn);
- free(buffOut);
-}
-
-static char* createOutFilename_orDie(const char* filename)
-{
- size_t const inL = strlen(filename);
- size_t const outL = inL + 5;
- void* outSpace = malloc_orDie(outL);
- memset(outSpace, 0, outL);
- strcat(outSpace, filename);
- strcat(outSpace, ".zst");
- return (char*)outSpace;
-}
-
-int main(int argc, const char** argv) {
- const char* const exeName = argv[0];
- if (argc!=3) {
- printf("wrong arguments\n");
- printf("usage:\n");
- printf("%s FILE FRAME_SIZE\n", exeName);
- return 1;
- }
-
- { const char* const inFileName = argv[1];
- unsigned const frameSize = (unsigned)atoi(argv[2]);
-
- char* const outFileName = createOutFilename_orDie(inFileName);
- compressFile_orDie(inFileName, outFileName, 5, frameSize);
- free(outFileName);
- }
-
- return 0;
-}
diff --git a/contrib/seekable_format/examples/seekable_decompression.c b/contrib/seekable_format/examples/seekable_decompression.c
deleted file mode 100644
index 7050e0fa5c64..000000000000
--- a/contrib/seekable_format/examples/seekable_decompression.c
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2017-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-
-
-#include <stdlib.h> // malloc, exit
-#include <stdio.h> // fprintf, perror, feof
-#include <string.h> // strerror
-#include <errno.h> // errno
-#define ZSTD_STATIC_LINKING_ONLY
-#include <zstd.h> // presumes zstd library is installed
-#include <zstd_errors.h>
-
-#include "zstd_seekable.h"
-
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-
-static void* malloc_orDie(size_t size)
-{
- void* const buff = malloc(size);
- if (buff) return buff;
- /* error */
- perror("malloc");
- exit(1);
-}
-
-static void* realloc_orDie(void* ptr, size_t size)
-{
- ptr = realloc(ptr, size);
- if (ptr) return ptr;
- /* error */
- perror("realloc");
- exit(1);
-}
-
-static FILE* fopen_orDie(const char *filename, const char *instruction)
-{
- FILE* const inFile = fopen(filename, instruction);
- if (inFile) return inFile;
- /* error */
- perror(filename);
- exit(3);
-}
-
-static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
-{
- size_t const readSize = fread(buffer, 1, sizeToRead, file);
- if (readSize == sizeToRead) return readSize; /* good */
- if (feof(file)) return readSize; /* good, reached end of file */
- /* error */
- perror("fread");
- exit(4);
-}
-
-static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
-{
- size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file);
- if (writtenSize == sizeToWrite) return sizeToWrite; /* good */
- /* error */
- perror("fwrite");
- exit(5);
-}
-
-static size_t fclose_orDie(FILE* file)
-{
- if (!fclose(file)) return 0;
- /* error */
- perror("fclose");
- exit(6);
-}
-
-static void fseek_orDie(FILE* file, long int offset, int origin) {
- if (!fseek(file, offset, origin)) {
- if (!fflush(file)) return;
- }
- /* error */
- perror("fseek");
- exit(7);
-}
-
-
-static void decompressFile_orDie(const char* fname, off_t startOffset, off_t endOffset)
-{
- FILE* const fin = fopen_orDie(fname, "rb");
- FILE* const fout = stdout;
- size_t const buffOutSize = ZSTD_DStreamOutSize(); /* Guarantee to successfully flush at least one complete compressed block in all circumstances. */
- void* const buffOut = malloc_orDie(buffOutSize);
-
- ZSTD_seekable* const seekable = ZSTD_seekable_create();
- if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); }
-
- size_t const initResult = ZSTD_seekable_initFile(seekable, fin);
- if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
-
- while (startOffset < endOffset) {
- size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset);
-
- if (ZSTD_isError(result)) {
- fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n",
- ZSTD_getErrorName(result));
- exit(12);
- }
- fwrite_orDie(buffOut, result, fout);
- startOffset += result;
- }
-
- ZSTD_seekable_free(seekable);
- fclose_orDie(fin);
- fclose_orDie(fout);
- free(buffOut);
-}
-
-
-int main(int argc, const char** argv)
-{
- const char* const exeName = argv[0];
-
- if (argc!=4) {
- fprintf(stderr, "wrong arguments\n");
- fprintf(stderr, "usage:\n");
- fprintf(stderr, "%s FILE START END\n", exeName);
- return 1;
- }
-
- {
- const char* const inFilename = argv[1];
- off_t const startOffset = atoll(argv[2]);
- off_t const endOffset = atoll(argv[3]);
- decompressFile_orDie(inFilename, startOffset, endOffset);
- }
-
- return 0;
-}
diff --git a/contrib/seekable_format/examples/seekable_decompression_mem.c b/contrib/seekable_format/examples/seekable_decompression_mem.c
deleted file mode 100644
index c36d2221f97e..000000000000
--- a/contrib/seekable_format/examples/seekable_decompression_mem.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2017-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-
-
-#include <stdlib.h> // malloc, exit
-#include <stdio.h> // fprintf, perror, feof
-#include <string.h> // strerror
-#include <errno.h> // errno
-#define ZSTD_STATIC_LINKING_ONLY
-#include <zstd.h> // presumes zstd library is installed
-#include <zstd_errors.h>
-
-#include "zstd_seekable.h"
-
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-
-#define MAX_FILE_SIZE (8 * 1024 * 1024)
-
-static void* malloc_orDie(size_t size)
-{
- void* const buff = malloc(size);
- if (buff) return buff;
- /* error */
- perror("malloc");
- exit(1);
-}
-
-static void* realloc_orDie(void* ptr, size_t size)
-{
- ptr = realloc(ptr, size);
- if (ptr) return ptr;
- /* error */
- perror("realloc");
- exit(1);
-}
-
-static FILE* fopen_orDie(const char *filename, const char *instruction)
-{
- FILE* const inFile = fopen(filename, instruction);
- if (inFile) return inFile;
- /* error */
- perror(filename);
- exit(3);
-}
-
-static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
-{
- size_t const readSize = fread(buffer, 1, sizeToRead, file);
- if (readSize == sizeToRead) return readSize; /* good */
- if (feof(file)) return readSize; /* good, reached end of file */
- /* error */
- perror("fread");
- exit(4);
-}
-
-static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
-{
- size_t const writtenSize = fwrite(buffer, 1, sizeToWrite, file);
- if (writtenSize == sizeToWrite) return sizeToWrite; /* good */
- /* error */
- perror("fwrite");
- exit(5);
-}
-
-static size_t fclose_orDie(FILE* file)
-{
- if (!fclose(file)) return 0;
- /* error */
- perror("fclose");
- exit(6);
-}
-
-static void fseek_orDie(FILE* file, long int offset, int origin) {
- if (!fseek(file, offset, origin)) {
- if (!fflush(file)) return;
- }
- /* error */
- perror("fseek");
- exit(7);
-}
-
-
-static void decompressFile_orDie(const char* fname, off_t startOffset, off_t endOffset)
-{
- FILE* const fin = fopen_orDie(fname, "rb");
- FILE* const fout = stdout;
- // Just for demo purposes, assume file is <= MAX_FILE_SIZE
- void* const buffIn = malloc_orDie(MAX_FILE_SIZE);
- size_t const inSize = fread_orDie(buffIn, MAX_FILE_SIZE, fin);
- size_t const buffOutSize = ZSTD_DStreamOutSize(); /* Guarantee to successfully flush at least one complete compressed block in all circumstances. */
- void* const buffOut = malloc_orDie(buffOutSize);
-
- ZSTD_seekable* const seekable = ZSTD_seekable_create();
- if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); }
-
- size_t const initResult = ZSTD_seekable_initBuff(seekable, buffIn, inSize);
- if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
-
- while (startOffset < endOffset) {
- size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset);
-
- if (ZSTD_isError(result)) {
- fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n",
- ZSTD_getErrorName(result));
- exit(12);
- }
- fwrite_orDie(buffOut, result, fout);
- startOffset += result;
- }
-
- ZSTD_seekable_free(seekable);
- fclose_orDie(fin);
- fclose_orDie(fout);
- free(buffIn);
- free(buffOut);
-}
-
-
-int main(int argc, const char** argv)
-{
- const char* const exeName = argv[0];
-
- if (argc!=4) {
- fprintf(stderr, "wrong arguments\n");
- fprintf(stderr, "usage:\n");
- fprintf(stderr, "%s FILE START END\n", exeName);
- return 1;
- }
-
- {
- const char* const inFilename = argv[1];
- off_t const startOffset = atoll(argv[2]);
- off_t const endOffset = atoll(argv[3]);
- decompressFile_orDie(inFilename, startOffset, endOffset);
- }
-
- return 0;
-}
diff --git a/contrib/seekable_format/zstd_seekable.h b/contrib/seekable_format/zstd_seekable.h
deleted file mode 100644
index 7ffd1ba0a72b..000000000000
--- a/contrib/seekable_format/zstd_seekable.h
+++ /dev/null
@@ -1,186 +0,0 @@
-#ifndef SEEKABLE_H
-#define SEEKABLE_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-#include <stdio.h>
-#include "zstd.h" /* ZSTDLIB_API */
-
-
-#define ZSTD_seekTableFooterSize 9
-
-#define ZSTD_SEEKABLE_MAGICNUMBER 0x8F92EAB1
-
-#define ZSTD_SEEKABLE_MAXFRAMES 0x8000000U
-
-/* Limit the maximum size to avoid any potential issues storing the compressed size */
-#define ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE 0x80000000U
-
-/*-****************************************************************************
-* Seekable Format
-*
-* The seekable format splits the compressed data into a series of "frames",
-* each compressed individually so that decompression of a section in the
-* middle of an archive only requires zstd to decompress at most a frame's
-* worth of extra data, instead of the entire archive.
-******************************************************************************/
-
-typedef struct ZSTD_seekable_CStream_s ZSTD_seekable_CStream;
-typedef struct ZSTD_seekable_s ZSTD_seekable;
-
-/*-****************************************************************************
-* Seekable compression - HowTo
-* A ZSTD_seekable_CStream object is required to track the streaming operation.
-* Use ZSTD_seekable_createCStream() and ZSTD_seekable_freeCStream() to create/
-* release resources.
-*
-* Streaming objects are reusable to avoid allocation and deallocation,
-* to start a new compression operation call ZSTD_seekable_initCStream() on the
-* compressor.
-*
-* Data streamed to the seekable compressor will automatically be split into
-* frames of size `maxFrameSize` (provided in ZSTD_seekable_initCStream()),
-* or if none is provided, will be cut off whenever ZSTD_seekable_endFrame() is
-* called or when the default maximum frame size (2GB) is reached.
-*
-* Use ZSTD_seekable_initCStream() to initialize a ZSTD_seekable_CStream object
-* for a new compression operation.
-* `maxFrameSize` indicates the size at which to automatically start a new
-* seekable frame. `maxFrameSize == 0` implies the default maximum size.
-* `checksumFlag` indicates whether or not the seek table should include frame
-* checksums on the uncompressed data for verification.
-* @return : a size hint for input to provide for compression, or an error code
-* checkable with ZSTD_isError()
-*
-* Use ZSTD_seekable_compressStream() repetitively to consume input stream.
-* The function will automatically update both `pos` fields.
-* Note that it may not consume the entire input, in which case `pos < size`,
-* and it's up to the caller to present again remaining data.
-* @return : a size hint, preferred nb of bytes to use as input for next
-* function call or an error code, which can be tested using
-* ZSTD_isError().
-* Note 1 : it's just a hint, to help latency a little, any other
-* value will work fine.
-*
-* At any time, call ZSTD_seekable_endFrame() to end the current frame and
-* start a new one.
-*
-* ZSTD_seekable_endStream() will end the current frame, and then write the seek
-* table so that decompressors can efficiently find compressed frames.
-* ZSTD_seekable_endStream() may return a number > 0 if it was unable to flush
-* all the necessary data to `output`. In this case, it should be called again
-* until all remaining data is flushed out and 0 is returned.
-******************************************************************************/
-
-/*===== Seekable compressor management =====*/
-ZSTDLIB_API ZSTD_seekable_CStream* ZSTD_seekable_createCStream(void);
-ZSTDLIB_API size_t ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs);
-
-/*===== Seekable compression functions =====*/
-ZSTDLIB_API size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs, int compressionLevel, int checksumFlag, unsigned maxFrameSize);
-ZSTDLIB_API size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
-ZSTDLIB_API size_t ZSTD_seekable_endFrame(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output);
-ZSTDLIB_API size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output);
-
-/*= Raw seek table API
- * These functions allow for the seek table to be constructed directly.
- * This table can then be appended to a file of concatenated frames.
- * This allows the frames to be compressed independently, even in parallel,
- * and compiled together afterward into a seekable archive.
- *
- * Use ZSTD_seekable_createFrameLog() to allocate and initialize a tracking
- * structure.
- *
- * Call ZSTD_seekable_logFrame() once for each frame in the archive.
- * checksum is optional, and will not be used if checksumFlag was 0 when the
- * frame log was created. If present, it should be the least significant 32
- * bits of the XXH64 hash of the uncompressed data.
- *
- * Call ZSTD_seekable_writeSeekTable to serialize the data into a seek table.
- * If the entire table was written, the return value will be 0. Otherwise,
- * it will be equal to the number of bytes left to write. */
-typedef struct ZSTD_frameLog_s ZSTD_frameLog;
-ZSTDLIB_API ZSTD_frameLog* ZSTD_seekable_createFrameLog(int checksumFlag);
-ZSTDLIB_API size_t ZSTD_seekable_freeFrameLog(ZSTD_frameLog* fl);
-ZSTDLIB_API size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl, unsigned compressedSize, unsigned decompressedSize, unsigned checksum);
-ZSTDLIB_API size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output);
-
-/*-****************************************************************************
-* Seekable decompression - HowTo
-* A ZSTD_seekable object is required to track the seek table.
-*
-* Call ZSTD_seekable_init* to initialize a ZSTD_seekable object with
-* the seek table provided in the input.
-* There are three modes for ZSTD_seekable_init:
-* - ZSTD_seekable_initBuff() : An in-memory API. The data contained in
-* `src` should be the entire seekable file, including the seek table.
-* `src` should be kept alive and unmodified until the ZSTD_seekable object
-* is freed or reset.
-* - ZSTD_seekable_initFile() : A simplified file API using stdio. fread and
-* fseek will be used to access the required data for building the seek
-* table and doing decompression operations. `src` should not be closed
-* or modified until the ZSTD_seekable object is freed or reset.
-* - ZSTD_seekable_initAdvanced() : A general API allowing the client to
-* provide its own read and seek callbacks.
-* + ZSTD_seekable_read() : read exactly `n` bytes into `buffer`.
-* Premature EOF should be treated as an error.
-* + ZSTD_seekable_seek() : seek the read head to `offset` from `origin`,
-* where origin is either SEEK_SET (beginning of
-* file), or SEEK_END (end of file).
-* Both functions should return a non-negative value in case of success, and a
-* negative value in case of failure. If implementing using this API and
-* stdio, be careful with files larger than 4GB and fseek. All of these
-* functions return an error code checkable with ZSTD_isError().
-*
-* Call ZSTD_seekable_decompress to decompress `dstSize` bytes at decompressed
-* offset `offset`. ZSTD_seekable_decompress may have to decompress the entire
-* prefix of the frame before the desired data if it has not already processed
-* this section. If ZSTD_seekable_decompress is called multiple times for a
-* consecutive range of data, it will efficiently retain the decompressor object
-* and avoid redecompressing frame prefixes. The return value is the number of
-* bytes decompressed, or an error code checkable with ZSTD_isError().
-*
-* The seek table access functions can be used to obtain the data contained
-* in the seek table. If frameIndex is not smaller than the value returned by
-* ZSTD_seekable_getNumFrames(), they will return error codes checkable with
-* ZSTD_isError(). Note that since the offset access functions return
-* unsigned long long instead of size_t, in this case they will instead return
-* the value ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE.
-******************************************************************************/
-
-/*===== Seekable decompressor management =====*/
-ZSTDLIB_API ZSTD_seekable* ZSTD_seekable_create(void);
-ZSTDLIB_API size_t ZSTD_seekable_free(ZSTD_seekable* zs);
-
-/*===== Seekable decompression functions =====*/
-ZSTDLIB_API size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize);
-ZSTDLIB_API size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src);
-ZSTDLIB_API size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned long long offset);
-ZSTDLIB_API size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex);
-
-#define ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE (0ULL-2)
-/*===== Seek Table access functions =====*/
-ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs);
-ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex);
-ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex);
-ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex);
-ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex);
-ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long offset);
-
-/*===== Seekable advanced I/O API =====*/
-/* Read callback : fill `buffer` with exactly `n` bytes taken from `opaque`.
- * Returns a non-negative value on success and a negative value on failure
- * (a premature EOF counts as a failure). */
-typedef int(ZSTD_seekable_read)(void* opaque, void* buffer, size_t n);
-/* Seek callback : move the read head of `opaque` to `offset` relative to
- * `origin` (SEEK_SET or SEEK_END).
- * Returns a non-negative value on success and a negative value on failure. */
-typedef int(ZSTD_seekable_seek)(void* opaque, long long offset, int origin);
-/* Client-provided data source : an opaque handle plus the two callbacks
- * that are invoked with that handle as their first argument. */
-typedef struct {
- void* opaque;
- ZSTD_seekable_read* read;
- ZSTD_seekable_seek* seek;
-} ZSTD_seekable_customFile;
-/* Initializes `zs` from a custom data source.
- * @return : an error code checkable with ZSTD_isError() */
-ZSTDLIB_API size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src);
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif
diff --git a/contrib/seekable_format/zstd_seekable_compression_format.md b/contrib/seekable_format/zstd_seekable_compression_format.md
deleted file mode 100644
index bf3080f7bbed..000000000000
--- a/contrib/seekable_format/zstd_seekable_compression_format.md
+++ /dev/null
@@ -1,116 +0,0 @@
-# Zstandard Seekable Format
-
-### Notices
-
-Copyright (c) 2017-present Facebook, Inc.
-
-Permission is granted to copy and distribute this document
-for any purpose and without charge,
-including translations into other languages
-and incorporation into compilations,
-provided that the copyright notice and this notice are preserved,
-and that any substantive changes or deletions from the original
-are clearly marked.
-Distribution of this document is unlimited.
-
-### Version
-0.1.0 (11/04/17)
-
-## Introduction
-This document defines a format for compressed data to be stored so that subranges of the data can be efficiently decompressed without requiring the entire document to be decompressed.
-This is done by splitting up the input data into frames,
-each of which is compressed independently,
-and so can be decompressed independently.
-Decompression then takes advantage of a provided 'seek table', which allows the decompressor to immediately jump to the desired data. This is done in a way that is compatible with the original Zstandard format by placing the seek table in a Zstandard skippable frame.
-
-### Overall conventions
-In this document:
-- square brackets i.e. `[` and `]` are used to indicate optional fields or parameters.
-- the naming convention for identifiers is `Mixed_Case_With_Underscores`
-- All numeric fields are little-endian unless specified otherwise
-
-## Format
-
-The format consists of a number of frames (Zstandard compressed frames and skippable frames), followed by a final skippable frame at the end containing the seek table.
-
-### Seek Table Format
-The structure of the seek table frame is as follows:
-
-|`Skippable_Magic_Number`|`Frame_Size`|`[Seek_Table_Entries]`|`Seek_Table_Footer`|
-|------------------------|------------|----------------------|-------------------|
-| 4 bytes | 4 bytes | 8-12 bytes each | 9 bytes |
-
-__`Skippable_Magic_Number`__
-
-Value : 0x184D2A5E.
-This is for compatibility with [Zstandard skippable frames].
-Since it is legal for other Zstandard skippable frames to use the same
-magic number, it is not recommended for a decoder to identify seek table frames
-solely by this value.
-
-__`Frame_Size`__
-
-The total size of the skippable frame, not including the `Skippable_Magic_Number` or `Frame_Size`.
-This is for compatibility with [Zstandard skippable frames].
-
-[Zstandard skippable frames]: https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#skippable-frames
-
-#### `Seek_Table_Footer`
-The seek table footer format is as follows:
-
-|`Number_Of_Frames`|`Seek_Table_Descriptor`|`Seekable_Magic_Number`|
-|------------------|-----------------------|-----------------------|
-| 4 bytes | 1 byte | 4 bytes |
-
-__`Seekable_Magic_Number`__
-
-Value : 0x8F92EAB1.
-This value must be the last bytes present in the compressed file so that decoders
-can efficiently find it and determine if there is an actual seek table present.
-
-__`Number_Of_Frames`__
-
-The number of stored frames in the data.
-
-__`Seek_Table_Descriptor`__
-
-A bitfield describing the format of the seek table.
-
-| Bit number | Field name |
-| ---------- | ---------- |
-| 7 | `Checksum_Flag` |
-| 6-2 | `Reserved_Bits` |
-| 1-0 | `Unused_Bits` |
-
-While only `Checksum_Flag` currently exists, there are 7 other bits in this field that can be used for future changes to the format,
-for example the addition of inline dictionaries.
-
-__`Checksum_Flag`__
-
-If the checksum flag is set, each of the seek table entries contains a 4 byte checksum of the uncompressed data contained in its frame.
-
-`Reserved_Bits` are not currently used but may be used in the future for breaking changes, so a compliant decoder should ensure they are set to 0. `Unused_Bits` may be used in the future for non-breaking changes, so a compliant decoder should not interpret these bits.
-
-#### __`Seek_Table_Entries`__
-
-`Seek_Table_Entries` consists of `Number_Of_Frames` entries (one for each frame in the data, not including the seek table frame), stored in sequence, each of the following form:
-
-|`Compressed_Size`|`Decompressed_Size`|`[Checksum]`|
-|-----------------|-------------------|------------|
-| 4 bytes | 4 bytes | 4 bytes |
-
-__`Compressed_Size`__
-
-The compressed size of the frame.
-The cumulative sum of the `Compressed_Size` fields of frames `0` to `i` gives the offset in the compressed file of frame `i+1`.
-
-__`Decompressed_Size`__
-
-The size of the decompressed data contained in the frame. For skippable or otherwise empty frames, this value is 0.
-
-__`Checksum`__
-
-Only present if `Checksum_Flag` is set in the `Seek_Table_Descriptor`. Value : the least significant 32 bits of the XXH64 digest of the uncompressed data, stored in little-endian format.
-
-## Version Changes
-- 0.1.0: initial version
diff --git a/contrib/seekable_format/zstdseek_compress.c b/contrib/seekable_format/zstdseek_compress.c
deleted file mode 100644
index 5a75714fac5b..000000000000
--- a/contrib/seekable_format/zstdseek_compress.c
+++ /dev/null
@@ -1,369 +0,0 @@
-/*
- * Copyright (c) 2017-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- */
-
-#include <stdlib.h> /* malloc, free */
-#include <limits.h> /* UINT_MAX */
-#include <assert.h>
-
-#define XXH_STATIC_LINKING_ONLY
-#define XXH_NAMESPACE ZSTD_
-#include "xxhash.h"
-
-#define ZSTD_STATIC_LINKING_ONLY
-#include "zstd.h"
-#include "zstd_errors.h"
-#include "mem.h"
-#include "zstd_seekable.h"
-
-/* CHECK_Z() : evaluates `f` once and makes the enclosing function return its
- * value whenever it is non-zero. This covers error codes as well as
- * "bytes still pending" results such as the ones ZSTD_stwrite32() returns.
- * The expansion is a bare brace block, not a do { } while (0) statement. */
-#define CHECK_Z(f) { size_t const ret = (f); if (ret != 0) return ret; }
-
-/* ERROR() : builds a size_t error code from a ZSTD_error_* enumerator name */
-#undef ERROR
-#define ERROR(name) ((size_t)-ZSTD_error_##name)
-
-/* MIN() / MAX() : plain macros, each argument may be evaluated twice */
-#undef MIN
-#undef MAX
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-
-/* One record of the frame log : the three values ZSTD_seekable_logFrame()
- * receives for a frame. */
-typedef struct {
- U32 cSize;    /* compressed size of the frame */
- U32 dSize;    /* decompressed size of the frame */
- U32 checksum; /* frame checksum, only serialized when checksumFlag is set */
-} framelogEntry_t;
-
-/* Growable list of the frames logged so far, plus the cursor used to stream
- * the resulting seek table out in several calls.
- * Only the type is declared here : the former trailing declarator defined a
- * stray global object that nothing referenced. */
-struct ZSTD_frameLog_s {
-    framelogEntry_t* entries;   /* heap array holding `capacity` slots */
-    U32 size;                   /* number of slots in use */
-    U32 capacity;               /* number of slots allocated */
-
-    int checksumFlag;           /* non-zero : entries carry a checksum */
-
-    /* for use when streaming out the seek table */
-    U32 seekTablePos;           /* seek table bytes already written */
-    U32 seekTableIndex;         /* entry currently being written */
-};
-
-/* State of a seekable compression stream. */
-struct ZSTD_seekable_CStream_s {
- ZSTD_CStream* cstream;  /* underlying zstd compression stream */
- ZSTD_frameLog framelog; /* frames completed so far, source of the seek table */
-
- U32 frameCSize;         /* compressed bytes emitted for the current frame */
- U32 frameDSize;         /* input bytes consumed for the current frame */
-
- XXH64_state_t xxhState; /* running hash of the current frame's input, updated only when checksums are enabled */
-
- U32 maxFrameSize;       /* input size at which a frame is ended automatically */
-
- int writingSeekTable;   /* set to 1 by ZSTD_seekable_endStream() once seek table output has started */
-};
-
-/* Gives `fl` its initial entry array.
- * @return : 0 on success, a memory_allocation error code otherwise
- *           (fl->entries is then NULL). */
-size_t ZSTD_seekable_frameLog_allocVec(ZSTD_frameLog* fl)
-{
-    enum { initialCapacity = 16 };
-    framelogEntry_t* const vec =
-        (framelogEntry_t*)malloc(initialCapacity * sizeof(framelogEntry_t));
-
-    fl->entries = vec;
-    if (vec == NULL) return ERROR(memory_allocation);
-
-    fl->capacity = initialCapacity;
-    return 0;
-}
-
-/* Releases the entry array owned by `fl`; a NULL `fl` is accepted.
- * @return : always 0 */
-size_t ZSTD_seekable_frameLog_freeVec(ZSTD_frameLog* fl)
-{
-    if (fl == NULL) return 0;
-    free(fl->entries);
-    return 0;
-}
-
-/* Allocates an empty frame log.
- * `checksumFlag` is stored as-is and later decides whether checksums are
- * serialized into the seek table.
- * @return : the new frame log, or NULL if an allocation failed */
-ZSTD_frameLog* ZSTD_seekable_createFrameLog(int checksumFlag)
-{
-    ZSTD_frameLog* const log = malloc(sizeof(ZSTD_frameLog));
-    if (log == NULL) return NULL;
-
-    if (ZSTD_isError(ZSTD_seekable_frameLog_allocVec(log))) {
-        free(log);
-        return NULL;
-    }
-
-    /* no frame logged yet, seek table output not started */
-    log->size = 0;
-    log->seekTableIndex = 0;
-    log->seekTablePos = 0;
-    log->checksumFlag = checksumFlag;
-
-    return log;
-}
-
-/* Releases a frame log together with its entry array; NULL is accepted.
- * @return : always 0 */
-size_t ZSTD_seekable_freeFrameLog(ZSTD_frameLog* fl)
-{
-    if (fl != NULL) {
-        free(fl->entries);
-        free(fl);
-    }
-    return 0;
-}
-
-/* Allocates a zero-initialized seekable compression stream, together with
- * its zstd stream and its frame log storage.
- * The parameter list is spelled `(void)` so that the definition is a real
- * prototype matching the declaration in zstd_seekable.h.
- * @return : the new object, or NULL if any allocation failed */
-ZSTD_seekable_CStream* ZSTD_seekable_createCStream(void)
-{
-    /* calloc() hands back zeroed memory, no separate memset() needed */
-    ZSTD_seekable_CStream* const zcs = calloc(1, sizeof(*zcs));
-
-    if (zcs == NULL) return NULL;
-
-    zcs->cstream = ZSTD_createCStream();
-    if (zcs->cstream == NULL) goto failed1;
-
-    if (ZSTD_isError(ZSTD_seekable_frameLog_allocVec(&zcs->framelog))) goto failed2;
-
-    return zcs;
-
-    /* unwind in reverse order of acquisition */
-failed2:
-    ZSTD_freeCStream(zcs->cstream);
-failed1:
-    free(zcs);
-    return NULL;
-}
-
-/* Releases a seekable compression stream and everything it owns.
- * NULL is accepted.
- * @return : always 0 */
-size_t ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs)
-{
-    if (zcs != NULL) {
-        ZSTD_freeCStream(zcs->cstream);
-        ZSTD_seekable_frameLog_freeVec(&zcs->framelog);
-        free(zcs);
-    }
-    return 0;
-}
-
-/* Prepares `zcs` for a new compression operation.
- * `maxFrameSize` == 0 selects ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE;
- * a larger value is rejected with frameParameter_unsupported.
- * `checksumFlag` enables per-frame checksums.
- * @return : the result of ZSTD_initCStream(), or an error code */
-size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs,
-                                 int compressionLevel,
-                                 int checksumFlag,
-                                 unsigned maxFrameSize)
-{
-    /* forget previously logged frames and the frame in progress */
-    zcs->framelog.size = 0;
-    zcs->frameCSize = 0;
-    zcs->frameDSize = 0;
-
-    if (maxFrameSize > ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE) {
-        return ERROR(frameParameter_unsupported);
-    }
-
-    if (maxFrameSize == 0) {
-        zcs->maxFrameSize = ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE;
-    } else {
-        zcs->maxFrameSize = maxFrameSize;
-    }
-
-    zcs->framelog.checksumFlag = checksumFlag;
-    if (checksumFlag) {
-        XXH64_reset(&zcs->xxhState, 0);
-    }
-
-    /* seek table output starts from scratch */
-    zcs->writingSeekTable = 0;
-    zcs->framelog.seekTableIndex = 0;
-    zcs->framelog.seekTablePos = 0;
-
-    return ZSTD_initCStream(zcs->cstream, compressionLevel);
-}
-
-/* Appends one frame record to `fl`, doubling the entry array when it is full.
- * @return : 0 on success, frameIndex_tooLarge once ZSTD_SEEKABLE_MAXFRAMES
- *           frames are logged, memory_allocation if the array cannot grow */
-size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl,
-                              unsigned compressedSize,
-                              unsigned decompressedSize,
-                              unsigned checksum)
-{
-    if (fl->size == ZSTD_SEEKABLE_MAXFRAMES)
-        return ERROR(frameIndex_tooLarge);
-
-    if (fl->size == fl->capacity) {
-        /* doubling keeps the amortized cost of an append constant */
-        size_t const doubled = fl->capacity * 2;
-        framelogEntry_t* const grown = realloc(fl->entries,
-                                               sizeof(framelogEntry_t) * doubled);
-
-        if (grown == NULL) return ERROR(memory_allocation);
-
-        fl->entries = grown;
-        assert(doubled <= UINT_MAX);
-        fl->capacity = (U32)doubled;
-    }
-
-    {   framelogEntry_t* const slot = &fl->entries[fl->size];
-        slot->cSize = compressedSize;
-        slot->dSize = decompressedSize;
-        slot->checksum = checksum;
-    }
-    fl->size++;
-
-    return 0;
-}
-
-/* Ends the current frame, logs it and gets the stream ready for the next one.
- * @return : 0 once the frame is complete and logged; otherwise the non-zero
- *           value of ZSTD_endStream() (output must be flushed and the call
- *           repeated) or of ZSTD_seekable_logFrame() */
-size_t ZSTD_seekable_endFrame(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output)
-{
-    size_t const startPos = output->pos;
-    size_t const pending = ZSTD_endStream(zcs->cstream, output);
-
-    /* whatever was emitted belongs to the current frame */
-    zcs->frameCSize += output->pos - startPos;
-
-    /* not fully flushed (or failed) : the frame cannot be logged yet */
-    if (pending != 0) return pending;
-
-    {   unsigned checksum = 0;
-        if (zcs->framelog.checksumFlag) {
-            /* keep the low 32 bits of the XXH64 digest */
-            checksum = XXH64_digest(&zcs->xxhState) & 0xFFFFFFFFU;
-        }
-        {   size_t const logged = ZSTD_seekable_logFrame(
-                    &zcs->framelog, zcs->frameCSize, zcs->frameDSize, checksum);
-            if (logged != 0) return logged;
-        }
-    }
-
-    /* start the next frame from a clean state */
-    zcs->frameDSize = 0;
-    zcs->frameCSize = 0;
-
-    ZSTD_resetCStream(zcs->cstream, 0);
-    if (zcs->framelog.checksumFlag) {
-        XXH64_reset(&zcs->xxhState, 0);
-    }
-
-    return 0;
-}
-
-/* Compresses input into the current frame, never feeding more than what is
- * left before the frame reaches maxFrameSize, and ends the frame once that
- * size is reached. `input->pos` and `output->pos` are advanced.
- * @return : an error code, or a hint : maxFrameSize right after the frame
- *           limit was reached, otherwise the number of input bytes the
- *           current frame can still take. */
-size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
-{
- const BYTE* const inBase = (const BYTE*) input->src + input->pos;
- size_t inLen = input->size - input->pos;
-
- /* cap the input at the room left in the current frame */
- inLen = MIN(inLen, (size_t)(zcs->maxFrameSize - zcs->frameDSize));
-
- /* if we haven't finished flushing the last frame, don't start writing a new one */
- /* (in that situation frameDSize still equals maxFrameSize, so inLen is 0) */
- if (inLen > 0) {
- ZSTD_inBuffer inTmp = { inBase, inLen, 0 };
- size_t const prevOutPos = output->pos;
-
- size_t const ret = ZSTD_compressStream(zcs->cstream, output, &inTmp);
-
- /* hash only the bytes that were actually consumed */
- if (zcs->framelog.checksumFlag) {
- XXH64_update(&zcs->xxhState, inBase, inTmp.pos);
- }
-
- zcs->frameCSize += output->pos - prevOutPos;
- zcs->frameDSize += inTmp.pos;
-
- input->pos += inTmp.pos;
-
- /* counters are updated before the error is reported */
- if (ZSTD_isError(ret)) return ret;
- }
-
- if (zcs->maxFrameSize == zcs->frameDSize) {
- /* log the frame and start over */
- /* a non-error, non-zero result (flush still pending) is not returned : */
- /* the frame stays open and the next call retries ending it */
- size_t const ret = ZSTD_seekable_endFrame(zcs, output);
- if (ZSTD_isError(ret)) return ret;
-
- /* get the client ready for the next frame */
- return (size_t)zcs->maxFrameSize;
- }
-
- return (size_t)(zcs->maxFrameSize - zcs->frameDSize);
-}
-
-/* Total size in bytes of the serialized seek table for `fl` :
- * skippable frame header + one entry per logged frame + footer.
- * An entry takes 8 bytes, plus 4 when checksums are enabled. */
-static inline size_t ZSTD_seekable_seekTableSize(const ZSTD_frameLog* fl)
-{
-    size_t entrySize = 8;
-    if (fl->checksumFlag) entrySize += 4;
-
-    return ZSTD_SKIPPABLEHEADERSIZE + entrySize * fl->size + ZSTD_seekTableFooterSize;
-}
-
-/* Writes the little-endian 32-bit `value` that lives at byte `offset` of the
- * seek table, resuming in the middle of the word if an earlier call could
- * only write part of it. Does nothing when fl->seekTablePos is already past
- * the word.
- * @return : 0 if the word was already written or was written whole by this
- *           call; otherwise (fewer than 4 bytes written by this call) the
- *           number of seek table bytes that remain to be written. Note that
- *           this is also the result when this call writes the final bytes of
- *           a word started earlier. */
-static inline size_t ZSTD_stwrite32(ZSTD_frameLog* fl,
- ZSTD_outBuffer* output, U32 const value,
- U32 const offset)
-{
- if (fl->seekTablePos < offset + 4) {
- BYTE tmp[4]; /* so that we can work with buffers too small to write a whole word to */
- /* bytes to copy : limited by output room and by what is left of the word */
- size_t const lenWrite =
- MIN(output->size - output->pos, offset + 4 - fl->seekTablePos);
- MEM_writeLE32(tmp, value);
- /* skip the bytes of the word that were written by a previous call */
- memcpy((BYTE*)output->dst + output->pos,
- tmp + (fl->seekTablePos - offset), lenWrite);
- output->pos += lenWrite;
- fl->seekTablePos += lenWrite;
-
- if (lenWrite < 4) return ZSTD_seekable_seekTableSize(fl) - fl->seekTablePos;
- }
- return 0;
-}
-
-/* ZSTD_seekable_writeSeekTable() :
- * Serializes the frame log into `output` as a seek table, i.e. a skippable
- * frame made of a header, one entry per frame and a footer.
- * The write is resumable : fl->seekTableIndex is the entry being emitted and
- * fl->seekTablePos the number of seek table bytes already written, so when
- * the function has to return early because of a small buffer, calling it
- * again continues where it left off.
- * @return : 0 once the whole table is written, a non-zero count of pending
- *           bytes when more output room is needed, or an error code */
-size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output)
-{
-    size_t const sizePerFrame = 8 + (fl->checksumFlag?4:0);
-    size_t const seekTableLen = ZSTD_seekable_seekTableSize(fl);
-
-    /* skippable frame header : magic number, then size of the content */
-    CHECK_Z(ZSTD_stwrite32(fl, output, ZSTD_MAGIC_SKIPPABLE_START | 0xE, 0));
-    assert(seekTableLen <= (size_t)UINT_MAX);
-    CHECK_Z(ZSTD_stwrite32(fl, output, (U32)seekTableLen - ZSTD_SKIPPABLEHEADERSIZE, 4));
-
-    /* entries : compressed size, decompressed size, optional checksum */
-    while (fl->seekTableIndex < fl->size) {
-        unsigned long long const start = ZSTD_SKIPPABLEHEADERSIZE + sizePerFrame * fl->seekTableIndex;
-        assert(start + 8 <= UINT_MAX);
-        CHECK_Z(ZSTD_stwrite32(fl, output,
-                               fl->entries[fl->seekTableIndex].cSize,
-                               (U32)start + 0));
-
-        CHECK_Z(ZSTD_stwrite32(fl, output,
-                               fl->entries[fl->seekTableIndex].dSize,
-                               (U32)start + 4));
-
-        if (fl->checksumFlag) {
-            CHECK_Z(ZSTD_stwrite32(
-                    fl, output, fl->entries[fl->seekTableIndex].checksum,
-                    (U32)start + 8));
-        }
-
-        fl->seekTableIndex++;
-    }
-
-    /* footer : number of frames, descriptor byte, seekable magic number */
-    assert(seekTableLen <= UINT_MAX);
-    CHECK_Z(ZSTD_stwrite32(fl, output, fl->size,
-                           (U32)seekTableLen - ZSTD_seekTableFooterSize));
-
-    if (output->size - output->pos < 1) return seekTableLen - fl->seekTablePos;
-    if (fl->seekTablePos < seekTableLen - 4) {
-        /* Checksum_Flag is bit 7. The flag is normalized to 0/1 first :
-         * shifting any other non-zero value would push the bit out of the
-         * byte and announce "no checksum" for entries that do carry one. */
-        BYTE const sfd = (BYTE)((fl->checksumFlag ? 1 : 0) << 7);
-
-        ((BYTE*)output->dst)[output->pos] = sfd;
-        output->pos++;
-        fl->seekTablePos++;
-    }
-
-    CHECK_Z(ZSTD_stwrite32(fl, output, ZSTD_SEEKABLE_MAGICNUMBER,
-                           (U32)seekTableLen - 4));
-
-    /* everything has been emitted : the cursor must sit exactly at the end */
-    if (fl->seekTablePos != seekTableLen) return ERROR(GENERIC);
-    return 0;
-}
-
-/* Ends the frame in progress (if any), then writes the seek table.
- * @return : 0 when everything has been written, an error code, or a non-zero
- *           amount of data still to flush; call again in that last case. */
-size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output)
-{
-    /* a frame needs closing only before the seek table has been started and
-     * only if it received some input */
-    int const frameOpen = !zcs->writingSeekTable && zcs->frameDSize;
-
-    if (frameOpen) {
-        size_t const pending = ZSTD_seekable_endFrame(zcs, output);
-        if (ZSTD_isError(pending)) return pending;
-        /* frame not flushed yet : the hint also counts the upcoming seek table */
-        if (pending != 0) return pending + ZSTD_seekable_seekTableSize(&zcs->framelog);
-    }
-
-    zcs->writingSeekTable = 1;
-
-    return ZSTD_seekable_writeSeekTable(&zcs->framelog, output);
-}
diff --git a/contrib/seekable_format/zstdseek_decompress.c b/contrib/seekable_format/zstdseek_decompress.c
deleted file mode 100644
index abfd1e902717..000000000000
--- a/contrib/seekable_format/zstdseek_decompress.c
+++ /dev/null
@@ -1,467 +0,0 @@
-/*
- * Copyright (c) 2017-present, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
-/* *********************************************************
-* Turn on Large Files support (>4GB) for 32-bit Linux/Unix
-***********************************************************/
-#if !defined(__64BIT__) || defined(__MINGW32__) /* No point defining Large file for 64 bit but MinGW-w64 requires it */
-# if !defined(_FILE_OFFSET_BITS)
-# define _FILE_OFFSET_BITS 64 /* turn off_t into a 64-bit type for ftello, fseeko */
-# endif
-# if !defined(_LARGEFILE_SOURCE) /* obsolete macro, replaced with _FILE_OFFSET_BITS */
-# define _LARGEFILE_SOURCE 1 /* Large File Support extension (LFS) - fseeko, ftello */
-# endif
-# if defined(_AIX) || defined(__hpux)
-# define _LARGE_FILES /* Large file support on 32-bits AIX and HP-UX */
-# endif
-#endif
-
-/* ************************************************************
-* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
-***************************************************************/
-/* LONG_SEEK(file, offset, origin) : seek primitive used by the FILE* backend.
- * Picks, per platform, a variant that takes a wide offset and falls back to
- * plain fseek() when none is known. */
-/* NOTE(review): PLATFORM_POSIX_VERSION is tested below, but no header is
- * included before this point in the file, so the macro looks undefined here
- * and the fseeko branch would never be selected -- confirm. */
-#if defined(_MSC_VER) && _MSC_VER >= 1400
-# define LONG_SEEK _fseeki64
-#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
-# define LONG_SEEK fseeko
-#elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__)
-# define LONG_SEEK fseeko64
-#elif defined(_WIN32) && !defined(__DJGPP__)
-# include <windows.h>
- /* Win32 fallback : seeks through the OS handle behind `file`.
-  * Returns 0 on success and -1 on failure. */
- static int LONG_SEEK(FILE* file, __int64 offset, int origin) {
- LARGE_INTEGER off;
- DWORD method;
- off.QuadPart = offset;
- /* translate the stdio origin into its Win32 counterpart; */
- /* anything that is not SEEK_END / SEEK_CUR is treated as SEEK_SET */
- if (origin == SEEK_END)
- method = FILE_END;
- else if (origin == SEEK_CUR)
- method = FILE_CURRENT;
- else
- method = FILE_BEGIN;
-
- if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method))
- return 0;
- else
- return -1;
- }
-#else
-# define LONG_SEEK fseek
-#endif
-
-#include <stdlib.h> /* malloc, free */
-#include <stdio.h> /* FILE* */
-#include <limits.h> /* UINT_MAX */
-#include <assert.h>
-
-#define XXH_STATIC_LINKING_ONLY
-#define XXH_NAMESPACE ZSTD_
-#include "xxhash.h"
-
-#define ZSTD_STATIC_LINKING_ONLY
-#include "zstd.h"
-#include "zstd_errors.h"
-#include "mem.h"
-#include "zstd_seekable.h"
-
-#undef ERROR
-#define ERROR(name) ((size_t)-ZSTD_error_##name)
-
-#define CHECK_IO(f) { int const errcod = (f); if (errcod < 0) return ERROR(seekableIO); }
-
-#undef MIN
-#undef MAX
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-
-/* Special-case callbacks for FILE* and in-memory modes, so that we can treat
- * them the same way as the advanced API */
-/* Read callback of the FILE* backend : `opaque` is the FILE*.
- * @return : 0 if exactly `n` bytes were read, -1 otherwise */
-static int ZSTD_seekable_read_FILE(void* opaque, void* buffer, size_t n)
-{
-    FILE* const file = (FILE*)opaque;
-    return (fread(buffer, 1, n, file) == n) ? 0 : -1;
-}
-
-/* Seek callback of the FILE* backend : `opaque` is the FILE*.
- * @return : the non-zero result of LONG_SEEK() if the seek failed,
- *           otherwise the result of the fflush() issued after it */
-static int ZSTD_seekable_seek_FILE(void* opaque, long long offset, int origin)
-{
-    FILE* const file = (FILE*)opaque;
-    int const seekStatus = LONG_SEEK(file, offset, origin);
-
-    return seekStatus ? seekStatus : fflush(file);
-}
-
-/* Cursor over an in-memory seekable file, used as `opaque` by the buffer
- * read and seek callbacks. */
-typedef struct {
-    const void *ptr;    /* start of the data */
-    size_t size;        /* total number of bytes at `ptr` */
-    size_t pos;         /* current read position, in bytes from `ptr` */
-} buffWrapper_t;
-
-/* Read callback of the in-memory backend.
- * Copies `n` bytes from the current position into `buffer` and advances the
- * position.
- * @return : 0 on success, -1 if fewer than `n` bytes remain
- *           (nothing is copied and the position is unchanged) */
-static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n)
-{
-    buffWrapper_t* const buff = (buffWrapper_t*) opaque;
-
-    /* test against the bytes that remain : `pos + n` could wrap around for
-     * a huge `n` and slip past an addition-based bounds check */
-    if (buff->pos > buff->size || n > buff->size - buff->pos) return -1;
-
-    memcpy(buffer, (const unsigned char*)buff->ptr + buff->pos, n);
-    buff->pos += n;
-    return 0;
-}
-
-/* Seek callback of the in-memory backend.
- * Moves the position to `offset` relative to `origin` (SEEK_SET, SEEK_CUR or
- * SEEK_END).
- * @return : 0 on success, -1 if the target lies outside [0, size] or if
- *           `origin` is not one of the three supported values */
-static int ZSTD_seekable_seek_buff(void* opaque, long long offset, int origin)
-{
-    buffWrapper_t* const buff = (buffWrapper_t*) opaque;
-    unsigned long long newOffset;
-    switch (origin) {
-    case SEEK_SET:
-        newOffset = offset;
-        break;
-    case SEEK_CUR:
-        newOffset = (unsigned long long)buff->pos + offset;
-        break;
-    case SEEK_END:
-        newOffset = (unsigned long long)buff->size + offset;
-        break;
-    default:
-        assert(0);  /* not possible */
-        /* with assertions compiled out, fail cleanly rather than go on
-         * with an uninitialized offset */
-        return -1;
-    }
-    /* a target before the start wraps to a huge unsigned value and is
-     * rejected by the same test as a target past the end */
-    if (newOffset > buff->size) {
-        return -1;
-    }
-    buff->pos = (size_t)newOffset;
-    return 0;
-}
-
-/* One seek table entry as kept in memory by the decoder.
- * Frame sizes are obtained by subtracting the offsets of two consecutive
- * entries. */
-typedef struct {
- U64 cOffset;  /* offset of the frame in the compressed data */
- U64 dOffset;  /* offset of the frame in the decompressed data */
- U32 checksum; /* presumably the checksum stored for the frame -- its use is not visible here */
-} seekEntry_t;
-
-/* In-memory seek table. `entries` is allocated with tableLen + 1 elements,
- * so that size computations on the last frame need no special case. */
-typedef struct {
- seekEntry_t* entries;
- size_t tableLen; /* number of frames */
-
- int checksumFlag; /* presumably mirrors Checksum_Flag of the footer -- confirm in the loader */
-} seekTable_t;
-
-/* Size of each of the two staging buffers embedded in ZSTD_seekable */
-#define SEEKABLE_BUFF_SIZE ZSTD_BLOCKSIZE_MAX
-
-/* State of a seekable decompressor. */
-struct ZSTD_seekable_s {
- ZSTD_DStream* dstream; /* underlying zstd decompression stream */
- seekTable_t seekTable; /* entries are released by ZSTD_seekable_free() */
- ZSTD_seekable_customFile src; /* read / seek callbacks and their opaque handle */
-
- /* NOTE(review): the next two fields are only used outside the part of */
- /* the file visible here; presumably the current position -- confirm */
- U64 decompressedOffset;
- U32 curFrame;
-
- BYTE inBuff[SEEKABLE_BUFF_SIZE]; /* need to do our own input buffering */
- BYTE outBuff[SEEKABLE_BUFF_SIZE]; /* so we can efficiently decompress the
- starts of chunks before we get to the
- desired section */
- ZSTD_inBuffer in; /* maintain continuity across ZSTD_seekable_decompress operations */
- buffWrapper_t buffWrapper; /* for `src.opaque` in in-memory mode */
-
- XXH64_state_t xxhState; /* hash state, presumably for checksum verification -- confirm */
-};
-
-/* Allocates a zero-initialized seekable decompressor together with its
- * zstd decompression stream.
- * @return : the new object, or NULL if an allocation failed */
-ZSTD_seekable* ZSTD_seekable_create(void)
-{
-    ZSTD_seekable* const zs = calloc(1, sizeof(ZSTD_seekable));
-    if (zs == NULL) return NULL;
-
-    zs->dstream = ZSTD_createDStream();
-    if (zs->dstream != NULL) return zs;
-
-    /* no decompression stream : give the object back */
-    free(zs);
-    return NULL;
-}
-
-/* Releases a seekable decompressor, its decompression stream and its seek
- * table entries. NULL is accepted.
- * @return : always 0 */
-size_t ZSTD_seekable_free(ZSTD_seekable* zs)
-{
-    if (zs != NULL) {
-        ZSTD_freeDStream(zs->dstream);
-        free(zs->seekTable.entries);
-        free(zs);
-    }
-    return 0;
-}
-
-/** ZSTD_seekable_offsetToFrameIndex() :
- *  Binary search for the last frame whose decompressed offset is <= pos.
- *  @return : the index of that frame, or the number of frames when `pos`
- *            is at or beyond the end of the decompressed data */
-unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long pos)
-{
-    seekEntry_t const* const table = zs->seekTable.entries;
-    size_t const numFrames = zs->seekTable.tableLen;
-    U32 first = 0;
-    U32 last = (U32)numFrames;
-    assert(numFrames <= UINT_MAX);
-
-    /* the extra entry past the last frame holds the total decompressed size */
-    if (pos >= table[numFrames].dOffset) {
-        return (U32)numFrames;
-    }
-
-    /* invariant : the answer lies in [first, last) */
-    while (last - first > 1) {
-        U32 const middle = first + ((last - first) >> 1);
-        if (pos < table[middle].dOffset) {
-            last = middle;
-        } else {
-            first = middle;
-        }
-    }
-    return first;
-}
-
-/* @return : the number of frames recorded in the seek table */
-unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs)
-{
-    size_t const numFrames = zs->seekTable.tableLen;
-    assert(numFrames <= UINT_MAX);
-    return (unsigned)numFrames;
-}
-
-/* @return : the offset of frame `frameIndex` in the compressed data, or
- *           ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE if there is no such frame */
-unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex)
-{
-    seekTable_t const* const table = &zs->seekTable;
-
-    if (frameIndex < table->tableLen) {
-        return table->entries[frameIndex].cOffset;
-    }
-    return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
-}
-
-/* @return : the offset of frame `frameIndex` in the decompressed data, or
- *           ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE if there is no such frame */
-unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex)
-{
-    seekTable_t const* const table = &zs->seekTable;
-
-    if (frameIndex < table->tableLen) {
-        return table->entries[frameIndex].dOffset;
-    }
-    return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
-}
-
-/* @return : the compressed size of frame `frameIndex`, i.e. the distance
- *           to the compressed offset of the following entry, or a
- *           frameIndex_tooLarge error code if there is no such frame */
-size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex)
-{
-    seekEntry_t const* entry;
-
-    if (frameIndex >= zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
-
-    entry = zs->seekTable.entries + frameIndex;
-    return entry[1].cOffset - entry[0].cOffset;
-}
-
-size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex)
-{
- if (frameIndex > zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
- return zs->seekTable.entries[frameIndex + 1].dOffset -
- zs->seekTable.entries[frameIndex].dOffset;
-}
-
-static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs)
-{
- int checksumFlag;
- ZSTD_seekable_customFile src = zs->src;
- /* read the footer, fixed size */
- CHECK_IO(src.seek(src.opaque, -(int)ZSTD_seekTableFooterSize, SEEK_END));
- CHECK_IO(src.read(src.opaque, zs->inBuff, ZSTD_seekTableFooterSize));
-
- if (MEM_readLE32(zs->inBuff + 5) != ZSTD_SEEKABLE_MAGICNUMBER) {
- return ERROR(prefix_unknown);
- }
-
- { BYTE const sfd = zs->inBuff[4];
- checksumFlag = sfd >> 7;
-
- /* check reserved bits */
- if ((checksumFlag >> 2) & 0x1f) {
- return ERROR(corruption_detected);
- }
- }
-
- { U32 const numFrames = MEM_readLE32(zs->inBuff);
- U32 const sizePerEntry = 8 + (checksumFlag?4:0);
- U32 const tableSize = sizePerEntry * numFrames;
- U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_SKIPPABLEHEADERSIZE;
-
- U32 remaining = frameSize - ZSTD_seekTableFooterSize; /* don't need to re-read footer */
- {
- U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE);
-
- CHECK_IO(src.seek(src.opaque, -(S64)frameSize, SEEK_END));
- CHECK_IO(src.read(src.opaque, zs->inBuff, toRead));
-
- remaining -= toRead;
- }
-
- if (MEM_readLE32(zs->inBuff) != (ZSTD_MAGIC_SKIPPABLE_START | 0xE)) {
- return ERROR(prefix_unknown);
- }
- if (MEM_readLE32(zs->inBuff+4) + ZSTD_SKIPPABLEHEADERSIZE != frameSize) {
- return ERROR(prefix_unknown);
- }
-
- { /* Allocate an extra entry at the end so that we can do size
- * computations on the last element without special case */
- seekEntry_t* entries = (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1));
-
- U32 idx = 0;
- U32 pos = 8;
-
-
- U64 cOffset = 0;
- U64 dOffset = 0;
-
- if (!entries) {
- free(entries);
- return ERROR(memory_allocation);
- }
-
- /* compute cumulative positions */
- for (; idx < numFrames; idx++) {
- if (pos + sizePerEntry > SEEKABLE_BUFF_SIZE) {
- U32 const offset = SEEKABLE_BUFF_SIZE - pos;
- U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE - offset);
- memmove(zs->inBuff, zs->inBuff + pos, offset); /* move any data we haven't read yet */
- CHECK_IO(src.read(src.opaque, zs->inBuff+offset, toRead));
- remaining -= toRead;
- pos = 0;
- }
- entries[idx].cOffset = cOffset;
- entries[idx].dOffset = dOffset;
-
- cOffset += MEM_readLE32(zs->inBuff + pos);
- pos += 4;
- dOffset += MEM_readLE32(zs->inBuff + pos);
- pos += 4;
- if (checksumFlag) {
- entries[idx].checksum = MEM_readLE32(zs->inBuff + pos);
- pos += 4;
- }
- }
- entries[numFrames].cOffset = cOffset;
- entries[numFrames].dOffset = dOffset;
-
- zs->seekTable.entries = entries;
- zs->seekTable.tableLen = numFrames;
- zs->seekTable.checksumFlag = checksumFlag;
- return 0;
- }
- }
-}
-
-size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize)
-{
- zs->buffWrapper = (buffWrapper_t){src, srcSize, 0};
- { ZSTD_seekable_customFile srcFile = {&zs->buffWrapper,
- &ZSTD_seekable_read_buff,
- &ZSTD_seekable_seek_buff};
- return ZSTD_seekable_initAdvanced(zs, srcFile); }
-}
-
-size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src)
-{
- ZSTD_seekable_customFile srcFile = {src, &ZSTD_seekable_read_FILE,
- &ZSTD_seekable_seek_FILE};
- return ZSTD_seekable_initAdvanced(zs, srcFile);
-}
-
-size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src)
-{
- zs->src = src;
-
- { const size_t seekTableInit = ZSTD_seekable_loadSeekTable(zs);
- if (ZSTD_isError(seekTableInit)) return seekTableInit; }
-
- zs->decompressedOffset = (U64)-1;
- zs->curFrame = (U32)-1;
-
- { const size_t dstreamInit = ZSTD_initDStream(zs->dstream);
- if (ZSTD_isError(dstreamInit)) return dstreamInit; }
- return 0;
-}
-
-size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, unsigned long long offset)
-{
- U32 targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, offset);
- do {
- /* check if we can continue from a previous decompress job */
- if (targetFrame != zs->curFrame || offset != zs->decompressedOffset) {
- zs->decompressedOffset = zs->seekTable.entries[targetFrame].dOffset;
- zs->curFrame = targetFrame;
-
- CHECK_IO(zs->src.seek(zs->src.opaque,
- zs->seekTable.entries[targetFrame].cOffset,
- SEEK_SET));
- zs->in = (ZSTD_inBuffer){zs->inBuff, 0, 0};
- XXH64_reset(&zs->xxhState, 0);
- ZSTD_resetDStream(zs->dstream);
- }
-
- while (zs->decompressedOffset < offset + len) {
- size_t toRead;
- ZSTD_outBuffer outTmp;
- size_t prevOutPos;
- if (zs->decompressedOffset < offset) {
- /* dummy decompressions until we get to the target offset */
- outTmp = (ZSTD_outBuffer){zs->outBuff, MIN(SEEKABLE_BUFF_SIZE, offset - zs->decompressedOffset), 0};
- } else {
- outTmp = (ZSTD_outBuffer){dst, len, zs->decompressedOffset - offset};
- }
-
- prevOutPos = outTmp.pos;
- toRead = ZSTD_decompressStream(zs->dstream, &outTmp, &zs->in);
- if (ZSTD_isError(toRead)) {
- return toRead;
- }
-
- if (zs->seekTable.checksumFlag) {
- XXH64_update(&zs->xxhState, (BYTE*)outTmp.dst + prevOutPos,
- outTmp.pos - prevOutPos);
- }
- zs->decompressedOffset += outTmp.pos - prevOutPos;
-
- if (toRead == 0) {
- /* frame complete */
-
- /* verify checksum */
- if (zs->seekTable.checksumFlag &&
- (XXH64_digest(&zs->xxhState) & 0xFFFFFFFFU) !=
- zs->seekTable.entries[targetFrame].checksum) {
- return ERROR(corruption_detected);
- }
-
- if (zs->decompressedOffset < offset + len) {
- /* go back to the start and force a reset of the stream */
- targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, zs->decompressedOffset);
- }
- break;
- }
-
- /* read in more data if we're done with this buffer */
- if (zs->in.pos == zs->in.size) {
- toRead = MIN(toRead, SEEKABLE_BUFF_SIZE);
- CHECK_IO(zs->src.read(zs->src.opaque, zs->inBuff, toRead));
- zs->in.size = toRead;
- zs->in.pos = 0;
- }
- }
- } while (zs->decompressedOffset != offset + len);
-
- return len;
-}
-
-size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex)
-{
- if (frameIndex >= zs->seekTable.tableLen) {
- return ERROR(frameIndex_tooLarge);
- }
-
- {
- size_t const decompressedSize =
- zs->seekTable.entries[frameIndex + 1].dOffset -
- zs->seekTable.entries[frameIndex].dOffset;
- if (dstSize < decompressedSize) {
- return ERROR(dstSize_tooSmall);
- }
- return ZSTD_seekable_decompress(
- zs, dst, decompressedSize,
- zs->seekTable.entries[frameIndex].dOffset);
- }
-}
diff --git a/contrib/snap/snapcraft.yaml b/contrib/snap/snapcraft.yaml
deleted file mode 100644
index 0a77946ae0aa..000000000000
--- a/contrib/snap/snapcraft.yaml
+++ /dev/null
@@ -1,28 +0,0 @@
-name: zstd
-version: git
-summary: Zstandard - Fast real-time compression algorithm
-description: |
- Zstandard, or zstd as short version, is a fast lossless compression
- algorithm, targeting real-time compression scenarios at zlib-level and better
- compression ratios. It's backed by a very fast entropy stage, provided by
- Huff0 and FSE library
-
-grade: devel # must be 'stable' to release into candidate/stable channels
-confinement: devmode # use 'strict' once you have the right plugs and slots
-
-apps:
- zstd:
- command: usr/local/bin/zstd
- plugs: [home, removable-media]
- zstdgrep:
- command: usr/local/bin/zstdgrep
- plugs: [home, removable-media]
- zstdless:
- command: usr/local/bin/zstdless
- plugs: [home, removable-media]
-
-parts:
- zstd:
- source: .
- plugin: make
- build-packages: [g++]
diff --git a/doc/educational_decoder/Makefile b/doc/educational_decoder/Makefile
index 704f867661a7..316c6eadc4ac 100644
--- a/doc/educational_decoder/Makefile
+++ b/doc/educational_decoder/Makefile
@@ -1,10 +1,11 @@
# ################################################################
-# Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+# Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
+# You may select, at your option, one of the above-listed licenses.
# ################################################################
ZSTD ?= zstd # note: requires zstd installation on local system
@@ -36,7 +37,7 @@ harness: $(HARNESS_FILES)
$(CC) $(FLAGS) $^ -o $@
clean:
- @$(RM) harness
+ @$(RM) harness *.o
@$(RM) -rf harness.dSYM # MacOS specific
test: harness
@@ -59,4 +60,3 @@ test: harness
@./harness tmp.zst tmp dictionary
@$(DIFF) -s tmp README.md
@$(RM) tmp* dictionary
- @$(MAKE) clean
diff --git a/doc/educational_decoder/README.md b/doc/educational_decoder/README.md
index e3b9bf58e5ae..c89451ca0784 100644
--- a/doc/educational_decoder/README.md
+++ b/doc/educational_decoder/README.md
@@ -13,6 +13,13 @@ It also contains implementations of Huffman and FSE table decoding.
[Zstandard format specification]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
[format specification]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
+While the library's primary objective is code clarity,
+it also happens to compile into a small object file.
+The object file can be made even smaller by removing error messages,
+using the macro directive `ZDEC_NO_MESSAGE` at compilation time.
+This can be reduced even further by foregoing dictionary support,
+by defining `ZDEC_NO_DICTIONARY`.
+
`harness.c` provides a simple test harness around the decoder:
harness <input-file> <output-file> [dictionary]
diff --git a/doc/educational_decoder/harness.c b/doc/educational_decoder/harness.c
index a704f6bdb29f..1403a6ed655b 100644
--- a/doc/educational_decoder/harness.c
+++ b/doc/educational_decoder/harness.c
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2017-present, Facebook, Inc.
+ * Copyright (c) 2017-2020, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
*/
#include <stdio.h>
@@ -21,108 +22,98 @@ typedef unsigned char u8;
// Protect against allocating too much memory for output
#define MAX_OUTPUT_SIZE ((size_t)1024 * 1024 * 1024)
-static size_t read_file(const char *path, u8 **ptr)
+// Error message then exit
+#define ERR_OUT(...) { fprintf(stderr, __VA_ARGS__); exit(1); }
+
+
+typedef struct {
+ u8* address;
+ size_t size;
+} buffer_s;
+
+static void freeBuffer(buffer_s b) { free(b.address); }
+
+static buffer_s read_file(const char *path)
{
FILE* const f = fopen(path, "rb");
- if (!f) {
- fprintf(stderr, "failed to open file %s \n", path);
- exit(1);
- }
+ if (!f) ERR_OUT("failed to open file %s \n", path);
fseek(f, 0L, SEEK_END);
size_t const size = (size_t)ftell(f);
rewind(f);
- *ptr = malloc(size);
- if (!ptr) {
- fprintf(stderr, "failed to allocate memory to hold %s \n", path);
- exit(1);
- }
+ void* const ptr = malloc(size);
+ if (!ptr) ERR_OUT("failed to allocate memory to hold %s \n", path);
- size_t const read = fread(*ptr, 1, size, f);
- if (read != size) { /* must read everything in one pass */
- fprintf(stderr, "error while reading file %s \n", path);
- exit(1);
- }
+ size_t const read = fread(ptr, 1, size, f);
+ if (read != size) ERR_OUT("error while reading file %s \n", path);
fclose(f);
-
- return read;
+ buffer_s const b = { ptr, size };
+ return b;
}
-static void write_file(const char *path, const u8 *ptr, size_t size)
+static void write_file(const char* path, const u8* ptr, size_t size)
{
FILE* const f = fopen(path, "wb");
- if (!f) {
- fprintf(stderr, "failed to open file %s \n", path);
- exit(1);
- }
+ if (!f) ERR_OUT("failed to open file %s \n", path);
size_t written = 0;
while (written < size) {
written += fwrite(ptr+written, 1, size, f);
- if (ferror(f)) {
- fprintf(stderr, "error while writing file %s\n", path);
- exit(1);
- } }
+ if (ferror(f)) ERR_OUT("error while writing file %s\n", path);
+ }
fclose(f);
}
int main(int argc, char **argv)
{
- if (argc < 3) {
- fprintf(stderr, "usage: %s <file.zst> <out_path> [dictionary] \n",
- argv[0]);
+ if (argc < 3)
+ ERR_OUT("usage: %s <file.zst> <out_path> [dictionary] \n", argv[0]);
- return 1;
- }
+ buffer_s const input = read_file(argv[1]);
- u8* input;
- size_t const input_size = read_file(argv[1], &input);
-
- u8* dict = NULL;
- size_t dict_size = 0;
+ buffer_s dict = { NULL, 0 };
if (argc >= 4) {
- dict_size = read_file(argv[3], &dict);
+ dict = read_file(argv[3]);
}
- size_t out_capacity = ZSTD_get_decompressed_size(input, input_size);
+ size_t out_capacity = ZSTD_get_decompressed_size(input.address, input.size);
if (out_capacity == (size_t)-1) {
- out_capacity = MAX_COMPRESSION_RATIO * input_size;
+ out_capacity = MAX_COMPRESSION_RATIO * input.size;
fprintf(stderr, "WARNING: Compressed data does not contain "
"decompressed size, going to assume the compression "
"ratio is at most %d (decompressed size of at most "
"%u) \n",
MAX_COMPRESSION_RATIO, (unsigned)out_capacity);
}
- if (out_capacity > MAX_OUTPUT_SIZE) {
- fprintf(stderr,
- "Required output size too large for this implementation \n");
- return 1;
- }
+ if (out_capacity > MAX_OUTPUT_SIZE)
+ ERR_OUT("Required output size too large for this implementation \n");
u8* const output = malloc(out_capacity);
- if (!output) {
- fprintf(stderr, "failed to allocate memory \n");
- return 1;
- }
+ if (!output) ERR_OUT("failed to allocate memory \n");
dictionary_t* const parsed_dict = create_dictionary();
- if (dict) {
- parse_dictionary(parsed_dict, dict, dict_size);
+ if (dict.size) {
+#if defined (ZDEC_NO_DICTIONARY)
+ printf("dict.size = %zu \n", dict.size);
+ ERR_OUT("no dictionary support \n");
+#else
+ parse_dictionary(parsed_dict, dict.address, dict.size);
+#endif
}
size_t const decompressed_size =
ZSTD_decompress_with_dict(output, out_capacity,
- input, input_size,
+ input.address, input.size,
parsed_dict);
free_dictionary(parsed_dict);
write_file(argv[2], output, decompressed_size);
- free(input);
+ freeBuffer(input);
+ freeBuffer(dict);
free(output);
- free(dict);
return 0;
}
diff --git a/doc/educational_decoder/zstd_decompress.c b/doc/educational_decoder/zstd_decompress.c
index 64e1b8738d06..605918b39f85 100644
--- a/doc/educational_decoder/zstd_decompress.c
+++ b/doc/educational_decoder/zstd_decompress.c
@@ -1,34 +1,52 @@
/*
- * Copyright (c) 2017-present, Facebook, Inc.
+ * Copyright (c) 2017-2020, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
*/
/// Zstandard educational decoder implementation
/// See https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#include <stdint.h> // uint8_t, etc.
+#include <stdlib.h> // malloc, free, exit
+#include <stdio.h> // fprintf
+#include <string.h> // memset, memcpy
#include "zstd_decompress.h"
-/******* UTILITY MACROS AND TYPES *********************************************/
-// Max block size decompressed size is 128 KB and literal blocks can't be
-// larger than their block
-#define MAX_LITERALS_SIZE ((size_t)128 * 1024)
+/******* IMPORTANT CONSTANTS *********************************************/
+
+// Zstandard frame
+// "Magic_Number
+// 4 Bytes, little-endian format. Value : 0xFD2FB528"
+#define ZSTD_MAGIC_NUMBER 0xFD2FB528U
+
+// The size of `Block_Content` is limited by `Block_Maximum_Size`,
+#define ZSTD_BLOCK_SIZE_MAX ((size_t)128 * 1024)
+
+// literal blocks can't be larger than their block
+#define MAX_LITERALS_SIZE ZSTD_BLOCK_SIZE_MAX
+
+
+/******* UTILITY MACROS AND TYPES *********************************************/
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#if defined(ZDEC_NO_MESSAGE)
+#define MESSAGE(...)
+#else
+#define MESSAGE(...) fprintf(stderr, "" __VA_ARGS__)
+#endif
+
/// This decoder calls exit(1) when it encounters an error, however a production
/// library should propagate error codes
#define ERROR(s) \
do { \
- fprintf(stderr, "Error: %s\n", s); \
+ MESSAGE("Error: %s\n", s); \
exit(1); \
} while (0)
#define INP_SIZE() \
@@ -39,12 +57,12 @@
#define BAD_ALLOC() ERROR("Memory allocation error")
#define IMPOSSIBLE() ERROR("An impossibility has occurred")
-typedef uint8_t u8;
+typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
-typedef int8_t i8;
+typedef int8_t i8;
typedef int16_t i16;
typedef int32_t i32;
typedef int64_t i64;
@@ -176,10 +194,6 @@ static void HUF_init_dtable_usingweights(HUF_dtable *const table,
/// Free the malloc'ed parts of a decoding table
static void HUF_free_dtable(HUF_dtable *const dtable);
-
-/// Deep copy a decoding table, so that it can be used and free'd without
-/// impacting the source table.
-static void HUF_copy_dtable(HUF_dtable *const dst, const HUF_dtable *const src);
/*** END HUFFMAN PRIMITIVES ***********/
/*** FSE PRIMITIVES *******************/
@@ -241,10 +255,6 @@ static void FSE_init_dtable_rle(FSE_dtable *const dtable, const u8 symb);
/// Free the malloc'ed parts of a decoding table
static void FSE_free_dtable(FSE_dtable *const dtable);
-
-/// Deep copy a decoding table, so that it can be used and free'd without
-/// impacting the source table.
-static void FSE_copy_dtable(FSE_dtable *const dst, const FSE_dtable *const src);
/*** END FSE PRIMITIVES ***************/
/******* END IMPLEMENTATION PRIMITIVE PROTOTYPES ******************************/
@@ -373,7 +383,7 @@ static void execute_match_copy(frame_context_t *const ctx, size_t offset,
size_t ZSTD_decompress(void *const dst, const size_t dst_len,
const void *const src, const size_t src_len) {
- dictionary_t* uninit_dict = create_dictionary();
+ dictionary_t* const uninit_dict = create_dictionary();
size_t const decomp_size = ZSTD_decompress_with_dict(dst, dst_len, src,
src_len, uninit_dict);
free_dictionary(uninit_dict);
@@ -417,12 +427,7 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
static void decode_frame(ostream_t *const out, istream_t *const in,
const dictionary_t *const dict) {
const u32 magic_number = (u32)IO_read_bits(in, 32);
- // Zstandard frame
- //
- // "Magic_Number
- //
- // 4 Bytes, little-endian format. Value : 0xFD2FB528"
- if (magic_number == 0xFD2FB528U) {
+ if (magic_number == ZSTD_MAGIC_NUMBER) {
// ZSTD frame
decode_data_frame(out, in, dict);
@@ -576,43 +581,6 @@ static void parse_frame_header(frame_header_t *const header,
}
}
-/// A dictionary acts as initializing values for the frame context before
-/// decompression, so we implement it by applying it's predetermined
-/// tables and content to the context before beginning decompression
-static void frame_context_apply_dict(frame_context_t *const ctx,
- const dictionary_t *const dict) {
- // If the content pointer is NULL then it must be an empty dict
- if (!dict || !dict->content)
- return;
-
- // If the requested dictionary_id is non-zero, the correct dictionary must
- // be present
- if (ctx->header.dictionary_id != 0 &&
- ctx->header.dictionary_id != dict->dictionary_id) {
- ERROR("Wrong dictionary provided");
- }
-
- // Copy the dict content to the context for references during sequence
- // execution
- ctx->dict_content = dict->content;
- ctx->dict_content_len = dict->content_size;
-
- // If it's a formatted dict copy the precomputed tables in so they can
- // be used in the table repeat modes
- if (dict->dictionary_id != 0) {
- // Deep copy the entropy tables so they can be freed independently of
- // the dictionary struct
- HUF_copy_dtable(&ctx->literals_dtable, &dict->literals_dtable);
- FSE_copy_dtable(&ctx->ll_dtable, &dict->ll_dtable);
- FSE_copy_dtable(&ctx->of_dtable, &dict->of_dtable);
- FSE_copy_dtable(&ctx->ml_dtable, &dict->ml_dtable);
-
- // Copy the repeated offsets
- memcpy(ctx->previous_offsets, dict->previous_offsets,
- sizeof(ctx->previous_offsets));
- }
-}
-
/// Decompress the data from a frame block by block
static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
istream_t *const in) {
@@ -1411,7 +1379,7 @@ size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) {
{
const u32 magic_number = (u32)IO_read_bits(&in, 32);
- if (magic_number == 0xFD2FB528U) {
+ if (magic_number == ZSTD_MAGIC_NUMBER) {
// ZSTD frame
frame_header_t header;
parse_frame_header(&header, &in);
@@ -1431,17 +1399,33 @@ size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) {
/******* END OUTPUT SIZE COUNTING *********************************************/
/******* DICTIONARY PARSING ***************************************************/
-#define DICT_SIZE_ERROR() ERROR("Dictionary size cannot be less than 8 bytes")
-#define NULL_SRC() ERROR("Tried to create dictionary with pointer to null src");
-
dictionary_t* create_dictionary() {
- dictionary_t* dict = calloc(1, sizeof(dictionary_t));
+ dictionary_t* const dict = calloc(1, sizeof(dictionary_t));
if (!dict) {
BAD_ALLOC();
}
return dict;
}
+/// Free an allocated dictionary
+void free_dictionary(dictionary_t *const dict) {
+ HUF_free_dtable(&dict->literals_dtable);
+ FSE_free_dtable(&dict->ll_dtable);
+ FSE_free_dtable(&dict->of_dtable);
+ FSE_free_dtable(&dict->ml_dtable);
+
+ free(dict->content);
+
+ memset(dict, 0, sizeof(dictionary_t));
+
+ free(dict);
+}
+
+
+#if !defined(ZDEC_NO_DICTIONARY)
+#define DICT_SIZE_ERROR() ERROR("Dictionary size cannot be less than 8 bytes")
+#define NULL_SRC() ERROR("Tried to create dictionary with pointer to null src");
+
static void init_dictionary_content(dictionary_t *const dict,
istream_t *const in);
@@ -1513,19 +1497,93 @@ static void init_dictionary_content(dictionary_t *const dict,
memcpy(dict->content, content, dict->content_size);
}
-/// Free an allocated dictionary
-void free_dictionary(dictionary_t *const dict) {
- HUF_free_dtable(&dict->literals_dtable);
- FSE_free_dtable(&dict->ll_dtable);
- FSE_free_dtable(&dict->of_dtable);
- FSE_free_dtable(&dict->ml_dtable);
+static void HUF_copy_dtable(HUF_dtable *const dst,
+ const HUF_dtable *const src) {
+ if (src->max_bits == 0) {
+ memset(dst, 0, sizeof(HUF_dtable));
+ return;
+ }
- free(dict->content);
+ const size_t size = (size_t)1 << src->max_bits;
+ dst->max_bits = src->max_bits;
- memset(dict, 0, sizeof(dictionary_t));
+ dst->symbols = malloc(size);
+ dst->num_bits = malloc(size);
+ if (!dst->symbols || !dst->num_bits) {
+ BAD_ALLOC();
+ }
- free(dict);
+ memcpy(dst->symbols, src->symbols, size);
+ memcpy(dst->num_bits, src->num_bits, size);
}
+
+static void FSE_copy_dtable(FSE_dtable *const dst, const FSE_dtable *const src) {
+ if (src->accuracy_log == 0) {
+ memset(dst, 0, sizeof(FSE_dtable));
+ return;
+ }
+
+ size_t size = (size_t)1 << src->accuracy_log;
+ dst->accuracy_log = src->accuracy_log;
+
+ dst->symbols = malloc(size);
+ dst->num_bits = malloc(size);
+ dst->new_state_base = malloc(size * sizeof(u16));
+ if (!dst->symbols || !dst->num_bits || !dst->new_state_base) {
+ BAD_ALLOC();
+ }
+
+ memcpy(dst->symbols, src->symbols, size);
+ memcpy(dst->num_bits, src->num_bits, size);
+ memcpy(dst->new_state_base, src->new_state_base, size * sizeof(u16));
+}
+
+/// A dictionary acts as initializing values for the frame context before
+/// decompression, so we implement it by applying it's predetermined
+/// tables and content to the context before beginning decompression
+static void frame_context_apply_dict(frame_context_t *const ctx,
+ const dictionary_t *const dict) {
+ // If the content pointer is NULL then it must be an empty dict
+ if (!dict || !dict->content)
+ return;
+
+ // If the requested dictionary_id is non-zero, the correct dictionary must
+ // be present
+ if (ctx->header.dictionary_id != 0 &&
+ ctx->header.dictionary_id != dict->dictionary_id) {
+ ERROR("Wrong dictionary provided");
+ }
+
+ // Copy the dict content to the context for references during sequence
+ // execution
+ ctx->dict_content = dict->content;
+ ctx->dict_content_len = dict->content_size;
+
+ // If it's a formatted dict copy the precomputed tables in so they can
+ // be used in the table repeat modes
+ if (dict->dictionary_id != 0) {
+ // Deep copy the entropy tables so they can be freed independently of
+ // the dictionary struct
+ HUF_copy_dtable(&ctx->literals_dtable, &dict->literals_dtable);
+ FSE_copy_dtable(&ctx->ll_dtable, &dict->ll_dtable);
+ FSE_copy_dtable(&ctx->of_dtable, &dict->of_dtable);
+ FSE_copy_dtable(&ctx->ml_dtable, &dict->ml_dtable);
+
+ // Copy the repeated offsets
+ memcpy(ctx->previous_offsets, dict->previous_offsets,
+ sizeof(ctx->previous_offsets));
+ }
+}
+
+#else // ZDEC_NO_DICTIONARY is defined
+
+static void frame_context_apply_dict(frame_context_t *const ctx,
+ const dictionary_t *const dict) {
+ (void)ctx;
+ if (dict && dict->content) ERROR("dictionary not supported");
+}
+
+#endif
/******* END DICTIONARY PARSING ***********************************************/
/******* IO STREAM OPERATIONS *************************************************/
@@ -1945,26 +2003,6 @@ static void HUF_free_dtable(HUF_dtable *const dtable) {
free(dtable->num_bits);
memset(dtable, 0, sizeof(HUF_dtable));
}
-
-static void HUF_copy_dtable(HUF_dtable *const dst,
- const HUF_dtable *const src) {
- if (src->max_bits == 0) {
- memset(dst, 0, sizeof(HUF_dtable));
- return;
- }
-
- const size_t size = (size_t)1 << src->max_bits;
- dst->max_bits = src->max_bits;
-
- dst->symbols = malloc(size);
- dst->num_bits = malloc(size);
- if (!dst->symbols || !dst->num_bits) {
- BAD_ALLOC();
- }
-
- memcpy(dst->symbols, src->symbols, size);
- memcpy(dst->num_bits, src->num_bits, size);
-}
/******* END HUFFMAN PRIMITIVES ***********************************************/
/******* FSE PRIMITIVES *******************************************************/
@@ -2279,25 +2317,4 @@ static void FSE_free_dtable(FSE_dtable *const dtable) {
free(dtable->new_state_base);
memset(dtable, 0, sizeof(FSE_dtable));
}
-
-static void FSE_copy_dtable(FSE_dtable *const dst, const FSE_dtable *const src) {
- if (src->accuracy_log == 0) {
- memset(dst, 0, sizeof(FSE_dtable));
- return;
- }
-
- size_t size = (size_t)1 << src->accuracy_log;
- dst->accuracy_log = src->accuracy_log;
-
- dst->symbols = malloc(size);
- dst->num_bits = malloc(size);
- dst->new_state_base = malloc(size * sizeof(u16));
- if (!dst->symbols || !dst->num_bits || !dst->new_state_base) {
- BAD_ALLOC();
- }
-
- memcpy(dst->symbols, src->symbols, size);
- memcpy(dst->num_bits, src->num_bits, size);
- memcpy(dst->new_state_base, src->new_state_base, size * sizeof(u16));
-}
/******* END FSE PRIMITIVES ***************************************************/
diff --git a/doc/educational_decoder/zstd_decompress.h b/doc/educational_decoder/zstd_decompress.h
index 74b18533850a..2b44eee95cec 100644
--- a/doc/educational_decoder/zstd_decompress.h
+++ b/doc/educational_decoder/zstd_decompress.h
@@ -1,10 +1,11 @@
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) 2016-2020, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
*/
#include <stddef.h> /* size_t */
diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md
index 90ac0fe9bcd5..fc61726fc98c 100644
--- a/doc/zstd_compression_format.md
+++ b/doc/zstd_compression_format.md
@@ -16,7 +16,7 @@ Distribution of this document is unlimited.
### Version
-0.3.4 (16/08/19)
+0.3.5 (13/11/19)
Introduction
@@ -341,6 +341,8 @@ The structure of a block is as follows:
|:--------------:|:---------------:|
| 3 bytes | n bytes |
+__`Block_Header`__
+
`Block_Header` uses 3 bytes, written using __little-endian__ convention.
It contains 3 fields :
@@ -385,17 +387,30 @@ There are 4 block types :
__`Block_Size`__
The upper 21 bits of `Block_Header` represent the `Block_Size`.
+
When `Block_Type` is `Compressed_Block` or `Raw_Block`,
-`Block_Size` is the size of `Block_Content`, hence excluding `Block_Header`.
-When `Block_Type` is `RLE_Block`, `Block_Content`’s size is always 1,
-and `Block_Size` represents the number of times this byte must be repeated.
-A block can contain and decompress into any number of bytes (even zero),
-up to `Block_Maximum_Decompressed_Size`, which is the smallest of:
-- Window_Size
+`Block_Size` is the size of `Block_Content` (hence excluding `Block_Header`).
+
+When `Block_Type` is `RLE_Block`, since `Block_Content`’s size is always 1,
+`Block_Size` represents the number of times this byte must be repeated.
+
+`Block_Size` is limited by `Block_Maximum_Size` (see below).
+
+__`Block_Content`__ and __`Block_Maximum_Size`__
+
+The size of `Block_Content` is limited by `Block_Maximum_Size`,
+which is the smallest of:
+- `Window_Size`
- 128 KB
-If this condition cannot be respected when generating a `Compressed_Block`,
-the block must be sent uncompressed instead (`Raw_Block`).
+`Block_Maximum_Size` is constant for a given frame.
+This maximum is applicable to both the decompressed size
+and the compressed size of any block in the frame.
+
+The reasoning for this limit is that a decoder can read this information
+at the beginning of a frame and use it to allocate buffers.
+The guarantees on the size of blocks ensure that
+the buffers will be large enough for any following block of the valid frame.
Compressed Blocks
@@ -1658,6 +1673,7 @@ or at least provide a meaningful error code explaining for which reason it canno
Version changes
---------------
+- 0.3.5 : clarifications for Block_Maximum_Size
- 0.3.4 : clarifications for FSE decoding table
- 0.3.3 : clarifications for field Block_Size
- 0.3.2 : remove additional block size restriction on compressed blocks
diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index 43c5555b8ca8..fe58f78cb153 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -1,10 +1,10 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>zstd 1.4.4 Manual</title>
+<title>zstd 1.4.5 Manual</title>
</head>
<body>
-<h1>zstd 1.4.4 Manual</h1>
+<h1>zstd 1.4.5 Manual</h1>
<hr>
<a name="Contents"></a><h2>Contents</h2>
<ol>
@@ -217,7 +217,10 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
* Default level is ZSTD_CLEVEL_DEFAULT==3.
* Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
* Note 1 : it's possible to pass a negative compression level.
- * Note 2 : setting a level resets all other compression parameters to default */
+ * Note 2 : setting a level does not automatically set all other compression parameters
+ * to default. Setting this will however eventually dynamically impact the compression
+ * parameters which have not been manually set. The manually set
+ * ones will 'stick'. */
</b>/* Advanced compression parameters :<b>
* It's possible to pin down compression parameters to some specific values.
* In which case, these values are no longer dynamically selected by the compressor */
@@ -451,11 +454,13 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
</b>/* note : additional experimental parameters are also available<b>
* within the experimental section of the API.
* At the time of this writing, they include :
- * ZSTD_c_format
+ * ZSTD_d_format
+ * ZSTD_d_stableOutBuffer
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly
*/
- ZSTD_d_experimentalParam1=1000
+ ZSTD_d_experimentalParam1=1000,
+ ZSTD_d_experimentalParam2=1001
} ZSTD_dParameter;
</b></pre><BR>
@@ -1055,23 +1060,28 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
size_t ZSTD_estimateDCtxSize(void);
-</b><p> These functions make it possible to estimate memory usage of a future
- {D,C}Ctx, before its creation.
-
- ZSTD_estimateCCtxSize() will provide a budget large enough for any
- compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(),
- this estimate does not include space for a window buffer, so this estimate
- is guaranteed to be enough for single-shot compressions, but not streaming
- compressions. It will however assume the input may be arbitrarily large,
- which is the worst case. If srcSize is known to always be small,
- ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
- ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with
- ZSTD_getCParams() to create cParams from compressionLevel.
- ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with
- ZSTD_CCtxParams_setParameter().
-
- Note: only single-threaded compression is supported. This function will
- return an error code if ZSTD_c_nbWorkers is >= 1.
+</b><p> These functions make it possible to estimate memory usage
+ of a future {D,C}Ctx, before its creation.
+
+ ZSTD_estimateCCtxSize() will provide a memory budget large enough
+ for any compression level up to selected one.
+ Note : Unlike ZSTD_estimateCStreamSize*(), this estimate
+ does not include space for a window buffer.
+ Therefore, the estimation is only guaranteed for single-shot compressions, not streaming.
+ The estimate will assume the input may be arbitrarily large,
+ which is the worst case.
+
+ When srcSize can be bound by a known and rather "small" value,
+ this fact can be used to provide a tighter estimation
+ because the CCtx compression context will need less memory.
+ This tighter estimation can be provided by more advanced functions
+ ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
+ and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
+ Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
+
+ Note 2 : only single-threaded compression is supported.
+ ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
+
</p></pre><BR>
<pre><b>size_t ZSTD_estimateCStreamSize(int compressionLevel);
diff --git a/examples/Makefile b/examples/Makefile
index 65ea8abad5d6..1ae6bce83d0c 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -1,10 +1,11 @@
# ################################################################
-# Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+# Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
+# You may select, at your option, one of the above-listed licenses.
# ################################################################
# This Makefile presumes libzstd is installed, using `sudo make install`
diff --git a/examples/common.h b/examples/common.h
index a714cbb72c36..4492c7e4efa7 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/dictionary_compression.c b/examples/dictionary_compression.c
index 9efdb785c112..d9aad45a7b07 100644
--- a/examples/dictionary_compression.c
+++ b/examples/dictionary_compression.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020 Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/dictionary_decompression.c b/examples/dictionary_decompression.c
index f683bbb43800..7e50986e37aa 100644
--- a/examples/dictionary_decompression.c
+++ b/examples/dictionary_decompression.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/multiple_simple_compression.c b/examples/multiple_simple_compression.c
index a44ac8b442f9..e409467b226b 100644
--- a/examples/multiple_simple_compression.c
+++ b/examples/multiple_simple_compression.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/multiple_streaming_compression.c b/examples/multiple_streaming_compression.c
index ad98b1bd1b09..8a4dc96c1121 100644
--- a/examples/multiple_streaming_compression.c
+++ b/examples/multiple_streaming_compression.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/simple_compression.c b/examples/simple_compression.c
index 019a143d4c85..618080b338f7 100644
--- a/examples/simple_compression.c
+++ b/examples/simple_compression.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/simple_decompression.c b/examples/simple_decompression.c
index 1aa57c7b0934..e108987c625d 100644
--- a/examples/simple_decompression.c
+++ b/examples/simple_decompression.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/streaming_compression.c b/examples/streaming_compression.c
index d0b04895f018..f0f1065b1d27 100644
--- a/examples/streaming_compression.c
+++ b/examples/streaming_compression.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/streaming_decompression.c b/examples/streaming_decompression.c
index d26b45b34c74..26eda3441b7f 100644
--- a/examples/streaming_decompression.c
+++ b/examples/streaming_decompression.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
diff --git a/examples/streaming_memory_usage.c b/examples/streaming_memory_usage.c
index 26835788abed..37dd660e4a64 100644
--- a/examples/streaming_memory_usage.c
+++ b/examples/streaming_memory_usage.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2017-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/Makefile b/lib/Makefile
index 273ceb90490d..7c6dff02468a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -1,12 +1,24 @@
# ################################################################
-# Copyright (c) 2015-present, Yann Collet, Facebook, Inc.
+# Copyright (c) 2015-2020, Yann Collet, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
+# You may select, at your option, one of the above-listed licenses.
# ################################################################
+Q = $(if $(filter 1,$(V) $(VERBOSE)),,@)
+
+# When cross-compiling from linux to windows, you might
+# need to specify this as "Windows." Fedora build fails
+# without it.
+#
+# Note: mingw-w64 build from linux to windows does not
+# fail on other tested distros (ubuntu, debian) even
+# without manually specifying the TARGET_SYSTEM.
+TARGET_SYSTEM ?= $(OS)
+
# Version numbers
LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h`
LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h`
@@ -19,11 +31,10 @@ LIBVER := $(shell echo $(LIBVER_SCRIPT))
VERSION?= $(LIBVER)
CCVER := $(shell $(CC) --version)
-CPPFLAGS+= -I. -I./common -DXXH_NAMESPACE=ZSTD_
-ifeq ($(OS),Windows_NT) # MinGW assumed
+CPPFLAGS+= -DXXH_NAMESPACE=ZSTD_
+ifeq ($(TARGET_SYSTEM),Windows_NT) # MinGW assumed
CPPFLAGS += -D__USE_MINGW_ANSI_STDIO # compatibility with %zu formatting
endif
-CFLAGS ?= -O3
DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
-Wstrict-prototypes -Wundef -Wpointer-arith \
@@ -50,18 +61,46 @@ ifeq ($(findstring GCC,$(CCVER)),GCC)
decompress/zstd_decompress_block.o : CFLAGS+=-fno-tree-vectorize
endif
-ZSTD_LEGACY_SUPPORT ?= 5
+# This is a helper variable that configures a bunch of other variables to new,
+# space-optimized defaults.
+ZSTD_LIB_MINIFY ?= 0
+ifneq ($(ZSTD_LIB_MINIFY), 0)
+ HAVE_CC_OZ ?= $(shell echo "" | $(CC) -Oz -x c -c - -o /dev/null 2> /dev/null && echo 1 || echo 0)
+ ZSTD_LEGACY_SUPPORT ?= 0
+ ZSTD_LIB_DEPRECATED ?= 0
+ HUF_FORCE_DECOMPRESS_X1 ?= 1
+ ZSTD_FORCE_DECOMPRESS_SHORT ?= 1
+ ZSTD_NO_INLINE ?= 1
+ ZSTD_STRIP_ERROR_STRINGS ?= 1
+ ifneq ($(HAVE_CC_OZ), 0)
+ # Some compilers (clang) support an even more space-optimized setting.
+ CFLAGS += -Oz
+ else
+ CFLAGS += -Os
+ endif
+ CFLAGS += -fno-stack-protector -fomit-frame-pointer -fno-ident \
+ -DDYNAMIC_BMI2=0 -DNDEBUG
+else
+ CFLAGS += -O3
+endif
+
+# Modules
ZSTD_LIB_COMPRESSION ?= 1
ZSTD_LIB_DECOMPRESSION ?= 1
ZSTD_LIB_DICTBUILDER ?= 1
ZSTD_LIB_DEPRECATED ?= 1
+
+# Legacy support
+ZSTD_LEGACY_SUPPORT ?= 5
+ZSTD_LEGACY_MULTITHREADED_API ?= 0
+
+# Build size optimizations
HUF_FORCE_DECOMPRESS_X1 ?= 0
HUF_FORCE_DECOMPRESS_X2 ?= 0
ZSTD_FORCE_DECOMPRESS_SHORT ?= 0
ZSTD_FORCE_DECOMPRESS_LONG ?= 0
ZSTD_NO_INLINE ?= 0
ZSTD_STRIP_ERROR_STRINGS ?= 0
-ZSTD_LEGACY_MULTITHREADED_API ?= 0
ifeq ($(ZSTD_LIB_COMPRESSION), 0)
ZSTD_LIB_DICTBUILDER = 0
@@ -121,7 +160,6 @@ ifneq ($(ZSTD_LEGACY_SUPPORT), 0)
ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0)
ZSTD_FILES += $(shell ls legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]')
endif
- CPPFLAGS += -I./legacy
endif
CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
@@ -142,26 +180,26 @@ else
endif
-.PHONY: default all clean install uninstall
+.PHONY: default lib-all all clean install uninstall
default: lib-release
+# alias
+lib-all: all
+
all: lib
libzstd.a: ARFLAGS = rcs
libzstd.a: $(ZSTD_OBJ)
@echo compiling static library
- @$(AR) $(ARFLAGS) $@ $^
-
-libzstd.a-mt: CPPFLAGS += -DZSTD_MULTITHREAD
-libzstd.a-mt: libzstd.a
+ $(Q)$(AR) $(ARFLAGS) $@ $^
-ifneq (,$(filter Windows%,$(OS)))
+ifneq (,$(filter Windows%,$(TARGET_SYSTEM)))
LIBZSTD = dll\libzstd.dll
$(LIBZSTD): $(ZSTD_FILES)
@echo compiling dynamic library $(LIBVER)
- $(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -Wl,--out-implib,dll\libzstd.lib -shared $^ -o $@
+ $(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -Wl,--out-implib,dll\libzstd.dll.a -shared $^ -o $@
else
@@ -169,27 +207,30 @@ LIBZSTD = libzstd.$(SHARED_EXT_VER)
$(LIBZSTD): LDFLAGS += -shared -fPIC -fvisibility=hidden
$(LIBZSTD): $(ZSTD_FILES)
@echo compiling dynamic library $(LIBVER)
- @$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
+ $(Q)$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
@echo creating versioned links
- @ln -sf $@ libzstd.$(SHARED_EXT_MAJOR)
- @ln -sf $@ libzstd.$(SHARED_EXT)
+ $(Q)ln -sf $@ libzstd.$(SHARED_EXT_MAJOR)
+ $(Q)ln -sf $@ libzstd.$(SHARED_EXT)
endif
-
+.PHONY: libzstd
libzstd : $(LIBZSTD)
-libzstd-mt : CPPFLAGS += -DZSTD_MULTITHREAD
-libzstd-mt : libzstd
+.PHONY: lib
+lib : libzstd.a libzstd
-lib: libzstd.a libzstd
+.PHONY: lib-mt
+%-mt : CPPFLAGS += -DZSTD_MULTITHREAD
+%-mt : LDFLAGS += -pthread
+%-mt : %
+ @echo multi-threading build completed
-lib-mt: CPPFLAGS += -DZSTD_MULTITHREAD
-lib-mt: lib
+.PHONY: lib-release
+%-release : DEBUGFLAGS :=
+%-release : %
+ @echo release build completed
-lib-release lib-release-mt: DEBUGFLAGS :=
-lib-release: lib
-lib-release-mt: lib-mt
# Special case : building library in single-thread mode _and_ without zstdmt_compress.c
ZSTDMT_FILES = compress/zstdmt_compress.c
@@ -198,20 +239,22 @@ libzstd-nomt: LDFLAGS += -shared -fPIC -fvisibility=hidden
libzstd-nomt: $(ZSTD_NOMT_FILES)
@echo compiling single-thread dynamic library $(LIBVER)
@echo files : $(ZSTD_NOMT_FILES)
- @$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
+ $(Q)$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
clean:
- @$(RM) -r *.dSYM # macOS-specific
- @$(RM) core *.o *.a *.gcda *.$(SHARED_EXT) *.$(SHARED_EXT).* libzstd.pc
- @$(RM) dll/libzstd.dll dll/libzstd.lib libzstd-nomt*
- @$(RM) common/*.o compress/*.o decompress/*.o dictBuilder/*.o legacy/*.o deprecated/*.o
+ $(Q)$(RM) -r *.dSYM # macOS-specific
+ $(Q)$(RM) core *.o *.a *.gcda *.$(SHARED_EXT) *.$(SHARED_EXT).* libzstd.pc
+ $(Q)$(RM) dll/libzstd.dll dll/libzstd.lib libzstd-nomt*
+ $(Q)$(RM) common/*.o compress/*.o decompress/*.o dictBuilder/*.o legacy/*.o deprecated/*.o
@echo Cleaning library completed
#-----------------------------------------------------------------------------
-# make install is validated only for Linux, macOS, BSD, Hurd and Solaris targets
+# make install is validated only for below listed environments
#-----------------------------------------------------------------------------
ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku))
+all: libzstd.pc
+
DESTDIR ?=
# directory variables : GNU conventions prefer lowercase
# see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html
@@ -219,11 +262,31 @@ DESTDIR ?=
prefix ?= /usr/local
PREFIX ?= $(prefix)
exec_prefix ?= $(PREFIX)
-libdir ?= $(exec_prefix)/lib
+EXEC_PREFIX ?= $(exec_prefix)
+libdir ?= $(EXEC_PREFIX)/lib
LIBDIR ?= $(libdir)
includedir ?= $(PREFIX)/include
INCLUDEDIR ?= $(includedir)
+PCLIBDIR ?= $(shell echo "$(LIBDIR)" | sed -n -E -e "s@^$(EXEC_PREFIX)(/|$$)@@p")
+PCINCDIR ?= $(shell echo "$(INCLUDEDIR)" | sed -n -E -e "s@^$(PREFIX)(/|$$)@@p")
+
+ifeq (,$(PCLIBDIR))
+# Additional prefix check is required, since the empty string is technically a
+# valid PCLIBDIR
+ifeq (,$(shell echo "$(LIBDIR)" | sed -n -E -e "\\@^$(EXEC_PREFIX)(/|$$)@ p"))
+$(error configured libdir ($(LIBDIR)) is outside of prefix ($(PREFIX)), can't generate pkg-config file)
+endif
+endif
+
+ifeq (,$(PCINCDIR))
+# Additional prefix check is required, since the empty string is technically a
+# valid PCINCDIR
+ifeq (,$(shell echo "$(INCLUDEDIR)" | sed -n -E -e "\\@^$(PREFIX)(/|$$)@ p"))
+$(error configured includedir ($(INCLUDEDIR)) is outside of exec_prefix ($(EXEC_PREFIX)), can't generate pkg-config file)
+endif
+endif
+
ifneq (,$(filter $(shell uname),FreeBSD NetBSD DragonFly))
PKGCONFIGDIR ?= $(PREFIX)/libdata/pkgconfig
else
@@ -243,47 +306,49 @@ INSTALL_DATA ?= $(INSTALL) -m 644
libzstd.pc:
libzstd.pc: libzstd.pc.in
@echo creating pkgconfig
- @sed -e 's|@PREFIX@|$(PREFIX)|' \
- -e 's|@VERSION@|$(VERSION)|' \
- $< >$@
+ $(Q)@sed -E -e 's|@PREFIX@|$(PREFIX)|' \
+ -e 's|@LIBDIR@|$(PCLIBDIR)|' \
+ -e 's|@INCLUDEDIR@|$(PCINCDIR)|' \
+ -e 's|@VERSION@|$(VERSION)|' \
+ $< >$@
install: install-pc install-static install-shared install-includes
@echo zstd static and shared library installed
install-pc: libzstd.pc
- @$(INSTALL) -d -m 755 $(DESTDIR)$(PKGCONFIGDIR)/
- @$(INSTALL_DATA) libzstd.pc $(DESTDIR)$(PKGCONFIGDIR)/
+ $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(PKGCONFIGDIR)/
+ $(Q)$(INSTALL_DATA) libzstd.pc $(DESTDIR)$(PKGCONFIGDIR)/
install-static: libzstd.a
@echo Installing static library
- @$(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR)/
- @$(INSTALL_DATA) libzstd.a $(DESTDIR)$(LIBDIR)
+ $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR)/
+ $(Q)$(INSTALL_DATA) libzstd.a $(DESTDIR)$(LIBDIR)
install-shared: libzstd
@echo Installing shared library
- @$(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR)/
- @$(INSTALL_PROGRAM) $(LIBZSTD) $(DESTDIR)$(LIBDIR)
- @ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
- @ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
+ $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR)/
+ $(Q)$(INSTALL_PROGRAM) $(LIBZSTD) $(DESTDIR)$(LIBDIR)
+ $(Q)ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
+ $(Q)ln -sf $(LIBZSTD) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
install-includes:
@echo Installing includes
- @$(INSTALL) -d -m 755 $(DESTDIR)$(INCLUDEDIR)/
- @$(INSTALL_DATA) zstd.h $(DESTDIR)$(INCLUDEDIR)
- @$(INSTALL_DATA) common/zstd_errors.h $(DESTDIR)$(INCLUDEDIR)
- @$(INSTALL_DATA) deprecated/zbuff.h $(DESTDIR)$(INCLUDEDIR) # prototypes generate deprecation warnings
- @$(INSTALL_DATA) dictBuilder/zdict.h $(DESTDIR)$(INCLUDEDIR)
+ $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(INCLUDEDIR)/
+ $(Q)$(INSTALL_DATA) zstd.h $(DESTDIR)$(INCLUDEDIR)
+ $(Q)$(INSTALL_DATA) common/zstd_errors.h $(DESTDIR)$(INCLUDEDIR)
+ $(Q)$(INSTALL_DATA) deprecated/zbuff.h $(DESTDIR)$(INCLUDEDIR) # prototypes generate deprecation warnings
+ $(Q)$(INSTALL_DATA) dictBuilder/zdict.h $(DESTDIR)$(INCLUDEDIR)
uninstall:
- @$(RM) $(DESTDIR)$(LIBDIR)/libzstd.a
- @$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
- @$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
- @$(RM) $(DESTDIR)$(LIBDIR)/$(LIBZSTD)
- @$(RM) $(DESTDIR)$(PKGCONFIGDIR)/libzstd.pc
- @$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd.h
- @$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd_errors.h
- @$(RM) $(DESTDIR)$(INCLUDEDIR)/zbuff.h # Deprecated streaming functions
- @$(RM) $(DESTDIR)$(INCLUDEDIR)/zdict.h
+ $(Q)$(RM) $(DESTDIR)$(LIBDIR)/libzstd.a
+ $(Q)$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT)
+ $(Q)$(RM) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR)
+ $(Q)$(RM) $(DESTDIR)$(LIBDIR)/$(LIBZSTD)
+ $(Q)$(RM) $(DESTDIR)$(PKGCONFIGDIR)/libzstd.pc
+ $(Q)$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd.h
+ $(Q)$(RM) $(DESTDIR)$(INCLUDEDIR)/zstd_errors.h
+ $(Q)$(RM) $(DESTDIR)$(INCLUDEDIR)/zbuff.h # Deprecated streaming functions
+ $(Q)$(RM) $(DESTDIR)$(INCLUDEDIR)/zdict.h
@echo zstd libraries successfully uninstalled
endif
diff --git a/lib/README.md b/lib/README.md
index 0062c0d63e04..6ccffb13868c 100644
--- a/lib/README.md
+++ b/lib/README.md
@@ -85,28 +85,48 @@ The file structure is designed to make this selection manually achievable for an
- While invoking `make libzstd`, it's possible to define build macros
`ZSTD_LIB_COMPRESSION, ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
- and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the corresponding features.
- This will also disable compilation of all dependencies
- (eg. `ZSTD_LIB_COMPRESSION=0` will also disable dictBuilder).
-
-- There are some additional build macros that can be used to minify the decoder.
-
- Zstandard often has more than one implementation of a piece of functionality,
- where each implementation optimizes for different scenarios. For example, the
- Huffman decoder has complementary implementations that decode the stream one
- symbol at a time or two symbols at a time. Zstd normally includes both (and
- dispatches between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1`
- or `HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding
+ and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the
+ corresponding features. This will also disable compilation of all
+ dependencies (eg. `ZSTD_LIB_COMPRESSION=0` will also disable
+ dictBuilder).
+
+- There are a number of options that can help minimize the binary size of
+ `libzstd`.
+
+ The first step is to select the components needed (using the above-described
+ `ZSTD_LIB_COMPRESSION` etc.).
+
+ The next step is to set `ZSTD_LIB_MINIFY` to `1` when invoking `make`. This
+ disables various optional components and changes the compilation flags to
+ prioritize space-saving.
+
+ Detailed options: Zstandard's code and build environment are set up by default
+ to optimize above all else for performance. In pursuit of this goal, Zstandard
+ makes significant trade-offs in code size. For example, Zstandard often has
+ more than one implementation of a particular component, with each
+ implementation optimized for different scenarios. For example, the Huffman
+ decoder has complementary implementations that decode the stream one symbol at
+ a time or two symbols at a time. Zstd normally includes both (and dispatches
+ between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1` or
+ `HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding
compilation of the other. Similarly, `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`
and `ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG` force the compilation and use of
only one or the other of two decompression implementations. The smallest
binary is achieved by using `HUF_FORCE_DECOMPRESS_X1` and
- `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`.
+ `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT` (implied by `ZSTD_LIB_MINIFY`).
For squeezing the last ounce of size out, you can also define
`ZSTD_NO_INLINE`, which disables inlining, and `ZSTD_STRIP_ERROR_STRINGS`,
which removes the error messages that are otherwise returned by
- `ZSTD_getErrorName`.
+ `ZSTD_getErrorName` (implied by `ZSTD_LIB_MINIFY`).
+
+ Finally, when integrating into your application, make sure you're doing link-
+ time optimization and unused symbol garbage collection (via some combination of,
+ e.g., `-flto`, `-ffat-lto-objects`, `-fuse-linker-plugin`,
+ `-ffunction-sections`, `-fdata-sections`, `-fmerge-all-constants`,
+ `-Wl,--gc-sections`, `-Wl,-z,norelro`, and an archiver that understands
+ the compiler's intermediate representation, e.g., `AR=gcc-ar`). Consult your
+ compiler's documentation.
- While invoking `make libzstd`, the build macro `ZSTD_LEGACY_MULTITHREADED_API=1`
will expose the deprecated `ZSTDMT` API exposed by `zstdmt_compress.h` in
diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h
index 1c294b80d13f..37b99c01eed3 100644
--- a/lib/common/bitstream.h
+++ b/lib/common/bitstream.h
@@ -1,35 +1,15 @@
/* ******************************************************************
- bitstream
- Part of FSE library
- Copyright (C) 2013-present, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * bitstream
+ * Part of FSE library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
#ifndef BITSTREAM_H_MODULE
#define BITSTREAM_H_MODULE
@@ -48,6 +28,7 @@ extern "C" {
* Dependencies
******************************************/
#include "mem.h" /* unaligned access routines */
+#include "compiler.h" /* UNLIKELY() */
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
#include "error_private.h" /* error codes and messages */
@@ -161,8 +142,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
{
# if defined(_MSC_VER) /* Visual */
unsigned long r=0;
- _BitScanReverse ( &r, val );
- return (unsigned) r;
+ return _BitScanReverse ( &r, val ) ? (unsigned)r : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# elif defined(__ICCARM__) /* IAR Intrinsic */
@@ -411,6 +391,23 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
return value;
}
+/*! BIT_reloadDStreamFast() :
+ * Similar to BIT_reloadDStream(), but with two differences:
+ * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
+ * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr; at that
+ * point you must use BIT_reloadDStream() to reload.
+ */
+MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
+{
+ if (UNLIKELY(bitD->ptr < bitD->limitPtr))
+ return BIT_DStream_overflow;
+ assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
+ bitD->ptr -= bitD->bitsConsumed >> 3;
+ bitD->bitsConsumed &= 7;
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
+ return BIT_DStream_unfinished;
+}
+
/*! BIT_reloadDStream() :
* Refill `bitD` from buffer previously set in BIT_initDStream() .
* This function is safe, it guarantees it will not read beyond src buffer.
@@ -422,10 +419,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
return BIT_DStream_overflow;
if (bitD->ptr >= bitD->limitPtr) {
- bitD->ptr -= bitD->bitsConsumed >> 3;
- bitD->bitsConsumed &= 7;
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
- return BIT_DStream_unfinished;
+ return BIT_reloadDStreamFast(bitD);
}
if (bitD->ptr == bitD->start) {
if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
diff --git a/lib/common/compiler.h b/lib/common/compiler.h
index 1877a0c1d9be..95e9483521d4 100644
--- a/lib/common/compiler.h
+++ b/lib/common/compiler.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -17,7 +17,7 @@
/* force inlining */
#if !defined(ZSTD_NO_INLINE)
-#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# define INLINE_KEYWORD inline
#else
# define INLINE_KEYWORD
@@ -114,6 +114,9 @@
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
+# elif defined(__aarch64__)
+# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
+# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
@@ -136,7 +139,7 @@
/* vectorization
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
-#if !defined(__clang__) && defined(__GNUC__)
+#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
# else
@@ -146,6 +149,19 @@
# define DONT_VECTORIZE
#endif
+/* Tell the compiler that a branch is likely or unlikely.
+ * Only use these macros if it causes the compiler to generate better code.
+ * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
+ * and clang, please do.
+ */
+#if defined(__GNUC__)
+#define LIKELY(x) (__builtin_expect((x), 1))
+#define UNLIKELY(x) (__builtin_expect((x), 0))
+#else
+#define LIKELY(x) (x)
+#define UNLIKELY(x) (x)
+#endif
+
/* disable warnings */
#ifdef _MSC_VER /* Visual Studio */
# include <intrin.h> /* For Visual 2005 */
diff --git a/lib/common/cpu.h b/lib/common/cpu.h
index 5f0923fc9289..6e8a974f62d7 100644
--- a/lib/common/cpu.h
+++ b/lib/common/cpu.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-present, Facebook, Inc.
+ * Copyright (c) 2018-2020, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/common/debug.c b/lib/common/debug.c
index 3ebdd1cb15a6..f303f4a2e530 100644
--- a/lib/common/debug.c
+++ b/lib/common/debug.c
@@ -1,35 +1,15 @@
/* ******************************************************************
- debug
- Part of FSE library
- Copyright (C) 2013-present, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * debug
+ * Part of FSE library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
diff --git a/lib/common/debug.h b/lib/common/debug.h
index b4fc89d49741..ac6224888d8b 100644
--- a/lib/common/debug.h
+++ b/lib/common/debug.h
@@ -1,35 +1,15 @@
/* ******************************************************************
- debug
- Part of FSE library
- Copyright (C) 2013-present, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * debug
+ * Part of FSE library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c
index b12944e1de93..9d3e4e8e36ab 100644
--- a/lib/common/entropy_common.c
+++ b/lib/common/entropy_common.c
@@ -1,36 +1,16 @@
-/*
- Common functions of New Generation Entropy library
- Copyright (C) 2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
-*************************************************************************** */
+/* ******************************************************************
+ * Common functions of New Generation Entropy library
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
/* *************************************
* Dependencies
diff --git a/lib/common/error_private.c b/lib/common/error_private.c
index 7c1bb67a23f4..cd437529c12b 100644
--- a/lib/common/error_private.c
+++ b/lib/common/error_private.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -47,6 +47,7 @@ const char* ERR_getErrorString(ERR_enum code)
/* following error codes are not stable and may be removed or changed in a future version */
case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
+ case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
case PREFIX(maxCode):
default: return notErrorCode;
}
diff --git a/lib/common/error_private.h b/lib/common/error_private.h
index 0d2fa7e34b01..982cf8e9fe6f 100644
--- a/lib/common/error_private.h
+++ b/lib/common/error_private.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -49,7 +49,7 @@ typedef ZSTD_ErrorCode ERR_enum;
/*-****************************************
* Error codes handling
******************************************/
-#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */
+#undef ERROR /* already defined on Visual Studio */
#define ERROR(name) ZSTD_ERROR(name)
#define ZSTD_ERROR(name) ((size_t)-PREFIX(name))
@@ -57,6 +57,10 @@ ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
+/* check and forward error code */
+#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
+#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
+
/*-****************************************
* Error Strings
diff --git a/lib/common/fse.h b/lib/common/fse.h
index a7553e3721c4..ff54e70ea75c 100644
--- a/lib/common/fse.h
+++ b/lib/common/fse.h
@@ -1,35 +1,15 @@
/* ******************************************************************
- FSE : Finite State Entropy codec
- Public Prototypes declaration
- Copyright (C) 2013-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * FSE : Finite State Entropy codec
+ * Public Prototypes declaration
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
#if defined (__cplusplus)
diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c
index 4f0737898209..bcc2223ccc65 100644
--- a/lib/common/fse_decompress.c
+++ b/lib/common/fse_decompress.c
@@ -1,35 +1,15 @@
/* ******************************************************************
- FSE : Finite State Entropy decoder
- Copyright (C) 2013-2015, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ * FSE : Finite State Entropy decoder
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
@@ -51,11 +31,6 @@
#define FSE_isError ERR_isError
#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
-/* check and forward error code */
-#ifndef CHECK_F
-#define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
-#endif
-
/* **************************************************************
* Templates
@@ -287,7 +262,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size
/* normal FSE decoding mode */
size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
if (FSE_isError(NCountLength)) return NCountLength;
- //if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
+ /* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */ /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
ip += NCountLength;
cSrcSize -= NCountLength;
diff --git a/lib/common/huf.h b/lib/common/huf.h
index 6b572c448d9d..ef432685dac8 100644
--- a/lib/common/huf.h
+++ b/lib/common/huf.h
@@ -1,35 +1,15 @@
/* ******************************************************************
- huff0 huffman codec,
- part of Finite State Entropy library
- Copyright (C) 2013-present, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * huff0 huffman codec,
+ * part of Finite State Entropy library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
#if defined (__cplusplus)
@@ -110,7 +90,7 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
/** HUF_compress4X_wksp() :
* Same as HUF_compress2(), but uses externally allocated `workSpace`.
* `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
-#define HUF_WORKSPACE_SIZE (6 << 10)
+#define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
@@ -208,6 +188,8 @@ typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
+int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
typedef enum {
HUF_repeat_none, /**< Cannot use the previous table */
@@ -246,7 +228,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
/** HUF_readCTable() :
* Loading a CTable saved with HUF_writeCTable() */
-size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
/** HUF_getNbBits() :
* Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
diff --git a/lib/common/mem.h b/lib/common/mem.h
index 530d30c8f758..89c8aea7d221 100644
--- a/lib/common/mem.h
+++ b/lib/common/mem.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/common/pool.c b/lib/common/pool.c
index f575935076cf..aa4b4de0d3f6 100644
--- a/lib/common/pool.c
+++ b/lib/common/pool.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/common/pool.h b/lib/common/pool.h
index 458d37f13c3e..259bafc97570 100644
--- a/lib/common/pool.h
+++ b/lib/common/pool.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -18,7 +18,7 @@ extern "C" {
#include <stddef.h> /* size_t */
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */
-#include "zstd.h"
+#include "../zstd.h"
typedef struct POOL_ctx_s POOL_ctx;
diff --git a/lib/common/threading.c b/lib/common/threading.c
index 482664bd9ada..e2edb313ebff 100644
--- a/lib/common/threading.c
+++ b/lib/common/threading.c
@@ -2,12 +2,13 @@
* Copyright (c) 2016 Tino Reichardt
* All rights reserved.
*
+ * You can contact the author at:
+ * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ *
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
- *
- * You can contact the author at:
- * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ * You may select, at your option, one of the above-listed licenses.
*/
/**
diff --git a/lib/common/threading.h b/lib/common/threading.h
index 3193ca7db86c..fd0060d5aa2a 100644
--- a/lib/common/threading.h
+++ b/lib/common/threading.h
@@ -2,12 +2,13 @@
* Copyright (c) 2016 Tino Reichardt
* All rights reserved.
*
+ * You can contact the author at:
+ * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ *
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
- *
- * You can contact the author at:
- * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ * You may select, at your option, one of the above-listed licenses.
*/
#ifndef THREADING_H_938743
diff --git a/lib/common/xxhash.c b/lib/common/xxhash.c
index 99d245962185..597de18fc895 100644
--- a/lib/common/xxhash.c
+++ b/lib/common/xxhash.c
@@ -1,35 +1,15 @@
/*
-* xxHash - Fast Hash algorithm
-* Copyright (C) 2012-2016, Yann Collet
-*
-* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions are
-* met:
-*
-* * Redistributions of source code must retain the above copyright
-* notice, this list of conditions and the following disclaimer.
-* * Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following disclaimer
-* in the documentation and/or other materials provided with the
-* distribution.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*
-* You can contact the author at :
-* - xxHash homepage: http://www.xxhash.com
-* - xxHash source repository : https://github.com/Cyan4973/xxHash
+ * xxHash - Fast Hash algorithm
+ * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - xxHash homepage: http://www.xxhash.com
+ * - xxHash source repository : https://github.com/Cyan4973/xxHash
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
*/
@@ -115,7 +95,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp
/* *************************************
* Compiler Specific Options
***************************************/
-#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# define INLINE_KEYWORD inline
#else
# define INLINE_KEYWORD
@@ -729,7 +709,9 @@ FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, c
state->total_len += len;
if (state->memsize + len < 32) { /* fill in tmp buffer */
- XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+ if (input != NULL) {
+ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+ }
state->memsize += (U32)len;
return XXH_OK;
}
diff --git a/lib/common/xxhash.h b/lib/common/xxhash.h
index 9bad1f59f63a..4207eba8328f 100644
--- a/lib/common/xxhash.h
+++ b/lib/common/xxhash.h
@@ -1,35 +1,15 @@
/*
- xxHash - Extremely Fast Hash algorithm
- Header File
- Copyright (C) 2012-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - xxHash source repository : https://github.com/Cyan4973/xxHash
+ * xxHash - Extremely Fast Hash algorithm
+ * Header File
+ * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - xxHash source repository : https://github.com/Cyan4973/xxHash
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
*/
/* Notice extracted from xxHash homepage :
diff --git a/lib/common/zstd_common.c b/lib/common/zstd_common.c
index 667f4a27fc69..91fe3323a5ba 100644
--- a/lib/common/zstd_common.c
+++ b/lib/common/zstd_common.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
diff --git a/lib/common/zstd_errors.h b/lib/common/zstd_errors.h
index 92a3433896c5..998398e7e57f 100644
--- a/lib/common/zstd_errors.h
+++ b/lib/common/zstd_errors.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -76,6 +76,7 @@ typedef enum {
/* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
ZSTD_error_frameIndex_tooLarge = 100,
ZSTD_error_seekableIO = 102,
+ ZSTD_error_dstBuffer_wrong = 104,
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
} ZSTD_ErrorCode;
diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index dcdcbdb81cd7..3bc7e55a0a97 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -19,12 +19,15 @@
/*-*************************************
* Dependencies
***************************************/
+#ifdef __aarch64__
+#include <arm_neon.h>
+#endif
#include "compiler.h"
#include "mem.h"
#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
#include "error_private.h"
#define ZSTD_STATIC_LINKING_ONLY
-#include "zstd.h"
+#include "../zstd.h"
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY
@@ -54,6 +57,31 @@ extern "C" {
#define MAX(a,b) ((a)>(b) ? (a) : (b))
/**
+ * Ignore: this is an internal helper.
+ *
+ * This is a helper function to help force C99-correctness during compilation.
+ * Under strict compilation modes, variadic macro arguments can't be empty.
+ * However, variadic function arguments can be. Using a function therefore lets
+ * us statically check that at least one (string) argument was passed,
+ * independent of the compilation flags.
+ */
+static INLINE_KEYWORD UNUSED_ATTR
+void _force_has_format_string(const char *format, ...) {
+ (void)format;
+}
+
+/**
+ * Ignore: this is an internal helper.
+ *
+ * We want to force this function invocation to be syntactically correct, but
+ * we don't want to force runtime evaluation of its arguments.
+ */
+#define _FORCE_HAS_FORMAT_STRING(...) \
+ if (0) { \
+ _force_has_format_string(__VA_ARGS__); \
+ }
+
+/**
* Return the specified error if the condition evaluates to true.
*
* In debug modes, prints additional information.
@@ -62,7 +90,9 @@ extern "C" {
*/
#define RETURN_ERROR_IF(cond, err, ...) \
if (cond) { \
- RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
+ RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
+ __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
+ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
@@ -75,7 +105,9 @@ extern "C" {
*/
#define RETURN_ERROR(err, ...) \
do { \
- RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
+ RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
+ __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
+ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return ERROR(err); \
@@ -90,7 +122,9 @@ extern "C" {
do { \
size_t const err_code = (err); \
if (ERR_isError(err_code)) { \
- RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
+ RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
+ __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
+ _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
RAWLOG(3, ": " __VA_ARGS__); \
RAWLOG(3, "\n"); \
return err_code; \
@@ -128,6 +162,8 @@ static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
+#define ZSTD_FRAMECHECKSUMSIZE 4
+
#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
@@ -191,10 +227,22 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
/*-*******************************************
* Shared functions to include for inlining
*********************************************/
-static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+static void ZSTD_copy8(void* dst, const void* src) {
+#ifdef __aarch64__
+ vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
+#else
+ memcpy(dst, src, 8);
+#endif
+}
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
-static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
+static void ZSTD_copy16(void* dst, const void* src) {
+#ifdef __aarch64__
+ vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
+#else
+ memcpy(dst, src, 16);
+#endif
+}
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
#define WILDCOPY_OVERLENGTH 32
@@ -213,7 +261,7 @@ typedef enum {
* - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
* The src buffer must be before the dst buffer.
*/
-MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
+MEM_STATIC FORCE_INLINE_ATTR
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
{
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
@@ -230,13 +278,18 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
} while (op < oend);
} else {
assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
- /* Separate out the first two COPY16() calls because the copy length is
+ /* Separate out the first COPY16() call because the copy length is
* almost certain to be short, so the branches have different
- * probabilities.
- * On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%.
- * On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%.
+ * probabilities. Since it is almost certain to be short, only do
+ * one COPY16() in the first call. Then, do two calls per loop since
+ * at that point it is more likely to have a high trip count.
*/
- COPY16(op, ip);
+#ifndef __aarch64__
+ do {
+ COPY16(op, ip);
+ }
+ while (op < oend);
+#else
COPY16(op, ip);
if (op >= oend) return;
do {
@@ -244,9 +297,29 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
COPY16(op, ip);
}
while (op < oend);
+#endif
+ }
+}
+
+MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+ size_t const length = MIN(dstCapacity, srcSize);
+ if (length > 0) {
+ memcpy(dst, src, length);
}
+ return length;
}
+/* define "workspace is too large" as this number of times larger than needed */
+#define ZSTD_WORKSPACETOOLARGE_FACTOR 3
+
+/* when workspace is continuously too large
+ * during at least this number of times,
+ * context's memory usage is considered wasteful,
+ * because it's sized to handle a worst case scenario which rarely happens.
+ * In which case, resize it down to free some memory */
+#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
+
/*-*******************************************
* Private declarations
@@ -271,6 +344,31 @@ typedef struct {
U32 longLengthPos;
} seqStore_t;
+typedef struct {
+ U32 litLength;
+ U32 matchLength;
+} ZSTD_sequenceLength;
+
+/**
+ * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
+ * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
+ */
+MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
+{
+ ZSTD_sequenceLength seqLen;
+ seqLen.litLength = seq->litLength;
+ seqLen.matchLength = seq->matchLength + MINMATCH;
+ if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
+ if (seqStore->longLengthID == 1) {
+ seqLen.litLength += 0xFFFF;
+ }
+ if (seqStore->longLengthID == 2) {
+ seqLen.matchLength += 0xFFFF;
+ }
+ }
+ return seqLen;
+}
+
/**
* Contains the compressed frame size and an upper-bound for the decompressed frame size.
* Note: before using `compressedSize`, check for errors using ZSTD_isError().
@@ -297,8 +395,7 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
{
# if defined(_MSC_VER) /* Visual */
unsigned long r=0;
- _BitScanReverse(&r, val);
- return (unsigned)r;
+ return _BitScanReverse(&r, val) ? (unsigned)r : 0;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# elif defined(__ICCARM__) /* IAR Intrinsic */
diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c
index 68b47e109351..a42759814fdd 100644
--- a/lib/compress/fse_compress.c
+++ b/lib/compress/fse_compress.c
@@ -1,35 +1,15 @@
/* ******************************************************************
- FSE : Finite State Entropy encoder
- Copyright (C) 2013-present, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ * FSE : Finite State Entropy encoder
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
/* **************************************************************
@@ -37,14 +17,14 @@
****************************************************************/
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memcpy, memset */
-#include "compiler.h"
-#include "mem.h" /* U32, U16, etc. */
-#include "debug.h" /* assert, DEBUGLOG */
+#include "../common/compiler.h"
+#include "../common/mem.h" /* U32, U16, etc. */
+#include "../common/debug.h" /* assert, DEBUGLOG */
#include "hist.h" /* HIST_count_wksp */
-#include "bitstream.h"
+#include "../common/bitstream.h"
#define FSE_STATIC_LINKING_ONLY
-#include "fse.h"
-#include "error_private.h"
+#include "../common/fse.h"
+#include "../common/error_private.h"
/* **************************************************************
@@ -645,9 +625,6 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
-#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
-#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
-
/* FSE_compress_wksp() :
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
* `wkspSize` size must be `(1<<tableLog)`.
diff --git a/lib/compress/hist.c b/lib/compress/hist.c
index 45b7babc1e23..61e08c7968be 100644
--- a/lib/compress/hist.c
+++ b/lib/compress/hist.c
@@ -1,42 +1,22 @@
/* ******************************************************************
- hist : Histogram functions
- part of Finite State Entropy project
- Copyright (C) 2013-present, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ * hist : Histogram functions
+ * part of Finite State Entropy project
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
/* --- dependencies --- */
-#include "mem.h" /* U32, BYTE, etc. */
-#include "debug.h" /* assert, DEBUGLOG */
-#include "error_private.h" /* ERROR */
+#include "../common/mem.h" /* U32, BYTE, etc. */
+#include "../common/debug.h" /* assert, DEBUGLOG */
+#include "../common/error_private.h" /* ERROR */
#include "hist.h"
diff --git a/lib/compress/hist.h b/lib/compress/hist.h
index 8b389358dc10..77e3ec4fb192 100644
--- a/lib/compress/hist.h
+++ b/lib/compress/hist.h
@@ -1,36 +1,16 @@
/* ******************************************************************
- hist : Histogram functions
- part of Finite State Entropy project
- Copyright (C) 2013-present, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ * hist : Histogram functions
+ * part of Finite State Entropy project
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
/* --- dependencies --- */
diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c
index f074f1e0a95d..546879868a53 100644
--- a/lib/compress/huf_compress.c
+++ b/lib/compress/huf_compress.c
@@ -1,35 +1,15 @@
/* ******************************************************************
- Huffman encoder, part of New Generation Entropy library
- Copyright (C) 2013-2016, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ * Huffman encoder, part of New Generation Entropy library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
/* **************************************************************
@@ -45,14 +25,14 @@
****************************************************************/
#include <string.h> /* memcpy, memset */
#include <stdio.h> /* printf (debug) */
-#include "compiler.h"
-#include "bitstream.h"
+#include "../common/compiler.h"
+#include "../common/bitstream.h"
#include "hist.h"
#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
-#include "fse.h" /* header compression */
+#include "../common/fse.h" /* header compression */
#define HUF_STATIC_LINKING_ONLY
-#include "huf.h"
-#include "error_private.h"
+#include "../common/huf.h"
+#include "../common/error_private.h"
/* **************************************************************
@@ -60,8 +40,6 @@
****************************************************************/
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
-#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
-#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
/* **************************************************************
@@ -110,18 +88,18 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
/* Write table description header */
- { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
+ { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
op += hSize;
}
/* Compress */
CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
- { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) );
+ { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
if (cSize == 0) return 0; /* not enough space for compressed data */
op += cSize;
}
- return op-ostart;
+ return (size_t)(op-ostart);
}
@@ -169,7 +147,7 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
}
-size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize)
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
@@ -192,9 +170,11 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
} }
/* fill nbBits */
+ *hasZeroWeights = 0;
{ U32 n; for (n=0; n<nbSymbols; n++) {
const U32 w = huffWeight[n];
- CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
+ *hasZeroWeights |= (w == 0);
+ CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
} }
/* fill val */
@@ -240,7 +220,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
/* there are several too large elements (at least >= 2) */
{ int totalCost = 0;
const U32 baseCost = 1 << (largestBits - maxNbBits);
- U32 n = lastNonNull;
+ int n = (int)lastNonNull;
while (huffNode[n].nbBits > maxNbBits) {
totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
@@ -255,22 +235,22 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
/* repay normalized cost */
{ U32 const noSymbol = 0xF0F0F0F0;
U32 rankLast[HUF_TABLELOG_MAX+2];
- int pos;
/* Get pos of last (smallest) symbol per rank */
memset(rankLast, 0xF0, sizeof(rankLast));
{ U32 currentNbBits = maxNbBits;
+ int pos;
for (pos=n ; pos >= 0; pos--) {
if (huffNode[pos].nbBits >= currentNbBits) continue;
currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */
- rankLast[maxNbBits-currentNbBits] = pos;
+ rankLast[maxNbBits-currentNbBits] = (U32)pos;
} }
while (totalCost > 0) {
- U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
+ U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
- U32 highPos = rankLast[nBitsToDecrease];
- U32 lowPos = rankLast[nBitsToDecrease-1];
+ U32 const highPos = rankLast[nBitsToDecrease];
+ U32 const lowPos = rankLast[nBitsToDecrease-1];
if (highPos == noSymbol) continue;
if (lowPos == noSymbol) break;
{ U32 const highTotal = huffNode[highPos].count;
@@ -297,7 +277,8 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
while (huffNode[n].nbBits == maxNbBits) n--;
huffNode[n+1].nbBits--;
- rankLast[1] = n+1;
+ assert(n >= 0);
+ rankLast[1] = (U32)(n+1);
totalCost++;
continue;
}
@@ -309,29 +290,36 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
return maxNbBits;
}
-
typedef struct {
U32 base;
U32 current;
} rankPos;
-static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue)
+typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
+
+#define RANK_POSITION_TABLE_SIZE 32
+
+typedef struct {
+ huffNodeTable huffNodeTbl;
+ rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
+} HUF_buildCTable_wksp_tables;
+
+static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
{
- rankPos rank[32];
U32 n;
- memset(rank, 0, sizeof(rank));
+ memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
for (n=0; n<=maxSymbolValue; n++) {
U32 r = BIT_highbit32(count[n] + 1);
- rank[r].base ++;
+ rankPosition[r].base ++;
}
- for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
- for (n=0; n<32; n++) rank[n].current = rank[n].base;
+ for (n=30; n>0; n--) rankPosition[n-1].base += rankPosition[n].base;
+ for (n=0; n<32; n++) rankPosition[n].current = rankPosition[n].base;
for (n=0; n<=maxSymbolValue; n++) {
U32 const c = count[n];
U32 const r = BIT_highbit32(c+1) + 1;
- U32 pos = rank[r].current++;
- while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
+ U32 pos = rankPosition[r].current++;
+ while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
huffNode[pos] = huffNode[pos-1];
pos--;
}
@@ -343,45 +331,48 @@ static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValu
/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
- * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
+ * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
*/
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
-typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
+
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
- nodeElt* const huffNode0 = (nodeElt*)workSpace;
+ HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
+ nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
nodeElt* const huffNode = huffNode0+1;
- U32 n, nonNullRank;
+ int nonNullRank;
int lowS, lowN;
- U16 nodeNb = STARTNODE;
- U32 nodeRoot;
+ int nodeNb = STARTNODE;
+ int n, nodeRoot;
/* safety checks */
if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
- if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
+ if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
+ return ERROR(workSpace_tooSmall);
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
- if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
+ if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
+ return ERROR(maxSymbolValue_tooLarge);
memset(huffNode0, 0, sizeof(huffNodeTable));
/* sort, decreasing order */
- HUF_sort(huffNode, count, maxSymbolValue);
+ HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
/* init for parents */
- nonNullRank = maxSymbolValue;
+ nonNullRank = (int)maxSymbolValue;
while(huffNode[nonNullRank].count == 0) nonNullRank--;
lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
- huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
+ huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
nodeNb++; lowS-=2;
for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */
/* create parents */
while (nodeNb <= nodeRoot) {
- U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
- U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+ int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+ int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
- huffNode[n1].parent = huffNode[n2].parent = nodeNb;
+ huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
nodeNb++;
}
@@ -393,24 +384,25 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
/* enforce maxTableLog */
- maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
+ maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
/* fill result into tree (val, nbBits) */
{ U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
+ int const alphabetSize = (int)(maxSymbolValue + 1);
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
for (n=0; n<=nonNullRank; n++)
nbPerRank[huffNode[n].nbBits]++;
/* determine stating value per rank */
{ U16 min = 0;
- for (n=maxNbBits; n>0; n--) {
+ for (n=(int)maxNbBits; n>0; n--) {
valPerRank[n] = min; /* get starting value within each rank */
min += nbPerRank[n];
min >>= 1;
} }
- for (n=0; n<=maxSymbolValue; n++)
+ for (n=0; n<alphabetSize; n++)
tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
- for (n=0; n<=maxSymbolValue; n++)
+ for (n=0; n<alphabetSize; n++)
tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
}
@@ -423,11 +415,11 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
*/
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
{
- huffNodeTable nodeTable;
- return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
+ HUF_buildCTable_wksp_tables workspace;
+ return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
}
-static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
+size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
{
size_t nbBits = 0;
int s;
@@ -437,7 +429,7 @@ static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count
return nbBits >> 3;
}
-static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
+int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
int bad = 0;
int s;
for (s = 0; s <= (int)maxSymbolValue; ++s) {
@@ -476,7 +468,7 @@ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
/* init */
if (dstSize < 8) return 0; /* not enough space to compress */
- { size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
+ { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
if (HUF_isError(initErr)) return 0; }
n = srcSize & ~3; /* join to mod 4 */
@@ -573,7 +565,8 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
if (srcSize < 12) return 0; /* no saving possible : too small input */
op += 6; /* jumpTable */
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
+ assert(op <= oend);
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
assert(cSize <= 65535);
MEM_writeLE16(ostart, (U16)cSize);
@@ -581,7 +574,8 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
}
ip += segmentSize;
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
+ assert(op <= oend);
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
assert(cSize <= 65535);
MEM_writeLE16(ostart+2, (U16)cSize);
@@ -589,7 +583,8 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
}
ip += segmentSize;
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
+ assert(op <= oend);
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
assert(cSize <= 65535);
MEM_writeLE16(ostart+4, (U16)cSize);
@@ -597,12 +592,14 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
}
ip += segmentSize;
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
+ assert(op <= oend);
+ assert(ip <= iend);
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
if (cSize==0) return 0;
op += cSize;
}
- return op-ostart;
+ return (size_t)(op-ostart);
}
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
@@ -618,20 +615,21 @@ static size_t HUF_compressCTable_internal(
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
{
size_t const cSize = (nbStreams==HUF_singleStream) ?
- HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
- HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
+ HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
+ HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
if (HUF_isError(cSize)) { return cSize; }
if (cSize==0) { return 0; } /* uncompressible */
op += cSize;
/* check compressibility */
+ assert(op >= ostart);
if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
- return op-ostart;
+ return (size_t)(op-ostart);
}