From 1c6b7cf07ad69323ed22ee073c3e2f26b04df614 Mon Sep 17 00:00:00 2001 From: Nickolay Alexandrov Date: Thu, 19 Apr 2018 15:22:20 +0700 Subject: [PATCH] deps,src: upgrade xxhash to v0.6.5 --- deps/xxhash/.gitignore | 17 + deps/xxhash/.travis.yml | 2 - deps/xxhash/Makefile | 162 ++++++-- deps/xxhash/README.md | 114 ++++-- deps/xxhash/appveyor.yml | 70 ++++ deps/xxhash/cmake_unofficial/.gitignore | 14 + deps/xxhash/cmake_unofficial/CMakeLists.txt | 120 ++++-- deps/xxhash/cmake_unofficial/README.md | 6 + deps/xxhash/doc/xxhash_spec.md | 311 +++++++++++++++ deps/xxhash/xxhash.c | 399 +++++++++++++------- deps/xxhash/xxhash.h | 177 +++++---- deps/xxhash/xxhsum.c | 230 ++++++----- src/common.hpp | 2 +- 13 files changed, 1244 insertions(+), 380 deletions(-) create mode 100644 deps/xxhash/.gitignore create mode 100644 deps/xxhash/appveyor.yml create mode 100644 deps/xxhash/cmake_unofficial/.gitignore create mode 100644 deps/xxhash/cmake_unofficial/README.md create mode 100644 deps/xxhash/doc/xxhash_spec.md diff --git a/deps/xxhash/.gitignore b/deps/xxhash/.gitignore new file mode 100644 index 0000000..36639c6 --- /dev/null +++ b/deps/xxhash/.gitignore @@ -0,0 +1,17 @@ +# objects +*.o + +# libraries +libxxhash.* + +# Executables +xxh32sum +xxh64sum +xxhsum +xxhsum32 +xxhsum_privateXXH +xxhsum_inlinedXXH + +# Mac OS-X artefacts +*.dSYM +.DS_Store diff --git a/deps/xxhash/.travis.yml b/deps/xxhash/.travis.yml index 4adeb39..895da85 100644 --- a/deps/xxhash/.travis.yml +++ b/deps/xxhash/.travis.yml @@ -7,5 +7,3 @@ before_install: - sudo apt-get install -qq clang - sudo apt-get install -qq g++-multilib - sudo apt-get install -qq gcc-multilib - - sudo apt-get install -qq valgrind - diff --git a/deps/xxhash/Makefile b/deps/xxhash/Makefile index f1b4238..6dd738f 100644 --- a/deps/xxhash/Makefile +++ b/deps/xxhash/Makefile @@ -25,15 +25,27 @@ # ################################################################ # Version numbers -LIBVER_MAJOR:=`sed -n '/define 
XXH_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` -LIBVER_MINOR:=`sed -n '/define XXH_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` -LIBVER_PATCH:=`sed -n '/define XXH_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` +LIBVER_MAJOR_SCRIPT:=`sed -n '/define XXH_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` +LIBVER_MINOR_SCRIPT:=`sed -n '/define XXH_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` +LIBVER_PATCH_SCRIPT:=`sed -n '/define XXH_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < xxhash.h` +LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT)) +LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT)) +LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT)) LIBVER := $(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH) -CFLAGS ?= -O3 +# SSE4 detection +HAVE_SSE4 := $(shell $(CC) -dM -E - < /dev/null | grep "SSE4" > /dev/null && echo 1 || echo 0) +ifeq ($(HAVE_SSE4), 1) +NOSSE4 := -mno-sse4 +else +NOSSE4 := +endif + +CFLAGS ?= -O2 $(NOSSE4) # disables potential auto-vectorization CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ - -Wstrict-prototypes -Wundef + -Wstrict-prototypes -Wundef + FLAGS = $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(MOREFLAGS) XXHSUM_VERSION=$(LIBVER) MD2ROFF = ronn @@ -46,27 +58,73 @@ else EXT = endif +# OS X linker doesn't support -soname, and use different extension +# see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html +ifeq ($(shell uname), Darwin) + SHARED_EXT = dylib + SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT) + SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT) + SONAME_FLAGS = -install_name $(LIBDIR)/libxxhash.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER) +else + SONAME_FLAGS = -Wl,-soname=libxxhash.$(SHARED_EXT).$(LIBVER_MAJOR) + SHARED_EXT = 
so + SHARED_EXT_MAJOR = $(SHARED_EXT).$(LIBVER_MAJOR) + SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER) +endif + +LIBXXH = libxxhash.$(SHARED_EXT_VER) + + .PHONY: default -default: xxhsum +default: lib xxhsum_and_links .PHONY: all -all: xxhsum xxhsum32 xxhsum_inlinedXXH +all: lib xxhsum xxhsum_inlinedXXH xxhsum32: CFLAGS += -m32 xxhsum xxhsum32: xxhash.c xxhsum.c - $(CC) $(FLAGS) $^ -o $@$(EXT) - ln -sf $@ xxh32sum - ln -sf $@ xxh64sum + $(CC) $(FLAGS) $^ -o $@$(EXT) + +.PHONY: xxhsum_and_links +xxhsum_and_links: xxhsum + ln -sf xxhsum xxh32sum + ln -sf xxhsum xxh64sum xxhsum_inlinedXXH: xxhsum.c $(CC) $(FLAGS) -DXXH_PRIVATE_API $^ -o $@$(EXT) -.PHONY: test -test: xxhsum + +# library + +libxxhash.a: ARFLAGS = rcs +libxxhash.a: xxhash.o + @echo compiling static library + @$(AR) $(ARFLAGS) $@ $^ + +$(LIBXXH): LDFLAGS += -shared +ifeq (,$(filter Windows%,$(OS))) +$(LIBXXH): LDFLAGS += -fPIC +endif +$(LIBXXH): xxhash.c + @echo compiling dynamic library $(LIBVER) + @$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@ + @echo creating versioned links + @ln -sf $@ libxxhash.$(SHARED_EXT_MAJOR) + @ln -sf $@ libxxhash.$(SHARED_EXT) + +libxxhash : $(LIBXXH) + +lib: libxxhash.a libxxhash + + +# tests + +.PHONY: check +check: xxhsum # stdin ./xxhsum < xxhash.c # multiple files - ./xxhsum * + ./xxhsum xxhash.* xxhsum.* # internal bench ./xxhsum -bi1 # file bench @@ -76,21 +134,21 @@ test: xxhsum test-mem: xxhsum # memory tests valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -bi1 xxhash.c - valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -H0 xxhash.c - valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -H1 xxhash.c + valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -H0 xxhash.c + valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -H1 xxhash.c .PHONY: test32 test32: clean xxhsum32 - @echo ---- test 32-bits ---- + @echo ---- test 32-bit ---- ./xxhsum32 -bi1 xxhash.c test-xxhsum-c: xxhsum # xxhsum to/from pipe - ./xxhsum * | ./xxhsum -c - - ./xxhsum -H0 * | ./xxhsum 
-c - + ./xxhsum lib* | ./xxhsum -c - + ./xxhsum -H0 lib* | ./xxhsum -c - # xxhsum to/from file, shell redirection - ./xxhsum * > .test.xxh64 - ./xxhsum -H0 * > .test.xxh32 + ./xxhsum lib* > .test.xxh64 + ./xxhsum -H0 lib* > .test.xxh32 ./xxhsum -c .test.xxh64 ./xxhsum -c .test.xxh32 ./xxhsum -c < .test.xxh64 @@ -104,8 +162,6 @@ test-xxhsum-c: xxhsum # Expects "FAILED open or read" echo "0000000000000000 test-expects-file-not-found" | ./xxhsum -c -; test $$? -eq 1 echo "00000000 test-expects-file-not-found" | ./xxhsum -c -; test $$? -eq 1 - -clean-xxhsum-c: @$(RM) -f .test.xxh32 .test.xxh64 armtest: clean @@ -125,9 +181,10 @@ c90test: clean $(CC) -std=c90 -Werror -pedantic -DXXH_NO_LONG_LONG -c xxhash.c $(RM) xxhash.o +usan: CC=clang usan: clean @echo ---- check undefined behavior - sanitize ---- - $(MAKE) clean test CC=clang MOREFLAGS="-g -fsanitize=undefined" + $(MAKE) clean test CC=$(CC) MOREFLAGS="-g -fsanitize=undefined -fno-sanitize-recover=all" staticAnalyze: clean @echo ---- static analyzer - scan-build ---- @@ -150,11 +207,23 @@ clean-man: preview-man: clean-man man man ./xxhsum.1 -test-all: clean all namespaceTest test test32 test-xxhsum-c clean-xxhsum-c \ - armtest clangtest gpptest c90test test-mem usan staticAnalyze +test: all namespaceTest check test-xxhsum-c c90test + +test-all: test test32 armtest clangtest gpptest usan listL120 trailingWhitespace staticAnalyze -clean: clean-xxhsum-c - @$(RM) -f core *.o xxhsum$(EXT) xxhsum32$(EXT) xxhsum_inlinedXXH$(EXT) xxh32sum xxh64sum +.PHONY: listL120 +listL120: # extract lines >= 120 characters in *.{c,h}, by Takayuki Matsuoka (note : $$, for Makefile compatibility) + find . -type f -name '*.c' -o -name '*.h' | while read -r filename; do awk 'length > 120 {print FILENAME "(" FNR "): " $$0}' $$filename; done + +.PHONY: trailingWhitespace +trailingWhitespace: + ! 
grep -E "`printf '[ \\t]$$'`" *.1 *.c *.h LICENSE Makefile cmake_unofficial/CMakeLists.txt + +.PHONY: clean +clean: + @$(RM) -r *.dSYM # Mac OS-X specific + @$(RM) core *.o libxxhash.* + @$(RM) xxhsum$(EXT) xxhsum32$(EXT) xxhsum_inlinedXXH$(EXT) xxh32sum xxh64sum @echo cleaning completed @@ -163,6 +232,10 @@ clean: clean-xxhsum-c #----------------------------------------------------------------------------- ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS)) +.PHONY: list +list: + @$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs + DESTDIR ?= # directory variables : GNU conventions prefer lowercase # see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html @@ -170,6 +243,10 @@ DESTDIR ?= prefix ?= /usr/local PREFIX ?= $(prefix) exec_prefix ?= $(PREFIX) +libdir ?= $(exec_prefix)/lib +LIBDIR ?= $(libdir) +includedir ?= $(PREFIX)/include +INCLUDEDIR ?= $(includedir) bindir ?= $(exec_prefix)/bin BINDIR ?= $(bindir) datarootdir ?= $(PREFIX)/share @@ -193,8 +270,16 @@ INSTALL_DATA ?= $(INSTALL) -m 644 .PHONY: install -install: xxhsum - @echo Installing binaries +install: lib xxhsum + @echo Installing libxxhash + @$(INSTALL) -d -m 755 $(DESTDIR)$(LIBDIR) + @$(INSTALL_DATA) libxxhash.a $(DESTDIR)$(LIBDIR) + @$(INSTALL_PROGRAM) $(LIBXXH) $(DESTDIR)$(LIBDIR) + @ln -sf $(LIBXXH) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT_MAJOR) + @ln -sf $(LIBXXH) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT) + @$(INSTALL) -d -m 755 $(DESTDIR)$(INCLUDEDIR) # includes + @$(INSTALL_DATA) xxhash.h $(DESTDIR)$(INCLUDEDIR) + @echo Installing xxhsum @$(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)/ $(DESTDIR)$(MANDIR)/ @$(INSTALL_PROGRAM) xxhsum $(DESTDIR)$(BINDIR)/xxhsum @ln -sf xxhsum $(DESTDIR)$(BINDIR)/xxh32sum @@ -203,16 +288,21 @@ install: xxhsum @$(INSTALL_DATA) xxhsum.1 
$(DESTDIR)$(MANDIR)/xxhsum.1 @ln -sf xxhsum.1 $(DESTDIR)$(MANDIR)/xxh32sum.1 @ln -sf xxhsum.1 $(DESTDIR)$(MANDIR)/xxh64sum.1 - @echo xxhsum installation completed + @echo xxhash installation completed .PHONY: uninstall uninstall: - $(RM) $(DESTDIR)$(BINDIR)/xxh32sum - $(RM) $(DESTDIR)$(BINDIR)/xxh64sum - $(RM) $(DESTDIR)$(BINDIR)/xxhsum - $(RM) $(DESTDIR)$(MANDIR)/xxh32sum.1 - $(RM) $(DESTDIR)$(MANDIR)/xxh64sum.1 - $(RM) $(DESTDIR)$(MANDIR)/xxhsum.1 + @$(RM) $(DESTDIR)$(LIBDIR)/libxxhash.a + @$(RM) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT) + @$(RM) $(DESTDIR)$(LIBDIR)/libxxhash.$(SHARED_EXT_MAJOR) + @$(RM) $(DESTDIR)$(LIBDIR)/$(LIBXXH) + @$(RM) $(DESTDIR)$(INCLUDEDIR)/xxhash.h + @$(RM) $(DESTDIR)$(BINDIR)/xxh32sum + @$(RM) $(DESTDIR)$(BINDIR)/xxh64sum + @$(RM) $(DESTDIR)$(BINDIR)/xxhsum + @$(RM) $(DESTDIR)$(MANDIR)/xxh32sum.1 + @$(RM) $(DESTDIR)$(MANDIR)/xxh64sum.1 + @$(RM) $(DESTDIR)$(MANDIR)/xxhsum.1 @echo xxhsum successfully uninstalled endif diff --git a/deps/xxhash/README.md b/deps/xxhash/README.md index 675add3..30318a9 100644 --- a/deps/xxhash/README.md +++ b/deps/xxhash/README.md @@ -16,7 +16,7 @@ Code is highly portable, and hashes are identical on all platforms (little / big Benchmarks ------------------------- -The benchmark uses SMHasher speed test, compiled with Visual 2010 on a Windows Seven 32-bits box. +The benchmark uses SMHasher speed test, compiled with Visual 2010 on a Windows Seven 32-bit box. The reference system uses a Core 2 Duo @3GHz @@ -40,13 +40,13 @@ It depends on successfully passing SMHasher test set. Algorithms with a score < 5 are not listed on this table. A more recent version, XXH64, has been created thanks to [Mathias Westerdahl](https://github.com/JCash), -which offers superior speed and dispersion for 64-bits systems. -Note however that 32-bits applications will still run faster using the 32-bits version. +which offers superior speed and dispersion for 64-bit systems. 
+Note however that 32-bit applications will still run faster using the 32-bit version. -SMHasher speed test, compiled using GCC 4.8.2, on Linux Mint 64-bits. +SMHasher speed test, compiled using GCC 4.8.2, on Linux Mint 64-bit. The reference system uses a Core i5-3340M @2.7GHz -| Version | Speed on 64-bits | Speed on 32-bits | +| Version | Speed on 64-bit | Speed on 32-bit | |------------|------------------|------------------| | XXH64 | 13.8 GB/s | 1.9 GB/s | | XXH32 | 6.8 GB/s | 6.0 GB/s | @@ -63,27 +63,93 @@ The utility `xxhsum` is GPL licensed. ### Build modifiers -The following macros influence xxhash behavior. They are all disabled by default. - -- `XXH_FORCE_NATIVE_FORMAT` : on big-endian systems : use native number representation, - resulting in system-specific results. +The following macros can be set at compilation time, +they modify xxhash behavior. They are all disabled by default. + +- `XXH_INLINE_ALL` : Make all functions `inline`, with bodies directly included within `xxhash.h`. + There is no need for an `xxhash.o` module in this case. + Inlining functions is generally beneficial for speed on small keys. + It's especially effective when key length is a compile time constant, + with observed performance improvement in the +200% range . + See [this article](https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html) for details. +- `XXH_ACCEPT_NULL_INPUT_POINTER` : if set to `1`, when input is a null-pointer, + xxhash result is the same as a zero-length key + (instead of a dereference segfault). +- `XXH_FORCE_MEMORY_ACCESS` : default method `0` uses a portable `memcpy()` notation. + Method `1` uses a gcc-specific `packed` attribute, which can provide better performance for some targets. + Method `2` forces unaligned reads, which is not standard compliant, but might sometimes be the only way to extract better performance. +- `XXH_CPU_LITTLE_ENDIAN` : by default, endianess is determined at compile time. 
+ It's possible to skip auto-detection and force format to little-endian, by setting this macro to 1. + Setting it to 0 forces big-endian. +- `XXH_FORCE_NATIVE_FORMAT` : on big-endian systems : use native number representation. Breaks consistency with little-endian results. -- `XXH_ACCEPT_NULL_INPUT_POINTER` : if presented with a null-pointer, - xxhash result is the same as a null-length key, - instead of a dereference segfault. +- `XXH_PRIVATE_API` : same impact as `XXH_INLINE_ALL`. + Name underlines that symbols will not be published on library public interface. +- `XXH_NAMESPACE` : prefix all symbols with the value of `XXH_NAMESPACE`. + Useful to evade symbol naming collisions, + in case of multiple inclusions of xxHash source code. + Client applications can still use regular function name, + symbols are automatically translated through `xxhash.h`. +- `XXH_STATIC_LINKING_ONLY` : gives access to state declaration for static allocation. + Incompatible with dynamic linking, due to risks of ABI changes. - `XXH_NO_LONG_LONG` : removes support for XXH64, - useful for targets without 64-bits support. -- `XXH_STATIC_LINKING_ONLY` : gives access to state definition for static allocation. - Incompatible with dynamic linking, due to risks of ABI changes. -- `XXH_PRIVATE_API` : Make all functions `static` and accessible through `xxhash.h` for inlining. - Do not compile `xxhash.c` as a separate module in this case. -- `XXH_NAMESPACE` : prefix all symbols with the value of `XXH_NAMESPACE`, - in order to evade symbol naming collisions, - in case of multiple inclusions of xxHash library - (typically via intermediate libraries). - - -### Other languages + for targets without 64-bit support. 
+ + +### Example + +Calling xxhash 64-bit variant from a C program : + +``` +#include "xxhash.h" + +unsigned long long calcul_hash(const void* buffer, size_t length) +{ + unsigned long long const seed = 0; /* or any other value */ + unsigned long long const hash = XXH64(buffer, length, seed); + return hash; +} +``` + +Using streaming variant is more involved, but makes it possible to provide data in multiple rounds : +``` +#include "stdlib.h" /* abort() */ +#include "xxhash.h" + + +unsigned long long calcul_hash_streaming(someCustomType handler) +{ + XXH64_state_t* const state = XXH64_createState(); + if (state==NULL) abort(); + + size_t const bufferSize = SOME_VALUE; + void* const buffer = malloc(bufferSize); + if (buffer==NULL) abort(); + + unsigned long long const seed = 0; /* or any other value */ + XXH_errorcode const resetResult = XXH64_reset(state, seed); + if (resetResult == XXH_ERROR) abort(); + + (...) + while ( /* any condition */ ) { + size_t const length = get_more_data(buffer, bufferSize, handler); /* undescribed */ + XXH_errorcode const addResult = XXH64_update(state, buffer, length); + if (addResult == XXH_ERROR) abort(); + (...) + } + + (...) 
+ unsigned long long const hash = XXH64_digest(state); + + free(buffer); + XXH64_freeState(state); + + return hash; +} +``` + + +### Other programming languages Beyond the C reference version, xxHash is also available on many programming languages, diff --git a/deps/xxhash/appveyor.yml b/deps/xxhash/appveyor.yml new file mode 100644 index 0000000..aa71222 --- /dev/null +++ b/deps/xxhash/appveyor.yml @@ -0,0 +1,70 @@ +version: 1.0.{build} +environment: + matrix: + - COMPILER: "gcc" + PLATFORM: "mingw64" + - COMPILER: "gcc" + PLATFORM: "mingw32" + +install: + - ECHO Installing %COMPILER% %PLATFORM% %CONFIGURATION% + - MKDIR bin + - if [%COMPILER%]==[gcc] SET PATH_ORIGINAL=%PATH% + - if [%COMPILER%]==[gcc] ( + SET "PATH_MINGW32=c:\MinGW\bin;c:\MinGW\usr\bin" && + SET "PATH_MINGW64=c:\msys64\mingw64\bin;c:\msys64\usr\bin" && + COPY C:\MinGW\bin\mingw32-make.exe C:\MinGW\bin\make.exe && + COPY C:\MinGW\bin\gcc.exe C:\MinGW\bin\cc.exe + ) else ( + IF [%PLATFORM%]==[x64] (SET ADDITIONALPARAM=/p:LibraryPath="C:\Program Files\Microsoft SDKs\Windows\v7.1\lib\x64;c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\lib\amd64;C:\Program Files (x86)\Microsoft Visual Studio 10.0\;C:\Program Files (x86)\Microsoft Visual Studio 10.0\lib\amd64;") + ) + +build_script: + - if [%PLATFORM%]==[mingw32] SET PATH=%PATH_MINGW32%;%PATH_ORIGINAL% + - if [%PLATFORM%]==[mingw64] SET PATH=%PATH_MINGW64%;%PATH_ORIGINAL% + - if [%PLATFORM%]==[clang] SET PATH=%PATH_MINGW64%;%PATH_ORIGINAL% + - ECHO *** && + ECHO Building %COMPILER% %PLATFORM% %CONFIGURATION% && + ECHO *** + - if [%PLATFORM%]==[clang] (clang -v) + - if [%COMPILER%]==[gcc] (gcc -v) + - if [%COMPILER%]==[gcc] ( + echo ----- && + make -v && + echo ----- && + if not [%PLATFORM%]==[clang] ( + make -B clean test MOREFLAGS=-Werror + ) ELSE ( + make -B clean test CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion" + ) + ) + - if [%COMPILER%]==[visual] ( + ECHO *** && + ECHO *** Building Visual 
Studio 2010 %PLATFORM%\%CONFIGURATION% && + ECHO *** && + msbuild "visual\VS2010\lz4.sln" %ADDITIONALPARAM% /m /verbosity:minimal /property:PlatformToolset=v100 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /p:EnableWholeProgramOptimization=true /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" && + ECHO *** && + ECHO *** Building Visual Studio 2012 %PLATFORM%\%CONFIGURATION% && + ECHO *** && + msbuild "visual\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" && + ECHO *** && + ECHO *** Building Visual Studio 2013 %PLATFORM%\%CONFIGURATION% && + ECHO *** && + msbuild "visual\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v120 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" && + ECHO *** && + ECHO *** Building Visual Studio 2015 %PLATFORM%\%CONFIGURATION% && + ECHO *** && + msbuild "visual\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v140 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" && + COPY visual\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe programs\ + ) + +test_script: + - ECHO *** && + ECHO Testing %COMPILER% %PLATFORM% %CONFIGURATION% && + ECHO *** + - if not [%COMPILER%]==[unknown] ( + xxhsum -h && + xxhsum xxhsum.exe && + xxhsum -bi1 && + echo ------- xxhsum tested ------- + ) diff --git a/deps/xxhash/cmake_unofficial/.gitignore b/deps/xxhash/cmake_unofficial/.gitignore new file mode 100644 index 0000000..bdb2593 --- /dev/null +++ b/deps/xxhash/cmake_unofficial/.gitignore @@ -0,0 +1,14 @@ +# cmake artifacts + +CMakeCache.txt +CMakeFiles +Makefile +cmake_install.cmake + + +# make compilation results + 
+libxxhash.0.6.3.dylib +libxxhash.0.dylib +libxxhash.a +libxxhash.dylib diff --git a/deps/xxhash/cmake_unofficial/CMakeLists.txt b/deps/xxhash/cmake_unofficial/CMakeLists.txt index 3becba8..1ca7a06 100644 --- a/deps/xxhash/cmake_unofficial/CMakeLists.txt +++ b/deps/xxhash/cmake_unofficial/CMakeLists.txt @@ -1,40 +1,100 @@ -cmake_minimum_required(VERSION 2.6) -cmake_policy(VERSION 2.6) +# To the extent possible under law, the author(s) have dedicated all +# copyright and related and neighboring rights to this software to +# the public domain worldwide. This software is distributed without +# any warranty. +# +# For details, see . -project(xxhash) +set(XXHASH_DIR "${CMAKE_CURRENT_SOURCE_DIR}/..") -set(XXHASH_LIB_VERSION "0.42.0") -set(XXHASH_LIB_SOVERSION "0") +file(STRINGS "${XXHASH_DIR}/xxhash.h" XXHASH_VERSION_MAJOR REGEX "^#define XXH_VERSION_MAJOR +([0-9]+) *$") +string(REGEX REPLACE "^#define XXH_VERSION_MAJOR +([0-9]+) *$" "\\1" XXHASH_VERSION_MAJOR "${XXHASH_VERSION_MAJOR}") +file(STRINGS "${XXHASH_DIR}/xxhash.h" XXHASH_VERSION_MINOR REGEX "^#define XXH_VERSION_MINOR +([0-9]+) *$") +string(REGEX REPLACE "^#define XXH_VERSION_MINOR +([0-9]+) *$" "\\1" XXHASH_VERSION_MINOR "${XXHASH_VERSION_MINOR}") +file(STRINGS "${XXHASH_DIR}/xxhash.h" XXHASH_VERSION_RELEASE REGEX "^#define XXH_VERSION_RELEASE +([0-9]+) *$") +string(REGEX REPLACE "^#define XXH_VERSION_RELEASE +([0-9]+) *$" "\\1" XXHASH_VERSION_RELEASE "${XXHASH_VERSION_RELEASE}") +set(XXHASH_VERSION_STRING "${XXHASH_VERSION_MAJOR}.${XXHASH_VERSION_MINOR}.${XXHASH_VERSION_RELEASE}") +set(XXHASH_LIB_VERSION ${XXHASH_VERSION_STRING}) +set(XXHASH_LIB_SOVERSION "${XXHASH_VERSION_MAJOR}") +mark_as_advanced(XXHASH_VERSION_MAJOR XXHASH_VERSION_MINOR XXHASH_VERSION_RELEASE XXHASH_VERSION_STRING XXHASH_LIB_VERSION XXHASH_LIB_SOVERSION) option(BUILD_XXHSUM "Build the xxhsum binary" ON) +option(BUILD_SHARED_LIBS "Build shared library" ON) -# Make CMake's RPATH handling not be insane. 
This suff has cmake set rpaths appropriately for -# where things end up in the install tree. For some reason that's not the default: -# https://cmake.org/Wiki/CMake_RPATH_handling -SET(CMAKE_SKIP_BUILD_RPATH FALSE) -SET(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) +if("${CMAKE_VERSION}" VERSION_LESS "3.0") + project(XXHASH C) +else() + cmake_policy (SET CMP0048 NEW) + project(XXHASH + VERSION ${XXHASH_VERSION_STRING} + LANGUAGES C) +endif() -# Where we search for shared libraries -SET(CMAKE_INSTALL_RPATH "./lib") +cmake_minimum_required (VERSION 2.8.12) -# add the automatically determined parts of the RPATH -# which point to directories outside the build tree to the install RPATH -SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) +# If XXHASH is being bundled in another project, we don't want to +# install anything. However, we want to let people override this, so +# we'll use the XXHASH_BUNDLED_MODE variable to let them do that; just +# set it to OFF in your project before you add_subdirectory(xxhash/contrib/cmake_unofficial). +if(CMAKE_CURRENT_SOURCE_DIR STREQUAL "${CMAKE_SOURCE_DIR}") + # Bundled mode hasn't been set one way or the other, set the default + # depending on whether or not we are the top-level project. + if("${XXHASH_PARENT_DIRECTORY}" STREQUAL "") + set(XXHASH_BUNDLED_MODE OFF) + else() + set(XXHASH_BUNDLED_MODE ON) + endif() +endif() +mark_as_advanced(XXHASH_BUNDLED_MODE) -add_library(xxhash ../xxhash.c) -set_target_properties(xxhash PROPERTIES COMPILE_DEFINITIONS "XXHASH_EXPORT" - VERSION "${XXHASH_LIB_VERSION}" - SOVERSION "${XXHASH_LIB_SOVERSION}") +# Allow people to choose whether to build shared or static libraries +# via the BUILD_SHARED_LIBS option unless we are in bundled mode, in +# which case we always use static libraries. 
+include(CMakeDependentOption) +CMAKE_DEPENDENT_OPTION(BUILD_SHARED_LIBS "Build shared libraries" ON "NOT XXHASH_BUNDLED_MODE" OFF) -if (BUILD_XXHSUM) - add_executable(xxhsum ../xxhsum.c) - target_link_libraries(xxhsum xxhash) -endif() +include_directories("${XXHASH_DIR}") + +# libxxhash +add_library(xxhash "${XXHASH_DIR}/xxhash.c") +set_target_properties(xxhash PROPERTIES + SOVERSION "${XXHASH_VERSION_STRING}" + VERSION "${XXHASH_VERSION_STRING}") + +# xxhsum +add_executable(xxhsum "${XXHASH_DIR}/xxhsum.c") +target_link_libraries(xxhsum xxhash) + +# Extra warning flags +include (CheckCCompilerFlag) +foreach (flag + -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow + -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement + -Wstrict-prototypes -Wundef) + # Because https://gcc.gnu.org/wiki/FAQ#wnowarning + string(REGEX REPLACE "\\-Wno\\-(.+)" "-W\\1" flag_to_test "${flag}") + string(REGEX REPLACE "[^a-zA-Z0-9]+" "_" test_name "CFLAG_${flag_to_test}") + + check_c_compiler_flag("${ADD_COMPILER_FLAGS_PREPEND} ${flag_to_test}" ${test_name}) + + if(${test_name}) + set(CMAKE_C_FLAGS "${flag} ${CMAKE_C_FLAGS}") + endif() + + unset(test_name) + unset(flag_to_test) +endforeach (flag) + +if(NOT XXHASH_BUNDLED_MODE) + include(GNUInstallDirs) -INSTALL(FILES ../xxhash.h DESTINATION include) -INSTALL( - TARGETS xxhash xxhsum - RUNTIME DESTINATION bin - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib -) + install(TARGETS xxhsum + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") + install(TARGETS xxhash + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") + install(FILES "${XXHASH_DIR}/xxhash.h" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") + install(FILES "${XXHASH_DIR}/xxhsum.1" + DESTINATION "${CMAKE_INSTALL_MANDIR}/man1") +endif(NOT XXHASH_BUNDLED_MODE) diff --git a/deps/xxhash/cmake_unofficial/README.md b/deps/xxhash/cmake_unofficial/README.md new file mode 100644 index 0000000..4fca58d --- /dev/null +++ 
b/deps/xxhash/cmake_unofficial/README.md @@ -0,0 +1,6 @@ + + +The `cmake` script present in this directory offers the following options : + +- `BUILD_XXHSUM` : build the command line binary. ON by default +- `BUILD_SHARED_LIBS` : build dynamic library. ON by default. diff --git a/deps/xxhash/doc/xxhash_spec.md b/deps/xxhash/doc/xxhash_spec.md new file mode 100644 index 0000000..e673334 --- /dev/null +++ b/deps/xxhash/doc/xxhash_spec.md @@ -0,0 +1,311 @@ +xxHash fast digest algorithm +====================== + +### Notices + +Copyright (c) Yann Collet + +Permission is granted to copy and distribute this document +for any purpose and without charge, +including translations into other languages +and incorporation into compilations, +provided that the copyright notice and this notice are preserved, +and that any substantive changes or deletions from the original +are clearly marked. +Distribution of this document is unlimited. + +### Version + +0.1.0 (15/01/18) + + +Table of Contents +--------------------- +- [Introduction](#introduction) +- [XXH32 algorithm description](#xxh32-algorithm-description) +- [XXH64 algorithm description](#xxh64-algorithm-description) +- [Performance considerations](#performance-considerations) +- [Reference Implementation](#reference-implementation) + + +Introduction +---------------- + +This document describes the xxHash digest algorithm, for both 32 and 64 variants, named `XXH32` and `XXH64`. The algorithm takes as input a message of arbitrary length and an optional seed value, it then produces an output of 32 or 64-bit as "fingerprint" or "digest". + +xxHash is primarily designed for speed. It is labelled non-cryptographic, and is not meant to avoid intentional collisions (same digest for 2 different messages), or to prevent producing a message with predefined digest. + +XXH32 is designed to be fast on 32-bits machines. +XXH64 is designed to be fast on 64-bits machines. +Both variants produce different output. 
+However, a given variant shall produce exactly the same output, irrespective of the cpu / os used. In particular, the result remains identical whatever the endianness and width of the cpu. + +### Operation notations + +All operations are performed modulo {32,64} bits. Arithmetic overflows are expected. +`XXH32` uses 32-bit modular operations. `XXH64` uses 64-bit modular operations. + +- `+` : denote modular addition +- `*` : denote modular multiplication +- `X <<< s` : denote the value obtained by circularly shifting (rotating) `X` left by `s` bit positions. +- `X >> s` : denote the value obtained by shifting `X` right by `s` bit positions. Upper `s` bits become `0`. +- `X xor Y` : denote the bit-wise XOR of `X` and `Y` (same width). + + +XXH32 Algorithm Description +------------------------------------- + +### Overview + +We begin by supposing that we have a message of any length `L` as input, and that we wish to find its digest. Here `L` is an arbitrary nonnegative integer; `L` may be zero. The following steps are performed to compute the digest of the message. + +The algorithm collects and transforms input in _stripes_ of 16 bytes. The transforms are stored inside 4 "accumulators", each one storing an unsigned 32-bit value. Each accumulator can be processed independently in parallel, speeding up processing for cpu with multiple execution units. + +The algorithm uses 32-bit addition, multiplication, rotate, shift and xor operations. Many operations require some 32-bit prime number constants, all defined below : + + static const u32 PRIME32_1 = 2654435761U; + static const u32 PRIME32_2 = 2246822519U; + static const u32 PRIME32_3 = 3266489917U; + static const u32 PRIME32_4 = 668265263U; + static const u32 PRIME32_5 = 374761393U; + +### Step 1. Initialise internal accumulators + +Each accumulator gets an initial value based on optional `seed` input. Since the `seed` is optional, it can be `0`. 
+ u32 acc1 = seed + PRIME32_1 + PRIME32_2; + u32 acc2 = seed + PRIME32_2; + u32 acc3 = seed + 0; + u32 acc4 = seed - PRIME32_1; + +#### Special case : input is less than 16 bytes + +When input is too small (< 16 bytes), the algorithm will not process any stripe. Consequently, it will not make use of parallel accumulators. + +In that case, a simplified initialization is performed, using a single accumulator : + + u32 acc = seed + PRIME32_5; + +The algorithm then proceeds directly to step 4. + +### Step 2. Process stripes + +A stripe is a contiguous segment of 16 bytes. +It is evenly divided into 4 _lanes_, of 4 bytes each. +The first lane is used to update accumulator 1, the second lane is used to update accumulator 2, and so on. + +Each lane reads its associated 32-bit value using __little-endian__ convention. + +For each {lane, accumulator}, the update process is called a _round_, and applies the following formula : + + accN = accN + (laneN * PRIME32_2); + accN = accN <<< 13; + accN = accN * PRIME32_1; + +This shuffles the bits so that any bit from input _lane_ impacts several bits in output _accumulator_. All operations are performed modulo 2^32. + +Input is consumed one full stripe at a time. Step 2 is looped as many times as necessary to consume the whole input, except the last remaining bytes which cannot form a stripe (< 16 bytes). +When that happens, move to step 3. + +### Step 3. Accumulator convergence + +All 4 lane accumulators from previous steps are merged to produce a single remaining accumulator of same width (32-bit). The associated formula is as follows : + + acc = (acc1 <<< 1) + (acc2 <<< 7) + (acc3 <<< 12) + (acc4 <<< 18); + +### Step 4. Add input length + +The input total length is presumed known at this stage. This step is just about adding the length to the accumulator, so that it participates in the final mixing. 
+ + acc = acc + (u32)inputLength; + +Note that, if input length is so large that it requires more than 32-bits, only the lower 32-bits are added to the accumulator. + +### Step 5. Consume remaining input + +There may be up to 15 bytes remaining to consume from the input. +The final stage will digest them according to following pseudo-code : + + while (remainingLength >= 4) { + lane = read_32bit_little_endian(input_ptr); + acc = acc + lane * PRIME32_3; + acc = (acc <<< 17) * PRIME32_4; + input_ptr += 4; remainingLength -= 4; + } + + while (remainingLength >= 1) { + lane = read_byte(input_ptr); + acc = acc + lane * PRIME32_5; + acc = (acc <<< 11) * PRIME32_1; + input_ptr += 1; remainingLength -= 1; + } + +This process ensures that all input bytes are present in the final mix. + +### Step 6. Final mix (avalanche) + +The final mix ensures that all input bits have a chance to impact any bit in the output digest, resulting in an unbiased distribution. This is also called avalanche effect. + + acc = acc xor (acc >> 15); + acc = acc * PRIME32_2; + acc = acc xor (acc >> 13); + acc = acc * PRIME32_3; + acc = acc xor (acc >> 16); + +### Step 7. Output + +The `XXH32()` function produces an unsigned 32-bit value as output. + +For systems which require to store and/or display the result in binary or hexadecimal format, the canonical format is defined to reproduce the same value as the natural decimal format, hence follows __big-endian__ convention (most significant byte first). + + +XXH64 Algorithm Description +------------------------------------- + +### Overview + +`XXH64` algorithm structure is very similar to `XXH32` one. The major difference is that `XXH64` uses 64-bit arithmetic, speeding up memory transfer for 64-bit compliant systems, but also relying on cpu capability to efficiently perform 64-bit operations. + +The algorithm collects and transforms input in _stripes_ of 32 bytes. 
The transforms are stored inside 4 "accumulators", each one storing an unsigned 64-bit value. Each accumulator can be processed independently in parallel, speeding up processing for cpu with multiple execution units. + +The algorithm uses 64-bit addition, multiplication, rotate, shift and xor operations. Many operations require some 64-bit prime number constants, all defined below : + + static const u64 PRIME64_1 = 11400714785074694791ULL; + static const u64 PRIME64_2 = 14029467366897019727ULL; + static const u64 PRIME64_3 = 1609587929392839161ULL; + static const u64 PRIME64_4 = 9650029242287828579ULL; + static const u64 PRIME64_5 = 2870177450012600261ULL; + +### Step 1. Initialise internal accumulators + +Each accumulator gets an initial value based on optional `seed` input. Since the `seed` is optional, it can be `0`. + + u64 acc1 = seed + PRIME64_1 + PRIME64_2; + u64 acc2 = seed + PRIME64_2; + u64 acc3 = seed + 0; + u64 acc4 = seed - PRIME64_1; + +#### Special case : input is less than 32 bytes + +When input is too small (< 32 bytes), the algorithm will not process any stripe. Consequently, it will not make use of parallel accumulators. + +In which case, a simplified initialization is performed, using a single accumulator : + + u64 acc = seed + PRIME64_5; + +The algorithm then proceeds directly to step 4. + +### Step 2. Process stripes + +A stripe is a contiguous segment of 32 bytes. +It is evenly divided into 4 _lanes_, of 8 bytes each. +The first lane is used to update accumulator 1, the second lane is used to update accumulator 2, and so on. + +Each lane read its associated 64-bit value using __little-endian__ convention. + +For each {lane, accumulator}, the update process is called a _round_, and applies the following formula : + + round(accN,laneN): + accN = accN + (laneN * PRIME64_2); + accN = accN <<< 31; + return accN * PRIME64_1; + +This shuffles the bits so that any bit from input _lane_ impacts several bits in output _accumulator_. 
All operations are performed modulo 2^64. + +Input is consumed one full stripe at a time. Step 2 is looped as many times as necessary to consume the whole input, except the last remaining bytes which cannot form a stripe (< 32 bytes). +When that happens, move to step 3. + +### Step 3. Accumulator convergence + +All 4 lane accumulators from previous steps are merged to produce a single remaining accumulator of same width (64-bit). The associated formula is as follows. + +Note that accumulator convergence is more complex than in the 32-bit variant, and requires defining another function called _mergeAccumulator()_ : + + mergeAccumulator(acc,accN): + acc = acc xor round(0, accN); + acc = acc * PRIME64_1; + return acc + PRIME64_4; + +which is then used in the convergence formula : + + acc = (acc1 <<< 1) + (acc2 <<< 7) + (acc3 <<< 12) + (acc4 <<< 18); + acc = mergeAccumulator(acc, acc1); + acc = mergeAccumulator(acc, acc2); + acc = mergeAccumulator(acc, acc3); + acc = mergeAccumulator(acc, acc4); + +### Step 4. Add input length + +The input total length is presumed known at this stage. This step is just about adding the length to accumulator, so that it participates in the final mixing. + + acc = acc + inputLength; + +### Step 5. Consume remaining input + +There may be up to 31 bytes remaining to consume from the input.
+The final stage will digest them according to following pseudo-code : + + while (remainingLength >= 8) { + lane = read_64bit_little_endian(input_ptr); + acc = acc xor round(0, lane); + acc = (acc <<< 27) * PRIME64_1; + acc = acc + PRIME64_4; + input_ptr += 8; remainingLength -= 8; + } + + if (remainingLength >= 4) { + lane = read_32bit_little_endian(input_ptr); + acc = acc xor (lane * PRIME64_1); + acc = (acc <<< 23) * PRIME64_2; + acc = acc + PRIME64_3; + input_ptr += 4; remainingLength -= 4; + } + + while (remainingLength >= 1) { + lane = read_byte(input_ptr); + acc = acc xor (lane * PRIME64_5); + acc = (acc <<< 11) * PRIME64_1; + input_ptr += 1; remainingLength -= 1; + } + +This process ensures that all input bytes are present in the final mix. + +### Step 6. Final mix (avalanche) + +The final mix ensures that all input bits have a chance to impact any bit in the output digest, resulting in an unbiased distribution. This is also called avalanche effect. + + acc = acc xor (acc >> 33); + acc = acc * PRIME64_2; + acc = acc xor (acc >> 29); + acc = acc * PRIME64_3; + acc = acc xor (acc >> 32); + +### Step 7. Output + +The `XXH64()` function produces an unsigned 64-bit value as output. + +For systems which require to store and/or display the result in binary or hexadecimal format, the canonical format is defined to reproduce the same value as the natural decimal format, hence follows __big-endian__ convention (most significant byte first). + +Performance considerations +---------------------------------- + +The xxHash algorithms are simple and compact to implement. They provide a system independent "fingerprint" or digest of a message of arbitrary length. + +The algorithm allows input to be streamed and processed in multiple steps. In such case, an internal buffer is needed to ensure data is presented to the algorithm in full stripes. 
+ +On 64-bit systems, the 64-bit variant `XXH64` is generally faster to compute, so it is a recommended variant, even when only 32-bit are needed. + +On 32-bit systems though, positions are reversed : `XXH64` performance is reduced, due to its usage of 64-bit arithmetic. `XXH32` becomes a faster variant. + + +Reference Implementation +---------------------------------------- + +A reference library written in C is available at http://www.xxhash.com . +The web page also links to multiple other implementations written in many different languages. +It links to the [github project page](https://github.com/Cyan4973/xxHash) where an [issue board](https://github.com/Cyan4973/xxHash/issues) can be used for further public discussions on the topic. + + +Version changes +-------------------- +v0.1.0 : initial release diff --git a/deps/xxhash/xxhash.c b/deps/xxhash/xxhash.c index 46797b9..da06ea7 100644 --- a/deps/xxhash/xxhash.c +++ b/deps/xxhash/xxhash.c @@ -50,20 +50,26 @@ * Prefer these methods in priority order (0 > 1 > 2) */ #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ + || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) # define XXH_FORCE_MEMORY_ACCESS 2 -# elif defined(__INTEL_COMPILER) || \ - (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7S__) 
)) # define XXH_FORCE_MEMORY_ACCESS 1 # endif #endif /*!XXH_ACCEPT_NULL_INPUT_POINTER : - * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. - * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. - * By default, this option is disabled. To enable it, uncomment below define : + * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a segfault. + * When this macro is enabled, xxHash actively checks input for null pointer. + * If it is, result for null input pointers is the same as a null-length input. */ -/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ +#ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */ +# define XXH_ACCEPT_NULL_INPUT_POINTER 0 +#endif /*!XXH_FORCE_NATIVE_FORMAT : * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. @@ -105,6 +111,8 @@ static void XXH_free (void* p) { free(p); } #include <string.h> static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } +#include <assert.h> /* assert */ + #define XXH_STATIC_LINKING_ONLY #include "xxhash.h" @@ -132,7 +140,9 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp * Basic Types ***************************************/ #ifndef MEM_MODULE -# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) # include <stdint.h> typedef uint8_t BYTE; typedef uint16_t U16; @@ -207,8 +217,12 @@ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ #ifndef XXH_CPU_LITTLE_ENDIAN - static const int g_one = 1; -# define XXH_CPU_LITTLE_ENDIAN
(*(const char*)(&g_one)) +static int XXH_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} +# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian() #endif @@ -239,12 +253,12 @@ static U32 XXH_readBE32(const void* ptr) /* ************************************* * Macros ***************************************/ -#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ +#define XXH_STATIC_ASSERT(c) { enum { XXH_sa = 1/(int)(!!(c)) }; } /* use after variable declarations */ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } /* ******************************************************************* -* 32-bits hash functions +* 32-bit hash functions *********************************************************************/ static const U32 PRIME32_1 = 2654435761U; static const U32 PRIME32_2 = 2246822519U; @@ -260,14 +274,89 @@ static U32 XXH32_round(U32 seed, U32 input) return seed; } -FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) +/* mix all bits */ +static U32 XXH32_avalanche(U32 h32) +{ + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + return(h32); +} + +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +static U32 +XXH32_finalize(U32 h32, const void* ptr, size_t len, + XXH_endianess endian, XXH_alignment align) + +{ + const BYTE* p = (const BYTE*)ptr; +#define PROCESS1 \ + h32 += (*p) * PRIME32_5; \ + p++; \ + h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; + +#define PROCESS4 \ + h32 += XXH_get32bits(p) * PRIME32_3; \ + p+=4; \ + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + + switch(len&15) /* or switch(bEnd - p) */ + { + case 12: PROCESS4; + /* fallthrough */ + case 8: PROCESS4; + /* fallthrough */ + case 4: PROCESS4; + return XXH32_avalanche(h32); + + case 13: 
PROCESS4; + /* fallthrough */ + case 9: PROCESS4; + /* fallthrough */ + case 5: PROCESS4; + PROCESS1; + return XXH32_avalanche(h32); + + case 14: PROCESS4; + /* fallthrough */ + case 10: PROCESS4; + /* fallthrough */ + case 6: PROCESS4; + PROCESS1; + PROCESS1; + return XXH32_avalanche(h32); + + case 15: PROCESS4; + /* fallthrough */ + case 11: PROCESS4; + /* fallthrough */ + case 7: PROCESS4; + /* fallthrough */ + case 3: PROCESS1; + /* fallthrough */ + case 2: PROCESS1; + /* fallthrough */ + case 1: PROCESS1; + /* fallthrough */ + case 0: return XXH32_avalanche(h32); + } + assert(0); + return h32; /* reaching this point is deemed impossible */ +} + + +FORCE_INLINE U32 +XXH32_endian_align(const void* input, size_t len, U32 seed, + XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; const BYTE* bEnd = p + len; U32 h32; -#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)16; @@ -275,7 +364,7 @@ FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH #endif if (len>=16) { - const BYTE* const limit = bEnd - 16; + const BYTE* const limit = bEnd - 15; U32 v1 = seed + PRIME32_1 + PRIME32_2; U32 v2 = seed + PRIME32_2; U32 v3 = seed + 0; @@ -286,34 +375,17 @@ FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; - } while (p<=limit); + } while (p < limit); - h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); } else { h32 = seed + PRIME32_5; } - h32 += (U32) len; + h32 += (U32)len; - while (p+4<=bEnd) { - h32 += XXH_get32bits(p) * PRIME32_3; - h32 = 
XXH_rotl32(h32, 17) * PRIME32_4 ; - p+=4; - } - - while (p<bEnd) { - h32 += (*p) * PRIME32_5; - h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; - p++; - } - - h32 ^= h32 >> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; + return XXH32_finalize(h32, p, len&15, endian, align); } @@ -365,23 +437,25 @@ XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) { XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ + memset(&state, 0, sizeof(state)); state.v1 = seed + PRIME32_1 + PRIME32_2; state.v2 = seed + PRIME32_2; state.v3 = seed + 0; state.v4 = seed - PRIME32_1; - memcpy(statePtr, &state, sizeof(state)); + /* do not write into reserved, planned to be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); return XXH_OK; } -FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) +FORCE_INLINE +XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) { const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; if (input==NULL) -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) return XXH_OK; #else return XXH_ERROR; @@ -436,6 +510,7 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void return XXH_OK; } + XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; @@ -447,40 +522,23 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* } - -FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) +FORCE_INLINE U32 +XXH32_digest_endian (const
XXH32_state_t* state, XXH_endianess endian) { - const BYTE * p = (const BYTE*)state->mem32; - const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; U32 h32; if (state->large_len) { - h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + h32 = XXH_rotl32(state->v1, 1) + + XXH_rotl32(state->v2, 7) + + XXH_rotl32(state->v3, 12) + + XXH_rotl32(state->v4, 18); } else { h32 = state->v3 /* == seed */ + PRIME32_5; } h32 += state->total_len_32; - while (p+4<=bEnd) { - h32 += XXH_readLE32(p, endian) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4; - p+=4; - } - - while (p<bEnd) { - h32 += (*p) * PRIME32_5; - h32 = XXH_rotl32(h32, 11) * PRIME32_1; - p++; - } - - h32 ^= h32 >> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; + return XXH32_finalize(h32, state->mem32, state->memsize, endian, XXH_aligned); } @@ -500,7 +558,7 @@ XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) /*! Default XXH result types are basic unsigned 32 and 64 bits. * The canonical representation follows human-readable write convention, aka big-endian (large digits first). * These functions allow transformation of hash result into and from its canonical format. -* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. +* This way, hash values can be written into a file or buffer, remaining comparable across different systems.
*/ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) @@ -519,18 +577,21 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src #ifndef XXH_NO_LONG_LONG /* ******************************************************************* -* 64-bits hash functions +* 64-bit hash functions *********************************************************************/ /*====== Memory access ======*/ #ifndef MEM_MODULE # define MEM_MODULE -# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) # include <stdint.h> typedef uint64_t U64; # else - typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */ + /* if compiler doesn't support unsigned long long, replace by another 64-bit type */ + typedef unsigned long long U64; # endif #endif @@ -623,14 +684,138 @@ static U64 XXH64_mergeRound(U64 acc, U64 val) return acc; } -FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +static U64 XXH64_avalanche(U64 h64) +{ + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + return h64; +} + + +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +static U64 +XXH64_finalize(U64 h64, const void* ptr, size_t len, + XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)ptr; + +#define PROCESS1_64 \ + h64 ^= (*p) * PRIME64_5; \ + p++; \ + h64 = XXH_rotl64(h64, 11) * PRIME64_1; + +#define PROCESS4_64 \ + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; \ + p+=4; \ + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + +#define PROCESS8_64 { \ + U64 const k1 = XXH64_round(0, XXH_get64bits(p)); \ +
p+=8; \ + h64 ^= k1; \ + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \ +} + + switch(len&31) { + case 24: PROCESS8_64; + /* fallthrough */ + case 16: PROCESS8_64; + /* fallthrough */ + case 8: PROCESS8_64; + return XXH64_avalanche(h64); + + case 28: PROCESS8_64; + /* fallthrough */ + case 20: PROCESS8_64; + /* fallthrough */ + case 12: PROCESS8_64; + /* fallthrough */ + case 4: PROCESS4_64; + return XXH64_avalanche(h64); + + case 25: PROCESS8_64; + /* fallthrough */ + case 17: PROCESS8_64; + /* fallthrough */ + case 9: PROCESS8_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 29: PROCESS8_64; + /* fallthrough */ + case 21: PROCESS8_64; + /* fallthrough */ + case 13: PROCESS8_64; + /* fallthrough */ + case 5: PROCESS4_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 26: PROCESS8_64; + /* fallthrough */ + case 18: PROCESS8_64; + /* fallthrough */ + case 10: PROCESS8_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 30: PROCESS8_64; + /* fallthrough */ + case 22: PROCESS8_64; + /* fallthrough */ + case 14: PROCESS8_64; + /* fallthrough */ + case 6: PROCESS4_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 27: PROCESS8_64; + /* fallthrough */ + case 19: PROCESS8_64; + /* fallthrough */ + case 11: PROCESS8_64; + PROCESS1_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 31: PROCESS8_64; + /* fallthrough */ + case 23: PROCESS8_64; + /* fallthrough */ + case 15: PROCESS8_64; + /* fallthrough */ + case 7: PROCESS4_64; + /* fallthrough */ + case 3: PROCESS1_64; + /* fallthrough */ + case 2: PROCESS1_64; + /* fallthrough */ + case 1: PROCESS1_64; + /* fallthrough */ + case 0: return XXH64_avalanche(h64); + } + + /* impossible to reach */ + assert(0); + return 0; /* unreachable, but some compilers complain without it */ +} + +FORCE_INLINE U64 +XXH64_endian_align(const void* input, size_t len, U64 seed, + XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const 
BYTE*)input; const BYTE* bEnd = p + len; U64 h64; -#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)32; @@ -663,32 +848,7 @@ FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH h64 += (U64) len; - while (p+8<=bEnd) { - U64 const k1 = XXH64_round(0, XXH_get64bits(p)); - h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; - } - - if (p+4<=bEnd) { - h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - p+=4; - } - - while (p<bEnd) { - h64 ^= (*p) * PRIME64_5; - h64 = XXH_rotl64(h64, 11) * PRIME64_1; - p++; - } - - h64 ^= h64 >> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; - - return h64; + return XXH64_finalize(h64, p, len, endian, align); } @@ -738,22 +898,24 @@ XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) { XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ + memset(&state, 0, sizeof(state)); state.v1 = seed + PRIME64_1 + PRIME64_2; state.v2 = seed + PRIME64_2; state.v3 = seed + 0; state.v4 = seed - PRIME64_1; - memcpy(statePtr, &state, sizeof(state)); + /* do not write into reserved, planned to be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); return XXH_OK; } -FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) +FORCE_INLINE +XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) { const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; if (input==NULL) -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER +#if
defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) return XXH_OK; #else return XXH_ERROR; @@ -817,8 +979,6 @@ XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) { - const BYTE * p = (const BYTE*)state->mem64; - const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; U64 h64; if (state->total_len >= 32) { @@ -833,37 +993,12 @@ FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess h64 = XXH64_mergeRound(h64, v3); h64 = XXH64_mergeRound(h64, v4); } else { - h64 = state->v3 + PRIME64_5; + h64 = state->v3 /*seed*/ + PRIME64_5; } h64 += (U64) state->total_len; - while (p+8<=bEnd) { - U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); - h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; - } - - if (p+4<=bEnd) { - h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - p+=4; - } - - while (p<bEnd) { - h64 ^= (*p) * PRIME64_5; - h64 = XXH_rotl64(h64, 11) * PRIME64_1; - p++; - } - - h64 ^= h64 >> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; - - return h64; + return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, endian, XXH_aligned); } XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) diff --git a/deps/xxhash/xxhash.h b/deps/xxhash/xxhash.h index a74cc03..d6bad94 100644 --- a/deps/xxhash/xxhash.h +++ b/deps/xxhash/xxhash.h @@ -57,8 +57,8 @@ Q.Score is a measure of quality of the hash function. It depends on successfully passing SMHasher test set. 10 is a perfect score. -A 64-bits version, named XXH64, is available since r35. -It offers much better speed, but for 64-bits applications only. +A 64-bit version, named XXH64, is available since r35. +It offers much better speed, but for 64-bit applications only.
Name Speed on 64 bits Speed on 32 bits XXH64 13.8 GB/s 1.9 GB/s XXH32 6.8 GB/s 6.0 GB/s @@ -80,18 +80,19 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; /* **************************** -* API modifier -******************************/ -/** XXH_PRIVATE_API -* This is useful to include xxhash functions in `static` mode -* in order to inline them, and remove their symbol from the public list. -* Methodology : -* #define XXH_PRIVATE_API -* #include "xxhash.h" -* `xxhash.c` is automatically included. -* It's not useful to compile and link it as a separate module. -*/ -#ifdef XXH_PRIVATE_API + * API modifier + ******************************/ +/** XXH_INLINE_ALL (and XXH_PRIVATE_API) + * This is useful to include xxhash functions in `static` mode + * in order to inline them, and remove their symbol from the public list. + * Inlining can offer dramatic performance improvement on small keys. + * Methodology : + * #define XXH_INLINE_ALL + * #include "xxhash.h" + * `xxhash.c` is automatically included. + * It's not useful to compile and link it as a separate module. 
+ */ +#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) # ifndef XXH_STATIC_LINKING_ONLY # define XXH_STATIC_LINKING_ONLY # endif @@ -102,23 +103,24 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; # elif defined(_MSC_VER) # define XXH_PUBLIC_API static __inline # else -# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ + /* this version may generate warnings for unused static functions */ +# define XXH_PUBLIC_API static # endif #else # define XXH_PUBLIC_API /* do nothing */ -#endif /* XXH_PRIVATE_API */ - -/*!XXH_NAMESPACE, aka Namespace Emulation : - -If you want to include _and expose_ xxHash functions from within your own library, -but also want to avoid symbol collisions with other libraries which may also include xxHash, - -you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library -with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values). - -Note that no change is required within the calling program as long as it includes `xxhash.h` : -regular symbol name will be automatically translated by this header. -*/ +#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */ + +/*! XXH_NAMESPACE, aka Namespace Emulation : + * + * If you want to include _and expose_ xxHash functions from within your own library, + * but also want to avoid symbol collisions with other libraries which may also include xxHash, + * + * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library + * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values). + * + * Note that no change is required within the calling program as long as it includes `xxhash.h` : + * regular symbol name will be automatically translated by this header. + */ #ifdef XXH_NAMESPACE # define XXH_CAT(A,B) A##B # define XXH_NAME2(A,B) XXH_CAT(A,B) @@ -149,18 +151,18 @@ regular symbol name will be automatically translated by this header. 
***************************************/ #define XXH_VERSION_MAJOR 0 #define XXH_VERSION_MINOR 6 -#define XXH_VERSION_RELEASE 3 +#define XXH_VERSION_RELEASE 5 #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) XXH_PUBLIC_API unsigned XXH_versionNumber (void); /*-********************************************************************** -* 32-bits hash +* 32-bit hash ************************************************************************/ typedef unsigned int XXH32_hash_t; /*! XXH32() : - Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". + Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input". The memory between input & input+length must be valid (allocated and read-accessible). "seed" can be used to alter the result predictably. Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */ @@ -177,26 +179,25 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); /* -These functions generate the xxHash of an input provided in multiple segments. -Note that, for small input, they are slower than single-call functions, due to state management. -For small input, prefer `XXH32()` and `XXH64()` . - -XXH state must first be allocated, using XXH*_createState() . - -Start a new hash by initializing state with a seed, using XXH*_reset(). - -Then, feed the hash state by calling XXH*_update() as many times as necessary. -Obviously, input must be allocated and read accessible. -The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. - -Finally, a hash value can be produced anytime, by using XXH*_digest(). -This function returns the nn-bits hash as an int or long long. 
- -It's still possible to continue inserting input into the hash state after a digest, -and generate some new hashes later on, by calling again XXH*_digest(). - -When done, free XXH state space if it was allocated dynamically. -*/ + * Streaming functions generate the xxHash of an input provided in multiple segments. + * Note that, for small input, they are slower than single-call functions, due to state management. + * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized. + * + * XXH state must first be allocated, using XXH*_createState() . + * + * Start a new hash by initializing state with a seed, using XXH*_reset(). + * + * Then, feed the hash state by calling XXH*_update() as many times as necessary. + * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + * + * Finally, a hash value can be produced anytime, by using XXH*_digest(). + * This function returns the nn-bits hash as an int or long long. + * + * It's still possible to continue inserting input into the hash state after a digest, + * and generate some new hashes later on, by calling again XXH*_digest(). + * + * When done, free XXH state space if it was allocated dynamically. + */ /*====== Canonical representation ======*/ @@ -205,22 +206,22 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); /* Default result type for XXH functions are primitive unsigned 32 and 64 bits. -* The canonical representation uses human-readable write convention, aka big-endian (large digits first). -* These functions allow transformation of hash result into and from its canonical format. -* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. -*/ + * The canonical representation uses human-readable write convention, aka big-endian (large digits first). 
+ * These functions allow transformation of hash result into and from its canonical format. + * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. + */ #ifndef XXH_NO_LONG_LONG /*-********************************************************************** -* 64-bits hash +* 64-bit hash ************************************************************************/ typedef unsigned long long XXH64_hash_t; /*! XXH64() : - Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". + Calculate the 64-bit hash of sequence of length "len" stored at memory address "input". "seed" can be used to alter the result predictably. - This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark). + This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark). */ XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); @@ -241,18 +242,49 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src #endif /* XXH_NO_LONG_LONG */ + #ifdef XXH_STATIC_LINKING_ONLY /* ================================================================================================ - This section contains definitions which are not guaranteed to remain stable. + This section contains declarations which are not guaranteed to remain stable. They may change in future versions, becoming incompatible with a different version of the library. - They shall only be used with static linking. - Never use these definitions in association with dynamic linking ! + These declarations should only be used with static linking. + Never use them in association with dynamic linking ! =================================================================================================== */ -/* These definitions are only meant to make possible - static allocation of XXH state, on stack or in a struct for example. 
- Never use members directly. */ +/* These definitions are only present to allow + * static allocation of XXH state, on stack or in a struct for example. + * Never **ever** use members directly. */ + +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include <stdint.h> + +struct XXH32_state_s { + uint32_t total_len_32; + uint32_t large_len; + uint32_t v1; + uint32_t v2; + uint32_t v3; + uint32_t v4; + uint32_t mem32[4]; + uint32_t memsize; + uint32_t reserved; /* never read nor write, might be removed in a future version */ +}; /* typedef'd to XXH32_state_t */ + +struct XXH64_state_s { + uint64_t total_len; + uint64_t v1; + uint64_t v2; + uint64_t v3; + uint64_t v4; + uint64_t mem64[4]; + uint32_t memsize; + uint32_t reserved[2]; /* never read nor write, might be removed in a future version */ +}; /* typedef'd to XXH64_state_t */ + +# else struct XXH32_state_s { unsigned total_len_32; @@ -261,25 +293,28 @@ struct XXH32_state_s { unsigned v2; unsigned v3; unsigned v4; - unsigned mem32[4]; /* buffer defined as U32 for alignment */ + unsigned mem32[4]; unsigned memsize; - unsigned reserved; /* never read nor write, will be removed in a future version */ + unsigned reserved; /* never read nor write, might be removed in a future version */ }; /* typedef'd to XXH32_state_t */ -#ifndef XXH_NO_LONG_LONG /* remove 64-bits support */ +# ifndef XXH_NO_LONG_LONG /* remove 64-bit support */ struct XXH64_state_s { unsigned long long total_len; unsigned long long v1; unsigned long long v2; unsigned long long v3; unsigned long long v4; - unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ + unsigned long long mem64[4]; unsigned memsize; - unsigned reserved[2]; /* never read nor write, will be removed in a future version */ + unsigned reserved[2]; /* never read nor write, might be removed in a future version */ }; /* typedef'd to XXH64_state_t */ -#endif +# endif + +# endif + + -#ifdef
XXH_PRIVATE_API +#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) # include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */ #endif diff --git a/deps/xxhash/xxhsum.c b/deps/xxhash/xxhsum.c index e284fa8..69931f7 100644 --- a/deps/xxhash/xxhsum.c +++ b/deps/xxhash/xxhsum.c @@ -32,8 +32,8 @@ #define XXHASH_C_2097394837 /* ************************************ -* Compiler Options -**************************************/ + * Compiler Options + **************************************/ /* MS Visual */ #if defined(_MSC_VER) || defined(_WIN32) # define _CRT_SECURE_NO_WARNINGS /* removes visual warnings */ @@ -46,28 +46,26 @@ /* ************************************ -* Includes -**************************************/ + * Includes + **************************************/ #include <stdlib.h> /* malloc, calloc, free, exit */ -#include <stdio.h> /* fprintf, fopen, ftello64, fread, stdin, stdout; when present : _fileno */ +#include <stdio.h> /* fprintf, fopen, ftello64, fread, stdin, stdout, _fileno (when present) */ #include <string.h> /* strcmp */ -#include <sys/types.h> /* stat64 */ -#include <sys/stat.h> /* stat64 */ +#include <sys/types.h> /* stat, stat64, _stat64 */ +#include <sys/stat.h> /* stat, stat64, _stat64 */ #include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */ +#include <assert.h> /* assert */ #define XXH_STATIC_LINKING_ONLY /* *_state_t */ #include "xxhash.h" -/*-************************************ -* OS-Specific Includes -**************************************/ +/* ************************************ + * OS-Specific Includes + **************************************/ #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__) # include <fcntl.h> /* _O_BINARY */ # include <io.h> /* _setmode, _isatty */ -# ifdef __MINGW32__ - int _fileno(FILE *stream); /* MINGW somehow forgets to include this windows declaration into <stdio.h> */ -# endif # define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY) # define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream)) #else @@ -110,8 +108,8 @@ static unsigned BMK_isLittleEndian(void) /*
************************************* -* Constants -***************************************/ + * Constants + ***************************************/ #define LIB_VERSION XXH_VERSION_MAJOR.XXH_VERSION_MINOR.XXH_VERSION_RELEASE #define QUOTE(str) #str #define EXPAND_AND_QUOTE(str) QUOTE(str) @@ -121,18 +119,20 @@ static const char g_lename[] = "little endian"; static const char g_bename[] = "big endian"; #define ENDIAN_NAME (BMK_isLittleEndian() ? g_lename : g_bename) static const char author[] = "Yann Collet"; -#define WELCOME_MESSAGE(exename) "%s %s (%i-bits %s), by %s \n", exename, PROGRAM_VERSION, g_nbBits, ENDIAN_NAME, author +#define WELCOME_MESSAGE(exename) "%s %s (%i-bits %s), by %s \n", \ + exename, PROGRAM_VERSION, g_nbBits, ENDIAN_NAME, author +#define KB *( 1<<10) +#define MB *( 1<<20) +#define GB *(1U<<30) + +static size_t XXH_DEFAULT_SAMPLE_SIZE = 100 KB; #define NBLOOPS 3 /* Default number of benchmark iterations */ #define TIMELOOP_S 1 #define TIMELOOP (TIMELOOP_S * CLOCKS_PER_SEC) /* Minimum timing per iteration */ #define XXHSUM32_DEFAULT_SEED 0 /* Default seed for algo_xxh32 */ #define XXHSUM64_DEFAULT_SEED 0 /* Default seed for algo_xxh64 */ -#define KB *( 1<<10) -#define MB *( 1<<20) -#define GB *(1U<<30) - #define MAX_MEM (2 GB - 64 MB) static const char stdinName[] = "-"; @@ -148,24 +148,23 @@ static const algoType g_defaultAlgo = algo_xxh64; /* required within main() & /* ************************************ -* Display macros -**************************************/ + * Display macros + **************************************/ #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) #define DISPLAYRESULT(...) fprintf(stdout, __VA_ARGS__) -#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) DISPLAY(__VA_ARGS__); -static U32 g_displayLevel = 1; +#define DISPLAYLEVEL(l, ...) 
do { if (g_displayLevel>=l) DISPLAY(__VA_ARGS__); } while (0) +static int g_displayLevel = 2; /* ************************************ -* Local variables -**************************************/ -static size_t g_sampleSize = 100 KB; + * Local variables + **************************************/ static U32 g_nbIterations = NBLOOPS; /* ************************************ -* Benchmark Functions -**************************************/ + * Benchmark Functions + **************************************/ static clock_t BMK_clockSpan( clock_t start ) { return clock() - start; /* works even if overflow; Typical max span ~ 30 mn */ @@ -218,51 +217,74 @@ static U32 localXXH64(const void* buffer, size_t bufferSize, U32 seed) { return static void BMK_benchHash(hashFunction h, const char* hName, const void* buffer, size_t bufferSize) { - static const U32 nbh_perloop = 100; + U32 nbh_perIteration = ((300 MB) / (bufferSize+1)) + 1; /* first loop conservatively aims for 300 MB/s */ U32 iterationNb; double fastestH = 100000000.; - DISPLAY("\r%79s\r", ""); /* Clean display line */ + DISPLAYLEVEL(2, "\r%70s\r", ""); /* Clean display line */ if (g_nbIterations<1) g_nbIterations=1; for (iterationNb = 1; iterationNb <= g_nbIterations; iterationNb++) { - U32 nbHashes = 0, r=0; + U32 r=0; clock_t cStart; - DISPLAY("%1i-%-17.17s : %10u ->\r", iterationNb, hName, (U32)bufferSize); + DISPLAYLEVEL(2, "%1i-%-17.17s : %10u ->\r", iterationNb, hName, (U32)bufferSize); cStart = clock(); while (clock() == cStart); /* starts clock() at its exact beginning */ cStart = clock(); - while (BMK_clockSpan(cStart) < TIMELOOP) { - U32 i; - for (i=0; i %7.1f MB/s\r", iterationNb, hName, (U32)bufferSize, ((double)bufferSize / (1<<20)) / fastestH ); + DISPLAYLEVEL(2, "%1i-%-17.17s : %10u -> %8.0f it/s (%7.1f MB/s) \r", + iterationNb, hName, (U32)bufferSize, + (double)1 / fastestH, + ((double)bufferSize / (1<<20)) / fastestH ); } + assert(fastestH > 1./2000000000); /* avoid U32 overflow */ + nbh_perIteration = 
(U32)(1 / fastestH) + 1; /* adjust nbh_perIteration to last roughly one second */ } - DISPLAY("%-19.19s : %10u -> %7.1f MB/s \n", hName, (U32)bufferSize, ((double)bufferSize / (1<<20)) / fastestH); + DISPLAYLEVEL(1, "%-19.19s : %10u -> %8.0f it/s (%7.1f MB/s) \n", hName, (U32)bufferSize, + (double)1 / fastestH, + ((double)bufferSize / (1<<20)) / fastestH); + if (g_displayLevel<1) + DISPLAYLEVEL(0, "%u, ", (U32)((double)1 / fastestH)); } -/* Note : buffer is supposed malloc'ed, hence aligned */ -static void BMK_benchMem(const void* buffer, size_t bufferSize) +/* BMK_benchMem(): + * specificTest : 0 == run all tests, 1+ run only specific test + * buffer : is supposed 8-bytes aligned (if malloc'ed, it should be) + * the real allocated size of buffer is supposed to be >= (bufferSize+3). + * @return : 0 on success, 1 if error (invalid mode selected) */ +static int BMK_benchMem(const void* buffer, size_t bufferSize, U32 specificTest) { + assert((((size_t)buffer) & 8) == 0); /* ensure alignment; NOTE(review): mask 8 tests only bit 3 - a full 8-byte alignment check would use (& 7) */ + /* XXH32 bench */ - BMK_benchHash(localXXH32, "XXH32", buffer, bufferSize); + if ((specificTest==0) | (specificTest==1)) + BMK_benchHash(localXXH32, "XXH32", buffer, bufferSize); /* Bench XXH32 on Unaligned input */ - BMK_benchHash(localXXH32, "XXH32 unaligned", ((const char*)buffer)+1, bufferSize); + if ((specificTest==0) | (specificTest==2)) + BMK_benchHash(localXXH32, "XXH32 unaligned", ((const char*)buffer)+1, bufferSize); /* Bench XXH64 */ - BMK_benchHash(localXXH64, "XXH64", buffer, bufferSize); + if ((specificTest==0) | (specificTest==3)) + BMK_benchHash(localXXH64, "XXH64", buffer, bufferSize); /* Bench XXH64 on Unaligned input */ - BMK_benchHash(localXXH64, "XXH64 unaligned", ((const char*)buffer)+3, bufferSize); + if ((specificTest==0) | (specificTest==4)) + BMK_benchHash(localXXH64, "XXH64 unaligned", ((const char*)buffer)+3, bufferSize); + + if (specificTest > 4) { + DISPLAY("benchmark mode invalid \n"); + return 1; + } + return 0; } @@ -277,19 +299,21 @@ static
size_t BMK_selectBenchedSize(const char* fileName) } -static int BMK_benchFiles(const char** fileNamesTable, int nbFiles) +static int BMK_benchFiles(const char** fileNamesTable, int nbFiles, U32 specificTest) { + int result = 0; int fileIdx; + for (fileIdx=0; fileIdx> 10)); - BMK_benchMem(buffer, benchedSize); + DISPLAYLEVEL(1, "Sample of "); + if (keySize > 10 KB) { + DISPLAYLEVEL(1, "%u KB", (U32)(keySize >> 10)); + } else { + DISPLAYLEVEL(1, "%u bytes", (U32)keySize); + } + DISPLAYLEVEL(1, "... \n"); - free(buffer); - return 0; + { int const result = BMK_benchMem(alignedBuffer, keySize, specificTest); + free(buffer); + return result; + } } static void BMK_checkResult(U32 r1, U32 r2) { static int nbTests = 1; - if (r1==r2) DISPLAY("\rTest%3i : %08X == %08X ok ", nbTests, r1, r2); - else { + if (r1==r2) { + DISPLAYLEVEL(3, "\rTest%3i : %08X == %08X ok ", nbTests, r1, r2); + } else { DISPLAY("\rERROR : Test%3i : %08X <> %08X !!!!! \n", nbTests, r1, r2); exit(1); } @@ -354,7 +386,7 @@ static void BMK_checkResult64(U64 r1, U64 r2) { static int nbTests = 1; if (r1!=r2) { - DISPLAY("\rERROR : Test%3i : 64-bits values non equals !!!!! \n", nbTests); + DISPLAY("\rERROR : Test%3i : 64-bit values non equals !!!!! \n", nbTests); DISPLAY("\r %08X%08X != %08X%08X \n", (U32)(r1>>32), (U32)r1, (U32)(r2>>32), (U32)r2); exit(1); } @@ -377,7 +409,8 @@ static void BMK_testSequence64(void* sentence, size_t len, U64 seed, U64 Nresult BMK_checkResult64(Dresult, Nresult); XXH64_reset(&state, seed); - for (pos=0; pos 30 ? 30 : fileNameSize; - size_t infoFilenameSize = 1; - while ( (infoFilenameSize < maxInfoFilenameSize) - &&(fileNameEnd[-1-infoFilenameSize] != '/') - &&(fileNameEnd[-1-infoFilenameSize] != '\\') ) + const int maxInfoFilenameSize = (int)(fileNameSize > 30 ? 
30 : fileNameSize); + int infoFilenameSize = 1; + while ((infoFilenameSize < maxInfoFilenameSize) + && (fileNameEnd[-1-infoFilenameSize] != '/') + && (fileNameEnd[-1-infoFilenameSize] != '\\') ) infoFilenameSize++; DISPLAY("\rLoading %s... \r", fileNameEnd - infoFilenameSize); @@ -1123,10 +1157,30 @@ static int badusage(const char* exename) return 1; } +/*! readU32FromChar() : + @return : unsigned integer value read from input in `char` format, + 0 if no digit is found at *stringPtr position. + Interprets K, KB, KiB, M, MB and MiB suffix. + Modifies `*stringPtr`, advancing it to position where reading stopped. + Note : the result silently wraps around if the digit string (or its K/M-scaled value) exceeds UINT_MAX */ +static unsigned readU32FromChar(const char** stringPtr) +{ + unsigned result = 0; + while ((**stringPtr >='0') && (**stringPtr <='9')) + result *= 10, result += **stringPtr - '0', (*stringPtr)++ ; + if ((**stringPtr=='K') || (**stringPtr=='M')) { + result <<= 10; + if (**stringPtr=='M') result <<= 10; + (*stringPtr)++ ; + if (**stringPtr=='i') (*stringPtr)++; + if (**stringPtr=='B') (*stringPtr)++; + } + return result; +} int main(int argc, const char** argv) { - int i, filenamesStart=0; + int i, filenamesStart = 0; const char* const exename = argv[0]; U32 benchmarkMode = 0; U32 fileCheckMode = 0; @@ -1134,7 +1188,9 @@ int main(int argc, const char** argv) U32 statusOnly = 0; U32 warn = 0; U32 quiet = 0; - algoType algo = g_defaultAlgo; + U32 specificTest = 0; + size_t keySize = XXH_DEFAULT_SAMPLE_SIZE; + algoType algo = g_defaultAlgo; endianess displayEndianess = big_endian; /* special case : xxh32sum default to 32 bits checksum */ @@ -1194,21 +1250,26 @@ int main(int argc, const char** argv) /* Trigger benchmark mode */ case 'b': argument++; - benchmarkMode=1; + benchmarkMode = 1; + specificTest = readU32FromChar(&argument); /* select one specific test (hidden option) */ break; /* Modify Nb Iterations (benchmark only) */ case 'i': - g_nbIterations = argument[1] - '0'; - argument+=2; + argument++;
+ g_nbIterations = readU32FromChar(&argument); break; /* Modify Block size (benchmark only) */ case 'B': argument++; - g_sampleSize = 0; - while (argument[0]>='0' && argument[0]<='9') - g_sampleSize *= 10, g_sampleSize += argument[0]-'0', argument++; + keySize = readU32FromChar(&argument); + break; + + /* Modify verbosity of benchmark output (hidden option) */ + case 'q': + argument++; + g_displayLevel--; break; default: @@ -1219,10 +1280,10 @@ int main(int argc, const char** argv) /* Check benchmark mode */ if (benchmarkMode) { - DISPLAY( WELCOME_MESSAGE(exename) ); + DISPLAYLEVEL(2, WELCOME_MESSAGE(exename) ); BMK_sanityCheck(); - if (filenamesStart==0) return BMK_benchInternal(); - return BMK_benchFiles(argv+filenamesStart, argc-filenamesStart); + if (filenamesStart==0) return BMK_benchInternal(keySize, specificTest); + return BMK_benchFiles(argv+filenamesStart, argc-filenamesStart, specificTest); } /* Check if input is defined as console; trigger an error in this case */ @@ -1230,7 +1291,8 @@ int main(int argc, const char** argv) if (filenamesStart==0) filenamesStart = argc; if (fileCheckMode) { - return checkFiles(argv+filenamesStart, argc-filenamesStart, displayEndianess, strictMode, statusOnly, warn, quiet); + return checkFiles(argv+filenamesStart, argc-filenamesStart, + displayEndianess, strictMode, statusOnly, warn, quiet); } else { return BMK_hashFiles(argv+filenamesStart, argc-filenamesStart, algo, displayEndianess); } diff --git a/src/common.hpp b/src/common.hpp index bb08eca..d842e1b 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -7,7 +7,7 @@ #define XXH_STATIC_LINKING_ONLY #define XXH_PRIVATE_API -#define XXH_ACCEPT_NULL_INPUT_POINTER +#define XXH_ACCEPT_NULL_INPUT_POINTER 1 #include "xxhash.h" #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \