Skip to content

Commit e6f8564

Browse files
authored
Merge pull request #86 from VectorCamp/develop
New release 5.4.6
2 parents 1b6f37d + f9b6526 commit e6f8564

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+3983
-1938
lines changed

CMakeLists.txt

Lines changed: 200 additions & 133 deletions
Large diffs are not rendered by default.

benchmarks/benchmarks.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,34 @@ int main(){
191191
);
192192
}
193193

194+
for (size_t i = 0; i < std::size(sizes); i++) {
195+
MicroBenchmark bench("Vermicelli", sizes[i]);
196+
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
197+
[&](MicroBenchmark &b) {
198+
b.chars.set('a');
199+
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
200+
memset(b.buf.data(), 'b', b.size);
201+
},
202+
[&](MicroBenchmark &b) {
203+
return vermicelliExec('a', 'b', b.buf.data(), b.buf.data() + b.size);
204+
}
205+
);
206+
}
207+
208+
for (size_t i = 0; i < std::size(sizes); i++) {
209+
MicroBenchmark bench("Reverse Vermicelli", sizes[i]);
210+
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
211+
[&](MicroBenchmark &b) {
212+
b.chars.set('a');
213+
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
214+
memset(b.buf.data(), 'b', b.size);
215+
},
216+
[&](MicroBenchmark &b) {
217+
return rvermicelliExec('a', 'b', b.buf.data(), b.buf.data() + b.size);
218+
}
219+
);
220+
}
221+
194222
for (size_t i = 0; i < std::size(sizes); i++) {
195223
//we imitate the noodle unit tests
196224
std::string str;

benchmarks/benchmarks.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "nfa/shufticompile.h"
3131
#include "nfa/truffle.h"
3232
#include "nfa/trufflecompile.h"
33+
#include "nfa/vermicelli.hpp"
3334
#include "hwlm/noodle_build.h"
3435
#include "hwlm/noodle_engine.h"
3536
#include "hwlm/noodle_internal.h"

cmake/arch.cmake

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ elseif (HAVE_C_INTRIN_H)
99
elseif (HAVE_C_ARM_NEON_H)
1010
set (INTRIN_INC_H "arm_neon.h")
1111
set (FAT_RUNTIME OFF)
12+
elseif (HAVE_C_PPC64EL_ALTIVEC_H)
13+
set (INTRIN_INC_H "altivec.h")
14+
set (FAT_RUNTIME OFF)
1215
else()
1316
message (FATAL_ERROR "No intrinsics header found")
1417
endif ()
@@ -85,7 +88,7 @@ if (FAT_RUNTIME)
8588
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${SKYLAKE_FLAG}")
8689
endif (BUILD_AVX512VBMI)
8790
elseif (BUILD_AVX2)
88-
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2 -mavx")
91+
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2 -mavx2")
8992
elseif ()
9093
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-i7 -mssse3")
9194
endif ()
@@ -95,12 +98,12 @@ else (NOT FAT_RUNTIME)
9598
endif ()
9699

97100
if (ARCH_IA32 OR ARCH_X86_64)
98-
# ensure we have the minimum of SSSE3 - call a SSSE3 intrinsic
101+
# ensure we have the minimum of SSE4.2 - call a SSE4.2 intrinsic
99102
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
100103
int main() {
101104
__m128i a = _mm_set1_epi8(1);
102105
(void)_mm_shuffle_epi8(a, a);
103-
}" HAVE_SSSE3)
106+
}" HAVE_SSE42)
104107

105108
# now look for AVX2
106109
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
@@ -136,13 +139,26 @@ int main(){
136139
(void)_mm512_permutexvar_epi8(idx, a);
137140
}" HAVE_AVX512VBMI)
138141

139-
elseif (!ARCH_ARM32 AND !ARCH_AARCH64)
142+
143+
elseif (ARCH_ARM32 OR ARCH_AARCH64)
144+
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
145+
int main() {
146+
int32x4_t a = vdupq_n_s32(1);
147+
(void)a;
148+
}" HAVE_NEON)
149+
elseif (ARCH_PPC64EL)
150+
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
151+
int main() {
152+
vector int a = vec_splat_s32(1);
153+
(void)a;
154+
}" HAVE_VSX)
155+
else ()
140156
message (FATAL_ERROR "Unsupported architecture")
141157
endif ()
142158

143159
if (FAT_RUNTIME)
144-
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSSE3)
145-
message(FATAL_ERROR "SSSE3 support required to build fat runtime")
160+
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSE42)
161+
message(FATAL_ERROR "SSE4.2 support required to build fat runtime")
146162
endif ()
147163
if ((ARCH_IA32 OR ARCH_X86_64) AND BUILD_AVX2 AND NOT HAVE_AVX2)
148164
message(FATAL_ERROR "AVX2 support required to build fat runtime")
@@ -163,12 +179,16 @@ else (NOT FAT_RUNTIME)
163179
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_AVX512VBMI)
164180
message(STATUS "Building without AVX512VBMI support")
165181
endif ()
166-
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSSE3)
167-
message(FATAL_ERROR "A minimum of SSSE3 compiler support is required")
182+
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSE42)
183+
message(FATAL_ERROR "A minimum of SSE4.2 compiler support is required")
168184
endif ()
169185
if ((ARCH_ARM32 OR ARCH_AARCH64) AND NOT HAVE_NEON)
170186
message(FATAL_ERROR "NEON support required for ARM support")
171187
endif ()
188+
if (ARCH_PPC64EL AND NOT HAVE_VSX)
189+
message(FATAL_ERROR "VSX support required for Power support")
190+
endif ()
191+
172192
endif ()
173193

174194
unset (PREV_FLAGS)

cmake/config.h.in

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@
2121
/* "Define if building for AARCH64" */
2222
#cmakedefine ARCH_AARCH64
2323

24+
/* "Define if building for PPC64EL" */
25+
#cmakedefine ARCH_PPC64EL
26+
2427
/* "Define if cross compiling for AARCH64" */
2528
#cmakedefine CROSS_COMPILE_AARCH64
2629

@@ -75,6 +78,9 @@
7578
/* C compiler has arm_sve.h */
7679
#cmakedefine HAVE_C_ARM_SVE_H
7780

81+
/* C compiler has altivec.h */
82+
#cmakedefine HAVE_C_PPC64EL_ALTIVEC_H
83+
7884
/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
7985
0 if you don't. */
8086
#cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP

cmake/platform.cmake

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# determine compiler
2+
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
3+
set(CMAKE_COMPILER_IS_CLANG TRUE)
4+
endif()
5+
16
# determine the target arch
27

38
if (CROSS_COMPILE_AARCH64)
@@ -7,15 +12,13 @@ if (CROSS_COMPILE_AARCH64)
712
else()
813
# really only interested in the preprocessor here
914
CHECK_C_SOURCE_COMPILES("#if !(defined(__x86_64__) || defined(_M_X64))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_X86_64)
10-
1115
CHECK_C_SOURCE_COMPILES("#if !(defined(__i386__) || defined(_M_IX86))\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_IA32)
12-
1316
CHECK_C_SOURCE_COMPILES("#if !defined(__ARM_ARCH_ISA_A64)\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_AARCH64)
1417
CHECK_C_SOURCE_COMPILES("#if !defined(__ARM_ARCH_ISA_ARM)\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_ARM32)
15-
16-
if (ARCH_X86_64 OR ARCH_AARCH64)
18+
CHECK_C_SOURCE_COMPILES("#if !defined(__PPC64__) && !(defined(__LITTLE_ENDIAN__) && defined(__VSX__))\n#error not ppc64el\n#endif\nint main(void) { return 0; }" ARCH_PPC64EL)
19+
if (ARCH_X86_64 OR ARCH_AARCH64 OR ARCH_PPC64EL)
1720
set(ARCH_64_BIT TRUE)
1821
else()
1922
set(ARCH_32_BIT TRUE)
2023
endif()
21-
endif()
24+
endif()

examples/patbench.cc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@
112112
*
113113
*/
114114

115+
#include <random>
115116
#include <algorithm>
116117
#include <cstring>
117118
#include <chrono>
@@ -151,6 +152,8 @@ using std::set;
151152
using std::min;
152153
using std::max;
153154
using std::copy;
155+
using std::random_device;
156+
using std::mt19937;
154157

155158
enum Criterion {
156159
CRITERION_THROUGHPUT,
@@ -731,7 +734,9 @@ int main(int argc, char **argv) {
731734
count++;
732735
cout << "." << std::flush;
733736
vector<unsigned> sv(s.begin(), s.end());
734-
random_shuffle(sv.begin(), sv.end());
737+
random_device rng;
738+
mt19937 urng(rng());
739+
shuffle(sv.begin(), sv.end(), urng);
735740
unsigned groups = factor_max + 1;
736741
for (unsigned current_group = 0; current_group < groups;
737742
current_group++) {

src/fdr/teddy.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -893,10 +893,10 @@ do { \
893893
#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \
894894
do { \
895895
if (unlikely(diff128(var, ones128()))) { \
896-
u64a __attribute__((aligned(16))) vector[2]; \
897-
store128(vector, var); \
898-
u64a lo = vector[0]; \
899-
u64a hi = vector[1]; \
896+
u64a __attribute__((aligned(16))) vec[2]; \
897+
store128(vec, var); \
898+
u64a lo = vec[0]; \
899+
u64a hi = vec[1]; \
900900
CONF_CHUNK_64(lo, bucket, offset, reason, conf_fn); \
901901
CONF_CHUNK_64(hi, bucket, offset + 8, reason, conf_fn); \
902902
} \

src/hs_valid_platform.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,5 +44,7 @@ hs_error_t HS_CDECL hs_valid_platform(void) {
4444
}
4545
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
4646
return HS_SUCCESS;
47+
#elif defined(ARCH_PPC64EL)
48+
return HS_SUCCESS;
4749
#endif
4850
}

src/hwlm/hwlm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
#include "nfa/accel.h"
4040
#include "nfa/shufti.h"
4141
#include "nfa/truffle.h"
42-
#include "nfa/vermicelli.h"
42+
#include "nfa/vermicelli.hpp"
4343
#include <string.h>
4444

4545
#define MIN_ACCEL_LEN_BLOCK 16

0 commit comments

Comments
 (0)