Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/cygwin-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
- name: info
run: bash -c '/usr/local/bin/rsync --version'
- name: check
run: bash -c 'RSYNC_EXPECT_SKIPPED=acls-default,acls,chown,devices,dir-sgid,open-noatime,protected-regular make check'
run: bash -c 'RSYNC_EXPECT_SKIPPED=acls-default,acls,chown,devices,dir-sgid,open-noatime,protected-regular,simd-checksum make check'
- name: ssl file list
run: bash -c 'PATH="/usr/local/bin:$PATH" rsync-ssl --no-motd download.samba.org::rsyncftp/ || true'
- name: save artifact
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/macos-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:
- name: info
run: rsync --version
- name: check
run: sudo RSYNC_EXPECT_SKIPPED=acls-default,chmod-temp-dir,chown-fake,devices-fake,dir-sgid,open-noatime,protected-regular,xattrs-hlink,xattrs make check
run: sudo RSYNC_EXPECT_SKIPPED=acls-default,chmod-temp-dir,chown-fake,devices-fake,dir-sgid,open-noatime,protected-regular,simd-checksum,xattrs-hlink,xattrs make check
- name: ssl file list
run: rsync-ssl --no-motd download.samba.org::rsyncftp/ || true
- name: save artifact
Expand Down
11 changes: 10 additions & 1 deletion Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ TLS_OBJ = tls.o syscall.o util2.o t_stub.o lib/compat.o lib/snprintf.o lib/perms

# Programs we must have to run the test cases
CHECK_PROGS = rsync$(EXEEXT) tls$(EXEEXT) getgroups$(EXEEXT) getfsdev$(EXEEXT) \
testrun$(EXEEXT) trimslash$(EXEEXT) t_unsafe$(EXEEXT) wildtest$(EXEEXT)
testrun$(EXEEXT) trimslash$(EXEEXT) t_unsafe$(EXEEXT) wildtest$(EXEEXT) \
simdtest$(EXEEXT)

CHECK_SYMLINKS = testsuite/chown-fake.test testsuite/devices-fake.test testsuite/xattrs-hlink.test

Expand Down Expand Up @@ -326,6 +327,14 @@ wildtest.o: wildtest.c t_stub.o lib/wildmatch.c rsync.h config.h
wildtest$(EXEEXT): wildtest.o lib/compat.o lib/snprintf.o @BUILD_POPT@
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ wildtest.o lib/compat.o lib/snprintf.o @BUILD_POPT@ $(LIBS)

simdtest$(EXEEXT): simd-checksum-x86_64.cpp $(HEADERS)
@if test x"@ROLL_SIMD@" != x; then \
$(CXX) -I. $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) -DTEST_SIMD_CHECKSUM1 \
-o $@ $(srcdir)/simd-checksum-x86_64.cpp @ROLL_ASM@ $(LIBS); \
else \
touch $@; \
fi

testsuite/chown-fake.test:
ln -s chown.test $(srcdir)/testsuite/chown-fake.test

Expand Down
115 changes: 113 additions & 2 deletions simd-checksum-x86_64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,8 +347,7 @@ __attribute__ ((target("avx2"))) MVSTATIC int32 get_checksum1_avx2_64(schar* buf
__m128i tmp = _mm_load_si128((__m128i*) mul_t1_buf);
__m256i mul_t1 = _mm256_cvtepu8_epi16(tmp);
__m256i mul_const = _mm256_broadcastd_epi32(_mm_cvtsi32_si128(4 | (3 << 8) | (2 << 16) | (1 << 24)));
__m256i mul_one;
mul_one = _mm256_abs_epi8(_mm256_cmpeq_epi16(mul_one,mul_one)); // set all vector elements to 1
__m256i mul_one = _mm256_set1_epi8(1);

for (; i < (len-64); i+=64) {
// Load ... 4*[int8*16]
Expand Down Expand Up @@ -548,6 +547,118 @@ int main() {
#pragma clang optimize on
#endif /* BENCHMARK_SIMD_CHECKSUM1 */

#ifdef TEST_SIMD_CHECKSUM1

static uint32 checksum_via_default(char *buf, int32 len)
{
uint32 s1 = 0, s2 = 0;
get_checksum1_default_1((schar*)buf, len, 0, &s1, &s2);
return (s1 & 0xffff) + (s2 << 16);
}

static uint32 checksum_via_sse2(char *buf, int32 len)
{
int32 i;
uint32 s1 = 0, s2 = 0;
i = get_checksum1_sse2_32((schar*)buf, len, 0, &s1, &s2);
get_checksum1_default_1((schar*)buf, len, i, &s1, &s2);
return (s1 & 0xffff) + (s2 << 16);
}

static uint32 checksum_via_ssse3(char *buf, int32 len)
{
int32 i;
uint32 s1 = 0, s2 = 0;
i = get_checksum1_ssse3_32((schar*)buf, len, 0, &s1, &s2);
get_checksum1_default_1((schar*)buf, len, i, &s1, &s2);
return (s1 & 0xffff) + (s2 << 16);
}

static uint32 checksum_via_avx2(char *buf, int32 len)
{
int32 i;
uint32 s1 = 0, s2 = 0;
#ifdef USE_ROLL_ASM
i = get_checksum1_avx2_asm((schar*)buf, len, 0, &s1, &s2);
#else
i = get_checksum1_avx2_64((schar*)buf, len, 0, &s1, &s2);
#endif
get_checksum1_default_1((schar*)buf, len, i, &s1, &s2);
return (s1 & 0xffff) + (s2 << 16);
}

int main()
{
static const int sizes[] = {1, 4, 31, 32, 33, 63, 64, 65, 128, 129, 256, 700, 1024, 4096, 65536};
int num_sizes = sizeof(sizes) / sizeof(sizes[0]);
int max_size = sizes[num_sizes - 1];
int failures = 0;

/* Allocate with extra bytes for unaligned test */
unsigned char *raw = (unsigned char *)malloc(max_size + 64 + 1);
if (!raw) {
fprintf(stderr, "malloc failed\n");
return 1;
}

/* Fill with deterministic data */
for (int i = 0; i < max_size + 64 + 1; i++)
raw[i] = (i + (i % 3) + (i % 11)) % 256;

/* Test with aligned buffer (64-byte aligned) */
unsigned char *aligned = raw + (64 - ((uintptr_t)raw % 64));

/* Test with unaligned buffer (+1 byte offset) */
unsigned char *unaligned = aligned + 1;

struct { const char *name; unsigned char *buf; } buffers[] = {
{"aligned", aligned},
{"unaligned", unaligned},
};

for (int b = 0; b < 2; b++) {
char *buf = (char *)buffers[b].buf;
const char *bname = buffers[b].name;

for (int s = 0; s < num_sizes; s++) {
int32 len = sizes[s];
uint32 ref = checksum_via_default(buf, len);
uint32 cs_sse2 = checksum_via_sse2(buf, len);
uint32 cs_ssse3 = checksum_via_ssse3(buf, len);
uint32 cs_avx2 = checksum_via_avx2(buf, len);
uint32 cs_auto = get_checksum1(buf, len);

if (cs_sse2 != ref) {
printf("FAIL %-9s size=%5d: SSE2=%08x ref=%08x\n", bname, len, cs_sse2, ref);
failures++;
}
if (cs_ssse3 != ref) {
printf("FAIL %-9s size=%5d: SSSE3=%08x ref=%08x\n", bname, len, cs_ssse3, ref);
failures++;
}
if (cs_avx2 != ref) {
printf("FAIL %-9s size=%5d: AVX2=%08x ref=%08x\n", bname, len, cs_avx2, ref);
failures++;
}
if (cs_auto != ref) {
printf("FAIL %-9s size=%5d: auto=%08x ref=%08x\n", bname, len, cs_auto, ref);
failures++;
}
}
}

free(raw);

if (failures) {
printf("%d checksum mismatches!\n", failures);
return 1;
}
printf("All SIMD checksum tests passed.\n");
return 0;
}

#endif /* TEST_SIMD_CHECKSUM1 */

#endif /* } USE_ROLL_SIMD */
#endif /* } __cplusplus */
#endif /* } __x86_64__ */
11 changes: 11 additions & 0 deletions testsuite/simd-checksum.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/sh

# Test SIMD checksum implementations against the C reference

. "$suitedir/rsync.fns"

if ! test -x "$TOOLDIR/simdtest"; then
test_skipped "simdtest not built (SIMD not available)"
fi

"$TOOLDIR/simdtest"
Loading