diff --git a/cava/nightwatch/generator/c/cmakelists.py b/cava/nightwatch/generator/c/cmakelists.py index b6e5a36a..185f1308 100644 --- a/cava/nightwatch/generator/c/cmakelists.py +++ b/cava/nightwatch/generator/c/cmakelists.py @@ -97,6 +97,7 @@ def source(api: API, errors: List[Any]) -> Tuple[str, str]: ${{GLIB2_LIBRARIES}} ${{Boost_LIBRARIES}} Threads::Threads + rt ${{Config++}} ) target_compile_options({api.soname} diff --git a/cava/nightwatch/generator/c/guestlib.py b/cava/nightwatch/generator/c/guestlib.py index 837a0da8..2b4074cc 100644 --- a/cava/nightwatch/generator/c/guestlib.py +++ b/cava/nightwatch/generator/c/guestlib.py @@ -13,6 +13,7 @@ def source(api: API, errors: List[Any]) -> Tuple[str, str]: #define ava_is_worker 0 #define ava_is_guest 1 +#include "common/extensions/migration_barrier.h" #include "guestlib.h" {handle_command_header(api)} diff --git a/cava/nightwatch/generator/c/stubs.py b/cava/nightwatch/generator/c/stubs.py index 841ee59a..0b82030d 100644 --- a/cava/nightwatch/generator/c/stubs.py +++ b/cava/nightwatch/generator/c/stubs.py @@ -71,9 +71,14 @@ def function_implementation(f: Function) -> Union[str, Expr]: intptr_t __call_id = ava_get_call_id(&__ava_endpoint); #ifdef AVA_BENCHMARKING_MIGRATE + migration_barrier_wait(__call_id); + struct timespec tp; + clock_gettime(CLOCK_MONOTONIC, &tp); + printf("--- [%9ld] @ %lld.%.9ld executing {f.name}\\n", __call_id, + (long long)tp.tv_sec, tp.tv_nsec); if (__ava_endpoint.migration_call_id >= 0 && __call_id == __ava_endpoint.migration_call_id) {{ - printf("start live migration at call_id %d\\n", __call_id); + printf("start live migration at call_id %ld\\n", __call_id); __ava_endpoint.migration_call_id = -2; start_live_migration(__chan); }} diff --git a/cava/samples/cudart/cudart.cpp b/cava/samples/cudart/cudart.cpp index 64618983..dc380bc0 100644 --- a/cava/samples/cudart/cudart.cpp +++ b/cava/samples/cudart/cudart.cpp @@ -4,8 +4,10 @@ ava_identifier(CUDART); ava_number(9); ava_cxxflags(-I/usr/local/cuda-10.1/include -I../headers); ava_libs(-L/usr/local/cuda-10.1/lib64 -lcudart -lcuda -lcublas -lcudnn); -ava_guestlib_srcs(../common/extensions/cudart_10.1_utilities.cpp); -ava_worker_srcs(../common/extensions/cudart_10.1_utilities.cpp); +<<<<<<< HEAD:cava/samples/cudart/cudart.cpp +ava_guestlib_srcs(../common/extensions/cudart_10.1_utilities.cpp extensions/migration_barrier.c); +ava_worker_srcs(../common/extensions/cudart_10.1_utilities.cpp extensions/migration_barrier.c); +// TODO (#86) the migration_barrier is not used by the worker but this is required to link correctly ava_export_qualifier(); /** @@ -37,7 +39,7 @@ ava_begin_utility; #include "cudart_nw_internal.h" #include "common/linkage.h" #include "common/extensions/cudart_10.1_utilities.hpp" - +#include "common/extensions/migration_barrier.h" #include #include #include @@ -1691,3 +1693,14 @@ cudnnPoolingBackward(cudnnHandle_t handle, ava_argument(dxDesc) ava_handle; ava_argument(dx) ava_handle; } + +ava_utility void __helper_guestlib_init_prologue() { + migration_barrier_init(); +} + +ava_utility void __helper_guestlib_fini_epilogue() { + migration_barrier_destroy(); +} + +ava_guestlib_init_prologue(__helper_guestlib_init_prologue()); +ava_guestlib_fini_epilogue(__helper_guestlib_fini_epilogue()); diff --git a/guestlib/extensions/migration_barrier.c b/guestlib/extensions/migration_barrier.c new file mode 100644 index 00000000..2defce98 --- /dev/null +++ b/guestlib/extensions/migration_barrier.c @@ -0,0 +1,137 @@ +#include "common/extensions/migration_barrier.h" + +#include +#include /* For O_* constants */ +#include +#include +#include +#include +#include +#include /* For mode constants */ +#include +#include + +#define CHECK_ERR(expr, failure, error_value) \ + do { \ + if (expr == failure) { \ + fprintf(stderr, #expr " failed: %s\n", strerror(error_value)); \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +#define CHECK_RET(expr, success, error_value) \ + do { \ + if (expr != success) { \ + fprintf(stderr, #expr " failed: %s\n", strerror(error_value)); \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +static int migration_barrier_participants = 0; +static int migration_barrier_index = -1; +static long long int migration_barrier_api_id = -1; +static int barrier_shm_fd = -1; +static const char *ava_barrier_shm_name = "/ava_barrier_shm"; + +typedef struct { + pthread_barrier_t barrier; + int flag; +} barrier_plus_flag; + +static barrier_plus_flag *migration_barrier; + +void migration_barrier_init(void) { + char *env_migration_barrier_participants = NULL; + env_migration_barrier_participants = + getenv("AVA_MIGRATION_BARRIER_PARTICIPANTS"); + if (env_migration_barrier_participants != NULL) { + migration_barrier_participants = + atoi(env_migration_barrier_participants); + printf("AVA_MIGRATION_BARRIER_PARTICIPANTS=%d\n", + migration_barrier_participants); + fflush(stdout); + } + char *env_migration_barrier_index = NULL; + env_migration_barrier_index = getenv("AVA_MIGRATION_BARRIER_INDEX"); + if (env_migration_barrier_index != NULL) { + migration_barrier_index = atoi(env_migration_barrier_index); + printf("AVA_MIGRATION_BARRIER_INDEX=%d\n", migration_barrier_index); + fflush(stdout); + } + char *env_migration_barrier_api_id = NULL; + env_migration_barrier_api_id = getenv("AVA_MIGRATION_BARRIER_API_ID"); + if (env_migration_barrier_api_id != NULL) { + migration_barrier_api_id = atoll(env_migration_barrier_api_id); + printf("AVA_MIGRATION_BARRIER_API_ID=%lld\n", migration_barrier_api_id); + fflush(stdout); + } + if (migration_barrier_participants && migration_barrier_api_id != -1) { + if (migration_barrier_index == 0) { + // the first process creates the shared memory object + CHECK_ERR((barrier_shm_fd = shm_open(ava_barrier_shm_name, + O_RDWR | O_CREAT, 0666)), + -1, errno); + CHECK_ERR(ftruncate(barrier_shm_fd, sizeof(barrier_plus_flag)), -1, + errno); + } else { + // all other processes just open an existing shared memory object + do { + // loop until the shm object is created + barrier_shm_fd = shm_open(ava_barrier_shm_name, O_RDWR, 0666); + } while (errno == ENOENT && barrier_shm_fd == -1); + CHECK_ERR(barrier_shm_fd, -1, errno); + } + CHECK_ERR((migration_barrier = mmap(NULL, sizeof(barrier_plus_flag), + PROT_READ | PROT_WRITE, MAP_SHARED, + barrier_shm_fd, 0)), + MAP_FAILED, errno); + CHECK_ERR(close(barrier_shm_fd), -1, errno); + + if (migration_barrier_index == 0) { + int ret; + pthread_barrierattr_t attr; + CHECK_RET((ret = pthread_barrierattr_init(&attr)), 0, ret); + CHECK_RET((ret = pthread_barrierattr_setpshared( + &attr, PTHREAD_PROCESS_SHARED)), + 0, ret); + CHECK_RET( + (ret = pthread_barrier_init(&migration_barrier->barrier, &attr, + migration_barrier_participants)), + 0, ret); + CHECK_RET((ret = pthread_barrierattr_destroy(&attr)), 0, ret); + migration_barrier->flag = 1; + } else { + // spin waiting for barrier to be available + // migration_barrier->flag is initialized to zero by shm_open with + // O_CREAT + do { + ; + } while (!migration_barrier->flag); + } + } +} + +void migration_barrier_destroy(void) { + if (migration_barrier_participants && migration_barrier_index == 0) { + // pthread_barrier_destroy and munmap crash when this is executed, probably because + // this is executed as part of library unloading + //int ret; + //CHECK_RET((ret = pthread_barrier_destroy(&migration_barrier->barrier)), + // 0, ret); + //CHECK_RET(munmap(migration_barrier, sizeof(barrier_plus_flag)), -1, + // errno); + // CHECK_RET(shm_unlink(ava_barrier_shm_name), -1, errno); + } +} + +void migration_barrier_wait(long long int call_id) { + if (migration_barrier_participants && migration_barrier_api_id == call_id) { + printf("migration barrier wait %d %lld %lld\n", migration_barrier_participants, + migration_barrier_api_id, call_id); + fflush(stdout); + pthread_barrier_wait(&migration_barrier->barrier); + printf("migration barrier waited %d %lld %lld\n", migration_barrier_participants, + migration_barrier_api_id, call_id); + fflush(stdout); + } +} diff --git a/include/extensions/migration_barrier.h b/include/extensions/migration_barrier.h new file mode 100644 index 00000000..15fa11bd --- /dev/null +++ b/include/extensions/migration_barrier.h @@ -0,0 +1,16 @@ +#ifndef AVA_MIGRATION_BARRIER_H +#define AVA_MIGRATION_BARRIER_H + +#ifdef __cplusplus +extern "C" { +#endif + +void migration_barrier_init(void); +void migration_barrier_destroy(void); +void migration_barrier_wait(long long int call_id); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/worker/extensions/migration_barrier.c b/worker/extensions/migration_barrier.c new file mode 100644 index 00000000..1656a363 --- /dev/null +++ b/worker/extensions/migration_barrier.c @@ -0,0 +1,14 @@ +#include "common/extensions/migration_barrier.h" + +// migration barrier not valid for the worker. This code is only here due to +// AvA's build system requiring extensions have the same interface on guestlib +// and worker. (#86) + +void migration_barrier_init(void) { +} + +void migration_barrier_destroy(void) { +} + +void migration_barrier_wait(long long int call_id) { +}