Skip to content

Commit 9b289bf

Browse files
authored
Robj/deadlock fix (#644)
This patch fixes two deadlocks, also enabling threads to perform updates to splinterdb while also performing asynchronous queries. The first deadlock was caused by trunk_merge_lookup acquiring two locks on the root. This deadlock prone because, if another thread begins to GC the root, it will set the writer bit on the root lock and then wait for the readers to clear out. But, if the query code has already acquired its first read lock on the root and attempts to acquire its second read lock after the writer bit is set, then it will deadlock with the GC thread. The fix is to acquire a lock on the root only once. The second deadlock could occur when a thread performing asynchronous queries performs an update which results in a GC. In this case, one of the thread's async queries may hold a read lock on a node-to-be-GCed, but the GC code will wait forever for that read lock to be released, causing the thread to deadlock with itself. The fix in this case is to defer GC until there are no more readers. So there's now a queue of GC tasks in the trunk context. Any time GC reaches a node with a read lock, it stops and puts that node on the queue. Every trunk modification checks the queue and processes any GCs that are now available to perform.
1 parent 7fecf01 commit 9b289bf

File tree

6 files changed

+258
-146
lines changed

6 files changed

+258
-146
lines changed

src/cache.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,8 @@ typedef void (*extent_sync_fn)(cache *cc,
129129
uint64 *pages_outstanding);
130130
typedef void (*page_prefetch_fn)(cache *cc, uint64 addr, page_type type);
131131
typedef int (*evict_fn)(cache *cc, bool32 ignore_pinned);
132-
typedef void (*assert_ungot_fn)(cache *cc, uint64 addr);
132+
typedef bool32 (*page_addr_pred_fn)(cache *cc, uint64 addr);
133+
typedef void (*page_addr_fn)(cache *cc, uint64 addr);
133134
typedef void (*validate_page_fn)(cache *cc, page_handle *page, uint64 addr);
134135
typedef void (*io_stats_fn)(cache *cc, uint64 *read_bytes, uint64 *write_bytes);
135136
typedef uint32 (*count_dirty_fn)(cache *cc);
@@ -168,7 +169,8 @@ typedef struct cache_ops {
168169
cache_generic_fn flush;
169170
evict_fn evict;
170171
cache_generic_fn cleanup;
171-
assert_ungot_fn assert_ungot;
172+
page_addr_pred_fn in_use;
173+
page_addr_fn assert_ungot;
172174
cache_generic_fn assert_free;
173175
validate_page_fn validate_page;
174176
cache_present_fn cache_present;
@@ -552,6 +554,20 @@ cache_cleanup(cache *cc)
552554
return cc->ops->cleanup(cc);
553555
}
554556

557+
/*
558+
*-----------------------------------------------------------------------------
559+
* cache_in_use
560+
*
561+
* Returns TRUE if there is a reader or writer holding a reference
562+
* to the page at addr.
563+
*-----------------------------------------------------------------------------
564+
*/
565+
static inline bool32
566+
cache_in_use(cache *cc, uint64 addr)
567+
{
568+
return cc->ops->in_use(cc, addr);
569+
}
570+
555571
/*
556572
*-----------------------------------------------------------------------------
557573
* cache_assert_ungot

src/clockcache.c

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1480,7 +1480,7 @@ void
14801480
clockcache_extent_discard(clockcache *cc, uint64 addr, page_type type)
14811481
{
14821482
debug_assert(addr % clockcache_extent_size(cc) == 0);
1483-
debug_assert(allocator_get_refcount(cc->al, addr) == 1);
1483+
debug_assert(allocator_get_refcount(cc->al, addr) == AL_NO_REFS);
14841484

14851485
clockcache_log(addr, 0, "hard evict extent: addr %lu\n", addr);
14861486
for (uint64 i = 0; i < cc->cfg->pages_per_extent; i++) {
@@ -1697,10 +1697,10 @@ clockcache_get_internal(clockcache *cc, // IN
16971697
refcount extent_ref_count = allocator_get_refcount(cc->al, base_addr);
16981698

16991699
// Dump allocated extents info for deeper debugging.
1700-
if (extent_ref_count <= 1) {
1700+
if (extent_ref_count == AL_FREE) {
17011701
allocator_print_allocated(cc->al);
17021702
}
1703-
debug_assert((extent_ref_count > 1),
1703+
debug_assert((extent_ref_count != AL_FREE),
17041704
"Attempt to get a buffer for page addr=%lu"
17051705
", page type=%d ('%s'),"
17061706
" from extent addr=%lu, (extent number=%lu)"
@@ -1917,10 +1917,10 @@ clockcache_get_internal_async(clockcache_get_async_state *state, uint64 depth)
19171917
allocator_get_refcount(state->cc->al, state->base_addr);
19181918

19191919
// Dump allocated extents info for deeper debugging.
1920-
if (state->extent_ref_count <= 1) {
1920+
if (state->extent_ref_count == AL_FREE) {
19211921
allocator_print_allocated(state->cc->al);
19221922
}
1923-
debug_assert((state->extent_ref_count > 1),
1923+
debug_assert((state->extent_ref_count != AL_FREE),
19241924
"Attempt to get a buffer for page addr=%lu"
19251925
", page type=%d ('%s'),"
19261926
" from extent addr=%lu, (extent number=%lu)"
@@ -2680,6 +2680,21 @@ clockcache_count_dirty(clockcache *cc)
26802680
return dirty_count;
26812681
}
26822682

2683+
bool32
2684+
clockcache_in_use(clockcache *cc, uint64 addr)
2685+
{
2686+
uint32 entry_no = clockcache_lookup(cc, addr);
2687+
if (entry_no == CC_UNMAPPED_ENTRY) {
2688+
return FALSE;
2689+
}
2690+
for (threadid thr_i = 0; thr_i < CC_RC_WIDTH; thr_i++) {
2691+
if (clockcache_get_ref(cc, entry_no, thr_i) > 0) {
2692+
return TRUE;
2693+
}
2694+
}
2695+
return clockcache_test_flag(cc, entry_no, CC_WRITELOCKED);
2696+
}
2697+
26832698
uint16
26842699
clockcache_get_read_ref(clockcache *cc, page_handle *page)
26852700
{
@@ -2896,6 +2911,13 @@ clockcache_wait_virtual(cache *c)
28962911
return clockcache_wait(cc);
28972912
}
28982913

2914+
bool32
2915+
clockcache_in_use_virtual(cache *c, uint64 addr)
2916+
{
2917+
clockcache *cc = (clockcache *)c;
2918+
return clockcache_in_use(cc, addr);
2919+
}
2920+
28992921
void
29002922
clockcache_assert_ungot_virtual(cache *c, uint64 addr)
29012923
{
@@ -3010,6 +3032,7 @@ static cache_ops clockcache_ops = {
30103032
.flush = clockcache_flush_virtual,
30113033
.evict = clockcache_evict_all_virtual,
30123034
.cleanup = clockcache_wait_virtual,
3035+
.in_use = clockcache_in_use_virtual,
30133036
.assert_ungot = clockcache_assert_ungot_virtual,
30143037
.assert_free = clockcache_assert_no_locks_held_virtual,
30153038
.print = clockcache_print_virtual,

src/core.c

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,11 @@ core_set_super_block(core_handle *spl,
156156
super = (core_super_block *)super_page->data;
157157
uint64 old_root_addr = super->root_addr;
158158

159-
if (spl->trunk_context.root != NULL) {
160-
super->root_addr = spl->trunk_context.root->addr;
159+
trunk_ondisk_node_handle root_handle;
160+
trunk_init_root_handle(&spl->trunk_context, &root_handle);
161+
uint64 root_addr = trunk_ondisk_node_handle_addr(&root_handle);
162+
if (root_addr != 0) {
163+
super->root_addr = root_addr;
161164
rc = trunk_inc_ref(spl->cfg.trunk_node_cfg,
162165
spl->heap_id,
163166
spl->cc,
@@ -169,6 +172,8 @@ core_set_super_block(core_handle *spl,
169172
} else {
170173
super->root_addr = 0;
171174
}
175+
trunk_ondisk_node_handle_deinit(&root_handle);
176+
172177
if (spl->cfg.use_log) {
173178
if (spl->log) {
174179
super->log_addr = log_addr(spl->log);
@@ -1259,7 +1264,6 @@ core_lookup(core_handle *spl, key target, merge_accumulator *result)
12591264

12601265
rc = trunk_merge_lookup(
12611266
&spl->trunk_context, &root_handle, target, result, NULL);
1262-
// Release the node handle before handling any errors
12631267
trunk_ondisk_node_handle_deinit(&root_handle);
12641268
if (!SUCCESS(rc)) {
12651269
return rc;
@@ -1339,10 +1343,8 @@ core_lookup_async(core_lookup_async_state *state)
13391343
NULL,
13401344
state->callback,
13411345
state->callback_arg);
1342-
rc = async_result(&state->trunk_node_state);
1343-
1344-
// Release the node handle before handling any errors
13451346
trunk_ondisk_node_handle_deinit(&state->root_handle);
1347+
rc = async_result(&state->trunk_node_state);
13461348
if (!SUCCESS(rc)) {
13471349
async_return(state, rc);
13481350
}

0 commit comments

Comments
 (0)