Skip to content

Commit f1e4f71

Browse files
Ibrar Ahmed authored and mason-sharp committed
Fix hash table iteration safety issues.
Fix hash table removal during iteration to prevent corruption. Use PostgreSQL-style single-pass removal for full flushes and two-pass approach for partial removals. Add error checking for hash_search(HASH_REMOVE) operations. Add NULL checks for HASH_FIXED_SIZE hash tables that can return NULL when full to prevent dereferencing NULL pointers. Add spinlock protection for reading counter values to prevent torn reads of 64-bit counters.
1 parent ad22c07 commit f1e4f71

File tree

4 files changed

+104
-29
lines changed

4 files changed

+104
-29
lines changed

src/spock_functions.c

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3002,8 +3002,16 @@ get_channel_stats(PG_FUNCTION_ARGS)
30023002
values[i++] = ObjectIdGetDatum(entry->key.subid);
30033003
values[i++] = ObjectIdGetDatum(entry->key.relid);
30043004

3005+
/*
3006+
* Acquire spinlock before reading counter values to prevent torn
3007+
* reads. The writer (handle_stats_counter) uses entry->mutex to
3008+
* protect counter updates, so we must use the same lock for reads
3009+
* to ensure atomic access to 64-bit counter values.
3010+
*/
3011+
SpinLockAcquire(&entry->mutex);
30053012
for (j = 0; j < SPOCK_STATS_NUM_COUNTERS; j++)
30063013
values[i++] = Int64GetDatum(entry->counter[j]);
3014+
SpinLockRelease(&entry->mutex);
30073015

30083016
tuplestore_putvalues(tupstore, tupdesc, values, nulls);
30093017
}
@@ -3031,10 +3039,19 @@ reset_channel_stats(PG_FUNCTION_ARGS)
30313039

30323040
LWLockAcquire(SpockCtx->lock, LW_EXCLUSIVE);
30333041

3034-
hash_seq_init(&hash_seq, SpockHash);
3035-
while ((entry = hash_seq_search(&hash_seq)) != NULL)
3036-
{
3037-
hash_search(SpockHash, &entry->key, HASH_REMOVE, NULL);
3042+
/*
3043+
* In principle we could reset only specific channel statistics; but that
3044+
* would be more complicated, and it's probably not worth the trouble.
3045+
* So for now, just reset all entries.
3046+
*/
3047+
hash_seq_init(&hash_seq, SpockHash);
3048+
while ((entry = hash_seq_search(&hash_seq)) != NULL)
3049+
{
3050+
if (hash_search(SpockHash,
3051+
&entry->key,
3052+
HASH_REMOVE,
3053+
NULL) == NULL)
3054+
elog(ERROR, "hash table corrupted");
30383055
}
30393056

30403057
LWLockRelease(SpockCtx->lock);

src/spock_group.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,19 @@ spock_group_attach(Oid dbid, Oid node_id, Oid remote_node_id)
217217

218218
entry = (SpockGroupEntry *) hash_search(SpockGroupHash, &key,
219219
HASH_ENTER, &found);
220+
221+
/*
222+
* HASH_FIXED_SIZE hash tables can return NULL when full. Check for this
223+
* to prevent dereferencing NULL pointer.
224+
*/
225+
if (entry == NULL)
226+
{
227+
elog(ERROR, "SpockGroupHash is full, cannot attach to group "
228+
"(dbid=%u, node_id=%u, remote_node_id=%u)",
229+
dbid, node_id, remote_node_id);
230+
return NULL;
231+
}
232+
220233
if (!found)
221234
{
222235
/*
@@ -357,6 +370,19 @@ spock_group_progress_update(const SpockApplyProgress *sap)
357370
entry = (SpockGroupEntry *) hash_search(SpockGroupHash, &sap->key,
358371
HASH_ENTER, &found);
359372

373+
/*
374+
* HASH_FIXED_SIZE hash tables can return NULL when full. Check for this
375+
* to prevent dereferencing NULL pointer.
376+
*/
377+
if (entry == NULL)
378+
{
379+
LWLockRelease(SpockCtx->apply_group_master_lock);
380+
elog(WARNING, "SpockGroupHash is full, cannot update progress for group "
381+
"(dbid=%u, node_id=%u, remote_node_id=%u)",
382+
sap->key.dbid, sap->key.node_id, sap->key.remote_node_id);
383+
return false;
384+
}
385+
360386
if (!found)
361387
{
362388
/*

src/spock_output_plugin.c

Lines changed: 45 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1488,8 +1488,6 @@ relmetacache_init(MemoryContext decoding_context)
14881488

14891489
if (RelMetaCache == NULL)
14901490
{
1491-
MemoryContext old_ctxt;
1492-
14931491
RelMetaCacheContext = AllocSetContextCreate(TopMemoryContext,
14941492
"spock output relmetacache",
14951493
ALLOCSET_DEFAULT_SIZES);
@@ -1502,11 +1500,9 @@ relmetacache_init(MemoryContext decoding_context)
15021500
ctl.entrysize = sizeof(struct SPKRelMetaCacheEntry);
15031501
ctl.hcxt = RelMetaCacheContext;
15041502
hash_flags |= HASH_BLOBS;
1505-
old_ctxt = MemoryContextSwitchTo(RelMetaCacheContext);
15061503
RelMetaCache = hash_create("spock relation metadata cache",
15071504
RELMETACACHE_INITIAL_SIZE,
15081505
&ctl, hash_flags);
1509-
(void) MemoryContextSwitchTo(old_ctxt);
15101506

15111507
Assert(RelMetaCache != NULL);
15121508

@@ -1530,14 +1526,11 @@ relmetacache_get_relation(struct SpockOutputData *data,
15301526
{
15311527
struct SPKRelMetaCacheEntry *hentry;
15321528
bool found;
1533-
MemoryContext old_mctx;
15341529

15351530
/* Find cached function info, creating if not found */
1536-
old_mctx = MemoryContextSwitchTo(RelMetaCacheContext);
15371531
hentry = (struct SPKRelMetaCacheEntry *) hash_search(RelMetaCache,
15381532
(void *) (&RelationGetRelid(rel)),
15391533
HASH_ENTER, &found);
1540-
(void) MemoryContextSwitchTo(old_mctx);
15411534

15421535
/* If not found or not valid, it can't be cached. */
15431536
if (!found || !hentry->is_valid)
@@ -1567,17 +1560,22 @@ relmetacache_flush(void)
15671560
HASH_SEQ_STATUS status;
15681561
struct SPKRelMetaCacheEntry *hentry;
15691562

1570-
if (RelMetaCache != NULL)
1571-
{
1572-
hash_seq_init(&status, RelMetaCache);
1563+
if (RelMetaCache == NULL)
1564+
return;
15731565

1574-
while ((hentry = (struct SPKRelMetaCacheEntry *) hash_seq_search(&status)) != NULL)
1575-
{
1576-
if (hash_search(RelMetaCache,
1577-
(void *) &hentry->relid,
1578-
HASH_REMOVE, NULL) == NULL)
1579-
elog(ERROR, "hash table corrupted");
1580-
}
1566+
/*
1567+
* In principle we could flush only cache entries relating to specific
1568+
* relations; but that would be more complicated, and it's probably not
1569+
* worth the trouble. So for now, just flush all entries.
1570+
*/
1571+
hash_seq_init(&status, RelMetaCache);
1572+
while ((hentry = (struct SPKRelMetaCacheEntry *) hash_seq_search(&status)) != NULL)
1573+
{
1574+
if (hash_search(RelMetaCache,
1575+
(void *) &hentry->relid,
1576+
HASH_REMOVE,
1577+
NULL) == NULL)
1578+
elog(ERROR, "hash table corrupted");
15811579
}
15821580
}
15831581

@@ -1592,27 +1590,49 @@ relmetacache_prune(void)
15921590
{
15931591
HASH_SEQ_STATUS status;
15941592
struct SPKRelMetaCacheEntry *hentry;
1593+
Oid *relids_to_remove;
1594+
int num_entries;
1595+
int i;
1596+
int idx;
15951597

15961598
/*
1597-
* Since the pruning can be expensive, do it only if ig we invalidated at
1599+
* Since the pruning can be expensive, do it only if we invalidated at
15981600
* least half of initial cache size.
15991601
*/
16001602
if (InvalidRelMetaCacheCnt < RELMETACACHE_INITIAL_SIZE / 2)
16011603
return;
16021604

1603-
hash_seq_init(&status, RelMetaCache);
1605+
/*
1606+
* Cannot call hash_search(HASH_REMOVE) during hash_seq_search iteration
1607+
* because removing entries corrupts the iterator state (bucket pointers,
1608+
* freed memory), causing crashes or skipped entries. Use two-pass
1609+
* approach: collect relids of invalid entries, then remove.
1610+
*/
1611+
num_entries = hash_get_num_entries(RelMetaCache);
1612+
if (num_entries == 0)
1613+
return;
1614+
1615+
relids_to_remove = (Oid *) palloc(num_entries * sizeof(Oid));
1616+
idx = 0;
16041617

1618+
/* First pass: collect invalid entries */
1619+
hash_seq_init(&status, RelMetaCache);
16051620
while ((hentry = (struct SPKRelMetaCacheEntry *) hash_seq_search(&status)) != NULL)
16061621
{
16071622
if (!hentry->is_valid)
1608-
{
1609-
if (hash_search(RelMetaCache,
1610-
(void *) &hentry->relid,
1611-
HASH_REMOVE, NULL) == NULL)
1612-
elog(ERROR, "hash table corrupted");
1613-
}
1623+
relids_to_remove[idx++] = hentry->relid;
1624+
}
1625+
1626+
/* Second pass: remove entries */
1627+
for (i = 0; i < idx; i++)
1628+
{
1629+
if (hash_search(RelMetaCache,
1630+
(void *) &relids_to_remove[i],
1631+
HASH_REMOVE, NULL) == NULL)
1632+
elog(ERROR, "hash table corrupted");
16141633
}
16151634

1635+
pfree(relids_to_remove);
16161636
InvalidRelMetaCacheCnt = 0;
16171637
}
16181638

src/spock_worker.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1018,6 +1018,18 @@ handle_stats_counter(Relation relation, Oid subid, spockStatsType typ, int ntup)
10181018

10191019
entry = (spockStatsEntry *) hash_search(SpockHash, &key,
10201020
HASH_ENTER, &found);
1021+
1022+
/*
1023+
* HASH_FIXED_SIZE hash tables can return NULL when full. Check for
1024+
* this to prevent dereferencing NULL pointer.
1025+
*/
1026+
if (entry == NULL)
1027+
{
1028+
LWLockRelease(SpockCtx->lock);
1029+
elog(WARNING, "SpockHash is full, cannot add stats entry");
1030+
spock_stats_hash_full = true;
1031+
return;
1032+
}
10211033
}
10221034

10231035
if (!found)

0 commit comments

Comments (0)