Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,66 @@ jobs:
with:
name: cloudsql-bin-${{ matrix.compiler }}
path: build/cloudSQL

performance-benchmarks:
  needs: style-check
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - name: Install dependencies
      run: |
        sudo apt-get update
        sudo apt-get install -y cmake clang ninja-build ccache python3

    - name: Configure CMake (Release)
      run: |
        mkdir build
        cd build
        cmake .. -G Ninja \
          -DCMAKE_BUILD_TYPE=Release \
          -DBUILD_BENCHMARKS=ON \
          -DBUILD_TESTS=OFF

    - name: Build Benchmarks
      run: |
        cd build
        ninja storage_bench execution_bench network_bench

    - name: Restore Performance Baseline
      id: restore-baseline
      uses: actions/cache/restore@v4
      with:
        path: build/baseline.json
        # Saved keys are suffixed with the commit SHA, so an exact key match
        # never occurs on a later run; restore-keys falls back to the most
        # recently saved entry matching this prefix (the newest main baseline).
        key: perf-baseline-${{ runner.os }}-main-${{ github.sha }}
        restore-keys: |
          perf-baseline-${{ runner.os }}-main-

    - name: Run Benchmarks
      run: |
        cd build
        ./storage_bench --benchmark_format=json > storage.json
        ./execution_bench --benchmark_format=json > execution.json
        ./network_bench --benchmark_format=json > network.json

        # Merge results into one current.json
        python3 -c "import json; s=json.load(open('storage.json')); e=json.load(open('execution.json')); n=json.load(open('network.json')); s['benchmarks'].extend(e['benchmarks']); s['benchmarks'].extend(n['benchmarks']); json.dump(s, open('current.json', 'w'))"

    - name: Check for Performance Regressions
      run: |
        if [ -f build/baseline.json ]; then
          python3 scripts/check_perf_regression.py build/current.json build/baseline.json 0.20
        else
          echo "No baseline found to compare against."
        fi

    # The cache archive stores files under the path they were saved from, so
    # the new baseline must be saved from build/baseline.json — the same path
    # the restore step reads — or later runs would never see a baseline file.
    - name: Promote Current Results to Baseline
      if: github.ref == 'refs/heads/main'
      run: cp build/current.json build/baseline.json

    - name: Save New Baseline
      if: github.ref == 'refs/heads/main'
      uses: actions/cache/save@v4
      with:
        path: build/baseline.json
        key: perf-baseline-${{ runner.os }}-main-${{ github.sha }}

    - name: Upload Current Results
      uses: actions/upload-artifact@v4
      with:
        name: performance-results
        path: build/current.json
96 changes: 46 additions & 50 deletions benchmarks/execution_bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,32 +29,30 @@ static void SetupBenchTable(HeapTable& table, int num_rows) {

static void BM_ExecutionSeqScan(benchmark::State& state) {
std::string test_dir = "./bench_exec_scan_" + std::to_string(state.range(0));
std::filesystem::remove_all(test_dir);
std::filesystem::create_directories(test_dir);
StorageManager disk_manager(test_dir);
BufferPoolManager bpm(2000, disk_manager);

Schema schema;
schema.add_column("id", common::ValueType::TYPE_INT64);
schema.add_column("data", common::ValueType::TYPE_TEXT);

for (auto _ : state) {
state.PauseTiming();
auto table = std::make_unique<HeapTable>("scan_table", bpm, schema);
{
StorageManager disk_manager(test_dir);
BufferPoolManager bpm(2000, disk_manager);

Schema schema;
schema.add_column("id", common::ValueType::TYPE_INT64);
schema.add_column("data", common::ValueType::TYPE_TEXT);

auto table = std::make_shared<HeapTable>("scan_table", bpm, schema);
table->create();
SetupBenchTable(*table, state.range(0));
state.ResumeTiming();

auto scan_op = std::make_unique<SeqScanOperator>(std::move(table));
scan_op->init();
Tuple tuple;
while (scan_op->next(tuple)) {
benchmark::DoNotOptimize(tuple);
for (auto _ : state) {
auto scan_op = std::make_unique<SeqScanOperator>(table);
scan_op->init();
scan_op->open();
Tuple tuple;
while (scan_op->next(tuple)) {
benchmark::DoNotOptimize(tuple);
}
}

state.PauseTiming();
std::filesystem::remove_all(test_dir);
std::filesystem::create_directories(test_dir);
state.ResumeTiming();
}

state.SetItemsProcessed(state.iterations() * state.range(0));
Expand All @@ -64,45 +62,43 @@ BENCHMARK(BM_ExecutionSeqScan)->Arg(1000)->Arg(10000);

static void BM_ExecutionHashJoin(benchmark::State& state) {
std::string test_dir = "./bench_exec_join_" + std::to_string(state.range(0));
std::filesystem::remove_all(test_dir);
std::filesystem::create_directories(test_dir);
StorageManager disk_manager(test_dir);
BufferPoolManager bpm(4000, disk_manager);

Schema schema;
schema.add_column("id", common::ValueType::TYPE_INT64);
schema.add_column("data", common::ValueType::TYPE_TEXT);

for (auto _ : state) {
state.PauseTiming();
auto left_table = std::make_unique<HeapTable>("left_table", bpm, schema);
{
StorageManager disk_manager(test_dir);
BufferPoolManager bpm(4000, disk_manager);

Schema schema;
schema.add_column("id", common::ValueType::TYPE_INT64);
schema.add_column("data", common::ValueType::TYPE_TEXT);

auto left_table = std::make_shared<HeapTable>("left_table", bpm, schema);
left_table->create();
SetupBenchTable(*left_table, state.range(0));

auto right_table = std::make_unique<HeapTable>("right_table", bpm, schema);
auto right_table = std::make_shared<HeapTable>("right_table", bpm, schema);
right_table->create();
SetupBenchTable(*right_table, state.range(0));
state.ResumeTiming();

auto left_scan = std::make_unique<SeqScanOperator>(std::move(left_table));
auto right_scan = std::make_unique<SeqScanOperator>(std::move(right_table));

// Join on "id"
auto left_key = std::make_unique<ColumnExpr>("id");
auto right_key = std::make_unique<ColumnExpr>("id");

auto join_op = std::make_unique<HashJoinOperator>(
std::move(left_scan), std::move(right_scan), std::move(left_key), std::move(right_key));

join_op->init();
Tuple tuple;
while (join_op->next(tuple)) {
benchmark::DoNotOptimize(tuple);
for (auto _ : state) {
auto left_scan = std::make_unique<SeqScanOperator>(left_table);
auto right_scan = std::make_unique<SeqScanOperator>(right_table);

// Join on "id"
auto left_key = std::make_unique<ColumnExpr>("id");
auto right_key = std::make_unique<ColumnExpr>("id");

auto join_op = std::make_unique<HashJoinOperator>(
std::move(left_scan), std::move(right_scan), std::move(left_key), std::move(right_key));

join_op->init();
join_op->open();
Tuple tuple;
while (join_op->next(tuple)) {
benchmark::DoNotOptimize(tuple);
}
}

state.PauseTiming();
std::filesystem::remove_all(test_dir);
std::filesystem::create_directories(test_dir);
state.ResumeTiming();
}

state.SetItemsProcessed(state.iterations() * state.range(0));
Expand Down
11 changes: 11 additions & 0 deletions include/common/value.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ class Value {
[[nodiscard]] ValueType type() const { return type_; }
[[nodiscard]] bool is_null() const { return type_ == ValueType::TYPE_NULL; }
[[nodiscard]] bool is_numeric() const;
[[nodiscard]] bool is_integer() const;
[[nodiscard]] bool is_float() const;

[[nodiscard]] bool as_bool() const;
[[nodiscard]] int8_t as_int8() const;
Expand Down Expand Up @@ -184,6 +186,15 @@ inline bool Value::is_numeric() const {
type_ == ValueType::TYPE_DECIMAL;
}

inline bool Value::is_integer() const {
    // True for every signed integer value type (8/16/32/64-bit).
    switch (type_) {
        case ValueType::TYPE_INT8:
        case ValueType::TYPE_INT16:
        case ValueType::TYPE_INT32:
        case ValueType::TYPE_INT64:
            return true;
        default:
            return false;
    }
}

inline bool Value::is_float() const {
    // True for either floating-point value type (32- or 64-bit).
    switch (type_) {
        case ValueType::TYPE_FLOAT32:
        case ValueType::TYPE_FLOAT64:
            return true;
        default:
            return false;
    }
}

// Accessors
inline bool Value::as_bool() const {
if (type_ != ValueType::TYPE_BOOL) {
Expand Down
9 changes: 5 additions & 4 deletions include/executor/operator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,13 @@ class Operator {
class SeqScanOperator : public Operator {
private:
std::string table_name_;
std::unique_ptr<storage::HeapTable> table_;
std::shared_ptr<storage::HeapTable> table_;
std::unique_ptr<storage::HeapTable::Iterator> iterator_;

Schema schema_;

public:
explicit SeqScanOperator(std::unique_ptr<storage::HeapTable> table, Transaction* txn = nullptr,
explicit SeqScanOperator(std::shared_ptr<storage::HeapTable> table, Transaction* txn = nullptr,
LockManager* lock_manager = nullptr);
Comment on lines +114 to 115
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Find remaining scan/index operator call sites that still create HeapTable with unique_ptr.
rg -n -C3 'make_unique<storage::HeapTable>|make_shared<storage::HeapTable>|SeqScanOperator|IndexScanOperator' src include

Repository: poyrazK/cloudSQL

Length of output: 9868


Three uncompiled call sites still pass std::unique_ptr<storage::HeapTable> to operators expecting std::shared_ptr.

The signature change to SeqScanOperator and IndexScanOperator requires all callers to pass std::make_shared<storage::HeapTable> instead of std::make_unique. Three locations in src/executor/query_executor.cpp still need updating:

  1. Lines 789–793 (IndexScanOperator)
  2. Lines 805–807 (SeqScanOperator, first join table)
  3. Lines 851–853 (SeqScanOperator, second join table)

Replace all three instances of std::make_unique<storage::HeapTable>(...) with std::make_shared<storage::HeapTable>(...) to resolve the compilation errors.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@include/executor/operator.hpp` around lines 114 - 115, The call sites still
construct HeapTable with std::make_unique but the operator constructors now take
std::shared_ptr; update the three locations that construct a HeapTable for
IndexScanOperator and SeqScanOperator to use
std::make_shared<storage::HeapTable>(...) instead of
std::make_unique<storage::HeapTable>(...), ensuring the produced shared_ptr is
passed to the IndexScanOperator and SeqScanOperator constructors (refer to the
IndexScanOperator and SeqScanOperator calls around the join setup in
query_executor.cpp).


bool init() override;
Expand Down Expand Up @@ -153,15 +154,15 @@ class IndexScanOperator : public Operator {
private:
std::string table_name_;
std::string index_name_;
std::unique_ptr<storage::HeapTable> table_;
std::shared_ptr<storage::HeapTable> table_;
std::unique_ptr<storage::BTreeIndex> index_;
common::Value search_key_;
std::vector<storage::HeapTable::TupleId> matching_ids_;
size_t current_match_index_ = 0;
Schema schema_;

public:
IndexScanOperator(std::unique_ptr<storage::HeapTable> table,
IndexScanOperator(std::shared_ptr<storage::HeapTable> table,
std::unique_ptr<storage::BTreeIndex> index, common::Value search_key,
Transaction* txn = nullptr, LockManager* lock_manager = nullptr);

Expand Down
1 change: 1 addition & 0 deletions include/storage/heap_table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ class HeapTable {
std::string filename_;
BufferPoolManager& bpm_;
executor::Schema schema_;
uint32_t last_page_id_ = 0;

public:
/**
Expand Down
15 changes: 9 additions & 6 deletions include/storage/lru_replacer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
#ifndef CLOUDSQL_STORAGE_LRU_REPLACER_HPP
#define CLOUDSQL_STORAGE_LRU_REPLACER_HPP

#include <list>
#include <mutex>
#include <unordered_map>
#include <vector>

namespace cloudsql::storage {
Expand All @@ -17,8 +15,9 @@ namespace cloudsql::storage {
* @class LRUReplacer
* @brief Tracks page usage and determines which page to evict
*
* Implements a thread-safe LRU policy. Pages that are pinned are
* removed from the replacer. When unpinned, they are added back.
* Implements a CLOCK (Second Chance) replacement policy; the class keeps
* the LRUReplacer name for API compatibility. The pin/unpin hot path
* performs no heap allocations because the tracking vectors are sized
* once at construction.
*/
class LRUReplacer {
public:
Expand Down Expand Up @@ -64,8 +63,12 @@ class LRUReplacer {
private:
size_t capacity_;
mutable std::mutex latch_;
std::list<uint32_t> lru_list_;
std::unordered_map<uint32_t, std::list<uint32_t>::iterator> lru_map_;

// CLOCK State
std::vector<bool> in_replacer_; // true if frame is a candidate for eviction
std::vector<bool> referenced_; // "Second chance" bit
size_t clock_hand_ = 0;
size_t current_size_ = 0;
};

} // namespace cloudsql::storage
Expand Down
72 changes: 72 additions & 0 deletions scripts/check_perf_regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env python3
"""Compare current google-benchmark JSON results against a stored baseline.

Usage: check_perf_regression.py <current.json> <baseline.json> [threshold]

Exits non-zero (via the __main__ guard) when any benchmark's real_time
regressed by more than the given fractional threshold.
"""
import json
import sys


def check_regression(current_file, baseline_file, threshold=0.2):
    """Return True when performance is acceptable, False on error or regression.

    current_file  -- path to the merged google-benchmark JSON for this run
    baseline_file -- path to the baseline JSON; a missing file is not an error
    threshold     -- allowed fractional slowdown per benchmark (0.2 == 20%)
    """
    # Load current results - failure here is a fatal error.
    try:
        with open(current_file) as f:
            current = json.load(f)
    except Exception as e:
        print(f"Error loading current performance results from {current_file}: {e}")
        return False

    # Load baseline results - a missing file is handled gracefully so the
    # very first CI run (no cached baseline yet) does not fail the build.
    try:
        with open(baseline_file) as f:
            baseline = json.load(f)
    except FileNotFoundError:
        print(f"No baseline found at {baseline_file}. Skipping comparison.")
        return True
    except Exception as e:
        print(f"Error loading baseline performance results from {baseline_file}: {e}")
        return False

    regressions = []

    # Map benchmark name -> baseline real_time. Use .get() on 'benchmarks'
    # so a malformed file degrades gracefully instead of raising KeyError.
    base_map = {b['name']: b.get('real_time') for b in baseline.get('benchmarks', [])}

    print(f"{'Benchmark':<40} | {'Old (ns)':<12} | {'New (ns)':<12} | {'Change':<10}")
    print("-" * 85)

    for b in current.get('benchmarks', []):
        name = b['name']
        if name not in base_map:
            # New benchmark with no baseline entry: nothing to compare.
            continue
        old_time = base_map[name]
        new_time = b.get('real_time')

        if old_time is None or new_time is None:
            print(f"{name:<40} | {'N/A':<12} | {'N/A':<12} | {'N/A':>9}")
            continue

        # Guard against division by zero for a zero/negative baseline time.
        if old_time <= 0:
            print(f"{name:<40} | {old_time:<12.2f} | {new_time:<12.2f} | {'NEW/ZERO':>9}")
            continue

        # Positive change means the benchmark got slower (a regression).
        change = (new_time - old_time) / old_time
        print(f"{name:<40} | {old_time:<12.2f} | {new_time:<12.2f} | {change:>+9.1%}")

        if change > threshold:
            regressions.append(f"{name} regressed by {change:.1%}")

    if regressions:
        print("\n!!! PERFORMANCE REGRESSION DETECTED !!!")
        for r in regressions:
            print(f" - {r}")
        return False

    print("\nPerformance is within acceptable limits.")
    return True


if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("Usage: check_perf_regression.py <current.json> <baseline.json> [threshold]")
        sys.exit(1)

    # Reject a non-numeric threshold with the usage message rather than
    # letting float() raise an uncaught ValueError traceback.
    try:
        thresh = float(sys.argv[3]) if len(sys.argv) > 3 else 0.2
    except ValueError:
        print("Usage: check_perf_regression.py <current.json> <baseline.json> [threshold]")
        sys.exit(1)

    if not check_regression(sys.argv[1], sys.argv[2], thresh):
        sys.exit(1)
5 changes: 2 additions & 3 deletions src/executor/operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@
namespace cloudsql::executor {

/* --- SeqScanOperator --- */

SeqScanOperator::SeqScanOperator(std::unique_ptr<storage::HeapTable> table, Transaction* txn,
SeqScanOperator::SeqScanOperator(std::shared_ptr<storage::HeapTable> table, Transaction* txn,
LockManager* lock_manager)
: Operator(OperatorType::SeqScan, txn, lock_manager),
table_name_(table->table_name()),
Expand Down Expand Up @@ -131,7 +130,7 @@ Schema& BufferScanOperator::output_schema() {

/* --- IndexScanOperator --- */

IndexScanOperator::IndexScanOperator(std::unique_ptr<storage::HeapTable> table,
IndexScanOperator::IndexScanOperator(std::shared_ptr<storage::HeapTable> table,
std::unique_ptr<storage::BTreeIndex> index,
common::Value search_key, Transaction* txn,
LockManager* lock_manager)
Expand Down
2 changes: 1 addition & 1 deletion src/executor/query_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,7 @@ std::unique_ptr<Operator> QueryExecutor::build_plan(const parser::SelectStatemen
col_name) {
common::ValueType ktype = base_table_meta->columns[pos].type;
current_root = std::make_unique<IndexScanOperator>(
std::make_unique<storage::HeapTable>(base_table_name, bpm_,
std::make_shared<storage::HeapTable>(base_table_name, bpm_,
base_schema),
std::make_unique<storage::BTreeIndex>(idx_info.name, bpm_,
ktype),
Expand Down
Loading
Loading