Skip to content
This repository was archived by the owner on Aug 11, 2025. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 45 additions & 5 deletions classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import numpy as np
import matplotlib.pyplot as plt
import pickle
import plotly.graph_objects as go

from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
Expand All @@ -26,12 +27,12 @@

DEFAULT_FEATURES = [
"percent_redundant_boost",
"difflib_rewards",
"difflib_slot",
"difflib_slot_rev",
"spearman_correlation",
"norm_reward",
]

DEFAULT_GRAFFITI_ONLY = ["Lodestar"]
DEFAULT_GRAFFITI_ONLY = ["Grandine", "Lodestar"] # too hard rn

VIABLE_FEATURES = [
"percent_redundant_boost",
Expand Down Expand Up @@ -183,7 +184,7 @@ def plot_feature_matrix(self, output_path):
z = self.feature_matrix[:, 2]

scatter = ax.scatter(
x, y, z, c=self.training_labels, marker=".", alpha=0.25, cmap="Set1"
x, y, z, c=self.training_labels, marker=".", alpha=0.25, cmap="Dark2"
)

handles, _ = scatter.legend_elements()
Expand All @@ -204,6 +205,45 @@ def plot_feature_matrix(self, output_path):
else:
fig.savefig(output_path)

def plot_feature_matrix_interactive(self, output_path):
    """Draw the first three feature columns as an interactive 3D scatter plot.

    Every training sample becomes one marker positioned by columns 0, 1 and 2
    of ``self.feature_matrix`` and coloured by its integer training label.
    Hovering a marker shows the client name followed by the three feature
    values, labelled with the first three entries of ``self.features``.

    Args:
        output_path: Destination for the generated HTML file. When ``None``
            the figure is displayed with ``fig.show()`` instead of written.
    """
    # Label index -> client name, in the order the clients appear in CLIENTS.
    client_by_index = dict(enumerate(CLIENTS))
    hover_names = [client_by_index[label] for label in self.training_labels]

    points = self.feature_matrix
    xs = points[:, 0]
    ys = points[:, 1]
    zs = points[:, 2]

    marker_style = dict(
        size=5,
        color=self.training_labels,
        colorscale="ylgnbu",
        opacity=0.8,
        # Alias the integer colour-bar labels to the matching client names.
        colorbar=dict(labelalias=client_by_index),
    )

    # <extra></extra> at the end blanks the secondary hover box.
    hover_template = (
        "%{text}<br>"
        + f"{self.features[0]}: %{{x}}<br>"
        + f"{self.features[1]}: %{{y}}<br>"
        + f"{self.features[2]}: %{{z}}<extra></extra>"
    )

    trace = go.Scatter3d(
        x=xs,
        y=ys,
        z=zs,
        mode="markers",
        marker=marker_style,
        text=hover_names,  # hover text
        hovertemplate=hover_template,
    )
    fig = go.Figure(data=trace)

    axis_titles = dict(
        xaxis_title=self.features[0],
        yaxis_title=self.features[1],
        zaxis_title=self.features[2],
    )
    fig.update_layout(scene=axis_titles, title="3D Feature Matrix")

    if output_path is not None:
        # Writes a standalone interactive HTML file.
        fig.write_html(output_path)
        return
    fig.show()


def compute_guess_list(probability_map, enabled_clients) -> list:
guesses = []
Expand Down Expand Up @@ -364,7 +404,7 @@ def main():
)

if args.plot is not None:
classifier.plot_feature_matrix(args.plot)
classifier.plot_feature_matrix_interactive(args.plot)
print("plot of training data written to {}".format(args.plot))

frequency_map = {}
Expand Down
39 changes: 39 additions & 0 deletions feature_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,32 @@ def feat_spearman_correlation(block_reward):
).correlation


def feat_spearman_correlation_lodestar(block_reward):
    """Spearman correlation between per-attestation scores and their sorted order.

    Each attestation is scored as its summed rewards divided by the slot
    difference between the attestation and the block. According to the
    original author, this reward-over-inclusion-distance ordering is the one
    Lodestar uses.

    Returns 1.0 when the scores are already in ascending order (this covers
    empty and constant sequences, for which the coefficient is undefined);
    otherwise returns the coefficient reported by ``scipy.stats.spearmanr``.
    """
    rewards_section = block_reward["attestation_rewards"]
    reward_maps = rewards_section["per_attestation_rewards"]
    block_slot = int(block_reward["meta"]["slot"])
    attestations = rewards_section.get("attestations") or []

    # NOTE(review): this is attestation slot minus block slot, so it is
    # negative whenever the attestation precedes the block -- confirm the sign
    # is intended together with the ascending sort below.
    slot_offsets = [int(att["slot"]) - block_slot for att in attestations]

    # NOTE(review): indexing assumes one "attestations" entry per reward map;
    # a shorter list raises IndexError and a zero offset raises
    # ZeroDivisionError -- verify against the data source.
    scores = []
    for position, reward_map in enumerate(reward_maps):
        scores.append(sum(reward_map.values()) / slot_offsets[position])

    ascending = sorted(scores)
    # Already-sorted input (including the uniform case) is treated as a
    # perfect correlation rather than passed to spearmanr.
    if scores == ascending:
        return 1.0
    return scipy.stats.spearmanr(scores, ascending).correlation


def feat_total_reward(block_reward):
total_reward = block_reward["attestation_rewards"]["total"]
return total_reward
Expand Down Expand Up @@ -134,6 +160,17 @@ def feat_median_density(block_reward):
return safe_median(densities)


def feat_median_density_electra(block_reward):
    """Median attestation density using a 32-committee denominator.

    The density of one attestation is the number of entries in its reward map
    divided by ``32 * TARGET_COMMITTEE_SIZE``. The median over all
    attestations in the block is computed by ``safe_median``.
    """
    reward_maps = block_reward["attestation_rewards"]["per_attestation_rewards"]
    # NOTE(review): the factor 32 is presumably the number of committees an
    # Electra-era attestation can span -- confirm against the spec.
    full_size = 32 * TARGET_COMMITTEE_SIZE

    densities = []
    for reward_map in reward_maps:
        densities.append(len(reward_map) / full_size)
    return safe_median(densities)


def feat_mean_density(block_reward):
per_attestation_rewards = block_reward["attestation_rewards"][
"per_attestation_rewards"
Expand Down Expand Up @@ -206,11 +243,13 @@ def f(block_reward):
lambda x: (x[0], x[3]), reverse=True
),
"spearman_correlation": feat_spearman_correlation,
"spearman_correlation_lodestar": feat_spearman_correlation_lodestar,
"reward": feat_total_reward,
"norm_reward": feat_total_reward_norm,
"norm_reward_per_slot": scale_by_num_slots(feat_total_reward_norm),
"reward_per_attestation": scale_by_num_attestations(feat_total_reward),
"median_density": feat_median_density,
"median_density_electra": feat_median_density_electra,
"mean_density": feat_mean_density,
"num_single_bit": feat_num_single_bit,
"percent_single_bit": scale_by_num_attestations(feat_num_single_bit),
Expand Down
3 changes: 2 additions & 1 deletion prepare_training_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
CLIENTS = ["Grandine", "Lighthouse", "Lodestar", "Nimbus", "Other", "Prysm", "Teku"]

REGEX_PATTERNS = {
"Grandine": [],
"Grandine": [r".*[Gg]randine.*"],
"Lighthouse": [r".*[Ll]ighthouse", r"RP-[A-Z]?L v[0-9]*\.[0-9]*\.[0-9]*.*"],
"Teku": [r".*[Tt]eku", r"RP-[A-Z]?T v[0-9]*\.[0-9]*\.[0-9]*.*"],
"Nimbus": [r".*[Nn]imbus", r"RP-[A-Z]?N v[0-9]*\.[0-9]*\.[0-9]*.*"],
Expand Down Expand Up @@ -62,6 +62,7 @@ def process_file(
raw_data_dir: str, proc_data_dir: str, disabled_clients: list[str], file_name: str
) -> None:
with open(os.path.join(raw_data_dir, file_name), "r") as f:
print(f"Processing {file_name}")
rewards = json.load(f)

res = classify_rewards_by_graffiti(rewards, disabled_clients=disabled_clients)
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ sseclient-py==1.8.0
gunicorn==21.2.0
matplotlib==3.8.0
scipy==1.11.3
plotly==6.1.2