Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4,370 changes: 2,400 additions & 1,970 deletions Cargo.lock

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,10 @@ members = [
"rs/ic_os/guest_upgrade/shared",
"rs/ic_os/guest_upgrade/tests",
"rs/ic_os/linux_kernel_command_line",
"rs/ic_os/metrics/fstrim_tool",
"rs/ic_os/metrics/metrics_tool",
"rs/ic_os/os_tools/fstrim_tool",
"rs/ic_os/metrics/custom_metrics",
"rs/ic_os/metrics/nft_exporter",
"rs/ic_os/metrics/utils",
"rs/ic_os/networking/deterministic_ips",
"rs/ic_os/networking/network",
"rs/ic_os/networking/nss_icos",
Expand Down
6 changes: 3 additions & 3 deletions ic-os/components/guestos.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def component_files(mode):
Label("monitoring/guestos/ipv4-connectivity-check/ipv4-connectivity-check.timer"): "/etc/systemd/system/ipv4-connectivity-check.timer",
Label("monitoring/guestos/fstrim/fstrim_tool.service"): "/etc/systemd/system/fstrim_tool.service",
Label("monitoring/guestos/fstrim/fstrim_tool.timer"): "/etc/systemd/system/fstrim_tool.timer",
Label("monitoring/guestos/fstrim/setup-fstrim-metrics.service"): "/etc/systemd/system/setup-fstrim-metrics.service",
Label("monitoring/guestos/fstrim/setup-fstrim-tool.service"): "/etc/systemd/system/setup-fstrim-tool.service",
Label("monitoring/guestos/nft-exporter/nft-exporter.service"): "/etc/systemd/system/nft-exporter.service",
Label("monitoring/guestos/nft-exporter/nft-exporter.timer"): "/etc/systemd/system/nft-exporter.timer",
Label("monitoring/guestos/custom-metrics.sh"): "/opt/ic/bin/custom-metrics.sh",
Expand All @@ -104,8 +104,8 @@ def component_files(mode):
Label("monitoring/guestos/boot-metrics/boot-metrics.timer"): "/etc/systemd/system/boot-metrics.timer",
Label("monitoring/guestos/boot-logging/log-boot-failure.service"): "/etc/systemd/system/log-boot-failure.service",
Label("monitoring/guestos/boot-logging/log-boot-success.service"): "/etc/systemd/system/log-boot-success.service",
Label("monitoring/guestos/metrics_tool.service"): "/etc/systemd/system/metrics_tool.service",
Label("monitoring/guestos/metrics_tool.timer"): "/etc/systemd/system/metrics_tool.timer",
Label("monitoring/guestos/custom_metrics.service"): "/etc/systemd/system/custom_metrics.service",
Label("monitoring/guestos/custom_metrics.timer"): "/etc/systemd/system/custom_metrics.timer",
Label("monitoring/node_exporter/node_exporter.crt"): "/etc/node_exporter/node_exporter.crt",
Label("monitoring/node_exporter/node_exporter.key"): "/etc/node_exporter/node_exporter.key",
Label("monitoring/node_exporter/web.yml"): "/etc/node_exporter/web.yml",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Before=node_exporter.service

[Service]
Type=oneshot
ExecStart=/opt/ic/bin/metrics_tool --metrics /run/node_exporter/collector_textfile/custom_metrics.prom
ExecStart=/opt/ic/bin/custom_metrics --metrics /run/node_exporter/collector_textfile/custom_metrics.prom

# Disable systemd start and stop logs
LogLevelMax=1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Description=Collect custom metrics every minute
[Timer]
OnBootSec=60s
OnUnitActiveSec=60s
Unit=metrics_tool.service
Unit=custom_metrics.service

[Install]
WantedBy=timers.target
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[Unit]
Description=Discard unused blocks on /var/lib/ic/crypto filesystem
After=setup-fstrim-metrics.service
Wants=setup-fstrim-metrics.service
After=setup-fstrim-tool.service
Wants=setup-fstrim-tool.service

[Service]
Type=oneshot
Expand Down
2 changes: 1 addition & 1 deletion ic-os/guestos/defs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def image_deps(mode, malicious = False):
"//rs/ic_os/release:vsock_guest": "/opt/ic/bin/vsock_guest:0755", # HostOS <--> GuestOS communication client.
"//cpp:infogetty": "/opt/ic/bin/infogetty:0755", # Terminal manager that replaces the login shell.
"//rs/ic_os/release:metrics-proxy": "/opt/ic/bin/metrics-proxy:0755", # Proxies, filters, and serves public node metrics.
"//rs/ic_os/release:metrics_tool": "/opt/ic/bin/metrics_tool:0755", # Collects and reports custom metrics.
"//rs/ic_os/release:custom_metrics": "/opt/ic/bin/custom_metrics:0755", # Collects and reports custom metrics.
"//rs/ic_os/remote_attestation/server": "/opt/ic/bin/remote_attestation_server:0755", # Remote Attestation service
"//rs/ic_os/guest_upgrade/client": "/opt/ic/bin/guest_upgrade_client:0755", # Disk encryption key exchange client

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ load("@rules_rust//rust:defs.bzl", "rust_binary")
package(default_visibility = ["//rs:ic-os-pkg"])

rust_binary(
name = "metrics_tool_bin",
name = "custom_metrics_bin",
srcs = ["src/main.rs"],
deps = [
# Keep sorted.
"//rs/sys",
"//rs/ic_os/metrics/utils:metrics_utils",
"@crate_index//:anyhow",
"@crate_index//:clap",
"@crate_index//:prometheus",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
[package]
name = "ic-metrics-tool"
name = "ic-custom-metrics"
version = "0.1.0"
edition.workspace = true

[[bin]]
name = "metrics_tool"
name = "custom_metrics"
path = "src/main.rs"

[dependencies]
anyhow = { workspace = true }
clap = { workspace = true }
prometheus = { workspace = true }
ic-os-metrics-utils = { path = "../utils" }
prometheus = { workspace = true }
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use anyhow::{Context, Result};
use clap::Parser;
use prometheus::{Encoder, IntGaugeVec, Opts, Registry, TextEncoder};

use ic_os_metrics_utils::write_registry_to_file;
use prometheus::{IntGauge, Opts, Registry};
use std::fs::File;
use std::io::{self, BufRead, BufWriter, Write};
use std::io::{self, BufRead};
use std::path::{Path, PathBuf};

const INTERRUPT_FILTER: &str = "TLB shootdowns";
Expand Down Expand Up @@ -50,31 +50,16 @@ pub fn main() -> Result<()> {
let tlb_shootdowns = get_sum_tlb_shootdowns()?;

let registry = Registry::new();
let gauge = IntGaugeVec::new(
Opts::new(TLB_SHOOTDOWN_METRIC_NAME, TLB_SHOOTDOWN_METRIC_ANNOTATION),
&[],
)
let gauge = IntGauge::with_opts(Opts::new(
TLB_SHOOTDOWN_METRIC_NAME,
TLB_SHOOTDOWN_METRIC_ANNOTATION,
))
.context("Failed to create gauge")?;
gauge.set(tlb_shootdowns);

registry
.register(Box::new(gauge.clone()))
.register(Box::new(gauge))
.context("Failed to register gauge")?;

// Set the metric value
gauge.with_label_values::<&str>(&[]).set(tlb_shootdowns);

// Write metrics to file
let mut file = BufWriter::new(File::create(&opts.metrics_filename).with_context(|| {
format!(
"Failed to create metrics file: {}",
opts.metrics_filename.display()
)
})?);
TextEncoder::new()
.encode(&registry.gather(), &mut file)
.context("Failed to encode metrics")?;

file.flush().context("Failed to flush metrics file")?;

Ok(())
write_registry_to_file(&registry, &opts.metrics_filename)
}
5 changes: 4 additions & 1 deletion rs/ic_os/metrics/nft_exporter/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@ load("@rules_rust//rust:defs.bzl", "rust_binary")
package(default_visibility = ["//rs:ic-os-pkg"])

DEPENDENCIES = [
# Keep sorted.
"//rs/ic_os/metrics/utils:metrics_utils",
"@crate_index//:anyhow",
"@crate_index//:clap",
"@crate_index//:serde_json",
"@crate_index//:prometheus",
"@crate_index//:serde",
"@crate_index//:serde_json",
]

rust_binary(
Expand Down
2 changes: 2 additions & 0 deletions rs/ic_os/metrics/nft_exporter/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,7 @@ edition.workspace = true
[dependencies]
anyhow = { workspace = true }
clap = { workspace = true }
ic-os-metrics-utils = { path = "../utils" }
prometheus = { workspace = true }
serde = { workspace = true }
serde_json = "1.0"
41 changes: 14 additions & 27 deletions rs/ic_os/metrics/nft_exporter/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use anyhow::{Context, Error};
use clap::Parser;
use ic_os_metrics_utils::write_registry_to_file;
use prometheus::{IntCounter, Registry};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::fs::File;
use std::io::Write;
use std::path::PathBuf;
use std::process::Command;

Expand All @@ -29,17 +29,6 @@ struct Counter {
bytes: u32,
}

impl Counter {
fn to_metric_str(&self) -> String {
format!(
"# HELP {} Total number of packets the corresponding rule has been applied to.\n\
# TYPE {} counter\n\
{} {:?}",
self.name, self.name, self.name, self.packets,
)
}
}

fn get_nft_json_ruleset() -> Result<Value, Error> {
let mut cmd = Command::new("nft");
cmd.args(["--json", "list", "ruleset"]);
Expand Down Expand Up @@ -73,25 +62,23 @@ fn get_counters(json_nft_ruleset: &Value) -> Result<Vec<Counter>, Error> {
fn main() -> Result<(), Error> {
let cli = Cli::parse();

// Get the current nft ruleset in JSON format
let json_ruleset = get_nft_json_ruleset().context("Failed to get JSON ruleset")?;

// Extract the counters from the JSON ruleset
let counters = get_counters(&json_ruleset).context("Failed to get the counters")?;

// Turn the counters into prometheus metrics and write it to a file
let mut metrics = Vec::new();
for counter in counters {
metrics.push(counter.to_metric_str());
let registry = Registry::new();
for counter in &counters {
let prom_counter = IntCounter::new(
&counter.name,
"Total number of packets the corresponding rule has been applied to.",
)
.with_context(|| format!("Failed to create counter for '{}'", counter.name))?;
prom_counter.inc_by(counter.packets as u64);
registry
.register(Box::new(prom_counter))
.with_context(|| format!("Failed to register counter for '{}'", counter.name))?;
}

let mut metrics_str = metrics.join("\n");
metrics_str.push('\n');

let mut file = File::create(cli.metrics_file)?;
file.write_all(metrics_str.as_bytes())?;

Ok(())
write_registry_to_file(&registry, &cli.metrics_file)
}

#[cfg(test)]
Expand Down
17 changes: 17 additions & 0 deletions rs/ic_os/metrics/utils/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
load("@rules_rust//rust:defs.bzl", "rust_library")

package(default_visibility = ["//rs:ic-os-pkg"])

DEPENDENCIES = [
# Keep sorted.
"//rs/sys",
"@crate_index//:anyhow",
"@crate_index//:prometheus",
]

rust_library(
name = "metrics_utils",
srcs = glob(["src/**/*.rs"]),
crate_name = "ic_os_metrics_utils",
deps = DEPENDENCIES,
)
9 changes: 9 additions & 0 deletions rs/ic_os/metrics/utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[package]
name = "ic-os-metrics-utils"
version = "0.1.0"
edition.workspace = true

[dependencies]
anyhow = { workspace = true }
ic-sys = { path = "../../../sys" }
prometheus = { workspace = true }
18 changes: 18 additions & 0 deletions rs/ic_os/metrics/utils/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
use anyhow::{Context, Result};
use ic_sys::fs::write_string_using_tmp_file;
use prometheus::{Encoder, Registry, TextEncoder};
use std::path::Path;

/// Serializes every metric gathered from `registry` with the Prometheus
/// [`TextEncoder`] and stores the result at `path`.
///
/// The file is written through `write_string_using_tmp_file`; as that helper's
/// name indicates, the content goes through a temporary file rather than being
/// streamed into `path` directly, which is what keeps readers from observing a
/// half-written metrics file.
///
/// # Errors
///
/// Returns an error if encoding the metrics fails, if the encoded bytes are
/// not valid UTF-8, or if writing the file fails.
pub fn write_registry_to_file(registry: &Registry, path: &Path) -> Result<()> {
    let metric_families = registry.gather();

    // The encoder only writes into a byte sink, so collect the bytes first and
    // convert them into a `String` for the file helper afterwards.
    let mut encoded = Vec::new();
    let encoder = TextEncoder::new();
    encoder
        .encode(&metric_families, &mut encoded)
        .context("Failed to encode metrics")?;

    let text = String::from_utf8(encoded).context("Metrics output is not valid UTF-8")?;

    write_string_using_tmp_file(path, text.as_str()).context("Failed to write metrics to file")
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ DEPENDENCIES = [
"//rs/sys",
"@crate_index//:anyhow",
"@crate_index//:clap",
"@crate_index//:prometheus",
]

DEV_DEPENDENCIES = [
Expand All @@ -33,7 +34,6 @@ rust_library(
aliases = ALIASES,
crate_name = "ic_fstrim_tool",
proc_macro_deps = MACRO_DEPENDENCIES,
# TODO: is this really needed?
visibility = ["//rs:system-tests-pkg"],
deps = DEPENDENCIES,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ path = "src/main.rs"
anyhow = { workspace = true }
clap = { workspace = true }
ic-sys = { path = "../../../sys" }
prometheus = { workspace = true }

[dev-dependencies]
assert_cmd = { workspace = true }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ fn parse_existing_metrics_from_file(metrics_filename: &str) -> Result<Option<FsT

fn write_metrics_using_tmp_file(metrics: &FsTrimMetrics, metrics_filename: &str) -> Result<()> {
let path = PathBuf::from(metrics_filename);
write_string_using_tmp_file(path, metrics.to_p8s_metrics_string().as_str())
write_string_using_tmp_file(path, metrics.to_p8s_metrics_string()?.as_str())
.context("Failed to write metrics to file")
}

Expand Down Expand Up @@ -100,7 +100,7 @@ fn is_node_assigned() -> bool {
Path::new("/var/lib/ic/data/cups/cup.types.v1.CatchUpPackage.pb").exists()
}

pub fn fstrim_tool(
pub fn run(
command: &str,
metrics_filename: String,
target: String,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ struct FsTrimArgs {
pub fn main() -> Result<()> {
let opts = FsTrimArgs::parse();

ic_fstrim_tool::fstrim_tool(
ic_fstrim_tool::run(
FSTRIM_COMMAND,
opts.metrics_filename,
opts.target,
Expand Down
Loading
Loading