Skip to content

Commit

Permalink
fb303_prometheus_exporter
Browse files Browse the repository at this point in the history
Summary:
I'm changing the approach of how mononoke->prometheus export will work:
 * before I've planned to use OSS `metrics` crate for logging from mononoke instead of our internal stack (which exposes fb303 endpoint)
 * this diff fully relies on internal stack instead:
   * all the metrics will be exposed in fb303 endpoint
   * the new fb303_prometheus_exporter will query fb303 endpoint and export same metrics in prometheus-friendly format

This has many benefits:
  * we don't need to modify `stats` crate with all its macros
  * we rely on scalable internal stack for metrics
  * there's 1:1 feature parity: all aggregations, histograms, etc work out of the box
  * the only difference is the counters naming: prometheus only allows alphanumeric chars and `_` so we have to sanitise the names.

In addition the adapter server component implementation turned out to be really simple.

Reviewed By: andreacampi

Differential Revision: D66663139

fbshipit-source-id: fda13aed293079691b9c2041f2c27b8c8ec2693c
  • Loading branch information
mitrandir77 authored and facebook-github-bot committed Dec 4, 2024
1 parent 1d84330 commit f463021
Show file tree
Hide file tree
Showing 10 changed files with 142 additions and 168 deletions.
2 changes: 1 addition & 1 deletion eden/mononoke/async_requests/worker/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ fn main(fb: FacebookInit) -> Result<()> {
let blobstore = runtime.block_on(open_blobstore(fb, &app))?;
let will_exit = Arc::new(AtomicBool::new(false));

app.start_monitoring(SERVICE_NAME, AliveService)?;
app.start_monitoring(app.runtime(), SERVICE_NAME, AliveService)?;
app.start_stats_aggregation()?;

if let Some(mut executor) = args.sharded_executor_args.clone().build_executor(
Expand Down
1 change: 1 addition & 0 deletions eden/mononoke/cmdlib/mononoke_app/TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ rust_library(
"//eden/mononoke/common/rust/sql_ext:sql_ext",
"//eden/mononoke/common/scuba_ext:scuba_ext",
"//eden/mononoke/derived_data:derived_data_remote",
"//eden/mononoke/facebook/fb303_prometheus_exporter:fb303_prometheus_exporter",
"//eden/mononoke/megarepo_api:megarepo_config",
"//eden/mononoke/metaconfig:metaconfig_parser",
"//eden/mononoke/metaconfig:metaconfig_types",
Expand Down
23 changes: 17 additions & 6 deletions eden/mononoke/cmdlib/mononoke_app/src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,23 @@ impl MononokeApp {
}

/// Start the monitoring server for the provided service.
pub fn start_monitoring<Service>(&self, app_name: &str, service: Service) -> Result<()>
pub fn start_monitoring<Service>(
&self,
handle: &Handle,
app_name: &str,
service: Service,
) -> Result<()>
where
Service: Fb303Service + Sync + Send + 'static,
{
let monitoring_args = self.extension_args::<MonitoringAppExtension>()?;
monitoring_args.start_monitoring_server(self.fb, app_name, self.logger(), service)?;
monitoring_args.start_monitoring_server(
self.fb,
handle,
app_name,
self.logger(),
service,
)?;
Ok(())
}

Expand Down Expand Up @@ -201,14 +212,14 @@ impl MononokeApp {
Fut: Future<Output = Result<()>>,
Service: Fb303Service + Sync + Send + 'static,
{
self.start_monitoring(app_name, service)?;
self.start_stats_aggregation()?;

let logger = self.logger().clone();
let runtime = self
.runtime
.take()
.ok_or_else(|| anyhow!("MononokeApp already started"))?;
self.start_monitoring(runtime.handle(), app_name, service)?;
self.start_stats_aggregation()?;

let logger = self.logger().clone();
let result = runtime.block_on(main(self));

if let Err(e) = result {
Expand Down
64 changes: 0 additions & 64 deletions eden/mononoke/cmdlib/mononoke_app/src/args/prometheus.rs

This file was deleted.

109 changes: 103 additions & 6 deletions eden/mononoke/cmdlib/mononoke_app/src/monitoring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,114 @@
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::thread;

use anyhow::Error;
use anyhow::Result;
use clap::Args;
use fbinit::FacebookInit;
// Re-eport AliveService for convenience so callers do not have to get the services dependency to
// get AliveService.
pub use services::AliveService;
use services::Fb303Service;
use services::FbStatus;
use slog::info;
use slog::Logger;
use slog::Never;
use slog::SendSyncRefUnwindSafeDrain;
use tokio::runtime::Handle;

mod fb303;
use crate::AppExtension;

pub use fb303::Fb303AppExtension as MonitoringAppExtension;
pub use fb303::Fb303Args as MonitoringArgs;
// Re-eport AliveService for convenience so callers do not have to get the services dependency to
// get AliveService.
pub use services::AliveService;
/// Command line arguments that fb303 for service
#[derive(Args, Debug)]
pub struct MonitoringArgs {
/// Port for fb303 service
// thrift_port alias is for compatibility with mononoke server
// TODO: switch mononoke server to use the same flags as everybody.
#[clap(long, alias = "thrift_port", value_name = "PORT")]
pub fb303_thrift_port: Option<i32>,

/// Spawn promethus metrics exporter listening on HOST:PORT, requires
/// fb303_thrift_port to be set
#[clap(long, requires("fb303_thrift_port"), value_name = "HOST:PORT")]
#[cfg(fbcode_build)]
pub prometheus_host_port: Option<String>,
}

impl MonitoringArgs {
/// This is a lower-level function that requires you to spawn the stats aggregation future
/// yourself. This is useful if you'd like to be able to drop it in order to cancel it.
///
/// Usually starting the fb303 server and stats aggregation is done by functions like
/// `MononokeApp::run_with_monitoring_and_logging`.
pub fn start_monitoring_server<S: Fb303Service + Sync + Send + 'static>(
&self,
fb: FacebookInit,
handle: &Handle,
service_name: &str,
logger: &Logger,
service: S,
) -> Result<Option<()>, Error> {
let service_name = service_name.to_string();
self.fb303_thrift_port
.map(|port| {
info!(logger, "Initializing fb303 thrift server on port {}", port);

thread::Builder::new()
.name("fb303_thrift_service".to_owned())
.spawn(move || {
services::run_service_framework(
fb,
service_name,
port,
0, // Disables separate status http server
Box::new(service),
)
.expect("failure while running thrift service framework")
})
.map_err(Error::from)?;

#[cfg(fbcode_build)]
{
if let Some(prometheus_host_port) = &self.prometheus_host_port {
info!(
logger,
"Initializing prometheus exporter on {}", prometheus_host_port
);
fb303_prometheus_exporter::run_fb303_to_prometheus_exporter(
fb,
handle,
prometheus_host_port.clone(),
format!("[::0]:{}", port),
)?;
}
}

Ok(())
})
.transpose()
}
}

pub struct MonitoringAppExtension;

impl AppExtension for MonitoringAppExtension {
type Args = MonitoringArgs;

/// Hook executed after creating the log drain allowing for augmenting the logging.
fn log_drain_hook(
&self,
args: &MonitoringArgs,
drain: Arc<dyn SendSyncRefUnwindSafeDrain<Ok = (), Err = Never>>,
) -> Result<Arc<dyn SendSyncRefUnwindSafeDrain<Ok = (), Err = Never>>> {
if args.fb303_thrift_port.is_some() {
Ok(Arc::new(slog_stats::StatsDrain::new(drain)))
} else {
Ok(drain)
}
}
}

/// A FB303 service that reports healthy once set_ready has been called.
#[derive(Clone)]
Expand Down
88 changes: 0 additions & 88 deletions eden/mononoke/cmdlib/mononoke_app/src/monitoring/fb303.rs

This file was deleted.

17 changes: 17 additions & 0 deletions eden/mononoke/facebook/fb303_prometheus_exporter/TARGETS
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
load("@fbcode_macros//build_defs:rust_library.bzl", "rust_library")

oncall("scm_server_infra")

rust_library(
name = "fb303_prometheus_exporter",
srcs = glob(["src/**/*.rs"]),
deps = [
"fbsource//third-party/rust:anyhow",
"fbsource//third-party/rust:gotham",
"fbsource//third-party/rust:hyper",
"fbsource//third-party/rust:tokio",
"//common/rust/gflags:gflags",
"//common/rust/shed/fbinit:fbinit",
"//fb303/thrift:fb303_core-rust-thriftclients",
],
)
2 changes: 1 addition & 1 deletion eden/mononoke/git_server/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ fn main(fb: FacebookInit) -> Result<(), Error> {
.sharded_service_name
.as_ref()
.map(|_| ShardedService::MononokeGitServer);
app.start_monitoring(SERVICE_NAME, AliveService)?;
app.start_monitoring(app.runtime(), SERVICE_NAME, AliveService)?;
app.start_stats_aggregation()?;

let requests_counter = Arc::new(AtomicI64::new(0));
Expand Down
2 changes: 1 addition & 1 deletion eden/mononoke/lfs_server/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ fn main(fb: FacebookInit) -> Result<(), Error> {
LogMiddleware::slog(logger.clone())
};

app.start_monitoring(SERVICE_NAME, AliveService)?;
app.start_monitoring(app.runtime(), SERVICE_NAME, AliveService)?;
app.start_stats_aggregation()?;

let common = &app.repo_configs().common;
Expand Down
2 changes: 1 addition & 1 deletion eden/mononoke/server/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ fn main(fb: FacebookInit) -> Result<()> {
.sharded_service_name
.as_ref()
.map(|_| ShardedService::SaplingRemoteApi);
app.start_monitoring("mononoke_server", service.clone())?;
app.start_monitoring(&runtime, "mononoke_server", service.clone())?;
app.start_stats_aggregation()?;

let repo_listeners = {
Expand Down

0 comments on commit f463021

Please sign in to comment.