Locust の実行中のworker数やuser数、pathごとのレスポンスタイムなどのメトリクスを Prometheus で収集できるようにしてみました。 実装 import logging from flask import request, Response import six from itertools import chain from locust import ( stats as locust_stats, runners as locust_runners, ) from locust.env import Environment from prometheus_client import Metric, REGISTRY, exposition # Prometheus metricsを収集するCollector class LocustCollector(object): registry = REGISTRY def __init__(self, environment, runner, target_service): self.environment = environment self.runner = runner self.target_service = target_service def collect(self): # collect metrics only when locust runner is spawning or running. runner = self.runner base_label = {"service": self.target_service} metric = Metric("locust_state", "State of the locust swarm", "gauge") metric.add_sample( "locust_state", value=1, labels=base_label | {"state": runner.state} ) yield metric if runner and runner.state in ( locust_runners.STATE_SPAWNING, locust_runners.STATE_RUNNING, ): stats = [] for s in chain( locust_stats.sort_stats(runner.stats.entries), [runner.stats.total] ): stats.append( { "method": s.method, "name": s.name, "num_requests": s.num_requests, "num_failures": s.num_failures, "avg_response_time": s.avg_response_time, "min_response_time": s.min_response_time or 0, "max_response_time": s.max_response_time, "current_rps": s.current_rps, "median_response_time": s.median_response_time, "ninetieth_response_time": s.get_response_time_percentile(0.9), # only total stats can use current_response_time, so sad. # "current_response_time_percentile_95": s.get_current_response_time_percentile(0.95), "avg_content_length": s.avg_content_length, "current_fail_per_sec": s.current_fail_per_sec, } ) # perhaps StatsError.parse_error in e.to_dict only works in python slave, take notices! errors = [e.to_dict() for e in six.itervalues(runner.stats.errors)] metric = Metric("locust_user_count", "Swarmed users", "gauge") metric.add_sample( "locust_user_count", value=runner.user_count, labels=base_label ) yield metric metric = Metric("locust_errors", "Locust requests errors", "gauge") for err in errors: metric.add_sample( "locust_errors", value=err["occurrences"], labels=base_label | { "path": err["name"], "method": err["method"], "error": err["error"], }, ) yield metric is_distributed = isinstance(runner, locust_runners.MasterRunner) if is_distributed: metric = Metric( "locust_slave_count", "Locust number of slaves", "gauge" ) metric.add_sample( "locust_slave_count", value=len(runner.clients.values()), labels=base_label, ) yield metric metric = Metric("locust_fail_ratio", "Locust failure ratio", "gauge") metric.add_sample( "locust_fail_ratio", value=runner.stats.total.fail_ratio, labels=base_label, ) yield metric stats_metrics = [ "avg_content_length", "avg_response_time", "current_rps", "current_fail_per_sec", "max_response_time", "ninetieth_response_time", "median_response_time", "min_response_time", "num_failures", "num_requests", ] for mtr in stats_metrics: mtype = "gauge" if mtr in ["num_requests", "num_failures"]: mtype = "counter" metric = Metric("locust_stats_" + mtr, "Locust stats " + mtr, mtype) for stat in stats: # Aggregated stat's method label is None, so name it as Aggregated # locust has changed name Total to Aggregated since 0.12.1 if "Aggregated" != stat["name"]: metric.add_sample( "locust_stats_" + mtr, value=stat[mtr], labels=base_label | {"path": stat["name"], "method": stat["method"]}, ) else: metric.add_sample( "locust_stats_" + mtr, value=stat[mtr], labels=base_label | {"path": stat["name"], "method": "Aggregated"}, ) yield metric def add_metrics_endpoint( environment: Environment, runner: locust_runners.Runner, target_service: str ): @environment.web_ui.app.route("/metrics") def prometheus_exporter(): registry = REGISTRY encoder, content_type = exposition.choose_encoder(request.headers.get("Accept")) if "name[]" in request.args: registry = REGISTRY.restricted_registry(request.args.get("name[]")) body = encoder(registry) return Response(body, content_type=content_type) REGISTRY.register(LocustCollector(environment, runner, target_service)) 解説 Prometheus Collectorを実装する Prometheus Python Client を使い、メトリクスを収集するCollectorを実装します。 locust.statsよりpathごとのmethod、リクエスト数、成功/失敗数といった各値が取れるので、これをPrometheusのmetricに追加することができます。 Prometheus Registryに登録する 先程作ったCollectorを、prometheus_clientのRegistryに登録します。 /metricsエンドポイントで公開する @environment.web_ui.app.route("/metrics") のアノテーションをつけることで、Web UIモードで起動したときに /metrics にアクセスすると登録した関数が実行されます。 ここでPrometheusが収集できる形式でメトリクスを出力します。