diff --git a/vectordb_bench/backend/runner/mp_runner.py b/vectordb_bench/backend/runner/mp_runner.py index 596865b4..8f35dcd8 100644 --- a/vectordb_bench/backend/runner/mp_runner.py +++ b/vectordb_bench/backend/runner/mp_runner.py @@ -89,11 +89,12 @@ def get_mp_context(): - def _run_all_concurrencies_mem_efficient(self) -> float: + def _run_all_concurrencies_mem_efficient(self): max_qps = 0 conc_num_list = [] conc_qps_list = [] conc_latency_p99_list = [] + conc_latency_avg_list = [] try: for conc in self.concurrencies: with mp.Manager() as m: @@ -113,13 +114,15 @@ def _run_all_concurrencies_mem_efficient(self) -> float: start = time.perf_counter() all_count = sum([r.result()[0] for r in future_iter]) latencies = sum([r.result()[2] for r in future_iter], start=[]) - latency_p99 = np.percentile(latencies, 0.99) + latency_p99 = np.percentile(latencies, 99) + latency_avg = np.mean(latencies) cost = time.perf_counter() - start qps = round(all_count / cost, 4) conc_num_list.append(conc) conc_qps_list.append(qps) conc_latency_p99_list.append(latency_p99) + conc_latency_avg_list.append(latency_avg) log.info(f"End search in concurrency {conc}: dur={cost}s, total_count={all_count}, qps={qps}") if qps > max_qps: @@ -136,7 +139,7 @@ def _run_all_concurrencies_mem_efficient(self) -> float: finally: self.stop() - return max_qps, conc_num_list, conc_qps_list, conc_latency_p99_list + return max_qps, conc_num_list, conc_qps_list, conc_latency_p99_list, conc_latency_avg_list def run(self) -> float: """ diff --git a/vectordb_bench/backend/task_runner.py b/vectordb_bench/backend/task_runner.py index c275ebe9..0b115bba 100644 --- a/vectordb_bench/backend/task_runner.py +++ b/vectordb_bench/backend/task_runner.py @@ -176,6 +176,9 @@ def _run_perf_case(self, drop_old: bool = True) -> Metric: or TaskStage.SEARCH_CONCURRENT in self.config.stages ): self._init_search_runner() + if TaskStage.SEARCH_CONCURRENT in self.config.stages: + search_results = self._conc_search() + m.qps, m.conc_num_list, m.conc_qps_list, m.conc_latency_p99_list, m.conc_latency_avg_list = search_results if TaskStage.SEARCH_SERIAL in self.config.stages: search_results = self._serial_search() ''' @@ -183,9 +186,6 @@ def _run_perf_case(self, drop_old: bool = True) -> Metric: m.serial_latencies = search_results.serial_latencies ''' m.recall, m.ndcg, m.serial_latency_p99 = search_results - if TaskStage.SEARCH_CONCURRENT in self.config.stages: - search_results = self._conc_search() - m.qps, m.conc_num_list, m.conc_qps_list, m.conc_latency_p99_list = search_results except Exception as e: log.warning(f"Failed to run performance case, reason = {e}") diff --git a/vectordb_bench/frontend/components/concurrent/charts.py b/vectordb_bench/frontend/components/concurrent/charts.py index 68e7f321..11379c4b 100644 --- a/vectordb_bench/frontend/components/concurrent/charts.py +++ b/vectordb_bench/frontend/components/concurrent/charts.py @@ -6,7 +6,7 @@ from vectordb_bench.frontend.config.styles import COLOR_MAP -def drawChartsByCase(allData, showCaseNames: list[str], st): +def drawChartsByCase(allData, showCaseNames: list[str], st, latency_type: str): initMainExpanderStyle(st) for caseName in showCaseNames: chartContainer = st.expander(caseName, True) @@ -14,15 +14,24 @@ def drawChartsByCase(allData, showCaseNames: list[str], st): data = [ { "conc_num": caseData["conc_num_list"][i], - "qps": caseData["conc_qps_list"][i], - "latency_p99": caseData["conc_latency_p99_list"][i] * 1000, + "qps": caseData["conc_qps_list"][i] + if 0 <= i < len(caseData["conc_qps_list"]) + else 0, + "latency_p99": caseData["conc_latency_p99_list"][i] * 1000 + if 0 <= i < len(caseData["conc_latency_p99_list"]) + else 0, + "latency_avg": caseData["conc_latency_avg_list"][i] * 1000 + if 0 <= i < len(caseData["conc_latency_avg_list"]) + else 0, "db_name": caseData["db_name"], "db": caseData["db"], } for caseData in caseDataList for i in range(len(caseData["conc_num_list"])) ] - drawChart(data, chartContainer, key=f"{caseName}-qps-p99") + drawChart( + data, chartContainer, key=f"{caseName}-qps-p99", x_metric=latency_type + ) def getRange(metric, data, padding_multipliers): @@ -36,14 +45,21 @@ def getRange(metric, data, padding_multipliers): return rangeV -def drawChart(data, st, key: str): +def gen_title(s: str) -> str: + if "latency" in s: + return f'{s.replace("_", " ").title()} (ms)' + else: + return s.upper() + + +def drawChart(data, st, key: str, x_metric: str = "latency_p99", y_metric: str = "qps"): if len(data) == 0: return - x = "latency_p99" + x = x_metric xrange = getRange(x, data, [0.05, 0.1]) - y = "qps" + y = y_metric yrange = getRange(y, data, [0.2, 0.1]) color = "db" @@ -69,8 +85,8 @@ def drawChart(data, st, key: str): }, height=720, ) - fig.update_xaxes(range=xrange, title_text="Latency P99 (ms)") - fig.update_yaxes(range=yrange, title_text="QPS") + fig.update_xaxes(range=xrange, title_text=gen_title(x_metric)) + fig.update_yaxes(range=yrange, title_text=gen_title(y_metric)) fig.update_traces(textposition="bottom right", texttemplate="conc-%{text:,.4~r}") st.plotly_chart(fig, use_container_width=True, key=key) diff --git a/vectordb_bench/frontend/pages/concurrent.py b/vectordb_bench/frontend/pages/concurrent.py index b4eae339..94167543 100644 --- a/vectordb_bench/frontend/pages/concurrent.py +++ b/vectordb_bench/frontend/pages/concurrent.py @@ -55,7 +55,11 @@ def check_conc_data(res: TestResult): resultesContainer = st.sidebar.container() getResults(resultesContainer, "vectordb_bench_concurrent") - drawChartsByCase(shownData, showCaseNames, st.container()) + # main + latency_type = st.radio("Latency Type", options=["latency_p99", "latency_avg"]) + drawChartsByCase( + shownData, showCaseNames, st.container(), latency_type=latency_type + ) # footer footer(st.container()) diff --git a/vectordb_bench/metric.py b/vectordb_bench/metric.py index 5c23072e..9f083a5c 100644 --- a/vectordb_bench/metric.py +++ b/vectordb_bench/metric.py @@ -23,6 +23,7 @@ class Metric: conc_num_list: list[int] = field(default_factory=list) conc_qps_list: list[float] = field(default_factory=list) conc_latency_p99_list: list[float] = field(default_factory=list) + conc_latency_avg_list: list[float] = field(default_factory=list) QURIES_PER_DOLLAR_METRIC = "QP$ (Quries per Dollar)"