diff --git a/ooniapi/common/src/common/routers.py b/ooniapi/common/src/common/routers.py index 305d3079..a6602b1a 100644 --- a/ooniapi/common/src/common/routers.py +++ b/ooniapi/common/src/common/routers.py @@ -1,4 +1,5 @@ from datetime import date, datetime +from typing import Union from pydantic import BaseModel as PydandicBaseModel from pydantic import ConfigDict diff --git a/ooniapi/services/oonimeasurements/src/oonimeasurements/routers/v1/aggregation.py b/ooniapi/services/oonimeasurements/src/oonimeasurements/routers/v1/aggregation.py index 2c049a0d..03abdee0 100644 --- a/ooniapi/services/oonimeasurements/src/oonimeasurements/routers/v1/aggregation.py +++ b/ooniapi/services/oonimeasurements/src/oonimeasurements/routers/v1/aggregation.py @@ -2,13 +2,12 @@ Aggregation API """ -from datetime import datetime, timedelta, date +from datetime import datetime, timedelta, date, timezone from typing import List, Any, Dict, Optional, Union import logging from fastapi import APIRouter, Depends, Query, HTTPException from fastapi.responses import Response -from pydantic import BaseModel from typing_extensions import Annotated from clickhouse_driver import Client as ClickhouseClient @@ -20,7 +19,7 @@ from oonimeasurements.common.clickhouse_utils import query_click, query_click_one_row from oonimeasurements.common.utils import jerror, commasplit, convert_to_csv from ...dependencies import get_clickhouse_session - +from ...common.routers import BaseModel router = APIRouter() @@ -115,7 +114,7 @@ class AggregationResult(BaseModel): failure_count: int ok_count: int measurement_count: int - measurement_start_day: Optional[date] = None + measurement_start_day: Optional[str] = None blocking_type: Optional[str] = None category_code: Optional[str] = None domain: Optional[str] = None @@ -132,7 +131,11 @@ class MeasurementAggregation(BaseModel): result: Union[List[AggregationResult], AggregationResult] -@router.get("/v1/aggregation", response_model_exclude_none=True) +@router.get( + "/v1/aggregation", + response_model_exclude_none=True, + response_model=MeasurementAggregation, +) async def get_measurements( response: Response, input: Annotated[ @@ -340,12 +343,16 @@ async def get_measurements( group_by: List = [] try: if axis_x == "measurement_start_day": - group_by_date(since, until, time_grain, cols, colnames, group_by) + time_grain = group_by_date( + since, until, time_grain, cols, colnames, group_by + ) elif axis_x: add_axis(axis_x, cols, colnames, group_by) if axis_y == "measurement_start_day": - group_by_date(since, until, time_grain, cols, colnames, group_by) + time_grain = group_by_date( + since, until, time_grain, cols, colnames, group_by + ) elif axis_y: add_axis(axis_y, cols, colnames, group_by) @@ -370,7 +377,17 @@ async def get_measurements( try: if dimension_cnt > 0: - r: Any = list(query_click(db, query, query_params, query_prio=4)) + str_format = "%Y-%m-%d" + if time_grain == "hour": + str_format = "%Y-%m-%dT%H:%M:%SZ" + r: Any = [] + for row in query_click(db, query, query_params, query_prio=4): + ## Handle the difference in formatting between hourly and daily measurement_start_day + if "measurement_start_day" in row: + row["measurement_start_day"] = row[ + "measurement_start_day" + ].strftime(str_format) + r.append(row) else: r = query_click_one_row(db, query, query_params, query_prio=4) @@ -408,7 +425,8 @@ async def get_measurements( elapsed_seconds=pq.elapsed, ), result=r, - ).model_dump(exclude_none=True) + ) except Exception as e: + print(e) raise HTTPException(status_code=400, detail=str(e)) diff --git a/ooniapi/services/oonimeasurements/tests/test_measurements.py b/ooniapi/services/oonimeasurements/tests/test_measurements.py index 65f8c3a7..558c54d5 100644 --- a/ooniapi/services/oonimeasurements/tests/test_measurements.py +++ b/ooniapi/services/oonimeasurements/tests/test_measurements.py @@ -313,19 +313,123 @@ def test_aggregation_x_axis_only_hour(client): "dimension_count": 1, "result": [ { - "anomaly_count": 686, - "confirmed_count": 42, - "failure_count": 777, - "measurement_count": 9990, + "anomaly_count": 9, + "confirmed_count": 0, + "failure_count": 2, + "measurement_count": 20, "measurement_start_day": "2021-07-09T00:00:00Z", - "ok_count": 8485, + "ok_count": 9, + }, + { + "anomaly_count": 0, + "confirmed_count": 0, + "failure_count": 0, + "measurement_count": 4, + "measurement_start_day": "2021-07-09T02:00:00Z", + "ok_count": 4, + }, + { + "anomaly_count": 0, + "confirmed_count": 0, + "failure_count": 0, + "measurement_count": 1, + "measurement_start_day": "2021-07-09T03:00:00Z", + "ok_count": 1, + }, + { + "anomaly_count": 0, + "confirmed_count": 0, + "failure_count": 0, + "measurement_count": 1, + "measurement_start_day": "2021-07-09T04:00:00Z", + "ok_count": 1, + }, + { + "anomaly_count": 1, + "confirmed_count": 0, + "failure_count": 0, + "measurement_count": 20, + "measurement_start_day": "2021-07-09T05:00:00Z", + "ok_count": 19, + }, + { + "anomaly_count": 0, + "confirmed_count": 0, + "failure_count": 0, + "measurement_count": 1, + "measurement_start_day": "2021-07-09T07:00:00Z", + "ok_count": 1, + }, + { + "anomaly_count": 0, + "confirmed_count": 0, + "failure_count": 0, + "measurement_count": 1, + "measurement_start_day": "2021-07-09T08:00:00Z", + "ok_count": 1, + }, + { + "anomaly_count": 0, + "confirmed_count": 0, + "failure_count": 0, + "measurement_count": 5, + "measurement_start_day": "2021-07-09T09:00:00Z", + "ok_count": 5, + }, + { + "anomaly_count": 0, + "confirmed_count": 0, + "failure_count": 0, + "measurement_count": 1, + "measurement_start_day": "2021-07-09T10:00:00Z", + "ok_count": 1, + }, + { + "anomaly_count": 0, + "confirmed_count": 0, + "failure_count": 0, + "measurement_count": 2, + "measurement_start_day": "2021-07-09T12:00:00Z", + "ok_count": 2, + }, + { + "anomaly_count": 0, + "confirmed_count": 0, + "failure_count": 0, + "measurement_count": 3, + "measurement_start_day": "2021-07-09T14:00:00Z", + "ok_count": 3, + }, + { + "anomaly_count": 0, + "confirmed_count": 0, + "failure_count": 0, + "measurement_count": 2, + "measurement_start_day": "2021-07-09T15:00:00Z", + "ok_count": 2, + }, + { + "anomaly_count": 2, + "confirmed_count": 0, + "failure_count": 0, + "measurement_count": 2, + "measurement_start_day": "2021-07-09T16:00:00Z", + "ok_count": 0, + }, + { + "anomaly_count": 1, + "confirmed_count": 0, + "failure_count": 2, + "measurement_count": 6, + "measurement_start_day": "2021-07-09T17:00:00Z", + "ok_count": 3, }, { "anomaly_count": 0, "confirmed_count": 0, "failure_count": 0, "measurement_count": 1, - "measurement_start_day": "2021-07-09T01:00:00Z", + "measurement_start_day": "2021-07-09T19:00:00Z", "ok_count": 1, }, ], @@ -341,14 +445,14 @@ def test_aggregation_x_axis_domain(client): r.pop("db_stats", None) assert r["dimension_count"] == 1 for x in r["result"]: - if x["domain"] == "www.theregister.co.uk": + if x["domain"] == "anonym.to": assert x == { - "anomaly_count": 0, + "anomaly_count": 1, "confirmed_count": 0, - "domain": "www.theregister.co.uk", + "domain": "anonym.to", "failure_count": 0, "measurement_count": 1, - "ok_count": 1, + "ok_count": 0, } return @@ -383,34 +487,25 @@ def test_aggregation_x_axis_only_probe_cc(client): url = "aggregation?since=2021-07-09&until=2021-07-10&axis_x=probe_cc" r = api(client, url) assert r["dimension_count"] == 1 - assert len(r["result"]) == 33 + assert len(r["result"]) == 4 def test_aggregation_x_axis_only_category_code(client): # 1-dimensional data - url = "aggregation?probe_cc=IE&category_code=HACK&since=2021-07-09&until=2021-07-10&axis_x=measurement_start_day" + url = "aggregation?probe_cc=CH&category_code=GAME&since=2021-07-09&until=2021-07-10&axis_x=measurement_start_day" r = api(client, url) - expected = { - "dimension_count": 1, - "result": [ - { - "anomaly_count": 32, - "confirmed_count": 0, - "failure_count": 0, - "measurement_count": 1302, - "measurement_start_day": "2021-07-10", - }, - { - "anomaly_count": 13, - "confirmed_count": 0, - "failure_count": 0, - "measurement_count": 1236, - "measurement_start_day": "2021-07-10", - }, - ], - "v": 0, - } - assert r == expected, fjd(r) + assert r["result"] == [ + { + "anomaly_count": 0, + "confirmed_count": 0, + "failure_count": 0, + "measurement_count": 1, + "ok_count": 1, + "measurement_start_day": "2021-07-09T00:00:00Z", + }, + ] + assert r["dimension_count"] == 1 + assert r["v"] == 0 def test_aggregation_x_axis_only_csv(client):