diff --git a/audbcards/core/dataset.py b/audbcards/core/dataset.py
index 36137df..5290b80 100644
--- a/audbcards/core/dataset.py
+++ b/audbcards/core/dataset.py
@@ -22,7 +22,9 @@ class _Dataset:
_table_related_cached_properties = [
"segment_durations",
"segments",
+ "tables_columns",
"tables_preview",
+ "tables_rows",
]
"""Cached properties relying on table data.
@@ -510,6 +512,22 @@ def tables(self) -> typing.List[str]:
tables = list(db)
return tables
+ @functools.cached_property
+ def tables_columns(self) -> typing.Dict[str, int]:
+ """Number of columns for each table of the dataset.
+
+ Returns:
+ dictionary with table IDs as keys
+ and number of columns as values
+
+ Examples:
+ >>> ds = Dataset("emodb", "1.4.1")
+ >>> ds.tables_columns["speaker"]
+ 3
+
+ """
+ return {table: stats["columns"] for table, stats in self._tables_stats.items()}
+
@functools.cached_property
def tables_preview(self) -> typing.Dict[str, typing.List[typing.List[str]]]:
"""Table preview for each table of the dataset.
@@ -540,21 +558,32 @@ def tables_preview(self) -> typing.Dict[str, typing.List[typing.List[str]]]:
"""
preview = {}
- for table in list(self.header):
- df = audb.load_table(
- self.name,
- table,
- version=self.version,
- verbose=False,
- )
+ for table, stats in self._tables_stats.items():
+ df = stats["preview"]
df = df.reset_index()
header = [df.columns.tolist()]
- body = df.head(5).astype("string").values.tolist()
+ body = df.astype("string").values.tolist()
# Remove unwanted chars and limit length of each entry
body = [[self._parse_text(column) for column in row] for row in body]
preview[table] = header + body
return preview
+ @functools.cached_property
+ def tables_rows(self) -> typing.Dict[str, int]:
+ """Number of rows for each table of the dataset.
+
+ Returns:
+ dictionary with table IDs as keys
+ and number of rows as values
+
+ Examples:
+ >>> ds = Dataset("emodb", "1.4.1")
+ >>> ds.tables_rows["speaker"]
+ 10
+
+ """
+ return {table: stats["rows"] for table, stats in self._tables_stats.items()}
+
@functools.cached_property
def tables_table(self) -> typing.List[str]:
"""Tables of the dataset."""
@@ -751,6 +780,39 @@ def _segments(self) -> pd.MultiIndex:
index = audformat.utils.union([index, df.index])
return index
+ @functools.cached_property
+ def _tables_stats(self) -> typing.Dict[str, dict]:
+ """Statistics and preview for each table of the dataset.
+
+ Caches the number of rows, the number of columns,
+ and a preview for every table of the dataset,
+ so that ``tables_columns``, ``tables_rows``,
+ and ``tables_preview`` can share a single
+ ``audb.load_table()`` call per table
+ instead of loading the same table repeatedly.
+
+ Returns:
+ dictionary with table IDs as keys and dictionaries as values, containing:
+ - "columns": number of columns
+ - "rows": number of rows
+ - "preview": dataframe preview (up to the first 5 rows)
+
+ """
+ stats = {}
+ for table in list(self.header):
+ df = audb.load_table(
+ self.name,
+ table,
+ version=self.version,
+ verbose=False,
+ )
+ stats[table] = {
+ "columns": len(df.columns),
+ "rows": len(df),
+ "preview": df.head(5),
+ }
+ return stats
+
@staticmethod
def _map_iso_languages(languages: typing.List[str]) -> typing.List[str]:
r"""Calculate ISO languages for a list of languages.
diff --git a/audbcards/core/templates/datacard_tables.j2 b/audbcards/core/templates/datacard_tables.j2
index 0b11de5..5fc9807 100644
--- a/audbcards/core/templates/datacard_tables.j2
+++ b/audbcards/core/templates/datacard_tables.j2
@@ -41,8 +41,10 @@ Tables
{% for column in row %}
{{ column }} |
{% endfor %}
+
{% endif %}
{% endfor %}
+ {{ tables_rows[row[0]] }} {% if tables_rows[row[0]] == 1 %}row{% else %}rows{% endif %} x {{ tables_columns[row[0]] }} {% if tables_columns[row[0]] == 1 %}column{% else %}columns{% endif %} |
diff --git a/audbcards/sphinx/table-preview.css b/audbcards/sphinx/table-preview.css
index ce82007..ba4019f 100644
--- a/audbcards/sphinx/table-preview.css
+++ b/audbcards/sphinx/table-preview.css
@@ -34,6 +34,10 @@ table.preview td {
border-top: none;
border-bottom: none;
}
+table.preview td p.table-statistic {
+ /* Make "N rows x M columns" smaller */
+ font-size: 90%;
+}
table.clickable td:not(.expanded-row-content),
table.clickable th {
/* Allow to center cell content with `margin: auto` */
diff --git a/tests/test_data/rendered_templates/medium_db.rst b/tests/test_data/rendered_templates/medium_db.rst
index 5abaa7a..0dd4912 100644
--- a/tests/test_data/rendered_templates/medium_db.rst
+++ b/tests/test_data/rendered_templates/medium_db.rst
@@ -73,10 +73,13 @@ Tables
data/f0.wav |
0 |
-
+
+
data/f1.wav |
1 |
-
+
+ 2 rows x 1 column |
+
@@ -104,22 +107,27 @@ Tables
0 days 00:00:00 |
0 days 00:00:00.500000 |
neutral |
-
+
+
data/f0.wav |
0 days 00:00:00.500000 |
0 days 00:00:01 |
neutral |
-
+
+
data/f1.wav |
0 days 00:00:00 |
0 days 00:02:30 |
happy |
-
+
+
data/f1.wav |
0 days 00:02:30 |
0 days 00:05:01 |
angry |
-
+
+ 4 rows x 1 column |
+
@@ -145,11 +153,14 @@ Tables
0 |
23 |
female |
-
+
+
1 |
49 |
male |
-
+
+ 2 rows x 2 columns |
+
diff --git a/tests/test_data/rendered_templates/minimal_db.rst b/tests/test_data/rendered_templates/minimal_db.rst
index 97b956a..248991e 100644
--- a/tests/test_data/rendered_templates/minimal_db.rst
+++ b/tests/test_data/rendered_templates/minimal_db.rst
@@ -58,7 +58,9 @@ Tables
f0.wav |
0 |
-
+
+ 1 row x 1 column |
+
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index 38d559c..d73bdfa 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -7,7 +7,6 @@
import audb
import audeer
-import audformat
import audiofile
import audbcards
@@ -50,12 +49,67 @@ def test_dataset_property_scope(tmpdir, db, request):
@pytest.mark.parametrize(
- "db",
+ "db, "
+ "expected_description, "
+ "expected_schemes_table, "
+ "expected_tables_table, "
+ "expected_tables_columns, "
+ "expected_tables_rows, "
+ "expected_segment_durations",
[
- "medium_db",
+ (
+ "bare_db",
+ "",
+ [[]],
+ [["ID", "Type", "Columns"]],
+ {},
+ {},
+ [],
+ ),
+ (
+ "minimal_db",
+ "Minimal database.",
+ [[]],
+ [["ID", "Type", "Columns"], ["files", "filewise", "speaker"]],
+ {"files": 1},
+ {"files": 1},
+ [],
+ ),
+ (
+ "medium_db",
+ "Medium database. | Some description |.",
+ [
+ ["ID", "Dtype", "Min", "Labels", "Mappings"],
+ ["age", "int", 0, "", ""],
+ ["emotion", "str", "", "angry, happy, neutral", ""],
+ ["gender", "str", "", "female, male", ""],
+ ["speaker", "int", "", "0, 1", "age, gender"],
+ ],
+ [
+ ["ID", "Type", "Columns"],
+ ["files", "filewise", "speaker"],
+ ["segments", "segmented", "emotion"],
+ ["speaker", "misc", "age, gender"],
+ ],
+ {"files": 1, "segments": 1, "speaker": 2},
+ {"files": 2, "segments": 4, "speaker": 2},
+ [0.5, 0.5, 150, 151],
+ ),
],
)
-def test_dataset(audb_cache, tmpdir, repository, db, request):
+def test_dataset(
+ audb_cache,
+ tmpdir,
+ repository,
+ request,
+ db,
+ expected_description,
+ expected_schemes_table,
+ expected_tables_table,
+ expected_tables_columns,
+ expected_tables_rows,
+ expected_segment_durations,
+):
r"""Test audbcards.Dataset object and all its properties."""
db = request.getfixturevalue(db)
@@ -115,7 +169,7 @@ def test_dataset(audb_cache, tmpdir, repository, db, request):
# duration
expected_duration = db.files_duration(db.files).sum()
- assert dataset.duration == expected_duration
+ assert dataset.duration == pd.to_timedelta(expected_duration)
# files
expected_files = len(db.files)
@@ -175,17 +229,9 @@ def test_dataset(audb_cache, tmpdir, repository, db, request):
assert dataset.schemes == expected_schemes
# schemes_table
- expected_schemes_table = [
- ["ID", "Dtype", "Min", "Labels", "Mappings"],
- ["age", "int", 0, "", ""],
- ["emotion", "str", "", "angry, happy, neutral", ""],
- ["gender", "str", "", "female, male", ""],
- ["speaker", "int", "", "0, 1", "age, gender"],
- ]
assert dataset.schemes_table == expected_schemes_table
# segment_durations
- expected_segment_durations = [0.5, 0.5, 150, 151]
assert dataset.segment_durations == expected_segment_durations
# segments
@@ -193,28 +239,19 @@ def test_dataset(audb_cache, tmpdir, repository, db, request):
assert dataset.segments == expected_segments
# short_description
- max_desc_length = 150
- expected_description = (
- db.description
- if (len(db.description) < max_desc_length)
- else f"{db.description[:max_desc_length - 3]}..."
- )
assert dataset.short_description == expected_description
# tables
expected_tables = list(db)
assert dataset.tables == expected_tables
+ # tables_columns
+ assert dataset.tables_columns == expected_tables_columns
+
+ # tables_rows
+ assert dataset.tables_rows == expected_tables_rows
+
# tables_table
- expected_tables_table = [["ID", "Type", "Columns"]]
- for table_id in list(db):
- table = db[table_id]
- if isinstance(table, audformat.MiscTable):
- table_type = "misc"
- else:
- table_type = table.type
- columns = ", ".join(list(table.columns))
- expected_tables_table.append([table_id, table_type, columns])
assert dataset.tables_table == expected_tables_table
# version