Skip to content

Commit

Permalink
get total_samples from the dataset
Browse files (browse the repository at this point in the history)
  • Loading branch information
gmauro committed Nov 3, 2024
1 parent a9dc526 commit a1bf4ba
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions gwasstudio/cli/metadata/ingest.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,6 +2,7 @@
from pathlib import Path

import cloup
import pandas as pd

from gwasstudio import logger
from gwasstudio.mongo.models import EnhancedDataProfile
Expand Down Expand Up @@ -36,6 +37,8 @@ def meta_ingest(data_path):
}
logger.info("{} documents to ingest".format(len(file_list)))
for path in file_list:
df = pd.read_csv(Path(path), compression="gzip", nrows=1, usecols=["N"], sep="\t")
total_samples = int(df.loc[0, "N"])
file_hash = compute_sha256(fpath=path)
basename = Path(path).name.split("_")[:-1]
seqid = "_".join([basename[2], basename[3]])
Expand All @@ -52,6 +55,7 @@ def meta_ingest(data_path):
"project": project,
"data_id": file_hash,
"category": category,
"total_samples": total_samples,
"population": population,
"build": build,
"trait_desc": json.dumps(trait_desc),
Expand Down

0 comments on commit a1bf4ba

Please sign in to comment.