diff --git a/nbs/99_manuscript/supplementary_files/01-supplementary_file3.ipynb b/nbs/99_manuscript/supplementary_files/01-supplementary_file3.ipynb
new file mode 100644
index 00000000..8bdf72dc
--- /dev/null
+++ b/nbs/99_manuscript/supplementary_files/01-supplementary_file3.ipynb
@@ -0,0 +1,3646 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "87e0ce1b-7ce6-4499-9342-5ded05307598",
+ "metadata": {
+ "papermill": {
+ "duration": 0.012027,
+ "end_time": "2024-01-05T18:03:25.401599",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:25.389572",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "# Description"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c9e288bf-415c-44b7-84a4-7999c21af777",
+ "metadata": {
+ "papermill": {
+ "duration": 0.010328,
+ "end_time": "2024-01-05T18:03:25.422531",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:25.412203",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "Creates **Supplementary File 3**.\n",
+ "\n",
+ "*Description*: Correlations and p-values of a subset of gene pairs across all tissues in GTEx v8."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e020c781-238b-43c2-8cad-2722b8a240e2",
+ "metadata": {
+ "papermill": {
+ "duration": 0.010284,
+ "end_time": "2024-01-05T18:03:25.443447",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:25.433163",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "# Modules"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "a819bfbc-5009-4c68-ba8d-37d0979d368f",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:25.465283Z",
+ "iopub.status.busy": "2024-01-05T18:03:25.465089Z",
+ "iopub.status.idle": "2024-01-05T18:03:25.861250Z",
+ "shell.execute_reply": "2024-01-05T18:03:25.860761Z"
+ },
+ "papermill": {
+ "duration": 0.409123,
+ "end_time": "2024-01-05T18:03:25.862997",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:25.453874",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import rpy2.robjects as ro\n",
+ "from rpy2.robjects import pandas2ri\n",
+ "from rpy2.robjects.conversion import localconverter\n",
+ "\n",
+ "from ccc import conf"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "860ff6ef-dba1-4647-b77d-ca289cffa36b",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:25.885393Z",
+ "iopub.status.busy": "2024-01-05T18:03:25.885293Z",
+ "iopub.status.idle": "2024-01-05T18:03:25.888526Z",
+ "shell.execute_reply": "2024-01-05T18:03:25.888117Z"
+ },
+ "papermill": {
+ "duration": 0.015781,
+ "end_time": "2024-01-05T18:03:25.889707",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:25.873926",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "readRDS = ro.r[\"readRDS\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "8b82561e-ea69-4e57-ac22-8c026b34b022",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:25.911306Z",
+ "iopub.status.busy": "2024-01-05T18:03:25.911204Z",
+ "iopub.status.idle": "2024-01-05T18:03:25.913877Z",
+ "shell.execute_reply": "2024-01-05T18:03:25.913523Z"
+ },
+ "papermill": {
+ "duration": 0.014723,
+ "end_time": "2024-01-05T18:03:25.915052",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:25.900329",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "saveRDS = ro.r[\"saveRDS\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b4834387-58ff-468c-b326-85c408bc5feb",
+ "metadata": {
+ "papermill": {
+ "duration": 0.010406,
+ "end_time": "2024-01-05T18:03:25.936020",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:25.925614",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "# Settings"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "765e820a-4518-4bbc-a00f-14c9cea03821",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:25.955038Z",
+ "iopub.status.busy": "2024-01-05T18:03:25.954953Z",
+ "iopub.status.idle": "2024-01-05T18:03:25.957323Z",
+ "shell.execute_reply": "2024-01-05T18:03:25.956876Z"
+ },
+ "papermill": {
+ "duration": 0.011454,
+ "end_time": "2024-01-05T18:03:25.958066",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:25.946612",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "DATASET_CONFIG = conf.GTEX"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a1159982-5dd1-4494-97d4-0674eeead1c3",
+ "metadata": {
+ "papermill": {
+ "duration": 0.005497,
+ "end_time": "2024-01-05T18:03:25.969129",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:25.963632",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "# Paths"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "d50e29a6-0e18-4c3d-b933-f21a17abf831",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:25.981083Z",
+ "iopub.status.busy": "2024-01-05T18:03:25.980717Z",
+ "iopub.status.idle": "2024-01-05T18:03:25.983834Z",
+ "shell.execute_reply": "2024-01-05T18:03:25.983368Z"
+ },
+ "papermill": {
+ "duration": 0.009927,
+ "end_time": "2024-01-05T18:03:25.984572",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:25.974645",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Stop early unless the manuscript base directory is configured and present on disk.\n",
+ "assert not (\n",
+ "    conf.MANUSCRIPT[\"BASE_DIR\"] is None or not conf.MANUSCRIPT[\"BASE_DIR\"].exists()\n",
+ "), \"Manuscript dir not set\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "30cce6f5-ca1b-438c-859d-31903a42d4c6",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:25.996739Z",
+ "iopub.status.busy": "2024-01-05T18:03:25.996223Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.005998Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.005538Z"
+ },
+ "papermill": {
+ "duration": 0.016652,
+ "end_time": "2024-01-05T18:03:26.006837",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:25.990185",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "PosixPath('/opt/data/results/gtex_v8/other_tissues')"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Folder holding one results subfolder per gene pair. Built from DATASET_CONFIG\n",
+ "# (instead of conf.GTEX directly) so the dataset is selected in a single place.\n",
+ "INPUT_DIR = DATASET_CONFIG[\"RESULTS_DIR\"] / \"other_tissues\"\n",
+ "# Fail fast with a clear message rather than on the first file read further down.\n",
+ "assert INPUT_DIR.exists(), f\"Input dir does not exist: {INPUT_DIR}\"\n",
+ "display(INPUT_DIR)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "523c6a4e-87ec-44bf-bad3-e912b8aa0482",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.038997Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.038842Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.043285Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.042771Z"
+ },
+ "papermill": {
+ "duration": 0.011579,
+ "end_time": "2024-01-05T18:03:26.044125",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.032546",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "PosixPath('/opt/data/supplementary_material')"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "OUTPUT_DIR = conf.MANUSCRIPT[\"SUPPLEMENTARY_MATERIAL_DIR\"]\n",
+ "OUTPUT_DIR.mkdir(parents=True, exist_ok=True)\n",
+ "display(OUTPUT_DIR)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "8fefcf66-4c68-4086-bf23-7f52c6fd2291",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.056526Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.056140Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.059042Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.058533Z"
+ },
+ "papermill": {
+ "duration": 0.010002,
+ "end_time": "2024-01-05T18:03:26.059885",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.049883",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "OUTPUT_FILENAME = \"Supplementary_File_03-Gene_pairs_correlations_all_GTEx_tissues\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "486ca2a1-d8ec-4be4-a208-1315c28d566c",
+ "metadata": {
+ "papermill": {
+ "duration": 0.005708,
+ "end_time": "2024-01-05T18:03:26.071424",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.065716",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "# Data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cecd38b0-6fe7-4681-af63-c9ba26747767",
+ "metadata": {
+ "papermill": {
+ "duration": 0.00568,
+ "end_time": "2024-01-05T18:03:26.082873",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.077193",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "## Gene Ensembl ID -> Symbol mapping"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "29cbb445-2dfb-42f1-a56c-6595db019c7a",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.095274Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.094858Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.116318Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.115754Z"
+ },
+ "papermill": {
+ "duration": 0.028656,
+ "end_time": "2024-01-05T18:03:26.117267",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.088611",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "gene_map = pd.read_pickle(\n",
+ " DATASET_CONFIG[\"DATA_DIR\"] / \"gtex_gene_id_symbol_mappings.pkl\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "47fdd6e9-3775-4bc3-b65c-e368793a6f33",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.129769Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.129377Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.184510Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.183933Z"
+ },
+ "papermill": {
+ "duration": 0.062423,
+ "end_time": "2024-01-05T18:03:26.185468",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.123045",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "gene_map = gene_map.set_index(\"gene_ens_id\")[\"gene_symbol\"].to_dict()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "b8962711-87b0-4664-9bfe-76e48d49b15d",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.197726Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.197561Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.200750Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.200196Z"
+ },
+ "papermill": {
+ "duration": 0.010361,
+ "end_time": "2024-01-05T18:03:26.201584",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.191223",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "assert gene_map[\"ENSG00000145309.5\"] == \"CABS1\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3893f4f3-f0c5-464a-ab43-a5d0de0f2ea8",
+ "metadata": {
+ "papermill": {
+ "duration": 0.007364,
+ "end_time": "2024-01-05T18:03:26.215533",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.208169",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "# List of dataframes to combine"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "dfcf7486-20bc-4144-9b19-b872a35fadaf",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.229029Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.228502Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.231189Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.230787Z"
+ },
+ "papermill": {
+ "duration": 0.010211,
+ "end_time": "2024-01-05T18:03:26.231974",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.221763",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "df_list = []"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5b2f0f21-17c4-4046-8a71-74a97c3124ce",
+ "metadata": {
+ "papermill": {
+ "duration": 0.005796,
+ "end_time": "2024-01-05T18:03:26.243771",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.237975",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "# KDM6A - UTY"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "e7d8b188-ddbc-4bca-9b9a-89863a0e8c47",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.256336Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.256055Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.258893Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.258496Z"
+ },
+ "papermill": {
+ "duration": 0.009949,
+ "end_time": "2024-01-05T18:03:26.259663",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.249714",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Gene pair for this section (versioned Ensembl IDs and symbols).\n",
+ "gene0_id = \"ENSG00000147050.14\"\n",
+ "gene0_symbol = \"KDM6A\"\n",
+ "\n",
+ "gene1_id = \"ENSG00000183878.15\"\n",
+ "gene1_symbol = \"UTY\"\n",
+ "\n",
+ "# The hard-coded IDs and symbols must agree with the ID -> symbol mapping.\n",
+ "assert gene_map[gene0_id] == gene0_symbol\n",
+ "assert gene_map[gene1_id] == gene1_symbol"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "0ad4ef1b-7c1c-4720-ab36-456aa5d23b0a",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.272208Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.271939Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.275241Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.274865Z"
+ },
+ "papermill": {
+ "duration": 0.010394,
+ "end_time": "2024-01-05T18:03:26.276008",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.265614",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "PosixPath('/opt/data/results/gtex_v8/other_tissues/kdm6a_vs_uty')"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "GENE_PAIR_INPUT_DIR = INPUT_DIR / f\"{gene0_symbol.lower()}_vs_{gene1_symbol.lower()}\"\n",
+ "display(GENE_PAIR_INPUT_DIR)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2574db4a-dc94-4f8a-89b0-2229b76956d8",
+ "metadata": {
+ "papermill": {
+ "duration": 0.005871,
+ "end_time": "2024-01-05T18:03:26.287887",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.282016",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "## Correlation values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "a91a6c4e-4ecd-4bf7-bd25-e3e354d2276a",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.300630Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.300289Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.304076Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.303673Z"
+ },
+ "papermill": {
+ "duration": 0.010874,
+ "end_time": "2024-01-05T18:03:26.304806",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.293932",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Load the coefficient table for this gene pair, then give each column a `_coef`\n",
+ "# suffix (the `cm` column becomes `ccc_coef`).\n",
+ "res_all = pd.read_pickle(GENE_PAIR_INPUT_DIR / \"coef_values.pkl\")\n",
+ "res_all = res_all.rename(\n",
+ "    columns=dict(cm=\"ccc_coef\", pearson=\"pearson_coef\", spearman=\"spearman_coef\")\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "1425c2ab-e665-429c-a9c8-50b585fc221e",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.317421Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.317166Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.320277Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.319880Z"
+ },
+ "papermill": {
+ "duration": 0.010262,
+ "end_time": "2024-01-05T18:03:26.321074",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.310812",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(50, 3)"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "res_all.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "ee21a161-eaae-4242-904f-82f373616064",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.333956Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.333608Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.341071Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.340670Z"
+ },
+ "papermill": {
+ "duration": 0.014695,
+ "end_time": "2024-01-05T18:03:26.341853",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.327158",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ccc_coef | \n",
+ " pearson_coef | \n",
+ " spearman_coef | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " colon_transverse | \n",
+ " 0.336727 | \n",
+ " -0.517899 | \n",
+ " -0.408343 | \n",
+ "
\n",
+ " \n",
+ " brain_amygdala | \n",
+ " 0.280524 | \n",
+ " 0.037541 | \n",
+ " 0.147571 | \n",
+ "
\n",
+ " \n",
+ " artery_coronary | \n",
+ " 0.274554 | \n",
+ " -0.413862 | \n",
+ " -0.391764 | \n",
+ "
\n",
+ " \n",
+ " artery_aorta | \n",
+ " 0.429771 | \n",
+ " -0.485788 | \n",
+ " -0.363510 | \n",
+ "
\n",
+ " \n",
+ " adrenal_gland | \n",
+ " 0.260197 | \n",
+ " -0.459190 | \n",
+ " -0.354190 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ccc_coef pearson_coef spearman_coef\n",
+ "colon_transverse 0.336727 -0.517899 -0.408343\n",
+ "brain_amygdala 0.280524 0.037541 0.147571\n",
+ "artery_coronary 0.274554 -0.413862 -0.391764\n",
+ "artery_aorta 0.429771 -0.485788 -0.363510\n",
+ "adrenal_gland 0.260197 -0.459190 -0.354190"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "res_all.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f55c1996-9eaa-48c6-a376-8b9255971af0",
+ "metadata": {
+ "papermill": {
+ "duration": 0.006081,
+ "end_time": "2024-01-05T18:03:26.354171",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.348090",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "## P-values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "58ef1571-13a9-4406-a991-fc2703f74095",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.366973Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.366848Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.370381Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.369964Z"
+ },
+ "papermill": {
+ "duration": 0.010868,
+ "end_time": "2024-01-05T18:03:26.371159",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.360291",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Load the p-value table for this gene pair, then give each column a `_pvalue`\n",
+ "# suffix (the `cm` column becomes `ccc_pvalue`).\n",
+ "res_pval_all = pd.read_pickle(GENE_PAIR_INPUT_DIR / \"coef_pvalues.pkl\")\n",
+ "res_pval_all = res_pval_all.rename(\n",
+ "    columns=dict(\n",
+ "        cm=\"ccc_pvalue\", pearson=\"pearson_pvalue\", spearman=\"spearman_pvalue\"\n",
+ "    )\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "aa2385b5-9914-424b-8d47-cc45a323e83c",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.384306Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.383996Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.387279Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.386885Z"
+ },
+ "papermill": {
+ "duration": 0.010673,
+ "end_time": "2024-01-05T18:03:26.388046",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.377373",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(50, 3)"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "res_pval_all.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "734fc9d9-17d4-422f-9b1d-45dbe1a76263",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.401353Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.401042Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.407260Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.406902Z"
+ },
+ "papermill": {
+ "duration": 0.013613,
+ "end_time": "2024-01-05T18:03:26.407996",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.394383",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ccc_pvalue | \n",
+ " pearson_pvalue | \n",
+ " spearman_pvalue | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " colon_transverse | \n",
+ " 9.999990e-07 | \n",
+ " 3.063714e-29 | \n",
+ " 9.539164e-18 | \n",
+ "
\n",
+ " \n",
+ " brain_amygdala | \n",
+ " 9.999990e-07 | \n",
+ " 6.461089e-01 | \n",
+ " 6.963023e-02 | \n",
+ "
\n",
+ " \n",
+ " artery_coronary | \n",
+ " 9.999990e-07 | \n",
+ " 2.389970e-11 | \n",
+ " 3.159321e-10 | \n",
+ "
\n",
+ " \n",
+ " artery_aorta | \n",
+ " 9.999990e-07 | \n",
+ " 5.775754e-27 | \n",
+ " 6.092383e-15 | \n",
+ "
\n",
+ " \n",
+ " adrenal_gland | \n",
+ " 9.999990e-07 | \n",
+ " 7.334489e-15 | \n",
+ " 4.847677e-09 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ccc_pvalue pearson_pvalue spearman_pvalue\n",
+ "colon_transverse 9.999990e-07 3.063714e-29 9.539164e-18\n",
+ "brain_amygdala 9.999990e-07 6.461089e-01 6.963023e-02\n",
+ "artery_coronary 9.999990e-07 2.389970e-11 3.159321e-10\n",
+ "artery_aorta 9.999990e-07 5.775754e-27 6.092383e-15\n",
+ "adrenal_gland 9.999990e-07 7.334489e-15 4.847677e-09"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "res_pval_all.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "47367871-fc3c-463e-b2b8-d6dce217bd23",
+ "metadata": {
+ "papermill": {
+ "duration": 0.006276,
+ "end_time": "2024-01-05T18:03:26.420645",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.414369",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "## Combine"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "6faf9276-9683-413c-8393-4e689da262f0",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.433903Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.433784Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.439299Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.438904Z"
+ },
+ "papermill": {
+ "duration": 0.013049,
+ "end_time": "2024-01-05T18:03:26.440072",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.427023",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Align coefficients and p-values on their shared index, then expose that index\n",
+ "# as a regular `tissue` column.\n",
+ "df = (\n",
+ "    res_all.join(res_pval_all, how=\"inner\")\n",
+ "    .rename_axis(\"tissue\")\n",
+ "    .reset_index()\n",
+ ")\n",
+ "\n",
+ "# The inner join must neither drop nor duplicate rows of either input.\n",
+ "assert len(df) == len(res_all)\n",
+ "assert len(df) == len(res_pval_all)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "1cd14e69-5adb-48d6-98a1-9b7567a24ea3",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.453711Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.453386Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.458014Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.457622Z"
+ },
+ "papermill": {
+ "duration": 0.012287,
+ "end_time": "2024-01-05T18:03:26.458780",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.446493",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Prepend the gene identifiers as the first four columns.\n",
+ "# `DataFrame.insert` raises ValueError when the column already exists, so drop any\n",
+ "# copies left by a previous run first; this keeps the cell safe to re-run.\n",
+ "df = df.drop(\n",
+ "    columns=[\"gene0_id\", \"gene1_id\", \"gene0_symbol\", \"gene1_symbol\"],\n",
+ "    errors=\"ignore\",\n",
+ ")\n",
+ "df.insert(0, \"gene0_id\", gene0_id)\n",
+ "df.insert(1, \"gene1_id\", gene1_id)\n",
+ "df.insert(2, \"gene0_symbol\", gene0_symbol)\n",
+ "df.insert(3, \"gene1_symbol\", gene1_symbol)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "598c8dfd-da7a-4d9d-9a8c-02244c9f8f1a",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.472413Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.471994Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.475346Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.474976Z"
+ },
+ "papermill": {
+ "duration": 0.01092,
+ "end_time": "2024-01-05T18:03:26.476114",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.465194",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(50, 11)"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "4c4e4515-1f2a-4f15-ac0d-b292b17a9115",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.489694Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.489548Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.501596Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.501100Z"
+ },
+ "papermill": {
+ "duration": 0.019818,
+ "end_time": "2024-01-05T18:03:26.502420",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.482602",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gene0_id | \n",
+ " gene1_id | \n",
+ " gene0_symbol | \n",
+ " gene1_symbol | \n",
+ " tissue | \n",
+ " ccc_coef | \n",
+ " pearson_coef | \n",
+ " spearman_coef | \n",
+ " ccc_pvalue | \n",
+ " pearson_pvalue | \n",
+ " spearman_pvalue | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " colon_transverse | \n",
+ " 0.336727 | \n",
+ " -0.517899 | \n",
+ " -0.408343 | \n",
+ " 9.999990e-07 | \n",
+ " 3.063714e-29 | \n",
+ " 9.539164e-18 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " brain_amygdala | \n",
+ " 0.280524 | \n",
+ " 0.037541 | \n",
+ " 0.147571 | \n",
+ " 9.999990e-07 | \n",
+ " 6.461089e-01 | \n",
+ " 6.963023e-02 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " artery_coronary | \n",
+ " 0.274554 | \n",
+ " -0.413862 | \n",
+ " -0.391764 | \n",
+ " 9.999990e-07 | \n",
+ " 2.389970e-11 | \n",
+ " 3.159321e-10 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " artery_aorta | \n",
+ " 0.429771 | \n",
+ " -0.485788 | \n",
+ " -0.363510 | \n",
+ " 9.999990e-07 | \n",
+ " 5.775754e-27 | \n",
+ " 6.092383e-15 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " adrenal_gland | \n",
+ " 0.260197 | \n",
+ " -0.459190 | \n",
+ " -0.354190 | \n",
+ " 9.999990e-07 | \n",
+ " 7.334489e-15 | \n",
+ " 4.847677e-09 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gene0_id gene1_id gene0_symbol gene1_symbol \\\n",
+ "0 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "1 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "2 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "3 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "4 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "\n",
+ " tissue ccc_coef pearson_coef spearman_coef ccc_pvalue \\\n",
+ "0 colon_transverse 0.336727 -0.517899 -0.408343 9.999990e-07 \n",
+ "1 brain_amygdala 0.280524 0.037541 0.147571 9.999990e-07 \n",
+ "2 artery_coronary 0.274554 -0.413862 -0.391764 9.999990e-07 \n",
+ "3 artery_aorta 0.429771 -0.485788 -0.363510 9.999990e-07 \n",
+ "4 adrenal_gland 0.260197 -0.459190 -0.354190 9.999990e-07 \n",
+ "\n",
+ " pearson_pvalue spearman_pvalue \n",
+ "0 3.063714e-29 9.539164e-18 \n",
+ "1 6.461089e-01 6.963023e-02 \n",
+ "2 2.389970e-11 3.159321e-10 \n",
+ "3 5.775754e-27 6.092383e-15 \n",
+ "4 7.334489e-15 4.847677e-09 "
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "4abcf3da-e636-4f0f-9f65-7ed5d1b96fff",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.516267Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.516109Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.518879Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.518376Z"
+ },
+ "papermill": {
+ "duration": 0.010665,
+ "end_time": "2024-01-05T18:03:26.519705",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.509040",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "df_list.append(df)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "89e7bc77-29a4-462f-8621-438079de42a6",
+ "metadata": {
+ "papermill": {
+ "duration": 0.00651,
+ "end_time": "2024-01-05T18:03:26.532866",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.526356",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "# KDM6A - DDX3Y"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "2d44475c-19cc-475f-86bd-0830b8ae2384",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.546966Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.546566Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.549884Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.549382Z"
+ },
+ "papermill": {
+ "duration": 0.011236,
+ "end_time": "2024-01-05T18:03:26.550691",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.539455",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Gene pair for this section (versioned Ensembl IDs and symbols).\n",
+ "gene0_id = \"ENSG00000147050.14\"\n",
+ "gene0_symbol = \"KDM6A\"\n",
+ "\n",
+ "gene1_id = \"ENSG00000067048.16\"\n",
+ "gene1_symbol = \"DDX3Y\"\n",
+ "\n",
+ "# The hard-coded IDs and symbols must agree with the ID -> symbol mapping.\n",
+ "assert gene_map[gene0_id] == gene0_symbol\n",
+ "assert gene_map[gene1_id] == gene1_symbol"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "8ec7ce72-8e84-4c3d-a3be-22fa4b9498fc",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.564774Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.564397Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.568343Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.567846Z"
+ },
+ "papermill": {
+ "duration": 0.011848,
+ "end_time": "2024-01-05T18:03:26.569175",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.557327",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "PosixPath('/opt/data/results/gtex_v8/other_tissues/kdm6a_vs_ddx3y')"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "GENE_PAIR_INPUT_DIR = INPUT_DIR / f\"{gene0_symbol.lower()}_vs_{gene1_symbol.lower()}\"\n",
+ "display(GENE_PAIR_INPUT_DIR)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "80011d74-4e2e-488e-a77d-29f7458dedf1",
+ "metadata": {
+ "papermill": {
+ "duration": 0.006649,
+ "end_time": "2024-01-05T18:03:26.582553",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.575904",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "## Correlation values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "9846f56e-1b62-43fa-9dfe-38638574a3d7",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.596940Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.596547Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.600610Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.600105Z"
+ },
+ "papermill": {
+ "duration": 0.012115,
+ "end_time": "2024-01-05T18:03:26.601450",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.589335",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Load the coefficient table for this gene pair, then give each column a `_coef`\n",
+ "# suffix (the `cm` column becomes `ccc_coef`).\n",
+ "res_all = pd.read_pickle(GENE_PAIR_INPUT_DIR / \"coef_values.pkl\")\n",
+ "res_all = res_all.rename(\n",
+ "    columns=dict(cm=\"ccc_coef\", pearson=\"pearson_coef\", spearman=\"spearman_coef\")\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "0778f0f4-a292-4f48-811d-17813cbde3bf",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.615758Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.615431Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.619215Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.618721Z"
+ },
+ "papermill": {
+ "duration": 0.011815,
+ "end_time": "2024-01-05T18:03:26.620038",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.608223",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(50, 3)"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "res_all.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "85b140ad-e4c8-41ba-8628-e7cd9d768228",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.634600Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.634188Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.641032Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.640645Z"
+ },
+ "papermill": {
+ "duration": 0.014937,
+ "end_time": "2024-01-05T18:03:26.641804",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.626867",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ccc_coef | \n",
+ " pearson_coef | \n",
+ " spearman_coef | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " colon_transverse | \n",
+ " 0.280814 | \n",
+ " -0.393881 | \n",
+ " -0.465898 | \n",
+ "
\n",
+ " \n",
+ " brain_amygdala | \n",
+ " 0.305677 | \n",
+ " 0.038732 | \n",
+ " 0.154331 | \n",
+ "
\n",
+ " \n",
+ " artery_coronary | \n",
+ " 0.242720 | \n",
+ " -0.487610 | \n",
+ " -0.426164 | \n",
+ "
\n",
+ " \n",
+ " artery_aorta | \n",
+ " 0.381970 | \n",
+ " -0.579236 | \n",
+ " -0.409761 | \n",
+ "
\n",
+ " \n",
+ " adrenal_gland | \n",
+ " 0.188929 | \n",
+ " -0.489400 | \n",
+ " -0.418784 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ccc_coef pearson_coef spearman_coef\n",
+ "colon_transverse 0.280814 -0.393881 -0.465898\n",
+ "brain_amygdala 0.305677 0.038732 0.154331\n",
+ "artery_coronary 0.242720 -0.487610 -0.426164\n",
+ "artery_aorta 0.381970 -0.579236 -0.409761\n",
+ "adrenal_gland 0.188929 -0.489400 -0.418784"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "res_all.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2800d481-53ad-4706-9a5c-a28d5a1a0b70",
+ "metadata": {
+ "papermill": {
+ "duration": 0.006796,
+ "end_time": "2024-01-05T18:03:26.655533",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.648737",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "## P-values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "6478e953-b946-4eac-9101-237a18f1d6c6",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.670427Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.670068Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.674128Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.673730Z"
+ },
+ "papermill": {
+ "duration": 0.01242,
+ "end_time": "2024-01-05T18:03:26.674897",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.662477",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Load the p-values for this gene pair and suffix each method's column\n",
+ "# with `_pvalue` so it stays distinct from the coefficient columns.\n",
+ "res_pval_all = (\n",
+ "    pd.read_pickle(GENE_PAIR_INPUT_DIR / \"coef_pvalues.pkl\")\n",
+ "    .rename(\n",
+ "        columns={\n",
+ "            \"cm\": \"ccc_pvalue\",\n",
+ "            \"pearson\": \"pearson_pvalue\",\n",
+ "            \"spearman\": \"spearman_pvalue\",\n",
+ "        }\n",
+ "    )\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "16f10b8e-9233-4f68-ad90-76accc6dc760",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.689596Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.689276Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.692706Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.692311Z"
+ },
+ "papermill": {
+ "duration": 0.011632,
+ "end_time": "2024-01-05T18:03:26.693485",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.681853",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(50, 3)"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "res_pval_all.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "4226d1a6-94fa-463a-8d4a-539cdf667028",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.708163Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.707850Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.715232Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.714850Z"
+ },
+ "papermill": {
+ "duration": 0.015561,
+ "end_time": "2024-01-05T18:03:26.716006",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.700445",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ccc_pvalue | \n",
+ " pearson_pvalue | \n",
+ " spearman_pvalue | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " colon_transverse | \n",
+ " 9.999990e-07 | \n",
+ " 1.613504e-16 | \n",
+ " 2.880714e-23 | \n",
+ "
\n",
+ " \n",
+ " brain_amygdala | \n",
+ " 9.999990e-07 | \n",
+ " 6.356755e-01 | \n",
+ " 5.764275e-02 | \n",
+ "
\n",
+ " \n",
+ " artery_coronary | \n",
+ " 9.999990e-07 | \n",
+ " 9.731623e-16 | \n",
+ " 5.220895e-12 | \n",
+ "
\n",
+ " \n",
+ " artery_aorta | \n",
+ " 9.999990e-07 | \n",
+ " 4.513966e-40 | \n",
+ " 6.380372e-19 | \n",
+ "
\n",
+ " \n",
+ " adrenal_gland | \n",
+ " 9.999990e-07 | \n",
+ " 6.058615e-17 | \n",
+ " 2.230048e-12 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ccc_pvalue pearson_pvalue spearman_pvalue\n",
+ "colon_transverse 9.999990e-07 1.613504e-16 2.880714e-23\n",
+ "brain_amygdala 9.999990e-07 6.356755e-01 5.764275e-02\n",
+ "artery_coronary 9.999990e-07 9.731623e-16 5.220895e-12\n",
+ "artery_aorta 9.999990e-07 4.513966e-40 6.380372e-19\n",
+ "adrenal_gland 9.999990e-07 6.058615e-17 2.230048e-12"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "res_pval_all.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "696a2c33-09cc-4114-9457-56491a120d78",
+ "metadata": {
+ "papermill": {
+ "duration": 0.007042,
+ "end_time": "2024-01-05T18:03:26.730163",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.723121",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "## Combine"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "082566e1-c1c7-41a4-b7ce-48c8023cfe91",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.744980Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.744832Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.749668Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.749264Z"
+ },
+ "papermill": {
+ "duration": 0.013207,
+ "end_time": "2024-01-05T18:03:26.750429",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.737222",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Align coefficients and p-values on their shared index, then expose that\n",
+ "# index as a regular `tissue` column.\n",
+ "df = (\n",
+ "    res_all.join(res_pval_all, how=\"inner\")\n",
+ "    .rename_axis(\"tissue\")\n",
+ "    .reset_index()\n",
+ ")\n",
+ "\n",
+ "# the inner join must not have dropped rows from either input\n",
+ "assert len(df) == len(res_all)\n",
+ "assert len(df) == len(res_pval_all)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "f375b8ab-e1c7-46d4-9ee8-eea4a4786af4",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.765567Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.765246Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.769822Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.769416Z"
+ },
+ "papermill": {
+ "duration": 0.012933,
+ "end_time": "2024-01-05T18:03:26.770586",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.757653",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Put the gene pair identifiers in the first four columns, IDs before symbols.\n",
+ "df.insert(loc=0, column=\"gene0_id\", value=gene0_id)\n",
+ "df.insert(loc=1, column=\"gene1_id\", value=gene1_id)\n",
+ "df.insert(loc=2, column=\"gene0_symbol\", value=gene0_symbol)\n",
+ "df.insert(loc=3, column=\"gene1_symbol\", value=gene1_symbol)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "2cbc5f21-af66-4665-acb2-c7f18a484d01",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.785491Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.785344Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.788707Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.788310Z"
+ },
+ "papermill": {
+ "duration": 0.011692,
+ "end_time": "2024-01-05T18:03:26.789488",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.777796",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(50, 11)"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "e478f0df-9fd6-469a-a4cf-1c379f708d27",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.804743Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.804418Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.816204Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.815816Z"
+ },
+ "papermill": {
+ "duration": 0.020198,
+ "end_time": "2024-01-05T18:03:26.816990",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.796792",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gene0_id | \n",
+ " gene1_id | \n",
+ " gene0_symbol | \n",
+ " gene1_symbol | \n",
+ " tissue | \n",
+ " ccc_coef | \n",
+ " pearson_coef | \n",
+ " spearman_coef | \n",
+ " ccc_pvalue | \n",
+ " pearson_pvalue | \n",
+ " spearman_pvalue | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000067048.16 | \n",
+ " KDM6A | \n",
+ " DDX3Y | \n",
+ " colon_transverse | \n",
+ " 0.280814 | \n",
+ " -0.393881 | \n",
+ " -0.465898 | \n",
+ " 9.999990e-07 | \n",
+ " 1.613504e-16 | \n",
+ " 2.880714e-23 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000067048.16 | \n",
+ " KDM6A | \n",
+ " DDX3Y | \n",
+ " brain_amygdala | \n",
+ " 0.305677 | \n",
+ " 0.038732 | \n",
+ " 0.154331 | \n",
+ " 9.999990e-07 | \n",
+ " 6.356755e-01 | \n",
+ " 5.764275e-02 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000067048.16 | \n",
+ " KDM6A | \n",
+ " DDX3Y | \n",
+ " artery_coronary | \n",
+ " 0.242720 | \n",
+ " -0.487610 | \n",
+ " -0.426164 | \n",
+ " 9.999990e-07 | \n",
+ " 9.731623e-16 | \n",
+ " 5.220895e-12 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000067048.16 | \n",
+ " KDM6A | \n",
+ " DDX3Y | \n",
+ " artery_aorta | \n",
+ " 0.381970 | \n",
+ " -0.579236 | \n",
+ " -0.409761 | \n",
+ " 9.999990e-07 | \n",
+ " 4.513966e-40 | \n",
+ " 6.380372e-19 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000067048.16 | \n",
+ " KDM6A | \n",
+ " DDX3Y | \n",
+ " adrenal_gland | \n",
+ " 0.188929 | \n",
+ " -0.489400 | \n",
+ " -0.418784 | \n",
+ " 9.999990e-07 | \n",
+ " 6.058615e-17 | \n",
+ " 2.230048e-12 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gene0_id gene1_id gene0_symbol gene1_symbol \\\n",
+ "0 ENSG00000147050.14 ENSG00000067048.16 KDM6A DDX3Y \n",
+ "1 ENSG00000147050.14 ENSG00000067048.16 KDM6A DDX3Y \n",
+ "2 ENSG00000147050.14 ENSG00000067048.16 KDM6A DDX3Y \n",
+ "3 ENSG00000147050.14 ENSG00000067048.16 KDM6A DDX3Y \n",
+ "4 ENSG00000147050.14 ENSG00000067048.16 KDM6A DDX3Y \n",
+ "\n",
+ " tissue ccc_coef pearson_coef spearman_coef ccc_pvalue \\\n",
+ "0 colon_transverse 0.280814 -0.393881 -0.465898 9.999990e-07 \n",
+ "1 brain_amygdala 0.305677 0.038732 0.154331 9.999990e-07 \n",
+ "2 artery_coronary 0.242720 -0.487610 -0.426164 9.999990e-07 \n",
+ "3 artery_aorta 0.381970 -0.579236 -0.409761 9.999990e-07 \n",
+ "4 adrenal_gland 0.188929 -0.489400 -0.418784 9.999990e-07 \n",
+ "\n",
+ " pearson_pvalue spearman_pvalue \n",
+ "0 1.613504e-16 2.880714e-23 \n",
+ "1 6.356755e-01 5.764275e-02 \n",
+ "2 9.731623e-16 5.220895e-12 \n",
+ "3 4.513966e-40 6.380372e-19 \n",
+ "4 6.058615e-17 2.230048e-12 "
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "d22d7b22-c73e-47eb-84d9-408292753242",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.832452Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.832158Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.834760Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.834328Z"
+ },
+ "papermill": {
+ "duration": 0.011232,
+ "end_time": "2024-01-05T18:03:26.835539",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.824307",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "df_list.append(df)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fa148b71-a756-4521-b935-74527b4ad649",
+ "metadata": {
+ "papermill": {
+ "duration": 0.007239,
+ "end_time": "2024-01-05T18:03:26.850166",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.842927",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "# Combine all gene pairs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "48e3dd9d-0f45-4480-a1bc-b6f829048ce8",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.865719Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.865387Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.869247Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.868799Z"
+ },
+ "papermill": {
+ "duration": 0.012543,
+ "end_time": "2024-01-05T18:03:26.870041",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.857498",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "df_final = pd.concat(df_list, ignore_index=True, axis=0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "d1299505-603f-4f67-99b9-e77911878253",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.885614Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.885287Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.889540Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.889128Z"
+ },
+ "papermill": {
+ "duration": 0.012858,
+ "end_time": "2024-01-05T18:03:26.890318",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.877460",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(100, 11)"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# The concatenation must keep every row of every per-pair frame...\n",
+ "assert len(df_final) == sum(len(d) for d in df_list)\n",
+ "\n",
+ "# ...and must not have added or lost columns relative to any of them.\n",
+ "for d in df_list:\n",
+ "    assert d.shape[1] == df_final.shape[1]\n",
+ "\n",
+ "display(df_final.shape)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "c257fb94-a76d-4739-b5a1-a92be639be14",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.906082Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.905791Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.921947Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.921538Z"
+ },
+ "papermill": {
+ "duration": 0.024883,
+ "end_time": "2024-01-05T18:03:26.922717",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.897834",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gene0_id | \n",
+ " gene1_id | \n",
+ " gene0_symbol | \n",
+ " gene1_symbol | \n",
+ " tissue | \n",
+ " ccc_coef | \n",
+ " pearson_coef | \n",
+ " spearman_coef | \n",
+ " ccc_pvalue | \n",
+ " pearson_pvalue | \n",
+ " spearman_pvalue | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " colon_transverse | \n",
+ " 0.336727 | \n",
+ " -0.517899 | \n",
+ " -0.408343 | \n",
+ " 9.999990e-07 | \n",
+ " 3.063714e-29 | \n",
+ " 9.539164e-18 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " brain_amygdala | \n",
+ " 0.280524 | \n",
+ " 0.037541 | \n",
+ " 0.147571 | \n",
+ " 9.999990e-07 | \n",
+ " 6.461089e-01 | \n",
+ " 6.963023e-02 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " artery_coronary | \n",
+ " 0.274554 | \n",
+ " -0.413862 | \n",
+ " -0.391764 | \n",
+ " 9.999990e-07 | \n",
+ " 2.389970e-11 | \n",
+ " 3.159321e-10 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " artery_aorta | \n",
+ " 0.429771 | \n",
+ " -0.485788 | \n",
+ " -0.363510 | \n",
+ " 9.999990e-07 | \n",
+ " 5.775754e-27 | \n",
+ " 6.092383e-15 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " adrenal_gland | \n",
+ " 0.260197 | \n",
+ " -0.459190 | \n",
+ " -0.354190 | \n",
+ " 9.999990e-07 | \n",
+ " 7.334489e-15 | \n",
+ " 4.847677e-09 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 95 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000067048.16 | \n",
+ " KDM6A | \n",
+ " DDX3Y | \n",
+ " artery_tibial | \n",
+ " 0.298440 | \n",
+ " -0.617718 | \n",
+ " -0.387765 | \n",
+ " 9.999990e-07 | \n",
+ " 5.248493e-71 | \n",
+ " 3.246061e-25 | \n",
+ "
\n",
+ " \n",
+ " 96 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000067048.16 | \n",
+ " KDM6A | \n",
+ " DDX3Y | \n",
+ " brain_hypothalamus | \n",
+ " 0.232632 | \n",
+ " 0.118391 | \n",
+ " 0.251149 | \n",
+ " 9.999990e-07 | \n",
+ " 9.332407e-02 | \n",
+ " 3.117929e-04 | \n",
+ "
\n",
+ " \n",
+ " 97 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000067048.16 | \n",
+ " KDM6A | \n",
+ " DDX3Y | \n",
+ " lung | \n",
+ " 0.289771 | \n",
+ " -0.252442 | \n",
+ " -0.224470 | \n",
+ " 9.999990e-07 | \n",
+ " 7.462864e-10 | \n",
+ " 4.905714e-08 | \n",
+ "
\n",
+ " \n",
+ " 98 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000067048.16 | \n",
+ " KDM6A | \n",
+ " DDX3Y | \n",
+ " brain_cerebellum | \n",
+ " 0.219113 | \n",
+ " -0.106469 | \n",
+ " 0.034902 | \n",
+ " 9.999990e-07 | \n",
+ " 9.916004e-02 | \n",
+ " 5.897648e-01 | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000067048.16 | \n",
+ " KDM6A | \n",
+ " DDX3Y | \n",
+ " brain_hippocampus | \n",
+ " 0.218583 | \n",
+ " 0.192953 | \n",
+ " 0.297004 | \n",
+ " 9.999990e-07 | \n",
+ " 6.596327e-03 | \n",
+ " 2.253662e-05 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
100 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gene0_id gene1_id gene0_symbol gene1_symbol \\\n",
+ "0 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "1 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "2 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "3 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "4 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ ".. ... ... ... ... \n",
+ "95 ENSG00000147050.14 ENSG00000067048.16 KDM6A DDX3Y \n",
+ "96 ENSG00000147050.14 ENSG00000067048.16 KDM6A DDX3Y \n",
+ "97 ENSG00000147050.14 ENSG00000067048.16 KDM6A DDX3Y \n",
+ "98 ENSG00000147050.14 ENSG00000067048.16 KDM6A DDX3Y \n",
+ "99 ENSG00000147050.14 ENSG00000067048.16 KDM6A DDX3Y \n",
+ "\n",
+ " tissue ccc_coef pearson_coef spearman_coef ccc_pvalue \\\n",
+ "0 colon_transverse 0.336727 -0.517899 -0.408343 9.999990e-07 \n",
+ "1 brain_amygdala 0.280524 0.037541 0.147571 9.999990e-07 \n",
+ "2 artery_coronary 0.274554 -0.413862 -0.391764 9.999990e-07 \n",
+ "3 artery_aorta 0.429771 -0.485788 -0.363510 9.999990e-07 \n",
+ "4 adrenal_gland 0.260197 -0.459190 -0.354190 9.999990e-07 \n",
+ ".. ... ... ... ... ... \n",
+ "95 artery_tibial 0.298440 -0.617718 -0.387765 9.999990e-07 \n",
+ "96 brain_hypothalamus 0.232632 0.118391 0.251149 9.999990e-07 \n",
+ "97 lung 0.289771 -0.252442 -0.224470 9.999990e-07 \n",
+ "98 brain_cerebellum 0.219113 -0.106469 0.034902 9.999990e-07 \n",
+ "99 brain_hippocampus 0.218583 0.192953 0.297004 9.999990e-07 \n",
+ "\n",
+ " pearson_pvalue spearman_pvalue \n",
+ "0 3.063714e-29 9.539164e-18 \n",
+ "1 6.461089e-01 6.963023e-02 \n",
+ "2 2.389970e-11 3.159321e-10 \n",
+ "3 5.775754e-27 6.092383e-15 \n",
+ "4 7.334489e-15 4.847677e-09 \n",
+ ".. ... ... \n",
+ "95 5.248493e-71 3.246061e-25 \n",
+ "96 9.332407e-02 3.117929e-04 \n",
+ "97 7.462864e-10 4.905714e-08 \n",
+ "98 9.916004e-02 5.897648e-01 \n",
+ "99 6.596327e-03 2.253662e-05 \n",
+ "\n",
+ "[100 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_final"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "74b046a0-52ed-4134-a572-8aa8dc540052",
+ "metadata": {
+ "papermill": {
+ "duration": 0.007531,
+ "end_time": "2024-01-05T18:03:26.937944",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.930413",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "# Save"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "dc6d6873-cc91-4fb9-b4d6-9b9607f0100e",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.954259Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.953833Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.956359Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.955974Z"
+ },
+ "papermill": {
+ "duration": 0.011526,
+ "end_time": "2024-01-05T18:03:26.957179",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.945653",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "data = df_final"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "id": "0b24557a-e3e9-4527-8551-56b7dee86acc",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.973456Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.973142Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.977510Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.977134Z"
+ },
+ "papermill": {
+ "duration": 0.013278,
+ "end_time": "2024-01-05T18:03:26.978284",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.965006",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "dtype('int64')"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "RangeIndex(start=0, stop=100, step=1)"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Show the index dtype and the index itself before saving.\n",
+ "display(data.index.dtype, data.index)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "9773177e-a637-41cb-b66b-42a4ffd15a33",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:26.994753Z",
+ "iopub.status.busy": "2024-01-05T18:03:26.994434Z",
+ "iopub.status.idle": "2024-01-05T18:03:26.997355Z",
+ "shell.execute_reply": "2024-01-05T18:03:26.996935Z"
+ },
+ "papermill": {
+ "duration": 0.012005,
+ "end_time": "2024-01-05T18:03:26.998150",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:26.986145",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# reset index to avoid problems with MultiIndex in Pandas\n",
+ "if isinstance(data.index, pd.MultiIndex):\n",
+ " display(\"MultiIndex\")\n",
+ " data = data.reset_index()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e5d04700-54cb-40c5-be15-39bc76eafd2f",
+ "metadata": {
+ "papermill": {
+ "duration": 0.007745,
+ "end_time": "2024-01-05T18:03:27.013744",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.005999",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "## Pickle"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "cda6751b-a78d-4164-b678-cd329a895011",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:27.030036Z",
+ "iopub.status.busy": "2024-01-05T18:03:27.029886Z",
+ "iopub.status.idle": "2024-01-05T18:03:27.033470Z",
+ "shell.execute_reply": "2024-01-05T18:03:27.033074Z"
+ },
+ "papermill": {
+ "duration": 0.012664,
+ "end_time": "2024-01-05T18:03:27.034245",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.021581",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "data.to_pickle(OUTPUT_DIR / f\"{OUTPUT_FILENAME}.pkl.gz\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "025e09a9-bd4f-4bc5-a69d-248d7ea8aacb",
+ "metadata": {
+ "papermill": {
+ "duration": 0.00784,
+ "end_time": "2024-01-05T18:03:27.050000",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.042160",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "## RDS"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "id": "c850c34d-ccf3-49ef-b737-0cf040ca77fb",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:27.066436Z",
+ "iopub.status.busy": "2024-01-05T18:03:27.066146Z",
+ "iopub.status.idle": "2024-01-05T18:03:27.069649Z",
+ "shell.execute_reply": "2024-01-05T18:03:27.069273Z"
+ },
+ "papermill": {
+ "duration": 0.012612,
+ "end_time": "2024-01-05T18:03:27.070415",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.057803",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "PosixPath('/opt/data/supplementary_material/Supplementary_File_03-Gene_pairs_correlations_all_GTEx_tissues.rds')"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "output_file = OUTPUT_DIR / f\"{OUTPUT_FILENAME}.rds\"\n",
+ "display(output_file)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "33c565ae-2710-4816-91d1-4376ee36a8a3",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:27.086800Z",
+ "iopub.status.busy": "2024-01-05T18:03:27.086653Z",
+ "iopub.status.idle": "2024-01-05T18:03:27.100502Z",
+ "shell.execute_reply": "2024-01-05T18:03:27.100060Z"
+ },
+ "papermill": {
+ "duration": 0.023,
+ "end_time": "2024-01-05T18:03:27.101293",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.078293",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "with localconverter(ro.default_converter + pandas2ri.converter):\n",
+ " data_r = ro.conversion.py2rpy(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "91cfc710-343a-410d-9782-91eb5ca4623e",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:27.117802Z",
+ "iopub.status.busy": "2024-01-05T18:03:27.117649Z",
+ "iopub.status.idle": "2024-01-05T18:03:27.127159Z",
+ "shell.execute_reply": "2024-01-05T18:03:27.126739Z"
+ },
+ "papermill": {
+ "duration": 0.018722,
+ "end_time": "2024-01-05T18:03:27.127926",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.109204",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " R/rpy2 DataFrame (100 x 11)\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " gene0_id | \n",
+ " \n",
+ " gene1_id | \n",
+ " \n",
+ " gene0_symbol | \n",
+ " \n",
+ " ... | \n",
+ " \n",
+ " ccc_pvalue | \n",
+ " \n",
+ " pearson_pvalue | \n",
+ " \n",
+ " spearman_pvalue | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 'ENSG0000...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 'ENSG0000...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 'KDM6A'\n",
+ " | \n",
+ " \n",
+ " \n",
+ " ...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.000001\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.000000\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.000000\n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 'ENSG0000...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 'ENSG0000...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 'KDM6A'\n",
+ " | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.000001\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.646109\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.069630\n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 'ENSG0000...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 'ENSG0000...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 'KDM6A'\n",
+ " | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.000001\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.000000\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.000000\n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 'ENSG0000...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 'ENSG0000...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 'KDM6A'\n",
+ " | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.000001\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.000000\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.000000\n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " ...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " ...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " ...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " ...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " ...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " ...\n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 'ENSG0000...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 'ENSG0000...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 'KDM6A'\n",
+ " | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.000001\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.093324\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.000312\n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 'ENSG0000...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 'ENSG0000...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 'KDM6A'\n",
+ " | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.000001\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.000000\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.000000\n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 'ENSG0000...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 'ENSG0000...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 'KDM6A'\n",
+ " | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.000001\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.099160\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.589765\n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 'ENSG0000...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 'ENSG0000...\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 'KDM6A'\n",
+ " | \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.000001\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.006596\n",
+ " | \n",
+ " \n",
+ " \n",
+ " 0.000023\n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " [RTYPES.VECSXP]\n",
+ "R classes: ('data.frame',)\n",
+ "[StrSexpVe..., StrSexpVe..., StrSexpVe..., StrSexpVe..., ..., FloatSexp..., FloatSexp..., FloatSexp..., FloatSexp...]\n",
+ " gene0_id: \n",
+ " [RTYPES.STRSXP]\n",
+ "R classes: ('character',)\n",
+ "['ENSG0000..., 'ENSG0000..., 'ENSG0000..., 'ENSG0000..., ..., 'ENSG0000..., 'ENSG0000..., 'ENSG0000..., 'ENSG0000...]\n",
+ " gene1_id: \n",
+ " [RTYPES.STRSXP]\n",
+ "R classes: ('character',)\n",
+ "['ENSG0000..., 'ENSG0000..., 'ENSG0000..., 'ENSG0000..., ..., 'ENSG0000..., 'ENSG0000..., 'ENSG0000..., 'ENSG0000...]\n",
+ " gene0_symbol: \n",
+ " [RTYPES.STRSXP]\n",
+ "R classes: ('character',)\n",
+ "['KDM6A', 'KDM6A', 'KDM6A', 'KDM6A', ..., 'KDM6A', 'KDM6A', 'KDM6A', 'KDM6A']\n",
+ " gene1_symbol: \n",
+ " [RTYPES.STRSXP]\n",
+ "R classes: ('character',)\n",
+ "['UTY', 'UTY', 'UTY', 'UTY', ..., 'DDX3Y', 'DDX3Y', 'DDX3Y', 'DDX3Y']\n",
+ "...\n",
+ " ccc_coef: \n",
+ " [RTYPES.REALSXP]\n",
+ "R classes: ('numeric',)\n",
+ "[-0.408343, 0.147571, -0.391764, -0.363510, ..., 0.251149, -0.224470, 0.034902, 0.297004]\n",
+ " pearson_coef: \n",
+ " [RTYPES.REALSXP]\n",
+ "R classes: ('numeric',)\n",
+ "[0.000001, 0.000001, 0.000001, 0.000001, ..., 0.000001, 0.000001, 0.000001, 0.000001]\n",
+ " spearman_coef: \n",
+ " [RTYPES.REALSXP]\n",
+ "R classes: ('numeric',)\n",
+ "[0.000000, 0.646109, 0.000000, 0.000000, ..., 0.093324, 0.000000, 0.099160, 0.006596]\n",
+ " ccc_pvalue: \n",
+ " [RTYPES.REALSXP]\n",
+ "R classes: ('numeric',)\n",
+ "[0.000000, 0.069630, 0.000000, 0.000000, ..., 0.000312, 0.000000, 0.589765, 0.000023]"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data_r"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "a41d10b1-8cc3-4043-a8e5-9aa8b2bdd8b0",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:27.145238Z",
+ "iopub.status.busy": "2024-01-05T18:03:27.144949Z",
+ "iopub.status.idle": "2024-01-05T18:03:27.149680Z",
+ "shell.execute_reply": "2024-01-05T18:03:27.149298Z"
+ },
+ "papermill": {
+ "duration": 0.01432,
+ "end_time": "2024-01-05T18:03:27.150456",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.136136",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " [RTYPES.NILSXP]"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "saveRDS(data_r, str(output_file))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "18a5dfc2-97dd-4460-9ff0-0b924556d804",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:27.168002Z",
+ "iopub.status.busy": "2024-01-05T18:03:27.167621Z",
+ "iopub.status.idle": "2024-01-05T18:03:27.170759Z",
+ "shell.execute_reply": "2024-01-05T18:03:27.170379Z"
+ },
+ "papermill": {
+ "duration": 0.012739,
+ "end_time": "2024-01-05T18:03:27.171524",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.158785",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# testing: load the rds file again\n",
+ "data_r = readRDS(str(output_file))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "e70a4d0a-4439-47c6-b422-6ae58e0ea3da",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:27.189148Z",
+ "iopub.status.busy": "2024-01-05T18:03:27.188839Z",
+ "iopub.status.idle": "2024-01-05T18:03:27.197286Z",
+ "shell.execute_reply": "2024-01-05T18:03:27.196883Z"
+ },
+ "papermill": {
+ "duration": 0.018104,
+ "end_time": "2024-01-05T18:03:27.198077",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.179973",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "with localconverter(ro.default_converter + pandas2ri.converter):\n",
+ " data_again = ro.conversion.rpy2py(data_r)\n",
+ " data_again.index = data_again.index.astype(int)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "id": "6e423577-e429-4ffb-b5d5-01968026c1f6",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:27.215628Z",
+ "iopub.status.busy": "2024-01-05T18:03:27.215479Z",
+ "iopub.status.idle": "2024-01-05T18:03:27.219038Z",
+ "shell.execute_reply": "2024-01-05T18:03:27.218658Z"
+ },
+ "papermill": {
+ "duration": 0.013255,
+ "end_time": "2024-01-05T18:03:27.219800",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.206545",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(100, 11)"
+ ]
+ },
+ "execution_count": 52,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data_again.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "id": "2c7aa161-a499-4df3-aca0-2cbee5cbfa5d",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:27.237538Z",
+ "iopub.status.busy": "2024-01-05T18:03:27.237289Z",
+ "iopub.status.idle": "2024-01-05T18:03:27.248950Z",
+ "shell.execute_reply": "2024-01-05T18:03:27.248559Z"
+ },
+ "papermill": {
+ "duration": 0.02144,
+ "end_time": "2024-01-05T18:03:27.249765",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.228325",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gene0_id | \n",
+ " gene1_id | \n",
+ " gene0_symbol | \n",
+ " gene1_symbol | \n",
+ " tissue | \n",
+ " ccc_coef | \n",
+ " pearson_coef | \n",
+ " spearman_coef | \n",
+ " ccc_pvalue | \n",
+ " pearson_pvalue | \n",
+ " spearman_pvalue | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " colon_transverse | \n",
+ " 0.336727 | \n",
+ " -0.517899 | \n",
+ " -0.408343 | \n",
+ " 9.999990e-07 | \n",
+ " 3.063714e-29 | \n",
+ " 9.539164e-18 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " brain_amygdala | \n",
+ " 0.280524 | \n",
+ " 0.037541 | \n",
+ " 0.147571 | \n",
+ " 9.999990e-07 | \n",
+ " 6.461089e-01 | \n",
+ " 6.963023e-02 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " artery_coronary | \n",
+ " 0.274554 | \n",
+ " -0.413862 | \n",
+ " -0.391764 | \n",
+ " 9.999990e-07 | \n",
+ " 2.389970e-11 | \n",
+ " 3.159321e-10 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " artery_aorta | \n",
+ " 0.429771 | \n",
+ " -0.485788 | \n",
+ " -0.363510 | \n",
+ " 9.999990e-07 | \n",
+ " 5.775754e-27 | \n",
+ " 6.092383e-15 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " adrenal_gland | \n",
+ " 0.260197 | \n",
+ " -0.459190 | \n",
+ " -0.354190 | \n",
+ " 9.999990e-07 | \n",
+ " 7.334489e-15 | \n",
+ " 4.847677e-09 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gene0_id gene1_id gene0_symbol gene1_symbol \\\n",
+ "0 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "1 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "2 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "3 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "4 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "\n",
+ " tissue ccc_coef pearson_coef spearman_coef ccc_pvalue \\\n",
+ "0 colon_transverse 0.336727 -0.517899 -0.408343 9.999990e-07 \n",
+ "1 brain_amygdala 0.280524 0.037541 0.147571 9.999990e-07 \n",
+ "2 artery_coronary 0.274554 -0.413862 -0.391764 9.999990e-07 \n",
+ "3 artery_aorta 0.429771 -0.485788 -0.363510 9.999990e-07 \n",
+ "4 adrenal_gland 0.260197 -0.459190 -0.354190 9.999990e-07 \n",
+ "\n",
+ " pearson_pvalue spearman_pvalue \n",
+ "0 3.063714e-29 9.539164e-18 \n",
+ "1 6.461089e-01 6.963023e-02 \n",
+ "2 2.389970e-11 3.159321e-10 \n",
+ "3 5.775754e-27 6.092383e-15 \n",
+ "4 7.334489e-15 4.847677e-09 "
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data_again.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "id": "efa7326e-69d6-4fa4-b6eb-5fa0b502cbb2",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:27.267845Z",
+ "iopub.status.busy": "2024-01-05T18:03:27.267559Z",
+ "iopub.status.idle": "2024-01-05T18:03:27.275644Z",
+ "shell.execute_reply": "2024-01-05T18:03:27.275249Z"
+ },
+ "papermill": {
+ "duration": 0.01797,
+ "end_time": "2024-01-05T18:03:27.276441",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.258471",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# testing\n",
+ "pd.testing.assert_frame_equal(\n",
+ " data,\n",
+ " data_again,\n",
+ " check_dtype=False,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "50b97893-b49b-42a3-a798-3c94030ab0cf",
+ "metadata": {
+ "papermill": {
+ "duration": 0.008517,
+ "end_time": "2024-01-05T18:03:27.293584",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.285067",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "source": [
+ "## Text"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "id": "d548aff3-b850-4050-a21d-6f00b78c280b",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:27.311739Z",
+ "iopub.status.busy": "2024-01-05T18:03:27.311376Z",
+ "iopub.status.idle": "2024-01-05T18:03:27.314954Z",
+ "shell.execute_reply": "2024-01-05T18:03:27.314583Z"
+ },
+ "papermill": {
+ "duration": 0.0135,
+ "end_time": "2024-01-05T18:03:27.315737",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.302237",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "PosixPath('/opt/data/supplementary_material/Supplementary_File_03-Gene_pairs_correlations_all_GTEx_tissues.tsv')"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# tsv format\n",
+ "output_file = OUTPUT_DIR / f\"{OUTPUT_FILENAME}.tsv\"\n",
+ "display(output_file)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "id": "19a7cc7d-924b-49be-b11e-e782a601ba28",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:27.333879Z",
+ "iopub.status.busy": "2024-01-05T18:03:27.333557Z",
+ "iopub.status.idle": "2024-01-05T18:03:27.339776Z",
+ "shell.execute_reply": "2024-01-05T18:03:27.339388Z"
+ },
+ "papermill": {
+ "duration": 0.016214,
+ "end_time": "2024-01-05T18:03:27.340636",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.324422",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "data.to_csv(output_file, sep=\"\\t\", index=False, float_format=\"%.5e\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "id": "84321ddb-8e26-4d5e-93e1-7e1fdfaefefb",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:27.359032Z",
+ "iopub.status.busy": "2024-01-05T18:03:27.358610Z",
+ "iopub.status.idle": "2024-01-05T18:03:27.363727Z",
+ "shell.execute_reply": "2024-01-05T18:03:27.363337Z"
+ },
+ "papermill": {
+ "duration": 0.015122,
+ "end_time": "2024-01-05T18:03:27.364514",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.349392",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# testing\n",
+ "data2 = data # .copy()\n",
+ "# data2.index = list(range(0, data2.shape[0]))\n",
+ "\n",
+ "data_again = pd.read_csv(output_file, sep=\"\\t\", index_col=None)\n",
+ "# data_again.index = data_again.index.map(lambda x: f\"{x:.2f}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "id": "388715f6-edd4-42a4-b1df-2aad3f580d0b",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:27.382743Z",
+ "iopub.status.busy": "2024-01-05T18:03:27.382452Z",
+ "iopub.status.idle": "2024-01-05T18:03:27.385813Z",
+ "shell.execute_reply": "2024-01-05T18:03:27.385438Z"
+ },
+ "papermill": {
+ "duration": 0.013362,
+ "end_time": "2024-01-05T18:03:27.386608",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.373246",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(100, 11)"
+ ]
+ },
+ "execution_count": 58,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data_again.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "id": "42224f4a-523d-48f2-b78c-a131b35766bc",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:27.404941Z",
+ "iopub.status.busy": "2024-01-05T18:03:27.404797Z",
+ "iopub.status.idle": "2024-01-05T18:03:27.416217Z",
+ "shell.execute_reply": "2024-01-05T18:03:27.415827Z"
+ },
+ "papermill": {
+ "duration": 0.021585,
+ "end_time": "2024-01-05T18:03:27.417034",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.395449",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gene0_id | \n",
+ " gene1_id | \n",
+ " gene0_symbol | \n",
+ " gene1_symbol | \n",
+ " tissue | \n",
+ " ccc_coef | \n",
+ " pearson_coef | \n",
+ " spearman_coef | \n",
+ " ccc_pvalue | \n",
+ " pearson_pvalue | \n",
+ " spearman_pvalue | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " colon_transverse | \n",
+ " 0.336727 | \n",
+ " -0.517899 | \n",
+ " -0.408343 | \n",
+ " 9.999990e-07 | \n",
+ " 3.063710e-29 | \n",
+ " 9.539160e-18 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " brain_amygdala | \n",
+ " 0.280524 | \n",
+ " 0.037541 | \n",
+ " 0.147571 | \n",
+ " 9.999990e-07 | \n",
+ " 6.461090e-01 | \n",
+ " 6.963020e-02 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " artery_coronary | \n",
+ " 0.274554 | \n",
+ " -0.413862 | \n",
+ " -0.391764 | \n",
+ " 9.999990e-07 | \n",
+ " 2.389970e-11 | \n",
+ " 3.159320e-10 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " artery_aorta | \n",
+ " 0.429771 | \n",
+ " -0.485788 | \n",
+ " -0.363510 | \n",
+ " 9.999990e-07 | \n",
+ " 5.775750e-27 | \n",
+ " 6.092380e-15 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " ENSG00000147050.14 | \n",
+ " ENSG00000183878.15 | \n",
+ " KDM6A | \n",
+ " UTY | \n",
+ " adrenal_gland | \n",
+ " 0.260197 | \n",
+ " -0.459190 | \n",
+ " -0.354190 | \n",
+ " 9.999990e-07 | \n",
+ " 7.334490e-15 | \n",
+ " 4.847680e-09 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gene0_id gene1_id gene0_symbol gene1_symbol \\\n",
+ "0 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "1 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "2 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "3 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "4 ENSG00000147050.14 ENSG00000183878.15 KDM6A UTY \n",
+ "\n",
+ " tissue ccc_coef pearson_coef spearman_coef ccc_pvalue \\\n",
+ "0 colon_transverse 0.336727 -0.517899 -0.408343 9.999990e-07 \n",
+ "1 brain_amygdala 0.280524 0.037541 0.147571 9.999990e-07 \n",
+ "2 artery_coronary 0.274554 -0.413862 -0.391764 9.999990e-07 \n",
+ "3 artery_aorta 0.429771 -0.485788 -0.363510 9.999990e-07 \n",
+ "4 adrenal_gland 0.260197 -0.459190 -0.354190 9.999990e-07 \n",
+ "\n",
+ " pearson_pvalue spearman_pvalue \n",
+ "0 3.063710e-29 9.539160e-18 \n",
+ "1 6.461090e-01 6.963020e-02 \n",
+ "2 2.389970e-11 3.159320e-10 \n",
+ "3 5.775750e-27 6.092380e-15 \n",
+ "4 7.334490e-15 4.847680e-09 "
+ ]
+ },
+ "execution_count": 59,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data_again.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "id": "d69f9e89-c489-441b-884c-7260165292db",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2024-01-05T18:03:27.435673Z",
+ "iopub.status.busy": "2024-01-05T18:03:27.435351Z",
+ "iopub.status.idle": "2024-01-05T18:03:27.455544Z",
+ "shell.execute_reply": "2024-01-05T18:03:27.455149Z"
+ },
+ "papermill": {
+ "duration": 0.030234,
+ "end_time": "2024-01-05T18:03:27.456257",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.426023",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# testing\n",
+ "pd.testing.assert_frame_equal(\n",
+ " data2,\n",
+ " data_again,\n",
+ " check_categorical=False,\n",
+ " check_dtype=False,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "25663d89-6720-40d8-946b-3bbb809f00df",
+ "metadata": {
+ "papermill": {
+ "duration": 0.008735,
+ "end_time": "2024-01-05T18:03:27.473877",
+ "exception": false,
+ "start_time": "2024-01-05T18:03:27.465142",
+ "status": "completed"
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "jupytext": {
+ "cell_metadata_filter": "all,-execution,-papermill,-trusted",
+ "notebook_metadata_filter": "-jupytext.text_representation.jupytext_version"
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
+ },
+ "papermill": {
+ "default_parameters": {},
+ "duration": 3.218195,
+ "end_time": "2024-01-05T18:03:27.697248",
+ "environment_variables": {},
+ "exception": null,
+ "input_path": "nbs/99_manuscript/supplementary_files/01-supplementary_file3.ipynb",
+ "output_path": "nbs/99_manuscript/supplementary_files/01-supplementary_file3.run.ipynb",
+ "parameters": {},
+ "start_time": "2024-01-05T18:03:24.479053",
+ "version": "2.3.4"
+ },
+ "toc-autonumbering": true
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/nbs/99_manuscript/supplementary_files/py/01-supplementary_file3.py b/nbs/99_manuscript/supplementary_files/py/01-supplementary_file3.py
new file mode 100644
index 00000000..fb3a6812
--- /dev/null
+++ b/nbs/99_manuscript/supplementary_files/py/01-supplementary_file3.py
@@ -0,0 +1,348 @@
+# ---
+# jupyter:
+# jupytext:
+# cell_metadata_filter: all,-execution,-papermill,-trusted
+# notebook_metadata_filter: -jupytext.text_representation.jupytext_version
+# text_representation:
+# extension: .py
+# format_name: percent
+# format_version: '1.3'
+# kernelspec:
+# display_name: Python 3 (ipykernel)
+# language: python
+# name: python3
+# ---
+
+# %% [markdown] tags=[]
+# # Description
+
+# %% [markdown] tags=[]
+# Creates **Supplementary File 3**.
+#
+# *Description*: Correlations and p-values of a subset of gene pairs across all tissues in GTEx v8.
+
+# %% [markdown] tags=[]
+# # Modules
+
+# %% tags=[]
+import pandas as pd
+import numpy as np
+import rpy2.robjects as ro
+from rpy2.robjects import pandas2ri
+from rpy2.robjects.conversion import localconverter
+
+from ccc import conf
+
+# %% tags=[]
+readRDS = ro.r["readRDS"]
+
+# %% tags=[]
+saveRDS = ro.r["saveRDS"]
+
+# %% [markdown] tags=[]
+# # Settings
+
+# %% tags=[]
+DATASET_CONFIG = conf.GTEX
+
+# %% [markdown] tags=[]
+# # Paths
+
+# %% tags=[]
+assert (
+ conf.MANUSCRIPT["BASE_DIR"] is not None and conf.MANUSCRIPT["BASE_DIR"].exists()
+), "Manuscript dir not set"
+
+# %% tags=[]
+INPUT_DIR = conf.GTEX["RESULTS_DIR"] / "other_tissues"
+display(INPUT_DIR)
+
+# %% tags=[]
+OUTPUT_DIR = conf.MANUSCRIPT["SUPPLEMENTARY_MATERIAL_DIR"]
+OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+display(OUTPUT_DIR)
+
+# %% tags=[]
+OUTPUT_FILENAME = "Supplementary_File_03-Gene_pairs_correlations_all_GTEx_tissues"
+
+# %% [markdown] tags=[]
+# # Data
+
+# %% [markdown] tags=[]
+# ## Gene Ensembl ID -> Symbol mapping
+
+# %% tags=[]
+gene_map = pd.read_pickle(
+ DATASET_CONFIG["DATA_DIR"] / "gtex_gene_id_symbol_mappings.pkl"
+)
+
+# %% tags=[]
+gene_map = gene_map.set_index("gene_ens_id")["gene_symbol"].to_dict()  # rebinds gene_map to a dict (gene_ens_id -> gene_symbol); reload before re-running this cell
+
+# %% tags=[]
+assert gene_map["ENSG00000145309.5"] == "CABS1"  # spot-check a single known entry
+
+# %% [markdown] tags=[]
+# # List of dataframes to combine
+
+# %% tags=[]
+df_list = []  # one dataframe per gene pair is appended below, then all are concatenated
+
+# %% [markdown] tags=[]
+# # KDM6A - UTY
+
+# %% tags=[]
+gene0_id, gene1_id = "ENSG00000147050.14", "ENSG00000183878.15"
+gene0_symbol, gene1_symbol = "KDM6A", "UTY"
+# sanity check: the hard-coded Ensembl IDs must map to the symbols given above
+assert gene_map[gene0_id] == gene0_symbol
+assert gene_map[gene1_id] == gene1_symbol
+
+# %% tags=[]
+GENE_PAIR_INPUT_DIR = INPUT_DIR / f"{gene0_symbol.lower()}_vs_{gene1_symbol.lower()}"
+display(GENE_PAIR_INPUT_DIR)
+
+# %% [markdown] tags=[]
+# ## Correlation values
+
+# %% tags=[]
+res_all = pd.read_pickle(GENE_PAIR_INPUT_DIR / "coef_values.pkl").rename(
+ columns={
+ "cm": "ccc_coef",  # the stored "cm" column is exported as the CCC coefficient
+ "pearson": "pearson_coef",
+ "spearman": "spearman_coef",
+ }
+)
+
+# %% tags=[]
+res_all.shape
+
+# %% tags=[]
+res_all.head()
+
+# %% [markdown] tags=[]
+# ## P-values
+
+# %% tags=[]
+res_pval_all = pd.read_pickle(GENE_PAIR_INPUT_DIR / "coef_pvalues.pkl").rename(
+ columns={
+ "cm": "ccc_pvalue",  # the stored "cm" column is exported as the CCC p-value
+ "pearson": "pearson_pvalue",
+ "spearman": "spearman_pvalue",
+ }
+)
+
+# %% tags=[]
+res_pval_all.shape
+
+# %% tags=[]
+res_pval_all.head()
+
+# %% [markdown] tags=[]
+# ## Combine
+
+# %% tags=[]
+df = res_all.join(res_pval_all, how="inner").rename_axis("tissue").reset_index()
+assert df.shape[0] == res_all.shape[0]
+assert df.shape[0] == res_pval_all.shape[0]
+
+# %% tags=[]
+df.insert(0, "gene0_id", gene0_id)
+df.insert(1, "gene1_id", gene1_id)
+df.insert(2, "gene0_symbol", gene0_symbol)
+df.insert(3, "gene1_symbol", gene1_symbol)
+
+# %% tags=[]
+df.shape
+
+# %% tags=[]
+df.head()
+
+# %% tags=[]
+df_list.append(df)
+
+# %% [markdown] tags=[]
+# # KDM6A - DDX3Y
+
+# %% tags=[]
+gene0_id, gene1_id = "ENSG00000147050.14", "ENSG00000067048.16"
+gene0_symbol, gene1_symbol = "KDM6A", "DDX3Y"
+# sanity check: the hard-coded Ensembl IDs must map to the symbols given above
+assert gene_map[gene0_id] == gene0_symbol
+assert gene_map[gene1_id] == gene1_symbol
+
+# %% tags=[]
+GENE_PAIR_INPUT_DIR = INPUT_DIR / f"{gene0_symbol.lower()}_vs_{gene1_symbol.lower()}"
+display(GENE_PAIR_INPUT_DIR)
+
+# %% [markdown] tags=[]
+# ## Correlation values
+
+# %% tags=[]
+res_all = pd.read_pickle(GENE_PAIR_INPUT_DIR / "coef_values.pkl").rename(
+ columns={
+ "cm": "ccc_coef",  # the stored "cm" column is exported as the CCC coefficient
+ "pearson": "pearson_coef",
+ "spearman": "spearman_coef",
+ }
+)
+
+# %% tags=[]
+res_all.shape
+
+# %% tags=[]
+res_all.head()
+
+# %% [markdown] tags=[]
+# ## P-values
+
+# %% tags=[]
+res_pval_all = pd.read_pickle(GENE_PAIR_INPUT_DIR / "coef_pvalues.pkl").rename(
+ columns={
+ "cm": "ccc_pvalue",  # the stored "cm" column is exported as the CCC p-value
+ "pearson": "pearson_pvalue",
+ "spearman": "spearman_pvalue",
+ }
+)
+
+# %% tags=[]
+res_pval_all.shape
+
+# %% tags=[]
+res_pval_all.head()
+
+# %% [markdown] tags=[]
+# ## Combine
+
+# %% tags=[]
+df = res_all.join(res_pval_all, how="inner").rename_axis("tissue").reset_index()
+assert df.shape[0] == res_all.shape[0]
+assert df.shape[0] == res_pval_all.shape[0]
+
+# %% tags=[]
+df.insert(0, "gene0_id", gene0_id)
+df.insert(1, "gene1_id", gene1_id)
+df.insert(2, "gene0_symbol", gene0_symbol)
+df.insert(3, "gene1_symbol", gene1_symbol)
+
+# %% tags=[]
+df.shape
+
+# %% tags=[]
+df.head()
+
+# %% tags=[]
+df_list.append(df)
+
+# %% [markdown] tags=[]
+# # Combine
+
+# %% tags=[]
+df_final = pd.concat(df_list, ignore_index=True, axis=0)
+
+# %% tags=[]
+assert df_final.shape[0] == sum(d.shape[0] for d in df_list)
+for d in df_list:
+ assert df_final.shape[1] == d.shape[1]
+display(df_final.shape)
+
+# %% tags=[]
+df_final
+
+# %% [markdown] tags=[]
+# # Save
+
+# %% tags=[]
+data = df_final
+
+# %% tags=[]
+display(data.index.dtype)
+display(data.index)
+
+# %% tags=[]
+# guard: a MultiIndex would be flattened into columns before export (not expected after ignore_index=True)
+if isinstance(data.index, pd.MultiIndex):
+ display("MultiIndex")
+ data = data.reset_index()
+
+# %% [markdown] tags=[]
+# ## Pickle
+
+# %% tags=[]
+data.to_pickle(OUTPUT_DIR / f"{OUTPUT_FILENAME}.pkl.gz")
+
+# %% [markdown] tags=[]
+# ## RDS
+
+# %% tags=[]
+output_file = OUTPUT_DIR / f"{OUTPUT_FILENAME}.rds"
+display(output_file)
+
+# %% tags=[]
+with localconverter(ro.default_converter + pandas2ri.converter):
+ data_r = ro.conversion.py2rpy(data)  # pandas DataFrame -> R data.frame
+
+# %% tags=[]
+data_r
+
+# %% tags=[]
+saveRDS(data_r, str(output_file))  # output_file is a path object; R receives it as a plain string
+
+# %% tags=[]
+# testing: read the RDS file back for a round-trip check (rebinds data_r)
+data_r = readRDS(str(output_file))
+
+# %% tags=[]
+with localconverter(ro.default_converter + pandas2ri.converter):
+ data_again = ro.conversion.rpy2py(data_r)
+ data_again.index = data_again.index.astype(int)  # presumably R row names come back as strings -- TODO confirm
+
+# %% tags=[]
+data_again.shape
+
+# %% tags=[]
+data_again.head()
+
+# %% tags=[]
+# testing: the RDS round-trip must reproduce `data`; dtype differences are tolerated
+pd.testing.assert_frame_equal(
+ data,
+ data_again,
+ check_dtype=False,
+)
+
+# %% [markdown] tags=[]
+# ## Text
+
+# %% tags=[]
+# tsv format
+output_file = OUTPUT_DIR / f"{OUTPUT_FILENAME}.tsv"
+display(output_file)
+
+# %% tags=[]
+data.to_csv(output_file, sep="\t", index=False, float_format="%.5e")  # floats keep 6 significant digits; the index is not written
+
+# %% tags=[]
+# testing: read the TSV back and compare it with the in-memory frame
+data2 = data # alias of `data` (same object, not a copy)
+# no index column was written (index=False), so read_csv builds a fresh default index
+
+data_again = pd.read_csv(output_file, sep="\t", index_col=None)
+# float columns were rounded on write (float_format), so values match only approximately
+
+# %% tags=[]
+data_again.shape
+
+# %% tags=[]
+data_again.head()
+
+# %% tags=[]
+# testing: approximate comparison (check_exact is not set), which absorbs the rounding done on write
+pd.testing.assert_frame_equal(
+ data2,
+ data_again,
+ check_categorical=False,
+ check_dtype=False,
+)
+
+# %% tags=[]