diff --git a/.gitignore b/.gitignore index 712e4fa..fce95ab 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.ipynb_checkpoints/ *.DS_Store +challenges/2023_cloud_classification/src/example_code/mlruns/ diff --git a/challenges/2023_cloud_classification/README.md b/challenges/2023_cloud_classification/README.md new file mode 100755 index 0000000..ec8708b --- /dev/null +++ b/challenges/2023_cloud_classification/README.md @@ -0,0 +1,29 @@ +# understanding-clouds-kaggle +This repository hosts the Data Science Community of Practice Understanding Clouds challenge. This challenge was built +upon the [Kaggle Understanding Clouds from Satellite Images](https://www.kaggle.com/competitions/understanding_cloud_organization/) +challenge hosted by the Max Planck Institute for Meteorology. + +The original challenge required users to segment regions belonging to each of four classes - Fish, Flower, Gravel, and +Sugar. However, this challenge was adapted to an image classification task for the Data Science Community of Practice. +A set of 224x224 images that consist of a single class have been extracted, and randomly separated into training and +test sets. These will be used to train image classification algorithms that can be evaluated against the test images +using a provided script. + +__Environment__ + +The environment file describing the environment used to execute all the code in this subdirectory can be found here: + +/data_science_cop/env/requirements_cloud_class.yml + +This repository contains the following code: + +__src/produce_test_train__ + +Scripts used to produce train / test image sets and labels + +__src/example_code__ + +Examples of image classification algorithms that may be used to develop classifiers + +__TODO:__ +- Add links to sharepoint, kick off meeting, monthly catchups, etc. 
\ No newline at end of file diff --git a/challenges/2023_cloud_classification/src/__init__.py b/challenges/2023_cloud_classification/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/challenges/2023_cloud_classification/src/example_code/nn_classifier_example.ipynb b/challenges/2023_cloud_classification/src/example_code/nn_classifier_example.ipynb new file mode 100644 index 0000000..132ca3c --- /dev/null +++ b/challenges/2023_cloud_classification/src/example_code/nn_classifier_example.ipynb @@ -0,0 +1,388 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Starter code for Cloud Classification Challenge\n", + "\n", + "This code is designed as a starting point for your development. You do not have to use it, but feel free to use it if you do not know where to start.\n", + "\n", + "The [Pytorch](https://pytorch.org/) collection of packages is used to define and train the model, and this code is adapted from their [introductory tutorial](https://pytorch.org/tutorials/beginner/basics/intro.html).\n", + "\n", + "Other machine learning Python packages that you may wish to use include [TensorFlow](https://www.tensorflow.org/overview) and [scikit-learn](https://scikit-learn.org/stable/index.html)." 
# Define transforms for label data
def get_label_dict():
    """Return a fresh mapping from cloud class name to integer class index."""
    class_names = ("Fish", "Flower", "Gravel", "Sugar")
    return {name: index for index, name in enumerate(class_names)}


def sat_label_transform(label):
    """Convert a cloud class name (e.g. ``"Gravel"``) to its integer index.

    Raises ``KeyError`` when ``label`` is not one of the known class names.
    """
    return get_label_dict()[label]


def sat_label_transform_inv(num):
    """Convert an integer class index back to its cloud class name.

    Raises ``IndexError`` when no class has index ``num``.
    """
    matching_names = []
    for name, index in get_label_dict().items():
        if index == num:
            matching_names.append(name)
    return matching_names[0]
# Create class for loading the satellite image into a Dataset
class SatImageDataset(Dataset):
    """Dataset of satellite image crops and their cloud-class labels.

    The labels CSV must provide an ``Image`` column (file name relative to
    ``img_dir``) and a ``Label`` column (class name).

    :param labels_file: path of the CSV file listing image names and labels
    :param img_dir: directory containing the image files
    :param transform: optional callable applied to each image tensor
    :param target_transform: optional callable applied to each label
    :param max_rows: optional cap on the number of label rows used, handy for
        quick smoke tests; ``None`` (default) uses every row. This replaces the
        previous hard-coded 1000-row debugging truncation.
    """

    def __init__(self, labels_file, img_dir, transform=img_transform, target_transform=sat_label_transform,
                 max_rows=None):
        img_labels = pd.read_csv(labels_file)
        if max_rows is not None:
            img_labels = img_labels.iloc[:max_rows]
        self.img_labels = img_labels
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of labelled images available."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at position ``idx`` after applying the transforms."""
        row = self.img_labels.iloc[idx]
        image = read_image(os.path.join(self.img_dir, row["Image"]))
        label = row["Label"]
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return image, label
# Load the training data.
train_files_dir = "/data/users/meastman/understanding_clouds_kaggle/input/single_labels/224s/train/"
train_files_labels = "/data/users/meastman/understanding_clouds_kaggle/input/single_labels/224s/train/train_labels.csv"

# Create train images dataloader; shuffling each epoch decorrelates the mini-batches.
train_images = SatImageDataset(labels_file=train_files_labels, img_dir=train_files_dir)
train_dataloader = DataLoader(train_images, batch_size=32, shuffle=True)

# Test Data
test_files_dir = "/data/users/meastman/understanding_clouds_kaggle/input/single_labels/224s/test/"
test_files_labels = "/data/users/meastman/understanding_clouds_kaggle/input/single_labels/224s/test/test_labels.csv"

# Create test images dataloader. Shuffling is disabled: evaluation order should be
# fixed, and Lightning warns when a test dataloader has shuffling enabled.
test_images = SatImageDataset(labels_file=test_files_labels, img_dir=test_files_dir)
test_dataloader = DataLoader(test_images, batch_size=32, shuffle=False)
class NNCloudClassifier(pl.LightningModule):
    """Single linear layer mapping a flattened 3x224x224 image to 4 cloud-class logits."""

    def __init__(self):
        super().__init__()
        self.l1 = torch.nn.Linear(3 * 224 * 224, 4)
        # Per-batch test accuracies collected by test_step, averaged in on_test_epoch_end.
        self.test_outputs = []
        self.avg_test_acc = None

    def forward(self, x):
        """
        :param x: Batch of images, flattened per sample before the linear layer

        :return: output - raw (unnormalised) logits, one per cloud class
        """
        # (b, 3, 224, 224) -> (b, 3*224*224)
        x = x.flatten(start_dim=1)

        # layer 1 (b, 3*224*224) -> (b, 4)
        # No activation here: F.cross_entropy expects raw logits, and a ReLU on the
        # output would clamp every negative logit to zero.
        return self.l1(x)

    def training_step(self, batch, batch_nb):
        """Compute the cross-entropy loss for one batch and log loss and accuracy."""
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        pred = logits.argmax(dim=1)
        acc = multiclass_accuracy(pred, y, num_classes=4)

        # Log through the Lightning logger (per step and aggregated per epoch)
        self.log("train_loss", loss, on_epoch=True)
        self.log("acc", acc, on_epoch=True)
        return loss

    def test_step(self, test_batch, batch_idx):
        """
        Performs test and computes the accuracy of the model

        :param test_batch: Batch data
        :param batch_idx: Batch indices

        :return: output - Testing accuracy
        """
        x, y = test_batch
        y_hat = self(x).argmax(dim=1)
        test_acc = multiclass_accuracy(y_hat, y, num_classes=4)
        self.test_outputs.append(test_acc)
        return {"test_acc": test_acc}

    def on_test_epoch_end(self):
        """
        Computes average test accuracy score
        """
        if not self.test_outputs:
            # Nothing was evaluated (empty test set); torch.stack would fail on an empty list.
            return
        self.avg_test_acc = torch.stack(self.test_outputs).mean()
        self.log("avg_test_acc", self.avg_test_acc, sync_dist=True)
        self.test_outputs.clear()

    def configure_optimizers(self):
        """Return the Adam optimiser used for training (learning rate 0.02)."""
        return torch.optim.Adam(self.parameters(), lr=0.02)
Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default\n", + " warning_cache.warn(\n", + "2023/08/14 15:52:54 WARNING mlflow.utils.autologging_utils: MLflow autologging encountered a warning: \"/home/h06/meastman/.conda/envs/dscop_cloud_class/lib/python3.11/site-packages/mlflow/pytorch/_lightning_autolog.py:351: UserWarning: Autologging is known to be compatible with pytorch-lightning versions between 1.0.5 and 2.0.5 and may not succeed with packages outside this range.\"\n", + "\n", + " | Name | Type | Params\n", + "--------------------------------\n", + "0 | l1 | Linear | 602 K \n", + "--------------------------------\n", + "602 K Trainable params\n", + "0 Non-trainable params\n", + "602 K Total params\n", + "2.408 Total estimated model params size (MB)\n", + "SLURM auto-requeueing enabled. Setting signal handlers.\n", + "/home/h06/meastman/.conda/envs/dscop_cloud_class/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:432: PossibleUserWarning: The dataloader, train_dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 48 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n", + " rank_zero_warn(\n", + "/home/h06/meastman/.conda/envs/dscop_cloud_class/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:280: PossibleUserWarning: The number of training batches (32) is smaller than the logging interval Trainer(log_every_n_steps=50). 
Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + " rank_zero_warn(\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e31104f0153e4f61a536a441f0e3da97", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=20` reached.\n", + "2023/08/14 15:55:07 WARNING mlflow.utils.autologging_utils: MLflow autologging encountered a warning: \"/home/h06/meastman/.conda/envs/dscop_cloud_class/lib/python3.11/site-packages/_distutils_hack/__init__.py:33: UserWarning: Setuptools is replacing distutils.\"\n", + "SLURM auto-requeueing enabled. Setting signal handlers.\n", + "/home/h06/meastman/.conda/envs/dscop_cloud_class/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:480: PossibleUserWarning: Your `test_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.\n", + " rank_zero_warn(\n", + "/home/h06/meastman/.conda/envs/dscop_cloud_class/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:432: PossibleUserWarning: The dataloader, test_dataloader, does not have many workers which may be a bottleneck. 
Consider increasing the value of the `num_workers` argument` (try 48 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n", + " rank_zero_warn(\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d3a4e63cabe646099d19510c7e01782e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Testing: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n", + "Runningstage.testing metric DataLoader 0\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n", + " avg_test_acc 0.2444826066493988\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n", + "run_id: f58b85a4b8764ad0893caa657c682a16\n", + "artifacts: ['model/MLmodel', 'model/conda.yaml', 'model/data', 'model/python_env.yaml', 'model/requirements.txt']\n", + "params: {'epochs': '20', 'optimizer_name': 'Adam', 'lr': '0.02', 'betas': '(0.9, 0.999)', 'eps': '1e-08', 'weight_decay': '0', 'amsgrad': 'False', 'maximize': 'False', 'foreach': 'None', 'capturable': 'False', 'differentiable': 'False', 'fused': 'None'}\n", + "metrics: {'train_loss': 1.2448924779891968, 'train_loss_step': 1.0397207736968994, 'acc': 0.307343989610672, 'acc_step': 0.6666666865348816, 'train_loss_epoch': 1.2448924779891968, 'acc_epoch': 0.307343989610672, 'avg_test_acc': 0.2444826066493988}\n", + "tags: {'Mode': 'testing'}\n" + ] + } + ], + "source": [ + "# Initialize our model\n", + "classifier = NNCloudClassifier()\n", + "\n", + "# Initialize a trainer\n", + "trainer = pl.Trainer(max_epochs=20, devices=1, num_nodes=1)\n", + "\n", + "# Auto log all MLflow entities\n", + 
"mlflow.pytorch.autolog()\n", + "\n", + "# Train the model\n", + "with mlflow.start_run() as run:\n", + " trainer.fit(classifier, train_dataloader)\n", + " trainer.test(classifier, test_dataloader)\n", + " mlflow.log_metric('avg_test_acc', classifier.avg_test_acc)\n", + "\n", + "# fetch the auto logged parameters and metrics\n", + "print_auto_logged_info(mlflow.get_run(run_id=run.info.run_id))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".conda-dscop_cloud_class Python (Conda)", + "language": "python", + "name": "conda-env-.conda-dscop_cloud_class-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/challenges/2023_cloud_classification/src/functions/__init__.py b/challenges/2023_cloud_classification/src/functions/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/challenges/2023_cloud_classification/src/functions/extract_labelled_images.py b/challenges/2023_cloud_classification/src/functions/extract_labelled_images.py new file mode 100755 index 0000000..c61081b --- /dev/null +++ b/challenges/2023_cloud_classification/src/functions/extract_labelled_images.py @@ -0,0 +1,225 @@ +""" +Raw data is stored as a series of images and run-length-encoded labels. +This script converts run-length encoded labels to a 2d array that matches the corresponding image. 
class Label_Images:
    """Cut single-label image crops out of images annotated with run-length-encoded (RLE) masks.

    The labels CSV is expected to provide an ``Image_Label`` column (e.g.
    ``'0011165.jpg_Flower'``) and an ``EncodedPixels`` column holding the RLE mask.

    Masks are decoded to the shape returned by PIL's ``Image.size``, i.e.
    ``(width, height)``, so mask arrays are indexed ``[x, y]`` and sampled
    indices are passed straight to ``Image.crop`` as ``(left, upper)``.
    NOTE(review): this presumably relies on the RLE runs being column-major
    (top-to-bottom, then left-to-right) - confirm against the data source.
    """

    def __init__(self, input_fp, output_fp, labels_fn='train.csv', images_fp='train_images', image_shape=None,
                 labels_suffix=None):
        """
        :param input_fp: directory containing the labels file and the images directory
        :param output_fp: directory under which ``single_labels/224s`` is written
        :param labels_fn: name of the labels CSV inside ``input_fp``
        :param images_fp: name of the images directory inside ``input_fp``
        :param image_shape: stored on the instance; not used by the methods of this class
        :param labels_suffix: suffix for the output labels CSV name (``labels_<suffix>.csv``)
        """
        self.input_fp = input_fp
        self.output_fp = output_fp
        self.labels_fn = labels_fn
        self.images_fp = images_fp
        self.image_shape = image_shape
        self.labels_suffix = labels_suffix

        self.images_fn = os.listdir(os.path.join(input_fp, images_fp))

        self.label_codes = None
        self.labels_rle = None
        self.labels_2d = {}

    def read_labels(self):
        """Read and process labels rle file"""
        labels_rle = pd.read_csv(os.path.join(self.input_fp, self.labels_fn))

        # labels stored with image name. Separate and remove original column
        pairs = [self.split_img_label(img_lbl) for img_lbl in labels_rle['Image_Label']]
        labels_rle['Image'] = [pair[0] for pair in pairs]
        labels_rle['Label'] = [pair[1] for pair in pairs]
        del labels_rle['Image_Label']

        # set label codes; sorted so the codes do not depend on set iteration order
        self.label_codes = {k: v for v, k in enumerate(sorted(set(labels_rle['Label'])))}

        self.labels_rle = labels_rle

    def split_img_label(self, img_lbl):
        """Return image and label from file name like '0011165.jpg_Flower'

        :raises ValueError: if ``img_lbl`` does not contain exactly one underscore
        """
        s = img_lbl.split("_")
        if len(s) != 2:
            # explicit raise instead of assert, which is stripped under ``python -O``
            raise ValueError(f"Expected '<image>_<label>', got {img_lbl!r}")
        return s[0], s[1]

    def read_image(self, fn):
        """Open image ``fn`` from the images directory and return it as a PIL image"""
        return Image.open(os.path.join(self.input_fp, self.images_fp, fn))

    def rle_decode(self, rle, shape, value=1):
        """
        Decodes an RLE-encoded string.

        Parameters
        ----------
        rle
            RLE mask: whitespace-separated ``start length`` pairs with 1-based starts.
        shape
            Two-element mask shape; the flat mask of ``shape[0] * shape[1]`` pixels is
            reshaped to it in row-major order.
        value
            Value to fill in the mask.

        Returns
        -------
        mask
            The decoded mask as a 2D float array of the given shape, holding ``value``
            inside the encoded runs and NaN everywhere else.
        """
        s = rle.split()
        starts = np.asarray(s[0::2], dtype=int) - 1  # convert 1-based starts to 0-based
        lengths = np.asarray(s[1::2], dtype=int)
        ends = starts + lengths
        img = np.full(shape[0] * shape[1], np.nan)
        for lo, hi in zip(starts, ends):
            img[lo:hi] = value
        return img.reshape(shape)

    def produce_2d_labels(self):
        """converts rle labels into 2d np arrays of labels"""

        if self.labels_rle is None:
            self.read_labels()

        images = self.labels_rle['Image']

        for image_i in set(images):
            print(f"Decoding {image_i}...")
            shape = self.read_image(fn=image_i).size
            self.labels_2d[image_i] = np.zeros(shape)
            image_l_rle = self.labels_rle[self.labels_rle['Image'] == image_i].copy()
            image_l_rle.dropna(inplace=True)

            for label_i in set(image_l_rle['Label']):
                v = image_l_rle['EncodedPixels'][image_l_rle['Label'] == label_i].values[0]
                mask = self.rle_decode(v, shape=shape, value=self.label_codes[label_i])
                # rle_decode marks background as NaN; adding it directly would turn the
                # whole accumulated array into NaN, so background contributes 0 instead.
                # NOTE(review): the label with code 0 is indistinguishable from background here.
                self.labels_2d[image_i] += np.nan_to_num(mask, nan=0.0)

    def sample_rectangles_idx(self, arr, value=np.nan, rectangle_size=(224, 224), num_samples=np.inf):
        """Randomly sample non-overlapping rectangles whose pixels all equal ``value``.

        :param arr: 2D array to sample from (not modified)
        :param value: value every pixel of a sampled rectangle must have; NaN matches NaN pixels
        :param rectangle_size: extent of the rectangle along the two array axes
        :param num_samples: maximum number of rectangles; the default samples until none fit
        :return: list of ``(i, j)`` index pairs of the first corner of each sampled rectangle
        """
        import warnings

        arr = np.asarray(arr)
        if arr.dtype != 'float64':
            warnings.warn('Converting array to float64')
            arr = arr.astype(float)

        rectangle_size = tuple(rectangle_size)
        # Boolean availability mask; NaN needs isnan because ``nan == nan`` is False.
        available = np.isnan(arr) if np.isnan(value) else arr == value

        sample_rectangles = []
        while len(sample_rectangles) < num_samples:
            print(f"Sampling rectangles: {len(sample_rectangles) + 1} of {num_samples}")

            valid_indices = np.argwhere(available)
            np.random.shuffle(valid_indices)

            # iterate through random valid indices until criteria are satisfied, or loop ends
            for x0, y0 in valid_indices:
                x1, y1 = x0 + rectangle_size[0], y0 + rectangle_size[1]
                window = available[x0:x1, y0:y1]

                # the shape check rejects rectangles truncated by the array edge
                if window.shape == rectangle_size and window.all():
                    sample_rectangles.append((x0, y0))
                    # mark as used so later rectangles cannot overlap this one
                    available[x0:x1, y0:y1] = False
                    break
            else:
                # no candidate (or no candidates at all) produced a rectangle: stop sampling
                break

        return sample_rectangles

    def plot_2d_labels(self, ):
        """Placeholder; not implemented."""
        pass

    def uniquify(self, path):
        """
        return path with suffixed numbers if path already exists
        """
        filename, extension = os.path.splitext(path)
        counter = 1

        while os.path.exists(path):
            path = filename + " (" + str(counter) + ")" + extension
            counter += 1

        return path

    def extract_labelled_image(self, img_idx_to_load=None):
        """saves images and labels where images are subsets of those provided that satisfy certain criteria

        Crops and a ``labels_<labels_suffix>.csv`` file are written to
        ``<output_fp>/single_labels/224s``. The CSV is written (possibly empty) even when
        no crop satisfies the criteria.

        :param img_idx_to_load: list of images to load. Used for parallelisation. If None, does all
        """
        if self.labels_rle is None:
            self.read_labels()

        rectangle_size = (224, 224)
        fp = os.path.join(self.output_fp, 'single_labels', '224s')
        os.makedirs(fp, exist_ok=True)

        unique_images = self.labels_rle['Image'].unique()

        if img_idx_to_load is None:
            img_idx_to_load = range(len(unique_images))

        # remove those outside of range (cases come bash/sbatch scripts)
        img_idx_to_load = [x for x in img_idx_to_load if x < len(unique_images)]

        labels = []
        for image_i in [unique_images[x] for x in img_idx_to_load]:
            print(f"Analysing {image_i}...")

            img = self.read_image(fn=image_i)

            # labels and rle pixels; rows without an encoded mask are dropped
            image_l_rle = self.labels_rle[self.labels_rle['Image'] == image_i].copy()
            image_l_rle.dropna(inplace=True)

            for label_i in image_l_rle['Label']:
                # first decode the rle into a 2d array
                v = image_l_rle['EncodedPixels'][image_l_rle['Label'] == label_i].values[0]
                labels_2d = self.rle_decode(v, shape=img.size, value=self.label_codes[label_i])

                # exhaustively search the rle for rectangles of predefined shape that satisfy criteria
                idxs = self.sample_rectangles_idx(arr=labels_2d, value=self.label_codes[label_i],
                                                  rectangle_size=rectangle_size)

                # output images and labels
                for i, idx in enumerate(idxs):
                    cropped_img = img.crop((idx[0], idx[1],
                                            idx[0] + rectangle_size[0], idx[1] + rectangle_size[1]))

                    # crop numbering restarts per label, so uniquify avoids overwriting
                    fn = f"{image_i.split('.')[0]}_{i}.jpg"
                    fp_fn = self.uniquify(os.path.join(fp, fn))

                    cropped_img.save(fp_fn)
                    labels.append(pd.DataFrame({
                        "Image": [os.path.basename(fp_fn)],
                        "Label": [label_i]
                    }))

        # pd.concat raises on an empty list, so fall back to an empty table
        labels = pd.concat(labels) if labels else pd.DataFrame({"Image": [], "Label": []})
        labels.to_csv(self.uniquify(os.path.join(fp, f"labels_{self.labels_suffix}.csv")))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--step', nargs='?', help='If running in steps, which step this iteration is', type=int)
    parser.add_argument('--step_len', nargs='?', help='If running in steps, length of step', type=int)

    args = parser.parse_args()
    step = args.step
    step_len = args.step_len

    idxs = None  # None processes every image (previously undefined when --step was omitted)
    labels_suffix = None  # default is None, change if processing slices so file is not overwritten/parallel written
    if step is not None:
        if step_len is None or step_len < 1:
            parser.error('--step_len must be a positive integer when --step is given')
        idxs = list(range(step * step_len, step * step_len + step_len))
        labels_suffix = f"{str(min(idxs))}-{str(max(idxs))}"

    label_images_class = Label_Images(input_fp="/data/users/meastman/understanding_clouds_kaggle/input",
                                      output_fp="/data/users/meastman/understanding_clouds_kaggle/input",
                                      labels_suffix=labels_suffix)
    label_images_class.extract_labelled_image(idxs)
class PPLabelledImages:
    """Split extracted image crops into train/test directories with matching label files.

    Works on a directory holding the crops (named ``<image stem>_<n>...``) and the
    per-slice label CSV files produced by the extraction step.
    """

    def __init__(self, parent_dir):
        """
        :param parent_dir: directory containing the crops and label CSV files; the
            ``train`` and ``test`` sub-directories are created inside it
        """
        self.parent_dir = parent_dir

        self.fps = None
        self.labels = None
        self.image_names = None
        self.train_images = None
        self.test_images = None

    def extract_labels(self):
        """Read label files

        :raises ValueError: if ``parent_dir`` contains no CSV label files
        """
        # sorted so the concatenated label order does not depend on directory listing order
        self.fps = sorted(glob.glob(os.path.join(self.parent_dir, '*.csv')))
        if not self.fps:
            raise ValueError(f"No label csv files found in {self.parent_dir}")

        dfs = [pd.read_csv(fp, index_col=0) for fp in self.fps]
        self.labels = pd.concat(dfs, ignore_index=True)

    def move_test_train_files(self):
        """Move every crop of each train/test image into the ``train``/``test`` sub-directory

        :raises ValueError: if ``self.train_images`` or ``self.test_images`` has not been set
        """
        if self.train_images is None or self.test_images is None:
            raise ValueError("No images in self.train_images or self.test_images to move")

        for subset, image_stems in (('train', self.train_images), ('test', self.test_images)):
            target_dir = os.path.join(self.parent_dir, subset)
            os.makedirs(target_dir, exist_ok=True)

            for img in image_stems:
                # the '_' keeps a stem from also matching files of a longer stem sharing its prefix
                for fp in glob.glob(os.path.join(self.parent_dir, img + '_*')):
                    shutil.move(fp, os.path.join(target_dir, os.path.basename(fp)))

    def delete_files(self):
        """Placeholder; not implemented."""
        pass

    def process_images(self):
        """Split whole source images 80/20 into train/test, move the crops and write label files"""
        # read all label files into pd.DataFrame
        self.extract_labels()

        # separate entire images into test/train, so crops of one image never straddle the split
        image_names = [img.split('.')[0].split('_')[0] for img in self.labels['Image']]
        # sorted: set iteration order of strings varies between processes, which would
        # make the fixed random_state below meaningless
        self.image_names = sorted(set(image_names))

        self.train_images, self.test_images = train_test_split(self.image_names, test_size=.2, random_state=11)

        # move files into new directories with train/test specific label files
        self.move_test_train_files()
        train_set, test_set = set(self.train_images), set(self.test_images)
        train_labels = self.labels[[x in train_set for x in image_names]]
        test_labels = self.labels[[x in test_set for x in image_names]]

        train_labels.to_csv(os.path.join(self.parent_dir, 'train', 'train_labels.csv'))
        test_labels.to_csv(os.path.join(self.parent_dir, 'test', 'test_labels.csv'))

        # delete old label files
        for fn in glob.glob(os.path.join(self.parent_dir, '*.csv')):
            os.remove(fn)
--- /dev/null +++ b/env/requirements_cloud_class.yml @@ -0,0 +1,375 @@ +name: dscop_cloud_class +channels: + - pytorch + - conda-forge + - defaults + - conda-main + - conda-mo-internal + - conda-r +dependencies: + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=2_kmp_llvm + - alembic=1.11.2=pyhd8ed1ab_0 + - alsa-lib=1.2.8=h166bdaf_0 + - anyio=3.7.1=pyhd8ed1ab_0 + - argon2-cffi=21.3.0=pyhd8ed1ab_0 + - argon2-cffi-bindings=21.2.0=py311hd4cff14_3 + - arrow=1.2.3=pyhd8ed1ab_0 + - asttokens=2.2.1=pyhd8ed1ab_0 + - async-lru=2.0.4=pyhd8ed1ab_0 + - attr=2.5.1=h166bdaf_1 + - attrs=23.1.0=pyh71513ae_1 + - aws-c-auth=0.7.0=hf8751d9_2 + - aws-c-cal=0.6.0=h93469e0_0 + - aws-c-common=0.8.23=hd590300_0 + - aws-c-compression=0.2.17=h862ab75_1 + - aws-c-event-stream=0.3.1=h9599702_1 + - aws-c-http=0.7.11=hbe98c3e_0 + - aws-c-io=0.13.28=h3870b5a_0 + - aws-c-mqtt=0.8.14=h2e270ba_2 + - aws-c-s3=0.3.13=heb0bb06_2 + - aws-c-sdkutils=0.1.11=h862ab75_1 + - aws-checksums=0.1.16=h862ab75_1 + - aws-crt-cpp=0.20.3=he9c0e7f_4 + - aws-sdk-cpp=1.10.57=hbc2ea52_17 + - babel=2.12.1=pyhd8ed1ab_1 + - backcall=0.2.0=pyh9f0ad1d_0 + - backports=1.0=pyhd8ed1ab_3 + - backports.functools_lru_cache=1.6.5=pyhd8ed1ab_0 + - bcrypt=3.2.2=py311hd4cff14_1 + - beautifulsoup4=4.12.2=pyha770c72_0 + - blas=1.0=mkl + - bleach=6.0.0=pyhd8ed1ab_0 + - blinker=1.6.2=pyhd8ed1ab_0 + - brotli=1.0.9=h166bdaf_9 + - brotli-bin=1.0.9=h166bdaf_9 + - brotli-python=1.0.9=py311ha362b79_9 + - bzip2=1.0.8=h7f98852_4 + - c-ares=1.19.1=hd590300_0 + - ca-certificates=2023.7.22=hbcca054_0 + - cached-property=1.5.2=hd8ed1ab_1 + - cached_property=1.5.2=pyha770c72_1 + - cairo=1.16.0=ha61ee94_1014 + - certifi=2023.7.22=pyhd8ed1ab_0 + - cffi=1.15.1=py311h409f033_3 + - charset-normalizer=3.2.0=pyhd8ed1ab_0 + - click=8.1.6=unix_pyh707e725_0 + - cloudpickle=2.2.1=pyhd8ed1ab_0 + - colorama=0.4.6=pyhd8ed1ab_0 + - comm=0.1.4=pyhd8ed1ab_0 + - configparser=5.3.0=pyhd8ed1ab_0 + - contourpy=1.1.0=py311h9547e67_0 + - 
cryptography=41.0.3=py311h63ff55d_0 + - cycler=0.11.0=pyhd8ed1ab_0 + - databricks-cli=0.17.7=pyhd8ed1ab_0 + - dbus=1.13.6=h5008d03_3 + - debugpy=1.6.8=py311hb755f60_0 + - decorator=5.1.1=pyhd8ed1ab_0 + - defusedxml=0.7.1=pyhd8ed1ab_0 + - docker-py=6.1.3=pyhd8ed1ab_0 + - entrypoints=0.4=pyhd8ed1ab_0 + - exceptiongroup=1.1.2=pyhd8ed1ab_0 + - executing=1.2.0=pyhd8ed1ab_0 + - expat=2.5.0=hcb278e6_1 + - ffmpeg=4.3=hf484d3e_0 + - fftw=3.3.10=nompi_hc118613_108 + - filelock=3.12.2=pyhd8ed1ab_0 + - flask=2.3.2=pyhd8ed1ab_0 + - flit-core=3.9.0=pyhd8ed1ab_0 + - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 + - font-ttf-inconsolata=3.000=h77eed37_0 + - font-ttf-source-code-pro=2.038=h77eed37_0 + - font-ttf-ubuntu=0.83=hab24e00_0 + - fontconfig=2.14.2=h14ed4e7_0 + - fonts-conda-ecosystem=1=0 + - fonts-conda-forge=1=0 + - fonttools=4.42.0=py311h459d7ec_0 + - fqdn=1.5.1=pyhd8ed1ab_0 + - freetype=2.12.1=hca18f0e_1 + - fsspec=2023.6.0=pyh1a96a4e_0 + - gettext=0.21.1=h27087fc_0 + - gflags=2.2.2=he1b5a44_1004 + - gitdb=4.0.10=pyhd8ed1ab_0 + - gitpython=3.1.32=pyhd8ed1ab_0 + - glib=2.76.4=hfc55251_0 + - glib-tools=2.76.4=hfc55251_0 + - glog=0.6.0=h6f12383_0 + - gmp=6.2.1=h58526e2_0 + - gmpy2=2.1.2=py311h6a5fa03_1 + - gnutls=3.6.13=h85f3911_1 + - graphite2=1.3.13=h58526e2_1001 + - greenlet=2.0.2=py311hb755f60_1 + - gst-plugins-base=1.22.0=h4243ec0_2 + - gstreamer=1.22.0=h25f0c4b_2 + - gstreamer-orc=0.4.34=hd590300_0 + - gunicorn=20.1.0=py311h38be061_3 + - harfbuzz=6.0.0=h8e241bc_0 + - icu=70.1=h27087fc_0 + - idna=3.4=pyhd8ed1ab_0 + - importlib-metadata=6.8.0=pyha770c72_0 + - importlib_metadata=6.8.0=hd8ed1ab_0 + - importlib_resources=6.0.1=pyhd8ed1ab_0 + - ipykernel=6.25.1=pyh71e2992_0 + - ipython=8.14.0=pyh41d4057_0 + - ipython_genutils=0.2.0=py_1 + - ipywidgets=8.1.0=pyhd8ed1ab_0 + - isoduration=20.11.0=pyhd8ed1ab_0 + - itsdangerous=2.1.2=pyhd8ed1ab_0 + - jack=1.9.22=h11f4161_0 + - jedi=0.19.0=pyhd8ed1ab_0 + - jinja2=3.1.2=pyhd8ed1ab_1 + - joblib=1.3.2=pyhd8ed1ab_0 + - 
jpeg=9e=h0b41bf4_3 + - json5=0.9.14=pyhd8ed1ab_0 + - jsonpointer=2.0=py_0 + - jsonschema=4.19.0=pyhd8ed1ab_0 + - jsonschema-specifications=2023.7.1=pyhd8ed1ab_0 + - jsonschema-with-format-nongpl=4.19.0=pyhd8ed1ab_0 + - jupyter=1.0.0=py311h38be061_8 + - jupyter-lsp=2.2.0=pyhd8ed1ab_0 + - jupyter_client=8.3.0=pyhd8ed1ab_0 + - jupyter_console=6.6.3=pyhd8ed1ab_0 + - jupyter_core=5.3.1=py311h38be061_0 + - jupyter_events=0.7.0=pyhd8ed1ab_2 + - jupyter_server=2.7.0=pyhd8ed1ab_0 + - jupyter_server_terminals=0.4.4=pyhd8ed1ab_1 + - jupyterlab=4.0.4=pyhd8ed1ab_0 + - jupyterlab_pygments=0.2.2=pyhd8ed1ab_0 + - jupyterlab_server=2.24.0=pyhd8ed1ab_0 + - jupyterlab_widgets=3.0.8=pyhd8ed1ab_0 + - keyutils=1.6.1=h166bdaf_0 + - kiwisolver=1.4.4=py311h4dd048b_1 + - krb5=1.20.1=h81ceb04_0 + - lame=3.100=h166bdaf_1003 + - lcms2=2.15=hfd0df8a_0 + - ld_impl_linux-64=2.40=h41732ed_0 + - lerc=4.0.0=h27087fc_0 + - libabseil=20230125.3=cxx17_h59595ed_0 + - libarrow=12.0.1=h657c46f_7_cpu + - libblas=3.9.0=16_linux64_mkl + - libbrotlicommon=1.0.9=h166bdaf_9 + - libbrotlidec=1.0.9=h166bdaf_9 + - libbrotlienc=1.0.9=h166bdaf_9 + - libcap=2.67=he9d0100_0 + - libcblas=3.9.0=16_linux64_mkl + - libclang=15.0.7=default_h7634d5b_3 + - libclang13=15.0.7=default_h9986a30_3 + - libcrc32c=1.1.2=h9c3ff4c_0 + - libcups=2.3.3=h36d4200_3 + - libcurl=8.1.2=h409715c_0 + - libdb=6.2.32=h9c3ff4c_0 + - libdeflate=1.17=h0b41bf4_0 + - libedit=3.1.20191231=he28a2e2_2 + - libev=4.33=h516909a_1 + - libevent=2.1.10=h28343ad_4 + - libexpat=2.5.0=hcb278e6_1 + - libffi=3.4.2=h7f98852_5 + - libflac=1.4.3=h59595ed_0 + - libgcc-ng=13.1.0=he5830b7_0 + - libgcrypt=1.10.1=h166bdaf_0 + - libgfortran-ng=13.1.0=h69a702a_0 + - libgfortran5=13.1.0=h15d22d2_0 + - libglib=2.76.4=hebfc3b9_0 + - libgomp=13.1.0=he5830b7_0 + - libgoogle-cloud=2.12.0=h840a212_1 + - libgpg-error=1.47=h71f35ed_0 + - libgrpc=1.56.2=h3905398_0 + - libhwloc=2.9.1=hd6dc26d_0 + - libiconv=1.17=h166bdaf_0 + - liblapack=3.9.0=16_linux64_mkl + - 
libllvm15=15.0.7=hadd5161_1 + - libnghttp2=1.52.0=h61bc06f_0 + - libnsl=2.0.0=h7f98852_0 + - libnuma=2.0.16=h0b41bf4_1 + - libogg=1.3.4=h7f98852_1 + - libopenblas=0.3.23=pthreads_h80387f5_0 + - libopus=1.3.1=h7f98852_1 + - libpng=1.6.39=h753d276_0 + - libpq=15.3=hbcd7760_1 + - libprotobuf=4.23.3=hd1fb520_0 + - libsndfile=1.2.0=hb75c966_0 + - libsodium=1.0.18=h36c2ea0_1 + - libsqlite=3.42.0=h2797004_0 + - libssh2=1.11.0=h0841786_0 + - libstdcxx-ng=13.1.0=hfd8a6a1_0 + - libsystemd0=253=h8c4010b_1 + - libthrift=0.18.1=h5e4af38_0 + - libtiff=4.5.0=h6adf6a1_2 + - libtool=2.4.7=h27087fc_0 + - libudev1=253=h0b41bf4_1 + - libutf8proc=2.8.0=h166bdaf_0 + - libuuid=2.38.1=h0b41bf4_0 + - libvorbis=1.3.7=h9c3ff4c_0 + - libwebp-base=1.3.1=hd590300_0 + - libxcb=1.13=h7f98852_1004 + - libxkbcommon=1.5.0=h79f4944_1 + - libxml2=2.10.3=hca2bb57_4 + - libzlib=1.2.13=hd590300_5 + - lightning-utilities=0.9.0=pyhd8ed1ab_0 + - llvm-openmp=16.0.6=h4dfa4b3_0 + - lz4-c=1.9.4=hcb278e6_0 + - mako=1.2.4=pyhd8ed1ab_0 + - markdown=3.4.4=pyhd8ed1ab_0 + - markupsafe=2.1.3=py311h459d7ec_0 + - matplotlib-base=3.7.2=py311h54ef318_0 + - matplotlib-inline=0.1.6=pyhd8ed1ab_0 + - mistune=3.0.0=pyhd8ed1ab_0 + - mkl=2022.2.1=h84fe81f_16997 + - mlflow=2.5.0=py311h1cf4b12_0 + - mpc=1.3.1=hfe3b2da_0 + - mpfr=4.2.0=hb012696_0 + - mpg123=1.31.3=hcb278e6_0 + - mpmath=1.3.0=pyhd8ed1ab_0 + - munkres=1.1.4=pyh9f0ad1d_0 + - mysql-common=8.0.33=hf1915f5_2 + - mysql-libs=8.0.33=hca2cd23_2 + - nbclient=0.8.0=pyhd8ed1ab_0 + - nbconvert=7.7.3=pyhd8ed1ab_0 + - nbconvert-core=7.7.3=pyhd8ed1ab_0 + - nbconvert-pandoc=7.7.3=pyhd8ed1ab_0 + - nbformat=5.9.2=pyhd8ed1ab_0 + - ncurses=6.4=hcb278e6_0 + - nest-asyncio=1.5.6=pyhd8ed1ab_0 + - nettle=3.6=he412f7d_0 + - networkx=3.1=pyhd8ed1ab_0 + - notebook=7.0.2=pyhd8ed1ab_0 + - notebook-shim=0.2.3=pyhd8ed1ab_0 + - nspr=4.35=h27087fc_0 + - nss=3.89=he45b914_0 + - numpy=1.25.2=py311h64a7726_0 + - oauthlib=3.2.2=pyhd8ed1ab_0 + - openh264=2.1.1=h780b84a_0 + - openjpeg=2.5.0=hfec8fc6_2 + - 
openssl=3.1.2=hd590300_0 + - orc=1.9.0=h385abfd_1 + - overrides=7.4.0=pyhd8ed1ab_0 + - packaging=23.1=pyhd8ed1ab_0 + - pandas=2.0.3=py311h320fe9a_1 + - pandoc=3.1.3=h32600fe_0 + - pandocfilters=1.5.0=pyhd8ed1ab_0 + - paramiko=3.3.1=pyhd8ed1ab_0 + - parso=0.8.3=pyhd8ed1ab_0 + - pcre2=10.40=hc3806b6_0 + - pexpect=4.8.0=pyh1a96a4e_2 + - pickleshare=0.7.5=py_1003 + - pillow=9.4.0=py311h50def17_1 + - pip=23.2.1=pyhd8ed1ab_0 + - pixman=0.40.0=h36c2ea0_0 + - pkgutil-resolve-name=1.3.10=pyhd8ed1ab_0 + - platformdirs=3.10.0=pyhd8ed1ab_0 + - ply=3.11=py_1 + - pooch=1.7.0=pyha770c72_3 + - prometheus_client=0.17.1=pyhd8ed1ab_0 + - prometheus_flask_exporter=0.22.4=pyhd8ed1ab_0 + - prompt-toolkit=3.0.39=pyha770c72_0 + - prompt_toolkit=3.0.39=hd8ed1ab_0 + - protobuf=4.23.3=py311hbec7ed6_0 + - psutil=5.9.5=py311h2582759_0 + - pthread-stubs=0.4=h36c2ea0_1001 + - ptyprocess=0.7.0=pyhd3deb0d_0 + - pulseaudio=16.1=hcb278e6_3 + - pulseaudio-client=16.1=h5195f5e_3 + - pulseaudio-daemon=16.1=ha8d29e2_3 + - pure_eval=0.2.2=pyhd8ed1ab_0 + - pyarrow=12.0.1=py311h39c9aba_7_cpu + - pycparser=2.21=pyhd8ed1ab_0 + - pygments=2.16.1=pyhd8ed1ab_0 + - pyjwt=2.8.0=pyhd8ed1ab_0 + - pynacl=1.5.0=py311hd4cff14_2 + - pyparsing=3.0.9=pyhd8ed1ab_0 + - pyqt=5.15.9=py311hf0fb5b6_4 + - pyqt5-sip=12.12.2=py311hb755f60_4 + - pysocks=1.7.1=pyha2e5f31_6 + - python=3.11.4=hab00c5b_0_cpython + - python-dateutil=2.8.2=pyhd8ed1ab_0 + - python-fastjsonschema=2.18.0=pyhd8ed1ab_0 + - python-json-logger=2.0.7=pyhd8ed1ab_0 + - python-tzdata=2023.3=pyhd8ed1ab_0 + - python_abi=3.11=3_cp311 + - pytorch=2.0.1=py3.11_cpu_0 + - pytorch-lightning=2.0.6=pyhd8ed1ab_0 + - pytorch-mutex=1.0=cpu + - pytz=2023.3=pyhd8ed1ab_0 + - pywin32-on-windows=0.1.0=pyh1179c8e_3 + - pyyaml=6.0=py311hd4cff14_5 + - pyzmq=25.1.1=py311h75c88c4_0 + - qt-main=5.15.8=h5d23da1_6 + - qtconsole=5.4.3=pyhd8ed1ab_0 + - qtconsole-base=5.4.3=pyha770c72_0 + - qtpy=2.3.1=pyhd8ed1ab_0 + - querystring_parser=1.2.4=py_0 + - rdma-core=28.9=h59595ed_1 + - 
re2=2023.03.02=h8c504da_0 + - readline=8.2=h8228510_1 + - referencing=0.30.2=pyhd8ed1ab_0 + - requests=2.31.0=pyhd8ed1ab_0 + - rfc3339-validator=0.1.4=pyhd8ed1ab_0 + - rfc3986-validator=0.1.1=pyh9f0ad1d_0 + - rocm-smi=5.6.0=h59595ed_1 + - rpds-py=0.9.2=py311h46250e7_0 + - s2n=1.3.46=h06160fa_0 + - scikit-learn=1.3.0=py311hc009520_0 + - scipy=1.11.1=py311h64a7726_0 + - send2trash=1.8.2=pyh41d4057_0 + - setuptools=68.0.0=pyhd8ed1ab_0 + - sip=6.7.11=py311hb755f60_0 + - six=1.16.0=pyh6c4a22f_0 + - smmap=3.0.5=pyh44b312d_0 + - snappy=1.1.10=h9fff704_0 + - sniffio=1.3.0=pyhd8ed1ab_0 + - soupsieve=2.3.2.post1=pyhd8ed1ab_0 + - sqlalchemy=2.0.19=py311h459d7ec_0 + - sqlparse=0.4.4=pyhd8ed1ab_0 + - stack_data=0.6.2=pyhd8ed1ab_0 + - sympy=1.12=pypyh9d50eac_103 + - tabulate=0.9.0=pyhd8ed1ab_1 + - tbb=2021.9.0=hf52228f_0 + - terminado=0.17.1=pyh41d4057_0 + - threadpoolctl=3.2.0=pyha21a80b_0 + - tinycss2=1.2.1=pyhd8ed1ab_0 + - tk=8.6.12=h27826a3_0 + - toml=0.10.2=pyhd8ed1ab_0 + - tomli=2.0.1=pyhd8ed1ab_0 + - torchmetrics=1.0.3=pyhd8ed1ab_0 + - torchvision=0.15.2=py311_cpu + - tornado=6.3.2=py311h459d7ec_0 + - tqdm=4.66.1=pyhd8ed1ab_0 + - traitlets=5.9.0=pyhd8ed1ab_0 + - typing-extensions=4.7.1=hd8ed1ab_0 + - typing_extensions=4.7.1=pyha770c72_0 + - typing_utils=0.1.0=pyhd8ed1ab_0 + - tzdata=2023c=h71feb2d_0 + - ucx=1.14.1=hf587318_2 + - uri-template=1.3.0=pyhd8ed1ab_0 + - urllib3=2.0.4=pyhd8ed1ab_0 + - wcwidth=0.2.6=pyhd8ed1ab_0 + - webcolors=1.13=pyhd8ed1ab_0 + - webencodings=0.5.1=py_1 + - websocket-client=1.6.1=pyhd8ed1ab_0 + - werkzeug=2.3.6=pyhd8ed1ab_0 + - wheel=0.41.1=pyhd8ed1ab_0 + - widgetsnbextension=4.0.8=pyhd8ed1ab_0 + - xcb-util=0.4.0=h516909a_0 + - xcb-util-image=0.4.0=h166bdaf_0 + - xcb-util-keysyms=0.4.0=h516909a_0 + - xcb-util-renderutil=0.3.9=h166bdaf_0 + - xcb-util-wm=0.4.1=h516909a_0 + - xkeyboard-config=2.38=h0b41bf4_0 + - xorg-kbproto=1.0.7=h7f98852_1002 + - xorg-libice=1.1.1=hd590300_0 + - xorg-libsm=1.2.4=h7391055_0 + - xorg-libx11=1.8.4=h0b41bf4_0 + - 
xorg-libxau=1.0.11=hd590300_0 + - xorg-libxdmcp=1.1.3=h7f98852_0 + - xorg-libxext=1.3.4=h0b41bf4_2 + - xorg-libxrender=0.9.10=h7f98852_1003 + - xorg-renderproto=0.11.1=h7f98852_1002 + - xorg-xextproto=7.3.0=h0b41bf4_1003 + - xorg-xproto=7.0.31=h7f98852_1007 + - xz=5.2.6=h166bdaf_0 + - yaml=0.2.5=h7f98852_2 + - zeromq=4.3.4=h9c3ff4c_1 + - zipp=3.16.2=pyhd8ed1ab_0 + - zlib=1.2.13=hd590300_5 + - zstd=1.5.2=hfc55251_7 +prefix: /home/h06/meastman/.conda/envs/dscop_cloud_class