Skip to content

Commit

Permalink
Add Path in loggers and clearer error handling (#261)
Browse files Browse the repository at this point in the history
* Add Path in loggers and clearer error handling

* Cleanup

* Fix typo

* Add warning for training config

* Remove cmake as Horovod is not there anymore

* Backup

* Cleanup

* Fix path problems

* Use elif when possible

* Fix type hints

* Remove identifier casting

* Refactor action
  • Loading branch information
matbun authored Dec 3, 2024
1 parent a964e47 commit af68d90
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 70 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,6 @@ jobs:
VALIDATE_ALL_CODEBASE: false
# Fail on errors
DISABLE_ERRORS: false
# Skip linting of docs
FILTER_REGEX_EXCLUDE: .*docs/index.md|.*docs/docs/.*|.*ISSUE_TEMPLATE/.*|use-cases/.*|experimental/.*
BASH_SEVERITY: error
# Skip linting of some locations
FILTER_REGEX_EXCLUDE: .*ISSUE_TEMPLATE/.*|use-cases/.*
BASH_SEVERITY: warning
2 changes: 1 addition & 1 deletion .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ build:
apt_packages:
- gcc-11
- g++-11
- cmake
# - cmake
- pandoc

# Build documentation in the "docs/" directory with Sphinx
Expand Down
108 changes: 51 additions & 57 deletions src/itwinai/loggers.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ class Logger(LogMixin):
"""Base class for logger
Args:
savedir (str, optional): filesystem location where logs are stored.
savedir (Union[Path, str], optional): filesystem location where logs are stored.
Defaults to 'mllogs'.
log_freq (Union[int, Literal['epoch', 'batch']], optional):
how often should the logger fulfill calls to the `log()`
Expand All @@ -136,7 +136,7 @@ class Logger(LogMixin):
"""

#: Location on filesystem where to store data.
savedir: str = None
savedir: Path
#: Supported logging 'kind's.
supported_kinds: Tuple[str]
#: Current worker global rank
Expand All @@ -146,13 +146,13 @@ class Logger(LogMixin):

def __init__(
self,
savedir: str = "mllogs",
savedir: Union[Path, str] = "mllogs",
log_freq: Union[int, Literal["epoch", "batch"]] = "epoch",
log_on_workers: Union[int, List[int]] = 0,
experiment_id: Optional[str] = None,
run_id: Optional[Union[int, str]] = None,
) -> None:
self.savedir = savedir
self.savedir = Path(savedir)
self.log_freq = log_freq
self.log_on_workers = log_on_workers
self._experiment_id = experiment_id
Expand Down Expand Up @@ -209,8 +209,7 @@ def start_logging(self, rank: Optional[int] = None):

@abstractmethod
def create_logger_context(self, rank: Optional[int] = None) -> Any:
"""
Initializes the logger context.
"""Initializes the logger context.
Args:
rank (Optional[int]): global rank of current process,
Expand Down Expand Up @@ -240,7 +239,7 @@ def serialize(self, obj: Any, identifier: str) -> str:
Returns:
str: local path of the serialized object to be logged.
"""
itm_path = os.path.join(self.savedir, identifier)
itm_path = self.savedir / identifier
with open(itm_path, "wb") as itm_file:
pickle.dump(obj, itm_file)

Expand Down Expand Up @@ -301,7 +300,7 @@ class _EmptyLogger(Logger):

def __init__(
self,
savedir: str = "mllogs",
savedir: Union[Path, str] = "mllogs",
log_freq: int | Literal["epoch"] | Literal["batch"] = "epoch",
log_on_workers: int | List[int] = 0,
) -> None:
Expand Down Expand Up @@ -332,7 +331,7 @@ class ConsoleLogger(Logger):
"""Simplified logger.
Args:
savedir (str, optional): where to store artifacts.
savedir (Union[Path, str], optional): where to store artifacts.
Defaults to 'mllogs'.
log_freq (Union[int, Literal['epoch', 'batch']], optional):
determines whether the logger should fulfill or ignore
Expand All @@ -349,16 +348,15 @@ class ConsoleLogger(Logger):

def __init__(
self,
savedir: str = "mllogs",
savedir: Union[Path, str] = "mllogs",
log_freq: Union[int, Literal["epoch", "batch"]] = "epoch",
log_on_workers: Union[int, List[int]] = 0,
) -> None:
savedir = savedir = Path(savedir) / "simple-logger"
super().__init__(savedir=savedir, log_freq=log_freq, log_on_workers=log_on_workers)
cl_savedir = Path(savedir) / "simple-logger"
super().__init__(savedir=cl_savedir, log_freq=log_freq, log_on_workers=log_on_workers)

def create_logger_context(self, rank: Optional[int] = None):
"""
Initializes the logger context.
"""Initializes the logger context.
Args:
rank (Optional[int]): global rank of current process,
Expand Down Expand Up @@ -462,7 +460,7 @@ class MLFlowLogger(Logger):
"""Abstraction around MLFlow logger.
Args:
savedir (str, optional): path on local filesystem where logs are
savedir (Union[Path, str], optional): path on local filesystem where logs are
stored. Defaults to 'mllogs'.
experiment_name (str, optional): experiment name. Defaults to
``itwinai.loggers.BASE_EXP_NAME``.
Expand Down Expand Up @@ -501,16 +499,16 @@ class MLFlowLogger(Logger):

def __init__(
self,
savedir: str = "mllogs",
savedir: Union[Path, str] = "mllogs",
experiment_name: str = BASE_EXP_NAME,
tracking_uri: Optional[str] = None,
run_description: Optional[str] = None,
run_name: Optional[str] = None,
log_freq: Union[int, Literal["epoch", "batch"]] = "epoch",
log_on_workers: Union[int, List[int]] = 0,
):
savedir = os.path.join(savedir, "mlflow")
super().__init__(savedir=savedir, log_freq=log_freq, log_on_workers=log_on_workers)
mfl_savedir = Path(savedir) / "mlflow"
super().__init__(savedir=mfl_savedir, log_freq=log_freq, log_on_workers=log_on_workers)
self.tracking_uri = tracking_uri
self.run_description = run_description
self.run_name = run_name
Expand All @@ -523,8 +521,7 @@ def __init__(
)

def create_logger_context(self, rank: Optional[int] = None) -> mlflow.ActiveRun:
"""
Initializes the logger context. Start MLFLow run.
"""Initializes the logger context. Start MLFLow run.
Args:
rank (Optional[int]): global rank of current process,
Expand Down Expand Up @@ -600,22 +597,22 @@ def log(

if kind == "metric":
mlflow.log_metric(key=identifier, value=item, step=step)
if kind == "artifact":
elif kind == "artifact":
if not isinstance(item, str):
# Save the object locally and then log it
name = os.path.basename(identifier)
save_path = Path(self.savedir) / ".trash" / name
save_path = self.savedir / ".trash" / str(name)
save_path.mkdir(os.path.dirname(save_path), exist_ok=True)
item = self.serialize(item, save_path)
mlflow.log_artifact(local_path=item, artifact_path=identifier)
if kind == "model":
elif kind == "model":
import torch

if isinstance(item, torch.nn.Module):
mlflow.pytorch.log_model(item, identifier)
else:
print("WARNING: unrecognized model type")
if kind == "dataset":
elif kind == "dataset":
# Log mlflow dataset
# https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.log_input
# It may be needed to convert item into a mlflow dataset, e.g.:
Expand All @@ -625,37 +622,37 @@ def log(
mlflow.log_input(item)
else:
print("WARNING: unrecognized dataset type. " "Must be an MLFlow dataset")
if kind == "torch":
elif kind == "torch":
import torch

# Save the object locally and then log it
name = os.path.basename(identifier)
save_path = Path(self.savedir) / ".trash" / name
save_path = self.savedir / ".trash" / str(name)
save_path.mkdir(os.path.dirname(save_path), exist_ok=True)
torch.save(item, save_path)
# Log into mlflow
mlflow.log_artifact(local_path=save_path, artifact_path=identifier)
if kind == "dict":
elif kind == "dict":
mlflow.log_dict(dictionary=item, artifact_file=identifier)
if kind == "figure":
elif kind == "figure":
mlflow.log_figure(
artifact_file=identifier,
figure=item,
save_kwargs=kwargs.get("save_kwargs"),
)
if kind == "image":
elif kind == "image":
mlflow.log_image(artifact_file=identifier, image=item)
if kind == "param":
elif kind == "param":
mlflow.log_param(key=identifier, value=item)
if kind == "text":
elif kind == "text":
mlflow.log_text(artifact_file=identifier, text=item)


class WandBLogger(Logger):
"""Abstraction around WandB logger.
Args:
savedir (str, optional): location on local filesystem where logs
savedir (Union[Path, str], optional): location on local filesystem where logs
are stored. Defaults to 'mllogs'.
project_name (str, optional): experiment name. Defaults to
``itwinai.loggers.BASE_EXP_NAME``.
Expand Down Expand Up @@ -685,18 +682,17 @@ class WandBLogger(Logger):

def __init__(
self,
savedir: str = "mllogs",
savedir: Union[Path, str] = "mllogs",
project_name: str = BASE_EXP_NAME,
log_freq: Union[int, Literal["epoch", "batch"]] = "epoch",
log_on_workers: Union[int, List[int]] = 0,
) -> None:
savedir = os.path.join(savedir, "wandb")
super().__init__(savedir=savedir, log_freq=log_freq, log_on_workers=log_on_workers)
wbl_savedir = Path(savedir) / "wandb"
super().__init__(savedir=wbl_savedir, log_freq=log_freq, log_on_workers=log_on_workers)
self.project_name = project_name

def create_logger_context(self, rank: Optional[int] = None) -> None:
"""
Initializes the logger context. Init WandB run.
"""Initializes the logger context. Init WandB run.
Args:
rank (Optional[int]): global rank of current process,
Expand All @@ -707,10 +703,11 @@ def create_logger_context(self, rank: Optional[int] = None) -> None:
if not self.should_log():
return

os.makedirs(os.path.join(self.savedir, "wandb"), exist_ok=True)
self.active_run = wandb.init(
dir=os.path.abspath(self.savedir), project=self.project_name
(self.savedir / "wandb").mkdir(
exist_ok=True,
parents=True,
)
self.active_run = wandb.init(dir=self.savedir.resolve(), project=self.project_name)

def destroy_logger_context(self):
"""Destroy logger."""
Expand Down Expand Up @@ -767,7 +764,7 @@ class TensorBoardLogger(Logger):
TensorFlow.
Args:
savedir (str, optional): location on local filesystem where logs
savedir (Union[Path, str], optional): location on local filesystem where logs
are stored. Defaults to 'mllogs'.
log_freq (Union[int, Literal['epoch', 'batch']], optional):
determines whether the logger should fulfill or ignore
Expand All @@ -793,29 +790,28 @@ class TensorBoardLogger(Logger):

def __init__(
self,
savedir: str = "mllogs",
savedir: Union[Path, str] = "mllogs",
log_freq: Union[int, Literal["epoch", "batch"]] = "epoch",
framework: Literal["tensorflow", "pytorch"] = "pytorch",
log_on_workers: Union[int, List[int]] = 0,
) -> None:
savedir = os.path.join(savedir, "tensorboard")
super().__init__(savedir=savedir, log_freq=log_freq, log_on_workers=log_on_workers)
tbl_savedir = Path(savedir) / "tensorboard"
super().__init__(savedir=tbl_savedir, log_freq=log_freq, log_on_workers=log_on_workers)
self.framework = framework
if framework.lower() == "tensorflow":
import tensorflow as tf

self.tf = tf
self.writer = tf.summary.create_file_writer(savedir)
self.writer = tf.summary.create_file_writer(tbl_savedir.resolve().as_posix())
elif framework.lower() == "pytorch":
from torch.utils.tensorboard import SummaryWriter

self.writer = SummaryWriter(savedir)
self.writer = SummaryWriter(tbl_savedir.resolve().as_posix())
else:
raise ValueError("Framework must be either 'tensorflow' or 'pytorch'")

def create_logger_context(self, rank: Optional[int] = None) -> None:
"""
Initializes the logger context. Init Tensorboard run.
"""Initializes the logger context. Init Tensorboard run.
Args:
rank (Optional[int]): global rank of current process,
Expand Down Expand Up @@ -914,7 +910,7 @@ class LoggersCollection(Logger):
supported_kinds: Tuple[str]

def __init__(self, loggers: List[Logger]) -> None:
super().__init__(savedir="/tmp/mllogs_LoggersCollection", log_freq=1)
super().__init__(savedir=Path("/tmp/mllogs_LoggersCollection"), log_freq=1)
self.loggers = loggers

def should_log(self, batch_idx: int = None) -> bool:
Expand Down Expand Up @@ -964,8 +960,7 @@ def log(
)

def create_logger_context(self, rank: Optional[int] = None) -> Any:
"""
Initializes all loggers.
"""Initializes all loggers.
Args:
rank (Optional[int]): global rank of current process,
Expand Down Expand Up @@ -998,7 +993,7 @@ class Prov4MLLogger(Logger):
files will be uploaded. Defaults to "www.example.org".
experiment_name (str, optional): experiment name.
Defaults to "experiment_name".
provenance_save_dir (str, optional): path where to store provenance
provenance_save_dir (Union[Path, str], optional): path where to store provenance
files and logs. Defaults to "prov".
save_after_n_logs (Optional[int], optional): how often to save
logs to disk from main memory. Defaults to 100.
Expand Down Expand Up @@ -1031,9 +1026,9 @@ class Prov4MLLogger(Logger):

def __init__(
self,
prov_user_namespace="www.example.org",
experiment_name="experiment_name",
provenance_save_dir="mllogs/prov_logs",
prov_user_namespace: str = "www.example.org",
experiment_name: str = "experiment_name",
provenance_save_dir: Union[Path, str] = "mllogs/prov_logs",
save_after_n_logs: Optional[int] = 100,
create_graph: Optional[bool] = True,
create_svg: Optional[bool] = True,
Expand All @@ -1054,8 +1049,7 @@ def __init__(

@override
def create_logger_context(self, rank: Optional[int] = None):
"""
Initializes the logger context.
"""Initializes the logger context.
Args:
rank (Optional[int]): global rank of current process,
Expand Down
Loading

0 comments on commit af68d90

Please sign in to comment.