-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathimputation.py
65 lines (52 loc) · 2.12 KB
/
imputation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""Imputation of proteomics data."""
import logging
import numpy as np
import pandas as pd
# Module-scoped logger: records carry this module's name, so they can be
# filtered or configured on their own while still propagating to whatever
# handlers the application attaches to the root logger.
logger = logging.getLogger(__name__)
# Logarithm used by `log2`; bound to a module-level name so the transform is
# defined in a single place.
NP_LOG_FCT = np.log2


def log2(row: pd.Series):
    """Log-transform values, treating exact zeros as missing.

    Parameters
    ----------
    row: pd.Series
        Values to transform.

    Returns
    -------
    The result of applying ``NP_LOG_FCT`` to `row` after every entry equal
    to zero has been replaced by NaN.
    """
    # Mask zeros first: the logarithm of zero would be -inf, whereas a NaN
    # marks the entry as missing.
    is_nonzero = row != 0.0
    without_zeros = row.where(is_nonzero)
    return NP_LOG_FCT(without_zeros)
# Seed applied to NumPy's global random state before drawing, so repeated
# calls on the same data impute the same values.
RANDOM_SEED = 123
# Default number of standard deviations by which the imputation mean is
# shifted below the observed mean.
IMPUTATION_MEAN_SHIFT = 1.8
# Default factor applied to the observed standard deviation.
IMPUTATION_STD_SHRINKAGE = 0.3


def imputation_normal_distribution(log_intensities: pd.Series,
                                   mean_shift=IMPUTATION_MEAN_SHIFT,
                                   std_shrinkage=IMPUTATION_STD_SHRINKAGE
                                   ) -> pd.Series:
    """Impute missing log-transformed intensity values of DDA run.

    Missing entries are replaced by draws from a normal distribution whose
    mean is the observed mean shifted down by ``mean_shift`` observed
    standard deviations and whose standard deviation is the observed one
    multiplied by ``std_shrinkage``. Every missing entry receives its own
    draw.

    Parameters
    ----------
    log_intensities: pd.Series
        Series of normally distributed values. Here usually log-transformed
        protein intensities. Other inputs are converted with ``pd.Series``
        (a warning is logged).
    mean_shift: integer, float
        Shift the mean of the log_intensities by factors of their standard
        deviation to the negative. Must not be negative.
    std_shrinkage: float
        Value greater than zero by which to shrink (or inflate) the
        standard deviation of the log_intensities.

    Returns
    -------
    pd.Series
        New Series with the index of `log_intensities`; observed values are
        kept, missing values are replaced by the random draws.

    Raises
    ------
    ValueError
        If the data cannot be converted to a Series, if `mean_shift` is
        negative, or if `std_shrinkage` is not greater than zero.

    Notes
    -----
    NumPy's global random state is re-seeded with ``RANDOM_SEED`` on every
    call. This makes the result reproducible, but it also resets the random
    stream for any other code using ``np.random``.
    """
    np.random.seed(RANDOM_SEED)
    if not isinstance(log_intensities, pd.Series):
        try:
            # array-like, Iterable, dict, or scalar value?
            log_intensities = pd.Series(log_intensities)
            logger.warning("Series created of Iterable.")
        except Exception as e:
            raise ValueError(
                "Plese provided data which is a pandas.Series or an Iterable",
                e) from e
    if mean_shift < 0:
        raise ValueError(
            "Please specify a positive float as the std.-dev. is non-negative.")
    if std_shrinkage <= 0:
        raise ValueError(
            "Please specify a positive float as shrinkage factor for std.-dev.")
    if std_shrinkage >= 1:
        logger.warning("Standard Deviation will increase for imputed values.")

    mean = log_intensities.mean()
    std = log_intensities.std()

    mean_shifted = mean - (std * mean_shift)
    std_shrinked = std * std_shrinkage

    # Draw one value per position. A single scalar draw would assign the
    # identical value to every missing entry instead of sampling from the
    # shifted distribution. Only the draws at missing positions are used.
    draws = pd.Series(
        np.random.normal(mean_shifted, std_shrinked,
                         size=len(log_intensities)),
        index=log_intensities.index)
    return log_intensities.where(log_intensities.notna(), draws)