-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathmisspandas.py
57 lines (41 loc) · 927 Bytes
/
misspandas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#Dealing with Missing Data using Pandas
import numpy as np
import pandas as pd
# Sample frame: column A has one missing value, B has two, C has none.
d = {'A': [1, 2, np.nan], 'B': [5, np.nan, np.nan], 'C': [1, 2, 3]}
df = pd.DataFrame(d)
# >>> df
#      A    B  C
# 0  1.0  5.0  1
# 1  2.0  NaN  2
# 2  NaN  NaN  3

# Each call below is used in its non-inplace form, so it returns a new
# object and leaves df untouched. The results are bound to names rather
# than left as bare expressions, which would be discarded outside a REPL.

# dropna() works along axis=0 by default: any row containing a NaN is dropped.
rows_without_na = df.dropna()
# >>> df.dropna()
#      A    B  C
# 0  1.0  5.0  1

# To drop the columns that contain a NaN instead, pass axis=1.
cols_without_na = df.dropna(axis=1)
# >>> df.dropna(axis=1)
#    C
# 0  1
# 1  2
# 2  3

# thresh=2 keeps only the rows that have at least two non-NaN values.
rows_min_two_values = df.dropna(thresh=2)
# >>> df.dropna(thresh=2)
#      A    B  C
# 0  1.0  5.0  1
# 1  2.0  NaN  2

# Replace every NaN with a fixed placeholder value.
filled_placeholder = df.fillna(value="FILL VALUE")
# >>> df.fillna(value="FILL VALUE")
# (number formatting in the now mixed-type columns may vary by pandas version)
#             A           B  C
# 0           1           5  1
# 1           2  FILL VALUE  2
# 2  FILL VALUE  FILL VALUE  3

# Replace the NaN in column A with the mean of its non-missing values (1.5).
a_filled_mean = df['A'].fillna(value=df['A'].mean())
# >>> df['A'].fillna(value=df['A'].mean())
# 0    1.0
# 1    2.0
# 2    1.5
# Name: A, dtype: float64

if __name__ == "__main__":
    # Bare expressions only echo in an interactive session; print explicitly
    # so that running this file as a script shows every result. Guarded so
    # that importing the module stays silent.
    for label, result in (
        ("df", df),
        ("df.dropna()", rows_without_na),
        ("df.dropna(axis=1)", cols_without_na),
        ("df.dropna(thresh=2)", rows_min_two_values),
        ('df.fillna(value="FILL VALUE")', filled_placeholder),
        ("df['A'].fillna(value=df['A'].mean())", a_filled_mean),
    ):
        print(">>> " + label)
        print(result)
        print()