-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprogress.Rmd
99 lines (68 loc) · 2.86 KB
/
progress.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# Project Progress
## Get and extract data
```{r}
# Fetch the UCI HAR dataset archive once and unpack it under `downloadDir`.
# Both steps are skipped when their output already exists on disk.
url <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
downloadDir <- "data"

# Join path components with "/". Kept as a named helper because other chunks
# in this document call it.
path <- function(...) {
  file.path(...)
}

# download.file() does not create missing directories, so make sure the
# target directory exists before fetching.
if (!dir.exists(downloadDir)) {
  dir.create(downloadDir, recursive = TRUE)
}

zipFile <- path(downloadDir, "dataset.zip")
if (!file.exists(zipFile)) {
  status <- download.file(url, zipFile, method = "curl")
  if (status != 0) {
    # Remove any partial file so the next run retries instead of trying to
    # unzip a truncated archive.
    unlink(zipFile)
    stop("Download of ", url, " failed with status ", status, call. = FALSE)
  }
}

dataDir <- path(downloadDir, "UCI HAR Dataset")
if (!file.exists(dataDir)) {
  unzip(zipFile, exdir = downloadDir)
}
```
## "1. Merges the training and the test sets to create one data set."
Merge and add in the feature names:
```{r}
# Load one table from inside the extracted dataset folder.
# `path` is the file location relative to `dataDir`.
read <- function(path) {
  read.table(paste(dataDir, path, sep = "/"))
}

# Skip the reads when the tables are already present in the workspace.
if (!exists("XTrain")) {
  XTrain <- read("train/X_train.txt")
}
if (!exists("XTest")) {
  XTest <- read("test/X_test.txt")
}

# Stack the two sets, training rows first and test rows after.
merged <- rbind(XTrain, XTest)

# The second column of features.txt supplies the column names.
featureNames <- read("features.txt")[[2]]
names(merged) <- featureNames
```
## "2. Extracts only the measurements on the mean and standard deviation for each measurement."
Limit to columns with feature names matching mean() or std():
```{r}
# Flag the features whose name contains a literal "mean()" or "std()",
# then keep just those columns.
isMeanOrStd <- grepl("(mean|std)\\(\\)", names(merged))
matches <- which(isMeanOrStd)
limited <- merged[, matches]
```
## "3. Uses descriptive activity names to name the activities in the data set"
Get the activity data and map to nicer names:
```{r}
# Activity codes for every observation, stacked training-first.
yTrain <- read("train/y_train.txt")
yTest <- read("test/y_test.txt")
yStacked <- rbind(yTrain, yTest)
yMerged <- yStacked[[1]]

# Readable labels; each activity code is used as an index into this vector.
activityNames <- c(
  "Walking",
  "Walking Upstairs",
  "Walking Downstairs",
  "Sitting",
  "Standing",
  "Laying"
)
activities <- activityNames[yMerged]
```
## "4. Appropriately labels the data set with descriptive variable names."
Change a leading t to Time and a leading f to Frequency, -mean() to Mean and -std() to StdDev, then remove the remaining hyphens and collapse the duplicated BodyBody to Body:
```{r}
# Ordered pattern -> replacement table. Order matters: the "-mean()" and
# "-std()" rewrites must run before the generic hyphen removal.
renames <- c(
  "^t" = "Time",
  "^f" = "Frequency",
  "-mean\\(\\)" = "Mean",
  "-std\\(\\)" = "StdDev",
  "-" = "",
  # Some of the data is misnamed as fBodyBody..., fix it up to have only one Body
  "BodyBody" = "Body"
)

newNames <- names(limited)
for (pattern in names(renames)) {
  newNames <- gsub(pattern, renames[[pattern]], newNames)
}
names(limited) <- newNames
```
## Add activities and subject with nice names
```{r}
# Subject identifiers for every observation, stacked training-first.
subjectTrain <- read("train/subject_train.txt")
subjectTest <- read("test/subject_test.txt")
subjectTable <- rbind(subjectTrain, subjectTest)
subjects <- subjectTable[[1]]

# Put the two key columns in front of the selected measurement columns.
tidy <- cbind(
  Subject = subjects,
  Activity = activities,
  limited
)
```
## "5. Creates a second, independent tidy data set with the average of each variable for each activity and each subject."
```{r}
library(plyr)

# Average every measurement column of one Subject/Activity group. The first
# two columns (Subject, Activity) are dropped before averaging.
limitedColMeans <- function(data) {
  measurements <- data[, -(1:2)]
  colMeans(measurements)
}

# One row per Subject/Activity pair holding the per-column averages.
tidyMeans <- ddply(tidy, .(Subject, Activity), limitedColMeans)

# Prefix every averaged column with "Mean", leaving the two key columns alone.
keyCols <- c(1, 2)
averagedNames <- names(tidyMeans)[-keyCols]
names(tidyMeans)[-keyCols] <- paste0("Mean", averagedNames)
```
## Write file
```{r}
# Save the summary table without row names, then read it straight back in
# with its header row.
write.table(x = tidyMeans, file = "tidyMeans.txt", row.names = FALSE)
tidyMeansCheck <- read.table(file = "tidyMeans.txt", header = TRUE)
```