-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_analysis.R
More file actions
99 lines (70 loc) · 4.14 KB
/
run_analysis.R
File metadata and controls
99 lines (70 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# including packages for access to useful functions
# plyr is attached before dplyr so that dplyr's summarize()/mutate()/etc.
# are not masked by the plyr functions of the same name
library(plyr)
library(dplyr)
library(reshape2)
# Load the activity lookup table (numeric activity ID -> descriptive label),
# keeping the labels as plain character strings rather than factors
activity <- setNames(
  read.table("./UCI HAR Dataset/activity_labels.txt", stringsAsFactors = FALSE),
  c("activity_ID","activity_label")
)
# Step 1. Merges the training and the test sets to create one data set.
# The feature labels read from features.txt are used as the column names of
# both measurement tables, so they are loaded once before either partition.
features <- setNames(
  read.table("./UCI HAR Dataset/features.txt", stringsAsFactors = FALSE),
  c("feature_ID", "feature_label")
)
# 1.1 Assemble the complete test data
# 1.1.1 Subject identifiers for the test rows
subject_test <- setNames(
  read.table("./UCI HAR Dataset/test/subject_test.txt"),
  "subject_ID"
)
# 1.1.2 Activity identifiers for the test rows
Y_test <- setNames(
  read.table("./UCI HAR Dataset/test/y_test.txt"),
  "activity_ID"
)
# 1.1.3 / 1.1.4 Feature measurements for the test rows, with columns named
# after the feature labels
X_test <- setNames(
  read.table("./UCI HAR Dataset/test/X_test.txt"),
  features$feature_label
)
# 1.1.5 Subject, activity and measurements side by side
test_Data <- cbind(subject_test, Y_test, X_test)
# 1.2 Assemble the complete train data
# 1.2.1 Subject identifiers for the train rows
subject_train <- setNames(
  read.table("./UCI HAR Dataset/train/subject_train.txt"),
  "subject_ID"
)
# 1.2.2 Activity identifiers for the train rows
Y_train <- setNames(
  read.table("./UCI HAR Dataset/train/y_train.txt"),
  "activity_ID"
)
# 1.2.3 / 1.2.4 Feature measurements for the train rows, with columns named
# after the feature labels
X_train <- setNames(
  read.table("./UCI HAR Dataset/train/X_train.txt"),
  features$feature_label
)
# 1.2.5 Subject, activity and measurements side by side
train_Data <- cbind(subject_train, Y_train, X_train)
# 1.3 Stack the test rows on top of the train rows
combined_Data <- rbind(test_Data, train_Data)
# Step 2. Extracts only the measurements on the mean and standard deviation for each measurement.
# 2.1 Feature labels to keep: every label whose text contains "mean" or "std"
#     (a substring match, so any label merely containing "mean" is kept too)
keep_label <- grepl("mean|std", features$feature_label)
retainList <- features$feature_label[keep_label]
# 2.2 Positions of those labels among the columns of combined_Data
retainColIndex <- match(retainList, names(combined_Data))
# 2.3 Keep column 1 (subject_ID), column 2 (activity_ID) and the matched
#     measurement columns
keep_cols <- c(1, 2, retainColIndex)
extract_Data <- combined_Data[, keep_cols]
# Step 3. Uses descriptive activity names to name the activities in the data set
# 3.1 Look up the label of every row's activity_ID in the activity table;
#     the join appends activity_label as the last column of extract_Data
extract_Data <- plyr::join(extract_Data, activity, by = "activity_ID")
# 3.2 Move activity_label from the last position to sit right after
#     activity_ID, leaving the measurement columns in their existing order
lastCol <- ncol(extract_Data)
lastbutoneCol <- lastCol - 1
column_order <- c(1, 2, lastCol, 3:lastbutoneCol)
extract_Data <- extract_Data[, column_order]
# Step 4. Appropriately labels the data set with descriptive variable names
# Already achieved in steps 1.1.4 and 1.2.4 above
# We had altered column names based on data in "features.txt"
# Step 5. From the data set in step 4, creates a second, independent tidy data set
# with the average of each variable for each activity and each subject.
# 5.1 Melt extract_Data so that every feature measurement ends up in a single
#     value column, identified by subject ID, activity ID and activity label.
#     The measurement columns are taken to be "everything that is not an
#     identifier" instead of the fixed range 4:82, so this step keeps working
#     if the set of retained features changes.
id_cols <- c("subject_ID", "activity_ID", "activity_label")
measure_cols <- setdiff(names(extract_Data), id_cols)
melt_Data <- melt(extract_Data, id.vars = id_cols, measure.vars = measure_cols)
# 5.2 Group melted data by subject ID, activity ID, activity label and feature.
#     The dplyr:: prefix is deliberate: plyr exports summarize()/summarise()
#     as well, and plyr's version ignores the grouping and collapses the whole
#     table into one row. Qualifying the calls makes the result independent of
#     which of the two packages happens to be attached last.
group_Data <- dplyr::group_by(
  melt_Data, subject_ID, activity_ID, activity_label, variable
)
# 5.3 Average each feature measurement / variable value for each activity and
#     each subject, then drop the leftover grouping so the result behaves as a
#     plain table when it is renamed and written out
avg_Data <- dplyr::summarise(group_Data, average = mean(value, na.rm = TRUE))
avg_Data <- dplyr::ungroup(avg_Data)
# 5.4 Write the achieved tidy data set, avg_Data, into a .txt file in the working directory
#     melt() stores the feature name as a factor; convert it to plain text
avg_Data$variable <- as.character(avg_Data$variable)
names(avg_Data) <- c("subject","activity_type","activity_label","feature","mean_measure")
write.table(avg_Data, file = "tidy_Data.txt", row.names = FALSE)