R - 데이터 처리를 위한 패키지
dplyr 불러오기
require(dplyr)
Pipe operator
# Sample vector with mixed signs.
x <- c(1, -1, 2, -2, 3, -3)

# Nested-call form: mean of the absolute values.
mean(abs(x))

# The same computation written as a left-to-right pipeline.
x %>%
  abs() %>%
  mean()
filter 함수
# Keep rows with Sepal.Length above 5.5 and Petal.Length below 6, then
# preview the first rows. Comma-separated conditions are combined with AND.
iris %>%
  filter(Sepal.Length > 5.5, Petal.Length < 6) %>%
  head()
select 함수
# Base R: `[` cannot drop columns by name with a minus sign, so keep the
# columns whose names are not in the excluded set.
iris[!(names(iris) %in% c("Sepal.Length", "Sepal.Width"))]

# dplyr: select() accepts negated column names directly.
iris %>%
  select(-Sepal.Length, -Sepal.Width)
R기본 함수인 subset함수 사용
# subset() filters rows and drops columns in a single base R call.
subset(
  iris,
  Sepal.Length > 5.5 & Petal.Length < 6,
  select = -c(Sepal.Length, Sepal.Width)
)
위의 subset구문을 filter와 select로 구현
# dplyr counterpart of the base subset() call: filter the rows first, then
# drop the two sepal columns.
iris %>%
  filter(Sepal.Length > 5.5, Petal.Length < 6) %>%
  select(-Sepal.Length, -Sepal.Width)
mutate 함수
# Base R: transform() adds a per-row length_sum column.
# Use `+` rather than sum(): sum() collapses both columns into one grand
# total, which would then be recycled onto every row as a constant.
iris %>%
  transform(length_sum = Sepal.Length + Petal.Length) %>%
  head()

# dplyr: mutate() builds the same per-row column.
iris %>%
  mutate(length_sum = Sepal.Length + Petal.Length) %>%
  head()
mutate는 새로 만드는 변수를 참조해서 추가적인 새로운 변수를 또 만들 수 있음
summarise 함수
# Collapse the data to a single row: the mean of the Sepal.Length and
# Petal.Length values pooled into one vector.
iris %>%
  summarise(
    iris_mean = mean(c(Sepal.Length, Petal.Length))
  )
arrange 함수
# dplyr: sort by Sepal.Length ascending, breaking ties by Sepal.Width
# descending, then preview the first rows.
iris %>%
  arrange(Sepal.Length, desc(Sepal.Width)) %>%
  head()

# Base R equivalent. A per-key `decreasing` vector is only supported by the
# radix method, so request it explicitly instead of relying on "auto".
# TRUE/FALSE are spelled out because T and F are reassignable variables.
head(iris[order(iris$Sepal.Length, iris$Sepal.Width,
                decreasing = c(FALSE, TRUE), method = "radix"), ])
group_by 함수
# Grouped mutate: every row receives its species' mean of
# (Sepal.Length + Sepal.Width); the row count is unchanged.
iris %>%
  group_by(Species) %>%
  mutate(iris_length = mean(Sepal.Length + Sepal.Width))

# Grouped summarise: one row per species with the mean sepal length (mean1)
# and the mean petal length (mean2).
iris %>%
  group_by(Species) %>%
  summarise(
    mean1 = mean(Sepal.Length),
    mean2 = mean(Petal.Length)
  )
reshape2 패키지
require(reshape2)
melt 함수
# Reshape to long format: Species is kept as the id column and the two
# sepal measurements are stacked on top of each other.
melt(
  iris,
  id.vars = "Species",
  measure.vars = c("Sepal.Length", "Sepal.Width")
)
cast 함수
# Long-format copy of the two sepal measurements, keyed by Species.
iris_melt <- melt(
  iris,
  id.vars = "Species",
  measure.vars = c("Sepal.Length", "Sepal.Width")
)

# Cast back to wide: one row per species, one column per measured variable,
# with the values in each cell aggregated by mean.
dcast(iris_melt, Species ~ variable, fun.aggregate = mean)

# The same per-species, per-variable means computed with dplyr; the result
# stays in long format.
iris_melt %>%
  group_by(Species, variable) %>%
  summarise(mean = mean(value))
과제
# Load the built-in airquality data set into the workspace.
data(airquality)

# Add a wind chill column computed from Temp and Wind.
# NOTE(review): the coefficients match the NWS wind chill formula, which
# expects degrees Fahrenheit and miles per hour - confirm the column units.
airquality <- airquality %>%
  mutate(
    windchill = 35.74 + 0.6215 * Temp - 35.75 * Wind^0.16 +
      0.4275 * Temp * Wind^0.16
  )

# Monthly mean of each measurement, ignoring missing values.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change na.rm.
airquality %>%
  group_by(Month) %>%
  summarise(
    Ozone = mean(Ozone, na.rm = TRUE),
    Solar.R = mean(Solar.R, na.rm = TRUE),
    Wind = mean(Wind, na.rm = TRUE),
    Temp = mean(Temp, na.rm = TRUE)
  )