R - 데이터 처리를 위한 패키지 2

merge 함수

# Join two small data frames on their shared `name` key, once with base R's
# merge() and once with dplyr's inner_join().
library(dplyr) # provides inner_join(); attached here at its first use

dat1 <- data.frame(
  name = c("김준수", "박명춘", "양승찬"),
  height = c(190, 20, 40)
)
dat2 <- data.frame(
  name = c("김준수", "박명춘", "양승찬"),
  weight = c(100, 200, 300)
)

# Both calls match rows on `name`; the results are printed, not stored.
merge(dat1, dat2, by = "name")
inner_join(dat1, dat2, by = "name")

데이터 처리 실습

# Facebook post data: read the two CSV files, join them, derive calendar and
# page-likes-level columns, then compare mean engagement across groupings.
# Assumes dplyr (inner_join, mutate, group_by, summarise, case_when, %>%) is
# already attached.
library(lubridate) # provides month(), which base R does not have

# NOTE(review): machine-specific absolute path. The relative paths used below
# (and by later sections of this file) are resolved against it, so it is kept.
setwd(r"(C:\Users\BMChun\Desktop\2020b\기초프로그래밍\수업자료\11._data_analysis_(application) (1))")
fb1 <- read.csv(file = "facebook data/facebook1.csv")
fb2 <- read.csv(file = "facebook data/facebook2.csv")
head(fb1)
head(fb2)

# Normalise column names: lower-case, with dots replaced by underscores.
names(fb1) <- gsub("[.]", "_", tolower(names(fb1)))
names(fb2) <- gsub("[.]", "_", tolower(names(fb2)))

# Keep only posts present in both files, matched on the posting timestamp.
fb <- inner_join(fb1, fb2, by = c("post_date", "post_hour", "post_minute"))

# post_date is parsed with the "%Y%m%d" format; month and weekday come from it.
sr <- as.Date(as.character(fb$post_date), format = "%Y%m%d")
fb <- fb %>% mutate(month = month(sr), weekday = weekdays(sr))

# Split page_total_likes at its 1/3 and 2/3 quantiles into Low / Mid / High.
# The column is referenced through mutate()'s data mask rather than `fb$...`.
qt <- quantile(fb$page_total_likes, probs = c(1, 2) / 3)
fb <- fb %>%
  mutate(
    level = case_when(
      page_total_likes < qt[[1]] ~ "Low",
      page_total_likes < qt[[2]] ~ "Mid",
      page_total_likes >= qt[[2]] ~ "High"
    )
  )

# Drop rows with any missing value so the means below need no na.rm handling.
fb <- fb[complete.cases(fb), ]

#' Mean comment / like / share counts per group.
#'
#' @param data A data frame with `comment`, `like` and `share` columns.
#' @param group_col Unquoted name of the column to group by.
#' @return One row per group with `mean_comment`, `mean_like`, `mean_share`.
summarise_engagement <- function(data, group_col) {
  data %>%
    group_by({{ group_col }}) %>%
    summarise(
      mean_comment = mean(comment),
      mean_like = mean(like),
      mean_share = mean(share)
    )
}

summarise_engagement(fb, type)
summarise_engagement(fb, weekday)
summarise_engagement(fb, month)

과제

# Wine quality assignment: read the red and white files (semicolon-separated),
# tag each with its colour, stack them, and band fixed acidity into three levels.
# Assumes dplyr is attached and the working directory contains "wine quality/".
winer <- read.csv("wine quality/winequality-red.csv", sep = ";")
wineh <- read.csv("wine quality/winequality-white.csv", sep = ";")
winer <- winer %>% mutate(group = "red")
wineh <- wineh %>% mutate(group = "white")
wine <- rbind(winer, wineh)
head(wine)

# Normalise column names: lower-case, with dots replaced by underscores.
names(wine) <- gsub("[.]", "_", tolower(names(wine)))

# Acidity band: <= 6 is "Low", above 6 and below 8 is "Mid", 8 or more is
# "High". The column is read through mutate()'s data mask rather than `wine$...`;
# a missing fixed_acidity matches no condition and stays NA.
wine <- wine %>%
  mutate(
    acidity = case_when(
      fixed_acidity <= 6 ~ "Low",
      fixed_acidity < 8 ~ "Mid",
      fixed_acidity >= 8 ~ "High"
    )
  )

산도(acidity) 수준별 평균 휘발성 산도와 평균 잔여 당분

# Mean volatile acidity and mean residual sugar for each acidity band.
# Assumes dplyr is attached and `wine` already carries the `acidity` column.
# The sugar column was previously misspelled `mean_suger`.
wine %>%
  group_by(acidity) %>%
  summarise(
    mean_volatile = mean(volatile_acidity),
    mean_sugar = mean(residual_sugar)
  )