[고객데이터분석] 중간고사: R로 Covid19 데이터 분석하기

2022. 5. 6. 17:26
install.packages("COVID19")
library("COVID19")
library(dplyr)
library(tidyr)
library(ggplot2)

# Fetch the COVID-19 Data Hub table with covid19()'s default arguments.
# NOTE(review): this downloads data, so it presumably needs network access.
data <- covid19()

# no.1
# Keep only the columns the later steps work with. Assignment uses `<-`
# to match the rest of the script.
data_select <- data %>%
  select(
    date,
    confirmed,
    deaths,
    recovered,
    tests,
    people_vaccinated,
    people_fully_vaccinated,
    vent,
    population,
    key_apple_mobility,
    key_gadm
  )

# no.2
# Open the rows whose Apple-mobility key is Macao in the interactive viewer.
data_select %>%
  filter(key_apple_mobility %in% "Macao") %>%
  View()

# no.3
# Drop rows without an Apple-mobility key, order them by key and date, and
# carry the last observed value of each measure forward within each key.
# The result is left grouped by key_apple_mobility.
data_select <- data_select %>%
  filter(!is.na(key_apple_mobility)) %>%
  arrange(key_apple_mobility, date) %>%
  group_by(key_apple_mobility) %>%
  fill(
    confirmed,
    deaths,
    recovered,
    tests,
    people_vaccinated,
    people_fully_vaccinated,
    vent,
    population,
    .direction = "down"
  )

# no.4
# Express each measure as a share of the population.
# vacc_not_fully_ratio covers people vaccinated but not yet fully vaccinated.
data_select <- mutate(
  data_select,
  confirm_ratio        = confirmed / population,
  death_ratio          = deaths / population,
  vent_ratio           = vent / population,
  vacc_fully_ratio     = people_fully_vaccinated / population,
  vacc_not_fully_ratio =
    (people_vaccinated - people_fully_vaccinated) / population
)

# no.5
# Rank the keys by confirmed-case ratio on 2022-04-10.
# key_apple_mobility is selected explicitly so the identifying column is kept
# whether or not data_select is still grouped; previously it only appeared
# because select() silently re-added the grouping variable.
data_select %>%
  ungroup() %>%
  filter(date == "2022-04-10") %>%
  select(key_apple_mobility, confirm_ratio, death_ratio) %>%
  arrange(desc(confirm_ratio))

# no.6
# Scatter of full-vaccination ratio against death ratio on 2022-04-10, with
# one linear fit across all points and each point labelled by its key.
ggplot(
  filter(data_select, date == "2022-04-10"),
  aes(x = vacc_fully_ratio, y = death_ratio)
) +
  geom_point(aes(color = key_apple_mobility), size = 10) +
  geom_smooth(method = "lm", se = FALSE) +
  geom_text(aes(label = key_apple_mobility), size = 3) +
  theme(legend.position = "none")

# Korean text rendered as broken glyphs, so extrafont is used to register
# fonts and a Korean-capable family is selected for plots.
# Install extrafont only when it is missing instead of on every run.
if (!requireNamespace("extrafont", quietly = TRUE)) {
  install.packages("extrafont")
}
library(extrafont)
# prompt = FALSE skips the interactive y/n confirmation, so the script can be
# sourced non-interactively. The stray `y` that used to follow this call was a
# pasted console answer and raised "object 'y' not found".
font_import(prompt = FALSE)
# Base-graphics font family; the second call overrides the first.
par(family = "NanumGothic")
par(family = "AppleGothic")
# Default ggplot2 theme using the same family.
theme_set(theme_gray(base_family = "AppleGothic"))

# no.7
# Label each row as '신흥국' when its key is in the list below and as
# '선진국' otherwise.
data_select <- data_select %>%
  mutate(
    country = case_when(
      key_apple_mobility %in% c(
        "Albania", "Argentina", "Brazil", "Bulgaria", "Cambodia",
        "Chile", "Colombia", "Egypt", "Estonia", "Georgia",
        "Greece", "India", "Indonesia", "Latvia", "Lithuania",
        "Malaysia", "Mexico", "Philippines", "Puerto Rico", "Romania",
        "Serbia", "Slovakia", "Slovenia", "South Africa", "Thailand",
        "Turkey", "Ukraine", "Uruguay", "Vietnam"
      ) ~ "신흥국",
      TRUE ~ "선진국"
    )
  )

# Same 2022-04-10 scatter as before, coloured by the `country` class.
ggplot(
  filter(data_select, date == "2022-04-10"),
  aes(x = vacc_fully_ratio, y = death_ratio)
) +
  geom_point(aes(color = country), size = 10) +
  geom_text(aes(label = key_apple_mobility), size = 3)

# no.8
# Full-vaccination ratio over time: one line per key, coloured by `country`.
ggplot(
  data_select,
  aes(
    x = date,
    y = vacc_fully_ratio,
    group = key_apple_mobility,
    color = country
  )
) +
  geom_line()

# no.9
# Death ratio over time, with Korea given its own colour and line type.
# Each line is labelled at the last row of its key, nudged right by 0.5 on
# the date axis.
data_select %>%
  mutate(
    Kor = if_else(key_apple_mobility == "Republic of Korea", "Korea", "Other")
  ) %>%
  ggplot(aes(x = date, y = death_ratio, group = key_apple_mobility)) +
  geom_line(aes(color = Kor, linetype = Kor)) +
  geom_text(
    aes(x = date + 0.5, y = death_ratio, label = key_apple_mobility),
    data = filter(
      group_by(data_select, key_apple_mobility),
      date == last(date)
    ),
    size = 2
  )

아래의 페이지의 데이터를 활용해서 특성공학 및 시각화를 하는 것이 중간고사 과제였다.

내가 1+2 같이 쉬운 내용을 가르쳐줘도 혼자 못하는 말하는 감자라는 것을 아시는지

힌트를 충분히 주셔서 공부하는 마음으로 코드를 작성할 수 있었다. 아무래도 천사이신듯

능숙하지 못해서 괴로울 뿐이지, 공부 자체는 재미있다!

입력한 대로 결과가 딱 나올 때의 쾌감이란..ㅋㅋ

 

Download Centre

COVID19

covid19datahub.io

 

 

BELATED ARTICLES

more