layout: true <a class="footer-link" href="https://sisteranalyst.org">sisteranalyst.org</a> --- class: title-slide, center, bottom # Import Data to R ## Data Literacy in R ### Tatjana Kecojevic --- name: astroboy background-image: url(images/astro_boy.jpg) background-size: contain background-color: #f6f6f6 # Your Tool: .emphasis[download & install] ## First: [R](https://cran.r-project.org/) -- ## Then: [RStudio](https://rstudio.com/products/rstudio/download/) --- class: center, middle background-image: url("images/RStudio_IDE.png") background-size: cover background-color: #ffffff # .shadow-text[RStudio IDE] --- class: middle, center <div class="flex" style="margin: 0 1em;"> <div class="column"> <h3> What package do I need? </h3> <img src="images/astro_boy_package.jpg" style="width: 100%;"> </div> <div class="column" style="margin: 0 1em;"> <h3> What is it? </h3> <h5> “In R, the fundamental unit of shareable code is the package.” <a href="http://r-pkgs.had.co.nz/intro.html">Hadley Wickham, R packages</a></h5> <h5> When you start R, it automatically loads the <b><mark><strong>package:base</strong></mark></b>, which is the system library.</h5> <h5> You can install a package using the <b><mark><strong>install.packages()</strong></mark></b> function.</h5> <h5>Once installed, the package will appear in the list of available packages in your <b><mark><strong>Packages pane</strong></mark></b>.</h5> </div> </div> --- # R can import all types of data: - (Tab, Blank space) Delimited Text - CSV files - Excel files - JSON - SAS - STATA - MiniTab - SPSS… -- #### Base R: Read tabular and CSV data into R - `df_txt <- read.table("file_name.txt", header = TRUE)` - `df_txt <- read.csv("file_name.csv", stringsAsFactors = FALSE)` --- ## **Using `readr::read_csv()`** - quicker than base R `read.csv()` - it assumes characters are strings and not factors by default ## **Importing Excel files with `readxl::read_excel()`** - Importing Excel data files is not straightforward as they might contain 
multiple sheets - To access the data from an Excel sheet you can’t just copy and paste the URL for the file. You have to download the file first - People like to make their Excel spreadsheets look ‘pretty’ with ‘fancy’ formatting, which could create difficulty when reading them in R --- class: freight-slide, center, middle, inverse # .shadow-text[Accessing ECDC's COVID-19 data] ## .emphasis[<https://www.ecdc.europa.eu/en/publications-data>] --- ## Read .csv File .pull-left[ ```r *ecdc_csv <- readr::read_csv("https://opendata.ecdc.europa.eu/covid19/casedistribution/csv") ``` ] .pull-right[ <img src="images/csv1.png" width="1280" /> ] --- ## Check the Dimensions of Your Data .pull-left[ ```r ecdc_csv <- readr::read_csv("https://opendata.ecdc.europa.eu/covid19/casedistribution/csv") *dim(ecdc_csv) ``` ] .pull-right[ <img src="images/csv2.png" width="1280" /> ] --- ## Get A Glimpse Of Your Data .pull-left[ ```r ecdc_csv <- readr::read_csv("https://opendata.ecdc.europa.eu/covid19/casedistribution/csv") dim(ecdc_csv) *dplyr::glimpse(ecdc_csv) ``` ] .pull-right[ <img src="images/csv3.png" width="1280" /> ] .footnote[ 💡 If you don't have `dplyr` installed yet run: `install.packages("dplyr")` ] --- ## Filter Your Data with dplyr .pull-left[ ```r ecdc_csv <- readr::read_csv("https://opendata.ecdc.europa.eu/covid19/casedistribution/csv") dim(ecdc_csv) dplyr::glimpse(ecdc_csv) # find your country # ------- SERBIA --------- *library(dplyr) *covid_sr <- ecdc_csv %>% * filter(countryterritoryCode == "SRB") ``` ] .pull-right[ <img src="images/csv4.png" width="1280" /> ] --- ## Get A Glimpse Of Your Filtered Data .pull-left[ ```r ecdc_csv <- readr::read_csv("https://opendata.ecdc.europa.eu/covid19/casedistribution/csv") dim(ecdc_csv) dplyr::glimpse(ecdc_csv) # find your country # ------- SERBIA --------- library(dplyr) covid_sr <- ecdc_csv %>% filter(countryterritoryCode == "SRB") *glimpse(covid_sr) ``` ] .pull-right[ <img src="images/csv5.png" width="1280" /> ] --- class: top, 
center background-image: url("images/excel.png") background-size: cover background-color: #ffffff # .shadow-text[Importing Excel files with readxl] --- ## Working with URLs and HTTP .pull-left[ ```r *library(httr) *url <- "https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-2020-04-20.xlsx" *GET(url, write_disk(tmf <- tempfile(fileext = ".xlsx"))) ``` ] .pull-right[ <img src="images/excel1.png" width="1280" /> ] .footnote[ 💡 If you don't have `httr` installed yet run: `install.packages("httr")` ] --- ## Read the Data from an Excel file .pull-left[ ```r library(httr) url <- "https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-2020-04-20.xlsx" GET(url, write_disk(tmf <- tempfile(fileext = ".xlsx"))) *covid_ecdc <- readxl::read_excel(tmf) ``` ] .pull-right[ <img src="images/excel2.png" width="1280" /> ] .footnote[ 💡 If you don't have `readxl` installed yet run: `install.packages("readxl")` ] --- ## Display The Structure of the Data .pull-left[ ```r library(httr) url <- "https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-2020-04-20.xlsx" GET(url, write_disk(tmf <- tempfile(fileext = ".xlsx"))) covid_ecdc <- readxl::read_excel(tmf) *str(covid_ecdc) ``` ] .pull-right[ <img src="images/excel3.png" width="1280" /> ] --- ## Filter: Match the Column With Given Values .pull-left[ ```r *library(httr) url <- "https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-2020-04-20.xlsx" GET(url, write_disk(tmf <- tempfile(fileext = ".xlsx"))) covid_ecdc <- readxl::read_excel(tmf) str(covid_ecdc) # select exYU countries *covid_yu <- covid_ecdc %>% * filter(countriesAndTerritories %in% c("Bosnia_and_Herzegovina", * "Croatia", "Montenegro", * "North_Macedonia", "Serbia", * "Slovenia")) ``` ] .pull-right[ <img src="images/excel4.png" width="1280" /> ] --- ## Get A Glimpse Of Your Filtered Data 
.pull-left[ ```r library(httr) url <- "https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-2020-04-20.xlsx" GET(url, write_disk(tmf <- tempfile(fileext = ".xlsx"))) covid_ecdc <- readxl::read_excel(tmf) str(covid_ecdc) # select exYU countries covid_yu <- covid_ecdc %>% filter(countriesAndTerritories %in% c("Bosnia_and_Herzegovina", "Croatia", "Montenegro", "North_Macedonia", "Serbia", "Slovenia")) *glimpse(covid_yu) ``` ] .pull-right[ <img src="images/excel5.png" width="1280" /> ] --- class: freight-slide, center, middle, inverse # .shadow-text[To learn more visit: <https://dataliteracy.rbind.io>] .emphasis[To see it in action visit: <http://covid19sr.rbind.io>] [
@Tatjana_Kec](https://twitter.com/Tatjana_Kec) [
@TanjaKec](https://github.com/TanjaKec)