R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

scatterplot

# Attach ggplot2 explicitly: ggplot() and the mpg data set come from it, and
# the only library() call visible in this file (tidyverse) appears later.
library(ggplot2)

# Base plot object reused by the following examples: displ on x, hwy on y.
g <- ggplot(data = mpg, aes(x = displ, y = hwy))
g + geom_point()

# Disabled exploration helpers, kept for reference:
# summary(cars)
# data()

# Map the class column to point colour.
g + geom_point(mapping = aes(colour = class))

# Map the class column to point shape.
g + geom_point(mapping = aes(shape = class))
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 7.
## Consider specifying shapes manually if you must have them.
## Warning: Removed 62 rows containing missing values (geom_point).

# Map the class column to point size.
g + geom_point(mapping = aes(size = class))
## Warning: Using size for a discrete variable is not advised.

# Scatterplot of displ vs. hwy with a smoothed trend layer drawn on top.
ggplot(mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess'

data wrangling with dplyr

# Attach the tidyverse, then read the gapminder CSV directly from GitHub.
library(tidyverse)
gapminder <- readr::read_csv(
  file = "https://raw.githubusercontent.com/OHI-Science/data-science-training/master/data/gapminder.csv"
)
## Parsed with column specification:
## cols(
##   country = col_character(),
##   year = col_integer(),
##   pop = col_double(),
##   continent = col_character(),
##   lifeExp = col_double(),
##   gdpPercap = col_double()
## )
# Print the tibble to inspect it.
print(gapminder)
## # A tibble: 1,704 x 6
##        country  year      pop continent lifeExp gdpPercap
##          <chr> <int>    <dbl>     <chr>   <dbl>     <dbl>
##  1 Afghanistan  1952  8425333      Asia  28.801  779.4453
##  2 Afghanistan  1957  9240934      Asia  30.332  820.8530
##  3 Afghanistan  1962 10267083      Asia  31.997  853.1007
##  4 Afghanistan  1967 11537966      Asia  34.020  836.1971
##  5 Afghanistan  1972 13079460      Asia  36.088  739.9811
##  6 Afghanistan  1977 14880372      Asia  38.438  786.1134
##  7 Afghanistan  1982 12881816      Asia  39.854  978.0114
##  8 Afghanistan  1987 13867957      Asia  40.822  852.3959
##  9 Afghanistan  1992 16317921      Asia  41.674  649.3414
## 10 Afghanistan  1997 22227415      Asia  41.763  635.3414
## # ... with 1,694 more rows

useful functions to get to know your data

# One call per line: on a single line, tail() was swallowed by the comment
# that follows head() and never ran.
head(gapminder) # show first 6 rows; pass n to change how many
tail(gapminder) # show last 6 rows

check structure of data

# Compact overview of each column's type and first values.
utils::str(gapminder)

variable names and data dimensions

# One call per line: several calls on one line with no separator do not parse.
names(gapminder) # column names
dim(gapminder)   # number of rows and columns
ncol(gapminder)  # number of columns

combine

# Combine the row and column counts into a single vector with c().
c(
  nrow(gapminder),
  ncol(gapminder)
)

# Per-column summary statistics.
summary(object = gapminder)

Everything above operated on the whole data set.

# Pull out a single column and show its first values.
head(gapminder[["lifeExp"]])

work with dplyr

# Keep only the rows where lifeExp is below 29.
dplyr::filter(.data = gapminder, lifeExp < 29)
## # A tibble: 2 x 6
##       country  year     pop continent lifeExp gdpPercap
##         <chr> <int>   <dbl>     <chr>   <dbl>     <dbl>
## 1 Afghanistan  1952 8425333      Asia  28.801  779.4453
## 2      Rwanda  1992 7290203    Africa  23.599  737.0686
# Two-step version: store Sweden's rows, then average the lifeExp column.
d <- dplyr::filter(.data = gapminder, country == "Sweden")
mean(d[["lifeExp"]])
## [1] 76.177
# Same computation as a single nested call, with no intermediate variable.
mean(
  dplyr::filter(.data = gapminder, country == "Sweden")[["lifeExp"]]
)
## [1] 76.177
## pipe operator  `%>%` 
# The pipe passes gapminder as the first argument of head().
gapminder %>%
  head(n = 10)
## # A tibble: 10 x 6
##        country  year      pop continent lifeExp gdpPercap
##          <chr> <int>    <dbl>     <chr>   <dbl>     <dbl>
##  1 Afghanistan  1952  8425333      Asia  28.801  779.4453
##  2 Afghanistan  1957  9240934      Asia  30.332  820.8530
##  3 Afghanistan  1962 10267083      Asia  31.997  853.1007
##  4 Afghanistan  1967 11537966      Asia  34.020  836.1971
##  5 Afghanistan  1972 13079460      Asia  36.088  739.9811
##  6 Afghanistan  1977 14880372      Asia  38.438  786.1134
##  7 Afghanistan  1982 12881816      Asia  39.854  978.0114
##  8 Afghanistan  1987 13867957      Asia  40.822  852.3959
##  9 Afghanistan  1992 16317921      Asia  41.674  649.3414
## 10 Afghanistan  1997 22227415      Asia  41.763  635.3414
# Mean life expectancy for Sweden, written as one pipeline.
gapminder %>%
  filter(country == "Sweden") %>%
  summarise(mean_lifeExp = mean(lifeExp))
## # A tibble: 1 x 1
##   mean_lifeExp
##          <dbl>
## 1       76.177

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.