library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.3.0
## ✔ tibble 2.0.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.2 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(broom)
library(rgeos)
## rgeos version: 0.4-2, (SVN revision 581)
## GEOS runtime version: 3.6.1-CAPI-1.10.1
## Linking to sp version: 1.3-1
## Polygon checking: TRUE
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.3-6, (SVN revision 773)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.1.3, released 2017/20/01
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.5/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: FALSE
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.5/Resources/library/rgdal/proj
## Linking to sp version: 1.3-1
library(maptools)
## Checking rgeos availability: TRUE
library(mapproj)
## Loading required package: maps
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
# Load the data files. The paths are relative, so the CSVs must sit in the
# working directory.
# Column types are declared explicitly (they are the types readr reported when
# it guessed them), so parsing is silent and cannot change if the leading rows
# of a file change.
sf_col_types <- cols(
  Incident_Type = col_character(),
  Report_taken_date_EST = col_character(),
  Year = col_double(),
  `Data Type` = col_character(),
  Subject_Race = col_character(),
  Subject_Sex = col_character(),
  Subject_Ethnicity = col_character(),
  `Block Address` = col_character(),
  `Incident Location District` = col_character(),
  `Incident Location PSA` = col_double(),
  Age = col_character()
)
sf_data <- read_csv("sf.csv", col_types = sf_col_types)
sf_data_2017 <- read_csv("sf2017.csv", col_types = sf_col_types)
crime <- read_csv(
  "CrimeStatebyState.csv",
  col_types = cols(.default = col_double(), State = col_character())
)
# bind them together (bind_rows() matches columns by name, not by position)
sf_data <- bind_rows(sf_data, sf_data_2017)
# remove the isolated 2017 data and the helper spec, which are no longer needed
rm(sf_data_2017, sf_col_types)
# Derive analysis-friendly columns: a parsed report date plus factor versions
# of the categorical fields. Year is converted to a factor in place.
sf_data <- sf_data %>%
  mutate(
    date = mdy(Report_taken_date_EST),
    Type = as.factor(`Data Type`),
    Gender = as.factor(Subject_Sex),
    Ethnicity = as.factor(Subject_Ethnicity),
    Race = as.factor(Subject_Race),
    District = as.factor(`Incident Location District`),
    PSA = as.factor(`Incident Location PSA`),
    Year = as.factor(Year)
  )
# Numeric age: "Juvenile" and "Unknown" become NA, every other value is parsed
# as a number.
# The two placeholders are blanked out *before* the conversion, so as.numeric()
# never sees them and no spurious "NAs introduced by coercion" warning is
# raised. A warning now only appears if Age holds some other non-numeric value.
sf_data <- mutate(
  sf_data,
  num_age = as.numeric(
    if_else(Age %in% c("Juvenile", "Unknown"), NA_character_, Age)
  )
)
# Build the binned age variables in four steps:
#   1. cut the numeric age into 10-year groups (cut() intervals are
#      right-closed by default, so breaks 17, 27, ... give 18-27, 28-37, ...)
#   2. carry the "Juvenile" / "Unknown" markers over from Age
#   3. store the result as the factor Age_binned
#   4. make "Juvenile" the first level of both Age and Age_binned
sf_data <- sf_data %>%
  mutate(
    cat_age = cut(
      num_age,
      breaks = c(17, 27, 37, 47, 57, 67, 77, 87),
      labels = c(
        "18-27", "28-37", "38-47",
        "48-57", "58-67", "68-77",
        "78-87"
      )
    )
  ) %>%
  mutate(
    cat_age = ifelse(
      Age == "Juvenile" | Age == "Unknown",
      Age,
      as.character(cat_age)
    )
  ) %>%
  mutate(Age_binned = as.factor(cat_age)) %>%
  mutate(
    Age = fct_relevel(Age, "Juvenile"),
    Age_binned = fct_relevel(Age_binned, "Juvenile")
  )
# create Month variable: an ordered month label with the full month name
# (TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables)
sf_data <- mutate(sf_data, Month = month(date, label = TRUE, abbr = FALSE))
# read the Police Districts shapefile (expected in the working directory)
districts <- readOGR(
  dsn = "Police_Districts.shp",
  layer = "Police_Districts"
)
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/evakoplow/Desktop/portfolio_koplow/Police_Districts.shp", layer: "Police_Districts"
## with 7 features
## It has 8 fields
## Integer64 fields read as strings: OBJECTID DISTRICT
# the first attribute column identifies each district; call it "id"
colnames(districts@data)[1] <- "id"
# flatten the district polygons into a table of boundary points (long/lat),
# using the "id" attribute as the region identifier of each polygon
districts.points <- districts %>%
  tidy(region = "id")
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
# combine lat/long coordinates of district boundary points with data about
# districts.
# The attribute table stores "id" as a factor while the boundary points store
# it as character; converting it explicitly gives the same joined result
# without relying on the join's implicit (and warned-about) coercion.
districts.cart <- full_join(
  districts.points,
  mutate(districts@data, id = as.character(id)),
  by = "id"
)
# read the Police Service Areas shapefile (expected in the working directory)
psas <- readOGR(
  dsn = "Police_Service_Areas.shp",
  layer = "Police_Service_Areas"
)
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/evakoplow/Desktop/portfolio_koplow/Police_Service_Areas.shp", layer: "Police_Service_Areas"
## with 56 features
## It has 24 fields
## Integer64 fields read as strings: OBJECTID DISTRICT PSA TOTALPOP WHITE BLACK NAT_AMER ASIAN OTHER TWO_ORMORE HISPANIC POPMALE POPFEMALE POPUNDER18 POP65UP
# flatten the PSA polygons into a table of boundary points (long/lat),
# using the "PSA" attribute as the region identifier of each polygon
psas.points <- psas %>%
  tidy(region = "PSA")
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
# combine lat/long coordinates of PSA boundary points with data about PSAs
# tell join function that "id" in psas.points corresponds to "PSA" in psas@data
# PSA is stored as a factor in the attribute table; converting it to character
# up front gives the same joined result without the join's implicit
# (and warned-about) coercion.
psas.cart <- full_join(
  psas.points,
  mutate(psas@data, PSA = as.character(PSA)),
  by = c("id" = "PSA")
)
# Map of report counts per Police Service Area, one panel per subject sex.
plot_data <- filter(sf_data, !is.na(PSA))
# every PSA x sex combination, so combinations without reports still get a row
all_cats <- expand(plot_data, PSA, Subject_Sex)
plot_data <- plot_data %>%
  group_by(PSA, Subject_Sex) %>%
  summarize(reports = n()) %>%
  # summarize() leaves the result grouped by PSA; drop that grouping before
  # the column is renamed so no grouping metadata refers to a stale name
  ungroup()
plot_data <- plot_data %>%
  full_join(all_cats, by = c("PSA", "Subject_Sex")) %>%
  # the map data is joined on "NAME"
  rename(NAME = PSA)
#plot_data <- mutate(plot_data,reports=replace_na(reports,0))
mapdata <- full_join(plot_data, psas.cart, by = "NAME")
# drop_na() removes every row with an NA in any column, so combinations with
# no reports are not drawn
g <- ggplot() +
  geom_polygon(
    data = drop_na(mapdata),
    aes(x = long, y = lat, group = group, fill = reports),
    color = "black"
  ) +
  coord_map() +
  theme_void()
g +
  facet_wrap(~Subject_Sex) +
  scale_fill_distiller(palette = "Spectral", name = "Number of Reports") +
  labs(title = "SF Reports in Police Service Areas Faceted by Gender")
# Map of report counts per Police District, one panel per binned age group.
plot2_data <- filter(sf_data, !is.na(District))
# Relabel the district levels by position.
# NOTE(review): this assumes the existing levels already sort in district
# order (first ... seventh) and that there are exactly seven — confirm.
levels(plot2_data$District) <- c(
  "First District", "Second District",
  "Third District", "Fourth District",
  "Fifth District", "Sixth District",
  "Seventh District"
)
# every district x age-group combination, so empty combinations still get a row
all_cats <- expand(plot2_data, District, Age_binned)
plot2_data <- plot2_data %>%
  group_by(District, Age_binned) %>%
  summarize(reports = n()) %>%
  # summarize() leaves the result grouped by District; drop that grouping
  # before the column is renamed so no grouping metadata refers to a stale name
  ungroup()
## Warning: Factor `Age_binned` contains implicit NA, consider using
## `forcats::fct_explicit_na`
plot2_data <- plot2_data %>%
  full_join(all_cats, by = c("District", "Age_binned")) %>%
  # the map data is joined on "NAME"
  rename(NAME = District)
#plot2_data <- mutate(plot2_data,reports=replace_na(reports,0))
mapdata <- full_join(plot2_data, districts.cart, by = "NAME")
## Warning: Column `NAME` joining factors with different levels, coercing to
## character vector
g <- ggplot() +
  geom_polygon(
    data = mapdata,
    aes(x = long, y = lat, group = group, fill = reports),
    color = "black"
  ) +
  coord_map() +
  theme_void()
# combinations with no reports have NA counts and are drawn in white
g +
  facet_wrap(~Age_binned) +
  scale_fill_distiller(
    palette = "Spectral",
    name = "Number of Reports",
    na.value = "white"
  ) +
  labs(title = "SF Reports in Police Districts Faceted by Age Group")
# National trend: keep only the US-total rows and plot the violent crime rate
# by year.
us_data <- filter(crime, State == "United States-Total")
ggplot(us_data) +
  geom_line(aes(x = Year, y = `Violent Crime rate`), color = "green") +
  # aesthetic names in labs() are case-sensitive: the y-axis label must be
  # passed as `y` (a capital `Y` is silently ignored)
  labs(
    title = "Violent Crime Rate in the US",
    x = "Year",
    y = "Violent Crime Rate"
  ) +
  theme_classic()
# National rates for four violent crime types, reshaped to long form
# (one row per year and crime type) so each type becomes its own line.
us_4_crimes <- us_data %>%
  select(
    Year,
    `Aggravated assault rate`,
    `Murder and nonnegligent manslaughter rate`,
    `Legacy rape rate /1`,
    `Robbery rate`
  )
us_4_crimes_plot <- us_4_crimes %>%
  gather(
    key = "Violent Crime Type",
    value = Rate,
    `Aggravated assault rate`:`Robbery rate`
  )
# the legend labels are matched by position to the sorted crime-type values
ggplot(us_4_crimes_plot) +
  geom_line(aes(x = Year, y = Rate, color = `Violent Crime Type`)) +
  scale_color_discrete(
    name = "Type of Violent Crime",
    labels = c(
      "Aggravated Assault",
      "Rape",
      "Murder and Nonnegligent Manslaughter",
      "Robbery"
    )
  ) +
  theme_classic()
# Property crime rate per 100,000 people for each region and year.
state_crime <- filter(crime, State != "United States-Total")
# state lists used to assign each state to a region
northeast_states <- c(
  "Connecticut", "Maine", "Massachusetts", "New Hampshire", "Rhode Island",
  "Vermont", "New Jersey", "New York", "Pennsylvania"
)
midwest_states <- c(
  "Illinois", "Indiana", "Michigan", "Ohio", "Wisconsin", "Iowa", "Kansas",
  "Minnesota", "Missouri", "Nebraska", "North Dakota", "South Dakota"
)
south_states <- c(
  "Delaware", "Florida", "Georgia", "Maryland", "North Carolina",
  "South Carolina", "Virginia", "District of Columbia", "West Virginia",
  "Alabama", "Kentucky", "Mississippi", "Tennessee", "Arkansas", "Louisiana",
  "Oklahoma", "Texas"
)
west_states <- c(
  "Arizona", "Colorado", "Idaho", "Montana", "Nevada", "New Mexico", "Utah",
  "Wyoming", "Alaska", "California", "Hawaii", "Oregon", "Washington"
)
# anything not found in one of the four lists is labelled with the string "NA"
state_crime <- state_crime %>%
  mutate(
    region = case_when(
      State %in% northeast_states ~ "Northeast",
      State %in% midwest_states ~ "Midwest",
      State %in% south_states ~ "South",
      State %in% west_states ~ "West",
      TRUE ~ "NA"
    )
  )
# regional rate = total property crimes / total population, per 100,000
regional_rates <- state_crime %>%
  group_by(region, Year) %>%
  summarize(
    `Property crime rate` =
      sum(`Property crime total`) / sum(Population) * 100000
  )
ggplot(regional_rates) +
  geom_line(aes(x = Year, y = `Property crime rate`, color = region)) +
  labs(title = "Overall Property Crime Rate for the 4 Census Regions")
# Choropleth of the 2013 robbery rate by state.
# The state polygons are keyed by a lower-case "region" column, so the state
# names in the crime data are renamed and lower-cased to match.
all_states <- map_data("state")
crime_2013 <- crime %>%
  filter(Year == 2013, State != "United States-Total") %>%
  rename(region = State) %>%
  mutate(region = tolower(region))
stateData <- left_join(all_states, crime_2013, by = "region")
ggplot() +
  geom_polygon(
    data = stateData,
    aes(
      x = long, y = lat, group = group,
      fill = `Robbery rate`
    ),
    color = "grey50"
  ) +
  scale_fill_distiller(palette = "Oranges", direction = 1) +
  coord_map() +
  labs(title = "Robbery Rate in 2013")
library(treemap) # ask professor meyer
# Treemap of the states in 2010: tile size is population, tile colour is the
# murder and nonnegligent manslaughter rate.
# The treemap previously plotted crime_2013 and left the 2010 subset built
# here unused; it now plots the 2010 data, indexed by its State column.
crime_2010 <- filter(crime, Year == 2010, State != "United States-Total")
treemap(
  crime_2010,
  index = "State",
  vSize = "Population",
  vColor = "Murder and nonnegligent manslaughter rate",
  type = "value",
  palette = "Reds"
)
# Burglary rate against motor vehicle theft rate for each state in 1990,
# with colour and point shape both encoding the region.
crime_1990 <- filter(crime, Year == 1990, State != "United States-Total")
# state lists used to assign each state to a region
northeast_states <- c(
  "Connecticut", "Maine", "Massachusetts", "New Hampshire", "Rhode Island",
  "Vermont", "New Jersey", "New York", "Pennsylvania"
)
midwest_states <- c(
  "Illinois", "Indiana", "Michigan", "Ohio", "Wisconsin", "Iowa", "Kansas",
  "Minnesota", "Missouri", "Nebraska", "North Dakota", "South Dakota"
)
south_states <- c(
  "Delaware", "Florida", "Georgia", "Maryland", "North Carolina",
  "South Carolina", "Virginia", "District of Columbia", "West Virginia",
  "Alabama", "Kentucky", "Mississippi", "Tennessee", "Arkansas", "Louisiana",
  "Oklahoma", "Texas"
)
west_states <- c(
  "Arizona", "Colorado", "Idaho", "Montana", "Nevada", "New Mexico", "Utah",
  "Wyoming", "Alaska", "California", "Hawaii", "Oregon", "Washington"
)
# anything not found in one of the four lists is labelled with the string "NA"
crime_1990 <- crime_1990 %>%
  mutate(
    region = case_when(
      State %in% northeast_states ~ "Northeast",
      State %in% midwest_states ~ "Midwest",
      State %in% south_states ~ "South",
      State %in% west_states ~ "West",
      TRUE ~ "NA"
    )
  )
ggplot(crime_1990) +
  geom_point(
    aes(
      x = `Motor vehicle theft rate`,
      y = `Burglary rate`,
      color = region,
      shape = region
    )
  ) +
  scale_shape_manual(
    values = c("Midwest" = 2, "Northeast" = 5, "South" = 0, "West" = 1)
  ) +
  scale_color_manual(
    values = c(
      "Midwest" = "purple", "Northeast" = "blue",
      "South" = "green", "West" = "red"
    )
  ) +
  labs(
    x = "Motor Vehicle Theft Rate",
    y = "Burglary Rate",
    title = "Burglary Rate in 1990 vs. Vehicle Theft Rate in 1990"
  )
# Aggravated assault rate against larceny-theft rate for each state in 2014,
# with point size proportional to the state's population.
crime_2014 <- filter(crime, Year == 2014, State != "United States-Total")
# No fill aesthetic is mapped, so the former scale_fill_manual() call had no
# effect and has been removed.
ggplot(crime_2014) +
  geom_point(
    aes(
      x = `Larceny-theft rate`,
      y = `Aggravated assault rate`,
      size = Population
    )
  ) +
  # the y-axis shows the aggravated assault rate (it was mislabelled as
  # "Burglary Rate")
  labs(
    x = "Larceny Theft Rate",
    y = "Aggravated Assault Rate",
    title = "Larceny Theft Rate in 2014 vs. Aggravated Assault Rate in 2014"
  )
library(tidyverse)
library(tidyverse)
# Work with a random sample of 500 diamonds. The seed is fixed so the same
# sample, and therefore the same figures, are produced on every run.
set.seed(42)
diamonds.small <- diamonds[sample(nrow(diamonds), 500), ]
# base plot shared by graphs 1-6: carat on x, price on y
q <- ggplot(diamonds.small, aes(x = carat, y = price))
########GRAPH 1#############
q + geom_point() #scatter plot
########GRAPH 2 ################
q + geom_count(aes(size = ..prop..), alpha = .5) #bubble plot
########GRAPH 3 ################
q + geom_count(aes(size = ..prop.., color = as.factor(cut)), alpha = .5) #bubble plot
########GRAPH 4 ################
q + geom_bin2d()
########GRAPH 5 ################
# `limits` is spelled out in full rather than relying on partial argument
# matching of `lim`; points outside the limits are dropped with a warning
q + stat_density2d() +
  scale_x_continuous(limits = c(-.3, 4.0)) +
  scale_y_continuous(limits = c(-8000, 18900))
########GRAPH 6 ################
q + geom_jitter() + geom_smooth(method = "lm", se = TRUE, level = .95)
########GRAPH 7 ################
# alpha is set on the point layer: passed to ggplot() it is silently ignored
ggplot(diamonds.small, aes(x = carat, y = price, color = as.factor(cut))) +
  geom_jitter(alpha = .5) +
  geom_smooth(method = "lm", se = TRUE, level = .67)
########GRAPH 8 ################
# same as graph 7, but with a quadratic trend line per cut
ggplot(diamonds.small, aes(x = carat, y = price, color = as.factor(cut))) +
  geom_jitter(alpha = .5) +
  geom_smooth(method = "lm", formula = y ~ poly(x, 2))
These graphs were created using the diamonds dataset from the ggplot2 package. The first few graphs show a scatterplot of carat vs. price and bubble plots of carat vs. price in which the size of each point represents its frequency. The last few graphs show jittered scatterplots with trend lines. The goal of these visualizations is to examine the relationship between a diamond's carat and its price. All of the graphs show that price increases as carat increases.