my site: https://b04902122.github.io/CSX_R/final_project/final.html

Import libraries

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(rpart)
library(rpart.plot)
library(ggpubr)
## Loading required package: magrittr
library(Matrix)
library(arules)
## Warning: package 'arules' was built under R version 3.4.4
## 
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
library(arulesViz)
## Warning: package 'arulesViz' was built under R version 3.4.4
## Loading required package: grid
library(rio)
## Warning: package 'rio' was built under R version 3.4.4

Initiate variables

date16=array(0:0,c(31,12))
date15=array(0:0,c(31,12))
date14=array(0:0,c(31,12))
topval=array(0:0,c(5,12))
lowval=array(0:0,c(5,12))

Load data

MyData16 <- read.csv(file="/Users/Ymc/Desktop/r_final/cleanData2016.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
MyData15 <- read.csv(file="/Users/Ymc/Desktop/r_final/cleanData2015.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
MyData14 <- read.csv(file="/Users/Ymc/Desktop/r_final/cleanData2014.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
Veh16 <- read.csv(file="/Users/Ymc/Desktop/r_final/Veh16.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
Veh15 <- read.csv(file="/Users/Ymc/Desktop/r_final/Veh15.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
Veh14 <- read.csv(file="/Users/Ymc/Desktop/r_final/Veh14.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)

Arrange data for visualization

if (FALSE){
for (i in 1:nrow(MyData16))
  date16[as.numeric(substr(MyData16$Date[i],9,10)),as.numeric(substr(MyData16$Date[i],6,7))] = date16[as.numeric(substr(MyData16$Date[i],9,10)),as.numeric(substr(MyData16$Date[i],6,7))] + 1

for (i in 1:nrow(MyData15))
  date15[as.numeric(substr(MyData15$Date[i],9,10)),as.numeric(substr(MyData15$Date[i],6,7))] = date15[as.numeric(substr(MyData15$Date[i],9,10)),as.numeric(substr(MyData15$Date[i],6,7))] + 1

for (i in 1:nrow(MyData14))
  date14[as.numeric(substr(MyData14$Date[i],9,10)),as.numeric(substr(MyData14$Date[i],6,7))] = date14[as.numeric(substr(MyData14$Date[i],9,10)),as.numeric(substr(MyData14$Date[i],6,7))] + 1

for (i in 1:12)
  topval[,i]=order(date16[,i], decreasing = TRUE)[1:5]
for (i in 1:12)
  lowval[,i]=order(date16[,i], decreasing = FALSE)[1:5]
}

datetable16 <- setNames(data.frame(table(MyData16$Date)),c("Date","Count"))
datetable15 <- setNames(data.frame(table(MyData15$Date)),c("Date","Count"))
datetable14 <- setNames(data.frame(table(MyData14$Date)),c("Date","Count"))

VehNum16 <- setNames(data.frame(table(Veh16$Sex_of_Driver)),c("Sex","count"))
VehNum15 <- setNames(data.frame(table(Veh15$Sex_of_Driver)),c("Sex","count"))
VehNum14 <- setNames(data.frame(table(Veh14$Sex_of_Driver)),c("Sex","count"))

weat16=data.frame(ncol=1)
names(weat16)<-c("weather")
for (i in 1:nrow(MyData16))
  if(MyData16$Accident_Severity[i]!="slight"){
    if(MyData16$Weather_Conditions[i]>0){
      tempd<-as.character(MyData16$Weather_Conditions[i])
      temp<-data.frame(tempd)
      names(temp)<-c("weather")
      weat16<-rbind(weat16,temp)
    }
  }
weat15=data.frame(ncol=1)
names(weat15)<-c("weather")
for (i in 1:nrow(MyData15))
  if(MyData15$Accident_Severity[i]!="slight"){
    if(MyData15$Weather_Conditions[i]>0){
      tempd<-as.character(MyData15$Weather_Conditions[i])
      temp<-data.frame(tempd)
      names(temp)<-c("weather")
      weat15<-rbind(weat15,temp)
    }
  }
weat14=data.frame(ncol=1)
names(weat14)<-c("weather")
for (i in 1:nrow(MyData14))
  if(MyData14$Accident_Severity[i]!="slight"){
    if(MyData14$Weather_Conditions[i]>0){
      tempd<-as.character(MyData14$Weather_Conditions[i])
      temp<-data.frame(tempd)
      names(temp)<-c("weather")
      weat14<-rbind(weat14,temp)
    }
  }

Pie-chart for severity per year

bp16 <- ggplot(MyData16, aes(x = Accident_Severity, fill = Accident_Severity)) +
  geom_bar() +
  theme_light() +
  labs(x = "Accident Severity", y = "Count", title ="2016") 
pie16 <- bp16 + coord_polar("y")
bp15 <- ggplot(MyData15, aes(x = Accident_Severity, fill = Accident_Severity)) +
  geom_bar() +
  theme_light() +
  labs(x = "Accident Severity", y = "Count", title = "2015")
pie15 <- bp15 + coord_polar("y")
bp14 <- ggplot(MyData14, aes(x = Accident_Severity, fill = Accident_Severity)) +
  geom_bar() +
  theme_light() +
  labs(x = "Accident Severity", y = "Count", title = "2014") 
pie14 <- bp14 + coord_polar("y")
ggarrange(pie16, pie15, pie14, nrow = 2, ncol = 2)

Bar-chart for sex per year

Veh16$Sex_of_Driver <- factor(Veh16$Sex_of_Driver, 
                         levels=c("male","female","other"))
Vbar16<-ggplot(Veh16, aes(x = Sex_of_Driver))+
  geom_bar(aes(fill = Sex_of_Driver, color = Sex_of_Driver))+
  theme(legend.position = "none")+
  labs(x = "male = 90240        female = 36890        other = 9456",y = "2016")
Veh15$Sex_of_Driver <- factor(Veh15$Sex_of_Driver, 
                              levels=c("male","female","other"))
Vbar15<-ggplot(Veh15, aes(x = Sex_of_Driver))+
  geom_bar(aes(fill = Sex_of_Driver, color = Sex_of_Driver))+
  theme(legend.position = "none")+
  labs(x = "male = 93288        female = 38237        other = 8501",y = "2015")
val<-c("1","2","3")
Veh14$Sex_of_Driver <- factor(Veh14$Sex_of_Driver, 
                             levels=c("male","female","other"))
Vbar14<-ggplot(Veh14, aes(x = Sex_of_Driver))+
  geom_bar(aes(fill = Sex_of_Driver, color = Sex_of_Driver))+
  theme(legend.position = "none")+
  labs(x = "male = 96812        female = 41073        other = 8426",y = "2014")
ggarrange(Vbar16, Vbar15, Vbar14, nrow = 3)

Bar-chart for weather & ftl/srs severity

weat16$weather <- factor(weat16$weather, 
  levels=c("Fine","Rain","Snow","Fine+hw","Rain+hw","Snow+hw","Fog/Mist","Other","Unkn"))
weat16<-weat16[-c(1), ,drop=FALSE]
wbar16<-ggplot(weat16, aes(x = weather))+
  geom_bar(aes(fill = weather, color = weather))+
  labs(x="2016", y="count")+
  theme(legend.position = "none", axis.text.x = element_text(angle = 30, hjust = 1))
weat15$weather <- factor(weat15$weather, 
  levels=c("Fine","Rain","Snow","Fine+hw","Rain+hw","Snow+hw","Fog/Mist","Other","Unkn"))
weat15<-weat15[-c(1), ,drop=FALSE]
wbar15<-ggplot(weat15, aes(x = weather))+
  geom_bar(aes(fill = weather, color = weather))+
  labs(x="2015", y="count")+
  theme(legend.position = "none", axis.text.x = element_text(angle = 30, hjust = 1))
weat14$weather <- factor(weat14$weather, 
  levels=c("Fine","Rain","Snow","Fine+hw","Rain+hw","Snow+hw","Fog/Mist","Other","Unkn"))
weat14<-weat14[-c(1), ,drop=FALSE]
wbar14<-ggplot(weat14, aes(x = weather))+
  geom_bar(aes(fill = weather, color = weather))+
  labs(x="2014", y="count")+
  theme(legend.position = "none", axis.text.x = element_text(angle = 30, hjust = 1))
ggarrange(wbar16, wbar15, wbar14, nrow = 3)

Bar-chart for total accident for all weeks

MyData16$Day_of_Week <- factor(MyData16$Day_of_Week, levels=c("Sun","Mon","Tue","Wed","Thu","Fri","Sat"))
week16 <- ggplot(MyData16, aes(x = Day_of_Week))+
  geom_bar(aes(fill = Day_of_Week, color=Day_of_Week))+theme_minimal()+
  labs(x="2016", y="count")
week16 <- week16 + theme(legend.position = "none")
MyData15$Day_of_Week <- factor(MyData15$Day_of_Week, levels=c("Sun","Mon","Tue","Wed","Thu","Fri","Sat"))
week15 <- ggplot(MyData15, aes(x = Day_of_Week))+
  geom_bar(aes(fill = Day_of_Week, color=Day_of_Week))+theme_minimal()+
  labs(x="2015", y="count")
week15 <- week15 + theme(legend.position = "none")
MyData14$Day_of_Week <- factor(MyData14$Day_of_Week, levels=c("Sun","Mon","Tue","Wed","Thu","Fri","Sat"))
week14 <- ggplot(MyData14, aes(x = Day_of_Week))+
  geom_bar(aes(fill = Day_of_Week, color=Day_of_Week))+theme_minimal()+
  labs(x="2014", y="count")
week14 <- week14 + theme(legend.position = "none")
ggarrange(week16, week15, week14, nrow = 3)

Line-chart for total accident per month

newdt <- datetable16[1:31,]
jan16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1))+
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "January16")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[1:31,]
jan15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1))+
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "January15")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[1:31,]
jan14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1))+
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "January14")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(jan16, jan15, jan14, nrow = 3)

newdt <- datetable16[32:60,]
feb16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "Febuary16")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[32:59,]
feb15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "Febuary15")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[32:59,]
feb14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "Febuary14")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(feb16, feb15, feb14, nrow = 3)

newdt <- datetable16[61:91,]
mar16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "March16")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[60:90,]
mar15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "March15")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[60:90,]
mar14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "March14")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(mar16, mar15, mar14, nrow = 3)

newdt <- datetable16[92:121,]
apr16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "April16")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[91:120,]
apr15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "April15")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[91:120,]
apr14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "April14")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(apr16, apr15, apr14, nrow = 3)

newdt <- datetable16[122:152,]
may16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "May16")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[121:151,]
may15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "May15")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[121:151,]
may14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "May14")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(may16, may15, may14, nrow = 3)

newdt <- datetable16[153:182,]
jun16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "June16")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[152:181,]
jun15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "June15")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[152:181,]
jun14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "June14")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(jun16, jun15, jun14, nrow = 3)

newdt <- datetable16[183:213,]
jul16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "July16")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[182:212,]
jul15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "July15")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[182:212,]
jul14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "July14")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(jul16, jul15, jul14, nrow = 3)

newdt <- datetable16[214:244,]
aug16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "August16")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[213:243,]
aug15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "August15")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[213:243,]
aug14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "August14")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(aug16, aug15, aug14, nrow = 3)

newdt <- datetable16[245:274,]
sep16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "September16")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[244:273,]
sep15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "September15")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[244:273,]
sep14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "September14")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(sep16, sep15, sep14, nrow = 3)

newdt <- datetable16[275:305,]
oct16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "October16")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[274:304,]
oct15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "October15")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[274:304,]
oct14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "October14")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(oct16, oct15, oct14, nrow = 3)

newdt <- datetable16[306:335,]
nov16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "November16")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[305:334,]
nov15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "November15")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[305:334,]
nov14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "November14")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(nov16, nov15, nov14, nrow = 3)

newdt <- datetable16[336:366,]
dec16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "December")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[335:365,]
dec15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "December")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[335:365,]
dec14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x = "December")+
  geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(dec16, dec15, dec14, nrow = 3)

Apriori for All

Aprio_all <- read.delim("/Users/Ymc/Desktop/alldata1.txt")

tf_all = Aprio_all[,c(1,3:5)]
rule_all <- apriori(
  tf_all,
  parameter = list(minlen=3, supp=0.05, conf=0.9),
  #appearance = list(default="lhs",
    #rhs=c("Accident_Severity=slight","Accident_Severity=serious","Accident_Severity=fatal")
  #)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.9    0.1    1 none FALSE            TRUE       5    0.05      3
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 21150 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[26 item(s), 423001 transaction(s)] done [0.08s].
## sorting and recoding items ... [9 item(s)] done [0.01s].
## creating transaction tree ... done [0.16s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [10 rule(s)] done [0.00s].
## creating S4 object  ... done [0.08s].
sort(rule_all, by="lift")
## set of 10 rules
inspect(rule_all)
##      lhs                              rhs                              support confidence     lift  count
## [1]  {Light_Conditions=Day,                                                                              
##       Weather_Conditions=Rain}     => {Road_Surface_Conditions=Wet} 0.06555776  0.9811074 3.692977  27731
## [2]  {Accident_Severity=slight,                                                                          
##       Weather_Conditions=Rain}     => {Road_Surface_Conditions=Wet} 0.09443004  0.9810153 3.692631  39944
## [3]  {Accident_Severity=serious,                                                                         
##       Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine}     0.10194066  0.9658857 1.184026  43121
## [4]  {Light_Conditions=Night+lit,                                                                        
##       Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine}     0.10832126  0.9560573 1.171978  45820
## [5]  {Light_Conditions=Day,                                                                              
##       Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine}     0.54747152  0.9657296 1.183835 231581
## [6]  {Accident_Severity=slight,                                                                          
##       Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine}     0.57776696  0.9600911 1.176923 244396
## [7]  {Accident_Severity=slight,                                                                          
##       Light_Conditions=Day,                                                                              
##       Weather_Conditions=Rain}     => {Road_Surface_Conditions=Wet} 0.05696913  0.9809892 3.692533  24098
## [8]  {Accident_Severity=serious,                                                                         
##       Light_Conditions=Day,                                                                              
##       Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine}     0.07908492  0.9710031 1.190299  33453
## [9]  {Accident_Severity=slight,                                                                          
##       Light_Conditions=Night+lit,                                                                        
##       Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine}     0.09069246  0.9552302 1.170964  38363
## [10] {Accident_Severity=slight,                                                                          
##       Light_Conditions=Day,                                                                              
##       Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine}     0.46327314  0.9648078 1.182704 195965
sort.rule_all <- sort(rule_all, by="support")
plot(sort.rule_all)

plot(sort.rule_all, method="graph")

Apriori for Fatal

Aprio_ftl <- read.delim("/Users/Ymc/Desktop/data_ftl.txt")

tf_ftl = Aprio_ftl[,c(1,3:5)]
rule_ftl <- apriori(
  tf_ftl,
  parameter = list(minlen=3, supp=0.15, conf=0.9),
  #appearance = list(default="lhs",
    #rhs=c("Accident_Severity=slight","Accident_Severity=serious","Accident_Severity=fatal")
  #)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.9    0.1    1 none FALSE            TRUE       5    0.15      3
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 745 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[21 item(s), 4969 transaction(s)] done [0.00s].
## sorting and recoding items ... [7 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [10 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
sort(rule_ftl, by="lift")
## set of 10 rules
inspect(rule_ftl)
##      lhs                               rhs                         support confidence     lift count
## [1]  {Light_Conditions=Night+lit,                                                                   
##       Weather_Conditions=Fine}      => {Accident_Severity=fatal} 0.1557657  1.0000000 1.000000   774
## [2]  {Light_Conditions=Night+unlit,                                                                 
##       Weather_Conditions=Fine}      => {Accident_Severity=fatal} 0.1616019  1.0000000 1.000000   803
## [3]  {Weather_Conditions=Fine,                                                                      
##       Road_Surface_Conditions=Wet}  => {Accident_Severity=fatal} 0.1680419  1.0000000 1.000000   835
## [4]  {Light_Conditions=Day,                                                                         
##       Road_Surface_Conditions=Dry}  => {Weather_Conditions=Fine} 0.4352989  0.9682184 1.162096  2163
## [5]  {Light_Conditions=Day,                                                                         
##       Road_Surface_Conditions=Dry}  => {Accident_Severity=fatal} 0.4495874  1.0000000 1.000000  2234
## [6]  {Light_Conditions=Day,                                                                         
##       Weather_Conditions=Fine}      => {Accident_Severity=fatal} 0.5157979  1.0000000 1.000000  2563
## [7]  {Weather_Conditions=Fine,                                                                      
##       Road_Surface_Conditions=Dry}  => {Accident_Severity=fatal} 0.6556651  1.0000000 1.000000  3258
## [8]  {Accident_Severity=fatal,                                                                      
##       Road_Surface_Conditions=Dry}  => {Weather_Conditions=Fine} 0.6556651  0.9636202 1.156577  3258
## [9]  {Light_Conditions=Day,                                                                         
##       Weather_Conditions=Fine,                                                                      
##       Road_Surface_Conditions=Dry}  => {Accident_Severity=fatal} 0.4352989  1.0000000 1.000000  2163
## [10] {Accident_Severity=fatal,                                                                      
##       Light_Conditions=Day,                                                                         
##       Road_Surface_Conditions=Dry}  => {Weather_Conditions=Fine} 0.4352989  0.9682184 1.162096  2163
sort.rule_ftl <- sort(rule_ftl, by="support")
plot(sort.rule_ftl)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

plot(sort.rule_ftl, method="graph")

Apriori for Serious

Aprio_srs <- read.delim("/Users/Ymc/Desktop/r_final/apriori16_srs.txt")

tf_srs = Aprio_srs[,c(1,3:5)]
rule_srs <- apriori(
  tf_srs,
  parameter = list(minlen=3, supp=0.1, conf=0.9),
  #appearance = list(default="lhs",
    #rhs=c("Accident_Severity=slight","Accident_Severity=serious","Accident_Severity=fatal")
  #)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.9    0.1    1 none FALSE            TRUE       5     0.1      3
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 2342 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[21 item(s), 23420 transaction(s)] done [0.00s].
## sorting and recoding items ... [7 item(s)] done [0.00s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [14 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
sort(rule_srs, by="lift")
## set of 14 rules
inspect(rule_srs)
##      lhs                            rhs                           support confidence      lift count
## [1]  {Road_Surf=Wet,                                                                                
##       Light_Conditions=Day}      => {Accident_Severity=serious} 0.1577711  0.9253694 0.9975674  3695
## [2]  {Weather_Conditions=Fine,                                                                      
##       Road_Surf=Wet}             => {Accident_Severity=serious} 0.1166524  0.9028420 0.9732824  2732
## [3]  {Road_Surf=Dry,                                                                                
##       Light_Conditions=Night}    => {Weather_Conditions=Fine}   0.2089667  0.9727688 1.1675420  4894
## [4]  {Road_Surf=Dry,                                                                                
##       Light_Conditions=Night}    => {Accident_Severity=serious} 0.1948335  0.9069767 0.9777397  4563
## [5]  {Weather_Conditions=Fine,                                                                      
##       Light_Conditions=Night}    => {Accident_Severity=serious} 0.2305295  0.9043551 0.9749135  5399
## [6]  {Road_Surf=Dry,                                                                                
##       Light_Conditions=Day}      => {Weather_Conditions=Fine}   0.4853544  0.9602129 1.1524720 11367
## [7]  {Road_Surf=Dry,                                                                                
##       Light_Conditions=Day}      => {Accident_Severity=serious} 0.4765158  0.9427268 1.0162790 11160
## [8]  {Weather_Conditions=Fine,                                                                      
##       Light_Conditions=Day}      => {Accident_Severity=serious} 0.5426132  0.9384138 1.0116295 12708
## [9]  {Weather_Conditions=Fine,                                                                      
##       Road_Surf=Dry}             => {Accident_Severity=serious} 0.6474381  0.9324191 1.0051671 15163
## [10] {Accident_Severity=serious,                                                                    
##       Road_Surf=Dry}             => {Weather_Conditions=Fine}   0.6474381  0.9643834 1.1574775 15163
## [11] {Weather_Conditions=Fine,                                                                      
##       Road_Surf=Dry,                                                                                
##       Light_Conditions=Night}    => {Accident_Severity=serious} 0.1896670  0.9076420 0.9784569  4442
## [12] {Accident_Severity=serious,                                                                    
##       Road_Surf=Dry,                                                                                
##       Light_Conditions=Night}    => {Weather_Conditions=Fine}   0.1896670  0.9734824 1.1683983  4442
## [13] {Weather_Conditions=Fine,                                                                      
##       Road_Surf=Dry,                                                                                
##       Light_Conditions=Day}      => {Accident_Severity=serious} 0.4577711  0.9431688 1.0167555 10721
## [14] {Accident_Severity=serious,                                                                    
##       Road_Surf=Dry,                                                                                
##       Light_Conditions=Day}      => {Weather_Conditions=Fine}   0.4577711  0.9606631 1.1530123 10721
sort.rule_srs <- sort(rule_srs, by="support")
plot(sort.rule_srs)

plot(sort.rule_srs, method="graph")

Apriori for Rain

Aprio_rain <- read.delim("/Users/Ymc/Desktop/data_rain.txt")

tf_rain = Aprio_rain[,c(1,3:5)]
rule_rain <- apriori(
  tf_rain,
  parameter = list(minlen=3, supp=0.1, conf=0.9),
  #appearance = list(default="lhs",
    #rhs=c("Accident_Severity=slight","Accident_Severity=serious","Accident_Severity=fatal")
  #)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.9    0.1    1 none FALSE            TRUE       5     0.1      3
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 5380 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[14 item(s), 53800 transaction(s)] done [0.01s].
## sorting and recoding items ... [8 item(s)] done [0.00s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [11 rule(s)] done [0.00s].
## creating S4 object  ... done [0.01s].
sort(rule_rain, by="lift")
## set of 11 rules
inspect(rule_rain)
##      lhs                              rhs                             support confidence     lift count
## [1]  {Accident_Severity=serious,                                                                       
##       Weather_Conditions=Rain}     => {Road_Surface_Conditions=Wet} 0.1187175  0.9812567 1.002728  6387
## [2]  {Accident_Severity=slight,                                                                        
##       Light_Conditions=Night+lit}  => {Road_Surface_Conditions=Wet} 0.2609294  0.9859531 1.007527 14038
## [3]  {Light_Conditions=Night+lit,                                                                      
##       Weather_Conditions=Rain}     => {Road_Surface_Conditions=Wet} 0.2696654  0.9877451 1.009358 14508
## [4]  {Accident_Severity=slight,                                                                        
##       Light_Conditions=Day}        => {Weather_Conditions=Rain}     0.4565985  0.9134347 1.029902 24565
## [5]  {Accident_Severity=slight,                                                                        
##       Light_Conditions=Day}        => {Road_Surface_Conditions=Wet} 0.4895911  0.9794370 1.000868 26340
## [6]  {Light_Conditions=Day,                                                                            
##       Weather_Conditions=Rain}     => {Road_Surface_Conditions=Wet} 0.5154461  0.9811074 1.002575 27731
## [7]  {Light_Conditions=Day,                                                                            
##       Road_Surface_Conditions=Wet} => {Weather_Conditions=Rain}     0.5154461  0.9137068 1.030208 27731
## [8]  {Accident_Severity=slight,                                                                        
##       Weather_Conditions=Rain}     => {Road_Surface_Conditions=Wet} 0.7424535  0.9810153 1.002481 39944
## [9]  {Accident_Severity=slight,                                                                        
##       Light_Conditions=Night+lit,                                                                      
##       Weather_Conditions=Rain}     => {Road_Surface_Conditions=Wet} 0.2269517  0.9877043 1.009316 12210
## [10] {Accident_Severity=slight,                                                                        
##       Light_Conditions=Day,                                                                            
##       Weather_Conditions=Rain}     => {Road_Surface_Conditions=Wet} 0.4479182  0.9809892 1.002454 24098
## [11] {Accident_Severity=slight,                                                                        
##       Light_Conditions=Day,                                                                            
##       Road_Surface_Conditions=Wet} => {Weather_Conditions=Rain}     0.4479182  0.9148823 1.031534 24098
sort.rule_rain <- sort(rule_rain, by="support")
plot(sort.rule_rain)

plot(sort.rule_rain, method="graph")

Apriori for Night

Aprio_nit <- read.delim("/Users/Ymc/Desktop/data_nit.txt")

tf_nit = Aprio_nit[,c(1,3:5)]
rule_nit <- apriori(
  tf_nit,
  parameter = list(minlen=3, supp=0.05, conf=0.9),
  #appearance = list(default="lhs",
    #rhs=c("Accident_Severity=slight","Accident_Severity=serious","Accident_Severity=fatal")
  #)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.9    0.1    1 none FALSE            TRUE       5    0.05      3
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 5667 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[24 item(s), 113340 transaction(s)] done [0.02s].
## sorting and recoding items ... [8 item(s)] done [0.00s].
## creating transaction tree ... done [0.04s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [9 rule(s)] done [0.00s].
## creating S4 object  ... done [0.02s].
sort(rule_nit, by="lift")
## set of 9 rules
inspect(rule_nit)
##     lhs                               rhs                              support confidence     lift count
## [1] {Accident_Severity=serious,                                                                         
##      Road_Surface_Conditions=Dry}  => {Weather_Conditions=Fine}     0.08530086  0.9485871 1.326190  9668
## [2] {Light_Conditions=Night+lit,                                                                        
##      Weather_Conditions=Rain}      => {Road_Surface_Conditions=Wet} 0.12800424  0.9877451 2.371542 14508
## [3] {Accident_Severity=slight,                                                                          
##      Weather_Conditions=Rain}      => {Road_Surface_Conditions=Wet} 0.13980942  0.9810550 2.355480 15846
## [4] {Light_Conditions=Night+unlit,                                                                      
##      Road_Surface_Conditions=Dry}  => {Weather_Conditions=Fine}     0.11799012  0.9002962 1.258676 13373
## [5] {Light_Conditions=Night+lit,                                                                        
##      Road_Surface_Conditions=Dry}  => {Weather_Conditions=Fine}     0.40427034  0.9560573 1.336633 45820
## [6] {Accident_Severity=slight,                                                                          
##      Road_Surface_Conditions=Dry}  => {Weather_Conditions=Fine}     0.42729839  0.9414669 1.316235 48430
## [7] {Accident_Severity=serious,                                                                         
##      Light_Conditions=Night+lit,                                                                        
##      Road_Surface_Conditions=Dry}  => {Weather_Conditions=Fine}     0.06098465  0.9592007 1.341028  6912
## [8] {Accident_Severity=slight,                                                                          
##      Light_Conditions=Night+lit,                                                                        
##      Weather_Conditions=Rain}      => {Road_Surface_Conditions=Wet} 0.10772896  0.9877043 2.371444 12210
## [9] {Accident_Severity=slight,                                                                          
##      Light_Conditions=Night+lit,                                                                        
##      Road_Surface_Conditions=Dry}  => {Weather_Conditions=Fine}     0.33847715  0.9552302 1.335477 38363
sort.rule_nit <- sort(rule_nit, by="support")
plot(sort.rule_nit)

plot(sort.rule_nit, method="graph")

Line-chart for Time

stime<-0
rstime<-"0"
cnt<-1
MyData <- read.csv(file="/Users/Ymc/Desktop/r_final/cleanData2016.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
for(i in 1:nrow(MyData))
  if(nchar(MyData$Time[i])==4){
    MyData$Time[i]<-paste0("0", MyData$Time[i])
  }
timetable <- setNames(data.frame(table(MyData$Time)),c("Time","Count"))
timetable <- timetable[-c(1), ,drop=FALSE]
data_time=data.frame(a="00:00-00:59",b=0)
names(data_time)<-c("Time","Count")
for (i in 1:9){
  stime<-stime+1
  strtime=paste0("0",as.character(stime),":00-0",as.character(stime),":59")
  tempd=data.frame(Time=strtime,Count=0)
  data_time<-rbind(data_time,tempd)
}
for (i in 1:14){
  stime<-stime+1
  strtime=paste0(as.character(stime),":00-",as.character(stime),":59")
  tempd=data.frame(Time=strtime,Count=0)
  data_time<-rbind(data_time,tempd)
}
stime<-0
for (i in 1:nrow(timetable)){
  if(substr(timetable$Time[i],2,2)==":"){
    checktime<-as.numeric(substr(timetable$Time[i],1,1))
  }else{
    checktime<-as.numeric(substr(timetable$Time[i],1,2))
  }
  if(checktime==stime){
    data_time$Count[cnt]<-data_time$Count[cnt]+timetable[i,2]
  }else{
    cnt<-cnt+1
    stime<-stime+1
  }
}
time16<-ggplot(data=data_time, aes(x=Time, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x="Time 16")

stime<-0
rstime<-"0"
cnt<-1
MyData <- read.csv(file="/Users/Ymc/Desktop/r_final/cleanData2015.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
for(i in 1:nrow(MyData))
  if(nchar(MyData$Time[i])==4){
    MyData$Time[i]<-paste0("0", MyData$Time[i])
  }
timetable <- setNames(data.frame(table(MyData$Time)),c("Time","Count"))
timetable <- timetable[-c(1), ,drop=FALSE]
data_time=data.frame(a="00:00-00:59",b=0)
names(data_time)<-c("Time","Count")
for (i in 1:9){
  stime<-stime+1
  strtime=paste0("0",as.character(stime),":00-0",as.character(stime),":59")
  tempd=data.frame(Time=strtime,Count=0)
  data_time<-rbind(data_time,tempd)
}
for (i in 1:14){
  stime<-stime+1
  strtime=paste0(as.character(stime),":00-",as.character(stime),":59")
  tempd=data.frame(Time=strtime,Count=0)
  data_time<-rbind(data_time,tempd)
}
stime<-0
for (i in 1:nrow(timetable)){
  if(substr(timetable$Time[i],2,2)==":"){
    checktime<-as.numeric(substr(timetable$Time[i],1,1))
  }else{
    checktime<-as.numeric(substr(timetable$Time[i],1,2))
  }
  if(checktime==stime){
    data_time$Count[cnt]<-data_time$Count[cnt]+timetable[i,2]
  }else{
    cnt<-cnt+1
    stime<-stime+1
  }
}
time15<-ggplot(data=data_time, aes(x=Time, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x="Time 15")

stime<-0
rstime<-"0"
cnt<-1
MyData <- read.csv(file="/Users/Ymc/Desktop/r_final/cleanData2014.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
for(i in 1:nrow(MyData))
  if(nchar(MyData$Time[i])==4){
    MyData$Time[i]<-paste0("0", MyData$Time[i])
  }
timetable <- setNames(data.frame(table(MyData$Time)),c("Time","Count"))
timetable <- timetable[-c(1), ,drop=FALSE]
data_time=data.frame(a="00:00-00:59",b=0)
names(data_time)<-c("Time","Count")
for (i in 1:9){
  stime<-stime+1
  strtime=paste0("0",as.character(stime),":00-0",as.character(stime),":59")
  tempd=data.frame(Time=strtime,Count=0)
  data_time<-rbind(data_time,tempd)
}
for (i in 1:14){
  stime<-stime+1
  strtime=paste0(as.character(stime),":00-",as.character(stime),":59")
  tempd=data.frame(Time=strtime,Count=0)
  data_time<-rbind(data_time,tempd)
}
stime<-0
for (i in 1:nrow(timetable)){
  if(substr(timetable$Time[i],2,2)==":"){
    checktime<-as.numeric(substr(timetable$Time[i],1,1))
  }else{
    checktime<-as.numeric(substr(timetable$Time[i],1,2))
  }
  if(checktime==stime){
    data_time$Count[cnt]<-data_time$Count[cnt]+timetable[i,2]
  }else{
    cnt<-cnt+1
    stime<-stime+1
  }
}
time14<-ggplot(data=data_time, aes(x=Time, y=Count, group=1)) +
  geom_line(linetype="dashed", color = "blue")+
  geom_point(color = "red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  labs(x="Time 14")

ggarrange(time16,time15,time14,nrow=3)