my site: https://b04902122.github.io/CSX_R/final_project/final.html
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(rpart)
library(rpart.plot)
library(ggpubr)
## Loading required package: magrittr
library(Matrix)
library(arules)
## Warning: package 'arules' was built under R version 3.4.4
##
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
##
## recode
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(arulesViz)
## Warning: package 'arulesViz' was built under R version 3.4.4
## Loading required package: grid
library(rio)
## Warning: package 'rio' was built under R version 3.4.4
date16=array(0:0,c(31,12))
date15=array(0:0,c(31,12))
date14=array(0:0,c(31,12))
topval=array(0:0,c(5,12))
lowval=array(0:0,c(5,12))
MyData16 <- read.csv(file="/Users/Ymc/Desktop/r_final/cleanData2016.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
MyData15 <- read.csv(file="/Users/Ymc/Desktop/r_final/cleanData2015.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
MyData14 <- read.csv(file="/Users/Ymc/Desktop/r_final/cleanData2014.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
Veh16 <- read.csv(file="/Users/Ymc/Desktop/r_final/Veh16.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
Veh15 <- read.csv(file="/Users/Ymc/Desktop/r_final/Veh15.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
Veh14 <- read.csv(file="/Users/Ymc/Desktop/r_final/Veh14.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
if (FALSE){
for (i in 1:nrow(MyData16))
date16[as.numeric(substr(MyData16$Date[i],9,10)),as.numeric(substr(MyData16$Date[i],6,7))] = date16[as.numeric(substr(MyData16$Date[i],9,10)),as.numeric(substr(MyData16$Date[i],6,7))] + 1
for (i in 1:nrow(MyData15))
date15[as.numeric(substr(MyData15$Date[i],9,10)),as.numeric(substr(MyData15$Date[i],6,7))] = date15[as.numeric(substr(MyData15$Date[i],9,10)),as.numeric(substr(MyData15$Date[i],6,7))] + 1
for (i in 1:nrow(MyData14))
date14[as.numeric(substr(MyData14$Date[i],9,10)),as.numeric(substr(MyData14$Date[i],6,7))] = date14[as.numeric(substr(MyData14$Date[i],9,10)),as.numeric(substr(MyData14$Date[i],6,7))] + 1
for (i in 1:12)
topval[,i]=order(date16[,i], decreasing = TRUE)[1:5]
for (i in 1:12)
lowval[,i]=order(date16[,i], decreasing = FALSE)[1:5]
}
datetable16 <- setNames(data.frame(table(MyData16$Date)),c("Date","Count"))
datetable15 <- setNames(data.frame(table(MyData15$Date)),c("Date","Count"))
datetable14 <- setNames(data.frame(table(MyData14$Date)),c("Date","Count"))
VehNum16 <- setNames(data.frame(table(Veh16$Sex_of_Driver)),c("Sex","count"))
VehNum15 <- setNames(data.frame(table(Veh15$Sex_of_Driver)),c("Sex","count"))
VehNum14 <- setNames(data.frame(table(Veh14$Sex_of_Driver)),c("Sex","count"))
weat16=data.frame(ncol=1)
names(weat16)<-c("weather")
for (i in 1:nrow(MyData16))
if(MyData16$Accident_Severity[i]!="slight"){
if(MyData16$Weather_Conditions[i]>0){
tempd<-as.character(MyData16$Weather_Conditions[i])
temp<-data.frame(tempd)
names(temp)<-c("weather")
weat16<-rbind(weat16,temp)
}
}
weat15=data.frame(ncol=1)
names(weat15)<-c("weather")
for (i in 1:nrow(MyData15))
if(MyData15$Accident_Severity[i]!="slight"){
if(MyData15$Weather_Conditions[i]>0){
tempd<-as.character(MyData15$Weather_Conditions[i])
temp<-data.frame(tempd)
names(temp)<-c("weather")
weat15<-rbind(weat15,temp)
}
}
weat14=data.frame(ncol=1)
names(weat14)<-c("weather")
for (i in 1:nrow(MyData14))
if(MyData14$Accident_Severity[i]!="slight"){
if(MyData14$Weather_Conditions[i]>0){
tempd<-as.character(MyData14$Weather_Conditions[i])
temp<-data.frame(tempd)
names(temp)<-c("weather")
weat14<-rbind(weat14,temp)
}
}
bp16 <- ggplot(MyData16, aes(x = Accident_Severity, fill = Accident_Severity)) +
geom_bar() +
theme_light() +
labs(x = "Accident Severity", y = "Count", title ="2016")
pie16 <- bp16 + coord_polar("y")
bp15 <- ggplot(MyData15, aes(x = Accident_Severity, fill = Accident_Severity)) +
geom_bar() +
theme_light() +
labs(x = "Accident Severity", y = "Count", title = "2015")
pie15 <- bp15 + coord_polar("y")
bp14 <- ggplot(MyData14, aes(x = Accident_Severity, fill = Accident_Severity)) +
geom_bar() +
theme_light() +
labs(x = "Accident Severity", y = "Count", title = "2014")
pie14 <- bp14 + coord_polar("y")
ggarrange(pie16, pie15, pie14, nrow = 2, ncol = 2)
Veh16$Sex_of_Driver <- factor(Veh16$Sex_of_Driver,
levels=c("male","female","other"))
Vbar16<-ggplot(Veh16, aes(x = Sex_of_Driver))+
geom_bar(aes(fill = Sex_of_Driver, color = Sex_of_Driver))+
theme(legend.position = "none")+
labs(x = "male = 90240 female = 36890 other = 9456",y = "2016")
Veh15$Sex_of_Driver <- factor(Veh15$Sex_of_Driver,
levels=c("male","female","other"))
Vbar15<-ggplot(Veh15, aes(x = Sex_of_Driver))+
geom_bar(aes(fill = Sex_of_Driver, color = Sex_of_Driver))+
theme(legend.position = "none")+
labs(x = "male = 93288 female = 38237 other = 8501",y = "2015")
val<-c("1","2","3")
Veh14$Sex_of_Driver <- factor(Veh14$Sex_of_Driver,
levels=c("male","female","other"))
Vbar14<-ggplot(Veh14, aes(x = Sex_of_Driver))+
geom_bar(aes(fill = Sex_of_Driver, color = Sex_of_Driver))+
theme(legend.position = "none")+
labs(x = "male = 96812 female = 41073 other = 8426",y = "2014")
ggarrange(Vbar16, Vbar15, Vbar14, nrow = 3)
weat16$weather <- factor(weat16$weather,
levels=c("Fine","Rain","Snow","Fine+hw","Rain+hw","Snow+hw","Fog/Mist","Other","Unkn"))
weat16<-weat16[-c(1), ,drop=FALSE]
wbar16<-ggplot(weat16, aes(x = weather))+
geom_bar(aes(fill = weather, color = weather))+
labs(x="2016", y="count")+
theme(legend.position = "none", axis.text.x = element_text(angle = 30, hjust = 1))
weat15$weather <- factor(weat15$weather,
levels=c("Fine","Rain","Snow","Fine+hw","Rain+hw","Snow+hw","Fog/Mist","Other","Unkn"))
weat15<-weat15[-c(1), ,drop=FALSE]
wbar15<-ggplot(weat15, aes(x = weather))+
geom_bar(aes(fill = weather, color = weather))+
labs(x="2015", y="count")+
theme(legend.position = "none", axis.text.x = element_text(angle = 30, hjust = 1))
weat14$weather <- factor(weat14$weather,
levels=c("Fine","Rain","Snow","Fine+hw","Rain+hw","Snow+hw","Fog/Mist","Other","Unkn"))
weat14<-weat14[-c(1), ,drop=FALSE]
wbar14<-ggplot(weat14, aes(x = weather))+
geom_bar(aes(fill = weather, color = weather))+
labs(x="2014", y="count")+
theme(legend.position = "none", axis.text.x = element_text(angle = 30, hjust = 1))
ggarrange(wbar16, wbar15, wbar14, nrow = 3)
MyData16$Day_of_Week <- factor(MyData16$Day_of_Week, levels=c("Sun","Mon","Tue","Wed","Thu","Fri","Sat"))
week16 <- ggplot(MyData16, aes(x = Day_of_Week))+
geom_bar(aes(fill = Day_of_Week, color=Day_of_Week))+theme_minimal()+
labs(x="2016", y="count")
week16 <- week16 + theme(legend.position = "none")
MyData15$Day_of_Week <- factor(MyData15$Day_of_Week, levels=c("Sun","Mon","Tue","Wed","Thu","Fri","Sat"))
week15 <- ggplot(MyData15, aes(x = Day_of_Week))+
geom_bar(aes(fill = Day_of_Week, color=Day_of_Week))+theme_minimal()+
labs(x="2015", y="count")
week15 <- week15 + theme(legend.position = "none")
MyData14$Day_of_Week <- factor(MyData14$Day_of_Week, levels=c("Sun","Mon","Tue","Wed","Thu","Fri","Sat"))
week14 <- ggplot(MyData14, aes(x = Day_of_Week))+
geom_bar(aes(fill = Day_of_Week, color=Day_of_Week))+theme_minimal()+
labs(x="2014", y="count")
week14 <- week14 + theme(legend.position = "none")
ggarrange(week16, week15, week14, nrow = 3)
newdt <- datetable16[1:31,]
jan16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1))+
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "January16")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[1:31,]
jan15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1))+
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "January15")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[1:31,]
jan14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1))+
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "January14")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(jan16, jan15, jan14, nrow = 3)
newdt <- datetable16[32:60,]
feb16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "Febuary16")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[32:59,]
feb15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "Febuary15")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[32:59,]
feb14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "Febuary14")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(feb16, feb15, feb14, nrow = 3)
newdt <- datetable16[61:91,]
mar16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "March16")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[60:90,]
mar15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "March15")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[60:90,]
mar14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "March14")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(mar16, mar15, mar14, nrow = 3)
newdt <- datetable16[92:121,]
apr16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "April16")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[91:120,]
apr15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "April15")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[91:120,]
apr14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "April14")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(apr16, apr15, apr14, nrow = 3)
newdt <- datetable16[122:152,]
may16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "May16")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[121:151,]
may15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "May15")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[121:151,]
may14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "May14")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(may16, may15, may14, nrow = 3)
newdt <- datetable16[153:182,]
jun16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "June16")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[152:181,]
jun15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "June15")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[152:181,]
jun14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "June14")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(jun16, jun15, jun14, nrow = 3)
newdt <- datetable16[183:213,]
jul16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "July16")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[182:212,]
jul15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "July15")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[182:212,]
jul14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "July14")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(jul16, jul15, jul14, nrow = 3)
newdt <- datetable16[214:244,]
aug16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "August16")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[213:243,]
aug15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "August15")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[213:243,]
aug14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "August14")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(aug16, aug15, aug14, nrow = 3)
newdt <- datetable16[245:274,]
sep16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "September16")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[244:273,]
sep15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "September15")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[244:273,]
sep14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "September14")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(sep16, sep15, sep14, nrow = 3)
newdt <- datetable16[275:305,]
oct16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "October16")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[274:304,]
oct15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "October15")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[274:304,]
oct14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "October14")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(oct16, oct15, oct14, nrow = 3)
newdt <- datetable16[306:335,]
nov16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "November16")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[305:334,]
nov15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "November15")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[305:334,]
nov14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "November14")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(nov16, nov15, nov14, nrow = 3)
newdt <- datetable16[336:366,]
dec16 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "December")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable15[335:365,]
dec15 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "December")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
newdt <- datetable14[335:365,]
dec14 <- ggplot(data=newdt, aes(x=Date, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x = "December")+
geom_hline(yintercept = mean(newdt$Count), color="gold2")
ggarrange(dec16, dec15, dec14, nrow = 3)
Aprio_all <- read.delim("/Users/Ymc/Desktop/alldata1.txt")
tf_all = Aprio_all[,c(1,3:5)]
rule_all <- apriori(
tf_all,
parameter = list(minlen=3, supp=0.05, conf=0.9),
#appearance = list(default="lhs",
#rhs=c("Accident_Severity=slight","Accident_Severity=serious","Accident_Severity=fatal")
#)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.9 0.1 1 none FALSE TRUE 5 0.05 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 21150
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[26 item(s), 423001 transaction(s)] done [0.08s].
## sorting and recoding items ... [9 item(s)] done [0.01s].
## creating transaction tree ... done [0.16s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [10 rule(s)] done [0.00s].
## creating S4 object ... done [0.08s].
sort(rule_all, by="lift")
## set of 10 rules
inspect(rule_all)
## lhs rhs support confidence lift count
## [1] {Light_Conditions=Day,
## Weather_Conditions=Rain} => {Road_Surface_Conditions=Wet} 0.06555776 0.9811074 3.692977 27731
## [2] {Accident_Severity=slight,
## Weather_Conditions=Rain} => {Road_Surface_Conditions=Wet} 0.09443004 0.9810153 3.692631 39944
## [3] {Accident_Severity=serious,
## Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine} 0.10194066 0.9658857 1.184026 43121
## [4] {Light_Conditions=Night+lit,
## Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine} 0.10832126 0.9560573 1.171978 45820
## [5] {Light_Conditions=Day,
## Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine} 0.54747152 0.9657296 1.183835 231581
## [6] {Accident_Severity=slight,
## Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine} 0.57776696 0.9600911 1.176923 244396
## [7] {Accident_Severity=slight,
## Light_Conditions=Day,
## Weather_Conditions=Rain} => {Road_Surface_Conditions=Wet} 0.05696913 0.9809892 3.692533 24098
## [8] {Accident_Severity=serious,
## Light_Conditions=Day,
## Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine} 0.07908492 0.9710031 1.190299 33453
## [9] {Accident_Severity=slight,
## Light_Conditions=Night+lit,
## Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine} 0.09069246 0.9552302 1.170964 38363
## [10] {Accident_Severity=slight,
## Light_Conditions=Day,
## Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine} 0.46327314 0.9648078 1.182704 195965
sort.rule_all <- sort(rule_all, by="support")
plot(sort.rule_all)
plot(sort.rule_all, method="graph")
Aprio_ftl <- read.delim("/Users/Ymc/Desktop/data_ftl.txt")
tf_ftl = Aprio_ftl[,c(1,3:5)]
rule_ftl <- apriori(
tf_ftl,
parameter = list(minlen=3, supp=0.15, conf=0.9),
#appearance = list(default="lhs",
#rhs=c("Accident_Severity=slight","Accident_Severity=serious","Accident_Severity=fatal")
#)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.9 0.1 1 none FALSE TRUE 5 0.15 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 745
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[21 item(s), 4969 transaction(s)] done [0.00s].
## sorting and recoding items ... [7 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [10 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
sort(rule_ftl, by="lift")
## set of 10 rules
inspect(rule_ftl)
## lhs rhs support confidence lift count
## [1] {Light_Conditions=Night+lit,
## Weather_Conditions=Fine} => {Accident_Severity=fatal} 0.1557657 1.0000000 1.000000 774
## [2] {Light_Conditions=Night+unlit,
## Weather_Conditions=Fine} => {Accident_Severity=fatal} 0.1616019 1.0000000 1.000000 803
## [3] {Weather_Conditions=Fine,
## Road_Surface_Conditions=Wet} => {Accident_Severity=fatal} 0.1680419 1.0000000 1.000000 835
## [4] {Light_Conditions=Day,
## Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine} 0.4352989 0.9682184 1.162096 2163
## [5] {Light_Conditions=Day,
## Road_Surface_Conditions=Dry} => {Accident_Severity=fatal} 0.4495874 1.0000000 1.000000 2234
## [6] {Light_Conditions=Day,
## Weather_Conditions=Fine} => {Accident_Severity=fatal} 0.5157979 1.0000000 1.000000 2563
## [7] {Weather_Conditions=Fine,
## Road_Surface_Conditions=Dry} => {Accident_Severity=fatal} 0.6556651 1.0000000 1.000000 3258
## [8] {Accident_Severity=fatal,
## Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine} 0.6556651 0.9636202 1.156577 3258
## [9] {Light_Conditions=Day,
## Weather_Conditions=Fine,
## Road_Surface_Conditions=Dry} => {Accident_Severity=fatal} 0.4352989 1.0000000 1.000000 2163
## [10] {Accident_Severity=fatal,
## Light_Conditions=Day,
## Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine} 0.4352989 0.9682184 1.162096 2163
sort.rule_ftl <- sort(rule_ftl, by="support")
plot(sort.rule_ftl)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
plot(sort.rule_ftl, method="graph")
Aprio_srs <- read.delim("/Users/Ymc/Desktop/r_final/apriori16_srs.txt")
tf_srs = Aprio_srs[,c(1,3:5)]
rule_srs <- apriori(
tf_srs,
parameter = list(minlen=3, supp=0.1, conf=0.9),
#appearance = list(default="lhs",
#rhs=c("Accident_Severity=slight","Accident_Severity=serious","Accident_Severity=fatal")
#)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.9 0.1 1 none FALSE TRUE 5 0.1 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 2342
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[21 item(s), 23420 transaction(s)] done [0.00s].
## sorting and recoding items ... [7 item(s)] done [0.00s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [14 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
sort(rule_srs, by="lift")
## set of 14 rules
inspect(rule_srs)
## lhs rhs support confidence lift count
## [1] {Road_Surf=Wet,
## Light_Conditions=Day} => {Accident_Severity=serious} 0.1577711 0.9253694 0.9975674 3695
## [2] {Weather_Conditions=Fine,
## Road_Surf=Wet} => {Accident_Severity=serious} 0.1166524 0.9028420 0.9732824 2732
## [3] {Road_Surf=Dry,
## Light_Conditions=Night} => {Weather_Conditions=Fine} 0.2089667 0.9727688 1.1675420 4894
## [4] {Road_Surf=Dry,
## Light_Conditions=Night} => {Accident_Severity=serious} 0.1948335 0.9069767 0.9777397 4563
## [5] {Weather_Conditions=Fine,
## Light_Conditions=Night} => {Accident_Severity=serious} 0.2305295 0.9043551 0.9749135 5399
## [6] {Road_Surf=Dry,
## Light_Conditions=Day} => {Weather_Conditions=Fine} 0.4853544 0.9602129 1.1524720 11367
## [7] {Road_Surf=Dry,
## Light_Conditions=Day} => {Accident_Severity=serious} 0.4765158 0.9427268 1.0162790 11160
## [8] {Weather_Conditions=Fine,
## Light_Conditions=Day} => {Accident_Severity=serious} 0.5426132 0.9384138 1.0116295 12708
## [9] {Weather_Conditions=Fine,
## Road_Surf=Dry} => {Accident_Severity=serious} 0.6474381 0.9324191 1.0051671 15163
## [10] {Accident_Severity=serious,
## Road_Surf=Dry} => {Weather_Conditions=Fine} 0.6474381 0.9643834 1.1574775 15163
## [11] {Weather_Conditions=Fine,
## Road_Surf=Dry,
## Light_Conditions=Night} => {Accident_Severity=serious} 0.1896670 0.9076420 0.9784569 4442
## [12] {Accident_Severity=serious,
## Road_Surf=Dry,
## Light_Conditions=Night} => {Weather_Conditions=Fine} 0.1896670 0.9734824 1.1683983 4442
## [13] {Weather_Conditions=Fine,
## Road_Surf=Dry,
## Light_Conditions=Day} => {Accident_Severity=serious} 0.4577711 0.9431688 1.0167555 10721
## [14] {Accident_Severity=serious,
## Road_Surf=Dry,
## Light_Conditions=Day} => {Weather_Conditions=Fine} 0.4577711 0.9606631 1.1530123 10721
sort.rule_srs <- sort(rule_srs, by="support")
plot(sort.rule_srs)
plot(sort.rule_srs, method="graph")
Aprio_rain <- read.delim("/Users/Ymc/Desktop/data_rain.txt")
tf_rain = Aprio_rain[,c(1,3:5)]
rule_rain <- apriori(
tf_rain,
parameter = list(minlen=3, supp=0.1, conf=0.9),
#appearance = list(default="lhs",
#rhs=c("Accident_Severity=slight","Accident_Severity=serious","Accident_Severity=fatal")
#)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.9 0.1 1 none FALSE TRUE 5 0.1 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 5380
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[14 item(s), 53800 transaction(s)] done [0.01s].
## sorting and recoding items ... [8 item(s)] done [0.00s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [11 rule(s)] done [0.00s].
## creating S4 object ... done [0.01s].
sort(rule_rain, by="lift")
## set of 11 rules
inspect(rule_rain)
## lhs rhs support confidence lift count
## [1] {Accident_Severity=serious,
## Weather_Conditions=Rain} => {Road_Surface_Conditions=Wet} 0.1187175 0.9812567 1.002728 6387
## [2] {Accident_Severity=slight,
## Light_Conditions=Night+lit} => {Road_Surface_Conditions=Wet} 0.2609294 0.9859531 1.007527 14038
## [3] {Light_Conditions=Night+lit,
## Weather_Conditions=Rain} => {Road_Surface_Conditions=Wet} 0.2696654 0.9877451 1.009358 14508
## [4] {Accident_Severity=slight,
## Light_Conditions=Day} => {Weather_Conditions=Rain} 0.4565985 0.9134347 1.029902 24565
## [5] {Accident_Severity=slight,
## Light_Conditions=Day} => {Road_Surface_Conditions=Wet} 0.4895911 0.9794370 1.000868 26340
## [6] {Light_Conditions=Day,
## Weather_Conditions=Rain} => {Road_Surface_Conditions=Wet} 0.5154461 0.9811074 1.002575 27731
## [7] {Light_Conditions=Day,
## Road_Surface_Conditions=Wet} => {Weather_Conditions=Rain} 0.5154461 0.9137068 1.030208 27731
## [8] {Accident_Severity=slight,
## Weather_Conditions=Rain} => {Road_Surface_Conditions=Wet} 0.7424535 0.9810153 1.002481 39944
## [9] {Accident_Severity=slight,
## Light_Conditions=Night+lit,
## Weather_Conditions=Rain} => {Road_Surface_Conditions=Wet} 0.2269517 0.9877043 1.009316 12210
## [10] {Accident_Severity=slight,
## Light_Conditions=Day,
## Weather_Conditions=Rain} => {Road_Surface_Conditions=Wet} 0.4479182 0.9809892 1.002454 24098
## [11] {Accident_Severity=slight,
## Light_Conditions=Day,
## Road_Surface_Conditions=Wet} => {Weather_Conditions=Rain} 0.4479182 0.9148823 1.031534 24098
sort.rule_rain <- sort(rule_rain, by="support")
plot(sort.rule_rain)
plot(sort.rule_rain, method="graph")
Aprio_nit <- read.delim("/Users/Ymc/Desktop/data_nit.txt")
tf_nit = Aprio_nit[,c(1,3:5)]
rule_nit <- apriori(
tf_nit,
parameter = list(minlen=3, supp=0.05, conf=0.9),
#appearance = list(default="lhs",
#rhs=c("Accident_Severity=slight","Accident_Severity=serious","Accident_Severity=fatal")
#)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.9 0.1 1 none FALSE TRUE 5 0.05 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 5667
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[24 item(s), 113340 transaction(s)] done [0.02s].
## sorting and recoding items ... [8 item(s)] done [0.00s].
## creating transaction tree ... done [0.04s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [9 rule(s)] done [0.00s].
## creating S4 object ... done [0.02s].
sort(rule_nit, by="lift")
## set of 9 rules
inspect(rule_nit)
## lhs rhs support confidence lift count
## [1] {Accident_Severity=serious,
## Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine} 0.08530086 0.9485871 1.326190 9668
## [2] {Light_Conditions=Night+lit,
## Weather_Conditions=Rain} => {Road_Surface_Conditions=Wet} 0.12800424 0.9877451 2.371542 14508
## [3] {Accident_Severity=slight,
## Weather_Conditions=Rain} => {Road_Surface_Conditions=Wet} 0.13980942 0.9810550 2.355480 15846
## [4] {Light_Conditions=Night+unlit,
## Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine} 0.11799012 0.9002962 1.258676 13373
## [5] {Light_Conditions=Night+lit,
## Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine} 0.40427034 0.9560573 1.336633 45820
## [6] {Accident_Severity=slight,
## Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine} 0.42729839 0.9414669 1.316235 48430
## [7] {Accident_Severity=serious,
## Light_Conditions=Night+lit,
## Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine} 0.06098465 0.9592007 1.341028 6912
## [8] {Accident_Severity=slight,
## Light_Conditions=Night+lit,
## Weather_Conditions=Rain} => {Road_Surface_Conditions=Wet} 0.10772896 0.9877043 2.371444 12210
## [9] {Accident_Severity=slight,
## Light_Conditions=Night+lit,
## Road_Surface_Conditions=Dry} => {Weather_Conditions=Fine} 0.33847715 0.9552302 1.335477 38363
sort.rule_nit <- sort(rule_nit, by="support")
plot(sort.rule_nit)
plot(sort.rule_nit, method="graph")
stime<-0
rstime<-"0"
cnt<-1
MyData <- read.csv(file="/Users/Ymc/Desktop/r_final/cleanData2016.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
for(i in 1:nrow(MyData))
if(nchar(MyData$Time[i])==4){
MyData$Time[i]<-paste0("0", MyData$Time[i])
}
timetable <- setNames(data.frame(table(MyData$Time)),c("Time","Count"))
timetable <- timetable[-c(1), ,drop=FALSE]
data_time=data.frame(a="00:00-00:59",b=0)
names(data_time)<-c("Time","Count")
for (i in 1:9){
stime<-stime+1
strtime=paste0("0",as.character(stime),":00-0",as.character(stime),":59")
tempd=data.frame(Time=strtime,Count=0)
data_time<-rbind(data_time,tempd)
}
for (i in 1:14){
stime<-stime+1
strtime=paste0(as.character(stime),":00-",as.character(stime),":59")
tempd=data.frame(Time=strtime,Count=0)
data_time<-rbind(data_time,tempd)
}
stime<-0
for (i in 1:nrow(timetable)){
if(substr(timetable$Time[i],2,2)==":"){
checktime<-as.numeric(substr(timetable$Time[i],1,1))
}else{
checktime<-as.numeric(substr(timetable$Time[i],1,2))
}
if(checktime==stime){
data_time$Count[cnt]<-data_time$Count[cnt]+timetable[i,2]
}else{
cnt<-cnt+1
stime<-stime+1
}
}
time16<-ggplot(data=data_time, aes(x=Time, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x="Time 16")
stime<-0
rstime<-"0"
cnt<-1
MyData <- read.csv(file="/Users/Ymc/Desktop/r_final/cleanData2015.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
for(i in 1:nrow(MyData))
if(nchar(MyData$Time[i])==4){
MyData$Time[i]<-paste0("0", MyData$Time[i])
}
timetable <- setNames(data.frame(table(MyData$Time)),c("Time","Count"))
timetable <- timetable[-c(1), ,drop=FALSE]
data_time=data.frame(a="00:00-00:59",b=0)
names(data_time)<-c("Time","Count")
for (i in 1:9){
stime<-stime+1
strtime=paste0("0",as.character(stime),":00-0",as.character(stime),":59")
tempd=data.frame(Time=strtime,Count=0)
data_time<-rbind(data_time,tempd)
}
for (i in 1:14){
stime<-stime+1
strtime=paste0(as.character(stime),":00-",as.character(stime),":59")
tempd=data.frame(Time=strtime,Count=0)
data_time<-rbind(data_time,tempd)
}
stime<-0
for (i in 1:nrow(timetable)){
if(substr(timetable$Time[i],2,2)==":"){
checktime<-as.numeric(substr(timetable$Time[i],1,1))
}else{
checktime<-as.numeric(substr(timetable$Time[i],1,2))
}
if(checktime==stime){
data_time$Count[cnt]<-data_time$Count[cnt]+timetable[i,2]
}else{
cnt<-cnt+1
stime<-stime+1
}
}
time15<-ggplot(data=data_time, aes(x=Time, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x="Time 15")
stime<-0
rstime<-"0"
cnt<-1
MyData <- read.csv(file="/Users/Ymc/Desktop/r_final/cleanData2014.csv", header=TRUE, sep=",", stringsAsFactors = FALSE)
for(i in 1:nrow(MyData))
if(nchar(MyData$Time[i])==4){
MyData$Time[i]<-paste0("0", MyData$Time[i])
}
timetable <- setNames(data.frame(table(MyData$Time)),c("Time","Count"))
timetable <- timetable[-c(1), ,drop=FALSE]
data_time=data.frame(a="00:00-00:59",b=0)
names(data_time)<-c("Time","Count")
for (i in 1:9){
stime<-stime+1
strtime=paste0("0",as.character(stime),":00-0",as.character(stime),":59")
tempd=data.frame(Time=strtime,Count=0)
data_time<-rbind(data_time,tempd)
}
for (i in 1:14){
stime<-stime+1
strtime=paste0(as.character(stime),":00-",as.character(stime),":59")
tempd=data.frame(Time=strtime,Count=0)
data_time<-rbind(data_time,tempd)
}
stime<-0
for (i in 1:nrow(timetable)){
if(substr(timetable$Time[i],2,2)==":"){
checktime<-as.numeric(substr(timetable$Time[i],1,1))
}else{
checktime<-as.numeric(substr(timetable$Time[i],1,2))
}
if(checktime==stime){
data_time$Count[cnt]<-data_time$Count[cnt]+timetable[i,2]
}else{
cnt<-cnt+1
stime<-stime+1
}
}
time14<-ggplot(data=data_time, aes(x=Time, y=Count, group=1)) +
geom_line(linetype="dashed", color = "blue")+
geom_point(color = "red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
labs(x="Time 14")
ggarrange(time16,time15,time14,nrow=3)