# My site: https://b04902122.github.io/CSX_R/project_3/titanic.html
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(rpart)
library(rpart.plot)
# Load the two input CSVs: titanicTrain.csv into MyData and
# titanicQuestion.csv into MyPred. header = TRUE and sep = "," are the
# read.csv() defaults, so only stringsAsFactors is spelled out (text columns
# stay character instead of becoming factors).
MyData <- read.csv(
  "/Users/ymc/Desktop/titanicTrain.csv",
  stringsAsFactors = FALSE
)
MyPred <- read.csv(
  "/Users/ymc/Desktop/titanicQuestion.csv",
  stringsAsFactors = FALSE
)
# Average age over the rows of MyData whose age is usable, i.e. not missing
# and at least 1 (ages below 1 are excluded just like missing ones).
#   total  - number of rows with a usable age
#   total1 - number of rows in MyPred
#   t_age  - sum of the usable ages
#   avage  - t_age / total (NaN when no row has a usable age)
# Vectorised so that an empty MyData no longer trips over `1:nrow(MyData)`
# iterating over c(1, 0).
# NOTE(review): nothing in the visible script reads avage or total1; the
# age-filling steps use the literal 31.44 instead -- confirm whether that
# literal is meant to be this average, rounded.
total1 <- nrow(MyPred)
usable_age <- MyData$age[!is.na(MyData$age) & MyData$age >= 1]
total <- length(usable_age)
t_age <- sum(usable_age)
avage <- t_age / total
# Replace every unusable age in MyData (missing, or below 1) with 31.44.
# A single logical mask replaces the row-by-row loop; `is.na(x) | x < 1` is
# never NA, because the first operand is TRUE exactly where the second would
# be NA. On an empty data frame this is now a no-op, whereas
# `1:nrow(MyData)` would have iterated over c(1, 0).
# NOTE(review): 31.44 is hard-coded -- presumably the rounded mean of the
# usable ages; confirm it still matches if the input data changes.
MyData$age[is.na(MyData$age) | MyData$age < 1] <- 31.44
# Collapse MyData$boat to a presence flag: "1" when the field holds any
# value, "0" when it is empty.
# - Done in one vectorised step instead of two row-by-row loops (mark empty
#   as NA, then turn NA into 0).
# - A boat value that is already NA counts as empty; the original
#   `if (MyData$boat[i] == '')` stopped with an error on NA.
# - The flag is stored as text ("0"/"1"), which is what the original
#   element-wise assignment produced for a character column.
MyData$boat <- as.character(
  as.integer(!is.na(MyData$boat) & MyData$boat != "")
)
# Replace every unusable age in MyPred (missing, or below 1) with 31.44,
# the same fill value that is applied to MyData.
# A single logical mask replaces the row-by-row loop; `is.na(x) | x < 1` is
# never NA, because the first operand is TRUE exactly where the second would
# be NA. On an empty data frame this is now a no-op, whereas
# `1:nrow(MyPred)` would have iterated over c(1, 0).
# NOTE(review): 31.44 is hard-coded -- presumably the rounded mean of the
# usable MyData ages; confirm it still matches if the input data changes.
MyPred$age[is.na(MyPred$age) | MyPred$age < 1] <- 31.44
# Collapse MyPred$boat to a presence flag: "1" when the field holds any
# value, "0" when it is empty.
# - Done in one vectorised step instead of two row-by-row loops (mark empty
#   as NA, then turn NA into 0).
# - A boat value that is already NA counts as empty; the original
#   `if (MyPred$boat[i] == '')` stopped with an error on NA.
# - The flag is stored as text ("0"/"1"), which is what the original
#   element-wise assignment produced for a character column.
MyPred$boat <- as.character(
  as.integer(!is.na(MyPred$boat) & MyPred$boat != "")
)
# Save both data frames as CSV. The file names are relative, so the files
# land in the current working directory.
write.csv(x = MyData, file = "MyData1.csv")
write.csv(x = MyPred, file = "MyPred1.csv")
#import pandas as pd
#from sklearn.tree import DecisionTreeRegressor
#train = pd.read_csv('/Users/ymc/Desktop/MyData2.csv')
#test = pd.read_csv('/Users/ymc/Desktop/MyPred2.csv')
##factors that will predict survival
#desired_factors = ['boat']
##set my model to DecisionTree
#model = DecisionTreeRegressor()
##set prediction data to the predictor columns, and set target to survived
#train_data = train[desired_factors]
#test_data = test[desired_factors]
#target = train.survived
##fitting model with prediction data and telling it my target
#model.fit(train_data, target)
#res = model.predict(test_data)
##write predicted data
#with open('/Users/ymc/Desktop/result2.csv','a') as file:
# for i in range(len(res)):
# if(res[i]>0.5):
# file.write('1')
# file.write("\n")
# else:
# file.write('0')
# file.write("\n")