# Load the raw 2013 Q1 trip records; they seed the daily summary built below.
bike1 <- read.csv(
  file = 'bike project v2/extracted/2013Q1-capitalbikeshare-tripdata.csv'
)
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.1.1
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
Initial build:
# Summarise the first quarter's trips into one row per day, with the number
# of rides taken by each member type. The result seeds `bikestart`.

# Drop the trip columns at positions 3, 5 and 7; they are not used below.
bike1 <- bike1[, -c(3, 5, 7)]
bike1$Date <- as.Date(bike1$Start.date, '%Y-%m-%d')
bike1$Hour <- hour(bike1$Start.date)

# One output row per distinct start date, in order of first appearance.
trip_days <- unique(bike1$Date)

# Running day index: the first day is 274, the second 275, and so on.
# NOTE(review): the 273 offset is carried over unchanged -- confirm it is the
# intended starting point for this data set.
bikenext <- data.frame(
  index = 273 + seq_along(trip_days),
  Date = trip_days
)

# Count rides per day and member type in one vectorised pass. Each trip is
# mapped to the row of its date, so the counts are correct even if the file
# is not sorted by date, and no vector is grown row by row.
day_of_trip <- match(bike1$Date, trip_days)
count_rides <- function(member_type) {
  # which() drops trips whose member type is NA, like na.rm = TRUE would.
  hits <- which(bike1$Member.type == member_type)
  as.numeric(tabulate(day_of_trip[hits], nbins = length(trip_days)))
}
bikenext$Member <- count_rides('Member')
bikenext$Casual <- count_rides('Casual')

bikestart <- bikenext
Iterative build:
# Load the remaining 2013 quarters and append each quarter's daily summary
# (one row per day, rides counted by member type) to `bikestart`.
quarter_files <- c(
  'bike project v2/extracted/2013Q2-capitalbikeshare-tripdata.csv',
  'bike project v2/extracted/2013Q3-capitalbikeshare-tripdata.csv',
  'bike project v2/extracted/2013Q4-capitalbikeshare-tripdata.csv'
)

for (quarter_file in quarter_files) {
  bike1 <- read.csv(quarter_file)
  # Drop the trip columns at positions 3, 5 and 7; they are not used below.
  bike1 <- bike1[, -c(3, 5, 7)]
  bike1$Date <- as.Date(bike1$Start.date, '%Y-%m-%d')
  bike1$Hour <- hour(bike1$Start.date)

  # One output row per distinct start date, in order of first appearance.
  trip_days <- unique(bike1$Date)

  # Continue the day index from the last row already in `bikestart`, so the
  # index stays unique across quarters instead of restarting at 274 each
  # time. Falls back to the 273 offset if `bikestart` is empty.
  last_index <- if (nrow(bikestart) > 0) max(bikestart$index) else 273
  bikenext <- data.frame(
    index = last_index + seq_along(trip_days),
    Date = trip_days
  )

  # Count rides per day and member type in one vectorised pass. Each trip is
  # mapped to the row of its date, so the counts are correct even if the
  # file is not sorted by date, and no vector is grown row by row.
  day_of_trip <- match(bike1$Date, trip_days)
  count_rides <- function(member_type) {
    # which() drops trips whose member type is NA, like na.rm = TRUE would.
    hits <- which(bike1$Member.type == member_type)
    as.numeric(tabulate(day_of_trip[hits], nbins = length(trip_days)))
  }
  bikenext$Member <- count_rides('Member')
  bikenext$Casual <- count_rides('Casual')

  bikestart <- rbind(bikestart, bikenext)
}
Using code from: https://stackoverflow.com/questions/36502140/determine-season-from-date-using-lubridate-in-r
# Map dates to a season factor with levels Winter, Spring, Summer, Fall.
#
# input.date: a vector of dates understood by month() and day().
# Returns a factor the same length as input.date. Each date is reduced to a
# month-day number (MMDD) and binned by the season upper limits below; the
# first and last bins are both "Winter", so the factor has four levels.
getSeason <- function(input.date) {
  mmdd <- 100 * month(input.date) + day(input.date)

  # Season upper limits in MMDD form; intervals are closed on the right, so
  # 0319 is still Winter and 0320 is the first day of Spring.
  season_limits <- c(0, 319, 620, 921, 1220, 1231)
  season_names <- c("Winter", "Spring", "Summer", "Fall", "Winter")

  # Bin to integer codes first, then label, so the two Winter bins collapse
  # into a single level.
  bin_code <- cut(mmdd, breaks = season_limits, labels = FALSE)
  factor(season_names[bin_code], levels = unique(season_names))
}
# Add calendar, holiday and weather columns to the daily summary and write
# the finished table to disk.

# Constant year code for every row.
# NOTE(review): presumably 2 stands for 2013 in the target encoding -- confirm.
bikestart$yr <- 2
# wday() numbers days 1-7 starting on Sunday by default, so after the shift
# 0 is Sunday and 6 is Saturday.
bikestart$weekday <- wday(bikestart$Date) - 1
# Season as the factor's level code: 1 Winter, 2 Spring, 3 Summer, 4 Fall.
bikestart$season <- as.numeric(getSeason(bikestart$Date))
bikestart$mnth <- month(bikestart$Date)

# Dates flagged as holidays for 2013.
holidaylist <- as.Date(
  c(
    '2013-01-01',
    '2013-01-21',
    '2013-2-18',
    '2013-4-16',
    '2013-5-27',
    '2013-7-4',
    '2013-9-2',
    '2013-10-14',
    '2013-11-11',
    '2013-11-28',
    '2013-12-25'
  )
)
is_holiday <- bikestart$Date %in% holidaylist
is_weekend <- bikestart$weekday %in% c(0, 6)
bikestart$holiday <- as.numeric(is_holiday)
# A working day is any day that is neither a holiday nor a weekend day.
bikestart$workingday <- as.numeric(!is_holiday & !is_weekend)

bikestart$cnt <- bikestart$Casual + bikestart$Member

weatherdata <- read.csv('bike project v2/weather/history_data.csv')
# The weather columns are attached by row position, so the two tables must
# have the same number of rows; otherwise a shorter weather table could be
# silently recycled onto the wrong days.
# NOTE(review): this still assumes both tables are in the same date order --
# confirm against the weather file.
if (nrow(weatherdata) != nrow(bikestart)) {
  stop(
    "weather data has ", nrow(weatherdata), " rows but bikestart has ",
    nrow(bikestart), "; the tables cannot be combined by position",
    call. = FALSE
  )
}
bikestart$temp <- weatherdata$Temperature
bikestart$hum <- weatherdata$Relative.Humidity
bikestart$windspeed <- weatherdata$Wind.Speed
bikestart$weathersit <- weatherdata$weathersit

write.csv(bikestart, 'bikestart.csv')