import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import glob


# Month-start timestamps for the training window, 2013-06-01 through
# 2020-05-01 inclusive ("MS" is the month-start frequency).
dates_trn = pd.date_range("2013-06-01", "2020-05-01", freq="MS")
# One row per month in a single 'dates' column, with a default 0..n-1 index.
df_training = dates_trn.to_frame(index=False, name="dates")
df_training.head()


# Month-start timestamps for the test window, 2020-06-01 through
# 2022-02-01 inclusive ("MS" is the month-start frequency).
dates_test = pd.date_range("2020-06-01", "2022-02-01", freq="MS")
# One row per month in a single 'dates' column, with a default 0..n-1 index.
df_test = dates_test.to_frame(index=False, name="dates")
df_test.head()


# Collect every CSV in the training folder, ordered by file name.
# NOTE(review): the counts below are attached to df_training positionally, so
# this presumably relies on file names sorting in chronological order — confirm.
path_trn = "data_bikes/training/"
all_files = sorted(glob.glob(path_trn + "*.csv"))

# Trips per file = number of data rows in that file's CSV.
num_trips = [len(pd.read_csv(csv_path)) for csv_path in all_files]

# Assigning a list requires its length to equal the number of rows in
# df_training; pandas raises ValueError otherwise.
df_training["trips"] = num_trips
df_training


# Collect every CSV in the test folder, ordered by file name.
# NOTE(review): the counts below are attached to df_test positionally, so this
# presumably relies on file names sorting in chronological order — confirm.
path_tst = "data_bikes/test/"
all_files_02 = sorted(glob.glob(path_tst + "*.csv"))

# Trips per file = number of data rows in that file's CSV.
num_trips_tst = [len(pd.read_csv(csv_path)) for csv_path in all_files_02]

# Assigning a list requires its length to equal the number of rows in
# df_test; pandas raises ValueError otherwise.
df_test["trips"] = num_trips_tst
df_test


def _first_column_to_header(raw):
    """Transpose *raw* and use its first original column as the column labels.

    After transposing, the first row (the values of the original first
    column) becomes the header. The index is then renumbered from 0 and
    row 0 (the header row itself) is dropped, leaving rows labelled 1..n.
    """
    flipped = raw.T
    flipped.columns = flipped.iloc[0]
    return flipped.reset_index(drop=True).drop(0)


# Rainfall table, reshaped so the first column's values become the header.
rain_raw = pd.read_csv("weather_data/monthly_rainfall_average_nyc.csv")
rain = _first_column_to_header(rain_raw)

# Temperature table, reshaped the same way.
temp_raw = pd.read_csv("weather_data/monthly_average_temp_nyc.csv")
temp = _first_column_to_header(temp_raw)
rain


# Export the transposed ("inverted") weather tables as CSV, without the index
# column and with the header row.
# The targets are relative to the working directory, consistent with the
# "weather_data/..." paths the raw weather files are read from, so the script
# no longer depends on one user's absolute home-directory layout.
rain.to_csv("weather_data/rain_m.csv", index=False, header=True)
temp.to_csv("weather_data/temp_m.csv", index=False, header=True)


# Load the per-period weather series. Paths are relative to the working
# directory, consistent with the other "weather_data/..." reads in this
# script, instead of a machine-specific absolute path.
rain_training = pd.read_csv("weather_data/rain_training.csv")
rain_test = pd.read_csv("weather_data/rain_test.csv")
# NOTE(review): "temp_traing.csv" looks like a misspelling of
# "temp_training.csv"; the name is kept because the file on disk may really be
# called this — confirm and rename both together.
temp_training = pd.read_csv("weather_data/temp_traing.csv")
temp_test = pd.read_csv("weather_data/temp_test.csv")

# Attach rainfall and temperature to the monthly frames. Series assignment
# aligns on index labels, so row i of each weather file lands on row i of the
# date frame.
# NOTE(review): assumes each weather file has one row per month in the same
# order as the corresponding date frame — verify against the CSVs.
df_training["Rain"] = rain_training["month_average_training"]
df_training["Temperature"] = temp_training["monthly_av_training"]
df_test["Rain"] = rain_test["month_average_test"]
df_test["Temperature"] = temp_test["monthly_av_test"]

# Write the final training and test tables as CSV, without the index column
# and with the header row. The files go to the working directory (the project
# folder the relative input paths are resolved against) rather than to a
# machine-specific absolute path.
df_training.to_csv("training.csv", index=False, header=True)
df_test.to_csv("test.csv", index=False, header=True)

	dates
0	2013-06-01
1	2013-07-01
2	2013-08-01
3	2013-09-01
4	2013-10-01

	dates
0	2020-06-01
1	2020-07-01
2	2020-08-01
3	2020-09-01
4	2020-10-01

	dates	trips
0	2013-06-01	577703
1	2013-07-01	843416
2	2013-08-01	1001958
3	2013-09-01	1034359
4	2013-10-01	1037712
...	...	...
79	2020-01-01	1240596
80	2020-02-01	1146830
81	2020-03-01	1068457
82	2020-04-01	682762
83	2020-05-01	1487890

	dates	trips
0	2020-06-01	1882273
1	2020-07-01	2105808
2	2020-08-01	2329514
3	2020-09-01	2488225
4	2020-10-01	2248869
5	2020-11-01	1736704
6	2020-12-01	1088929
7	2021-01-01	1095346
8	2021-02-01	649983
9	2021-03-01	1531094
10	2021-04-01	2067669
11	2021-05-01	2724165
12	2021-06-01	3177517
13	2021-07-01	3084537
14	2021-08-01	3072478
15	2021-09-01	3280221
16	2021-10-01	3069239
17	2021-11-01	2159283
18	2021-12-01	1748287
19	2022-01-01	1052349
20	2022-02-01	1233714

Year	2013.0	2014.0	2015.0	2016.0	2017.0	2018.0	2019.0	2020.0	2021.0	2022.0
1	2.76	2.79	5.23	4.41	4.83	2.18	3.58	1.93	2.31	4.29
2	4.25	5.48	2.04	4.40	2.48	5.83	3.14	2.54	5.13	3.23
3	2.90	3.67	4.72	1.17	5.25	5.17	3.87	3.78	3.41	0.00
4	1.31	7.85	2.08	1.61	3.84	5.78	4.55	4.49	2.69	0.00
5	8.00	4.37	1.86	3.75	6.38	3.53	6.82	1.65	4.36	0.00
6	10.10	4.26	4.79	2.60	4.76	3.11	5.46	1.76	2.62	0.00
7	2.84	5.59	3.98	7.02	4.19	7.45	5.77	6.58	11.09	0.00
8	2.85	2.25	2.35	1.97	3.34	8.59	3.70	5.03	10.32	0.00
9	2.95	1.21	3.28	2.79	2.00	6.19	0.95	3.94	10.03	0.00
10	0.36	5.77	3.91	4.15	4.18	3.59	6.15	5.05	5.26	0.00
11	3.15	4.51	2.01	5.41	1.58	7.62	1.95	3.99	1.12	0.00
12	4.85	6.04	4.72	2.89	2.21	6.51	7.09	4.61	1.39	0.00

Creation of the Training and Test Data Sets (WML Project)