def prophet_features(df, horizon=24*7):
    """Fit Prophet on all but the last ``horizon`` rows and predict every row.

    The frame is reset so that ``datetime`` is available as a column, then the
    ``datetime``/``count`` columns are renamed to the ``ds``/``y`` names that
    Prophet expects. The model is fitted on the training part only and is then
    used to predict both the training rows and the held-out rows.

    Args:
        df: DataFrame that exposes ``datetime`` (as a column after
            ``reset_index()``) and a ``count`` column.
        horizon: Number of trailing rows held out from fitting.

    Returns:
        The Prophet prediction frames for the training rows and the held-out
        rows, concatenated in that order (row labels are not renumbered).

    Raises:
        ValueError: If ``horizon`` is not positive or leaves no training rows.
    """
    # ``iloc[:-0]`` is an empty slice, so a zero horizon would silently
    # produce an empty training set; reject it up front.
    if horizon < 1:
        raise ValueError("horizon must be a positive number of rows")

    # Non-mutating chain: the caller's frame is never modified.
    series = (
        df.reset_index()[['datetime', 'count']]
        .rename(columns={'datetime': 'ds', 'count': 'y'})
    )
    if horizon >= len(series):
        raise ValueError(
            "horizon must be smaller than the number of rows in df"
        )

    # Hold out the last `horizon` rows; Prophet only ever sees `train`.
    train = series.iloc[:-horizon, :]
    test = series.iloc[-horizon:, :]

    # Define the Prophet model
    m = Prophet(
        growth='linear',
        seasonality_mode='additive',
        interval_width=0.95,
        daily_seasonality=True,
        weekly_seasonality=True,
        yearly_seasonality=False,
    )
    # Train the Prophet model
    m.fit(train)

    # Predict on the timestamps only (target column removed) for both parts.
    predictions_train = m.predict(train.drop(columns='y'))
    predictions_test = m.predict(test.drop(columns='y'))

    # Combine predictions from the training and test sets
    return pd.concat([predictions_train, predictions_test], axis=0)
def train_time_series_with_folds_autoreg_prophet_features(df, horizon=24*7, lags=(1, 2, 3, 4, 5)):
    """Train LightGBM on Prophet-derived features and plot the hold-out fit.

    Prophet features are computed with ``prophet_features``, merged onto the
    input frame on ``datetime``, and extended with lagged copies of the
    ``yhat`` column. The last ``horizon`` rows are used as the validation set;
    the mean absolute error on those rows is shown in the plot title.

    Args:
        df: DataFrame that exposes ``datetime`` (as a column after
            ``reset_index()``) and a ``count`` target column. It is not
            modified.
        horizon: Number of trailing rows used for validation.
        lags: Shifts (in rows) applied to ``yhat`` to build lag features.

    Returns:
        None. The function displays a matplotlib figure.

    Raises:
        ValueError: If ``horizon`` is not positive, or if no training rows
            remain once rows with missing values have been dropped.
    """
    # ``iloc[:-0]`` is an empty slice, so a zero horizon would silently
    # produce an empty training set; reject it up front.
    if horizon < 1:
        raise ValueError("horizon must be a positive number of rows")

    # Create a dataframe containing all the new features created with Prophet
    new_prophet_features = prophet_features(df, horizon=horizon)

    # reset_index() returns a new frame, so the caller's DataFrame is left
    # untouched (an in-place reset would change it between calls).
    data = pd.merge(
        df.reset_index(),
        new_prophet_features,
        left_on=['datetime'],
        right_on=['ds'],
        how='inner',
    )
    data = data.drop(columns='ds').set_index('datetime')

    # Use Prophet predictions to create some lag variables (yhat column)
    for lag in lags:
        data[f'yhat_lag_{lag}'] = data['yhat'].shift(lag)

    # dropna() is not in-place: keep its result so the rows left incomplete
    # by shift() are actually removed.
    data = data.dropna(axis=0, how='any')
    if len(data) <= horizon:
        raise ValueError(
            "not enough rows left for training after dropping missing values"
        )

    X = data.drop(columns='count')
    y = data['count']

    # Using the data from the previous week as an example for validation
    X_train, X_test = X.iloc[:-horizon, :], X.iloc[-horizon:, :]
    y_train, y_test = y.iloc[:-horizon], y.iloc[-horizon:]

    # Define the LightGBM model, train, and make predictions
    model = LGBMRegressor(random_state=42)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # Calculate MAE
    mae = np.round(mean_absolute_error(y_test, predictions), 3)

    # Plot the real vs prediction for the last week of the dataset
    plt.figure(figsize=(16, 6))
    plt.title(f'Real vs Prediction - MAE {mae}', fontsize=20)
    plt.plot(y_test, color='red')
    plt.plot(pd.Series(predictions, index=y_test.index), color='green')
    plt.xlabel('Hour', fontsize=16)
    plt.ylabel('Number of Shared Bikes', fontsize=16)
    plt.legend(labels=['Real', 'Prediction'], fontsize=16)
    plt.grid()
    plt.show()
# Comments