Predicting CPI Performance using an Ensemble of Neural Networks

This project is dedicated to investigating the utility of an ensemble of recurrent neural networks used to predict fluctuations of the Consumer Price Index (CPI). Twenty technical analysis indicators are used as inputs to each network. Each individual network attempts to model a single scenario of the CPI's behavior. The models from each scenario are then averaged to produce a single combined prediction. My results show that an averaged ensemble model consistently outperforms the prediction made by a single network. For an in-depth explanation of this research, please click on this paper.
This paper explores the relationships among neural networks found in the brain, classical statistical physics, Ising-type models, quantum potentials, and ensembles of recurrent neural networks. Also included in the report are several of the many programs that were used to clean the data and generate the desired predictions. Some of this same code can be accessed in the drop-down below.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 26 16:43:48 2021
@author: nickdorogy
"""
##Import statements
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from sklearn.ensemble import RandomForestClassifier
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from matplotlib import pyplot
import numpy as np
from numpy import mean
from numpy import std
from numpy import array
from numpy import argmax
from numpy import tensordot
from numpy.linalg import norm
from itertools import product
##Initial data load-in and splitting data into X and y
##X holds the input features read from columns 1-21 of the combined CSV
##(column 0 is skipped); y is the standardized Consumer Price Index.
##NOTE(review): the file names say both inputs are already standardized -- confirm.
X = np.genfromtxt(
    "/Users/nickdorogy/Desktop/school/GitHub/CapstoneCode-/Capstone/Standardized/!ALL_COMBINED/ALL_COMBINED_STANDARDIZED_DATA.csv",
    delimiter=',',
    skip_header=1,
    usecols=tuple(range(1, 22)),  # columns 1..21 inclusive
)
print(X.shape)
y = np.genfromtxt(
    "/Users/nickdorogy/Desktop/school/GitHub/CapstoneCode-/Capstone/Standardized/Consumer Price Index/STANDARDIZEDConsumerPriceIndex.csv",
    delimiter=',',
    skip_header=1,
)
##Splitting data into train and test
##test_size selects the size of the test data (and therefore of the train data);
##random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=33)
##Turn the targets into a column matrix so they line up with the (n, 1) model
##predictions. The row count is taken from the data instead of being hard-coded
##to 40, so a different dataset size or test_size no longer breaks the reshape.
y_test = np.reshape(y_test, (len(y_test), 1))
##Fit model on dataset
def fit_model(X_train, y_train, *, hidden_units=26, epochs=50, batch_size=1):
    """Build, compile and train one small feed-forward regression network.

    The network is a sigmoid hidden layer followed by a single linear output
    unit, trained with Adam on mean squared error while also reporting the
    root mean squared error.

    Args:
        X_train: Training inputs passed straight to ``model.fit``.
        y_train: Training targets passed straight to ``model.fit``.
        hidden_units: Width of the hidden layer (default 26, as before).
        epochs: Number of training epochs (default 50, as before).
        batch_size: Mini-batch size (default 1, as before).

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    baseline_model = models.Sequential()
    # Hidden layer; its input size is inferred from X_train on the first fit.
    baseline_model.add(layers.Dense(hidden_units, activation='sigmoid'))
    # Output layer -- a single unit because one value is predicted per sample.
    baseline_model.add(layers.Dense(1))
    # Choose optimizer, loss, & metric.
    # NOTE(review): the loss is created with its default reduction and name.
    # The previous explicit reduction="auto" is rejected by Keras 3, while the
    # default is the same setting on versions that accept "auto" -- confirm
    # against the installed TensorFlow/Keras version.
    baseline_model.compile(
        optimizer='adam',
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=['RootMeanSquaredError'],
    )
    baseline_model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)
    return baseline_model
##Make an ensemble prediction
def ensemble_predictions(members, X_test):
    """Average the predictions of every model in ``members`` on ``X_test``.

    Args:
        members: Non-empty sequence of objects exposing ``predict(X)``; each
            must return one value per row of ``X_test``.
        X_test: Inputs to predict on; ``len(X_test)`` is the sample count.

    Returns:
        Array of shape ``(len(X_test), 1)`` holding the element-wise mean of
        the members' predictions.

    Raises:
        ValueError: If ``members`` is empty, or if a member's prediction does
            not contain exactly ``len(X_test)`` values (raised by the reshape).
    """
    if len(members) == 0:
        # Averaging over zero models would otherwise silently produce NaN.
        raise ValueError("ensemble_predictions requires at least one model")
    n_samples = len(X_test)
    # Force every prediction into an (n_samples, 1) column so they stack
    # cleanly; the sample count comes from X_test rather than a fixed 40.
    columns = [
        np.reshape(model.predict(X_test), (n_samples, 1)) for model in members
    ]
    # Averaging across the member axis.
    return np.mean(columns, axis=0)
##Evaluate accuracy of the model
def evaluate_members(members, X_test, y_test):
    """Return the per-sample absolute error of the ensemble prediction.

    Args:
        members: Models forwarded to ``ensemble_predictions``.
        X_test: Inputs forwarded to ``ensemble_predictions``.
        y_test: True targets, one per test sample; may be 1-D or a column.

    Returns:
        Array with the same shape as the ensemble prediction containing
        ``|prediction - target|`` for each sample. Despite the function name
        this is an error (lower is better), not an accuracy.

    Raises:
        ValueError: If ``y_test`` does not hold as many values as the
            ensemble prediction (raised by the reshape).
    """
    y_predict = ensemble_predictions(members, X_test)
    # Align the targets with the prediction's shape first: subtracting a 1-D
    # target vector from a column of predictions would broadcast to an
    # (n, n) matrix instead of n per-sample errors.
    y_true = np.reshape(y_test, np.shape(y_predict))
    return np.abs(y_predict - y_true)
##Fit all models
n_members = 500 # number of models to be averaged
members = [fit_model(X_train, y_train) for _ in range(n_members)]
##Calculate & display error
##Both scores are per-sample absolute errors stored as (n, 1) columns.
ensemble_score = evaluate_members(members, X_test, y_test)
##The first network alone serves as the single-model baseline. Its prediction
##is reshaped using the actual number of test rows rather than a fixed 40.
single_score = np.abs(
    np.reshape(members[0].predict(X_test), (len(X_test), 1)) - y_test
)
print("----------------------------------")
##The builtin sum adds the rows of the (n, 1) column, so each total prints as
##a one-element array.
print("Ensemble Error:", sum(ensemble_score))
print("First Network's Error:", sum(single_score))
##Summarize the mean and spread of the per-sample error for each approach
print('Accuracy of single score || Mean: %.3f, Std Dev:, (%.3f) ||' % (mean(single_score), std(single_score)))
print('Accuracy of ensemble score || Mean: %.3f, Std Dev:, (%.3f) ||' % (mean(ensemble_score), std(ensemble_score)))
##Plot scores
##The scores are already absolute values, so they are plotted directly.
plt.plot(single_score, label="Single network error")
plt.plot(ensemble_score, label="Ensemble of networks' error")
##Negative values here mean the ensemble beat the single network on that sample.
plt.plot(ensemble_score - single_score, label="Ensemble - single error")
plt.legend()
##NOTE(review): train_test_split shuffles by default, so the x-axis is the
##test-sample index and may not be chronological months -- confirm the label.
plt.xlabel('Time (Months)')
plt.ylabel('Error')
plt.show()