########################################################################################################################
# NEURAL NETWORKS
########################################################################################################################
import tensorflow as tf
import matplotlib.pyplot as plt
import requests
import numpy as np

sess = tf.Session()

# UCI Boston-housing dataset: 13 feature columns plus the target MEDV.
housing_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data'
housing_header = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
cols_used = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
num_features = len(cols_used)

# BUGFIX: fail fast on network/HTTP errors (and bound the wait) instead of
# silently parsing an error page as whitespace-separated floats.
housing_file = requests.get(housing_url, timeout=30)
housing_file.raise_for_status()

# The file is whitespace-separated; the len(...) >= 1 filters drop the empty
# tokens produced by runs of spaces and any trailing blank line.
housing_data = [[float(x) for x in y.split(' ') if len(x) >= 1]
                for y in housing_file.text.split('\n') if len(y) >= 1]

# Feature matrix: every column except the target MEDV.
x_vals = np.array([[x for i, x in enumerate(y) if housing_header[i] in cols_used]
                   for y in housing_data])

REGRESSION = 0
if REGRESSION:
    # REGRESSION: column 13 (MEDV, the median price) is the value to regress.
    y_vals = np.transpose([np.array([y[13] for y in housing_data])])
else:
    # CLASSIFICATION: binary label — 1 when the median price exceeds 25.
    y_vals = np.transpose([np.array([1 if y[13] > 25 else 0 for y in housing_data])])

SIZE_X = x_vals.shape

# repeatability
seed = 3
tf.set_random_seed(seed)
np.random.seed(seed)
batch_size = 100

# learning/test split: 80% train, remainder test.
train_indices = np.random.choice(len(x_vals), round(len(x_vals) * 0.8), replace=False)
test_indices = np.array(list(set(range(len(x_vals))) - set(train_indices)))
x_vals_train = x_vals[train_indices]
x_vals_test = x_vals[test_indices]
y_vals_train = y_vals[train_indices]
y_vals_test = y_vals[test_indices]


def normalize_cols(m):
    """Min-max scale each column of `m` into [0, 1].

    A constant column produces 0/0 -> NaN; callers clean that up with
    np.nan_to_num (mapping it to 0).
    """
    col_max = m.max(axis=0)
    col_min = m.min(axis=0)
    return (m - col_min) / (col_max - col_min)


# NOTE(review): the test set is normalized with its OWN column min/max rather
# than the training set's statistics — mild train/test leakage and a scale
# mismatch between the two splits. Confirm whether this is intended.
x_vals_train = np.nan_to_num(normalize_cols(x_vals_train))
x_vals_test = np.nan_to_num(normalize_cols(x_vals_test))
########################################################################################################################
# TENSORFLOW
########################################################################################################################
def init_weight(shape, st_dev):
    """Return a trainable weight Variable drawn from N(0, st_dev^2)."""
    weight = tf.Variable(tf.random_normal(shape, stddev=st_dev))
    return weight


def init_bias(shape, st_dev):
    """Return a trainable bias Variable drawn from N(0, st_dev^2)."""
    bias = tf.Variable(tf.random_normal(shape, stddev=st_dev))
    return bias


def fully_connected_layer(input_layer, weights, biases):
    """Dense layer: sigmoid(input_layer @ weights + biases)."""
    layer = tf.add(tf.matmul(input_layer, weights), biases)
    return tf.nn.sigmoid(layer)


# create placeholders
x_data = tf.placeholder(shape=[None, SIZE_X[1]], dtype=tf.float32)
y_target = tf.placeholder(shape=[None, 1], dtype=tf.float32)

#########################
# create model: three hidden sigmoid layers of 25/10/3 units, one scalar output.
# NOTE(review): stddev=10.0 is an extremely large initialization for sigmoid
# units and will start most of them deep in saturation — consider a much
# smaller scale (e.g. ~1/sqrt(fan_in)). Left unchanged to preserve behavior.
TOPOLOGY = [25, 10, 3]

weight_1 = init_weight(shape=[SIZE_X[1], TOPOLOGY[0]], st_dev=10.0)
bias_1 = init_bias(shape=[TOPOLOGY[0]], st_dev=10.0)
layer_1 = fully_connected_layer(x_data, weight_1, bias_1)

weight_2 = init_weight(shape=[TOPOLOGY[0], TOPOLOGY[1]], st_dev=10.0)
bias_2 = init_bias(shape=[TOPOLOGY[1]], st_dev=10.0)
layer_2 = fully_connected_layer(layer_1, weight_2, bias_2)

weight_3 = init_weight(shape=[TOPOLOGY[1], TOPOLOGY[2]], st_dev=10.0)
bias_3 = init_bias(shape=[TOPOLOGY[2]], st_dev=10.0)
layer_3 = fully_connected_layer(layer_2, weight_3, bias_3)

weight_4 = init_weight(shape=[TOPOLOGY[2], 1], st_dev=10.0)
bias_4 = init_bias(shape=[1], st_dev=10.0)
final_output = fully_connected_layer(layer_3, weight_4, bias_4)

# L1 (mean absolute error) loss, Adam optimizer.
loss = tf.reduce_mean(tf.abs(y_target - final_output))
my_opt = tf.train.AdamOptimizer(0.01)
train_step = my_opt.minimize(loss)

init = tf.global_variables_initializer()
sess.run(init)

########################################################################################################################
# MAIN()
########################################################################################################################
learn_loss = []
test_loss = []
for i in range(2000):
    # sample a random mini-batch (with replacement) from the training split
    rand_index = np.random.choice(len(x_vals_train), size=batch_size)
    rand_x = x_vals_train[rand_index]
    rand_y = y_vals_train[rand_index]
    sess.run(train_step, feed_dict={x_data: rand_x, y_target: rand_y})

    # record batch loss and full-test-set loss every generation
    temp_loss = sess.run(loss, feed_dict={x_data: rand_x, y_target: rand_y})
    learn_loss.append(temp_loss)
    temp_loss = sess.run(loss, feed_dict={x_data: x_vals_test, y_target: y_vals_test})
    test_loss.append(temp_loss)
    if (i + 1) % 25 == 0:
        print('Generation: ' + str(i + 1) + '. Loss = ' + str(temp_loss))

###########################
# Test: predictions and residuals on both splits
test_preds = [x[0] for x in sess.run(final_output, feed_dict={x_data: x_vals_test})]
train_preds = [x[0] for x in sess.run(final_output, feed_dict={x_data: x_vals_train})]
test_preds = np.squeeze(np.asarray(test_preds))
train_preds = np.squeeze(np.asarray(train_preds))
resid_test = np.subtract(test_preds, np.squeeze(y_vals_test))
resid_train = np.subtract(train_preds, np.squeeze(y_vals_train))

plt.figure(1)
plt.plot(learn_loss, 'k-', label='Train Loss')
plt.plot(test_loss, 'r--', label='Test Loss')
plt.title('Loss per Generation')
# BUGFIX: labels were passed to plot() but legend() was never called for this
# figure, so the Train/Test labels were silently dropped.
plt.legend(loc='upper right')

plt.figure(2)
plt.hist(resid_test, bins=30, label='Test')
plt.legend(loc='upper right')

plt.figure(3)
plt.hist(resid_train, bins=30, label='Learn')
plt.legend(loc='upper right')

plt.show()