|
| 1 | +#!/usr/bin/env python |
| 2 | + |
| 3 | +import numpy as np |
| 4 | +import pandas as pd |
| 5 | +from pandas import Series, DataFrame |
| 6 | +import matplotlib.pyplot as plt |
| 7 | + |
| 8 | + |
## Helper functions
# Names of the design-matrix columns, in coefficient order: the helpers
# below pair theta[i] with the column named colName[i].
colName = ['ones', 'size', 'rooms']
def featureNormalize(X):
    """Standardize the columns of X using their own mean and std.

    Parameters
    ----------
    X : pandas object (DataFrame or Series) supporting .mean() and .std().

    Returns
    -------
    X_norm : X with the mean subtracted and the result divided by the std.
    mu : the value of X.mean(), needed to normalize new samples later.
    sigma : the value of X.std(), needed to normalize new samples later.

    No guard is applied for a zero standard deviation (constant column);
    the division is performed as-is.
    """
    # Compute the statistics once and reuse them for both the normalized
    # data and the returned values, instead of evaluating each twice.
    mu = X.mean()
    sigma = X.std()
    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma
| 18 | + |
def predictValues(X, theta):
    """Return the linear-model prediction for every row of X.

    Parameters
    ----------
    X : DataFrame containing (at least) the columns listed in colName.
    theta : indexable sequence of coefficients; theta[i] multiplies the
        column named colName[i].

    Returns
    -------
    Series aligned with X's index holding
    sum_i X[colName[i]] * theta[i] for each row.
    """
    # Whole-column arithmetic replaces the former row-by-row apply() with a
    # Python lambda. Terms are accumulated left to right, i.e. in the same
    # order as the original hand-written three-term sum, and the loop is no
    # longer tied to exactly three features.
    predictedValues = None
    for i, name in enumerate(colName):
        term = X[name] * theta[i]
        if predictedValues is None:
            predictedValues = term
        else:
            predictedValues = predictedValues + term
    return predictedValues
| 26 | + |
def computeCostMulti(X, y, theta):
    """Compute the squared-error cost of the linear model for theta.

    Parameters
    ----------
    X : DataFrame of features, passed unchanged to predictValues.
    y : DataFrame whose 'price' column holds the target values.
    theta : coefficient sequence, passed unchanged to predictValues.

    Returns
    -------
    sum((prediction - y['price'])**2) / (2 * m), where m = len(y).
    """
    m = len(y)
    errors = predictValues(X, theta) - y['price']
    sumOfSquareErrors = np.square(errors).sum()

    # Float divisor: under Python 2 an integer-valued sum divided by the
    # integer 2*m would be silently truncated.
    return sumOfSquareErrors / (2.0 * m)
| 34 | + |
def gradientDescentMulti(X, y, theta, alpha, num_iters, verbose=False):
    """Fit theta by batch gradient descent on the squared-error cost.

    Parameters
    ----------
    X : DataFrame containing the columns listed in colName.
    y : DataFrame whose 'price' column holds the target values.
    theta : initial coefficients, one per entry of colName. The input is
        copied, so the caller's object is not modified.
    alpha : learning rate.
    num_iters : number of gradient steps to perform.
    verbose : when true, print theta and the cost before the first step
        and after every step.

    Returns
    -------
    theta : float ndarray with the coefficients after num_iters steps.
    costHistory : Series with the cost measured after each step.
    """
    m = len(y)  # number of training examples
    costHistory = []

    # Work on a private float copy: avoids mutating the caller's array and
    # avoids truncating updates when an integer array is passed in.
    theta = np.array(theta, dtype=float)

    if verbose:
        print('theta input  %s' % (theta,))
        print('initial cost %e' % computeCostMulti(X, y, theta))

    step = float(alpha) / m
    for iteration in range(num_iters):
        # Residuals are computed once, before any coefficient changes, so
        # every coefficient is updated from the same predictions
        # (simultaneous update).
        errors = predictValues(X, theta) - y['price']
        # A distinct inner index keeps `iteration` intact for the verbose
        # report (the inner loop used to overwrite the outer counter).
        for j, name in enumerate(colName):
            theta[j] = theta[j] - step * (errors * X[name]).sum()

        cost = computeCostMulti(X, y, theta)
        costHistory.append(cost)

        if verbose:
            print(' %04i theta %s' % (iteration, theta))
            print(' %04i cost %e' % (iteration, cost))

    return theta, Series(costHistory)
| 57 | + |
def normalEqn(X, y):
    """Solve the least-squares problem in closed form (normal equations).

    The first three names in colName select and order the columns of X;
    the result is pinv(X'X) . X'y, returned as a flat array.
    """
    design = X[[colName[k] for k in range(3)]]
    design_t = design.transpose()

    gram_inverse = np.linalg.pinv(np.dot(design_t, design))
    moment = np.dot(design_t, y)

    return np.dot(gram_inverse, moment).flatten()
| 67 | + |
if __name__ == '__main__':
    # Script driver: load the housing data, fit a linear model by gradient
    # descent and by the normal equations, and compare both predictions
    # for one sample house.

    ## Initialization

    ## ================ Part 1: Feature Normalization ================

    ## Clear and Close Figures
    plt.close('all')

    print('Loading data ...')

    ## Load Data
    # The file is read without a header row; column names are supplied here.
    data = pd.read_csv('ex1data2.txt', header=None,
                       names=['size', 'rooms', 'price'])
    X = data[['size', 'rooms']]
    y = data[['price']]
    m = len(y)

    # Print out some data points
    print('First 10 examples from the dataset:')
    print(data.head(10))

    # Scale features and set them to zero mean
    print('Normalizing Features ...')

    X, mu, sigma = featureNormalize(X)

    # Add intercept term to X
    X['ones'] = np.ones(m)

    ## ================ Part 2: Gradient Descent ================

    print('Running gradient descent ...')

    # Choose some alpha value
    alpha = 0.01
    num_iters = 1000

    # Init Theta and Run Gradient Descent
    theta_grad = np.zeros(3)
    theta_grad, costHistory = gradientDescentMulti(
        X, y, theta_grad, alpha, num_iters, verbose=False)

    # Plot the convergence graph
    print('Theta found by gradient descent, %04i iter:' % num_iters)
    print(theta_grad)
    plt.figure()
    p1 = costHistory.plot()
    p1.axes.set_title('Evolution of cost')
    p1.axes.yaxis.label.set_text('Cost function')
    p1.axes.xaxis.label.set_text('Iteration')

    # Estimate the price of a 1650 sq-ft, 3 br house.
    # The sample is normalized with the training mean/std, then its columns
    # are put in colName order so they line up with theta.
    X1 = DataFrame({'ones': [1],
                    'size': [(1650 - mu['size']) / sigma['size']],
                    'rooms': [(3 - mu['rooms']) / sigma['rooms']]})
    X1 = X1[colName]
    price_grad = predictValues(X1, theta_grad)[0]

    print('Predicted price of a 1650 sq-ft, 3 br house '
          '(using gradient descent): \n$%.2f\n' % price_grad)

    ## ================ Part 3: Normal Equations ================

    print('Solving with normal equations...')

    # Calculate the parameters from the normal equation
    theta_norm = normalEqn(X, y)

    # Display normal equation's result
    print('Theta computed from the normal equations:')
    print(theta_norm)

    # Estimate the price of a 1650 sq-ft, 3 br house.
    # np.dot on the one-row frame yields a one-element array; take the
    # scalar explicitly instead of relying on implicit array-to-float
    # conversion inside the %-formatting below.
    price_norm = np.dot(X1, theta_norm)[0]

    print('Predicted price of a 1650 sq-ft, 3 br house '
          '(using normal equations): \n$%.2f\n' % price_norm)

    print('Fractional difference: %.2f%%' % (
        (price_norm - price_grad) / ((price_grad + price_norm) / 2) * 100))

    # Figure.show() does not run a GUI event loop, so in a plain script the
    # window would vanish immediately; pyplot.show() at the very end keeps
    # the convergence plot open after all results have been printed.
    plt.show()
0 commit comments