# linear regression using "mini-batch" gradient descent

import numpy as np

# function to compute hypothesis / predictions
def hypothesis(X, theta):
    return np.dot(X, theta)

# function to compute gradient of error function w.r.t. theta
def gradient(X, y, theta):
    h = hypothesis(X, theta)
    grad = np.dot(X.transpose(), (h - y))
    return grad

# function to compute the error for current values of theta
def cost(X, y, theta):
    h = hypothesis(X, theta)
    J = np.dot((h - y).transpose(), (h - y))
    J /= 2
    return J[0][0]  # extract the scalar from the 1x1 result

# function to create a list containing mini-batches
def create_mini_batches(X, y, batch_size):
    mini_batches = []
    data = np.hstack((X, y))
    np.random.shuffle(data)
    n_minibatches = data.shape[0] // batch_size

    # full-size mini-batches
    for i in range(n_minibatches):
        mini_batch = data[i * batch_size:(i + 1) * batch_size, :]
        X_mini = mini_batch[:, :-1]
        Y_mini = mini_batch[:, -1].reshape((-1, 1))
        mini_batches.append((X_mini, Y_mini))

    # leftover samples that do not fill a whole batch
    if data.shape[0] % batch_size != 0:
        mini_batch = data[n_minibatches * batch_size:, :]
        X_mini = mini_batch[:, :-1]
        Y_mini = mini_batch[:, -1].reshape((-1, 1))
        mini_batches.append((X_mini, Y_mini))
    return mini_batches

# function to perform mini-batch gradient descent
def gradientDescent(X, y, learning_rate=0.001, batch_size=32):
    theta = np.zeros((X.shape[1], 1))
    error_list = []
    max_iters = 3  # number of passes (epochs) over the training data
    for itr in range(max_iters):
        mini_batches = create_mini_batches(X, y, batch_size)
        for mini_batch in mini_batches:
            X_mini, y_mini = mini_batch
            # update theta using the gradient of the current mini-batch
            theta = theta - learning_rate * gradient(X_mini, y_mini, theta)
            error_list.append(cost(X_mini, y_mini, theta))
    return theta, error_list
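
As a quick sanity check, the functions above can be exercised on synthetic data. The snippet below is a minimal sketch: the data generation, noise level, seed, and learning rate are illustrative assumptions, not part of the original listing.

# example usage on synthetic data (illustrative assumptions: the data
# generation, noise level and learning rate are not from the listing above)
np.random.seed(0)

n_samples = 200
x = np.random.rand(n_samples, 1)
y = 4 + 3 * x + 0.1 * np.random.randn(n_samples, 1)   # y = 4 + 3x + noise

# prepend a column of ones so that theta[0] plays the role of the intercept
X = np.hstack((np.ones((n_samples, 1)), x))

theta, error_list = gradientDescent(X, y, learning_rate=0.01, batch_size=32)
print("learned theta:", theta.ravel())        # should approach [4, 3]
print("final mini-batch cost:", error_list[-1])

Because max_iters is hard-coded to 3 epochs in gradientDescent, the learned coefficients only approximate the true values; raising max_iters (or the learning rate, within reason) tightens the fit.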