-
Notifications
You must be signed in to change notification settings - Fork 0
Sourcery refactored master branch #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,25 +21,24 @@ def apicall(): | |
| except Exception as e: | ||
| raise e | ||
|
|
||
| clf = 'lm_model_v1.pk' | ||
|
|
||
| if test.empty: | ||
| return(bad_request()) | ||
| else: | ||
| #Load the saved model | ||
| print("Loading the model...") | ||
| loaded_model = None | ||
| with open('./models/'+clf,'rb') as f: | ||
| loaded_model = pickle.load(f) | ||
|
|
||
| print("The model has been loaded...doing predictions now...") | ||
| print() | ||
| predictions = loaded_model.predict(test) | ||
|
|
||
| prediction_series = pd.Series(predictions) | ||
| response = jsonify(prediction_series.to_json()) | ||
| response.status_code = 200 | ||
| return (response) | ||
| #Load the saved model | ||
| print("Loading the model...") | ||
| loaded_model = None | ||
| clf = 'lm_model_v1.pk' | ||
|
|
||
| with open(f'./models/{clf}', 'rb') as f: | ||
| loaded_model = pickle.load(f) | ||
|
|
||
| print("The model has been loaded...doing predictions now...") | ||
| print() | ||
| predictions = loaded_model.predict(test) | ||
|
|
||
| prediction_series = pd.Series(predictions) | ||
| response = jsonify(prediction_series.to_json()) | ||
| response.status_code = 200 | ||
| return (response) | ||
|
Comment on lines
-24
to
+41
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
| @app.errorhandler(400) | ||
| def bad_request(error=None): | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,7 @@ | ||
| import numpy as np | ||
| import pandas as pd | ||
|
|
||
| import os | ||
| import os | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Lines
|
||
| import json | ||
| import io | ||
| import requests | ||
|
|
@@ -17,36 +17,36 @@ | |
|
|
||
| # Checks if the dataset is in the local '/data' folder | ||
| # If not present, pulls from Github repo, otherwise reads from the local folder | ||
| if not os.path.isdir(cwd+"/data") or data_filename not in os.listdir(cwd+"/data"): | ||
| if not os.path.isdir(f"{cwd}/data") or data_filename not in os.listdir( | ||
| f"{cwd}/data" | ||
| ): | ||
| url="https://raw.githubusercontent.com/tirthajyoti/Machine-Learning-with-Python/master/Datasets/USA_Housing.csv" | ||
| print("Downloading data from {} ".format(url)) | ||
| print(f"Downloading data from {url} ") | ||
| s=requests.get(url).content | ||
|
|
||
| df = pd.read_csv(io.StringIO(s.decode('utf-8'))) | ||
| print("Dataset is downloaded.") | ||
| # Save the data in local '/data' folder | ||
| if not os.path.isdir(cwd+"/data"): | ||
| os.makedirs(cwd+"/data") | ||
| if not os.path.isdir(f"{cwd}/data"): | ||
| os.makedirs(f"{cwd}/data") | ||
| df.to_csv("data/USA_housing.csv") | ||
| print() | ||
| else: | ||
| df = pd.read_csv("data/USA_housing.csv") | ||
| print("Dataset loaded from local directory") | ||
| print() | ||
|
|
||
| print() | ||
| # Make a list of data frame column names | ||
| l_column = list(df.columns) # Making a list out of column names | ||
| len_feature = len(l_column) # Length of column vector list | ||
|
|
||
| # Put all the numerical features in X and Price in y, | ||
| # Ignore Address which is string for linear regression | ||
| X = df[l_column[0:len_feature-2]] | ||
| X = df[l_column[:len_feature-2]] | ||
| y = df[l_column[len_feature-2]] | ||
|
|
||
| #print("Feature set size:",X.shape) | ||
| #print("Variable set size:",y.shape) | ||
| #print() | ||
| print("Features variables: ",l_column[0:len_feature-2]) | ||
| print("Features variables: ", l_column[:len_feature-2]) | ||
| print() | ||
|
|
||
| # Create X and y train and test splits in one command using a split ratio and a random seed | ||
|
|
@@ -83,16 +83,14 @@ | |
| if __name__ == '__main__': | ||
| filename = 'lm_model_v1.pk' | ||
| print("Now saving the model to a serialized format (pickle)...") | ||
| if not os.path.isdir(cwd+"/models"): | ||
| if not os.path.isdir(f"{cwd}/models"): | ||
| os.makedirs(cwd+"/models") | ||
| with open('models/'+filename, 'wb') as file: | ||
| pickle.dump(lm, file) | ||
| # Save some of the test data in a CSV | ||
| print("Saving test data to a file...") | ||
| print() | ||
| if os.path.isdir(cwd+"/data"): | ||
| X_test.to_csv("data/housing_test.csv") | ||
| else: | ||
| if not os.path.isdir(cwd + "/data"): | ||
| os.makedirs(cwd+"/data") | ||
| X_test.to_csv("data/housing_test.csv") | ||
| X_test.to_csv("data/housing_test.csv") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,8 +32,7 @@ def generate_random_start(model, graph, seed_length=50,new_words=50,diversity=1, | |
| actual = generated[:] + seq[end_idx:end_idx + new_words] | ||
|
|
||
| # Keep adding new words | ||
| for i in range(new_words): | ||
|
|
||
| for _ in range(new_words): | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
This removes the following comments ( why? ): |
||
| # Make a prediction from the seed | ||
| preds = model.predict(np.array(seed).reshape(1, -1))[0].astype(np.float64) | ||
|
|
||
|
|
@@ -54,26 +53,17 @@ def generate_random_start(model, graph, seed_length=50,new_words=50,diversity=1, | |
| seed += [next_idx] | ||
| generated.append(next_idx) | ||
|
|
||
| # Showing generated and actual abstract | ||
| n = [] | ||
|
|
||
| for i in generated: | ||
| n.append(idx_word.get(i, '===')) | ||
|
|
||
| n = [idx_word.get(i, '===') for i in generated] | ||
| gen_list.append(n) | ||
|
|
||
| a = [] | ||
|
|
||
| for i in actual: | ||
| a.append(idx_word.get(i, '===')) | ||
|
|
||
| a = [idx_word.get(i, '===') for i in actual] | ||
| a = a[seed_length:] | ||
|
|
||
| gen_list = [gen[seed_length:seed_length + len(a)] for gen in gen_list] | ||
|
|
||
| if return_output: | ||
| return original_sequence, gen_list, a | ||
|
|
||
| # HTML formatting | ||
| seed_html = '' | ||
| seed_html = addContent(seed_html, header( | ||
|
|
@@ -87,16 +77,14 @@ def generate_random_start(model, graph, seed_length=50,new_words=50,diversity=1, | |
| a_html = '' | ||
| a_html = addContent(a_html, header('Actual', color='darkgreen')) | ||
| a_html = addContent(a_html, box(remove_spaces(' '.join(a)))) | ||
|
|
||
| st = "<div>" + seed_html + "</div><div>" + gen_html + "</div><div>" + a_html + "</div>" | ||
| #return f"<div>{seed_html}</div><div>{gen_html}</div><div>{a_html}</div>" | ||
| return st | ||
|
|
||
| return f"<div>{seed_html}</div><div>{gen_html}</div><div>{a_html}</div>" | ||
|
|
||
| def generate_from_seed(model, graph, seed,new_words=50, diversity=0.75): | ||
| """Generate output from a sequence""" | ||
|
|
||
| # Mapping of words to integers | ||
| word_idx = json.load(open('data/word-index.json')) | ||
| word_idx = json.load(open('data/word-index.json')) | ||
|
Comment on lines
-99
to
+87
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| idx_word = {idx: word for word, idx in word_idx.items()} | ||
|
|
||
| # Original formated text | ||
|
|
@@ -133,33 +121,33 @@ def generate_from_seed(model, graph, seed,new_words=50, diversity=0.75): | |
| html = addContent(html, header( | ||
| 'Input Seed ', color='black', gen_text='Network Output')) | ||
| html = addContent(html, box(start, gen)) | ||
| st = "<div>"+html+"</div>" | ||
| return st | ||
| return f"<div>{html}</div>" | ||
|
|
||
|
|
||
| def header(text, color='black', gen_text=None): | ||
| """Create an HTML header""" | ||
|
|
||
| if gen_text: | ||
| raw_html = '<h1 style="margin-top:16px;color: {color};font-size:54px"><center>' + str( | ||
| text) + '<span style="color: red">' + str(gen_text) + '</center></h1>' | ||
| else: | ||
| raw_html = '<h1 style="margin-top:12px;color: {color};font-size:54px"><center>' + str( | ||
| text) + '</center></h1>' | ||
| return raw_html | ||
| return ( | ||
| '<h1 style="margin-top:16px;color: {color};font-size:54px"><center>' | ||
| + str(text) | ||
| + '<span style="color: red">' | ||
| + str(gen_text) | ||
| + '</center></h1>' | ||
| if gen_text | ||
| else '<h1 style="margin-top:12px;color: {color};font-size:54px"><center>' | ||
| + str(text) | ||
| + '</center></h1>' | ||
| ) | ||
|
Comment on lines
-143
to
+140
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
|
|
||
| def box(text, gen_text=None): | ||
| """Create an HTML box of text""" | ||
|
|
||
| if gen_text: | ||
| raw_html = '<div style="padding:8px;font-size:28px;margin-top:28px;margin-bottom:14px;">' + str( | ||
| text) + '<span style="color: red">' + str(gen_text) + '</div>' | ||
|
|
||
| else: | ||
| raw_html = '<div style="border-bottom:1px inset black;border-top:1px inset black;padding:8px;font-size: 28px;">' + str( | ||
| text) + '</div>' | ||
| return raw_html | ||
| return ( | ||
| f'<div style="padding:8px;font-size:28px;margin-top:28px;margin-bottom:14px;">{str(text)}<span style="color: red">{str(gen_text)}</div>' | ||
| if gen_text | ||
| else f'<div style="border-bottom:1px inset black;border-top:1px inset black;padding:8px;font-size: 28px;">{str(text)}</div>' | ||
| ) | ||
|
Comment on lines
-155
to
+150
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
|
|
||
| def addContent(old_html, raw_html): | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,7 +11,11 @@ | |
| def make_data(): | ||
| X,y = make_regression(n_samples=NUM_SAMPLES,n_features=NUM_FEATURES, | ||
| n_informative=NUM_FEATURES,noise=0.5) | ||
| data = pd.DataFrame(X,columns=['X'+str(i) for i in range(1,NUM_FEATURES+1)],dtype=np.float16) | ||
| data = pd.DataFrame( | ||
| X, | ||
| columns=[f'X{str(i)}' for i in range(1, NUM_FEATURES + 1)], | ||
| dtype=np.float16, | ||
| ) | ||
|
Comment on lines
-14
to
+18
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| data['y']=np.array(y,dtype=np.float16) | ||
| return data | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,7 +11,11 @@ | |
| def make_data(): | ||
| X,y = make_regression(n_samples=NUM_SAMPLES,n_features=NUM_FEATURES, | ||
| n_informative=NUM_FEATURES,noise=0.5) | ||
| data = pd.DataFrame(X,columns=['X'+str(i) for i in range(1,NUM_FEATURES+1)],dtype=np.float16) | ||
| data = pd.DataFrame( | ||
| X, | ||
| columns=[f'X{str(i)}' for i in range(1, NUM_FEATURES + 1)], | ||
| dtype=np.float16, | ||
| ) | ||
|
Comment on lines
-14
to
+18
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| data['y']=np.array(y,dtype=np.float16) | ||
| return data | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -101,7 +101,6 @@ def summary_metrics(self): | |
| if not self.is_fitted: | ||
| print("Model not fitted yet!") | ||
| return None | ||
| metrics = {} | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| items = ( | ||
| ("sse", self.sse()), | ||
| ("sst", self.sst()), | ||
|
|
@@ -111,9 +110,7 @@ def summary_metrics(self): | |
| ("AIC:", self.aic()), | ||
| ("BIC:", self.bic()), | ||
| ) | ||
| for item in items: | ||
| metrics[item[0]] = item[1] | ||
| return metrics | ||
| return {item[0]: item[1] for item in items} | ||
|
|
||
|
|
||
| class Inference: | ||
|
|
@@ -208,10 +205,7 @@ def fitted_vs_features(self): | |
| print("Model not fitted yet!") | ||
| return None | ||
| num_plots = self.features_.shape[1] | ||
| if num_plots % 3 == 0: | ||
| nrows = int(num_plots / 3) | ||
| else: | ||
| nrows = int(num_plots / 3) + 1 | ||
| nrows = int(num_plots / 3) if num_plots % 3 == 0 else int(num_plots / 3) + 1 | ||
|
Comment on lines
-211
to
+208
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| ncols = 3 | ||
| fig, ax = plt.subplots(nrows, ncols, figsize=(15, nrows * 3.5)) | ||
| axes = ax.ravel() | ||
|
|
@@ -226,7 +220,7 @@ def fitted_vs_features(self): | |
| alpha=0.8, | ||
| ) | ||
| axes[i].grid(True) | ||
| axes[i].set_xlabel("Feature X[{}]".format(i)) | ||
| axes[i].set_xlabel(f"Feature X[{i}]") | ||
| axes[i].set_ylabel("Residuals") | ||
| axes[i].hlines( | ||
| y=0, | ||
|
|
@@ -412,7 +406,7 @@ def vif(self): | |
| lm = sm.OLS(self.target_, sm.add_constant(self.features_)).fit() | ||
| for i in range(self.features_.shape[1]): | ||
| v = vif(np.matrix(self.features_), i) | ||
| print("Variance inflation factor for feature {}: {}".format(i, round(v, 2))) | ||
| print(f"Variance inflation factor for feature {i}: {round(v, 2)}") | ||
|
Comment on lines
-415
to
+409
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
|
|
||
| class MyLinearRegression( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,14 +31,14 @@ def train_linear_model(X,y, | |
| # Fit | ||
| model.fit(X_train, y_train) | ||
| # Save | ||
| fname = filename+'.sav' | ||
| fname = f'{filename}.sav' | ||
| dump(model, fname) | ||
| # Compute scores | ||
| r2_train = model.score(X_train,y_train) | ||
| r2_test = model.score(X_test,y_test) | ||
| # Return scores in a dictionary | ||
| return {'Train-score':r2_train, 'Test-score': r2_test} | ||
|
|
||
|
Comment on lines
-34
to
+41
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| except AssertionError as msg: | ||
| print(msg) | ||
| return msg | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Lines 30-36 refactored with the following changes: (use-fstring-for-formatting)