ML: simple linear regression
This commit is contained in:
parent
95c3cb4e89
commit
897831b91b
3
ML/udemy/.vscode/settings.json
vendored
3
ML/udemy/.vscode/settings.json
vendored
|
@ -1,3 +1,4 @@
|
|||
{
|
||||
"python.pythonPath": "/home/chris/Projects/_LAB/training/ML/udemy/.env/bin/python3.7"
|
||||
"python.pythonPath": "/home/chris/Projects/_LAB/training/ML/udemy/.env/bin/python3.7",
|
||||
"python.linting.enabled": true
|
||||
}
|
16
ML/udemy/1/data_preprocessing_template.py
Normal file
16
ML/udemy/1/data_preprocessing_template.py
Normal file
|
@ -0,0 +1,16 @@
|
|||
# Data preprocessing
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
# Importing the dataset
|
||||
dataset = pd.read_csv('Data.csv')
|
||||
X = dataset.iloc[:, :-1].values
|
||||
y = dataset.iloc[:, 3].values
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
X, y, test_size=0.2, random_state=0)
|
||||
|
||||
# feature scaling
|
||||
# sc_X = StandardScaler()
|
||||
# X_train = sc_X.fit_transform(X_train)
|
||||
# X_test = sc_X.transform(X_test)
|
31
ML/udemy/2/Salary_Data.csv
Normal file
31
ML/udemy/2/Salary_Data.csv
Normal file
|
@ -0,0 +1,31 @@
|
|||
YearsExperience,Salary
|
||||
1.1,39343.00
|
||||
1.3,46205.00
|
||||
1.5,37731.00
|
||||
2.0,43525.00
|
||||
2.2,39891.00
|
||||
2.9,56642.00
|
||||
3.0,60150.00
|
||||
3.2,54445.00
|
||||
3.2,64445.00
|
||||
3.7,57189.00
|
||||
3.9,63218.00
|
||||
4.0,55794.00
|
||||
4.0,56957.00
|
||||
4.1,57081.00
|
||||
4.5,61111.00
|
||||
4.9,67938.00
|
||||
5.1,66029.00
|
||||
5.3,83088.00
|
||||
5.9,81363.00
|
||||
6.0,93940.00
|
||||
6.8,91738.00
|
||||
7.1,98273.00
|
||||
7.9,101302.00
|
||||
8.2,113812.00
|
||||
8.7,109431.00
|
||||
9.0,105582.00
|
||||
9.5,116969.00
|
||||
9.6,112635.00
|
||||
10.3,122391.00
|
||||
10.5,121872.00
|
|
33
ML/udemy/2/simple_linear_reg.py
Normal file
33
ML/udemy/2/simple_linear_reg.py
Normal file
|
@ -0,0 +1,33 @@
|
|||
# Data preprocessing
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.linear_model import LinearRegression
|
||||
|
||||
# Importing the dataset
|
||||
dataset = pd.read_csv('Salary_Data.csv')
|
||||
X = dataset.iloc[:, :-1].values
|
||||
y = dataset.iloc[:, 1].values
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
X, y, test_size=0.2, random_state=0)
|
||||
|
||||
# Fitting Simple Linear Regression to the Training set
|
||||
regressor = LinearRegression()
|
||||
regressor.fit(X_train, y_train)
|
||||
|
||||
# Predicting the Test set results
|
||||
# y_pred contains the predicted salary from the test sample, y_test is the actual salary.
|
||||
y_pred = regressor.predict(X_test)
|
||||
|
||||
# Visualize the data
|
||||
# data used to train the regression
|
||||
plt.scatter(X_train, y_train, color='red')
|
||||
# actual test data, to compare against the trained regression
|
||||
plt.scatter(X_test, y_test, color='green')
|
||||
plt.plot(X_train, regressor.predict(X_train), color='blue')
|
||||
plt.title('Salary vs experience (training set)')
|
||||
plt.xlabel('years of experience')
|
||||
plt.ylabel('salary')
|
||||
plt.show()
|
|
@ -2,4 +2,14 @@
|
|||
* you need to split the data into a training set and a test set to validate the machine learning model (you train on the training set and test those assumptions on the test set)
|
||||
? what is categorical data, why would you use it?
|
||||
|
||||
* feature scaling: put all values on the same scale so that larger numbers do not dominate the others => standardisation or normalisation
|
||||
* feature scaling: put all values on the same scale so that larger numbers do not dominate the others => standardisation or normalisation
|
||||
|
||||
# Linear regression
|
||||
|
||||
Formula: `y = b0 + b1*x1`
|
||||
|
||||
y is the dependent variable: the value that changes in our model and that we want to understand, i.e. how this value changes.
|
||||
|
||||
x1 is the independent variable, the one that has an implied association with y.
|
||||
|
||||
b1 is the coefficient for the independent variable: how much y changes for a unit change in x1.
|
|
@ -1,4 +1,5 @@
|
|||
# Data preprocessing
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
|
|
Loading…
Reference in a new issue