update
commit 0679a1656f
parent 654a5ed2c0
ML/udemy/.vscode/settings.json (new file, vendored, +3)
@@ -0,0 +1,3 @@
{
    "python.pythonPath": "/home/chris/Projects/_LAB/training/ML/udemy/.env/bin/python3.7"
}
ML/udemy/NOTES.md (new file, +3)
@@ -0,0 +1,3 @@
# SECTION 2

* you need to split the dataset into a training set and a test set so the model is evaluated on data it was not trained on (you train on the training set and test those assumptions on the test set)

? what is categorical data, why would you use it?
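A minimal sketch of the split described in that note, using scikit-learn's train_test_split; the toy arrays, 80/20 ratio, and random_state below are illustrative assumptions, not from the notes:

import numpy as np
from sklearn.model_selection import train_test_split

# Toy stand-ins for the feature matrix and target vector
X = np.arange(20).reshape(10, 2)
y = np.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1])

# Hold out 20% of the rows as the test set (ratio and seed are assumed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Fit only on (X_train, y_train); evaluate only on (X_test, y_test)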
@@ -1,21 +0,0 @@
# Data preprocessing
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Data.csv')
# Create the matrix of features (independent variables)
# [:, = rows -- all of them
# :-1] = columns -- all of them except the last one
# X = (Country, Age, Salary)
X = dataset.iloc[:, :-1].values
# Create the dependent variable vector
# [:, 3] = all values of column index 3
# Y = (Purchased)
X = dataset.iloc[:, 3].values

# Taking care of the missing data
from sklearn.model_selection import train_test_split

imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)
ML/udemy/preprocess_data.R (new file, +19)
@@ -0,0 +1,19 @@
# Data preprocessing

# Import the dataset
dataset = read.csv("Data.csv")

# Taking care of the missing data
dataset$Age = ifelse(is.na(dataset$Age),
                     ave(dataset$Age, FUN = function(x) mean(x, na.rm = TRUE)),
                     dataset$Age)

dataset$Salary = ifelse(is.na(dataset$Salary),
                        ave(dataset$Salary, FUN = function(x) mean(x, na.rm = TRUE)),
                        dataset$Salary)

# Encoding categorical data
# ! c() builds a vector
dataset$Country = factor(dataset$Country, levels = c('France', 'Spain', 'Germany'), labels = c(1, 2, 3))

dataset$Purchased = factor(dataset$Purchased, levels = c('No', 'Yes'), labels = c(0, 1))
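For comparison, a rough pandas sketch of the same imputation and encoding steps, assuming the same Data.csv with Country, Age, Salary, and Purchased columns:

import pandas as pd

dataset = pd.read_csv('Data.csv')

# Fill missing Age/Salary with the column mean (mirrors the ifelse/ave calls)
dataset['Age'] = dataset['Age'].fillna(dataset['Age'].mean())
dataset['Salary'] = dataset['Salary'].fillna(dataset['Salary'].mean())

# Map categories to numeric codes (mirrors factor levels/labels)
dataset['Country'] = dataset['Country'].map({'France': 1, 'Spain': 2, 'Germany': 3})
dataset['Purchased'] = dataset['Purchased'].map({'No': 0, 'Yes': 1})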
ML/udemy/preprocess_data.py (new file, +32)
@@ -0,0 +1,32 @@
# Data preprocessing
from sklearn.preprocessing import Imputer, LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Data.csv')
# Create the matrix of features (independent variables)
# [:, = rows -- all of them
# :-1] = columns -- all of them except the last one
# X = (Country, Age, Salary)
X = dataset.iloc[:, :-1].values
# Create the dependent variable vector
# [:, 3] = all values of column index 3
# y = (Purchased)
y = dataset.iloc[:, 3].values

# Taking care of the missing data

imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)
imputer = imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])

# Encoding categorical data
labelencoder_X = LabelEncoder()
X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
# One-hot encode only the Country column (index 0)
onehotencoder = OneHotEncoder(categorical_features=[0])
X = onehotencoder.fit_transform(X).toarray()
labelencoder_y = LabelEncoder()
y = labelencoder_y.fit_transform(y)
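Worth noting: Imputer and OneHotEncoder's categorical_features argument were removed in later scikit-learn releases. A minimal sketch of the same steps with the current API, assuming scikit-learn 0.22 or newer and the same Data.csv layout:

import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 3].values

# Mean-impute the numeric columns (Age, Salary)
X[:, 1:3] = SimpleImputer(strategy='mean').fit_transform(X[:, 1:3])

# One-hot encode column 0 (Country); pass the other columns through unchanged
ct = ColumnTransformer([('country', OneHotEncoder(), [0])], remainder='passthrough')
X = ct.fit_transform(X)

# Encode the Yes/No target as 0/1
y = LabelEncoder().fit_transform(y)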
@@ -6,3 +6,4 @@ ReactDOM.render(
  <App />,
  document.querySelector('#root')
);
