From 0679a1656f0ce4e0687743d56300ac28c2a89568 Mon Sep 17 00:00:00 2001 From: christalib Date: Mon, 16 Sep 2019 08:45:51 +0200 Subject: [PATCH] update --- ML/udemy/.vscode/settings.json | 3 +++ ML/udemy/NOTES.md | 3 +++ ML/udemy/fill_empty_data.py | 21 --------------------- ML/udemy/preprocess_data.R | 19 +++++++++++++++++++ ML/udemy/preprocess_data.py | 32 ++++++++++++++++++++++++++++++++ react/7/pics/src/index.js | 1 + 6 files changed, 58 insertions(+), 21 deletions(-) create mode 100644 ML/udemy/.vscode/settings.json create mode 100644 ML/udemy/NOTES.md delete mode 100644 ML/udemy/fill_empty_data.py create mode 100644 ML/udemy/preprocess_data.R create mode 100644 ML/udemy/preprocess_data.py diff --git a/ML/udemy/.vscode/settings.json b/ML/udemy/.vscode/settings.json new file mode 100644 index 0000000..0de2132 --- /dev/null +++ b/ML/udemy/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.pythonPath": "/home/chris/Projects/_LAB/training/ML/udemy/.env/bin/python3.7" +} \ No newline at end of file diff --git a/ML/udemy/NOTES.md b/ML/udemy/NOTES.md new file mode 100644 index 0000000..3988695 --- /dev/null +++ b/ML/udemy/NOTES.md @@ -0,0 +1,3 @@ +# SECTION 2 +* you need to split the data into a training set and a test set to balance the machine learning (you train on the training set and test those assumptions on the test set) +? what is categorical data, why would you use it? 
\ No newline at end of file
diff --git a/ML/udemy/fill_empty_data.py b/ML/udemy/fill_empty_data.py
deleted file mode 100644
index 06c6970..0000000
--- a/ML/udemy/fill_empty_data.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Data preprocessing
-import numpy as np
-import matplotlib.pyplot as plt
-import pandas as pd
-
-# Importing the dataset
-dataset = pd.read_csv('Data.csv')
-# Create the matrix of features (independant variables)
-# [:, = lines -- all of them
-# :-1] = colums -- all of them unless the last one
-# X = (Country, Age, Salary)
-X = dataset.iloc[:, :-1].values
-# Create vector of linked variables
-# [:, 3] = all values of the 3rd column
-# Y = (Purchased)
-X = dataset.iloc[:, 3].values
-
-# Taking care of the missing data
-from sklearn.model_selection import train_test_split
-
-imputer = Imputer(missing_values = 'NaN', strategy = 'mean', axis = 0)
diff --git a/ML/udemy/preprocess_data.R b/ML/udemy/preprocess_data.R
new file mode 100644
index 0000000..eeee032
--- /dev/null
+++ b/ML/udemy/preprocess_data.R
@@ -0,0 +1,19 @@
+# Data preprocessing
+
+# Import the dataset
+dataset = read.csv("Data.csv")
+
+# Taking care of the missing data
+dataset$Age = ifelse(is.na(dataset$Age),
+                     ave(dataset$Age, FUN = function(x) mean(x, na.rm = TRUE)),
+                     dataset$Age)
+
+dataset$Salary = ifelse(is.na(dataset$Salary),
+                        ave(dataset$Salary, FUN = function(x) mean(x, na.rm = TRUE)),
+                        dataset$Salary)
+
+# Encoding categorical data
+# ! c is a vector
+dataset$Country = factor(dataset$Country, levels=c('France', 'Spain', 'Germany'), labels=c(1, 2, 3))
+
+dataset$Purchased = factor(dataset$Purchased, levels=c('No', 'Yes'), labels=c(0, 1))
\ No newline at end of file
diff --git a/ML/udemy/preprocess_data.py b/ML/udemy/preprocess_data.py
new file mode 100644
index 0000000..8688c17
--- /dev/null
+++ b/ML/udemy/preprocess_data.py
@@ -0,0 +1,32 @@
+# Data preprocessing
+from sklearn.preprocessing import Imputer, LabelEncoder, OneHotEncoder
+from sklearn.model_selection import train_test_split
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# Importing the dataset
+dataset = pd.read_csv('Data.csv')
+# Create the matrix of features (independent variables)
+# [:, = rows -- all of them
+# :-1] = columns -- all of them except the last one
+# X = (Country, Age, Salary)
+X = dataset.iloc[:, :-1].values
+# Create the vector of the dependent variable
+# [:, 3] = all values of the column at index 3 (the 4th column)
+# y = (Purchased)
+y = dataset.iloc[:, 3].values
+
+# Taking care of the missing data: fill NaN in Age/Salary with the column mean
+# (Imputer and categorical_features below need scikit-learn < 0.22)
+imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)
+imputer = imputer.fit(X[:, 1:3])
+X[:, 1:3] = imputer.transform(X[:, 1:3])
+
+# Encoding categorical data: one-hot encode only Country (column 0)
+labelencoder_X = LabelEncoder()
+X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
+onehotencoder = OneHotEncoder(categorical_features=[0])
+X = onehotencoder.fit_transform(X).toarray()
+labelencoder_y = LabelEncoder()
+y = labelencoder_y.fit_transform(y)
diff --git a/react/7/pics/src/index.js b/react/7/pics/src/index.js
index e4bc450..88a12b3 100644
--- a/react/7/pics/src/index.js
+++ b/react/7/pics/src/index.js
@@ -6,3 +6,4 @@ ReactDOM.render(
   ,
   document.querySelector('#root')
 );
+