# paddockpass/ML/udemy/fill_empty_data.py

# Data preprocessing
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Data.csv')
# Create the matrix of features (independent variables)
# [:, = rows -- all of them
# :-1] = columns -- all of them except the last one
# X = (Country, Age, Salary)
X = dataset.iloc[:, :-1].values
# Create the vector of the dependent variable
# [:, 3] = all rows of the column at index 3 (the fourth column)
# Y = (Purchased)
# Bound to Y, not X, so the feature matrix built above is not overwritten.
Y = dataset.iloc[:, 3].values

# Taking care of the missing data
from sklearn.model_selection import train_test_split
# SimpleImputer replaces the legacy sklearn.preprocessing.Imputer, which was
# deprecated in scikit-learn 0.20 and removed in 0.22.
from sklearn.impute import SimpleImputer

# Replace every missing entry (NaN) with the mean of its column.
# SimpleImputer always works column-wise, which is what the legacy
# `axis = 0` argument selected.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')