paddockpass/ML/udemy/1/preprocess_data.R
2019-10-18 19:26:43 +02:00

20 lines
581 B
R

# Data preprocessing ----
# Reads Data.csv from the working directory, fills the gaps in the numeric
# columns, and converts the categorical columns to factors. The result is
# left in `dataset`.

# Load the raw data set.
dataset <- read.csv("Data.csv")

# Impute missing values: every NA in Age and Salary is replaced by the mean
# of the non-missing entries of that same column; other entries are kept.
dataset$Age <- ifelse(
  is.na(dataset$Age),
  mean(dataset$Age, na.rm = TRUE),
  dataset$Age
)
dataset$Salary <- ifelse(
  is.na(dataset$Salary),
  mean(dataset$Salary, na.rm = TRUE),
  dataset$Salary
)

# Encode the categorical columns as factors. `levels` lists the raw values
# expected in the file and `labels` supplies the code that replaces each of
# them, matched by position. Any value not listed in `levels` becomes NA.
dataset$Country <- factor(
  dataset$Country,
  levels = c("France", "Spain", "Germany"),
  labels = 1:3
)
dataset$Purchased <- factor(
  dataset$Purchased,
  levels = c("No", "Yes"),
  labels = 0:1
)