paddockpass/ML/udemy/preprocess_data.R

20 lines
581 B
R
Raw Normal View History

2019-09-16 08:45:51 +02:00
# Data preprocessing ----

# Load the dataset from "Data.csv" (path is relative to the working directory).
dataset <- read.csv("Data.csv")

# Handle missing data: in the Age and Salary columns, every NA is replaced by
# the mean of that column's non-missing entries; other values are kept as-is.
dataset[c("Age", "Salary")] <- lapply(
  dataset[c("Age", "Salary")],
  function(values) ifelse(is.na(values), mean(values, na.rm = TRUE), values)
)

# Encode categorical data: Country becomes a factor whose levels
# France, Spain, Germany are relabelled 1, 2, 3 (in that order).
# Note: c() builds the vector of levels passed to factor().
dataset[["Country"]] <- factor(
  dataset[["Country"]],
  levels = c("France", "Spain", "Germany"),
  labels = 1:3
)
2019-09-29 12:39:38 +02:00
# Encode Purchased as a factor: level "No" is relabelled 0, "Yes" is relabelled 1.
dataset[["Purchased"]] <- factor(
  dataset[["Purchased"]],
  levels = c("No", "Yes"),
  labels = 0:1
)