19 lines
580 B
R
19 lines
580 B
R
|
# Data preprocessing
|
||
|
|
||
|
# Import the dataset
# Read the raw observations from Data.csv (resolved against the current
# working directory) into a data frame.
dataset <- read.csv(file = "Data.csv")
|
||
|
|
||
|
# Taking care of the missing data
# Replace every missing Age with the mean of the observed (non-NA) ages.
# The right-hand side is evaluated before the assignment, so the mean is
# computed from the original column. The anyNA() guard leaves the column
# untouched when there is nothing to impute.
if (anyNA(dataset$Age)) {
  dataset$Age[is.na(dataset$Age)] <- mean(dataset$Age, na.rm = TRUE)
}
|
||
|
|
||
|
# Replace every missing Salary with the mean of the observed (non-NA)
# salaries. The right-hand side is evaluated before the assignment, so the
# mean is computed from the original column. The anyNA() guard leaves the
# column untouched when there is nothing to impute.
if (anyNA(dataset$Salary)) {
  dataset$Salary[is.na(dataset$Salary)] <- mean(dataset$Salary, na.rm = TRUE)
}
|
||
|
|
||
|
# Encoding categorical data
# Note: c() builds a vector. `levels` and `labels` are paired by position,
# so France -> 1, Spain -> 2, Germany -> 3. Any value that is not listed in
# `levels` becomes NA.
dataset$Country <- factor(
  dataset$Country,
  levels = c("France", "Spain", "Germany"),
  labels = c(1, 2, 3)
)
|
||
|
|
||
|
# Encode the Purchased column as a factor: "No" -> 0, "Yes" -> 1
# (levels and labels are paired by position).
dataset$Purchased <- factor(
  dataset$Purchased,
  levels = c("No", "Yes"),
  labels = c(0, 1)
)
|