#Data Structures
# 1- Vector: a sequence of values that all share the same data type
# Creating a vector
# Method 1: the colon operator
x <- 1:5
# Method 2: seq()
x <- seq(from = 1, to = 5, by = 0.5) # from 1 to 5 in steps of 0.5
x <- seq(10) # a single number counts from 1 up to that number
x <- seq(from = 30, to = 0, by = -3) # a negative step counts down
x
# Method 3: rep()
x <- rep(1:2, times = 3) # the whole vector repeated 3 times
x <- rep(1:2, each = 3) # each element repeated 3 times
# Method 4: c()
x <- c(1, 2, 3, 4) # concatenate values
x
# Inspecting a vector
x <- c(2, 4, 6)
is.vector(x)
y <- c("a", "b", "c")
length(y)
# Implicit coercion: R forces all values to have the same data type
z <- c("a", 2.2, FALSE)
print(z) # every element is stored as a character string
# Explicit coercion: change the data type with the as.*() functions
a <- c(1, 2, 5)
a <- as.character(a)
a <- as.numeric(a)
# If R cannot coerce a value, NA is produced (with a warning)
x <- c(0.5, "a")
x
as.numeric(x)
# 2- Matrix: a 2 dimensional vector (all elements share one data type)
# Values fill the matrix column by column.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, so they are not safe as logical constants.
m1 <- matrix(c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE), nrow = 2)
m2 <- matrix(c(10, 20, 30, 40), nrow = 2)
m3 <- matrix(nrow = 2, ncol = 3) # no data supplied: every cell is NA
# Matrices can be created by binding vectors
a <- 1:3
b <- 4:6
(m <- cbind(a, b)) # column bind: a and b become the columns
(m <- rbind(a, b)) # row bind: a and b become the rows
# Accessing matrix elements
m2[1, 1] # single element
m2[1, ] # first row, all columns
m2[, 2] # all rows, second column
# All rows, every column except column 2. Only one column remains here, so
# the result is dropped to a plain vector.
m2[, -2]
# 3- Array: like a matrix, but with 2 or more dimensions
# 24 values arranged as 3 rows x 4 columns x 2 layers
a1 <- array(1:24, dim = c(3, 4, 2))
is.array(a1)
# Slicing: first two rows, all columns, first layer
a1[1:2, , 1]
# 4- List: a vector whose elements may each have a different data type
x <- list(1.2, "a", TRUE)
x
is.list(x)
typeof(x)
# 5- Data frame: a table whose columns are equal-length vectors; each column
# may have its own data type
df <- data.frame(
  student_id = 1:3,
  student_name = c("Ahmed", "Mohamed", "Sara"),
  gender = c("male", "male", "female")
)
# Show a data frame. View() opens a viewer, so it is only called when the
# script is run interactively.
if (interactive()) {
  View(df)
}
typeof(df$student_id)
# Dimensions of the data frame (rows, columns)
dim(df)
# Column names
colnames(df)
# Add an empty column
df <- cbind(df, GPA = NA)
# Fill an empty column
df["GPA"] <- c(2.5, 3.0, 3.5)
# Alternative method
df$GPA <- c(2.5, 3.0, 3.5)
# Extract a column using the $ sign
df$student_id
# Extract data
df[1, ] # single row
# Remove rows: keep only the rows that satisfy the condition.
# Column names are case-sensitive, so the column must be written as GPA.
x <- subset(df, GPA < 3.5)
# Remove a column: a minus sign in select means "drop this column"
x <- subset(df, select = -student_id)
# 6- Factor: a vector that can contain only predefined values (levels); it is
# used to store categorical data.
df$gender <- factor(df$gender, levels = c("male", "female"))
levels(df$gender) # shows all classes
str(df$gender) # shows the integer code behind each value
# Count the number of values in each class
table(df$gender)
# A new row is passed as a list so that every column keeps its own type;
# c() would coerce all four values to character and silently turn
# student_id and GPA into character columns.
# A value that is not one of the factor levels cannot be stored: R warns
# ("invalid factor level") and the row is still appended with gender = NA.
df[nrow(df) + 1, ] <- list(4L, "x", "other", 2.5)
# A value that matches an existing level is stored normally.
df[nrow(df) + 1, ] <- list(4L, "Salma", "female", 2.5)
# Slicing a dataset
data("mtcars")
mtcars[seq_len(5), 2] # first 5 rows, second column
mtcars[seq_len(3), c(1, 3)] # first 3 rows, first and third columns
mtcars[1, ] # first row, all columns
# Slice with a logical condition: keep only the rows where vs equals 0
mtcars.sample <- mtcars[mtcars[["vs"]] == 0, ]
# Turn the am column into a factor: code 0 is labelled "automatic" and
# code 1 is labelled "manual"
mtcars[["am"]] <- factor(
  mtcars[["am"]],
  levels = c(0, 1),
  labels = c("automatic", "manual")
)
str(mtcars[["am"]])
# Select all automatic cars in the dataset
df <- mtcars[mtcars[["am"]] == "automatic", ]