#Data Structures 

# Creating a vector
# Method 1: the colon operator yields consecutive integers.
x <- 1:5

# Method 2: seq() builds a sequence from a start, an end and a step.
x <- seq(from = 1, to = 5, by = 0.5)
x <- seq(10) # a single argument counts from 1 up to that value
(x <- seq(from = 30, to = 0, by = -3)) # outer parentheses also print the result

# Method 3: rep() repeats values.
x <- rep(1:2, times = 3) # whole vector repeated 3 times: 1 2 1 2 1 2
x <- rep(1:2, each = 3)  # each element repeated 3 times: 1 1 1 2 2 2

# Method 4: c() concatenates individual values into one vector.
(x <- c(1, 2, 3, 4))

# Vector: a sequence of values that all share the same data type.
x <- c(2, 4, 6)
is.vector(x)

y <- c("a", "b", "c")
length(y)

# Implicit coercion: when types are mixed, R converts every value to one
# common type.
z <- c("a", 2.2, FALSE)
print(z) # all values are stored as character strings

# Explicit coercion: the as.*() functions change the data type on request.
a <- c(1, 2, 5)
a <- as.character(a) # numeric -> character
a <- as.numeric(a)   # character -> numeric

# When a value cannot be converted, NA is produced (with a warning).
(x <- c(0.5, "a"))
as.numeric(x)

# 2- Matrix: a two-dimensional vector (all elements share one data type).

# Values fill the matrix column by column. TRUE/FALSE are spelled out because
# the shorthands T and F are ordinary variables that can be reassigned.
m1 <- matrix(c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE), nrow = 2)
m2 <- matrix(c(10, 20, 30, 40), nrow = 2)
m3 <- matrix(nrow = 2, ncol = 3) # no data supplied: a 2 x 3 matrix of NA

# Matrices can be created by binding vectors
a <- 1:3
b <- 4:6
(m <- cbind(a, b)) # column bind: a and b become the columns
(m <- rbind(a, b)) # row bind: a and b become the rows

# Accessing matrix elements
m2[1, 1]  # single element
m2[1, ]   # first row, all columns
m2[, 2]   # all rows, second column
m2[, -2]  # all rows, every column except column 2

# 3- Array: generalises a matrix to any number of dimensions.
a1 <- array(1:24, dim = c(3, 4, 2)) # 3 rows x 4 columns x 2 layers
is.array(a1)
a1[1:2, , 1] # slicing: first 2 rows, all columns, first layer

# 4- List: a vector whose elements may each have a different data type.
(x <- list(1.2, "a", TRUE))
is.list(x)
typeof(x) # reports "list"

# 5- Data frame: a table of equal-length column vectors; each column may have
#    its own data type.

df <- data.frame(
  student_id = 1:3,
  student_name = c("Ahmed", "Mohamed", "Sara"),
  gender = c("male", "male", "female")
)

# Show the data frame in the spreadsheet-style viewer. View() needs a GUI, so
# it is only called in an interactive session; batch runs skip it.
if (interactive()) {
  View(df)
}
typeof(df$student_id)

# Dimensions of the data frame (rows, columns)
dim(df)

# Column names
colnames(df)

# Add an empty column
df <- cbind(df, GPA = NA)
# Fill the empty column
df["GPA"] <- c(2.5, 3.0, 3.5)

# Alternative method
df$GPA <- c(2.5, 3.0, 3.5)

# Extract a column using the $ sign
df$student_id

# Extract data
df[1, ] # single row

# Remove rows: keep only the rows that satisfy the condition.
# Column names are case-sensitive, so the column must be written as GPA.
x <- subset(df, GPA < 3.5)

# Remove a column: a minus sign in `select` drops that column.
x <- subset(df, select = -student_id)

# 6- Factor: a vector that can contain only predefined values (levels); it is
#    used to store categorical data.
df$gender <- factor(df$gender, levels = c("male", "female"))
levels(df$gender) # shows all classes
str(df$gender)    # shows the integer code stored for each value

# Count the number of values in each class
table(df$gender)

# A new row is supplied as a list() so that every column keeps its own type;
# c() would coerce all four values to character and convert the numeric
# columns along with them.
# A value that is not one of the factor's levels cannot be stored: R warns and
# inserts NA for gender in this row.
df[nrow(df) + 1, ] <- list(4L, "x", "other", 2.5)
# A value that matches an existing level is stored normally.
df[nrow(df) + 1, ] <- list(4L, "Salma", "female", 2.5)

# Slicing a dataset
data(mtcars)
mtcars[1:5, 2]        # rows 1 to 5 of the second column
mtcars[1:3, c(1, 3)]  # rows 1 to 3 of the first and third columns
mtcars[1, ]           # the first row with every column
# A logical condition inside the row index keeps only the matching rows.
mtcars.sample <- mtcars[mtcars$vs == 0, ]

# Turn the am column into a factor: code 0 is labelled "automatic" and
# code 1 is labelled "manual".
mtcars$am <- factor(
  mtcars$am,
  levels = c(0, 1),
  labels = c("automatic", "manual")
)
str(mtcars$am)
# Select all automatic cars in the dataset.
# NOTE: this assigns to `df`, replacing whatever `df` held before.
df <- mtcars[mtcars$am == "automatic", ]