Dataframe

Data Frame

# PART 1 ----

# Data Frames
# A Table or a 2-dimensional array-like structure in which each column contains 
# values of one variable and each row contains one set of values from each column
# Syntax: data.frame(data)

#============#
#  EXAMPLES  #
#============#

RowCount = c(1:5)
PeopleNames = c("Bryan","Jude","kelly","janelle","Rosa")
Values = c(15,25,65,145,74)

df <- data.frame(RowCount,PeopleNames,Values)
df
# This alone will display the valeus of the data frame each vector in the frame
# represents a vertical column of values that each contibue a value to each row

data.frame(airquality) 
# Built in sample data table, can also import Excel files


# PART 2 ----

myDataFrame <- read.csv("20190208 RC Registry.csv")
myDataFrame <- myDataFrame[myDataFrame$Medical..Screening.Due.Date <date, c("CDCR", "Medical..Screening.Due.Date")]
myDataFrame


# PART 3 ----

# setting up the data frame vectors
id <- 1:200
group <- c(rep("Vehicle",100),
	   rep("Drug",100))
response <- c(rnorm(100,mean = 25, sd = 5),
		  rnorm(100,mean = 23, sd=5))
age <- round(rnorm(200,40,20))

#compiling the data frame
myData <- data.frame(Patient = id,
				 Patient.Age = age,
				 Treatment = group,
				 Response = response)

myData
head(myData,10)
tail(myData,10)
dim(myData)
str(myData)
summary(myData)

# subsetting Data.frames ----

myData[1,2]
myData[2,3]
myData[1:20,2:3] # first 20 rows with columns 2 & 3 present
myData[1:20,] # returns 20 rows and all columns if left blank
myData[,1] # returns everythingh in the first column only
myData[,"Response"] # returns just the columns values for the column named "Response"
myData$Response #the Dollar sign $ after the name of the data frame will return the entire column without quotes or brackets

myData[myData$Response>26,] # give me the rows and all columns for every row that meets the criteria
# of Response > 26

#perform a calculation and then add values to a new column of the data frame
myData$Positive <- myData$Response<26
write.csv(myData[myData$Response>26,],file = "testData.csv", row.names = F) # write a CSV file to the current working directory

# multiple filter criteria and then assigned
# to a new object for ease of exporting to CSV
CSVMyData <- myData[myData$Treatment == "Vehicle"
				& myData$Response>26 &
					myData$Patient.Age > 0,] 


write.csv(CSVMyData,file = "testData.csv", row.names = F)

head(CSVMyData)