# Week 3 Data Visualization Lab

# Install the ggplot2 package (only needed once), then load it
#install.packages("ggplot2")
library(ggplot2)
#View(cars)

# A quick base R plot - this is not ggplot2
plot(cars)

# Our first ggplot: every plot needs data + aesthetics (aes) + geoms.
# Build the speed-vs-dist scatter plot once, keep it in `p`, then print it.
p <- ggplot(cars, aes(x = speed, y = dist)) +
  geom_point()
p

# Connect the points with geom_line()
p + geom_line()

# Add a trend line close to the data
p + geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Same idea, but fitted with method = "lm" (a linear model)
p + geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

#---------------------------------------------------

# Read in our drug expression data (read.delim expects tab-separated text)
url <- "https://bioboot.github.io/bimm143_S20/class-material/up_down_expression.txt"
genes <- read.delim(file = url)
head(genes)
##         Gene Condition1 Condition2      State
## 1      A4GNT -3.6808610 -3.4401355 unchanging
## 2       AAAS  4.5479580  4.3864126 unchanging
## 3      AASDH  3.7190695  3.4787276 unchanging
## 4       AATF  5.0784720  5.0151916 unchanging
## 5       AATK  0.4711421  0.5598642 unchanging
## 6 AB015752.4 -3.6808610 -3.5921390 unchanging

# Q. How many genes are in this dataset? (counted as the number of rows)
nrow(genes)
## [1] 5196

# Q. How many 'up' regulated genes? (tabulate every State, read off "up")
table(genes[["State"]])
## 
##       down unchanging         up 
##         72       4997        127

# Q. What fraction of total genes is up-regulated?
# Shown as a percentage of all rows for each State, rounded to 2 decimals.
round(table(genes[["State"]]) / nrow(genes) * 100, digits = 2)
## 
##       down unchanging         up 
##       1.39      96.17       2.44
# Let's make a first plot attempt: Condition1 against Condition2, with the
# points coloured by each gene's State.
g <- ggplot(data = genes) +
  aes(x = Condition1, y = Condition2, col = State) +
  geom_point()

g

# Add some color. The colour values are named so that each State is tied to
# its colour explicitly, instead of relying on the positional (alphabetical)
# order of the State levels.
g +
  scale_color_manual(
    values = c(down = "pink", unchanging = "lightyellow", up = "lightblue")
  ) +
  labs(
    title = "Gene expression changes",
    x = "Control (no drug)",
    y = "Drug Treatment"
  ) +
  theme_bw()