library(ggplot2) # for general plotting
library(car)     # for ANOVA (Type II used, better than Type I when there is an unbalanced design)

Information of data source

RT-qPCR performed on samples used for % inhibition assay, to validate gene expression.

Read in the data and view structure to identify any issues in data formatting

Ct.inhibition.data <- read.csv("~/Lab Stuff/Adapted mites/Tomato/% inhibition assays/all on Heinz/qPCR validation/Tu-A R data.csv", header = TRUE)

# Trial as factor
Ct.inhibition.data$Trial <- factor(Ct.inhibition.data$Trial)

str(Ct.inhibition.data)
## 'data.frame':    18 obs. of  4 variables:
##  $ Trial    : Factor w/ 3 levels "1","2","3": 1 1 1 2 2 2 3 3 3 1 ...
##  $ GOI      : Factor w/ 2 levels "Cys PI","PI 1": 2 2 2 2 2 2 2 2 2 1 ...
##  $ Treatment: Factor w/ 3 levels "No mite","TA",..: 1 3 2 1 3 2 1 3 2 1 ...
##  $ Cq       : num  -7.831 2.162 0.972 -8.006 1.709 ...
# Subset GOIs
Ct.inhibition.data.PI <-subset(Ct.inhibition.data,GOI =="PI 1")
Ct.inhibition.data.CPI <-subset(Ct.inhibition.data,GOI =="Cys PI")

Formulate hypothesis

H0: There will be no difference in gene expression levels between treatments.

HA: Gene expression will differ by treatment.

Conduct data exploration

Outliers in the response variable (Cq) within explanatory variables (Trial, Treatment).

ggplot(Ct.inhibition.data.PI, aes(x = Trial, y = Cq)) + geom_boxplot()+ theme_classic()

ggplot(Ct.inhibition.data.PI, aes(x = Treatment, y = Cq)) + geom_boxplot()+ theme_classic()

ggplot(Ct.inhibition.data.CPI, aes(x = Trial, y = Cq)) + geom_boxplot()+ theme_classic()

ggplot(Ct.inhibition.data.CPI, aes(x = Treatment, y = Cq)) + geom_boxplot()+ theme_classic()

No outliers.

Collinearity of the explanatory variables

Des not apply, both categorical/factorial.

Spatial/temporal or other hierarchical aspects of sampling design

No, I am treating Trial as a main effect to check for reproducibility (not a random effect/blocking factor).

Interactions (is the quality of the data good enough to include them?)

No, sample size is not high enough (not enough degrees of freedom to perform all contrasts involved when including the interaction).

Zero inflation in Y

No

Are categorical covariates balanced?

summary(Ct.inhibition.data.PI)
##  Trial     GOI      Treatment       Cq        
##  1:3   Cys PI:0   No mite:3   Min.   :-9.541  
##  2:3   PI 1  :9   TA     :3   1st Qu.:-7.831  
##  3:3              TU     :3   Median :-1.224  
##                               Mean   :-2.648  
##                               3rd Qu.: 0.972  
##                               Max.   : 2.162
summary(Ct.inhibition.data.CPI)
##  Trial     GOI      Treatment       Cq         
##  1:3   Cys PI:9   No mite:3   Min.   :-14.288  
##  2:3   PI 1  :0   TA     :3   1st Qu.:-10.883  
##  3:3              TU     :3   Median : -3.139  
##                               Mean   : -5.520  
##                               3rd Qu.: -2.051  
##                               Max.   :  1.278

Yes

Apply model

Proteinase inhibitor I - Solyc09g084480

# fit linear model and display model fit information and ANOVA table
m.PI <- lm(Cq ~ Treatment + Trial, data = Ct.inhibition.data.PI)
summary(m.PI)
## 
## Call:
## lm(formula = Cq ~ Treatment + Trial, data = Ct.inhibition.data.PI)
## 
## Residuals:
##        1        2        3        4        5        6        7        8 
## -0.45433 -0.50955  0.96387  0.31215 -0.02113 -0.29101  0.14218  0.53068 
##        9 
## -0.67286 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.3767     0.5691  -12.96 0.000204 ***
## TreatmentTA   7.3848     0.6235   11.85 0.000291 ***
## TreatmentTU  10.0484     0.6235   16.12 8.67e-05 ***
## Trial2       -0.9411     0.6235   -1.51 0.205668    
## Trial3       -2.3069     0.6235   -3.70 0.020832 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7636 on 4 degrees of freedom
## Multiple R-squared:  0.9865, Adjusted R-squared:  0.973 
## F-statistic: 73.18 on 4 and 4 DF,  p-value: 0.0005403
Anova(m.PI)
## Anova Table (Type II tests)
## 
## Response: Cq
##            Sum Sq Df F value  Pr(>F)    
## Treatment 162.600  2 139.437 0.00020 ***
## Trial       8.073  2   6.923 0.05024 .  
## Residuals   2.332  4                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Calculate effect size and display
result.anova<-Anova(m.PI)
ss<-result.anova$"Sum Sq"  ##ss = sum of squares
pes<-ss/(ss+ss[length(ss)])  ##pes = partial e squared
pes[length(pes)]<-""
result.anova$"Part E Sq"<-pes
result.anova
## Anova Table (Type II tests)
## 
## Response: Cq
##            Sum Sq Df F value   Pr(>F) Part E Sq
## Treatment 162.600  2 139.437 0.000200   0.98586
## Trial       8.073  2   6.923 0.050239   0.77586
## Residuals   2.332  4
# perform post-hoc Tukey-Kramer test of contrasts
TukeyHSD(aov(Cq ~ Treatment + Trial, data = Ct.inhibition.data.PI))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Cq ~ Treatment + Trial, data = Ct.inhibition.data.PI)
## 
## $Treatment
##                 diff       lwr       upr     p adj
## TA-No mite  7.384842 5.1628222  9.606861 0.0006485
## TU-No mite 10.048384 7.8263648 12.270404 0.0001997
## TU-TA       2.663543 0.4415231  4.885562 0.0279058
## 
## $Trial
##           diff       lwr         upr     p adj
## 2-1 -0.9411452 -3.163165  1.28087428 0.3784814
## 3-1 -2.3069311 -4.528951 -0.08491157 0.0444694
## 3-2 -1.3657859 -3.587805  0.85623367 0.1866534
# interaction plot
interaction.plot(Ct.inhibition.data.PI$Treatment, Ct.inhibition.data.PI$Trial, Ct.inhibition.data.PI$Cq, type="l", leg.bty="o", leg.bg="grey95", lwd=2, ylab="Cq", xlab="Treatment", main="Treatment:Trial")

Cystein Proteinase inhibitor - Solyc00g071180

# fit linear model and display model fit information and ANOVA table
m.CPI <- lm(Cq ~ Treatment + Trial, data = Ct.inhibition.data.CPI)
summary(m.CPI)
## 
## Call:
## lm(formula = Cq ~ Treatment + Trial, data = Ct.inhibition.data.CPI)
## 
## Residuals:
##        10        11        12        13        14        15        16 
## -0.523745  0.512195  0.011550  0.531520 -0.457389 -0.074131 -0.007775 
##        17        18 
## -0.054806  0.062581 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -10.4315     0.3802 -27.434 1.05e-05 ***
## TreatmentTA   8.3685     0.4165  20.091 3.62e-05 ***
## TreatmentTU  11.1969     0.4165  26.881 1.14e-05 ***
## Trial2       -0.9826     0.4165  -2.359 0.077741 .  
## Trial3       -3.8491     0.4165  -9.241 0.000762 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5101 on 4 degrees of freedom
## Multiple R-squared:  0.9954, Adjusted R-squared:  0.9909 
## F-statistic: 218.4 on 4 and 4 DF,  p-value: 6.211e-05
Anova(m.CPI)
## Anova Table (Type II tests)
## 
## Response: Cq
##            Sum Sq Df F value    Pr(>F)    
## Treatment 203.402  2 390.789 2.593e-05 ***
## Trial      23.998  2  46.106  0.001728 ** 
## Residuals   1.041  4                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Calculate effect size and display
result.anova<-Anova(m.CPI)
ss<-result.anova$"Sum Sq"  ##ss = sum of squares
pes<-ss/(ss+ss[length(ss)])  ##pes = partial e squared
pes[length(pes)]<-""
result.anova$"Part E Sq"<-pes
result.anova
## Anova Table (Type II tests)
## 
## Response: Cq
##            Sum Sq Df F value     Pr(>F) Part E Sq
## Treatment 203.402  2 390.789 0.00002593   0.99491
## Trial      23.998  2  46.106 0.00172846   0.95843
## Residuals   1.041  4
# perform post-hoc Tukey-Kramer test of contrasts
TukeyHSD(aov(Cq ~ Treatment + Trial, data = Ct.inhibition.data.CPI))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Cq ~ Treatment + Trial, data = Ct.inhibition.data.CPI)
## 
## $Treatment
##                 diff      lwr       upr     p adj
## TA-No mite  8.368548 6.884043  9.853054 0.0000958
## TU-No mite 11.196861 9.712355 12.681367 0.0000351
## TU-TA       2.828313 1.343807  4.312819 0.0054190
## 
## $Trial
##           diff       lwr        upr     p adj
## 2-1 -0.9826232 -2.467129  0.5018826 0.1569732
## 3-1 -3.8490981 -5.333604 -2.3645923 0.0016935
## 3-2 -2.8664749 -4.350981 -1.3819690 0.0051564
# interaction plot
interaction.plot(Ct.inhibition.data.CPI$Treatment, Ct.inhibition.data.CPI$Trial, Ct.inhibition.data.CPI$Cq, type="l", leg.bty="o", leg.bg="grey95", lwd=2, ylab="Cq", xlab="Treatment", main="Treatment:Trial")

Validate models

# PI
Ct.inhibition.data.PI$m.PI.fit <- fitted(m.PI) # fitted values
Ct.inhibition.data.PI$m.PI.res <- rstandard(m.PI) # Pearson residuals

# CPI
Ct.inhibition.data.CPI$m.CPI.fit <- fitted(m.CPI) # fitted values
Ct.inhibition.data.CPI$m.CPI.res <- rstandard(m.CPI) # Pearson residuals

Residual distribution / Overdispersion

We assumed normal residuals. This is the least important regression assumption but its can be tested with a qq plot.

# PI
ggplot(Ct.inhibition.data.PI, aes(sample = m.PI.res)) + geom_qq() +
  geom_abline(intercept = 0, slope = 1) + theme_classic()

# CPI
ggplot(Ct.inhibition.data.CPI, aes(sample = m.CPI.res)) + geom_qq() +
  geom_abline(intercept = 0, slope = 1) + theme_classic()

Nice.

Residuals vs fitted values

Testing for:

Linearity - there should be no curvilinear pattern in the residuals.

Equal variance - the vertical spread of the residuals should be constant across all fitted values.

# PI
ggplot(Ct.inhibition.data.PI, aes(x = m.PI.fit, y = m.PI.res)) + 
  geom_point() +   geom_hline(yintercept = 0) +  geom_smooth() + theme_classic() 

# CPI
ggplot(Ct.inhibition.data.CPI, aes(x = m.CPI.fit, y = m.CPI.res)) + 
  geom_point() +   geom_hline(yintercept = 0) +  geom_smooth() + theme_classic()

OK, sample size low.

Residuals vs explanatory variables

Should be centered around 0, if not then model requires another explanatory variable(s), to account for observed variation.

# PI
ggplot(Ct.inhibition.data.PI, aes(x = Treatment, y = m.PI.res)) + 
  geom_boxplot() + theme_classic() + geom_hline(yintercept = 0) 

ggplot(Ct.inhibition.data.PI, aes(x = Trial, y = m.PI.res)) + 
  geom_boxplot() + theme_classic() + geom_hline(yintercept = 0) 

# CPI
ggplot(Ct.inhibition.data.CPI, aes(x = Treatment, y = m.CPI.res)) + 
  geom_boxplot() + theme_classic() + geom_hline(yintercept = 0)