library(ggplot2) # for general plotting
library(car)     # for ANOVA (Type II used, better than Type I when there is an unbalanced design)

Information of data source

RT-qPCR performed on samples used for % inhibition assay, to validate gene expression.

Read in the data and view structure to identify any issues in data formatting

Ct.inhibition.data <- read.csv("~/Lab Stuff/Adapted mites/Tomato/% inhibition assays/qPCR validation of PI induction/Ct R data.csv", header = TRUE)

# Trial as factor
Ct.inhibition.data$Trial <- factor(Ct.inhibition.data$Trial)

str(Ct.inhibition.data)
## 'data.frame':    12 obs. of  4 variables:
##  $ GOI      : Factor w/ 2 levels "Cystein PI","PI I": 2 2 2 2 2 2 1 1 1 1 ...
##  $ Trial    : Factor w/ 2 levels "1","2": 1 2 1 2 1 2 1 2 1 2 ...
##  $ Treatment: Factor w/ 3 levels "No mite","TA",..: 1 1 3 3 2 2 1 1 3 3 ...
##  $ Cq       : num  -7.359 -8.47 1.134 0.813 -1.719 ...
# Subset GOIs
Ct.inhibition.data.PI <-subset(Ct.inhibition.data,GOI =="PI I")
Ct.inhibition.data.CPI <-subset(Ct.inhibition.data,GOI =="Cystein PI")

Formulate hypothesis

H0: There will be no difference in gene expression levels between treatments.

HA: Gene expression will differ by treatment.

Conduct data exploration

Outliers in the response variable (Cq) within explanatory variables (Trial, Treatment).

ggplot(Ct.inhibition.data.PI, aes(x = Trial, y = Cq)) + geom_boxplot()+ theme_classic()

ggplot(Ct.inhibition.data.PI, aes(x = Treatment, y = Cq)) + geom_boxplot()+ theme_classic()

ggplot(Ct.inhibition.data.CPI, aes(x = Trial, y = Cq)) + geom_boxplot()+ theme_classic()

ggplot(Ct.inhibition.data.CPI, aes(x = Treatment, y = Cq)) + geom_boxplot()+ theme_classic()

No outliers.

Collinearity of the explanatory variables

Des not apply, both categorical/factorial.

Spatial/temporal or other hierarchical aspects of sampling design

No, I am treating Trial as a main effect to check for reproducibility (not a random effect/blocking factor).

Interactions (is the quality of the data good enough to include them?)

No, sample size is not high enough (not enough degrees of freedom to perform all contrasts involved when including the interaction).

Zero inflation in Y

No

Are categorical covariates balanced?

summary(Ct.inhibition.data.PI)
##          GOI    Trial   Treatment       Cq         
##  Cystein PI:0   1:3   No mite:2   Min.   :-8.4696  
##  PI I      :6   2:3   TA     :2   1st Qu.:-6.0170  
##                       TU     :2   Median :-1.8547  
##                                   Mean   :-2.9320  
##                                   3rd Qu.: 0.1798  
##                                   Max.   : 1.1336
summary(Ct.inhibition.data.CPI)
##          GOI    Trial   Treatment       Cq         
##  Cystein PI:6   1:3   No mite:2   Min.   :-10.779  
##  PI I      :0   2:3   TA     :2   1st Qu.: -9.555  
##                       TU     :2   Median : -5.860  
##                                   Mean   : -5.981  
##                                   3rd Qu.: -2.449  
##                                   Max.   : -1.289

Yes

Apply model

Proteinase inhibitor I - Solyc09g084480

# fit linear model and display model fit information and ANOVA table
m.PI <- lm(Cq ~ Treatment + Trial , data = Ct.inhibition.data.PI)
summary(m.PI)
## 
## Call:
## lm(formula = Cq ~ Treatment + Trial, data = Ct.inhibition.data.PI)
## 
## Residuals:
##       1       2       3       4       5       6 
##  0.2714 -0.2714 -0.1234  0.1234 -0.1480  0.1480 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -7.6305     0.2718 -28.079  0.00127 **
## TreatmentTA   6.0597     0.3328  18.207  0.00300 **
## TreatmentTU   8.8875     0.3328  26.703  0.00140 **
## Trial2       -0.5677     0.2718  -2.089  0.17191   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3328 on 2 degrees of freedom
## Multiple R-squared:  0.9973, Adjusted R-squared:  0.9933 
## F-statistic: 249.6 on 3 and 2 DF,  p-value: 0.003993
Anova(m.PI) 
## Anova Table (Type II tests)
## 
## Response: Cq
##           Sum Sq Df F value   Pr(>F)   
## Treatment 82.469  2 372.241 0.002679 **
## Trial      0.483  1   4.364 0.171911   
## Residuals  0.222  2                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Calculate effect size and display
result.anova<-Anova(m.PI)
ss<-result.anova$"Sum Sq"    ##ss = sum of squares
pes<-ss/(ss+ss[length(ss)])  ##pes = partial e squared
pes[length(pes)]<-""
result.anova$"Part E Sq"<-pes
result.anova
## Anova Table (Type II tests)
## 
## Response: Cq
##           Sum Sq Df F value   Pr(>F) Part E Sq
## Treatment 82.469  2 372.241 0.002679   0.99732
## Trial      0.483  1   4.364 0.171911   0.68573
## Residuals  0.222  2
# perform post-hoc Tukey-Kramer test of contrasts
TukeyHSD(aov(Cq ~ Treatment + Trial, data = Ct.inhibition.data.PI))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Cq ~ Treatment + Trial, data = Ct.inhibition.data.PI)
## 
## $Treatment
##                diff       lwr       upr     p adj
## TA-No mite 6.059683 4.0990825  8.020284 0.0053538
## TU-No mite 8.887487 6.9268861 10.848088 0.0017090
## TU-TA      2.827804 0.8672028  4.788404 0.0246802
## 
## $Trial
##           diff       lwr       upr     p adj
## 2-1 -0.5676936 -1.735943 0.6005554 0.1718727
# interaction plot
interaction.plot(Ct.inhibition.data.PI$Treatment, Ct.inhibition.data.PI$Trial, Ct.inhibition.data.PI$Cq, type="l", leg.bty="o", leg.bg="grey95", lwd=2, ylab="Cq", xlab="Treatment", main="Treatment:Trial")

Cystein Proteinase inhibitor - Solyc00g071180

# fit linear model and display model fit information and ANOVA table
m.CPI <- lm(Cq ~ Treatment + Trial, data = Ct.inhibition.data.CPI)
summary(m.CPI)
## 
## Call:
## lm(formula = Cq ~ Treatment + Trial, data = Ct.inhibition.data.CPI)
## 
## Residuals:
##        7        8        9       10       11       12 
##  0.04864 -0.04864  0.03315 -0.03315 -0.08180  0.08180 
## 
## Coefficients:
##              Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) -10.82743    0.08229 -131.582 5.78e-05 ***
## TreatmentTA   4.89818    0.10078   48.603 0.000423 ***
## TreatmentTU   9.43175    0.10078   93.588 0.000114 ***
## Trial2        0.13940    0.08229    1.694 0.232337    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1008 on 2 degrees of freedom
## Multiple R-squared:  0.9998, Adjusted R-squared:  0.9994 
## F-statistic:  2922 on 3 and 2 DF,  p-value: 0.0003421
Anova(m.CPI)
## Anova Table (Type II tests)
## 
## Response: Cq
##           Sum Sq Df   F value    Pr(>F)    
## Treatment 89.002  2 4381.5146 0.0002282 ***
## Trial      0.029  1    2.8698 0.2323367    
## Residuals  0.020  2                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Calculate effect size and display
result.anova<-Anova(m.CPI)
ss<-result.anova$"Sum Sq"    ##ss = sum of squares
pes<-ss/(ss+ss[length(ss)])  ##pes = partial e squared
pes[length(pes)]<-""
result.anova$"Part E Sq"<-pes
result.anova
## Anova Table (Type II tests)
## 
## Response: Cq
##           Sum Sq Df   F value   Pr(>F) Part E Sq
## Treatment 89.002  2 4381.5146 0.000228   0.99977
## Trial      0.029  1    2.8698 0.232337   0.58931
## Residuals  0.020  2
# perform post-hoc Tukey-Kramer test of contrasts
TukeyHSD(aov(Cq ~ Treatment + Trial, data = Ct.inhibition.data.CPI))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Cq ~ Treatment + Trial, data = Ct.inhibition.data.CPI)
## 
## $Treatment
##                diff      lwr       upr    p adj
## TA-No mite 4.898175 4.304507  5.491844 1.58e-05
## TU-No mite 9.431750 8.838081 10.025418 0.00e+00
## TU-TA      4.533574 3.939906  5.127243 4.10e-05
## 
## $Trial
##          diff        lwr       upr     p adj
## 2-1 0.1393974 -0.2143476 0.4931424 0.2323056
# interaction plot
interaction.plot(Ct.inhibition.data.CPI$Treatment, Ct.inhibition.data.CPI$Trial, Ct.inhibition.data.CPI$Cq, type="l", leg.bty="o", leg.bg="grey95", lwd=2, ylab="Cq", xlab="Treatment", main="Treatment:Trial")

Validate models

# PI
Ct.inhibition.data.PI$m.PI.fit <- fitted(m.PI)    # fitted values
Ct.inhibition.data.PI$m.PI.res <- rstandard(m.PI) # Pearson residuals

# CPI
Ct.inhibition.data.CPI$m.CPI.fit <- fitted(m.CPI)    # fitted values
Ct.inhibition.data.CPI$m.CPI.res <- rstandard(m.CPI) # Pearson residuals

Residual distribution / Overdispersion

# PI
ggplot(Ct.inhibition.data.PI, aes(sample = m.PI.res)) + geom_qq() +
  geom_abline(intercept = 0, slope = 1) + theme_classic()

# CPI
ggplot(Ct.inhibition.data.CPI, aes(sample = m.CPI.res)) + geom_qq() +
  geom_abline(intercept = 0, slope = 1) + theme_classic()

Nice.

Residuals vs fitted values

Testing for:

Linearity - there should be no curvilinear pattern in the residuals.

Equal variance - the vertical spread of the residuals should be constant across all fitted values.

# PI
ggplot(Ct.inhibition.data.PI, aes(x = m.PI.fit, y = m.PI.res)) + 
  geom_point() +   geom_hline(yintercept = 0) +  geom_smooth() + theme_classic() 

# CPI
ggplot(Ct.inhibition.data.CPI, aes(x = m.CPI.fit, y = m.CPI.res)) + 
  geom_point() +   geom_hline(yintercept = 0) +  geom_smooth() + theme_classic()

Not good, sample size too low.

Residuals vs explanatory variables

Should be centered around 0, if not then model requires another explanatory variable(s), to account for observed variation.

# PI
ggplot(Ct.inhibition.data.PI, aes(x = Treatment, y = m.PI.res)) + 
  geom_boxplot() + theme_classic() + geom_hline(yintercept = 0) 

ggplot(Ct.inhibition.data.PI, aes(x = Trial, y = m.PI.res)) + 
  geom_boxplot() + theme_classic() + geom_hline(yintercept = 0) 

# CPI
ggplot(Ct.inhibition.data.CPI, aes(x = Treatment, y = m.CPI.res)) + 
  geom_boxplot() + theme_classic() + geom_hline(yintercept = 0) 

ggplot(Ct.inhibition.data.CPI, aes(x = Trial, y = m.CPI.res)) + 
  geom_boxplot() + theme_classic() + geom_hline(yintercept = 0)