library(ggplot2) # for general plotting
library(car)     # for ANOVA (Type II used, better than Type I when there is an unbalanced design)

Information of data source

Cathepsin L activity of TU and TU-A mites on Moneymaker tomato leaflets, 2 and 4 dpi following treatment with water (control) or E-64 (cystein protease inhibitor).

Read in the data and view structure to identify any issues in data formatting

E64.activity.data <- read.csv("~/Lab Stuff/Adapted mites/Tomato/E-64 assay/Enzyme activity R data.csv", header = TRUE) 
str(E64.activity.data)
## 'data.frame':    80 obs. of  5 variables:
##  $ Trial      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Mite.Strain: Factor w/ 2 levels "TU","TU-A": 1 1 1 1 1 2 2 2 2 2 ...
##  $ Treatment  : Factor w/ 2 levels "E-64","Water": 2 2 2 2 2 2 2 2 2 2 ...
##  $ dpi        : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ Activity   : num  14.7 13.1 14.9 15.1 15.2 ...
# Trial and Day as factors
E64.activity.data$Trial <- factor(E64.activity.data$Trial)
E64.activity.data$dpi <- factor(E64.activity.data$dpi)

str(E64.activity.data)
## 'data.frame':    80 obs. of  5 variables:
##  $ Trial      : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Mite.Strain: Factor w/ 2 levels "TU","TU-A": 1 1 1 1 1 2 2 2 2 2 ...
##  $ Treatment  : Factor w/ 2 levels "E-64","Water": 2 2 2 2 2 2 2 2 2 2 ...
##  $ dpi        : Factor w/ 2 levels "2","4": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Activity   : num  14.7 13.1 14.9 15.1 15.2 ...
# subset by mite strain for separate analyses
E64.activity.data.TU <- subset(E64.activity.data,  Mite.Strain =="TU") 
E64.activity.data.TA <- subset(E64.activity.data, Mite.Strain =="TU-A")

Formulate hypothesis

H0: There will be no difference between water and E-64 treated mites with respect to cathepsin L activity.

HA: E-64 treatment will decrease cathepsin L activity.

Conduct data exploration

Outliers in the response variable (Activity) within explanatory variables (Trial, dpi, Treatment).

# TU
ggplot(E64.activity.data.TU, aes(x = Trial, y = Activity)) + geom_boxplot() + theme_classic()

ggplot(E64.activity.data.TU, aes(x = dpi, y = Activity)) + geom_boxplot() + theme_classic()

ggplot(E64.activity.data.TU, aes(x = Treatment, y = Activity)) + geom_boxplot() + theme_classic()

# TU-A
ggplot(E64.activity.data.TA, aes(x = Trial, y = Activity)) + geom_boxplot() + theme_classic()

ggplot(E64.activity.data.TA, aes(x = dpi, y = Activity)) + geom_boxplot() + theme_classic()

ggplot(E64.activity.data.TA, aes(x = Treatment, y = Activity)) + geom_boxplot() + theme_classic()

Outliers not removed, probably represents real variability.

Spatial/temporal or other hierarchical aspects of sampling design

No, I am treating Trial as a main effect to check for reproducibility (not a random effect/blocking factor).

Interactions (is the quality of the data good enough to include them?)

Interaction between Trial and Treatment will be performed to test for reproducibility.

Interaction between Trial and dpi will be performed to test for reproducibility.

An interaction between dpi and Treatment will also be included to test for possible feedback loop and for planned comparisons.

Zero inflation in Y

No

Are categorical covariates balanced?

summary(E64.activity.data.TU)
##  Trial  Mite.Strain Treatment  dpi       Activity     
##  1:20   TU  :40     E-64 :20   2:20   Min.   : 4.414  
##  2:20   TU-A: 0     Water:20   4:20   1st Qu.: 9.242  
##                                       Median :11.055  
##                                       Mean   :11.179  
##                                       3rd Qu.:14.231  
##                                       Max.   :15.617
summary(E64.activity.data.TA)
##  Trial  Mite.Strain Treatment  dpi       Activity     
##  1:20   TU  : 0     E-64 :20   2:20   Min.   : 6.684  
##  2:20   TU-A:40     Water:20   4:20   1st Qu.: 7.805  
##                                       Median :10.034  
##                                       Mean   : 9.816  
##                                       3rd Qu.:11.536  
##                                       Max.   :13.848

Yes

Apply model

TU

# fit linear model and display model fit information and ANOVA table
# full model including 3 way interaction term - to verify it is not significant, if it is, interpretation of hypothesis testing will be problematic
m.TU.0 <- lm(Activity ~ dpi +Treatment + Trial + dpi:Treatment + Treatment:Trial + dpi:Trial + dpi:Trial:Treatment, data = E64.activity.data.TU)
summary(m.TU.0)
## 
## Call:
## lm(formula = Activity ~ dpi + Treatment + Trial + dpi:Treatment + 
##     Treatment:Trial + dpi:Trial + dpi:Trial:Treatment, data = E64.activity.data.TU)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4504 -0.4078  0.0188  0.5181  2.3174 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 13.9014     0.5041  27.576  < 2e-16 ***
## dpi4                        -2.0178     0.7129  -2.830  0.00797 ** 
## TreatmentWater               0.6678     0.7129   0.937  0.35593    
## Trial2                      -5.3192     0.7129  -7.461 1.72e-08 ***
## dpi4:TreatmentWater          2.0350     1.0082   2.018  0.05200 .  
## TreatmentWater:Trial2        1.6548     1.0082   1.641  0.11053    
## dpi4:Trial2                 -0.8374     1.0082  -0.831  0.41238    
## dpi4:TreatmentWater:Trial2  -0.8054     1.4259  -0.565  0.57611    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.127 on 32 degrees of freedom
## Multiple R-squared:  0.8974, Adjusted R-squared:  0.875 
## F-statistic:    40 on 7 and 32 DF,  p-value: 4.633e-14
Anova(m.TU.0)
## Anova Table (Type II tests)
## 
## Response: Activity
##                      Sum Sq Df  F value    Pr(>F)    
## dpi                  26.255  1  20.6626 7.404e-05 ***
## Treatment            53.423  1  42.0434 2.693e-07 ***
## Trial               261.310  1 205.6473 1.749e-15 ***
## dpi:Treatment         6.661  1   5.2421   0.02879 *  
## Treatment:Trial       3.919  1   3.0845   0.08861 .  
## dpi:Trial             3.845  1   3.0257   0.09157 .  
## dpi:Treatment:Trial   0.405  1   0.3191   0.57611    
## Residuals            40.661 32                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# linear model without non-significant 3-way interaction (want to reduce comparisons made in Tukey-Kramer post-hoc test)
m.TU <- lm(Activity ~ dpi +Treatment + Trial + dpi:Treatment + Treatment:Trial + dpi:Trial, data = E64.activity.data.TU)
summary(m.TU)
## 
## Call:
## lm(formula = Activity ~ dpi + Treatment + Trial + dpi:Treatment + 
##     Treatment:Trial + dpi:Trial, data = E64.activity.data.TU)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.49527 -0.37122 -0.03987  0.47179  2.21673 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            13.8007     0.4667  29.573  < 2e-16 ***
## dpi4                   -1.8165     0.6110  -2.973  0.00548 ** 
## TreatmentWater          0.8692     0.6110   1.422  0.16427    
## Trial2                 -5.1178     0.6110  -8.376 1.12e-09 ***
## dpi4:TreatmentWater     1.6323     0.7055   2.314  0.02706 *  
## TreatmentWater:Trial2   1.2521     0.7055   1.775  0.08518 .  
## dpi4:Trial2            -1.2401     0.7055  -1.758  0.08808 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.116 on 33 degrees of freedom
## Multiple R-squared:  0.8964, Adjusted R-squared:  0.8776 
## F-statistic:  47.6 on 6 and 33 DF,  p-value: 7.445e-15
Anova(m.TU)
## Anova Table (Type II tests)
## 
## Response: Activity
##                  Sum Sq Df  F value    Pr(>F)    
## dpi              26.255  1  21.0979 6.084e-05 ***
## Treatment        53.423  1  42.9293 1.918e-07 ***
## Trial           261.310  1 209.9801 7.290e-16 ***
## dpi:Treatment     6.661  1   5.3526   0.02706 *  
## Treatment:Trial   3.919  1   3.1495   0.08518 .  
## dpi:Trial         3.845  1   3.0894   0.08808 .  
## Residuals        41.067 33                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Calculate effect size and display
result.anova<-Anova(m.TU)
ss<-result.anova$"Sum Sq"  ##ss = sum of squares
pes<-ss/(ss+ss[length(ss)])  ##pes = partial e squared
pes[length(pes)]<-""
result.anova$"Part E Sq"<-pes
result.anova
## Anova Table (Type II tests)
## 
## Response: Activity
##                  Sum Sq Df  F value   Pr(>F) Part E Sq
## dpi              26.255  1  21.0979 0.000061   0.39000
## Treatment        53.423  1  42.9293 0.000000   0.56538
## Trial           261.310  1 209.9801 0.000000   0.86419
## dpi:Treatment     6.661  1   5.3526 0.027064   0.13956
## Treatment:Trial   3.919  1   3.1495 0.085177   0.08712
## dpi:Trial         3.845  1   3.0894 0.088078   0.08560
## Residuals        41.067 33
# perform post-hoc Tukey-Kramer test of contrasts
TukeyHSD(aov(Activity ~ dpi +Treatment + Trial + dpi:Treatment + Treatment:Trial + dpi:Trial, data = E64.activity.data.TU))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Activity ~ dpi + Treatment + Trial + dpi:Treatment + Treatment:Trial + dpi:Trial, data = E64.activity.data.TU)
## 
## $dpi
##         diff       lwr        upr    p adj
## 4-2 -1.62035 -2.338062 -0.9026384 6.08e-05
## 
## $Treatment
##               diff      lwr      upr p adj
## Water-E-64 2.31135 1.593638 3.029062 2e-07
## 
## $Trial
##         diff       lwr       upr p adj
## 2-1 -5.11185 -5.829562 -4.394138     0
## 
## $`dpi:Treatment`
##                    diff        lwr        upr     p adj
## 4:E-64-2:E-64   -2.4365 -3.7859687 -1.0870313 0.0001467
## 2:Water-2:E-64   1.4952  0.1457313  2.8446687 0.0252176
## 4:Water-2:E-64   0.6910 -0.6584687  2.0404687 0.5173184
## 2:Water-4:E-64   3.9317  2.5822313  5.2811687 0.0000000
## 4:Water-4:E-64   3.1275  1.7780313  4.4769687 0.0000025
## 4:Water-2:Water -0.8042 -2.1536687  0.5452687 0.3860202
## 
## $`Treatment:Trial`
##                    diff        lwr       upr     p adj
## Water:1-E-64:1   1.6853  0.3358313  3.034769 0.0097050
## E-64:2-E-64:1   -5.7379 -7.0873687 -4.388431 0.0000000
## Water:2-E-64:1  -2.8005 -4.1499687 -1.451031 0.0000174
## E-64:2-Water:1  -7.4232 -8.7726687 -6.073731 0.0000000
## Water:2-Water:1 -4.4858 -5.8352687 -3.136331 0.0000000
## Water:2-E-64:2   2.9374  1.5879313  4.286869 0.0000078
## 
## $`dpi:Trial`
##            diff       lwr        upr     p adj
## 4:1-2:1 -1.0003 -2.349769  0.3491687 0.2067180
## 2:2-2:1 -4.4918 -5.841269 -3.1423313 0.0000000
## 4:2-2:1 -6.7322 -8.081669 -5.3827313 0.0000000
## 2:2-4:1 -3.4915 -4.840969 -2.1420313 0.0000003
## 4:2-4:1 -5.7319 -7.081369 -4.3824313 0.0000000
## 4:2-2:2 -2.2404 -3.589869 -0.8909313 0.0004565
# plot interactions
interaction.plot(E64.activity.data.TU$Treatment, E64.activity.data.TU$Trial,E64.activity.data.TU$Activity, type="b", col=c(1:3), leg.bty="o", leg.bg="grey95", lwd=2, ylab="Activity", xlab="Treatment", main="Treatment:Trial")

interaction.plot(E64.activity.data.TU$dpi, E64.activity.data.TU$Trial, E64.activity.data.TU$Activity, type="b", col=c(1:3), leg.bty="o", leg.bg="grey95", lwd=2, ylab="Activity", xlab="dpi", main="dpi:Trial")

interaction.plot(E64.activity.data.TU$dpi, E64.activity.data.TU$Treatment, E64.activity.data.TU$Activity, type="b", col=c(1:3), leg.bty="o", leg.bg="grey95", lwd=2, ylab="Activity", xlab="dpi", main="dpi:Treatment")

TU-A

# fit linear model and display model fit information and ANOVA table
# full model including 3 way interaction term - to verify it is not significant, if it is, interpretation of hypothesis testing will be problematic
m.TA.0 <- lm(Activity ~ dpi +Treatment + Trial + dpi:Treatment + Treatment:Trial + dpi:Trial + dpi:Trial:Treatment, data = E64.activity.data.TA)
summary(m.TA.0)
## 
## Call:
## lm(formula = Activity ~ dpi + Treatment + Trial + dpi:Treatment + 
##     Treatment:Trial + dpi:Trial + dpi:Trial:Treatment, data = E64.activity.data.TA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5608 -0.3488 -0.0321  0.3989  1.6932 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  7.0924     0.2918  24.302  < 2e-16 ***
## dpi4                         0.2688     0.4127   0.651   0.5195    
## TreatmentWater               0.9810     0.4127   2.377   0.0236 *  
## Trial2                       4.6990     0.4127  11.385 8.65e-13 ***
## dpi4:TreatmentWater          0.6690     0.5837   1.146   0.2602    
## TreatmentWater:Trial2       -1.4942     0.5837  -2.560   0.0154 *  
## dpi4:Trial2                 -0.2940     0.5837  -0.504   0.6179    
## dpi4:TreatmentWater:Trial2   0.2328     0.8255   0.282   0.7797    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6526 on 32 degrees of freedom
## Multiple R-squared:  0.9225, Adjusted R-squared:  0.9055 
## F-statistic: 54.41 on 7 and 32 DF,  p-value: 5.589e-16
Anova(m.TA.0)
## Anova Table (Type II tests)
## 
## Response: Activity
##                      Sum Sq Df  F value    Pr(>F)    
## dpi                   2.647  1   6.2159  0.018024 *  
## Treatment             3.926  1   9.2197  0.004733 ** 
## Trial               149.235  1 350.4342 < 2.2e-16 ***
## dpi:Treatment         1.542  1   3.6212  0.066077 .  
## Treatment:Trial       4.746  1  11.1442  0.002149 ** 
## dpi:Trial             0.079  1   0.1852  0.669855    
## dpi:Treatment:Trial   0.034  1   0.0795  0.779739    
## Residuals            13.627 32                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# linear model without non-significant 3-way interaction (want to reduce comparisons made in Tukey-Kramer post-hoc test)
m.TA <- lm(Activity ~ dpi +Treatment + Trial + dpi:Treatment + Treatment:Trial + dpi:Trial, data = E64.activity.data.TA)
summary(m.TA)
## 
## Call:
## lm(formula = Activity ~ dpi + Treatment + Trial + dpi:Treatment + 
##     Treatment:Trial + dpi:Trial, data = E64.activity.data.TA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5317 -0.3196 -0.0321  0.3967  1.7223 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             7.1215     0.2692  26.458  < 2e-16 ***
## dpi4                    0.2106     0.3524   0.598  0.55419    
## TreatmentWater          0.9228     0.3524   2.619  0.01323 *  
## Trial2                  4.6408     0.3524  13.169 1.09e-14 ***
## dpi4:TreatmentWater     0.7854     0.4069   1.930  0.06223 .  
## TreatmentWater:Trial2  -1.3778     0.4069  -3.386  0.00185 ** 
## dpi4:Trial2            -0.1776     0.4069  -0.436  0.66536    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6434 on 33 degrees of freedom
## Multiple R-squared:  0.9223, Adjusted R-squared:  0.9082 
## F-statistic: 65.29 on 6 and 33 DF,  p-value: < 2.2e-16
Anova(m.TA)
## Anova Table (Type II tests)
## 
## Response: Activity
##                  Sum Sq Df  F value    Pr(>F)    
## dpi               2.647  1   6.3943  0.016412 *  
## Treatment         3.926  1   9.4842  0.004157 ** 
## Trial           149.235  1 360.4893 < 2.2e-16 ***
## dpi:Treatment     1.542  1   3.7251  0.062232 .  
## Treatment:Trial   4.746  1  11.4639  0.001847 ** 
## dpi:Trial         0.079  1   0.1905  0.665361    
## Residuals        13.661 33                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Calculate effect size and display
result.anova<-Anova(m.TA)
ss<-result.anova$"Sum Sq"  ##ss = sum of squares
pes<-ss/(ss+ss[length(ss)])  ##pes = partial e squared
pes[length(pes)]<-""
result.anova$"Part E Sq"<-pes
result.anova
## Anova Table (Type II tests)
## 
## Response: Activity
##                  Sum Sq Df  F value  Pr(>F) Part E Sq
## dpi               2.647  1   6.3943 0.01641   0.16231
## Treatment         3.926  1   9.4842 0.00416   0.22324
## Trial           149.235  1 360.4893 0.00000   0.91613
## dpi:Treatment     1.542  1   3.7251 0.06223   0.10143
## Treatment:Trial   4.746  1  11.4639 0.00185   0.25783
## dpi:Trial         0.079  1   0.1905 0.66536   0.00574
## Residuals        13.661 33
# perform post-hoc Tukey-Kramer test of contrasts
TukeyHSD(aov(Activity ~ dpi +Treatment + Trial + dpi:Treatment + Treatment:Trial + dpi:Trial, data = E64.activity.data.TA))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Activity ~ dpi + Treatment + Trial + dpi:Treatment + Treatment:Trial + dpi:Trial, data = E64.activity.data.TA)
## 
## $dpi
##       diff       lwr       upr     p adj
## 4-2 0.5145 0.1005473 0.9284527 0.0164123
## 
## $Treatment
##              diff       lwr      upr    p adj
## Water-E-64 0.6266 0.2126473 1.040553 0.004157
## 
## $Trial
##       diff      lwr      upr p adj
## 2-1 3.8631 3.449147 4.277053     0
## 
## $`dpi:Treatment`
##                   diff        lwr       upr     p adj
## 4:E-64-2:E-64   0.1218 -0.6565297 0.9001297 0.9740939
## 2:Water-2:E-64  0.2339 -0.5444297 1.0122297 0.8479354
## 4:Water-2:E-64  1.1411  0.3627703 1.9194297 0.0020065
## 2:Water-4:E-64  0.1121 -0.6662297 0.8904297 0.9795801
## 4:Water-4:E-64  1.0193  0.2409703 1.7976297 0.0063151
## 4:Water-2:Water 0.9072  0.1288703 1.6855297 0.0172000
## 
## $`Treatment:Trial`
##                    diff        lwr       upr     p adj
## Water:1-E-64:1   1.3155  0.5371703 2.0938297 0.0003618
## E-64:2-E-64:1    4.5520  3.7736703 5.3303297 0.0000000
## Water:2-E-64:1   4.4897  3.7113703 5.2680297 0.0000000
## E-64:2-Water:1   3.2365  2.4581703 4.0148297 0.0000000
## Water:2-Water:1  3.1742  2.3958703 3.9525297 0.0000000
## Water:2-E-64:2  -0.0623 -0.8406297 0.7160297 0.9963430
## 
## $`dpi:Trial`
##           diff        lwr     upr     p adj
## 4:1-2:1 0.6033 -0.1750297 1.38163 0.1752639
## 2:2-2:1 3.9519  3.1735703 4.73023 0.0000000
## 4:2-2:1 4.3776  3.5992703 5.15593 0.0000000
## 2:2-4:1 3.3486  2.5702703 4.12693 0.0000000
## 4:2-4:1 3.7743  2.9959703 4.55263 0.0000000
## 4:2-2:2 0.4257 -0.3526297 1.20403 0.4609470
# plot interactions
interaction.plot(E64.activity.data.TA$Treatment, E64.activity.data.TA$Trial,E64.activity.data.TA$Activity, type="b", col=c(1:3), leg.bty="o", leg.bg="grey95", lwd=2, ylab="Activity", xlab="Treatment", main="Treatment:Trial")

interaction.plot(E64.activity.data.TA$dpi, E64.activity.data.TA$Trial, E64.activity.data.TA$Activity, type="b", col=c(1:3), leg.bty="o", leg.bg="grey95", lwd=2, ylab="Activity", xlab="dpi", main="dpi:Trial")

interaction.plot(E64.activity.data.TA$dpi, E64.activity.data.TA$Treatment, E64.activity.data.TA$Activity, type="b", col=c(1:3), leg.bty="o", leg.bg="grey95", lwd=2, ylab="Activity", xlab="dpi", main="dpi:Treatment")