vignettes/model_fitting.Rmd
model_fitting.Rmd
Download the source (data + Rmd) here.
# Load the lexical decision latency data from the RDS file.
LDT <- readRDS("lexical_decision_latencies.rds")

# Show the data as a tibble, which prints only the first rows together with
# the column types.
tibble::as_tibble(LDT)
#> # A tibble: 2,416 x 10
#> Character RT cld.Frequency cld.C1Frequency cld.C1Strokes
#> <chr> <dbl> <dbl> <dbl> <int>
#> 1 啊 584. 4215. 3242. 11
#> 2 哎 568. 164. 168. 9
#> 3 哀 579. 2.93 42.5 9
#> 4 唉 688. 141. 109. 10
#> 5 埃 720. 2.40 39.9 10
#> 6 挨 682. 20.4 24.7 10
#> 7 挨 682. 20.4 24.7 10
#> 8 癌 598. 1.47 28.5 17
#> 9 矮 601. 22.7 21.4 13
#> 10 艾 594. 7.49 67.9 5
#> # … with 2,406 more rows, and 5 more variables: pttFreq.2004_2009 <dbl>,
#> # pttFreq.2010_2014 <dbl>, pttFreq.2015_2019 <dbl>, pttFreq.all <dbl>,
#> # trad_strokes <dbl>
The code below attempts to reproduce the linear regression model fitted to the lexical decision latencies for single-character words, as reported in Table 4 of Sun et al. (2018):
# Baseline model: the transformed latency (-1000/RT) is regressed on the two
# log-transformed CLD frequency measures and the square root of the stroke
# count (cld.C1Strokes). No family is supplied, so glm() fits a gaussian model.
model_glm <- glm(
  -1000/RT ~ log(cld.C1Frequency) + log(cld.Frequency) + sqrt(cld.C1Strokes),
  data = LDT
)
summary(model_glm)
#>
#> Call:
#> glm(formula = -1000/RT ~ log(cld.C1Frequency) + log(cld.Frequency) +
#> sqrt(cld.C1Strokes), data = LDT)
#>
#> Deviance Residuals:
#> Min 1Q Median 3Q Max
#> -0.37894 -0.09127 -0.00326 0.08712 0.60895
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -1.639230 0.020316 -80.688 < 2e-16 ***
#> log(cld.C1Frequency) -0.044878 0.002658 -16.887 < 2e-16 ***
#> log(cld.Frequency) -0.008296 0.001857 -4.467 8.32e-06 ***
#> sqrt(cld.C1Strokes) 0.039358 0.005441 7.233 6.31e-13 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> (Dispersion parameter for gaussian family taken to be 0.01862335)
#>
#> Null deviance: 74.439 on 2415 degrees of freedom
#> Residual deviance: 44.920 on 2412 degrees of freedom
#> AIC: -2761.4
#>
#> Number of Fisher Scoring iterations: 2
# Baseline predictors plus log(pttFreq.all). Only rows with pttFreq.all > 0
# are passed to glm(), so the log term is finite for every row used.
model_glm <- glm(
  -1000/RT ~ log(pttFreq.all) + log(cld.C1Frequency) + log(cld.Frequency) +
    sqrt(cld.C1Strokes),
  data = LDT[LDT$pttFreq.all > 0, ]
)
summary(model_glm)
#>
#> Call:
#> glm(formula = -1000/RT ~ log(pttFreq.all) + log(cld.C1Frequency) +
#> log(cld.Frequency) + sqrt(cld.C1Strokes), data = LDT[LDT$pttFreq.all >
#> 0, ])
#>
#> Deviance Residuals:
#> Min 1Q Median 3Q Max
#> -0.38083 -0.09226 -0.00308 0.08752 0.60910
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -1.666673 0.040546 -41.105 < 2e-16 ***
#> log(pttFreq.all) -0.001910 0.002424 -0.788 0.43091
#> log(cld.C1Frequency) -0.044224 0.002770 -15.963 < 2e-16 ***
#> log(cld.Frequency) -0.007080 0.002425 -2.919 0.00354 **
#> sqrt(cld.C1Strokes) 0.039050 0.005454 7.160 1.07e-12 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> (Dispersion parameter for gaussian family taken to be 0.01864681)
#>
#> Null deviance: 74.153 on 2412 degrees of freedom
#> Residual deviance: 44.902 on 2408 degrees of freedom
#> AIC: -2754
#>
#> Number of Fisher Scoring iterations: 2
# Baseline predictors plus one log frequency term per PTT period
# (2004-2009, 2010-2014, 2015-2019). Rows are kept only when all three
# period frequencies are positive, so every log term is finite.
model_glm <- glm(
  -1000/RT ~ log(pttFreq.2004_2009) + log(pttFreq.2010_2014) +
    log(pttFreq.2015_2019) + log(cld.C1Frequency) + log(cld.Frequency) +
    sqrt(cld.C1Strokes),
  data = dplyr::filter(
    LDT,
    pttFreq.2004_2009 > 0,
    pttFreq.2010_2014 > 0,
    pttFreq.2015_2019 > 0
  )
)
summary(model_glm)
#>
#> Call:
#> glm(formula = -1000/RT ~ log(pttFreq.2004_2009) + log(pttFreq.2010_2014) +
#> log(pttFreq.2015_2019) + log(cld.C1Frequency) + log(cld.Frequency) +
#> sqrt(cld.C1Strokes), data = dplyr::filter(LDT, pttFreq.2004_2009 >
#> 0, pttFreq.2010_2014 > 0, pttFreq.2015_2019 > 0))
#>
#> Deviance Residuals:
#> Min 1Q Median 3Q Max
#> -0.38292 -0.09329 -0.00390 0.08833 0.60306
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -1.660333 0.043192 -38.441 < 2e-16 ***
#> log(pttFreq.2004_2009) -0.011294 0.004813 -2.347 0.01902 *
#> log(pttFreq.2010_2014) 0.019292 0.005927 3.255 0.00115 **
#> log(pttFreq.2015_2019) -0.009172 0.004408 -2.081 0.03755 *
#> log(cld.C1Frequency) -0.043271 0.002839 -15.242 < 2e-16 ***
#> log(cld.Frequency) -0.007949 0.002493 -3.189 0.00145 **
#> sqrt(cld.C1Strokes) 0.039175 0.005531 7.082 1.86e-12 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> (Dispersion parameter for gaussian family taken to be 0.01868542)
#>
#> Null deviance: 71.720 on 2380 degrees of freedom
#> Residual deviance: 44.359 on 2374 degrees of freedom
#> AIC: -2710.4
#>
#> Number of Fisher Scoring iterations: 2
# Variant of the baseline model in which sqrt(trad_strokes) takes the place
# of sqrt(cld.C1Strokes); the frequency predictors are unchanged.
model_glm <- glm(
  -1000/RT ~ log(cld.C1Frequency) + log(cld.Frequency) + sqrt(trad_strokes),
  data = LDT
)
summary(model_glm)
#>
#> Call:
#> glm(formula = -1000/RT ~ log(cld.C1Frequency) + log(cld.Frequency) +
#> sqrt(trad_strokes), data = LDT)
#>
#> Deviance Residuals:
#> Min 1Q Median 3Q Max
#> -0.39985 -0.09459 -0.00168 0.08732 0.61211
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -1.556067 0.017811 -87.368 < 2e-16 ***
#> log(cld.C1Frequency) -0.047682 0.002645 -18.029 < 2e-16 ***
#> log(cld.Frequency) -0.008186 0.001875 -4.365 1.33e-05 ***
#> sqrt(trad_strokes) 0.014045 0.004366 3.217 0.00131 **
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> (Dispersion parameter for gaussian family taken to be 0.01894603)
#>
#> Null deviance: 74.439 on 2415 degrees of freedom
#> Residual deviance: 45.698 on 2412 degrees of freedom
#> AIC: -2719.9
#>
#> Number of Fisher Scoring iterations: 2
# Model with both stroke-count predictors entered together:
# sqrt(cld.C1Strokes) and sqrt(trad_strokes), alongside the two log
# frequency terms.
model_glm <- glm(
  -1000/RT ~ log(cld.C1Frequency) + log(cld.Frequency) +
    sqrt(cld.C1Strokes) + sqrt(trad_strokes),
  data = LDT
)
summary(model_glm)
#>
#> Call:
#> glm(formula = -1000/RT ~ log(cld.C1Frequency) + log(cld.Frequency) +
#> sqrt(cld.C1Strokes) + sqrt(trad_strokes), data = LDT)
#>
#> Deviance Residuals:
#> Min 1Q Median 3Q Max
#> -0.38177 -0.09144 -0.00292 0.08739 0.60775
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -1.632200 0.021110 -77.320 < 2e-16 ***
#> log(cld.C1Frequency) -0.044730 0.002660 -16.816 < 2e-16 ***
#> log(cld.Frequency) -0.008410 0.001859 -4.523 6.40e-06 ***
#> sqrt(cld.C1Strokes) 0.044172 0.006713 6.580 5.74e-11 ***
#> sqrt(trad_strokes) -0.006538 0.005341 -1.224 0.221
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> (Dispersion parameter for gaussian family taken to be 0.0186195)
#>
#> Null deviance: 74.439 on 2415 degrees of freedom
#> Residual deviance: 44.892 on 2411 degrees of freedom
#> AIC: -2760.9
#>
#> Number of Fisher Scoring iterations: 2