3 秩检验
3.1 主要理论
线性秩统计量:定义、基本性质
符号秩统计量:定义、基本性质,Wilcoxon符号秩检验(对称中心检验、成对数据检验)
wilcox.test(paired = TRUE)
位置参数的检验:Wilcoxon秩和检验
wilcox.test(paired = FALSE)
,Mann-Whitney检验(统计量与Wilcoxon秩和统计量等价)。尺度参数的检验:Mood检验
mood.test
、平方秩检验。多个独立样本问题:Kruskal-Wallis检验
kruskal.test
、Dunn检验DunnTest
(DescTools
包,当Kruskal-Wallis检验拒绝原假设时使用)、Jonckheere-Terpstra检验JonckheereTerpstraTest
(DescTools
包)、区组设计:Friedman检验
friedman.test
、Page检验、Cochran检验、Durbin检验durbinTest
(PMCMRplus
包)相关分析:Spearman秩相关分析、Kendall \(\tau\) 相关检验
cor.test
、多变量Kendall协同系数检验KendallW
(DescTools
包)线性回归的非参数方法
cor.test
3.2 函数实现
将上述提到的平方秩检验,Page检验,Cochran检验进行编码实现。
3.2.1 平方秩检验
# Squared-rank test for equal scale (dispersion) of two independent samples.
#
# Arguments:
#   x, y         sample vectors from the two populations.
#   alternative  direction of the alternative hypothesis: "two.sided"
#                (the default), "less" or "greater".
#
# Value: an object of class "htest". Without ties, `statistic` is the raw sum
# of squared ranks and the p-value comes from its normal approximation; with
# ties, `statistic` is the tie-adjusted standardised value.
# A warning is raised when there are no ties and either sample has 10 or
# fewer observations.
square_rank.test <- function(x, y,
                             alternative = c("two.sided", "less", "greater")) {
  if (!is.vector(x) || !is.vector(y)) {
    stop("'x' and 'y' must be vector")
  }
  # Collapse the choices vector to a single string: switch() below needs a
  # scalar, so relying on the default would otherwise fail.
  alternative <- match.arg(alternative)

  n1 <- length(x)
  n2 <- length(y)
  n <- n1 + n2
  if (n1 == 0L || n2 == 0L) {
    stop("'x' and 'y' must be non-empty")
  }

  U <- abs(x - mean(x))  # absolute deviations from each sample's own mean
  V <- abs(y - mean(y))
  r <- rank(c(U, V), ties.method = "average")  # ranks in the pooled deviations
  stat <- sum(r[seq_len(n1)]^2)  # sum of squared ranks belonging to x

  # Normal approximation without ties. The statistic sums n1 squared ranks,
  # so its null mean is n1 * (n + 1) * (2n + 1) / 6 (n1, not n2).
  z <- (stat - n1 * (n + 1) * (2 * n + 1) / 6) /
    sqrt(n1 * n2 * (n + 1) * (2 * n + 1) * (8 * n + 11) / 180)
  pl <- pnorm(z)
  pr <- pnorm(z, lower.tail = FALSE)
  small_sample <- n1 <= 10 || n2 <= 10  # triggers the small-sample warning
  tie_label <- ""

  # With ties, standardise using the observed squared ranks instead.
  if (length(unique(r)) != n) {
    mean_sq <- mean(r^2)
    s2 <- n1 * n2 * sum(r^4) / (n * (n - 1)) - n1 * n2 * mean_sq^2 / (n - 1)
    stat <- (stat - n1 * mean_sq) / sqrt(s2)  # tie-adjusted statistic
    pl <- pnorm(stat)
    pr <- pnorm(stat, lower.tail = FALSE)
    small_sample <- FALSE
    tie_label <- "(with ties)"
  }

  PVAL <- switch(alternative,
    less = pl,
    greater = pr,
    two.sided = 2 * min(pl, pr)
  )
  names(stat) <- "square rank test statistic"
  names(n1) <- "the number of x"
  names(n2) <- "the number of y"
  names(n) <- "the number of x and y"
  result <- list(
    statistic = stat,
    parameter = c(n1, n2, n),
    p.value = PVAL, alternative = alternative,
    method = paste("Square rank test", tie_label),
    data.name = paste(deparse(substitute(x)), "and", deparse(substitute(y)))
  )
  if (small_sample) {
    warning("The sizes of x and y are less than 10. The test may be inaccurate.")
  }
  structure(result, class = "htest")
}
测试一下:
x <- c(74, 75, 76, 79, 82, 65, 86, 58)
y <- c(72, 73, 69, 74, 68, 75, 67, 76, 66)
square_rank.test(x, y, alternative = "two.sided")
#> Warning in square_rank.test(x, y, alternative = "two.sided"): The sizes of x and
#> y are less than 10. The test may be inaccurate.
#>
#> Square rank test
#>
#> data: x and y
#> square rank test statistic = 1108, the number of x = 8, the number of y
#> = 9, the number of x and y = 17, p-value = 0.4
#> alternative hypothesis: two.sided
3.2.2 Page检验
备择假设为单调趋势。
# Page test for an ordered (monotone) alternative in a complete block design.
#
# Arguments:
#   x            design matrix; rows are treatments, columns are blocks.
#   alternative  "increasing" (the default) for a monotone upward trend over
#                the row order, "decreasing" for a downward trend.
#
# Value: an object of class "htest" holding the Page statistic L and the
# p-value from its normal approximation.
page.test <- function(x, alternative = c("increasing", "decreasing")) {
  if (!is.matrix(x)) {
    stop("'x' must be matrix")
  }
  # Collapse the choices vector to a single string: switch() below needs a
  # scalar, so relying on the default would otherwise fail.
  alternative <- match.arg(alternative)

  k <- nrow(x)  # number of rows (treatments)
  b <- ncol(x)  # number of columns (blocks)
  r <- apply(x, 2, rank)   # rank within each column
  R <- apply(r, 1, sum)    # rank sum of each row
  L <- sum(seq_len(k) * R) # Page statistic

  # Variance of L, reduced by the sizes of the ties found within each column.
  tie_sizes <- unlist(apply(x, 2, table))
  g <- sum(tie_sizes^3 - tie_sizes)
  sigma2 <- k * (k + 1) * (b * k * (k^2 - 1) - g) / 144
  z <- (L - b * k * (k + 1)^2 / 4) / sqrt(sigma2)

  PVAL <- switch(alternative,
    increasing = pnorm(z, lower.tail = FALSE),
    decreasing = pnorm(z)
  )
  names(L) <- "Page test statistic"
  result <- list(
    statistic = L,
    parameter = list(blocks = b, treatments = k),
    p.value = PVAL, alternative = alternative,
    method = "Page test",
    data.name = paste(deparse(substitute(x)))
  )
  structure(result, class = "htest")
}
3.2.3 Cochran检验
只取二元数据的完全区组设计的假设检验问题。
# Cochran Q test for binary responses in a complete block design.
#
# Arguments:
#   x  logical matrix; rows are treatments, columns are blocks.
#
# Value: an object of class "htest" holding the statistic Q and its p-value
# from a chi-squared distribution with k - 1 degrees of freedom, where k is
# the number of rows.
cochran.test <- function(x) {
  if (!is.matrix(x) || !is.logical(x)) {
    stop("'x' must be a logical matrix")
  }
  k <- nrow(x)  # number of rows (treatments)
  b <- ncol(x)  # number of columns (blocks)
  Ni <- apply(x, 1, sum)  # row totals
  Lj <- apply(x, 2, sum)  # column totals
  N <- sum(x)             # grand total
  Q <- (k * (k - 1)^2 * var(Ni)) / (k * N - sum(Lj^2))  # test statistic

  PVAL <- pchisq(Q, k - 1, lower.tail = FALSE)
  names(Q) <- "Cochran test statistic"
  result <- list(
    statistic = Q,
    parameter = list(df = k - 1, blocks = b, treatments = k),
    p.value = PVAL,
    method = "Cochran test",
    data.name = deparse(substitute(x))
  )
  structure(result, class = "htest")
}
3.3 函数测试
对课本上的一些例子使用上述函数进行测试。先载入相关包:
library(DescTools)
library(PMCMRplus)
例5.2.2
Wilcoxon符号秩检验:
x <- c(34.3, 35.8, 35.4, 34.8, 35.2, 35.1, 35.0, 35.5) - 35
wilcox.test(x, alternative = "greater", mu = 0, exact = FALSE)
#>
#> Wilcoxon signed rank test with continuity correction
#>
#> data: x
#> V = 20, p-value = 0.2
#> alternative hypothesis: true location is greater than 0
例5.2.3
Wilcoxon符号秩检验:
x <- c(42, 51, 31, 61, 44, 55, 48)
y <- c(38, 53, 36, 52, 33, 49, 36)
wilcox.test(x, y, mu = 0, alternative = "greater", paired = TRUE)
#>
#> Wilcoxon signed rank test
#>
#> data: x and y
#> V = 24, p-value = 0.05
#> alternative hypothesis: true location shift is greater than 0
例5.3.1
Wilcoxon秩和检验:
x <- c(1.20, 1.63, 2.26, 1.87, 2.20, 1.30)
y <- c(0.94, 1.26, 1.15)
wilcox.test(x, y, mu = 0, alternative = "greater", paired = FALSE)
#>
#> Wilcoxon rank sum test
#>
#> data: x and y
#> W = 17, p-value = 0.02
#> alternative hypothesis: true location shift is greater than 0
例5.4.1
Mood检验:
x <- c(4.5, 6.5, 7.0, 10.0, 12.0)
y <- c(6.0, 7.2, 8.0, 9.0, 9.8)
mood.test(x, y, alternative = "two.sided")
#>
#> Mood two-sample test of scale
#>
#> data: x and y
#> Z = 2, p-value = 0.1
#> alternative hypothesis: two.sided
例5.4.2
平方秩检验:
x <- c(74, 74, 76, 79, 82, 65, 86, 58)
y <- c(72, 73, 69, 74, 68, 75, 67, 76, 66)
square_rank.test(x, y, alternative = "two.sided")
#>
#> Square rank test (with ties)
#>
#> data: x and y
#> square rank test statistic = 1, the number of x = 8, the number of y =
#> 9, the number of x and y = 17, p-value = 0.2
#> alternative hypothesis: two.sided
例5.5.1
Kruskal-Wallis检验:
A <- c(73, 64, 67, 62, 70)
B <- c(84, 80, 81, 77)
C <- c(82, 79, 71, 75)
x <- list(A, B, C)
kruskal.test(x)
#>
#> Kruskal-Wallis rank sum test
#>
#> data: x
#> Kruskal-Wallis chi-squared = 8, df = 2, p-value = 0.01
这里课本的\(p\)值好像有误。
Dunn检验进行两两比较:
# Dunn's pairwise comparisons on the groups stored in the list x
DunnTest(x)
#>
#> Dunn's test of multiple comparisons using rank sums : holm
#>
#> mean.rank.diff pval
#> 2-1 7.30 0.0156 *
#> 3-1 5.05 0.1065
#> 3-2 -2.25 0.4139
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
例5.5.2
Jonckheere-Terpstra检验:
# Build the list of samples first
A <- c(260, 200, 240, 170, 270, 205, 190, 200, 250, 200)
B <- c(310, 310, 190, 225, 170, 210, 280, 210, 280, 240)
C <- c(225, 260, 360, 310, 270, 380, 240, 295, 260, 250)
x <- list(A, B, C)
JonckheereTerpstraTest(x, alternative = "increasing")
#> Error in JonckheereTerpstraTest.default(x, alternative = "increasing"): group should be numeric or ordered factor
不知道为什么会报错,查了一下源代码,原因是当参数只给x
作为一个列表时,会自动计算g
,但是计算出来的g
是一个无序的factor
,下面的运算都需要g
是一个有序的factor
,不知道是不是代码的问题。
现手动计算g
传入参数:
k <- length(x)                    # number of groups
l <- sapply(x, length)            # size of each group
g <- ordered(rep(seq_len(k), l))  # ordered group label for every observation
x <- unlist(x)
JonckheereTerpstraTest(x, g, alternative = "increasing")
#> Warning in JonckheereTerpstraTest.default(x, g, alternative = "increasing"): Sample size > 100 or data with ties
#> p-value based on normal approximation. Specify nperm for permutation p-value
#>
#> Jonckheere-Terpstra test
#>
#> data: x and g
#> JT = 224, p-value = 0.002
#> alternative hypothesis: increasing
例5.6.1
Friedman检验:
# Build the matrix first: rows are blocks, columns are treatments
# Data for each treatment
A <- c(14, 19, 17, 17, 16, 15, 18, 16)
B <- c(23, 25, 22, 21, 24, 26, 26, 22)
C <- c(26, 25, 29, 28, 28, 27, 27, 30)
D <- c(30, 33, 28, 27, 32, 26, 36, 32)
y <- matrix(c(A, B, C, D), ncol = 4)
y
#> [,1] [,2] [,3] [,4]
#> [1,] 14 23 26 30
#> [2,] 19 25 25 33
#> [3,] 17 22 29 28
#> [4,] 17 21 28 27
#> [5,] 16 24 28 32
#> [6,] 15 26 27 26
#> [7,] 18 26 27 36
#> [8,] 16 22 30 32
# Friedman rank sum test on the matrix y
friedman.test(y)
#>
#> Friedman rank sum test
#>
#> data: y
#> Friedman chi-squared = 21, df = 3, p-value = 1e-04
例5.6.2
Page检验:
x <- c(40, 52, 80, 52, 76, 100, 34, 52, 51, 35, 53, 65)
x <- matrix(x, 3, 4)
page.test(x, alternative = "increasing")
#>
#> Page test
#>
#> data: x
#> Page test statistic = 55, blocks = 4, treatments = 3, p-value = 0.007
#> alternative hypothesis: increasing
例5.6.3
Cochran检验:
x1 <- c(rep(1, 8), 0, rep(1, 5), 0)
x2 <- c(1, rep(0, 3), 1, 1, 0, 1, 0, 0, 0, rep(1, 4))
x3 <- c(0, 0, 0, 1, rep(0, 7), 1, 0, 0, 0)
x <- matrix(as.logical(c(x1, x2, x3)), 3, 15, byrow = TRUE)
cochran.test(x)
#>
#> Cochran test
#>
#> data: x
#> Cochran test statistic = 14, df = 2, blocks = 15, treatments = 3,
#> p-value = 9e-04
例5.6.4
Durbin检验:
# Build the matrix first; cells with no observation are NA
A <- c(3.5, 2.9, 3.7, NA)
B <- c(3.7, 3.1, NA, 4.4)
C <- c(4.1, NA, 4.9, 5.8)
D <- c(NA, 4.5, 5.7, 5.9)
x <- matrix(c(A, B, C, D), ncol = 4)
durbinTest(x)
#>
#> Durbin's rank sum test for a two-way
#> balanced incomplete block design
#>
#> data: x
#> Durbin chi-squared = 8, df = 3, p-value = 0.06
例5.7.1
Spearman秩相关检验:
x <- c(452, 318, 310, 409, 405, 332, 497, 321, 406, 413, 334, 467)
y <- c(107, 147, 151, 120, 123, 135, 100, 143, 117, 118, 141, 100)
cor.test(x, y, alternative = "two.sided", method = "spearman", exact = FALSE)
#>
#> Spearman's rank correlation rho
#>
#> data: x and y
#> S = 563, p-value = 2e-07
#> alternative hypothesis: true rho is not equal to 0
#> sample estimates:
#> rho
#> -0.97
例5.7.2
Kendall \(\tau\)相关检验:
x <- c(86, 78, 65, 88, 90, 90, 80, 77, 76, 68, 85, 70)
y <- c(71, 69, 62, 78, 82, 75, 73, 65, 66, 60, 70, 61)
cor.test(x, y, alternative = "two.sided", method = "kendall", exact = FALSE)
#>
#> Kendall's rank correlation tau
#>
#> data: x and y
#> z = 4, p-value = 3e-04
#> alternative hypothesis: true tau is not equal to 0
#> sample estimates:
#> tau
#> 0.809
???\(p\)值又跟书本不同。
例5.7.3
Kendall协同系数检验:
# Build the matrix first; each column is one rating agency
A <- c(12, 9, 2, 4, 10, 7, 11, 6, 8, 5, 3, 1)
B <- c(10, 1, 3, 12, 8, 7, 5, 9, 6, 11, 4, 2)
C <- c(11, 8, 4, 12, 2, 10, 9, 7, 5, 6, 3, 1)
D <- c(9, 1, 2, 10, 12, 6, 7, 4, 8, 5, 11, 3)
x <- matrix(c(A, B, C, D), ncol = 4)
KendallW(x, test = TRUE, correct = TRUE)
#>
#> Kendall's coefficient of concordance Wt
#>
#> data: x
#> Kendall chi-squared = 22, df = 11, subjects = 12, raters = 4, p-value =
#> 0.02
#> alternative hypothesis: Wt is greater 0
#> sample estimates:
#> Wt
#> 0.503
3.4 习题5
5.2
解:
对称中心检验问题: \[ \theta=320 \qquad v.s. \qquad \theta>320 \]
# Load the data
x <- c(310, 350, 370, 375, 385, 400, 415, 425, 440, 295,
       325, 295, 250, 340, 295, 365, 375, 360, 385)
wilcox.test(x, alternative = "greater", mu = 320, exact = FALSE)
#>
#> Wilcoxon signed rank test with continuity correction
#>
#> data: x
#> V = 158, p-value = 0.006
#> alternative hypothesis: true location is greater than 320
\(p\)值小于0.05,应当拒绝原假设。
5.3
解:
对称中心检验问题: \[ \theta=10 \qquad v.s. \qquad \theta\ne 10 \]
(1)符号检验:
d <- c(22, 9, 4, 5, 1, 16, 15, 26, 47, 8, 31, 7)
x <- 10            # hypothesised centre
S1 <- sum(d < x)   # observations below x
S2 <- sum(d > x)   # observations above x
S0 <- sum(d == x)  # observations equal to x
binom.test(x = S1, n = S1 + S2, p = 0.5, alternative = "two.sided")
#>
#> Exact binomial test
#>
#> data: S1 and S1 + S2
#> number of successes = 6, number of trials = 12, p-value = 1
#> alternative hypothesis: true probability of success is not equal to 0.5
#> 95 percent confidence interval:
#> 0.211 0.789
#> sample estimates:
#> probability of success
#> 0.5
\(p\)值巨大,应当接受原假设。
(2)Wilcoxon符号秩检验:
# One-sample Wilcoxon signed-rank test of d against the location mu = x
wilcox.test(d,alternative = "two.sided",mu = x,exact = FALSE)
#>
#> Wilcoxon signed rank test with continuity correction
#>
#> data: d
#> V = 53, p-value = 0.3
#> alternative hypothesis: true location is not equal to 10
\(p\)值大于0.05也是接受原假设,但\(p\)值相对于符号检验要小很多。
5.4
解:
成对数据的检验,采用Wilcoxon符号秩检验。 \[ E(D)=0\qquad v.s. \qquad E(D)\ne 0 \]
x <- c(78, 70, 67, 81, 76, 72, 85, 83)
y <- c(62, 58, 63, 77, 80, 73, 82, 78)
w <- wilcox.test(x, y, mu = 0, alternative = "two.sided", paired = TRUE)
#> Warning in wilcox.test.default(x, y, mu = 0, alternative = "two.sided", : cannot
#> compute exact p-value with ties
w
#>
#> Wilcoxon signed rank test with continuity correction
#>
#> data: x and y
#> V = 31, p-value = 0.08
#> alternative hypothesis: true location shift is not equal to 0
有打结:
# Tabulate the ranks of |x - y|; any rank occurring more than once is a tie
tie_counts <- table(rank(abs(x - y)))
tie_counts[tie_counts > 1]
#> 4
#> 3
只有1个结,长度为3。本来写好了一个函数用于修正统计量,但是扒了源代码发现关于打结的修正已经包含在函数中了,只是当样本数少于50或打结时会出现提醒。可以利用命令查看源代码:
stats:::wilcox.test.default
所以在显著性水平0.05下仍然接受原假设,即认为幼儿园生活对孩子的社会知识没有影响。
5.5
解:
成对数据的检验,采用Wilcoxon符号秩检验。 \[ E(D)=0\qquad v.s. \qquad E(D)\ne 0 \]
x <- c(1149, 1152, 1176, 1149, 1155, 1169, 1182, 1160, 1129, 1171)
y <- c(1116, 1130, 1184, 1194, 1184, 1147, 1125, 1125, 1166, 1151)
wilcox.test(x, y, alternative = "two.sided", mu = 0, paired = TRUE)
#> Warning in wilcox.test.default(x, y, alternative = "two.sided", mu = 0, : cannot
#> compute exact p-value with ties
#>
#> Wilcoxon signed rank test with continuity correction
#>
#> data: x and y
#> V = 32, p-value = 0.7
#> alternative hypothesis: true location shift is not equal to 0
在显著性水平0.05下,接受原假设,即认为这段时间的股票指数的波动程度相同。
5.6
解:
位置参数的检验: \[ \delta=0\qquad v.s.\qquad \delta\ne 0 \]
采用Wilcoxon秩和检验:
x <- c(134, 146, 104, 119, 124, 161, 112, 83, 113, 129, 97, 123)
y <- c(70, 118, 101, 85, 107, 132, 94)
# paired = FALSE: the two samples are independent
wilcox.test(x, y, alternative = "two.sided", mu = 0, paired = FALSE)
#>
#> Wilcoxon rank sum test
#>
#> data: x and y
#> W = 63, p-value = 0.08
#> alternative hypothesis: true location shift is not equal to 0
在显著性水平0.05下应当接受原假设,即认为两种饲料对雌鼠的体重增加的影响不显著。
5.7
解:
位置参数的检验: \[ \delta=0\qquad v.s.\qquad \delta\ne 0 \]
采用Wilcoxon秩和检验:
x <- c(52, 49, 54, 47, 56, 55, 45, 57, 55, 54)
y <- c(49, 48, 39, 44, 40, 50, 36, 41)
wilcox.test(x, y, alternative = "two.sided", mu = 0, paired = FALSE)
#> Warning in wilcox.test.default(x, y, alternative = "two.sided", mu = 0, : cannot
#> compute exact p-value with ties
#>
#> Wilcoxon rank sum test with continuity correction
#>
#> data: x and y
#> W = 72, p-value = 0.004
#> alternative hypothesis: true location shift is not equal to 0
在显著性水平0.05下应当拒绝原假设,即认为这两个厂产品的寿命不同。
5.8
解:
先载入数据:
x <- c(56, 105, 63, 88, 72, 112, 96, 93, 65, 105, 94, 87, 64, 65, 68, 87)
y <- c(88, 94, 93, 96, 99, 79, 91, 94, 91, 100, 99, 90, 100, 110, 102, 95)
1.成对数据的检验:
# Wilcoxon signed-rank test treating x and y as paired observations
wilcox.test(x,y,mu = 0,alternative = "two.sided",paired = TRUE)
#> Warning in wilcox.test.default(x, y, mu = 0, alternative = "two.sided", : cannot
#> compute exact p-value with ties
#>
#> Wilcoxon signed rank test with continuity correction
#>
#> data: x and y
#> V = 29, p-value = 0.05
#> alternative hypothesis: true location shift is not equal to 0
2.位置参数的检验:
# Wilcoxon rank-sum test treating x and y as independent samples
wilcox.test(x,y,mu = 0,alternative = "two.sided",paired = FALSE)
#> Warning in wilcox.test.default(x, y, mu = 0, alternative = "two.sided", : cannot
#> compute exact p-value with ties
#>
#> Wilcoxon rank sum test with continuity correction
#>
#> data: x and y
#> W = 72, p-value = 0.03
#> alternative hypothesis: true location shift is not equal to 0
3.尺度参数的检验:
# Mood two-sample test of scale
mood.test(x,y,alternative = "two.sided")
#>
#> Mood two-sample test of scale
#>
#> data: x and y
#> Z = 2, p-value = 0.02
#> alternative hypothesis: two.sided
# Squared-rank test of scale, using the function defined in section 3.2.1
square_rank.test(x,y,alternative = "two.sided")
#>
#> Square rank test (with ties)
#>
#> data: x and y
#> square rank test statistic = 4, the number of x = 16, the number of y =
#> 16, the number of x and y = 32, p-value = 3e-04
#> alternative hypothesis: two.sided
综上,可以看到在显著性水平0.05下,应当认为两个学科的博士论文页数有显著差异,人均页数显著不同,页数离散程度显著不同。
5.9
解:
先载入数据:
x <- c(83, 79, 83, 74, 75, 74, 86, 76, 84, 73, 78, 77, 80, 83, 78)
y <- c(75, 62, 58, 89, 77, 81, 27, 85, 72, 85, 74, 100, 43, 52, 75)
1.成对数据的检验:
# Wilcoxon signed-rank test treating x and y as paired observations
wilcox.test(x,y,mu = 0,alternative = "two.sided",paired = TRUE)
#> Warning in wilcox.test.default(x, y, mu = 0, alternative = "two.sided", : cannot
#> compute exact p-value with ties
#>
#> Wilcoxon signed rank test with continuity correction
#>
#> data: x and y
#> V = 82, p-value = 0.2
#> alternative hypothesis: true location shift is not equal to 0
2.位置参数的检验:
# Wilcoxon rank-sum test treating x and y as independent samples
wilcox.test(x,y,mu = 0,alternative = "two.sided",paired = FALSE)
#> Warning in wilcox.test.default(x, y, mu = 0, alternative = "two.sided", : cannot
#> compute exact p-value with ties
#>
#> Wilcoxon rank sum test with continuity correction
#>
#> data: x and y
#> W = 142, p-value = 0.2
#> alternative hypothesis: true location shift is not equal to 0
3.尺度参数的检验:
# Mood two-sample test of scale
mood.test(x,y,alternative = "two.sided")
#>
#> Mood two-sample test of scale
#>
#> data: x and y
#> Z = -3, p-value = 0.007
#> alternative hypothesis: two.sided
# Squared-rank test of scale, using the function defined in section 3.2.1
square_rank.test(x,y,alternative = "two.sided")
#>
#> Square rank test (with ties)
#>
#> data: x and y
#> square rank test statistic = -3, the number of x = 15, the number of y
#> = 15, the number of x and y = 30, p-value = 5e-04
#> alternative hypothesis: two.sided
综上,可以看到在显著性水平0.05下,应当认为两个学校的学生成绩没有显著差异,平均成绩没有显著不同,离散程度有显著不同。
5.10
解:
可以考虑尺度参数的双边检验,方差比较低说明加工精度比较高。
x <- c(18.0, 17.1, 16.4, 16.9, 16.9, 16.7, 16.7, 17.2, 17.5, 16.9)
y <- c(17.0, 16.9, 17.0, 16.9, 17.2, 17.1, 16.8, 17.1, 17.1, 16.2)
Mood检验:
# Mood two-sample test of scale
mood.test(x,y,alternative = "two.sided")
#>
#> Mood two-sample test of scale
#>
#> data: x and y
#> Z = 2, p-value = 0.1
#> alternative hypothesis: two.sided
平方秩检验:
# Squared-rank test of scale, using the function defined in section 3.2.1
square_rank.test(x,y,alternative = "two.sided")
#>
#> Square rank test (with ties)
#>
#> data: x and y
#> square rank test statistic = 1, the number of x = 10, the number of y =
#> 10, the number of x and y = 20, p-value = 0.2
#> alternative hypothesis: two.sided
\(p\)值都大于0.05,应当接受原假设,即认为他们的水平(加工精度)一致。
5.11
解:
尺度参数的双边检验。
x <- c(8.8, 8.2, 5.6, 4.9, 8.9, 4.2, 3.6, 7.1, 5.5, 8.6, 6.3, 3.9)
y <- c(13.0, 14.5, 22.8, 20.7, 19.6, 18.4, 21.3, 24.2, 19.6, 11.7)
Mood检验:
# Mood two-sample test of scale on the raw samples
mood.test(x,y,alternative = "two.sided")
#>
#> Mood two-sample test of scale
#>
#> data: x and y
#> Z = -0.5, p-value = 0.6
#> alternative hypothesis: two.sided
平方秩检验:
# Squared-rank test of scale, using the function defined in section 3.2.1
square_rank.test(x,y,alternative = "two.sided")
#>
#> Square rank test (with ties)
#>
#> data: x and y
#> square rank test statistic = -2, the number of x = 12, the number of y
#> = 10, the number of x and y = 22, p-value = 0.03
#> alternative hypothesis: two.sided
在显著性水平0.05下,mood检验表示应当接受原假设,而平方秩检验表示应当拒绝原假设。
但是Mood检验要求两总体位置参数相等,因此先进行位置参数的检验:
# Wilcoxon rank-sum test for a location shift between x and y
wilcox.test(x,y,alternative = "two.sided",mu = 0,paired = FALSE)
#> Warning in wilcox.test.default(x, y, alternative = "two.sided", mu = 0, : cannot
#> compute exact p-value with ties
#>
#> Wilcoxon rank sum test with continuity correction
#>
#> data: x and y
#> W = 0, p-value = 9e-05
#> alternative hypothesis: true location shift is not equal to 0
\(p\)值很小,应当拒绝原假设,即认为两总体的位置参数不等。为了使用mood检验,我们需要估计两总体位置参数,进而平移使他们相等。
# Shift both samples so that each is centred on its own mean
x1 <- x - mean(x)
y1 <- y - mean(y)
# Run the Mood test again on the centred samples
mood.test(x1, y1, alternative = "two.sided")
#>
#> Mood two-sample test of scale
#>
#> data: x1 and y1
#> Z = -2, p-value = 0.04
#> alternative hypothesis: two.sided
\(p\)值小于0.05,应当拒绝原假设。综上,应当认为两组数据的方差存在差异。
5.12
解:
尺度参数的双边检验。
x <- c(8.2, 10.7, 7.5, 14.6, 6.3, 9.2, 11.9, 5.6, 12.8, 5.2, 4.9, 13.5)
y <- c(4.7, 6.3, 5.2, 6.8, 5.6, 4.2, 6.0, 7.4, 8.1, 6.5)
先进行位置参数的检验:
# Wilcoxon rank-sum test for a location shift between x and y
wilcox.test(x,y,alternative = "two.sided",mu = 0,paired = FALSE)
#> Warning in wilcox.test.default(x, y, alternative = "two.sided", mu = 0, : cannot
#> compute exact p-value with ties
#>
#> Wilcoxon rank sum test with continuity correction
#>
#> data: x and y
#> W = 92, p-value = 0.03
#> alternative hypothesis: true location shift is not equal to 0
应当拒绝原假设,即认为他们的位置参数不相等。平移数据后再进行mood检验:
# Shift both samples so that each is centred on its own mean
x1 <- x - mean(x)
y1 <- y - mean(y)
# Run the Mood test on the centred samples
mood.test(x1, y1, alternative = "two.sided")
#>
#> Mood two-sample test of scale
#>
#> data: x1 and y1
#> Z = 3, p-value = 0.004
#> alternative hypothesis: two.sided
平方秩检验:
# Squared-rank test of scale, using the function defined in section 3.2.1
square_rank.test(x,y,alternative = "two.sided")
#> Warning in square_rank.test(x, y, alternative = "two.sided"): The sizes of x and
#> y are less than 10. The test may be inaccurate.
#>
#> Square rank test
#>
#> data: x and y
#> square rank test statistic = 3110, the number of x = 12, the number of
#> y = 10, the number of x and y = 22, p-value = 1e-04
#> alternative hypothesis: two.sided
两个检验都表明应当拒绝原假设,即认为他们的尿酸浓度变异不相同。
5.13
解:
关于位置参数的多样本双边检验问题。
A <- c(80, 203, 236, 252, 284, 368, 457, 393)
B <- c(133, 180, 100, 160)
C <- c(156, 295, 320, 448, 465, 481, 279)
D <- c(194, 214, 272, 330, 386, 475)
x <- list(A, B, C, D)
Kruskal-Wallis检验:
# Kruskal-Wallis rank sum test across the groups in the list x
kruskal.test(x)
#>
#> Kruskal-Wallis rank sum test
#>
#> data: x
#> Kruskal-Wallis chi-squared = 8, df = 3, p-value = 0.04
在显著性水平0.05下应当拒绝原假设,即认为这四种药物的治疗效果不同。
5.14
解:
关于位置参数的多样本双边检验问题。
A <- c(830, 910, 940, 890, 890, 960, 910, 920, 900)
B <- c(910, 900, 810, 830, 840, 830, 880, 910, 890, 840)
C <- c(1010, 1000, 910, 930, 960, 950, 940)
D <- c(780, 820, 810, 770, 790, 810, 800, 810)
x <- list(A, B, C, D)
kruskal.test(x)
#>
#> Kruskal-Wallis rank sum test
#>
#> data: x
#> Kruskal-Wallis chi-squared = 26, df = 3, p-value = 1e-05
\(p\)值很小,应当拒绝原假设,即认为每种培育方法的水稻产量不相同。为了比较任意两种方法的水稻产量之间的差异,需要用到Dunn检验:
# Dunn's pairwise comparisons on the groups stored in the list x
DunnTest(x)
#>
#> Dunn's test of multiple comparisons using rank sums : holm
#>
#> mean.rank.diff pval
#> 2-1 -6.53 0.2438
#> 3-1 7.74 0.2438
#> 4-1 -17.02 0.0021 **
#> 3-2 14.27 0.0141 *
#> 4-2 -10.49 0.0778 .
#> 4-3 -24.76 8.6e-06 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
在显著性水平0.05下,方法4跟方法1、方法3跟方法2、方法4跟方法3的水稻产量有显著差异。
5.15
关于位置参数的多样本趋势性检验问题。
采用Jonckheere-Terpstra检验:
A <- c(125, 136, 116, 101, 105, 109)
B <- c(122, 114, 132, 120, 119, 127)
C <- c(128, 142, 128, 134, 135, 132, 140, 129)
x <- list(A, B, C)
k <- length(x)                    # number of groups
l <- sapply(x, length)            # size of each group
g <- ordered(rep(seq_len(k), l))  # ordered group label for every observation
x <- unlist(x)
JonckheereTerpstraTest(x, g)
#> Warning in JonckheereTerpstraTest.default(x, g): Sample size > 100 or data with ties
#> p-value based on normal approximation. Specify nperm for permutation p-value
#>
#> Jonckheere-Terpstra test
#>
#> data: x and g
#> JT = 112, p-value = 0.002
#> alternative hypothesis: two.sided
\(p\)值小于显著性水平0.05,应当拒绝原假设,即认为结论可靠。
5.16
解:
关于位置参数的多样本趋势性检验问题。
采用Jonckheere-Terpstra检验:
A <- c(40, 35, 38, 43, 44, 41)
B <- c(38, 40, 47, 44, 40, 42)
C <- c(48, 40, 45, 43, 46, 48, 44)
x <- list(A, B, C)
k <- length(x)                    # number of groups
l <- sapply(x, length)            # size of each group
g <- ordered(rep(seq_len(k), l))  # ordered group label for every observation
x <- unlist(x)
JonckheereTerpstraTest(x, g)
#> Warning in JonckheereTerpstraTest.default(x, g): Sample size > 100 or data with ties
#> p-value based on normal approximation. Specify nperm for permutation p-value
#>
#> Jonckheere-Terpstra test
#>
#> data: x and g
#> JT = 91, p-value = 0.02
#> alternative hypothesis: two.sided
\(p\)值小于显著性水平0.05,应当拒绝原假设,即认为研究者的经验可靠。
5.17
解:
完全区组设计的Friedman检验:
A <- c(73, 75, 67, 61, 69, 79)
B <- c(83, 81, 99, 82, 85, 87)
C <- c(73, 60, 73, 77, 68, 74)
D <- c(58, 64, 64, 71, 77, 74)
E <- c(77, 75, 73, 59, 85, 82)
y <- matrix(c(A, B, C, D, E), ncol = 5)
friedman.test(y)
#>
#> Friedman rank sum test
#>
#> data: y
#> Friedman chi-squared = 15, df = 4, p-value = 0.006
\(p\)值小于显著性水平0.05,应当拒绝原假设,即认为面积大小有差异。
5.18
解:
二元区组设计的Cochran检验:
A <- c(rep(1, 9), 0)
B <- c(1, 1, 0, 1, 0, 1, 0, 0, 1, 1)
C <- c(0, 0, 1, 1, 0, 0, 0, 0, 0, 1)
x <- as.logical(c(A, B, C))
x <- matrix(x, nrow = 3, byrow = TRUE)  # one row per vector A, B, C
cochran.test(x)
#>
#> Cochran test
#>
#> data: x
#> Cochran test statistic = 6, df = 2, blocks = 10, treatments = 3,
#> p-value = 0.05
\(p\) 值略小于显著性水平0.05,应当拒绝原假设,即认为顾客对这三种糕点的爱好并不相同。
5.19
解:
完全区组设计的Page检验,单调上升:
x1 <- c(36, 51, 71, 63, 82, 128)
x2 <- c(62, 91, 40, 51, 33, 81)
x3 <- c(53, 81, 67, 75, 116, 38)
x4 <- c(105, 63, 49, 65, 107, 33)
x5 <- c(36, 46, 62, 63, 42, 104)
x6 <- c(118, 65, 126, 96, 122, 112)
x7 <- c(42, 108, 123, 32, 69, 102)
x8 <- c(51, 63, 55, 86, 41, 121)
x9 <- c(114, 51, 30, 109, 97, 86)
x <- matrix(c(x1, x2, x3, x4, x5, x6, x7, x8, x9), ncol = 9)
page.test(x, alternative = "increasing")
#>
#> Page test
#>
#> data: x
#> Page test statistic = 685, blocks = 9, treatments = 6, p-value = 0.2
#> alternative hypothesis: increasing
\(p\)值大于显著性水平0.05,应当接受原假设,即认为论断不正确。
或者利用DescTools
的PageTest
函数:
# Same data through DescTools' PageTest, which is given the transpose of x
PageTest(t(x))
#>
#> Page test for ordered alternatives
#>
#> data: t(x)
#> L = 685, p-value = 0.2
\(p\)值相差不大。
5.20
解:
平衡的不完全区组设计的Durbin检验:
A <- c(73, 74, NA, 71)
B <- c(NA, 75, 67, 72)
C <- c(74, 75, 68, NA)
D <- c(75, NA, 72, 75)
x <- matrix(c(A, B, C, D), ncol = 4)
durbinTest(x)
#>
#> Durbin's rank sum test for a two-way
#> balanced incomplete block design
#>
#> data: x
#> Durbin chi-squared = 7, df = 3, p-value = 0.06
\(p\)值大于显著性水平0.05,应当接受原假设,即认为四种饲料的品质无差别。
5.21
解:
相关分析:
# Load the data
x <- c(65, 89, 67, 66, 89, 85, 84, 73, 88, 80, 86, 75)
y <- c(62, 66, 50, 68, 88, 86, 64, 62, 92, 64, 81, 80)
三类相关检验:
# Spearman rank correlation test (exact = FALSE: asymptotic p-value)
cor.test(x, y, alternative = "two.sided", method = "spearman", exact = FALSE)
#>
#> Spearman's rank correlation rho
#>
#> data: x and y
#> S = 90, p-value = 0.01
#> alternative hypothesis: true rho is not equal to 0
#> sample estimates:
#> rho
#> 0.685
# Kendall tau correlation test (exact = FALSE: asymptotic p-value)
cor.test(x, y, alternative = "two.sided", method = "kendall", exact = FALSE)
#>
#> Kendall's rank correlation tau
#>
#> data: x and y
#> z = 2, p-value = 0.02
#> alternative hypothesis: true tau is not equal to 0
#> sample estimates:
#> tau
#> 0.543
Spearman秩相关检验以及Kendall \(\tau\) 检验都表明在显著性水平0.05下应当拒绝原假设,即认为两者具有相关关系。
# Pearson product-moment correlation test
cor.test(x,y,alternative = "two.sided",method = "pearson")
#>
#> Pearson's product-moment correlation
#>
#> data: x and y
#> t = 3, df = 10, p-value = 0.02
#> alternative hypothesis: true correlation is not equal to 0
#> 95 percent confidence interval:
#> 0.118 0.891
#> sample estimates:
#> cor
#> 0.648
\(p\)值小于显著性水平0.05,应当拒绝原假设,即应当认为两者具有线性相关关系。
5.22
解:
相关分析:
# Load the data
x <- c(75, 95, 85, 70, 76, 68, 60, 66, 80, 88)
y <- c(2.62, 2.91, 2.94, 2.11, 2.17, 1.98, 2.04, 2.20, 2.65, 2.69)
三类相关检验:
# Spearman rank correlation test (exact = FALSE: asymptotic p-value)
cor.test(x, y, alternative = "two.sided", method = "spearman", exact = FALSE)
#>
#> Spearman's rank correlation rho
#>
#> data: x and y
#> S = 26, p-value = 0.002
#> alternative hypothesis: true rho is not equal to 0
#> sample estimates:
#> rho
#> 0.842
# Kendall tau correlation test (exact = FALSE: asymptotic p-value)
cor.test(x, y, alternative = "two.sided", method = "kendall", exact = FALSE)
#>
#> Kendall's rank correlation tau
#>
#> data: x and y
#> z = 3, p-value = 0.006
#> alternative hypothesis: true tau is not equal to 0
#> sample estimates:
#> tau
#> 0.689
Spearman秩相关检验以及Kendall \(\tau\) 检验都表明在显著性水平0.05下应当拒绝原假设,即认为两者具有相关关系。
# Pearson product-moment correlation test
cor.test(x,y,alternative = "two.sided",method = "pearson")
#>
#> Pearson's product-moment correlation
#>
#> data: x and y
#> t = 5, df = 8, p-value = 8e-04
#> alternative hypothesis: true correlation is not equal to 0
#> 95 percent confidence interval:
#> 0.558 0.971
#> sample estimates:
#> cor
#> 0.879
\(p\)值小于显著性水平0.05,应当拒绝原假设,即应当认为两者具有线性相关关系。
5.23
解:
Kendall协同系数检验:
A <- c(9, 2, 4, 10, 7, 6, 8, 5, 3, 1)
B <- c(10, 1, 3, 8, 7, 5, 9, 6, 4, 2)
C <- c(8, 4, 2, 10, 9, 7, 5, 6, 3, 1)
D <- c(9, 1, 2, 10, 6, 7, 4, 8, 5, 3)
x <- matrix(c(A, B, C, D), ncol = 4)
KendallW(x, test = TRUE, correct = TRUE)
#>
#> Kendall's coefficient of concordance Wt
#>
#> data: x
#> Kendall chi-squared = 31, df = 9, subjects = 10, raters = 4, p-value =
#> 3e-04
#> alternative hypothesis: Wt is greater 0
#> sample estimates:
#> Wt
#> 0.853
\(p\)值很小,应当拒绝原假设,即认为这些排序产生较为一致的效果。
5.24
解:
线性相关分析:
x <- c(58.8, 61.4, 71.3, 74.4, 76.7, 70.7, 57.5, 46.4, 39.1, 48.5, 70.0, 70.1)
y <- c(8.4, 9.27, 8.73, 6.36, 8.50, 7.82, 9.14, 8.24, 9.57, 9.58, 8.11, 6.83)
cor.test(x, y, alternative = "two.sided", method = "pearson")
#>
#> Pearson's product-moment correlation
#>
#> data: x and y
#> t = -3, df = 10, p-value = 0.03
#> alternative hypothesis: true correlation is not equal to 0
#> 95 percent confidence interval:
#> -0.8822 -0.0789
#> sample estimates:
#> cor
#> -0.625
\(p\)值小于显著性水平0.05,应当拒绝原假设,即认为两者存在线性关系。