传统的线性回归模型描述了因变量的条件均值分布与自变量X的关系,为了和分位数回归相区别,把传统的回归又称为均值回归(Mean Regression)。其中,OLS是估计回归系数的最基本的方法。
分位数回归是估计一组回归变量X与被解释变量Y的分位数之间关系的建模方法。
问题的提出:恩格尔定律
德国统计学家恩格尔(Engel)使用收集的235个比利时家庭的收入与食物支出数据,得出了著名的恩格尔定律:收入越高的家庭将其收入用于食物支出的比例越低。
#install.packages('quantreg')
library(quantreg)
data(engel)
dim(engel)
## [1] 235 2
head(engel)
## income foodexp
## 1 420.1577 255.8394
## 2 541.4117 310.9587
## 3 901.1575 485.6800
## 4 639.0802 402.9974
## 5 750.8756 495.5608
## 6 945.7989 633.7978
class(engel)
## [1] "data.frame"
typeof(engel)
## [1] "list"
mode(engel)
## [1] "list"
#install.packages('quantreg')
library(quantreg)
# data(engel)
# dim(engel)
# head(engel)
# class(engel)
# typeof(engel)
# mode(engel)
# plot of engel data and some rq lines see KB(1982) for references to data
data(engel)
attach(engel)
plot(income,foodexp,xlab="Household Income",ylab="Food Expenditure",type = "n", cex=.5)
points(income,foodexp,cex=.5,col="blue")
#中位数回归
q_0.5 <- rq(foodexp ~ income, tau=0.5, data=engel)
summary(q_0.5)
##
## Call: rq(formula = foodexp ~ income, tau = 0.5, data = engel)
##
## tau: [1] 0.5
##
## Coefficients:
## coefficients lower bd upper bd
## (Intercept) 81.48225 53.25915 114.01156
## income 0.56018 0.48702 0.60199
#分位数(.05,.1,.25,.75,.9,.95)上的分位数回归
taus <- c(.05,.1,.25,.75,.9,.95)
xx <- seq(min(income),max(income),100)
f1 <- rq((foodexp)~(income),tau=taus)
f2 <- coef(f1)
summary(f1)
##
## Call: rq(formula = (foodexp) ~ (income), tau = taus)
##
## tau: [1] 0.05
##
## Coefficients:
## coefficients lower bd upper bd
## (Intercept) 124.88004 98.30212 130.51695
## income 0.34336 0.34333 0.38975
##
## Call: rq(formula = (foodexp) ~ (income), tau = taus)
##
## tau: [1] 0.1
##
## Coefficients:
## coefficients lower bd upper bd
## (Intercept) 110.14157 79.88753 146.18875
## income 0.40177 0.34210 0.45079
##
## Call: rq(formula = (foodexp) ~ (income), tau = taus)
##
## tau: [1] 0.25
##
## Coefficients:
## coefficients lower bd upper bd
## (Intercept) 95.48354 73.78608 120.09847
## income 0.47410 0.42033 0.49433
##
## Call: rq(formula = (foodexp) ~ (income), tau = taus)
##
## tau: [1] 0.75
##
## Coefficients:
## coefficients lower bd upper bd
## (Intercept) 62.39659 32.74488 107.31362
## income 0.64401 0.58016 0.69041
##
## Call: rq(formula = (foodexp) ~ (income), tau = taus)
##
## tau: [1] 0.9
##
## Coefficients:
## coefficients lower bd upper bd
## (Intercept) 67.35087 37.11802 103.17399
## income 0.68630 0.64937 0.74223
##
## Call: rq(formula = (foodexp) ~ (income), tau = taus)
##
## tau: [1] 0.95
##
## Coefficients:
## coefficients lower bd upper bd
## (Intercept) 64.10396 46.26495 83.57896
## income 0.70907 0.67390 0.73444
summary(f2)
## tau= 0.05 tau= 0.10 tau= 0.25 tau= 0.75
## Min. : 0.3434 Min. : 0.4018 Min. : 0.4741 Min. : 0.644
## 1st Qu.: 31.4775 1st Qu.: 27.8367 1st Qu.:24.2265 1st Qu.:16.082
## Median : 62.6117 Median : 55.2717 Median :47.9788 Median :31.520
## Mean : 62.6117 Mean : 55.2717 Mean :47.9788 Mean :31.520
## 3rd Qu.: 93.7459 3rd Qu.: 82.7066 3rd Qu.:71.7312 3rd Qu.:46.958
## Max. :124.8800 Max. :110.1416 Max. :95.4835 Max. :62.397
## tau= 0.90 tau= 0.95
## Min. : 0.6863 Min. : 0.7091
## 1st Qu.:17.3524 1st Qu.:16.5578
## Median :34.0186 Median :32.4065
## Mean :34.0186 Mean :32.4065
## 3rd Qu.:50.6847 3rd Qu.:48.2552
## Max. :67.3509 Max. :64.1040
f <- coef(rq((foodexp)~(income),tau=taus)) # 提取分位数回归系数
yy <- cbind(1,xx)%*%f # 计算分位数回归拟合值
for(i in 1:length(taus)){
lines(xx,yy[,i],col = "gray")
}
abline(lm(foodexp ~ income),col="red",lty = 2)
abline(rq(foodexp ~ income), col="blue")
legend(3500,620,c("mean (LSE) fit", "median (LAE) fit"), col = c("red","blue"), lty = c(2,1))
#Example of plotting of coefficients and their confidence bands
plot(summary(rq(foodexp~income,tau = 1:49/50,data=engel)))
#Example to illustrate inequality constrained fitting
n <- 100
p <- 5
X <- matrix(rnorm(n*p),n,p)
y <- .95*apply(X,1,sum)+rnorm(n)
#constrain slope coefficients to lie between zero and one
R <- cbind(0,rbind(diag(p),-diag(p)))
r <- c(rep(0,p),-rep(1,p))
rq(y~X,R=R,r=r,method="fnc")
## Call:
## rq(formula = y ~ X, method = "fnc", R = R, r = r)
##
## Coefficients:
## (Intercept) X1 X2 X3 X4 X5
## 0.1819726 0.9323909 1.0000000 0.8925110 1.0000000 0.9920904
##
## Degrees of freedom: 100 total; 94 residual
# ?rq