Standardized mean difference (SMD) is the most commonly used statistic to examine the balance of covariate distribution between treatment groups. Because SMD is independent of the unit of measurement, it allows comparison between variables with different unit of measurement. (Zhang Z et al. 2019)
- to create a simulated dataset
psSim<-function(CatVarN=2,ContVarN=2,
seed=123,n=1000){
set.seed(seed);
Xcont <- replicate(ContVarN, rnorm(n))
Xcat <- replicate(CatVarN, rbinom(n,size = 1,prob = 0.3))
linpredT<-cbind(1, Xcont,Xcat) %*%
sample(c(-5:-1,1:5), 1+CatVarN+ContVarN) +
rnorm(n,-0.8,1)
probTreatment <- exp(linpredT) / (1 + exp(linpredT))
Treat <- rbinom(n, 1, probTreatment);
linpredY <- 1 + cbind(Xcont,Xcat) %*%
rep(1, CatVarN+ContVarN) +
Treat + rnorm(n, -2, 2);
prY = 1/(1+exp(-linpredY));
mort <- rbinom(n,1,prY);
dt <- data.frame(Xcont=Xcont,Xcat=Xcat,Treat, mort)
return(dt)
}
dt<-psSim()
str(dt)
## 'data.frame': 1000 obs. of 6 variables:
## $ Xcont.1: num -0.5605 -0.2302 1.5587 0.0705 0.1293 ...
## $ Xcont.2: num -0.996 -1.04 -0.018 -0.132 -2.549 ...
## $ Xcat.1 : int 0 1 0 1 0 0 1 0 0 1 ...
## $ Xcat.2 : int 0 1 1 0 0 0 0 0 1 0 ...
## $ Treat : int 0 0 0 0 0 0 0 1 0 0 ...
## $ mort : int 0 1 1 0 1 0 1 0 1 1 ...
- to examine the balance of covariates between treated and untreated groups
library(tableone)
myVars <- names(dt)[1:4]
tabbefore <- CreateTableOne(vars = myVars,
data = dt,
strata = 'Treat',
factorVars = c('Xcat.1','Xcat.2'),
smd = T)
tabbefore <- print(tabbefore,
printToggle = FALSE,
noSpaces = TRUE, smd=TRUE,
quote=T)
tabbefore
## "Stratified by Treat"
## "" "0" "1" "p" "test" "SMD"
## "n" "766" "234" "" "" ""
## "Xcont.1 (mean (SD))" "0.12 (0.99)" "-0.32 (0.94)" "<0.001" "" "0.455"
## "Xcont.2 (mean (SD))" "-0.25 (0.89)" "1.00 (0.77)" "<0.001" "" "1.499"
## "Xcat.1 = 1 (%)" "263 (34.3)" "19 (8.1)" "<0.001" "" "0.677"
## "Xcat.2 = 1 (%)" "279 (36.4)" "23 (9.8)" "<0.001" "" "0.665"
- PSM
#install.packages("MatchIt")
library(MatchIt);
m.out<-matchit(Treat~ Xcont.1+Xcont.2+Xcat.1+Xcat.2,
dt, method = "nearest", caliper=0.1)
- SMD
#install.packages("cobalt")
library(cobalt)
bal.tab(m.out,m.threshold=0.1)
## Call
## matchit(formula = Treat ~ Xcont.1 + Xcont.2 + Xcat.1 + Xcat.2,
## data = dt, method = "nearest", caliper = 0.1)
##
## Balance Measures
## Type Diff.Adj M.Threshold
## distance Distance 0.0455 Balanced, <0.1
## Xcont.1 Contin. 0.0271 Balanced, <0.1
## Xcont.2 Contin. 0.0506 Balanced, <0.1
## Xcat.1 Binary -0.0114 Balanced, <0.1
## Xcat.2 Binary 0.0000 Balanced, <0.1
##
## Balance tally for mean differences
## count
## Balanced, <0.1 5
## Not Balanced, >0.1 0
##
## Variable with the greatest mean difference
## Variable Diff.Adj M.Threshold
## Xcont.2 0.0506 Balanced, <0.1
##
## Sample sizes
## Control Treated
## All 766 234
## Matched 88 88
## Unmatched 678 146
- Visualization the distribution changes before and after matching
bal.plot(m.out,var.name = 'Xcont.2',which = 'both')
bal.plot(m.out,var.name = 'Xcat.1',which = 'both')
- Publication quality plot (change the temporary variable names)
v <- data.frame(old = c("Xcont.1","Xcont.2","Xcat.1","Xcat.2"),
new = c("Age", "WBC", "Gender",'Surgery'))
love.plot(bal.tab(m.out, m.threshold=0.1),
stat = "mean.diffs", var.names = v, abs = F)
## Warning: Standardized mean differences and raw mean differences are present in the same plot.
## Use the 'stars' argument to distinguish between them and appropriately label the x-axis.
- to compare difference after PSM (refer to tabbefore)
df.match <- match.data(m.out)[1:ncol(dt)]
tabafter <- CreateTableOne(vars = myVars,
data = df.match,
strata = 'Treat',
factorVars = c('Xcat.1','Xcat.2'),
smd = T)
tabafter <- print(tabafter,
printToggle = FALSE,
noSpaces = TRUE,smd=TRUE)
tabafter;
## Stratified by Treat
## 0 1 p test SMD
## n "88" "88" "" "" ""
## Xcont.1 (mean (SD)) "-0.18 (0.93)" "-0.15 (1.04)" "0.864" "" "0.026"
## Xcont.2 (mean (SD)) "0.50 (0.67)" "0.54 (0.76)" "0.719" "" "0.054"
## Xcat.1 = 1 (%) "9 (10.2)" "8 (9.1)" "1.000" "" "0.038"
## Xcat.2 = 1 (%) "12 (13.6)" "12 (13.6)" "1.000" "" "<0.001"
8.1 SMD calculation in CreateTableOne()
abs((mean(df.match[df.match$Treat==1,'Xcont.1'])-
mean(df.match[df.match$Treat==0,'Xcont.1'])))/
sqrt((var(df.match[df.match$Treat==1,'Xcont.1'])+
var(df.match[df.match$Treat==0,'Xcont.1']))/2)
## [1] 0.02581165
8.2 SMD calculation in tab():
(mean(df.match[df.match$Treat==1,'Xcont.1'])-
mean(df.match[df.match$Treat==0,'Xcont.1']))/
sqrt((var(dt[dt$Treat==1,'Xcont.1'])))
## [1] 0.02705606
- Variance ratio
Note: A variance ratio of 1 in matched sample indicates a good matching, and a variance ratio below 2 is generally acceptable.
bal.tab(m.out,v.threshold=2)
## Call
## matchit(formula = Treat ~ Xcont.1 + Xcont.2 + Xcat.1 + Xcat.2,
## data = dt, method = "nearest", caliper = 0.1)
##
## Balance Measures
## Type Diff.Adj V.Ratio.Adj V.Threshold
## distance Distance 0.0455 1.0950 Balanced, <2
## Xcont.1 Contin. 0.0271 1.2512 Balanced, <2
## Xcont.2 Contin. 0.0506 1.2954 Balanced, <2
## Xcat.1 Binary -0.0114 .
## Xcat.2 Binary 0.0000 .
##
## Balance tally for variance ratios
## count
## Balanced, <2 3
## Not Balanced, >2 0
##
## Variable with the greatest variance ratio
## Variable V.Ratio.Adj V.Threshold
## Xcont.2 1.2954 Balanced, <2
##
## Sample sizes
## Control Treated
## All 766 234
## Matched 88 88
## Unmatched 678 146
- Prognostic score for assessing balance
The prognostic score is defined as the predicted probability of outcome under the control condition. It can be estimated by regressing the outcome on covariates in the control group. Then that fitted model is used to predict outcome for all subjects.
ctrl.data <- dt[dt$Treat == 0,]
ctrl.fit <- glm(mort ~ Xcont.1+Xcont.2+Xcat.1+Xcat.2,
data = ctrl.data)
dt$prog.score <- predict(ctrl.fit, dt)
bal.tab(m.out, distance = dt["prog.score"])
## Call
## matchit(formula = Treat ~ Xcont.1 + Xcont.2 + Xcat.1 + Xcat.2,
## data = dt, method = "nearest", caliper = 0.1)
##
## Balance Measures
## Type Diff.Adj
## prog.score Distance 0.0360
## distance Distance 0.0455
## Xcont.1 Contin. 0.0271
## Xcont.2 Contin. 0.0506
## Xcat.1 Binary -0.0114
## Xcat.2 Binary 0.0000
##
## Sample sizes
## Control Treated
## All 766 234
## Matched 88 88
## Unmatched 678 146
- to identify evidence of model misspecification
library(car)
mod1<-glm(Treat~Xcont.1+Xcont.2+Xcat.1+Xcat.2,
dt,family = binomial)
residualPlots(mod1,terms=~Xcont.1+Xcont.2,fitted=T)
## Test stat Pr(>|Test stat|)
## Xcont.1 0.5216 0.4701
## Xcont.2 0.6596 0.4167
Reference:
Zhang Z, Kim HJ, Lonjon G, Zhu Y; written on behalf of AME Big-Data Clinical Trial Collaborative Group. Balance diagnostics after propensity score matching. Ann Transl Med 2019;7(1):16. doi: 10.21037/atm.2018.12.10