The echoice2
package is available from github. In some version of install_github
, warnings are converted to errors, which might prevent succesfull installation. Setting the corresponding environment variable to true will resolve the issue.
Sys.setenv("R_REMOTES_NO_ERRORS_FROM_WARNINGS" = "true")
remotes::install_github("ninohardt/echoice2")
Once installed, load packages echoice2
and tidyverse
:
suppressPackageStartupMessages(library(tidyverse))
library(echoice2)
Choice data should be provided in a `long’ format, i.e. one row per alternative, choice task and respondent.
load('data/pizza_long.rdata')
For hold-out validation, we keep 1 task per respondent. In v-fold cross-validation, this is done several times. However, each re-run of the model may take a while. For this example, we only use 1 set of holdout tasks. Hold-out tasks shown in this vignette may be different from those shown in the paper - however, the superiority of the proposed model should hold.
set.seed(1.2335252)
pizza_ho_tasks=
pizza_long %>%
distinct(id,task) %>%
mutate(id=as.integer(id))%>%
group_by(id) %>%
summarise(task=sample(task,1))
set.seed(NULL)
pizza_cal= pizza_long %>% mutate(id=as.integer(id)) %>%
anti_join(pizza_ho_tasks, by=c('id','task'))
pizza_ho= pizza_long %>% mutate(id=as.integer(id)) %>%
semi_join(pizza_ho_tasks, by=c('id','task'))
Estimate both models using 1M draws.
#compensatory
out_pizza_cal = pizza_cal %>% vd_est_vdmn(R=1000000, keep=50)
save(out_pizza_cal,file='draws/out_pizza_cal.rdata')
#conjunctive screening
out_pizza_screening_cal = pizza_cal %>% vd_est_vdm_screenpr(R=1000000, keep=50)
save(out_pizza_screening_cal,file='draws/out_pizza_screening_cal.rdata')
I draws have already been saved, no beed to re-run estimation.
load('draws/out_pizza_cal.rdata')
load('draws/out_pizza_screening_cal.rdata')
out_pizza_cal_ = vd_thin_draw(out_pizza_cal, .2, 5000)
out_pizza_screening_cal_ = vd_thin_draw(out_pizza_screening_cal, .4, 5000)
Quick check of convergence.
Compensatory:
out_pizza_cal_ %>% ec_trace_MU(burnin = 100)
Conjunctive Screening:
out_pizza_screening_cal_ %>% ec_trace_MU(burnin = 100)
First, we compare in-sample fit. The proposed model fits a lot better.
list(compensatory=out_pizza_cal_,
conjunctive=out_pizza_screening_cal_) %>%
map_dfr(ec_lmd_NR, .id = 'model') %>%
filter(part==1) %>% select(-part)
## # A tibble: 2 x 2
## model lmd
## <chr> <dbl>
## 1 compensatory -8368.
## 2 conjunctive -7851.
Now, we compare out of sample fit. For illustration purposes, only one fold is used for holdout fit. Moreover, only 5000 draws and 5000 simulated error terms are used.
seeed=5959
#generate predictions
ho_dem_vd=
pizza_ho %>%
prep_newprediction(pizza_cal) %>%
vd_dem_vdmn(out_pizza_cal_,
ec_gen_err_normal(pizza_ho, out_pizza_cal_, seed=seeed))
## Using 16 cores
## Computation in progress
ho_dem_vdsrpr=
pizza_ho %>%
prep_newprediction(pizza_cal) %>%
vd_dem_vdmsrpr(out_pizza_screening_cal_,
ec_gen_err_normal(pizza_ho, out_pizza_screening_cal_, seed=seeed))
## Using 16 cores
## Computation in progress
#evaluate
list(compensatory=ho_dem_vd,
conjunctive=ho_dem_vdsrpr) %>%
map_dfr(.%>%
vd_dem_summarise() %>% select(id:cheese, .pred=`E(demand)`) %>%
mutate(pmMSE=(x-.pred)^2,
pmMAE=abs(x-.pred),
pmbias=.pred-x) %>%
summarise(MSE=mean(pmMSE),
MAE=mean(pmMAE),
bias=mean(pmbias)),
.id = 'model')
## # A tibble: 2 x 4
## model MSE MAE bias
## <chr> <dbl> <dbl> <dbl>
## 1 compensatory 1.36 0.585 0.105
## 2 conjunctive 1.23 0.552 0.118
out_pizza_cal %>% ec_estimates_MU()
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.
## # A tibble: 20 x 12
## attribute lvl par mean sd `CI-5%` `CI-95%` sig model error
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <lgl> <chr> <chr>
## 1 <NA> <NA> int -2.79 0.124 -2.99 -2.60 TRUE VD-c~ Norm~
## 2 brand Fresc bran~ -0.351 0.0976 -0.514 -0.191 TRUE VD-c~ Norm~
## 3 brand Priv bran~ -0.686 0.101 -0.853 -0.524 TRUE VD-c~ Norm~
## 4 brand RedBa bran~ -0.603 0.0991 -0.766 -0.441 TRUE VD-c~ Norm~
## 5 brand Tomb bran~ -0.664 0.0938 -0.821 -0.513 TRUE VD-c~ Norm~
## 6 brand Tony bran~ -1.05 0.0998 -1.21 -0.891 TRUE VD-c~ Norm~
## 7 size ForTwo size~ 0.605 0.0679 0.496 0.717 TRUE VD-c~ Norm~
## 8 crust StufCr crus~ -0.142 0.0693 -0.256 -0.0282 TRUE VD-c~ Norm~
## 9 crust Thin crus~ -0.102 0.0730 -0.222 0.0174 FALSE VD-c~ Norm~
## 10 crust TrCr crus~ 0.00902 0.0624 -0.0935 0.112 FALSE VD-c~ Norm~
## 11 topping HI topp~ -0.628 0.115 -0.819 -0.440 TRUE VD-c~ Norm~
## 12 topping Pepperoni topp~ 0.208 0.0902 0.0603 0.356 TRUE VD-c~ Norm~
## 13 topping PepSauHam topp~ 0.153 0.0905 0.00340 0.301 TRUE VD-c~ Norm~
## 14 topping Surp topp~ -0.0123 0.0947 -0.170 0.143 FALSE VD-c~ Norm~
## 15 topping Veg topp~ -0.655 0.0933 -0.810 -0.502 TRUE VD-c~ Norm~
## 16 coverage ModCover cove~ -0.0540 0.0499 -0.136 0.0273 FALSE VD-c~ Norm~
## 17 cheese realchee~ chee~ 0.120 0.0501 0.0394 0.202 TRUE VD-c~ Norm~
## 18 <NA> <NA> sigma -0.255 0.0537 -0.340 -0.170 TRUE VD-c~ Norm~
## 19 <NA> <NA> gamma -0.460 0.0695 -0.573 -0.347 TRUE VD-c~ Norm~
## 20 <NA> <NA> E 3.61 0.0706 3.49 3.72 TRUE VD-c~ Norm~
## # ... with 2 more variables: reference_lvl <chr>, parameter <chr>
out_pizza_screening_cal %>% ec_estimates_MU()
## # A tibble: 20 x 12
## attribute lvl par mean sd `CI-5%` `CI-95%` sig model error
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <lgl> <chr> <chr>
## 1 <NA> <NA> int -2.17 0.135 -2.39 -1.96 TRUE VD-c~ Norm~
## 2 brand Fresc brand~ -0.167 0.107 -0.348 0.00464 FALSE VD-c~ Norm~
## 3 brand Priv brand~ -0.472 0.114 -0.663 -0.289 TRUE VD-c~ Norm~
## 4 brand RedBa brand~ -0.404 0.112 -0.592 -0.226 TRUE VD-c~ Norm~
## 5 brand Tomb brand~ -0.480 0.112 -0.667 -0.299 TRUE VD-c~ Norm~
## 6 brand Tony brand~ -0.693 0.122 -0.894 -0.497 TRUE VD-c~ Norm~
## 7 size ForTwo size:~ 0.679 0.0742 0.560 0.798 TRUE VD-c~ Norm~
## 8 crust StufCr crust~ -0.140 0.0779 -0.270 -0.0142 TRUE VD-c~ Norm~
## 9 crust Thin crust~ -0.0559 0.0788 -0.184 0.0749 FALSE VD-c~ Norm~
## 10 crust TrCr crust~ 0.0354 0.0683 -0.0767 0.148 FALSE VD-c~ Norm~
## 11 topping HI toppi~ 0.0639 0.115 -0.123 0.252 FALSE VD-c~ Norm~
## 12 topping Pepperoni toppi~ 0.276 0.0864 0.134 0.418 TRUE VD-c~ Norm~
## 13 topping PepSauHam toppi~ 0.336 0.0890 0.191 0.481 TRUE VD-c~ Norm~
## 14 topping Surp toppi~ 0.297 0.0908 0.145 0.444 TRUE VD-c~ Norm~
## 15 topping Veg toppi~ -0.319 0.108 -0.496 -0.139 TRUE VD-c~ Norm~
## 16 coverage ModCover cover~ -0.0764 0.0559 -0.168 0.0148 FALSE VD-c~ Norm~
## 17 cheese realcheese chees~ 0.126 0.0541 0.0372 0.216 TRUE VD-c~ Norm~
## 18 <NA> <NA> sigma -0.0881 0.0556 -0.180 0.00262 FALSE VD-c~ Norm~
## 19 <NA> <NA> gamma -0.0444 0.0760 -0.171 0.0773 FALSE VD-c~ Norm~
## 20 <NA> <NA> E 3.46 0.0681 3.35 3.57 TRUE VD-c~ Norm~
## # ... with 2 more variables: reference_lvl <chr>, parameter <chr>
out_pizza_screening_cal %>% ec_boxplot_MU()
out_pizza_screening_cal %>% ec_estimates_screen()
## Joining, by = "par"
## # A tibble: 22 x 8
## attribute lvl par mean sd `CI-5%` `CI-95%` limit
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 brand DiGi brand:DiGi 0.0333 0.0223 9.14e-3 0.0632 0.0718
## 2 brand Fresc brand:Fresc 0.103 0.0332 5.41e-2 0.155 0.182
## 3 brand Priv brand:Priv 0.170 0.0406 1.07e-1 0.236 0.287
## 4 brand RedBa brand:RedBa 0.136 0.0377 7.79e-2 0.196 0.227
## 5 brand Tomb brand:Tomb 0.159 0.0480 7.77e-2 0.232 0.282
## 6 brand Tony brand:Tony 0.280 0.0560 1.87e-1 0.363 0.420
## 7 cheese NoInfo cheese:NoInfo 0.00647 0.0168 3.45e-4 0.0175 0.0110
## 8 cheese realcheese cheese:realchee~ 0.00634 0.0166 3.48e-4 0.0169 0.00552
## 9 coverage densetop coverage:denset~ 0.0131 0.0193 1.05e-3 0.0315 0.0166
## 10 coverage ModCover coverage:ModCov~ 0.00649 0.0167 3.63e-4 0.0170 0.0110
## # ... with 12 more rows
out_pizza_screening_cal %>% ec_boxplot_screen()
Side-by-side part-worths of the volumetric demand models
list(compensatory=out_pizza_cal,
conjunctive =out_pizza_screening_cal) %>%
map_dfr(ec_estimates_MU,.id='model') %>%
select(model, attribute, lvl, par, mean) %>%
pivot_wider(names_from = model, values_from = mean)
## # A tibble: 20 x 5
## attribute lvl par compensatory conjunctive
## <chr> <chr> <chr> <dbl> <dbl>
## 1 <NA> <NA> int -2.79 -2.17
## 2 brand Fresc brand:Fresc -0.351 -0.167
## 3 brand Priv brand:Priv -0.686 -0.472
## 4 brand RedBa brand:RedBa -0.603 -0.404
## 5 brand Tomb brand:Tomb -0.664 -0.480
## 6 brand Tony brand:Tony -1.05 -0.693
## 7 size ForTwo size:ForTwo 0.605 0.679
## 8 crust StufCr crust:StufCr -0.142 -0.140
## 9 crust Thin crust:Thin -0.102 -0.0559
## 10 crust TrCr crust:TrCr 0.00902 0.0354
## 11 topping HI topping:HI -0.628 0.0639
## 12 topping Pepperoni topping:Pepperoni 0.208 0.276
## 13 topping PepSauHam topping:PepSauHam 0.153 0.336
## 14 topping Surp topping:Surp -0.0123 0.297
## 15 topping Veg topping:Veg -0.655 -0.319
## 16 coverage ModCover coverage:ModCover -0.0540 -0.0764
## 17 cheese realcheese cheese:realcheese 0.120 0.126
## 18 <NA> <NA> sigma -0.255 -0.0881
## 19 <NA> <NA> gamma -0.460 -0.0444
## 20 <NA> <NA> E 3.61 3.46
testm_pizza =
tibble(
id=1L,task=1L,alt=1:6,
brand= c("DiGi", "Fresc", "Priv", "RedBa", "Tomb", "Tony"),
size= "forOne",
crust="Thin",
topping="Veg",
coverage="ModCover",
cheese="NoInfo",
p=c(3.5,3,2,2,2,1.5)
) %>% prep_newprediction(pizza_long)
testmarket=
tibble(
id = rep(seq_len(n_distinct(pizza_long$id)),each=nrow(testm_pizza)),
task = 1,
alt = rep(1:nrow(testm_pizza),n_distinct(pizza_long$id))) %>%
bind_cols(
testm_pizza[rep(1:nrow(testm_pizza),n_distinct(pizza_long$id)),-(1:3)]
)
focal_alternatives =
testmarket %>% transmute(focal=brand=='Priv') %>% pull(focal)
#pre-sim error terms
eps_not <- testmarket %>% ec_gen_err_normal(out_pizza_cal_, 55667)
#demand curve compensatory
vd_demc_comp =
testmarket %>%
ec_demcurve(focal_alternatives,
seq(0.5,1.5,,9),
vd_dem_vdmn,
out_pizza_cal_,
eps_not)
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
#demand curve conjunctive screening
vd_demc_screenpr =
testmarket %>%
ec_demcurve(focal_alternatives,
seq(0.5,1.5,,9),
vd_dem_vdmsrpr,
out_pizza_screening_cal_,
eps_not)
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
#combine demand curves from both models
vd_outputs=rbind(
vd_demc_comp %>% do.call('rbind',.) %>% bind_cols(model='comp') %>% bind_cols(demand='volumetric'),
vd_demc_screenpr %>% do.call('rbind',.) %>% bind_cols(model='screenpr') %>% bind_cols(demand='volumetric'))
Plotting demand curves:
vd_outputs%>%
ggplot(aes(x=scenario, y=`E(demand)`, color=brand)) + geom_line() + facet_wrap(~model)+
xlab("Price (as % of original)") + scale_x_continuous(labels = scales::percent_format(), n.breaks = 5)
vd_outputs%>%
filter(brand=="Priv") %>%
ggplot(aes(x=scenario, y=`E(demand)`, color=model)) + geom_line() +
xlab("Price (as % of original)") + scale_x_continuous(labels = scales::percent_format(), n.breaks = 5)
While demand curves look similar, incidence curves reveal that drastic price decreases lead to a smaller increase in people buying when accounting for screening:
#demand curve compensatory
vd_demc_comp_inci =
testmarket %>%
ec_demcurve_inci(focal_alternatives,
seq(0.25,1.5,,9),
vd_dem_vdmn,
out_pizza_cal_,
eps_not)
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
#demand curve conjunctive screening
vd_demc_screenpr_inci =
testmarket %>%
ec_demcurve_inci(focal_alternatives,
seq(0.25,1.5,,9),
vd_dem_vdmsrpr,
out_pizza_screening_cal_,
eps_not)
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
## Computation in progress
#combine demand curves from both models
vd_outputs_inci=rbind(
vd_demc_comp_inci %>% do.call('rbind',.) %>% bind_cols(model='comp') %>% bind_cols(demand='volumetric'),
vd_demc_screenpr_inci %>% do.call('rbind',.) %>% bind_cols(model='screenpr') %>% bind_cols(demand='volumetric'))
vd_outputs_inci%>%
ggplot(aes(x=scenario, y=`E(demand)`, color=brand)) + geom_line() + facet_wrap(~model)+
xlab("Price (as % of original)") + scale_x_continuous(labels = scales::percent_format(), n.breaks = 9)