Skip to content

Commit f695f71

Browse files
committed
merge main into 1.0.X
2 parents c572221 + afc2efe commit f695f71

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+2562
-161
lines changed

.github/workflows/check_styler.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ jobs:
2222
fail-fast: false
2323
matrix:
2424
config:
25-
- {os: ubuntu-20.04, r: 'release', mlr3: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
25+
- {os: ubuntu-22.04, r: 'release', mlr3: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
2626

2727
env:
2828
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
@@ -43,7 +43,8 @@ jobs:
4343
- name: Install dependencies
4444
run: |
4545
install.packages('remotes')
46-
remotes::install_github("pat-s/styler@mlr-style", dependencies=TRUE)
46+
remotes::install_github("mlr-org/styler.mlr", dependencies=TRUE)
47+
install.packages("styler")
4748
shell: Rscript {0}
4849

4950
- name: Install system dependencies
@@ -55,6 +56,6 @@ jobs:
5556
5657
- name: Check styler
5758
run: |
58-
res = styler::style_pkg(style = styler::mlr_style)
59+
res = styler::style_pkg(style = styler.mlr::mlr_style, include_roxygen_examples = FALSE)
5960
if (!all(res$changed==FALSE)) stop("Code is not in line with the style guidelines (see https://github.com/DoubleML/doubleml-for-r/wiki/Style-Guidelines#use-styler-mlr-style)")
6061
shell: Rscript {0}

.github/workflows/deploy_docu.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ on:
1414
jobs:
1515
build:
1616

17-
runs-on: ubuntu-20.04
17+
runs-on: ubuntu-22.04
1818

1919
steps:
2020
- uses: actions/checkout@v4

.github/workflows/deploy_docu_dev.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ on:
1313

1414
jobs:
1515
build:
16-
runs-on: ubuntu-20.04
16+
runs-on: ubuntu-22.04
1717

1818
steps:
1919
- uses: actions/checkout@v4

.github/workflows/deploy_pkg.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ on:
99

1010
jobs:
1111
build:
12-
runs-on: ubuntu-20.04
12+
runs-on: ubuntu-22.04
1313

1414
steps:
1515
- uses: actions/checkout@v4

.github/workflows/rcheck.yml

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,11 @@ jobs:
2424
config:
2525
- {os: windows-latest, r: 'release', mlr3: 'release', cran_no_glmnet: 'false'}
2626
- {os: macOS-latest, r: 'release', mlr3: 'release', cran_no_glmnet: 'false'}
27-
- {os: ubuntu-20.04, r: 'release', mlr3: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", cran_no_glmnet: 'false'}
28-
- {os: ubuntu-20.04, r: 'devel', mlr3: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", cran_no_glmnet: 'false'}
29-
- {os: ubuntu-20.04, r: 'devel', mlr3: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", cran_no_glmnet: 'true'}
30-
- {os: ubuntu-20.04, r: 'devel', mlr3: 'dev', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", cran_no_glmnet: 'false'}
27+
- {os: ubuntu-22.04, r: 'release', mlr3: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", cran_no_glmnet: 'false'}
28+
- {os: ubuntu-22.04, r: 'release', mlr3: 'dev', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", cran_no_glmnet: 'false'}
29+
- {os: ubuntu-22.04, r: 'devel', mlr3: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", cran_no_glmnet: 'false'}
30+
- {os: ubuntu-22.04, r: 'devel', mlr3: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", cran_no_glmnet: 'true'}
31+
- {os: ubuntu-22.04, r: 'devel', mlr3: 'dev', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", cran_no_glmnet: 'false'}
3132

3233
env:
3334
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
@@ -79,6 +80,7 @@ jobs:
7980
run: |
8081
remotes::install_github("mlr-org/mlr3")
8182
remotes::install_github("mlr-org/paradox")
83+
remotes::install_github("mlr-org/mlr3learners")
8284
shell: Rscript {0}
8385

8486
- name: Check
@@ -122,9 +124,35 @@ jobs:
122124
if: runner.os == 'macOS'
123125
run: |
124126
remotes::install_cran("covr")
127+
remotes::install_cran("xml2")
128+
cov <- covr::package_coverage(
129+
quiet = FALSE,
130+
clean = FALSE,
131+
install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
132+
)
133+
covr::to_cobertura(cov)
125134
shell: Rscript {0}
126-
127-
- name: Test coverage
135+
136+
- uses: codecov/codecov-action@v4
128137
if: runner.os == 'macOS'
129-
run: covr::codecov()
130-
shell: Rscript {0}
138+
with:
139+
# Fail the job on upload errors unless this is a pull request without a Codecov token
140+
fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }}
141+
file: ./cobertura.xml
142+
plugin: noop
143+
disable_search: true
144+
token: ${{ secrets.CODECOV_TOKEN }}
145+
146+
- name: Show testthat output
147+
if: runner.os == 'macOS'
148+
run: |
149+
## --------------------------------------------------------------------
150+
find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
151+
shell: bash
152+
153+
- name: Upload test results
154+
if: failure() && runner.os == 'macOS'
155+
uses: actions/upload-artifact@v4
156+
with:
157+
name: coverage-test-failures
158+
path: ${{ runner.temp }}/package

CONTRIBUTING.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,11 @@ devtools::test()
115115
- [x] Check whether your changes adhere to the **"mlr-style" standards**.
116116
For the check you can use the following code
117117
```R
118-
require(styler)
119-
remotes::install_github("pat-s/styler@mlr-style")
120-
styler::style_pkg(style = styler::mlr_style) # entire package
121-
styler::style_file(<file>, style = styler::mlr_style) # specific file
118+
install.packages('remotes')
119+
remotes::install_github("mlr-org/styler.mlr", dependencies=TRUE)
120+
install.packages("styler")
121+
styler::style_pkg(style = styler.mlr::mlr_style) # entire package
122+
styler::style_file(<file>, style = styler.mlr::mlr_style) # specific file
122123
```
123124

124125
If your PR is still **work in progress**, please consider marking it a **draft PR**

DESCRIPTION

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: DoubleML
22
Type: Package
33
Title: Double Machine Learning in R
4-
Version: 1.0.1
4+
Version: 1.0.1.9000
55
Authors@R: c(
66
person("Philipp", "Bach", email = "philipp.bach@uni-hamburg.de", role=c("aut", "cre")),
77
person("Victor", "Chernozhukov", role="aut"),
@@ -38,7 +38,7 @@ Imports:
3838
mlr3learners (>= 0.3.0),
3939
mlr3misc
4040
Roxygen: list(markdown = TRUE, r6 = TRUE)
41-
RoxygenNote: 7.3.1
41+
RoxygenNote: 7.3.2
4242
Suggests:
4343
knitr,
4444
rmarkdown,
@@ -59,6 +59,7 @@ VignetteBuilder: knitr
5959
Collate:
6060
'double_ml.R'
6161
'double_ml_data.R'
62+
'double_ml_ssm.R'
6263
'double_ml_iivm.R'
6364
'double_ml_irm.R'
6465
'double_ml_pliv.R'

NAMESPACE

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ export(DoubleMLIIVM)
66
export(DoubleMLIRM)
77
export(DoubleMLPLIV)
88
export(DoubleMLPLR)
9+
export(DoubleMLSSM)
910
export(double_ml_data_from_data_frame)
1011
export(double_ml_data_from_matrix)
1112
export(fetch_401k)
@@ -16,6 +17,7 @@ export(make_pliv_CHS2015)
1617
export(make_pliv_multiway_cluster_CKMS2021)
1718
export(make_plr_CCDDHNR2018)
1819
export(make_plr_turrell2018)
20+
export(make_ssm_data)
1921
import(checkmate)
2022
importFrom(R6,R6Class)
2123
importFrom(clusterGeneration,genPositiveDefMat)
@@ -32,7 +34,7 @@ importFrom(mlr3,resample)
3234
importFrom(mlr3,rsmp)
3335
importFrom(mlr3learners,LearnerRegrLM)
3436
importFrom(mlr3misc,insert_named)
35-
importFrom(mlr3tuning,TuningInstanceSingleCrit)
37+
importFrom(mlr3tuning,TuningInstanceBatchSingleCrit)
3638
importFrom(mlr3tuning,tnr)
3739
importFrom(mlr3tuning,trm)
3840
importFrom(mvtnorm,rmvnorm)

R/datasets.R

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -958,3 +958,117 @@ make_pliv_multiway_cluster_CKMS2021 = function(N = 25, M = 25, dim_X = 100,
958958
}
959959
}
960960
}
961+
962+
#' Generates data from a sample selection model (SSM).
#'
#' The data generating process is defined as:
#'
#' \deqn{
#' y_i = \theta d_i + x_i' \beta + u_i,}
#'
#' \deqn{s_i = 1\lbrace d_i + \gamma z_i + x_i' \beta + v_i > 0 \rbrace,}
#'
#' \deqn{d_i = 1\lbrace x_i' \beta + w_i > 0 \rbrace,}
#'
#' with \eqn{y_i} being observed if \eqn{s_i = 1} and covariates
#' \eqn{x_i \sim \mathcal{N}(0, \Sigma^2_x)}, where \eqn{\Sigma^2_x} is a
#' matrix with entries \eqn{\Sigma_{kj} = 0.5^{|j-k|}}.
#' \eqn{\beta} is a \code{dim_x}-vector with entries
#' \eqn{\beta_j=\frac{0.4}{j^2}},
#' \eqn{z_i \sim \mathcal{N}(0, 1)},
#' \eqn{(u_i,v_i) \sim \mathcal{N}(0, \Sigma^2_{u,v})},
#' \eqn{w_i \sim \mathcal{N}(0, 1)}.
#'
#' If `mar = TRUE`, \eqn{\Sigma^2_{u,v}} is the identity matrix and
#' \eqn{\gamma = 0}. If `mar = FALSE`, \eqn{u_i} and \eqn{v_i} have unit
#' variances and covariance `0.8`, and \eqn{\gamma = 1}.
#' In the generated data the outcome `y` is set to `0` for all observations
#' with \eqn{s_i = 0}.
#'
#' The data generating process is inspired by a process used in the simulation
#' study (see Appendix E) of Bia, Huber and Lafférs (2023).
#'
#' @param n_obs (`integer(1)`) \cr
#' The number of observations to simulate.
#' @param dim_x (`integer(1)`) \cr
#' The number of covariates. Must be at least `1`.
#' @param theta (`numeric(1)`) \cr
#' The value of the causal parameter.
#' @param mar (`logical(1)`) \cr
#' Indicates whether missingness at random holds. The instrument `z` is only
#' part of the returned data if `mar = FALSE`.
#' @param return_type (`character(1)`) \cr
#' If `"DoubleMLData"`, returns a `DoubleMLData` object.
#' If `"data.frame"` returns a `data.frame()`.
#' If `"data.table"` returns a `data.table()`.
#' If `"matrix"` a named `list()` with entries `X`, `y`, `d`, `s` (and `z` if
#' `mar = FALSE`) is returned. Every entry in the list is a matrix object.
#' Default is `"DoubleMLData"`.
#'
#' @references Michela Bia, Martin Huber & Lukáš Lafférs (2023) Double Machine Learning for Sample Selection Models,
#' Journal of Business & Economic Statistics, DOI: 10.1080/07350015.2023.2271071
#'
#' @return A data object according to the choice of `return_type`.
#' @export
make_ssm_data = function(n_obs = 8000, dim_x = 100, theta = 1, mar = TRUE, return_type = "DoubleMLData") {

  assert_choice(
    return_type,
    c("data.table", "matrix", "data.frame", "DoubleMLData")
  )

  assert_count(n_obs)
  # dim_x = 0 would make `0:(dim_x - 1)` and `1:dim_x` count backwards and
  # fail later with an unrelated dimension error, so reject it up front.
  assert_count(dim_x, positive = TRUE)
  assert_numeric(theta, len = 1)
  # `mar` drives every branch below; NA or non-logical input must not slip
  # through to `if ()`.
  assert_flag(mar)

  if (mar) {
    # Independent outcome / selection errors, instrument has no effect.
    sigma = matrix(c(1, 0, 0, 1), 2, 2)
    gamma = 0
  } else {
    # Correlated errors (selection on unobservables), instrument enters the
    # selection equation.
    sigma = matrix(c(1, 0.8, 0.8, 1), 2, 2)
    gamma = 1
  }

  # NOTE: the order of the random draws (e, x, treatment noise, z) is kept
  # fixed so that results for a given seed stay reproducible. `z` is drawn
  # even if it is not returned for the same reason.
  e = t(rmvnorm(n_obs, rep(0, 2), sigma))
  cov_mat = toeplitz(0.5^(seq_len(dim_x) - 1))
  x = rmvnorm(n_obs, rep(0, dim_x), cov_mat)
  beta = 0.4 / (seq_len(dim_x)^2)
  x_beta = x %*% beta

  d = ifelse(x_beta + rnorm(n_obs) > 0, 1, 0)
  z = as.matrix(rnorm(n_obs))
  s = ifelse(x_beta + d + gamma * z + e[1, ] > 0, 1, 0)
  y = x_beta + theta * d + e[2, ]
  # Outcome is only observed for selected units; unobserved values are
  # encoded as 0.
  y[s == 0] = 0

  colnames(x) = paste0("X", seq_len(dim_x))
  colnames(y) = "y"
  colnames(d) = "d"
  colnames(z) = "z"
  colnames(s) = "s"

  if (return_type == "matrix") {
    if (mar) {
      return(list("X" = x, "y" = y, "d" = d, "s" = s))
    }
    return(list("X" = x, "y" = y, "d" = d, "z" = z, "s" = s))
  }

  if (return_type == "data.frame") {
    if (mar) {
      return(data.frame(x, y, d, s))
    }
    return(data.frame(x, y, d, z, s))
  }

  if (return_type == "data.table") {
    if (mar) {
      return(data.table(x, y, d, s))
    }
    return(data.table(x, y, d, z, s))
  }

  # Remaining choice is "DoubleMLData" (guaranteed by assert_choice above).
  if (mar) {
    dt = data.table(x, y, d, s)
    return(DoubleMLData$new(dt, y_col = "y", d_cols = "d", s_col = "s"))
  }
  dt = data.table(x, y, d, z, s)
  DoubleMLData$new(dt, y_col = "y", d_cols = "d", z_cols = "z", s_col = "s")
}

R/double_ml.R

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,8 @@ DoubleML = R6Class("DoubleML",
280280
} else {
281281
stop("can't set field tuning_res")
282282
}
283-
}),
283+
}
284+
),
284285

285286
public = list(
286287
#' @description
@@ -311,6 +312,7 @@ DoubleML = R6Class("DoubleML",
311312
"\n",
312313
"Covariates: ", paste0(self$data$x_cols, collapse = ", "), "\n",
313314
"Instrument(s): ", paste0(self$data$z_cols, collapse = ", "), "\n",
315+
"Selection variable: ", paste0(self$data$s_col, collapse = ", "), "\n",
314316
cluster_info,
315317
"No. Observations: ", self$data$n_obs, "\n")
316318

@@ -674,7 +676,8 @@ DoubleML = R6Class("DoubleML",
674676
function(x) {
675677
check_smpl_split(x, self$data$n_obs,
676678
check_intersect = TRUE)
677-
})
679+
}
680+
)
678681
private$smpls_ = smpls
679682
}
680683
} else {
@@ -697,7 +700,8 @@ DoubleML = R6Class("DoubleML",
697700
function(x) {
698701
check_smpl_split(x, self$data$n_obs,
699702
check_intersect = TRUE)
700-
})
703+
}
704+
)
701705
private$smpls_ = smpls
702706
}
703707
} else {
@@ -752,7 +756,7 @@ DoubleML = R6Class("DoubleML",
752756
#' @param tune_settings (named `list()`) \cr
753757
#' A named `list()` with arguments passed to the hyperparameter-tuning with
754758
#' [mlr3tuning](https://mlr3tuning.mlr-org.com/) to set up
755-
#' [TuningInstance][mlr3tuning::TuningInstanceSingleCrit] objects.
759+
#' [TuningInstance][mlr3tuning::TuningInstanceBatchSingleCrit] objects.
756760
#' `tune_settings` has entries
757761
#' * `terminator` ([Terminator][bbotk::Terminator]) \cr
758762
#' A [Terminator][bbotk::Terminator] object. Specification of `terminator`
@@ -1465,7 +1469,7 @@ DoubleML = R6Class("DoubleML",
14651469
for (learner in self$params_names()) {
14661470
if (!is.null(models[[learner]])) {
14671471
private$models_[[learner]][[self$data$treat_col]][[
1468-
private$i_rep]] = models[[learner]]
1472+
private$i_rep]] = models[[learner]]
14691473
}
14701474
}
14711475
},

0 commit comments

Comments
 (0)