These functions simplify and unify sampling in various ways.
resample(..., replace = TRUE)
deal(...)
shuffle(x, replace = FALSE, prob = NULL, groups = NULL, orig.ids = FALSE)
sample(x, size, replace = FALSE, ...)
# S3 method for default
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = FALSE,
...
)
# S3 method for data.frame
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = TRUE,
fixed = names(x),
shuffled = c(),
invisibly.return = NULL,
...
)
# S3 method for matrix
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = FALSE,
...
)
# S3 method for factor
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = FALSE,
drop.unused.levels = FALSE,
...
)
# S3 method for lm
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = FALSE,
drop.unused.levels = FALSE,
parametric = FALSE,
transformation = NULL,
...
)
additional arguments passed to base::sample() or sample().
Should sampling be with replacement?
Either a vector of one or more elements from which to choose, or a positive integer.
A vector of probability weights for obtaining the elements of the vector being sampled.
a vector (or variable in a data frame) specifying groups to sample within. This will be recycled if necessary.
a logical; should original ids be included in returned data frame?
a non-negative integer giving the number of items to choose.
a vector of column names. These variables are shuffled en masse, preserving associations among these columns.
a vector of column names. These variables are reshuffled individually (within groups if the groups argument is specified), breaking associations among these columns. See examples.
a logical, should return be invisible?
a logical, should unused levels be dropped?
A logical indicating whether the resampling should be done parametrically.
NULL or a function providing a transformation to be applied to the
synthetic responses. If NULL, an attempt is made to infer the appropriate transformation
from the original call as recorded in x.
These functions are wrappers around sample() providing different defaults and more natural names.
# 100 Bernoulli trials -- no need for replace=TRUE
resample(0:1, 100)
#> [1] 0 0 1 0 1 1 0 0 1 0 1 1 1 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0 0 0 1 1 1 1 1 1 0
#> [38] 0 0 0 1 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 0 0 0 1 1 0 1 0 1
#> [75] 1 1 0 1 1 1 0 0 1 1 0 0 1 0 1 1 0 0 1 1 0 1 0 1 0 0
tally(resample(0:1, 100))
#> X
#> 0 1
#> 52 48
if (require(mosaicData)) {
Small <- sample(KidsFeet, 10)
resample(Small)
tally(~ sex, data=resample(Small))
tally(~ sex, data=resample(Small))
# fixed marginals for sex
tally(~ sex, data=Small)
tally(~ sex, data=resample(Small, groups=sex))
# the shuffled argument can be used to reshuffle some variables within groups
# orig.id shows where the values were in the original data frame.
Small <- mutate(Small,
id1 = paste(sex,1:10, sep=":"),
id2 = paste(sex,1:10, sep=":"))
resample(Small, groups=sex, shuffled=c("id1","id2"))
}
#> name birthmonth birthyear length width sex biggerfoot domhand orig.id
#> 3 Zach 12 87 24.5 9.7 B R R 8.8.1
#> 6 Scotty 3 88 25.7 9.7 B R R 1.1.8
#> 6.1 Scotty 3 88 25.7 9.7 B R R 1.1.1
#> 38 Hayley 1 88 21.6 7.9 G R R 10.6.7
#> 20 Heather 3 88 25.5 9.5 G R R 6.2.2
#> 15 Julie 11 87 26.0 9.3 G L R 2.7.6
#> 20.1 Heather 3 88 25.5 9.5 G R R 6.3.3
#> 27 Abby 2 88 26.1 9.5 G L R 3.6.10
#> 8 Caitlin 6 88 23.0 8.8 G L R 7.7.6
#> 8.1 Caitlin 6 88 23.0 8.8 G L R 7.10.7
#> id1 id2
#> 3 B:8 B:1
#> 6 B:1 B:8
#> 6.1 B:1 B:1
#> 38 G:6 G:7
#> 20 G:2 G:2
#> 15 G:7 G:6
#> 20.1 G:3 G:3
#> 27 G:6 G:10
#> 8 G:7 G:6
#> 8.1 G:10 G:7
deal(Cards, 13) # A Bridge hand
#> [1] "AC" "AS" "KH" "3H" "8H" "9H" "10H" "8C" "AD" "7C" "4C" "KS"
#> [13] "6D"
shuffle(Cards)
#> [1] "JC" "6H" "5S" "4H" "KS" "3C" "7D" "2D" "QD" "2H" "10D" "8C"
#> [13] "QH" "JD" "5H" "4S" "5C" "AD" "8H" "10C" "KH" "4D" "QS" "AH"
#> [25] "4C" "7H" "KD" "JS" "KC" "6S" "7S" "7C" "10H" "9H" "9C" "JH"
#> [37] "AC" "3H" "2C" "3D" "6C" "5D" "AS" "8D" "10S" "3S" "2S" "6D"
#> [49] "9S" "QC" "9D" "8S"
model <- lm(width ~length * sex, data = KidsFeet)
KidsFeet |> head()
#> name birthmonth birthyear length width sex biggerfoot domhand
#> 1 David 5 88 24.4 8.4 B L R
#> 2 Lars 10 87 25.4 8.8 B L L
#> 3 Zach 12 87 24.5 9.7 B R R
#> 4 Josh 1 88 25.2 9.8 B L R
#> 5 Lang 2 88 25.1 8.9 B L R
#> 6 Scotty 3 88 25.7 9.7 B R R
resample(model) |> head()
#> width length sex
#> 1 8.593635 24.4 B
#> 2 9.420481 25.4 B
#> 3 9.444851 24.5 B
#> 4 8.763734 25.2 B
#> 5 9.141002 25.1 B
#> 6 9.090498 25.7 B
Boot <- do(500) * lm(width ~ length * sex, data = resample(KidsFeet))
#> Using parallel package.
#> * Set seed with set.rseed().
#> * Disable this message with options(`mosaic:parallelMessage` = FALSE)
df_stats(~ Intercept + length + sexG + length.sexG, data = Boot, sd)
#> response sd
#> 1 Intercept 1.34047944
#> 2 length 0.05289191
#> 3 sexG 1.99298145
#> 4 length.sexG 0.07979467
head(Boot)
#> Intercept length sexG length.sexG sigma r.squared F
#> 1 4.061731 0.1998271 -1.627390 0.055943489 0.3988934 0.4812676 10.824056
#> 2 3.396541 0.2307611 2.059666 -0.095689727 0.3896124 0.4227335 8.543525
#> 3 2.657438 0.2533723 1.783024 -0.071805843 0.3559803 0.3278852 5.691478
#> 4 3.890018 0.2063346 -6.725233 0.257798306 0.4043891 0.4622521 10.028755
#> 5 2.346084 0.2797201 -0.195402 -0.009428255 0.3656148 0.5828985 16.304145
#> 6 5.350636 0.1572710 -2.605837 0.090509618 0.3595442 0.5509561 14.314462
#> numdf dendf .row .index
#> 1 3 35 1 1
#> 2 3 35 1 2
#> 3 3 35 1 3
#> 4 3 35 1 4
#> 5 3 35 1 5
#> 6 3 35 1 6
summary(coef(model))
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> -0.6239 -0.1441 0.1142 0.8642 1.1225 3.8521