- In most cases a complete enumeration of the population is not possible, but if the sampling is done properly, properties of the population can be inferred from a randomly selected sample.
# Synthetic population: N values drawn from a normal distribution.
# The seed is fixed so the echoed results below are reproducible.
set.seed(236542)
N <- 1000
yi <- rnorm(N, mean = 5000, sd = 1000)

# Attribute at population level.
mean(yi)
## [1] 4965.94
hist(yi)

# Take a single sample of size n, drawing unit indices without replacement.
n <- 10
s <- sample(seq_along(yi), n, replace = FALSE)

# Mean of the sample ...
mean(yi[s])
## [1] 4729.023
# ... compared with the mean of the whole population.
mean(yi)
## [1] 4965.94

# Repeat the sampling K times and keep the mean of every sample.
K <- 1000
mu_hat <- numeric(K) # preallocated; filled in one replicate at a time
for (i in seq_len(K)) {
  s <- sample(seq_along(yi), n, replace = FALSE)
  mu_hat[i] <- mean(yi[s])
}
mean(mu_hat)
## [1] 4964.755

# Sampling distribution of the sample mean.
hist(mu_hat)
# Observed values of the n sampled units.
yi <- c(
  rep(42, 23), rep(41, 4), 36, 32, 29, rep(27, 2), 23, 19,
  rep(16, 2), rep(15, 2), 14, 11, 10, 9, 7, rep(6, 3), rep(5, 2), 4, 3
)

# N is the population size used to expand the sample to a total;
# n is the number of observations in the sample.
N <- 676
(n <- length(yi))
## [1] 50
hist(yi, breaks = seq(0, 42, 2))

# Estimated population total: sample mean scaled up by N.
(Yhat <- N * sum(yi) / n)
## [1] 19887.92

# Normal-approximation interval around the total. qnorm(0.9) leaves 10% in
# each tail (an 80% interval); sqrt(1 - n / N) is the finite population
# correction applied to the standard error N * sd(yi) / sqrt(n).
Yhat + c(-1, 1) * qnorm(0.9) * N * sd(yi) * sqrt(1 - n / N) / sqrt(n)
## [1] 18103.84 21672.00
library(survey)
# One row per sampled unit: running index, observed value, and the
# population size N repeated on every row so it can be passed as `fpc`.
sample.data <- data.frame(i = seq_len(n), y = yi, N = N)
head(sample.data, n = 5)
##   i  y   N
## 1 1 42 676
## 2 2 42 676
## 3 3 42 676
## 4 4 42 676
## 5 5 42 676

# svydesign() declares the sampling design: id = ~1 means there are no
# clusters, and fpc = ~N supplies the population size for the finite
# population correction.
srs.design <- svydesign(id = ~1, fpc = ~N, data = sample.data)
summary(srs.design)
## Independent Sampling design
## svydesign(id = ~1, fpc = ~N, data = sample.data)
## Probabilities:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
## 0.07396 0.07396 0.07396 0.07396 0.07396 0.07396
## Population size (PSUs): 676
## Data variables:
## [1] "i" "y" "N"

# Design-based estimate of the population total of y and its standard error.
(Yhat <- svytotal(~y, srs.design))
##   total     SE
## y 19888 1392.1

# 80% confidence interval for the estimated total.
confint(Yhat, level = 0.80)
##       10 %  90 %
## y 18103.84 21672