R code demonstrated at
6PAS2 Probability and Statistic 2, summer semester 2025/2026.
Class 8 (April 1 2026)
Generating data and testing it for Simpson's paradox:
generate_data <- function (n)
{
p = sample.int (n, 8, replace=TRUE);
d1_m_admitted = p[1];
d1_m_notadmitted = p[2];
d1_w_admitted = p[3];
d1_w_notadmitted = p[4];
d2_m_admitted = p[5];
d2_m_notadmitted = p[6];
d2_w_admitted = p[7];
d2_w_notadmitted = p[8];
d1_m = d1_m_admitted + d1_m_notadmitted; # number of men in d1
r_d1_m = d1_m_admitted / d1_m; # ratio of admitted men in d1
d1_w = d1_w_admitted + d1_w_notadmitted; # number of women in d1
r_d1_w = d1_w_admitted / d1_w; # ratio of admitted women in d1
d2_m = d2_m_admitted + d2_m_notadmitted; # number of men in d2
r_d2_m = d2_m_admitted / d2_m; # ratio of admitted men in d2
d2_w = d2_w_admitted + d2_w_notadmitted; # number of women in d1
r_d2_w = d2_w_admitted / d2_w; # ratio of admitted women in d2
r_m = (d1_m_admitted + d2_m_admitted) / (d1_m + d2_m); # ratio admitted men
r_w = (d1_w_admitted + d2_w_admitted) / (d1_w + d2_w); # ratio admitted women
simpson = ((r_d1_m < r_d1_w) & (r_d2_m < r_d2_w) & (r_m > r_w)) ||
((r_d1_m > r_d1_w) & (r_d2_m > r_d2_w) & (r_m < r_w));
return (list(simpson, p));
}
count = 0;
for (i in 1:1000)
{
r = generate_data (100);
if (r[[1]])
{
count = count + 1;
print (r[[2]]);
}
}
Function computing normal scores:
# take n values from v, m times
normscores <- function (v, n, m)
{
s <- rep (0, n);
for (i in 1:m)
{
s <- s + sort (sample (v, n));
}
return (s/m);
}
Computing normal scores using randomly generated sample:
normscores (rnorm (1000, mean=0, sd=1), 10, 100)
Computing normal scores using built-in R function (be sure that the package
SuppDists is installed):
library (SuppDists)
normOrder (10)
Function for normality testing using normal scores:
testnorm <- function (v, n, m)
{
a <- normscores (v, n, m);
b <- sort (rnorm (n, mean=mean(v), sd=sd(v)));
plot (b,a);
abline (lm (a ~ b));
}
Test for normality SP500 (from MASS package):
testnorm (SP500, 10, 100)
testnorm (SP500, 100, 1000)
Function for normality testing using Q-Q plot:
QQ <- function (v)
{
p <- seq (0.1, 0.9, by=0.1);
qv <- quantile (v, p, names=F);
qn <- qnorm (p, sd=sd(v), mean=mean(v));
plot (qn,qv);
abline (lm (qv ~ qn));
}
Test for normality SP500 using built-in R functions:
qqnorm (SP500)
qqline (SP500)
Function for computation of iterative correlation matrix:
icorr <- function (A)
{
n <- dim(A)[1];
B <- matrix (c(1),n,n);
for (i in 1:(n-1))
{
for (j in (i+1):n)
{
B[i,j] <- cor (A[i,], A[j,]);
B[j,i] <- B[i,j];
}
}
return (B);
}
A <- matrix (c(1, 0.1, 0.2, 0.1, 1, -0.5, 0.2, -0.5, 1), 3, 3);
for (i in 1:10)
{
A <- icorr (A);
print (A);
}
Example of a matrix with a very slow convergence:
A <- matrix (c(1, -0.73333333333, -0.2, 0.2, -0.73333333333, 1, -0.2, 0.2, -0.2, -0.2, 1, 0.33333333333, 0.2, 0.2, 0.33333333333, 1), 4, 4)
iter <- function (A, n)
{
for (i in 1:n)
{
A <- icorr (A);
}
return (A);
}
iter (A, 100)
iter (A, 1000)
iter (A, 10000)
Created: Thu Oct 29 2015
Last modified: Wed Apr 8 2026 16:13:15 CEST