Pasha Zusmanovich - teachning

R code demonstrated at 6PAS2 Probability and Statistic 2, summer semester 2025/2026.

Class 8 (April 1 2026)

Generating data and testing it for Simpson's paradox:

generate_data <- function (n)
{
	p = sample.int (n, 8, replace=TRUE);

	d1_m_admitted    = p[1];
	d1_m_notadmitted = p[2];
	d1_w_admitted    = p[3];
	d1_w_notadmitted = p[4];

	d2_m_admitted    = p[5];
	d2_m_notadmitted = p[6];
	d2_w_admitted    = p[7];
	d2_w_notadmitted = p[8];

	d1_m   = d1_m_admitted + d1_m_notadmitted; # number of men in d1
	r_d1_m = d1_m_admitted / d1_m; 	           # ratio of admitted men in d1
	d1_w   = d1_w_admitted + d1_w_notadmitted; # number of women in d1
	r_d1_w = d1_w_admitted / d1_w; 	           # ratio of admitted women in d1

	d2_m   = d2_m_admitted + d2_m_notadmitted; # number of men in d2
	r_d2_m = d2_m_admitted / d2_m;             # ratio of admitted men in d2
	d2_w   = d2_w_admitted + d2_w_notadmitted; # number of women in d1
	r_d2_w = d2_w_admitted / d2_w;             # ratio of admitted women in d2

	r_m = (d1_m_admitted + d2_m_admitted) / (d1_m + d2_m); # ratio admitted men
	r_w = (d1_w_admitted + d2_w_admitted) / (d1_w + d2_w); # ratio admitted women
	
	simpson = ((r_d1_m < r_d1_w) & (r_d2_m < r_d2_w) & (r_m > r_w)) ||
	          ((r_d1_m > r_d1_w) & (r_d2_m > r_d2_w) & (r_m < r_w));

	return (list(simpson, p));
}

count = 0;
for (i in 1:1000)
{
	r = generate_data (100);
	if (r[[1]])
	{
		count = count + 1;
		print (r[[2]]);
	}
}

Function computing normal scores:

# take n values from v, m times
normscores <- function (v, n, m)
{
	s <- rep (0, n);
	for (i in 1:m)
	{
		s <- s + sort (sample (v, n));
	}
	return (s/m);
}

Computing normal scores using randomly generated sample:
normscores (rnorm (1000, mean=0, sd=1), 10, 100)

Computing normal scores using built-in R function (be sure that the package SuppDists is installed):

library (SuppDists)
normOrder (10)

Function for normality testing using normal scores:

testnorm <- function (v, n, m)
{
	a <- normscores (v, n, m);
	b <- sort (rnorm (n, mean=mean(v), sd=sd(v)));
	plot (b,a);
	abline (lm (a ~ b));
}

Test for normality SP500 (from MASS package):

testnorm (SP500, 10, 100)
testnorm (SP500, 100, 1000)

Function for normality testing using Q-Q plot:

QQ <- function (v)
{
	p <- seq (0.1, 0.9, by=0.1);
	qv <- quantile (v, p, names=F);
	qn <- qnorm (p, sd=sd(v), mean=mean(v));
	plot (qn,qv);
	abline (lm (qv ~ qn));
}

Test for normality SP500 using built-in R functions:

qqnorm (SP500)
qqline (SP500)

Function for computation of iterative correlation matrix:

icorr <- function (A)
{
	n <- dim(A)[1];
	B <- matrix (c(1),n,n);
	for (i in 1:(n-1))
	{	
		for (j in (i+1):n)
		{ 
			B[i,j] <- cor (A[i,], A[j,]);
			B[j,i] <- B[i,j];
		}
	}
	return (B);
}

A <- matrix (c(1, 0.1, 0.2, 0.1, 1, -0.5, 0.2, -0.5, 1), 3, 3);

for (i in 1:10)
{
	A <- icorr (A);
	print (A);
}

Example of a matrix with a very slow convergence:

A <- matrix (c(1, -0.73333333333, -0.2, 0.2, -0.73333333333, 1, -0.2, 0.2, -0.2, -0.2, 1, 0.33333333333, 0.2, 0.2, 0.33333333333, 1), 4, 4)

iter <- function (A, n)
{
	for (i in 1:n)
	{
		A <- icorr (A);
	}
	return (A);	
}	

iter (A, 100)
iter (A, 1000)
iter (A, 10000)

Created: Thu Oct 29 2015
Last modified: Wed Apr 8 2026 16:13:15 CEST