Example R programs and commands Data visualization: bivariate densities # All lines preceded by the "#" character are my comments. # All other left-justified lines are my input. # All other indented lines are the R program output. ##### ESTIMATING the mean and covariance of a bivariate normal from samples # Read 17 samples from a bivariate normal density: # x1 x2 i (of 17 samples) #---------- ---------- -- data <- scan() 1.033024 0.418857 1 0.322703 0.613913 2 -1.949918 0.505329 3 -3.910608 0.081451 4 -1.163915 0.904262 5 -4.944068 -0.086801 6 0.936824 0.889408 7 -2.697000 0.185941 8 -1.484696 0.318464 9 -2.969133 -0.019170 10 -0.565761 0.682739 11 0.314755 0.010014 12 3.181993 0.862641 13 -0.816990 0.440558 14 -2.053569 0.444235 15 -0.311087 0.797796 16 -1.131430 0.522798 17 # Impose structure: 3 columns (x1, x2, i) with 17 rows of data triplets. x123 <- matrix(data, ncol=3, byrow=T); x123 # NOTE: R fills columns first by default, in the top-to-bottom then # left-to-right order. Use byrow=TRUE (or byrow=T) to fill by rows. # Extract the first two (data) columns, omitting the third (index i) column, # giving default names to the rows and columns: x<- data.frame(x123[,1:2]); x # Calculate the mean of the columns: eMu <- colMeans(x); eMu # Compute the sum-of-squares matrix and divide by the number of degrees # of freedom (number of samples less one) to get the covariance matrix: # using R's covariance function: eSig <- cov(x); eSig # Find the eigenvalues (and eigenvectors) of the covariance matrix: eigen(eSig) ## The result is estimates for the mean (mu) and covariance (Sigma) ## parameters of the bivariate normal density that produced the ## original samples. ##### GENERATE random samples with the estimated mean and covariance: ## Use the function mvrnorm() after adding the MASS package to R: library("MASS") # perhaps after 'install.packages("MASS")' n <- 17 # since there are 17 old samples sim <- mvrnorm(n, mu=eMu, Sigma=eSig); all <- rbind(x,sim) # concatenate the old and new points. blackred <- gl(2,n,2*n) # color labels: 1=black, 2=red plot(all, col=blackred) # compare old and new samplings ## Alternative: obtain random samples with the same covariance directly by ## factoring the covariance matrix to find the right rotation: library("mgcv") # load the matrix square root function from its library y<- rbind(rnorm(n), rnorm(n)) # two rows of indep N(0,1) coordinates new <- mroot(eSig)%*%y + eMu # mix the coordinates to get new points new <- t(new) # ...and transpose to get one new point per row all <- rbind(x,new) # concatenate the old and new points. blackred <- gl(2,n,2*n) # color labels: 1=black, 2=red plot(all, col=blackred) # compare old and new samplings # The covariance of the 'new' samples should approximate 'cov(x)' but # is almost certain to be different: cov(new) # covariance of the new points cov(x) # covariance of the old points ##### PLOT a bivariate normal density from its formula. ## Use "contour()" and "persp()" after producing samples # Use the function "bvnpdf()" defined on the class website under LINKS # Bivariate normal probability density function # AUTHOR: M.V.Wickerhauser # DATE: 6 February 2011 # 9 February 2012 corrected for(j) loop limit # # Inputs: # # x: Vector of x-values # y: Vector of y-values # mu: Mean of the pdf [default (0,0)] # Sigma: Covariance of the pdf [default Id]. # This must be a positive definite (hence nonsingular) # symmetric 2x2 matrix. # # Output: # z: Matrix of `length(x)' rows and `length(y)' columns # containing f(x[i],y[j]) at index i,j, where # f is the bivariate normal pdf with mean `mu' # and covariance `Sigma'. # bvnpdf <- function( x, y, mu=c(0,0), Sigma=diag(c(1,1)) ) { A <- 0.5*solve(Sigma); # matrix for the quadratic form xx <- x-mu[1]; yy <- y-mu[2]; # mean-subtraction c <- 1/(2 * pi * sqrt(det(Sigma))); # normalization z<- matrix( 0, nrow=length(x), ncol<-length(y) ); # initialization for( i in 1:length(x) ) { for( j in 1:length(y) ) { xy <- rbind( xx[i], yy[j] ) z[i,j] <- c*exp(- t(xy) %*% A %*% xy ) } } return(z); } # Generate x,y coordinates for a (square) grid of points to evaluate: x<-seq(-9,9,by=0.25); y<-x; # Evaluate "bvnpdf()" at the grid of points: z<-bvnpdf(x,y,mu=eMu, Sigma=eSig ); # Perspective plot of the graph in (x,y,z): persp(x,y,z) # Contour plot of the graph in (x,y,z): contour(x,y,z)