# THE DATA # 40 points from N(-1,1): xnorm <- c(0.75745087478711, -3.67833850920761, 0.773765294923852, -2.5166350926628, -0.866664528466648, -1.63695628038082, -2.69436694486401, -1.17670482134698, -0.93268038000603, -2.68863707210148, -2.55723562317876, -0.96943475222958, -0.502247844160163, -0.95460405243514, -1.35246808601944, 0.430539874991081, -0.216421013142778, -0.0735422349319686, 0.929234311847512, -2.55736988978725, 0.659802037906305, 0.487098508940152, -2.25146570024105, -1.54900454926781, -0.0399614473021575, -1.27424418773441, -1.39537014482172, -0.378208795173341, -1.43844136511807, -2.03013360311522, -2.60644108937020, -1.03736744149929, -1.06954070965917, -1.97334401699724, 0.34551263753373, -1.00174729815340, 0.227300814797052, -1.21912077204945, -2.36256531047837, -1.07478823528914 ) # 40 points from Exp(1): xexp <- c(0.225873099482754, 0.148239626311750, 0.163836174178869, 3.04540461285, 2.72215233916853, 1.25697952970387, 0.413789347279817, 0.241668664384633, 2.18483032420187, 4.2592981774477, 1.07056121710212, 0.708313713739526, 1.05090031489254, 0.188016591127962, 0.970786548177612, 1.01574974502893, 0.623112535104156, 0.0752273758896854, 1.42379735390819, 1.16037799600036, 3.44145662320738, 2.70706842640851, 1.31681641652705, 0.663615722674876, 0.179161738153013, 1.52543949513676, 0.117737954482436, 1.22918485007705, 0.940607616864749, 0.384938156697899, 1.17735727499695, 0.812610192970475, 0.971939288205176, 0.0429625739343464, 0.286606042180210, 1.36010397035153, 0.384745879098773, 1.42643168746474, 2.45802981952641, 0.565702312625945 ) # 20 points from Unif[0,1]: xunif <- c(0.748017165577039, 0.363418926019222, 0.459732743212953, 0.0838238240685314, 0.817100885789841, 0.792823903961107, 0.77300950884819, 0.366373649565503, 0.464972200803459, 0.666654106695205, 0.58317914721556, 0.519858546787873, 0.181064054602757, 0.743976694066077, 0.668645858066157, 0.940935818711296, 0.288860949221998, 0.085076387040317, 0.854602423030883, 0.69708909583278 ) # collect them into a single sample of 100 data points: xdata <- c(xnorm, xexp, xunif) n <- length(xdata) # define the true (mixture) density: truedens = function(x) { return( 0.4 * dnorm(x,-1,1) + 0.4 * dexp(x) + 0.2 * dunif(x) ) } # compute the centered interval density estimator: centintdens = function(x) { sum = 0 for (i in 1:n) { if (abs(xdata[i]-x) < h) sum = sum + 1 } return(sum / 2 / n / h) } # choose a kernel: K = function(x) {dnorm(x)} # compute the kernel density estimator: kerdens = function(x) { sum = 0 for (i in 1:n) { sum = sum + K((x-xdata[i])/h) } return(sum / n / h) } # define my own plotting routine (avoiding problems of "plot" & "curve"): # Note: STA410/2102 students do *NOT* need to study this function, # though you may use it if you wish. plotfunction = function(ff, from=xlim[1], to=xlim[2], col="black", add=(dev.cur()!=1), xlim=c(0,1) ) { numpoints = 1000; xlist = ylist = NULL; ymin = +Inf; ymax = -Inf; for (i in 1:numpoints) { xval = from + (to-from) * i / numpoints; yval = ff(xval); xlist = c(xlist, xval); ylist = c(ylist, yval); if (yval < ymin) ymin = yval; if (yval > ymax) ymax = yval; } if (add==FALSE) { plot( c((from+to)/2, (ymin+ymax)/2), type='n', xlim=c(from,to), ylim=c(ymin,ymax), xlab="x", ylab=paste( deparse(substitute(ff)), "(x)", sep="" ) ); } lines(xlist, ylist, col=col); } # colors() shows list of colours ... e.g. magenta, hotpink1, turquoise, ... # DO SOME PLOTTING (can copy-and-paste individual lines for individual plots): # compute the left and right ends of the graph, and the data dots' y level: range = max(xdata)-min(xdata) leftend = min(xdata) - 0.1*range rightend = max(xdata) + 0.1*range ylevel = -0.2 ydata <- rep(ylevel,n) # plot original data (at y-value = ylevel), and the y-axis black line: plot(xdata, ydata, ylim=c(2*ylevel,1) ) lines(c(leftend,rightend), c(0,0), col="black") # plot true density: curve(truedens, leftend, rightend, add=TRUE, col="red") # plot histogram: h=0.2; hist(xdata, breaks=((-4/h):(5/h))*h, freq=FALSE, add=TRUE, border="blue") # plot centered interval density: h=0.2; plotfunction(centintdens, leftend, rightend, col="green") # plot kernel density estimator: h=0.2; curve(kerdens, leftend, rightend, add=TRUE, col="purple") # try R's built-in density-estimation function: zz = density(xdata) ; xR = zz$x ; yR = zz$y lines(xR, yR, col="orange") # do some comparisons print( sum((yR - truedens(xR))^2) ) h=0.2; print( sum((kerdens(xR) - truedens(xR))^2) ) h=0.592; print( sum((kerdens(xR) - truedens(xR))^2) ) h=0.2 # sum((centintdens(xR) - truedens(xR))^2) ss = 0 ; for (i in 1:length(xR)) { ss = ss + (centintdens(xR[i]) - truedens(xR[i]))^2 } ; print( ss )