Some exercise on R functions

rm(list = ls()) # clean up workspace first

case study on numerical integration

  1. Finish the code that calculates midpoint and trapezoid values.
# if you are modifying this source file directly, remember to change the above flag to eval=TRUE
# finish the code below
midpoint <- function(f, a, b) {
  result <- (b - a) * f((a + b) / 2)
  return(result)
}

trapezoid <- function(f, a, b) {
  result <- (b - a) / 2 * (f(a) + f(b))
  return(result)
}

# what do you get?
midpoint(sin, 0, pi)
## [1] 3.141593
trapezoid(sin, 0, pi)
## [1] 1.923671e-16
midpoint.composite <- function(f, a, b, n = 10) {
  points <- seq(a, b, length = n + 1)

  area <- 0
  for (i in seq_len(n)) {
    area <- area + midpoint(f, points[i], points[i + 1])
  }
  return(area)
}

trapezoid.composite <- function(f, a, b, n = 10) {
  points <- seq(a, b, length = n + 1)
  
  area <- 0
  for (i in seq_len(n)) {
    area <- area + trapezoid(f, points[i], points[i + 1])
  }
  return(area)
}

midpoint.composite(sin, 0, pi, n = 10)
## [1] 2.008248
midpoint.composite(sin, 0, pi, n = 100)
## [1] 2.000082
midpoint.composite(sin, 0, pi, n = 1000)
## [1] 2.000001
trapezoid.composite(sin, 0, pi, n = 10)
## [1] 1.983524
trapezoid.composite(sin, 0, pi, n = 100)
## [1] 1.999836
trapezoid.composite(sin, 0, pi, n = 1000)
## [1] 1.999998
  1. (not in the above solutions, will be provided after Monday) Well, the above functions have loop! Let’s try to avoid loops.
midpoint.composite.vectorize <- function(f, a, b, n = 10) {
  points <- seq(a, b, length = n + 1)
  areas <- midpoint(f, points[-(n + 1)], points[-1])
  return(sum(areas))
}

trapezoid.composite.vectorize <- function(f, a, b, n = 10) {
  points <- seq(a, b, length = n + 1)
  areas <- trapezoid(f, points[-(n + 1)], points[-1])
  return(sum(areas))
}

midpoint.composite.vectorize(sin, 0, pi, n = 10)
## [1] 2.008248
midpoint.composite.vectorize(sin, 0, pi, n = 100)
## [1] 2.000082
midpoint.composite.vectorize(sin, 0, pi, n = 1000)
## [1] 2.000001
trapezoid.composite.vectorize(sin, 0, pi, n = 10)
## [1] 1.983524
trapezoid.composite.vectorize(sin, 0, pi, n = 100)
## [1] 1.999836
trapezoid.composite.vectorize(sin, 0, pi, n = 1000)
## [1] 1.999998
  1. compare run time
system.time(midpoint.composite(sin, 0, pi, n = 10000))
##    user  system elapsed 
##   0.013   0.000   0.013
system.time(trapezoid.composite(sin, 0, pi, n = 10000))
##    user  system elapsed 
##   0.015   0.000   0.015
system.time(midpoint.composite.vectorize(sin, 0, pi, n = 10000))
##    user  system elapsed 
##   0.001   0.001   0.001
system.time(trapezoid.composite.vectorize(sin, 0, pi, n = 10000))
##    user  system elapsed 
##   0.001   0.000   0.001

Normal equations

Now try to implement the Normal equations from scratch. \(\hat{\beta} = (X^{\top}X)^{-1}X^{\top}Y\).

my.normal.equations <- function(X, Y) {
  if (!is.vector(Y)) {
    stop("Y is not a vector!")
  }
  
  if (!is.matrix(X)) {  # force X to be a matrix for now
    stop("X is not a matrix!")
  }
  
  if (dim(X)[1] != length(Y)) {
    stop("Dimension mismatch between X and Y!")
  }
  
  result <- solve(t(X) %*% X, t(X) %*% Y)
  return(result) # finish the calculation
}

set.seed(7360)
sample.size <- 100
num.col <- 2
X <- matrix(rnorm(sample.size * num.col), nrow = sample.size, ncol = num.col)
X <- cbind(1, X)
Y <- rnorm(sample.size)

system.time(result.lm <- lm(Y ~ X[, 2] + X[, 3]))
##    user  system elapsed 
##   0.003   0.000   0.003
summary(result.lm)
## 
## Call:
## lm(formula = Y ~ X[, 2] + X[, 3])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.92783 -0.58015  0.05852  0.57220  1.82080 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.04356    0.09662   0.451   0.6532  
## X[, 2]      -0.05051    0.09222  -0.548   0.5852  
## X[, 3]       0.17642    0.09189   1.920   0.0578 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9171 on 97 degrees of freedom
## Multiple R-squared:  0.04138,    Adjusted R-squared:  0.02162 
## F-statistic: 2.094 on 2 and 97 DF,  p-value: 0.1288
system.time(result.my.normal.equations <- my.normal.equations(X, Y))
##    user  system elapsed 
##   0.001   0.000   0.000
result.my.normal.equations
##             [,1]
## [1,]  0.04355651
## [2,] -0.05050778
## [3,]  0.17641649