############################################################
# Maximum Likelihood for Binary model with Latent Class
# By: Mauricio Sarrias
############################################################

# Helper function for time
my.time <- function(object){
  et <- object[3]
  s <- round(et,0)
  h <- s%/%3600
  s <- s-3600*h
  m <- s%/%60
  s <- s-60*m
  paste(h, "h:", m, "m:", s, "s", sep = "")
}

## suml function from mlogit (Croissant, 2013)
suml <- function(x){
  n <- length(x)
  if (!is.null(dim(x[[1]]))){
    d <- dim(x[[1]])
    s <- matrix(0,d[1], d[2])
    for (i in 1:n){
      x[[i]][is.na(x[[i]])] <- 0
      s <- s + x[[i]]
    }
  }
  else{
    s <- rep(0,length(x[[n]]))
    for (i in 1:n){
      x[[i]][is.na(x[[i]])] <- 0
      s <- s + x[[i]]
    }
  }
  s
}

repRows <- function(x, n){
  matrix(rep(x, each = n), nrow = n)
}

repCols <- function(x, n){
  matrix(rep(x, each = n), ncol = n, byrow = TRUE)
}

## ml function
ml_bin_lc <- function(theta, y, X, H, Q, id = NULL, weights = NULL,
                      gradient = TRUE, get.bi = TRUE){
  # Parameters and globals
  K <- ncol(X)
  N <- nrow(X)
  panel <- !is.null(id)
  if (panel) if (length(weights == 1)) weights <- rep(weights, N)
  
  beta <- matrix(theta[1L:(K * Q)], nrow = K, ncol = Q)
  rownames(beta) <- colnames(X); colnames(beta) <- paste("class", 1:Q, sep = ":")
  gamma <- theta[-c(1L:(K * Q))]
  
  if(get.bi) bi <- t(beta)
  
  # Make weights from a multinomial logit model
  ew <- lapply(H, function(x) exp(crossprod(t(x), gamma)))
  sew <- suml(ew)
  Wnq <- lapply(ew, function(x){ v <- x / sew;
                                v[is.na(v)] <- 0;
                                as.vector(v)})
  Wnq <- Reduce(cbind, Wnq) # W
  
  # Make Probit Probability
  pfun <- pnorm
  dfun <- dnorm
  mill <- function(x) dfun(x) / pmax(pfun(x), .Machine$double.eps)
  q <- 2 * y - 1
  
  index <- tcrossprod(X, t(beta))
  Pnq <- pfun(q * index) # N * Q
  if (panel) Pnq <- apply(Pnq, 2, tapply, id, prod)
  WPnq <- Wnq * Pnq 
  Ln <- apply(WPnq, 1, sum)
  if (get.bi) Qnr <- WPnq / Ln
  lnL <- if (panel) sum(log(Ln) * weights[!duplicated(id)]) else sum(log(Ln) * weights)
  
  ## Gradient
  if (gradient){
    lambda <- q * mill(q * index) # N * Q
    Qnr <- WPnq / Ln
    if (panel) Qnr <- Qnr[id, ]
    eta <- lambda * Qnr
    etar <- eta[, rep(1:Q, each = K)]
    Xg <- X[, rep(1:K, Q)] # N * (K * Q)
    grad.beta <- Xg * etar
    
    if (panel){
      Wnq <- Wnq[id, ]
      H <- lapply(H, function(x) x[id, ])
    }
    Wg <- vector(mode = "list", length = Q)
    IQ <- diag(Q)
    for(q in 1:Q) Wg[[q]] <- rowSums(Qnr * (repRows(IQ[q, ], N) - repCols(Wnq[, q], Q)))
    grad.gamma <- suml(mapply("*", H, Wg, SIMPLIFY = FALSE))
    gari <- cbind(grad.beta, grad.gamma)
    colnames(gari) <- names(theta)
    attr(lnL, "gradient") <- gari * weights
  }
  if (get.bi){
    attr(lnL, "bi") <- bi
    attr(lnL, "Qir") <- Qnr
    attr(lnL, "Wnq") <- Wnq
  }
  lnL
}