###################
# Novel Binomial Method
# Generates from correlated binomials according to the input parameters
# Uses an optimization method to get an appropriate pmf to generate
# multivariate binomial values directly
# Outputs a matrix of observations
GenerateAnySnpOptParamBinomial <- function(
  pAllele = c(0.1, 0.1, 0.1),
  matTargetCor = diag(3),
  nObs = 500
){
  # pAllele = The minor allele frequency of each SNP in order
  # matTargetCor = The target correlation matrix
  # nObs = the number of observations to generate

  # Generate integer values from the pmf produced by the optimization method
  pmf <- calcOptPmfAnySnpBinomial(pAllele, matTargetCor)
  if (isTRUE(all.equal(sum(pmf), 0))) {
    return(matrix(NA, ncol = length(pAllele), nrow = nObs))
  }
  genIntVals <- genBag(nObs, pmf)

  # Constructs the map between the integer values and the binomial values
  # for each SNP.
  # Each column is a SNP, each row is an integer value, each entry is the
  # appropriate binomial value for the integer value.
  # Note that the first column of the map matrix corresponds to the last snp,
  # the second column of the map matrix is the second last snp,
  # ...
  # and the last column of the map matrix is the first snp.
  # Example of the SNP naming convention for 3 SNPs together:
  # 111, 112, 113, 121, 122, 123, 131, 132, 133,
  # 211, 212, 213, 221, 222, 223, 231, 232, 233,
  # 311, 312, 313, 321, 322, 323, 331, 332, 333
  # Corresponding integer values are
  #  1,  2,  3,  4,  5,  6,  7,  8,  9,
  # 10, 11, 12, 13, 14, 15, 16, 17, 18,
  # 19, 20, 21, 22, 23, 24, 25, 26, 27
  numSnp <- length(pAllele)
  mapIntToBin <- sapply(c(1:numSnp), function(snpInd){
    inner <- 3 ^ (snpInd - 1)
    outer <- 3 ^ (numSnp - snpInd)
    toAdd <- rep(
      c(rep(0, inner), rep(1, inner), rep(2, inner)),
      outer
    )
    return(toAdd)
  })
  # Reverse the ordering so SNP1 corresponds to column 1
  mapIntToBinOrdered <- mapIntToBin[, ncol(mapIntToBin):1]

  # use a simple apply to get the binomial values using the mapping
  multiBinomialVals <- apply(mapIntToBinOrdered, 2, function(colMap){
    colMap[genIntVals]
  })
  return(multiBinomialVals)
}

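# Illustrative usage sketch (hypothetical values, wrapped in `if (FALSE)` so
# sourcing this file does not execute it). It assumes the helper functions
# defined below and the lpSolve package are available. When the LP finds a
# valid pmf, the empirical column means and correlations should roughly match
# 2 * pAllele and matTargetCor.
if (FALSE) {
  pAllele      <- c(0.1, 0.2, 0.3)
  matTargetCor <- matrix(c(1,   0.2, 0.1,
                           0.2, 1,   0.3,
                           0.1, 0.3, 1), nrow = 3, byrow = TRUE)
  simBin <- GenerateAnySnpOptParamBinomial(pAllele, matTargetCor, nObs = 10000)
  colMeans(simBin)   # should be close to 2 * pAllele
  cor(simBin)        # should be close to matTargetCor
}
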
# Sets up the equations associated with the marginal binomial distributions
# Outputs a matrix
calcMatMarginalBinomial <- function(numSnp = 3, pAlleles = c(0.1, 0.1, 0.1)) {
  # Calculates the constraint matrix associated to the marginal equations
  # numSnp = the number of snps
  # pAlleles = the minor allele frequency for each snp
  # Returns a typical constraint matrix where the last column
  # is the RHS of all the equations and the LHS
  # denotes the coefficient on each variable
  # The variables are the probabilities of each element in the support of the
  # target random variable.

  # First, construct the left hand side (LHS) of the equations
  snpMatLHS <- matrix(double(), 0, (3 ^ numSnp))
  snpVecRHS <- vector("double", 0)
  for (indSnp in c(1:numSnp)) {
    # For each snp, we separately work on the values it takes on
    # Note that we let the SNP value take on (1,2,3) instead of (0,1,2)
    # so we don't need to deal with some index issues
    # Construct the three rows associated to a particular SNP
    snpRows <- t(sapply(c(1:3), function(valSnp) {
      # Example of the naming convention for 3 SNPs together:
      # 111, 112, 113, 121, 122, 123, 131, 132, 133,
      # 211, 212, 213, 221, 222, 223, 231, 232, 233,
      # 311, 312, 313, 321, 322, 323, 331, 332, 333
      # If we're interested in finding the cells that make up a marginal,
      # e.g. if we want the marginal for SNP1 = 1,
      # then we want all names above that have a 1 in the first place (1??);
      # e.g. if we want the marginal for SNP2 = 3,
      # then we want all names above that have a 3 in the second place (?3?)
      # We can automate the way we find the marginals.
      # Note that the SNPs are ordered lowest to highest
      # (i.e. 111, 112, ..., 332, 333)
      # There will be a specific number of blocks;
      # this number is related to the index of the SNP we're looking at
      numBlocks <- 3 ^ (indSnp - 1)
      # we will have a slew/block of 1's
      blockOneWidth <- 3 ^ (numSnp - indSnp)
      # The space before the first block of 1's is
      # dependent on the value/snp of interest
      blockStartZeros <- (valSnp - 1) * blockOneWidth
      # The block of 1's will always be followed by a block of 0's
      blockEndZeros <- (3 - valSnp) * blockOneWidth
      ValBlock <- c(rep(0, blockStartZeros),
                    rep(1, blockOneWidth),
                    rep(0, blockEndZeros))
      ValRow <- rep(ValBlock, numBlocks)
      return(ValRow)
    }))
    # Append the 3 rows for a particular SNP to the matrix of constraints
    snpMatLHS <- rbind(snpMatLHS, snpRows)
    # Append the RHS of what the above 3 rows should equal
    pSnp <- pAlleles[indSnp]
    snpVecRHS <- c(snpVecRHS,               # Previous values
                   (1 - pSnp) ^ 2,          # value 1 (0 copies of the minor allele)
                   2 * (1 - pSnp) * pSnp,   # value 2 (1 copy)
                   pSnp ^ 2)                # value 3 (2 copies)
  }
  # Combine into a matrix and return
  snpMatConstraints <- as.matrix(cbind(snpMatLHS, snpVecRHS))
  return(snpMatConstraints)
}

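# Quick illustrative check (hypothetical values, not run when sourcing):
# for each SNP the three marginal rows partition the support, so each block
# of three RHS entries should sum to 1, i.e. (1 - p)^2 + 2p(1 - p) + p^2 = 1.
if (FALSE) {
  margMat <- calcMatMarginalBinomial(numSnp = 2, pAlleles = c(0.1, 0.25))
  dim(margMat)                          # 6 rows (3 per SNP), 3^2 + 1 = 10 columns
  rhs <- margMat[, ncol(margMat)]
  tapply(rhs, rep(1:2, each = 3), sum)  # each SNP's marginal RHS sums to 1
}
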
# Sets up the equations associated to the target correlation constraints
# Outputs a matrix
calcMatTargetCorBinomial <- function(numSnp = 3,
                                     matTargetCor = diag(3),
                                     pAlleles = c(0.1, 0.1, 0.1)) {
  # Calculates the constraint matrix associated to the
  # target correlation equations
  # numSnp = the number of snps
  # matTargetCor = the target correlation matrix
  # pAlleles = the minor allele frequency for each snp
  # Returns a typical constraint matrix where the last
  # column is the RHS of all the equations and the LHS
  # denotes the coefficient on each variable
  # The variables are the frequencies of the multivariate SNPs.

  # The snp values that are used in the target correlation calculation
  valSnpXYmat <- matrix(c(2, 2, 3, 3,
                          2, 3, 2, 3), 2, 4, byrow = TRUE)
  # The coefficients to be placed on the variables
  valXYTargetCor <- c(1, 2, 2, 4)

  # We will be appending to construct the matrix
  snpLHSMatTarCor <- matrix(double(), 0, (3 ^ numSnp))
  snpRHSVecTarCor <- vector("double", 0)
  for (indSnpX in c(1:numSnp)) {
    for (indSnpY in c((indSnpX):numSnp)) {
      if (isTRUE(all.equal(indSnpX, indSnpY))) next  # Skip a SNP paired with itself
      # Looking at all pairs of SNPs;
      # SnpX is on a lower index (more left) than SnpY.
      # For each pair of SNPs we assemble the row for each value pair used in
      # the target correlation separately.
      # Construct a matrix where each row indicates where to place
      # the appropriate coefficient for a single pair of values
      # involved in the target correlation,
      # i.e. the snp values of 2-2, 2-3, 3-2 and 3-3 are
      # all used in the target correlation.
      # The contribution of each pair is calculated separately and
      # then added to get a single row.
      separateConstraint <- t(sapply(c(1:4), function(indXY) {
        valSnpX <- valSnpXYmat[1, indXY]
        valSnpY <- valSnpXYmat[2, indXY]
        valPlace <- valXYTargetCor[indXY]

        bigBlockNum <- 3 ^ (indSnpX - 1)
        bigBlockWidth <- 3 ^ (numSnp - indSnpX)
        bigBlockStartZeros <- (valSnpX - 1) * bigBlockWidth

        # Constructing the smaller block inside the big block
        smlBlockNum <- 3 ^ (indSnpY - indSnpX - 1)
        smlBlockoneWidth <- 3 ^ (numSnp - indSnpY)
        smlBlockStartZeros <- (valSnpY - 1) * smlBlockoneWidth
        smlBlockOnes <- smlBlockoneWidth
        smlBlockEndZeros <- (3 - valSnpY) * smlBlockoneWidth
        smlblock <- c(
          rep(0, smlBlockStartZeros),
          rep(valPlace, smlBlockOnes),
          rep(0, smlBlockEndZeros)
        )
        smlBlockRow <- rep(smlblock, smlBlockNum)

        bigBlockOnes <- smlBlockRow
        bigBlockEndZeros <- (3 - valSnpX) * bigBlockWidth
        # Constructing the final big block
        bigBlock <- c(rep(0, bigBlockStartZeros),
                      rep(smlBlockRow, 1),
                      rep(0, bigBlockEndZeros))
        bigBlockRow <- rep(bigBlock, bigBlockNum)
        return(bigBlockRow)
      }))
      # Collapse the above matrix into a row vector to be
      # used in linear optimization
      singleRowConstraint <- apply(separateConstraint, 2, sum)
      snpLHSMatTarCor <- rbind(snpLHSMatTarCor, singleRowConstraint)

      # RHS of target correlation is targetCor * sqrt(VarX * VarY) + E[X] * E[Y]
      pX <- pAlleles[indSnpX]
      pY <- pAlleles[indSnpY]
      nXY <- 2
      rhseXYTarCor <- matTargetCor[indSnpX, indSnpY] *
        sqrt(nXY * pX * (1 - pX) * nXY * pY * (1 - pY)) +
        (nXY * pX * nXY * pY)
      snpRHSVecTarCor <- c(snpRHSVecTarCor, rhseXYTarCor)
    }
  }
  snpMatTarCor <- as.matrix(cbind(snpLHSMatTarCor, snpRHSVecTarCor))
  return(snpMatTarCor)
}

# Combines the marginal and target correlation constraints into a single matrix
# Outputs a matrix
calcMatPmfConstaintBinomial <- function(numSnp = 3,
                                        matTargetCor = diag(3),
                                        pAlleles = c(0.1, 0.1, 0.1)) {
  # Calculates the full constraint matrix (marginals plus target correlations)
  # numSnp = the number of snps
  # matTargetCor = the target correlation matrix
  # pAlleles = the minor allele frequency for each snp
  # Returns a typical constraint matrix where the
  # last column is the RHS of all the equations and the LHS
  # denotes the coefficient on each variable
  # The variables are the frequencies of the multivariate SNPs.
  matMarg <- calcMatMarginalBinomial(numSnp, pAlleles)
  matTarC <- calcMatTargetCorBinomial(numSnp, matTargetCor, pAlleles)
  snpMat <- as.matrix(rbind(matMarg, matTarC))
  return(snpMat)
}

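# Illustrative check of the combined constraint system (hypothetical values,
# not run when sourcing): there are 3 marginal rows per SNP plus one
# correlation row per SNP pair, and one column per cell of the joint pmf
# plus the RHS column.
if (FALSE) {
  numSnp  <- 3
  consMat <- calcMatPmfConstaintBinomial(numSnp, diag(numSnp), c(0.1, 0.2, 0.3))
  nrow(consMat) == 3 * numSnp + choose(numSnp, 2)  # expected TRUE (12 rows)
  ncol(consMat) == 3 ^ numSnp + 1                  # expected TRUE (28 columns)
}
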
# Calculates the pmf based on optimizing the set of constraints
# as given by the marginals and target correlation
# Outputs a vector that contains the probability of each element
calcOptPmfAnySnpBinomial <- function(pAllele, matTarCor){
  require(lpSolve)
  # pAllele = vector of minor allele frequencies
  # matTarCor = correlation matrix
  numSnp <- length(pAllele)
  oriMatSnp <- calcMatPmfConstaintBinomial(length(pAllele), matTarCor, pAllele)

  # Need to separate the matrix into LHS and RHS and add the unity constraint
  LHSOriMatSnp <- oriMatSnp[, -ncol(oriMatSnp)]
  RHSOriMatSnp <- as.vector(oriMatSnp[, ncol(oriMatSnp)])

  # Add the unity constraint to the above.
  # RHS only needs a 1 added
  uniRHSOriMatSnp <- c(RHSOriMatSnp, 1)
  # LHS needs an extra row and column, last diagonal entry needs to be 1
  uniLHSOriMatSnp1 <- rbind(LHSOriMatSnp, 0)
  uniLHSOriMatSnp2 <- cbind(uniLHSOriMatSnp1, 0)
  uniLHSOriMatSnp2[nrow(uniLHSOriMatSnp2), ncol(uniLHSOriMatSnp2)] <- 1
  uniLHSOriMatSnp <- uniLHSOriMatSnp2

  # Objective function:
  # all of the variables added should equal the unity (last) variable
  obj <- c(rep(1, ncol(LHSOriMatSnp)), -1)
  constraints_direction <- rep("=", length(uniRHSOriMatSnp))

  optimum <- lp(direction = "max",
                objective.in = obj,
                const.mat = uniLHSOriMatSnp,
                const.dir = constraints_direction,
                const.rhs = uniRHSOriMatSnp,
                all.int = FALSE)

  sol <- optimum$solution
  # Remove the last entry since that's the total sum.
  # The remaining vector contains the entries in the PMF of the target
  # random variable
  pmfSolve <- sol[-length(sol)]
  # Name the vector according to the naming convention
  names(pmfSolve) <- namingForBinomialPmf(numSnp)
  return(pmfSolve)
}

# Gets the name for each entry in the PMF of correlated binomials
# Outputs a vector of names (string)
namingForBinomialPmf <- function(numSnp){
  # numSnp = number of SNPs
  # The naming convention follows a specific pattern:
  # names are the ordered (lowest to highest) record of what
  # value (1,2,3) each snp (indicated by digit position) takes on.
  # For example, if snp1 is 1, snp2 is 3 and snp3 is 2,
  # then the corresponding name is 132.
  # Construct the names from right to left;
  # the pattern of 1,2,3 is a repetitive one.
  nmedoub <- rep(0, 3 ^ numSnp)
  for (snpInd in c(1:numSnp)) {
    inner <- 3 ^ (snpInd - 1)
    outer <- 3 ^ (numSnp - snpInd)
    eexp <- snpInd - 1
    toAdd <- (10 ^ eexp) * rep(
      c(rep(1, inner), rep(2, inner), rep(3, inner)),
      outer
    )
    nmedoub <- nmedoub + toAdd
  }
  nmestri <- paste("x", nmedoub, sep = "")
  return(nmestri)
}

# Generates integers given the probability for each integer
# nObs = number of observations to generate
# pGeno = vector of probability weights
# Outputs a vector of integers
genBag <- function(nObs, pGeno){
  if (!isTRUE(all.equal(sum(pGeno), 1)))
    print("Given probabilities don't add to 1")
  # Generate and return values
  genBagVals <- sample.int(length(pGeno), nObs, replace = TRUE, prob = pGeno)
  return(genBagVals)
}

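# Illustrative sketch (hypothetical values, not run when sourcing): compute
# the optimized pmf directly and, if the LP is feasible, check that it is a
# proper pmf whose SNP1 marginal recovers the Binomial(2, p) genotype
# probabilities used in the constraints. Requires the lpSolve package.
if (FALSE) {
  pAllele <- c(0.1, 0.2)
  pmf <- calcOptPmfAnySnpBinomial(pAllele, matrix(c(1, 0.2, 0.2, 1), 2, 2))
  sum(pmf)  # should be approximately 1 when a valid pmf was found
  # Marginal of SNP1: names are x11, x12, ..., x33 with the first digit = SNP1
  tapply(pmf, substr(names(pmf), 2, 2), sum)
  # should be close to c((1 - 0.1)^2, 2 * 0.1 * 0.9, 0.1^2)
}
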
######################
# Novel Bernoulli Method
# Generates correlated Bernoullis according to the input parameters
# Uses the optimization method to get an appropriate pmf
# Independently generates twice the number of multivariate Bernoulli values
# such that they can be added component wise to get
# binomial marginal distributions
# Outputs a matrix of observations
GenerateAnySnpOptParamBernoulli <- function(
  pAllele = c(0.1, 0.1, 0.1),
  matTargetCor = diag(3),
  nObs = 500
){
  # pAllele = The minor allele frequency of each SNP in order
  # matTargetCor = The target correlation matrix
  # nObs = the number of observations to generate

  # Generate integer values from the pmf produced by the optimization method.
  # We'll generate all the needed Bernoulli values at once,
  # separate them at the end and add them to get binomial values
  pmf <- calcOptPmfAnySnpBernoulli(pAllele, matTargetCor)
  if (isTRUE(all.equal(sum(pmf), 0))) {
    return(matrix(NA, ncol = length(pAllele), nrow = nObs))
  }
  genIntVals <- genBag(2 * nObs, pmf)

  # Constructs the map between the integer values and the Bernoulli values
  # for each SNP.
  # Each column is a SNP, each row is an integer value, each entry is the
  # appropriate Bernoulli value for the integer value.
  # Note that the first column of the map matrix corresponds to the last snp,
  # the second column of the map matrix is the second last snp,
  # ...
  # and the last column of the map matrix is the first snp.
  # Example of the SNP naming convention for 3 SNPs together:
  # 111, 112, 121, 122, 211, 212, 221, 222
  # Corresponding integer values are
  #   1,   2,   3,   4,   5,   6,   7,   8
  numSnp <- length(pAllele)
  mapIntToBern <- sapply(c(1:numSnp), function(snpInd){
    inner <- 2 ^ (snpInd - 1)
    outer <- 2 ^ (numSnp - snpInd)
    toAdd <- rep(
      c(rep(0, inner), rep(1, inner)),
      outer
    )
    return(toAdd)
  })
  # Reverse the ordering so SNP1 corresponds to column 1
  mapIntToBernOrdered <- mapIntToBern[, ncol(mapIntToBern):1]

  # use a simple apply to get the Bernoulli values using the mapping
  multiBernVals <- apply(mapIntToBernOrdered, 2, function(colMultiMap){
    colMultiMap[genIntVals]
  })

  # Add the two halves of the Bernoulli values together to get binomial values
  multiBinVals <- apply(multiBernVals, 2, function(colBernVal){
    (colBernVal[1:nObs] + colBernVal[(nObs + 1):length(colBernVal)])
  })
  return(multiBinVals)
}

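# Illustrative usage sketch (hypothetical values, not run when sourcing).
# Because two independent multivariate Bernoulli draws are summed
# component-wise, each column should be Binomial(2, pAllele[i]) and the
# between-column correlation should roughly match matTargetCor when the LP
# returns a valid pmf.
if (FALSE) {
  pAllele      <- c(0.2, 0.3)
  matTargetCor <- matrix(c(1, 0.25, 0.25, 1), 2, 2)
  simBern <- GenerateAnySnpOptParamBernoulli(pAllele, matTargetCor, nObs = 10000)
  colMeans(simBern)  # roughly 2 * pAllele
  cor(simBern)       # roughly matTargetCor
}
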
# Based on adding two Bernoullis
# Sets up the equations associated with the marginal constraints
# Outputs a matrix
calcMatMarginalBernoulli <- function(numSnp = 3, pAlleles = c(0.1, 0.1, 0.1)) {
  # Calculates the constraint matrix associated to the marginal equations
  # numSnp = the number of snps
  # pAlleles = the minor allele frequency for each snp
  # Returns a typical constraint matrix where the
  # last column is the RHS of all the equations and the LHS
  # denotes the coefficient on each variable
  # The variables are the frequencies of the multivariate SNPs.
  # See below for the naming convention.

  # First, construct the left hand side (LHS) of the equations
  snpMatLHS <- matrix(double(), 0, (2 ^ numSnp))
  snpVecRHS <- vector("double", 0)
  for (indSnp in c(1:numSnp)) {
    # For each snp, we separately work on the values it takes on
    # Note that we let the SNP value take on (1,2) instead of (0,1)
    # so we don't need to deal with some index issues
    # Construct the two rows associated to a particular SNP
    snpRows <- t(sapply(c(1:2), function(valSnp) {
      # Example of the naming convention for 3 SNPs together:
      # 111, 112, 121, 122, 211, 212, 221, 222
      # If we're interested in finding the cells that make up a marginal,
      # e.g. if we want the marginal for SNP1 = 0,
      # then we want all names above that have a 1 in the first place (1??);
      # e.g. if we want the marginal for SNP2 = 1,
      # then we want all names above that have a 2 in the second place (?2?)
      # We can automate the way we find the marginals.
      # Note that the SNPs are ordered lowest to highest
      # (i.e. 111, 112, ..., 221, 222)
      # There will be a specific number of blocks;
      # this number is related to the index of the SNP we're looking at
      numBlocks <- 2 ^ (indSnp - 1)
      # we will have a slew/block of 1's
      blockOneWidth <- 2 ^ (numSnp - indSnp)
      # The space before the first block of 1's is dependent
      # on the value/snp of interest
      blockStartZeros <- (valSnp - 1) * blockOneWidth
      # The block of 1's will always be followed by a block of 0's
      blockEndZeros <- (2 - valSnp) * blockOneWidth
      ValBlock <- c(rep(0, blockStartZeros),
                    rep(1, blockOneWidth),
                    rep(0, blockEndZeros))
      ValRow <- rep(ValBlock, numBlocks)
      return(ValRow)
    }))
    # Append the 2 rows for a particular SNP to the matrix of constraints
    snpMatLHS <- rbind(snpMatLHS, snpRows)
    # Append the RHS of what the above 2 rows should equal
    pSnp <- pAlleles[indSnp]
    snpVecRHS <- c(snpVecRHS,    # Previous values
                   (1 - pSnp),   # value 1 (Bernoulli 0)
                   pSnp)         # value 2 (Bernoulli 1)
  }
  # Combine into a matrix and return
  snpMatConstraints <- as.matrix(cbind(snpMatLHS, snpVecRHS))
  return(snpMatConstraints)
}

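# Quick illustrative check (hypothetical values, not run when sourcing):
# each SNP contributes two marginal rows whose RHS entries, (1 - p) and p,
# sum to 1, and each LHS marginal row covers half of the 2^numSnp cells.
if (FALSE) {
  margBern <- calcMatMarginalBernoulli(numSnp = 3, pAlleles = c(0.1, 0.2, 0.3))
  dim(margBern)                           # 6 rows, 2^3 + 1 = 9 columns
  rowSums(margBern[, -ncol(margBern)])    # each marginal row selects 4 cells
  rhs <- margBern[, ncol(margBern)]
  tapply(rhs, rep(1:3, each = 2), sum)    # each SNP's pair of RHS entries sums to 1
}
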
# Sets up the equations associated to the target correlation constraints
# Outputs a matrix
calcMatTargetCorBernoulli <- function(numSnp = 3,
                                      matTargetCor = diag(3),
                                      pAlleles = c(0.1, 0.1, 0.1)) {
  # Calculates the constraint matrix associated to the
  # target correlation equations
  # numSnp = the number of snps
  # matTargetCor = the target correlation matrix
  # pAlleles = the minor allele frequency for each snp
  # Returns a typical constraint matrix where the last
  # column is the RHS of all the equations and the LHS
  # denotes the coefficient on each variable
  # The variables are the frequencies of the multivariate SNPs.
  # See below for the naming convention.

  # The snp values that are used in the target correlation calculation
  valSnpXYmat <- matrix(c(2, 2), 2, 1, byrow = TRUE)
  # The coefficients to be placed on the variables
  valXYTargetCor <- c(1)

  # We will be appending to construct the matrix
  snpLHSMatTarCor <- matrix(double(), 0, (2 ^ numSnp))
  snpRHSVecTarCor <- vector("double", 0)
  for (indSnpX in c(1:numSnp)) {
    for (indSnpY in c((indSnpX):numSnp)) {
      if (isTRUE(all.equal(indSnpX, indSnpY))) next  # Skip a SNP paired with itself
      # Looking at all pairs of SNPs;
      # SnpX is on a lower index (more left) than SnpY.
      # Construct a matrix where each row indicates where to place
      # the appropriate coefficient for a single pair of values involved
      # in the target correlation,
      # i.e. only the snp value pair 2-2 (both Bernoullis equal to 1)
      # contributes to the target correlation.
      # The contribution of each pair is calculated separately
      # and then added to get a single row.
      separateConstraint <- t(sapply(c(1), function(indXY) {
        valSnpX <- valSnpXYmat[1, indXY]
        valSnpY <- valSnpXYmat[2, indXY]
        valPlace <- valXYTargetCor[indXY]

        bigBlockNum <- 2 ^ (indSnpX - 1)
        bigBlockWidth <- 2 ^ (numSnp - indSnpX)
        bigBlockStartZeros <- (valSnpX - 1) * bigBlockWidth

        # Constructing the smaller block inside the big block
        smlBlockNum <- 2 ^ (indSnpY - indSnpX - 1)
        smlBlockoneWidth <- 2 ^ (numSnp - indSnpY)
        smlBlockStartZeros <- (valSnpY - 1) * smlBlockoneWidth
        smlBlockOnes <- smlBlockoneWidth
        smlBlockEndZeros <- (2 - valSnpY) * smlBlockoneWidth
        smlblock <- c(
          rep(0, smlBlockStartZeros),
          rep(valPlace, smlBlockOnes),
          rep(0, smlBlockEndZeros)
        )
        smlBlockRow <- rep(smlblock, smlBlockNum)

        bigBlockOnes <- smlBlockRow
        bigBlockEndZeros <- (2 - valSnpX) * bigBlockWidth
        # Constructing the final big block
        bigBlock <- c(rep(0, bigBlockStartZeros),
                      rep(smlBlockRow, 1),
                      rep(0, bigBlockEndZeros))
        bigBlockRow <- rep(bigBlock, bigBlockNum)
        return(bigBlockRow)
      }))
      # Collapse the above matrix into a row vector to be
      # used in linear optimization
      singleRowConstraint <- apply(separateConstraint, 2, sum)
      snpLHSMatTarCor <- rbind(snpLHSMatTarCor, singleRowConstraint)

      # RHS of target correlation is targetCor * sqrt(VarX * VarY) + E[X] * E[Y]
      pX <- pAlleles[indSnpX]
      pY <- pAlleles[indSnpY]
      nXY <- 1
      rhseXYTarCor <- matTargetCor[indSnpX, indSnpY] *
        sqrt(nXY * pX * (1 - pX) * nXY * pY * (1 - pY)) +
        (nXY * pX * nXY * pY)
      snpRHSVecTarCor <- c(snpRHSVecTarCor, rhseXYTarCor)
    }
  }
  snpMatTarCor <- as.matrix(cbind(snpLHSMatTarCor, snpRHSVecTarCor))
  return(snpMatTarCor)
}

# Combines the marginal and target correlation constraints into a single matrix
# Outputs a matrix
calcMatPmfConstaintBernoulli <- function(numSnp = 3,
                                         matTargetCor = diag(3),
                                         pAlleles = c(0.1, 0.1, 0.1)) {
  # Calculates the full constraint matrix (marginals plus target correlations)
  # numSnp = the number of snps
  # matTargetCor = the target correlation matrix
  # pAlleles = the minor allele frequency for each snp
  # Returns a typical constraint matrix where the last column
  # is the RHS of all the equations and the LHS
  # denotes the coefficient on each variable
  # The variables are the frequencies of the multivariate SNPs.
  matMarg <- calcMatMarginalBernoulli(numSnp, pAlleles)
  matTarC <- calcMatTargetCorBernoulli(numSnp, matTargetCor, pAlleles)
  snpMat <- as.matrix(rbind(matMarg, matTarC))
  return(snpMat)
}

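# Illustrative sketch (hypothetical values, not run when sourcing): for two
# SNPs the correlation constraint reduces to fixing P(X = 1, Y = 1), since for
# Bernoulli variables E[XY] = P(X = 1, Y = 1). The RHS should therefore equal
# rho * sqrt(pX * (1 - pX) * pY * (1 - pY)) + pX * pY.
if (FALSE) {
  pX <- 0.2; pY <- 0.4; rho <- 0.3
  tarCor <- matrix(c(1, rho, rho, 1), 2, 2)
  corRow <- calcMatTargetCorBernoulli(2, tarCor, c(pX, pY))
  corRow[1, ]  # LHS selects only the "both equal 1" cell; last entry is the RHS
  rho * sqrt(pX * (1 - pX) * pY * (1 - pY)) + pX * pY  # matches that RHS
}
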
# Calculates the pmf based on optimizing the set of constraints as
# given by the marginals and target correlation
# Outputs a vector containing the probability of each element
calcOptPmfAnySnpBernoulli <- function(pAllele, matTarCor){
  require(lpSolve)
  # pAllele = vector of minor allele frequencies
  # matTarCor = correlation matrix
  numSnp <- length(pAllele)
  oriMatSnp <- calcMatPmfConstaintBernoulli(length(pAllele), matTarCor, pAllele)

  # Need to separate the matrix into LHS and RHS and add the unity constraint
  LHSOriMatSnp <- oriMatSnp[, -ncol(oriMatSnp)]
  RHSOriMatSnp <- as.vector(oriMatSnp[, ncol(oriMatSnp)])

  # Add the unity constraint to the above.
  # RHS only needs a 1 added
  uniRHSOriMatSnp <- c(RHSOriMatSnp, 1)
  # LHS needs an extra row and column, last diagonal entry needs to be 1
  uniLHSOriMatSnp1 <- rbind(LHSOriMatSnp, 0)
  uniLHSOriMatSnp2 <- cbind(uniLHSOriMatSnp1, 0)
  uniLHSOriMatSnp2[nrow(uniLHSOriMatSnp2), ncol(uniLHSOriMatSnp2)] <- 1
  uniLHSOriMatSnp <- uniLHSOriMatSnp2

  # Objective function:
  # all of the variables added should equal the unity (last) variable
  obj <- c(rep(1, ncol(LHSOriMatSnp)), -1)
  constraints_direction <- rep("=", length(uniRHSOriMatSnp))

  optimum <- lp(direction = "max",
                objective.in = obj,
                const.mat = uniLHSOriMatSnp,
                const.dir = constraints_direction,
                const.rhs = uniRHSOriMatSnp,
                all.int = FALSE)

  sol <- optimum$solution
  # Remove the last entry since that's the total sum
  pmfSolve <- sol[-length(sol)]
  # Name the vector according to the naming convention
  names(pmfSolve) <- namingForBernoullipmf(numSnp)
  return(pmfSolve)
}

# Gets the name for each entry in the PMF of correlated Bernoullis
# Outputs a vector of names (string)
namingForBernoullipmf <- function(numSnp){
  # numSnp = number of SNPs
  # The naming convention follows a specific pattern:
  # names are the ordered (lowest to highest) record of what value (1,2)
  # each snp (indicated by digit position) takes on.
  # For example, if snp1 is 1, snp2 is 1 and snp3 is 2,
  # then the corresponding name is 112.
  # Construct the names from right to left;
  # the pattern of 1,2 is a repetitive one.
  nmedoub <- rep(0, 2 ^ numSnp)
  for (snpInd in c(1:numSnp)) {
    inner <- 2 ^ (snpInd - 1)
    outer <- 2 ^ (numSnp - snpInd)
    eexp <- snpInd - 1
    toAdd <- (10 ^ eexp) * rep(
      c(rep(1, inner), rep(2, inner)),
      outer
    )
    nmedoub <- nmedoub + toAdd
  }
  nmestri <- paste("x", nmedoub, sep = "")
  return(nmestri)
}

# Generates integers given the probability for each integer
# nObs = number of observations to generate
# pGeno = vector of probability weights
# Outputs a vector of integers
genBag <- function(nObs, pGeno){
  if (!isTRUE(all.equal(sum(pGeno), 1)))
    print("Given probabilities don't add to 1")
  # Generate and return values
  genBagVals <- sample.int(length(pGeno), nObs, replace = TRUE, prob = pGeno)
  return(genBagVals)
}

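# Illustrative sketch (not run when sourcing): the pmf entries returned by the
# two optimization routines are named so that the j-th digit after "x" is the
# coded value of SNP j, with SNP1 varying slowest across the support.
if (FALSE) {
  namingForBernoullipmf(2)  # "x11" "x12" "x21" "x22"
  namingForBinomialPmf(2)   # "x11" "x12" "x13" "x21" ... "x33"
}
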
###########################
# Kang and Jung 2 SNP method
# Simulates a number of observations from two correlated binomials
# Outputs a matrix of observations
Generate2SnpBernoulli <- function(p1, p2, rho, nObs){
  # p1 = minor allele frequency of snp 1
  # p2 = minor allele frequency of snp 2
  # rho = correlation between SNP1 and SNP2
  # nObs = number of observations to generate

  # Compute the four probabilities for the joint distribution.
  a0 <- rho * sqrt(p1 * p2 * (1 - p1) * (1 - p2)) + (1 - p1) * (1 - p2)
  prob <- c(
    `(0,0)` = a0,
    `(1,0)` = 1 - p2 - a0,
    `(0,1)` = 1 - p1 - a0,
    `(1,1)` = a0 + p1 + p2 - 1
  )
  # Check for valid probabilities and return NA if invalid;
  # the choice of p1, p2 and the correlation coefficient
  # can produce negative probabilities
  if (min(prob) < 0) {
    return(as.matrix(cbind(NA, NA)))
  }
  nbin <- 2
  # Sample integers with the specified weights
  u <- sample.int(4, nObs * nbin, replace = TRUE, prob = prob)
  # Split into the Bernoulli representation
  y <- floor((u - 1) / 2)
  x <- 1 - u %% 2
  # Sum up "columns" to get binomials
  xx <- colSums(matrix(x, nrow = nbin))  # Sum in groups of `nbin`
  yy <- colSums(matrix(y, nrow = nbin))  # Sum in groups of `nbin`
  return(as.matrix(cbind(xx, yy)))
}

###################################################
# Madsen and Birkes Method
# Simulates observations from correlated binomials using
# the Madsen and Birkes method
# Returns the generated observations in a matrix, as well as the
# correlations used for the multivariate normal as additional columns
# in the matrix
GenerateSnpMadsenBirkes <- function(pAlleleMinor, targetBinCorMat, nBinObs,
                                    useEigAdjust = FALSE){
  # pAlleleMinor = vector of minor allele frequencies
  # targetBinCorMat = matrix of desired correlations
  # nBinObs = number of observations to generate
  require(gdata)
  require(matrixcalc)
  require(MASS)

  nBin <- rep(2, length(pAlleleMinor))
  # Calculate the correlation matrix for the multivariate normal
  sigmaZ <- CalculateSigmaZ(nBin, pAlleleMinor, targetBinCorMat)

  # Ensures the correlation matrix is positive definite if desired
  if (useEigAdjust) {
    sigmaZ <- CalcAdjustMadsenInverseEigen(sigmaZ)
  }
  if (is.symmetric.matrix(sigmaZ)){
    if (!is.positive.definite(sigmaZ)){
      llen <- length(pAlleleMinor) + ncol(sigmaZ)
      retVal <- matrix(NA, ncol = llen)
      return(retVal)
    }
  }

  normMeans <- nBin * 0  # mean is just 0
  # Simulate from the multivariate normal
  generatedNormalObs <- mvrnorm(nBinObs, normMeans, sigmaZ)

  # Convert values from the multivariate normal to binomial
  transformedNormToBinMat <- as.matrix(
    TransformNormToBin(
      pAlleleMinor = pAlleleMinor,
      normalGeneratedMat = generatedNormalObs)
  )

  # Return value: generated observations plus the upper triangle of sigmaZ
  retVal <- cbind(transformedNormToBinMat,
                  matrix(upperTriangle(sigmaZ),
                         byrow = TRUE,
                         ncol = length(upperTriangle(sigmaZ)),
                         nrow = nBinObs))
  return(as.matrix(retVal))
}

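# Illustrative sketch (hypothetical values, not run when sourcing): generate
# the same two-SNP scenario with the Kang and Jung method and with the Madsen
# and Birkes method and compare the empirical correlations. The Kang and Jung
# construction returns NA when the chosen rho is infeasible for the given
# p1 and p2; the Madsen and Birkes generator needs the gdata, matrixcalc,
# MASS and mvtnorm packages, and its first two columns are the simulated SNPs
# while the remaining column(s) carry the normal correlation(s) used.
if (FALSE) {
  p1 <- 0.2; p2 <- 0.3; rho <- 0.2; nObs <- 5000
  simKJ <- Generate2SnpBernoulli(p1, p2, rho, nObs)
  cor(simKJ)[1, 2]  # roughly rho when the cell probabilities are valid
  simMB <- GenerateSnpMadsenBirkes(c(p1, p2), matrix(c(1, rho, rho, 1), 2, 2), nObs)
  cor(simMB[, 1:2])[1, 2]  # roughly rho as well
}
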
# Calculates the correlation matrix used for the multivariate normal
# given the target correlation matrix for the correlated binomial random vector
# Outputs a matrix
CalculateSigmaZ <- function(nBin, pAlleleMinor, corDesire) {
  # nBin = should be a vector of just 2's; the number of "trials"
  # pAlleleMinor = vector of minor allele frequencies
  # corDesire = correlation matrix for the target r.v.

  # Calculate the correlation matrix for the normals
  corLen <- length(corDesire[, 1])
  sigmaZ <- diag(length(corDesire[, 1]))
  # Note: can speed up by running the loop in parallel
  # Calculates the correlation matrix
  for (i in c(1:corLen)) {
    for (j in c(1:corLen)) {
      # Initialize values needed for calculating delta
      corD <- corDesire[i, j]
      pI <- pAlleleMinor[i]
      pJ <- pAlleleMinor[j]
      nI <- nBin[i]
      nJ <- nBin[j]
      sigmaI2 <- pI * nI * (1 - pI)
      sigmaJ2 <- pJ * nJ * (1 - pJ)
      uI <- nI * pI
      uJ <- nJ * pJ
      # Calculate the matrix entry by entry.
      # The matrix is symmetric and the main diagonal is all 1's
      if (i == j) {
        # diagonal elements of the correlation matrix
        sigmaZ[i, j] <- 1
      } else if (i < j) {
        # Compute the entry in the upper triangle part
        sigmaZ[i, j] <- solveSigmaEqnForNormToBin(corD, sigmaI2, sigmaJ2,
                                                  uI, uJ, nI, nJ, pI, pJ)
      } else if (i > j) {
        # Copy the upper triangle part for the lower triangle
        sigmaZ[i, j] <- sigmaZ[j, i]
      }
    }
  }
  return(sigmaZ)
}

# Numerically solves for the entry in the correlation matrix
# of the multivariate normal needed to get the desired correlation
# after transformation to correlated binomials.
# Uses optimize to solve the equation.
# Equation is from Madsen and Birkes (2013)
# Outputs a numeric value
solveSigmaEqnForNormToBin <- function(corDesire, sigmaI2, sigmaJ2,
                                      uI, uJ, nI, nJ, pI, pJ){
  # corDesire is the correlation desired between the two binomial r.v.
  # sigmaI2, uI, nI, pI are specified parameters for the first binomial r.v.
  # sigmaJ2, uJ, nJ, pJ are specified parameters for the second binomial r.v.
  require(mvtnorm)
  # Initialize the values we will be using
  sigmaI <- sqrt(sigmaI2)
  sigmaJ <- sqrt(sigmaJ2)

  # Support of the distributions: 2 binomials
  rVals <- c(0, 1, 2, 0, 1, 2, 0, 1, 2)
  sVals <- c(0, 0, 0, 1, 1, 1, 2, 2, 2)

  # Cdf of the R and S values
  FiRVec <- pbinom(rVals, nI, pI)
  FjSVec <- pbinom(sVals, nJ, pJ)
  # Inverse of the standard normal cdf of FiR and FjS
  NInvFiRVec <- qnorm(FiRVec)
  NInvFjSVec <- qnorm(FjSVec)

  # Function we will be optimizing to find delta.
  # delta is what the value should be in the correlation matrix for
  # the multivariate normal
  fx.opt <- function(deltaInit){
    delta <- deltaInit[1]
    # Function to evaluate the bivariate normal probabilities
    # with the delta being optimized
    fx.BivarNorm <- function(nFiR, nFiS) {
      sigDelta <- matrix(c(1, delta, delta, 1), nrow = 2, ncol = 2)
      pmvnorm(upper = c(nFiR, nFiS),
              mean = c(0, 0),
              sigma = sigDelta)
    }
    # Constructs a vector so we can sum the bivariate normal values up
    BiNormVec <- vector(mode = "double", length = length(rVals))
    for (i in c(1:9)) {
      BiNormVec[i] <- fx.BivarNorm(NInvFiRVec[i], NInvFjSVec[i])
    }
    # Assembling the equation
    eqSumTermVec <- 1 - FiRVec - FjSVec + BiNormVec
    rightEqTerm <- 1 / (sigmaI * sigmaJ) * (sum(eqSumTermVec) - uI * uJ)
    leftTerm <- corDesire
    # What is being optimized
    (rightEqTerm - leftTerm)^2
  }
  # Delta is between -1 and 1
  opt <- optimize(fx.opt, interval = c(-1, 1))
  estimatedDelta <- opt$minimum
  return(estimatedDelta)
}

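# Illustrative sketch (hypothetical values, not run when sourcing): solve for
# the normal-scale correlation (delta) needed to hit a target binomial
# correlation for a single pair. Requires the mvtnorm package. Because the
# normal-to-binomial transformation attenuates dependence, the solved delta is
# typically somewhat larger in magnitude than the target correlation.
if (FALSE) {
  p1 <- 0.2; p2 <- 0.3; n <- 2
  delta <- solveSigmaEqnForNormToBin(corDesire = 0.3,
                                     sigmaI2 = n * p1 * (1 - p1),
                                     sigmaJ2 = n * p2 * (1 - p2),
                                     uI = n * p1, uJ = n * p2,
                                     nI = n, nJ = n, pI = p1, pJ = p2)
  delta  # normal-scale correlation to place in sigmaZ for this pair
}
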
# Maps values from the normal distribution to the binomial distribution
# based on cut-offs of the multivariate normal
# Outputs a matrix of values
TransformNormToBin <- function(pAlleleMinor, normalGeneratedMat) {
  # pAlleleMinor = vector of minor allele frequencies
  # normalGeneratedMat = matrix of values from the multivariate
  # normal distribution
  transBinMat <- sapply(c(1:length(pAlleleMinor)), function(indexBin) {
    x <- normalGeneratedMat[, indexBin]
    minAllele <- pAlleleMinor[indexBin]
    # Transform the normal marginal to a standard uniform
    unifVal <- pnorm(x, mean = 0, sd = 1)
    # Map the uniform to a binomial using the cutoffs
    binValue <- TransformUnifToBin(minAllele, unifVal)
    return(binValue)
  })
  return(data.frame(transBinMat))
}

# Transforms standard uniform values to binomial(2, p)
# given that p is the minor allele frequency
TransformUnifToBin <- function(minAllele, unifVal) {
  # minAllele = minor allele frequency
  # unifVal = vector of uniform values
  unifVal <- as.double(unifVal)
  minAllele <- as.double(minAllele)

  # Transform to the specified marginal binomial.
  # Cutoffs for mapping the uniform to a binomial
  binCutHomoMinor <- minAllele ^ 2
  binCutHomoMajor <- (1 - minAllele) ^ 2
  binCutHetero <- 2 * minAllele * (1 - minAllele)

  # Map the uniform to a binomial using the cutoffs
  binValue <- ifelse(unifVal < binCutHomoMinor,
                     2,
                     ifelse(unifVal < binCutHetero + binCutHomoMinor,
                            1,
                            0))
  return(binValue)
}

# Takes a matrix and adjusts it to be positive definite
# by setting any non-positive eigenvalues to a small positive number
# Outputs a matrix
CalcAdjustMadsenInverseEigen <- function(A, tol = 1e-5){
  # A is the matrix to adjust
  # tol is the small positive value that non-positive eigenvalues are set to
  eigens <- eigen(A)
  eigVal <- eigens$values
  eigVec <- eigens$vectors
  # A = Q %*% Lambda %*% Q^-1
  Q <- eigVec
  Qinv <- solve(Q)
  # Adjust the lambda by setting any non-positive eigenvalues to some small
  # positive number
  eigValAdj <- ifelse(eigVal <= 0, tol, eigVal)
  lamadj <- diag(eigValAdj, ncol = length(eigVal), nrow = length(eigVal))
  # Construct the adjusted matrix
  Aadj <- Q %*% lamadj %*% Qinv
  # Return the matrix as a correlation matrix
  return(cov2cor(Aadj))
}

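# Illustrative sketch (hypothetical values, not run when sourcing): the
# cutoffs partition [0, 1] into pieces of length p^2, 2p(1 - p) and (1 - p)^2,
# so feeding in standard uniforms should reproduce Binomial(2, p) genotype
# frequencies (coded 2 = homozygous minor, 1 = heterozygous, 0 = homozygous major).
if (FALSE) {
  p <- 0.25
  u <- runif(1e5)
  table(TransformUnifToBin(p, u)) / 1e5
  # roughly c(`0` = (1 - p)^2, `1` = 2 * p * (1 - p), `2` = p^2)
  dbinom(0:2, size = 2, prob = p)  # the same probabilities from dbinom
}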