#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# Author of the R codes: Jen-Chieh Teng.
# For the paper submitted to CSDA: Lim, A., Chiang, C. T., Teng, J.C., 2020. Estimating robot strengths with application to selection of alliance members in FIRST robotics competitions. (Under Revision)
# 
#
# These are NOT but a user-friendly version of those used in the paper. 
# Please inform me of any errors that I might have introduced in trying to make the codes more readable. 
# These codes are self-contained in such a way that parameter estimates are readily available once the 
# user loads their dataset formatted in the appropriate structure.
#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
library(readxl)

#################################################################################################
# the functions of robot strength estimates and estimated rank correlation of two variables     #
#################################################################################################

bhat.fn = function( x , y ){
  xc = x %*% INF1   
  bhatc = solve( t(xc) %*% xc ) %*% t(xc) %*% y
  binf = INF1 %*% bhatc
  binf}

RC = function( x1 , x2 ){
  t = length(x1)
  r1 = outer(x1,x1,FUN="-")
  r2 = outer(x2,x2,FUN="-")
  RC = (sum(sign(r1*r2)>0)  + 0.5*(sum(sign(r1*r2)==0) -t ))  / (t*(t-1))
  RC
}

##################################################################################################
#     Import excel files of a specific division of a tournament in the qualification stage (e.g. #        
#     Archimedes in 2018_Detroit.xlsx or Carver in 2018_Houston.xlsx) and the playoff stage      #       
#     (e.g. Archimedes in 2018_Detroit_Playoffs.xlsx or Carver in 2018_Houston_Playoffs.xlsx)    #                                      
##################################################################################################

  i = 1 # substitute i=1 by i=2,...,or i=6 for the rest divisions 
  Qualification <- read_excel("2018-Detroit.xlsx", sheet = i) 
  Playoffs <- read_excel("2018-Detroit-Playoffs.xlsx" , sheet = i)
  # The input files are named 2018-Detroit.xlsx and Data_2018-Detroit-Playoffs.xlsx (or 2018-Houston.xlsx 
  # and Data_2018-Houston-Playoffs.xlsx) with the following format:
  # the 1st column: match numbers
  # the 2nd to 4th columns: robot IDs in the red alliance
  # the 5th to 7th columns: robot IDs in the blue alliance
  # the 8th column: a score of the red alliace
  # the 9th column: a score of the blue alliance
  # the 10th column: an adjusted score of the red alliance
  # the 11th column: an adjusted score of the blue allaince
  # the 15th column: match numbers
  # the remaining columns: covariate matrix X with X_ij being equal to 1 if the jth robot at the ith match 
  # assigned to the red alliance; -1 if the jth robot at the ith match assigned to the blue alliance; and 
  # 0 otherwise. 
  
  
  # rename 2018_Detroit.xlsx (or 2018_Houston.xlsx) as Data 
  Data = data.frame(Qualification)  
  
  # rename 2018_Detroit_Playoffs.xlsx (or 2018_Houston_Playoffs.xlsx) as Data 
  Data_po = data.frame(Playoffs)  
  
  # number of matches (M), number of robotics teams (K), and IDs in the qualification stage 
  M = length(Data[,1]) 
  K = length(Data[1,]) - 15
  ID0 = colnames(Qualification)[16:(K+15)]
  ID = cbind(as.numeric(ID0),seq(1,K))
  
  # the minimum number (m0) of matches played by the robotics teams
  m0 = ceiling(M*6/K)-1
  
  # the corresponding response y0 and covariates x0, which are derived from qualification data
  yr = as.matrix(Data[1:M,8])
  yb = as.matrix(Data[1:M,9])
  y0 = rbind(yr,yb)
  ny = length(y0)
  x00 = as.matrix(Data[1:M,1:K+15])
  xr = ( x00 == 1 )*1
  xb = ( x00 == -1 )*1
  x0 = rbind(xr,xb)
  
  # the number of matches (Mpo) and number of robotics teams (K0po) in the playoff stage
  Mpo = length(t(Data_po[,1])) 
  K0po = length(Data_po[1,]) - 15
  
  # the corresponding response y0po, covariates x0po, and IDs, which are derived from playoff data 
  yrpo = as.matrix(Data_po[1:Mpo,8])
  ybpo = as.matrix(Data_po[1:Mpo,9])
  y0po = rbind(yrpo,ybpo)
  x0po0 = as.matrix(Data_po[1:Mpo,1:K0po+15])
  xrpo = ( x0po0 == 1 )*1
  xbpo = ( x0po0 == -1 )*1
  x0po = rbind(xrpo,xbpo)
  nypo = length(y0po)
  dim(y0po) = c(nypo,1)
  Kpo = sum((colSums(abs(x0po)) > 0)*1)
  po = colSums((abs(x0po)))
  dim(po) = c(K,1)
  poinf = cbind(po,ID)
  row_sub = apply ( poinf, 1 ,function(row) all(row !=0 ))
  poat = as.matrix(poinf[row_sub, ])
  ID.po = poat[,2:3]
  
  ##################################################################################################
  # the LSE of robot strengths (bhat0) in the OPR model                                            #
  ##################################################################################################
  bhat0 = solve( t(x0) %*% x0 ) %*% t(x0) %*% y0
  
  ##################################################################################################
  # the information matrices for clusters (cluster.inf0) and parameter estimates (bhat.inf0)       #
  # under variant cluster numbers = 2,…,K (clus_n) for the proposed methods 1                      #      
  ################################################################################################## 
  clus_n = seq(2,K)
  l_n = length( clus_n )
  cluster.inf0 = bhat.inf0 =  matrix( 0 , K , l_n)
  fit = hclust( dist(bhat0) , method = "centroid" )
  cluster.inf0[1:K,l_n] = cutree( fit , k = clus_n[l_n])
  bhat.inf0[1:K,l_n] = bhat0
  
  for(w in 1:(l_n -1)){
    grp = cutree( fit , k = clus_n[(l_n-w)])
    grp0 = sort(unique(grp))
    lgrp0 = length(grp0)
    INF1 = outer( grp , grp0 , FUN="==") *1 
    cluster.inf0[1:K , ( l_n-w)] = grp
    bhat.inf0[ 1:K, ( l_n-w)] = bhat.fn(x0,y0)
  }
  
  
  
  ##################################################################################################
  # predictive matrices PR’s and MSPE’s, which are computed based on qualification data,           #
  # under variant combinations of cluster number (c) and number of matches (M_l), l = 6,…,m0       #
  ##################################################################################################
  m = seq(6,m0)
  l_m = length(m)
  c0 = ceiling(m * K /6)
  c0 = c(c0 , c0[l_m] +1 )
  
  ##################################################################################################
  #                                     Method 1                                                   #                                   
  ##################################################################################################
    PRhatcv = MSPEcv = matrix (0 , l_n , l_m)
    
    for( w1 in 1:l_m){ #crossvalidation
      yrm = yr[-c( (c0[w1] +1) : c0[l_m+1] )]
      ybm = yb[-c( (c0[w1] +1) : c0[l_m+1] )]
      nym = length(yrm) 
      xrm = xr[-c(c0[w1] + 1 : c0[l_m+1]) ,]
      xbm = xb[-c(c0[w1] + 1 : c0[l_m+1]) ,]
      
      for(w2 in 1:l_n){
        grp = cluster.inf0[(1:K),w2]
        grp0 = sort(unique(grp))
        lgrp0 = length(grp0)
        INF1 = outer( grp , grp0 , FUN="==") *1
        
        # estimated PR and MSPE
        
        ycv = c(ybm, yrm)
        nycvm = length(ycv)
        dim(ycv) = c(nycvm,1)
        dy_cv = yrm-ybm
        
        xcv = rbind(xbm,xrm)%*%INF1
        H0=solve(t(xcv)%*%xcv)%*%t(xcv)
        H_cv=xcv%*%H0
        
        bhat_cv=H0%*%ycv
        pr_cv0=(xcv[-c(1:nym),]-xcv[1:nym,])%*%bhat_cv
        pr_cv0=pr_cv0[1:nym]
        resid_cv0=ycv-H_cv%*%ycv
        
        w01=diag(H_cv)
        w02=diag(H_cv[1:nym,-c(1:nym)])
        wd_cv=(1-w01[1:nym])*(1-w01[-c(1:nym)])-w02^2
        w1_cv=((1-w01[-c(1:nym)])*resid_cv0[1:nym]+w02*resid_cv0[-c(1:nym)])/wd_cv
        w2_cv=((1-w01[1:nym])*resid_cv0[-c(1:nym)]+w02*resid_cv0[1:nym])/wd_cv
        pr_cv=pr_cv0-t(t(H_cv[-c(1:nym),1:nym]-H_cv[1:nym,1:nym])*w1_cv)-t(t(H_cv[-c(1:nym),-c(1:nym)]-H_cv[1:nym,-c(1:nym)])*w2_cv)
        
        
        
        resid_cv=dy_cv-pr_cv
        Fhat_cv0=(t(t(resid_cv)+diag(pr_cv))<=0)*1
        diag(Fhat_cv0)=0
        Fhat_cv=colSums(Fhat_cv0)/(nym-1)
        
        PRhatcv[w2,w1] = sum((dy_cv*(0.5 - Fhat_cv) > 0) +  0.5* ( dy_cv*(0.5 - Fhat_cv) ==0)) /nym
        MSPEcv[w2,w1] =sum((diag(resid_cv)^2))/nym																			                                                                                
      }
      
      
    }
    
    
    ##################################################################################################
    # the optimal cluster number chosen by either maximizing PR(c) or minimizing MSPE(c) for c=      #
    # 2,…,K                                                                                          #
    ##################################################################################################
    PR.MSPE = cbind(seq(1,(K-1)), MSPEcv[,l_m], PRhatcv[,l_m])
    nclustera =  PR.MSPE[PR.MSPE[,2] == min(PR.MSPE[,2]),][1]  #nclustera + 1 = c
    nclusterb =  PR.MSPE[PR.MSPE[,3] == max(PR.MSPE[,3]),][1]  #nclusterb + 1 = c
    
    
    ##################################################################################################
    # the information matrix for the stability of PR and MSPE for varying M_l, l = 6,…,m0            #
    ##################################################################################################
    rbind( PRhatcv[nclusterb, ] , MSPEcv[nclustera, ] )
    
    ##################################################################################################
    # the LSEs of robot strengths in the proposed OPR model for varying Ml, l = 6,…,m0               #
    ##################################################################################################
    bhatinfm = matrix( 0 , K , l_m)
    
    for( w in 1:l_m){
      ym = c( yr[-c( (c0[w] +1) : c0[l_m+1] )] , yb[-c( (c0[w] +1) : c0[l_m+1] )])
      nym = length(ym)
      dim(ym) = c(nym,1)
      xm = rbind( xr[-c(c0[w] + 1 : c0[l_m+1]) ,] , xb[-c(c0[w] + 1 : c0[l_m+1]) ,]) 
      
      grp = cluster.inf0[(1:K),nclusterb]
      grp0 = sort(unique(grp))
      lgrp0 = length(grp0)
      INF1 = outer( grp , grp0 , FUN="==") *1
      bhatinfm[ ,w] = bhat.fn(xm,ym)
    }
    
    
    
    # estimated strengths of robots in the playoff stage
    bhatpo = cbind(ID,bhatinfm,bhat0,po)
    bhatpo = bhatpo[bhatpo[,(l_m+4)] != 0 , 1:(l_m+3)]
    potinfmb = bhatpo[,3:(l_m+3)]
    
    # ranks of robot strengths in decreasing order
    bhatm0rk = bhatinfm[order(bhatinfm[,l_m] , decreasing = TRUE), 1:l_m]
    
    
    
    ##################################################################################################
    # the information matrix for the stability of robot strengths ( all robots, top-8 robots, robots #
    # in the playoff stage) for varying M_l, l = 6,…,m0                                              #                       
    ##################################################################################################
    RCs = matrix(0,3,l_m-1)
    
    for(w in 1:(l_m-1)){
      RCs[1,w] = RC(bhatm0rk[,w] , bhatm0rk[,(w+1)])
      RCs[2,w] = RC(bhatm0rk[(1:8),w] , bhatm0rk[(1:8),(w+1)])
      RCs[3,w] = RC(potinfmb[,w] , potinfmb[,(w+1)])
    }
    
    
    ##################################################################################################
    # PRs and MSPEs, which are computed based on playoff data with c-hat and K clusters              #  
    ##################################################################################################
    yhatpor = xrpo %*% bhatinfm[,l_m]
    yhatpob = xbpo %*% bhatinfm[,l_m]
    yhatpor0 = xrpo %*% bhat0
    yhatpob0 = xbpo %*% bhat0
    
    Fhatpo =  colSums ( outer( (yr - yb) - (xr - xb) %*% bhatinfm[,l_m] , -( yhatpor - yhatpob ) , FUN = "<=")*1  ) / ( M)
    dim(Fhatpo) = c(Mpo,1)
    PRpo = (sum( ((yrpo - ybpo)*(0.5 - Fhatpo) > 0)*1 +  0.5* ((yrpo - ybpo)*(0.5 - Fhatpo) ==0)*1)) / (Mpo )
    MSPEpo =  sum(((yrpo - ybpo) - (xrpo - xbpo) %*% bhatinfm[,l_m])^2)/ Mpo
    
    Fhatpo0 =  colSums ( outer( (yr - yb) - (xr - xb) %*% bhat0 , -( yhatpor0 - yhatpob0 ) , FUN = "<=")*1  ) / ( M)
    dim(Fhatpo0) = c(Mpo,1)
    PRpo0 = (sum( ((yrpo - ybpo)*(0.5 - Fhatpo0) > 0)*1 +  0.5* ((yrpo - ybpo)*(0.5 - Fhatpo0) ==0)*1)) / (Mpo )
    MSPEpo0 =  sum(((yrpo - ybpo) - (xrpo - xbpo) %*% bhat0)^2)/ Mpo
    
    # information matrix for PRs and MSPEs in the qualification and playoff stages
    rbind(c( PRhatcv[nclusterb,l_m] , PRhatcv[K-1,l_m] , MSPEcv[nclustera,l_m] , MSPEcv[ K-1 ,l_m] ) , c( PRpo , PRpo0 , MSPEpo , MSPEpo0) )

    
    ##################################################################################################
    #agreement metrics (correlation, precision, and recall) between FRC ratings (all robots, top-8   #
    #robots, robots in the playoff stage) and the proposed robots strength estimates                 #
    ##################################################################################################
    
    # rank correlations computed under the OPR and OPRC models
    RCm0 = matrix(0 , 2 , 3)
    
    RCm0[1,1] = RC( seq(K,1) , bhat.inf0[(1:K),l_n])
    RCm0[1,2] = RC( seq(8,1) , bhat.inf0[(1:8),l_n])
    RCm0[1,3] = RC( seq(Kpo,1) , bhatpo[,l_m+3])
    RCm0[2,1] = RC( seq(K,1) , bhatinfm[,l_m])
    RCm0[2,2] = RC( seq(8,1) , bhatinfm[1:8,l_m])
    RCm0[2,3] = RC( seq(Kpo,1) , bhatpo[,l_m+2])
    
    # precisions and recalls computed under the OPR and OPRC models
    Pm0 = order(bhat.inf0[(1:K),l_n],decreasing = TRUE)
    Pm = order(bhatinfm[,l_m],decreasing = TRUE)
    
    Pre_Rec = matrix(0,K,4)
    
    for(w in 1:K){
      Pre_Rec[w,1] = sum(outer(Pm0[1:w],seq(1,w),FUN="==")*1)/w
      Pre_Rec[w,2] = sum(outer(Pm[1:w],seq(1,w),FUN="==")*1)/w
      Pre_Rec[w,3] = sum(outer(Pm0[1:w],seq(1,8),FUN="==")*1)/8
      Pre_Rec[w,4] = sum(outer(Pm[1:w],seq(1,8),FUN="==")*1)/8
    }