# Copyright (c) 2014,
# Mathias Kuhring, KuhringM@rki.de, Robert Koch Institute, Germany, 
# All rights reserved. For details, please note the license.txt.

# import of the scores from the java routine
importScores <- function(score.files, assembly.files, reference.files){
  scores <- vector('list', length(score.files))
  if (DEBUGGING){ print("metrics import") }
  for (i in 1:length(score.files)){
    if (DEBUGGING){ print(as.character(score.files[i])) }
    scores[[i]] <- cbind(Assembly=assembly.files[i],
                         Reference=reference.files[i],
                         read.table(score.files[i], header=TRUE,
                                    colClasses=get.colClasses(score.files[i])))
  }
  if (DEBUGGING){ print("import done") }
  return(scores)
}


# removes contig duplicates with same ID (e.g. different local matches)
# returns best contig per ID depending on filter metric values (highest MatchCount per default)
bestDuplicate <- function(metrics, filter="MatchCount", highest=TRUE){
  if (highest){ mod <- -1 } else{ mod <- 1 }
  sorted <- metrics[order(metrics$ContigID, mod * metrics[filter]), ]
  uniques <- sorted[!duplicated(sorted$ContigID),]
  return(uniques)
}


# removes redundant and correlating scores
correctScores <- function(metrics){
  for (i in 1:length(metrics)){
    metrics[[i]]$Sample <- NULL
    
    # unnormalized scores are not further used
    metrics[[i]]$MatchCount <- NULL
    metrics[[i]]$ErrorCount <- NULL
    metrics[[i]]$ErrorSubtraction <- NULL
    
    # following scores are directly correlated and yield no further information
    metrics[[i]]$NormedErrorCount2 <- NULL
    metrics[[i]]$NormedErrorSubtraction <- NULL
    metrics[[i]]$MaxEndContError <- NULL
    
    metrics[[i]]$NormedContigLength2 <- NULL
  }
  return(metrics)
}


# separates the scores into two classes each based on the manual thresholds
manualClasses <- function(scores, thresholds){
  return(regToClassByThresholds(scores, thresholds))
}


# separates the scores into two classes each based on exponential fittings
expoClasses <- function(scores, quantile){
  
  thresholds <- getExpoClassThresholds(scores, quantile)
  scores <- regToClassByThresholds(scores, thresholds)
  
  return(scores)
}


# separates data into classes by given thresholds
regToClassByThresholds <- function(scores, thresholds){  
  firstMetric <- which(colnames(scores)=="NormedMatchCount1")
  lastMetric <- ncol(scores)
  
  for (i in firstMetric:lastMetric){
    
    if (any(colnames(scores)[i] == 
              c("NormedMatchCount1", "NormedMatchCount2", 
                "NormedContigLength1", "VitorScore")))
    {
      scores[i] <- as.factor(scores[i] >= thresholds[i])
    }
    else
    {
      scores[i] <- as.factor(scores[i] <= thresholds[i])
    }
  }
  
  return(scores)
}