pkgs <- c("arrow", "class", "DT", "future", "ggplot2", "glmnet", "htmlTable",
          "LiblineaR", "Matrix", "Metrics", "promises", "quanteda",
          "quanteda.textmodels", "ranger", "rlang", "Rtsne",
          "RJSONIO", "shiny", "SparseM", "stringi", "uwot")
pkgs <- pkgs[! pkgs %in% rownames(installed.packages())]
if (length(pkgs)) {
  warning(paste0("Packages ", paste0(pkgs, collapse = " , "), " not available. ",
                 "App might crash."))
}
# for (i in seq_along(pkgs)) install.packages(pkgs[i])

verbose <- TRUE
# sink("ActiveTigger.log")

options(shiny.maxRequestSize = 30*1024^3) # File upload size

modelnames <- read.csv("modelnames.csv")
modelnames_labels <- modelnames$short
names(modelnames_labels) <- modelnames$short_lang

library(quanteda)
library(ggplot2)
library(promises)
library(future)
plan(multisession)


################################################################################
## Global functions
################################################################################

label2hash <- function(label) rlang::hash(label)

cleanFileName <- function(name)
  gsub('\\s|[/]|[\\]|[:]|[*]|[?]|["]|[<]|[>]|[+]|[$]|[=]', "_",
       gsub("^\\s+|\\s+$", "", name))

wtdF1 <- function(truth, pred) {
  pred <- pred[!is.na(truth)]
  truth <- truth[!is.na(truth)]
  sum(sapply(unique(truth), function(ilab)
    Metrics::fbeta_score(truth == ilab, pred == ilab) *
      sum(truth == ilab) / length(truth)), na.rm = TRUE)
}
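
## Illustration (hypothetical toy vectors): wtdF1() first drops texts whose gold
## label is NA, then averages the per-class F1 scores weighted by each class's
## share of the remaining gold labels:
##   wtdF1(truth = c("a", "a", "b", NA), pred = c("a", "b", "b", "b"))
##   # = F1("a") * 2/3 + F1("b") * 1/3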

metricsTable <- function(truth, pred) {
  do.call(rbind, lapply(sort(unique(truth)), function(ilab) {
    tmptrue <- truth == ilab
    tmppred <- pred == ilab
    tmp <- data.frame(
      "Tag" = ilab,
      "N.cases" = sum(tmptrue),
      "N.predict" = sum(tmppred),
      "Accuracy" = sprintf("%f", round(Metrics::accuracy(tmptrue, tmppred), 6)),
      "Precision" = sprintf("%f", round(Metrics::precision(tmptrue, tmppred), 6)),
      "Recall" = sprintf("%f", round(Metrics::recall(tmptrue, tmppred), 6)),
      "F1" = sprintf("%f", round(Metrics::fbeta_score(tmptrue, tmppred), 6)))
    tmp$F1[tmp$F1 == "NaN"] <- sprintf("%f", 0)
    tmp
  }))
}

bertPlotFromPath <- function(path) {
  if (!file.exists(paste0(path, "/trainer_state.json"))) return(NULL)
  state <- RJSONIO::fromJSON(paste0(path, "/trainer_state.json"))
  logs <- data.table::rbindlist(lapply(state$log_history, as.list), fill = TRUE)
  epochmin <- logs$epoch[which.min(logs$eval_loss)]
  
  gdat <- data.table::rbindlist(
    use.names = FALSE,
    list(
      cbind("train", logs[!is.na(logs$loss), c("epoch", "loss")]),
      cbind("valid", logs[!is.na(logs$eval_loss), c("epoch", "eval_loss")])
    ))
  colnames(gdat) <- c("data", "epoch", "loss")
  
  ggplot(gdat, aes_string("epoch", "loss", color = "data", shape = "data")) +
    geom_point(size = 2) + geom_line() +
    geom_vline(xintercept = epochmin, linetype = 2) +
    theme_bw() + expand_limits(y=0)
}

bertHyperToMsg <- function(pars) {
  if (is.null(pars)) return(NULL)
  paste0("Scheme : ", pars$bertScheme, "; ",
         "Model : ", pars$bertModel, "\n* ",
         "Epochs : ", pars$bertEpochs, " ; ",
         "Lrate : ", pars$bertLrate, " ; ",
         "Wdecay : ", pars$bertWdecay, " ; ",
         "Batch size : ", pars$bertBatchsize, " ; ",
         "Grad. accum. : ", pars$bertGradacc, "\n* ",
         "Keep best : ", pars$bertBest, " ; ",
         "ValidFrac : ", pars$bertValidFrac, " ; ",
         "ValidSeed : ", pars$bertValidSeed, " \n* ",
         pars$bertNtrain, " training samples : ",
         paste0(names(pars$bertTrainTable), " (",
                as.numeric(pars$bertTrainTable), ")",
                collapse = " ; "))
}

get_status <- function(dafile)
  scan(dafile, what = "character", sep="\n", quiet = TRUE)
set_status <- function(dafile, msg) write(msg, dafile)
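
## These two helpers implement the plain-text status files used for progress
## reporting: long-running future() blocks call set_status() to overwrite a
## small per-session file, and the main process polls the same file to update
## the diagnostics box (see the data import observers below).
## Hypothetical example:
##   set_status("tigger_mytoken_data", "Importing main data file...")
##   get_status("tigger_mytoken_data")  # "Importing main data file..."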



################################################################################
## Main server function
################################################################################

# options(shiny.maxRequestSize=2^34) # Max filesize

shinyServer(function(input, output, session) {
  
  values <- reactiveValues()
  values$visuGo <- FALSE
  values$dfmGo <- FALSE
  # queryTrigger <- reactiveVal(0)
  # predlabTrigger <- reactiveVal(0)
  initDataTrigger <- reactiveVal(0)
  queryNext <- reactiveVal(0)
  trainTrigger <- reactiveVal(0)
  predTrigger <- reactiveVal(0)
  diagTrigger <- reactiveVal(0)
  ready2tag <- reactiveVal(0)
  trainCountdown <- reactiveVal(0)
  
  ok.data <- reactiveVal()
  ok.data.async <- reactiveVal()
  ok.data.running <- reactiveVal(FALSE)

  bpall_nclicks <- reactiveVal(0)
  bertpred_nclicks <- reactiveVal(0)
  
  ## NEW use_regressors list as values$new_use_regressors
  observeEvent(input$use_regressors, {
    values$new_use_regressors <- input$use_regressors
    cat("DEBUG new_use_regressors: ", paste(input$use_regressors, collapse = " "), "\n")
  })

  ## NEW trainCountdown with input$trainCountdown
  ## Policy: 0, don't train ; 1, trigger train on tag ; >1, decrement on tag
  observeEvent(input$trainCountdown, trainCountdown(input$trainCountdown))
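  ## For example, a countdown value of 5 means the next tags only decrement the
  ## counter and automatic retraining fires once it reaches 1, while a value of
  ## 0 disables automatic retraining entirely (see the policy comment above).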

  ####################
  ## Projects and data
  
  ## Read config files, update UI forms accordingly
  ## This should happen only once
  observe({
    if (verbose)
      cat("DEBUG read project configs\n")
    # values$conf <- RJSONIO::fromJSON("conf.json")
    projfiles <- dir(pattern = "^tigger_.*[.]json$")
    isolate({
      if (!length(projfiles)) {
        values$conf <- NULL
        return(NULL)
      }
      values$confnames <- gsub("^tigger_(.*)[.]json$", "\\1", projfiles)
      
      values$conf <- RJSONIO::fromJSON(projfiles[1])
    })
  })
  
  ## When projects added, update selector
  observeEvent(values$confnames, {
    if (verbose)
      cat("DEBUG update selectProject from confnames\n")
    updateSelectInput(session, "selectProject", choices = values$confnames)
  })
  
  ## Event from project change
  observeEvent(input$selectProject, {
    if (is.null(input$selectProject)) return(NULL)
    if (!nchar(input$selectProject)) return(NULL)
    if (verbose)
      cat("DEBUG project change step 1\n")
    # values$project <- input$selectProject
    # values$conf <- values$confs[[values$project]]
    values$conf <- RJSONIO::fromJSON(
      paste0("tigger_", input$selectProject, ".json"))
    ok.data(NULL)
    ok.visu(NULL)
    initDataTrigger(initDataTrigger() + 1)
    
    if (verbose)
      cat("DEBUG project change step 2\n")
    if (!is.null(values$conf$dataNrows))
      updateNumericInput(session, "dataNrows", value = as.numeric(values$conf$dataNrows))
    if (!is.null(values$conf$dataSkipRows))
      updateNumericInput(session, "dataSkipRows", value = as.numeric(values$conf$dataSkipRows))
    
    #### Python ####
    ## Default use_python if not found in conf file: FALSE
    if (is.null(values$conf$use_python))
      values$conf$use_python <- FALSE
    updateCheckboxInput(session, "sys_use_python", value = values$conf$use_python)
    
    ## Set default python if not found in conf file
    if (is.null(values$conf$python))
      values$conf$python <- "python3"
    updateTextInput(session, "sys_which_python", value = values$conf$python)
    
    values$python_ok <- FALSE
    if (values$conf$use_python) {
      ## Check whether python is working
      pytest <- try(system(paste(values$conf$python, "--version"),
                           intern = TRUE))
      if (inherits(pytest, "try-error")) {
        showNotification(paste(
          "Python path `", values$conf$python,
          "` not valid, try changing it in Project/System tab"),
          duration = 10, type = "error")
      } else
        values$python_ok <- TRUE
    }
    if (verbose)
      cat("Python ok:", values$python_ok, "\n")
    
    
    #### GPU ####
    if (!is.null(values$conf$use_gpu))
      updateCheckboxInput(session, "sys_use_gpu", value = values$conf$use_gpu)
    
    #### Spacy ####
    if (!is.null(values$conf$use_spacy))
      updateCheckboxInput(session, "sys_use_spacy", value = values$conf$use_spacy)
    updateTextInput(session, "sys_use_spacy_model", value = values$conf$use_spacy_model)
    
    #### fastText ####
    if (!is.null(values$conf$use_ft))
      updateCheckboxInput(session, "sys_use_ft", value = values$conf$use_ft)
    updateTextInput(session, "sys_use_ft_model", value = values$conf$use_ft_model)
    
    #### SBERT ####
    if (!is.null(values$conf$use_sb))
      updateCheckboxInput(session, "sys_use_sb", value = values$conf$use_sb)
    updateTextInput(session, "sys_use_sb_model", value = values$conf$use_sb_model)
    
  })

  
  ##############################################################################
  ## Reactives : project settings UI
  ##############################################################################
  
  output$sys_datadir <- renderText(values$conf$datadir)
  output$sys_datafile <- renderText(paste0(values$conf$dataname, ".feather"))
  output$sys_var_id <- renderUI(renderText(values$conf$idcol))
  output$sys_var_text <- renderUI(renderText(values$conf$textcols))
  output$sys_var_tag <- renderUI(renderText(values$conf$tagcols))
  output$sys_var_context_ui <- renderUI(selectInput(
    "sys_var_context", NULL, values$data_orig_colnames, values$conf$contextcols,
    multiple = TRUE))
  output$sys_var_comm_ui <- renderUI(selectInput(
    "sys_var_comm", NULL, c("(none)", values$data_orig_colnames),
    values$conf$commcol))
  
  output$sys_ex_lang_ui <- renderUI(selectInput(
    "sys_ex_lang", NULL, modelnames_labels, "en"
  ))
  output$sys_ex_spacy <- renderText(
    modelnames$spacy_name[modelnames$short == input$sys_ex_lang])
  output$sys_ex_spacy_dl <- renderText(paste0(
    "Download with python module: ", values$conf$python, " -m spacy download ",
    modelnames$spacy_name[modelnames$short == input$sys_ex_lang]))
  output$sys_ex_ft <- renderText(
    modelnames$fasttext_name[modelnames$short == input$sys_ex_lang])
  output$sys_ex_ft_dl <- renderText(paste0(
    "Manual download link : ",
    modelnames$fasttext_url[modelnames$short == input$sys_ex_lang]))
  output$sys_ex_sb <- renderText(
    ifelse(input$sys_ex_lang %in% c("ar", "zh", "nl", "en", "fr", "de",
                                    "it", "ko", "pl", "pt", "ru", "es", "tr"),
           "distiluse-base-multilingual-cased-v1",
           "distiluse-base-multilingual-cased-v2"))

  
  ## Save system changes on button click, with modal
  output$save_config_msg <- renderUI({
    req(values$save_config_msg)
    p(strong(values$save_config_msg))
  })
  output$save_embed_msg <- renderUI({
    tmp <- ""
    if (input$sys_use_spacy) if (!identical(input$sys_use_spacy_model, values$conf$use_spacy_model))
      tmp <- paste0(tmp, "<li> Changing the spacy model will delete the current tokenized text and fasttext word embeddings, to prevent conflicts.</li>")
    if (input$sys_use_ft) if (!identical(input$sys_use_ft_model, values$conf$use_ft_model))
      tmp <- paste0(tmp, "<li> Changing the fasttext model will delete the current fasttext word embeddings, to prevent conflicts.</li>")
    if (input$sys_use_sb) if (!identical(input$sys_use_sb_model, values$conf$use_sb_model))
      tmp <- paste0(tmp, "<li> Changing the SBERT model will delete the current SBERT sentence embeddings, to prevent conflicts.</li>")
    
    if (nchar(tmp)) {
      return(HTML(paste("<br><p> <strong>Warning:</strong> <ul>", tmp, "</ul></p>")))
    } else
      return(NULL)
  })
  observeEvent(input$saveSystem, {
    values$save_config_msg <- NULL
    showModal(modalDialog(
      title = "Save project configuration",
      paste0("Click 'Save' to check and confirm the new configuration"),
      uiOutput("save_embed_msg"),
      uiOutput("save_config_msg"),
      footer = tagList(actionButton("save_conf_confirm", "Save"),
                       modalButton("Cancel"))))
  })
  observeEvent(input$save_conf_confirm, {
    newconf <- values$conf
    newconf$contextcols <- input$sys_var_context
    newconf$commcol <- input$sys_var_comm
    
    if (input$sys_use_python) {
      pytest <- try(system(paste(input$sys_which_python, "--version"),
                           intern = TRUE))
      if (inherits(pytest, "try-error")) {
        values$save_config_msg <- paste0(
          "Error: python path `", input$sys_which_python, "` not valid")
        return(NULL)
      }
    }
    newconf$use_python <- input$sys_use_python
    newconf$python <- input$sys_which_python
    newconf$use_gpu <- input$sys_use_gpu
    
    if (input$sys_use_spacy) {
      sptest <- system(paste0(
        newconf$python, " -m spacy info ", input$sys_use_spacy_model), intern = TRUE)
      if (length(sptest) == 0) {
        values$save_config_msg <- paste(
          "Error loading spacy, check that it is installed in the specified python env")
        return(NULL)
      }
      if (length(attr(sptest, "status"))) {
        values$save_config_msg <- paste(
          "Error loading spacy model, check that it has been downloaded")
        return(NULL)
      }
      newconf$use_spacy_model <- input$sys_use_spacy_model
    }
    newconf$use_spacy <- input$sys_use_spacy
    
    if (input$sys_use_ft) {
      if (!file.exists(input$sys_use_ft_model)) {
        values$save_config_msg <- paste(
          "Error loading fasttext model, check the specified path")
        return(NULL)
      }
      newconf$use_ft_model <- input$sys_use_ft_model
    }
    newconf$use_ft <- input$sys_use_ft
    
    if (input$sys_use_sb)
      newconf$use_sb_model <- input$sys_use_sb_model
    newconf$use_sb <- input$sys_use_sb
    
    # if (newconf$use_spacy) if (newconf$use_spacy_model != values$conf$use_spacy_model) {
    if (newconf$use_spacy) if (!identical(newconf$use_spacy_model, values$conf$use_spacy_model)) {
      file.remove(paste0(values$conf$datadir, values$conf$dataname, "_spa.feather"))
      file.remove(paste0(values$conf$datadir, values$conf$dataname, "_ft.feather"))
    }
    # if (newconf$use_ft) if (newconf$use_ft_model != values$conf$use_ft_model)
    if (newconf$use_ft) if (!identical(newconf$use_ft_model, values$conf$use_ft_model))
      file.remove(paste0(values$conf$datadir, values$conf$dataname, "_ft.feather"))
    # if (newconf$use_sb) if (newconf$use_sb_model != values$conf$use_sb_model)
    if (newconf$use_sb) if (!identical(newconf$use_sb_model, values$conf$use_sb_model))
      file.remove(paste0(values$conf$datadir, values$conf$dataname, "_sb.feather"))
    
    writeLines(RJSONIO::toJSON(newconf),
               paste0("tigger_", newconf$projectid, ".json"))
    values$conf <- newconf
    initDataTrigger(initDataTrigger() + 1)
    removeModal()
  })

  
  ##############################################################################
  ## Reactives : data
  ##############################################################################
  
  ok.commentCol <- reactive(values$conf$commcol)
  
  ## Current imported data (async)
  observeEvent(input$dataImport, {
    if (ok.data.running()) return(NULL)
    ## Save nrows to conf file on change
    if (!is.numeric(input$dataNrows) | !is.numeric(input$dataSkipRows)) return(NULL)
    values$conf$dataNrows <- input$dataNrows
    values$conf$dataSkipRows <- input$dataSkipRows
    if (!is.null(values$conf$projectid))
      writeLines(RJSONIO::toJSON(values$conf), paste0(
        "tigger_", values$conf$projectid, ".json"))
    initDataTrigger(initDataTrigger() + 1)
  })
  
  
  ## Read data event
  observeEvent(initDataTrigger(), {
    if (is.null(values$conf)) {
      ok.data(NULL)
      return(NULL)
    }
    if (initDataTrigger() == 0) return(NULL)
    if (verbose)
      cat("DEBUG enter ok.data\n")
    if (verbose)
      cat("DEBUG enter ok.data trigger", initDataTrigger(), "\n")
    ok.data.running(TRUE)
    
    ## Prepare variables for async launch
    coco <- values$conf
    coco$python_ok <- values$python_ok
    
    da_token <- session$token
    data_status <- paste0("tigger_", da_token, "_data")
    
    file_orig_colnames <- paste0(data_status, "_origcols")
    file_colstok <- paste0(data_status, "_tok")
    file_colsft <- paste0(data_status, "_ft")
    file_colssb <- paste0(data_status, "_sb")
    file_colsbertpred <- paste0(data_status, "_bertpred")
    da_missing <- paste0(data_status, "_bertpred_missing")
    
    projpath <- paste0(coco$datadir, coco$projectid)
    if (!file.exists(projpath))
      dir.create(projpath)
    
    ## Read data: async launch
    data_async <- future({
      #################
      ## Main data file
      
      set_status(data_status, "Importing main data file...")
      res <- arrow::read_feather(
        paste0(coco$datadir, coco$dataname, ".feather"))
      if (is.null(coco$dataNrows)) return(NULL)
      rowselect <-
        intersect(1:nrow(res), (1:(coco$dataNrows)) + coco$dataSkipRows)
      if (!length(rowselect)) return(NULL)
      res <- res[rowselect, , drop = FALSE]
      
      writeLines(colnames(res), file_orig_colnames)
      
      
      #################
      ## Tokenized text
      
      ## Import data : feather of two cols, one the id, other the tokenized
      set_status(data_status, "Importing tokenized text...")
      file_tok <- paste0(coco$datadir, coco$dataname, "_spa.feather")
      if (file.exists(file_tok)) {
        tokdat <- arrow::read_feather(file_tok)
      } else
        tokdat <- NULL
      tok_remaining <- res[[coco$idcol]]
      tok_remaining <-
        tok_remaining[! tok_remaining %in% tokdat[[coco$idcol]]]
      
      ## Tokenize remaining texts
      if (length(tok_remaining)) {
        if (!coco$python_ok | !coco$use_spacy) {
          ## If remaining texts to tokenize but no python, fall back on
          ## untokenized text
          ## TODO here use quanteda tokenizer with options?
          tokdat <- data.frame(
            res[[coco$idcol]],
            do.call(paste, c(res[match(tok_remaining, res[[coco$idcol]]),
                                 coco$textcols, drop = FALSE], sep = "\n\n")))
          colnames(tokdat) <- c(coco$idcol, "text_spa")
          set_status(data_status, "Importing tokenized text: no spacy ok")
        } else {
          ## If python present, tokenize with spacy
          set_status(data_status,
                     paste("Tokenizing", length(tok_remaining), "new texts"))
          tmpfile <- paste0("tigger_tok_", da_token)
          arrow::write_feather(
            data.frame(
              id = tok_remaining,
              text = do.call(
                paste, c(res[match(tok_remaining, res[[coco$idcol]]),
                             coco$textcols, drop = FALSE], sep = "\n\n"))),
            paste0(tmpfile, ".feather")
          )
          system(paste0(
            coco$python, " tokenize_spacy.py -d ", tmpfile, ".feather",
            " -m ", coco$use_spacy_model))
          tok_new <- arrow::read_feather(paste0(tmpfile, "_spa.feather"))
          colnames(tok_new)[colnames(tok_new) == "id"] <- coco$idcol
          tokdat <- rbind(tokdat, tok_new)
          arrow::write_feather(tokdat, file_tok)
          system(paste0("rm ", tmpfile, "*"))
          set_status(data_status, "Importing tokenized text: spacy ok")
        }
      }
      
      ## Save colname for tokenized text
      tmp_cols_tok <- colnames(tokdat)[colnames(tokdat) != coco$idcol]
      writeLines(tmp_cols_tok, file_colstok)
      
      set_status(data_status, "Importing tokenized text: wrote colnames")
      
      ## Merge with current data
      res <- merge(res, tokdat, by = coco$idcol,
                   all.y = FALSE, sort = FALSE)
      rm(tokdat)
      
      set_status(data_status, "Importing tokenized text: merged")
      
      #################
      ## FastText embeddings
      
      set_status(data_status, paste("Loading fastText embeddings"))
      
      ## FastText embeddings : feather file with id and embeddings
      file_ft <- paste0(coco$datadir, coco$dataname, "_ft.feather")
      if (file.exists(file_ft)) {
        ftdat <- arrow::read_feather(file_ft)
      } else
        ftdat <- NULL
      ft_remaining <- res[[coco$idcol]]
      ft_remaining <-
        ft_remaining[! ft_remaining %in% ftdat[[coco$idcol]]]
      
      ## Embed remaining texts, if FT in use
      if (length(ft_remaining)) {
        if (!coco$python_ok | !coco$use_ft) {
          ## If no python or FT, abort if not all rows already embedded
          ftdat <- NULL
        } else {
          set_status(data_status, paste(
            "Embedding", length(ft_remaining), "new texts with fastText"))
          tmpfile <- paste0("tigger_ft_", da_token)
          arrow::write_feather(
            data.frame(
              id = ft_remaining,
              text = do.call(
                paste, res[match(ft_remaining, res[[coco$idcol]]),
                           tmp_cols_tok, drop = FALSE])),
            paste0(tmpfile, ".feather")
          )
          system(paste0(
            coco$python, " embed_fasttext.py -d ", tmpfile, ".feather",
            " -m ", coco$use_ft_model))
          ft_new <- arrow::read_feather(paste0(tmpfile, "_ft.feather"))
          colnames(ft_new)[colnames(ft_new) == "id"] <- coco$idcol
          ftdat <- rbind(ftdat, ft_new)
          arrow::write_feather(ftdat, file_ft)
          system(paste0("rm ", tmpfile, "*"))
        }
      }
      
      if (!is.null(ftdat)) {
        ## Merge with current data
        writeLines(colnames(ftdat)[colnames(ftdat) != coco$idcol], file_colsft)
        res <- merge(res, ftdat, by = coco$idcol, all.y = FALSE, sort = FALSE)
        rm(ftdat)
      }
      
      
      #################
      ## SBERT embeddings
      
      set_status(data_status, "Loading SBERT embeddings")
      
      ## SBERT file : feather of id and embeddings
      file_sb <- paste0(coco$datadir, coco$dataname, "_sb.feather")
      if (file.exists(file_sb)) {
        sbdat <- arrow::read_feather(file_sb)
      } else
        sbdat <- NULL
      sb_remaining <- res[[coco$idcol]]
      sb_remaining <-
        sb_remaining[! sb_remaining %in% sbdat[[coco$idcol]]]
      
      ## Embed remaining texts
      if (length(sb_remaining)) {
        if (!coco$python_ok | !coco$use_sb) {
          ## If no python or SB, abort if not all rows already embedded
          sbdat <- NULL
        } else {
          set_status(data_status, paste(
            "Embedding", length(sb_remaining), "new texts with SBERT"))
          tmpfile <- paste0("tigger_sb_", da_token)
          arrow::write_feather(
            data.frame(
              id = sb_remaining,
              text = do.call(
                paste, res[match(sb_remaining, res[[coco$idcol]]),
                           coco$textcols, drop = FALSE])),
            paste0(tmpfile, ".feather")
          )
          system(paste0(
            coco$python, " embed_sbert.py -d ", tmpfile, ".feather",
            " -m ", coco$use_sb_model))
          sb_new <- arrow::read_feather(paste0(tmpfile, "_sb.feather"))
          colnames(sb_new)[colnames(sb_new) == "id"] <- coco$idcol
          sbdat <- rbind(sbdat, sb_new)
          arrow::write_feather(sbdat, file_sb)
          system(paste0("rm ", tmpfile, "*"))
        }
      }
      
      if (!is.null(sbdat)) {
        ## Merge with current data
        writeLines(colnames(sbdat)[colnames(sbdat) != coco$idcol], file_colssb)
        res <- merge(res, sbdat, by = coco$idcol, all.y = FALSE, sort = FALSE)
        rm(sbdat)
      }
      
      #################
      ## BERT predictions
      
      set_status(data_status, "Loading BERT predictions")
      
      ## BERT predictions : feathers of id and probs
      ## TODO: take full prediction if existing
      
      files_pred <- dir(
        coco$datadir, paste0("^", coco$dataname, "_bertpred_.*[.]feather$"))
      # files_predall <- dir(
      #   coco$datadir, paste0("^", coco$dataname, "_bertpredall_.*[.]feather$"))
      # files_pred <- files_pred[
      #   ! gsub("_bertpred_", "_bertpredall_", files_pred) %in% files_predall]
      # files_pred <- c(files_predall, files_pred)
      
      tmp_bertpred_cols <- list()
      if (length(files_pred)) {
        for (ipred in files_pred) {
          preddat <- arrow::read_feather(paste0(coco$datadir, ipred))
          predname <-
            gsub(paste0("^", coco$dataname, "_bertpred_(.*)[.]feather$"),
                 "\\1", ipred)
          # predname <-
          #   gsub(paste0("^", coco$dataname, "_bertpred(all)?_(.*)[.]feather$"),
          #        "\\2", ipred, perl = TRUE)
          n_missing <- sum(! res[[coco$idcol]] %in% preddat[[coco$idcol]])
          if (n_missing) {
            write(
              paste0("BERT prediction ", predname, " : ", n_missing,
                     " texts missing in prediction."),
              da_missing, append = TRUE)
          } else {
            colnames(preddat) <- gsub(
              "^bertpred_", paste0("bertpred_", predname), colnames(preddat))
            tmp_bertpred_cols[[predname]] <-
              colnames(preddat)[colnames(preddat) != coco$idcol]
            res <- merge(res, preddat, by = coco$idcol,
                         all.y = FALSE, sort = FALSE)
          }
          rm(preddat)
        }
        if (length(tmp_bertpred_cols)) {
          writeLines(names(tmp_bertpred_cols), file_colsbertpred)
          lapply(names(tmp_bertpred_cols), function(x) {
            writeLines(tmp_bertpred_cols[[x]], paste0(file_colsbertpred, "_", x))
          })
        }
      }
      
      ## Comment col
      set_status(data_status, "Loading comments")
      tmpfile <- paste0(projpath, "/", coco$commcol, ".csv")
      if (file.exists(tmpfile)) {
        tmpcol <- read.csv(tmpfile)
        colnames(tmpcol)[2] <- coco$commcol
        ## Remove duplicates, keep newest
        tmpcol <- tmpcol[nrow(tmpcol):1, , drop = FALSE]
        tmpcol <- tmpcol[!duplicated(tmpcol[, 1]), , drop = FALSE]
        tmpcol <- tmpcol[nrow(tmpcol):1, , drop = FALSE]
        res <- merge(res, tmpcol, by = coco$idcol,
                     all.x = TRUE, all.y = FALSE, sort = FALSE)
        res[[coco$commcol]][is.na(res[[coco$commcol]])] <- ""
      } else
        res[[coco$commcol]] <- ""
      
      # ## Tagging cols
      # set_status(data_status, "Loading tags")
      # for (itag in coco$tagcols) {
      #   tmpfile <- paste0(projpath, "/", itag, ".csv")
      #   if (file.exists(tmpfile)) {
      #     tmpcol <- read.csv(tmpfile)
      #     tmpcol[[paste0("hist_", itag)]] <- 1:nrow(tmpcol)
      #     ## Remove duplicates, keep newest
      #     tmpcol <- tmpcol[nrow(tmpcol):1, ]
      #     tmpcol <- tmpcol[!duplicated(tmpcol[, 1]), ]
      #     tmpcol <- tmpcol[nrow(tmpcol):1, ]
      #     if (verbose)
      #       cat("DEBUG imported ", itag, nrow(tmpcol), "\n")
      #     res <- merge(res, tmpcol, by = coco$idcol,
      #                  all.x = TRUE, all.y = FALSE, sort = FALSE)
      #   } else {
      #     res[[itag]] <- NA
      #     res[[paste0("hist_", itag)]] <- NA
      #   }
      # }
      ## Tagging cols
      set_status(data_status, "Loading tags")
      for (itag in coco$tagcols) {
        itaghist <- paste0("hist_", itag)
        if (is.null(res[[itag]]) | all(is.na(res[[itag]]))) {
          res[[itag]] <- NA
          res[[itaghist]] <- NA
        } else {
          res[[itaghist]][!is.na(res[[itag]])] <- 1:sum(!is.na(res[[itag]]))
        }
        tmpfile <- paste0(projpath, "/", itag, ".csv")
        if (file.exists(tmpfile)) {
          tmpcol <- read.csv(tmpfile)
          tmpcol[[itaghist]] <- 1:nrow(tmpcol) +
            max(0, suppressWarnings(max(na.omit(res[[itaghist]]))))
          ## Remove duplicates, keep newest
          tmpcol <- tmpcol[nrow(tmpcol):1, ]
          tmpcol <- tmpcol[!duplicated(tmpcol[, 1]), ]
          tmpcol <- tmpcol[nrow(tmpcol):1, ]
          tmpcol <- tmpcol[tmpcol[[coco$idcol]] %in% res[[coco$idcol]], ]
          if (verbose)
            cat("DEBUG imported", itag, nrow(tmpcol), "\n")
          # res <- merge(res, tmpcol, by = coco$idcol,
          #              all.x = TRUE, all.y = FALSE, sort = FALSE)
          if (nrow(tmpcol)) {
            idmatches <- match(tmpcol[[coco$idcol]], res[[coco$idcol]])
            # idmatches <- idmatches[!is.na(idmatches)]
            res[[itag]][idmatches] <- tmpcol[[itag]]
            res[[itaghist]][idmatches] <- tmpcol[[itaghist]]
          }
        }
      }
      
      set_status(data_status, "Finalizing data import")
      res
    }) %...>% ok.data.async()
    data_async <- catch(data_async, function(error) {
      values$modelDiagno <- paste0("Error in data import: ", error)
    })
    data_async <- finally(data_async, function() {
      #################
      ## Finalize data importation
      res <- ok.data.async()
      if (is.null(res)) return(NULL)
      
      set_status(data_status, "Finalizing data import: read columns")
      ## Read column (tok, ft, sb, ...) names from dedicated files
      values$tagcols <- coco$tagcols
      values$cols_tok <- readLines(file_colstok)
      values$cols_ft <- NULL
      if (file.exists(file_colsft)) {
        values$cols_ft <- readLines(file_colsft)
      }
      values$cols_sb <- NULL
      if (file.exists(file_colssb)) {
        values$cols_sb <- readLines(file_colssb)
      }
      set_status(data_status, "Finalizing data import: bertpred")
      if (file.exists(file_colsbertpred)) {
        tmp_bertpred <- readLines(file_colsbertpred)
        values$cols_bertpred <- lapply(tmp_bertpred, function(x)
          readLines(paste0(file_colsbertpred, "_", x)))
        names(values$cols_bertpred) <- tmp_bertpred
      } else
        values$cols_bertpred <- NULL
      set_status(data_status, "Finalizing data import: bertpred missing")
      if (file.exists(da_missing)) {
        lapply(readLines(da_missing), function(x)
          showNotification(paste(
            x, "\n",
            "You may predict on current text in BERT panel, ",
            "and then reload the data.")))
      }
      
      ## Populate regressors list in prediction panel
      set_status(data_status, "Finalizing data import: populating regressors")
      regr_choices <- c(
        "Regex" = "regex",
        "Word counts (DFM)" = "dfm")
      if (!is.null(values$cols_ft))
        regr_choices <- c(regr_choices, "Word embeddings (FastText)" = "ft")
      if (!is.null(values$cols_sb))
        regr_choices <- c(regr_choices, "Sentence embeddings (SBERT)" = "sb")
      regr_choices <- c(regr_choices, "Extra predictors" = "extra")

      if (length(values$cols_bertpred)) {
        tmp_choices <- paste0("bertpred_", names(values$cols_bertpred))
        names(tmp_choices) <- paste0("BERT_", names(values$cols_bertpred))
        regr_choices <- c(regr_choices, tmp_choices)
      }
      
      # if (length(regr_choices) > 3) {
        regr_choices_sel <-
          regr_choices[regr_choices %in% c("regex", "ft", "sb")]
      # } else
      #   regr_choices_sel <- c("regex", "dfm")
      
      updateSelectizeInput(
        session, "use_regressors", choices = regr_choices,
        selected = regr_choices_sel)
      values$new_use_regressors <- regr_choices_sel
      
      othercols <- colnames(res)
      othercols <- othercols[! othercols %in% c(
        values$cols_tok, values$cols_ft, values$cols_sb, values$cols_bertpred,
        values$tagcols, paste0("hist_", values$tagcols),
        coco$commcol, coco$textcols)]
      updateSelectInput(
        session, "dlTagSelect",
        choices = c("tags", "comments", "predictions", othercols),
        selected = c("tags", "comments", "predictions"))
      
      ## Populate extra regressors lists in prediction panel
      updateSelectizeInput(
        session, "use_ootregnum", choices = othercols, selected = NULL)
      updateSelectizeInput(
        session, "use_ootregcat", choices = othercols, selected = NULL)
      
      ## Project path
      values$projectdir <- paste0(projpath, "/")
      
      ## Create tagmat: data.frame of all current taggings
      values$tagmat <- res[, coco$tagcols, drop = FALSE]
      ## Create tagmathist: data.frame of historical order of taggings
      values$tagmathist <- res[, paste0("hist_", coco$tagcols), drop = FALSE]
      colnames(values$tagmathist) <- coco$tagcols

      ## Save data original colnames
      values$data_orig_colnames <- readLines(file_orig_colnames)
      
      ok.data(ok.data.async())
      ok.data.async(NULL)
      ok.data.running(FALSE)
      set_status(data_status, "Exit data async")
      system(paste0("rm ", data_status, "*"))
      values$modelDiagno <- "No model"
      
      
      if (trainTrigger() == 0) trainTrigger(1) ## For first run. TODO set trainTrigger to 0 when changing data?
      if (verbose) cat("DEBUG Exit data async finally\n")
    })
    NULL
  })
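
  ## The import block above follows the async pattern used throughout this app:
  ## future() runs the heavy work in a background R session (plan(multisession)),
  ## %...>% pipes the resolved value into a reactiveVal, catch() reports errors,
  ## and finally() runs the single-threaded finalization on the main process.
  ## Minimal sketch of the same promises pattern (hypothetical reactive `done`):
  ##   done <- reactiveVal()
  ##   p <- future({ Sys.sleep(5); 42 }) %...>% done()
  ##   p <- catch(p, function(e) showNotification(paste("failed:", e)))
  ##   p <- finally(p, function() message("cleanup"))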
  
  ## Read data import status from file
  observe({
    if (ok.data.running()) {
      invalidateLater(100)
      tmpfile <- paste0("tigger_", session$token, "_data")
      if (file.exists(tmpfile)) {
        values$modelDiagno <- paste(collapse = "\n", scan(
          tmpfile, what = "character", sep="\n", quiet = TRUE))
      } else
        values$modelDiagno <- ""
    }
  })
  
  
  ###############
  ## ok.text() reactive
  ###############
  ok.text <- eventReactive(ok.data(), {
    if (is.null(ok.data())) return(NULL)
    res <- ok.data()[, values$cols_tok[1]]
    if (length(values$cols_tok) > 1) {
      for (icol in values$cols_tok[-1]) {
        res <- paste0(res, "\n\n", ok.data()[, icol])
      }
    }
    # queryTrigger(queryTrigger() + 1)
    if (verbose)
      cat("DEBUG ok.text", length(res), "\n")
    res
  })
  
  ok.labelcol <- reactive({
    if (is.null(ok.data())) return(NULL)
    if (verbose)
      cat("DEBUG enter ok.labelcol\n")
    if (is.null(input$selectScheme)) return(values$conf$tagcols[1])
    input$selectScheme
  })
  
  
  ok.nicetext <- reactive({
    req(ok.data(), values$conf$textcols)
    # if (is.null(ok.data())) return(NULL)
    do.call(paste, c(ok.data()[, values$conf$textcols, drop = FALSE],
                     sep = "\n\n"))
  })
  
  ok.regressor.names <- reactive({
    daregs <- values$new_use_regressors
    if (verbose)
      cat("DEBUG enter ok.regressor.names ", paste(daregs, collapse = " "), "\n")
    res <- NULL
    if ("ft" %in% daregs)
      res <- c(res, values$cols_ft)
    if ("sb" %in% daregs)
      res <- c(res, values$cols_sb)
    if (length(values$cols_bertpred)) {
      for (ipred in 1:length(values$cols_bertpred)) {
        if (paste0("bertpred_", names(values$cols_bertpred)[ipred]) %in% daregs)
          res <- c(res, values$cols_bertpred[[ipred]])
      }
    }
    if (verbose)
      cat("DEBUG exit ok.regressor.names:", length(res), "\n")
    res
  })
  
  ok.contextCol <- reactive({
    if (verbose)
      cat("DEBUG enter ok.contextCol\n")
    # res <- isolate(values$conf$contextcols)
    res <- values$conf$contextcols
    names(res) <- res
    res
  })
  
  ## Initiate label and queries history, on ok.text() or labelcol change
  observeEvent(list(ok.text(), ok.labelcol()), {
    if (verbose)
      cat("DEBUG initiate label\n")
    if (is.null(ok.labelcol())) {
      values$label <- NULL
      values$labelhist <- NULL
      values$queries <- NULL
      values$comment <- NULL
      values$wasTagged <- NULL
      values$wasRetagged <- NULL
    } else {
      values$label <- values$tagmat[, ok.labelcol()]
      values$labelhist <- values$tagmathist[, ok.labelcol()]
      values$queries <- which(!is.na(values$label))
      values$comment <- ok.data()[[values$conf$commcol]]
      values$wasTagged <- !is.na(values$label)
      values$wasRetagged <- rep(FALSE, length(values$label))
    }
  })
  
  ## On values$label change, values$tagmat is updated for separate history
  observeEvent(values$label, {
    if (is.null(values$label)) return(NULL)
    if (verbose)
      cat("DEBUG Update tagmat from label\n")
    values$tagmat[, ok.labelcol()] <- values$label
    values$tagmathist[, ok.labelcol()] <- values$labelhist
  })
  
  
  ##############################################################################
  ## Reactives : training
  ##############################################################################
  
  ## Message of how many tagged and untagged, for model diagnostics box
  ok.msgtagged <- reactive({
    if (verbose)
      cat("DEBUG enter ok.msgtagged\n")
    # if (is.null(ok.data()) | ok.data.running()) {
    if (is.null(ok.data())) {
      return("No data yet ")
    }
    
    tmp <- ""
    # if (sum(is.na(values$label)) == 0)
    #   tmp <- paste0(tmp, "Data correction mode (0 NA labels left).\n")
    paste0(tmp, "Tagged: ", sum(!is.na(values$label)),
           " ; Untagged: ", sum(is.na(values$label)))
  })
  
  ## Training possible? At least 3 texts for 2 tags
  ok.train.possible <- reactive({
    if (verbose)
      cat("DEBUG enter ok.train.possible\n")
    labtab <- table(values$label)
    if (length(labtab) < 2) return(FALSE)
    if (sum(labtab > 2) < 2) return(FALSE)
    TRUE
  })
  
  ## Compute DFM (async)
  ## TODO make this less reactive
  observe({
    # if ("dfm" %in% input$use_regressors & !values$dfmGo)
    if ("dfm" %in% values$new_use_regressors & !values$dfmGo)
      values$dfmGo <- TRUE
  })
  ok.dfm <- reactiveVal()
  observe({
    if (is.null(ok.text())) return(NULL)
    if (! values$dfmGo) return(NULL)
    if (!is.numeric(input$dfmNgrams)) return(NULL)
    if (verbose)
      cat("DEBUG enter dfm observer\n")

    datext <- ok.text()
    dfmNgrams <- input$dfmNgrams
    dfmTfIdf <- input$dfmTfIdf
    dfmMinDocfreq <- input$dfmMinDocfreq
    dfmMinTermfreq <- input$dfmMinTermfreq
    dfmTfScheme <- input$dfmTfScheme
    dfmDfScheme <- input$dfmDfScheme

    dfm_async <- future(seed = TRUE, {
      dadfm <- datext %>%
        tolower() %>% tokens(remove_punct = F) %>%
        # tokens_select(pattern = stopwords(language= "fr"), selection = "remove") %>%
        tokens_ngrams(n= 1:dfmNgrams) %>% dfm() %>%
        dfm_trim(min_docfreq = dfmMinDocfreq, min_termfreq = dfmMinTermfreq,
                 verbose = TRUE)
      if (dfmTfIdf) {
        dadfm <- dadfm %>%
          dfm_tfidf(scheme_tf = dfmTfScheme, scheme_df = dfmDfScheme)
      }
      suppressWarnings({ # TODO : check
        dadfm <- quanteda::convert(dadfm, to= "tm")
      })
      dadfm <- Matrix::sparseMatrix(i= dadfm$i, j= dadfm$j, x= dadfm$v,
                                    dimnames = dadfm$dimnames)
      dadfm
    }) %...>% ok.dfm()
    dfm_async <- catch(dfm_async, function(error) {
      values$modelDiagno <- paste0("Error in DFM: ", error)
    })
  })
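
  ## For reference, the pipeline above builds a quanteda document-feature matrix
  ## from the tokenized text (optionally tf-idf weighted) and converts it to a
  ## plain Matrix::sparseMatrix usable by the glmnet/LiblineaR/ranger trainers.
  ## Minimal sketch with default-like settings (hypothetical input texts):
  ##   toks <- tokens(tolower(c("a small text", "another small text")))
  ##   m <- dfm_trim(dfm(tokens_ngrams(toks, n = 1)), min_docfreq = 1)
  ##   m <- convert(m, to = "tm")
  ##   Matrix::sparseMatrix(i = m$i, j = m$j, x = m$v, dimnames = m$dimnames)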
  
  ## Predictor : df or matrix of dfm and extra
  ok.regressor.matrix <- reactive({
    if (is.null(ok.text())) return(NULL)
    if (is.null(ok.regressor.names())) return(NULL)
    if (verbose)
      cat("DEBUG enter ok.regressor.matrix\n")
    model.matrix(~., data= ok.data()[, ok.regressor.names()])[, -1]
  })
  
  ok.extraregressor.matrix <- reactive({
    req(ok.text())
    if (! "extra" %in% values$new_use_regressors) return(NULL)
    if (verbose)
      cat("DEBUG enter ok.extraregressor.matrix\n")
    
    res <- NULL
    for (inum in input$use_ootregnum) {
      suppressWarnings({tmp <- as.numeric(ok.data()[[inum]])})
      if (all(is.na(tmp))) {
        showNotification(paste0(inum, " is all NAs or non-numeric ; predictor not used"),
                         duration = 5, type = "warning")
        next
      }
      tmp[is.na(tmp)] <- median(tmp, na.rm = TRUE)
      res <- cbind(res, tmp)
    }
    for (icat in input$use_ootregcat) {
      tmp <- ok.data()[[icat]]
      if (! length(unique(tmp)) %in% 2:100) {
        showNotification(paste0(icat, " has ", length(unique(tmp)),
                                " unique values ; predictor not used"),
                         duration = 5, type = "warning")
        next
      }
      tmp <- as.character(tmp)
      tmp[is.na(tmp)] <- "<NA>"
      tmp <- as.factor(tmp)
      tmpmat <- model.matrix(~., data.frame(cat = tmp))[, -1, drop = FALSE]
      res <- cbind(res, tmpmat)
    }
    if (verbose)
      cat("DEBUG exit ok.extraregressor.matrix", ncol(res), "\n")
    res
  })
  
  ok.extraRegex.matrix <- reactive({
    if (is.null(ok.nicetext())) return(NULL)
    if (is.null(values$extraRegex)) return(NULL)
    if (is.null(values$regex_inuse)) return(NULL)
    if (!any(values$regex_inuse)) return(NULL)
    if (is.null(values$regex_case)) return(NULL)
    if (verbose)
      cat("DEBUG enter ok.extraRegex.matrix\n")
    tmp <- as.matrix(sapply(which(values$regex_inuse), function(iregex) {
      1 * stringi::stri_detect_regex(
        ok.nicetext(), values$extraRegex[iregex],
        opts_regex = list(case_insensitive = !values$regex_case[iregex]))
    }))
    colnames(tmp) <- values$extraRegex[values$regex_inuse]
    tmp
  })
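
  ## The regex predictors are simple 0/1 indicator columns, one per active
  ## regex, set to 1 when the pattern matches the displayed text. Hypothetical
  ## example with a single case-insensitive pattern:
  ##   1 * stringi::stri_detect_regex(c("Dog!", "cat"), "dog",
  ##                                  opts_regex = list(case_insensitive = TRUE))
  ##   # -> 1 0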
 | 
						|
  
 | 
						|
  ok.predictor <- reactive({
 | 
						|
    req(values$new_use_regressors)
 | 
						|
    if (ok.data.running()) return(NULL)
 | 
						|
    if (is.null(ok.text())) return(NULL)
 | 
						|
    if (verbose) 
 | 
						|
      cat("DEBUG enter ok.predictor\n")
 | 
						|
    dadfm <- NULL
 | 
						|
    if ("dfm" %in% values$new_use_regressors) {
 | 
						|
      dadfm <- ok.dfm()
 | 
						|
    } else {
 | 
						|
      if (input$predModel == "naive bayes") {
 | 
						|
        values$modelDiagno <- 
 | 
						|
          "Error in training: Naive Bayes requires DFM word counts as predictors."
 | 
						|
        return(NULL) 
 | 
						|
      } 
 | 
						|
    }    
 | 
						|
    if (length(ok.regressor.names()) > 0) {
 | 
						|
      if (input$predModel == "random forest") {
 | 
						|
        if (is.null(dadfm)) {
 | 
						|
          dadfm <- ok.data()[, ok.regressor.names()]
 | 
						|
        } else
 | 
						|
          dadfm <- cbind(ok.data()[, ok.regressor.names()], as.matrix(dadfm))
 | 
						|
      }
 | 
						|
      if (input$predModel %in% c("lasso", "linear", "knn")) {
 | 
						|
        dadfm <- cbind(ok.regressor.matrix(), dadfm)
 | 
						|
      }
 | 
						|
    }
 | 
						|
    # if ("regex" %in% input$use_regressors && length(values$regex_inuse) > 0) {
 | 
						|
    if ("regex" %in% values$new_use_regressors & !is.null(ok.extraRegex.matrix())) {
 | 
						|
      dadfm <- cbind(dadfm, ok.extraRegex.matrix())
 | 
						|
    }
 | 
						|
    if ("extra" %in% values$new_use_regressors) {
 | 
						|
      dadfm <- cbind(dadfm, ok.extraregressor.matrix())
 | 
						|
    }
 | 
						|
 | 
						|
    if (is.null(dadfm)) {
 | 
						|
      values$modelDiagno <- "Error in training: no predictors selected"
 | 
						|
      if (verbose) 
 | 
						|
        cat("DEBUG ok.predictor exit: no predictors\n")
 | 
						|
      return(NULL) 
 | 
						|
    }
 | 
						|
    
 | 
						|
    if (input$predModel == "linear") 
 | 
						|
      dadfm <- SparseM::as.matrix.csr(dadfm, nrow(dadfm), ncol(dadfm))
 | 
						|
    if (input$predModel == "naive bayes") 
 | 
						|
      dadfm <- as.dfm(dadfm)
 | 
						|
    if (verbose) 
 | 
						|
      cat("DEBUG exit ok.predictor: ", ncol(dadfm), "\n")
 | 
						|
    dadfm
 | 
						|
  })


  ## Train model async
  ok.train <- reactiveVal()
  ok.train.async <- reactiveVal()
  observeEvent(
    list(trainTrigger(), input$modelTrain),
    {
      if (is.null(ok.predictor())) return(NULL)
      if (verbose)
        cat("DEBUG enter ok.train observer\n")
      if (!ok.train.possible()) {
        values$modelDiagno <-
          "Tag at least 3 texts for 2 tags to train prediction model."
        ok.train(NULL)
        # queryTrigger(queryTrigger() + 1)
        return(NULL)
      }
      ready2tag(0)
      values$modelDiagno <- paste0("Training ", input$predModel)

      tmp_model <- input$predModel
      if (tmp_model == "random forest") {
        if (!is.numeric(input$rfNumTrees) | !is.numeric(input$rfMtry) |
            !is.numeric(input$rfSampleFrac)) {
          ok.train(NULL)
          return(NULL)
        }
        train_fun <- ranger::ranger
        train_args <- list(
          x = ok.predictor()[!is.na(values$label), ],
          y = as.factor(values$label[!is.na(values$label)]),
          num.trees = input$rfNumTrees,
          mtry = if (input$rfMtry > 0) input$rfMtry,
          sample.fraction = input$rfSampleFrac, probability = TRUE)
      } else {
        if (tmp_model == "linear") {
          tmpvalues <- values$label
          if (!is.numeric(input$liblinCost)) {
            ok.train(NULL)
            return(NULL)
          }
          for (ival in na.omit(unique(tmpvalues)))
            if (sum(tmpvalues == ival, na.rm = TRUE) == 1)
              tmpvalues[which(tmpvalues == ival)] <- NA
          train_fun <- LiblineaR::LiblineaR
          train_args <- list(
            data = ok.predictor()[!is.na(tmpvalues), ],
            target = as.factor(tmpvalues[!is.na(tmpvalues)]),
            type = 0, cost = input$liblinCost)
        } else if (tmp_model == "lasso") {
          tmpvalues <- values$label
          for (ival in na.omit(unique(tmpvalues)))
            if (sum(tmpvalues == ival, na.rm = TRUE) == 1)
              tmpvalues[which(tmpvalues == ival)] <- NA
          train_fun <- glmnet::glmnet
          train_args <- list(
            x = ok.predictor()[!is.na(tmpvalues), ],
            y = as.factor(tmpvalues[!is.na(tmpvalues)]),
            family = "multinomial")
        } else if (tmp_model == "naive bayes") {
          if (!is.numeric(input$naiveSmooth)) {
            ok.train(NULL)
            return(NULL)
          }
          tmpvalues <- values$label
          for (ival in na.omit(unique(tmpvalues)))
            if (sum(tmpvalues == ival, na.rm = TRUE) == 1)
              tmpvalues[which(tmpvalues == ival)] <- NA
          train_fun <- quanteda.textmodels::textmodel_nb
          train_args <- list(
            x = ok.predictor()[!is.na(tmpvalues), ],
            y = as.factor(tmpvalues[!is.na(tmpvalues)]),
            smooth = input$naiveSmooth, prior = input$naivePrior,
            distribution = input$naiveDistri)
        } else if (tmp_model == "knn") {
          train_fun <- function(x) return(TRUE)
          train_args <- list(x = "knn")
        }
      }

      train_async <- future(seed = TRUE, {
        suppressWarnings(do.call(train_fun, train_args))
      }) %...>% ok.train.async()
      train_async <- catch(train_async, function(error) {
        values$modelDiagno <- paste0("Model training error: ", error)
        ok.train(NULL)
        ok.train.async(NULL)
      })
      train_async <- finally(train_async, function() {
        values$modelDiagno <- "Training done, predicting..."
        ok.train(ok.train.async())
        ok.train.async(NULL)
        predTrigger(predTrigger() + 1)
      })
      NULL
    }
  )
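  ## Note on the async pattern above: future() runs the training in a separate
  ## R session (plan(multisession)), %...>% pipes its result into the
  ## ok.train.async() reactiveVal, and promises::catch()/finally() handle
  ## errors and completion without blocking the Shiny session; finally() then
  ## bumps predTrigger() so prediction re-runs once a model is available.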

  ## Model prediction on full dataset (async)
  ok.pred <- reactiveVal()
  ok.pred.async <- reactiveVal()
  # observeEvent(list(ok.train(), input$glmLambda, input$knnK, values$retagged), {
  observeEvent(predTrigger(), {
    if (is.null(ok.train())) {
      ok.pred(NULL)
      return(NULL)
    }
    if (verbose)
      cat("DEBUG ok.pred()\n")

    damodel <- input$predModel
    if (input$predModel == "random forest") {
      pred_args <- list(ok.train(), data = ok.predictor())
      dacolnames <- colnames(ok.train()$predictions)
    } else if (input$predModel == "lasso") {
      if (!is.numeric(input$glmLambda)) {
        ok.pred(NULL)
        return(NULL)
      }
      pred_args <- list(ok.train(), newx = ok.predictor(), s = input$glmLambda,
                        type = "response")
    } else if (input$predModel == "linear") {
      pred_args <- list(ok.train(), newx = ok.predictor(), proba = TRUE)
    } else if (input$predModel == "naive bayes") {
      pred_args <- list(ok.train(), newdata = ok.predictor(),
                        type = "probability")
    } else if (input$predModel == "knn") {
      if (!is.numeric(input$knnK)) {
        ok.pred(NULL)
        return(NULL)
      }
      pred_args <- list(train = ok.predictor()[!is.na(values$label), ],
                        test = ok.predictor(),
                        cl = values$label[!is.na(values$label)],
                        k = input$knnK, prob = TRUE)
    }

    pred_async <- future(seed = TRUE, {
      if (damodel == "random forest") {
        res <- do.call(predict, pred_args)$predictions
        colnames(res) <- dacolnames
        res
      } else if (damodel == "lasso") {
        do.call(predict, pred_args)[, , 1]
      } else if (damodel == "linear") {
        do.call(predict, pred_args)$probabilities
      } else if (damodel == "naive bayes") {
        do.call(predict, pred_args)
      } else if (damodel == "knn") {
        tmp <- do.call(class::knn, pred_args)
        ## A bit flaky here: probs of non-best classes uniformly share the rest,
        ## and distance is euc
        tmpvalues <- sort(unique(pred_args$cl))
        res <- matrix(0, nrow(pred_args$test), length(tmpvalues),
                      dimnames = list(NULL, tmpvalues))
        res[cbind(1:nrow(res), tmp)] <- attr(tmp, "prob") -
          ((1 - attr(tmp, "prob")) / (ncol(res) - 1))
        res <- res + (1 - attr(tmp, "prob")) / (ncol(res) - 1)
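        ## Worked example of the sharing rule above: with K = 3 classes and a
        ## winning vote share attr(tmp, "prob") = 0.7, the predicted class gets
        ## 0.7 and each other class gets (1 - 0.7) / (3 - 1) = 0.15, so every
        ## row still sums to 1.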
        res
      }
    }) %...>% ok.pred.async()
    pred_async <- catch(pred_async, function(error) {
      values$modelDiagno <- paste0("Error in prediction: ", error)
    })
    pred_async <- finally(pred_async, function() {
      if (verbose)
        cat("DEBUG ok.pred() out of async\n")
      ok.pred(ok.pred.async())
      ok.pred.async(NULL)
      # predlabTrigger(predlabTrigger() + 1)
      diagTrigger(diagTrigger() + 1)
      queryNext(queryNext() + 1)
    })
  })

  ## Best predicted label for full dataset
  # ok.predlab1 <- eventReactive(predlabTrigger(), {
  #   if (is.null(ok.pred())) return(NULL)
  #   if (verbose)
  #     cat("DEBUG ok.predlab1\n")
  #   # queryTrigger(queryTrigger() + 1)
  #   colnames(ok.pred())[max.col(ok.pred())]
  # })
  ok.predlab1 <- reactive({
    if (is.null(ok.pred())) return(NULL)
    if (verbose)
      cat("DEBUG ok.predlab1\n")
    colnames(ok.pred())[max.col(ok.pred())]
  })
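  ## Sketch of how the best label is picked (not run here): max.col() returns,
  ## for each row of the probability matrix, the index of its largest entry,
  ## which is then mapped back to a tag name, e.g.
  ##   probs <- matrix(c(0.2, 0.8, 0.6, 0.4), 2, 2,
  ##                   dimnames = list(NULL, c("neg", "pos")))
  ##   colnames(probs)[max.col(probs)]   # "pos" "neg"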

  ## On scheme change, forget existing model, queue, query and hist
  ## load scheme description and predictor-regex
  observeEvent(ok.labelcol(), {
    if (verbose)
      cat("DEBUG event selectScheme", ok.labelcol(), "\n")
    ok.pred(NULL)
    ok.train(NULL)
    values$queryQueue <- NULL
    values$modelDiagno <- "No model"
    values$histSaveStack <- NULL

    ## Load description file
    descr <- ""
    if (file.exists(paste0(values$conf$datadir, values$conf$projectid,
                           "/", input$selectScheme, ".txt"))) {
      descr <- paste(collapse = "\n", readLines(paste0(
        values$conf$datadir, values$conf$projectid,
        "/", input$selectScheme, ".txt")))
    }
    updateTextAreaInput(session, "schemeDescr", value = descr)

    ## Load regex from json
    regexfile <- paste0(values$projectdir, input$selectScheme, "_regex.json")
    if (file.exists(regexfile)) {
      if (verbose)
        cat("DEBUG enter regex.json\n")
      tmp <- RJSONIO::fromJSON(paste(collapse = "\n", readLines(regexfile)))
      if (is.null(tmp$regex)) return(NULL)
      values$extraRegex <- tmp$regex
      names(values$extraRegex) <- sapply(tmp$regex, label2hash)
      for (iregex in 1:length(tmp$regex)) {
        values[[paste0("useregex_", names(values$extraRegex)[iregex])]] <-
          tmp$use[iregex]
        values[[paste0("caseregex_", names(values$extraRegex)[iregex])]] <-
          tmp$case[iregex]
      }
    } else
      values$extraRegex <- NULL

  })
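  ## Assumed shape of the per-scheme <scheme>_regex.json read above (matching
  ## the fields the code uses: regex, use, case), for illustration only:
  ##   {"regex": ["price", "strike"], "use": [true, false], "case": [false, false]}
  ## Each pattern is re-hashed with label2hash() to rebuild the per-regex
  ## useregex_/caseregex_ reactive values.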


  ## queryNext event: update values$newQuery ; called by queryNext(), or by
  ## queryQueue or regex change
  ## Requires:
  ## - queryQueue (reactive: indices of current queue), and
  ## - wasTagged (boolean vector indicating whether each element has been tagged this session, used in "on untagged")
  ## - wasRetagged (boolean vector indicating whether each element has been re-tagged this session, used in "on tagged")
  ## TODO: make a scheme-list of wasTagged, now it will be wiped at each scheme change?
  observeEvent(
    list(queryNext(), values$queryQueue,
         input$sampleChoice, input$taggedWhich, input$maxprobWhich,
         input$regexFilter, input$regexCaseSens,
         input$visuLock, values$visuZoom$xlim, values$visuZoom$ylim),
    {
      if (is.null(values$queryQueue)) return(NULL)
      if (verbose)
        cat("DEBUG queryNext\n")

      queue <- values$queryQueue

      if (input$sampleChoice == "untagged") {
        queue <- queue[!values$wasTagged[queue]]
      } else if (input$sampleChoice == "tagged") {
        queue <- queue[!is.na(values$label)[queue] & !values$wasRetagged[queue]]
        if (input$taggedWhich != "all") {
          queue <- queue[values$label[queue] == input$taggedWhich]
        }
      } else if (input$sampleChoice == "all") {
        queue <- queue[!values$wasRetagged[queue]]
      }

      if (input$visuLock) if (!is.null(values$visuZoom$xlim)) {
        if (!is.null(ok.visu()))
          queue <- queue[ok.visu()[queue, 1] >= values$visuZoom$xlim[1] &
                           ok.visu()[queue, 1] <= values$visuZoom$xlim[2] &
                           ok.visu()[queue, 2] >= values$visuZoom$ylim[1] &
                           ok.visu()[queue, 2] <= values$visuZoom$ylim[2]]
      }

      if (length(queue)) if (nchar(input$regexFilter)) {
        try_regex <- try(silent = TRUE, {
          queue <- queue[
            stringi::stri_detect_regex(
              ok.nicetext()[queue], input$regexFilter,
              opts_regex = list(case_insensitive = !input$regexCaseSens))]
        })
        if (inherits(try_regex, "try-error")) {
          showNotification("Invalid regex", duration = 2, type = "error")
          values$newQuery <- NULL
          return(NULL)
        }
      }

      if (!length(queue)) {
        if (nchar(input$regexFilter)) {
          showNotification("No texts matching the regex",
                           duration = 2, type = "warning")
        } else
          showNotification("No more texts to be tagged in this set",
                           duration = 2, type = "warning")
        values$newQuery <- NULL
        return(NULL)
      }

      values$lastQuery <- queue[1] # ??
      values$newQuery <- queue[1]
      ready2tag(1)
    }
  )

  ## queryQueue event: fix values$queryQueue (indices of current queue)
  observeEvent(
    list(ok.data(), ok.pred(), input$strategy, input$maxprobWhich,
         input$selectProject, input$selectScheme), {
      if (input$strategy %in% c("entropy", "maxprob") & is.null(ok.pred())) {
        values$queryQueue <- NULL
        values$lastQuery <- NULL
        values$newQuery <- NULL
        return(NULL)
      }
      if (is.null(ok.text())) return(NULL)
      if (verbose)
        cat("DEBUG queryQueue change\n")

      if (input$strategy == "sequential") {
        values$queryQueue <- 1:length(ok.text())
      } else if (input$strategy == "random") {
        values$queryQueue <- sample(length(ok.text()))
      } else {
        dapred <- ok.pred()
        if (input$strategy == "entropy") {
          ## Zero probabilities would give 0 * log(0) = NaN: zero them out
          ## cell-wise before summing, rather than indexing the row sums.
          tmp_plogp <- dapred * log(dapred)
          if (any(dapred == 0)) {
            tmp_plogp[dapred == 0] <- 0
          }
          tominimize <- rowSums(tmp_plogp)
        }
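        ## The entropy strategy queries the texts the model is most uncertain
        ## about: for predicted probabilities p, rowSums(p * log(p)) is the
        ## negative entropy -H(p), so order() (ascending) puts the
        ## highest-entropy rows first. E.g. c(0.5, 0.5) gives -H = -0.69,
        ## ranked before c(0.9, 0.1) with -H = -0.33.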
        if (input$strategy == "maxprob") {
          if (! input$maxprobWhich %in% colnames(dapred)) return(NULL)
          tominimize <- -dapred[, input$maxprobWhich]
        }
        values$queryQueue <- order(tominimize)
      }
    }
  )


  ## Update uniqueLabels and uniqueLabelsAll if necessary on values$label change
  observeEvent(values$label, {
    if (is.null(values$label)) return(NULL)
    if (verbose)
      cat("DEBUG uniqueLabels\n")
    dalabs <- sort(na.omit(unique(values$label)))
    # if (!length(dalabs)) return(NULL)
    if (!identical(dalabs, values$uniqueLabels))
      values$uniqueLabels <- dalabs
    if (any(! dalabs %in% values$uniqueLabelsAll))
      values$uniqueLabelsAll <- c(values$uniqueLabelsAll,
                                  dalabs[! dalabs %in% values$uniqueLabelsAll])
  })



  ##############################################################################
  ## Project management
  ##############################################################################

  output$cpDataMessage <- renderUI({
    req(values$cpDataMessage)
    p(values$cpDataMessage)
  })

  output$cpConfirmMessage <- renderUI({
    req(values$cpConfirmMessage)
    p(strong(paste0("Error: ", values$cpConfirmMessage)))
  })

  output$cpIdcolUI <- renderUI({
    req(values$cpNewData)
    if (!is.data.frame(values$cpNewData)) return(NULL)
    selectInput("cpIdcol", NULL, colnames(values$cpNewData))
  })

  output$cpTextcolUI <- renderUI({
    req(values$cpNewData)
    if (!is.data.frame(values$cpNewData)) return(NULL)
    selectInput("cpTextcol", NULL, colnames(values$cpNewData), multiple = TRUE)
  })

  output$cpContextcolUI <- renderUI({
    req(values$cpNewData)
    if (!is.data.frame(values$cpNewData)) return(NULL)
    selectInput("cpContextcol", NULL, colnames(values$cpNewData), multiple = TRUE)
  })

  output$cpTagcolUI <- renderUI({
    req(values$cpNewData)
    if (!is.data.frame(values$cpNewData)) return(NULL)
    selectInput("cpTagcol", NULL, colnames(values$cpNewData), multiple = TRUE)
  })

  output$cpCommcolUI <- renderUI({
    req(values$cpNewData)
    if (!is.data.frame(values$cpNewData)) return(NULL)
    selectInput("cpCommcol", NULL, c("none", colnames(values$cpNewData)))
  })

  output$cp_spacyUI <- renderUI(
    textInput("cp_use_spacy_model", NULL,
              modelnames$spacy_name[modelnames$short == input$cp_lang],
              placeholder = "(spacy model name)")
  )

  output$cp_spacyDlUI <- renderUI(HTML(paste(
    "<em>Model must be downloaded once, from python:</em><br>",
    input$cp_which_python, "-m spacy download",
    modelnames$spacy_name[modelnames$short == input$cp_lang], "<br><br>"
  )))

  output$cp_ftUI <- renderUI(
    textInput("cp_use_ft_model", NULL,
              modelnames$fasttext_name[modelnames$short == input$cp_lang],
              placeholder = "(fasttext model path)")
  )

  output$cp_ftDlUI <- renderUI(list(HTML(paste(
    "<em>Model can be downloaded <a href='",
    modelnames$fasttext_url[modelnames$short == input$cp_lang],
    "'>here</a></em>")),
    br(), br()))

  output$cp_sbUI <- renderUI(
    textInput("cp_use_sb_model", NULL, ifelse(
      input$cp_lang %in% c("ar", "zh", "nl", "en", "fr", "de",
                           "it", "ko", "pl", "pt", "ru", "es", "tr"),
      "distiluse-base-multilingual-cased-v1",
      "distiluse-base-multilingual-cased-v2"),
      placeholder = "(custom sentence_transformers model)")
  )



  ## Create new project, with modal dialog
  observeEvent(input$createProject, {
    if (verbose)
      cat("DEBUG create project\n")
    showModal(modalDialog(
      title = "New project",

      p("Create a new text tagging project."),
      p("This mainly involves choosing a dataset, and how to process it."),
      p("Items marked by a star ", strong("*"),
        " cannot be changed once the project is created."),

      hr(),
      h4("Files and folders"),

      fluidRow(
        column(3, p(strong("*Project name"))),
        column(5, textInput("cpName", NULL, placeholder = "(Required)")),
        column(4, p("Tags will be stored in a new directory by this name."))
      ),
      fluidRow(
        column(3, p(strong("*Data directory"))),
        column(5, textInput("cpDatadir", NULL, placeholder = "(Required)")),
        column(4, p("Place (on the server) where the data and project are stored"))
      ),
      fluidRow(
        column(3, p(strong("*Data filename"))),
        column(5, checkboxInput("cpDoImport", "New data", TRUE)),
        column(4, p("Main file, containing id and text columns"))
      ),

      conditionalPanel("!input.cpDoImport", list(
        ## Existing data
        fluidRow(
          column(3, p(strong("Existing file"))),
          column(5, textInput("cpDatafile", NULL, placeholder = "(Required)")),
          column(4, actionButton("cpDatafileGo", "Import"))
        )
      )),
      conditionalPanel("input.cpDoImport", {
        ## Import data
        wellPanel(
          selectInput('cpNDfiletype', NULL,
                      choices = c("Text file (.csv, .txt)" = "csv_txt",
                                  "Feather" = "feather",
                                  "Parquet" = "parquet",
                                  "OpenDocument (.ods)" = "ods",
                                  "Microsoft Excel (.xls, .xlsx)" = "excel",
                                  "SPSS (.sav, .por)" = "spss",
                                  "SAS (.sas7bdat)" = "sas_data",
                                  "Stata v5-v12 (.dta)" = "stata"),
                      selected = "csv_txt"
          ),
          fileInput('cpNDfile', 'Choose File'),

          checkboxInput("cpNDparams", "File parameters"),

          conditionalPanel(
            "input.cpNDparams",
            conditionalPanel(
              "input.cpNDfiletype == 'csv_txt'",

              fluidRow(column(4, p('Header')),
                       column(8, selectInput('cpCSVheader', NULL,
                                             c("Header" = TRUE,
                                               "No Header" = FALSE),
                                             "Auto"))),
              fluidRow(column(4, p('Separator')),
                       column(8, selectInput('cpCSVsep', NULL,
                                             c("Comma ','" = ",",
                                               "Semicolon ';'" = ";",
                                               "Tab" = "\t",
                                               "Space" = " "),
                                             "Auto"))),
              fluidRow(column(4, p('Quote')),
                       column(8, selectInput('cpCSVquote', NULL,
                                             c("Double Quote \"" = "\"",
                                               "Single Quote '" = "'",
                                               "None" = ""),
                                             "Double Quote \""))),
              fluidRow(column(4, p('Decimal mark')),
                       column(8, selectInput('cpCSVdec', NULL,
                                             c('Period "."' = ".",
                                               'Comma ","' = ","),
                                             'Period "."'))),
              fluidRow(column(4, p('File Encoding')),
                       column(8, selectInput('cpCSVencoding', NULL,
                                             c("unknown", "UTF-8", "Latin-1"),
                                             "unknown")))),

            ## Options for ods files
            conditionalPanel(
              "input.cpNDfiletype == 'ods'",
              fluidRow(column(4, p('Sheet')),
                       column(8, numericInput('cpODSsheet', NULL, 1, 1,
                                              step = 1))),
              checkboxInput("cpODScolnames", "Column names", TRUE),
              fluidRow(column(4, p('NA symbol')),
                       column(8, textInput('cpODSna', NULL, value = ""))),
              fluidRow(column(4, p('Skip rows')),
                       column(8, numericInput('cpODSskip', NULL, 0, 0,
                                              step = 1))),
              fluidRow(column(4, p('Specify range')),
                       column(8, textInput('cpODSrange', NULL, value = "")))
            ),

            ## Options for xls / xlsx files
            conditionalPanel(
              "input.cpNDfiletype == 'excel'",
              fluidRow(column(4, p('Column names')),
                       column(8, selectInput('cpXLScolnames', NULL,
                                             choices = c("Yes" = "TRUE",
                                                         "No" = "FALSE"),
                                             selected = TRUE))),
              fluidRow(column(4, p('Skip rows')),
                       column(8, numericInput('cpXLSskip', NULL, 0, 0,
                                              step = 1))),
              fluidRow(column(4, p('Trim whitespaces')),
                       column(8, selectInput('cpXLStrim', NULL,
                                             choices = c("Yes" = "TRUE",
                                                         "No" = "FALSE"),
                                             selected = TRUE))),
              checkboxInput("cpXLSsheet",
                            "Specify Worksheet", FALSE),
              conditionalPanel("input.cpXLSsheet",
                               textInput("cpXLSsheet_specs", NULL, "")),
              checkboxInput("cpXLSrange", "Specify Range", FALSE),
              conditionalPanel("input.cpXLSrange",
                               textInput("cpXLSrange_specs", NULL, "")
              )),

            ## Options for SPSS files
            conditionalPanel(
              "input.cpNDfiletype == 'spss'",
              fluidRow(
                column(4, p('Skip rows')),
                column(8, numericInput('cpSPSSskip', NULL, 0, 0, step = 1))),
              checkboxInput("cpSPSSna", "User-defined NA", FALSE)),

            ## Options for SAS files
            conditionalPanel(
              "input.cpNDfiletype == 'sas_data'",
              fluidRow(
                column(4, p('Skip rows')),
                column(8, numericInput("cpSASskip", NULL, 0, 0, step = 1))),
              checkboxInput("cpSAScatalog", "Use catalog file", FALSE),
              conditionalPanel(
                "input.cpSAScatalog",
                fileInput("cpSAScatalog_file", "Catalog file"))
            ),

            ## Options for STATA dta files
            conditionalPanel(
              "input.cpNDfiletype == 'stata'",
              checkboxInput("cpSTATAfactors", "Convert factors", FALSE))
          )
        )
      }),

      uiOutput("cpDataMessage"),
      hr(),
      h4("Data"),

      fluidRow(
        column(6, numericInput(
          "cpNrows", "N. rows for working sample", 500, 1, step = 1)),
        column(6, numericInput(
          "cpNskip", "Rows to skip before working sample", 0, 0, step = 1))
      ),

      fluidRow(
        column(3, p(strong("*ID"))),
        column(5, uiOutput("cpIdcolUI")),
        column(4, p("Name of the id variable, unique identifier of each text"))
      ),
      fluidRow(
        column(3, p(strong("*Text"))),
        column(5, uiOutput("cpTextcolUI")),
        column(4, p("Name of the text variables: if more than one, texts are concatenated in the specified order"))
      ),
      fluidRow(
        column(3, p(strong("*Tags"))),
        column(5, uiOutput("cpTagcolUI")),
        column(4, p("(optional) Names of variables that are already tagged: each will create a new scheme"))
      ),
      fluidRow(
        column(3, p(strong("Comments"))),
        column(5, uiOutput("cpCommcolUI")),
        column(4, p("(optional) Name of the comments variable"))
      ),
      fluidRow(
        column(3, p(strong("Context"))),
        column(5, uiOutput("cpContextcolUI")),
        column(4, p("(optional) Names of variables not used in the models, but that may be displayed during tagging"))
      ),


      hr(),
      h4("System"),

      fluidRow(
        column(3, checkboxInput("cp_use_python", "Python backend", FALSE)),
        column(5, conditionalPanel(
          "input.cp_use_python",
          textInput("cp_which_python", NULL, value = "python3",
                    placeholder = "(custom python path)"))),
        column(4, conditionalPanel(
          "input.cp_use_python",
          p("This must be a working python3 environment, with the required modules installed (see documentation)")))
      ),
      conditionalPanel("input.cp_use_python", list(
        fluidRow(
          column(3, p("Language")),
          column(5, selectInput("cp_lang", NULL, modelnames_labels, "en")),
          column(4, p("Used to preset tokenization and embedding models"))
        ),
        fluidRow(
          column(3, checkboxInput("cp_use_spacy", "SpaCy tokenization", FALSE)),
          column(5, conditionalPanel("input.cp_use_spacy", uiOutput("cp_spacyUI"))),
          column(4, p("Name of the spacy tokenizer model, used in DTM and word embeddings"))
        ),
        conditionalPanel("input.cp_use_spacy", fluidRow(
          column(3),
          column(9, uiOutput("cp_spacyDlUI")))
        ),
        fluidRow(
          column(3, checkboxInput("cp_use_ft", "FastText word embeddings", FALSE)),
          column(5, conditionalPanel("input.cp_use_ft", uiOutput("cp_ftUI"))),
          column(4, p("Path to the local fasttext model binary"))
        ),
        conditionalPanel("input.cp_use_ft", fluidRow(
          column(3),
          column(9, uiOutput("cp_ftDlUI")))),
        fluidRow(
          column(3, checkboxInput("cp_use_sb", "SBERT sentence embeddings", FALSE)),
          column(5, conditionalPanel("input.cp_use_sb", uiOutput("cp_sbUI"))),
          column(4, p("(GPU recommended) Name or path of the sentence-transformers model"))
        ),

        conditionalPanel("input.cp_use_python", {
          checkboxInput("cp_use_gpu", "GPU support (CUDA, for SBERT and BERT)", FALSE)
        })
      )),

      uiOutput("cpConfirmMessage"),
      footer = tagList(actionButton("cpConfirm", "Create"),
                       modalButton("Cancel"))))
  })

  observeEvent(input$cpDatafileGo, {
    dataname <- input$cpDatafile
    dataname <- gsub("[.]feather$", "", dataname)
    tmpdat <- try(arrow::read_feather(
      paste0(gsub("/+$", "/", paste0(input$cpDatadir, "/")),
             dataname, ".feather")))
    if (inherits(tmpdat, "try-error")) {
      values$cpDataMessage <- paste0(
        "Data import error: ", as.character(tmpdat))
      values$cpNewData <- NULL
      return(NULL)
    }
    if (!is.data.frame(tmpdat)) {
      values$cpDataMessage <- "Data import error: object not a data.frame"
      values$cpNewData <- NULL
      return(NULL)
    }
    if (ncol(tmpdat) < 2) {
      values$cpDataMessage <- "Data import error: only one column detected"
      values$cpNewData <- NULL
      return(NULL)
    }

    values$cpDataMessage <- paste0(
      "Data import success: ", nrow(tmpdat), " rows, ", ncol(tmpdat), " cols.")
    values$cpNewData <- tmpdat
    values$cpNewDataname <- dataname

  })

  ## Data import mechanism
  observeEvent(input$cpNDfile, {
    req(input$cpNDfile)
    if (input$cpNDfiletype == "csv_txt") {
      res <- try(read.csv(
        input$cpNDfile$datapath,
        header = as.logical(input$cpCSVheader), sep = input$cpCSVsep,
        quote = input$cpCSVquote, dec = input$cpCSVdec, stringsAsFactors = FALSE,
        encoding = input$cpCSVencoding, check.names = TRUE))

    } else if (input$cpNDfiletype == "feather") {
      res <- try(arrow::read_feather(input$cpNDfile$datapath))

    } else if (input$cpNDfiletype == "parquet") {
      res <- try(arrow::read_parquet(input$cpNDfile$datapath))

    } else if (input$cpNDfiletype == "ods") {
      range <- input$cpODSrange
      if (range == "") range <- NULL
      res <- try(readODS::read_ods(
        path = input$cpNDfile$datapath, sheet = input$cpODSsheet,
        col_names = input$cpODScolnames, na = input$cpODSna,
        skip = input$cpODSskip, range = range))

    } else if (input$cpNDfiletype == "excel") {
      column_names <- as.logical(input$cpXLScolnames)
      trim_spaces <- as.logical(input$cpXLStrim)
      the.range <- NULL
      if (input$cpXLSrange == TRUE & input$cpXLSrange_specs != "")
        the.range <- input$cpXLSrange_specs
      the.sheet <- NULL
      if (input$cpXLSsheet == TRUE & input$cpXLSsheet_specs != "")
        the.sheet <- input$cpXLSsheet_specs
      res <- try(data.frame(readxl::read_excel(
        input$cpNDfile$datapath, col_names = column_names, range = the.range,
        sheet = the.sheet, trim_ws = trim_spaces, skip = input$cpXLSskip)))

    } else if (input$cpNDfiletype == "spss") {
      res <- try(data.frame(haven::read_spss(
        file = input$cpNDfile$datapath,
        skip = input$cpSPSSskip, user_na = input$cpSPSSna)))
      if (!inherits(res, "try-error")) {
        res <- data.frame(lapply(res, function(x) {
          attr(x, "format.spss") <- NULL
          if ("haven_labelled" %in% class(x))
            x <- haven::as_factor(x, levels = "labels")
          x
        }))
      }

    } else if (input$cpNDfiletype == "sas_data") {
      res <- try(data.frame(haven::read_sas(
        data_file = input$cpNDfile$datapath,
        catalog_file = input$cpSAScatalog_file$datapath,
        skip = input$cpSASskip)))

    } else if (input$cpNDfiletype == "stata") {
      res <- try(data.frame(foreign::read.dta(
        file = input$cpNDfile$datapath, convert.factors = input$cpSTATAfactors)))
    }

    if (inherits(res, "try-error")) {
      values$cpDataMessage <- paste0("Data import error: ", res)
      values$cpNewData <- NULL
      return(NULL)
    }
    if (ncol(res) < 2) {
      values$cpDataMessage <- "Data import error: only one column detected"
      values$cpNewData <- NULL
      return(NULL)
    }
    values$cpDataMessage <- paste0(
      "Data import success: ", nrow(res), " rows, ", ncol(res), " columns.")
    values$cpNewData <- res
  })

  ## New project confirm
  observeEvent(input$cpConfirm, {
    if (verbose)
      cat("DEBUG cpConfirm\n")
    newconf <- list()

    tmpdir <- gsub("/+$", "/", paste0(input$cpDatadir, "/"))
    if (!dir.exists(tmpdir))
      dir.create(tmpdir)
    newconf$datadir <- tmpdir

    tmpname <- cleanFileName(input$cpName)
    if (!nchar(tmpname)) {
      values$cpConfirmMessage <- "Project name is required"
      return(NULL)
    }
    if (dir.exists(paste0(newconf$datadir, tmpname))) {
      values$cpConfirmMessage <- paste0(
        "A project with the same name (", tmpname,
        ") already exists in the data directory.")
      return(NULL)
    }
    newconf$projectid <- tmpname

    if (!is.data.frame(values$cpNewData)) {
      values$cpConfirmMessage <- "Import data first"
      return(NULL)
    }

    if (input$cpDoImport) {
      newconf$dataname <- newconf$projectid
    } else
      newconf$dataname <- values$cpNewDataname

    if (any(is.na(values$cpNewData[[input$cpIdcol]]))) {
      values$cpConfirmMessage <- "ID variable contains missing values"
      return(NULL)
    }
    if (any(duplicated(values$cpNewData[[input$cpIdcol]]))) {
      values$cpConfirmMessage <- "ID variable contains duplicates"
      return(NULL)
    }
    newconf$idcol <- input$cpIdcol

    if (!length(input$cpTextcol)) {
      values$cpConfirmMessage <- "Select at least one text variable"
      return(NULL)
    }
    newconf$textcols <- input$cpTextcol

    if (input$cpCommcol == "none") {
      newconf$commcol <- paste0("comm_", newconf$projectid)
    } else
      newconf$commcol <- input$cpCommcol

    if (!length(input$cpTagcol)) {
      newconf$tagcols <- "scheme0"
    } else
      newconf$tagcols <- input$cpTagcol

    newconf$contextcols <- input$cpContextcol

    newconf$dataNrows <- input$cpNrows
    newconf$dataSkipRows <- input$cpNskip

    if (input$cp_use_python) {
      pytest <- try(system(paste(input$cp_which_python, "--version"),
                           intern = TRUE))
      if (inherits(pytest, "try-error")) {
        values$cpConfirmMessage <- paste(
          "Python path `", input$cp_which_python, "` not valid")
        return(NULL)
      }
    }
    newconf$use_python <- input$cp_use_python
    newconf$python <- input$cp_which_python
    newconf$use_gpu <- input$cp_use_gpu

    if (input$cp_use_spacy) {
      sptest <- system(paste0(
        newconf$python, " -m spacy info ", input$cp_use_spacy_model), intern = TRUE)
      if (length(sptest) == 0) {
        values$cpConfirmMessage <- paste(
          "Error loading spacy, check that it is installed in the specified python env")
        return(NULL)
      }
      if (length(attr(sptest, "status"))) {
        values$cpConfirmMessage <- paste(
          "Error loading spacy model, check that it has been downloaded")
        return(NULL)
      }
    }
    newconf$use_spacy <- input$cp_use_spacy
    newconf$use_spacy_model <- input$cp_use_spacy_model

    if (input$cp_use_ft) if (!file.exists(input$cp_use_ft_model)) {
      values$cpConfirmMessage <- paste(
        "Error loading fasttext model, check the specified path")
      return(NULL)
    }
    newconf$use_ft <- input$cp_use_ft
    newconf$use_ft_model <- input$cp_use_ft_model

    newconf$use_sb <- input$cp_use_sb
    newconf$use_sb_model <- input$cp_use_sb_model

    writeLines(RJSONIO::toJSON(newconf),
               paste0("tigger_", newconf$projectid, ".json"))
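    ## Illustrative sketch of the resulting tigger_<project>.json (field names
    ## as assembled above; the values here are only an example):
    ##   {"datadir": "data/", "projectid": "myproject", "dataname": "myproject",
    ##    "idcol": "id", "textcols": "text", "commcol": "comm_myproject",
    ##    "tagcols": "scheme0", "dataNrows": 500, "dataSkipRows": 0, ... }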

    values$confnames <- c(newconf$projectid, values$confnames)
    values$conf <- newconf

    ## On new data import, write a feather file with the name of the project
    if (input$cpDoImport)
      arrow::write_feather(values$cpNewData, paste0(
        newconf$datadir, newconf$dataname, ".feather"
      ))

    removeModal()
  })

  ##############################################################################
  ## Scheme management
  ##############################################################################

  ## Print active scheme in scheme management panel
  output$printScheme <- renderUI(h4(input$selectScheme))

  ## Create new scheme on request, with modal dialog
  observeEvent(input$createScheme, {
    if (verbose)
      cat("DEBUG create scheme\n")
    showModal(modalDialog(
      title = "Create tagging scheme",
      p("This will create a new scheme for this project"),

      textInput("newScheme", label = NULL, placeholder = "Required: new scheme name"),
      textAreaInput("newSchemeDescr", label = NULL,
                    placeholder = "Optional: scheme description"),
      checkboxInput("schemeDuplicate", "Duplicate existing scheme:"),
      conditionalPanel(
        "input.schemeDuplicate",
        selectInput("schemeDuplicateFrom", NULL, rev(values$conf$tagcols))),

      footer = tagList(actionButton("createSchemeConfirm", "Create"),
                       modalButton("Cancel"))))
  })

  observeEvent(input$createSchemeConfirm, {
    if (is.null(ok.data())) return(NULL)
    if (verbose)
      cat("DEBUG create scheme event\n")

    ## Make sure new scheme name is a correct file name
    if (!grepl("\\S", input$newScheme)) {
      showNotification(type = "error",
                       "Cannot create scheme: name empty.")
      return(NULL)
    }

    ## Check that new scheme name doesn't exist
    ## Re-read tagcols from conf file, to prevent scheme disappearances
    tmpname <- cleanFileName(input$newScheme)
    tagpath <- paste0(values$projectdir, tmpname, ".csv")
    confpath <- paste0("tigger_", values$conf$projectid, ".json")
    if (verbose)
      cat("DEBUG create scheme check json:", confpath, "\n")
    tmp_tagcols <- RJSONIO::fromJSON(confpath)$tagcols
    if (verbose)
      cat("DEBUG create scheme json OK\n")
    if (tmpname %in% tmp_tagcols | file.exists(tagpath)) {
      showNotification(type = "error",
                       "Cannot create scheme: name already exists.")
      return(NULL)
    }

    ## Create new scheme description
    if (nchar(input$newSchemeDescr))
      writeLines(input$newSchemeDescr,
                 paste0(values$projectdir, tmpname, ".txt"))

    ## Update conf file and reactives
    values$conf$tagcols <- c(tmp_tagcols, tmpname)
    # values$confs[[values$conf$projectid]]$tagcols <- c(tmp_tagcols, tmpname)
    writeLines(RJSONIO::toJSON(values$conf), confpath)

    if (input$schemeDuplicate) {
      tmpmat <- matrix(values$tagmat[, input$schemeDuplicateFrom])
      tmpmathist <- matrix(values$tagmathist[, input$schemeDuplicateFrom])
    } else {
      tmpmat <- matrix(rep(NA, nrow(ok.data())))
      tmpmathist <- matrix(rep(NA, nrow(ok.data())))
    }
    colnames(tmpmat) <- tmpname
    colnames(tmpmathist) <- tmpname

    values$tagmat <- cbind(values$tagmat, tmpmat)
    values$tagmathist <- cbind(values$tagmathist, tmpmathist)

    ## Create new tagging file if duplicating
    if (input$schemeDuplicate) {
      export <- data.frame(ok.data()[, values$conf$idcol],
                           values$tagmat[, tmpname])
      colnames(export) <- c(values$conf$idcol, tmpname)
      export <- na.omit(export)
      write.table(export, tagpath, sep = ",", qmethod = "double",
                  fileEncoding = "UTF-8", row.names = FALSE,
                  append = FALSE, col.names = TRUE)
    }

    if (verbose)
      cat("DEBUG create scheme exit\n")
    removeModal()
  })

  ## Button action: delete scheme
  observeEvent(input$schemeDelete, {
    if (verbose)
      cat("DEBUG delete scheme\n")
    if (length(values$conf$tagcols) < 2) {
      showNotification(type = "error",
                       "Cannot delete: at least one scheme must remain.")
      return(NULL)
    }
    showModal(modalDialog(
      title = "Delete tagging scheme",
      paste0("Confirm delete '", input$selectScheme,
             "': ", sum(!is.na(values$label)), " tags will be deleted",
             ", along with scheme description."),
      footer = tagList(actionButton("schemeDeleteConfirm", "Delete"),
                       modalButton("Cancel"))))
  })

  observeEvent(input$schemeDeleteConfirm, {
    file.remove(paste0(values$projectdir, input$selectScheme, ".csv"))
    file.remove(paste0(values$projectdir, input$selectScheme, ".txt"))
    file.remove(paste0(values$projectdir, input$selectScheme, "_regex.json"))
    values$conf$tagcols <-
      values$conf$tagcols[values$conf$tagcols != input$selectScheme]
    writeLines(RJSONIO::toJSON(values$conf),
               paste0("tigger_", input$selectProject, ".json"))
    removeModal()
  })

  observeEvent(input$schemeDescrSave, {
    req(input$selectScheme)
    writeLines(input$schemeDescr,
               paste0(values$conf$datadir, values$conf$projectid,
                      "/", input$selectScheme, ".txt"))
  })


  ##############################################################################
  ##############################################################################

  ## Tag selection: populate
  observe({
    values$conf$tagcols
    updateSelectInput(
      # session, "selectScheme", choices = c(rev(values$conf$tagcols), "<NEW>"))
      session, "selectScheme", choices = rev(values$conf$tagcols))
  })


  ## Populate oracle buttons
  output$oracleButtons <- renderUI({
    if (is.null(ok.data())) return(NULL)
    # if (is.null(values$label)) return(NULL)
    if (is.null(values$uniqueLabels)) return(NULL)
    if (verbose)
      cat("DEBUG populate oracle buttons\n")

    do.call(fluidRow, lapply(values$uniqueLabels, function(ilab)
      actionButton(paste0("oracle_", label2hash(ilab)),
                   strong(ilab))))
  })

  ## Populate maxprob variable selector
  observe({
    updateSelectInput(session, "maxprobWhich", choices = values$uniqueLabels)
    updateSelectInput(session, "taggedWhich",
                      choices = c("All" = "all", values$uniqueLabels))
  })

  ## Data panel message
  observeEvent(ok.data(), {
    if (verbose)
      cat("DEBUG data panel message\n")
    if (is.null(ok.data())) {
      output$dataMessage <- renderUI(p("Importing data..."))
    }
    output$dataMessage <- NULL
  })

  ## On config change, update dataView and retag options
  observeEvent(list(values$conf), {

    if (is.null(values$conf)) return(NULL)
    output$panelData <- renderUI(list(
      DT::dataTableOutput("dataView")
    ))

    output$panelRetag <- renderUI(c(
      list(
        br(),
        h4("Rename tags"),
        fluidRow(
          column(4, strong("Current")),
          column(2, strong("Count")),
          column(6, strong("New"))
        ),
        br()),
      lapply(values$uniqueLabels, function(iLab) {
        iHash <- label2hash(iLab)
        fluidRow(
          column(4, p(iLab)),
          column(2, p(sum(values$label == iLab, na.rm = TRUE))),
          column(4, textInput(
            paste0("newtag_", iHash), NULL,
            placeholder = "(new name, leave empty to untag)")),
          column(2, actionButton(paste0("retag_", iHash), "Rename")))
      })
    ))
  })

  output$panelExtraRegex <- renderUI(list(
    p(strong("Regex predictors")),
    fluidRow(
      column(8, textInput("extraRegexText", NULL,
                          placeholder = "(new regex)", width = "100%")),
      column(4, actionButton(
        "extraRegexSearch", strong("+"), width = "100%"))
    ),
    fluidRow(
      style = "overflow-y:scroll; max-height: 25vh",
      fluidRow(
        column(2, strong("")),
        column(1, strong("Use")),
        column(1, strong("Case")),
        column(1, p(strong("N"))),
        column(7, p(strong("Regex")))
      ),

      lapply(values$extraRegex, function(iRegex) {
        iHash <- label2hash(iRegex)
        tmp_cased <- values[[paste0("caseregex_", iHash)]]
        if (is.null(tmp_cased)) tmp_cased <- FALSE
        fluidRow(
          column(2, actionButton(paste0("delregex_", iHash), "🗑️", width = "100%")),
          column(1, checkboxInput(
            paste0("useregex_", iHash), NULL,
            value = isolate(!isFALSE(values[[paste0("useregex_", iHash)]])))),
          column(1, checkboxInput(
            paste0("caseregex_", iHash), NULL,
            value = isolate(isTRUE(values[[paste0("caseregex_", iHash)]])))),
          column(1, br(), p(sum(
            stringi::stri_detect_regex(
              ok.nicetext(), iRegex,
              opts_regex = list(case_insensitive = !tmp_cased)),
            na.rm = TRUE))),
          column(7, br(), p(iRegex))
        )
      })
    )

  ))
 | 
						|
  
 | 
						|
  ## Process new regex
 | 
						|
  observeEvent(input$extraRegexSearch, {
 | 
						|
    if (is.null(ok.nicetext())) return(NULL)
 | 
						|
    req(nchar(input$extraRegexText) > 0)
 | 
						|
    if (input$extraRegexText %in% values$extraRegex) return(NULL)
 | 
						|
    try_regex <- try(stringi::stri_detect_regex(
 | 
						|
      ok.nicetext(), input$extraRegexText,
 | 
						|
      opts_regex = list(case_insensitive = TRUE)))
 | 
						|
    if (inherits(try_regex, "try-error")) return(NULL)
 | 
						|
    new_regex <- input$extraRegexText
 | 
						|
    new_hash <- label2hash(new_regex)
 | 
						|
    values$extraRegex <- c(new_regex, values$extraRegex)
 | 
						|
    names(values$extraRegex)[1] <- new_hash
 | 
						|
    if (verbose)
 | 
						|
      cat("DEBUG regex search", input$extraRegexText, "\n")
 | 
						|
  })
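  ## Note: candidate regexes are vetted by running stri_detect_regex() inside
  ## try() before being stored, so invalid patterns are silently ignored.
  ## A minimal sketch of the same guard (hypothetical pattern, not app code):
  # chk <- try(stringi::stri_detect_regex("some text", "(unclosed"), silent = TRUE)
  # if (inherits(chk, "try-error")) message("invalid regex, ignoring")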
  
  ## Regex predictors: populate delete and use events on first creation
  useregex_trigger <- reactiveVal(0)
  delregex_trigger <- reactiveVal(0)
  caseregex_trigger <- reactiveVal(0)
  observeEvent(list(values$extraRegex), {
    if (!length(values$extraRegex)) return(NULL)
    if (any(! values$extraRegex %in% values$extraRegexHist)) {
      if (verbose)
        cat("DEBUG regex predictor names updates\n")
      tmpregex <- values$extraRegex[! values$extraRegex %in% values$extraRegexHist]
      values$extraRegexHist <- c(tmpregex, values$extraRegexHist)
      
      c(lapply(1:length(tmpregex), function(iregex) {
          observeEvent(
            input[[paste0("delregex_", names(tmpregex)[iregex])]], {
              if (verbose)
                cat("DEBUG regex delete", names(tmpregex)[iregex], "\n")
              values$extraRegex <- values$extraRegex[values$extraRegex != tmpregex[iregex]]
              delregex_trigger(delregex_trigger() + 1)
            })
        }),
        lapply(1:length(tmpregex), function(iregex) {
          tmpname <- paste0("useregex_", names(tmpregex)[iregex])
          observeEvent(
            input[[tmpname]], {
              values[[tmpname]] <- input[[tmpname]]
              useregex_trigger(useregex_trigger() + 1)
            })
        }),
        lapply(1:length(tmpregex), function(iregex) {
          tmpname <- paste0("caseregex_", names(tmpregex)[iregex])
          observeEvent(
            input[[tmpname]], {
              values[[tmpname]] <- input[[tmpname]]
              caseregex_trigger(caseregex_trigger() + 1)
            })
        })
      )
    }
  })
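  ## Note: this is the app's register-once pattern for dynamically created
  ## controls. Each regex is keyed by its hash, hashes already seen are kept in
  ## values$extraRegexHist, and one observeEvent per control is created only the
  ## first time a hash appears, so re-rendering the UI does not stack duplicate
  ## observers for the same button.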
  
  observeEvent(list(delregex_trigger(), useregex_trigger(), caseregex_trigger()), {
    req(ok.data(), input$selectScheme)
    regex_all <- values$extraRegex
    values$regex_inuse <- sapply(names(regex_all), function(iname) 
        values[[paste0("useregex_", iname)]])
    values$regex_case <- sapply(names(regex_all), function(iname) 
        values[[paste0("caseregex_", iname)]])
    if (verbose) if (any(values$regex_inuse))
      cat("DEBUG regex in use", paste0(collapse = " ", regex_all[values$regex_inuse]), "\n")
    writeLines(
      RJSONIO::toJSON(lapply(FUN = unname, list(
        regex = regex_all, use = values$regex_inuse, case = values$regex_case))),
      paste0(values$projectdir, input$selectScheme, "_regex.json"))
  })
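  ## The file written above is a plain JSON snapshot of the regex predictors for
  ## the current scheme. Illustrative content (hypothetical patterns):
  # {
  #   "regex": [ "\\bunemploy", "^RT @" ],
  #   "use":   [ true, false ],
  #   "case":  [ false, false ]
  # }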

  output$dataView <- DT::renderDataTable({
    # if (is.null(ok.text()) | ok.data.running()) return(NULL)
    req(ok.text())
    if (verbose) 
      cat("DEBUG enter dataView\n")
    if (verbose) 
      cat("DEBUG render dataView\n")
    DT::datatable(
      rownames = F, 
      data.frame(
        row = 1:nrow(ok.data()),
        id = ok.data()[, values$conf$idcol], 
        text = ok.text()), 
      options = list(
        pageLength = 3,
        columnDefs = list(list(
          targets = 2,
          render = DT::JS(
            "function(data, type, row, meta) {",
            "return type === 'display' && data.length > 200 ?",
            "'<span title=\"' + data + '\">' + data.substr(0, 200) + '...</span>' : data;",
            "}")
        ))
      ), 
      callback = DT::JS('table.page(0).draw(false);')
    )
  })
  
  ## Triggers for mass rename, created with each new label, with confirm
  observeEvent(values$uniqueLabelsAll, {
    if (is.null(values$uniqueLabelsAll)) return(NULL)
    notthere <- unique(na.omit(
      values$uniqueLabelsAll[! values$uniqueLabelsAll %in% values$retagLabels]))
    lapply(
      notthere,
      function(iTag) {
        iHash <- label2hash(iTag)
        
        observeEvent(input[[paste0("retag_", iHash)]], {
          tmp_oldtag <- iTag
          tmp_retagged <- which(values$label == tmp_oldtag)
          tmp_newtag <- gsub(
            "^\\s+|\\s+$", "", input[[paste0("newtag_", iHash)]])
          showModal(modalDialog(
            title = "Mass retag", 
            paste0("Confirm retag: ", length(tmp_retagged), " ", 
                   tmp_oldtag, " tags will be ", 
                   ifelse(nchar(tmp_newtag), 
                          paste("renamed to", tmp_newtag) , "deleted")),
            ## Use the hashed id here as well: raw tags may contain characters
            ## that are not valid in HTML element ids
            footer = tagList(actionButton(paste0("retag_confirm_", iHash), "Retag"),
                             modalButton("Cancel"))))
        })
        
        observeEvent(input[[paste0("retag_confirm_", iHash)]], {
          tmp_oldtag <- iTag
          tmp_retagged <- which(values$label == tmp_oldtag)
          tmp_newtag <- gsub(
            "^\\s+|\\s+$", "", input[[paste0("newtag_", iHash)]])
          if (!nchar(tmp_newtag)) tmp_newtag <- NA
          values$tagTrigger <- list(
            id = ok.data()[[values$conf$idcol]][tmp_retagged],
            tag = rep(tmp_newtag, length(tmp_retagged)))
          values$queries <- c(values$queries, tmp_retagged)

          removeModal()
        })

      })
    values$retagLabels <- c(values$retagLabels, notthere)
  })
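  ## Note: mass retagging does not touch values$label directly. Like the Oracle
  ## buttons below, it only fills values$tagTrigger; the tagTrigger observer
  ## further down is the single place where tags are written to disk and copied
  ## into values$label.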
  
  

  ##############################################################################
  ## UI rendering of Panel Tagging / Visualization 
  ##############################################################################
  
  ####################
  ## Tagging left panel
  

  # ## Trigger from Oops
  # observeEvent(input$oops, {
  #   if (is.null(ok.data()))
  #     return(NULL)
  #   if (verbose) 
  #     cat("DEBUG enter Oops\n")
  #   if (!is.null(values$retagged)) if (values$queries[length(values$queries)] == values$retagged[length(values$retagged)])
  #     values$retagged <- values$retagged[-length(values$retagged)]
  #   values$label[values$queries[length(values$queries)]] <- NA
  #   if (ok.commentCol() != "(None)")
  #     values$comment[values$queries[length(values$queries)]] <- ok.data()[values$queries[length(values$queries)], ok.commentCol()]
  #   values$queries <- values$queries[-length(values$queries)]
  #   ## tagTrigger ?
  #   
  #   ready2tag(0)
  #   queryNext(queryNext() + 1)
  # })
  
  
  ## Tagging triggers for Oracle buttons, created with each new label
  observeEvent(values$uniqueLabelsAll, {
    if (is.null(values$label)) return(NULL)
    if (is.null(values$uniqueLabelsAll)) return(NULL)
    if (all(is.na(values$label))) return(NULL)
    notthere <- unique(na.omit(
      values$uniqueLabelsAll[! values$uniqueLabelsAll %in% values$oracleLabels]
    ))
    if (!length(notthere)) return(NULL)
    if (verbose) 
      cat("DEBUG triggers for oracle buttons\n")
    lapply(
      notthere,
      function(iLab) {
        observeEvent(
          input[[paste0("oracle_", label2hash(iLab))]], {
            if (is.null(ok.text())) return(NULL)
            if (is.null(values$newQuery)) return(NULL)
            if (!ready2tag()) {cat("DEBUG not ready to tag in Button\n"); return(NULL)}
            # ready2tag(0)
            # if (input$allowRetag | sum(is.na(values$label)) == 0)
            if (!is.na(values$label[values$lastQuery]))
              values$retagged <- c(values$retagged, values$lastQuery)
            # values$label[values$lastQuery] <- iLab
            values$comment[values$lastQuery] <- input$currentComment
            # values$queries <- c(values$queries, values$lastQuery)
            values$tagTrigger <- list(
              id = ok.data()[values$lastQuery, values$conf$idcol], 
              tag = iLab, 
              comment = input$currentComment)
          })
      })
    values$oracleLabels <- c(values$oracleLabels, notthere)
  })
  
  ## Trigger from Oracle Create value button
  observeEvent(input$currentAction, {
    if (is.null(ok.data())) return(NULL)
    if (is.null(values$newQuery)) return(NULL)
    if (verbose) 
      cat("DEBUG enter create value\n")
    # if (input$allowRetag | sum(is.na(values$label)) == 0)
    if (!is.na(values$label[values$lastQuery]))
      values$retagged <- c(values$retagged, values$lastQuery)
    if (input$newLab == "") {
      return(NULL)
    } else {
      # values$label[values$lastQuery] <- input$newLab
      updateTextInput(session, "newLab", value= "")
    }
    values$comment[values$lastQuery] <- input$currentComment
    # values$queries <- c(values$queries, values$lastQuery)
    values$tagTrigger <- list(
      id = ok.data()[values$lastQuery, values$conf$idcol], 
      tag = input$newLab, 
      comment = input$currentComment)
  })
  
  ## Trigger from Oracle Confirm button
  observeEvent(input$oracleConfirm, {
    req(ok.data(), ok.train(), values$newQuery)
    # if (is.null(ok.data())) return(NULL)
    # if (is.null(values$newQuery)) return(NULL)
    if (!ready2tag()) {cat("DEBUG not ready to tag in OC\n"); return(NULL)}
    # ready2tag(0)
    if (!ok.train.possible()) return(NULL)
    if (verbose) 
      cat("DEBUG oracle confirm\n")
    # if (input$allowRetag | sum(is.na(values$label)) == 0)
    if (!is.na(values$label[values$lastQuery]))
      values$retagged <- c(values$retagged, values$lastQuery)
    # values$label[values$lastQuery] <- ok.predlab1()[values$lastQuery]
    # values$queries <- c(values$queries, values$lastQuery)
    values$comment[values$lastQuery] <- input$currentComment
    values$tagTrigger <- list(
      id = ok.data()[values$lastQuery, values$conf$idcol], 
      tag = ok.predlab1()[values$lastQuery], 
      comment = input$currentComment)
  })
  
  ## Trigger for CV glm (async)
  glmCV_nclicks <- reactiveVal(0)
  observeEvent(input$glmCV, {
    if (is.null(ok.predictor())) return(NULL)
    if (glmCV_nclicks() != 0) {
      showNotification("LASSO CV already running")
      return(NULL)
    }
    glmCV_nclicks(1)
    glmCV_future <- reactiveVal()

    tmpvalues <- values$label
    tmppred <- ok.predictor()
    for (ival in na.omit(unique(tmpvalues)))
      if (sum(tmpvalues == ival, na.rm = T) <= 8)
        tmpvalues[which(tmpvalues == ival)] <- NA
    glmCV_async <- future(seed = TRUE, {
      glmnet::cv.glmnet(
        x= tmppred[!is.na(tmpvalues), ], 
        y= as.factor(tmpvalues[!is.na(tmpvalues)]), 
        family= "multinomial")
    }) %...>% glmCV_future()
    
    glmCV_async <- catch(glmCV_async, function(e) {
      showNotification(type = "error", paste0("LASSO CV error: ", e))
    })
    glmCV_async <- finally(glmCV_async, function() {
      glmCV_nclicks(0)
      if (is.null(glmCV_future())) return(NULL)
      updateNumericInput(session, "glmLambda", 
                         value = glmCV_future()$lambda.min)
    })
    NULL
  })
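  ## The async pattern used here and in the other heavy computations: a
  ## reactiveVal guards against double launches, the expensive call runs in
  ## future() and is piped into a reactiveVal with %...>%, and promises::catch()
  ## / promises::finally() report errors and release the guard. A minimal
  ## sketch of the same shape (hypothetical names, not part of the app):
  # busy <- reactiveVal(0); result <- reactiveVal()
  # fut <- future(seed = TRUE, { Sys.sleep(1); 42 }) %...>% result()
  # fut <- catch(fut, function(e) showNotification(conditionMessage(e)))
  # fut <- finally(fut, function() busy(0))
  # NULL  # return a non-promise so the observer does not wait on the result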
  
  ## Trigger for CV liblinear (async)
  liblinCV_nclicks <- reactiveVal(0)
  observeEvent(input$liblinCV, {
    if (is.null(ok.predictor())) return(NULL)
    if (liblinCV_nclicks() != 0) {
      showNotification("LibLineaR CV already running")
      return(NULL)
    }
    liblinCV_nclicks(1)
    liblinCV_future <- reactiveVal()

    tmpvalues <- values$label
    tmppred <- ok.predictor()
    for (ival in na.omit(unique(tmpvalues)))
      if (sum(tmpvalues == ival, na.rm = T) <= 3)
        tmpvalues[which(tmpvalues == ival)] <- NA
    
    liblinCV_async <- future(seed = TRUE, {
      LiblineaR::LiblineaR(
        data= tmppred[!is.na(tmpvalues), ], 
        target= as.factor(tmpvalues[!is.na(tmpvalues)]), 
        type= 0, findC= T)
    }) %...>% liblinCV_future()
    liblinCV_async <- catch(liblinCV_async, function(e) {
      showNotification(type = "error", paste0("LibLineaR CV error: ", e))
    })
    liblinCV_async <- finally(liblinCV_async, function() {
      liblinCV_nclicks(0)
      if (is.null(liblinCV_future())) return(NULL)
      updateNumericInput(session, "liblinCost", value= liblinCV_future())
    })
    NULL
  })
  
  
  output$currentContext <- renderUI({
    if (verbose) 
      cat("DEBUG currentContext\n")
    if ((! is.null(ok.text())) & (length(ok.contextCol()) > 0))
      HTML(paste(paste(ok.contextCol(),
                       as.character(ok.data()[values$newQuery, ok.contextCol()]),
                       sep = " : "),
                 collapse= "<br/><br/>"))
  })
  
  output$makeOracleConfirm <- renderUI({
    if (verbose) 
      cat("DEBUG update printPredicted\n")
    if (is.null(ok.pred())) {
      tmp <- "No prediction"
    } else if (is.null(values$newQuery)) {
      tmp <- "No query"
    } else {
      tmp <- paste0(
        ifelse(is.na(values$label[values$newQuery]), "", "Pred:    "),
        ok.predlab1()[values$newQuery], 
        " (", round(max(ok.pred()[values$newQuery, ]), 3), ")")
    }
    res <- list(actionButton("oracleConfirm", label= paste("🤖", tmp), width = "100%"))
    if (!is.null(values$label) & !is.null(values$newQuery)) {
      if (!is.na(values$label[values$newQuery]))
        res <- list(res[[1]], HTML(paste0(
          "<center><strong> Current: ", values$label[values$newQuery], 
          "</strong></center>")))
    }
    res
  })
  
  
  ## Show training diagnostics on new model train
  output$trainDiagno <- 
    renderText(paste0(ok.msgtagged(), "\n", values$modelDiagno))

  ## Diagnostics event 
  observeEvent(diagTrigger(), {
    if (verbose) 
      cat("DEBUG update modelDiagno\n")
    if (is.null(ok.data())) {
      values$modelDiagno <- "No model"
      return(NULL)
    }
    if (!ok.train.possible()) {
      values$modelDiagno <- "No model (not enough labelled data)."
      return(NULL)
    }
    
    if (input$predModel == "random forest") {
      tmp <- paste0(
        "Train Accuracy: ", 
        round(100 * mean(ok.predlab1()[!is.na(values$label)] == 
                           values$label[!is.na(values$label)]), 1), 
        "% ; Wtd. F1: ",
        sprintf("%.3f", wtdF1(values$label[!is.na(values$label)],
                              ok.predlab1()[!is.na(values$label)])),
        "\nOOB Accuracy  : ", 
        round(100 * mean( 
          colnames(ok.train()$predictions)[apply(ok.train()$predictions, 1, which.max)] == 
            values$label[!is.na(values$label)]), 1), 
        "% ; Wtd. F1: ",
        sprintf("%.3f", wtdF1(
          values$label[!is.na(values$label)],
          colnames(ok.train()$predictions)[apply(ok.train()$predictions, 1, which.max)]))
      )
    } else if (input$predModel %in% c("lasso", "linear", "naive bayes", "knn")) {
      tmp <- paste0(
        "Train Accuracy: ", 
        round(100 *  mean(ok.predlab1()[!is.na(values$label)] == 
                            values$label[!is.na(values$label)]), 1), 
        "% ; Wtd. F1: ",
        sprintf("%.3f", wtdF1(
          values$label[!is.na(values$label)],
          ok.predlab1()[!is.na(values$label)])))
    }
    values$modelDiagno <- tmp
  })
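  ## Note: for random forests the second line reports out-of-bag performance.
  ## With a ranger probability forest, ok.train()$predictions holds OOB class
  ## probabilities, so taking the column with the highest value in each row
  ## recovers the OOB predicted label for every labelled document.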
  
  ## Update comment input field
  observeEvent(values$newQuery, {
    if (is.null(ok.text())) return(NULL)
    if (verbose) 
      cat("DEBUG update comment field\n")
    updateTextInput(session, "currentComment", 
                    value = values$comment[values$newQuery])
  })
  
  
  ## Save event: tag + comment in dedicated csv
  observeEvent(values$tagTrigger, {
    if (is.null(values$tagTrigger)) return(NULL)
    if (verbose)
      cat("DEBUG enter event tagTrigger", length(values$tagTrigger$id), "\n")
    ready2tag(0) # Prevent new tagging until next query
    dapath <- paste0(values$projectdir, ok.labelcol(), ".csv")
    darows <- match(values$tagTrigger$id, ok.data()[, values$conf$idcol])
    
    export <- data.frame(values$tagTrigger$id, values$tagTrigger$tag)
    colnames(export) <- c(values$conf$idcol, ok.labelcol())
    if (verbose)
      cat("DEBUG tagTrigger tag", 
          paste(export[, 1], export[, 2], collapse = " ; "), "\n")
    write.table(export, dapath, sep = ",", qmethod = "double", 
                fileEncoding = "UTF-8", row.names = F,
                append = ifelse(file.exists(dapath), T, F), 
                col.names = ifelse(file.exists(dapath), F, T))
    
    if (any(nchar(values$tagTrigger$comment))) {
      tmp_filter <- which(nchar(values$tagTrigger$comment) > 0)
      dapath <- paste0(values$projectdir, values$conf$commcol, ".csv")
      export <- data.frame(values$tagTrigger$id[tmp_filter], 
                           values$tagTrigger$comment[tmp_filter])
      colnames(export) <- c(values$conf$idcol, values$conf$commcol)
      if (verbose)
        cat("DEBUG tagTrigger comment", export[1,1], export[1, 2], "\n")
      write.table(export, dapath, sep = ",", qmethod = "double", 
                  fileEncoding = "UTF-8", row.names = F,
                  append = ifelse(file.exists(dapath), T, F), 
                  col.names = ifelse(file.exists(dapath), F, T))
    }

    ## Update label hist
    values$labelhist[match(values$tagTrigger$id, ok.data()[, values$conf$idcol])] <- 
      max(values$labelhist, na.rm = T) + 1
    ## Update wasTagged and wasRetagged
    itag <- match(values$tagTrigger$id, ok.data()[, values$conf$idcol])
    if (input$sampleChoice == "untagged") {
      values$wasTagged[itag] <- TRUE
    } else if (input$sampleChoice == "tagged") {
      values$wasRetagged[itag] <- TRUE
    } else if (input$sampleChoice == "all") {
      if (is.na(values$label[itag])) {
        values$wasTagged[itag] <- TRUE
      } else 
        values$wasRetagged[itag] <- TRUE
    }
    
    ## Call queryNext event, or launch training
    if (trainCountdown() == 1) {
      trainCountdown(input$trainCountdown)
      trainTrigger(trainTrigger() + 1)
    } else if (trainCountdown() > 1) {
      trainCountdown(trainCountdown() - 1)
      queryNext(queryNext() + 1)
    } else {
      queryNext(queryNext() + 1)
    }
    
    ## Update values$label etc
    values$label[darows] <- values$tagTrigger$tag
    values$queries <- c(values$queries, darows)
    
    if (verbose)
      cat("DEBUG exit event tagTrigger", nrow(export), "rows\n")
  })
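  ## The tag log written above is an append-only CSV per tagging scheme: id
  ## column first, tag column second, header only when the file is created.
  ## Illustrative content (hypothetical id column and scheme names):
  # "doc_id","scheme1"
  # "doc_0042","positive"
  # "doc_0057","negative"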
  
  
  ####################
  ## Tagging main panel
  
  output$textVisuCols <- renderUI({
    if (verbose) 
      cat("DEBUG render text/visu panel\n")
    leftsize <- 6
    if (!input$panelText) leftsize <- 1
    if (input$panelText & !input$panelVisu) leftsize <- 11
    
    fluidRow(
      column(
        leftsize, 
        conditionalPanel(
          "input.panelText",
          wellPanel(htmlOutput("currentText"))
        )
      ), 
      column(
        12 - leftsize,
        conditionalPanel(
          "input.panelVisu",
          fluidRow(
            column(2, HTML(paste0('<div title="Compute new visualization">', 
                                  actionButton("visuCompute", "🎲"), 
                                  '</div>'))),
            column(2, HTML(paste0('<div title="Zoom out/into selected area">', 
                                  actionButton("visuGoZoom", "🔍±"), 
                                  '</div>'))),
            column(2, HTML(paste0('<div title="Query only from zoom area">', 
                                  checkboxInput("visuLock", "🔒", value = FALSE), 
                                  '</div>'))), 
            column(2, HTML(paste0('<div title="Visualization options">', 
                                  checkboxInput("visuOptions", "🔧"), 
                                  '</div>'))), 
            column(3, HTML(paste0('<div title="Plot size">', 
                                  numericInput("visuHeight", NULL, 400, 20, 2e12, 10), 
                                  '</div>')))
          ),
          conditionalPanel(
            'input.visuOptions',
            flowLayout(
              selectInput(
                "visuAlgo", "Visu. method", c("umap", "t-sne"), "umap", width = "100%"),
              selectizeInput(
                "visuSource", "Visu. source",
                c("Word counts (DFM)" = "dfm", 
                  "Word embeddings (FastText)" = "ft",
                  "Sentence embeddings (SBERT)" = "sb", 
                  "BERT predictions" = "bertpred", 
                  "Regex" = "regex"),
                c("sb", "ft"), multiple = TRUE)
            )
          ),
          conditionalPanel( # umap options
            'input.visuAlgo == "umap" & input.visuOptions', 
            flowLayout(
              numericInput(
                "umapNeighb", "Nb. Neighbors", 15, 1, 500, 1), 
              numericInput(
                "umapSpread", "Spread", 1, 0, 16, 1e-2),
              numericInput(
                "umapMindist", "Min dist.", .001, 0, 16, 1e-3), 
              selectInput(
                "umapMetric", "Metric", selected = "cosine",
                c("euclidean", "cosine", "manhattan", "hamming", 
                  "correlation", "categorical")
              )
            )
          ),
          conditionalPanel( # t-SNE options
            'input.visuAlgo == "t-sne" & input.visuOptions', 
            flowLayout(
              numericInput(
                "tsnePerplex", "Perplexity", 15, 1, 500, 1), 
              numericInput(
                "tsneTheta", "Theta", .5, 0, 1, 1e-2),
              numericInput(
                "tsnePcaDim", "PCA dims", 50, 1, 100, 1)
            )
          ),
          uiOutput("visuMsg"),
          plotOutput(
            "visuPlot", width = "100%",
            dblclick = "visuDblclick",
            brush = brushOpts("visuBrush", resetOnNew = T))
        )
      )
    )
  })
  
  ##################
  ## Subpanel "Text"
  ##################
  
  output$currentText <- renderUI({
    if (verbose) 
      cat("DEBUG render currentText\n")
    if (is.null(ok.data()) | is.null(ok.nicetext()))
      return(HTML("Import data to start training."))

    if (input$strategy %in% c("entropy", "maxprob") & is.null(ok.train()))
        return(HTML("<em>No model: train one, or try 'random' or 'sequential' strategy</em>"))

    if (is.null(values$newQuery))
      return(HTML("<em>No query</em>"))
    
    bastext <- ok.nicetext()[values$newQuery]
    if (nchar(input$regexFilter) > 0)
      bastext <- gsub(paste0("(", isolate(input$regexFilter), ")"), 
                      "<b>\\1</b>", bastext, ignore.case = isolate(!input$regexCaseSens))
    baslen <- nchar(bastext)
    if (!length(baslen)) {
      if (nchar(input$regexFilter) > 0) 
        return(HTML("<b><em>Regex filter: no match</em></b>"))
      return(HTML("<b><em>No text matches the filters</em></b>"))
    }
    if (is.na(baslen))
      return(HTML("<em>No query</em>"))
    if (baslen > 1200) {
      bastext <- paste0(substring(bastext, 1, 1200), '<em title="> 512 tokens (not read by BERT)">',
                        substring(bastext, 1201), "</em>")
    }
    bastext <- gsub("\n", "<br/>", bastext)
    HTML(bastext)
  })
  
  
  
  ##############################################################################
  ## Panel "Visualize"
  ##############################################################################
  
  output$visuMsg <- renderUI({
    if (!is.null(values$visuMsg))
      # strong(values$visuMsg)
      h5(values$visuMsg, style = "text-align:center")
  })
  
  ## Compute visualization points (async)
  visu_nclicks <- reactiveVal(0)
  ok.visu <- reactiveVal()
  observe({
    if (!input$panelVisu) 
      values$visuGo <- FALSE
    if (input$panelVisu & !values$visuGo) 
      values$visuGo <- TRUE
  })
  observeEvent(list(values$visuGo, input$visuCompute), {
    if (is.null(input$visuSource)) return(NULL)
    if (!values$visuGo) return(NULL)
    if (is.null(ok.data())) {
      values$visuMsg <- "No data imported."
      return(NULL)
    }
    
    if (visu_nclicks() > 0) {
      values$visuMsg <- "Already computing visualization"
      return(NULL)
    }
    visu_nclicks(1)
    
    values$visuMsg <- "Computing visualization..."
    
    ## Prepare options and objects for async computation    
    dasources <- input$visuSource
    daalgo <- input$visuAlgo
    
    if (verbose) 
      cat("DEBUG compute visu embedding: ", daalgo, paste(dasources, collapse = " "), "\n")
    
    visudat <- NULL
    if ("dfm" %in% dasources) dadfm <- ok.dfm()
    if ("ft" %in% dasources) {
      visudat <- ok.data()[, values$cols_ft]
    }
    if ("sb" %in% dasources) {
      if (is.null(visudat)) {
        visudat <- ok.data()[, values$cols_sb]
      } else 
        visudat <- cbind(visudat, ok.data()[, values$cols_sb])
    }
    if ("bertpred" %in% dasources) {
      if (is.null(visudat)) {
        visudat <- ok.data()[, do.call(c, values$cols_bertpred)]
      } else 
        visudat <- cbind(visudat, ok.data()[, do.call(c, values$cols_bertpred)])
    }
    if ("regex" %in% dasources) {
      if (is.null(visudat)) {
        visudat <- ok.extraRegex.matrix()
      } else 
        visudat <- cbind(visudat, ok.extraRegex.matrix())
    }
    if (daalgo == "umap") {
      visu_args <- list(
        n_neighbors = input$umapNeighb, metric = input$umapMetric, 
        spread = input$umapSpread, min_dist = input$umapMindist)
    } else if (daalgo == "t-sne") {
      visu_args <- list(
        perplexity = input$tsnePerplex, theta = input$tsneTheta,
        initial_dims = input$tsnePcaDim, check_duplicates = FALSE)
    }
    
    visu_async <- future(seed = TRUE, {
      if ("dfm" %in% dasources) {
        ## TODO: CA treatment buggy, switch to SVD?
        # da_ca <- quanteda.textmodels::textmodel_ca(
        #   quanteda::as.dfm(dadfm), nd = min(100, nrow(dadfm)))
        # da_ca <- (da_ca$rowcoord %*% diag(da_ca$sv))[, !is.na(da_ca$sv)]
        da_ca <- as.matrix(dadfm)
        if (is.null(visudat)) {
          visudat <- da_ca
        } else 
          visudat <- cbind(da_ca, visudat)
      }
      
      if (daalgo == "umap") {
        do.call(uwot::umap, c(list(visudat), visu_args))
      } else if (daalgo == "t-sne") {
        visudat_dupli <- duplicated(visudat)
        if (any(visudat_dupli))
          visudat[visudat_dupli, ] <- jitter(visudat[visudat_dupli, ])
        do.call(Rtsne::Rtsne, c(list(visudat), visu_args))$Y
      }
    }) %...>% ok.visu()
    
    visu_async <- catch(visu_async, function(error) {
      values$visuMsg <- paste0("Error: ", error)
    })
    visu_async <- finally(visu_async, function() {
      visu_nclicks(0)
      if (input$visuGoZoom == 0) {
        values$visuMsg <- "Double-click to select, drag to select zooming zone."
      } else
        values$visuMsg <- NULL
      
      nice_choices <- c("FastText" = "ft", "SBERT" = "sb", "visu" = daalgo)
      names(nice_choices)[3] <- ifelse(daalgo == "t-sne", "t-SNE", "UMAP")
      updateSelectInput(
        session, "dlEmbedSelect", choices = nice_choices, selected = daalgo)
      
      if (verbose) 
        cat("DEBUG Visu async out\n")
    })
    
    NULL
  })
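  ## For reference, the two projections built above boil down to calls of this
  ## shape (illustrative random matrix and roughly default parameters):
  # m <- matrix(rnorm(200), ncol = 10)
  # xy_umap <- uwot::umap(m, n_neighbors = 15, metric = "cosine",
  #                       spread = 1, min_dist = 0.001)
  # xy_tsne <- Rtsne::Rtsne(m, perplexity = 5, theta = 0.5,
  #                         initial_dims = 10, check_duplicates = FALSE)$Y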
  
  ## Compute visualization plot (async)
  ok.ggvisu <- reactiveVal()
  observe({
    # if (is.null(ok.visu())) return(NULL)
    if (is.null(ok.visu())) {
      ok.ggvisu(NULL)
      return(NULL)
    }
    # if (!isTRUE(values$visugo)) return(NULL)
    if (!input$panelVisu) return(NULL)
    
    if (verbose) 
      cat("DEBUG compute visu plot\n")
    ggdat <- data.frame(ok.visu(), tag = values$label)
    if (!is.null(ok.predlab1())) 
      ggdat$tag[is.na(values$label)] <- ok.predlab1()[is.na(values$label)]
    da_predlabs <- ok.predlab1()
    da_tagged <- ! is.na(values$label)
    da_query <- values$newQuery
    
    ggvisu_async <- future({
      ggres <- ggplot(ggdat, aes(X1, X2)) + theme_bw() + 
        xlab("") + ylab("") + 
        theme(legend.position = "bottom") +
        scale_shape_manual(values = rep(1:8, 40)) # TODO : adapt to tags
      
      ## Tagged points      
      if (any(da_tagged)) {
        ggres <- ggres + 
          geom_point(data = ggdat[da_tagged, ], 
                     aes(X1, X2, color = tag, shape = tag),
                     size = 1.5, stroke = 1.5, alpha = .8)
      }
      
      ## Not tagged points
      if (!is.null(da_predlabs)) {
        ggres <- ggres + 
          geom_point(data = ggdat[!da_tagged, ],
                     aes(color = tag, shape = tag), show.legend = FALSE)
      } else {
        ggres <- ggres + 
          geom_point(alpha = .8, data = ggdat[is.na(ggdat$tag), ])
      }
      
      ## Current query
      if (!is.null(da_query)) {
        if (is.null(da_predlabs)) {
          ggres <- ggres + 
            geom_point(data = ggdat[da_query, ], color = 2,
                       stroke = 3, shape = 3, show.legend = FALSE)
        } else {
          ggres <- ggres + 
            geom_point(data = ggdat[da_query, ], 
                       stroke = 3, shape = 3, show.legend = FALSE)
        }
      }
      ggres
    }) %...>% ok.ggvisu()
    
    ggvisu_async <- catch(ggvisu_async, function(error) {
      values$visuMsg <- paste0("Error in ggplot: ", error)
    })
    
  })
  
  observe({
    if (verbose) 
      cat("DEBUG update visuPlot\n")
    if (is.null(ok.ggvisu())) {
      values$visuMsg <- "No visualization yet."
      output$visuPlot <- NULL
      values$visuZoom$xlim <- NULL
      values$visuZoom$ylim <- NULL
      return(NULL)
    }
    output$visuPlot <- renderPlot(height = input$visuHeight, {
      # if (is.null(ok.ggvisu())) return(NULL)
      ok.ggvisu() + 
        coord_fixed(xlim = values$visuZoom$xlim, ylim = values$visuZoom$ylim)
    })
  })
  
  observeEvent(input$visuDblclick, {
    if (!is.null(values$visuMsg))
      values$visuMsg <- NULL
    tmp <- input$visuDblclick
    bmu <- t(ok.visu()) - c(tmp$x, tmp$y)
    bmu <- which.min(colSums(bmu^2))
    values$lastQuery <- bmu
    values$newQuery <- bmu
    ready2tag(1)
  })
  
  observeEvent(input$visuGoZoom, {
    if (!is.null(values$visuMsg))
      values$visuMsg <- NULL
    values$visuZoom$xlim <- c(input$visuBrush$xmin, input$visuBrush$xmax)
    values$visuZoom$ylim <- c(input$visuBrush$ymin, input$visuBrush$ymax)
  })
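  ## Note: the double-click handler above selects the closest projected point by
  ## squared Euclidean distance. Equivalent one-liner on a coordinate matrix xy
  ## and a click at (cx, cy) (hypothetical names):
  # nearest <- which.min(colSums((t(xy) - c(cx, cy))^2))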
  
  
  ##############################################################################
  ## Panel History
  ##############################################################################
  
  ## Create history link and retag events as they appear
  observeEvent(input$histDTable_rows_current, {
    histrows <- order(values$labelhist)
    histrows <- rev(histrows[!is.na(values$labelhist[histrows])])
    histrows <- histrows[!is.na(values$label[histrows])]
    histrows <- histrows[input$histDTable_rows_current]
    notthere <- histrows[! histrows %in% values$histQueries]
    
    if (!length(notthere)) return(NULL)
    lapply(notthere, function(daquery) {
      if (verbose) cat("DEBUG create history events", daquery, "\n")
        observeEvent(input[[paste0("hist", daquery)]], {
          if (verbose) cat("DEBUG history link", daquery, "\n")
          values$lastQuery <- daquery
          values$newQuery <- daquery
          updateTabsetPanel(session, "mainPanelset", selected = "Tagging")
          ready2tag(1)
        })

        observeEvent(input[[paste0("histTag", daquery)]], {
          if (verbose) cat("DEBUG history retag", daquery, "\n")
          values$histSaveStack <- rbind(
            values$histSaveStack, 
            data.frame(id = ok.data()[daquery, values$conf$idcol], 
                       tag = input[[paste0("histTag", daquery)]], 
                       comment = values$comment[daquery])
          )
        })
    })
    values$histQueries <- c(values$histQueries, notthere)
  })
  
  ## History save action
  observeEvent(input$histSave, {
    req(is.data.frame(values$histSaveStack))
    values$tagTrigger <- as.list(values$histSaveStack)
    values$label[match(values$histSaveStack$id, ok.data()[[values$conf$idcol]])] <- 
      values$histSaveStack$tag
    values$histSaveStack <- NULL
  })
  
  ## Update save name with number of unsaved changes
  observeEvent(length(values$histSaveStack$id), {
    updateActionButton(
      session, "histSave", 
      label = paste0("Save changes (", length(unique(values$histSaveStack$id)),")"))
  })
  
  ## History table, with clickable links and retagging
  output$histDTable <- DT::renderDataTable({
    histrows <- order(values$labelhist)
    histrows <- rev(histrows[!is.na(values$labelhist[histrows])])
    histrows <- histrows[!is.na(values$label[histrows])]

    # histrows <- rev(values$queries)
    hist_df <- data.frame(
      # history = length(histrows):1,
      history = values$labelhist[histrows],
      id = sapply(histrows, function(irow) {
        as.character(actionLink(
          paste0("hist", irow), 
          label = ok.data()[irow, values$conf$idcol], 
          onclick = paste0('Shiny.onInputChange(\"hist', irow, '\",  this.id)')))
      }),
      Tag = values$label[histrows],
      Retag = sapply(histrows, function(irow) {
        paste0(
          "<select id=\"histTag", irow, "\" onchange=\"Shiny.onInputChange('histTag", irow, "', this.value)\">", 
          paste0("<option value=\"", values$uniqueLabels, "\" ", 
                 ifelse(values$uniqueLabels == values$label[irow], "selected", ""),
                 ">", values$uniqueLabels, "</option>", collapse = " "),
          "</select>"
        )
      }),
      Comment = values$comment[histrows],
      Text = ok.nicetext()[histrows])
    if (!is.null(values$conf$contextcols))
      hist_df <- cbind(hist_df,
                       ok.data()[histrows, values$conf$contextcols])
    
    DT::datatable(hist_df, escape = FALSE, rownames = FALSE, filter = "top", 
                  options = list(
                    columnDefs = list(list(
                      targets = 5,
                      render = DT::JS(
                        "function(data, type, row, meta) {",
                        "return type === 'display' && data.length > 200 ?",
                        "'<span title=\"' + data + '\">' + data.substr(0, 200) + '...</span>' : data;",
                        "}")))
                  ))
  })
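  ## Note: the id and Retag columns embed raw HTML (an actionLink and a <select>)
  ## whose onclick/onchange handlers call Shiny.onInputChange('hist<row>', ...)
  ## and Shiny.onInputChange('histTag<row>', ...), turning each cell into an
  ## ad-hoc Shiny input; escape = FALSE is what lets that HTML through DT.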
  
  ##############################################################################
  ## Panel "Stats"
  ##############################################################################
  
  output$statsTagTable <- renderTable(rownames = F, {
    table(tag = values$label)
  })
  
  
  ## Model diagnostics by CV
  output$statsCVoutput <- reactive({
    if (is.null(values$modelcv))
      return("Model diagnostics will appear here")
    values$modelcv
  })
  
  output$statsCVtable <- DT::renderDataTable({
    if (is.null(values$cvtable))
      return(NULL)
    DT::datatable(values$cvtable, list(pageLength = 25), rownames = F)
  })
  
  ## Button action: compute CV (async)
  cv_nclicks <- reactiveVal(0)
  observeEvent(input$statsCVgo, {
    if (is.null(ok.train())) {
      values$modelcv <- "Error in model training"
      return(NULL)
    }
    if (cv_nclicks() != 0) {
      values$modelcv <- "CV already running"
      return(NULL)
    }
    cv_nclicks(cv_nclicks() + 1)
    
    predmat <- ok.predictor()[!is.na(values$label), ]
    predlab <- values$label[!is.na(values$label)]
    cvsamp <- sample(10, nrow(predmat), replace = TRUE)
    cvres <- rep(NA, nrow(predmat))
    cvres_future <- reactiveVal()
    
    tmp_model <- input$predModel
    if (tmp_model == "random forest") {
      cv_args <- list(
        num.trees = input$rfNumTrees, mtry = if (input$rfMtry > 0) input$rfMtry,
        sample.fraction = input$rfSampleFrac, probability = FALSE)
    } else if (tmp_model == "linear") {
      cv_args <- list(type= 0, cost = input$liblinCost)
    } else if (tmp_model == "lasso") {
      cv_args <- input$glmLambda
    } else if (tmp_model == "naive bayes") {
      cv_args <- list(smooth = input$naiveSmooth, prior = input$naivePrior, 
                      distribution = input$naiveDistri)
    } else if (tmp_model == "knn") {
      cv_args <- list(k = input$knnK)
    }
    
    cv_status_file <- paste0(session$token, "_cv")
    
    cv_async <- future(seed = TRUE, {
      for (icv in 1:10) {
        set_status(cv_status_file, paste0("Computing CV fold ", icv, " / 10"))
        if (tmp_model == "random forest") {
          cvmodel <- do.call(ranger::ranger, c(cv_args, list(
            x= predmat[cvsamp != icv, ], 
            y= as.factor(predlab[cvsamp != icv]))))
          cvres[cvsamp == icv] <- as.character(
            predict(cvmodel, data= predmat[cvsamp == icv, ])$predictions)
        } else {
          tmpvalues <- predlab[cvsamp != icv]
          # for (ival in na.omit(unique(tmpvalues)))
          #   if (sum(tmpvalues == ival, na.rm = T) == 1)
          #     tmpvalues[which(tmpvalues == ival)] <- NA
          
          if (tmp_model == "lasso") {
            cvmodel <- suppressWarnings(
              glmnet::glmnet(x= predmat[cvsamp != icv, ][!is.na(tmpvalues), ], 
                             y= as.factor(tmpvalues[!is.na(tmpvalues)]), 
                             family= "multinomial"))
            cvres[cvsamp == icv] <- predict(
              cvmodel, newx = predmat[cvsamp == icv, ], 
              type = "class", s= cv_args)
          } else if (tmp_model == "linear") {
            cvmodel <- do.call(LiblineaR::LiblineaR, c(
              list(data= predmat[cvsamp != icv, ][!is.na(tmpvalues), ], 
                   target= as.factor(tmpvalues[!is.na(tmpvalues)])), 
              cv_args))
            cvres[cvsamp == icv] <- as.character(
              predict(cvmodel, newx= predmat[cvsamp == icv, ])$predictions)
          } else if (tmp_model == "naive bayes") {
            cvmodel <- do.call(quanteda.textmodels::textmodel_nb, c(
              list(x= predmat[cvsamp != icv, ][!is.na(tmpvalues), ], 
                   y= as.factor(tmpvalues[!is.na(tmpvalues)])), 
              cv_args))
            cvres[cvsamp == icv] <- as.character(
              predict(
                cvmodel, newdata= predmat[cvsamp == icv, ], type= "class"))
          } else if (isolate(tmp_model) == "knn") {
            cvres[cvsamp == icv] <- 
              as.character(do.call(class::knn, c(cv_args, list(
                train = predmat[cvsamp != icv, ], 
                test = predmat[cvsamp == icv, ], 
                cl = predlab[cvsamp != icv]))))
          }
        }
      }
      cvres
    }) %...>% cvres_future()
    
    cv_async <- catch(cv_async, function(e) {
      cvres_future(NULL)
      values$modelcv <- e$message
    })
    
    cv_async <- finally(cv_async, function() {
      cv_nclicks(0)
      if (file.exists(cv_status_file)) unlink(cv_status_file)
      if (is.null(cvres_future())) return(NULL)
      cvres <- cvres_future()
      predlab <- values$label[!is.na(values$label)]
      dametrics <- metricsTable(predlab, cvres)
      values$cvtable <- dametrics
      res <- "10-CV OOB evaluation:\n"
      # if (any(grepl("^bertpred", input$use_regressors)))
      if (any(grepl("^bertpred", values$new_use_regressors)))
        res <- paste0(
          res, 
          "⚠️  Scores are biased upwards if BERT predictions were trained on current data ⚠️ \n")
      res <- paste0(
        res, 
        "* Accuracy    : ", round(100 * mean(cvres == predlab), 2), "%\n", 
        "* Macro F1    : ", sprintf("%f", mean(as.numeric(dametrics$F1))), "\n", 
        "* Weighted F1 : ", sprintf("%f", weighted.mean(as.numeric(dametrics$F1), 
                                                        dametrics$N.cases)), "\n"
      )
      values$modelcv <- res
    })
    
    NULL
  })
  
  ## Automatic message update during CV computation
  observe({
    if (cv_nclicks() == 0) return(NULL)
    invalidateLater(200)
    if (!file.exists(paste0(session$token, "_cv"))) return(NULL)
    values$modelcv <- get_status(paste0(session$token, "_cv"))
    values$cvtable <- NULL
  })
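  ## Progress messages cross the future/session boundary through a small status
  ## file: the worker calls set_status() and this observer polls the file every
  ## 200 ms with invalidateLater() and get_status(). set_status()/get_status()
  ## are assumed to be simple file wrappers defined earlier in this script,
  ## along the lines of (illustrative sketch only):
  # set_status <- function(path, msg) writeLines(msg, path)
  # get_status <- function(path) readLines(path, warn = FALSE)[1]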
  
  
  ##############################################################################
  ## Panel BERT
  ##############################################################################
  
  ##########
  ## Bert UI left panel
  
  ## Update list of saved models and dlSelect on init and on saveTrigger
  
  observeEvent(list(ok.data(), values$bertSaveTrigger), {
    damodels <- dir(pattern = "^tiggerbert_saved_")
    if (!length(damodels)) return(NULL)
    damodels <- gsub("^tiggerbert_saved_", "", damodels)
    values$bertSaved <- damodels
    updateSelectInput(session, "dlBPSelect", choices = damodels)
  })
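  ## Note: saved fine-tuned models are discovered purely by naming convention:
  ## anything in the working directory matching "^tiggerbert_saved_" is listed,
  ## and the prefix is stripped for display (e.g. a hypothetical directory
  ## "tiggerbert_saved_mymodel" would show up as "mymodel").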
  
  ## Populate saved models list, with buttons
  
  output$bertSaved <- renderUI({
    if (verbose) 
      cat("DEBUG bert update saved list\n")
    HTML(paste0(
      "<table><tbody>", 
      paste0(sapply(values$bertSaved, function(dabert) {
        bertshort <- dabert
        if (nchar(bertshort) > 23) 
          bertshort <- paste0(substr(bertshort, 1, 20), "...")
        paste0(
          "<tr><td>", 
          actionLink(paste0("saved_", dabert), bertshort, title = dabert, 
                     style="margin-right: 5px;"), "</td><td>", 
          actionButton(paste0("bert_copypars_", dabert), "📋", 
                       title = "Copy parameters"), "</td><td>",
          actionButton(paste0("bert_infer_", dabert), "🤖", 
                       title = "Infer predictions on current data"), "</td><td>",
          actionButton(paste0("bert_delete_", dabert), "🗑", 
                       title = "Delete model"), 
          "</td></tr>")
      }), collapse = "\n"),
      "</tbody></table>"
    ))
  })
  
  ##########
  ## Bert UI main panel
  
  ## UI outputs : message and stats table and plot
  output$bertMsg <- reactive(
    ifelse(is.null(values$bertMsg), 
           "Train new BERT, or select saved model.", 
           values$bertMsg))
  
  output$bertValstats <- DT::renderDataTable(
    DT::datatable(values$bertValstats, list(pageLength = 25)))
  
  output$bertValPlot <- renderPlot(values$bertValPlot)
  
  output$bertMsgHyperpar <- reactive(values$bertMsgHyperpar)
  
  ##################################
  ## Async handling of BERT training
  
  ## Status File
  bert_status_file <- paste0("tiggerbert_", session$token, "_trainstatus")
  
  fire_interrupt <- function(dafile) set_status(dafile, "interrupt")
  fire_ready <- function(dafile) set_status(dafile, "Ready")
  fire_running <- function(dafile, perc_complete = NULL){
    if(is.null(perc_complete))
      msg <- "Training..."
    else
      msg <- paste0("Training... ", perc_complete, "% complete")
    set_status(dafile, msg)
  }
  interrupted <- function(dafile) get_status(dafile) == "interrupt"
  
  ## Create Status File
  fire_ready(bert_status_file)
  
  
  ## Button action preparation
  bert_nclicks <- reactiveVal(0)
  bert_result_val <- reactiveVal()
  bert_hyperpars <- reactiveVal()

  ## Button appearance: write "Stop" if training is ongoing
  observeEvent(bert_nclicks(), {
    updateActionButton(session, "bertTrain", 
                       label = ifelse(bert_nclicks() > 0, "Stop", "Train BERT"))
  })
 | 
						|
  
 | 
						|
  ## Button action: Train new BERT
  observeEvent(input$bertTrain, {

    ## If training is already running, interpret the click as a stop request
    # if(bert_nclicks() != 0){
    #   values$bertMsg <- "BERT is already training"
    #   return(NULL)
    # }
    tmpsession <- paste0("tiggerbert_", ok.labelcol(), "_", session$token)
    if (bert_nclicks() != 0) {
      write("stop", paste0(tmpsession, "_stop"))
      bert_nclicks(0)
      return(NULL)
    }
    if (file.exists(paste0(tmpsession, "_stop")))
      file.remove(paste0(tmpsession, "_stop"))

    ## Increment clicks and prevent concurrent analyses
    bert_nclicks(bert_nclicks() + 1)

    bert_result_val(data.frame(Status = "Running..."))
    fire_running(bert_status_file)

    ## Prepare async training
    values$bertLoadSaved <- NULL
    # tmpsession <- paste0("tiggerbert_", ok.labelcol(), "_", session$token)
    tmpmodel <- input$bertModel
    system(paste0("rm ", tmpsession, "_done"))

    tmpdat <- data.frame(
      id = ok.data()[, values$conf$idcol],
      text = ok.nicetext(),
      label = values$label
    )
    tmpdat <- tmpdat[!is.na(tmpdat$label), ]
    tmpdat <- tmpdat[grepl("\\S", tmpdat$text), ]

    ## Filter data: minimum occurrences, balance classes, validation split
    tmplabtable <- table(tmpdat$label)
    if (is.numeric(input$bertMinOccur)) if (input$bertMinOccur > 1) {
      tmpdat <- tmpdat[tmpdat$label %in%
                         names(which(tmplabtable >= input$bertMinOccur)), ]
      tmplabtable <- table(tmpdat$label)
    }
    set.seed(input$bertValidSeed)
    if (input$bertBalance) {
      tmpminocc <- min(tmplabtable)
      tmptrain <- sample(do.call(c, lapply(unique(tmpdat$label), function(dalabel)
        sample(which(tmpdat$label == dalabel), round((1 - input$bertValidFrac) * tmpminocc)))))
      tmpvalid <- sample(do.call(c, lapply(unique(tmpdat$label), function(dalabel) {
        dasamp <- which(tmpdat$label == dalabel)
        dasamp <- dasamp[! dasamp %in% tmptrain]
        sample(dasamp, round(input$bertValidFrac * tmpminocc))
      })))
    } else {
      tmptrain <- sample(1:nrow(tmpdat), round((1 - input$bertValidFrac) * nrow(tmpdat)))
      tmpvalid <- (1:nrow(tmpdat))[-tmptrain]
    }

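    ## Illustrative sketch (not run): with bertValidFrac = 0.2, the balanced
    ## branch above draws the same number of train and validation rows from
    ## every class, capped by the rarest class. With a hypothetical label vector:
    # labs <- c(rep("A", 50), rep("B", 10))
    # minocc <- min(table(labs))          # 10, size of the rarest class
    # round(0.8 * minocc)                 # 8 training rows sampled per class
    # round(0.2 * minocc)                 # 2 validation rows sampled per class
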
    write.csv(tmpdat[tmptrain, ], paste0(tmpsession, "_dat_train.csv"), row.names = FALSE)
    write.csv(tmpdat[tmpvalid, ], paste0(tmpsession, "_dat_valid.csv"), row.names = FALSE)

    ## Wait briefly (up to ~5 seconds) for the validation CSV to appear on disk
    iwait <- 0
    while (iwait < 10) {
      if (!file.exists(paste0(tmpsession, "_dat_valid.csv"))) Sys.sleep(0.5)
      iwait <- iwait + 1
    }

    tmp_cmd <- paste0(
      values$conf$python, " gobert.py -m ", tmpmodel,
      " -t ", tmpsession, "_dat_train.csv",
      " -v ", tmpsession, "_dat_valid.csv",
      " -b ", input$bertBatchsize,
      " -g ", input$bertGradacc,
      " -e ", input$bertEpochs,
      " -l ", input$bertLrate,
      " -w ", input$bertWdecay,
      " -B ", ifelse(input$bertBest, 1, 0),
      " -E ", input$bertNeval,
      " -s ", tmpsession,
      " -G ", as.numeric(values$conf$use_gpu),
      " -A ", as.numeric(input$bertAdapt),
      " 2>&1 ; touch ", tmpsession, "_done"
    )

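    ## For reference, with hypothetical values (python path, model name and
    ## session token below are illustrative) the assembled command looks like:
    # python3 gobert.py -m distilbert-base-uncased \
    #   -t tiggerbert_label_TOKEN_dat_train.csv -v tiggerbert_label_TOKEN_dat_valid.csv \
    #   -b 16 -g 1 -e 3 -l 2e-05 -w 0.01 -B 1 -E 10 \
    #   -s tiggerbert_label_TOKEN -G 1 -A 1 2>&1 ; touch tiggerbert_label_TOKEN_done
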
    ## Save hyperpars
    bert_hyperpars(list(bertModel = input$bertModel,
                        bertEpochs = input$bertEpochs,
                        bertLrate = input$bertLrate,
                        bertWdecay = input$bertWdecay,
                        bertBatchsize = input$bertBatchsize,
                        bertGradacc = input$bertGradacc,
                        bertNeval = input$bertNeval,
                        bertBest = input$bertBest,
                        bertValidFrac = input$bertValidFrac,
                        bertValidSeed = input$bertValidSeed,
                        bertNtrain = length(tmptrain),
                        bertTrainTable = as.list(table(tmpdat$label[tmptrain])),
                        bertScheme = ok.labelcol()))

    ## Launch async training
    bert_async <- future({
      if (file.exists(tmpsession))
        system(paste0("rm -R ", tmpsession, "*"))

      # system(tmp_cmd, wait = FALSE, intern = FALSE)
      tmp_exec <- system(tmp_cmd, wait = FALSE, intern = TRUE)
      writeLines(tmp_exec, paste0(tmpsession, "_log"))

      ## After training, get trainer state from last checkpoint for diagnostics
      chkpt <- dir(paste0(tmpsession, "_train"), pattern = "^checkpoint")
      chkpt <-
        chkpt[order(as.numeric(gsub("\\D", "", chkpt)), decreasing = TRUE)[1]]
      system(paste0(
        "cp ", tmpsession, "_train/", chkpt, "/trainer_state.json ",
        tmpsession, "/"))
    }) %...>% bert_result_val()

    ## Catch interrupt (or any other error) and notify user
    bert_async <- catch(bert_async, function(e) {
      bert_result_val(NULL)
      showNotification(e$message)
      values$bertMsg <- e$message
    })

    ## After the promise has been evaluated, reset nclicks to allow another run
    bert_async <- finally(bert_async, function() {
      system(paste0("rm -R ", tmpsession, "_train"))
      system(paste0("rm ", tmpsession, "_done"))

      fire_ready(bert_status_file)
      bert_nclicks(0)

      if (is.null(bert_result_val())) return(NULL)

      bertpreds <- try(read.csv(paste0(tmpsession, "_predval.csv")))
      if (inherits(bertpreds, "try-error")) {
        # values$bertMsg <- "BERT error - try again"
        values$bertMsg <- try(paste(readLines(paste0(tmpsession, "_log")), collapse = "\n"))
        values$bertValstats <- NULL
        values$bertValPlot <- NULL
        values$bertLastUnsaved <- NULL
        values$bertMsgHyperpar <- NULL
        values$bertLastHyperpar <- NULL
        return(NULL)
      }

      datruth <- tmpdat[tmpvalid, "label"]
      dapred <- bertpreds$bertpred
      dametrics <- metricsTable(datruth, dapred)
      damsg <- paste0(
        "Unsaved model (", tmpmodel, ")\n\n",
        "BERT validation stats:\n",
        "* Accuracy    : ", round(100 * mean(datruth == dapred), 2), "%\n",
        "* Macro F1    : ", sprintf("%f", mean(as.numeric(dametrics$F1))), "\n",
        "* Weighted F1 : ", sprintf("%f", weighted.mean(as.numeric(dametrics$F1),
                                                        dametrics$N.cases)), "\n")
      hypermsg <- bertHyperToMsg(bert_hyperpars())
      writeLines(RJSONIO::toJSON(bert_hyperpars()),
                 paste0(tmpsession, "/tiggertrain.json"))

      values$bertValstats <- dametrics
      values$bertMsg <- damsg
      values$bertValPlot <- bertPlotFromPath(tmpsession)
      values$bertLastUnsaved <- list(
        metrics = dametrics, msg = damsg, session = tmpsession)
      values$bertMsgHyperpar <- hypermsg
      values$bertLastHyperpar <- bert_hyperpars()
    })

    values$bertMsg <- "BERT is training"
    values$bertValstats <- NULL
    values$bertValPlot <- NULL
    values$bertLastUnsaved <- NULL
    values$bertMsgHyperpar <- NULL
    values$bertLastHyperpar <- NULL

    ## Return something other than the promise so shiny remains responsive
    NULL
  })

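  ## The block above follows the usual promises pipeline: future() runs the
  ## shell command off the main R process, %...>% forwards its result to a
  ## reactiveVal, catch() handles errors, and finally() performs cleanup either
  ## way. A minimal sketch of the same pattern (illustrative only):
  # res <- reactiveVal()
  # p <- future({ Sys.sleep(1); "done" }) %...>% res
  # p <- catch(p, function(e) showNotification(e$message))
  # p <- finally(p, function() message("cleanup"))
  # NULL  # return something other than the promise to keep the session responsive
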
  ## Automatic check of BERT training status
  observe({
    if (bert_nclicks() == 0) return(NULL)
    isolate({
      tmpsession <- paste0("tiggerbert_", ok.labelcol(), "_", session$token)
      ## Check for checkpoint dirs
      checkdirs <-
        length(dir(paste0(tmpsession, "_train"), pattern = "^checkpoint"))
      # fire_running(bert_status_file,
      #              min(100, round(100 * checkdirs / input$bertNeval, 1)))
      # dastate <- try(RJSONIO::fromJSON(paste0(tmpsession, "_train/trainer_state.json")))
      # if (!inherits(dastate, "try-error"))
      #   fire_running(bert_status_file,
      #                round(100 * as.numeric(dastate$epoch) / input$bertEpochs, 1))
      try({
        dapoints <- dir(paste0(tmpsession, "_train"),
                        pattern = "trainer_state[.]json$", recursive = TRUE)
        dapoints_num <- as.numeric(gsub("^checkpoint-(\\d+)/.*$", "\\1", dapoints))
        statefile <- paste0(
          tmpsession, "_train/", dapoints[which.max(dapoints_num)])
        if (file.exists(statefile)) {
          state <- RJSONIO::fromJSON(statefile)
          fire_running(bert_status_file,
                       round(100 * as.numeric(state$epoch) / input$bertEpochs, 1))
        }
      })
    })
    if (checkdirs < isolate(input$bertNeval) && file.exists(paste0(tmpsession, "_done"))) {
      values$bertMsg <- "BERT Error - try again"
      # values$bertMsg <- paste(readLines(paste0(tmpsession, "_log")), collapse = "\n")
    } else {
      invalidateLater(1e3)
      loss_progress <- ""
      if (checkdirs > 0) {
        ## Get training diagnostics
        try({
          dapoints <- dir(paste0(tmpsession, "_train"),
                          pattern = "trainer_state[.]json$", recursive = TRUE)
          dapoints_num <- as.numeric(gsub("^checkpoint-(\\d+)/.*$", "\\1", dapoints))
          statefile <- paste0(
            tmpsession, "_train/", dapoints[which.max(dapoints_num)])
          if (file.exists(statefile)) {
            state <- RJSONIO::fromJSON(statefile)
            logs <- data.table::rbindlist(lapply(state$log_history, as.list),
                                          fill = TRUE)
            logs <- logs[, .(loss = max(loss, na.rm = TRUE),
                             eval_loss = max(eval_loss, na.rm = TRUE)), by = epoch]
            loss_progress <- paste0(
              "\n* Epoch : ", sprintf("%.02f", logs$epoch),
              "\ttrain loss : ", sprintf("%.05f", logs$loss),
              "\teval loss : ", sprintf("%.05f", logs$eval_loss), collapse = "")
          }
        })
      }
      values$bertMsg <-
        ifelse(checkdirs == 0, "Pre-processing...",
               paste0(get_status(bert_status_file), loss_progress))
    }
    values$bertValstats <- NULL
    values$bertValPlot <- NULL
    values$bertLoadSaved <- NULL
    values$bertMsgHyperpar <- NULL
  })

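  ## For reference, log_history entries in trainer_state.json (as written by the
  ## HuggingFace Trainer) typically alternate training and evaluation records,
  ## e.g. (illustrative values):
  # list(list(epoch = 0.5, loss = 0.69, learning_rate = 2e-05, step = 50),
  #      list(epoch = 0.5, eval_loss = 0.58, eval_runtime = 1.2, step = 50))
  ## so the rbindlist()/max() aggregation above reduces them to one
  ## train-loss / eval-loss pair per epoch.
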
  ## Action: show last BERT model
  observeEvent(input$bertLast, {
    values$bertMsg <- values$bertLastUnsaved$msg
    values$bertValstats <- values$bertLastUnsaved$metrics
    values$bertValPlot <- bertPlotFromPath(values$bertLastUnsaved$session)
    values$bertMsgHyperpar <- bertHyperToMsg(values$bertLastHyperpar)
    values$bertLoadSaved <- NULL
  })

  ## Action: save BERT model
  observeEvent(input$bertSave, {
    tmpsession <- paste0("tiggerbert_", ok.labelcol(), "_", session$token)
    if (!file.exists(tmpsession))
      return(NULL)

    if (!grepl("\\S", input$bertSaveName)) {
      tmpname <- paste0(
        ok.labelcol(), "_", gsub("\\D+$", "", gsub("\\D", "-", Sys.time())))
    } else
      tmpname <- cleanFileName(input$bertSaveName)

    savedir <- paste0("tiggerbert_saved_", tmpname)
    system(paste("cp -R", tmpsession, savedir))
    system(paste0("cp ", tmpsession, "_dat_valid.csv ",
                  savedir, "/tiggerbert_dat_valid.csv"))
    system(paste0("cp ", tmpsession, "_predval.csv ",
                  savedir, "/tiggerbert_pred_valid.csv"))

    system(paste0("rm -R ", tmpsession, "*"))
    values$bertMsg <- paste0("BERT model successfully saved: ", tmpname)
    values$bertValstats <- NULL
    values$bertValPlot <- NULL
    values$bertMsgHyperpar <- NULL

    if (is.null(values$bertSaveTrigger)) {
      values$bertSaveTrigger <- 0
    } else
      values$bertSaveTrigger <- values$bertSaveTrigger + 1
    values$bertLastUnsaved <- NULL
    updateTextInput(session, "bertSaveName", value = "")
  })

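  ## Note on the default save name above: gsub("\\D", "-", Sys.time()) turns
  ## the timestamp into dash-separated digits, so an unnamed save is stored
  ## under a directory like the following (illustrative date):
  # gsub("\\D+$", "", gsub("\\D", "-", Sys.time()))
  # # e.g. "2023-06-14-15-04-05", saved as tiggerbert_saved_<scheme>_2023-06-14-15-04-05
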
  ## Actions: load saved model (list updated only with new models)
  observeEvent(values$bertSaved, {
    if (!length(values$bertSaved)) return(NULL)
    tmpsaved <- values$bertSaved[! values$bertSaved %in% values$bertSavedAll]

    lapply(tmpsaved, function(dabert) {
      ############
      ## Actions to load saved model
      observeEvent(input[[paste0("saved_", dabert)]], {
        bertconf <- RJSONIO::fromJSON(paste0(
          "tiggerbert_saved_", dabert, "/config.json"))
        truth <- read.csv(paste0(
          "tiggerbert_saved_", dabert, "/tiggerbert_dat_valid.csv"))$label
        pred <- read.csv(paste0(
          "tiggerbert_saved_", dabert, "/tiggerbert_pred_valid.csv"))$bertpred

        dametrics <- metricsTable(truth, pred)
        values$bertValstats <- dametrics
        values$bertMsg <- paste0(
          "Saved model ", dabert, " (", bertconf[["_name_or_path"]], ")\n\n",
          "BERT validation stats:\n",
          "* Accuracy    : ", round(100 * mean(truth == pred), 2), "%\n",
          "* Macro F1    : ", sprintf("%f", mean(as.numeric(dametrics$F1))), "\n",
          "* Weighted F1 : ", sprintf("%f", weighted.mean(as.numeric(dametrics$F1),
                                                          dametrics$N.cases)), "\n")

        ## Hyperparameters
        tiggertrain_file <- paste0("tiggerbert_saved_", dabert, "/tiggertrain.json")
        if (file.exists(tiggertrain_file)) {
          try({
            values$bertMsgHyperpar <- bertHyperToMsg(RJSONIO::fromJSON(tiggertrain_file))
          })
        } else
          values$bertMsgHyperpar <- "Training parameters not saved."

        values$bertValPlot <-
          bertPlotFromPath(paste0("tiggerbert_saved_", dabert))
        values$bertLoadSaved <- dabert
      })

      ############
      ## Actions to copy hyperparameters
      observeEvent(input[[paste0("bert_copypars_", dabert)]], {
        tiggertrain_file <- paste0("tiggerbert_saved_", dabert, "/tiggertrain.json")
        if (!file.exists(tiggertrain_file)) return(NULL)
        try({
          tmppars <- RJSONIO::fromJSON(tiggertrain_file)
          updateSelectInput(session, "bertModel", selected = tmppars$bertModel)
          updateCheckboxInput(session, "bertBest", value = tmppars$bertBest)
          numinputs <- c("bertEpochs", "bertLrate", "bertWdecay",
                         "bertBatchsize", "bertGradacc")
          for (iinput in numinputs)
            updateNumericInput(session, iinput, value = tmppars[[iinput]])
        })
      })

      ############
      ## Actions to delete saved model (with confirm)
      observeEvent(input[[paste0("bert_delete_", dabert)]], {
        showModal(modalDialog(
          title = "Delete BERT model",
          paste0("Confirm delete: ", dabert,
                 " (model and predictions will be deleted)."),
          footer = tagList(actionButton(paste0("bert_delete_confirm_", dabert), "Delete"),
                           modalButton("Cancel"))))
      })

      observeEvent(input[[paste0("bert_delete_confirm_", dabert)]], {
        system(paste0("rm -R tiggerbert_saved_", dabert))
        system(paste0(
          "rm ", values$conf$datadir, values$conf$dataname, "_bertpred_",
          dabert, ".feather"))
        # values$bertLoadSaved <- NULL
        if (is.null(values$bertSaveTrigger)) {
          values$bertSaveTrigger <- 0
        } else
          values$bertSaveTrigger <- values$bertSaveTrigger + 1
        removeModal()
        values$bertMsg <- NULL
        values$bertValstats <- NULL
        values$bertMsgHyperpar <- NULL
      })

      ############
      ## Actions to infer on current data from saved BERT (async)
      observeEvent(input[[paste0("bert_infer_", dabert)]], {
        if (bertpred_nclicks() > 0 | bpall_nclicks() > 0) {
          values$bertMsg <- "Inference already running"
          values$bertMsgHyperpar <- NULL
          values$bertValPlot <- NULL
          values$bertValstats <- NULL
          return(NULL)
        }
        bertpred_nclicks(1)

        saved_model <- paste0("tiggerbert_saved_", dabert)
        out_path <- paste0(
          values$conf$datadir, values$conf$dataname, "_bertpred_",
          dabert, ".feather")

        if (file.exists(out_path)) {
          old_infer <- arrow::read_feather(out_path)
        } else
          old_infer <- NULL

        to_infer <- ok.data()[[values$conf$idcol]]
        to_infer <- to_infer[! to_infer %in% old_infer[[values$conf$idcol]]]
        to_infer_row <- match(to_infer, ok.data()[[values$conf$idcol]])

        if (!length(to_infer)) {
          values$bertMsg <- paste0(
            "All current ", nrow(ok.data()),
            " texts already inferred by ", dabert)
          values$bertMsgHyperpar <- NULL
          values$bertValPlot <- NULL
          values$bertValstats <- NULL
          bertpred_nclicks(0)
          return(NULL)
        }

        values$bertValPlot <- NULL
        values$bertValstats <- NULL
        values$bertMsgHyperpar <- NULL
        values$bertMsg <- paste0(
          "Preparing inference on ", length(to_infer), " new texts")
        tmp_dat <- paste0("tiggerbert_infer_", session$token, ".feather")
        arrow::write_feather(
          data.frame(
            ok.data()[to_infer_row, values$conf$idcol, drop = FALSE],
            text = ok.nicetext()[to_infer_row]),
          tmp_dat)

        values$bertMsg <- paste0(
          "Inferring ", dabert, " predictions on ",
          length(to_infer), " new texts")
        infer_cmd <- paste0(
          values$conf$python, " gobert_infer.py -m ", saved_model,
          " -o ", out_path,
          " -d ", tmp_dat,
          " -i ", values$conf$idcol,
          " -l ", paste0("tiggerbert_infer_", session$token, ".log"),
          " -G ", as.numeric(values$conf$use_gpu),
          " -b ", 128)
        bertpred_res <- reactiveVal()
        bertpred_async <- future({
          system(infer_cmd)
          system(paste0("rm ", tmp_dat))

          if (!is.null(old_infer)) {
            new_infer <- arrow::read_feather(out_path)
            arrow::write_feather(rbind(old_infer, new_infer), out_path)
          }
        }) %...>% bertpred_res
        bertpred_async <- catch(bertpred_async, function(error) {
          bertpred_nclicks(0)
          values$bertMsg <- paste0("BERT inference error: ", error)
          system(paste0("rm ", tmp_dat))
        })
        bertpred_async <- finally(bertpred_async, function() {
          bertpred_nclicks(0)
          values$bertMsg <-
            "Inference complete!\nRe-import data in Project/Sample panel to use the predictions for tagging."
          values$bertValPlot <- NULL
          values$bertValstats <- NULL
          values$bertMsgHyperpar <- NULL
        })
        NULL
      })
    })
    values$bertSavedAll <- c(values$bertSavedAll, tmpsaved)
  })

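  ## The per-model inference above is incremental: predictions are cached in
  ## <dataname>_bertpred_<model>.feather, only ids missing from that cache are
  ## sent to gobert_infer.py, and the new rows are appended. A minimal sketch
  ## of the same idea (illustrative, not run):
  # cached <- if (file.exists(out_path)) arrow::read_feather(out_path) else NULL
  # todo <- setdiff(ok.data()[[values$conf$idcol]], cached[[values$conf$idcol]])
  # # ... run inference on `todo` only, then rbind() the new rows onto `cached`
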
  ## Automatic check of BERT inference progress
  observe({
    if (bertpred_nclicks() == 0) return(NULL)
    invalidateLater(1e3)
    logfile <- paste0("tiggerbert_infer_", isolate(session$token), ".log")
    if (file.exists(logfile)) {
      values$bertMsg <- readLines(logfile)[1]
      values$bertValPlot <- NULL
      values$bertValstats <- NULL
      values$bertMsgHyperpar <- NULL
    }
  })


  ##############################################################################
  ## Export panel
  ##############################################################################

  ## Export tags and full model predictions
  output$dlTagSave <-
    downloadHandler(
      function()
        paste0(Sys.Date(), "-activeTigger-", ok.labelcol(), ".", input$dlTagFormat),
      function(con) {
        if (is.null(ok.data())) return(NULL)
        dadat <- ok.data()[, values$conf$idcol, drop = FALSE]
        if ("tags" %in% input$dlTagSelect)
          dadat[[ok.labelcol()]] <- values$label
        if ("comments" %in% input$dlTagSelect) if (ok.commentCol() != "(None)")
          dadat[, ok.commentCol()] <- values$comment
        if ("predictions" %in% input$dlTagSelect) if (ok.train.possible()) {
          dadat$tigger.predict <- ok.predlab1()
          dadat$tigger.predprob <- apply(ok.pred(), 1, max)
          tmp_pred <- ok.pred()
          colnames(tmp_pred) <- paste0("tiggerpred_", colnames(tmp_pred))
          dadat <- cbind(dadat, tmp_pred)
        }
        othercols <- input$dlTagSelect
        othercols <- othercols[! othercols %in% c("tags", "comments", "predictions")]
        if (length(othercols))
          dadat <- cbind(dadat, ok.data()[, othercols, drop = FALSE])

        if (input$dlTagFormat == "csv") {
          write.csv(dadat, row.names = FALSE, con)
        } else
          arrow::write_feather(dadat, con)
      })

  ## Export embeddings (ft, sb, umap, t-sne)
  output$dlEmbedSave <- downloadHandler(
    function()
      paste0(Sys.Date(), "-activeTigger-emb-",
             paste(input$dlEmbedSelect, collapse = "-"),
             ".", input$dlEmbedFormat),
    function(file) {
      if (is.null(input$dlEmbedSelect)) return(NULL)
      export <- ok.data()[, values$conf$idcol, drop = FALSE]
      if ("ft" %in% input$dlEmbedSelect)
        export <- data.frame(export, ok.data()[, values$cols_ft])
      if ("sb" %in% input$dlEmbedSelect)
        export <- data.frame(export, ok.data()[, values$cols_sb])
      if (any(c("t-sne", "umap") %in% input$dlEmbedSelect)) {
        tmpvisu <- ok.visu()
        tmpalgo <- input$dlEmbedSelect[input$dlEmbedSelect %in% c("t-sne", "umap")]
        colnames(tmpvisu) <- paste0(gsub("-", "", tmpalgo), 1:2)
        export <- data.frame(export, tmpvisu)
      }
      if (input$dlEmbedFormat == "csv") {
        write.csv(export, file, row.names = FALSE)
      } else if (input$dlEmbedFormat == "feather") {
        arrow::write_feather(export, file)
      }
    })

  ## Export BERT predictions

  output$dlBPMsg <- reactive(values$dlBPMsg)


  # output$dlBertPredCsv <-
  #   downloadHandler(paste0(Sys.Date(), "-bertpred-", ok.labelcol(), ".csv"),
  #                   content = function(con) {
  #                     if (is.null(ok.data())) return(NULL)
  #                     dadat <- ok.data()[, c(values$conf$idcol, ok.labelcol())]
  #                     dadat[[ok.labelcol()]] <- values$label
  #                     if (ok.commentCol() != "(None)") {
  #                       dadat[, ok.commentCol()] <- values$comment
  #                     }
  #                     if (ok.train.possible()) {
  #                       dadat$tigger.predict <- ok.predlab1()
  #                       dadat$tigger.predprob <- apply(ok.pred(), 1, max)
  #                     }
  #                     dadat <- cbind(dadat, ok.pred())
  #                     write.csv(dadat, row.names = FALSE, con)
  #                   })

  ############
  ## Action to infer on ALL data from saved BERT (async)
  bpall_res <- reactiveVal()
  observeEvent(input$dlBPInfer, {
    if (bpall_nclicks() > 0 | bertpred_nclicks() > 0) {
      values$dlBPMsg <- "Inference already running"
      bpall_res(NULL)
      return(NULL)
    }
    bpall_nclicks(1)
    bpall_res_async <- reactiveVal()
    dabert <- input$dlBPSelect

    saved_model <- paste0("tiggerbert_saved_", dabert)
    out_path <- paste0(
      values$conf$datadir, values$conf$dataname, "_bertpredall_",
      dabert, ".feather")
    tmp_dat <- paste0("tiggerbert_inferall_", session$token, ".feather")
    tmp_log <- paste0("tiggerbert_inferall_", session$token, ".log")
    tmp_done <- paste0("tiggerbert_inferall_", session$token, "_done")
    if (file.exists(tmp_done))
      system(paste0("rm ", tmp_done))

    if (file.exists(out_path)) {
      values$dlBPMsg <- "Inference already complete, ready for download"
      bpall_nclicks(0)
      bpall_res(out_path)
      return(NULL)
      # old_infer <- arrow::read_feather(out_path)
    }

    fulldat <- arrow::read_feather(paste0(
      values$conf$datadir, values$conf$dataname, ".feather"))
    values$dlBPMsg <- paste0(
      "Preparing inference on ", nrow(fulldat), " texts")
    if (length(values$conf$textcols) > 1) {
      fulldat$text <- do.call(paste, c(
        fulldat[, values$conf$textcols, drop = FALSE], sep = "\n\n"))
    } else {
      fulldat$text <- fulldat[, values$conf$textcols]
    }
    arrow::write_feather(fulldat[, c(values$conf$idcol, "text")], tmp_dat)

    infer_cmd <- paste0(
      values$conf$python, " gobert_infer.py -m ", saved_model,
      " -o ", out_path,
      " -d ", tmp_dat,
      " -i ", values$conf$idcol,
      " -l ", tmp_log,
      " -G ", as.numeric(values$conf$use_gpu),
      " -b ", 128,
      "; touch ", tmp_done)

    bpall_async <- future({
      system(infer_cmd)
      system(paste0("rm ", tmp_dat))
    }) %...>% bpall_res_async

    bpall_async <- catch(bpall_async, function(error) {
      values$dlBPMsg <- paste0("BERT inference error: ", error)
      system(paste0("rm ", tmp_dat))
      system(paste0("rm ", tmp_done))
    })

    bpall_async <- finally(bpall_async, function() {
      bpall_nclicks(0)
      values$dlBPMsg <-
        "Inference complete, ready for download"
      bpall_res(out_path)
    })
    NULL
  })

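  ## When several text columns are configured, the do.call(paste, ...) call
  ## above concatenates them row-wise with a blank line in between,
  ## e.g. (illustrative):
  # df <- data.frame(title = c("A", "B"), body = c("x", "y"))
  # do.call(paste, c(df[, c("title", "body"), drop = FALSE], sep = "\n\n"))
  # # [1] "A\n\nx" "B\n\ny"
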
  ## Automatic check of BERT all prediction progress
  observe({
    if (bpall_nclicks() == 0) return(NULL)
    invalidateLater(1e3)
    logfile <- paste0("tiggerbert_inferall_", isolate(session$token), ".log")
    if (file.exists(logfile))
      values$dlBPMsg <- readLines(logfile)[1]
  })

  ## Show BERTpred download button when data is ready
  output$dlBPDlButton <- renderUI({
    if (is.null(bpall_res())) return(NULL)
    downloadButton("dlBPDl", NULL)
  })

  output$dlBPDl <- downloadHandler(
    function()
      paste0(Sys.Date(), "-tiggerbert-", values$conf$dataname,
             input$dlBPSelect, ".", input$dlBPFormat),
    function(file) {
      if (is.null(bpall_res())) return(NULL)
      tmp_bert <- gsub("^.*_bertpredall_(.*)[.]feather$", "\\1", bpall_res())
      if (tmp_bert != input$dlBPSelect) {
        values$dlBPMsg <- "BERT model changed - predict again or select current model"
        return(NULL)
      }
      tmp <- arrow::read_feather(bpall_res())
      if (input$dlBPFormat == "csv") {
        write.csv(tmp, file, row.names = FALSE)
      } else if (input$dlBPFormat == "feather") {
        arrow::write_feather(tmp, file)
      }
    }
  )

  ##############################################################################
  ## Delete files at end of session
  onStop(function() {
    if (file.exists(bert_status_file)) unlink(bert_status_file)
    if (length(dir(pattern = session$token)))
      system(paste0("rm -R *", session$token, "*"))
  })

})