pkgs <- c("arrow", "class", "DT", "future", "ggplot2", "glmnet", "htmlTable",
          "LiblineaR", "Matrix", "Metrics", "promises", "quanteda",
          "quanteda.textmodels", "ranger", "rlang", "Rtsne",
          "RJSONIO", "shiny", "SparseM", "stringi", "uwot")
pkgs <- pkgs[! pkgs %in% rownames(installed.packages())]
if (length(pkgs)) {
  warning(paste0("Packages ", paste0(pkgs, collapse = " , "), " not available. ",
                 "App might crash."))
}
# for (i in seq_along(pkgs)) install.packages(pkgs[i])

verbose <- TRUE
# sink("ActiveTigger.log")

options(shiny.maxRequestSize = 30*1024^3) # File upload size

modelnames <- read.csv("modelnames.csv")
modelnames_labels <- modelnames$short
names(modelnames_labels) <- modelnames$short_lang

library(quanteda)
library(ggplot2)
library(promises)
library(future)
plan(multisession)


################################################################################
## Global functions
################################################################################

label2hash <- function(label) rlang::hash(label)

cleanFileName <- function(name)
  gsub('\\s|[/]|[\\]|[:]|[*]|[?]|["]|[<]|[>]|[+]|[$]|[=]', "_",
       gsub("^\\s+|\\s+$", "", name))

wtdF1 <- function(truth, pred) {
  pred <- pred[!is.na(truth)]
  truth <- truth[!is.na(truth)]
  sum(sapply(unique(truth), function(ilab)
    Metrics::fbeta_score(truth == ilab, pred == ilab) *
      sum(truth == ilab) / length(truth)), na.rm = TRUE)
}
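
## Illustration (hypothetical toy vectors): wtdF1() first drops texts whose gold
## label is NA, then averages the per-class F1 scores weighted by each class's
## share of the remaining gold labels:
##   wtdF1(truth = c("a", "a", "b", NA), pred = c("a", "b", "b", "b"))
##   # = F1("a") * 2/3 + F1("b") * 1/3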

metricsTable <- function(truth, pred) {
  do.call(rbind, lapply(sort(unique(truth)), function(ilab) {
    tmptrue <- truth == ilab
    tmppred <- pred == ilab
    tmp <- data.frame(
      "Tag" = ilab,
      "N.cases" = sum(tmptrue),
      "N.predict" = sum(tmppred),
      "Accuracy" = sprintf("%f", round(Metrics::accuracy(tmptrue, tmppred), 6)),
      "Precision" = sprintf("%f", round(Metrics::precision(tmptrue, tmppred), 6)),
      "Recall" = sprintf("%f", round(Metrics::recall(tmptrue, tmppred), 6)),
      "F1" = sprintf("%f", round(Metrics::fbeta_score(tmptrue, tmppred), 6)))
    tmp$F1[tmp$F1 == "NaN"] <- sprintf("%f", 0)
    tmp
  }))
}

bertPlotFromPath <- function(path) {
  if (!file.exists(paste0(path, "/trainer_state.json"))) return(NULL)
  state <- RJSONIO::fromJSON(paste0(path, "/trainer_state.json"))
  logs <- data.table::rbindlist(lapply(state$log_history, as.list), fill = TRUE)
  epochmin <- logs$epoch[which.min(logs$eval_loss)]
  
  gdat <- data.table::rbindlist(
    use.names = FALSE,
    list(
      cbind("train", logs[!is.na(logs$loss), c("epoch", "loss")]),
      cbind("valid", logs[!is.na(logs$eval_loss), c("epoch", "eval_loss")])
    ))
  colnames(gdat) <- c("data", "epoch", "loss")
  
  ggplot(gdat, aes_string("epoch", "loss", color = "data", shape = "data")) +
    geom_point(size = 2) + geom_line() +
    geom_vline(xintercept = epochmin, linetype = 2) +
    theme_bw() + expand_limits(y=0)
}

bertHyperToMsg <- function(pars) {
  if (is.null(pars)) return(NULL)
  paste0("Scheme : ", pars$bertScheme, "; ",
         "Model : ", pars$bertModel, "\n* ",
         "Epochs : ", pars$bertEpochs, " ; ",
         "Lrate : ", pars$bertLrate, " ; ",
         "Wdecay : ", pars$bertWdecay, " ; ",
         "Batch size : ", pars$bertBatchsize, " ; ",
         "Grad. accum. : ", pars$bertGradacc, "\n* ",
         "Keep best : ", pars$bertBest, " ; ",
         "ValidFrac : ", pars$bertValidFrac, " ; ",
         "ValidSeed : ", pars$bertValidSeed, " \n* ",
         pars$bertNtrain, " training samples : ",
         paste0(names(pars$bertTrainTable), " (",
                as.numeric(pars$bertTrainTable), ")",
                collapse = " ; "))
}

get_status <- function(dafile)
  scan(dafile, what = "character", sep="\n", quiet = TRUE)
set_status <- function(dafile, msg) write(msg, dafile)
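
## These two helpers implement the plain-text status files used for progress
## reporting: long-running future() blocks call set_status() to overwrite a
## small per-session file, and the main process polls the same file to update
## the diagnostics box (see the data import observers below).
## Hypothetical example:
##   set_status("tigger_mytoken_data", "Importing main data file...")
##   get_status("tigger_mytoken_data")  # "Importing main data file..."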



################################################################################
## Main server function
################################################################################

# options(shiny.maxRequestSize=2^34) # Max filesize

shinyServer(function(input, output, session) {
  
  values <- reactiveValues()
  values$visuGo <- FALSE
  values$dfmGo <- FALSE
  # queryTrigger <- reactiveVal(0)
  # predlabTrigger <- reactiveVal(0)
  initDataTrigger <- reactiveVal(0)
  queryNext <- reactiveVal(0)
  trainTrigger <- reactiveVal(0)
  predTrigger <- reactiveVal(0)
  diagTrigger <- reactiveVal(0)
  ready2tag <- reactiveVal(0)
  trainCountdown <- reactiveVal(0)
  
  ok.data <- reactiveVal()
  ok.data.async <- reactiveVal()
  ok.data.running <- reactiveVal(FALSE)

  bpall_nclicks <- reactiveVal(0)
  bertpred_nclicks <- reactiveVal(0)
  
  ## NEW use_regressors list as values$new_use_regressors
  observeEvent(input$use_regressors, {
    values$new_use_regressors <- input$use_regressors
    cat("DEBUG new_use_regressors: ", paste(input$use_regressors, collapse = " "), "\n")
  })

  ## NEW trainCountdown with input$trainCountdown
  ## Policy: 0, don't train ; 1, trigger train on tag ; >1, decrement on tag
  observeEvent(input$trainCountdown, trainCountdown(input$trainCountdown))
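  ## For example, a countdown value of 5 means the next tags only decrement the
  ## counter and automatic retraining fires once it reaches 1, while a value of
  ## 0 disables automatic retraining entirely (see the policy comment above).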

  ####################
  ## Projects and data
  
  ## Read config files, update UI forms accordingly
  ## This should happen only once
  observe({
    if (verbose)
      cat("DEBUG read project configs\n")
    # values$conf <- RJSONIO::fromJSON("conf.json")
    projfiles <- dir(pattern = "^tigger_.*[.]json$")
    isolate({
      if (!length(projfiles)) {
        values$conf <- NULL
        return(NULL)
      }
      values$confnames <- gsub("^tigger_(.*)[.]json$", "\\1", projfiles)
      
      values$conf <- RJSONIO::fromJSON(projfiles[1])
    })
  })
  
  ## When projects added, update selector
  observeEvent(values$confnames, {
    if (verbose)
      cat("DEBUG update selectProject from confnames\n")
    updateSelectInput(session, "selectProject", choices = values$confnames)
  })
  
  ## Event from project change
  observeEvent(input$selectProject, {
    if (is.null(input$selectProject)) return(NULL)
    if (!nchar(input$selectProject)) return(NULL)
    if (verbose)
      cat("DEBUG project change step 1\n")
    # values$project <- input$selectProject
    # values$conf <- values$confs[[values$project]]
    values$conf <- RJSONIO::fromJSON(
      paste0("tigger_", input$selectProject, ".json"))
    ok.data(NULL)
    ok.visu(NULL)
    initDataTrigger(initDataTrigger() + 1)
    
    if (verbose)
      cat("DEBUG project change step 2\n")
    if (!is.null(values$conf$dataNrows))
      updateNumericInput(session, "dataNrows", value = as.numeric(values$conf$dataNrows))
    if (!is.null(values$conf$dataSkipRows))
      updateNumericInput(session, "dataSkipRows", value = as.numeric(values$conf$dataSkipRows))
    
    #### Python ####
    ## Default use_python if not found in conf file: FALSE
    if (is.null(values$conf$use_python))
      values$conf$use_python <- FALSE
    updateCheckboxInput(session, "sys_use_python", value = values$conf$use_python)
    
    ## Set default python if not found in conf file
    if (is.null(values$conf$python))
      values$conf$python <- "python3"
    updateTextInput(session, "sys_which_python", value = values$conf$python)
    
    values$python_ok <- FALSE
    if (values$conf$use_python) {
      ## Check whether python is working
      pytest <- try(system(paste(values$conf$python, "--version"),
                           intern = TRUE))
      if (inherits(pytest, "try-error")) {
        showNotification(paste(
          "Python path `", values$conf$python,
          "` not valid, try changing it in Project/System tab"),
          duration = 10, type = "error")
      } else
        values$python_ok <- TRUE
    }
    if (verbose)
      cat("Python ok:", values$python_ok, "\n")
    
    
    #### GPU ####
    if (!is.null(values$conf$use_gpu))
      updateCheckboxInput(session, "sys_use_gpu", value = values$conf$use_gpu)
    
    #### Spacy ####
    if (!is.null(values$conf$use_spacy))
      updateCheckboxInput(session, "sys_use_spacy", value = values$conf$use_spacy)
    updateTextInput(session, "sys_use_spacy_model", value = values$conf$use_spacy_model)
    
    #### fastText ####
    if (!is.null(values$conf$use_ft))
      updateCheckboxInput(session, "sys_use_ft", value = values$conf$use_ft)
    updateTextInput(session, "sys_use_ft_model", value = values$conf$use_ft_model)
    
    #### SBERT ####
    if (!is.null(values$conf$use_sb))
      updateCheckboxInput(session, "sys_use_sb", value = values$conf$use_sb)
    updateTextInput(session, "sys_use_sb_model", value = values$conf$use_sb_model)
    
  })

  
  ##############################################################################
  ## Reactives : project settings UI
  ##############################################################################
  
  output$sys_datadir <- renderText(values$conf$datadir)
  output$sys_datafile <- renderText(paste0(values$conf$dataname, ".feather"))
  output$sys_var_id <- renderUI(renderText(values$conf$idcol))
  output$sys_var_text <- renderUI(renderText(values$conf$textcols))
  output$sys_var_tag <- renderUI(renderText(values$conf$tagcols))
  output$sys_var_context_ui <- renderUI(selectInput(
    "sys_var_context", NULL, values$data_orig_colnames, values$conf$contextcols,
    multiple = TRUE))
  output$sys_var_comm_ui <- renderUI(selectInput(
    "sys_var_comm", NULL, c("(none)", values$data_orig_colnames),
    values$conf$commcol))
  
  output$sys_ex_lang_ui <- renderUI(selectInput(
    "sys_ex_lang", NULL, modelnames_labels, "en"
  ))
  output$sys_ex_spacy <- renderText(
    modelnames$spacy_name[modelnames$short == input$sys_ex_lang])
  output$sys_ex_spacy_dl <- renderText(paste0(
    "Download with python module: ", values$conf$python, " -m spacy download ",
    modelnames$spacy_name[modelnames$short == input$sys_ex_lang]))
  output$sys_ex_ft <- renderText(
    modelnames$fasttext_name[modelnames$short == input$sys_ex_lang])
  output$sys_ex_ft_dl <- renderText(paste0(
    "Manual download link : ",
    modelnames$fasttext_url[modelnames$short == input$sys_ex_lang]))
  output$sys_ex_sb <- renderText(
    ifelse(input$sys_ex_lang %in% c("ar", "zh", "nl", "en", "fr", "de",
                                    "it", "ko", "pl", "pt", "ru", "es", "tr"),
           "distiluse-base-multilingual-cased-v1",
           "distiluse-base-multilingual-cased-v2"))

  
  ## Save system changes on button click, with modal
  output$save_config_msg <- renderUI({
    req(values$save_config_msg)
    p(strong(values$save_config_msg))
  })
  output$save_embed_msg <- renderUI({
    tmp <- ""
    if (input$sys_use_spacy) if (!identical(input$sys_use_spacy_model, values$conf$use_spacy_model))
      tmp <- paste0(tmp, "<li> Changing the spacy model will delete the current tokenized text and fasttext word embeddings, to prevent conflicts.</li>")
    if (input$sys_use_ft) if (!identical(input$sys_use_ft_model, values$conf$use_ft_model))
      tmp <- paste0(tmp, "<li> Changing the fasttext model will delete the current fasttext word embeddings, to prevent conflicts.</li>")
    if (input$sys_use_sb) if (!identical(input$sys_use_sb_model, values$conf$use_sb_model))
      tmp <- paste0(tmp, "<li> Changing the SBERT model will delete the current SBERT sentence embeddings, to prevent conflicts.</li>")
    
    if (nchar(tmp)) {
      return(HTML(paste("<br><p> <strong>Warning:</strong> <ul>", tmp, "</ul></p>")))
    } else
      return(NULL)
  })
  observeEvent(input$saveSystem, {
    values$save_config_msg <- NULL
    showModal(modalDialog(
      title = "Save project configuration",
      paste0("Click 'Save' to check and confirm the new configuration"),
      uiOutput("save_embed_msg"),
      uiOutput("save_config_msg"),
      footer = tagList(actionButton("save_conf_confirm", "Save"),
                       modalButton("Cancel"))))
  })
  observeEvent(input$save_conf_confirm, {
    newconf <- values$conf
    newconf$contextcols <- input$sys_var_context
    newconf$commcol <- input$sys_var_comm
    
    if (input$sys_use_python) {
      pytest <- try(system(paste(input$sys_which_python, "--version"),
                           intern = TRUE))
      if (inherits(pytest, "try-error")) {
        values$save_config_msg <- paste0(
          "Error: python path `", input$sys_which_python, "` not valid")
        return(NULL)
      }
    }
    newconf$use_python <- input$sys_use_python
    newconf$python <- input$sys_which_python
    newconf$use_gpu <- input$sys_use_gpu
    
    if (input$sys_use_spacy) {
      sptest <- system(paste0(
        newconf$python, " -m spacy info ", input$sys_use_spacy_model), intern = TRUE)
      if (length(sptest) == 0) {
        values$save_config_msg <- paste(
          "Error loading spacy, check that it is installed in the specified python env")
        return(NULL)
      }
      if (length(attr(sptest, "status"))) {
        values$save_config_msg <- paste(
          "Error loading spacy model, check that it has been downloaded")
        return(NULL)
      }
      newconf$use_spacy_model <- input$sys_use_spacy_model
    }
    newconf$use_spacy <- input$sys_use_spacy
    
    if (input$sys_use_ft) {
      if (!file.exists(input$sys_use_ft_model)) {
        values$save_config_msg <- paste(
          "Error loading fasttext model, check the specified path")
        return(NULL)
      }
      newconf$use_ft_model <- input$sys_use_ft_model
    }
    newconf$use_ft <- input$sys_use_ft
    
    if (input$sys_use_sb)
      newconf$use_sb_model <- input$sys_use_sb_model
    newconf$use_sb <- input$sys_use_sb
    
    # if (newconf$use_spacy) if (newconf$use_spacy_model != values$conf$use_spacy_model) {
    if (newconf$use_spacy) if (!identical(newconf$use_spacy_model, values$conf$use_spacy_model)) {
      file.remove(paste0(values$conf$datadir, values$conf$dataname, "_spa.feather"))
      file.remove(paste0(values$conf$datadir, values$conf$dataname, "_ft.feather"))
    }
    # if (newconf$use_ft) if (newconf$use_ft_model != values$conf$use_ft_model)
    if (newconf$use_ft) if (!identical(newconf$use_ft_model, values$conf$use_ft_model))
      file.remove(paste0(values$conf$datadir, values$conf$dataname, "_ft.feather"))
    # if (newconf$use_sb) if (newconf$use_sb_model != values$conf$use_sb_model)
    if (newconf$use_sb) if (!identical(newconf$use_sb_model, values$conf$use_sb_model))
      file.remove(paste0(values$conf$datadir, values$conf$dataname, "_sb.feather"))
    
    writeLines(RJSONIO::toJSON(newconf),
               paste0("tigger_", newconf$projectid, ".json"))
    values$conf <- newconf
    initDataTrigger(initDataTrigger() + 1)
    removeModal()
  })

  
  ##############################################################################
  ## Reactives : data
  ##############################################################################
  
  ok.commentCol <- reactive(values$conf$commcol)
  
  ## Current imported data (async)
  observeEvent(input$dataImport, {
    if (ok.data.running()) return(NULL)
    ## Save nrows to conf file on change
    if (!is.numeric(input$dataNrows) | !is.numeric(input$dataSkipRows)) return(NULL)
    values$conf$dataNrows <- input$dataNrows
    values$conf$dataSkipRows <- input$dataSkipRows
    if (!is.null(values$conf$projectid))
      writeLines(RJSONIO::toJSON(values$conf), paste0(
        "tigger_", values$conf$projectid, ".json"))
    initDataTrigger(initDataTrigger() + 1)
  })
  
  
  ## Read data event
  observeEvent(initDataTrigger(), {
    if (is.null(values$conf)) {
      ok.data(NULL)
      return(NULL)
    }
    if (initDataTrigger() == 0) return(NULL)
    if (verbose)
      cat("DEBUG enter ok.data\n")
    if (verbose)
      cat("DEBUG enter ok.data trigger", initDataTrigger(), "\n")
    ok.data.running(TRUE)
    
    ## Prepare variables for async launch
    coco <- values$conf
    coco$python_ok <- values$python_ok
    
    da_token <- session$token
    data_status <- paste0("tigger_", da_token, "_data")
    
    file_orig_colnames <- paste0(data_status, "_origcols")
    file_colstok <- paste0(data_status, "_tok")
    file_colsft <- paste0(data_status, "_ft")
    file_colssb <- paste0(data_status, "_sb")
    file_colsbertpred <- paste0(data_status, "_bertpred")
    da_missing <- paste0(data_status, "_bertpred_missing")
    
    projpath <- paste0(coco$datadir, coco$projectid)
    if (!file.exists(projpath))
      dir.create(projpath)
    
    ## Read data: async launch
    data_async <- future({
      #################
      ## Main data file
      
      set_status(data_status, "Importing main data file...")
      res <- arrow::read_feather(
        paste0(coco$datadir, coco$dataname, ".feather"))
      if (is.null(coco$dataNrows)) return(NULL)
      rowselect <-
        intersect(1:nrow(res), (1:(coco$dataNrows)) + coco$dataSkipRows)
      if (!length(rowselect)) return(NULL)
      res <- res[rowselect, , drop = FALSE]
      
      writeLines(colnames(res), file_orig_colnames)
      
      
      #################
      ## Tokenized text
      
      ## Import data : feather of two cols, one the id, other the tokenized
      set_status(data_status, "Importing tokenized text...")
      file_tok <- paste0(coco$datadir, coco$dataname, "_spa.feather")
      if (file.exists(file_tok)) {
        tokdat <- arrow::read_feather(file_tok)
      } else
        tokdat <- NULL
      tok_remaining <- res[[coco$idcol]]
      tok_remaining <-
        tok_remaining[! tok_remaining %in% tokdat[[coco$idcol]]]
      
      ## Tokenize remaining texts
      if (length(tok_remaining)) {
        if (!coco$python_ok | !coco$use_spacy) {
          ## If remaining texts to tokenize but no python, fall back on
          ## untokenized text
          ## TODO here use quanteda tokenizer with options?
          tokdat <- data.frame(
            res[[coco$idcol]],
            do.call(paste, c(res[match(tok_remaining, res[[coco$idcol]]),
                                 coco$textcols, drop = FALSE], sep = "\n\n")))
          colnames(tokdat) <- c(coco$idcol, "text_spa")
          set_status(data_status, "Importing tokenized text: no spacy ok")
        } else {
          ## If python present, tokenize with spacy
          set_status(data_status,
                     paste("Tokenizing", length(tok_remaining), "new texts"))
          tmpfile <- paste0("tigger_tok_", da_token)
          arrow::write_feather(
            data.frame(
              id = tok_remaining,
              text = do.call(
                paste, c(res[match(tok_remaining, res[[coco$idcol]]),
                             coco$textcols, drop = FALSE], sep = "\n\n"))),
            paste0(tmpfile, ".feather")
          )
          system(paste0(
            coco$python, " tokenize_spacy.py -d ", tmpfile, ".feather",
            " -m ", coco$use_spacy_model))
          tok_new <- arrow::read_feather(paste0(tmpfile, "_spa.feather"))
          colnames(tok_new)[colnames(tok_new) == "id"] <- coco$idcol
          tokdat <- rbind(tokdat, tok_new)
          arrow::write_feather(tokdat, file_tok)
          system(paste0("rm ", tmpfile, "*"))
          set_status(data_status, "Importing tokenized text: spacy ok")
        }
      }
      
      ## Save colname for tokenized text
      tmp_cols_tok <- colnames(tokdat)[colnames(tokdat) != coco$idcol]
      writeLines(tmp_cols_tok, file_colstok)
      
      set_status(data_status, "Importing tokenized text: wrote colnames")
      
      ## Merge with current data
      res <- merge(res, tokdat, by = coco$idcol,
                   all.y = FALSE, sort = FALSE)
      rm(tokdat)
      
      set_status(data_status, "Importing tokenized text: merged")
      
      #################
      ## FastText embeddings
      
      set_status(data_status, paste("Loading fastText embeddings"))
      
      ## FastText embeddings : feather file with id and embeddings
      file_ft <- paste0(coco$datadir, coco$dataname, "_ft.feather")
      if (file.exists(file_ft)) {
        ftdat <- arrow::read_feather(file_ft)
      } else
        ftdat <- NULL
      ft_remaining <- res[[coco$idcol]]
      ft_remaining <-
        ft_remaining[! ft_remaining %in% ftdat[[coco$idcol]]]
      
      ## Embed remaining texts, if FT in use
      if (length(ft_remaining)) {
        if (!coco$python_ok | !coco$use_ft) {
          ## If no python or FT, abort if not all rows already embedded
          ftdat <- NULL
        } else {
          set_status(data_status, paste(
            "Embedding", length(ft_remaining), "new texts with fastText"))
          tmpfile <- paste0("tigger_ft_", da_token)
          arrow::write_feather(
            data.frame(
              id = ft_remaining,
              text = do.call(
                paste, res[match(ft_remaining, res[[coco$idcol]]),
                           tmp_cols_tok, drop = FALSE])),
            paste0(tmpfile, ".feather")
          )
          system(paste0(
            coco$python, " embed_fasttext.py -d ", tmpfile, ".feather",
            " -m ", coco$use_ft_model))
          ft_new <- arrow::read_feather(paste0(tmpfile, "_ft.feather"))
          colnames(ft_new)[colnames(ft_new) == "id"] <- coco$idcol
          ftdat <- rbind(ftdat, ft_new)
          arrow::write_feather(ftdat, file_ft)
          system(paste0("rm ", tmpfile, "*"))
        }
      }
      
      if (!is.null(ftdat)) {
        ## Merge with current data
        writeLines(colnames(ftdat)[colnames(ftdat) != coco$idcol], file_colsft)
        res <- merge(res, ftdat, by = coco$idcol, all.y = FALSE, sort = FALSE)
        rm(ftdat)
      }
      
      
      #################
      ## SBERT embeddings
      
      set_status(data_status, "Loading SBERT embeddings")
      
      ## SBERT file : feather of id and embeddings
      file_sb <- paste0(coco$datadir, coco$dataname, "_sb.feather")
      if (file.exists(file_sb)) {
        sbdat <- arrow::read_feather(file_sb)
      } else
        sbdat <- NULL
      sb_remaining <- res[[coco$idcol]]
      sb_remaining <-
        sb_remaining[! sb_remaining %in% sbdat[[coco$idcol]]]
      
      ## Embed remaining texts
      if (length(sb_remaining)) {
        if (!coco$python_ok | !coco$use_sb) {
          ## If no python or SB, abort if not all rows already embedded
          sbdat <- NULL
        } else {
          set_status(data_status, paste(
            "Embedding", length(sb_remaining), "new texts with SBERT"))
          tmpfile <- paste0("tigger_sb_", da_token)
          arrow::write_feather(
            data.frame(
              id = sb_remaining,
              text = do.call(
                paste, res[match(sb_remaining, res[[coco$idcol]]),
                           coco$textcols, drop = FALSE])),
            paste0(tmpfile, ".feather")
          )
          system(paste0(
            coco$python, " embed_sbert.py -d ", tmpfile, ".feather",
            " -m ", coco$use_sb_model))
          sb_new <- arrow::read_feather(paste0(tmpfile, "_sb.feather"))
          colnames(sb_new)[colnames(sb_new) == "id"] <- coco$idcol
          sbdat <- rbind(sbdat, sb_new)
          arrow::write_feather(sbdat, file_sb)
          system(paste0("rm ", tmpfile, "*"))
        }
      }
      
      if (!is.null(sbdat)) {
        ## Merge with current data
        writeLines(colnames(sbdat)[colnames(sbdat) != coco$idcol], file_colssb)
        res <- merge(res, sbdat, by = coco$idcol, all.y = FALSE, sort = FALSE)
        rm(sbdat)
      }
      
      #################
      ## BERT predictions
      
      set_status(data_status, "Loading BERT predictions")
      
      ## BERT predictions : feathers of id and probs
      ## TODO: take full prediction if existing
      
      files_pred <- dir(
        coco$datadir, paste0("^", coco$dataname, "_bertpred_.*[.]feather$"))
      # files_predall <- dir(
      #   coco$datadir, paste0("^", coco$dataname, "_bertpredall_.*[.]feather$"))
      # files_pred <- files_pred[
      #   ! gsub("_bertpred_", "_bertpredall_", files_pred) %in% files_predall]
      # files_pred <- c(files_predall, files_pred)
      
      tmp_bertpred_cols <- list()
      if (length(files_pred)) {
        for (ipred in files_pred) {
          preddat <- arrow::read_feather(paste0(coco$datadir, ipred))
          predname <-
            gsub(paste0("^", coco$dataname, "_bertpred_(.*)[.]feather$"),
                 "\\1", ipred)
          # predname <-
          #   gsub(paste0("^", coco$dataname, "_bertpred(all)?_(.*)[.]feather$"),
          #        "\\2", ipred, perl = TRUE)
          n_missing <- sum(! res[[coco$idcol]] %in% preddat[[coco$idcol]])
          if (n_missing) {
            write(
              paste0("BERT prediction ", predname, " : ", n_missing,
                     " texts missing in prediction."),
              da_missing, append = TRUE)
          } else {
            colnames(preddat) <- gsub(
              "^bertpred_", paste0("bertpred_", predname), colnames(preddat))
            tmp_bertpred_cols[[predname]] <-
              colnames(preddat)[colnames(preddat) != coco$idcol]
            res <- merge(res, preddat, by = coco$idcol,
                         all.y = FALSE, sort = FALSE)
          }
          rm(preddat)
        }
        if (length(tmp_bertpred_cols)) {
          writeLines(names(tmp_bertpred_cols), file_colsbertpred)
          lapply(names(tmp_bertpred_cols), function(x) {
            writeLines(tmp_bertpred_cols[[x]], paste0(file_colsbertpred, "_", x))
          })
        }
      }
      
      ## Comment col
      set_status(data_status, "Loading comments")
      tmpfile <- paste0(projpath, "/", coco$commcol, ".csv")
      if (file.exists(tmpfile)) {
        tmpcol <- read.csv(tmpfile)
        colnames(tmpcol)[2] <- coco$commcol
        ## Remove duplicates, keep newest
        tmpcol <- tmpcol[nrow(tmpcol):1, , drop = FALSE]
        tmpcol <- tmpcol[!duplicated(tmpcol[, 1]), , drop = FALSE]
        tmpcol <- tmpcol[nrow(tmpcol):1, , drop = FALSE]
        res <- merge(res, tmpcol, by = coco$idcol,
                     all.x = TRUE, all.y = FALSE, sort = FALSE)
        res[[coco$commcol]][is.na(res[[coco$commcol]])] <- ""
      } else
        res[[coco$commcol]] <- ""
      
      # ## Tagging cols
      # set_status(data_status, "Loading tags")
      # for (itag in coco$tagcols) {
      #   tmpfile <- paste0(projpath, "/", itag, ".csv")
      #   if (file.exists(tmpfile)) {
      #     tmpcol <- read.csv(tmpfile)
      #     tmpcol[[paste0("hist_", itag)]] <- 1:nrow(tmpcol)
      #     ## Remove duplicates, keep newest
      #     tmpcol <- tmpcol[nrow(tmpcol):1, ]
      #     tmpcol <- tmpcol[!duplicated(tmpcol[, 1]), ]
      #     tmpcol <- tmpcol[nrow(tmpcol):1, ]
      #     if (verbose)
      #       cat("DEBUG imported ", itag, nrow(tmpcol), "\n")
      #     res <- merge(res, tmpcol, by = coco$idcol,
      #                  all.x = TRUE, all.y = FALSE, sort = FALSE)
      #   } else {
      #     res[[itag]] <- NA
      #     res[[paste0("hist_", itag)]] <- NA
      #   }
      # }
      ## Tagging cols
      set_status(data_status, "Loading tags")
      for (itag in coco$tagcols) {
        itaghist <- paste0("hist_", itag)
        if (is.null(res[[itag]]) | all(is.na(res[[itag]]))) {
          res[[itag]] <- NA
          res[[itaghist]] <- NA
        } else {
          res[[itaghist]][!is.na(res[[itag]])] <- 1:sum(!is.na(res[[itag]]))
        }
        tmpfile <- paste0(projpath, "/", itag, ".csv")
        if (file.exists(tmpfile)) {
          tmpcol <- read.csv(tmpfile)
          tmpcol[[itaghist]] <- 1:nrow(tmpcol) +
            max(0, suppressWarnings(max(na.omit(res[[itaghist]]))))
          ## Remove duplicates, keep newest
          tmpcol <- tmpcol[nrow(tmpcol):1, ]
          tmpcol <- tmpcol[!duplicated(tmpcol[, 1]), ]
          tmpcol <- tmpcol[nrow(tmpcol):1, ]
          tmpcol <- tmpcol[tmpcol[[coco$idcol]] %in% res[[coco$idcol]], ]
          if (verbose)
            cat("DEBUG imported", itag, nrow(tmpcol), "\n")
          # res <- merge(res, tmpcol, by = coco$idcol,
          #              all.x = TRUE, all.y = FALSE, sort = FALSE)
          if (nrow(tmpcol)) {
            idmatches <- match(tmpcol[[coco$idcol]], res[[coco$idcol]])
            # idmatches <- idmatches[!is.na(idmatches)]
            res[[itag]][idmatches] <- tmpcol[[itag]]
            res[[itaghist]][idmatches] <- tmpcol[[itaghist]]
          }
        }
      }
      
      set_status(data_status, "Finalizing data import")
      res
    }) %...>% ok.data.async()
    data_async <- catch(data_async, function(error) {
      values$modelDiagno <- paste0("Error in data import: ", error)
    })
    data_async <- finally(data_async, function() {
      #################
      ## Finalize data importation
      res <- ok.data.async()
      if (is.null(res)) return(NULL)
      
      set_status(data_status, "Finalizing data import: read columns")
      ## Read column (tok, ft, sb, ...) names from dedicated files
      values$tagcols <- coco$tagcols
      values$cols_tok <- readLines(file_colstok)
      values$cols_ft <- NULL
      if (file.exists(file_colsft)) {
        values$cols_ft <- readLines(file_colsft)
      }
      values$cols_sb <- NULL
      if (file.exists(file_colssb)) {
        values$cols_sb <- readLines(file_colssb)
      }
      set_status(data_status, "Finalizing data import: bertpred")
      if (file.exists(file_colsbertpred)) {
        tmp_bertpred <- readLines(file_colsbertpred)
        values$cols_bertpred <- lapply(tmp_bertpred, function(x)
          readLines(paste0(file_colsbertpred, "_", x)))
        names(values$cols_bertpred) <- tmp_bertpred
      } else
        values$cols_bertpred <- NULL
      set_status(data_status, "Finalizing data import: bertpred missing")
      if (file.exists(da_missing)) {
        lapply(readLines(da_missing), function(x)
          showNotification(paste(
            x, "\n",
            "You may predict on current text in BERT panel, ",
            "and then reload the data.")))
      }
      
      ## Populate regressors list in prediction panel
      set_status(data_status, "Finalizing data import: populating regressors")
      regr_choices <- c(
        "Regex" = "regex",
        "Word counts (DFM)" = "dfm")
      if (!is.null(values$cols_ft))
        regr_choices <- c(regr_choices, "Word embeddings (FastText)" = "ft")
      if (!is.null(values$cols_sb))
        regr_choices <- c(regr_choices, "Sentence embeddings (SBERT)" = "sb")
      regr_choices <- c(regr_choices, "Extra predictors" = "extra")

      if (length(values$cols_bertpred)) {
        tmp_choices <- paste0("bertpred_", names(values$cols_bertpred))
        names(tmp_choices) <- paste0("BERT_", names(values$cols_bertpred))
        regr_choices <- c(regr_choices, tmp_choices)
      }
      
      # if (length(regr_choices) > 3) {
        regr_choices_sel <-
          regr_choices[regr_choices %in% c("regex", "ft", "sb")]
      # } else
      #   regr_choices_sel <- c("regex", "dfm")
      
      updateSelectizeInput(
        session, "use_regressors", choices = regr_choices,
        selected = regr_choices_sel)
      values$new_use_regressors <- regr_choices_sel
      
      othercols <- colnames(res)
      othercols <- othercols[! othercols %in% c(
        values$cols_tok, values$cols_ft, values$cols_sb, values$cols_bertpred,
        values$tagcols, paste0("hist_", values$tagcols),
        coco$commcol, coco$textcols)]
      updateSelectInput(
        session, "dlTagSelect",
        choices = c("tags", "comments", "predictions", othercols),
        selected = c("tags", "comments", "predictions"))
      
      ## Populate extra regressors lists in prediction panel
      updateSelectizeInput(
        session, "use_ootregnum", choices = othercols, selected = NULL)
      updateSelectizeInput(
        session, "use_ootregcat", choices = othercols, selected = NULL)
      
      ## Project path
      values$projectdir <- paste0(projpath, "/")
      
      ## Create tagmat: data.frame of all current taggings
      values$tagmat <- res[, coco$tagcols, drop = FALSE]
      ## Create tagmathist: data.frame of historical order of taggings
      values$tagmathist <- res[, paste0("hist_", coco$tagcols), drop = FALSE]
      colnames(values$tagmathist) <- coco$tagcols

      ## Save data original colnames
      values$data_orig_colnames <- readLines(file_orig_colnames)
      
      ok.data(ok.data.async())
      ok.data.async(NULL)
      ok.data.running(FALSE)
      set_status(data_status, "Exit data async")
      system(paste0("rm ", data_status, "*"))
      values$modelDiagno <- "No model"
      
      
      if (trainTrigger() == 0) trainTrigger(1) ## For first run. TODO set trainTrigger to 0 when changing data?
      if (verbose) cat("DEBUG Exit data async finally\n")
    })
    NULL
  })
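
  ## The import block above follows the async pattern used throughout this app:
  ## future() runs the heavy work in a background R session (plan(multisession)),
  ## %...>% pipes the resolved value into a reactiveVal, catch() reports errors,
  ## and finally() runs the single-threaded finalization on the main process.
  ## Minimal sketch of the same promises pattern (hypothetical reactive `done`):
  ##   done <- reactiveVal()
  ##   p <- future({ Sys.sleep(5); 42 }) %...>% done()
  ##   p <- catch(p, function(e) showNotification(paste("failed:", e)))
  ##   p <- finally(p, function() message("cleanup"))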
  
  ## Read data import status from file
  observe({
    if (ok.data.running()) {
      invalidateLater(100)
      tmpfile <- paste0("tigger_", session$token, "_data")
      if (file.exists(tmpfile)) {
        values$modelDiagno <- paste(collapse = "\n", scan(
          tmpfile, what = "character", sep="\n", quiet = TRUE))
      } else
        values$modelDiagno <- ""
    }
  })
  
  
  ###############
  ## ok.text() reactive
  ###############
  ok.text <- eventReactive(ok.data(), {
    if (is.null(ok.data())) return(NULL)
    res <- ok.data()[, values$cols_tok[1]]
    if (length(values$cols_tok) > 1) {
      for (icol in values$cols_tok[-1]) {
        res <- paste0(res, "\n\n", ok.data()[, icol])
      }
    }
    # queryTrigger(queryTrigger() + 1)
    if (verbose)
      cat("DEBUG ok.text", length(res), "\n")
    res
  })
  
  ok.labelcol <- reactive({
    if (is.null(ok.data())) return(NULL)
    if (verbose)
      cat("DEBUG enter ok.labelcol\n")
    if (is.null(input$selectScheme)) return(values$conf$tagcols[1])
    input$selectScheme
  })
  
  
  ok.nicetext <- reactive({
    req(ok.data(), values$conf$textcols)
    # if (is.null(ok.data())) return(NULL)
    do.call(paste, c(ok.data()[, values$conf$textcols, drop = FALSE],
                     sep = "\n\n"))
  })
  
  ok.regressor.names <- reactive({
    daregs <- values$new_use_regressors
    if (verbose)
      cat("DEBUG enter ok.regressor.names ", paste(daregs, collapse = " "), "\n")
    res <- NULL
    if ("ft" %in% daregs)
      res <- c(res, values$cols_ft)
    if ("sb" %in% daregs)
      res <- c(res, values$cols_sb)
    if (length(values$cols_bertpred)) {
      for (ipred in 1:length(values$cols_bertpred)) {
        if (paste0("bertpred_", names(values$cols_bertpred)[ipred]) %in% daregs)
          res <- c(res, values$cols_bertpred[[ipred]])
      }
    }
    if (verbose)
      cat("DEBUG exit ok.regressor.names:", length(res), "\n")
    res
  })
  
  ok.contextCol <- reactive({
    if (verbose)
      cat("DEBUG enter ok.contextCol\n")
    # res <- isolate(values$conf$contextcols)
    res <- values$conf$contextcols
    names(res) <- res
    res
  })
  
  ## Initiate label and queries history, on ok.text() or labelcol change
  observeEvent(list(ok.text(), ok.labelcol()), {
    if (verbose)
      cat("DEBUG initiate label\n")
    if (is.null(ok.labelcol())) {
      values$label <- NULL
      values$labelhist <- NULL
      values$queries <- NULL
      values$comment <- NULL
      values$wasTagged <- NULL
      values$wasRetagged <- NULL
    } else {
      values$label <- values$tagmat[, ok.labelcol()]
      values$labelhist <- values$tagmathist[, ok.labelcol()]
      values$queries <- which(!is.na(values$label))
      values$comment <- ok.data()[[values$conf$commcol]]
      values$wasTagged <- !is.na(values$label)
      values$wasRetagged <- rep(FALSE, length(values$label))
    }
  })
  
  ## On values$label change, values$tagmat is updated for separate history
  observeEvent(values$label, {
    if (is.null(values$label)) return(NULL)
    if (verbose)
      cat("DEBUG Update tagmat from label\n")
    values$tagmat[, ok.labelcol()] <- values$label
    values$tagmathist[, ok.labelcol()] <- values$labelhist
  })
  
  
  ##############################################################################
  ## Reactives : training
  ##############################################################################
  
  ## Message of how many tagged and untagged, for model diagnostics box
  ok.msgtagged <- reactive({
    if (verbose)
      cat("DEBUG enter ok.msgtagged\n")
    # if (is.null(ok.data()) | ok.data.running()) {
    if (is.null(ok.data())) {
      return("No data yet ")
    }
    
    tmp <- ""
    # if (sum(is.na(values$label)) == 0)
    #   tmp <- paste0(tmp, "Data correction mode (0 NA labels left).\n")
    paste0(tmp, "Tagged: ", sum(!is.na(values$label)),
           " ; Untagged: ", sum(is.na(values$label)))
  })
  
  ## Training possible? At least 3 texts for 2 tags
  ok.train.possible <- reactive({
    if (verbose)
      cat("DEBUG enter ok.train.possible\n")
    labtab <- table(values$label)
    if (length(labtab) < 2) return(FALSE)
    if (sum(labtab > 2) < 2) return(FALSE)
    TRUE
  })
  
  ## Compute DFM (async)
  ## TODO make this less reactive
  observe({
    # if ("dfm" %in% input$use_regressors & !values$dfmGo)
    if ("dfm" %in% values$new_use_regressors & !values$dfmGo)
      values$dfmGo <- TRUE
  })
  ok.dfm <- reactiveVal()
  observe({
    if (is.null(ok.text())) return(NULL)
    if (! values$dfmGo) return(NULL)
    if (!is.numeric(input$dfmNgrams)) return(NULL)
    if (verbose)
      cat("DEBUG enter dfm observer\n")

    datext <- ok.text()
    dfmNgrams <- input$dfmNgrams
    dfmTfIdf <- input$dfmTfIdf
    dfmMinDocfreq <- input$dfmMinDocfreq
    dfmMinTermfreq <- input$dfmMinTermfreq
    dfmTfScheme <- input$dfmTfScheme
    dfmDfScheme <- input$dfmDfScheme

    dfm_async <- future(seed = TRUE, {
      dadfm <- datext %>%
        tolower() %>% tokens(remove_punct = F) %>%
        # tokens_select(pattern = stopwords(language= "fr"), selection = "remove") %>%
        tokens_ngrams(n= 1:dfmNgrams) %>% dfm() %>%
        dfm_trim(min_docfreq = dfmMinDocfreq, min_termfreq = dfmMinTermfreq,
                 verbose = TRUE)
      if (dfmTfIdf) {
        dadfm <- dadfm %>%
          dfm_tfidf(scheme_tf = dfmTfScheme, scheme_df = dfmDfScheme)
      }
      suppressWarnings({ # TODO : check
        dadfm <- quanteda::convert(dadfm, to= "tm")
      })
      dadfm <- Matrix::sparseMatrix(i= dadfm$i, j= dadfm$j, x= dadfm$v,
                                    dimnames = dadfm$dimnames)
      dadfm
    }) %...>% ok.dfm()
    dfm_async <- catch(dfm_async, function(error) {
      values$modelDiagno <- paste0("Error in DFM: ", error)
    })
  })
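
  ## For reference, the pipeline above builds a quanteda document-feature matrix
  ## from the tokenized text (optionally tf-idf weighted) and converts it to a
  ## plain Matrix::sparseMatrix usable by the glmnet/LiblineaR/ranger trainers.
  ## Minimal sketch with default-like settings (hypothetical input texts):
  ##   toks <- tokens(tolower(c("a small text", "another small text")))
  ##   m <- dfm_trim(dfm(tokens_ngrams(toks, n = 1)), min_docfreq = 1)
  ##   m <- convert(m, to = "tm")
  ##   Matrix::sparseMatrix(i = m$i, j = m$j, x = m$v, dimnames = m$dimnames)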
  
  ## Predictor : df or matrix of dfm and extra
  ok.regressor.matrix <- reactive({
    if (is.null(ok.text())) return(NULL)
    if (is.null(ok.regressor.names())) return(NULL)
    if (verbose)
      cat("DEBUG enter ok.regressor.matrix\n")
    model.matrix(~., data= ok.data()[, ok.regressor.names()])[, -1]
  })
  
  ok.extraregressor.matrix <- reactive({
    req(ok.text())
    if (! "extra" %in% values$new_use_regressors) return(NULL)
    if (verbose)
      cat("DEBUG enter ok.extraregressor.matrix\n")
    
    res <- NULL
    for (inum in input$use_ootregnum) {
      suppressWarnings({tmp <- as.numeric(ok.data()[[inum]])})
      if (all(is.na(tmp))) {
        showNotification(paste0(inum, " is all NAs or non-numeric ; predictor not used"),
                         duration = 5, type = "warning")
        next
      }
      tmp[is.na(tmp)] <- median(tmp, na.rm = TRUE)
      res <- cbind(res, tmp)
    }
    for (icat in input$use_ootregcat) {
      tmp <- ok.data()[[icat]]
      if (! length(unique(tmp)) %in% 2:100) {
        showNotification(paste0(icat, " has ", length(unique(tmp)),
                                " unique values ; predictor not used"),
                         duration = 5, type = "warning")
        next
      }
      tmp <- as.character(tmp)
      tmp[is.na(tmp)] <- "<NA>"
      tmp <- as.factor(tmp)
      tmpmat <- model.matrix(~., data.frame(cat = tmp))[, -1, drop = FALSE]
      res <- cbind(res, tmpmat)
    }
    if (verbose)
      cat("DEBUG exit ok.extraregressor.matrix", ncol(res), "\n")
    res
  })
  
  ok.extraRegex.matrix <- reactive({
    if (is.null(ok.nicetext())) return(NULL)
    if (is.null(values$extraRegex)) return(NULL)
    if (is.null(values$regex_inuse)) return(NULL)
    if (!any(values$regex_inuse)) return(NULL)
    if (is.null(values$regex_case)) return(NULL)
    if (verbose)
      cat("DEBUG enter ok.extraRegex.matrix\n")
    tmp <- as.matrix(sapply(which(values$regex_inuse), function(iregex) {
      1 * stringi::stri_detect_regex(
        ok.nicetext(), values$extraRegex[iregex],
        opts_regex = list(case_insensitive = !values$regex_case[iregex]))
    }))
    colnames(tmp) <- values$extraRegex[values$regex_inuse]
    tmp
  })
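
  ## The regex predictors are simple 0/1 indicator columns, one per active
  ## regex, set to 1 when the pattern matches the displayed text. Hypothetical
  ## example with a single case-insensitive pattern:
  ##   1 * stringi::stri_detect_regex(c("Dog!", "cat"), "dog",
  ##                                  opts_regex = list(case_insensitive = TRUE))
  ##   # -> 1 0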
 | 
						|
  
 | 
						|
  ok.predictor <- reactive({
 | 
						|
    req(values$new_use_regressors)
 | 
						|
    if (ok.data.running()) return(NULL)
 | 
						|
    if (is.null(ok.text())) return(NULL)
 | 
						|
    if (verbose) 
 | 
						|
      cat("DEBUG enter ok.predictor\n")
 | 
						|
    dadfm <- NULL
 | 
						|
    if ("dfm" %in% values$new_use_regressors) {
 | 
						|
      dadfm <- ok.dfm()
 | 
						|
    } else {
 | 
						|
      if (input$predModel == "naive bayes") {
 | 
						|
        values$modelDiagno <- 
 | 
						|
          "Error in training: Naive Bayes requires DFM word counts as predictors."
 | 
						|
        return(NULL) 
 | 
						|
      } 
 | 
						|
    }    
 | 
						|
    if (length(ok.regressor.names()) > 0) {
 | 
						|
      if (input$predModel == "random forest") {
 | 
						|
        if (is.null(dadfm)) {
 | 
						|
          dadfm <- ok.data()[, ok.regressor.names()]
 | 
						|
        } else
 | 
						|
          dadfm <- cbind(ok.data()[, ok.regressor.names()], as.matrix(dadfm))
 | 
						|
      }
 | 
						|
      if (input$predModel %in% c("lasso", "linear", "knn")) {
 | 
						|
        dadfm <- cbind(ok.regressor.matrix(), dadfm)
 | 
						|
      }
 | 
						|
    }
 | 
						|
    # if ("regex" %in% input$use_regressors && length(values$regex_inuse) > 0) {
 | 
						|
    if ("regex" %in% values$new_use_regressors & !is.null(ok.extraRegex.matrix())) {
 | 
						|
      dadfm <- cbind(dadfm, ok.extraRegex.matrix())
 | 
						|
    }
 | 
						|
    if ("extra" %in% values$new_use_regressors) {
 | 
						|
      dadfm <- cbind(dadfm, ok.extraregressor.matrix())
 | 
						|
    }
 | 
						|
 | 
						|
    if (is.null(dadfm)) {
 | 
						|
      values$modelDiagno <- "Error in training: no predictors selected"
 | 
						|
      if (verbose) 
 | 
						|
        cat("DEBUG ok.predictor exit: no predictors\n")
 | 
						|
      return(NULL) 
 | 
						|
    }
 | 
						|
    
 | 
						|
    if (input$predModel == "linear") 
 | 
						|
      dadfm <- SparseM::as.matrix.csr(dadfm, nrow(dadfm), ncol(dadfm))
 | 
						|
    if (input$predModel == "naive bayes") 
 | 
						|
      dadfm <- as.dfm(dadfm)
 | 
						|
    if (verbose) 
 | 
						|
      cat("DEBUG exit ok.predictor: ", ncol(dadfm), "\n")
 | 
						|
    dadfm
 | 
						|
  })


  ## Train model async
  ok.train <- reactiveVal()
  ok.train.async <- reactiveVal()
  observeEvent(
    list(trainTrigger(), input$modelTrain),
    {
      if (is.null(ok.predictor())) return(NULL)
      if (verbose)
        cat("DEBUG enter ok.train observer\n")
      if (!ok.train.possible()) {
        values$modelDiagno <-
          "Tag at least 3 texts for 2 tags to train prediction model."
        ok.train(NULL)
        # queryTrigger(queryTrigger() + 1)
        return(NULL)
      }
      ready2tag(0)
      values$modelDiagno <- paste0("Training ", input$predModel)

      tmp_model <- input$predModel
      if (tmp_model == "random forest") {
        if (!is.numeric(input$rfNumTrees) | !is.numeric(input$rfMtry) |
            !is.numeric(input$rfSampleFrac)) {
          ok.train(NULL)
          return(NULL)
        }
        train_fun <- ranger::ranger
        train_args <- list(
          x = ok.predictor()[!is.na(values$label), ],
          y = as.factor(values$label[!is.na(values$label)]),
          num.trees = input$rfNumTrees,
          mtry = if (input$rfMtry > 0) input$rfMtry,
          sample.fraction = input$rfSampleFrac, probability = TRUE)
      } else {
        if (tmp_model == "linear") {
          tmpvalues <- values$label
          if (!is.numeric(input$liblinCost)) {
            ok.train(NULL)
            return(NULL)
          }
          for (ival in na.omit(unique(tmpvalues)))
            if (sum(tmpvalues == ival, na.rm = TRUE) == 1)
              tmpvalues[which(tmpvalues == ival)] <- NA
          train_fun <- LiblineaR::LiblineaR
          train_args <- list(
            data = ok.predictor()[!is.na(tmpvalues), ],
            target = as.factor(tmpvalues[!is.na(tmpvalues)]),
            type = 0, cost = input$liblinCost)
        } else if (tmp_model == "lasso") {
          tmpvalues <- values$label
          for (ival in na.omit(unique(tmpvalues)))
            if (sum(tmpvalues == ival, na.rm = TRUE) == 1)
              tmpvalues[which(tmpvalues == ival)] <- NA
          train_fun <- glmnet::glmnet
          train_args <- list(
            x = ok.predictor()[!is.na(tmpvalues), ],
            y = as.factor(tmpvalues[!is.na(tmpvalues)]),
            family = "multinomial")
        } else if (tmp_model == "naive bayes") {
          if (!is.numeric(input$naiveSmooth)) {
            ok.train(NULL)
            return(NULL)
          }
          tmpvalues <- values$label
          for (ival in na.omit(unique(tmpvalues)))
            if (sum(tmpvalues == ival, na.rm = TRUE) == 1)
              tmpvalues[which(tmpvalues == ival)] <- NA
          train_fun <- quanteda.textmodels::textmodel_nb
          train_args <- list(
            x = ok.predictor()[!is.na(tmpvalues), ],
            y = as.factor(tmpvalues[!is.na(tmpvalues)]),
            smooth = input$naiveSmooth, prior = input$naivePrior,
            distribution = input$naiveDistri)
        } else if (tmp_model == "knn") {
          train_fun <- function(x) return(TRUE)
          train_args <- list(x = "knn")
        }
      }

      train_async <- future(seed = TRUE, {
        suppressWarnings(do.call(train_fun, train_args))
      }) %...>% ok.train.async()
      train_async <- catch(train_async, function(error) {
        values$modelDiagno <- paste0("Model training error: ", error)
        ok.train(NULL)
        ok.train.async(NULL)
      })
      train_async <- finally(train_async, function() {
        values$modelDiagno <- "Training done, predicting..."
        ok.train(ok.train.async())
        ok.train.async(NULL)
        predTrigger(predTrigger() + 1)
      })
      NULL
    }
  )
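  ## Note on the async pattern above: future() runs the training in a separate
  ## R session (plan(multisession)), %...>% pipes its result into the
  ## ok.train.async() reactiveVal, and promises::catch()/finally() handle
  ## errors and completion without blocking the Shiny session; finally() then
  ## bumps predTrigger() so prediction re-runs once a model is available.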

  ## Model prediction on full dataset (async)
  ok.pred <- reactiveVal()
  ok.pred.async <- reactiveVal()
  # observeEvent(list(ok.train(), input$glmLambda, input$knnK, values$retagged), {
  observeEvent(predTrigger(), {
    if (is.null(ok.train())) {
      ok.pred(NULL)
      return(NULL)
    }
    if (verbose)
      cat("DEBUG ok.pred()\n")

    damodel <- input$predModel
    if (input$predModel == "random forest") {
      pred_args <- list(ok.train(), data = ok.predictor())
      dacolnames <- colnames(ok.train()$predictions)
    } else if (input$predModel == "lasso") {
      if (!is.numeric(input$glmLambda)) {
        ok.pred(NULL)
        return(NULL)
      }
      pred_args <- list(ok.train(), newx = ok.predictor(), s = input$glmLambda,
                        type = "response")
    } else if (input$predModel == "linear") {
      pred_args <- list(ok.train(), newx = ok.predictor(), proba = TRUE)
    } else if (input$predModel == "naive bayes") {
      pred_args <- list(ok.train(), newdata = ok.predictor(),
                        type = "probability")
    } else if (input$predModel == "knn") {
      if (!is.numeric(input$knnK)) {
        ok.pred(NULL)
        return(NULL)
      }
      pred_args <- list(train = ok.predictor()[!is.na(values$label), ],
                        test = ok.predictor(),
                        cl = values$label[!is.na(values$label)],
                        k = input$knnK, prob = TRUE)
    }

    pred_async <- future(seed = TRUE, {
      if (damodel == "random forest") {
        res <- do.call(predict, pred_args)$predictions
        colnames(res) <- dacolnames
        res
      } else if (damodel == "lasso") {
        do.call(predict, pred_args)[, , 1]
      } else if (damodel == "linear") {
        do.call(predict, pred_args)$probabilities
      } else if (damodel == "naive bayes") {
        do.call(predict, pred_args)
      } else if (damodel == "knn") {
        tmp <- do.call(class::knn, pred_args)
        ## A bit flaky here: probs of non-best classes uniformly share the rest,
        ## and distance is euc
        tmpvalues <- sort(unique(pred_args$cl))
        res <- matrix(0, nrow(pred_args$test), length(tmpvalues),
                      dimnames = list(NULL, tmpvalues))
        res[cbind(1:nrow(res), tmp)] <- attr(tmp, "prob") -
          ((1 - attr(tmp, "prob")) / (ncol(res) - 1))
        res <- res + (1 - attr(tmp, "prob")) / (ncol(res) - 1)
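        ## Worked example of the sharing rule above: with K = 3 classes and a
        ## winning vote share attr(tmp, "prob") = 0.7, the predicted class gets
        ## 0.7 and each other class gets (1 - 0.7) / (3 - 1) = 0.15, so every
        ## row still sums to 1.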
        res
      }
    }) %...>% ok.pred.async()
    pred_async <- catch(pred_async, function(error) {
      values$modelDiagno <- paste0("Error in prediction: ", error)
    })
    pred_async <- finally(pred_async, function() {
      if (verbose)
        cat("DEBUG ok.pred() out of async\n")
      ok.pred(ok.pred.async())
      ok.pred.async(NULL)
      # predlabTrigger(predlabTrigger() + 1)
      diagTrigger(diagTrigger() + 1)
      queryNext(queryNext() + 1)
    })
  })

  ## Best predicted label for full dataset
  # ok.predlab1 <- eventReactive(predlabTrigger(), {
  #   if (is.null(ok.pred())) return(NULL)
  #   if (verbose)
  #     cat("DEBUG ok.predlab1\n")
  #   # queryTrigger(queryTrigger() + 1)
  #   colnames(ok.pred())[max.col(ok.pred())]
  # })
  ok.predlab1 <- reactive({
    if (is.null(ok.pred())) return(NULL)
    if (verbose)
      cat("DEBUG ok.predlab1\n")
    colnames(ok.pred())[max.col(ok.pred())]
  })
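  ## Sketch of how the best label is picked (not run here): max.col() returns,
  ## for each row of the probability matrix, the index of its largest entry,
  ## which is then mapped back to a tag name, e.g.
  ##   probs <- matrix(c(0.2, 0.8, 0.6, 0.4), 2, 2,
  ##                   dimnames = list(NULL, c("neg", "pos")))
  ##   colnames(probs)[max.col(probs)]   # "pos" "neg"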

  ## On scheme change, forget existing model, queue, query and hist
  ## load scheme description and predictor-regex
  observeEvent(ok.labelcol(), {
    if (verbose)
      cat("DEBUG event selectScheme", ok.labelcol(), "\n")
    ok.pred(NULL)
    ok.train(NULL)
    values$queryQueue <- NULL
    values$modelDiagno <- "No model"
    values$histSaveStack <- NULL

    ## Load description file
    descr <- ""
    if (file.exists(paste0(values$conf$datadir, values$conf$projectid,
                           "/", input$selectScheme, ".txt"))) {
      descr <- paste(collapse = "\n", readLines(paste0(
        values$conf$datadir, values$conf$projectid,
        "/", input$selectScheme, ".txt")))
    }
    updateTextAreaInput(session, "schemeDescr", value = descr)

    ## Load regex from json
    regexfile <- paste0(values$projectdir, input$selectScheme, "_regex.json")
    if (file.exists(regexfile)) {
      if (verbose)
        cat("DEBUG enter regex.json\n")
      tmp <- RJSONIO::fromJSON(paste(collapse = "\n", readLines(regexfile)))
      if (is.null(tmp$regex)) return(NULL)
      values$extraRegex <- tmp$regex
      names(values$extraRegex) <- sapply(tmp$regex, label2hash)
      for (iregex in 1:length(tmp$regex)) {
        values[[paste0("useregex_", names(values$extraRegex)[iregex])]] <-
          tmp$use[iregex]
        values[[paste0("caseregex_", names(values$extraRegex)[iregex])]] <-
          tmp$case[iregex]
      }
    } else
      values$extraRegex <- NULL

  })
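  ## Assumed shape of the per-scheme <scheme>_regex.json read above (matching
  ## the fields the code uses: regex, use, case), for illustration only:
  ##   {"regex": ["price", "strike"], "use": [true, false], "case": [false, false]}
  ## Each pattern is re-hashed with label2hash() to rebuild the per-regex
  ## useregex_/caseregex_ reactive values.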


  ## queryNext event: update values$newQuery ; called by queryNext(), or by
  ## queryQueue or regex change
  ## Requires:
  ## - queryQueue (reactive: indices of current queue), and
  ## - wasTagged (boolean vector indicating whether each element has been tagged this session, used in "on untagged")
  ## - wasRetagged (boolean vector indicating whether each element has been re-tagged this session, used in "on tagged")
  ## TODO: make a scheme-list of wasTagged, now it will be wiped at each scheme change?
  observeEvent(
    list(queryNext(), values$queryQueue,
         input$sampleChoice, input$taggedWhich, input$maxprobWhich,
         input$regexFilter, input$regexCaseSens,
         input$visuLock, values$visuZoom$xlim, values$visuZoom$ylim),
    {
      if (is.null(values$queryQueue)) return(NULL)
      if (verbose)
        cat("DEBUG queryNext\n")

      queue <- values$queryQueue

      if (input$sampleChoice == "untagged") {
        queue <- queue[!values$wasTagged[queue]]
      } else if (input$sampleChoice == "tagged") {
        queue <- queue[!is.na(values$label)[queue] & !values$wasRetagged[queue]]
        if (input$taggedWhich != "all") {
          queue <- queue[values$label[queue] == input$taggedWhich]
        }
      } else if (input$sampleChoice == "all") {
        queue <- queue[!values$wasRetagged[queue]]
      }

      if (input$visuLock) if (!is.null(values$visuZoom$xlim)) {
        if (!is.null(ok.visu()))
          queue <- queue[ok.visu()[queue, 1] >= values$visuZoom$xlim[1] &
                           ok.visu()[queue, 1] <= values$visuZoom$xlim[2] &
                           ok.visu()[queue, 2] >= values$visuZoom$ylim[1] &
                           ok.visu()[queue, 2] <= values$visuZoom$ylim[2]]
      }

      if (length(queue)) if (nchar(input$regexFilter)) {
        try_regex <- try(silent = TRUE, {
          queue <- queue[
            stringi::stri_detect_regex(
              ok.nicetext()[queue], input$regexFilter,
              opts_regex = list(case_insensitive = !input$regexCaseSens))]
        })
        if (inherits(try_regex, "try-error")) {
          showNotification("Invalid regex", duration = 2, type = "error")
          values$newQuery <- NULL
          return(NULL)
        }
      }

      if (!length(queue)) {
        if (nchar(input$regexFilter)) {
          showNotification("No texts matching the regex",
                           duration = 2, type = "warning")
        } else
          showNotification("No more texts to be tagged in this set",
                           duration = 2, type = "warning")
        values$newQuery <- NULL
        return(NULL)
      }

      values$lastQuery <- queue[1] # ??
      values$newQuery <- queue[1]
      ready2tag(1)
    }
  )

  ## queryQueue event: fix values$queryQueue (indices of current queue)
  observeEvent(
    list(ok.data(), ok.pred(), input$strategy, input$maxprobWhich,
         input$selectProject, input$selectScheme), {
      if (input$strategy %in% c("entropy", "maxprob") & is.null(ok.pred())) {
        values$queryQueue <- NULL
        values$lastQuery <- NULL
        values$newQuery <- NULL
        return(NULL)
      }
      if (is.null(ok.text())) return(NULL)
      if (verbose)
        cat("DEBUG queryQueue change\n")

      if (input$strategy == "sequential") {
        values$queryQueue <- 1:length(ok.text())
      } else if (input$strategy == "random") {
        values$queryQueue <- sample(length(ok.text()))
      } else {
        dapred <- ok.pred()
        if (input$strategy == "entropy") {
          ## Zero probabilities would give 0 * log(0) = NaN: zero them out
          ## cell-wise before summing, rather than indexing the row sums.
          tmp_plogp <- dapred * log(dapred)
          if (any(dapred == 0)) {
            tmp_plogp[dapred == 0] <- 0
          }
          tominimize <- rowSums(tmp_plogp)
        }
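        ## The entropy strategy queries the texts the model is most uncertain
        ## about: for predicted probabilities p, rowSums(p * log(p)) is the
        ## negative entropy -H(p), so order() (ascending) puts the
        ## highest-entropy rows first. E.g. c(0.5, 0.5) gives -H = -0.69,
        ## ranked before c(0.9, 0.1) with -H = -0.33.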
        if (input$strategy == "maxprob") {
          if (! input$maxprobWhich %in% colnames(dapred)) return(NULL)
          tominimize <- -dapred[, input$maxprobWhich]
        }
        values$queryQueue <- order(tominimize)
      }
    }
  )


  ## Update uniqueLabels and uniqueLabelsAll if necessary on values$label change
  observeEvent(values$label, {
    if (is.null(values$label)) return(NULL)
    if (verbose)
      cat("DEBUG uniqueLabels\n")
    dalabs <- sort(na.omit(unique(values$label)))
    # if (!length(dalabs)) return(NULL)
    if (!identical(dalabs, values$uniqueLabels))
      values$uniqueLabels <- dalabs
    if (any(! dalabs %in% values$uniqueLabelsAll))
      values$uniqueLabelsAll <- c(values$uniqueLabelsAll,
                                  dalabs[! dalabs %in% values$uniqueLabelsAll])
  })



  ##############################################################################
  ## Project management
  ##############################################################################

  output$cpDataMessage <- renderUI({
    req(values$cpDataMessage)
    p(values$cpDataMessage)
  })

  output$cpConfirmMessage <- renderUI({
    req(values$cpConfirmMessage)
    p(strong(paste0("Error: ", values$cpConfirmMessage)))
  })

  output$cpIdcolUI <- renderUI({
    req(values$cpNewData)
    if (!is.data.frame(values$cpNewData)) return(NULL)
    selectInput("cpIdcol", NULL, colnames(values$cpNewData))
  })

  output$cpTextcolUI <- renderUI({
    req(values$cpNewData)
    if (!is.data.frame(values$cpNewData)) return(NULL)
    selectInput("cpTextcol", NULL, colnames(values$cpNewData), multiple = TRUE)
  })

  output$cpContextcolUI <- renderUI({
    req(values$cpNewData)
    if (!is.data.frame(values$cpNewData)) return(NULL)
    selectInput("cpContextcol", NULL, colnames(values$cpNewData), multiple = TRUE)
  })

  output$cpTagcolUI <- renderUI({
    req(values$cpNewData)
    if (!is.data.frame(values$cpNewData)) return(NULL)
    selectInput("cpTagcol", NULL, colnames(values$cpNewData), multiple = TRUE)
  })

  output$cpCommcolUI <- renderUI({
    req(values$cpNewData)
    if (!is.data.frame(values$cpNewData)) return(NULL)
    selectInput("cpCommcol", NULL, c("none", colnames(values$cpNewData)))
  })

  output$cp_spacyUI <- renderUI(
    textInput("cp_use_spacy_model", NULL,
              modelnames$spacy_name[modelnames$short == input$cp_lang],
              placeholder = "(spacy model name)")
  )

  output$cp_spacyDlUI <- renderUI(HTML(paste(
    "<em>Model must be downloaded once, from python:</em><br>",
    input$cp_which_python, "-m spacy download",
    modelnames$spacy_name[modelnames$short == input$cp_lang], "<br><br>"
  )))

  output$cp_ftUI <- renderUI(
    textInput("cp_use_ft_model", NULL,
              modelnames$fasttext_name[modelnames$short == input$cp_lang],
              placeholder = "(fasttext model path)")
  )

  output$cp_ftDlUI <- renderUI(list(HTML(paste(
    "<em>Model can be downloaded <a href='",
    modelnames$fasttext_url[modelnames$short == input$cp_lang],
    "'>here</a></em>")),
    br(), br()))

  output$cp_sbUI <- renderUI(
    textInput("cp_use_sb_model", NULL, ifelse(
      input$cp_lang %in% c("ar", "zh", "nl", "en", "fr", "de",
                           "it", "ko", "pl", "pt", "ru", "es", "tr"),
      "distiluse-base-multilingual-cased-v1",
      "distiluse-base-multilingual-cased-v2"),
      placeholder = "(custom sentence_transformers model)")
  )



  ## Create new project, with modal dialog
  observeEvent(input$createProject, {
    if (verbose)
      cat("DEBUG create project\n")
    showModal(modalDialog(
      title = "New project",

      p("Create a new text tagging project."),
      p("This mainly involves choosing a dataset, and how to process it."),
      p("Items marked by a star ", strong("*"),
        " cannot be changed once the project is created."),

      hr(),
      h4("Files and folders"),

      fluidRow(
        column(3, p(strong("*Project name"))),
        column(5, textInput("cpName", NULL, placeholder = "(Required)")),
        column(4, p("Tags will be stored in a new directory by this name."))
      ),
      fluidRow(
        column(3, p(strong("*Data directory"))),
        column(5, textInput("cpDatadir", NULL, placeholder = "(Required)")),
        column(4, p("Place (on the server) where the data and project are stored"))
      ),
      fluidRow(
        column(3, p(strong("*Data filename"))),
        column(5, checkboxInput("cpDoImport", "New data", TRUE)),
        column(4, p("Main file, containing id and text columns"))
      ),

      conditionalPanel("!input.cpDoImport", list(
        ## Existing data
        fluidRow(
          column(3, p(strong("Existing file"))),
          column(5, textInput("cpDatafile", NULL, placeholder = "(Required)")),
          column(4, actionButton("cpDatafileGo", "Import"))
        )
      )),
      conditionalPanel("input.cpDoImport", {
        ## Import data
        wellPanel(
          selectInput('cpNDfiletype', NULL,
                      choices = c("Text file (.csv, .txt)" = "csv_txt",
                                  "Feather" = "feather",
                                  "Parquet" = "parquet",
                                  "OpenDocument (.ods)" = "ods",
                                  "Microsoft Excel (.xls, .xlsx)" = "excel",
                                  "SPSS (.sav, .por)" = "spss",
                                  "SAS (.sas7bdat)" = "sas_data",
                                  "Stata v5-v12 (.dta)" = "stata"),
                      selected = "csv_txt"
          ),
          fileInput('cpNDfile', 'Choose File'),

          checkboxInput("cpNDparams", "File parameters"),

          conditionalPanel(
            "input.cpNDparams",
            conditionalPanel(
              "input.cpNDfiletype == 'csv_txt'",

              fluidRow(column(4, p('Header')),
                       column(8, selectInput('cpCSVheader', NULL,
                                             c("Header" = TRUE,
                                               "No Header" = FALSE),
                                             "Auto"))),
              fluidRow(column(4, p('Separator')),
                       column(8, selectInput('cpCSVsep', NULL,
                                             c("Comma ','" = ",",
                                               "Semicolon ';'" = ";",
                                               "Tab" = "\t",
                                               "Space" = " "),
                                             "Auto"))),
              fluidRow(column(4, p('Quote')),
                       column(8, selectInput('cpCSVquote', NULL,
                                             c("Double Quote \"" = "\"",
                                               "Single Quote '" = "'",
                                               "None" = ""),
                                             "Double Quote \""))),
              fluidRow(column(4, p('Decimal mark')),
                       column(8, selectInput('cpCSVdec', NULL,
                                             c('Period "."' = ".",
                                               'Comma ","' = ","),
                                             'Period "."'))),
              fluidRow(column(4, p('File Encoding')),
                       column(8, selectInput('cpCSVencoding', NULL,
                                             c("unknown", "UTF-8", "Latin-1"),
                                             "unknown")))),

            ## Options for ods files
            conditionalPanel(
              "input.cpNDfiletype == 'ods'",
              fluidRow(column(4, p('Sheet')),
                       column(8, numericInput('cpODSsheet', NULL, 1, 1,
                                              step = 1))),
              checkboxInput("cpODScolnames", "Column names", TRUE),
              fluidRow(column(4, p('NA symbol')),
                       column(8, textInput('cpODSna', NULL, value = ""))),
              fluidRow(column(4, p('Skip rows')),
                       column(8, numericInput('cpODSskip', NULL, 0, 0,
                                              step = 1))),
              fluidRow(column(4, p('Specify range')),
                       column(8, textInput('cpODSrange', NULL, value = "")))
            ),

            ## Options for xls / xlsx files
            conditionalPanel(
              "input.cpNDfiletype == 'excel'",
              fluidRow(column(4, p('Column names')),
                       column(8, selectInput('cpXLScolnames', NULL,
                                             choices = c("Yes" = "TRUE",
                                                         "No" = "FALSE"),
                                             selected = TRUE))),
              fluidRow(column(4, p('Skip rows')),
                       column(8, numericInput('cpXLSskip', NULL, 0, 0,
                                              step = 1))),
              fluidRow(column(4, p('Trim whitespaces')),
                       column(8, selectInput('cpXLStrim', NULL,
                                             choices = c("Yes" = "TRUE",
                                                         "No" = "FALSE"),
                                             selected = TRUE))),
              checkboxInput("cpXLSsheet",
                            "Specify Worksheet", FALSE),
              conditionalPanel("input.cpXLSsheet",
                               textInput("cpXLSsheet_specs", NULL, "")),
              checkboxInput("cpXLSrange", "Specify Range", FALSE),
              conditionalPanel("input.cpXLSrange",
                               textInput("cpXLSrange_specs", NULL, "")
              )),

            ## Options for SPSS files
            conditionalPanel(
              "input.cpNDfiletype == 'spss'",
              fluidRow(
                column(4, p('Skip rows')),
                column(8, numericInput('cpSPSSskip', NULL, 0, 0, step = 1))),
              checkboxInput("cpSPSSna", "User-defined NA", FALSE)),

            ## Options for SAS files
            conditionalPanel(
              "input.cpNDfiletype == 'sas_data'",
              fluidRow(
                column(4, p('Skip rows')),
                column(8, numericInput("cpSASskip", NULL, 0, 0, step = 1))),
              checkboxInput("cpSAScatalog", "Use catalog file", FALSE),
              conditionalPanel(
                "input.cpSAScatalog",
                fileInput("cpSAScatalog_file", "Catalog file"))
            ),

            ## Options for STATA dta files
            conditionalPanel(
              "input.cpNDfiletype == 'stata'",
              checkboxInput("cpSTATAfactors", "Convert factors", FALSE))
          )
        )
      }),

      uiOutput("cpDataMessage"),
      hr(),
      h4("Data"),

      fluidRow(
        column(6, numericInput(
          "cpNrows", "N. rows for working sample", 500, 1, step = 1)),
        column(6, numericInput(
          "cpNskip", "Rows to skip before working sample", 0, 0, step = 1))
      ),

      fluidRow(
        column(3, p(strong("*ID"))),
        column(5, uiOutput("cpIdcolUI")),
        column(4, p("Name of the id variable, unique identifier of each text"))
      ),
      fluidRow(
        column(3, p(strong("*Text"))),
        column(5, uiOutput("cpTextcolUI")),
        column(4, p("Name of the text variables: if more than one, texts are concatenated in the specified order"))
      ),
      fluidRow(
        column(3, p(strong("*Tags"))),
        column(5, uiOutput("cpTagcolUI")),
        column(4, p("(optional) Names of variables that are already tagged: each will create a new scheme"))
      ),
      fluidRow(
        column(3, p(strong("Comments"))),
        column(5, uiOutput("cpCommcolUI")),
        column(4, p("(optional) Name of the comments variable"))
      ),
      fluidRow(
        column(3, p(strong("Context"))),
        column(5, uiOutput("cpContextcolUI")),
        column(4, p("(optional) Names of variables not used in the models, but that may be displayed during tagging"))
      ),


      hr(),
      h4("System"),

      fluidRow(
        column(3, checkboxInput("cp_use_python", "Python backend", FALSE)),
        column(5, conditionalPanel(
          "input.cp_use_python",
          textInput("cp_which_python", NULL, value = "python3",
                    placeholder = "(custom python path)"))),
        column(4, conditionalPanel(
          "input.cp_use_python",
          p("This must be a working python3 environment, with the required modules installed (see documentation)")))
      ),
      conditionalPanel("input.cp_use_python", list(
        fluidRow(
          column(3, p("Language")),
          column(5, selectInput("cp_lang", NULL, modelnames_labels, "en")),
          column(4, p("Used to preset tokenization and embedding models"))
        ),
        fluidRow(
          column(3, checkboxInput("cp_use_spacy", "SpaCy tokenization", FALSE)),
          column(5, conditionalPanel("input.cp_use_spacy", uiOutput("cp_spacyUI"))),
          column(4, p("Name of the spacy tokenizer model, used in DTM and word embeddings"))
        ),
        conditionalPanel("input.cp_use_spacy", fluidRow(
          column(3),
          column(9, uiOutput("cp_spacyDlUI")))
        ),
        fluidRow(
          column(3, checkboxInput("cp_use_ft", "FastText word embeddings", FALSE)),
          column(5, conditionalPanel("input.cp_use_ft", uiOutput("cp_ftUI"))),
          column(4, p("Path to the local fasttext model binary"))
        ),
        conditionalPanel("input.cp_use_ft", fluidRow(
          column(3),
          column(9, uiOutput("cp_ftDlUI")))),
        fluidRow(
          column(3, checkboxInput("cp_use_sb", "SBERT sentence embeddings", FALSE)),
          column(5, conditionalPanel("input.cp_use_sb", uiOutput("cp_sbUI"))),
          column(4, p("(GPU recommended) Name or path of the sentence-transformers model"))
        ),

        conditionalPanel("input.cp_use_python", {
          checkboxInput("cp_use_gpu", "GPU support (CUDA, for SBERT and BERT)", FALSE)
        })
      )),

      uiOutput("cpConfirmMessage"),
      footer = tagList(actionButton("cpConfirm", "Create"),
                       modalButton("Cancel"))))
  })

  observeEvent(input$cpDatafileGo, {
    dataname <- input$cpDatafile
    dataname <- gsub("[.]feather$", "", dataname)
    tmpdat <- try(arrow::read_feather(
      paste0(gsub("/+$", "/", paste0(input$cpDatadir, "/")),
             dataname, ".feather")))
    if (inherits(tmpdat, "try-error")) {
      values$cpDataMessage <- paste0(
        "Data import error: ", as.character(tmpdat))
      values$cpNewData <- NULL
      return(NULL)
    }
    if (!is.data.frame(tmpdat)) {
      values$cpDataMessage <- "Data import error: object not a data.frame"
      values$cpNewData <- NULL
      return(NULL)
    }
    if (ncol(tmpdat) < 2) {
      values$cpDataMessage <- "Data import error: only one column detected"
      values$cpNewData <- NULL
      return(NULL)
    }

    values$cpDataMessage <- paste0(
      "Data import success: ", nrow(tmpdat), " rows, ", ncol(tmpdat), " cols.")
    values$cpNewData <- tmpdat
    values$cpNewDataname <- dataname

  })

  ## Data import mechanism
  observeEvent(input$cpNDfile, {
    req(input$cpNDfile)
    if (input$cpNDfiletype == "csv_txt") {
      res <- try(read.csv(
        input$cpNDfile$datapath,
        header = as.logical(input$cpCSVheader), sep = input$cpCSVsep,
        quote = input$cpCSVquote, dec = input$cpCSVdec, stringsAsFactors = FALSE,
        encoding = input$cpCSVencoding, check.names = TRUE))

    } else if (input$cpNDfiletype == "feather") {
      res <- try(arrow::read_feather(input$cpNDfile$datapath))

    } else if (input$cpNDfiletype == "parquet") {
      res <- try(arrow::read_parquet(input$cpNDfile$datapath))

    } else if (input$cpNDfiletype == "ods") {
      range <- input$cpODSrange
      if (range == "") range <- NULL
      res <- try(readODS::read_ods(
        path = input$cpNDfile$datapath, sheet = input$cpODSsheet,
        col_names = input$cpODScolnames, na = input$cpODSna,
        skip = input$cpODSskip, range = range))

    } else if (input$cpNDfiletype == "excel") {
      column_names <- as.logical(input$cpXLScolnames)
      trim_spaces <- as.logical(input$cpXLStrim)
      the.range <- NULL
      if (input$cpXLSrange == TRUE & input$cpXLSrange_specs != "")
        the.range <- input$cpXLSrange_specs
      the.sheet <- NULL
      if (input$cpXLSsheet == TRUE & input$cpXLSsheet_specs != "")
        the.sheet <- input$cpXLSsheet_specs
      res <- try(data.frame(readxl::read_excel(
        input$cpNDfile$datapath, col_names = column_names, range = the.range,
        sheet = the.sheet, trim_ws = trim_spaces, skip = input$cpXLSskip)))

    } else if (input$cpNDfiletype == "spss") {
      res <- try(data.frame(haven::read_spss(
        file = input$cpNDfile$datapath,
        skip = input$cpSPSSskip, user_na = input$cpSPSSna)))
      if (!inherits(res, "try-error")) {
        res <- data.frame(lapply(res, function(x) {
          attr(x, "format.spss") <- NULL
          if ("haven_labelled" %in% class(x))
            x <- haven::as_factor(x, levels = "labels")
          x
        }))
      }

    } else if (input$cpNDfiletype == "sas_data") {
      res <- try(data.frame(haven::read_sas(
        data_file = input$cpNDfile$datapath,
        catalog_file = input$cpSAScatalog_file$datapath,
        skip = input$cpSASskip)))

    } else if (input$cpNDfiletype == "stata") {
      res <- try(data.frame(foreign::read.dta(
        file = input$cpNDfile$datapath, convert.factors = input$cpSTATAfactors)))
    }

    if (inherits(res, "try-error")) {
      values$cpDataMessage <- paste0("Data import error: ", res)
      values$cpNewData <- NULL
      return(NULL)
    }
    if (ncol(res) < 2) {
      values$cpDataMessage <- "Data import error: only one column detected"
      values$cpNewData <- NULL
      return(NULL)
    }
    values$cpDataMessage <- paste0(
      "Data import success: ", nrow(res), " rows, ", ncol(res), " columns.")
    values$cpNewData <- res
  })

  ## New project confirm
  observeEvent(input$cpConfirm, {
    if (verbose)
      cat("DEBUG cpConfirm\n")
    newconf <- list()

    tmpdir <- gsub("/+$", "/", paste0(input$cpDatadir, "/"))
    if (!dir.exists(tmpdir))
      dir.create(tmpdir)
    newconf$datadir <- tmpdir

    tmpname <- cleanFileName(input$cpName)
    if (!nchar(tmpname)) {
      values$cpConfirmMessage <- "Project name is required"
      return(NULL)
    }
    if (dir.exists(paste0(newconf$datadir, tmpname))) {
      values$cpConfirmMessage <- paste0(
        "A project with the same name (", tmpname,
        ") already exists in the data directory.")
      return(NULL)
    }
    newconf$projectid <- tmpname

    if (!is.data.frame(values$cpNewData)) {
      values$cpConfirmMessage <- "Import data first"
      return(NULL)
    }

    if (input$cpDoImport) {
      newconf$dataname <- newconf$projectid
    } else
      newconf$dataname <- values$cpNewDataname

    if (any(is.na(values$cpNewData[[input$cpIdcol]]))) {
      values$cpConfirmMessage <- "ID variable contains missing values"
      return(NULL)
    }
    if (any(duplicated(values$cpNewData[[input$cpIdcol]]))) {
      values$cpConfirmMessage <- "ID variable contains duplicates"
      return(NULL)
    }
    newconf$idcol <- input$cpIdcol

    if (!length(input$cpTextcol)) {
      values$cpConfirmMessage <- "Select at least one text variable"
      return(NULL)
    }
    newconf$textcols <- input$cpTextcol

    if (input$cpCommcol == "none") {
      newconf$commcol <- paste0("comm_", newconf$projectid)
    } else
      newconf$commcol <- input$cpCommcol

    if (!length(input$cpTagcol)) {
      newconf$tagcols <- "scheme0"
    } else
      newconf$tagcols <- input$cpTagcol

    newconf$contextcols <- input$cpContextcol

    newconf$dataNrows <- input$cpNrows
    newconf$dataSkipRows <- input$cpNskip

    if (input$cp_use_python) {
      pytest <- try(system(paste(input$cp_which_python, "--version"),
                           intern = TRUE))
      if (inherits(pytest, "try-error")) {
        values$cpConfirmMessage <- paste(
          "Python path `", input$cp_which_python, "` not valid")
        return(NULL)
      }
    }
    newconf$use_python <- input$cp_use_python
    newconf$python <- input$cp_which_python
    newconf$use_gpu <- input$cp_use_gpu

    if (input$cp_use_spacy) {
      sptest <- system(paste0(
        newconf$python, " -m spacy info ", input$cp_use_spacy_model), intern = TRUE)
      if (length(sptest) == 0) {
        values$cpConfirmMessage <- paste(
          "Error loading spacy, check that it is installed in the specified python env")
        return(NULL)
      }
      if (length(attr(sptest, "status"))) {
        values$cpConfirmMessage <- paste(
          "Error loading spacy model, check that it has been downloaded")
        return(NULL)
      }
    }
    newconf$use_spacy <- input$cp_use_spacy
    newconf$use_spacy_model <- input$cp_use_spacy_model

    if (input$cp_use_ft) if (!file.exists(input$cp_use_ft_model)) {
      values$cpConfirmMessage <- paste(
        "Error loading fasttext model, check the specified path")
      return(NULL)
    }
    newconf$use_ft <- input$cp_use_ft
    newconf$use_ft_model <- input$cp_use_ft_model

    newconf$use_sb <- input$cp_use_sb
    newconf$use_sb_model <- input$cp_use_sb_model

    writeLines(RJSONIO::toJSON(newconf),
               paste0("tigger_", newconf$projectid, ".json"))
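    ## Illustrative sketch of the resulting tigger_<project>.json (field names
    ## as assembled above; the values here are only an example):
    ##   {"datadir": "data/", "projectid": "myproject", "dataname": "myproject",
    ##    "idcol": "id", "textcols": "text", "commcol": "comm_myproject",
    ##    "tagcols": "scheme0", "dataNrows": 500, "dataSkipRows": 0, ... }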

    values$confnames <- c(newconf$projectid, values$confnames)
    values$conf <- newconf

    ## On new data import, write a feather file with the name of the project
    if (input$cpDoImport)
      arrow::write_feather(values$cpNewData, paste0(
        newconf$datadir, newconf$dataname, ".feather"
      ))

    removeModal()
  })

  ##############################################################################
  ## Scheme management
  ##############################################################################

  ## Print active scheme in scheme management panel
  output$printScheme <- renderUI(h4(input$selectScheme))

  ## Create new scheme on request, with modal dialog
  observeEvent(input$createScheme, {
    if (verbose)
      cat("DEBUG create scheme\n")
    showModal(modalDialog(
      title = "Create tagging scheme",
      p("This will create a new scheme for this project"),

      textInput("newScheme", label = NULL, placeholder = "Required: new scheme name"),
      textAreaInput("newSchemeDescr", label = NULL,
                    placeholder = "Optional: scheme description"),
      checkboxInput("schemeDuplicate", "Duplicate existing scheme:"),
      conditionalPanel(
        "input.schemeDuplicate",
        selectInput("schemeDuplicateFrom", NULL, rev(values$conf$tagcols))),

      footer = tagList(actionButton("createSchemeConfirm", "Create"),
                       modalButton("Cancel"))))
  })

  observeEvent(input$createSchemeConfirm, {
    if (is.null(ok.data())) return(NULL)
    if (verbose)
      cat("DEBUG create scheme event\n")

    ## Make sure new scheme name is a correct file name
    if (!grepl("\\S", input$newScheme)) {
      showNotification(type = "error",
                       "Cannot create scheme: name empty.")
      return(NULL)
    }

    ## Check that new scheme name doesn't exist
    ## Re-read tagcols from conf file, to prevent scheme disappearances
    tmpname <- cleanFileName(input$newScheme)
    tagpath <- paste0(values$projectdir, tmpname, ".csv")
    confpath <- paste0("tigger_", values$conf$projectid, ".json")
    if (verbose)
      cat("DEBUG create scheme check json:", confpath, "\n")
    tmp_tagcols <- RJSONIO::fromJSON(confpath)$tagcols
    if (verbose)
      cat("DEBUG create scheme json OK\n")
    if (tmpname %in% tmp_tagcols | file.exists(tagpath)) {
      showNotification(type = "error",
                       "Cannot create scheme: name already exists.")
      return(NULL)
    }

    ## Create new scheme description
    if (nchar(input$newSchemeDescr))
      writeLines(input$newSchemeDescr,
                 paste0(values$projectdir, tmpname, ".txt"))

    ## Update conf file and reactives
    values$conf$tagcols <- c(tmp_tagcols, tmpname)
    # values$confs[[values$conf$projectid]]$tagcols <- c(tmp_tagcols, tmpname)
    writeLines(RJSONIO::toJSON(values$conf), confpath)

    if (input$schemeDuplicate) {
      tmpmat <- matrix(values$tagmat[, input$schemeDuplicateFrom])
      tmpmathist <- matrix(values$tagmathist[, input$schemeDuplicateFrom])
    } else {
      tmpmat <- matrix(rep(NA, nrow(ok.data())))
      tmpmathist <- matrix(rep(NA, nrow(ok.data())))
    }
    colnames(tmpmat) <- tmpname
    colnames(tmpmathist) <- tmpname

    values$tagmat <- cbind(values$tagmat, tmpmat)
    values$tagmathist <- cbind(values$tagmathist, tmpmathist)

    ## Create new tagging file if duplicating
    if (input$schemeDuplicate) {
      export <- data.frame(ok.data()[, values$conf$idcol],
                           values$tagmat[, tmpname])
      colnames(export) <- c(values$conf$idcol, tmpname)
      export <- na.omit(export)
      write.table(export, tagpath, sep = ",", qmethod = "double",
                  fileEncoding = "UTF-8", row.names = FALSE,
                  append = FALSE, col.names = TRUE)
    }

    if (verbose)
      cat("DEBUG create scheme exit\n")
    removeModal()
  })

  ## Button action: delete scheme
  observeEvent(input$schemeDelete, {
    if (verbose)
      cat("DEBUG delete scheme\n")
    if (length(values$conf$tagcols) < 2) {
      showNotification(type = "error",
                       "Cannot delete: at least one scheme must remain.")
      return(NULL)
    }
    showModal(modalDialog(
      title = "Delete tagging scheme",
      paste0("Confirm delete '", input$selectScheme,
             "': ", sum(!is.na(values$label)), " tags will be deleted",
             ", along with scheme description."),
      footer = tagList(actionButton("schemeDeleteConfirm", "Delete"),
                       modalButton("Cancel"))))
  })

  observeEvent(input$schemeDeleteConfirm, {
    file.remove(paste0(values$projectdir, input$selectScheme, ".csv"))
    file.remove(paste0(values$projectdir, input$selectScheme, ".txt"))
    file.remove(paste0(values$projectdir, input$selectScheme, "_regex.json"))
    values$conf$tagcols <-
      values$conf$tagcols[values$conf$tagcols != input$selectScheme]
    writeLines(RJSONIO::toJSON(values$conf),
               paste0("tigger_", input$selectProject, ".json"))
    removeModal()
  })

  observeEvent(input$schemeDescrSave, {
    req(input$selectScheme)
    writeLines(input$schemeDescr,
               paste0(values$conf$datadir, values$conf$projectid,
                      "/", input$selectScheme, ".txt"))
  })


  ##############################################################################
  ##############################################################################

  ## Tag selection: populate
  observe({
    values$conf$tagcols
    updateSelectInput(
      # session, "selectScheme", choices = c(rev(values$conf$tagcols), "<NEW>"))
      session, "selectScheme", choices = rev(values$conf$tagcols))
  })


  ## Populate oracle buttons
  output$oracleButtons <- renderUI({
    if (is.null(ok.data())) return(NULL)
    # if (is.null(values$label)) return(NULL)
    if (is.null(values$uniqueLabels)) return(NULL)
    if (verbose)
      cat("DEBUG populate oracle buttons\n")

    do.call(fluidRow, lapply(values$uniqueLabels, function(ilab)
      actionButton(paste0("oracle_", label2hash(ilab)),
                   strong(ilab))))
  })

  ## Populate maxprob variable selector
  observe({
    updateSelectInput(session, "maxprobWhich", choices = values$uniqueLabels)
    updateSelectInput(session, "taggedWhich",
                      choices = c("All" = "all", values$uniqueLabels))
  })

  ## Data panel message
  observeEvent(ok.data(), {
    if (verbose)
      cat("DEBUG data panel message\n")
    if (is.null(ok.data())) {
      output$dataMessage <- renderUI(p("Importing data..."))
    }
    output$dataMessage <- NULL
  })

  ## On config change, update dataView and retag options
  observeEvent(list(values$conf), {

    if (is.null(values$conf)) return(NULL)
    output$panelData <- renderUI(list(
      DT::dataTableOutput("dataView")
    ))

    output$panelRetag <- renderUI(c(
      list(
        br(),
        h4("Rename tags"),
        fluidRow(
          column(4, strong("Current")),
          column(2, strong("Count")),
          column(6, strong("New"))
        ),
        br()),
      lapply(values$uniqueLabels, function(iLab) {
        iHash <- label2hash(iLab)
        fluidRow(
          column(4, p(iLab)),
          column(2, p(sum(values$label == iLab, na.rm = TRUE))),
          column(4, textInput(
            paste0("newtag_", iHash), NULL,
            placeholder = "(new name, leave empty to untag)")),
          column(2, actionButton(paste0("retag_", iHash), "Rename")))
      })
    ))
  })

  output$panelExtraRegex <- renderUI(list(
    p(strong("Regex predictors")),
    fluidRow(
      column(8, textInput("extraRegexText", NULL,
                          placeholder = "(new regex)", width = "100%")),
      column(4, actionButton(
        "extraRegexSearch", strong("+"), width = "100%"))
    ),
    fluidRow(
      style = "overflow-y:scroll; max-height: 25vh",
      fluidRow(
        column(2, strong("")),
        column(1, strong("Use")),
        column(1, strong("Case")),
        column(1, p(strong("N"))),
        column(7, p(strong("Regex")))
      ),

      lapply(values$extraRegex, function(iRegex) {
        iHash <- label2hash(iRegex)
        tmp_cased <- values[[paste0("caseregex_", iHash)]]
        if (is.null(tmp_cased)) tmp_cased <- FALSE
        fluidRow(
          column(2, actionButton(paste0("delregex_", iHash), "🗑️", width = "100%")),
          column(1, checkboxInput(
            paste0("useregex_", iHash), NULL,
            value = isolate(!isFALSE(values[[paste0("useregex_", iHash)]])))),
          column(1, checkboxInput(
            paste0("caseregex_", iHash), NULL,
            value = isolate(isTRUE(values[[paste0("caseregex_", iHash)]])))),
          column(1, br(), p(sum(
            stringi::stri_detect_regex(
              ok.nicetext(), iRegex,
              opts_regex = list(case_insensitive = !tmp_cased)),
            na.rm = TRUE))),
          column(7, br(), p(iRegex))
        )
      })
    )

  ))
 | 
						|
  
 | 
						|
  ## Process new regex
 | 
						|
  observeEvent(input$extraRegexSearch, {
 | 
						|
    if (is.null(ok.nicetext())) return(NULL)
 | 
						|
    req(nchar(input$extraRegexText) > 0)
 | 
						|
    if (input$extraRegexText %in% values$extraRegex) return(NULL)
 | 
						|
    try_regex <- try(stringi::stri_detect_regex(
 | 
						|
      ok.nicetext(), input$extraRegexText,
 | 
						|
      opts_regex = list(case_insensitive = TRUE)))
 | 
						|
    if (inherits(try_regex, "try-error")) return(NULL)
 | 
						|
    new_regex <- input$extraRegexText
 | 
						|
    new_hash <- label2hash(new_regex)
 | 
						|
    values$extraRegex <- c(new_regex, values$extraRegex)
 | 
						|
    names(values$extraRegex)[1] <- new_hash
 | 
						|
    if (verbose)
 | 
						|
      cat("DEBUG regex search", input$extraRegexText, "\n")
 | 
						|
  })
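  ## Note: candidate regexes are vetted by running stri_detect_regex() inside
  ## try() before being stored, so invalid patterns are silently ignored.
  ## A minimal sketch of the same guard (hypothetical pattern, not app code):
  # chk <- try(stringi::stri_detect_regex("some text", "(unclosed"), silent = TRUE)
  # if (inherits(chk, "try-error")) message("invalid regex, ignoring")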
  
  ## Regex predictors: populate delete and use events on first creation
  useregex_trigger <- reactiveVal(0)
  delregex_trigger <- reactiveVal(0)
  caseregex_trigger <- reactiveVal(0)
  observeEvent(list(values$extraRegex), {
    if (!length(values$extraRegex)) return(NULL)
    if (any(! values$extraRegex %in% values$extraRegexHist)) {
      if (verbose)
        cat("DEBUG regex predictor names updates\n")
      tmpregex <- values$extraRegex[! values$extraRegex %in% values$extraRegexHist]
      values$extraRegexHist <- c(tmpregex, values$extraRegexHist)
      
      c(lapply(1:length(tmpregex), function(iregex) {
          observeEvent(
            input[[paste0("delregex_", names(tmpregex)[iregex])]], {
              if (verbose)
                cat("DEBUG regex delete", names(tmpregex)[iregex], "\n")
              values$extraRegex <- values$extraRegex[values$extraRegex != tmpregex[iregex]]
              delregex_trigger(delregex_trigger() + 1)
            })
        }),
        lapply(1:length(tmpregex), function(iregex) {
          tmpname <- paste0("useregex_", names(tmpregex)[iregex])
          observeEvent(
            input[[tmpname]], {
              values[[tmpname]] <- input[[tmpname]]
              useregex_trigger(useregex_trigger() + 1)
            })
        }),
        lapply(1:length(tmpregex), function(iregex) {
          tmpname <- paste0("caseregex_", names(tmpregex)[iregex])
          observeEvent(
            input[[tmpname]], {
              values[[tmpname]] <- input[[tmpname]]
              caseregex_trigger(caseregex_trigger() + 1)
            })
        })
      )
    }
  })
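  ## Note: this is the app's register-once pattern for dynamically created
  ## controls. Each regex is keyed by its hash, hashes already seen are kept in
  ## values$extraRegexHist, and one observeEvent per control is created only the
  ## first time a hash appears, so re-rendering the UI does not stack duplicate
  ## observers for the same button.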
  
  observeEvent(list(delregex_trigger(), useregex_trigger(), caseregex_trigger()), {
    req(ok.data(), input$selectScheme)
    regex_all <- values$extraRegex
    values$regex_inuse <- sapply(names(regex_all), function(iname) 
        values[[paste0("useregex_", iname)]])
    values$regex_case <- sapply(names(regex_all), function(iname) 
        values[[paste0("caseregex_", iname)]])
    if (verbose) if (any(values$regex_inuse))
      cat("DEBUG regex in use", paste0(collapse = " ", regex_all[values$regex_inuse]), "\n")
    writeLines(
      RJSONIO::toJSON(lapply(FUN = unname, list(
        regex = regex_all, use = values$regex_inuse, case = values$regex_case))),
      paste0(values$projectdir, input$selectScheme, "_regex.json"))
  })
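  ## The file written above is a plain JSON snapshot of the regex predictors for
  ## the current scheme. Illustrative content (hypothetical patterns):
  # {
  #   "regex": [ "\\bunemploy", "^RT @" ],
  #   "use":   [ true, false ],
  #   "case":  [ false, false ]
  # }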

  output$dataView <- DT::renderDataTable({
    # if (is.null(ok.text()) | ok.data.running()) return(NULL)
    req(ok.text())
    if (verbose) 
      cat("DEBUG enter dataView\n")
    if (verbose) 
      cat("DEBUG render dataView\n")
    DT::datatable(
      rownames = F, 
      data.frame(
        row = 1:nrow(ok.data()),
        id = ok.data()[, values$conf$idcol], 
        text = ok.text()), 
      options = list(
        pageLength = 3,
        columnDefs = list(list(
          targets = 2,
          render = DT::JS(
            "function(data, type, row, meta) {",
            "return type === 'display' && data.length > 200 ?",
            "'<span title=\"' + data + '\">' + data.substr(0, 200) + '...</span>' : data;",
            "}")
        ))
      ), 
      callback = DT::JS('table.page(0).draw(false);')
    )
  })
  
  ## Triggers for mass rename, created with each new label, with confirm
  observeEvent(values$uniqueLabelsAll, {
    if (is.null(values$uniqueLabelsAll)) return(NULL)
    notthere <- unique(na.omit(
      values$uniqueLabelsAll[! values$uniqueLabelsAll %in% values$retagLabels]))
    lapply(
      notthere,
      function(iTag) {
        iHash <- label2hash(iTag)
        
        observeEvent(input[[paste0("retag_", iHash)]], {
          tmp_oldtag <- iTag
          tmp_retagged <- which(values$label == tmp_oldtag)
          tmp_newtag <- gsub(
            "^\\s+|\\s+$", "", input[[paste0("newtag_", iHash)]])
          showModal(modalDialog(
            title = "Mass retag", 
            paste0("Confirm retag: ", length(tmp_retagged), " ", 
                   tmp_oldtag, " tags will be ", 
                   ifelse(nchar(tmp_newtag), 
                          paste("renamed to", tmp_newtag) , "deleted")),
            ## Use the hashed id here as well: raw tags may contain characters
            ## that are not valid in HTML element ids
            footer = tagList(actionButton(paste0("retag_confirm_", iHash), "Retag"),
                             modalButton("Cancel"))))
        })
        
        observeEvent(input[[paste0("retag_confirm_", iHash)]], {
          tmp_oldtag <- iTag
          tmp_retagged <- which(values$label == tmp_oldtag)
          tmp_newtag <- gsub(
            "^\\s+|\\s+$", "", input[[paste0("newtag_", iHash)]])
          if (!nchar(tmp_newtag)) tmp_newtag <- NA
          values$tagTrigger <- list(
            id = ok.data()[[values$conf$idcol]][tmp_retagged],
            tag = rep(tmp_newtag, length(tmp_retagged)))
          values$queries <- c(values$queries, tmp_retagged)

          removeModal()
        })

      })
    values$retagLabels <- c(values$retagLabels, notthere)
  })
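  ## Note: mass retagging does not touch values$label directly. Like the Oracle
  ## buttons below, it only fills values$tagTrigger; the tagTrigger observer
  ## further down is the single place where tags are written to disk and copied
  ## into values$label.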
  
  

  ##############################################################################
  ## UI rendering of Panel Tagging / Visualization 
  ##############################################################################
  
  ####################
  ## Tagging left panel
  

  # ## Trigger from Oops
  # observeEvent(input$oops, {
  #   if (is.null(ok.data()))
  #     return(NULL)
  #   if (verbose) 
  #     cat("DEBUG enter Oops\n")
  #   if (!is.null(values$retagged)) if (values$queries[length(values$queries)] == values$retagged[length(values$retagged)])
  #     values$retagged <- values$retagged[-length(values$retagged)]
  #   values$label[values$queries[length(values$queries)]] <- NA
  #   if (ok.commentCol() != "(None)")
  #     values$comment[values$queries[length(values$queries)]] <- ok.data()[values$queries[length(values$queries)], ok.commentCol()]
  #   values$queries <- values$queries[-length(values$queries)]
  #   ## tagTrigger ?
  #   
  #   ready2tag(0)
  #   queryNext(queryNext() + 1)
  # })
  
  
  ## Tagging triggers for Oracle buttons, created with each new label
  observeEvent(values$uniqueLabelsAll, {
    if (is.null(values$label)) return(NULL)
    if (is.null(values$uniqueLabelsAll)) return(NULL)
    if (all(is.na(values$label))) return(NULL)
    notthere <- unique(na.omit(
      values$uniqueLabelsAll[! values$uniqueLabelsAll %in% values$oracleLabels]
    ))
    if (!length(notthere)) return(NULL)
    if (verbose) 
      cat("DEBUG triggers for oracle buttons\n")
    lapply(
      notthere,
      function(iLab) {
        observeEvent(
          input[[paste0("oracle_", label2hash(iLab))]], {
            if (is.null(ok.text())) return(NULL)
            if (is.null(values$newQuery)) return(NULL)
            if (!ready2tag()) {cat("DEBUG not ready to tag in Button\n"); return(NULL)}
            # ready2tag(0)
            # if (input$allowRetag | sum(is.na(values$label)) == 0)
            if (!is.na(values$label[values$lastQuery]))
              values$retagged <- c(values$retagged, values$lastQuery)
            # values$label[values$lastQuery] <- iLab
            values$comment[values$lastQuery] <- input$currentComment
            # values$queries <- c(values$queries, values$lastQuery)
            values$tagTrigger <- list(
              id = ok.data()[values$lastQuery, values$conf$idcol], 
              tag = iLab, 
              comment = input$currentComment)
          })
      })
    values$oracleLabels <- c(values$oracleLabels, notthere)
  })
  
  ## Trigger from Oracle Create value button
  observeEvent(input$currentAction, {
    if (is.null(ok.data())) return(NULL)
    if (is.null(values$newQuery)) return(NULL)
    if (verbose) 
      cat("DEBUG enter create value\n")
    # if (input$allowRetag | sum(is.na(values$label)) == 0)
    if (!is.na(values$label[values$lastQuery]))
      values$retagged <- c(values$retagged, values$lastQuery)
    if (input$newLab == "") {
      return(NULL)
    } else {
      # values$label[values$lastQuery] <- input$newLab
      updateTextInput(session, "newLab", value= "")
    }
    values$comment[values$lastQuery] <- input$currentComment
    # values$queries <- c(values$queries, values$lastQuery)
    values$tagTrigger <- list(
      id = ok.data()[values$lastQuery, values$conf$idcol], 
      tag = input$newLab, 
      comment = input$currentComment)
  })
  
  ## Trigger from Oracle Confirm button
  observeEvent(input$oracleConfirm, {
    req(ok.data(), ok.train(), values$newQuery)
    # if (is.null(ok.data())) return(NULL)
    # if (is.null(values$newQuery)) return(NULL)
    if (!ready2tag()) {cat("DEBUG not ready to tag in OC\n"); return(NULL)}
    # ready2tag(0)
    if (!ok.train.possible()) return(NULL)
    if (verbose) 
      cat("DEBUG oracle confirm\n")
    # if (input$allowRetag | sum(is.na(values$label)) == 0)
    if (!is.na(values$label[values$lastQuery]))
      values$retagged <- c(values$retagged, values$lastQuery)
    # values$label[values$lastQuery] <- ok.predlab1()[values$lastQuery]
    # values$queries <- c(values$queries, values$lastQuery)
    values$comment[values$lastQuery] <- input$currentComment
    values$tagTrigger <- list(
      id = ok.data()[values$lastQuery, values$conf$idcol], 
      tag = ok.predlab1()[values$lastQuery], 
      comment = input$currentComment)
  })
  
  ## Trigger for CV glm (async)
  glmCV_nclicks <- reactiveVal(0)
  observeEvent(input$glmCV, {
    if (is.null(ok.predictor())) return(NULL)
    if (glmCV_nclicks() != 0) {
      showNotification("LASSO CV already running")
      return(NULL)
    }
    glmCV_nclicks(1)
    glmCV_future <- reactiveVal()

    tmpvalues <- values$label
    tmppred <- ok.predictor()
    for (ival in na.omit(unique(tmpvalues)))
      if (sum(tmpvalues == ival, na.rm = T) <= 8)
        tmpvalues[which(tmpvalues == ival)] <- NA
    glmCV_async <- future(seed = TRUE, {
      glmnet::cv.glmnet(
        x= tmppred[!is.na(tmpvalues), ], 
        y= as.factor(tmpvalues[!is.na(tmpvalues)]), 
        family= "multinomial")
    }) %...>% glmCV_future()
    
    glmCV_async <- catch(glmCV_async, function(e) {
      showNotification(type = "error", paste0("LASSO CV error: ", e))
    })
    glmCV_async <- finally(glmCV_async, function() {
      glmCV_nclicks(0)
      if (is.null(glmCV_future())) return(NULL)
      updateNumericInput(session, "glmLambda", 
                         value = glmCV_future()$lambda.min)
    })
    NULL
  })
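  ## The async pattern used here and in the other heavy computations: a
  ## reactiveVal guards against double launches, the expensive call runs in
  ## future() and is piped into a reactiveVal with %...>%, and promises::catch()
  ## / promises::finally() report errors and release the guard. A minimal
  ## sketch of the same shape (hypothetical names, not part of the app):
  # busy <- reactiveVal(0); result <- reactiveVal()
  # fut <- future(seed = TRUE, { Sys.sleep(1); 42 }) %...>% result()
  # fut <- catch(fut, function(e) showNotification(conditionMessage(e)))
  # fut <- finally(fut, function() busy(0))
  # NULL  # return a non-promise so the observer does not wait on the result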
  
  ## Trigger for CV liblinear (async)
  liblinCV_nclicks <- reactiveVal(0)
  observeEvent(input$liblinCV, {
    if (is.null(ok.predictor())) return(NULL)
    if (liblinCV_nclicks() != 0) {
      showNotification("LibLineaR CV already running")
      return(NULL)
    }
    liblinCV_nclicks(1)
    liblinCV_future <- reactiveVal()

    tmpvalues <- values$label
    tmppred <- ok.predictor()
    for (ival in na.omit(unique(tmpvalues)))
      if (sum(tmpvalues == ival, na.rm = T) <= 3)
        tmpvalues[which(tmpvalues == ival)] <- NA
    
    liblinCV_async <- future(seed = TRUE, {
      LiblineaR::LiblineaR(
        data= tmppred[!is.na(tmpvalues), ], 
        target= as.factor(tmpvalues[!is.na(tmpvalues)]), 
        type= 0, findC= T)
    }) %...>% liblinCV_future()
    liblinCV_async <- catch(liblinCV_async, function(e) {
      showNotification(type = "error", paste0("LibLineaR CV error: ", e))
    })
    liblinCV_async <- finally(liblinCV_async, function() {
      liblinCV_nclicks(0)
      if (is.null(liblinCV_future())) return(NULL)
      updateNumericInput(session, "liblinCost", value= liblinCV_future())
    })
    NULL
  })
  
  
  output$currentContext <- renderUI({
    if (verbose) 
      cat("DEBUG currentContext\n")
    if ((! is.null(ok.text())) & (length(ok.contextCol()) > 0))
      HTML(paste(paste(ok.contextCol(),
                       as.character(ok.data()[values$newQuery, ok.contextCol()]),
                       sep = " : "),
                 collapse= "<br/><br/>"))
  })
  
  output$makeOracleConfirm <- renderUI({
    if (verbose) 
      cat("DEBUG update printPredicted\n")
    if (is.null(ok.pred())) {
      tmp <- "No prediction"
    } else if (is.null(values$newQuery)) {
      tmp <- "No query"
    } else {
      tmp <- paste0(
        ifelse(is.na(values$label[values$newQuery]), "", "Pred:    "),
        ok.predlab1()[values$newQuery], 
        " (", round(max(ok.pred()[values$newQuery, ]), 3), ")")
    }
    res <- list(actionButton("oracleConfirm", label= paste("🤖", tmp), width = "100%"))
    if (!is.null(values$label) & !is.null(values$newQuery)) {
      if (!is.na(values$label[values$newQuery]))
        res <- list(res[[1]], HTML(paste0(
          "<center><strong> Current: ", values$label[values$newQuery], 
          "</strong></center>")))
    }
    res
  })
  
  
  ## Show training diagnostics on new model train
  output$trainDiagno <- 
    renderText(paste0(ok.msgtagged(), "\n", values$modelDiagno))

  ## Diagnostics event 
  observeEvent(diagTrigger(), {
    if (verbose) 
      cat("DEBUG update modelDiagno\n")
    if (is.null(ok.data())) {
      values$modelDiagno <- "No model"
      return(NULL)
    }
    if (!ok.train.possible()) {
      values$modelDiagno <- "No model (not enough labelled data)."
      return(NULL)
    }
    
    if (input$predModel == "random forest") {
      tmp <- paste0(
        "Train Accuracy: ", 
        round(100 * mean(ok.predlab1()[!is.na(values$label)] == 
                           values$label[!is.na(values$label)]), 1), 
        "% ; Wtd. F1: ",
        sprintf("%.3f", wtdF1(values$label[!is.na(values$label)],
                              ok.predlab1()[!is.na(values$label)])),
        "\nOOB Accuracy  : ", 
        round(100 * mean( 
          colnames(ok.train()$predictions)[apply(ok.train()$predictions, 1, which.max)] == 
            values$label[!is.na(values$label)]), 1), 
        "% ; Wtd. F1: ",
        sprintf("%.3f", wtdF1(
          values$label[!is.na(values$label)],
          colnames(ok.train()$predictions)[apply(ok.train()$predictions, 1, which.max)]))
      )
    } else if (input$predModel %in% c("lasso", "linear", "naive bayes", "knn")) {
      tmp <- paste0(
        "Train Accuracy: ", 
        round(100 *  mean(ok.predlab1()[!is.na(values$label)] == 
                            values$label[!is.na(values$label)]), 1), 
        "% ; Wtd. F1: ",
        sprintf("%.3f", wtdF1(
          values$label[!is.na(values$label)],
          ok.predlab1()[!is.na(values$label)])))
    }
    values$modelDiagno <- tmp
  })
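  ## Note: for random forests the second line reports out-of-bag performance.
  ## With a ranger probability forest, ok.train()$predictions holds OOB class
  ## probabilities, so taking the column with the highest value in each row
  ## recovers the OOB predicted label for every labelled document.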
  
  ## Update comment input field
  observeEvent(values$newQuery, {
    if (is.null(ok.text())) return(NULL)
    if (verbose) 
      cat("DEBUG update comment field\n")
    updateTextInput(session, "currentComment", 
                    value = values$comment[values$newQuery])
  })
  
  
  ## Save event: tag + comment in dedicated csv
  observeEvent(values$tagTrigger, {
    if (is.null(values$tagTrigger)) return(NULL)
    if (verbose)
      cat("DEBUG enter event tagTrigger", length(values$tagTrigger$id), "\n")
    ready2tag(0) # Prevent new tagging until next query
    dapath <- paste0(values$projectdir, ok.labelcol(), ".csv")
    darows <- match(values$tagTrigger$id, ok.data()[, values$conf$idcol])
    
    export <- data.frame(values$tagTrigger$id, values$tagTrigger$tag)
    colnames(export) <- c(values$conf$idcol, ok.labelcol())
    if (verbose)
      cat("DEBUG tagTrigger tag", 
          paste(export[, 1], export[, 2], collapse = " ; "), "\n")
    write.table(export, dapath, sep = ",", qmethod = "double", 
                fileEncoding = "UTF-8", row.names = F,
                append = ifelse(file.exists(dapath), T, F), 
                col.names = ifelse(file.exists(dapath), F, T))
    
    if (any(nchar(values$tagTrigger$comment))) {
      tmp_filter <- which(nchar(values$tagTrigger$comment) > 0)
      dapath <- paste0(values$projectdir, values$conf$commcol, ".csv")
      export <- data.frame(values$tagTrigger$id[tmp_filter], 
                           values$tagTrigger$comment[tmp_filter])
      colnames(export) <- c(values$conf$idcol, values$conf$commcol)
      if (verbose)
        cat("DEBUG tagTrigger comment", export[1,1], export[1, 2], "\n")
      write.table(export, dapath, sep = ",", qmethod = "double", 
                  fileEncoding = "UTF-8", row.names = F,
                  append = ifelse(file.exists(dapath), T, F), 
                  col.names = ifelse(file.exists(dapath), F, T))
    }

    ## Update label hist
    values$labelhist[match(values$tagTrigger$id, ok.data()[, values$conf$idcol])] <- 
      max(values$labelhist, na.rm = T) + 1
    ## Update wasTagged and wasRetagged
    itag <- match(values$tagTrigger$id, ok.data()[, values$conf$idcol])
    if (input$sampleChoice == "untagged") {
      values$wasTagged[itag] <- TRUE
    } else if (input$sampleChoice == "tagged") {
      values$wasRetagged[itag] <- TRUE
    } else if (input$sampleChoice == "all") {
      if (is.na(values$label[itag])) {
        values$wasTagged[itag] <- TRUE
      } else 
        values$wasRetagged[itag] <- TRUE
    }
    
    ## Call queryNext event, or launch training
    if (trainCountdown() == 1) {
      trainCountdown(input$trainCountdown)
      trainTrigger(trainTrigger() + 1)
    } else if (trainCountdown() > 1) {
      trainCountdown(trainCountdown() - 1)
      queryNext(queryNext() + 1)
    } else {
      queryNext(queryNext() + 1)
    }
    
    ## Update values$label etc
    values$label[darows] <- values$tagTrigger$tag
    values$queries <- c(values$queries, darows)
    
    if (verbose)
      cat("DEBUG exit event tagTrigger", nrow(export), "rows\n")
  })
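  ## The tag log written above is an append-only CSV per tagging scheme: id
  ## column first, tag column second, header only when the file is created.
  ## Illustrative content (hypothetical id column and scheme names):
  # "doc_id","scheme1"
  # "doc_0042","positive"
  # "doc_0057","negative"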
  
  
  ####################
  ## Tagging main panel
  
  output$textVisuCols <- renderUI({
    if (verbose) 
      cat("DEBUG render text/visu panel\n")
    leftsize <- 6
    if (!input$panelText) leftsize <- 1
    if (input$panelText & !input$panelVisu) leftsize <- 11
    
    fluidRow(
      column(
        leftsize, 
        conditionalPanel(
          "input.panelText",
          wellPanel(htmlOutput("currentText"))
        )
      ), 
      column(
        12 - leftsize,
        conditionalPanel(
          "input.panelVisu",
          fluidRow(
            column(2, HTML(paste0('<div title="Compute new visualization">', 
                                  actionButton("visuCompute", "🎲"), 
                                  '</div>'))),
            column(2, HTML(paste0('<div title="Zoom out/into selected area">', 
                                  actionButton("visuGoZoom", "🔍±"), 
                                  '</div>'))),
            column(2, HTML(paste0('<div title="Query only from zoom area">', 
                                  checkboxInput("visuLock", "🔒", value = FALSE), 
                                  '</div>'))), 
            column(2, HTML(paste0('<div title="Visualization options">', 
                                  checkboxInput("visuOptions", "🔧"), 
                                  '</div>'))), 
            column(3, HTML(paste0('<div title="Plot size">', 
                                  numericInput("visuHeight", NULL, 400, 20, 2e12, 10), 
                                  '</div>')))
          ),
          conditionalPanel(
            'input.visuOptions',
            flowLayout(
              selectInput(
                "visuAlgo", "Visu. method", c("umap", "t-sne"), "umap", width = "100%"),
              selectizeInput(
                "visuSource", "Visu. source",
                c("Word counts (DFM)" = "dfm", 
                  "Word embeddings (FastText)" = "ft",
                  "Sentence embeddings (SBERT)" = "sb", 
                  "BERT predictions" = "bertpred", 
                  "Regex" = "regex"),
                c("sb", "ft"), multiple = TRUE)
            )
          ),
          conditionalPanel( # umap options
            'input.visuAlgo == "umap" & input.visuOptions', 
            flowLayout(
              numericInput(
                "umapNeighb", "Nb. Neighbors", 15, 1, 500, 1), 
              numericInput(
                "umapSpread", "Spread", 1, 0, 16, 1e-2),
              numericInput(
                "umapMindist", "Min dist.", .001, 0, 16, 1e-3), 
              selectInput(
                "umapMetric", "Metric", selected = "cosine",
                c("euclidean", "cosine", "manhattan", "hamming", 
                  "correlation", "categorical")
              )
            )
          ),
          conditionalPanel( # t-SNE options
            'input.visuAlgo == "t-sne" & input.visuOptions', 
            flowLayout(
              numericInput(
                "tsnePerplex", "Perplexity", 15, 1, 500, 1), 
              numericInput(
                "tsneTheta", "Theta", .5, 0, 1, 1e-2),
              numericInput(
                "tsnePcaDim", "PCA dims", 50, 1, 100, 1)
            )
          ),
          uiOutput("visuMsg"),
          plotOutput(
            "visuPlot", width = "100%",
            dblclick = "visuDblclick",
            brush = brushOpts("visuBrush", resetOnNew = T))
        )
      )
    )
  })
  
  ##################
  ## Subpanel "Text"
  ##################
  
  output$currentText <- renderUI({
    if (verbose) 
      cat("DEBUG render currentText\n")
    if (is.null(ok.data()) | is.null(ok.nicetext()))
      return(HTML("Import data to start training."))

    if (input$strategy %in% c("entropy", "maxprob") & is.null(ok.train()))
        return(HTML("<em>No model: train one, or try 'random' or 'sequential' strategy</em>"))

    if (is.null(values$newQuery))
      return(HTML("<em>No query</em>"))
    
    bastext <- ok.nicetext()[values$newQuery]
    if (nchar(input$regexFilter) > 0)
      bastext <- gsub(paste0("(", isolate(input$regexFilter), ")"), 
                      "<b>\\1</b>", bastext, ignore.case = isolate(!input$regexCaseSens))
    baslen <- nchar(bastext)
    if (!length(baslen)) {
      if (nchar(input$regexFilter) > 0) 
        return(HTML("<b><em>Regex filter: no match</em></b>"))
      return(HTML("<b><em>No text matches the filters</em></b>"))
    }
    if (is.na(baslen))
      return(HTML("<em>No query</em>"))
    if (baslen > 1200) {
      bastext <- paste0(substring(bastext, 1, 1200), '<em title="> 512 tokens (not read by BERT)">',
                        substring(bastext, 1201), "</em>")
    }
    bastext <- gsub("\n", "<br/>", bastext)
    HTML(bastext)
  })
  
  
  
  ##############################################################################
  ## Panel "Visualize"
  ##############################################################################
  
  output$visuMsg <- renderUI({
    if (!is.null(values$visuMsg))
      # strong(values$visuMsg)
      h5(values$visuMsg, style = "text-align:center")
  })
  
  ## Compute visualization points (async)
  visu_nclicks <- reactiveVal(0)
  ok.visu <- reactiveVal()
  observe({
    if (!input$panelVisu) 
      values$visuGo <- FALSE
    if (input$panelVisu & !values$visuGo) 
      values$visuGo <- TRUE
  })
  observeEvent(list(values$visuGo, input$visuCompute), {
    if (is.null(input$visuSource)) return(NULL)
    if (!values$visuGo) return(NULL)
    if (is.null(ok.data())) {
      values$visuMsg <- "No data imported."
      return(NULL)
    }
    
    if (visu_nclicks() > 0) {
      values$visuMsg <- "Already computing visualization"
      return(NULL)
    }
    visu_nclicks(1)
    
    values$visuMsg <- "Computing visualization..."
    
    ## Prepare options and objects for async computation    
    dasources <- input$visuSource
    daalgo <- input$visuAlgo
    
    if (verbose) 
      cat("DEBUG compute visu embedding: ", daalgo, paste(dasources, collapse = " "), "\n")
    
    visudat <- NULL
    if ("dfm" %in% dasources) dadfm <- ok.dfm()
    if ("ft" %in% dasources) {
      visudat <- ok.data()[, values$cols_ft]
    }
    if ("sb" %in% dasources) {
      if (is.null(visudat)) {
        visudat <- ok.data()[, values$cols_sb]
      } else 
        visudat <- cbind(visudat, ok.data()[, values$cols_sb])
    }
    if ("bertpred" %in% dasources) {
      if (is.null(visudat)) {
        visudat <- ok.data()[, do.call(c, values$cols_bertpred)]
      } else 
        visudat <- cbind(visudat, ok.data()[, do.call(c, values$cols_bertpred)])
    }
    if ("regex" %in% dasources) {
      if (is.null(visudat)) {
        visudat <- ok.extraRegex.matrix()
      } else 
        visudat <- cbind(visudat, ok.extraRegex.matrix())
    }
    if (daalgo == "umap") {
      visu_args <- list(
        n_neighbors = input$umapNeighb, metric = input$umapMetric, 
        spread = input$umapSpread, min_dist = input$umapMindist)
    } else if (daalgo == "t-sne") {
      visu_args <- list(
        perplexity = input$tsnePerplex, theta = input$tsneTheta,
        initial_dims = input$tsnePcaDim, check_duplicates = FALSE)
    }
    
    visu_async <- future(seed = TRUE, {
      if ("dfm" %in% dasources) {
        ## TODO: CA treatment buggy, switch to SVD?
        # da_ca <- quanteda.textmodels::textmodel_ca(
        #   quanteda::as.dfm(dadfm), nd = min(100, nrow(dadfm)))
        # da_ca <- (da_ca$rowcoord %*% diag(da_ca$sv))[, !is.na(da_ca$sv)]
        da_ca <- as.matrix(dadfm)
        if (is.null(visudat)) {
          visudat <- da_ca
        } else 
          visudat <- cbind(da_ca, visudat)
      }
      
      if (daalgo == "umap") {
        do.call(uwot::umap, c(list(visudat), visu_args))
      } else if (daalgo == "t-sne") {
        visudat_dupli <- duplicated(visudat)
        if (any(visudat_dupli))
          visudat[visudat_dupli, ] <- jitter(visudat[visudat_dupli, ])
        do.call(Rtsne::Rtsne, c(list(visudat), visu_args))$Y
      }
    }) %...>% ok.visu()
    
    visu_async <- catch(visu_async, function(error) {
      values$visuMsg <- paste0("Error: ", error)
    })
    visu_async <- finally(visu_async, function() {
      visu_nclicks(0)
      if (input$visuGoZoom == 0) {
        values$visuMsg <- "Double-click to select, drag to select zooming zone."
      } else
        values$visuMsg <- NULL
      
      nice_choices <- c("FastText" = "ft", "SBERT" = "sb", "visu" = daalgo)
      names(nice_choices)[3] <- ifelse(daalgo == "t-sne", "t-SNE", "UMAP")
      updateSelectInput(
        session, "dlEmbedSelect", choices = nice_choices, selected = daalgo)
      
      if (verbose) 
        cat("DEBUG Visu async out\n")
    })
    
    NULL
  })
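  ## For reference, the two projections built above boil down to calls of this
  ## shape (illustrative random matrix and roughly default parameters):
  # m <- matrix(rnorm(200), ncol = 10)
  # xy_umap <- uwot::umap(m, n_neighbors = 15, metric = "cosine",
  #                       spread = 1, min_dist = 0.001)
  # xy_tsne <- Rtsne::Rtsne(m, perplexity = 5, theta = 0.5,
  #                         initial_dims = 10, check_duplicates = FALSE)$Y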
  
  ## Compute visualization plot (async)
  ok.ggvisu <- reactiveVal()
  observe({
    # if (is.null(ok.visu())) return(NULL)
    if (is.null(ok.visu())) {
      ok.ggvisu(NULL)
      return(NULL)
    }
    # if (!isTRUE(values$visugo)) return(NULL)
    if (!input$panelVisu) return(NULL)
    
    if (verbose) 
      cat("DEBUG compute visu plot\n")
    ggdat <- data.frame(ok.visu(), tag = values$label)
    if (!is.null(ok.predlab1())) 
      ggdat$tag[is.na(values$label)] <- ok.predlab1()[is.na(values$label)]
    da_predlabs <- ok.predlab1()
    da_tagged <- ! is.na(values$label)
    da_query <- values$newQuery
    
    ggvisu_async <- future({
      ggres <- ggplot(ggdat, aes(X1, X2)) + theme_bw() + 
        xlab("") + ylab("") + 
        theme(legend.position = "bottom") +
        scale_shape_manual(values = rep(1:8, 40)) # TODO : adapt to tags
      
      ## Tagged points      
      if (any(da_tagged)) {
        ggres <- ggres + 
          geom_point(data = ggdat[da_tagged, ], 
                     aes(X1, X2, color = tag, shape = tag),
                     size = 1.5, stroke = 1.5, alpha = .8)
      }
      
      ## Not tagged points
      if (!is.null(da_predlabs)) {
        ggres <- ggres + 
          geom_point(data = ggdat[!da_tagged, ],
                     aes(color = tag, shape = tag), show.legend = FALSE)
      } else {
        ggres <- ggres + 
          geom_point(alpha = .8, data = ggdat[is.na(ggdat$tag), ])
      }
      
      ## Current query
      if (!is.null(da_query)) {
        if (is.null(da_predlabs)) {
          ggres <- ggres + 
            geom_point(data = ggdat[da_query, ], color = 2,
                       stroke = 3, shape = 3, show.legend = FALSE)
        } else {
          ggres <- ggres + 
            geom_point(data = ggdat[da_query, ], 
                       stroke = 3, shape = 3, show.legend = FALSE)
        }
      }
      ggres
    }) %...>% ok.ggvisu()
    
    ggvisu_async <- catch(ggvisu_async, function(error) {
      values$visuMsg <- paste0("Error in ggplot: ", error)
    })
    
  })
  
  observe({
    if (verbose) 
      cat("DEBUG update visuPlot\n")
    if (is.null(ok.ggvisu())) {
      values$visuMsg <- "No visualization yet."
      output$visuPlot <- NULL
      values$visuZoom$xlim <- NULL
      values$visuZoom$ylim <- NULL
      return(NULL)
    }
    output$visuPlot <- renderPlot(height = input$visuHeight, {
      # if (is.null(ok.ggvisu())) return(NULL)
      ok.ggvisu() + 
        coord_fixed(xlim = values$visuZoom$xlim, ylim = values$visuZoom$ylim)
    })
  })
  
  observeEvent(input$visuDblclick, {
    if (!is.null(values$visuMsg))
      values$visuMsg <- NULL
    tmp <- input$visuDblclick
    bmu <- t(ok.visu()) - c(tmp$x, tmp$y)
    bmu <- which.min(colSums(bmu^2))
    values$lastQuery <- bmu
    values$newQuery <- bmu
    ready2tag(1)
  })
  
  observeEvent(input$visuGoZoom, {
    if (!is.null(values$visuMsg))
      values$visuMsg <- NULL
    values$visuZoom$xlim <- c(input$visuBrush$xmin, input$visuBrush$xmax)
    values$visuZoom$ylim <- c(input$visuBrush$ymin, input$visuBrush$ymax)
  })
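  ## Note: the double-click handler above selects the closest projected point by
  ## squared Euclidean distance. Equivalent one-liner on a coordinate matrix xy
  ## and a click at (cx, cy) (hypothetical names):
  # nearest <- which.min(colSums((t(xy) - c(cx, cy))^2))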
  
  
  ##############################################################################
  ## Panel History
  ##############################################################################
  
  ## Create history link and retag events as they appear
  observeEvent(input$histDTable_rows_current, {
    histrows <- order(values$labelhist)
    histrows <- rev(histrows[!is.na(values$labelhist[histrows])])
    histrows <- histrows[!is.na(values$label[histrows])]
    histrows <- histrows[input$histDTable_rows_current]
    notthere <- histrows[! histrows %in% values$histQueries]
    
    if (!length(notthere)) return(NULL)
    lapply(notthere, function(daquery) {
      if (verbose) cat("DEBUG create history events", daquery, "\n")
        observeEvent(input[[paste0("hist", daquery)]], {
          if (verbose) cat("DEBUG history link", daquery, "\n")
          values$lastQuery <- daquery
          values$newQuery <- daquery
          updateTabsetPanel(session, "mainPanelset", selected = "Tagging")
          ready2tag(1)
        })

        observeEvent(input[[paste0("histTag", daquery)]], {
          if (verbose) cat("DEBUG history retag", daquery, "\n")
          values$histSaveStack <- rbind(
            values$histSaveStack, 
            data.frame(id = ok.data()[daquery, values$conf$idcol], 
                       tag = input[[paste0("histTag", daquery)]], 
                       comment = values$comment[daquery])
          )
        })
    })
    values$histQueries <- c(values$histQueries, notthere)
  })
  
  ## History save action
  observeEvent(input$histSave, {
    req(is.data.frame(values$histSaveStack))
    values$tagTrigger <- as.list(values$histSaveStack)
    values$label[match(values$histSaveStack$id, ok.data()[[values$conf$idcol]])] <- 
      values$histSaveStack$tag
    values$histSaveStack <- NULL
  })
  
  ## Update save name with number of unsaved changes
  observeEvent(length(values$histSaveStack$id), {
    updateActionButton(
      session, "histSave", 
      label = paste0("Save changes (", length(unique(values$histSaveStack$id)),")"))
  })
  
  ## History table, with clickable links and retagging
  output$histDTable <- DT::renderDataTable({
    histrows <- order(values$labelhist)
    histrows <- rev(histrows[!is.na(values$labelhist[histrows])])
    histrows <- histrows[!is.na(values$label[histrows])]

    # histrows <- rev(values$queries)
    hist_df <- data.frame(
      # history = length(histrows):1,
      history = values$labelhist[histrows],
      id = sapply(histrows, function(irow) {
        as.character(actionLink(
          paste0("hist", irow), 
          label = ok.data()[irow, values$conf$idcol], 
          onclick = paste0('Shiny.onInputChange(\"hist', irow, '\",  this.id)')))
      }),
      Tag = values$label[histrows],
      Retag = sapply(histrows, function(irow) {
        paste0(
          "<select id=\"histTag", irow, "\" onchange=\"Shiny.onInputChange('histTag", irow, "', this.value)\">", 
          paste0("<option value=\"", values$uniqueLabels, "\" ", 
                 ifelse(values$uniqueLabels == values$label[irow], "selected", ""),
                 ">", values$uniqueLabels, "</option>", collapse = " "),
          "</select>"
        )
      }),
      Comment = values$comment[histrows],
      Text = ok.nicetext()[histrows])
    if (!is.null(values$conf$contextcols))
      hist_df <- cbind(hist_df,
                       ok.data()[histrows, values$conf$contextcols])
    
    DT::datatable(hist_df, escape = FALSE, rownames = FALSE, filter = "top", 
                  options = list(
                    columnDefs = list(list(
                      targets = 5,
                      render = DT::JS(
                        "function(data, type, row, meta) {",
                        "return type === 'display' && data.length > 200 ?",
                        "'<span title=\"' + data + '\">' + data.substr(0, 200) + '...</span>' : data;",
                        "}")))
                  ))
  })
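  ## Note: the id and Retag columns embed raw HTML (an actionLink and a <select>)
  ## whose onclick/onchange handlers call Shiny.onInputChange('hist<row>', ...)
  ## and Shiny.onInputChange('histTag<row>', ...), turning each cell into an
  ## ad-hoc Shiny input; escape = FALSE is what lets that HTML through DT.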
  
  ##############################################################################
  ## Panel "Stats"
  ##############################################################################
  
  output$statsTagTable <- renderTable(rownames = F, {
    table(tag = values$label)
  })
  
  
  ## Model diagnostics by CV
  output$statsCVoutput <- reactive({
    if (is.null(values$modelcv))
      return("Model diagnostics will appear here")
    values$modelcv
  })
  
  output$statsCVtable <- DT::renderDataTable({
    if (is.null(values$cvtable))
      return(NULL)
    DT::datatable(values$cvtable, list(pageLength = 25), rownames = F)
  })
  
  ## Button action: compute CV (async)
  cv_nclicks <- reactiveVal(0)
  observeEvent(input$statsCVgo, {
    if (is.null(ok.train())) {
      values$modelcv <- "Error in model training"
      return(NULL)
    }
    if (cv_nclicks() != 0) {
      values$modelcv <- "CV already running"
      return(NULL)
    }
    cv_nclicks(cv_nclicks() + 1)
    
    predmat <- ok.predictor()[!is.na(values$label), ]
    predlab <- values$label[!is.na(values$label)]
    cvsamp <- sample(10, nrow(predmat), replace = TRUE)
    cvres <- rep(NA, nrow(predmat))
    cvres_future <- reactiveVal()
    
    tmp_model <- input$predModel
    if (tmp_model == "random forest") {
      cv_args <- list(
        num.trees = input$rfNumTrees, mtry = if (input$rfMtry > 0) input$rfMtry,
        sample.fraction = input$rfSampleFrac, probability = FALSE)
    } else if (tmp_model == "linear") {
      cv_args <- list(type= 0, cost = input$liblinCost)
    } else if (tmp_model == "lasso") {
      cv_args <- input$glmLambda
    } else if (tmp_model == "naive bayes") {
      cv_args <- list(smooth = input$naiveSmooth, prior = input$naivePrior, 
                      distribution = input$naiveDistri)
    } else if (tmp_model == "knn") {
      cv_args <- list(k = input$knnK)
    }
    
    cv_status_file <- paste0(session$token, "_cv")
    
    cv_async <- future(seed = TRUE, {
      for (icv in 1:10) {
        set_status(cv_status_file, paste0("Computing CV fold ", icv, " / 10"))
        if (tmp_model == "random forest") {
          cvmodel <- do.call(ranger::ranger, c(cv_args, list(
            x= predmat[cvsamp != icv, ], 
            y= as.factor(predlab[cvsamp != icv]))))
          cvres[cvsamp == icv] <- as.character(
            predict(cvmodel, data= predmat[cvsamp == icv, ])$predictions)
        } else {
          tmpvalues <- predlab[cvsamp != icv]
          # for (ival in na.omit(unique(tmpvalues)))
          #   if (sum(tmpvalues == ival, na.rm = T) == 1)
          #     tmpvalues[which(tmpvalues == ival)] <- NA
          
          if (tmp_model == "lasso") {
            cvmodel <- suppressWarnings(
              glmnet::glmnet(x= predmat[cvsamp != icv, ][!is.na(tmpvalues), ], 
                             y= as.factor(tmpvalues[!is.na(tmpvalues)]), 
                             family= "multinomial"))
            cvres[cvsamp == icv] <- predict(
              cvmodel, newx = predmat[cvsamp == icv, ], 
              type = "class", s= cv_args)
          } else if (tmp_model == "linear") {
            cvmodel <- do.call(LiblineaR::LiblineaR, c(
              list(data= predmat[cvsamp != icv, ][!is.na(tmpvalues), ], 
                   target= as.factor(tmpvalues[!is.na(tmpvalues)])), 
              cv_args))
            cvres[cvsamp == icv] <- as.character(
              predict(cvmodel, newx= predmat[cvsamp == icv, ])$predictions)
          } else if (tmp_model == "naive bayes") {
            cvmodel <- do.call(quanteda.textmodels::textmodel_nb, c(
              list(x= predmat[cvsamp != icv, ][!is.na(tmpvalues), ], 
                   y= as.factor(tmpvalues[!is.na(tmpvalues)])), 
              cv_args))
            cvres[cvsamp == icv] <- as.character(
              predict(
                cvmodel, newdata= predmat[cvsamp == icv, ], type= "class"))
          } else if (isolate(tmp_model) == "knn") {
            cvres[cvsamp == icv] <- 
              as.character(do.call(class::knn, c(cv_args, list(
                train = predmat[cvsamp != icv, ], 
                test = predmat[cvsamp == icv, ], 
                cl = predlab[cvsamp != icv]))))
          }
        }
      }
      cvres
    }) %...>% cvres_future()
    
    cv_async <- catch(cv_async, function(e) {
      cvres_future(NULL)
      values$modelcv <- e$message
    })
    
    cv_async <- finally(cv_async, function() {
      cv_nclicks(0)
      if (file.exists(cv_status_file)) unlink(cv_status_file)
      if (is.null(cvres_future())) return(NULL)
      cvres <- cvres_future()
      predlab <- values$label[!is.na(values$label)]
      dametrics <- metricsTable(predlab, cvres)
      values$cvtable <- dametrics
      res <- "10-CV OOB evaluation:\n"
      # if (any(grepl("^bertpred", input$use_regressors)))
      if (any(grepl("^bertpred", values$new_use_regressors)))
        res <- paste0(
          res, 
          "⚠️  Scores are biased upwards if BERT predictions were trained on current data ⚠️ \n")
      res <- paste0(
        res, 
        "* Accuracy    : ", round(100 * mean(cvres == predlab), 2), "%\n", 
        "* Macro F1    : ", sprintf("%f", mean(as.numeric(dametrics$F1))), "\n", 
        "* Weighted F1 : ", sprintf("%f", weighted.mean(as.numeric(dametrics$F1), 
                                                        dametrics$N.cases)), "\n"
      )
      values$modelcv <- res
    })
    
    NULL
  })
  
  ## Automatic message update during CV computation
  observe({
    if (cv_nclicks() == 0) return(NULL)
    invalidateLater(200)
    if (!file.exists(paste0(session$token, "_cv"))) return(NULL)
    values$modelcv <- get_status(paste0(session$token, "_cv"))
    values$cvtable <- NULL
  })
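  ## Progress messages cross the future/session boundary through a small status
  ## file: the worker calls set_status() and this observer polls the file every
  ## 200 ms with invalidateLater() and get_status(). set_status()/get_status()
  ## are assumed to be simple file wrappers defined earlier in this script,
  ## along the lines of (illustrative sketch only):
  # set_status <- function(path, msg) writeLines(msg, path)
  # get_status <- function(path) readLines(path, warn = FALSE)[1]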
  
  
  ##############################################################################
  ## Panel BERT
  ##############################################################################
  
  ##########
  ## Bert UI left panel
  
  ## Update list of saved models and dlSelect on init and on saveTrigger
  
  observeEvent(list(ok.data(), values$bertSaveTrigger), {
    damodels <- dir(pattern = "^tiggerbert_saved_")
    if (!length(damodels)) return(NULL)
    damodels <- gsub("^tiggerbert_saved_", "", damodels)
    values$bertSaved <- damodels
    updateSelectInput(session, "dlBPSelect", choices = damodels)
  })
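  ## Note: saved fine-tuned models are discovered purely by naming convention:
  ## anything in the working directory matching "^tiggerbert_saved_" is listed,
  ## and the prefix is stripped for display (e.g. a hypothetical directory
  ## "tiggerbert_saved_mymodel" would show up as "mymodel").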
  
  ## Populate saved models list, with buttons
  
  output$bertSaved <- renderUI({
    if (verbose) 
      cat("DEBUG bert update saved list\n")
    HTML(paste0(
      "<table><tbody>", 
      paste0(sapply(values$bertSaved, function(dabert) {
        bertshort <- dabert
        if (nchar(bertshort) > 23) 
          bertshort <- paste0(substr(bertshort, 1, 20), "...")
        paste0(
          "<tr><td>", 
          actionLink(paste0("saved_", dabert), bertshort, title = dabert, 
                     style="margin-right: 5px;"), "</td><td>", 
          actionButton(paste0("bert_copypars_", dabert), "📋", 
                       title = "Copy parameters"), "</td><td>",
          actionButton(paste0("bert_infer_", dabert), "🤖", 
                       title = "Infer predictions on current data"), "</td><td>",
          actionButton(paste0("bert_delete_", dabert), "🗑", 
                       title = "Delete model"), 
          "</td></tr>")
      }), collapse = "\n"),
      "</tbody></table>"
    ))
  })
  
  ##########
  ## Bert UI main panel
  
  ## UI outputs : message and stats table and plot
  output$bertMsg <- reactive(
    ifelse(is.null(values$bertMsg), 
           "Train new BERT, or select saved model.", 
           values$bertMsg))
  
  output$bertValstats <- DT::renderDataTable(
    DT::datatable(values$bertValstats, list(pageLength = 25)))
  
  output$bertValPlot <- renderPlot(values$bertValPlot)
  
  output$bertMsgHyperpar <- reactive(values$bertMsgHyperpar)
  
  ##################################
  ## Async handling of BERT training
  
  ## Status File
  bert_status_file <- paste0("tiggerbert_", session$token, "_trainstatus")
  
  fire_interrupt <- function(dafile) set_status(dafile, "interrupt")
  fire_ready <- function(dafile) set_status(dafile, "Ready")
  fire_running <- function(dafile, perc_complete = NULL){
    if(is.null(perc_complete))
      msg <- "Training..."
    else
      msg <- paste0("Training... ", perc_complete, "% complete")
    set_status(dafile, msg)
  }
  interrupted <- function(dafile) get_status(dafile) == "interrupt"
  
  ## Create Status File
  fire_ready(bert_status_file)
  
  
  ## Button action preparation
  bert_nclicks <- reactiveVal(0)
  bert_result_val <- reactiveVal()
  bert_hyperpars <- reactiveVal()

  ## Button appearance: write "Stop" if training is ongoing
  observeEvent(bert_nclicks(), {
    updateActionButton(session, "bertTrain", 
                       label = ifelse(bert_nclicks() > 0, "Stop", "Train BERT"))
  })
 | 
						|
  
 | 
						|
  ## Button action: Train new BERT
  observeEvent(input$bertTrain, {

    ## If training is already running, interpret the click as a stop request
    # if(bert_nclicks() != 0){
    #   values$bertMsg <- "BERT is already training"
    #   return(NULL)
    # }
    tmpsession <- paste0("tiggerbert_", ok.labelcol(), "_", session$token)
    if (bert_nclicks() != 0) {
      write("stop", paste0(tmpsession, "_stop"))
      bert_nclicks(0)
      return(NULL)
    }
    if (file.exists(paste0(tmpsession, "_stop")))
      file.remove(paste0(tmpsession, "_stop"))

    ## Increment clicks and prevent concurrent analyses
    bert_nclicks(bert_nclicks() + 1)

    bert_result_val(data.frame(Status = "Running..."))
    fire_running(bert_status_file)

    ## Prepare async training
    values$bertLoadSaved <- NULL
    # tmpsession <- paste0("tiggerbert_", ok.labelcol(), "_", session$token)
    tmpmodel <- input$bertModel
    system(paste0("rm ", tmpsession, "_done"))

    tmpdat <- data.frame(
      id = ok.data()[, values$conf$idcol],
      text = ok.nicetext(),
      label = values$label
    )
    tmpdat <- tmpdat[!is.na(tmpdat$label), ]
    tmpdat <- tmpdat[grepl("\\S", tmpdat$text), ]

    ## Filter data: minimum occurrences, balance classes, validation split
    tmplabtable <- table(tmpdat$label)
    if (is.numeric(input$bertMinOccur)) if (input$bertMinOccur > 1) {
      tmpdat <- tmpdat[tmpdat$label %in%
                         names(which(tmplabtable >= input$bertMinOccur)), ]
      tmplabtable <- table(tmpdat$label)
    }
    set.seed(input$bertValidSeed)
    if (input$bertBalance) {
      tmpminocc <- min(tmplabtable)
      tmptrain <- sample(do.call(c, lapply(unique(tmpdat$label), function(dalabel)
        sample(which(tmpdat$label == dalabel), round((1 - input$bertValidFrac) * tmpminocc)))))
      tmpvalid <- sample(do.call(c, lapply(unique(tmpdat$label), function(dalabel) {
        dasamp <- which(tmpdat$label == dalabel)
        dasamp <- dasamp[! dasamp %in% tmptrain]
        sample(dasamp, round(input$bertValidFrac * tmpminocc))
      })))
    } else {
      tmptrain <- sample(1:nrow(tmpdat), round((1 - input$bertValidFrac) * nrow(tmpdat)))
      tmpvalid <- (1:nrow(tmpdat))[-tmptrain]
    }

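    ## Illustrative sketch (not run): with bertValidFrac = 0.2, the balanced
    ## branch above draws the same number of train and validation rows from
    ## every class, capped by the rarest class. With a hypothetical label vector:
    # labs <- c(rep("A", 50), rep("B", 10))
    # minocc <- min(table(labs))          # 10, size of the rarest class
    # round(0.8 * minocc)                 # 8 training rows sampled per class
    # round(0.2 * minocc)                 # 2 validation rows sampled per class
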
    write.csv(tmpdat[tmptrain, ], paste0(tmpsession, "_dat_train.csv"), row.names = FALSE)
    write.csv(tmpdat[tmpvalid, ], paste0(tmpsession, "_dat_valid.csv"), row.names = FALSE)

    ## Wait briefly (up to ~5 seconds) for the validation CSV to appear on disk
    iwait <- 0
    while (iwait < 10) {
      if (!file.exists(paste0(tmpsession, "_dat_valid.csv"))) Sys.sleep(0.5)
      iwait <- iwait + 1
    }

    tmp_cmd <- paste0(
      values$conf$python, " gobert.py -m ", tmpmodel,
      " -t ", tmpsession, "_dat_train.csv",
      " -v ", tmpsession, "_dat_valid.csv",
      " -b ", input$bertBatchsize,
      " -g ", input$bertGradacc,
      " -e ", input$bertEpochs,
      " -l ", input$bertLrate,
      " -w ", input$bertWdecay,
      " -B ", ifelse(input$bertBest, 1, 0),
      " -E ", input$bertNeval,
      " -s ", tmpsession,
      " -G ", as.numeric(values$conf$use_gpu),
      " -A ", as.numeric(input$bertAdapt),
      " 2>&1 ; touch ", tmpsession, "_done"
    )

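    ## For reference, with hypothetical values (python path, model name and
    ## session token below are illustrative) the assembled command looks like:
    # python3 gobert.py -m distilbert-base-uncased \
    #   -t tiggerbert_label_TOKEN_dat_train.csv -v tiggerbert_label_TOKEN_dat_valid.csv \
    #   -b 16 -g 1 -e 3 -l 2e-05 -w 0.01 -B 1 -E 10 \
    #   -s tiggerbert_label_TOKEN -G 1 -A 1 2>&1 ; touch tiggerbert_label_TOKEN_done
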
    ## Save hyperpars
    bert_hyperpars(list(bertModel = input$bertModel,
                        bertEpochs = input$bertEpochs,
                        bertLrate = input$bertLrate,
                        bertWdecay = input$bertWdecay,
                        bertBatchsize = input$bertBatchsize,
                        bertGradacc = input$bertGradacc,
                        bertNeval = input$bertNeval,
                        bertBest = input$bertBest,
                        bertValidFrac = input$bertValidFrac,
                        bertValidSeed = input$bertValidSeed,
                        bertNtrain = length(tmptrain),
                        bertTrainTable = as.list(table(tmpdat$label[tmptrain])),
                        bertScheme = ok.labelcol()))

    ## Launch async training
    bert_async <- future({
      if (file.exists(tmpsession))
        system(paste0("rm -R ", tmpsession, "*"))

      # system(tmp_cmd, wait = FALSE, intern = FALSE)
      tmp_exec <- system(tmp_cmd, wait = FALSE, intern = TRUE)
      writeLines(tmp_exec, paste0(tmpsession, "_log"))

      ## After training, get trainer state from last checkpoint for diagnostics
      chkpt <- dir(paste0(tmpsession, "_train"), pattern = "^checkpoint")
      chkpt <-
        chkpt[order(as.numeric(gsub("\\D", "", chkpt)), decreasing = TRUE)[1]]
      system(paste0(
        "cp ", tmpsession, "_train/", chkpt, "/trainer_state.json ",
        tmpsession, "/"))
    }) %...>% bert_result_val()

    ## Catch interrupt (or any other error) and notify user
    bert_async <- catch(bert_async, function(e) {
      bert_result_val(NULL)
      showNotification(e$message)
      values$bertMsg <- e$message
    })

    ## After the promise has been evaluated, reset nclicks to allow another run
    bert_async <- finally(bert_async, function() {
      system(paste0("rm -R ", tmpsession, "_train"))
      system(paste0("rm ", tmpsession, "_done"))

      fire_ready(bert_status_file)
      bert_nclicks(0)

      if (is.null(bert_result_val())) return(NULL)

      bertpreds <- try(read.csv(paste0(tmpsession, "_predval.csv")))
      if (inherits(bertpreds, "try-error")) {
        # values$bertMsg <- "BERT error - try again"
        values$bertMsg <- try(paste(readLines(paste0(tmpsession, "_log")), collapse = "\n"))
        values$bertValstats <- NULL
        values$bertValPlot <- NULL
        values$bertLastUnsaved <- NULL
        values$bertMsgHyperpar <- NULL
        values$bertLastHyperpar <- NULL
        return(NULL)
      }

      datruth <- tmpdat[tmpvalid, "label"]
      dapred <- bertpreds$bertpred
      dametrics <- metricsTable(datruth, dapred)
      damsg <- paste0(
        "Unsaved model (", tmpmodel, ")\n\n",
        "BERT validation stats:\n",
        "* Accuracy    : ", round(100 * mean(datruth == dapred), 2), "%\n",
        "* Macro F1    : ", sprintf("%f", mean(as.numeric(dametrics$F1))), "\n",
        "* Weighted F1 : ", sprintf("%f", weighted.mean(as.numeric(dametrics$F1),
                                                        dametrics$N.cases)), "\n")
      hypermsg <- bertHyperToMsg(bert_hyperpars())
      writeLines(RJSONIO::toJSON(bert_hyperpars()),
                 paste0(tmpsession, "/tiggertrain.json"))

      values$bertValstats <- dametrics
      values$bertMsg <- damsg
      values$bertValPlot <- bertPlotFromPath(tmpsession)
      values$bertLastUnsaved <- list(
        metrics = dametrics, msg = damsg, session = tmpsession)
      values$bertMsgHyperpar <- hypermsg
      values$bertLastHyperpar <- bert_hyperpars()
    })

    values$bertMsg <- "BERT is training"
    values$bertValstats <- NULL
    values$bertValPlot <- NULL
    values$bertLastUnsaved <- NULL
    values$bertMsgHyperpar <- NULL
    values$bertLastHyperpar <- NULL

    ## Return something other than the promise so shiny remains responsive
    NULL
  })

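  ## The block above follows the usual promises pipeline: future() runs the
  ## shell command off the main R process, %...>% forwards its result to a
  ## reactiveVal, catch() handles errors, and finally() performs cleanup either
  ## way. A minimal sketch of the same pattern (illustrative only):
  # res <- reactiveVal()
  # p <- future({ Sys.sleep(1); "done" }) %...>% res
  # p <- catch(p, function(e) showNotification(e$message))
  # p <- finally(p, function() message("cleanup"))
  # NULL  # return something other than the promise to keep the session responsive
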
  ## Automatic check of BERT training status
  observe({
    if (bert_nclicks() == 0) return(NULL)
    isolate({
      tmpsession <- paste0("tiggerbert_", ok.labelcol(), "_", session$token)
      ## Check for checkpoint dirs
      checkdirs <-
        length(dir(paste0(tmpsession, "_train"), pattern = "^checkpoint"))
      # fire_running(bert_status_file,
      #              min(100, round(100 * checkdirs / input$bertNeval, 1)))
      # dastate <- try(RJSONIO::fromJSON(paste0(tmpsession, "_train/trainer_state.json")))
      # if (!inherits(dastate, "try-error"))
      #   fire_running(bert_status_file,
      #                round(100 * as.numeric(dastate$epoch) / input$bertEpochs, 1))
      try({
        dapoints <- dir(paste0(tmpsession, "_train"),
                        pattern = "trainer_state[.]json$", recursive = TRUE)
        dapoints_num <- as.numeric(gsub("^checkpoint-(\\d+)/.*$", "\\1", dapoints))
        statefile <- paste0(
          tmpsession, "_train/", dapoints[which.max(dapoints_num)])
        if (file.exists(statefile)) {
          state <- RJSONIO::fromJSON(statefile)
          fire_running(bert_status_file,
                       round(100 * as.numeric(state$epoch) / input$bertEpochs, 1))
        }
      })
    })
    if (checkdirs < isolate(input$bertNeval) && file.exists(paste0(tmpsession, "_done"))) {
      values$bertMsg <- "BERT Error - try again"
      # values$bertMsg <- paste(readLines(paste0(tmpsession, "_log")), collapse = "\n")
    } else {
      invalidateLater(1e3)
      loss_progress <- ""
      if (checkdirs > 0) {
        ## Get training diagnostics
        try({
          dapoints <- dir(paste0(tmpsession, "_train"),
                          pattern = "trainer_state[.]json$", recursive = TRUE)
          dapoints_num <- as.numeric(gsub("^checkpoint-(\\d+)/.*$", "\\1", dapoints))
          statefile <- paste0(
            tmpsession, "_train/", dapoints[which.max(dapoints_num)])
          if (file.exists(statefile)) {
            state <- RJSONIO::fromJSON(statefile)
            logs <- data.table::rbindlist(lapply(state$log_history, as.list),
                                          fill = TRUE)
            logs <- logs[, .(loss = max(loss, na.rm = TRUE),
                             eval_loss = max(eval_loss, na.rm = TRUE)), by = epoch]
            loss_progress <- paste0(
              "\n* Epoch : ", sprintf("%.02f", logs$epoch),
              "\ttrain loss : ", sprintf("%.05f", logs$loss),
              "\teval loss : ", sprintf("%.05f", logs$eval_loss), collapse = "")
          }
        })
      }
      values$bertMsg <-
        ifelse(checkdirs == 0, "Pre-processing...",
               paste0(get_status(bert_status_file), loss_progress))
    }
    values$bertValstats <- NULL
    values$bertValPlot <- NULL
    values$bertLoadSaved <- NULL
    values$bertMsgHyperpar <- NULL
  })

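  ## For reference, log_history entries in trainer_state.json (as written by the
  ## HuggingFace Trainer) typically alternate training and evaluation records,
  ## e.g. (illustrative values):
  # list(list(epoch = 0.5, loss = 0.69, learning_rate = 2e-05, step = 50),
  #      list(epoch = 0.5, eval_loss = 0.58, eval_runtime = 1.2, step = 50))
  ## so the rbindlist()/max() aggregation above reduces them to one
  ## train-loss / eval-loss pair per epoch.
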
  ## Action: show last BERT model
  observeEvent(input$bertLast, {
    values$bertMsg <- values$bertLastUnsaved$msg
    values$bertValstats <- values$bertLastUnsaved$metrics
    values$bertValPlot <- bertPlotFromPath(values$bertLastUnsaved$session)
    values$bertMsgHyperpar <- bertHyperToMsg(values$bertLastHyperpar)
    values$bertLoadSaved <- NULL
  })

  ## Action: save BERT model
  observeEvent(input$bertSave, {
    tmpsession <- paste0("tiggerbert_", ok.labelcol(), "_", session$token)
    if (!file.exists(tmpsession))
      return(NULL)

    if (!grepl("\\S", input$bertSaveName)) {
      tmpname <- paste0(
        ok.labelcol(), "_", gsub("\\D+$", "", gsub("\\D", "-", Sys.time())))
    } else
      tmpname <- cleanFileName(input$bertSaveName)

    savedir <- paste0("tiggerbert_saved_", tmpname)
    system(paste("cp -R", tmpsession, savedir))
    system(paste0("cp ", tmpsession, "_dat_valid.csv ",
                  savedir, "/tiggerbert_dat_valid.csv"))
    system(paste0("cp ", tmpsession, "_predval.csv ",
                  savedir, "/tiggerbert_pred_valid.csv"))

    system(paste0("rm -R ", tmpsession, "*"))
    values$bertMsg <- paste0("BERT model successfully saved: ", tmpname)
    values$bertValstats <- NULL
    values$bertValPlot <- NULL
    values$bertMsgHyperpar <- NULL

    if (is.null(values$bertSaveTrigger)) {
      values$bertSaveTrigger <- 0
    } else
      values$bertSaveTrigger <- values$bertSaveTrigger + 1
    values$bertLastUnsaved <- NULL
    updateTextInput(session, "bertSaveName", value = "")
  })

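  ## Note on the default save name above: gsub("\\D", "-", Sys.time()) turns
  ## the timestamp into dash-separated digits, so an unnamed save is stored
  ## under a directory like the following (illustrative date):
  # gsub("\\D+$", "", gsub("\\D", "-", Sys.time()))
  # # e.g. "2023-06-14-15-04-05", saved as tiggerbert_saved_<scheme>_2023-06-14-15-04-05
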
  ## Actions: load saved model (list updated only with new models)
  observeEvent(values$bertSaved, {
    if (!length(values$bertSaved)) return(NULL)
    tmpsaved <- values$bertSaved[! values$bertSaved %in% values$bertSavedAll]

    lapply(tmpsaved, function(dabert) {
      ############
      ## Actions to load saved model
      observeEvent(input[[paste0("saved_", dabert)]], {
        bertconf <- RJSONIO::fromJSON(paste0(
          "tiggerbert_saved_", dabert, "/config.json"))
        truth <- read.csv(paste0(
          "tiggerbert_saved_", dabert, "/tiggerbert_dat_valid.csv"))$label
        pred <- read.csv(paste0(
          "tiggerbert_saved_", dabert, "/tiggerbert_pred_valid.csv"))$bertpred

        dametrics <- metricsTable(truth, pred)
        values$bertValstats <- dametrics
        values$bertMsg <- paste0(
          "Saved model ", dabert, " (", bertconf[["_name_or_path"]], ")\n\n",
          "BERT validation stats:\n",
          "* Accuracy    : ", round(100 * mean(truth == pred), 2), "%\n",
          "* Macro F1    : ", sprintf("%f", mean(as.numeric(dametrics$F1))), "\n",
          "* Weighted F1 : ", sprintf("%f", weighted.mean(as.numeric(dametrics$F1),
                                                          dametrics$N.cases)), "\n")

        ## Hyperparameters
        tiggertrain_file <- paste0("tiggerbert_saved_", dabert, "/tiggertrain.json")
        if (file.exists(tiggertrain_file)) {
          try({
            values$bertMsgHyperpar <- bertHyperToMsg(RJSONIO::fromJSON(tiggertrain_file))
          })
        } else
          values$bertMsgHyperpar <- "Training parameters not saved."

        values$bertValPlot <-
          bertPlotFromPath(paste0("tiggerbert_saved_", dabert))
        values$bertLoadSaved <- dabert
      })

      ############
      ## Actions to copy hyperparameters
      observeEvent(input[[paste0("bert_copypars_", dabert)]], {
        tiggertrain_file <- paste0("tiggerbert_saved_", dabert, "/tiggertrain.json")
        if (!file.exists(tiggertrain_file)) return(NULL)
        try({
          tmppars <- RJSONIO::fromJSON(tiggertrain_file)
          updateSelectInput(session, "bertModel", selected = tmppars$bertModel)
          updateCheckboxInput(session, "bertBest", value = tmppars$bertBest)
          numinputs <- c("bertEpochs", "bertLrate", "bertWdecay",
                         "bertBatchsize", "bertGradacc")
          for (iinput in numinputs)
            updateNumericInput(session, iinput, value = tmppars[[iinput]])
        })
      })

      ############
      ## Actions to delete saved model (with confirm)
      observeEvent(input[[paste0("bert_delete_", dabert)]], {
        showModal(modalDialog(
          title = "Delete BERT model",
          paste0("Confirm delete: ", dabert,
                 " (model and predictions will be deleted)."),
          footer = tagList(actionButton(paste0("bert_delete_confirm_", dabert), "Delete"),
                           modalButton("Cancel"))))
      })

      observeEvent(input[[paste0("bert_delete_confirm_", dabert)]], {
        system(paste0("rm -R tiggerbert_saved_", dabert))
        system(paste0(
          "rm ", values$conf$datadir, values$conf$dataname, "_bertpred_",
          dabert, ".feather"))
        # values$bertLoadSaved <- NULL
        if (is.null(values$bertSaveTrigger)) {
          values$bertSaveTrigger <- 0
        } else
          values$bertSaveTrigger <- values$bertSaveTrigger + 1
        removeModal()
        values$bertMsg <- NULL
        values$bertValstats <- NULL
        values$bertMsgHyperpar <- NULL
      })

      ############
      ## Actions to infer on current data from saved BERT (async)
      observeEvent(input[[paste0("bert_infer_", dabert)]], {
        if (bertpred_nclicks() > 0 | bpall_nclicks() > 0) {
          values$bertMsg <- "Inference already running"
          values$bertMsgHyperpar <- NULL
          values$bertValPlot <- NULL
          values$bertValstats <- NULL
          return(NULL)
        }
        bertpred_nclicks(1)

        saved_model <- paste0("tiggerbert_saved_", dabert)
        out_path <- paste0(
          values$conf$datadir, values$conf$dataname, "_bertpred_",
          dabert, ".feather")

        if (file.exists(out_path)) {
          old_infer <- arrow::read_feather(out_path)
        } else
          old_infer <- NULL

        to_infer <- ok.data()[[values$conf$idcol]]
        to_infer <- to_infer[! to_infer %in% old_infer[[values$conf$idcol]]]
        to_infer_row <- match(to_infer, ok.data()[[values$conf$idcol]])

        if (!length(to_infer)) {
          values$bertMsg <- paste0(
            "All current ", nrow(ok.data()),
            " texts already inferred by ", dabert)
          values$bertMsgHyperpar <- NULL
          values$bertValPlot <- NULL
          values$bertValstats <- NULL
          bertpred_nclicks(0)
          return(NULL)
        }

        values$bertValPlot <- NULL
        values$bertValstats <- NULL
        values$bertMsgHyperpar <- NULL
        values$bertMsg <- paste0(
          "Preparing inference on ", length(to_infer), " new texts")
        tmp_dat <- paste0("tiggerbert_infer_", session$token, ".feather")
        arrow::write_feather(
          data.frame(
            ok.data()[to_infer_row, values$conf$idcol, drop = FALSE],
            text = ok.nicetext()[to_infer_row]),
          tmp_dat)

        values$bertMsg <- paste0(
          "Inferring ", dabert, " predictions on ",
          length(to_infer), " new texts")
        infer_cmd <- paste0(
          values$conf$python, " gobert_infer.py -m ", saved_model,
          " -o ", out_path,
          " -d ", tmp_dat,
          " -i ", values$conf$idcol,
          " -l ", paste0("tiggerbert_infer_", session$token, ".log"),
          " -G ", as.numeric(values$conf$use_gpu),
          " -b ", 128)
        bertpred_res <- reactiveVal()
        bertpred_async <- future({
          system(infer_cmd)
          system(paste0("rm ", tmp_dat))

          if (!is.null(old_infer)) {
            new_infer <- arrow::read_feather(out_path)
            arrow::write_feather(rbind(old_infer, new_infer), out_path)
          }
        }) %...>% bertpred_res
        bertpred_async <- catch(bertpred_async, function(error) {
          bertpred_nclicks(0)
          values$bertMsg <- paste0("BERT inference error: ", error)
          system(paste0("rm ", tmp_dat))
        })
        bertpred_async <- finally(bertpred_async, function() {
          bertpred_nclicks(0)
          values$bertMsg <-
            "Inference complete!\nRe-import data in Project/Sample panel to use the predictions for tagging."
          values$bertValPlot <- NULL
          values$bertValstats <- NULL
          values$bertMsgHyperpar <- NULL
        })
        NULL
      })
    })
    values$bertSavedAll <- c(values$bertSavedAll, tmpsaved)
  })

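  ## The per-model inference above is incremental: predictions are cached in
  ## <dataname>_bertpred_<model>.feather, only ids missing from that cache are
  ## sent to gobert_infer.py, and the new rows are appended. A minimal sketch
  ## of the same idea (illustrative, not run):
  # cached <- if (file.exists(out_path)) arrow::read_feather(out_path) else NULL
  # todo <- setdiff(ok.data()[[values$conf$idcol]], cached[[values$conf$idcol]])
  # # ... run inference on `todo` only, then rbind() the new rows onto `cached`
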
  ## Automatic check of BERT inference progress
  observe({
    if (bertpred_nclicks() == 0) return(NULL)
    invalidateLater(1e3)
    logfile <- paste0("tiggerbert_infer_", isolate(session$token), ".log")
    if (file.exists(logfile)) {
      values$bertMsg <- readLines(logfile)[1]
      values$bertValPlot <- NULL
      values$bertValstats <- NULL
      values$bertMsgHyperpar <- NULL
    }
  })


  ##############################################################################
  ## Export panel
  ##############################################################################

  ## Export tags and full model predictions
  output$dlTagSave <-
    downloadHandler(
      function()
        paste0(Sys.Date(), "-activeTigger-", ok.labelcol(), ".", input$dlTagFormat),
      function(con) {
        if (is.null(ok.data())) return(NULL)
        dadat <- ok.data()[, values$conf$idcol, drop = FALSE]
        if ("tags" %in% input$dlTagSelect)
          dadat[[ok.labelcol()]] <- values$label
        if ("comments" %in% input$dlTagSelect) if (ok.commentCol() != "(None)")
          dadat[, ok.commentCol()] <- values$comment
        if ("predictions" %in% input$dlTagSelect) if (ok.train.possible()) {
          dadat$tigger.predict <- ok.predlab1()
          dadat$tigger.predprob <- apply(ok.pred(), 1, max)
          tmp_pred <- ok.pred()
          colnames(tmp_pred) <- paste0("tiggerpred_", colnames(tmp_pred))
          dadat <- cbind(dadat, tmp_pred)
        }
        othercols <- input$dlTagSelect
        othercols <- othercols[! othercols %in% c("tags", "comments", "predictions")]
        if (length(othercols))
          dadat <- cbind(dadat, ok.data()[, othercols, drop = FALSE])

        if (input$dlTagFormat == "csv") {
          write.csv(dadat, row.names = FALSE, con)
        } else
          arrow::write_feather(dadat, con)
      })

  ## Export embeddings (ft, sb, umap, t-sne)
  output$dlEmbedSave <- downloadHandler(
    function()
      paste0(Sys.Date(), "-activeTigger-emb-",
             paste(input$dlEmbedSelect, collapse = "-"),
             ".", input$dlEmbedFormat),
    function(file) {
      if (is.null(input$dlEmbedSelect)) return(NULL)
      export <- ok.data()[, values$conf$idcol, drop = FALSE]
      if ("ft" %in% input$dlEmbedSelect)
        export <- data.frame(export, ok.data()[, values$cols_ft])
      if ("sb" %in% input$dlEmbedSelect)
        export <- data.frame(export, ok.data()[, values$cols_sb])
      if (any(c("t-sne", "umap") %in% input$dlEmbedSelect)) {
        tmpvisu <- ok.visu()
        tmpalgo <- input$dlEmbedSelect[input$dlEmbedSelect %in% c("t-sne", "umap")]
        colnames(tmpvisu) <- paste0(gsub("-", "", tmpalgo), 1:2)
        export <- data.frame(export, tmpvisu)
      }
      if (input$dlEmbedFormat == "csv") {
        write.csv(export, file, row.names = FALSE)
      } else if (input$dlEmbedFormat == "feather") {
        arrow::write_feather(export, file)
      }
    })

  ## Export BERT predictions

  output$dlBPMsg <- reactive(values$dlBPMsg)


  # output$dlBertPredCsv <-
  #   downloadHandler(paste0(Sys.Date(), "-bertpred-", ok.labelcol(), ".csv"),
  #                   content = function(con) {
  #                     if (is.null(ok.data())) return(NULL)
  #                     dadat <- ok.data()[, c(values$conf$idcol, ok.labelcol())]
  #                     dadat[[ok.labelcol()]] <- values$label
  #                     if (ok.commentCol() != "(None)") {
  #                       dadat[, ok.commentCol()] <- values$comment
  #                     }
  #                     if (ok.train.possible()) {
  #                       dadat$tigger.predict <- ok.predlab1()
  #                       dadat$tigger.predprob <- apply(ok.pred(), 1, max)
  #                     }
  #                     dadat <- cbind(dadat, ok.pred())
  #                     write.csv(dadat, row.names = FALSE, con)
  #                   })

  ############
  ## Action to infer on ALL data from saved BERT (async)
  bpall_res <- reactiveVal()
  observeEvent(input$dlBPInfer, {
    if (bpall_nclicks() > 0 | bertpred_nclicks() > 0) {
      values$dlBPMsg <- "Inference already running"
      bpall_res(NULL)
      return(NULL)
    }
    bpall_nclicks(1)
    bpall_res_async <- reactiveVal()
    dabert <- input$dlBPSelect

    saved_model <- paste0("tiggerbert_saved_", dabert)
    out_path <- paste0(
      values$conf$datadir, values$conf$dataname, "_bertpredall_",
      dabert, ".feather")
    tmp_dat <- paste0("tiggerbert_inferall_", session$token, ".feather")
    tmp_log <- paste0("tiggerbert_inferall_", session$token, ".log")
    tmp_done <- paste0("tiggerbert_inferall_", session$token, "_done")
    if (file.exists(tmp_done))
      system(paste0("rm ", tmp_done))

    if (file.exists(out_path)) {
      values$dlBPMsg <- "Inference already complete, ready for download"
      bpall_nclicks(0)
      bpall_res(out_path)
      return(NULL)
      # old_infer <- arrow::read_feather(out_path)
    }

    fulldat <- arrow::read_feather(paste0(
      values$conf$datadir, values$conf$dataname, ".feather"))
    values$dlBPMsg <- paste0(
      "Preparing inference on ", nrow(fulldat), " texts")
    if (length(values$conf$textcols) > 1) {
      fulldat$text <- do.call(paste, c(
        fulldat[, values$conf$textcols, drop = FALSE], sep = "\n\n"))
    } else {
      fulldat$text <- fulldat[, values$conf$textcols]
    }
    arrow::write_feather(fulldat[, c(values$conf$idcol, "text")], tmp_dat)

    infer_cmd <- paste0(
      values$conf$python, " gobert_infer.py -m ", saved_model,
      " -o ", out_path,
      " -d ", tmp_dat,
      " -i ", values$conf$idcol,
      " -l ", tmp_log,
      " -G ", as.numeric(values$conf$use_gpu),
      " -b ", 128,
      "; touch ", tmp_done)

    bpall_async <- future({
      system(infer_cmd)
      system(paste0("rm ", tmp_dat))
    }) %...>% bpall_res_async

    bpall_async <- catch(bpall_async, function(error) {
      values$dlBPMsg <- paste0("BERT inference error: ", error)
      system(paste0("rm ", tmp_dat))
      system(paste0("rm ", tmp_done))
    })

    bpall_async <- finally(bpall_async, function() {
      bpall_nclicks(0)
      values$dlBPMsg <-
        "Inference complete, ready for download"
      bpall_res(out_path)
    })
    NULL
  })

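  ## When several text columns are configured, the do.call(paste, ...) call
  ## above concatenates them row-wise with a blank line in between,
  ## e.g. (illustrative):
  # df <- data.frame(title = c("A", "B"), body = c("x", "y"))
  # do.call(paste, c(df[, c("title", "body"), drop = FALSE], sep = "\n\n"))
  # # [1] "A\n\nx" "B\n\ny"
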
  ## Automatic check of BERT all prediction progress
  observe({
    if (bpall_nclicks() == 0) return(NULL)
    invalidateLater(1e3)
    logfile <- paste0("tiggerbert_inferall_", isolate(session$token), ".log")
    if (file.exists(logfile))
      values$dlBPMsg <- readLines(logfile)[1]
  })

  ## Show BERTpred download button when data is ready
  output$dlBPDlButton <- renderUI({
    if (is.null(bpall_res())) return(NULL)
    downloadButton("dlBPDl", NULL)
  })

  output$dlBPDl <- downloadHandler(
    function()
      paste0(Sys.Date(), "-tiggerbert-", values$conf$dataname,
             input$dlBPSelect, ".", input$dlBPFormat),
    function(file) {
      if (is.null(bpall_res())) return(NULL)
      tmp_bert <- gsub("^.*_bertpredall_(.*)[.]feather$", "\\1", bpall_res())
      if (tmp_bert != input$dlBPSelect) {
        values$dlBPMsg <- "BERT model changed - predict again or select current model"
        return(NULL)
      }
      tmp <- arrow::read_feather(bpall_res())
      if (input$dlBPFormat == "csv") {
        write.csv(tmp, file, row.names = FALSE)
      } else if (input$dlBPFormat == "feather") {
        arrow::write_feather(tmp, file)
      }
    }
  )

  ##############################################################################
  ## Delete files at end of session
  onStop(function() {
    if (file.exists(bert_status_file)) unlink(bert_status_file)
    if (length(dir(pattern = session$token)))
      system(paste0("rm -R *", session$token, "*"))
  })

})