datalab/docker-images-datalab/myactivetigger/activetigger/server.R

4144 lines
152 KiB
R
Raw Normal View History

2024-03-06 15:54:50 +01:00
pkgs <- c("arrow", "class", "DT", "future", "ggplot2", "glmnet", "htmlTable",
"LiblineaR", "Matrix","Metrics", "promises", "quanteda",
"quanteda.textmodels", "ranger", "rlang", "Rtsne",
"RJSONIO", "shiny", "SparseM", "stringi", "uwot")
pkgs <- pkgs[! pkgs %in% rownames(installed.packages())]
if (length(pkgs)) {
warning(paste0("Packages ", paste0(pkgs, collapse = " , "), " not available. ",
"App might crash."))
}
# for (i in seq_along(pkgs)) install.packages(pkgs[i])
verbose <- TRUE
# sink("ActiveTigger.log")
options(shiny.maxRequestSize = 30*1024^3) # File upload size
modelnames <- read.csv("modelnames.csv")
modelnames_labels <- modelnames$short
names(modelnames_labels) <- modelnames$short_lang
library(quanteda)
library(ggplot2)
library(promises)
library(future)
plan(multisession)
################################################################################
## Global functions
################################################################################
label2hash <- function(label) rlang::hash(label)
cleanFileName <- function(name)
gsub('\\s|[/]|[\\]|[:]|[*]|[?]|["]|[<]|[>]|[+]|[$]|[=]', "_",
gsub("^\\s+|\\s+$", "", name))
wtdF1 <- function(truth, pred) {
pred <- pred[!is.na(truth)]
truth <- truth[!is.na(truth)]
sum(sapply(unique(truth), function(ilab)
Metrics::fbeta_score(truth == ilab, pred == ilab) *
sum(truth == ilab) / length(truth)), na.rm = TRUE)
}
metricsTable <- function(truth, pred) {
do.call(rbind, lapply(sort(unique(truth)), function(ilab) {
tmptrue <- truth == ilab
tmppred <- pred == ilab
tmp <- data.frame(
"Tag" = ilab,
"N.cases" = sum(tmptrue),
"N.predict" = sum(tmppred),
"Accuracy" = sprintf("%f", round(Metrics::accuracy(tmptrue, tmppred), 6)),
"Precision" = sprintf("%f", round(Metrics::precision(tmptrue, tmppred), 6)),
"Recall" = sprintf("%f", round(Metrics::recall(tmptrue, tmppred), 6)),
"F1" = sprintf("%f", round(Metrics::fbeta_score(tmptrue, tmppred), 6)))
tmp$F1[tmp$F1 == "NaN"] <- sprintf("%f", 0)
tmp
}))
}
bertPlotFromPath <- function(path) {
if (!file.exists(paste0(path, "/trainer_state.json"))) return(NULL)
state <- RJSONIO::fromJSON(paste0(path, "/trainer_state.json"))
logs <- data.table::rbindlist(lapply(state$log_history, as.list), fill = TRUE)
epochmin <- logs$epoch[which.min(logs$eval_loss)]
gdat <- data.table::rbindlist(
use.names = FALSE,
list(
cbind("train", logs[!is.na(logs$loss), c("epoch", "loss")]),
cbind("valid", logs[!is.na(logs$eval_loss), c("epoch", "eval_loss")])
))
colnames(gdat) <- c("data", "epoch", "loss")
ggplot(gdat, aes_string("epoch", "loss", color = "data", shape = "data")) +
geom_point(size = 2) + geom_line() +
geom_vline(xintercept = epochmin, linetype = 2) +
theme_bw() + expand_limits(y=0)
}
bertHyperToMsg <- function(pars) {
if (is.null(pars)) return(NULL)
paste0("Scheme : ", pars$bertScheme, "; ",
"Model : ", pars$bertModel, "\n* ",
"Epochs : ", pars$bertEpochs, " ; ",
"Lrate : ", pars$bertLrate, " ; ",
"Wdecay : ", pars$bertWdecay, " ; ",
"Batch size : ", pars$bertBatchsize, " ; ",
"Grad. accum. : ", pars$bertGradacc, "\n* ",
"Keep best : ", pars$bertBest, " ; ",
"ValidFrac : ", pars$bertValidFrac, " ; ",
"ValidSeed : ", pars$bertValidSeed, " \n* ",
pars$bertNtrain, " training samples : ",
paste0(names(pars$bertTrainTable), " (",
as.numeric(pars$bertTrainTable), ")",
collapse = " ; "))
}
get_status <- function(dafile)
scan(dafile, what = "character", sep="\n", quiet = TRUE)
set_status <- function(dafile, msg) write(msg, dafile)
################################################################################
## Main server function
################################################################################
# options(shiny.maxRequestSize=2^34) # Max filesize
shinyServer(function(input, output, session) {
values <- reactiveValues()
values$visuGo <- FALSE
values$dfmGo <- FALSE
# queryTrigger <- reactiveVal(0)
# predlabTrigger <- reactiveVal(0)
initDataTrigger <- reactiveVal(0)
queryNext <- reactiveVal(0)
trainTrigger <- reactiveVal(0)
predTrigger <- reactiveVal(0)
diagTrigger <- reactiveVal(0)
ready2tag <- reactiveVal(0)
trainCountdown <- reactiveVal(0)
ok.data <- reactiveVal()
ok.data.async <- reactiveVal()
ok.data.running <- reactiveVal(FALSE)
bpall_nclicks <- reactiveVal(0)
bertpred_nclicks <- reactiveVal(0)
## NEW use_regressors list as values$new_use_regressors
observeEvent(input$use_regressors, {
values$new_use_regressors <- input$use_regressors
cat("DEBUG new_use_regressors: ", paste(input$use_regressors, collapse = " "), "\n")
})
## NEW trainCountdown with input$trainCountdown
## Policy: 0, don't train ; 1, trigger train on tag ; >1, decrement on tag
observeEvent(input$trainCountdown, trainCountdown(input$trainCountdown))
####################
## Projects and data
## Read config files, update UI forms accordingly
## This should happen only once
observe({
if (verbose)
cat("DEBUG read project configs\n")
# values$conf <- RJSONIO::fromJSON("conf.json")
projfiles <- dir(pattern = "^tigger_.*[.]json$")
isolate({
if (!length(projfiles)) {
values$conf <- NULL
return(NULL)
}
values$confnames <- gsub("^tigger_(.*)[.]json$", "\\1", projfiles)
values$conf <- RJSONIO::fromJSON(projfiles[1])
})
})
## When projects added, update selector
observeEvent(values$confnames, {
if (verbose)
cat("DEBUG update selectProject from confnames\n")
updateSelectInput(session, "selectProject", choices = values$confnames)
})
## Event from project change
observeEvent(input$selectProject, {
if (is.null(input$selectProject)) return(NULL)
if (!nchar(input$selectProject)) return(NULL)
if (verbose)
cat("DEBUG project change step 1\n")
# values$project <- input$selectProject
# values$conf <- values$confs[[values$project]]
values$conf <- RJSONIO::fromJSON(
paste0("tigger_", input$selectProject, ".json"))
ok.data(NULL)
ok.visu(NULL)
initDataTrigger(initDataTrigger() + 1)
if (verbose)
cat("DEBUG project change step 2\n")
if (!is.null(values$conf$dataNrows))
updateNumericInput(session, "dataNrows", value = as.numeric(values$conf$dataNrows))
if (!is.null(values$conf$dataSkipRows))
updateNumericInput(session, "dataSkipRows", value = as.numeric(values$conf$dataSkipRows))
#### Python ####
## Default use_python if not found in conf file: FALSE
if(is.null(values$conf$use_python))
values$conf$use_python <- FALSE
updateCheckboxInput(session, "sys_use_python", value = values$conf$use_python)
## Set default python if not found in conf file
if(is.null(values$conf$python))
values$conf$python <- "python3"
updateTextInput(session, "sys_which_python", value = values$conf$python)
values$python_ok <- FALSE
if (values$conf$use_python) {
## Check whether python is working
pytest <- try(system(paste(values$conf$python, "--version"),
intern = TRUE))
if (inherits(pytest, "try-error")) {
showNotification(paste(
"Python path `", values$conf$python,
"` not valid, try changing it in Project/System tab"),
duration = 10, type = "error")
} else
values$python_ok <- TRUE
}
if (verbose)
cat("Python ok:", values$python_ok, "\n")
#### GPU ####
if (!is.null(values$conf$use_gpu))
updateCheckboxInput(session, "sys_use_gpu", value = values$conf$use_gpu)
#### Spacy ####
if (!is.null(values$conf$use_spacy))
updateCheckboxInput(session, "sys_use_spacy", value = values$conf$use_spacy)
updateTextInput(session, "sys_use_spacy_model", value = values$conf$use_spacy_model)
#### fastText ####
if (!is.null(values$conf$use_ft))
updateCheckboxInput(session, "sys_use_ft", value = values$conf$use_ft)
updateTextInput(session, "sys_use_ft_model", value = values$conf$use_ft_model)
#### SBERT ####
if (!is.null(values$conf$use_sb))
updateCheckboxInput(session, "sys_use_sb", value = values$conf$use_sb)
updateTextInput(session, "sys_use_sb_model", value = values$conf$use_sb_model)
})
##############################################################################
## Reactives : project settings UI
##############################################################################
output$sys_datadir <- renderText(values$conf$datadir)
output$sys_datafile <- renderText(paste0(values$conf$dataname, ".feather"))
output$sys_var_id <- renderUI(renderText(values$conf$idcol))
output$sys_var_text <- renderUI(renderText(values$conf$textcols))
output$sys_var_tag <- renderUI(renderText(values$conf$tagcols))
output$sys_var_context_ui <- renderUI(selectInput(
"sys_var_context", NULL, values$data_orig_colnames, values$conf$contextcols,
multiple = TRUE))
output$sys_var_comm_ui <- renderUI(selectInput(
"sys_var_comm", NULL, c("(none)", values$data_orig_colnames),
values$conf$commcol))
output$sys_ex_lang_ui <- renderUI(selectInput(
"sys_ex_lang", NULL, modelnames_labels, "en"
))
output$sys_ex_spacy <- renderText(
modelnames$spacy_name[modelnames$short == input$sys_ex_lang])
output$sys_ex_spacy_dl <- renderText(paste0(
"Download with python module: ", values$conf$python, " -m spacy download ",
modelnames$spacy_name[modelnames$short == input$sys_ex_lang]))
output$sys_ex_ft <- renderText(
modelnames$fasttext_name[modelnames$short == input$sys_ex_lang])
output$sys_ex_ft_dl <- renderText(paste0(
"Manual download link : ",
modelnames$fasttext_url[modelnames$short == input$sys_ex_lang]))
output$sys_ex_sb <- renderText(
ifelse(input$sys_ex_lang %in% c("ar", "zh", "nl", "en", "fr", "de",
"it", "ko", "pl", "pt", "ru", "es", "tr"),
"distiluse-base-multilingual-cased-v1",
"distiluse-base-multilingual-cased-v2"))
## Save system changes on button click, with modal
output$save_config_msg <- renderUI({
req(values$save_config_msg)
p(strong(values$save_config_msg))
})
output$save_embed_msg <- renderUI({
tmp <- ""
if (input$sys_use_spacy) if (!identical(input$sys_use_spacy_model, values$conf$use_spacy_model))
tmp <- paste0(tmp, "<li> Changing the spacy model will delete the current tokenized text and fasttext word embeddings, to prevent conflicts</li>")
if (input$sys_use_ft) if (!identical(input$sys_use_ft_model, values$conf$use_ft_model))
tmp <- paste0(tmp, "<li> Changing the fasttext model will delete the current fasttext word embeddings, to prevent conflicts.</li>")
if (input$sys_use_sb) if (!identical(input$sys_use_sb_model, values$conf$use_sb_model))
tmp <- paste0(tmp, "<li> Changing the SBERT model will delete the current SBERT sentence embeddings, to prevent conflicts.</li>")
if (nchar(tmp)) {
return(HTML(paste("<br><p> <strong>Warning:</strong> <ul>", tmp, "</ul></p>")))
} else
return(NULL)
})
observeEvent(input$saveSystem, {
values$save_config_msg <- NULL
showModal(modalDialog(
title = "Save project configuration",
paste0("Click 'Save' to check and confirm the new configuration"),
uiOutput("save_embed_msg"),
uiOutput("save_config_msg"),
footer = tagList(actionButton(paste0("save_conf_confirm"), "Save"),
modalButton("Cancel"))))
})
observeEvent(input$save_conf_confirm, {
newconf <- values$conf
newconf$contextcols <- input$sys_var_context
newconf$commcol <- input$sys_var_comm
if (input$sys_use_python) {
pytest <- try(system(paste(input$sys_which_python, "--version"),
intern = TRUE))
if (inherits(pytest, "try-error")) {
values$save_config_msg <- paste0(
"Error: python path `", input$sys_which_python, "` not valid")
return(NULL)
}
}
newconf$use_python <- input$sys_use_python
newconf$python <- input$sys_which_python
newconf$use_gpu <- input$sys_use_gpu
if (input$sys_use_spacy) {
sptest <- system(paste0(
newconf$python, " -m spacy info ", input$sys_use_spacy_model), intern = TRUE)
if (length(sptest) == 0) {
values$save_config_msg <- paste(
"Error loading spacy, check that it is installed in the specified python env")
return(NULL)
}
if (length(attr(sptest, "status"))) {
values$save_config_msg <- paste(
"Error loading spacy model, check that it has been downloaded")
return(NULL)
}
newconf$use_spacy_model <- input$sys_use_spacy_model
}
newconf$use_spacy <- input$sys_use_spacy
if (input$sys_use_ft) {
if (!file.exists(input$sys_use_ft_model)) {
values$save_config_msg <- paste(
"Error loading fasttext model, check the specified path")
return(NULL)
}
newconf$use_ft_model <- input$sys_use_ft_model
}
newconf$use_ft <- input$sys_use_ft
if (input$sys_use_sb)
newconf$use_sb_model <- input$sys_use_sb_model
newconf$use_sb <- input$sys_use_sb
# if (newconf$use_spacy) if (newconf$use_spacy_model != values$conf$use_spacy_model) {
if (newconf$use_spacy) if (!identical(newconf$use_spacy_model, values$conf$use_spacy_model)) {
file.remove(paste0(values$conf$datadir, values$conf$dataname, "_spa.feather"))
file.remove(paste0(values$conf$datadir, values$conf$dataname, "_ft.feather"))
}
# if (newconf$use_ft) if (newconf$use_ft_model != values$conf$use_ft_model)
if (newconf$use_ft) if (!identical(newconf$use_ft_model, values$conf$use_ft_model))
file.remove(paste0(values$conf$datadir, values$conf$dataname, "_ft.feather"))
# if (newconf$use_sb) if (newconf$use_sb_model != values$conf$use_sb_model)
if (newconf$use_sb) if (!identical(newconf$use_sb_model, values$conf$use_sb_model))
file.remove(paste0(values$conf$datadir, values$conf$dataname, "_sb.feather"))
writeLines(RJSONIO::toJSON(newconf),
paste0("tigger_", newconf$projectid, ".json"))
values$conf <- newconf
initDataTrigger(initDataTrigger() + 1)
removeModal()
})
##############################################################################
## Reactives : data
##############################################################################
ok.commentCol <- reactive(values$conf$commcol)
## Current imported data (async)
observeEvent(input$dataImport, {
if (ok.data.running()) return(NULL)
## Save nrows to conf file on change
if (!is.numeric(input$dataNrows) | !is.numeric(input$dataSkipRows)) return(NULL)
values$conf$dataNrows <- input$dataNrows
values$conf$dataSkipRows <- input$dataSkipRows
if (!is.null(values$conf$projectid))
writeLines(RJSONIO::toJSON(values$conf), paste0(
"tigger_", values$conf$projectid, ".json"))
initDataTrigger(initDataTrigger() + 1)
})
## Read data event
observeEvent(initDataTrigger(), {
if (is.null(values$conf)) {
ok.data(NULL)
return(NULL)
}
if (initDataTrigger() == 0) return(NULL)
if (verbose)
cat("DEBUG enter ok.data\n")
if (verbose)
cat("DEBUG enter ok.data trigger", initDataTrigger(), "\n")
ok.data.running(TRUE)
## Prepare variables for async launch
coco <- values$conf
coco$python_ok <- values$python_ok
da_token <- session$token
data_status <- paste0("tigger_", da_token, "_data")
file_orig_colnames <- paste0(data_status, "_origcols")
file_colstok <- paste0(data_status, "_tok")
file_colsft <- paste0(data_status, "_ft")
file_colssb <- paste0(data_status, "_sb")
file_colsbertpred <- paste0(data_status, "_bertpred")
da_missing <- paste0(data_status, "_bertpred_missing")
projpath <- paste0(coco$datadir, coco$projectid)
if (!file.exists(projpath))
dir.create(projpath)
## Read data: async launch
data_async <- future({
#################
## Main data file
set_status(data_status, "Importing main data file...")
res <- arrow::read_feather(
paste0(coco$datadir, coco$dataname, ".feather"))
if (is.null(coco$dataNrows)) return(NULL)
rowselect <-
intersect(1:nrow(res), (1:(coco$dataNrows)) + coco$dataSkipRows)
if (!length(rowselect)) return(NULL)
res <- res[rowselect, , drop = FALSE]
writeLines(colnames(res), file_orig_colnames)
#################
## Tokenized text
## Import data : feather of two cols, one the id, other the tokenized
set_status(data_status, "Importing tokenized text...")
file_tok <- paste0(coco$datadir, coco$dataname, "_spa.feather")
if (file.exists(file_tok)) {
tokdat <- arrow::read_feather(file_tok)
} else
tokdat <- NULL
tok_remaining <- res[[coco$idcol]]
tok_remaining <-
tok_remaining[! tok_remaining %in% tokdat[[coco$idcol]]]
## Tokenize remaining texts
if (length(tok_remaining)) {
if (!coco$python_ok | !coco$use_spacy) {
## If remaining texts to tokenize but no python, fall back on
## untokenized text
## TODO here use quanteda tokenizer with options?
tokdat <- data.frame(
res[[coco$idcol]],
do.call(paste, c(res[match(tok_remaining, res[[coco$idcol]]),
coco$textcols, drop = FALSE], sep = "\n\n")))
colnames(tokdat) <- c(coco$idcol, "text_spa")
set_status(data_status, "Importing tokenized text: no spacy ok")
} else {
## If python present, tokenize with spacy
set_status(data_status,
paste("Tokenizing", length(tok_remaining), "new texts"))
tmpfile <- paste0("tigger_tok_", da_token)
arrow::write_feather(
data.frame(
id = tok_remaining,
text = do.call(
paste, c(res[match(tok_remaining, res[[coco$idcol]]),
coco$textcols, drop = FALSE], sep = "\n\n"))),
paste0(tmpfile, ".feather")
)
system(paste0(
coco$python, " tokenize_spacy.py -d ", tmpfile, ".feather",
" -m ", coco$use_spacy_model))
tok_new <- arrow::read_feather(paste0(tmpfile, "_spa.feather"))
colnames(tok_new)[colnames(tok_new) == "id"] <- coco$idcol
tokdat <- rbind(tokdat, tok_new)
arrow::write_feather(tokdat, file_tok)
system(paste0("rm ", tmpfile, "*"))
set_status(data_status, "Importing tokenized text: spacy ok")
}
}
## Save colname for tokenized text
tmp_cols_tok <- colnames(tokdat)[colnames(tokdat) != coco$idcol]
writeLines(tmp_cols_tok, file_colstok)
set_status(data_status, "Importing tokenized text: wrote colnames")
## Merge with current data
res <- merge(res, tokdat, by = coco$idcol,
all.y = FALSE, sort = FALSE)
rm(tokdat)
set_status(data_status, "Importing tokenized text: merged")
#################
## FastText embeddings
set_status(data_status, paste("Loading fastText embeddings"))
## FastText embeddings : feather file with id and embeddings
file_ft <- paste0(coco$datadir, coco$dataname, "_ft.feather")
if (file.exists(file_ft)) {
ftdat <- arrow::read_feather(file_ft)
} else
ftdat <- NULL
ft_remaining <- res[[coco$idcol]]
ft_remaining <-
ft_remaining[! ft_remaining %in% ftdat[[coco$idcol]]]
## Embed remaining texts, if FT in use
if (length(ft_remaining)) {
if (!coco$python_ok | !coco$use_ft) {
## If no python or FT, abort if not all rows already embedded
ftdat <- NULL
} else {
set_status(data_status, paste(
"Embedding", length(ft_remaining), "new texts with fastText"))
tmpfile <- paste0("tigger_ft_", da_token)
arrow::write_feather(
data.frame(
id = ft_remaining,
text = do.call(
paste, res[match(ft_remaining, res[[coco$idcol]]),
tmp_cols_tok, drop = FALSE])),
paste0(tmpfile, ".feather")
)
system(paste0(
coco$python, " embed_fasttext.py -d ", tmpfile, ".feather",
" -m ", coco$use_ft_model))
ft_new <- arrow::read_feather(paste0(tmpfile, "_ft.feather"))
colnames(ft_new)[colnames(ft_new) == "id"] <- coco$idcol
ftdat <- rbind(ftdat, ft_new)
arrow::write_feather(ftdat, file_ft)
system(paste0("rm ", tmpfile, "*"))
}
}
if (!is.null(ftdat)) {
## Merge with current data
writeLines(colnames(ftdat)[colnames(ftdat) != coco$idcol], file_colsft)
res <- merge(res, ftdat, by = coco$idcol, all.y = FALSE, sort = FALSE)
rm(ftdat)
}
#################
## SBERT embeddings
set_status(data_status, "Loading SBERT embeddings")
## SBERT file : feather of id and embeddings
file_sb <- paste0(coco$datadir, coco$dataname, "_sb.feather")
if (file.exists(file_sb)) {
sbdat <- arrow::read_feather(file_sb)
}else
sbdat <- NULL
sb_remaining <- res[[coco$idcol]]
sb_remaining <-
sb_remaining[! sb_remaining %in% sbdat[[coco$idcol]]]
## Embed remaining texts
if (length(sb_remaining)) {
if (!coco$python_ok | !coco$use_sb) {
## If no python or SB, abort if not all rows already embedded
sbdat <- NULL
} else {
set_status(data_status, paste(
"Embedding", length(sb_remaining), "new texts with SBERT"))
tmpfile <- paste0("tigger_sb_", da_token)
arrow::write_feather(
data.frame(
id = sb_remaining,
text = do.call(
paste, res[match(sb_remaining, res[[coco$idcol]]),
coco$textcols, drop = FALSE])),
paste0(tmpfile, ".feather")
)
system(paste0(
coco$python, " embed_sbert.py -d ", tmpfile, ".feather",
" -m ", coco$use_sb_model))
sb_new <- arrow::read_feather(paste0(tmpfile, "_sb.feather"))
colnames(sb_new)[colnames(sb_new) == "id"] <- coco$idcol
sbdat <- rbind(sbdat, sb_new)
arrow::write_feather(sbdat, file_sb)
system(paste0("rm ", tmpfile, "*"))
}
}
if (!is.null(sbdat)) {
## Merge with current data
writeLines(colnames(sbdat)[colnames(sbdat) != coco$idcol], file_colssb)
res <- merge(res, sbdat, by = coco$idcol, all.y = FALSE, sort = FALSE)
rm(sbdat)
}
#################
## BERT predictions
set_status(data_status, "Loading BERT predictions")
## BERT predictions : feathers of id and probs
## TODO: take full prediction if existing
files_pred <- dir(
coco$datadir, paste0("^", coco$dataname, "_bertpred_.*[.]feather$"))
# files_predall <- dir(
# coco$datadir, paste0("^", coco$dataname, "_bertpredall_.*[.]feather$"))
# files_pred <- files_pred[
# ! gsub("_bertpred_", "_bertpredall_", files_pred) %in% files_predall]
# files_pred <- c(files_predall, files_pred)
tmp_bertpred_cols <- list()
if (length(files_pred)) {
for (ipred in files_pred) {
preddat <- arrow::read_feather(paste0(coco$datadir, ipred))
predname <-
gsub(paste0("^", coco$dataname, "_bertpred_(.*)[.]feather$"),
"\\1", ipred)
# predname <-
# gsub(paste0("^", coco$dataname, "_bertpred(all)?_(.*)[.]feather$"),
# "\\2", ipred, perl = TRUE)
n_missing <- sum(! res[[coco$idcol]] %in% preddat[[coco$idcol]])
if (n_missing) {
write(
paste0("BERT prediction ", predname, " : ", n_missing,
" texts missing in prediction."),
da_missing, append = TRUE)
} else {
colnames(preddat) <- gsub(
"^bertpred_", paste0("bertpred_", predname), colnames(preddat))
tmp_bertpred_cols[[predname]] <-
colnames(preddat)[colnames(preddat) != coco$idcol]
res <- merge(res, preddat, by = coco$idcol,
all.y = FALSE, sort = FALSE)
}
rm(preddat)
}
if (length(tmp_bertpred_cols)) {
writeLines(names(tmp_bertpred_cols), file_colsbertpred)
lapply(names(tmp_bertpred_cols), function(x) {
writeLines(tmp_bertpred_cols[[x]], paste0(file_colsbertpred, "_", x))
})
}
}
## Comment col
set_status(data_status, "Loading comments")
tmpfile <- paste0(projpath, "/", coco$commcol, ".csv")
if (file.exists(tmpfile)) {
tmpcol <- read.csv(tmpfile)
colnames(tmpcol)[2] <- coco$commcol
## Remove duplicates, keep newest
tmpcol <- tmpcol[nrow(tmpcol):1, , drop = FALSE]
tmpcol <- tmpcol[!duplicated(tmpcol[, 1]), , drop = FALSE]
tmpcol <- tmpcol[nrow(tmpcol):1, , drop = FALSE]
res <- merge(res, tmpcol, by = coco$idcol,
all.x = TRUE, all.y = FALSE, sort = FALSE)
res[[coco$commcol]][is.na(res[[coco$commcol]])] <- ""
} else
res[[coco$commcol]] <- ""
# ## Tagging cols
# set_status(data_status, "Loading tags")
# for (itag in coco$tagcols) {
# tmpfile <- paste0(projpath, "/", itag, ".csv")
# if (file.exists(tmpfile)) {
# tmpcol <- read.csv(tmpfile)
# tmpcol[[paste0("hist_", itag)]] <- 1:nrow(tmpcol)
# ## Remove duplicates, keep newest
# tmpcol <- tmpcol[nrow(tmpcol):1, ]
# tmpcol <- tmpcol[!duplicated(tmpcol[, 1]), ]
# tmpcol <- tmpcol[nrow(tmpcol):1, ]
# if (verbose)
# cat("DEBUG imported ", itag, nrow(tmpcol), "\n")
# res <- merge(res, tmpcol, by = coco$idcol,
# all.x = TRUE, all.y = FALSE, sort = FALSE)
# } else {
# res[[itag]] <- NA
# res[[paste0("hist_", itag)]] <- NA
# }
# }
## Tagging cols
set_status(data_status, "Loading tags")
for (itag in coco$tagcols) {
itaghist <- paste0("hist_", itag)
if (is.null(res[[itag]]) | all(is.na(res[[itag]]))) {
res[[itag]] <- NA
res[[itaghist]] <- NA
} else {
res[[itaghist]][!is.na(res[[itag]])] <- 1:sum(!is.na(res[[itag]]))
}
tmpfile <- paste0(projpath, "/", itag, ".csv")
if (file.exists(tmpfile)) {
tmpcol <- read.csv(tmpfile)
tmpcol[[itaghist]] <- 1:nrow(tmpcol) +
max(0, suppressWarnings(max(na.omit(res[[itaghist]]))))
## Remove duplicates, keep newest
tmpcol <- tmpcol[nrow(tmpcol):1, ]
tmpcol <- tmpcol[!duplicated(tmpcol[, 1]), ]
tmpcol <- tmpcol[nrow(tmpcol):1, ]
tmpcol <- tmpcol[tmpcol[[coco$idcol]] %in% res[[coco$idcol]], ]
if (verbose)
cat("DEBUG imported", itag, nrow(tmpcol), "\n")
# res <- merge(res, tmpcol, by = coco$idcol,
# all.x = TRUE, all.y = FALSE, sort = FALSE)
if (nrow(tmpcol)) {
idmatches <- match(tmpcol[[coco$idcol]], res[[coco$idcol]])
# idmatches <- idmatches[!is.na(idmatches)]
res[[itag]][idmatches] <- tmpcol[[itag]]
res[[itaghist]][idmatches] <- tmpcol[[itaghist]]
}
}
}
set_status(data_status, "Finalizing data import")
res
}) %...>% ok.data.async()
data_async <- catch(data_async, function(error) {
values$modelDiagno <- paste0("Error in data import: ", error)
})
data_async <- finally(data_async, function() {
#################
## Finalize data importation
res <- ok.data.async()
if (is.null(res)) return(NULL)
set_status(data_status, "Finalizing data import: read columns")
## Read column (tok, ft, sb, ...) names from dedicated files
values$tagcols <- coco$tagcols
values$cols_tok <- readLines(file_colstok)
values$cols_ft <- NULL
if (file.exists(file_colsft)) {
values$cols_ft <- readLines(file_colsft)
}
values$cols_sb <- NULL
if (file.exists(file_colssb)) {
values$cols_sb <- readLines(file_colssb)
}
set_status(data_status, "Finalizing data import: bertpred")
if (file.exists(file_colsbertpred)) {
tmp_bertpred <- readLines(file_colsbertpred)
values$cols_bertpred <- lapply(tmp_bertpred, function(x)
readLines(paste0(file_colsbertpred, "_", x)))
names(values$cols_bertpred) <- tmp_bertpred
} else
values$cols_bertpred <- NULL
set_status(data_status, "Finalizing data import: bertpred missing")
if (file.exists(da_missing)) {
lapply(readLines(da_missing), function(x)
showNotification(paste(
x, "\n",
"You may predict on current text in BERT panel, ",
"and then reload the data.")))
}
## Populate regressors list in prediction panel
set_status(data_status, "Finalizing data import: populating regressors")
regr_choices <- c(
"Regex" = "regex",
"Word counts (DFM)" = "dfm")
if (!is.null(values$cols_ft))
regr_choices <- c(regr_choices, "Word embeddings (FastText)" = "ft")
if (!is.null(values$cols_sb))
regr_choices <- c(regr_choices, "Sentence embeddings (SBERT)" = "sb")
regr_choices <- c(regr_choices, "Extra predictors" = "extra")
if (length(values$cols_bertpred)) {
tmp_choices <- paste0("bertpred_", names(values$cols_bertpred))
names(tmp_choices) <- paste0("BERT_", names(values$cols_bertpred))
regr_choices <- c(regr_choices, tmp_choices)
}
# if (length(regr_choices) > 3) {
regr_choices_sel <-
regr_choices[regr_choices %in% c("regex", "ft", "sb")]
# } else
# regr_choices_sel <- c("regex", "dfm")
updateSelectizeInput(
session, "use_regressors", choices = regr_choices,
selected = regr_choices_sel)
values$new_use_regressors <- regr_choices_sel
othercols <- colnames(res)
othercols <- othercols[! othercols %in% c(
values$cols_tok, values$cols_ft, values$cols_sb, values$cols_bertpred,
values$tagcols, paste0("hist_", values$tagcols),
coco$commcol, coco$textcols)]
updateSelectInput(
session, "dlTagSelect",
choices = c("tags", "comments", "predictions", othercols),
selected = c("tags", "comments", "predictions"))
## Populate extra regressors lists in prediction panel
updateSelectizeInput(
session, "use_ootregnum", choices = othercols, selected = NULL)
updateSelectizeInput(
session, "use_ootregcat", choices = othercols, selected = NULL)
## Project path
values$projectdir <- paste0(projpath, "/")
## Create tagmat: data.frame of all current taggings
values$tagmat <- res[, coco$tagcols, drop = FALSE]
## Create tagmathist: data.frame of historical order of taggings
values$tagmathist <- res[, paste0("hist_", coco$tagcols), drop = FALSE]
colnames(values$tagmathist) <- coco$tagcols
## Save data original colnames
values$data_orig_colnames <- readLines(file_orig_colnames)
ok.data(ok.data.async())
ok.data.async(NULL)
ok.data.running(FALSE)
set_status(data_status, "Exit data async")
system(paste0("rm ", data_status, "*"))
values$modelDiagno <- "No model"
if(trainTrigger() == 0) trainTrigger(1) ## For first run. TODO set trainTrigger to 0 when changing data?
if (verbose) cat("DEBUG Exit data async finally\n")
})
NULL
})
## Read data import status from file
observe({
if (ok.data.running()) {
invalidateLater(100)
tmpfile <- paste0("tigger_", session$token, "_data")
if (file.exists(tmpfile)) {
values$modelDiagno <- paste(collapse = "\n", scan(
tmpfile, what = "character", sep="\n", quiet = TRUE))
} else
values$modelDiagno <- ""
}
})
###############
## ok.text() reactive
###############
ok.text <- eventReactive(ok.data(), {
if (is.null(ok.data())) return(NULL)
res <- ok.data()[, values$cols_tok[1]]
if (length(values$cols_tok) > 1) {
for (icol in values$cols_tok[-1]) {
res <- paste0(res, "\n\n", ok.data()[, icol])
}
}
# queryTrigger(queryTrigger() + 1)
if (verbose)
cat("DEBUG ok.text", length(res), "\n")
res
})
ok.labelcol <- reactive({
if (is.null(ok.data())) return(NULL)
if (verbose)
# cat("DEBUG enter ok.labelcol\n")
if (is.null(input$selectScheme)) return(values$conf$tagcols[1])
input$selectScheme
})
ok.nicetext <- reactive({
req(ok.data(), values$conf$textcols)
# if (is.null(ok.data())) return(NULL)
do.call(paste, c(ok.data()[, values$conf$textcols, drop = FALSE],
sep = "\n\n"))
})
ok.regressor.names <- reactive({
daregs <- values$new_use_regressors
if (verbose)
cat("DEBUG enter ok.regressor.names ", paste(daregs, collapse = " "), "\n")
res <- NULL
if ("ft" %in% daregs)
res <- c(res, values$cols_ft)
if ("sb" %in% daregs)
res <- c(res, values$cols_sb)
if (length(values$cols_bertpred)) {
for (ipred in 1:length(values$cols_bertpred)) {
if (paste0("bertpred_", names(values$cols_bertpred)[ipred]) %in% daregs)
res <- c(res, values$cols_bertpred[[ipred]])
}
}
if (verbose)
cat("DEBUG exit ok.regressor.names:", length(res), "\n")
res
})
ok.contextCol <- reactive({
if (verbose)
cat("DEBUG enter ok.contextCol\n")
# res <- isolate(values$conf$contextcols)
res <- values$conf$contextcols
names(res) <- res
res
})
## Initiate label and queries history, on ok.text() or labelcol change,
observeEvent(list(ok.text(), ok.labelcol()), {
if (verbose)
cat("DEBUG initiate label\n")
if (is.null(ok.labelcol())) {
values$label <- NULL
values$labelhist <- NULL
values$queries <- NULL
values$comment <- NULL
values$wasTagged <- NULL
values$wasRetagged <- NULL
} else {
values$label <- values$tagmat[, ok.labelcol()]
values$labelhist <- values$tagmathist[, ok.labelcol()]
values$queries <- which(!is.na(values$label))
values$comment <- ok.data()[[values$conf$commcol]]
values$wasTagged <- !is.na(values$label)
values$wasRetagged <- rep(FALSE, length(values$label))
}
})
## On values$label change, values$tagmat is updated for separate history
observeEvent(values$label, {
if (is.null(values$label)) return(NULL)
if (verbose)
cat("DEBUG Update tagmat from label\n")
values$tagmat[, ok.labelcol()] <- values$label
values$tagmathist[, ok.labelcol()] <- values$labelhist
})
##############################################################################
## Reactives : training
##############################################################################
## Message of how many tagged and untagged, for model diagnostics box
ok.msgtagged <- reactive({
if (verbose)
cat("DEBUG enter ok.msgtagged\n")
# if (is.null(ok.data()) | ok.data.running()) {
if (is.null(ok.data())) {
return("No data yet ")
}
tmp <- ""
# if (sum(is.na(values$label)) == 0)
# tmp <- paste0(tmp, "Data correction mode (0 NA labels left).\n")
paste0(tmp, "Tagged: ", sum(!is.na(values$label)),
" ; Untagged: ", sum(is.na(values$label)))
})
## Training possible? At least 3 texts for 2 tags
ok.train.possible <- reactive({
if (verbose)
cat("DEBUG enter ok.train.possible\n")
labtab <- table(values$label)
if (length(labtab) < 2) return(FALSE)
if (sum(labtab > 2) < 2) return(FALSE)
TRUE
})
## Compute DFM (async)
## TODO make this less reactive
observe({
# if ("dfm" %in% input$use_regressors & !values$dfmGo)
if ("dfm" %in% values$new_use_regressors & !values$dfmGo)
values$dfmGo <- TRUE
})
ok.dfm <- reactiveVal()
observe({
if (is.null(ok.text())) return(NULL)
if (! values$dfmGo) return(NULL)
if (!is.numeric(input$dfmNgrams)) return(NULL)
if (verbose)
cat("DEBUG enter dfm observer\n")
datext <- ok.text()
dfmNgrams <- input$dfmNgrams
dfmTfIdf <- input$dfmTfIdf
dfmMinDocfreq <- input$dfmMinDocfreq
dfmMinTermfreq <- input$dfmMinTermfreq
dfmTfScheme <- input$dfmTfScheme
dfmDfScheme <- input$dfmDfScheme
dfm_async <- future(seed = TRUE, {
dadfm <- datext %>%
tolower() %>% tokens(remove_punct = F) %>%
# tokens_select(pattern = stopwords(language= "fr"), selection = "remove") %>%
tokens_ngrams(n= 1:dfmNgrams) %>% dfm() %>%
dfm_trim(min_docfreq = dfmMinDocfreq, min_termfreq = dfmMinTermfreq,
verbose = TRUE)
if (dfmTfIdf) {
dadfm <- dadfm %>%
dfm_tfidf(scheme_tf = dfmTfScheme, scheme_df = dfmDfScheme)
}
suppressWarnings({ # TODO : check
dadfm <- quanteda::convert(dadfm, to= "tm")
})
dadfm <- Matrix::sparseMatrix(i= dadfm$i, j= dadfm$j, x= dadfm$v,
dimnames = dadfm$dimnames)
dadfm
}) %...>% ok.dfm()
dfm_async <- catch(dfm_async, function(error) {
values$modelDiagno <- paste0("Error in DFM: ", error)
})
})
## Predictor : df or matrix of dfm and extra
ok.regressor.matrix <- reactive({
if (is.null(ok.text())) return(NULL)
if (is.null(ok.regressor.names())) return(NULL)
if (verbose)
cat("DEBUG enter ok.regressor.matrix\n")
model.matrix(~., data= ok.data()[, ok.regressor.names()])[, -1]
})
ok.extraregressor.matrix <- reactive({
req(ok.text())
if (! "extra" %in% values$new_use_regressors) return(NULL)
if (verbose)
cat("DEBUG enter ok.extraregressor.matrix\n")
res <- NULL
for (inum in input$use_ootregnum) {
suppressWarnings({tmp <- as.numeric(ok.data()[[inum]])})
if (all(is.na(tmp))) {
showNotification(paste0(inum, " is all NAs or non-numeric ; predictor not used"),
duration = 5, type = "warning")
next
}
tmp[is.na(tmp)] <- median(tmp, na.rm = TRUE)
res <- cbind(res, tmp)
}
for (icat in input$use_ootregcat) {
tmp <- ok.data()[[icat]]
if (! length(unique(tmp)) %in% 2:100) {
showNotification(paste0(icat, " has ", length(unique(tmp)),
" unique values ; predictor not used"),
duration = 5, type = "warning")
next
}
tmp <- as.character(tmp)
tmp[is.na(tmp)] <- "<NA>"
tmp <- as.factor(tmp)
tmpmat <- model.matrix(~., data.frame(cat = tmp))[, -1, drop = FALSE]
res <- cbind(res, tmpmat)
}
if (verbose)
cat("DEBUG exit ok.extraregressor.matrix", ncol(res), "\n")
res
})
ok.extraRegex.matrix <- reactive({
if (is.null(ok.nicetext())) return(NULL)
if (is.null(values$extraRegex)) return(NULL)
if (is.null(values$regex_inuse)) return(NULL)
if (!any(values$regex_inuse)) return(NULL)
if (is.null(values$regex_case)) return(NULL)
if (verbose)
cat("DEBUG enter ok.extraRegex.matrix\n")
tmp <- as.matrix(sapply(which(values$regex_inuse), function(iregex) {
1 * stringi::stri_detect_regex(
ok.nicetext(), values$extraRegex[iregex],
opts_regex = list(case_insensitive = !values$regex_case[iregex]))
}))
colnames(tmp) <- values$extraRegex[values$regex_inuse]
tmp
})
ok.predictor <- reactive({
req(values$new_use_regressors)
if (ok.data.running()) return(NULL)
if (is.null(ok.text())) return(NULL)
if (verbose)
cat("DEBUG enter ok.predictor\n")
dadfm <- NULL
if ("dfm" %in% values$new_use_regressors) {
dadfm <- ok.dfm()
} else {
if (input$predModel == "naive bayes") {
values$modelDiagno <-
"Error in training: Naive Bayes requires DFM word counts as predictors."
return(NULL)
}
}
if (length(ok.regressor.names()) > 0) {
if (input$predModel == "random forest") {
if (is.null(dadfm)) {
dadfm <- ok.data()[, ok.regressor.names()]
} else
dadfm <- cbind(ok.data()[, ok.regressor.names()], as.matrix(dadfm))
}
if (input$predModel %in% c("lasso", "linear", "knn")) {
dadfm <- cbind(ok.regressor.matrix(), dadfm)
}
}
# if ("regex" %in% input$use_regressors && length(values$regex_inuse) > 0) {
if ("regex" %in% values$new_use_regressors & !is.null(ok.extraRegex.matrix())) {
dadfm <- cbind(dadfm, ok.extraRegex.matrix())
}
if ("extra" %in% values$new_use_regressors) {
dadfm <- cbind(dadfm, ok.extraregressor.matrix())
}
if (is.null(dadfm)) {
values$modelDiagno <- "Error in training: no predictors selected"
if (verbose)
cat("DEBUG ok.predictor exit: no predictors\n")
return(NULL)
}
if (input$predModel == "linear")
dadfm <- SparseM::as.matrix.csr(dadfm, nrow(dadfm), ncol(dadfm))
if (input$predModel == "naive bayes")
dadfm <- as.dfm(dadfm)
if (verbose)
cat("DEBUG exit ok.predictor: ", ncol(dadfm), "\n")
dadfm
})
## Train model async
ok.train <- reactiveVal()
ok.train.async <- reactiveVal()
observeEvent(
list(trainTrigger(), input$modelTrain),
{
if (is.null(ok.predictor())) return(NULL)
if (verbose)
cat("DEBUG enter ok.train observer\n")
if (!ok.train.possible()) {
values$modelDiagno <-
"Tag at least 3 texts for 2 tags to train prediction model."
ok.train(NULL)
# queryTrigger(queryTrigger() + 1)
return(NULL)
}
ready2tag(0)
values$modelDiagno <- paste0("Training ", input$predModel)
tmp_model <- input$predModel
if (tmp_model == "random forest") {
if (!is.numeric(input$rfNumTrees) | !is.numeric(input$rfMtry) |
!is.numeric(input$rfSampleFrac)) {
ok.train(NULL)
return(NULL)
}
train_fun <- ranger::ranger
train_args <- list(
x= ok.predictor()[!is.na(values$label), ],
y= as.factor(values$label[!is.na(values$label)]),
num.trees = input$rfNumTrees,
mtry = if (input$rfMtry > 0) input$rfMtry,
sample.fraction = input$rfSampleFrac, probability = TRUE)
} else {
if (tmp_model == "linear") {
tmpvalues <- values$label
if (!is.numeric(input$liblinCost)) {
ok.train(NULL)
return(NULL)
}
for (ival in na.omit(unique(tmpvalues)))
if (sum(tmpvalues == ival, na.rm = T) == 1)
tmpvalues[which(tmpvalues == ival)] <- NA
train_fun <- LiblineaR::LiblineaR
train_args <- list(
data= ok.predictor()[!is.na(tmpvalues), ],
target= as.factor(tmpvalues[!is.na(tmpvalues)]),
type= 0, cost = input$liblinCost)
} else if (tmp_model == "lasso") {
tmpvalues <- values$label
for (ival in na.omit(unique(tmpvalues)))
if (sum(tmpvalues == ival, na.rm = T) == 1)
tmpvalues[which(tmpvalues == ival)] <- NA
train_fun <- glmnet::glmnet
train_args <- list(
x= ok.predictor()[!is.na(tmpvalues), ],
y= as.factor(tmpvalues[!is.na(tmpvalues)]),
family= "multinomial")
} else if (tmp_model == "naive bayes") {
if (!is.numeric(input$naiveSmooth)) {
ok.train(NULL)
return(NULL)
}
tmpvalues <- values$label
for (ival in na.omit(unique(tmpvalues)))
if (sum(tmpvalues == ival, na.rm = T) == 1)
tmpvalues[which(tmpvalues == ival)] <- NA
train_fun <- quanteda.textmodels::textmodel_nb
train_args <- list(
x= ok.predictor()[!is.na(tmpvalues), ],
y= as.factor(tmpvalues[!is.na(tmpvalues)]),
smooth = input$naiveSmooth, prior = input$naivePrior,
distribution = input$naiveDistri)
} else if (tmp_model == "knn") {
train_fun <- function(x) return(TRUE)
train_args <- list(x = "knn")
}
}
train_async <- future(seed = TRUE, {
suppressWarnings(do.call(train_fun, train_args))
}) %...>% ok.train.async()
train_async <- catch(train_async, function(error) {
values$modelDiagno <- paste0("Model training error: ", error)
ok.train(NULL)
ok.train.async(NULL)
})
train_async <- finally(train_async, function() {
values$modelDiagno <- "Training done, predicting..."
ok.train(ok.train.async())
ok.train.async(NULL)
predTrigger(predTrigger() + 1)
})
NULL
}
)
## Model prediction on full dataset (async)
ok.pred <- reactiveVal()
ok.pred.async <- reactiveVal()
# observeEvent(list(ok.train(), input$glmLambda, input$knnK, values$retagged), {
observeEvent(predTrigger(), {
if (is.null(ok.train())) {
ok.pred(NULL)
return(NULL)
}
if (verbose)
cat("DEBUG ok.pred()\n")
damodel <- input$predModel
if (input$predModel == "random forest") {
pred_args <- list(ok.train(), data = ok.predictor())
dacolnames <- colnames(ok.train()$predictions)
} else if (input$predModel == "lasso") {
if (!is.numeric(input$glmLambda)) {
ok.pred(NULL)
return(NULL)
}
pred_args <- list(ok.train(), newx = ok.predictor(), s = input$glmLambda,
type = "response")
} else if (input$predModel == "linear") {
pred_args <- list(ok.train(), newx = ok.predictor(), proba = TRUE)
} else if (input$predModel == "naive bayes") {
pred_args <- list(ok.train(), newdata = ok.predictor(),
type = "probability")
} else if (input$predModel == "knn") {
if (!is.numeric(input$knnK)) {
ok.pred(NULL)
return(NULL)
}
pred_args <- list(train = ok.predictor()[!is.na(values$label), ],
test = ok.predictor(),
cl = values$label[!is.na(values$label)],
k = input$knnK, prob = TRUE)
}
pred_async <- future(seed = TRUE, {
if (damodel == "random forest") {
res <- do.call(predict, pred_args)$predictions
colnames(res) <- dacolnames
res
} else if (damodel == "lasso") {
do.call(predict, pred_args)[, , 1]
} else if (damodel == "linear") {
do.call(predict, pred_args)$probabilities
} else if (damodel == "naive bayes") {
do.call(predict, pred_args)
} else if (damodel == "knn") {
tmp <- do.call(class::knn, pred_args)
## A bit flaky here: probs of non-best classes uniformly share the rest,
## and distance is euc
tmpvalues <- sort(unique(pred_args$cl))
res <- matrix(0, nrow(pred_args$test), length(tmpvalues),
dimnames = list(NULL, tmpvalues))
res[cbind(1:nrow(res), tmp)] <- attr(tmp, "prob") -
((1 - attr(tmp, "prob")) / (ncol(res) - 1))
res <- res + (1 - attr(tmp, "prob")) / (ncol(res) - 1)
res
}
}) %...>% ok.pred.async()
pred_async <- catch(pred_async, function(error) {
values$modelDiagno <- paste0("Error in prediction: ", error)
})
pred_async <- finally(pred_async, function() {
if (verbose)
cat("DEBUG ok.pred() out of async\n")
ok.pred(ok.pred.async())
ok.pred.async(NULL)
# predlabTrigger(predlabTrigger() + 1)
diagTrigger(diagTrigger() + 1)
queryNext(queryNext() + 1)
})
})
## Best predicted label for full dataset
# ok.predlab1 <- eventReactive(predlabTrigger(), {
# if (is.null(ok.pred())) return(NULL)
# if (verbose)
# cat("DEBUG ok.predlab1\n")
# # queryTrigger(queryTrigger() + 1)
# colnames(ok.pred())[max.col(ok.pred())]
# })
ok.predlab1 <- reactive({
if (is.null(ok.pred())) return(NULL)
if (verbose)
cat("DEBUG ok.predlab1\n")
colnames(ok.pred())[max.col(ok.pred())]
})
## On scheme change, forget existing model, queue, query and hist
## load scheme description and predictor-regex
observeEvent(ok.labelcol(), {
if (verbose)
cat("DEBUG event selectScheme", ok.labelcol(), "\n")
ok.pred(NULL)
ok.train(NULL)
values$queryqueue <- NULL
values$modelDiagno <- "No model"
values$histSaveStack <- NULL
## Load description file
descr <- ""
if (file.exists(paste0(values$conf$datadir, values$conf$projectid,
"/", input$selectScheme, ".txt"))) {
descr <- paste(collapse = "\n", readLines(paste0(
values$conf$datadir, values$conf$projectid,
"/", input$selectScheme, ".txt")))
}
updateTextAreaInput(session, "schemeDescr", value = descr)
## Load regex from json
regexfile <- paste0(values$projectdir, input$selectScheme, "_regex.json")
if (file.exists(regexfile)) {
if (verbose)
cat("DEBUG enter regex.json\n")
tmp <- RJSONIO::fromJSON(paste(collapse = "\n", readLines(regexfile)))
if (is.null(tmp$regex)) return(NULL)
values$extraRegex <- tmp$regex
names(values$extraRegex) <- sapply(tmp$regex, label2hash)
for (iregex in 1:length(tmp$regex)) {
values[[paste0("useregex_", names(values$extraRegex)[iregex])]] <-
tmp$use[iregex]
values[[paste0("caseregex_", names(values$extraRegex)[iregex])]] <-
tmp$case[iregex]
}
} else
values$extraRegex <- NULL
})
## queryNext event: update values$newQuery ; called by queryNext(), or by
## queryQueue or regex change
## Requires :
## - queryQueue (reactive: indices of current queue), and
## - wasTagged (boolean vector indicating whether each element has been tagged this session, used in "on untagged")
## - wasRetagged (boolean vector indicating whether each element has been tagged this session, used in "on tagged")
## TODO: make a scheme-list of wasTagged, now it will be wiped at each scheme change?
observeEvent(
list(queryNext(), values$queryQueue,
input$sampleChoice, input$taggedWhich, input$maxprobWhich,
input$regexFilter, input$regexCaseSens,
input$visuLock, values$visuZoom$xlim, values$visuZoom$ylim),
{
if (is.null(values$queryQueue)) return(NULL)
if (verbose)
cat("DEBUG queryNext\n")
queue <- values$queryQueue
if (input$sampleChoice == "untagged") {
queue <- queue[!values$wasTagged[queue]]
} else if (input$sampleChoice == "tagged") {
queue <- queue[!is.na(values$label)[queue] & !values$wasRetagged[queue]]
if (input$taggedWhich != "all") {
queue <- queue[values$label[queue] == input$taggedWhich]
}
} else if (input$sampleChoice == "all") {
queue <- queue[!values$wasRetagged[queue]]
}
if (input$visuLock) if (!is.null(values$visuZoom$xlim)) {
if (!is.null(ok.visu()))
queue <- queue[ok.visu()[queue, 1] >= values$visuZoom$xlim[1] &
ok.visu()[queue, 1] <= values$visuZoom$xlim[2] &
ok.visu()[queue, 2] >= values$visuZoom$ylim[1] &
ok.visu()[queue, 2] <= values$visuZoom$ylim[2]]
}
if (length(queue)) if (nchar(input$regexFilter)) {
try_regex <- try(silent = TRUE, {
queue <- queue[
stringi::stri_detect_regex(
ok.nicetext()[queue], input$regexFilter,
opts_regex = list(case_insensitive = !input$regexCaseSens))]
})
if (inherits(try_regex, "try-error")) {
showNotification("Invalid regex", duration = 2, type = "error")
values$newQuery <- NULL
return(NULL)
}
}
if (!length(queue)) {
if (nchar(input$regexFilter)) {
showNotification("No texts matching the regex",
duration = 2, type = "warning")
} else
showNotification("No more texts to be tagged in this set",
duration = 2, type = "warning")
values$newQuery <- NULL
return(NULL)
}
values$lastQuery <- queue[1] # ??
values$newQuery <- queue[1]
ready2tag(1)
}
)
## queryQueue event: fix values$queryQueue, (indices of current queue)
observeEvent(
list(ok.data(), ok.pred(), input$strategy, input$maxprobWhich,
input$selectProject, input$selectScheme), {
if (input$strategy %in% c("entropy", "maxprob") & is.null(ok.pred())) {
values$queryQueue <- NULL
values$lastQuery <- NULL
values$newQuery <- NULL
return(NULL)
}
if (is.null(ok.text())) return(NULL)
if (verbose)
cat("DEBUG queryQueue change\n")
if (input$strategy == "sequential") {
values$queryQueue <- 1:length(ok.text())
} else if (input$strategy == "random") {
values$queryQueue <- sample(length(ok.text()))
} else {
dapred <- ok.pred()
if (input$strategy == "entropy") {
tominimize <- rowSums(dapred * log(dapred))
if (any(dapred == 0)) {
tominimize[dapred == 0] <- 0
}
}
if (input$strategy == "maxprob") {
if (! input$maxprobWhich %in% colnames(dapred)) return(NULL)
tominimize <- -dapred[, input$maxprobWhich];
}
values$queryQueue <- order(tominimize)
}
}
)
## Update uniqueLabels and uniqueLabelsAll if necessary on values$label change
observeEvent(values$label, {
if (is.null(values$label)) return(NULL)
if (verbose)
cat("DEBUG uniqueLabels\n")
dalabs <- sort(na.omit(unique(values$label)))
# if (!length(dalabs)) return(NULL)
if (!identical(dalabs, values$uniqueLabels))
values$uniqueLabels <- dalabs
if (any(! dalabs %in% values$uniqueLabelsAll))
values$uniqueLabelsAll <- c(values$uniqueLabelsAll,
dalabs[! dalabs %in% values$uniqueLabelsAll])
})
##############################################################################
## Project management
##############################################################################
output$cpDataMessage <- renderUI({
req(values$cpDataMessage)
p(values$cpDataMessage)
})
output$cpConfirmMessage <- renderUI({
req(values$cpConfirmMessage)
p(strong(paste0("Error: ", values$cpConfirmMessage)))
})
output$cpIdcolUI <- renderUI({
req(values$cpNewData)
if (!is.data.frame(values$cpNewData)) return(NULL)
selectInput("cpIdcol", NULL, colnames(values$cpNewData))
})
output$cpTextcolUI <- renderUI({
req(values$cpNewData)
if (!is.data.frame(values$cpNewData)) return(NULL)
selectInput("cpTextcol", NULL, colnames(values$cpNewData), multiple = TRUE)
})
output$cpContextcolUI <- renderUI({
req(values$cpNewData)
if (!is.data.frame(values$cpNewData)) return(NULL)
selectInput("cpContextcol", NULL, colnames(values$cpNewData), multiple = TRUE)
})
output$cpTagcolUI <- renderUI({
req(values$cpNewData)
if (!is.data.frame(values$cpNewData)) return(NULL)
selectInput("cpTagcol", NULL, colnames(values$cpNewData), multiple = TRUE)
})
output$cpCommcolUI <- renderUI({
req(values$cpNewData)
if (!is.data.frame(values$cpNewData)) return(NULL)
selectInput("cpCommcol", NULL, c("none", colnames(values$cpNewData)))
})
output$cp_spacyUI <- renderUI(
textInput("cp_use_spacy_model", NULL,
modelnames$spacy_name[modelnames$short == input$cp_lang],
placeholder = "(spacy model name)")
)
output$cp_spacyDlUI <- renderUI(HTML(paste(
"<em>Model must be downloaded once, from python:</em><br>",
input$cp_which_python, "-m spacy download",
modelnames$spacy_name[modelnames$short == input$cp_lang], "<br><br>"
)))
output$cp_ftUI <- renderUI(
textInput("cp_use_ft_model", NULL,
modelnames$fasttext_name[modelnames$short == input$cp_lang],
placeholder = "(fasttext model path)")
)
output$cp_ftDlUI <- renderUI(list(HTML(paste(
"<em>Model can be downloaded <a href='",
modelnames$fasttext_url[modelnames$short == input$cp_lang],
"'>here</a></em>")),
br(), br()))
output$cp_sbUI <- renderUI(
textInput("cp_use_sb_model", NULL, ifelse(
input$cp_lang %in% c("ar", "zh", "nl", "en", "fr", "de",
"it", "ko", "pl", "pt", "ru", "es", "tr"),
"distiluse-base-multilingual-cased-v1",
"distiluse-base-multilingual-cased-v2"),
placeholder = "(custom sentence_transformers model)")
)
## Create new project, with modal dialog
observeEvent(input$createProject, {
if (verbose)
cat("DEBUG create project\n")
showModal(modalDialog(
title = "New project",
p("Create a new text tagging project."),
p("This mainly involves choosing a dataset, and how to process it."),
p("Items marked by a star ", strong("*"),
" cannot be changed once the project is created."),
hr(),
h4("Files and folders"),
fluidRow(
column(3, p(strong("*Project name"))),
column(5, textInput("cpName", NULL, placeholder = "(Required)")),
column(4, p("Tags will be stored in a new directory by this name."))
),
fluidRow(
column(3, p(strong("*Data directory"))),
column(5, textInput("cpDatadir", NULL, placeholder = "(Required)")),
column(4, p("Place (on the server) where the data and project are stored"))
),
fluidRow(
column(3, p(strong("*Data filename"))),
column(5, checkboxInput("cpDoImport", "New data", TRUE)),
column(4, p("Main file, containing id and text columns"))
),
conditionalPanel("!input.cpDoImport", list(
## Existing data
fluidRow(
column(3, p(strong("Existing file"))),
column(5, textInput("cpDatafile", NULL, placeholder = "(Required)")),
column(4, actionButton("cpDatafileGo", "Import"))
)
)),
conditionalPanel("input.cpDoImport", {
## Import data
wellPanel(
selectInput('cpNDfiletype', NULL,
choices = c("Text file (.csv, .txt)" = "csv_txt",
"Feather" = "feather",
"Parquet" = "parquet",
"OpenDocument (.ods)" = "ods",
"Microsoft Excel (.xls, .xlsx)" = "excel",
"SPSS (.sav, .por)" = "spss",
"SAS (.sas7bdat)" = "sas_data",
"Stata v5-v12 (.dta)" = "stata"),
selected = "csv_txt"
),
fileInput('cpNDfile', 'Choose File'),
checkboxInput("cpNDparams", "File parameters"),
conditionalPanel(
"input.cpNDparams",
conditionalPanel(
"input.cpNDfiletype == 'csv_txt'",
fluidRow(column(4, p('Header')),
column(8, selectInput('cpCSVheader', NULL,
c("Header" = TRUE,
"No Header" = FALSE),
"Auto"))),
fluidRow(column(4, p('Separator')),
column(8, selectInput('cpCSVsep', NULL,
c("Comma ','" = ",",
"Semicolon ';'" = ";",
"Tab" = "\t",
"Space" = " "),
"Auto"))),
fluidRow(column(4, p('Quote')),
column(8, selectInput('cpCSVquote', NULL,
c("Double Quote \"" = "\"",
"Single Quote '" = "'",
"None" = ""),
"Double Quote \""))),
fluidRow(column(4, p('Decimal mark')),
column(8, selectInput('cpCSVdec', NULL,
c('Period "."' = ".",
'Comma ","' = ","),
'Period "."'))),
fluidRow(column(4, p('File Encoding')),
column(8, selectInput('cpCSVencoding', NULL,
c("unknown", "UTF-8", "Latin-1"),
"unknown")))),
## Options for ods files
conditionalPanel(
"input.cpNDfiletype == 'ods'",
fluidRow(column(4, p('Sheet')),
column(8, numericInput('cpODSsheet', NULL, 1, 1,
step = 1))),
checkboxInput("cpODScolnames", "Column names", TRUE),
fluidRow(column(4, p('NA symbol')),
column(8, textInput('cpODSna', NULL, value = ""))),
fluidRow(column(4, p('Skip rows')),
column(8, numericInput('cpODSskip', NULL, 0, 0,
step = 1))),
fluidRow(column(4, p('Specify range')),
column(8, textInput('cpODSrange', NULL, value = "")))
),
## Options for xls / xlsx files
conditionalPanel(
"input.cpNDfiletype == 'excel'",
fluidRow(column(4, p('Column names')),
column(8, selectInput('cpXLScolnames', NULL,
choices = c("Yes" = "TRUE",
"No" = "FALSE"),
selected = TRUE ))),
fluidRow(column(4, p('Skip rows')),
column(8, numericInput('cpXLSskip', NULL, 0, 0,
step = 1))),
fluidRow(column(4, p('Trim whitespaces')),
column(8, selectInput('cpXLStrim', NULL,
choices = c("Yes" = "TRUE",
"No" = "FALSE"),
selected = TRUE ))),
checkboxInput("cpXLSsheet",
"Specify Worksheet", FALSE),
conditionalPanel("input.cpXLSsheet",
textInput("cpXLSsheet_specs", NULL, "")),
checkboxInput("cpXLSrange", "Specify Range", FALSE),
conditionalPanel("input.cpXLSrange",
textInput("cpXLSrange_specs", NULL, "")
)),
## Options for SPSS files
conditionalPanel(
"input.cpNDfiletype == 'spss'",
fluidRow(
column(4, p('Skip rows')),
column(8, numericInput('cpSPSSskip', NULL, 0, 0, step = 1))),
checkboxInput("cpSPSSna", "User-defined NA", FALSE)),
## Options for SAS files
conditionalPanel(
"input.cpNDfiletype == 'sas_data'",
fluidRow(
column(4, p('Skip rows')),
column(8, numericInput("cpSASskip", NULL, 0, 0, step = 1))),
checkboxInput("cpSAScatalog", "Use catalog file", FALSE),
conditionalPanel(
"input.cpSAScatalog",
fileInput("cpSAScatalog_file", "Catalog file"))
),
## Options for STATA dta files
conditionalPanel(
"input.cpNDfiletype == 'stata'",
checkboxInput("cpSTATAfactors", "Convert factors", FALSE))
)
)
}),
uiOutput("cpDataMessage"),
hr(),
h4("Data"),
fluidRow(
column(6, numericInput(
"cpNrows", "N. rows for working sample", 500, 1, step = 1)),
column(6, numericInput(
"cpNskip", "Rows to skip before working sample", 0, 0, step = 1))
),
fluidRow(
column(3, p(strong("*ID"))),
column(5, uiOutput("cpIdcolUI")),
column(4, p("Name of the id variable, unique identifier of each text"))
),
fluidRow(
column(3, p(strong("*Text"))),
column(5, uiOutput("cpTextcolUI")),
column(4, p("Name of the text variables: if more than one, texts are concatenated in the specified order"))
),
fluidRow(
column(3, p(strong("*Tags"))),
column(5, uiOutput("cpTagcolUI")),
column(4, p("(optional) Names of variables that are already tagged: each will create a new scheme"))
),
fluidRow(
column(3, p(strong("Comments"))),
column(5, uiOutput("cpCommcolUI")),
column(4, p("(optional) Name of the comments variable"))
),
fluidRow(
column(3, p(strong("Context"))),
column(5, uiOutput("cpContextcolUI")),
column(4, p("(optional) Names of variables not used in the models, but may be displayed during tagging"))
),
hr(),
h4("System"),
fluidRow(
column(3, checkboxInput("cp_use_python", "Python backend", FALSE)),
column(5, conditionalPanel(
"input.cp_use_python",
textInput("cp_which_python", NULL, value = "python3",
placeholder = "(custom python path)"))),
column(4, conditionalPanel(
"input.cp_use_python",
p("This must be a working python3 environment, with the required modules installed (see documentation)")))
),
conditionalPanel("input.cp_use_python", list(
fluidRow(
column(3, p("Language")),
column(5, selectInput("cp_lang", NULL, modelnames_labels, "en")),
column(4, p("Used to preset tokenization and embedding models"))
),
fluidRow(
column(3, checkboxInput("cp_use_spacy", "SpaCy tokenization", FALSE)),
column(5, conditionalPanel("input.cp_use_spacy", uiOutput("cp_spacyUI"))),
column(4, p("Name of the spacy tokenizer model, used in DTM and word embeddings"))
),
conditionalPanel("input.cp_use_spacy", fluidRow(
column(3),
column(9, uiOutput("cp_spacyDlUI")))
),
fluidRow(
column(3, checkboxInput("cp_use_ft", "FastText word embeddings", FALSE)),
column(5, conditionalPanel("input.cp_use_ft", uiOutput("cp_ftUI"))),
column(4, p("Path to the local fasttext model binary"))
),
conditionalPanel("input.cp_use_ft", fluidRow(
column(3),
column(9, uiOutput("cp_ftDlUI")))),
fluidRow(
column(3, checkboxInput("cp_use_sb", "SBERT sentence embeddings", FALSE)),
column(5, conditionalPanel("input.cp_use_sb", uiOutput("cp_sbUI"))),
column(4, p("(GPU recommended) Name or path of the sentence-transformers model"))
),
conditionalPanel("input.cp_use_python", {
checkboxInput("cp_use_gpu", "GPU support (CUDA, for SBERT and BERT)", FALSE)
})
)),
uiOutput("cpConfirmMessage"),
footer = tagList(actionButton("cpConfirm", "Create"),
modalButton("Cancel"))))
})
observeEvent(input$cpDatafileGo, {
dataname <- input$cpDatafile
dataname <- gsub("[.]feather$", "", dataname)
tmpdat <- try(arrow::read_feather(
paste0(gsub("/+$", "/", paste0(input$cpDatadir, "/")),
dataname, ".feather")))
if (inherits(tmpdat, "try-error")) {
values$cpDataMessage <- paste0(
"Data import error: ", as.character(tmpdat))
values$cpNewData <- NULL
return(NULL)
}
if (!is.data.frame(tmpdat)) {
values$cpDataMessage <- "Data import error: object not a data.frame"
values$cpNewData <- NULL
return(NULL)
}
if (ncol(tmpdat) < 2) {
values$cpDataMessage <- "Data import error: only one column detected"
values$cpNewData <- NULL
return(NULL)
}
values$cpDataMessage <- paste0(
"Data import success: ", nrow(tmpdat), " rows, ", ncol(tmpdat), " cols.")
values$cpNewData <- tmpdat
values$cpNewDataname <- dataname
})
## Data import mechanism
observeEvent(input$cpNDfile, {
req(input$cpNDfile)
if(input$cpNDfiletype == "csv_txt") {
res <- try(read.csv(
input$cpNDfile$datapath,
header = as.logical(input$cpCSVheader), sep = input$cpCSVsep,
quote = input$cpCSVquote, dec = input$cpCSVdec, stringsAsFactors = FALSE,
encoding = input$cpCSVencoding, check.names = TRUE))
} else if(input$cpNDfiletype == "feather") {
res <- try(arrow::read_feather(input$cpNDfile$datapath))
} else if(input$cpNDfiletype == "parquet") {
res <- try(arrow::read_parquet(input$cpNDfile$datapath))
} else if (input$cpNDfiletype == "ods") {
range <- input$cpODSrange
if (range == "") range <- NULL
res <- try(readODS::read_ods(
path = input$cpNDfile$datapath, sheet = input$cpODSsheet,
col_names = input$cpODScolnames, na = input$cpODSna,
skip = input$cpODSskip, range = input$cpODSrange))
} else if(input$cpNDfiletype == "excel"){
column_names <- as.logical(input$cpXLScolnames)
trim_spaces <- as.logical(input$cpXLStrim)
the.range <- NULL
if(input$cpXLSrange == TRUE & input$cpXLSrange_specs != "")
the.range <- input$cpXLSrange_specs
the.sheet <- NULL
if(input$cpXLSsheet == TRUE & input$cpXLSsheet_specs != "")
the.sheet <- input$cpXLSsheet_specs
res <- try(data.frame(readxl::read_excel(
dataFile$datapath, col_names = column_names, range = the.range,
sheet= the.sheet, trim_ws = trim_spaces, skip = rows_to_skip)))
} else if(input$cpNDfiletype == "spss"){
res <- try(data.frame(haven::read_spss(
file = input$cpNDfile$datapath,
skip= input$cpSPSSskip, user_na = input$cpSPSSna)))
if (!inherits(res, "try-error")) {
res <- data.frame(lapply(res, function(x) {
attr(x, "format.spss") <- NULL
if ("haven_labelled" %in% class(x))
x <- haven::as_factor(x, levels= "labels")
x
}))
}
} else if(input$cpNDfiletype == "sas_data") {
res <- try(data.frame(haven::read_sas(
data_file = input$cpNDfile$datapath,
catalog_file = input$cpSAScatalog_file$datapath,
skip = input$cpSASskip)))
} else if(input$cpNDfiletype == "stata") {
res <- try(data.frame(foreign::read.dta(
file = input$cpNDfile$datapath, convert.factors = input$cpSTATAfactors)))
}
if(inherits(res, "try-error")) {
values$cpDataMessage <- paste0("Data import error: ", res)
values$cpNewData <- NULL
return(NULL)
}
if (ncol(res) < 2) {
values$cpDataMessage <- "Data import error: only one column detected"
values$cpNewData <- NULL
return(NULL)
}
values$cpDataMessage <- paste0(
"Data import success: ", nrow(res), " rows, ", ncol(res), " columns.")
values$cpNewData <- res
})
## New project confirm
observeEvent(input$cpConfirm, {
if (verbose)
cat("DEBUG cpConfirm\n")
newconf <- list()
tmpdir <- gsub("/+$", "/", paste0(input$cpDatadir, "/"))
if (!dir.exists(tmpdir))
dir.create(tmpdir)
newconf$datadir <- tmpdir
tmpname <- cleanFileName(input$cpName)
if (!nchar(tmpname)) {
values$cpConfirmMessage <- "Project name is required"
return(NULL)
}
if (dir.exists(paste0(newconf$datadir, tmpname))) {
values$cpConfirmMessage <- paste0(
"A project with the same name (", tmpname,
") already exists in the data directory.")
return(NULL)
}
newconf$projectid <- tmpname
if (!is.data.frame(values$cpNewData)) {
values$cpConfirmMessage <- "Import data first"
return(NULL)
}
if (input$cpDoImport) {
newconf$dataname <- newconf$projectid
} else
newconf$dataname <- values$cpNewDataname
if (any(is.na(values$cpNewData[[input$cpIdcol]]))) {
values$cpConfirmMessage <- "ID variable contains missing values"
return(NULL)
}
if (any(duplicated(values$cpNewData[[input$cpIdcol]]))) {
values$cpConfirmMessage <- "ID variable contains duplicates"
return(NULL)
}
newconf$idcol <- input$cpIdcol
if (!length(input$cpTextcol)) {
values$cpConfirmMessage <- "Select at least one text variable"
return(NULL)
}
newconf$textcols <- input$cpTextcol
if (input$cpCommcol == "none") {
newconf$commcol <- paste0("comm_", newconf$projectid)
} else
newconf$commcol <- input$cpCommcol
if (!length(input$cpTagcol)) {
newconf$tagcols <- "scheme0"
} else
newconf$tagcols <- input$cpTagcol
newconf$contextcols <- input$cpContextcol
newconf$dataNrows <- input$cpNrows
newconf$dataSkipRows <- input$cpNskip
if (input$cp_use_python) {
pytest <- try(system(paste(input$cp_which_python, "--version"),
intern = TRUE))
if (inherits(pytest, "try-error")) {
values$cpConfirmMessage <- paste(
"Python path `", input$cp_which_python, "` not valid")
return(NULL)
}
}
newconf$use_python <- input$cp_use_python
newconf$python <- input$cp_which_python
newconf$use_gpu <- input$cp_use_gpu
if (input$cp_use_spacy) {
sptest <- system(paste0(
newconf$python, " -m spacy info ", input$cp_use_spacy_model), intern = TRUE)
if (length(sptest) == 0) {
values$cpConfirmMessage <- paste(
"Error loading spacy, check that it is installed in the specified python env")
return(NULL)
}
if (length(attr(sptest, "status"))) {
values$cpConfirmMessage <- paste(
"Error loading spacy model, check that it has been downloaded")
return(NULL)
}
}
newconf$use_spacy <- input$cp_use_spacy
newconf$use_spacy_model <- input$cp_use_spacy_model
if (input$cp_use_ft) if (!file.exists(input$cp_use_ft_model)) {
values$cpConfirmMessage <- paste(
"Error loading fasttext model, check the specified path")
return(NULL)
}
newconf$use_ft <- input$cp_use_ft
newconf$use_ft_model <- input$cp_use_ft_model
newconf$use_sb <- input$cp_use_sb
newconf$use_sb_model <- input$cp_use_sb_model
writeLines(RJSONIO::toJSON(newconf),
paste0("tigger_", newconf$projectid, ".json"))
values$confnames <- c(newconf$projectid, values$confnames)
values$conf <- newconf
## On new data import, write a feather file with the name of the project
if (input$cpDoImport)
arrow::write_feather(values$cpNewData, paste0(
newconf$datadir, newconf$dataname, ".feather"
))
removeModal()
})
##############################################################################
## Scheme management
##############################################################################
## Print active scheme in scheme management panel
output$printScheme <- renderUI(h4(input$selectScheme))
## Create new scheme on request, with modal dialog
observeEvent(input$createScheme, {
if (verbose)
cat("DEBUG create scheme\n")
showModal(modalDialog(
title = "Create tagging scheme",
p(paste0("This will create a new scheme for this project")),
textInput("newScheme", label = NULL, placeholder = "Required: new scheme name"),
textAreaInput("newSchemeDescr", label = NULL,
placeholder = "Optional: scheme description"),
checkboxInput("schemeDuplicate", "Duplicate existing scheme:"),
conditionalPanel(
"input.schemeDuplicate",
selectInput("schemeDuplicateFrom", NULL, rev(values$conf$tagcols))),
footer = tagList(actionButton("createSchemeConfirm", "Create"),
modalButton("Cancel"))))
})
observeEvent(input$createSchemeConfirm, {
if (is.null(ok.data())) return(NULL)
if (verbose)
cat("DEBUG create scheme event\n")
## Make sure new scheme name is a correct file name
if (!grepl("\\S", input$newScheme)) {
showNotification(type = "error",
"Cannot create scheme: name empty.")
return(NULL)
}
## Check that new scheme name doesn't exist
## Re-read tagcols from conf file, to prevent scheme disappearances
tmpname <- cleanFileName(input$newScheme)
tagpath <- paste0(values$projectdir, tmpname, ".csv")
confpath <- paste0("tigger_", values$conf$projectid, ".json")
if (verbose)
cat("DEBUG create scheme check json:", confpath,"\n")
tmp_tagcols <- RJSONIO::fromJSON(confpath)$tagcols
if (verbose)
cat("DEBUG create scheme json OK\n")
if (tmpname %in% tmp_tagcols | file.exists(tagpath)) {
showNotification(type = "error",
"Cannot create scheme: name already exists.")
return(NULL)
}
## Create new scheme description
if (nchar(input$newSchemeDescr))
writeLines(input$newSchemeDescr,
paste0(values$projectdir, tmpname, ".txt"))
## Update conf file and reactives
values$conf$tagcols <- c(tmp_tagcols, tmpname)
# values$confs[[values$conf$projectid]]$tagcols <- c(tmp_tagcols, tmpname)
writeLines(RJSONIO::toJSON(values$conf), confpath)
if (input$schemeDuplicate) {
tmpmat <- matrix(values$tagmat[, input$schemeDuplicateFrom])
tmpmathist <- matrix(values$tagmathist[, input$schemeDuplicateFrom])
} else {
tmpmat <- matrix(rep(NA, nrow(ok.data())))
tmpmathist <- matrix(rep(NA, nrow(ok.data())))
}
colnames(tmpmat) <- tmpname
colnames(tmpmathist) <- tmpname
values$tagmat <- cbind(values$tagmat, tmpmat)
values$tagmathist <- cbind(values$tagmat, tmpmathist)
## Create new tagging file if duplicating
if (input$schemeDuplicate) {
export <- data.frame(ok.data()[, values$conf$idcol],
values$tagmat[, tmpname])
colnames(export) <- c(values$conf$idcol, tmpname)
export <- na.omit(export)
write.table(export, tagpath, sep = ",", qmethod = "double",
fileEncoding = "UTF-8", row.names = FALSE,
append = FALSE, col.names = TRUE)
}
cat("DEBUG create scheme exit\n")
removeModal()
})
## Button action: delete scheme
observeEvent(input$schemeDelete, {
if (verbose)
cat("DEBUG delete scheme\n")
if (length(values$conf$tagcols) < 2) {
showNotification(type = "error",
"Cannot delete: at least one scheme must remain.")
return(NULL)
}
showModal(modalDialog(
title = "Delete tagging scheme",
paste0("Confirm delete '", input$selectScheme,
"': ", sum(!is.na(values$label)), " tags will be deleted",
", along with scheme description."),
footer = tagList(actionButton("schemeDeleteConfirm", "Delete"),
modalButton("Cancel"))))
})
observeEvent(input$schemeDeleteConfirm, {
file.remove(paste0(values$projectdir, input$selectScheme, ".csv"))
file.remove(paste0(values$projectdir, input$selectScheme, ".txt"))
file.remove(paste0(values$projectdir, input$selectScheme, "_regex.json"))
values$conf$tagcols <-
values$conf$tagcols[values$conf$tagcols != input$selectScheme]
writeLines(RJSONIO::toJSON(values$conf),
paste0("tigger_", input$selectProject, ".json"))
removeModal()
})
observeEvent(input$schemeDescrSave, {
req(input$selectScheme)
writeLines(input$schemeDescr,
paste0(values$conf$datadir, values$conf$projectid,
"/", input$selectScheme, ".txt"))
})
##############################################################################
##############################################################################
## Tag selection : populate
observe({
values$conf$tagcols
updateSelectInput(
# session, "selectScheme", choices = c(rev(values$conf$tagcols), "<NEW>"))
session, "selectScheme", choices = rev(values$conf$tagcols))
})
## Populate oracle buttons
output$oracleButtons <- renderUI({
if (is.null(ok.data())) return(NULL)
# if (is.null(values$label)) return(NULL)
if (is.null(values$uniqueLabels)) return(NULL)
if (verbose)
cat("DEBUG populate oracle buttons\n")
do.call(fluidRow, lapply(values$uniqueLabels, function(ilab)
actionButton(paste0("oracle_", label2hash(ilab)),
strong(ilab))))
})
## Populate maxprob variable selector
observe({
updateSelectInput(session, "maxprobWhich", choices = values$uniqueLabels)
updateSelectInput(session, "taggedWhich",
choices = c("All" = "all", values$uniqueLabels))
})
## Data panel message
observeEvent(ok.data(), {
if (verbose)
cat("DEBUG data panel message\n")
if (is.null(ok.data())) {
output$dataMessage <- renderUI(p("Importing data..."))
}
output$dataMessage <- NULL
})
## On config change, update dataView and retag options
observeEvent(list(values$conf), {
if (is.null(values$conf)) return(NULL)
output$panelData <- renderUI(list(
DT::dataTableOutput("dataView")
))
output$panelRetag <- renderUI(c(
list(
br(),
h4("Rename tags"),
fluidRow(
column(4, strong("Current")),
column(2, strong("Count")),
column(6, strong("New"))
),
br()),
lapply(values$uniqueLabels, function(iLab) {
iHash <- label2hash(iLab)
fluidRow(
column(4, p(iLab)),
column(2, p(sum(values$label == iLab, na.rm = TRUE))),
column(4, textInput(
paste0("newtag_", iHash), NULL,
placeholder = "(new name, leave empty to untag)")),
column(2, actionButton(paste0("retag_", iHash), "Rename")))
})
))
})
output$panelExtraRegex <- renderUI(list(
p(strong("Regex predictors")),
fluidRow(
column(8, textInput("extraRegexText", NULL,
placeholder = "(new regex)", width = "100%")),
column(4, actionButton(
"extraRegexSearch", strong("+"), width = "100%"))
),
fluidRow(
style="overflow-y:scroll; max-height: 25vh",
fluidRow(
column(2, strong("")),
column(1, strong("Use")),
column(1, strong("Case")),
column(1, p(strong("N"))),
column(7, p(strong("Regex")))
),
lapply(values$extraRegex, function(iRegex) {
iHash <- label2hash(iRegex)
tmp_cased <- values[[paste0("caseregex_", iHash)]]
if (is.null(tmp_cased)) tmp_cased <- FALSE
fluidRow(
column(2, actionButton(paste0("delregex_", iHash), "🗑️", width="100%")),
column(1, checkboxInput(
paste0("useregex_", iHash), NULL,
value = isolate(!isFALSE(values[[paste0("useregex_", iHash)]])))),
column(1, checkboxInput(
paste0("caseregex_", iHash), NULL,
value = isolate(isTRUE(values[[paste0("caseregex_", iHash)]])))),
column(1, br(), p(sum(
stringi::stri_detect_regex(
ok.nicetext(), iRegex,
opts_regex = list(case_insensitive = !tmp_cased)),
na.rm = TRUE))),
column(7, br(), p(iRegex))
)
})
)
))
## Process new regex
observeEvent(input$extraRegexSearch, {
if (is.null(ok.nicetext())) return(NULL)
req(nchar(input$extraRegexText) > 0)
if (input$extraRegexText %in% values$extraRegex) return(NULL)
try_regex <- try(stringi::stri_detect_regex(
ok.nicetext(), input$extraRegexText,
opts_regex = list(case_insensitive = TRUE)))
if (inherits(try_regex, "try-error")) return(NULL)
new_regex <- input$extraRegexText
new_hash <- label2hash(new_regex)
values$extraRegex <- c(new_regex, values$extraRegex)
names(values$extraRegex)[1] <- new_hash
if (verbose)
cat("DEBUG regex search", input$extraRegexText, "\n")
})
## Regex predictors: populate delete and use events on first creation
useregex_trigger <- reactiveVal(0)
delregex_trigger <- reactiveVal(0)
caseregex_trigger <- reactiveVal(0)
observeEvent(list(values$extraRegex), {
if (!length(values$extraRegex)) return(NULL)
if (any(! values$extraRegex %in% values$extraRegexHist)) {
if (verbose)
cat("DEBUG regex predictor names updates\n")
tmpregex <- values$extraRegex[! values$extraRegex %in% values$extraRegexHist]
values$extraRegexHist <- c(tmpregex, values$extraRegexHist)
c(lapply(1:length(tmpregex), function(iregex) {
observeEvent(
input[[paste0("delregex_", names(tmpregex)[iregex])]], {
if (verbose)
cat("DEBUG regex delete", names(tmpregex)[iregex], "\n")
values$extraRegex <- values$extraRegex[values$extraRegex != tmpregex[iregex]]
delregex_trigger(delregex_trigger() + 1)
})
}),
lapply(1:length(tmpregex), function(iregex) {
tmpname <- paste0("useregex_", names(tmpregex)[iregex])
observeEvent(
input[[tmpname]], {
values[[tmpname]] <- input[[tmpname]]
useregex_trigger(useregex_trigger() + 1)
})
}),
lapply(1:length(tmpregex), function(iregex) {
tmpname <- paste0("caseregex_", names(tmpregex)[iregex])
observeEvent(
input[[tmpname]], {
values[[tmpname]] <- input[[tmpname]]
caseregex_trigger(caseregex_trigger() + 1)
})
})
)
}
})
observeEvent(list(delregex_trigger(), useregex_trigger(), caseregex_trigger()), {
req(ok.data(), input$selectScheme)
regex_all <- values$extraRegex
values$regex_inuse <- sapply(names(regex_all), function(iname)
values[[paste0("useregex_", iname)]])
values$regex_case <- sapply(names(regex_all), function(iname)
values[[paste0("caseregex_", iname)]])
if (verbose) if (any(values$regex_inuse))
cat("DEBUG regex in use", paste0(collapse = " ", regex_all[values$regex_inuse]), "\n")
writeLines(
RJSONIO::toJSON(lapply(FUN = unname, list(
regex = regex_all, use = values$regex_inuse, case = values$regex_case))),
paste0(values$projectdir, input$selectScheme, "_regex.json"))
})
output$dataView <- DT::renderDataTable({
# if (is.null(ok.text()) | ok.data.running()) return(NULL)
req(ok.text())
if (verbose)
cat("DEBUG enter dataView\n")
if (verbose)
cat("DEBUG render dataView\n")
DT::datatable(
rownames = F,
data.frame(
row = 1:nrow(ok.data()),
id = ok.data()[, values$conf$idcol],
text = ok.text()),
options = list(
pageLength = 3,
columnDefs = list(list(
targets = 2,
render = DT::JS(
"function(data, type, row, meta) {",
"return type === 'display' && data.length > 200 ?",
"'<span title=\"' + data + '\">' + data.substr(0, 200) + '...</span>' : data;",
"}")
))
),
callback = DT::JS('table.page(0).draw(false);')
)
})
## Triggers for mass rename, created with each new label, with confirm
observeEvent(values$uniqueLabelsAll, {
if (is.null(values$uniqueLabelsAll)) return(NULL)
notthere <- unique(na.omit(
values$uniqueLabelsAll[! values$uniqueLabelsAll %in% values$retagLabels]))
lapply(
notthere,
function(iTag) {
iHash <- label2hash(iTag)
observeEvent(input[[paste0("retag_", iHash)]], {
tmp_oldtag <- iTag
tmp_retagged <- which(values$label == tmp_oldtag)
tmp_newtag <- gsub(
"^\\s+|\\s+$", "", input[[paste0("newtag_", iHash)]])
showModal(modalDialog(
title = "Mass retag",
paste0("Confirm retag: ", length(tmp_retagged), " ",
tmp_oldtag, " tags will be ",
ifelse(nchar(tmp_newtag),
paste("renamed to", tmp_newtag) , "deleted")),
footer = tagList(actionButton(paste0("retag_confirm_", iTag), "Retag"),
modalButton("Cancel"))))
})
observeEvent(input[[paste0("retag_confirm_", iTag)]], {
tmp_oldtag <- iTag
tmp_retagged <- which(values$label == tmp_oldtag)
tmp_newtag <- gsub(
"^\\s+|\\s+$", "", input[[paste0("newtag_", iHash)]])
if (!nchar(tmp_newtag)) tmp_newtag <- NA
values$tagTrigger <- list(
id = ok.data()[[values$conf$idcol]][tmp_retagged],
tag = rep(tmp_newtag, length(tmp_retagged)))
values$queries <- c(values$queries, tmp_retagged)
removeModal()
})
})
values$retagLabels <- c(values$retagLabels, notthere)
})
##############################################################################
## UI rendering of Panel Tagging / Visualization
##############################################################################
####################
## Tagging left panel
# ## Trigger from Oops
# observeEvent(input$oops, {
# if (is.null(ok.data()))
# return(NULL)
# if (verbose)
# cat("DEBUG enter Oops\n")
# if (!is.null(values$retagged)) if (values$queries[length(values$queries)] == values$retagged[length(values$retagged)])
# values$retagged <- values$retagged[-length(values$retagged)]
# values$label[values$queries[length(values$queries)]] <- NA
# if (ok.commentCol() != "(None)")
# values$comment[values$queries[length(values$queries)]] <- ok.data()[values$queries[length(values$queries)], ok.commentCol()]
# values$queries <- values$queries[-length(values$queries)]
# ## tagTrigger ?
#
# ready2tag(0)
# queryNext(queryNext() + 1)
# })
## Tagging triggers for Oracle buttons, created with each new label
observeEvent(values$uniqueLabelsAll, {
if (is.null(values$label)) return(NULL)
if (is.null(values$uniqueLabelsAll)) return(NULL)
if (all(is.na(values$label))) return(NULL)
notthere <- unique(na.omit(
values$uniqueLabelsAll[! values$uniqueLabelsAll %in% values$oracleLabels]
))
if (!length(notthere)) return(NULL)
if (verbose)
cat("DEBUG triggers for oracle buttons\n")
lapply(
notthere,
function(iLab) {
observeEvent(
input[[paste0("oracle_", label2hash(iLab))]], {
if (is.null(ok.text())) return(NULL)
if (is.null(values$newQuery)) return(NULL)
if (!ready2tag()) {cat("DEBUG not ready to tag in Button\n"); return(NULL)}
# ready2tag(0)
# if (input$allowRetag | sum(is.na(values$label)) == 0)
if (!is.na(values$label[values$lastQuery]))
values$retagged <- c(values$retagged, values$lastQuery)
# values$label[values$lastQuery] <- iLab
values$comment[values$lastQuery] <- input$currentComment
# values$queries <- c(values$queries, values$lastQuery)
values$tagTrigger <- list(
id = ok.data()[values$lastQuery, values$conf$idcol],
tag = iLab,
comment = input$currentComment)
})
})
values$oracleLabels <- c(values$oracleLabels, notthere)
})
## Trigger from Oracle Create value button
observeEvent(input$currentAction, {
if (is.null(ok.data())) return(NULL)
if (is.null(values$newQuery)) return(NULL)
if (verbose)
cat("DEBUG enter create value\n")
# if (input$allowRetag | sum(is.na(values$label)) == 0)
if (!is.na(values$label[values$lastQuery]))
values$retagged <- c(values$retagged, values$lastQuery)
if (input$newLab == "") {
return(NULL)
} else {
# values$label[values$lastQuery] <- input$newLab
updateTextInput(session, "newLab", value= "")
}
values$comment[values$lastQuery] <- input$currentComment
# values$queries <- c(values$queries, values$lastQuery)
values$tagTrigger <- list(
id = ok.data()[values$lastQuery, values$conf$idcol],
tag = input$newLab,
comment = input$currentComment)
})
## Trigger from Oracle Confirm button
observeEvent(input$oracleConfirm, {
req(ok.data(), ok.train(), values$newQuery)
# if (is.null(ok.data())) return(NULL)
# if (is.null(values$newQuery)) return(NULL)
if (!ready2tag()) {cat("DEBUG not ready to tag in OC\n"); return(NULL)}
# ready2tag(0)
if (!ok.train.possible()) return(NULL)
if (verbose)
cat("DEBUG oracle confirm\n")
# if (input$allowRetag | sum(is.na(values$label)) == 0)
if (!is.na(values$label[values$lastQuery]))
values$retagged <- c(values$retagged, values$lastQuery)
# values$label[values$lastQuery] <- ok.predlab1()[values$lastQuery]
# values$queries <- c(values$queries, values$lastQuery)
values$comment[values$lastQuery] <- input$currentComment
values$tagTrigger <- list(
id = ok.data()[values$lastQuery, values$conf$idcol],
tag = ok.predlab1()[values$lastQuery],
comment = input$currentComment)
})
## Trigger for CV glm (async)
glmCV_nclicks <- reactiveVal(0)
observeEvent(input$glmCV, {
if (is.null(ok.predictor())) return(NULL)
if (glmCV_nclicks() != 0) {
showNotification("LASSO CV already running")
return(NULL)
}
glmCV_nclicks(1)
glmCV_future <- reactiveVal()
tmpvalues <- values$label
tmppred <- ok.predictor()
for (ival in na.omit(unique(tmpvalues)))
if (sum(tmpvalues == ival, na.rm = T) <= 8)
tmpvalues[which(tmpvalues == ival)] <- NA
glmCV_async <- future(seed = TRUE, {
glmnet::cv.glmnet(
x= tmppred[!is.na(tmpvalues), ],
y= as.factor(tmpvalues[!is.na(tmpvalues)]),
family= "multinomial")
}) %...>% glmCV_future()
glmCV_async <- catch(glmCV_async, function(e) {
showNotification(type = "error", paste0("LASSO CV error: ", e))
})
glmCV_async <- finally(glmCV_async, function() {
glmCV_nclicks(0)
if (is.null(glmCV_future())) return(NULL)
updateNumericInput(session, "glmLambda",
value = glmCV_future()$lambda.min)
})
NULL
})
## Trigger for CV liblinear (async)
liblinCV_nclicks <- reactiveVal(0)
observeEvent(input$liblinCV, {
if (is.null(ok.predictor())) return(NULL)
if (liblinCV_nclicks() != 0) {
showNotification("LibLineaR CV already running")
return(NULL)
}
liblinCV_nclicks(1)
liblinCV_future <- reactiveVal()
tmpvalues <- values$label
tmppred <- ok.predictor()
for (ival in na.omit(unique(tmpvalues)))
if (sum(tmpvalues == ival, na.rm = T) <= 3)
tmpvalues[which(tmpvalues == ival)] <- NA
liblinCV_async <- future(seed = TRUE, {
LiblineaR::LiblineaR(
data= tmppred[!is.na(tmpvalues), ],
target= as.factor(tmpvalues[!is.na(tmpvalues)]),
type= 0, findC= T)
}) %...>% liblinCV_future()
liblinCV_async <- catch(liblinCV_async, function(e) {
showNotification(type = "error", paste0("LibLineaR CV error: ", e))
})
liblinCV_async <- finally(liblinCV_async, function() {
liblinCV_nclicks(0)
if (is.null(liblinCV_future())) return(NULL)
updateNumericInput(session, "liblinCost", value= liblinCV_future())
})
NULL
})
output$currentContext <- renderUI({
if (verbose)
cat("DEBUG 1rentContext\n")
if ((! is.null(ok.text())) & (length(ok.contextCol()) > 0))
HTML(paste(paste(ok.contextCol(),
as.character(ok.data()[values$newQuery, ok.contextCol()]),
sep = " : "),
collapse= "<br/><br/>"))
})
output$makeOracleConfirm <- renderUI({
if (verbose)
cat("DEBUG update printPredicted\n")
if (is.null(ok.pred())) {
tmp <- "No prediction"
} else if (is.null(values$newQuery)) {
tmp <- "No query"
} else {
tmp <- paste0(
ifelse(is.na(values$label[values$newQuery]), "", "Pred: "),
ok.predlab1()[values$newQuery],
" (", round(max(ok.pred()[values$newQuery, ]), 3), ")")
}
res <- list(actionButton("oracleConfirm", label= paste("🤖", tmp), width = "100%"))
if (!is.null(values$label) & !is.null(values$newQuery)) {
if (!is.na(values$label[values$newQuery]))
res <- list(res[[1]], HTML(paste0(
"<center><strong> Current: ", values$label[values$newQuery],
"</strong></center>")))
}
res
})
## Show training diagnostics on new model train
output$trainDiagno <-
renderText(paste0(ok.msgtagged(), "\n", values$modelDiagno))
## Diagnostics event
observeEvent(diagTrigger(), {
if (verbose)
cat("DEBUG update modelDiagno\n")
if (is.null(ok.data())) {
values$modelDiagno <- "No model"
return(NULL)
}
if (!ok.train.possible()) {
values$modelDiagno <- "No model (not enough labelled data)."
return(NULL)
}
if (input$predModel == "random forest") {
tmp <- paste0(
"Train Accuracy: ",
round(100 * mean(ok.predlab1()[!is.na(values$label)] ==
values$label[!is.na(values$label)]), 1),
"% ; Wtd. F1: ",
sprintf("%f", round(wtdF1(values$label[!is.na(values$label)],
ok.predlab1()[!is.na(values$label)]), 3)),
"%\nOOB Accuracy : ",
round(100 * mean(
colnames(ok.train()$predictions)[apply(ok.train()$predictions, 1, which.max)] ==
values$label[!is.na(values$label)]), 1),
"% ; Wtd. F1: ",
sprintf("%.3f", wtdF1(
values$label[!is.na(values$label)],
colnames(ok.train()$predictions)[apply(ok.train()$predictions, 1, which.max)]))
)
} else if (input$predModel %in% c("lasso", "linear", "naive bayes", "knn")) {
tmp <- paste0(
"Train Accuracy: ",
round(100 * mean(ok.predlab1()[!is.na(values$label)] ==
values$label[!is.na(values$label)]), 1),
"% ; Wtd. F1: ",
sprintf("%.3f", wtdF1(
values$label[!is.na(values$label)],
ok.predlab1()[!is.na(values$label)])))
}
values$modelDiagno <- tmp
})
## Update comment input field
observeEvent(values$newQuery, {
if (is.null(ok.text())) return(NULL)
if (verbose)
cat("DEBUG update comment field\n")
updateTextInput(session, "currentComment",
value = values$comment[values$newQuery])
})
## Save event: tag + comment in dedicated csv
observeEvent(values$tagTrigger, {
if (is.null(values$tagTrigger)) return(NULL)
if (verbose)
cat("DEBUG enter event tagTrigger", length(values$tagTrigger$id), "\n")
ready2tag(0) # Prevent new tagging until next query
dapath <- paste0(values$projectdir, ok.labelcol(), ".csv")
darows <- match(values$tagTrigger$id, ok.data()[, values$conf$idcol])
export <- data.frame(values$tagTrigger$id, values$tagTrigger$tag)
colnames(export) <- c(values$conf$idcol, ok.labelcol())
if (verbose)
cat("DEBUG tagTrigger tag",
paste(export[, 1], export[, 2], collapse = " ; "), "\n")
write.table(export, dapath, sep = ",", qmethod = "double",
fileEncoding = "UTF-8", row.names = F,
append = ifelse(file.exists(dapath), T, F),
col.names = ifelse(file.exists(dapath), F, T))
if (any(nchar(values$tagTrigger$comment))) {
tmp_filter <- which(nchar(values$tagTrigger$comment) > 0)
dapath <- paste0(values$projectdir, values$conf$commcol, ".csv")
export <- data.frame(values$tagTrigger$id[tmp_filter],
values$tagTrigger$comment[tmp_filter])
colnames(export) <- c(values$conf$idcol, values$conf$commcol)
if (verbose)
cat("DEBUG tagTrigger comment", export[1,1], export[1, 2])
write.table(export, dapath, sep = ",", qmethod = "double",
fileEncoding = "UTF-8", row.names = F,
append = ifelse(file.exists(dapath), T, F),
col.names = ifelse(file.exists(dapath), F, T))
}
## Update label hist
values$labelhist[match(values$tagTrigger$id, ok.data()[, values$conf$idcol])] <-
max(values$labelhist, na.rm = T) + 1
## Update wasTagged and wasRetagged
itag <- match(values$tagTrigger$id, ok.data()[, values$conf$idcol])
if (input$sampleChoice == "untagged") {
values$wasTagged[itag] <- TRUE
} else if (input$sampleChoice == "tagged") {
values$wasRetagged[itag] <- TRUE
} else if (input$sampleChoice == "all") {
if (is.na(values$label[itag])) {
values$wasTagged[itag] <- TRUE
} else
values$wasRetagged[itag] <- TRUE
}
## Call queryNext event, or launch training
if (trainCountdown() == 1) {
trainCountdown(input$trainCountdown)
trainTrigger(trainTrigger() + 1)
} else if (trainCountdown() > 1) {
trainCountdown(trainCountdown() - 1)
queryNext(queryNext() + 1)
} else {
queryNext(queryNext() + 1)
}
## Update values$label etc
values$label[darows] <- values$tagTrigger$tag
values$queries <- c(values$queries, darows)
if (verbose)
cat("DEBUG exit event tagTrigger", nrow(export), "rows\n")
})
####################
## Tagging main panel
output$textVisuCols <- renderUI({
if (verbose)
cat("DEBUG render text/visu panel\n")
leftsize <- 6
if (!input$panelText) leftsize <- 1
if (input$panelText & !input$panelVisu) leftsize <- 11
fluidRow(
column(
leftsize,
conditionalPanel(
"input.panelText",
wellPanel(htmlOutput("currentText"))
)
),
column(
12 - leftsize,
conditionalPanel(
"input.panelVisu",
fluidRow(
column(2, HTML(paste0('<div title="Compute new visualization">',
actionButton("visuCompute", "🎲"),
'</div>'))),
column(2, HTML(paste0('<div title="Zoom out/into selected area">',
actionButton("visuGoZoom", "🔍±"),
'</div>'))),
column(2, HTML(paste0('<div title="Query only from zoom area">',
checkboxInput("visuLock", "🔒", value = FALSE),
'</div>'))),
column(2, HTML(paste0('<div title="Visualization options">',
checkboxInput("visuOptions", "🔧"),
'</div>'))),
column(3, HTML(paste0('<div title="Plot size">',
numericInput("visuHeight", NULL, 400, 20, 2e12, 10),
'</div>')))
),
conditionalPanel(
'input.visuOptions',
flowLayout(
selectInput(
"visuAlgo", "Visu. method", c("umap", "t-sne"), "umap", width = "100%"),
selectizeInput(
"visuSource", "Visu. source",
c("Word counts (DFM)" = "dfm",
"Word embeddings (FastText)" = "ft",
"Sentence embeddings (SBERT)" = "sb",
"BERT predictions" = "bertpred",
"Regex" = "regex"),
c("sb", "ft"), multiple = TRUE)
)
),
conditionalPanel( # umap options
'input.visuAlgo == "umap" & input.visuOptions',
flowLayout(
numericInput(
"umapNeighb", "Nb. Neighbors", 15, 1, 500, 1),
numericInput(
"umapSpread", "Spread", 1, 0, 16, 1e-2),
numericInput(
"umapMindist", "Min dist.", .001, 0, 16, 1e-3),
selectInput(
"umapMetric", "Metric", selected = "cosine",
c("euclidean", "cosine", "manhattan", "hamming",
"correlation", "categorical")
)
)
),
conditionalPanel( # t-SNE options
'input.visuAlgo == "t-sne" & input.visuOptions',
flowLayout(
numericInput(
"tsnePerplex", "Perplexity", 15, 1, 500, 1),
numericInput(
"tsneTheta", "Theta", .5, 0, 1, 1e-2),
numericInput(
"tsnePcaDim", "PCA dims", 50, 1, 100, 1)
)
),
uiOutput("visuMsg"),
plotOutput(
"visuPlot", width = "100%",
dblclick = "visuDblclick",
brush = brushOpts("visuBrush", resetOnNew = T))
)
)
)
})
##################
## Subpanel "Text"
##################
output$currentText <- renderUI({
if (verbose)
cat("DEBUG render currentText\n")
if (is.null(ok.data()) | is.null(ok.nicetext()))
return(HTML("Import data to start training."))
if (input$strategy %in% c("entropy", "maxprob") & is.null(ok.train()))
return(HTML("<em>No model: train one, or try 'random' or 'sequential' strategy</em>"))
if (is.null(values$newQuery))
return(HTML("<em>No query</em>"))
bastext <- ok.nicetext()[values$newQuery]
if (nchar(input$regexFilter) > 0)
bastext <- gsub(paste0("(", isolate(input$regexFilter), ")"),
"<b>\\1</b>", bastext, ignore.case = isolate(!input$regexCaseSens))
baslen <- nchar(bastext)
if (!length(baslen)) {
if (nchar(input$regexFilter) > 0)
return(HTML("<b><em>Regex filter: no match</em></b>"))
return(HTML("<b><em>No text matches the filters</em></b>"))
}
if (is.na(baslen))
return(HTML("<em>No query</em>"))
if (baslen > 1200) {
bastext <- paste0(substring(bastext, 1, 1200), '<em title="> 512 tokens (not read by BERT)">',
substring(bastext, 1201), "</em>")
}
bastext <- gsub("\n", "<br/>", bastext)
HTML(bastext)
})
##############################################################################
## Panel "Visualize"
##############################################################################
output$visuMsg <- renderUI({
if (!is.null(values$visuMsg))
# strong(values$visuMsg)
h5(values$visuMsg, style = "text-align:center")
})
## Compute visualization points (async)
visu_nclicks <- reactiveVal(0)
ok.visu <- reactiveVal()
observe({
if (!input$panelVisu)
values$visuGo <- FALSE
if (input$panelVisu & !values$visuGo)
values$visuGo <- TRUE
})
observeEvent(list(values$visuGo, input$visuCompute), {
if (is.null(input$visuSource)) return(NULL)
if (!values$visuGo) return(NULL)
if (is.null(ok.data())) {
values$visuMsg <- "No data imported."
return(NULL)
}
if (visu_nclicks() > 0) {
values$visuMsg <- "Already computing visualization"
return(NULL)
}
visu_nclicks(1)
values$visuMsg <- "Computing visualization..."
## Prepare options and objects for async computation
dasources <- input$visuSource
daalgo <- input$visuAlgo
if (verbose)
cat("DEBUG compute visu embedding: ", daalgo, paste(dasources, collapse = " "), "\n")
visudat <- NULL
if ("dfm" %in% dasources) dadfm <- ok.dfm()
if ("ft" %in% dasources) {
visudat <- ok.data()[, values$cols_ft]
}
if ("sb" %in% dasources) {
if (is.null(visudat)) {
visudat <- ok.data()[, values$cols_sb]
} else
visudat <- cbind(visudat, ok.data()[, values$cols_sb])
}
if ("bertpred" %in% dasources) {
if (is.null(visudat)) {
visudat <- ok.data()[, do.call(c, values$cols_bertpred)]
} else
visudat <- cbind(visudat, ok.data()[, do.call(c, values$cols_bertpred)])
}
if ("regex" %in% dasources) {
if (is.null(visudat)) {
visudat <- ok.extraRegex.matrix()
} else
visudat <- cbind(visudat, ok.extraRegex.matrix())
}
if (daalgo == "umap") {
visu_args <- list(
n_neighbors = input$umapNeighb, metric = input$umapMetric,
spread = input$umapSpread, min_dist = input$umapMindist)
} else if (daalgo == "t-sne") {
visu_args <- list(
perplexity = input$tsnePerplex, theta = input$tsneTheta,
initial_dims = input$tsnePcaDim, check_duplicates = FALSE)
}
visu_async <- future(seed = TRUE, {
if ("dfm" %in% dasources) {
## TODO: CA treatment buggy, switch to SVD?
# da_ca <- quanteda.textmodels::textmodel_ca(
# quanteda::as.dfm(dadfm), nd = min(100, nrow(dadfm)))
# da_ca <- (da_ca$rowcoord %*% diag(da_ca$sv))[, !is.na(da_ca$sv)]
da_ca <- as.matrix(dadfm)
if (is.null(visudat)) {
visudat <- da_ca
} else
visudat <- cbind(da_ca, visudat)
}
if (daalgo == "umap") {
do.call(uwot::umap, c(list(visudat), visu_args))
} else if (daalgo == "t-sne") {
visudat_dupli <- duplicated(visudat)
if (any(visudat_dupli))
visudat[visudat_dupli, ] <- jitter(visudat[visudat_dupli, ])
do.call(Rtsne::Rtsne, c(list(visudat), visu_args))$Y
}
}) %...>% ok.visu()
visu_async <- catch(visu_async, function(error) {
values$visuMsg <- paste0("Error: ", error)
})
visu_async <- finally(visu_async, function() {
visu_nclicks(0)
if (input$visuGoZoom == 0) {
values$visuMsg <- "Double-click to select, drag to select zooming zone."
} else
values$visuMsg <- NULL
nice_choices <- c("FastText" = "ft", "SBERT" = "sb", "visu" = daalgo)
names(nice_choices)[3] <- ifelse(daalgo == "t-sne", "t-SNE", "UMAP")
updateSelectInput(
session, "dlEmbedSelect", choices = nice_choices, selected = daalgo)
if (verbose)
cat("DEBUG Visu async out\n")
})
NULL
})
## Compute visualization plot (async)
ok.ggvisu <- reactiveVal()
observe({
# if (is.null(ok.visu())) return(NULL)
if (is.null(ok.visu())) {
ok.ggvisu(NULL)
return(NULL)
}
# if (!isTRUE(values$visugo)) return(NULL)
if (!input$panelVisu) return(NULL)
if (verbose)
cat("DEBUG compute visu plot\n")
ggdat <- data.frame(ok.visu(), tag = values$label)
if (!is.null(ok.predlab1()))
ggdat$tag[is.na(values$label)] <- ok.predlab1()[is.na(values$label)]
da_predlabs <- ok.predlab1()
da_tagged <- ! is.na(values$label)
da_query <- values$newQuery
ggvisu_async <- future({
ggres <- ggplot(ggdat, aes(X1, X2)) + theme_bw() +
xlab("") + ylab("") +
theme(legend.position = "bottom") +
scale_shape_manual(values = rep(1:8, 40)) # TODO : adapt to tags
## Tagged points
if (any(da_tagged)) {
ggres <- ggres +
geom_point(data = ggdat[da_tagged, ],
aes(X1, X2, color = tag, shape = tag),
size = 1.5, stroke = 1.5, alpha = .8)
}
## Not tagged points
if (!is.null(da_predlabs)) {
ggres <- ggres +
geom_point(data = ggdat[!da_tagged, ],
aes(color = tag, shape = tag), show.legend = FALSE)
} else {
ggres <- ggres +
geom_point(alpha = .8, data = ggdat[is.na(ggdat$tag), ])
}
## Current query
if (!is.null(da_query)) {
if (is.null(da_predlabs)) {
ggres <- ggres +
geom_point(data = ggdat[da_query, ], color = 2,
stroke = 3, shape = 3, show.legend = FALSE)
} else {
ggres <- ggres +
geom_point(data = ggdat[da_query, ],
stroke = 3, shape = 3, show.legend = FALSE)
}
}
ggres
}) %...>% ok.ggvisu()
ggvisu_async <- catch(ggvisu_async, function(error) {
values$visuMsg <- paste0("Error in ggplot: ", error)
})
})
observe({
if (verbose)
cat("DEBUG update visuPlot\n")
if (is.null(ok.ggvisu())) {
values$visuMsg <- "No visualization yet."
output$visuPlot <- NULL
values$visuZoom$xlim <- NULL
values$visuZoom$ylim <- NULL
return(NULL)
}
output$visuPlot <- renderPlot(height = input$visuHeight, {
# if (is.null(ok.ggvisu())) return(NULL)
ok.ggvisu() +
coord_fixed(xlim = values$visuZoom$xlim, ylim = values$visuZoom$ylim)
})
})
observeEvent(input$visuDblclick, {
if (!is.null(values$visuMsg))
values$visuMsg <- NULL
tmp <- input$visuDblclick
bmu <- t(ok.visu()) - c(tmp$x, tmp$y)
bmu <- which.min(colSums(bmu^2))
values$lastQuery <- bmu
values$newQuery <- bmu
ready2tag(1)
})
observeEvent(input$visuGoZoom, {
if (!is.null(values$visuMsg))
values$visuMsg <- NULL
values$visuZoom$xlim <- c(input$visuBrush$xmin, input$visuBrush$xmax)
values$visuZoom$ylim <- c(input$visuBrush$ymin, input$visuBrush$ymax)
})
##############################################################################
## Panel History
##############################################################################
## Create history link and retag events as they appear
observeEvent(input$histDTable_rows_current, {
histrows <- order(values$labelhist)
histrows <- rev(histrows[!is.na(values$labelhist[histrows])])
histrows <- histrows[!is.na(values$label[histrows])]
histrows <- histrows[input$histDTable_rows_current]
notthere <- histrows[! histrows %in% values$histQueries]
if (!length(notthere)) return(NULL)
lapply(notthere, function(daquery) {
if (verbose) cat("DEBUG create history events", daquery, "\n")
observeEvent(input[[paste0("hist", daquery)]], {
if (verbose) cat("DEBUG history link", daquery, "\n")
values$lastQuery <- daquery
values$newQuery <- daquery
updateTabsetPanel(session, "mainPanelset", selected = "Tagging")
ready2tag(1)
})
observeEvent(input[[paste0("histTag", daquery)]], {
if (verbose) cat("DEBUG history retag", daquery, "\n")
values$histSaveStack <- rbind(
values$histSaveStack,
data.frame(id = ok.data()[daquery, values$conf$idcol],
tag = input[[paste0("histTag", daquery)]],
comment = values$comment[daquery])
)
})
})
values$histQueries <- c(values$histQueries, notthere)
})
## History save action
observeEvent(input$histSave, {
req(is.data.frame(values$histSaveStack))
values$tagTrigger <- as.list(values$histSaveStack)
values$label[match(values$histSaveStack$id, ok.data()[[values$conf$idcol]])] <-
values$histSaveStack$tag
values$histSaveStack <- NULL
})
## Update save name with number of unsaved changes
observeEvent(length(values$histSaveStack$id), {
updateActionButton(
session, "histSave",
label = paste0("Save changes (", length(unique(values$histSaveStack$id)),")"))
})
## History table, with clickable links and retagging
output$histDTable <- DT::renderDataTable({
histrows <- order(values$labelhist)
histrows <- rev(histrows[!is.na(values$labelhist[histrows])])
histrows <- histrows[!is.na(values$label[histrows])]
# histrows <- rev(values$queries)
hist_df <- data.frame(
# history = length(histrows):1,
history = values$labelhist[histrows],
id = sapply(histrows, function(irow) {
as.character(actionLink(
paste0("hist", irow),
label = ok.data()[irow, values$conf$idcol],
onclick = paste0('Shiny.onInputChange(\"hist', irow, '\", this.id)')))
}),
Tag = values$label[histrows],
Retag = sapply(histrows, function(irow) {
paste0(
"<select id=\"histTag", irow, "\" onchange=\"Shiny.onInputChange('histTag", irow, "', this.value)\">",
paste0("<option value=\"", values$uniqueLabels, "\" ",
ifelse(values$uniqueLabels == values$label[irow], "selected", ""),
">", values$uniqueLabels, "</option>", collapse = " "),
"</select>"
)
}),
Comment = values$comment[histrows],
Text = ok.nicetext()[histrows])
if (!is.null(values$conf$contextcols))
hist_df <- cbind(hist_df,
ok.data()[histrows, values$conf$contextcols])
DT::datatable(hist_df, escape = FALSE, rownames = FALSE, filter = "top",
options = list(
columnDefs = list(list(
targets = 5,
render = DT::JS(
"function(data, type, row, meta) {",
"return type === 'display' && data.length > 200 ?",
"'<span title=\"' + data + '\">' + data.substr(0, 200) + '...</span>' : data;",
"}")))
))
})
##############################################################################
## Panel "Stats"
##############################################################################
output$statsTagTable <- renderTable(rownames = F, {
table(tag = values$label)
})
## Model diagnostics by CV
output$statsCVoutput <- reactive({
if (is.null(values$modelcv))
return("Model diagnostics will appear here")
values$modelcv
})
output$statsCVtable <- DT::renderDataTable({
if (is.null(values$cvtable))
return(NULL)
DT::datatable(values$cvtable, list(pageLength = 25), rownames = F)
})
## Button action: compute CV (async)
cv_nclicks <- reactiveVal(0)
observeEvent(input$statsCVgo, {
if (is.null(ok.train())) {
values$modelcv <- "Error in model training"
return(NULL)
}
if (cv_nclicks() != 0) {
values$modelcv <- "CV already running"
return(NULL)
}
cv_nclicks(cv_nclicks() + 1)
predmat <- ok.predictor()[!is.na(values$label), ]
predlab <- values$label[!is.na(values$label)]
cvsamp <- sample(10, nrow(predmat), replace = TRUE)
cvres <- rep(NA, nrow(predmat))
cvres_future <- reactiveVal()
tmp_model <- input$predModel
if (tmp_model == "random forest") {
cv_args <- list(
num.trees = input$rfNumTrees, mtry = if (input$rfMtry > 0) input$rfMtry,
sample.fraction = input$rfSampleFrac, probability = FALSE)
} else if (tmp_model == "linear") {
cv_args <- list(type= 0, cost = input$liblinCost)
} else if (tmp_model == "lasso") {
cv_args <- input$glmLambda
} else if (tmp_model == "naive bayes") {
cv_args <- list(smooth = input$naiveSmooth, prior = input$naivePrior,
distribution = input$naiveDistri)
} else if (tmp_model == "knn") {
cv_args <- list(k = input$knnK)
}
cv_status_file <- paste0(session$token, "_cv")
cv_async <- future(seed = TRUE, {
for (icv in 1:10) {
set_status(cv_status_file, paste0("Computing CV fold ", icv, " / 10"))
if (tmp_model == "random forest") {
cvmodel <- do.call(ranger::ranger, c(cv_args, list(
x= predmat[cvsamp != icv, ],
y= as.factor(predlab[cvsamp != icv]))))
cvres[cvsamp == icv] <- as.character(
predict(cvmodel, data= predmat[cvsamp == icv, ])$predictions)
} else {
tmpvalues <- predlab[cvsamp != icv]
# for (ival in na.omit(unique(tmpvalues)))
# if (sum(tmpvalues == ival, na.rm = T) == 1)
# tmpvalues[which(tmpvalues == ival)] <- NA
if (tmp_model == "lasso") {
cvmodel <- suppressWarnings(
glmnet::glmnet(x= predmat[cvsamp != icv, ][!is.na(tmpvalues), ],
y= as.factor(tmpvalues[!is.na(tmpvalues)]),
family= "multinomial"))
cvres[cvsamp == icv] <- predict(
cvmodel, newx = predmat[cvsamp == icv, ],
type = "class", s= cv_args)
} else if (tmp_model == "linear") {
cvmodel <- do.call(LiblineaR::LiblineaR, c(
list(data= predmat[cvsamp != icv, ][!is.na(tmpvalues), ],
target= as.factor(tmpvalues[!is.na(tmpvalues)])),
cv_args))
cvres[cvsamp == icv] <- as.character(
predict(cvmodel, newx= predmat[cvsamp == icv, ])$predictions)
} else if (tmp_model == "naive bayes") {
cvmodel <- do.call(quanteda.textmodels::textmodel_nb, c(
list(x= predmat[cvsamp != icv, ][!is.na(tmpvalues), ],
y= as.factor(tmpvalues[!is.na(tmpvalues)])),
cv_args))
cvres[cvsamp == icv] <- as.character(
predict(
cvmodel, newdata= predmat[cvsamp == icv, ], type= "class"))
} else if (isolate(tmp_model) == "knn") {
cvres[cvsamp == icv] <-
as.character(do.call(class::knn, c(cv_args, list(
train = predmat[cvsamp != icv, ],
test = predmat[cvsamp == icv, ],
cl = predlab[cvsamp != icv]))))
}
}
}
cvres
}) %...>% cvres_future()
cv_async <- catch(cv_async, function(e) {
cvres_future(NULL)
values$modelcv <- e$message
})
cv_async <- finally(cv_async, function() {
cv_nclicks(0)
if (file.exists(cv_status_file)) unlink(cv_status_file)
if (is.null(cvres_future())) return(NULL)
cvres <- cvres_future()
predlab <- values$label[!is.na(values$label)]
dametrics <- metricsTable(predlab, cvres)
values$cvtable <- dametrics
res <- "10-CV OOB evaluation:\n"
# if (any(grepl("^bertpred", input$use_regressors)))
if (any(grepl("^bertpred", values$new_use_regressors)))
res <- paste0(
res,
"⚠️ Scores are biased upwards if BERT predictions were trained on current data ⚠️ \n")
res <- paste0(
res,
"* Accuracy : ", round(100 * mean(cvres == predlab), 2), "%\n",
"* Macro F1 : ", sprintf("%f", mean(as.numeric(dametrics$F1))), "\n",
"* Weighted F1 : ", sprintf("%f", weighted.mean(as.numeric(dametrics$F1),
dametrics$N.cases)), "\n"
)
values$modelcv <- res
})
NULL
})
## Automatic message update during CV computation
observe({
if (cv_nclicks() == 0) return(NULL)
invalidateLater(200)
if (!file.exists(paste0(session$token, "_cv"))) return(NULL)
values$modelcv <- get_status(paste0(session$token, "_cv"))
values$cvtable <- NULL
})
##############################################################################
## Panel BERT
##############################################################################
##########
## Bert UI left panel
## Update list of saved models and dlSelect on init and on saveTrigger
observeEvent(list(ok.data(), values$bertSaveTrigger), {
damodels <- dir(pattern = "^tiggerbert_saved_")
if (!length(damodels)) return(NULL)
damodels <- gsub("^tiggerbert_saved_", "", damodels)
values$bertSaved <- damodels
updateSelectInput(session, "dlBPSelect", choices = damodels)
})
## Populate saved models list, with buttons
output$bertSaved <- renderUI({
if (verbose)
cat("DEBUG bert update saved list\n")
HTML(paste0(
"<table><tbody>",
paste0(sapply(values$bertSaved, function(dabert) {
bertshort <- dabert
if (nchar(bertshort) > 23)
bertshort <- paste0(substr(bertshort, 1, 20), "...")
paste0(
"<tr><td>",
actionLink(paste0("saved_", dabert), bertshort, title = dabert,
style="margin-right: 5px;"), "</td><td>",
actionButton(paste0("bert_copypars_", dabert), "📋",
title = "Copy parameters"), "</td><td>",
actionButton(paste0("bert_infer_", dabert), "🤖",
title = "Infer predictions on current data"), "</td><td>",
actionButton(paste0("bert_delete_", dabert), "🗑",
title = "Delete model"),
"</td></tr>")
}), collapse = "\n"),
"</tbody></table>"
))
})
##########
## Bert UI main panel
## UI outputs : message and stats table and plot
output$bertMsg <- reactive(
ifelse(is.null(values$bertMsg),
"Train new BERT, or select saved model.",
values$bertMsg))
output$bertValstats <- DT::renderDataTable(
DT::datatable(values$bertValstats, list(pageLength = 25)))
output$bertValPlot <- renderPlot(values$bertValPlot)
output$bertMsgHyperpar <- reactive(values$bertMsgHyperpar)
##################################
## Async handling of BERT training
## Status File
bert_status_file <- paste0("tiggerbert_", session$token, "_trainstatus")
fire_interrupt <- function(dafile) set_status(dafile, "interrupt")
fire_ready <- function(dafile) set_status(dafile, "Ready")
fire_running <- function(dafile, perc_complete = NULL){
if(is.null(perc_complete))
msg <- "Training..."
else
msg <- paste0("Training... ", perc_complete, "% complete")
set_status(dafile, msg)
}
interrupted <- function(dafile) get_status(dafile) == "interrupt"
## Create Status File
fire_ready(bert_status_file)
## Button action preparation
bert_nclicks <- reactiveVal(0)
bert_result_val <- reactiveVal()
bert_hyperpars <- reactiveVal()
## Button appearance: write "Stop" if training is ongoing
observeEvent(bert_nclicks(), {
updateActionButton(session, "bertTrain",
label = ifelse(bert_nclicks() > 0, "Stop", "Train BERT"))
})
## Button action: Train new BERT
observeEvent(input$bertTrain,{
## Don't do anything if analysis is already being run
# if(bert_nclicks() != 0){
# values$bertMsg <- "BERT is already training"
# return(NULL)
# }
tmpsession <- paste0("tiggerbert_", ok.labelcol(), "_", session$token)
if(bert_nclicks() != 0){
write("stop", paste0(tmpsession, "_stop"))
bert_nclicks(0)
return(NULL)
}
if (file.exists(paste0(tmpsession, "_stop")))
file.remove(paste0(tmpsession, "_stop"))
## Increment clicks and prevent concurrent analyses
bert_nclicks(bert_nclicks() + 1)
bert_result_val(data.frame(Status="Running..."))
fire_running(bert_status_file)
## Prepare async training
values$bertLoadSaved <- NULL
# tmpsession <- paste0("tiggerbert_", ok.labelcol(), "_", session$token)
tmpmodel <- input$bertModel
system(paste0("rm ", tmpsession, "_done"))
tmpdat <- data.frame(
id = ok.data()[, values$conf$idcol],
text = ok.nicetext(),
label = values$label
)
tmpdat <- tmpdat[!is.na(tmpdat$label), ]
tmpdat <- tmpdat[grepl("\\S", tmpdat$text), ]
## Filter data: min occurences, balance classes, validation split
tmplabtable <- table(tmpdat$label)
if (is.numeric(input$bertMinOccur)) if (input$bertMinOccur > 1) {
tmpdat <- tmpdat[tmpdat$label %in%
names(which(tmplabtable >= input$bertMinOccur)), ]
tmplabtable <- table(tmpdat$label)
}
set.seed(input$bertValidSeed)
if (input$bertBalance) {
tmpminocc <- min(tmplabtable)
tmptrain <- sample(do.call(c, lapply(unique(tmpdat$label), function(dalabel)
sample(which(tmpdat$label == dalabel), round((1-input$bertValidFrac) * tmpminocc)))))
tmpvalid <- sample(do.call(c, lapply(unique(tmpdat$label), function(dalabel) {
dasamp <- which(tmpdat$label == dalabel)
dasamp <- dasamp[! dasamp %in% tmptrain]
sample(dasamp, round(input$bertValidFrac * tmpminocc))
})))
} else {
tmptrain <- sample(1:nrow(tmpdat), round((1 - input$bertValidFrac) * nrow(tmpdat)))
tmpvalid <- (1:nrow(tmpdat))[-tmptrain]
}
write.csv(tmpdat[tmptrain, ], paste0(tmpsession, "_dat_train.csv"), row.names = FALSE)
write.csv(tmpdat[tmpvalid, ], paste0(tmpsession, "_dat_valid.csv"), row.names = FALSE)
iwait <- 0
while (iwait < 10) {
if (!file.exists(paste0(tmpsession, "_dat_valid.csv"))) Sys.sleep(500)
iwait <- iwait + 1
}
tmp_cmd <- paste0(
values$conf$python, " gobert.py -m ", tmpmodel,
" -t ", tmpsession, "_dat_train.csv",
" -v ", tmpsession, "_dat_valid.csv",
" -b ", input$bertBatchsize,
" -g ", input$bertGradacc,
" -e ", input$bertEpochs,
" -l ", input$bertLrate,
" -w ", input$bertWdecay,
" -B ", ifelse(input$bertBest, 1, 0),
" -E ", input$bertNeval,
" -s ", tmpsession,
" -G ", as.numeric(values$conf$use_gpu),
" -A ", as.numeric(input$bertAdapt),
" 2>&1 ; touch ", tmpsession, "_done"
)
## Save hyperpars
bert_hyperpars(list(bertModel = input$bertModel,
bertEpochs = input$bertEpochs,
bertLrate = input$bertLrate,
bertWdecay = input$bertWdecay,
bertBatchsize = input$bertBatchsize,
bertGradacc = input$bertGradacc,
bertNeval = input$bertNeval,
bertBest = input$bertBest,
bertValidFrac = input$bertValidFrac,
bertValidSeed = input$bertValidSeed,
bertNtrain = length(tmptrain),
bertTrainTable = as.list(table(tmpdat$label[tmptrain])),
bertScheme = ok.labelcol()))
## Launch async training
bert_async <- future({
if (file.exists(tmpsession))
system(paste0("rm -R ", tmpsession, "*"))
# system(tmp_cmd, wait = FALSE, intern = FALSE)
tmp_exec <- system(tmp_cmd, wait = FALSE, intern = TRUE)
writeLines(tmp_exec, paste0(tmpsession, "_log"))
## After training, get trainer state from last checkpoint for diagnostics
chkpt <- dir(paste0(tmpsession, "_train"), pattern = "^checkpoint")
chkpt <-
chkpt[order(as.numeric(gsub("\\D", "", chkpt)), decreasing = T)[1]]
system(paste0(
"cp ", tmpsession, "_train/", chkpt, "/trainer_state.json ",
tmpsession, "/"))
}) %...>% bert_result_val()
## Catch interrupt (or any other error) and notify user
bert_async <- catch(bert_async, function(e){
bert_result_val(NULL)
showNotification(e$message)
values$bertMsg <- e$message
})
## After the promise has been evaluated set nclicks to 0 to allow another Run
bert_async <- finally(bert_async, function() {
system(paste0("rm -R ", tmpsession, "_train"))
system(paste0("rm ", tmpsession, "_done"))
fire_ready(bert_status_file)
bert_nclicks(0)
if (is.null(bert_result_val())) return(NULL)
bertpreds <- try(read.csv(paste0(tmpsession, "_predval.csv")))
if (inherits(bertpreds, "try-error")) {
# values$bertMsg <- "BERT error - try again"
values$bertMsg <- try(paste(readLines(paste0(tmpsession, "_log")), collapse = "\n"))
values$bertValstats <- NULL
values$bertValPlot <- NULL
values$bertLastUnsaved <- NULL
values$bertMsgHyperpar <- NULL
values$bertLastHyperpar <- NULL
return(NULL)
}
datruth <- tmpdat[tmpvalid, "label"]
dapred <- bertpreds$bertpred
dametrics <- metricsTable(datruth, dapred)
damsg <- paste0(
"Unsaved model (", tmpmodel, ")\n\n",
"BERT validation stats:\n",
"* Accuracy : ", round(100 * mean(datruth == dapred), 2), "%\n",
"* Macro F1 : ", sprintf("%f", mean(as.numeric(dametrics$F1))), "\n",
"* Weighted F1 : ", sprintf("%f", weighted.mean(as.numeric(dametrics$F1),
dametrics$N.cases)), "\n")
hypermsg <- bertHyperToMsg(bert_hyperpars())
writeLines(RJSONIO::toJSON(bert_hyperpars()),
paste0(tmpsession, "/tiggertrain.json"))
values$bertValstats <- dametrics
values$bertMsg <- damsg
values$bertValPlot <- bertPlotFromPath(tmpsession)
values$bertLastUnsaved <- list(
metrics = dametrics, msg = damsg, session = tmpsession)
values$bertMsgHyperpar <- hypermsg
values$bertLastHyperpar <- bert_hyperpars()
})
values$bertMsg <- "BERT is training"
values$bertValstats <- NULL
values$bertValPlot <- NULL
values$bertLastUnsaved <- NULL
values$bertMsgHyperpar <- NULL
values$bertLastHyperpar <- NULL
## Return something other than the promise so shiny remains responsive
NULL
})
## Automatic check of BERT training status
observe({
if (bert_nclicks() == 0) return(NULL)
isolate({
tmpsession <- paste0("tiggerbert_", ok.labelcol(), "_", session$token)
## Check for checkpoint dirs
checkdirs <-
length(dir(paste0(tmpsession, "_train"), pattern = "^checkpoint"))
# fire_running(bert_status_file,
# min(100, round(100 * checkdirs / input$bertNeval, 1)))
# dastate <- try(RJSONIO::fromJSON(paste0(tmpsession, "_train/trainer_state.json")))
# if (!inherits(dastate, "try-error"))
# fire_running(bert_status_file,
# round(100 * as.numeric(dastate$epoch) / input$bertEpochs, 1))
try({
dapoints <- dir(paste0(tmpsession, "_train"),
pattern = "trainer_state[.]json$", recursive = T)
dapoints_num <- as.numeric(gsub("^checkpoint-(\\d+)/.*$", "\\1", dapoints))
statefile <- paste0(
tmpsession, "_train/", dapoints[which.max(dapoints_num)])
if (file.exists(statefile)) {
state <- RJSONIO::fromJSON(statefile)
fire_running(bert_status_file,
round(100 * as.numeric(state$epoch) / input$bertEpochs, 1))
}
})
})
if (checkdirs < isolate(input$bertNeval) & file.exists(paste0(tmpsession, "_done"))) {
values$bertMsg <- "BERT Error - try again"
# values$bertMsg <- paste(readLines(paste0(tmpsession, "_log")), collapse = "\n")
} else {
invalidateLater(1e3)
loss_progress <- ""
if (checkdirs > 0) {
## Get training diagnostics
try({
dapoints <- dir(paste0(tmpsession, "_train"),
pattern = "trainer_state[.]json$", recursive = T)
dapoints_num <- as.numeric(gsub("^checkpoint-(\\d+)/.*$", "\\1", dapoints))
statefile <- paste0(
tmpsession, "_train/", dapoints[which.max(dapoints_num)])
if (file.exists(statefile)) {
state <- RJSONIO::fromJSON(statefile)
logs <- data.table::rbindlist(lapply(state$log_history, as.list),
fill = TRUE)
logs <- logs[, .(loss = max(loss, na.rm = T),
eval_loss = max(eval_loss, na.rm = T)), by = epoch]
loss_progress <- paste0(
"\n* Epoch : ", sprintf("%.02f", logs$epoch),
"\ttrain loss : ", sprintf("%.05f",logs$loss),
"\teval loss : ", sprintf("%.05f", logs$eval_loss), collapse = "")
}
})
}
values$bertMsg <-
ifelse(checkdirs == 0, "Pre-processing...",
paste0(get_status(bert_status_file), loss_progress))
}
values$bertValstats <- NULL
values$bertValPlot <- NULL
values$bertLoadSaved <- NULL
values$bertMsgHyperpar <- NULL
})
## Action: show last BERT model
observeEvent(input$bertLast, {
values$bertMsg <- values$bertLastUnsaved$msg
values$bertValstats <- values$bertLastUnsaved$metrics
values$bertValPlot <- bertPlotFromPath(values$bertLastUnsaved$session)
values$bertMsgHyperpar <- bertHyperToMsg(values$bertLastHyperpar)
values$bertLoadSaved <- NULL
})
## Action: save BERT model
observeEvent(input$bertSave, {
tmpsession <- paste0("tiggerbert_", ok.labelcol(), "_", session$token)
if (!file.exists(paste0(tmpsession)))
return(NULL)
if (!grepl("\\S", input$bertSaveName)) {
tmpname <- paste0(
ok.labelcol(), "_", gsub("\\D+$", "", gsub("\\D", "-", Sys.time())))
} else
tmpname <- cleanFileName(input$bertSaveName)
savedir <- paste0("tiggerbert_saved_", tmpname)
system(paste("cp -R", tmpsession, savedir))
system(paste0("cp ", tmpsession, "_dat_valid.csv ",
savedir, "/tiggerbert_dat_valid.csv"))
system(paste0("cp ", tmpsession, "_predval.csv ",
savedir, "/tiggerbert_pred_valid.csv"))
system(paste0("rm -R ", tmpsession, "*"))
values$bertMsg <- paste0("BERT model successfully saved: ", tmpname)
values$bertValstats <- NULL
values$bertValPlot <- NULL
values$bertMsgHyperpar <- NULL
if (is.null(values$bertSaveTrigger)) {
values$bertSaveTrigger <- 0
} else
values$bertSaveTrigger <- values$bertSaveTrigger + 1
values$bertLastUnsaved <- NULL
updateTextInput(session, "bertSaveName", value = "")
})
## Actions: load saved model (list updated only with new models)
observeEvent(values$bertSaved, {
if (!length(values$bertSaved)) return(NULL)
tmpsaved <- values$bertSaved[! values$bertSaved %in% values$bertSavedAll]
lapply(tmpsaved, function(dabert) {
############
## Actions to load saved model
observeEvent(input[[paste0("saved_", dabert)]], {
bertconf <- RJSONIO::fromJSON(paste0(
"tiggerbert_saved_", dabert, "/config.json"))
truth <- read.csv(paste0(
"tiggerbert_saved_", dabert, "/tiggerbert_dat_valid.csv"))$label
pred <- read.csv(paste0(
"tiggerbert_saved_", dabert, "/tiggerbert_pred_valid.csv"))$bertpred
dametrics <- metricsTable(truth, pred)
values$bertValstats <- dametrics
values$bertMsg <- paste0(
"Saved model ", dabert, " (", bertconf[["_name_or_path"]], ")\n\n",
"BERT validation stats:\n",
"* Accuracy : ", round(100 * mean(truth == pred), 2), "%\n",
"* Macro F1 : ", sprintf("%f", mean(as.numeric(dametrics$F1))), "\n",
"* Weighted F1 : ", sprintf("%f", weighted.mean(as.numeric(dametrics$F1),
dametrics$N.cases)), "\n")
## Hyperparameters
tiggertrain_file <- paste0("tiggerbert_saved_", dabert, "/tiggertrain.json")
if (file.exists(tiggertrain_file)) {
try({
values$bertMsgHyperpar <- bertHyperToMsg(RJSONIO::fromJSON(tiggertrain_file))
})
} else
values$bertMsgHyperpar <- "Training parameters not saved."
values$bertValPlot <-
bertPlotFromPath(paste0("tiggerbert_saved_", dabert))
values$bertLoadSaved <- dabert
})
############
## Actions to copy hyperparameters
observeEvent(input[[paste0("bert_copypars_", dabert)]], {
tiggertrain_file <- paste0("tiggerbert_saved_", dabert, "/tiggertrain.json")
if (!file.exists(tiggertrain_file)) return(NULL)
try({
tmppars <- RJSONIO::fromJSON(tiggertrain_file)
updateSelectInput(session, "bertModel", selected = tmppars$bertModel)
updateCheckboxInput(session, "bertBest", value = tmppars$bertBest)
numinputs <- c("bertEpochs", "bertLrate", "bertWdecay",
"bertBatchsize", "bertGradacc")
for (iinput in numinputs)
updateNumericInput(session, iinput, value = tmppars[[iinput]])
})
})
############
## Actions to delete saved model (with confirm)
observeEvent(input[[paste0("bert_delete_", dabert)]], {
showModal(modalDialog(
title = "Delete BERT model",
paste0("Confirm delete: ", dabert,
" (model and predictions will be deleted)."),
footer = tagList(actionButton(paste0("bert_delete_confirm_", dabert), "Delete"),
modalButton("Cancel"))))
})
observeEvent(input[[paste0("bert_delete_confirm_", dabert)]], {
system(paste0("rm -R tiggerbert_saved_", dabert))
system(paste0(
"rm ", values$conf$datadir, values$conf$dataname, "_bertpred_",
dabert, ".feather"))
# values$bertLoadSaved <- NULL
if (is.null(values$bertSaveTrigger)) {
values$bertSaveTrigger <- 0
} else
values$bertSaveTrigger <- values$bertSaveTrigger + 1
removeModal()
values$bertMsg <- NULL
values$bertValstats <- NULL
values$bertMsgHyperpar <- NULL
})
############
## Actions to infer on current data from saved BERT (async)
observeEvent(input[[paste0("bert_infer_", dabert)]], {
if (bertpred_nclicks() > 0 | bpall_nclicks() > 0) {
values$bertMsg <- "Inference already running"
values$bertMsgHyperpar <- NULL
values$bertValPlot <- NULL
values$bertValstats <- NULL
return(NULL)
}
bertpred_nclicks(1)
saved_model <- paste0("tiggerbert_saved_", dabert)
out_path <- paste0(
values$conf$datadir, values$conf$dataname, "_bertpred_",
dabert, ".feather")
if (file.exists(out_path)) {
old_infer <- arrow::read_feather(out_path)
} else
old_infer <- NULL
to_infer <- ok.data()[[values$conf$idcol]]
to_infer <- to_infer[! to_infer %in% old_infer[[values$conf$idcol]]]
to_infer_row <- match(to_infer, ok.data()[[values$conf$idcol]])
if (!length(to_infer)) {
values$bertMsg <- paste0(
"All current ", nrow(ok.data()),
" texts already infered by ", dabert)
values$bertMsgHyperpar <- NULL
values$bertValPlot <- NULL
values$bertValstats <- NULL
bertpred_nclicks(0)
return(NULL)
}
values$bertValPlot <- NULL
values$bertValstats <- NULL
values$bertMsgHyperpar <- NULL
values$bertMsg <- paste0(
"Preparing inference on ", length(to_infer), " new texts")
tmp_dat <- paste0("tiggerbert_infer_", session$token, ".feather")
arrow::write_feather(
data.frame(
ok.data()[to_infer_row, values$conf$idcol, drop = FALSE],
text = ok.nicetext()[to_infer_row]),
tmp_dat)
values$bertMsg <- paste0(
"Infering ", dabert, " predictions on ",
length(to_infer), " new texts")
infer_cmd <- paste0(
values$conf$python, " gobert_infer.py -m ", saved_model,
" -o ", out_path,
" -d ", tmp_dat,
" -i ", values$conf$idcol,
" -l ", paste0("tiggerbert_infer_", session$token, ".log"),
" -G ", as.numeric(values$conf$use_gpu),
" -b ", 128)
bertpred_res <- reactiveVal()
bertpred_async <- future({
system(infer_cmd)
system(paste0("rm ", tmp_dat))
if (!is.null(old_infer)) {
new_infer <- arrow::read_feather(out_path)
arrow::write_feather(rbind(old_infer, new_infer), out_path)
}
}) %...>% bertpred_res
bertpred_async <- catch(bertpred_async, function(error) {
bertpred_nclicks(0)
values$bertMsg <- paste0("BERT inference error: ", error)
system(paste0("rm ", tmp_dat))
})
bertpred_async <- finally(bertpred_async, function() {
bertpred_nclicks(0)
values$bertMsg <-
"Inference complete!\nRe-import data in Project/Sample panel to use the predictions for tagging."
values$bertValPlot <- NULL
values$bertValstats <- NULL
values$bertMsgHyperpar <- NULL
})
NULL
})
})
values$bertSavedAll <- c(values$bertSavedAll, tmpsaved)
})
## Automatic check of BERT inference progress
observe({
if (bertpred_nclicks() == 0) return(NULL)
invalidateLater(1e3)
logfile <- paste0("tiggerbert_infer_", isolate(session$token), ".log")
if (file.exists(logfile)) {
values$bertMsg <- readLines(logfile)[1]
values$bertValPlot <- NULL
values$bertValstats <- NULL
values$bertMsgHyperpar <- NULL
}
})
##############################################################################
## Export panel
##############################################################################
## Export tags and full model predictions
output$dlTagSave <-
downloadHandler(
function()
paste0(Sys.Date(), "-activeTigger-", ok.labelcol(),".", input$dlTagFormat),
function(con) {
if (is.null(ok.data())) return(NULL)
dadat <- ok.data()[, values$conf$idcol, drop = FALSE]
if ("tags" %in% input$dlTagSelect)
dadat[[ok.labelcol()]] <- values$label
if ("comments" %in% input$dlTagSelect) if (ok.commentCol() != "(None)")
dadat[, ok.commentCol()] <- values$comment
if ("predictions" %in% input$dlTagSelect) if (ok.train.possible()) {
dadat$tigger.predict <- ok.predlab1()
dadat$tigger.predprob <- apply(ok.pred(), 1, max)
tmp_pred <- ok.pred()
colnames(tmp_pred) <- paste0("tiggerpred_", colnames(tmp_pred))
dadat <- cbind(dadat, tmp_pred)
}
othercols <- input$dlTagSelect
othercols <- othercols[! othercols %in% c("tags", "comments", "predictions")]
if (length(othercols))
dadat <- cbind(dadat, ok.data()[, othercols, drop = FALSE])
if (input$dlTagFormat == "csv") {
write.csv(dadat, row.names = F, con)
} else
arrow::write_feather(dadat, con)
})
## Export embeddings (ft, sb, umap, t-sne)
output$dlEmbedSave <- downloadHandler(
function()
paste0(Sys.Date(), "-activeTigger-emb-",
paste(input$dlEmbedSelect, collapse = "-"),
".", input$dlEmbedFormat),
function (file) {
if (is.null(input$dlEmbedSelect)) return(NULL)
export <- ok.data()[, values$conf$idcol, drop = FALSE]
if ("ft" %in% input$dlEmbedSelect)
export <- data.frame(export, ok.data()[, values$cols_ft])
if ("sb" %in% input$dlEmbedSelect)
export <- data.frame(export, ok.data()[, values$cols_sb])
if (any(c("t-sne", "umap") %in% input$dlEmbedSelect)) {
tmpvisu <- ok.visu()
tmpalgo <- input$dlEmbedSelect[input$dlEmbedSelect %in% c("t-sne", "umap")]
colnames(tmpvisu) <- paste0(gsub("-", "", tmpalgo), 1:2)
export <- data.frame(export, tmpvisu)
}
if (input$dlEmbedFormat == "csv") {
write.csv(export, file, row.names = FALSE)
} else if (input$dlEmbedFormat == "feather") {
arrow::write_feather(export, file)
}
})
## Export BERT predictions
output$dlBPMsg <- reactive(values$dlBPMsg)
# output$dlBertPredCsv <-
# downloadHandler(paste0(Sys.Date(), "-bertpred-", ok.labelcol(),".csv"),
# content= function(con) {
# if (is.null(ok.data())) return(NULL)
# dadat <- ok.data()[, c(values$conf$idcol, ok.labelcol())]
# dadat[[ok.labelcol()]] <- values$label
# if (ok.commentCol() != "(None)") {
# dadat[, ok.commentCol()] <- values$comment
# }
# if (ok.train.possible()) {
# dadat$tigger.predict <- ok.predlab1()
# dadat$tigger.predprob <- apply(ok.pred(), 1, max)
# }
# dadat <- cbind(dadat, ok.pred())
# write.csv(dadat, row.names = F, con)
# })
############
## Action to infer on ALL data from saved BERT (async)
bpall_res <- reactiveVal()
observeEvent(input$dlBPInfer, {
if (bpall_nclicks() > 0 | bertpred_nclicks() > 0) {
values$dlBPMsg <- "Inference already running"
bpall_res(NULL)
return(NULL)
}
bpall_nclicks(1)
bpall_res_async <- reactiveVal()
dabert <- input$dlBPSelect
saved_model <- paste0("tiggerbert_saved_", dabert)
out_path <- paste0(
values$conf$datadir, values$conf$dataname, "_bertpredall_",
dabert, ".feather")
tmp_dat <- paste0("tiggerbert_inferall_", session$token, ".feather")
tmp_log <- paste0("tiggerbert_inferall_", session$token, ".log")
tmp_done <- paste0("tiggerbert_inferall_", session$token, "_done")
if (file.exists(tmp_done))
system(paste0("rm ", tmp_done))
if (file.exists(out_path)) {
values$dlBPMsg <- "Inference already complete, ready for download"
bpall_nclicks(0)
bpall_res(out_path)
return(NULL)
# old_infer <- arrow::read_feather(out_path)
}
fulldat <- arrow::read_feather(paste0(
values$conf$datadir, values$conf$dataname, ".feather"))
values$dlBPMsg <- paste0(
"Preparing inference on ", nrow(fulldat), " texts")
if (length(values$conf$textcols) > 1) {
fulldat$text <- do.call(paste, c(
fulldat[, values$conf$textcols, drop = FALSE], sep = "\n\n"))
} else {
fulldat$text <- fulldat[, values$conf$textcols]
}
arrow::write_feather(fulldat[, c(values$conf$idcol, "text")], tmp_dat)
infer_cmd <- paste0(
values$conf$python, " gobert_infer.py -m ", saved_model,
" -o ", out_path,
" -d ", tmp_dat,
" -i ", values$conf$idcol,
" -l ", tmp_log,
" -G ", as.numeric(values$conf$use_gpu),
" -b ", 128,
"; touch ", tmp_done)
bpall_async <- future({
system(infer_cmd)
system(paste0("rm ", tmp_dat))
}) %...>% bpall_res_async
bpall_async <- catch(bpall_async, function(error) {
values$dlBPMsg <- paste0("BERT inference error: ", error)
system(paste0("rm ", tmp_dat))
system(paste0("rm ", tmp_done))
})
bpall_async <- finally(bpall_async, function() {
bpall_nclicks(0)
values$dlBPMsg <-
"Inference complete, ready for download"
bpall_res(out_path)
})
NULL
})
## Automatic check of BERT all prediction progress
observe({
if (bpall_nclicks() == 0) return(NULL)
invalidateLater(1e3)
logfile <- paste0("tiggerbert_inferall_", isolate(session$token), ".log")
if (file.exists(logfile))
values$dlBPMsg <- readLines(logfile)[1]
})
## Show BERTpred download button when data is ready
output$dlBPDlButton <- renderUI({
if (is.null(bpall_res())) return(NULL)
downloadButton("dlBPDl", NULL)
})
output$dlBPDl <- downloadHandler(
function()
paste0(Sys.Date(), "-tiggerbert-", values$conf$dataname,
input$dlBPSelect, ".", input$dlBPFormat),
function(file) {
if (is.null(bpall_res())) return(NULL)
tmp_bert <- gsub("^.*_bertpredall_(.*)[.]feather$", "\\1", bpall_res())
if (tmp_bert != input$dlBPSelect) {
values$dlBPMsg <- "BERT model changed - predict again or select current model"
return(NULL)
}
tmp <- arrow::read_feather(bpall_res())
if (input$dlBPFormat == "csv") {
write.csv(tmp, file, row.names = FALSE)
} else if (input$dlBPFormat == "feather") {
arrow::write_feather(tmp, file)
}
}
)
##############################################################################
## Delete files at end of session
onStop(function() {
if(file.exists(bert_status_file)) unlink(bert_status_file)
if(length(dir(pattern = paste(session$token))))
system(paste0("rm -R *", session$token, "*"))
})
})