datalab/docker-images-datalab/myactivetigger/activetigger/ui.R

632 lines
23 KiB
R
Raw Normal View History

2024-03-06 15:54:50 +01:00
## 21/04/2020: Shiny app for active-learning text tagging
## The whole UI is one fluid page: a top panel, the (collapsible) training
## options, and the main tab set.
shinyUI(fluidPage(
title = "Active Tigger",
############################################################################
## Top panel: project / scheme pickers, querying strategy, sampling scope
## and model training controls.
##
## Tooltips are attached with div(title = ...) instead of
## HTML(paste0('<div title=...>', <tag>, '</div>')): pasting a tag converts
## it with as.character(), which keeps the markup only, so any HTML
## dependency carried by the wrapped widget would be lost.
############################################################################
fluidRow(
  ## Logo, project selector and scheme selector
  column(
    4,
    fluidRow(
      column(
        4, br(),
        HTML('<img src="active_tigger.png" width="100%">')
      ),
      column(
        8, br(),
        p(strong("Project / Scheme")),
        fluidRow(
          column(2, div(title = "New project",
                        actionButton("createProject", "+"))),
          column(10, selectInput("selectProject", NULL, NULL, NULL))
        ),
        fluidRow(
          column(2, div(title = "New scheme",
                        actionButton("createScheme", "+"))),
          column(10, selectInput("selectScheme", NULL, NULL, NULL))
        )
      )
    )
  ),
  ## Querying strategy; the second selector is only shown for "MaxProb"
  column(
    2, br(),
    div(
      title = "Querying strategy",
      p(strong("Strategy")),
      selectInput("strategy", NULL,
                  choices = c("Active" = "entropy",
                              "MaxProb" = "maxprob",
                              "Random" = "random",
                              "Deterministic" = "sequential"),
                  selected = "sequential"),
      conditionalPanel("input.strategy == 'maxprob'",
                       selectInput("maxprobWhich", NULL, NULL))
    )
  ),
  ## Pool to query from; the second selector is only shown for "Tagged"
  column(
    2, br(),
    div(
      title = "Query from which?",
      p(strong("On")),
      selectInput("sampleChoice", NULL,
                  choices = c("Untagged" = "untagged",
                              "Tagged" = "tagged",
                              "All" = "all")),
      conditionalPanel("input.sampleChoice == 'tagged'",
                       selectInput("taggedWhich", NULL, NULL))
    )
  ),
  ## Training diagnostics, train button and the training-options toggle
  column(
    4, p(br()),
    verbatimTextOutput("trainDiagno"),
    fluidRow(
      column(8, div(title = "Train prediction model 🤖",
                    actionButton("modelTrain", "🤖 Train", width = "100%"))),
      column(4, div(title = "🤖 Prediction model options",
                    checkboxInput("showTrainOptions", "🔧")))
    )
  )
),
## General training options, shown while the "showTrainOptions" checkbox
## (🔧) is ticked.
##
## The nested conditions are JavaScript expressions. They use the logical
## `&&` rather than the bitwise `&`: `&` always evaluates both operands, so
## `.includes()` was called on input.use_regressors even when it was not
## needed. The extra `input.use_regressors &&` test guards against that
## input being unset (null/undefined) on the JavaScript side.
conditionalPanel(
  "input.showTrainOptions",
  hr(),
  fluidRow(
    ## Left: output slot "panelExtraRegex", shown when 'regex' is among the
    ## selected predictors
    column(
      4,
      conditionalPanel(
        "input.showTrainOptions && input.use_regressors && input.use_regressors.includes('regex')",
        uiOutput("panelExtraRegex")
      )
    ),
    ## Middle: predictor selection and predictor-specific settings
    column(
      4,
      p(strong("Predictors")),
      selectizeInput("use_regressors", NULL, "",
                     multiple = TRUE, width = "100%"),
      conditionalPanel(
        "input.showTrainOptions && input.use_regressors && input.use_regressors.includes('extra')",
        selectizeInput("use_ootregnum", "Extra predictors: continuous", "",
                       multiple = TRUE, width = "100%"),
        selectizeInput("use_ootregcat", "Extra predictors: categorical", "",
                       multiple = TRUE, width = "100%")
      ),
      conditionalPanel(
        "input.showTrainOptions && input.use_regressors && input.use_regressors.includes('dfm')",
        hr(),
        fluidRow(
          column(
            6, numericInput("dfmMinTermfreq", "DFM : Min Termfreq",
                            min = 1, max = 1e3, value = 5, step = 1)),
          column(
            6, numericInput("dfmMinDocfreq", "DFM : Min Docfreq",
                            min = 1, max = 1e3, value = 5, step = 1))
        ),
        fluidRow(
          column(
            4, checkboxInput("dfmTfIdf", "Tf-Idf", TRUE)),
          column(
            4, selectInput("dfmTfScheme", label = NULL,
                           choices = c("logcount", "count", "prop", "propmax",
                                       "boolean", "augmented", "logave"),
                           selected = "logcount")),
          column(
            4, selectInput("dfmDfScheme", label = NULL,
                           choices = c("inverse", "count",
                                       "inversemax", "inverseprob", "unary"),
                           selected = "inverse"))
        ),
        numericInput("dfmNgrams", "N-grams", value = 1,
                     min = 1, max = 10, step = 1)
      )
    ),
    ## Right: auto-train countdown, model choice and model-specific options.
    ## Tooltips use div(title = ...) instead of pasting tags into an HTML
    ## string, so the wrapped widgets stay regular tag objects.
    column(
      4,
      div(
        title = "Auto train after # tags (0=never)",
        fluidRow(
          column(6, strong("Auto train every")),
          column(6, numericInput("trainCountdown", NULL, 0, 0, 1e6, 1))
        )
      ),
      div(
        title = "🤖 model (recommended: Liblinear)",
        fluidRow(
          column(6, strong("Model")),
          column(6, selectInput("predModel", NULL, selected = "linear",
                                choices = c("Naive Bayes" = "naive bayes",
                                            "KNN" = "knn",
                                            "Liblinear" = "linear",
                                            "LASSO" = "lasso",
                                            "Random Forest" = "random forest")))
        )
      ),
      ## Model-specific training options: one panel per value of predModel
      ### Random forest options
      conditionalPanel(
        "input.showTrainOptions && input.predModel == 'random forest'",
        fluidRow(
          numericInput("rfNumTrees", label = "Num. trees",
                       min = 1, max = 2e3, value = 500, step = 1),
          numericInput("rfMtry", label = "mtry",
                       min = 0, max = 1e5, value = 0, step = 1),
          numericInput("rfSampleFrac", label = "Sample fraction",
                       min = 0, max = 1, value = 1, step = .01)
        )
      ),
      ### Naive Bayes options
      conditionalPanel(
        "input.showTrainOptions && input.predModel == 'naive bayes'",
        flowLayout(
          numericInput("naiveSmooth", label = "Smooth",
                       min = 0, max = 2e3,
                       value = 1, step = 1e-3),
          selectInput("naivePrior", "Prior",
                      c("uniform", "docfreq", "termfreq")),
          selectInput("naiveDistri", "Distribution",
                      c("multinomial", "Bernoulli"))
        )
      ),
      ### Lasso options
      conditionalPanel(
        "input.showTrainOptions && input.predModel == 'lasso'",
        strong("Lasso penalty"),
        fluidRow(
          column(
            6, numericInput("glmLambda", label = NULL, min = 0, max = 2e3,
                            value = 0, step = 1e-6)),
          column(
            6, actionButton("glmCV", label = "Find best (CV)"))
        )
      ),
      ### Linear options
      conditionalPanel(
        "input.showTrainOptions && input.predModel == 'linear'",
        strong("Liblinear Cost"),
        fluidRow(
          column(
            6, numericInput("liblinCost", label = NULL, min = 0, max = 2e10,
                            value = 32, step = 1)),
          column(
            6, actionButton("liblinCV", label = "Find best (CV)"))
        )
      ),
      ### KNN options
      conditionalPanel(
        "input.showTrainOptions && input.predModel == 'knn'",
        flowLayout(
          strong("N. Neighbours"),
          numericInput("knnK", label = NULL, min = 1, max = 1e2,
                       value = 3, step = 1)
        )
      )
    )
  ),
  hr()
),
############################################################################
## Main panel set
## Container for the application tabs; "Tagging" is the tab selected when
## the page is first displayed.
############################################################################
tabsetPanel(
id = "mainPanelset",
selected = "Tagging",
########################################################################
## Project panel: three sub-tabs (Settings, Sample, Scheme); "Sample" is
## the one selected initially.
########################################################################
tabPanel(
  "Project",
  br(),
  tabsetPanel(
    id = "tabsetProject",
    selected = "Sample",
    ## ---- Settings: file locations, variable roles, system backends ----
    tabPanel(
      "Settings",
      br(),
      actionButton("saveSystem", "Save changes"),
      ## Each row below is: label | server-rendered input | help text
      h4("Files"),
      fluidRow(
        column(2, p("Data directory")),
        column(4, uiOutput("sys_datadir")),
        column(6, p("Place (on the server) where the data and project are stored"))
      ),
      fluidRow(
        column(2, p("Data filename")),
        column(4, uiOutput("sys_datafile")),
        column(6, p("Main file, containing id and text columns"))
      ),
      h4("Variables"),
      fluidRow(
        column(2, p("ID")),
        column(4, uiOutput("sys_var_id")),
        column(6, p("Name of the id variable, unique identifier of each text"))
      ),
      fluidRow(
        column(2, p("Text")),
        column(4, uiOutput("sys_var_text")),
        column(6, p("Name of the text variables: if more than one, texts are concatenated in the specified order"))
      ),
      fluidRow(
        column(2, p("Tags")),
        column(4, uiOutput("sys_var_tag")),
        column(6, p("Names of scheme variables"))
      ),
      fluidRow(
        column(2, p("Comments")),
        column(4, uiOutput("sys_var_comm_ui")),
        column(6, p("Name of the comments variable"))
      ),
      fluidRow(
        column(2, p("Context")),
        column(4, uiOutput("sys_var_context_ui")),
        column(6, p("Names of variables not used in the models, but may be displayed during tagging"))
      ),
      h4("System"),
      fluidRow(
        column(2, checkboxInput("sys_use_python", "Python backend", FALSE)),
        column(4, conditionalPanel(
          "input.sys_use_python",
          textInput("sys_which_python", NULL, value = "python3",
                    placeholder = "(custom python path)"))),
        column(6, conditionalPanel(
          "input.sys_use_python",
          p("This must be a working python3 environment, with the required modules installed (see documentation)")))
      ),
      ## Everything below is only displayed when the Python backend is on
      conditionalPanel("input.sys_use_python", list(
        fluidRow(
          column(2, checkboxInput("sys_use_spacy", "SpaCy tokenization", FALSE)),
          column(4, conditionalPanel("input.sys_use_spacy", textInput(
            "sys_use_spacy_model", NULL, NULL, placeholder = "(spacy model name)"))),
          column(6, p("Name of the spacy tokenizer model, used in DTM and word embeddings"))
        ),
        conditionalPanel("input.sys_use_spacy", fluidRow(
          column(2),
          column(9, uiOutput("sys_spacyDlUI")))
        ),
        fluidRow(
          column(2, checkboxInput("sys_use_ft", "FastText word embeddings", FALSE)),
          column(4, conditionalPanel("input.sys_use_ft", textInput(
            "sys_use_ft_model", NULL, NULL, placeholder = "(fasttext model path)"))),
          column(6, p("Path to the local fasttext model binary"))
        ),
        conditionalPanel("input.sys_use_ft", fluidRow(
          column(2),
          column(9, uiOutput("sys_ftDlUI")))
        ),
        fluidRow(
          column(2, checkboxInput("sys_use_sb", "SBERT sentence embeddings", FALSE)),
          column(4, conditionalPanel("input.sys_use_sb", textInput(
            "sys_use_sb_model", NULL, NULL,
            placeholder = "(custom sentence_transformers model)"))),
          column(6, p("(GPU recommended) Name or path of the sentence-transformers model"))
        ),
        ## NOTE(review): this panel repeats the condition of the enclosing
        ## one; kept as is so the generated page structure is unchanged.
        conditionalPanel("input.sys_use_python", list(
          checkboxInput("sys_use_gpu", "GPU support (CUDA, for SBERT and BERT)", FALSE),
          br(),
          wellPanel(
            h4("Model picker"),
            fluidRow(
              column(2, p("Language")),
              column(4, uiOutput("sys_ex_lang_ui")),
              column(6, p("Used to preset tokenization and embedding models"))
            ),
            fluidRow(
              column(2),
              column(4, strong("Recommended model")),
              column(6, strong("Download instructions"))
            ),
            fluidRow(
              column(2, p("SpaCy tokenization")),
              column(4, uiOutput("sys_ex_spacy")),
              column(6, uiOutput("sys_ex_spacy_dl"))
            ),
            fluidRow(
              column(2, p("FastText word embeddings")),
              column(4, uiOutput("sys_ex_ft")),
              column(6, uiOutput("sys_ex_ft_dl"))
            ),
            fluidRow(
              column(2, p("SBERT sentence embeddings")),
              column(4, uiOutput("sys_ex_sb")),
              column(6, p("(Auto download by python module)"))
            )
          )
        ))
      ))
    ),
    ## ---- Sample: import controls (left) and data outputs (right) ----
    tabPanel(
      "Sample",
      br(),
      fluidRow(
        column(
          4,
          wellPanel(
            fluidRow(
              column(8, h4("Sample")),
              column(4, actionButton("dataImport", "Import", width = "100%"))
            ),
            fluidRow(
              column(6, numericInput("dataNrows", "N. rows", 500, 10, 1e4, 1)),
              column(6, numericInput("dataSkipRows", "Skip rows", 0, 0, step = 1))
            )
          )
        ),
        column(8, uiOutput("dataMessage"), uiOutput("panelData"))
      )
    ),
    ## ---- Scheme: current scheme description (left), retag slot (right) ----
    tabPanel(
      "Scheme",
      br(),
      fluidRow(
        # column(4, uiOutput("panelScheme")),
        column(
          4,
          wellPanel(
            h4("Current scheme"),
            ## Tooltips use div(title = ...) rather than an HTML string
            ## pasted around the button, so the buttons stay tag objects.
            fluidRow(
              column(2, div(
                title = "Delete scheme",
                actionButton("schemeDelete", "🗑", width = "100%"))),
              column(6, uiOutput("printScheme")),
              column(4, div(
                title = "Save scheme description",
                actionButton("schemeDescrSave", "Save", width = "100%")))
            ),
            br(),
            textAreaInput("schemeDescr", NULL, width = "100%", rows = 10,
                          placeholder = "Write scheme description here"),
            hr()
          )
        ),
        column(8, uiOutput("panelRetag"))
      )
    )
  )
),
########################################################################
## Text / visualization panel ("Tagging" tab)
## Left column: regex filter, tagging controls, optional context.
## Right column: text / visualization toggles and their output slot.
########################################################################
tabPanel(
  "Tagging",
  fluidRow(
    column(
      width = 3,
      br(),
      ## Regex filter and its case-sensitivity switch
      fluidRow(
        column(
          width = 8,
          textInput("regexFilter", label = NULL,
                    placeholder = "(Regex filter)")
        ),
        column(width = 4, checkboxInput("regexCaseSens", "Case"))
      ),
      wellPanel(
        ## Tagging buttons
        fluidRow(
          column(
            width = 8,
            textInput("newLab", label = NULL,
                      placeholder = "(New label)")
          ),
          column(width = 4, actionButton("currentAction", "Create"))
        ),
        # fluidRow(uiOutput("oracleButtons")),
        uiOutput("oracleButtons"),
        br(),
        textInput("currentComment", label = NULL, value = "",
                  width = "100%", placeholder = "(Comment)"),
        br(),
        uiOutput("makeOracleConfirm")
      ),
      # fluidRow(
      #   column(6, checkboxInput("showContext", "Context")),
      #   column(6, actionButton("oops", strong("Oops")))
      # ),
      ## Context is only rendered while the checkbox is ticked
      checkboxInput("showContext", "Context"),
      conditionalPanel("input.showContext", htmlOutput("currentContext"))
    ),
    column(
      width = 9,
      fluidRow(
        column(width = 2,
               checkboxInput("panelText", "Text", value = TRUE)),
        column(width = 2, offset = 8,
               checkboxInput("panelVisu", "Visualization", value = FALSE))
      ),
      uiOutput("textVisuCols") # Handled in server.R for adaptive columns
    )
  )
),
########################################################################
## History panel: save button above the history table
########################################################################
tabPanel(
  title = "History",
  br(),
  actionButton(inputId = "histSave", label = "Save changes"),
  br(),
  br(),
  DT::dataTableOutput(outputId = "histDTable")
),
########################################################################
## Stats panel: tag counts (left) and 10-CV diagnostics (right), followed
## by the "Gold Standard" heading
########################################################################
tabPanel(
  title = "Stats",
  br(),
  fluidRow(
    column(
      width = 3,
      h3("Counts"),
      tableOutput(outputId = "statsTagTable")
    ),
    column(
      width = 9,
      h3("10-CV diagnostics"),
      actionButton(inputId = "statsCVgo", label = "Compute 10-CV"),
      br(),
      verbatimTextOutput(outputId = "statsCVoutput"),
      DT::dataTableOutput(outputId = "statsCVtable")
    )
  ),
  hr(),
  h3("Gold Standard")
),
########################################################################
## BERT panel
## Left column: train / save controls and the list of saved models.
## Right column: training options (shown while "bertOptions" is ticked),
## messages, validation plot and validation statistics.
########################################################################
tabPanel(
  "BERT",
  fluidRow(
    column(
      width = 3,
      br(),
      h3("Train new BERT"),
      fluidRow(
        column(width = 6,
               actionButton("bertTrain", "Train BERT", width = "100%")),
        column(width = 6, checkboxInput("bertOptions", "Options"))
      ),
      fluidRow(
        column(width = 6,
               textInput("bertSaveName", label = NULL,
                         placeholder = "(save name)")),
        column(width = 6,
               actionButton("bertSave", "Save", width = "100%"))
      ),
      actionLink("bertLast", "Last trained model"),
      h3("Saved models"),
      uiOutput("bertSaved")
    ),
    column(
      width = 9,
      br(),
      conditionalPanel(
        "input.bertOptions",
        ## Pretrained model to start from
        fluidRow(
          column(
            width = 6,
            selectInput(
              "bertModel", "Model",
              choices = c(
                "(Fr) CamemBERT-base" = "camembert/camembert-base",
                "(Fr) CamemBERT-large" = "camembert/camembert-large",
                "(Fr) FlauBERT-small" = "flaubert/flaubert_small_cased",
                "(Fr) FlauBERT-base" = "flaubert/flaubert_base_cased",
                "(Fr) FlauBERT-large" = "flaubert/flaubert_large_cased",
                "(En) DistilBERT-base" = "distilbert-base-cased",
                "(En) RoBERTa-base" = "roberta-base",
                "(En) DeBERTa-base" = "microsoft/deberta-base",
                "(Multi) DistilmBERT-base" = "distilbert-base-multilingual-cased",
                "(Multi) MiniLM" = "microsoft/Multilingual-MiniLM-L12-H384",
                "(Multi) XLM-RoBERTa-base" = "xlm-roberta-base"
              )
            )
          ),
          column(width = 6)
        ),
        ## Numeric inputs below spell out value / min / max / step
        fluidRow(
          column(width = 3,
                 numericInput("bertEpochs", "Epochs",
                              value = 3, min = 1, max = 20, step = 1)),
          column(width = 3,
                 numericInput("bertLrate", "Learning rate",
                              value = 2e-5, min = 1e-6, max = 1, step = 1e-6)),
          column(width = 3,
                 numericInput("bertWdecay", "Weight decay",
                              value = 0.01, min = 0, max = 10, step = 1e-6)),
          column(width = 3)
        ),
        fluidRow(
          column(width = 3,
                 numericInput("bertBatchsize", "Batch size",
                              value = 4, min = 1, max = 32, step = 1)),
          column(width = 3,
                 numericInput("bertGradacc", "Gradient accum.",
                              value = 4, min = 1, max = 32, step = 1)),
          column(width = 3, br(),
                 checkboxInput("bertAdapt", "Adapt token length to batch",
                               value = TRUE)),
          column(width = 3)
        ),
        fluidRow(
          column(width = 3,
                 numericInput("bertValidFrac", "Validation fraction",
                              value = .2, min = 0, max = .9)),
          column(width = 3,
                 numericInput("bertValidSeed", "Validation seed",
                              value = 1234, min = 1, max = 9e8)),
          column(width = 3,
                 numericInput("bertNeval", "N. validation evals",
                              value = 10, min = 1, max = 100, step = 1)),
          column(width = 3, br(),
                 checkboxInput("bertBest", "Keep best", value = TRUE))
        ),
        fluidRow(
          column(width = 3,
                 numericInput("bertMinOccur", "Min. class occurences",
                              value = 1, min = 1, max = 1e4, step = 1)),
          column(width = 3, br(),
                 checkboxInput("bertBalance", "Balance classes",
                               value = FALSE)),
          column(width = 3),
          column(width = 3)
        )
      ),
      fluidRow(
        column(
          width = 6,
          # flowLayout(
          #   actionButton(
          #     "bertGoPred", "Infer on current data", width = "100%"),
          #   actionButton(
          #     "bertDelete", "Delete saved model", width = "100%")),
          verbatimTextOutput("bertMsg")
        ),
        column(width = 6, plotOutput("bertValPlot", height = 200))
      ),
      verbatimTextOutput("bertMsgHyperpar"),
      DT::dataTableOutput("bertValstats")
    )
  )
),
########################################################################
## Export panel: tagged data, embeddings and BERT predictions, plus the
## "Export BERT models" heading
########################################################################
tabPanel(
  "Export",
  ## Tags / comments / predictions
  h4("Export tagged data"),
  p("Download the tags and predicted probabilities from the complete model, on the current data sample."),
  # downloadButton("downloadCsv", "Save csv"),
  flowLayout(
    selectInput(
      "dlTagSelect",
      label = NULL,
      choices = c("tags", "comments", "predictions"),
      selected = c("tags", "comments", "predictions"),
      multiple = TRUE
    ),
    selectInput("dlTagFormat", label = NULL,
                choices = c("csv", "feather"), selected = "csv"),
    downloadButton("dlTagSave", label = NULL, title = "Save tags")
  ),
  hr(),
  ## Embeddings
  h4("Export embeddings"),
  p("Download the embeddings (incl. from visualization if present), on the current data sample."),
  flowLayout(
    selectInput(
      "dlEmbedSelect",
      label = NULL,
      choices = c("FastText" = "ft", "SBERT" = "sb"),
      selected = "sb",
      multiple = TRUE
    ),
    selectInput("dlEmbedFormat", label = NULL,
                choices = c("csv", "feather"), selected = "feather"),
    downloadButton("dlEmbedSave", label = NULL, title = "Save embeddings")
  ),
  hr(),
  ## BERT predictions: the model selector is declared without choices here
  h4("Export BERT predictions"),
  p("Download the predicted probabilities from the chosen BERT model, on the complete dataset."),
  flowLayout(
    selectInput("dlBPSelect", label = NULL, choices = NULL, selected = NULL),
    selectInput("dlBPFormat", label = NULL,
                choices = c("csv", "feather"), selected = "feather"),
    actionButton("dlBPInfer", "Predict"),
    verbatimTextOutput("dlBPMsg"),
    uiOutput("dlBPDlButton")
  ),
  hr(),
  h4("Export BERT models")
)
## End of the main tabsetPanel
),
## Run of line breaks: leaves blank space below the tab content
br(), br(), br(), br(), br(), br(), br(), br(), br(), br(), br(), br(), br(),
br(), br(), br(), br(), br(), br()
## Closes fluidPage() and shinyUI()
))