From 9c5c71336d3bf4901fa68bca8776b90046c2e779 Mon Sep 17 00:00:00 2001
From: aguyot-ensae
Date: Thu, 29 Feb 2024 09:49:04 +0100
Subject: [PATCH] copy files from dsit/datalab
---
docker-images-datalab/activetigger/.drone.yml | 48 +
docker-images-datalab/activetigger/Dockerfile | 111 +
.../activetigger/embed_fasttext.py | 41 +
.../activetigger/activetigger/embed_sbert.py | 42 +
.../activetigger/activetigger/gobert.py | 174 +
.../activetigger/activetigger/gobert_infer.py | 94 +
.../activetigger/activetigger/modelnames.csv | 159 +
.../activetigger/activetigger/server.R | 4143 +++++++++++++++++
.../activetigger/tokenize_spacy.py | 40 +
.../activetigger/activetigger/ui.R | 631 +++
.../activetigger/www/active_tigger.jpeg | Bin 0 -> 106990 bytes
.../activetigger/www/active_tigger.png | Bin 0 -> 250946 bytes
.../activetigger/activetigger/www/favicon.ico | Bin 0 -> 3758 bytes
.../activetigger/requirements.r | 1 +
.../activetigger/requirementspython.txt | 6 +
docker-images-datalab/index.md | 3 +
docker-images-datalab/requirementspython.txt | 15 +
.../charts/activetigger/.helmignore | 23 +
.../charts/activetigger/Chart.yaml | 24 +
.../charts/activetigger/index.md | 63 +
.../charts/activetigger/templates/NOTES.txt | 22 +
.../activetigger/templates/_helpers.tpl | 62 +
.../activetigger/templates/deployment.yaml | 68 +
.../charts/activetigger/templates/hpa.yaml | 32 +
.../activetigger/templates/ingress.yaml | 61 +
.../activetigger/templates/service.yaml | 15 +
.../templates/serviceaccount.yaml | 13 +
.../templates/tests/test-connection.yaml | 15 +
.../charts/activetigger/values.yaml | 107 +
.../charts/overleaf/.helmignore | 23 +
.../charts/overleaf/Chart.yaml | 24 +
.../helm-charts-test/charts/overleaf/index.md | 20 +
.../charts/overleaf/templates/NOTES.txt | 22 +
.../charts/overleaf/templates/_helpers.tpl | 62 +
.../charts/overleaf/templates/deployment.yaml | 68 +
.../charts/overleaf/templates/hpa.yaml | 32 +
.../charts/overleaf/templates/ingress.yaml | 61 +
.../charts/overleaf/templates/service.yaml | 15 +
.../overleaf/templates/serviceaccount.yaml | 13 +
.../templates/tests/test-connection.yaml | 15 +
.../charts/overleaf/values.yaml | 107 +
.../helm-charts-test/charts/test2/.helmignore | 23 +
.../helm-charts-test/charts/test2/Chart.yaml | 24 +
.../charts/test2/templates/NOTES.txt | 22 +
.../charts/test2/templates/_helpers.tpl | 62 +
.../charts/test2/templates/deployment.yaml | 68 +
.../charts/test2/templates/hpa.yaml | 32 +
.../charts/test2/templates/ingress.yaml | 61 +
.../charts/test2/templates/service.yaml | 15 +
.../test2/templates/serviceaccount.yaml | 13 +
.../templates/tests/test-connection.yaml | 15 +
.../helm-charts-test/charts/test2/values.yaml | 107 +
.../helm-charts-test/charts/test3/.helmignore | 23 +
.../helm-charts-test/charts/test3/Chart.yaml | 24 +
.../charts/test3/templates/NOTES.txt | 22 +
.../charts/test3/templates/_helpers.tpl | 62 +
.../charts/test3/templates/deployment.yaml | 68 +
.../charts/test3/templates/hpa.yaml | 32 +
.../charts/test3/templates/ingress.yaml | 61 +
.../charts/test3/templates/service.yaml | 15 +
.../test3/templates/serviceaccount.yaml | 13 +
.../templates/tests/test-connection.yaml | 15 +
.../helm-charts-test/charts/test3/values.yaml | 107 +
helm-charts-datalab/index.md | 3 +
index.md | 5 +
kk | 1 -
values.yaml | 18 +
67 files changed, 7385 insertions(+), 1 deletion(-)
create mode 100644 docker-images-datalab/activetigger/.drone.yml
create mode 100644 docker-images-datalab/activetigger/Dockerfile
create mode 100644 docker-images-datalab/activetigger/activetigger/embed_fasttext.py
create mode 100644 docker-images-datalab/activetigger/activetigger/embed_sbert.py
create mode 100644 docker-images-datalab/activetigger/activetigger/gobert.py
create mode 100644 docker-images-datalab/activetigger/activetigger/gobert_infer.py
create mode 100644 docker-images-datalab/activetigger/activetigger/modelnames.csv
create mode 100644 docker-images-datalab/activetigger/activetigger/server.R
create mode 100644 docker-images-datalab/activetigger/activetigger/tokenize_spacy.py
create mode 100644 docker-images-datalab/activetigger/activetigger/ui.R
create mode 100644 docker-images-datalab/activetigger/activetigger/www/active_tigger.jpeg
create mode 100644 docker-images-datalab/activetigger/activetigger/www/active_tigger.png
create mode 100644 docker-images-datalab/activetigger/activetigger/www/favicon.ico
create mode 100644 docker-images-datalab/activetigger/requirements.r
create mode 100644 docker-images-datalab/activetigger/requirementspython.txt
create mode 100644 docker-images-datalab/index.md
create mode 100644 docker-images-datalab/requirementspython.txt
create mode 100644 helm-charts-datalab/helm-charts-test/charts/activetigger/.helmignore
create mode 100644 helm-charts-datalab/helm-charts-test/charts/activetigger/Chart.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/activetigger/index.md
create mode 100644 helm-charts-datalab/helm-charts-test/charts/activetigger/templates/NOTES.txt
create mode 100644 helm-charts-datalab/helm-charts-test/charts/activetigger/templates/_helpers.tpl
create mode 100644 helm-charts-datalab/helm-charts-test/charts/activetigger/templates/deployment.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/activetigger/templates/hpa.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/activetigger/templates/ingress.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/activetigger/templates/service.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/activetigger/templates/serviceaccount.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/activetigger/templates/tests/test-connection.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/activetigger/values.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/overleaf/.helmignore
create mode 100644 helm-charts-datalab/helm-charts-test/charts/overleaf/Chart.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/overleaf/index.md
create mode 100644 helm-charts-datalab/helm-charts-test/charts/overleaf/templates/NOTES.txt
create mode 100644 helm-charts-datalab/helm-charts-test/charts/overleaf/templates/_helpers.tpl
create mode 100644 helm-charts-datalab/helm-charts-test/charts/overleaf/templates/deployment.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/overleaf/templates/hpa.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/overleaf/templates/ingress.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/overleaf/templates/service.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/overleaf/templates/serviceaccount.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/overleaf/templates/tests/test-connection.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/overleaf/values.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test2/.helmignore
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test2/Chart.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test2/templates/NOTES.txt
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test2/templates/_helpers.tpl
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test2/templates/deployment.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test2/templates/hpa.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test2/templates/ingress.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test2/templates/service.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test2/templates/serviceaccount.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test2/templates/tests/test-connection.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test2/values.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test3/.helmignore
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test3/Chart.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test3/templates/NOTES.txt
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test3/templates/_helpers.tpl
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test3/templates/deployment.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test3/templates/hpa.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test3/templates/ingress.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test3/templates/service.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test3/templates/serviceaccount.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test3/templates/tests/test-connection.yaml
create mode 100644 helm-charts-datalab/helm-charts-test/charts/test3/values.yaml
create mode 100644 helm-charts-datalab/index.md
create mode 100644 index.md
delete mode 100644 kk
create mode 100644 values.yaml
diff --git a/docker-images-datalab/activetigger/.drone.yml b/docker-images-datalab/activetigger/.drone.yml
new file mode 100644
index 0000000..81a0799
--- /dev/null
+++ b/docker-images-datalab/activetigger/.drone.yml
@@ -0,0 +1,48 @@
+kind: pipeline
+name: Build & publish main
+
+steps:
+ - name: publish-image
+ pull: always
+ image: plugins/kaniko:1.7.1-kaniko1.9.1
+ settings:
+ auto_tag: true
+ auto_tag_suffix: latest
+ registry: code.groupe-genes.fr
+ repo: code.groupe-genes.fr/datalab/docker-images-datalab/activetigger
+ username:
+ from_secret: docker_username
+ password:
+ from_secret: docker_password
+ when:
+ event:
+ exclude:
+ - pull_request
+ - name: deploy
+ image: alpine
+ environment:
+ kubernetes_server:
+ from_secret: kubernetes_server
+ kubernetes_cert:
+ from_secret: kubernetes_cert
+ kubernetes_token:
+ from_secret: kubernetes_token
+ commands:
+ - apk add --no-cache curl
+ - curl -LL -o /usr/bin/kubectl "https://dl.k8s.io/release/v1.28.2/bin/linux/amd64/kubectl"
+ - curl -LL -o helm.tar.gz "https://get.helm.sh/helm-v3.14.0-linux-amd64.tar.gz"
+ - tar xf "helm.tar.gz" && mv ./linux-amd64/helm /usr/bin/helm
+ - chmod +x /usr/bin/kubectl
+ - chmod +x /usr/bin/helm
+ - kubectl config set-cluster default --server=$kubernetes_server --insecure-skip-tls-verify=true
+ - kubectl config set-credentials user --token=$kubernetes_token
+ - kubectl config set-context default --user=user --cluster=default --namespace=activetigger
+ - kubectl config use-context default
+ - kubectl get pods
+ - helm ls -n activetigger --debug
+ - helm dependency build ./helm-chart
+ - helm upgrade activetigger ./helm-chart -f ./helm-chart/values.yaml -n activetigger
+ when:
+ event:
+ exclude:
+ - pull_request
\ No newline at end of file
diff --git a/docker-images-datalab/activetigger/Dockerfile b/docker-images-datalab/activetigger/Dockerfile
new file mode 100644
index 0000000..651fe7d
--- /dev/null
+++ b/docker-images-datalab/activetigger/Dockerfile
@@ -0,0 +1,111 @@
+FROM ubuntu:22.04
+ARG DEBIAN_FRONTEND=noninteractive
+
+COPY requirements.r /requirements.r
+COPY requirementspython.txt /requirementspython.txt
+
+
+# Installation python
+RUN apt-get update && apt-get install -y \
+ python3.10 \
+ python3.10-distutils \
+ python3.10-venv \
+ python3-pip \
+ r-base \
+ wget \
+ && apt-get clean
+
+# Installation R + shiny server
+RUN apt-get update && \
+ apt-get install -y r-base
+RUN R -e "install.packages('shiny', repos='https://cran.rstudio.com/')"
+
+# Install gdebi-core and shiny-server
+RUN apt-get update
+RUN apt-get install -y gdebi-core
+RUN wget https://download3.rstudio.org/ubuntu-18.04/x86_64/shiny-server-1.5.21.1012-amd64.deb
+RUN gdebi --non-interactive shiny-server-1.5.21.1012-amd64.deb
+
+
+## Packages package R (à installer depuis l'exécutable R employé par shiny server)
+RUN Rscript /requirements.r
+
+## Environnement python | a vérifier dans requirementspython.txt l'installation des cu118 se fais de cette manière
+
+
+
+# Install Miniconda
+ RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
+ bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda && \
+ rm Miniconda3-latest-Linux-x86_64.sh
+
+# Add Conda binaries to PATH
+ENV PATH="/opt/conda/bin:${PATH}"
+
+# Create a Conda environment and activate it
+RUN conda create -n tigger python==3.10 && \
+ echo "conda activate tigger" >> ~/.bashrc
+
+# Mise à jour et installation des dépendances système
+RUN rm -rf /var/lib/apt/lists/*
+
+RUN pip3 install --no-cache-dir \
+ torch torchvision torchaudio \
+ -f https://download.pytorch.org/whl/cu118/torch_stable.html
+
+RUN apt-get update && apt-get install -y curl build-essential
+RUN pip3 install --no-cache-dir six
+
+# Install Rust using rustup
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+
+# Add Cargo's bin directory to the PATH environment variable
+ENV PATH="/root/.cargo/bin:${PATH}"
+
+RUN pip3 install --no-cache-dir --upgrade setuptools
+
+
+# Autres installations de bibliothèques Python
+RUN pip3 install argparse
+RUN pip3 install datasets
+RUN pip3 install fasttext
+RUN pip3 install numpy
+RUN pip3 install pandas
+RUN pip3 install pyarrow
+RUN pip3 install scikit-learn
+RUN pip3 install sentence-transformers
+RUN pip3 install transformers
+RUN pip3 install typing-inspect==0.8.0
+RUN pip3 install typing-extensions==4.6.1
+RUN pip3 install spacy
+
+# Mettre en place des configurations supplémentaires si nécessaire
+
+# Commande par défaut à exécuter lorsque le conteneur démarre
+CMD ["/bin/bash"]
+
+## Téléchargement des modèles spacy et fasttext
+
+### Français
+WORKDIR ~
+RUN python -m spacy download fr_core_news_sm
+
+RUN wget https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.fr.300.bin.gz \
+ && gunzip cc.fr.300.bin.gz
+
+
+# A chaque création d'instance
+
+## Clone git pour créer la nouvelle instance (remplacer "tigger-name" par le nom que prendra l'instance, ie https://analytics.huma-num.fr/Prenom.Nom/tigger-name/)
+RUN mkdir -p ~/zPublish/shiny/tigger-name
+COPY activetigger/ ~/zPublish/shiny/tigger-name
+
+## Dans l'application
+
+## Tout en haut à gauche, bouton "+" pour "create project". Puis dans les champs :
+
+## - data directory: moi j'utilise toujours ~/tagging/domaine (genre ~/tagging/radio ou ~/tagging/journaux), mais c'est à toi de voir où tu veux que les données et tags soient stockées sur ton serveur
+## - je conseille de cocher toutes les cases : python, spacy, fasttext, sbert, gpu
+## - python : "~/conda/envs/tigger/bin/python"
+## - fasttext : "~/cc.fr.300.bin" (càd qu'il faut donner le chemin du modèle sur ton serveur, pas juste le nom)
+## - spacy et SBERT : garder les valeurs par défaut pour la langue choisie
diff --git a/docker-images-datalab/activetigger/activetigger/embed_fasttext.py b/docker-images-datalab/activetigger/activetigger/embed_fasttext.py
new file mode 100644
index 0000000..5c72316
--- /dev/null
+++ b/docker-images-datalab/activetigger/activetigger/embed_fasttext.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+## FastText embed sentences
+## Requires data file with columns id and text
+
+import argparse
+import fasttext
+from os.path import expanduser
+import pandas as pd
+import pyarrow as pa
+import pyarrow.feather as feather
+import re
+
+
+def main(args):
+ print("FastText: Importing data")
+ datapath = expanduser(args.data)
+ dat = feather.read_feather(datapath)
+ outfile = re.sub("[.]feather$", "_ft.feather", datapath)
+
+ print("FastText: Loading model")
+ ft = fasttext.load_model(expanduser(args.model))
+ print("FastText: Embedding sentences")
+ emb = [ft.get_sentence_vector(re.sub("\n", " ", x)) for x in dat["text"]]
+
+ print("FastText: Exporting")
+ emb = pd.DataFrame(emb)
+ emb.columns = ["ft%03d" % (x + 1) for x in range(len(emb.columns))]
+ emb = pd.concat([dat["id"], emb], axis=1)
+ feather.write_feather(emb, outfile)
+ print("FastText: Done")
+
+
+if __name__ == "__main__":
+ argParser = argparse.ArgumentParser()
+ argParser.add_argument("-m", "--model", help="Model path", default="/data/user/b/jboelaert/cc.fr.100.bin")
+ argParser.add_argument("-d", "--data", help="Path to data (feather)")
+ args = argParser.parse_args()
+ main(args)
+
diff --git a/docker-images-datalab/activetigger/activetigger/embed_sbert.py b/docker-images-datalab/activetigger/activetigger/embed_sbert.py
new file mode 100644
index 0000000..d944fd9
--- /dev/null
+++ b/docker-images-datalab/activetigger/activetigger/embed_sbert.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+## SBERT embed sentences
+## Requires data file with columns id and text
+
+import argparse
+from os.path import expanduser
+import pandas as pd
+import pyarrow as pa
+import pyarrow.feather as feather
+import re
+from sentence_transformers import SentenceTransformer
+
+
+def main(args):
+ print("SBERT: Importing data")
+ datapath = expanduser(args.data)
+ dat = feather.read_feather(datapath)
+ outfile = re.sub("[.]feather$", "_sb.feather", datapath)
+
+ print("SBERT: Loading model")
+ sbert = SentenceTransformer(expanduser(args.model))
+ sbert.max_seq_length = 512
+ print("SBERT: Embedding sentences")
+ emb = sbert.encode(dat["text"])
+
+ print("SBERT: Exporting")
+ emb = pd.DataFrame(emb)
+ emb.columns = ["sb%03d" % (x + 1) for x in range(len(emb.columns))]
+ emb = pd.concat([dat["id"], emb], axis=1)
+ feather.write_feather(emb, outfile)
+ print("SBERT: Done")
+
+
+if __name__ == "__main__":
+ argParser = argparse.ArgumentParser()
+ argParser.add_argument("-m", "--model", help="Model name or path", default="distiluse-base-multilingual-cased-v1")
+ argParser.add_argument("-d", "--data", help="Path to data (feather)")
+ args = argParser.parse_args()
+ main(args)
+
diff --git a/docker-images-datalab/activetigger/activetigger/gobert.py b/docker-images-datalab/activetigger/activetigger/gobert.py
new file mode 100644
index 0000000..32a9f04
--- /dev/null
+++ b/docker-images-datalab/activetigger/activetigger/gobert.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+## BERT trainer to be called by server.R
+## Requires two data files with columns id, label and text
+
+import argparse
+import datasets
+from datasets import load_metric
+import numpy as np
+from os.path import expanduser
+import os
+import pandas as pd
+import re
+from sklearn import metrics
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from transformers import Trainer, TrainingArguments, TrainerCallback
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+def main(args):
+ print("Importing data")
+ dattrain = pd.read_csv(expanduser(args.traindat))
+ datval = pd.read_csv(expanduser(args.valdat))
+ datval_id = datval["id"]
+ classcolname = "label"
+
+ ## Make class_names
+ class_names = [x for x in dattrain[classcolname].unique()]
+
+ ## Labels to class number
+ dattrain[classcolname] = [class_names.index(x) for x in dattrain[classcolname].to_list()]
+ datval[classcolname] = [class_names.index(x) for x in datval[classcolname].to_list()]
+
+ ## Transform to datasets
+ dattrain = datasets.Dataset.from_pandas(dattrain[['text', 'label']])
+ datval = datasets.Dataset.from_pandas(datval[['text', 'label']])
+
+ # Model choice
+ modelname = expanduser(args.model)
+
+ ## Tokenizer
+ print("Tokenizing")
+
+ tokenizer = AutoTokenizer.from_pretrained(modelname)
+
+ # toktrain = dattrain.map(lambda e: tokenizer(e['text'], truncation=True, padding="max_length"), batched=True)
+ # toktest = datval.map(lambda e: tokenizer(e['text'], truncation=True, padding="max_length"), batched=True)
+ if args.adapt:
+ toktrain = dattrain.map(lambda e: tokenizer(e['text'], truncation=True, padding=True, max_length=512), batched=True)
+ toktest = datval.map(lambda e: tokenizer(e['text'], truncation=True, padding=True, max_length=512), batched=True)
+ else:
+ toktrain = dattrain.map(lambda e: tokenizer(e['text'], truncation=True, padding="max_length", max_length=512), batched=True)
+ toktest = datval.map(lambda e: tokenizer(e['text'], truncation=True, padding="max_length", max_length=512), batched=True)
+
+ del(dattrain)
+
+ ## Model
+ print("Loading model")
+ model = AutoModelForSequenceClassification.from_pretrained(modelname, num_labels = len(class_names))
+ if (args.gpu):
+ model.cuda()
+
+ ## Train using Trainer interface
+ print("Training...")
+ BATCH_SIZE = args.batchsize
+ GRAD_ACC = args.gradacc
+ epochs = args.epochs
+
+ total_steps = (epochs * len(toktrain)) // (BATCH_SIZE * GRAD_ACC)
+ warmup_steps = (total_steps) // 10
+ eval_steps = total_steps // args.eval
+
+ training_args = TrainingArguments(
+ output_dir=args.session + "_train",
+ learning_rate=args.lrate,
+ weight_decay=args.wdecay,
+ num_train_epochs=epochs,
+ gradient_accumulation_steps=GRAD_ACC,
+ per_device_train_batch_size=BATCH_SIZE,
+ # per_device_eval_batch_size=BATCH_SIZE,
+ per_device_eval_batch_size=32,
+ warmup_steps=warmup_steps,
+
+ eval_steps=eval_steps,
+ evaluation_strategy="steps",
+ save_strategy="steps",
+ save_steps=eval_steps,
+ logging_steps=eval_steps,
+ do_eval=True,
+ greater_is_better=False,
+ load_best_model_at_end=bool(args.best),
+ metric_for_best_model="eval_loss"
+ )
+
+ trainer = Trainer(model=model, args=training_args,
+ train_dataset=toktrain, eval_dataset=toktest)
+
+ the_session = args.session
+ class HaltCallback(TrainerCallback):
+ "A callback that checks for _stop file to interrupt training"
+
+ def on_step_begin(self, args, state, control, **kwargs):
+ if os.path.exists(the_session + "_stop"):
+ print("\nHalted by user.\n")
+ control.should_training_stop = True
+ return(control)
+ else:
+ print("\nNot halted by user.\n")
+
+ trainer.add_callback(HaltCallback)
+
+ trainer.train()
+
+ ## Add class names to model
+ label_to_id = {v: i for i, v in enumerate(class_names)}
+ model.config.label2id = label_to_id
+ model.config.id2label = {id: label for label, id in model.config.label2id.items()}
+
+ ## Save model
+ model.save_pretrained(args.session)
+
+
+ ## Prediction functions
+
+
+ def get_predprobs(text):
+ # inputs = tokenizer(text, padding="max_length", truncation=True, return_tensors="pt")
+ inputs = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors="pt")
+ if (args.gpu):
+ inputs = inputs.to("cuda")
+ outputs = model(**inputs)
+ res = outputs[0]
+ if (args.gpu):
+ res = res.cpu()
+ res = res.softmax(1).detach().numpy()
+ return res
+
+
+ def get_prediction(text):
+ return class_names[get_predprobs(text).argmax()]
+
+ ## Metrics on validation set
+ print("Computing predictions")
+ testpred = [get_prediction(txt) for txt in datval["text"]]
+ testtruth = [class_names[x] for x in datval["label"]]
+
+ exportpred = pd.DataFrame(datval_id)
+ exportpred.columns = ["id"]
+ exportpred["bertpred"] = testpred
+ exportpred.to_csv(args.session + "_predval.csv", index=False)
+
+
+if __name__ == "__main__":
+ argParser = argparse.ArgumentParser()
+ argParser.add_argument("-m", "--model", help="Model name or path", default="microsoft/Multilingual-MiniLM-L12-H384")
+ argParser.add_argument("-t", "--traindat", help="Path to training data")
+ argParser.add_argument("-v", "--valdat", help="Path to validation data")
+ argParser.add_argument("-b", "--batchsize", help="Batch size for training", type=int, default=4)
+ argParser.add_argument("-g", "--gradacc", help="Gradient accumulation for training", type=int, default=1)
+ argParser.add_argument("-e", "--epochs", help="Number of training epochs", type=float, default=3)
+ argParser.add_argument("-l", "--lrate", help="Learning rate", type=float, default=5e-05)
+ argParser.add_argument("-w", "--wdecay", help="Weight decay", type=float, default=.01)
+ argParser.add_argument("-B", "--best", help="Load best model instead of last", type=int, choices=[0,1], default=1)
+ argParser.add_argument("-E", "--eval", help="Number of intermediary evaluations", type=int, default=10)
+ argParser.add_argument("-s", "--session", help="Session name (used to save results)")
+ argParser.add_argument("-G", "--gpu", help="Use GPU (CUDA)", type=int, choices=[0,1], default=0)
+ argParser.add_argument("-A", "--adapt", help="Adapt token length to batch", type=int, choices=[0,1], default=1)
+
+
+ args = argParser.parse_args()
+
+ main(args)
+
diff --git a/docker-images-datalab/activetigger/activetigger/gobert_infer.py b/docker-images-datalab/activetigger/activetigger/gobert_infer.py
new file mode 100644
index 0000000..c2c607a
--- /dev/null
+++ b/docker-images-datalab/activetigger/activetigger/gobert_infer.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+## BERT inference to be called by server.R
+
+import argparse
+import datasets
+import json
+import numpy as np
+from os import path, remove
+import pandas as pd
+import pyarrow.feather as feather
+import re
+from torch import no_grad
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+
+def chunker(seq, batch_size):
+ return (seq[pos:pos + batch_size] for pos in range(0, len(seq), batch_size))
+
+
+def main(args):
+ print("Importing data")
+ with open(path.expanduser(args.logfile), "w") as progfile:
+ progfile.write("Importing data")
+
+ dat = feather.read_feather(path.expanduser(args.dat))
+
+ with open(path.expanduser(args.logfile), "w") as progfile:
+ progfile.write("Tokenizing")
+
+ ## Tokenizer
+ print("Tokenizing")
+ with open(path.join(path.expanduser(args.model), "config.json"), "r") as jsonfile:
+ modeltype = json.load(jsonfile)["_name_or_path"]
+
+ tokenizer = AutoTokenizer.from_pretrained(modeltype)
+
+ ## Model
+ print("Loading model")
+ model = AutoModelForSequenceClassification.from_pretrained(path.expanduser(args.model))
+ if (args.gpu):
+ model.cuda()
+
+ ## Prediction functions
+
+
+ def get_predprobs(text):
+ inputs = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors="pt")
+ if (args.gpu):
+ inputs = inputs.to("cuda")
+ with no_grad():
+ outputs = model(**inputs)
+ res = outputs[0]
+ if (args.gpu):
+ res = res.cpu()
+ res = res.softmax(1).detach().numpy()
+ return res
+
+ print("Computing predictions")
+
+ chunks = chunker([str(x) for x in dat[args.txtname]], args.batch)
+ pred = []
+ for i, x in enumerate(chunks):
+ if (i % 5 == 0):
+ percent = round(100 * i * args.batch / len(dat), 1)
+ logmsg = "Computing: " + str(percent) + "% (" + str(i * args.batch) + "/" + str(len(dat)) + ")"
+ with open(path.expanduser(args.logfile), "w") as progfile:
+ progfile.write(logmsg)
+ pred.append(get_predprobs(x))
+
+ pred = np.concatenate(pred)
+ pred = pd.DataFrame(pred)
+ pred.columns = ["bertpred_" + v for i, v in model.config.id2label.items()]
+ pred = pd.concat([dat[args.idname], pred], axis=1)
+ feather.write_feather(pred, path.abspath(args.output))
+ remove(path.expanduser(args.logfile))
+
+
+if __name__ == "__main__":
+ argParser = argparse.ArgumentParser()
+ argParser.add_argument("-m", "--model", help="Trained model path")
+ argParser.add_argument("-d", "--dat", help="Path to data (feather file)")
+ argParser.add_argument("-o", "--output", help="Output path of predictions", default="tiggerbert.feather")
+ argParser.add_argument("-i", "--idname", help="Name of id variable", default="id")
+ argParser.add_argument("-x", "--txtname", help="Name of text variable", default="text")
+ argParser.add_argument("-l", "--logfile", help="Path to log file", default="tiggerbert-progress.txt")
+ argParser.add_argument("-G", "--gpu", help="Use GPU (CUDA)", type=int, choices=[0,1], default=1)
+ argParser.add_argument("-b", "--batch", help="Batch size", type=int, default=128)
+
+ args = argParser.parse_args()
+
+ main(args)
+
diff --git a/docker-images-datalab/activetigger/activetigger/modelnames.csv b/docker-images-datalab/activetigger/activetigger/modelnames.csv
new file mode 100644
index 0000000..a49e67e
--- /dev/null
+++ b/docker-images-datalab/activetigger/activetigger/modelnames.csv
@@ -0,0 +1,159 @@
+"short","spacy_name","fasttext_name","fasttext_url","language","short_lang"
+"af","xx_ent_wiki_sm","cc.af.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.af.300.bin.gz","Afrikaans","(af) Afrikaans"
+"als","xx_ent_wiki_sm","cc.als.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.als.300.bin.gz","Alemannic","(als) Alemannic"
+"am","xx_ent_wiki_sm","cc.am.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.am.300.bin.gz","Amharic","(am) Amharic"
+"an","xx_ent_wiki_sm","cc.an.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.an.300.bin.gz","Aragonese","(an) Aragonese"
+"ar","xx_ent_wiki_sm","cc.ar.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ar.300.bin.gz","Arabic","(ar) Arabic"
+"arz","xx_ent_wiki_sm","cc.arz.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.arz.300.bin.gz","Egyptian Arabic","(arz) Egyptian Arabic"
+"as","xx_ent_wiki_sm","cc.as.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.as.300.bin.gz","Assamese","(as) Assamese"
+"ast","xx_ent_wiki_sm","cc.ast.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ast.300.bin.gz","Asturian","(ast) Asturian"
+"az","xx_ent_wiki_sm","cc.az.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.az.300.bin.gz","Azerbaijani","(az) Azerbaijani"
+"azb","xx_ent_wiki_sm","cc.azb.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.azb.300.bin.gz","Southern Azerbaijani","(azb) Southern Azerbaijani"
+"ba","xx_ent_wiki_sm","cc.ba.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ba.300.bin.gz","Bashkir","(ba) Bashkir"
+"bar","xx_ent_wiki_sm","cc.bar.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bar.300.bin.gz","Bavarian","(bar) Bavarian"
+"bcl","xx_ent_wiki_sm","cc.bcl.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bcl.300.bin.gz","Central Bicolano","(bcl) Central Bicolano"
+"be","xx_ent_wiki_sm","cc.be.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.be.300.bin.gz","Belarusian","(be) Belarusian"
+"bg","xx_ent_wiki_sm","cc.bg.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bg.300.bin.gz","Bulgarian","(bg) Bulgarian"
+"bh","xx_ent_wiki_sm","cc.bh.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bh.300.bin.gz","Bihari","(bh) Bihari"
+"bn","xx_ent_wiki_sm","cc.bn.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bn.300.bin.gz","Bengali","(bn) Bengali"
+"bo","xx_ent_wiki_sm","cc.bo.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bo.300.bin.gz","Tibetan","(bo) Tibetan"
+"bpy","xx_ent_wiki_sm","cc.bpy.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bpy.300.bin.gz","Bishnupriya Manipuri","(bpy) Bishnupriya Manipuri"
+"br","xx_ent_wiki_sm","cc.br.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.br.300.bin.gz","Breton","(br) Breton"
+"bs","xx_ent_wiki_sm","cc.bs.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bs.300.bin.gz","Bosnian","(bs) Bosnian"
+"ca","ca_core_news_sm","cc.ca.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ca.300.bin.gz","Catalan","(ca) Catalan"
+"ce","xx_ent_wiki_sm","cc.ce.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ce.300.bin.gz","Chechen","(ce) Chechen"
+"ceb","xx_ent_wiki_sm","cc.ceb.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ceb.300.bin.gz","Cebuano","(ceb) Cebuano"
+"ckb","xx_ent_wiki_sm","cc.ckb.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ckb.300.bin.gz","Kurdish (Sorani)","(ckb) Kurdish (Sorani)"
+"co","xx_ent_wiki_sm","cc.co.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.co.300.bin.gz","Corsican","(co) Corsican"
+"cs","xx_ent_wiki_sm","cc.cs.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.cs.300.bin.gz","Czech","(cs) Czech"
+"cv","xx_ent_wiki_sm","cc.cv.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.cv.300.bin.gz","Chuvash","(cv) Chuvash"
+"cy","xx_ent_wiki_sm","cc.cy.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.cy.300.bin.gz","Welsh","(cy) Welsh"
+"da","da_core_news_sm","cc.da.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.da.300.bin.gz","Danish","(da) Danish"
+"de","de_core_news_sm","cc.de.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.de.300.bin.gz","German","(de) German"
+"diq","xx_ent_wiki_sm","cc.diq.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.diq.300.bin.gz","Zazaki","(diq) Zazaki"
+"dv","xx_ent_wiki_sm","cc.dv.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.dv.300.bin.gz","Divehi","(dv) Divehi"
+"el","el_core_news_sm","cc.el.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.el.300.bin.gz","Greek","(el) Greek"
+"eml","xx_ent_wiki_sm","cc.eml.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.eml.300.bin.gz","Emilian-Romagnol","(eml) Emilian-Romagnol"
+"en","en_core_web_sm","cc.en.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz","English","(en) English"
+"eo","xx_ent_wiki_sm","cc.eo.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.eo.300.bin.gz","Esperanto","(eo) Esperanto"
+"es","es_core_news_sm","cc.es.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.es.300.bin.gz","Spanish","(es) Spanish"
+"et","xx_ent_wiki_sm","cc.et.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.et.300.bin.gz","Estonian","(et) Estonian"
+"eu","xx_ent_wiki_sm","cc.eu.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.eu.300.bin.gz","Basque","(eu) Basque"
+"fa","xx_ent_wiki_sm","cc.fa.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.fa.300.bin.gz","Persian","(fa) Persian"
+"fi","fi_core_news_sm","cc.fi.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.fi.300.bin.gz","Finnish","(fi) Finnish"
+"fr","fr_core_news_sm","cc.fr.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.fr.300.bin.gz","French","(fr) French"
+"frr","xx_ent_wiki_sm","cc.frr.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.frr.300.bin.gz","North Frisian","(frr) North Frisian"
+"fy","xx_ent_wiki_sm","cc.fy.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.fy.300.bin.gz","West Frisian","(fy) West Frisian"
+"ga","xx_ent_wiki_sm","cc.ga.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ga.300.bin.gz","Irish","(ga) Irish"
+"gd","xx_ent_wiki_sm","cc.gd.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.gd.300.bin.gz","Scottish Gaelic","(gd) Scottish Gaelic"
+"gl","xx_ent_wiki_sm","cc.gl.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.gl.300.bin.gz","Galician","(gl) Galician"
+"gom","xx_ent_wiki_sm","cc.gom.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.gom.300.bin.gz","Goan Konkani","(gom) Goan Konkani"
+"gu","xx_ent_wiki_sm","cc.gu.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.gu.300.bin.gz","Gujarati","(gu) Gujarati"
+"gv","xx_ent_wiki_sm","cc.gv.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.gv.300.bin.gz","Manx","(gv) Manx"
+"he","xx_ent_wiki_sm","cc.he.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.he.300.bin.gz","Hebrew","(he) Hebrew"
+"hi","xx_ent_wiki_sm","cc.hi.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.hi.300.bin.gz","Hindi","(hi) Hindi"
+"hif","xx_ent_wiki_sm","cc.hif.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.hif.300.bin.gz","Fiji Hindi","(hif) Fiji Hindi"
+"hr","hr_core_news_sm","cc.hr.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.hr.300.bin.gz","Croatian","(hr) Croatian"
+"hsb","xx_ent_wiki_sm","cc.hsb.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.hsb.300.bin.gz","Upper Sorbian","(hsb) Upper Sorbian"
+"ht","xx_ent_wiki_sm","cc.ht.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ht.300.bin.gz","Haitian","(ht) Haitian"
+"hu","xx_ent_wiki_sm","cc.hu.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.hu.300.bin.gz","Hungarian","(hu) Hungarian"
+"hy","xx_ent_wiki_sm","cc.hy.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.hy.300.bin.gz","Armenian","(hy) Armenian"
+"ia","xx_ent_wiki_sm","cc.ia.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ia.300.bin.gz","Interlingua","(ia) Interlingua"
+"id","xx_ent_wiki_sm","cc.id.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.id.300.bin.gz","Indonesian","(id) Indonesian"
+"ilo","xx_ent_wiki_sm","cc.ilo.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ilo.300.bin.gz","Ilokano","(ilo) Ilokano"
+"io","xx_ent_wiki_sm","cc.io.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.io.300.bin.gz","Ido","(io) Ido"
+"is","xx_ent_wiki_sm","cc.is.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.is.300.bin.gz","Icelandic","(is) Icelandic"
+"it","it_core_news_sm","cc.it.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.it.300.bin.gz","Italian","(it) Italian"
+"ja","ja_core_news_sm","cc.ja.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ja.300.bin.gz","Japanese","(ja) Japanese"
+"jv","xx_ent_wiki_sm","cc.jv.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.jv.300.bin.gz","Javanese","(jv) Javanese"
+"ka","xx_ent_wiki_sm","cc.ka.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ka.300.bin.gz","Georgian","(ka) Georgian"
+"kk","xx_ent_wiki_sm","cc.kk.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.kk.300.bin.gz","Kazakh","(kk) Kazakh"
+"km","xx_ent_wiki_sm","cc.km.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.km.300.bin.gz","Khmer","(km) Khmer"
+"kn","xx_ent_wiki_sm","cc.kn.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.kn.300.bin.gz","Kannada","(kn) Kannada"
+"ko","ko_core_news_sm","cc.ko.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ko.300.bin.gz","Korean","(ko) Korean"
+"ku","xx_ent_wiki_sm","cc.ku.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ku.300.bin.gz","Kurdish (Kurmanji)","(ku) Kurdish (Kurmanji)"
+"ky","xx_ent_wiki_sm","cc.ky.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ky.300.bin.gz","Kirghiz","(ky) Kirghiz"
+"la","xx_ent_wiki_sm","cc.la.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.la.300.bin.gz","Latin","(la) Latin"
+"lb","xx_ent_wiki_sm","cc.lb.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.lb.300.bin.gz","Luxembourgish","(lb) Luxembourgish"
+"li","xx_ent_wiki_sm","cc.li.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.li.300.bin.gz","Limburgish","(li) Limburgish"
+"lmo","xx_ent_wiki_sm","cc.lmo.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.lmo.300.bin.gz","Lombard","(lmo) Lombard"
+"lt","lt_core_news_sm","cc.lt.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.lt.300.bin.gz","Lithuanian","(lt) Lithuanian"
+"lv","xx_ent_wiki_sm","cc.lv.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.lv.300.bin.gz","Latvian","(lv) Latvian"
+"mai","xx_ent_wiki_sm","cc.mai.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mai.300.bin.gz","Maithili","(mai) Maithili"
+"mg","xx_ent_wiki_sm","cc.mg.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mg.300.bin.gz","Malagasy","(mg) Malagasy"
+"mhr","xx_ent_wiki_sm","cc.mhr.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mhr.300.bin.gz","Meadow Mari","(mhr) Meadow Mari"
+"min","xx_ent_wiki_sm","cc.min.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.min.300.bin.gz","Minangkabau","(min) Minangkabau"
+"mk","mk_core_news_sm","cc.mk.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mk.300.bin.gz","Macedonian","(mk) Macedonian"
+"ml","xx_ent_wiki_sm","cc.ml.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ml.300.bin.gz","Malayalam","(ml) Malayalam"
+"mn","xx_ent_wiki_sm","cc.mn.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mn.300.bin.gz","Mongolian","(mn) Mongolian"
+"mr","xx_ent_wiki_sm","cc.mr.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mr.300.bin.gz","Marathi","(mr) Marathi"
+"mrj","xx_ent_wiki_sm","cc.mrj.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mrj.300.bin.gz","Hill Mari","(mrj) Hill Mari"
+"ms","xx_ent_wiki_sm","cc.ms.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ms.300.bin.gz","Malay","(ms) Malay"
+"mt","xx_ent_wiki_sm","cc.mt.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mt.300.bin.gz","Maltese","(mt) Maltese"
+"mwl","xx_ent_wiki_sm","cc.mwl.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mwl.300.bin.gz","Mirandese","(mwl) Mirandese"
+"my","xx_ent_wiki_sm","cc.my.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.my.300.bin.gz","Burmese","(my) Burmese"
+"myv","xx_ent_wiki_sm","cc.myv.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.myv.300.bin.gz","Erzya","(myv) Erzya"
+"mzn","xx_ent_wiki_sm","cc.mzn.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mzn.300.bin.gz","Mazandarani","(mzn) Mazandarani"
+"nah","xx_ent_wiki_sm","cc.nah.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nah.300.bin.gz","Nahuatl","(nah) Nahuatl"
+"nap","xx_ent_wiki_sm","cc.nap.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nap.300.bin.gz","Neapolitan","(nap) Neapolitan"
+"nds","xx_ent_wiki_sm","cc.nds.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nds.300.bin.gz","Low Saxon","(nds) Low Saxon"
+"ne","xx_ent_wiki_sm","cc.ne.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ne.300.bin.gz","Nepali","(ne) Nepali"
+"new","xx_ent_wiki_sm","cc.new.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.new.300.bin.gz","Newar","(new) Newar"
+"nl","nl_core_news_sm","cc.nl.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nl.300.bin.gz","Dutch","(nl) Dutch"
+"nn","xx_ent_wiki_sm","cc.nn.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nn.300.bin.gz","Norwegian (Nynorsk)","(nn) Norwegian (Nynorsk)"
+"no","xx_ent_wiki_sm","cc.no.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.no.300.bin.gz","Norwegian (Bokmål)","(no) Norwegian (Bokmål)"
+"nso","xx_ent_wiki_sm","cc.nso.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nso.300.bin.gz","Northern Sotho","(nso) Northern Sotho"
+"oc","xx_ent_wiki_sm","cc.oc.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.oc.300.bin.gz","Occitan","(oc) Occitan"
+"or","xx_ent_wiki_sm","cc.or.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.or.300.bin.gz","Oriya","(or) Oriya"
+"os","xx_ent_wiki_sm","cc.os.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.os.300.bin.gz","Ossetian","(os) Ossetian"
+"pa","xx_ent_wiki_sm","cc.pa.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pa.300.bin.gz","Eastern Punjabi","(pa) Eastern Punjabi"
+"pam","xx_ent_wiki_sm","cc.pam.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pam.300.bin.gz","Kapampangan","(pam) Kapampangan"
+"pfl","xx_ent_wiki_sm","cc.pfl.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pfl.300.bin.gz","Palatinate German","(pfl) Palatinate German"
+"pl","pl_core_news_sm","cc.pl.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pl.300.bin.gz","Polish","(pl) Polish"
+"pms","xx_ent_wiki_sm","cc.pms.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pms.300.bin.gz","Piedmontese","(pms) Piedmontese"
+"pnb","xx_ent_wiki_sm","cc.pnb.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pnb.300.bin.gz","Western Punjabi","(pnb) Western Punjabi"
+"ps","xx_ent_wiki_sm","cc.ps.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ps.300.bin.gz","Pashto","(ps) Pashto"
+"pt","pt_core_news_sm","cc.pt.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pt.300.bin.gz","Portuguese","(pt) Portuguese"
+"qu","xx_ent_wiki_sm","cc.qu.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.qu.300.bin.gz","Quechua","(qu) Quechua"
+"rm","xx_ent_wiki_sm","cc.rm.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.rm.300.bin.gz","Romansh","(rm) Romansh"
+"ro","ro_core_news_sm","cc.ro.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ro.300.bin.gz","Romanian","(ro) Romanian"
+"ru","ru_core_news_sm","cc.ru.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ru.300.bin.gz","Russian","(ru) Russian"
+"sa","xx_ent_wiki_sm","cc.sa.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sa.300.bin.gz","Sanskrit","(sa) Sanskrit"
+"sah","xx_ent_wiki_sm","cc.sah.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sah.300.bin.gz","Sakha","(sah) Sakha"
+"sc","xx_ent_wiki_sm","cc.sc.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sc.300.bin.gz","Sardinian","(sc) Sardinian"
+"scn","xx_ent_wiki_sm","cc.scn.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.scn.300.bin.gz","Sicilian","(scn) Sicilian"
+"sco","xx_ent_wiki_sm","cc.sco.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sco.300.bin.gz","Scots","(sco) Scots"
+"sd","xx_ent_wiki_sm","cc.sd.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sd.300.bin.gz","Sindhi","(sd) Sindhi"
+"sh","xx_ent_wiki_sm","cc.sh.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sh.300.bin.gz","Serbo-Croatian","(sh) Serbo-Croatian"
+"si","xx_ent_wiki_sm","cc.si.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.si.300.bin.gz","Sinhalese","(si) Sinhalese"
+"sk","xx_ent_wiki_sm","cc.sk.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sk.300.bin.gz","Slovak","(sk) Slovak"
+"sl","sl_core_news_sm","cc.sl.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sl.300.bin.gz","Slovenian","(sl) Slovenian"
+"so","xx_ent_wiki_sm","cc.so.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.so.300.bin.gz","Somali","(so) Somali"
+"sq","xx_ent_wiki_sm","cc.sq.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sq.300.bin.gz","Albanian","(sq) Albanian"
+"sr","xx_ent_wiki_sm","cc.sr.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sr.300.bin.gz","Serbian","(sr) Serbian"
+"su","xx_ent_wiki_sm","cc.su.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.su.300.bin.gz","Sundanese","(su) Sundanese"
+"sv","sv_core_news_sm","cc.sv.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sv.300.bin.gz","Swedish","(sv) Swedish"
+"sw","xx_ent_wiki_sm","cc.sw.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sw.300.bin.gz","Swahili","(sw) Swahili"
+"ta","xx_ent_wiki_sm","cc.ta.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ta.300.bin.gz","Tamil","(ta) Tamil"
+"te","xx_ent_wiki_sm","cc.te.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.te.300.bin.gz","Telugu","(te) Telugu"
+"tg","xx_ent_wiki_sm","cc.tg.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.tg.300.bin.gz","Tajik","(tg) Tajik"
+"th","xx_ent_wiki_sm","cc.th.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.th.300.bin.gz","Thai","(th) Thai"
+"tk","xx_ent_wiki_sm","cc.tk.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.tk.300.bin.gz","Turkmen","(tk) Turkmen"
+"tl","xx_ent_wiki_sm","cc.tl.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.tl.300.bin.gz","Tagalog","(tl) Tagalog"
+"tr","xx_ent_wiki_sm","cc.tr.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.tr.300.bin.gz","Turkish","(tr) Turkish"
+"tt","xx_ent_wiki_sm","cc.tt.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.tt.300.bin.gz","Tatar","(tt) Tatar"
+"ug","xx_ent_wiki_sm","cc.ug.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ug.300.bin.gz","Uyghur","(ug) Uyghur"
+"uk","uk_core_news_sm","cc.uk.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.uk.300.bin.gz","Ukrainian","(uk) Ukrainian"
+"ur","xx_ent_wiki_sm","cc.ur.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ur.300.bin.gz","Urdu","(ur) Urdu"
+"uz","xx_ent_wiki_sm","cc.uz.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.uz.300.bin.gz","Uzbek","(uz) Uzbek"
+"vec","xx_ent_wiki_sm","cc.vec.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.vec.300.bin.gz","Venetian","(vec) Venetian"
+"vi","xx_ent_wiki_sm","cc.vi.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.vi.300.bin.gz","Vietnamese","(vi) Vietnamese"
+"vls","xx_ent_wiki_sm","cc.vls.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.vls.300.bin.gz","West Flemish","(vls) West Flemish"
+"vo","xx_ent_wiki_sm","cc.vo.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.vo.300.bin.gz","Volapük","(vo) Volapük"
+"wa","xx_ent_wiki_sm","cc.wa.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.wa.300.bin.gz","Walloon","(wa) Walloon"
+"war","xx_ent_wiki_sm","cc.war.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.war.300.bin.gz","Waray","(war) Waray"
+"xmf","xx_ent_wiki_sm","cc.xmf.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.xmf.300.bin.gz","Mingrelian","(xmf) Mingrelian"
+"yi","xx_ent_wiki_sm","cc.yi.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.yi.300.bin.gz","Yiddish","(yi) Yiddish"
+"yo","xx_ent_wiki_sm","cc.yo.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.yo.300.bin.gz","Yoruba","(yo) Yoruba"
+"zea","xx_ent_wiki_sm","cc.zea.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.zea.300.bin.gz","Zeelandic","(zea) Zeelandic"
+"zh","zh_core_web_sm","cc.zh.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.zh.300.bin.gz","Chinese","(zh) Chinese"
diff --git a/docker-images-datalab/activetigger/activetigger/server.R b/docker-images-datalab/activetigger/activetigger/server.R
new file mode 100644
index 0000000..89a7ae6
--- /dev/null
+++ b/docker-images-datalab/activetigger/activetigger/server.R
@@ -0,0 +1,4143 @@
+
+pkgs <- c("arrow", "class", "DT", "future", "ggplot2", "glmnet", "htmlTable",
+ "LiblineaR", "Matrix","Metrics", "promises", "quanteda",
+ "quanteda.textmodels", "ranger", "rlang", "Rtsne",
+ "RJSONIO", "shiny", "SparseM", "stringi", "uwot")
+pkgs <- pkgs[! pkgs %in% rownames(installed.packages())]
+if (length(pkgs)) {
+ warning(paste0("Packages ", paste0(pkgs, collapse = " , "), " not available. ",
+ "App might crash."))
+}
+# for (i in seq_along(pkgs)) install.packages(pkgs[i])
+
+verbose <- TRUE
+# sink("ActiveTigger.log")
+
+options(shiny.maxRequestSize = 30*1024^3) # File upload size
+
+modelnames <- read.csv("modelnames.csv")
+modelnames_labels <- modelnames$short
+names(modelnames_labels) <- modelnames$short_lang
+
+library(quanteda)
+library(ggplot2)
+library(promises)
+library(future)
+plan(multisession)
+
+
+################################################################################
+## Global functions
+################################################################################
+
+label2hash <- function(label) rlang::hash(label)
+
+cleanFileName <- function(name)
+ gsub('\\s|[/]|[\\]|[:]|[*]|[?]|["]|[<]|[>]|[+]|[$]|[=]', "_",
+ gsub("^\\s+|\\s+$", "", name))
+
+wtdF1 <- function(truth, pred) {
+ pred <- pred[!is.na(truth)]
+ truth <- truth[!is.na(truth)]
+ sum(sapply(unique(truth), function(ilab)
+ Metrics::fbeta_score(truth == ilab, pred == ilab) *
+ sum(truth == ilab) / length(truth)), na.rm = TRUE)
+}
+
+metricsTable <- function(truth, pred) {
+ do.call(rbind, lapply(sort(unique(truth)), function(ilab) {
+ tmptrue <- truth == ilab
+ tmppred <- pred == ilab
+ tmp <- data.frame(
+ "Tag" = ilab,
+ "N.cases" = sum(tmptrue),
+ "N.predict" = sum(tmppred),
+ "Accuracy" = sprintf("%f", round(Metrics::accuracy(tmptrue, tmppred), 6)),
+ "Precision" = sprintf("%f", round(Metrics::precision(tmptrue, tmppred), 6)),
+ "Recall" = sprintf("%f", round(Metrics::recall(tmptrue, tmppred), 6)),
+ "F1" = sprintf("%f", round(Metrics::fbeta_score(tmptrue, tmppred), 6)))
+ tmp$F1[tmp$F1 == "NaN"] <- sprintf("%f", 0)
+ tmp
+ }))
+}
+
+bertPlotFromPath <- function(path) {
+ if (!file.exists(paste0(path, "/trainer_state.json"))) return(NULL)
+ state <- RJSONIO::fromJSON(paste0(path, "/trainer_state.json"))
+ logs <- data.table::rbindlist(lapply(state$log_history, as.list), fill = TRUE)
+ epochmin <- logs$epoch[which.min(logs$eval_loss)]
+
+ gdat <- data.table::rbindlist(
+ use.names = FALSE,
+ list(
+ cbind("train", logs[!is.na(logs$loss), c("epoch", "loss")]),
+ cbind("valid", logs[!is.na(logs$eval_loss), c("epoch", "eval_loss")])
+ ))
+ colnames(gdat) <- c("data", "epoch", "loss")
+
+ ggplot(gdat, aes_string("epoch", "loss", color = "data", shape = "data")) +
+ geom_point(size = 2) + geom_line() +
+ geom_vline(xintercept = epochmin, linetype = 2) +
+ theme_bw() + expand_limits(y=0)
+}
+
+bertHyperToMsg <- function(pars) {
+ if (is.null(pars)) return(NULL)
+ paste0("Scheme : ", pars$bertScheme, "; ",
+ "Model : ", pars$bertModel, "\n* ",
+ "Epochs : ", pars$bertEpochs, " ; ",
+ "Lrate : ", pars$bertLrate, " ; ",
+ "Wdecay : ", pars$bertWdecay, " ; ",
+ "Batch size : ", pars$bertBatchsize, " ; ",
+ "Grad. accum. : ", pars$bertGradacc, "\n* ",
+ "Keep best : ", pars$bertBest, " ; ",
+ "ValidFrac : ", pars$bertValidFrac, " ; ",
+ "ValidSeed : ", pars$bertValidSeed, " \n* ",
+ pars$bertNtrain, " training samples : ",
+ paste0(names(pars$bertTrainTable), " (",
+ as.numeric(pars$bertTrainTable), ")",
+ collapse = " ; "))
+}
+
+get_status <- function(dafile)
+ scan(dafile, what = "character", sep="\n", quiet = TRUE)
+set_status <- function(dafile, msg) write(msg, dafile)
+
+
+
+################################################################################
+## Main server function
+################################################################################
+
+# options(shiny.maxRequestSize=2^34) # Max filesize
+
+shinyServer(function(input, output, session) {
+
+ values <- reactiveValues()
+ values$visuGo <- FALSE
+ values$dfmGo <- FALSE
+ # queryTrigger <- reactiveVal(0)
+ # predlabTrigger <- reactiveVal(0)
+ initDataTrigger <- reactiveVal(0)
+ queryNext <- reactiveVal(0)
+ trainTrigger <- reactiveVal(0)
+ predTrigger <- reactiveVal(0)
+ diagTrigger <- reactiveVal(0)
+ ready2tag <- reactiveVal(0)
+ trainCountdown <- reactiveVal(0)
+
+ ok.data <- reactiveVal()
+ ok.data.async <- reactiveVal()
+ ok.data.running <- reactiveVal(FALSE)
+
+ bpall_nclicks <- reactiveVal(0)
+ bertpred_nclicks <- reactiveVal(0)
+
+ ## NEW use_regressors list as values$new_use_regressors
+ observeEvent(input$use_regressors, {
+ values$new_use_regressors <- input$use_regressors
+ cat("DEBUG new_use_regressors: ", paste(input$use_regressors, collapse = " "), "\n")
+ })
+
+ ## NEW trainCountdown with input$trainCountdown
+ ## Policy: 0, don't train ; 1, trigger train on tag ; >1, decrement on tag
+ observeEvent(input$trainCountdown, trainCountdown(input$trainCountdown))
+
+ ####################
+ ## Projects and data
+
+ ## Read config files, update UI forms accordingly
+ ## This should happen only once
+ observe({
+ if (verbose)
+ cat("DEBUG read project configs\n")
+ # values$conf <- RJSONIO::fromJSON("conf.json")
+ projfiles <- dir(pattern = "^tigger_.*[.]json$")
+ isolate({
+ if (!length(projfiles)) {
+ values$conf <- NULL
+ return(NULL)
+ }
+ values$confnames <- gsub("^tigger_(.*)[.]json$", "\\1", projfiles)
+
+ values$conf <- RJSONIO::fromJSON(projfiles[1])
+ })
+ })
+
+ ## When projects added, update selector
+ observeEvent(values$confnames, {
+ if (verbose)
+ cat("DEBUG update selectProject from confnames\n")
+ updateSelectInput(session, "selectProject", choices = values$confnames)
+ })
+
+ ## Event from project change
+ observeEvent(input$selectProject, {
+ if (is.null(input$selectProject)) return(NULL)
+ if (!nchar(input$selectProject)) return(NULL)
+ if (verbose)
+ cat("DEBUG project change step 1\n")
+ # values$project <- input$selectProject
+ # values$conf <- values$confs[[values$project]]
+ values$conf <- RJSONIO::fromJSON(
+ paste0("tigger_", input$selectProject, ".json"))
+ ok.data(NULL)
+ ok.visu(NULL)
+ initDataTrigger(initDataTrigger() + 1)
+
+ if (verbose)
+ cat("DEBUG project change step 2\n")
+ if (!is.null(values$conf$dataNrows))
+ updateNumericInput(session, "dataNrows", value = as.numeric(values$conf$dataNrows))
+ if (!is.null(values$conf$dataSkipRows))
+ updateNumericInput(session, "dataSkipRows", value = as.numeric(values$conf$dataSkipRows))
+
+ #### Python ####
+ ## Default use_python if not found in conf file: FALSE
+ if(is.null(values$conf$use_python))
+ values$conf$use_python <- FALSE
+ updateCheckboxInput(session, "sys_use_python", value = values$conf$use_python)
+
+ ## Set default python if not found in conf file
+ if(is.null(values$conf$python))
+ values$conf$python <- "python3"
+ updateTextInput(session, "sys_which_python", value = values$conf$python)
+
+ values$python_ok <- FALSE
+ if (values$conf$use_python) {
+ ## Check whether python is working
+ pytest <- try(system(paste(values$conf$python, "--version"),
+ intern = TRUE))
+ if (inherits(pytest, "try-error")) {
+ showNotification(paste(
+ "Python path `", values$conf$python,
+ "` not valid, try changing it in Project/System tab"),
+ duration = 10, type = "error")
+ } else
+ values$python_ok <- TRUE
+ }
+ if (verbose)
+ cat("Python ok:", values$python_ok, "\n")
+
+
+ #### GPU ####
+ if (!is.null(values$conf$use_gpu))
+ updateCheckboxInput(session, "sys_use_gpu", value = values$conf$use_gpu)
+
+ #### Spacy ####
+ if (!is.null(values$conf$use_spacy))
+ updateCheckboxInput(session, "sys_use_spacy", value = values$conf$use_spacy)
+ updateTextInput(session, "sys_use_spacy_model", value = values$conf$use_spacy_model)
+
+ #### fastText ####
+ if (!is.null(values$conf$use_ft))
+ updateCheckboxInput(session, "sys_use_ft", value = values$conf$use_ft)
+ updateTextInput(session, "sys_use_ft_model", value = values$conf$use_ft_model)
+
+ #### SBERT ####
+ if (!is.null(values$conf$use_sb))
+ updateCheckboxInput(session, "sys_use_sb", value = values$conf$use_sb)
+ updateTextInput(session, "sys_use_sb_model", value = values$conf$use_sb_model)
+
+ })
+
+
+ ##############################################################################
+ ## Reactives : project settings UI
+ ##############################################################################
+
+ output$sys_datadir <- renderText(values$conf$datadir)
+ output$sys_datafile <- renderText(paste0(values$conf$dataname, ".feather"))
+ output$sys_var_id <- renderUI(renderText(values$conf$idcol))
+ output$sys_var_text <- renderUI(renderText(values$conf$textcols))
+ output$sys_var_tag <- renderUI(renderText(values$conf$tagcols))
+ output$sys_var_context_ui <- renderUI(selectInput(
+ "sys_var_context", NULL, values$data_orig_colnames, values$conf$contextcols,
+ multiple = TRUE))
+ output$sys_var_comm_ui <- renderUI(selectInput(
+ "sys_var_comm", NULL, c("(none)", values$data_orig_colnames),
+ values$conf$commcol))
+
+ output$sys_ex_lang_ui <- renderUI(selectInput(
+ "sys_ex_lang", NULL, modelnames_labels, "en"
+ ))
+ output$sys_ex_spacy <- renderText(
+ modelnames$spacy_name[modelnames$short == input$sys_ex_lang])
+ output$sys_ex_spacy_dl <- renderText(paste0(
+ "Download with python module: ", values$conf$python, " -m spacy download ",
+ modelnames$spacy_name[modelnames$short == input$sys_ex_lang]))
+ output$sys_ex_ft <- renderText(
+ modelnames$fasttext_name[modelnames$short == input$sys_ex_lang])
+ output$sys_ex_ft_dl <- renderText(paste0(
+ "Manual download link : ",
+ modelnames$fasttext_url[modelnames$short == input$sys_ex_lang]))
+ output$sys_ex_sb <- renderText(
+ ifelse(input$sys_ex_lang %in% c("ar", "zh", "nl", "en", "fr", "de",
+ "it", "ko", "pl", "pt", "ru", "es", "tr"),
+ "distiluse-base-multilingual-cased-v1",
+ "distiluse-base-multilingual-cased-v2"))
+
+
+ ## Save system changes on button click, with modal
+ output$save_config_msg <- renderUI({
+ req(values$save_config_msg)
+ p(strong(values$save_config_msg))
+ })
+ output$save_embed_msg <- renderUI({
+ tmp <- ""
+ if (input$sys_use_spacy) if (!identical(input$sys_use_spacy_model, values$conf$use_spacy_model))
+ tmp <- paste0(tmp, " Changing the spacy model will delete the current tokenized text and fasttext word embeddings, to prevent conflicts")
+ if (input$sys_use_ft) if (!identical(input$sys_use_ft_model, values$conf$use_ft_model))
+ tmp <- paste0(tmp, " Changing the fasttext model will delete the current fasttext word embeddings, to prevent conflicts.")
+ if (input$sys_use_sb) if (!identical(input$sys_use_sb_model, values$conf$use_sb_model))
+ tmp <- paste0(tmp, " Changing the SBERT model will delete the current SBERT sentence embeddings, to prevent conflicts.")
+
+ if (nchar(tmp)) {
+ return(HTML(paste("
Warning:
")))
+ } else
+ return(NULL)
+ })
+ observeEvent(input$saveSystem, {
+ values$save_config_msg <- NULL
+ showModal(modalDialog(
+ title = "Save project configuration",
+ paste0("Click 'Save' to check and confirm the new configuration"),
+ uiOutput("save_embed_msg"),
+ uiOutput("save_config_msg"),
+ footer = tagList(actionButton(paste0("save_conf_confirm"), "Save"),
+ modalButton("Cancel"))))
+ })
+ observeEvent(input$save_conf_confirm, {
+ newconf <- values$conf
+ newconf$contextcols <- input$sys_var_context
+ newconf$commcol <- input$sys_var_comm
+
+ if (input$sys_use_python) {
+ pytest <- try(system(paste(input$sys_which_python, "--version"),
+ intern = TRUE))
+ if (inherits(pytest, "try-error")) {
+ values$save_config_msg <- paste0(
+ "Error: python path `", input$sys_which_python, "` not valid")
+ return(NULL)
+ }
+ }
+ newconf$use_python <- input$sys_use_python
+ newconf$python <- input$sys_which_python
+ newconf$use_gpu <- input$sys_use_gpu
+
+ if (input$sys_use_spacy) {
+ sptest <- system(paste0(
+ newconf$python, " -m spacy info ", input$sys_use_spacy_model), intern = TRUE)
+ if (length(sptest) == 0) {
+ values$save_config_msg <- paste(
+ "Error loading spacy, check that it is installed in the specified python env")
+ return(NULL)
+ }
+ if (length(attr(sptest, "status"))) {
+ values$save_config_msg <- paste(
+ "Error loading spacy model, check that it has been downloaded")
+ return(NULL)
+ }
+ newconf$use_spacy_model <- input$sys_use_spacy_model
+ }
+ newconf$use_spacy <- input$sys_use_spacy
+
+ if (input$sys_use_ft) {
+ if (!file.exists(input$sys_use_ft_model)) {
+ values$save_config_msg <- paste(
+ "Error loading fasttext model, check the specified path")
+ return(NULL)
+ }
+ newconf$use_ft_model <- input$sys_use_ft_model
+ }
+ newconf$use_ft <- input$sys_use_ft
+
+ if (input$sys_use_sb)
+ newconf$use_sb_model <- input$sys_use_sb_model
+ newconf$use_sb <- input$sys_use_sb
+
+ # if (newconf$use_spacy) if (newconf$use_spacy_model != values$conf$use_spacy_model) {
+ if (newconf$use_spacy) if (!identical(newconf$use_spacy_model, values$conf$use_spacy_model)) {
+ file.remove(paste0(values$conf$datadir, values$conf$dataname, "_spa.feather"))
+ file.remove(paste0(values$conf$datadir, values$conf$dataname, "_ft.feather"))
+ }
+ # if (newconf$use_ft) if (newconf$use_ft_model != values$conf$use_ft_model)
+ if (newconf$use_ft) if (!identical(newconf$use_ft_model, values$conf$use_ft_model))
+ file.remove(paste0(values$conf$datadir, values$conf$dataname, "_ft.feather"))
+ # if (newconf$use_sb) if (newconf$use_sb_model != values$conf$use_sb_model)
+ if (newconf$use_sb) if (!identical(newconf$use_sb_model, values$conf$use_sb_model))
+ file.remove(paste0(values$conf$datadir, values$conf$dataname, "_sb.feather"))
+
+ writeLines(RJSONIO::toJSON(newconf),
+ paste0("tigger_", newconf$projectid, ".json"))
+ values$conf <- newconf
+ initDataTrigger(initDataTrigger() + 1)
+ removeModal()
+ })
+
+ ##############################################################################
+ ## Reactives : data
+ ##############################################################################
+
+ ok.commentCol <- reactive(values$conf$commcol)
+
+ ## Current imported data (async)
+ observeEvent(input$dataImport, {
+ if (ok.data.running()) return(NULL)
+ ## Save nrows to conf file on change
+ if (!is.numeric(input$dataNrows) | !is.numeric(input$dataSkipRows)) return(NULL)
+ values$conf$dataNrows <- input$dataNrows
+ values$conf$dataSkipRows <- input$dataSkipRows
+ if (!is.null(values$conf$projectid))
+ writeLines(RJSONIO::toJSON(values$conf), paste0(
+ "tigger_", values$conf$projectid, ".json"))
+ initDataTrigger(initDataTrigger() + 1)
+ })
+
+
+ ## Read data event
+ observeEvent(initDataTrigger(), {
+ if (is.null(values$conf)) {
+ ok.data(NULL)
+ return(NULL)
+ }
+ if (initDataTrigger() == 0) return(NULL)
+ if (verbose)
+ cat("DEBUG enter ok.data\n")
+ if (verbose)
+ cat("DEBUG enter ok.data trigger", initDataTrigger(), "\n")
+ ok.data.running(TRUE)
+
+ ## Prepare variables for async launch
+ coco <- values$conf
+ coco$python_ok <- values$python_ok
+
+ da_token <- session$token
+ data_status <- paste0("tigger_", da_token, "_data")
+
+ file_orig_colnames <- paste0(data_status, "_origcols")
+ file_colstok <- paste0(data_status, "_tok")
+ file_colsft <- paste0(data_status, "_ft")
+ file_colssb <- paste0(data_status, "_sb")
+ file_colsbertpred <- paste0(data_status, "_bertpred")
+ da_missing <- paste0(data_status, "_bertpred_missing")
+
+ projpath <- paste0(coco$datadir, coco$projectid)
+ if (!file.exists(projpath))
+ dir.create(projpath)
+
+ ## Read data: async launch
+ data_async <- future({
+ #################
+ ## Main data file
+
+ set_status(data_status, "Importing main data file...")
+ res <- arrow::read_feather(
+ paste0(coco$datadir, coco$dataname, ".feather"))
+ if (is.null(coco$dataNrows)) return(NULL)
+ rowselect <-
+ intersect(1:nrow(res), (1:(coco$dataNrows)) + coco$dataSkipRows)
+ if (!length(rowselect)) return(NULL)
+ res <- res[rowselect, , drop = FALSE]
+
+ writeLines(colnames(res), file_orig_colnames)
+
+
+ #################
+ ## Tokenized text
+
+ ## Import data : feather of two cols, one the id, other the tokenized
+ set_status(data_status, "Importing tokenized text...")
+ file_tok <- paste0(coco$datadir, coco$dataname, "_spa.feather")
+ if (file.exists(file_tok)) {
+ tokdat <- arrow::read_feather(file_tok)
+ } else
+ tokdat <- NULL
+ tok_remaining <- res[[coco$idcol]]
+ tok_remaining <-
+ tok_remaining[! tok_remaining %in% tokdat[[coco$idcol]]]
+
+ ## Tokenize remaining texts
+ if (length(tok_remaining)) {
+ if (!coco$python_ok | !coco$use_spacy) {
+ ## If remaining texts to tokenize but no python, fall back on
+ ## untokenized text
+ ## TODO here use quanteda tokenizer with options?
+ tokdat <- data.frame(
+ res[[coco$idcol]],
+ do.call(paste, c(res[match(tok_remaining, res[[coco$idcol]]),
+ coco$textcols, drop = FALSE], sep = "\n\n")))
+ colnames(tokdat) <- c(coco$idcol, "text_spa")
+ set_status(data_status, "Importing tokenized text: no spacy ok")
+ } else {
+ ## If python present, tokenize with spacy
+ set_status(data_status,
+ paste("Tokenizing", length(tok_remaining), "new texts"))
+ tmpfile <- paste0("tigger_tok_", da_token)
+ arrow::write_feather(
+ data.frame(
+ id = tok_remaining,
+ text = do.call(
+ paste, c(res[match(tok_remaining, res[[coco$idcol]]),
+ coco$textcols, drop = FALSE], sep = "\n\n"))),
+ paste0(tmpfile, ".feather")
+ )
+ system(paste0(
+ coco$python, " tokenize_spacy.py -d ", tmpfile, ".feather",
+ " -m ", coco$use_spacy_model))
+ tok_new <- arrow::read_feather(paste0(tmpfile, "_spa.feather"))
+ colnames(tok_new)[colnames(tok_new) == "id"] <- coco$idcol
+ tokdat <- rbind(tokdat, tok_new)
+ arrow::write_feather(tokdat, file_tok)
+ system(paste0("rm ", tmpfile, "*"))
+ set_status(data_status, "Importing tokenized text: spacy ok")
+ }
+ }
+
+ ## Save colname for tokenized text
+ tmp_cols_tok <- colnames(tokdat)[colnames(tokdat) != coco$idcol]
+ writeLines(tmp_cols_tok, file_colstok)
+
+ set_status(data_status, "Importing tokenized text: wrote colnames")
+
+ ## Merge with current data
+ res <- merge(res, tokdat, by = coco$idcol,
+ all.y = FALSE, sort = FALSE)
+ rm(tokdat)
+
+ set_status(data_status, "Importing tokenized text: merged")
+
+ #################
+ ## FastText embeddings
+
+ set_status(data_status, paste("Loading fastText embeddings"))
+
+ ## FastText embeddings : feather file with id and embeddings
+ file_ft <- paste0(coco$datadir, coco$dataname, "_ft.feather")
+ if (file.exists(file_ft)) {
+ ftdat <- arrow::read_feather(file_ft)
+ } else
+ ftdat <- NULL
+ ft_remaining <- res[[coco$idcol]]
+ ft_remaining <-
+ ft_remaining[! ft_remaining %in% ftdat[[coco$idcol]]]
+
+ ## Embed remaining texts, if FT in use
+ if (length(ft_remaining)) {
+ if (!coco$python_ok | !coco$use_ft) {
+ ## If no python or FT, abort if not all rows already embedded
+ ftdat <- NULL
+ } else {
+ set_status(data_status, paste(
+ "Embedding", length(ft_remaining), "new texts with fastText"))
+ tmpfile <- paste0("tigger_ft_", da_token)
+ arrow::write_feather(
+ data.frame(
+ id = ft_remaining,
+ text = do.call(
+ paste, res[match(ft_remaining, res[[coco$idcol]]),
+ tmp_cols_tok, drop = FALSE])),
+ paste0(tmpfile, ".feather")
+ )
+ system(paste0(
+ coco$python, " embed_fasttext.py -d ", tmpfile, ".feather",
+ " -m ", coco$use_ft_model))
+ ft_new <- arrow::read_feather(paste0(tmpfile, "_ft.feather"))
+ colnames(ft_new)[colnames(ft_new) == "id"] <- coco$idcol
+ ftdat <- rbind(ftdat, ft_new)
+ arrow::write_feather(ftdat, file_ft)
+ system(paste0("rm ", tmpfile, "*"))
+ }
+ }
+
+ if (!is.null(ftdat)) {
+ ## Merge with current data
+ writeLines(colnames(ftdat)[colnames(ftdat) != coco$idcol], file_colsft)
+ res <- merge(res, ftdat, by = coco$idcol, all.y = FALSE, sort = FALSE)
+ rm(ftdat)
+ }
+
+
+ #################
+ ## SBERT embeddings
+
+ set_status(data_status, "Loading SBERT embeddings")
+
+ ## SBERT file : feather of id and embeddings
+ file_sb <- paste0(coco$datadir, coco$dataname, "_sb.feather")
+ if (file.exists(file_sb)) {
+ sbdat <- arrow::read_feather(file_sb)
+ }else
+ sbdat <- NULL
+ sb_remaining <- res[[coco$idcol]]
+ sb_remaining <-
+ sb_remaining[! sb_remaining %in% sbdat[[coco$idcol]]]
+
+ ## Embed remaining texts
+ if (length(sb_remaining)) {
+ if (!coco$python_ok | !coco$use_sb) {
+ ## If no python or SB, abort if not all rows already embedded
+ sbdat <- NULL
+ } else {
+ set_status(data_status, paste(
+ "Embedding", length(sb_remaining), "new texts with SBERT"))
+ tmpfile <- paste0("tigger_sb_", da_token)
+ arrow::write_feather(
+ data.frame(
+ id = sb_remaining,
+ text = do.call(
+ paste, res[match(sb_remaining, res[[coco$idcol]]),
+ coco$textcols, drop = FALSE])),
+ paste0(tmpfile, ".feather")
+ )
+ system(paste0(
+ coco$python, " embed_sbert.py -d ", tmpfile, ".feather",
+ " -m ", coco$use_sb_model))
+ sb_new <- arrow::read_feather(paste0(tmpfile, "_sb.feather"))
+ colnames(sb_new)[colnames(sb_new) == "id"] <- coco$idcol
+ sbdat <- rbind(sbdat, sb_new)
+ arrow::write_feather(sbdat, file_sb)
+ system(paste0("rm ", tmpfile, "*"))
+ }
+ }
+
+ if (!is.null(sbdat)) {
+ ## Merge with current data
+ writeLines(colnames(sbdat)[colnames(sbdat) != coco$idcol], file_colssb)
+ res <- merge(res, sbdat, by = coco$idcol, all.y = FALSE, sort = FALSE)
+ rm(sbdat)
+ }
+
+ #################
+ ## BERT predictions
+
+ set_status(data_status, "Loading BERT predictions")
+
+ ## BERT predictions : feathers of id and probs
+ ## TODO: take full prediction if existing
+
+ files_pred <- dir(
+ coco$datadir, paste0("^", coco$dataname, "_bertpred_.*[.]feather$"))
+ # files_predall <- dir(
+ # coco$datadir, paste0("^", coco$dataname, "_bertpredall_.*[.]feather$"))
+ # files_pred <- files_pred[
+ # ! gsub("_bertpred_", "_bertpredall_", files_pred) %in% files_predall]
+ # files_pred <- c(files_predall, files_pred)
+
+ tmp_bertpred_cols <- list()
+ if (length(files_pred)) {
+ for (ipred in files_pred) {
+ preddat <- arrow::read_feather(paste0(coco$datadir, ipred))
+ predname <-
+ gsub(paste0("^", coco$dataname, "_bertpred_(.*)[.]feather$"),
+ "\\1", ipred)
+ # predname <-
+ # gsub(paste0("^", coco$dataname, "_bertpred(all)?_(.*)[.]feather$"),
+ # "\\2", ipred, perl = TRUE)
+ n_missing <- sum(! res[[coco$idcol]] %in% preddat[[coco$idcol]])
+ if (n_missing) {
+ write(
+ paste0("BERT prediction ", predname, " : ", n_missing,
+ " texts missing in prediction."),
+ da_missing, append = TRUE)
+ } else {
+ colnames(preddat) <- gsub(
+ "^bertpred_", paste0("bertpred_", predname), colnames(preddat))
+ tmp_bertpred_cols[[predname]] <-
+ colnames(preddat)[colnames(preddat) != coco$idcol]
+ res <- merge(res, preddat, by = coco$idcol,
+ all.y = FALSE, sort = FALSE)
+ }
+ rm(preddat)
+ }
+ if (length(tmp_bertpred_cols)) {
+ writeLines(names(tmp_bertpred_cols), file_colsbertpred)
+ lapply(names(tmp_bertpred_cols), function(x) {
+ writeLines(tmp_bertpred_cols[[x]], paste0(file_colsbertpred, "_", x))
+ })
+ }
+ }
+
+ ## Comment col
+ set_status(data_status, "Loading comments")
+ tmpfile <- paste0(projpath, "/", coco$commcol, ".csv")
+ if (file.exists(tmpfile)) {
+ tmpcol <- read.csv(tmpfile)
+ colnames(tmpcol)[2] <- coco$commcol
+ ## Remove duplicates, keep newest
+ tmpcol <- tmpcol[nrow(tmpcol):1, , drop = FALSE]
+ tmpcol <- tmpcol[!duplicated(tmpcol[, 1]), , drop = FALSE]
+ tmpcol <- tmpcol[nrow(tmpcol):1, , drop = FALSE]
+ res <- merge(res, tmpcol, by = coco$idcol,
+ all.x = TRUE, all.y = FALSE, sort = FALSE)
+ res[[coco$commcol]][is.na(res[[coco$commcol]])] <- ""
+ } else
+ res[[coco$commcol]] <- ""
+
+ # ## Tagging cols
+ # set_status(data_status, "Loading tags")
+ # for (itag in coco$tagcols) {
+ # tmpfile <- paste0(projpath, "/", itag, ".csv")
+ # if (file.exists(tmpfile)) {
+ # tmpcol <- read.csv(tmpfile)
+ # tmpcol[[paste0("hist_", itag)]] <- 1:nrow(tmpcol)
+ # ## Remove duplicates, keep newest
+ # tmpcol <- tmpcol[nrow(tmpcol):1, ]
+ # tmpcol <- tmpcol[!duplicated(tmpcol[, 1]), ]
+ # tmpcol <- tmpcol[nrow(tmpcol):1, ]
+ # if (verbose)
+ # cat("DEBUG imported ", itag, nrow(tmpcol), "\n")
+ # res <- merge(res, tmpcol, by = coco$idcol,
+ # all.x = TRUE, all.y = FALSE, sort = FALSE)
+ # } else {
+ # res[[itag]] <- NA
+ # res[[paste0("hist_", itag)]] <- NA
+ # }
+ # }
+ ## Tagging cols
+ set_status(data_status, "Loading tags")
+ for (itag in coco$tagcols) {
+ itaghist <- paste0("hist_", itag)
+ if (is.null(res[[itag]]) | all(is.na(res[[itag]]))) {
+ res[[itag]] <- NA
+ res[[itaghist]] <- NA
+ } else {
+ res[[itaghist]][!is.na(res[[itag]])] <- 1:sum(!is.na(res[[itag]]))
+ }
+ tmpfile <- paste0(projpath, "/", itag, ".csv")
+ if (file.exists(tmpfile)) {
+ tmpcol <- read.csv(tmpfile)
+ tmpcol[[itaghist]] <- 1:nrow(tmpcol) +
+ max(0, suppressWarnings(max(na.omit(res[[itaghist]]))))
+ ## Remove duplicates, keep newest
+ tmpcol <- tmpcol[nrow(tmpcol):1, ]
+ tmpcol <- tmpcol[!duplicated(tmpcol[, 1]), ]
+ tmpcol <- tmpcol[nrow(tmpcol):1, ]
+ tmpcol <- tmpcol[tmpcol[[coco$idcol]] %in% res[[coco$idcol]], ]
+ if (verbose)
+ cat("DEBUG imported", itag, nrow(tmpcol), "\n")
+ # res <- merge(res, tmpcol, by = coco$idcol,
+ # all.x = TRUE, all.y = FALSE, sort = FALSE)
+ if (nrow(tmpcol)) {
+ idmatches <- match(tmpcol[[coco$idcol]], res[[coco$idcol]])
+ # idmatches <- idmatches[!is.na(idmatches)]
+ res[[itag]][idmatches] <- tmpcol[[itag]]
+ res[[itaghist]][idmatches] <- tmpcol[[itaghist]]
+ }
+ }
+ }
+
+ set_status(data_status, "Finalizing data import")
+ res
+ }) %...>% ok.data.async()
+ data_async <- catch(data_async, function(error) {
+ values$modelDiagno <- paste0("Error in data import: ", error)
+ })
+ data_async <- finally(data_async, function() {
+ #################
+ ## Finalize data importation
+ res <- ok.data.async()
+ if (is.null(res)) return(NULL)
+
+ set_status(data_status, "Finalizing data import: read columns")
+ ## Read column (tok, ft, sb, ...) names from dedicated files
+ values$tagcols <- coco$tagcols
+ values$cols_tok <- readLines(file_colstok)
+ values$cols_ft <- NULL
+ if (file.exists(file_colsft)) {
+ values$cols_ft <- readLines(file_colsft)
+ }
+ values$cols_sb <- NULL
+ if (file.exists(file_colssb)) {
+ values$cols_sb <- readLines(file_colssb)
+ }
+ set_status(data_status, "Finalizing data import: bertpred")
+ if (file.exists(file_colsbertpred)) {
+ tmp_bertpred <- readLines(file_colsbertpred)
+ values$cols_bertpred <- lapply(tmp_bertpred, function(x)
+ readLines(paste0(file_colsbertpred, "_", x)))
+ names(values$cols_bertpred) <- tmp_bertpred
+ } else
+ values$cols_bertpred <- NULL
+ set_status(data_status, "Finalizing data import: bertpred missing")
+ if (file.exists(da_missing)) {
+ lapply(readLines(da_missing), function(x)
+ showNotification(paste(
+ x, "\n",
+ "You may predict on current text in BERT panel, ",
+ "and then reload the data.")))
+ }
+
+ ## Populate regressors list in prediction panel
+ set_status(data_status, "Finalizing data import: populating regressors")
+ regr_choices <- c(
+ "Regex" = "regex",
+ "Word counts (DFM)" = "dfm")
+ if (!is.null(values$cols_ft))
+ regr_choices <- c(regr_choices, "Word embeddings (FastText)" = "ft")
+ if (!is.null(values$cols_sb))
+ regr_choices <- c(regr_choices, "Sentence embeddings (SBERT)" = "sb")
+ regr_choices <- c(regr_choices, "Extra predictors" = "extra")
+
+ if (length(values$cols_bertpred)) {
+ tmp_choices <- paste0("bertpred_", names(values$cols_bertpred))
+ names(tmp_choices) <- paste0("BERT_", names(values$cols_bertpred))
+ regr_choices <- c(regr_choices, tmp_choices)
+ }
+
+ # if (length(regr_choices) > 3) {
+ regr_choices_sel <-
+ regr_choices[regr_choices %in% c("regex", "ft", "sb")]
+ # } else
+ # regr_choices_sel <- c("regex", "dfm")
+
+ updateSelectizeInput(
+ session, "use_regressors", choices = regr_choices,
+ selected = regr_choices_sel)
+ values$new_use_regressors <- regr_choices_sel
+
+ othercols <- colnames(res)
+ othercols <- othercols[! othercols %in% c(
+ values$cols_tok, values$cols_ft, values$cols_sb, values$cols_bertpred,
+ values$tagcols, paste0("hist_", values$tagcols),
+ coco$commcol, coco$textcols)]
+ updateSelectInput(
+ session, "dlTagSelect",
+ choices = c("tags", "comments", "predictions", othercols),
+ selected = c("tags", "comments", "predictions"))
+
+ ## Populate extra regressors lists in prediction panel
+ updateSelectizeInput(
+ session, "use_ootregnum", choices = othercols, selected = NULL)
+ updateSelectizeInput(
+ session, "use_ootregcat", choices = othercols, selected = NULL)
+
+ ## Project path
+ values$projectdir <- paste0(projpath, "/")
+
+ ## Create tagmat: data.frame of all current taggings
+ values$tagmat <- res[, coco$tagcols, drop = FALSE]
+ ## Create tagmathist: data.frame of historical order of taggings
+ values$tagmathist <- res[, paste0("hist_", coco$tagcols), drop = FALSE]
+ colnames(values$tagmathist) <- coco$tagcols
+
+ ## Save data original colnames
+ values$data_orig_colnames <- readLines(file_orig_colnames)
+
+ ok.data(ok.data.async())
+ ok.data.async(NULL)
+ ok.data.running(FALSE)
+ set_status(data_status, "Exit data async")
+ system(paste0("rm ", data_status, "*"))
+ values$modelDiagno <- "No model"
+
+
+ if(trainTrigger() == 0) trainTrigger(1) ## For first run. TODO set trainTrigger to 0 when changing data?
+ if (verbose) cat("DEBUG Exit data async finally\n")
+ })
+ NULL
+ })
+
+ ## Read data import status from file
+ observe({
+ if (ok.data.running()) {
+ invalidateLater(100)
+ tmpfile <- paste0("tigger_", session$token, "_data")
+ if (file.exists(tmpfile)) {
+ values$modelDiagno <- paste(collapse = "\n", scan(
+ tmpfile, what = "character", sep="\n", quiet = TRUE))
+ } else
+ values$modelDiagno <- ""
+ }
+ })
+
+
+ ###############
+ ## ok.text() reactive
+ ###############
+ ok.text <- eventReactive(ok.data(), {
+ if (is.null(ok.data())) return(NULL)
+ res <- ok.data()[, values$cols_tok[1]]
+ if (length(values$cols_tok) > 1) {
+ for (icol in values$cols_tok[-1]) {
+ res <- paste0(res, "\n\n", ok.data()[, icol])
+ }
+ }
+ # queryTrigger(queryTrigger() + 1)
+ if (verbose)
+ cat("DEBUG ok.text", length(res), "\n")
+ res
+ })
+
+ ok.labelcol <- reactive({
+ if (is.null(ok.data())) return(NULL)
+ if (verbose)
+ # cat("DEBUG enter ok.labelcol\n")
+ if (is.null(input$selectScheme)) return(values$conf$tagcols[1])
+ input$selectScheme
+ })
+
+
+ ok.nicetext <- reactive({
+ req(ok.data(), values$conf$textcols)
+ # if (is.null(ok.data())) return(NULL)
+ do.call(paste, c(ok.data()[, values$conf$textcols, drop = FALSE],
+ sep = "\n\n"))
+ })
+
+ ok.regressor.names <- reactive({
+ daregs <- values$new_use_regressors
+ if (verbose)
+ cat("DEBUG enter ok.regressor.names ", paste(daregs, collapse = " "), "\n")
+ res <- NULL
+ if ("ft" %in% daregs)
+ res <- c(res, values$cols_ft)
+ if ("sb" %in% daregs)
+ res <- c(res, values$cols_sb)
+ if (length(values$cols_bertpred)) {
+ for (ipred in 1:length(values$cols_bertpred)) {
+ if (paste0("bertpred_", names(values$cols_bertpred)[ipred]) %in% daregs)
+ res <- c(res, values$cols_bertpred[[ipred]])
+ }
+ }
+ if (verbose)
+ cat("DEBUG exit ok.regressor.names:", length(res), "\n")
+ res
+ })
+
+ ok.contextCol <- reactive({
+ if (verbose)
+ cat("DEBUG enter ok.contextCol\n")
+ # res <- isolate(values$conf$contextcols)
+ res <- values$conf$contextcols
+ names(res) <- res
+ res
+ })
+
+ ## Initiate label and queries history, on ok.text() or labelcol change,
+ observeEvent(list(ok.text(), ok.labelcol()), {
+ if (verbose)
+ cat("DEBUG initiate label\n")
+ if (is.null(ok.labelcol())) {
+ values$label <- NULL
+ values$labelhist <- NULL
+ values$queries <- NULL
+ values$comment <- NULL
+ values$wasTagged <- NULL
+ values$wasRetagged <- NULL
+ } else {
+ values$label <- values$tagmat[, ok.labelcol()]
+ values$labelhist <- values$tagmathist[, ok.labelcol()]
+ values$queries <- which(!is.na(values$label))
+ values$comment <- ok.data()[[values$conf$commcol]]
+ values$wasTagged <- !is.na(values$label)
+ values$wasRetagged <- rep(FALSE, length(values$label))
+ }
+ })
+
+ ## On values$label change, values$tagmat is updated for separate history
+ observeEvent(values$label, {
+ if (is.null(values$label)) return(NULL)
+ if (verbose)
+ cat("DEBUG Update tagmat from label\n")
+ values$tagmat[, ok.labelcol()] <- values$label
+ values$tagmathist[, ok.labelcol()] <- values$labelhist
+ })
+
+
+ ##############################################################################
+ ## Reactives : training
+ ##############################################################################
+
+ ## Message of how many tagged and untagged, for model diagnostics box
+ ok.msgtagged <- reactive({
+ if (verbose)
+ cat("DEBUG enter ok.msgtagged\n")
+ # if (is.null(ok.data()) | ok.data.running()) {
+ if (is.null(ok.data())) {
+ return("No data yet ")
+ }
+
+ tmp <- ""
+ # if (sum(is.na(values$label)) == 0)
+ # tmp <- paste0(tmp, "Data correction mode (0 NA labels left).\n")
+ paste0(tmp, "Tagged: ", sum(!is.na(values$label)),
+ " ; Untagged: ", sum(is.na(values$label)))
+ })
+
+ ## Training possible? At least 3 texts for 2 tags
+ ok.train.possible <- reactive({
+ if (verbose)
+ cat("DEBUG enter ok.train.possible\n")
+ labtab <- table(values$label)
+ if (length(labtab) < 2) return(FALSE)
+ if (sum(labtab > 2) < 2) return(FALSE)
+ TRUE
+ })
+
+ ## Compute DFM (async)
+ ## TODO make this less reactive
+ observe({
+ # if ("dfm" %in% input$use_regressors & !values$dfmGo)
+ if ("dfm" %in% values$new_use_regressors & !values$dfmGo)
+ values$dfmGo <- TRUE
+ })
+ ok.dfm <- reactiveVal()
+ observe({
+ if (is.null(ok.text())) return(NULL)
+ if (! values$dfmGo) return(NULL)
+ if (!is.numeric(input$dfmNgrams)) return(NULL)
+ if (verbose)
+ cat("DEBUG enter dfm observer\n")
+
+ datext <- ok.text()
+ dfmNgrams <- input$dfmNgrams
+ dfmTfIdf <- input$dfmTfIdf
+ dfmMinDocfreq <- input$dfmMinDocfreq
+ dfmMinTermfreq <- input$dfmMinTermfreq
+ dfmTfScheme <- input$dfmTfScheme
+ dfmDfScheme <- input$dfmDfScheme
+
+ dfm_async <- future(seed = TRUE, {
+ dadfm <- datext %>%
+ tolower() %>% tokens(remove_punct = F) %>%
+ # tokens_select(pattern = stopwords(language= "fr"), selection = "remove") %>%
+ tokens_ngrams(n= 1:dfmNgrams) %>% dfm() %>%
+ dfm_trim(min_docfreq = dfmMinDocfreq, min_termfreq = dfmMinTermfreq,
+ verbose = TRUE)
+ if (dfmTfIdf) {
+ dadfm <- dadfm %>%
+ dfm_tfidf(scheme_tf = dfmTfScheme, scheme_df = dfmDfScheme)
+ }
+ suppressWarnings({ # TODO : check
+ dadfm <- quanteda::convert(dadfm, to= "tm")
+ })
+ dadfm <- Matrix::sparseMatrix(i= dadfm$i, j= dadfm$j, x= dadfm$v,
+ dimnames = dadfm$dimnames)
+ dadfm
+ }) %...>% ok.dfm()
+ dfm_async <- catch(dfm_async, function(error) {
+ values$modelDiagno <- paste0("Error in DFM: ", error)
+ })
+ })
+
+ ## Predictor : df or matrix of dfm and extra
+ ok.regressor.matrix <- reactive({
+ if (is.null(ok.text())) return(NULL)
+ if (is.null(ok.regressor.names())) return(NULL)
+ if (verbose)
+ cat("DEBUG enter ok.regressor.matrix\n")
+ model.matrix(~., data= ok.data()[, ok.regressor.names()])[, -1]
+ })
+
+ ok.extraregressor.matrix <- reactive({
+ req(ok.text())
+ if (! "extra" %in% values$new_use_regressors) return(NULL)
+ if (verbose)
+ cat("DEBUG enter ok.extraregressor.matrix\n")
+
+ res <- NULL
+ for (inum in input$use_ootregnum) {
+ suppressWarnings({tmp <- as.numeric(ok.data()[[inum]])})
+ if (all(is.na(tmp))) {
+ showNotification(paste0(inum, " is all NAs or non-numeric ; predictor not used"),
+ duration = 5, type = "warning")
+ next
+ }
+ tmp[is.na(tmp)] <- median(tmp, na.rm = TRUE)
+ res <- cbind(res, tmp)
+ }
+ for (icat in input$use_ootregcat) {
+ tmp <- ok.data()[[icat]]
+ if (! length(unique(tmp)) %in% 2:100) {
+ showNotification(paste0(icat, " has ", length(unique(tmp)),
+ " unique values ; predictor not used"),
+ duration = 5, type = "warning")
+ next
+ }
+ tmp <- as.character(tmp)
+ tmp[is.na(tmp)] <- ""
+ tmp <- as.factor(tmp)
+ tmpmat <- model.matrix(~., data.frame(cat = tmp))[, -1, drop = FALSE]
+ res <- cbind(res, tmpmat)
+ }
+ if (verbose)
+ cat("DEBUG exit ok.extraregressor.matrix", ncol(res), "\n")
+ res
+ })
+
+ ok.extraRegex.matrix <- reactive({
+ if (is.null(ok.nicetext())) return(NULL)
+ if (is.null(values$extraRegex)) return(NULL)
+ if (is.null(values$regex_inuse)) return(NULL)
+ if (!any(values$regex_inuse)) return(NULL)
+ if (is.null(values$regex_case)) return(NULL)
+ if (verbose)
+ cat("DEBUG enter ok.extraRegex.matrix\n")
+ tmp <- as.matrix(sapply(which(values$regex_inuse), function(iregex) {
+ 1 * stringi::stri_detect_regex(
+ ok.nicetext(), values$extraRegex[iregex],
+ opts_regex = list(case_insensitive = !values$regex_case[iregex]))
+ }))
+ colnames(tmp) <- values$extraRegex[values$regex_inuse]
+ tmp
+ })
+
+ ok.predictor <- reactive({
+ req(values$new_use_regressors)
+ if (ok.data.running()) return(NULL)
+ if (is.null(ok.text())) return(NULL)
+ if (verbose)
+ cat("DEBUG enter ok.predictor\n")
+ dadfm <- NULL
+ if ("dfm" %in% values$new_use_regressors) {
+ dadfm <- ok.dfm()
+ } else {
+ if (input$predModel == "naive bayes") {
+ values$modelDiagno <-
+ "Error in training: Naive Bayes requires DFM word counts as predictors."
+ return(NULL)
+ }
+ }
+ if (length(ok.regressor.names()) > 0) {
+ if (input$predModel == "random forest") {
+ if (is.null(dadfm)) {
+ dadfm <- ok.data()[, ok.regressor.names()]
+ } else
+ dadfm <- cbind(ok.data()[, ok.regressor.names()], as.matrix(dadfm))
+ }
+ if (input$predModel %in% c("lasso", "linear", "knn")) {
+ dadfm <- cbind(ok.regressor.matrix(), dadfm)
+ }
+ }
+ # if ("regex" %in% input$use_regressors && length(values$regex_inuse) > 0) {
+ if ("regex" %in% values$new_use_regressors & !is.null(ok.extraRegex.matrix())) {
+ dadfm <- cbind(dadfm, ok.extraRegex.matrix())
+ }
+ if ("extra" %in% values$new_use_regressors) {
+ dadfm <- cbind(dadfm, ok.extraregressor.matrix())
+ }
+
+ if (is.null(dadfm)) {
+ values$modelDiagno <- "Error in training: no predictors selected"
+ if (verbose)
+ cat("DEBUG ok.predictor exit: no predictors\n")
+ return(NULL)
+ }
+
+ if (input$predModel == "linear")
+ dadfm <- SparseM::as.matrix.csr(dadfm, nrow(dadfm), ncol(dadfm))
+ if (input$predModel == "naive bayes")
+ dadfm <- as.dfm(dadfm)
+ if (verbose)
+ cat("DEBUG exit ok.predictor: ", ncol(dadfm), "\n")
+ dadfm
+ })
+
+
+ ## Train model async
+ ok.train <- reactiveVal()
+ ok.train.async <- reactiveVal()
+ observeEvent(
+ list(trainTrigger(), input$modelTrain),
+ {
+ if (is.null(ok.predictor())) return(NULL)
+ if (verbose)
+ cat("DEBUG enter ok.train observer\n")
+ if (!ok.train.possible()) {
+ values$modelDiagno <-
+ "Tag at least 3 texts for 2 tags to train prediction model."
+ ok.train(NULL)
+ # queryTrigger(queryTrigger() + 1)
+ return(NULL)
+ }
+ ready2tag(0)
+ values$modelDiagno <- paste0("Training ", input$predModel)
+
+ tmp_model <- input$predModel
+ if (tmp_model == "random forest") {
+ if (!is.numeric(input$rfNumTrees) | !is.numeric(input$rfMtry) |
+ !is.numeric(input$rfSampleFrac)) {
+ ok.train(NULL)
+ return(NULL)
+ }
+ train_fun <- ranger::ranger
+ train_args <- list(
+ x= ok.predictor()[!is.na(values$label), ],
+ y= as.factor(values$label[!is.na(values$label)]),
+ num.trees = input$rfNumTrees,
+ mtry = if (input$rfMtry > 0) input$rfMtry,
+ sample.fraction = input$rfSampleFrac, probability = TRUE)
+ } else {
+ if (tmp_model == "linear") {
+ tmpvalues <- values$label
+ if (!is.numeric(input$liblinCost)) {
+ ok.train(NULL)
+ return(NULL)
+ }
+ for (ival in na.omit(unique(tmpvalues)))
+ if (sum(tmpvalues == ival, na.rm = T) == 1)
+ tmpvalues[which(tmpvalues == ival)] <- NA
+ train_fun <- LiblineaR::LiblineaR
+ train_args <- list(
+ data= ok.predictor()[!is.na(tmpvalues), ],
+ target= as.factor(tmpvalues[!is.na(tmpvalues)]),
+ type= 0, cost = input$liblinCost)
+ } else if (tmp_model == "lasso") {
+ tmpvalues <- values$label
+ for (ival in na.omit(unique(tmpvalues)))
+ if (sum(tmpvalues == ival, na.rm = T) == 1)
+ tmpvalues[which(tmpvalues == ival)] <- NA
+ train_fun <- glmnet::glmnet
+ train_args <- list(
+ x= ok.predictor()[!is.na(tmpvalues), ],
+ y= as.factor(tmpvalues[!is.na(tmpvalues)]),
+ family= "multinomial")
+ } else if (tmp_model == "naive bayes") {
+ if (!is.numeric(input$naiveSmooth)) {
+ ok.train(NULL)
+ return(NULL)
+ }
+ tmpvalues <- values$label
+ for (ival in na.omit(unique(tmpvalues)))
+ if (sum(tmpvalues == ival, na.rm = T) == 1)
+ tmpvalues[which(tmpvalues == ival)] <- NA
+ train_fun <- quanteda.textmodels::textmodel_nb
+ train_args <- list(
+ x= ok.predictor()[!is.na(tmpvalues), ],
+ y= as.factor(tmpvalues[!is.na(tmpvalues)]),
+ smooth = input$naiveSmooth, prior = input$naivePrior,
+ distribution = input$naiveDistri)
+ } else if (tmp_model == "knn") {
+ train_fun <- function(x) return(TRUE)
+ train_args <- list(x = "knn")
+ }
+ }
+
+ train_async <- future(seed = TRUE, {
+ suppressWarnings(do.call(train_fun, train_args))
+ }) %...>% ok.train.async()
+ train_async <- catch(train_async, function(error) {
+ values$modelDiagno <- paste0("Model training error: ", error)
+ ok.train(NULL)
+ ok.train.async(NULL)
+ })
+ train_async <- finally(train_async, function() {
+ values$modelDiagno <- "Training done, predicting..."
+ ok.train(ok.train.async())
+ ok.train.async(NULL)
+ predTrigger(predTrigger() + 1)
+ })
+ NULL
+ }
+ )
+
+ ## Model prediction on full dataset (async)
+ ok.pred <- reactiveVal()
+ ok.pred.async <- reactiveVal()
+ # observeEvent(list(ok.train(), input$glmLambda, input$knnK, values$retagged), {
+ observeEvent(predTrigger(), {
+ if (is.null(ok.train())) {
+ ok.pred(NULL)
+ return(NULL)
+ }
+ if (verbose)
+ cat("DEBUG ok.pred()\n")
+
+ damodel <- input$predModel
+ if (input$predModel == "random forest") {
+ pred_args <- list(ok.train(), data = ok.predictor())
+ dacolnames <- colnames(ok.train()$predictions)
+ } else if (input$predModel == "lasso") {
+ if (!is.numeric(input$glmLambda)) {
+ ok.pred(NULL)
+ return(NULL)
+ }
+ pred_args <- list(ok.train(), newx = ok.predictor(), s = input$glmLambda,
+ type = "response")
+ } else if (input$predModel == "linear") {
+ pred_args <- list(ok.train(), newx = ok.predictor(), proba = TRUE)
+ } else if (input$predModel == "naive bayes") {
+ pred_args <- list(ok.train(), newdata = ok.predictor(),
+ type = "probability")
+ } else if (input$predModel == "knn") {
+ if (!is.numeric(input$knnK)) {
+ ok.pred(NULL)
+ return(NULL)
+ }
+ pred_args <- list(train = ok.predictor()[!is.na(values$label), ],
+ test = ok.predictor(),
+ cl = values$label[!is.na(values$label)],
+ k = input$knnK, prob = TRUE)
+ }
+
+ pred_async <- future(seed = TRUE, {
+ if (damodel == "random forest") {
+ res <- do.call(predict, pred_args)$predictions
+ colnames(res) <- dacolnames
+ res
+ } else if (damodel == "lasso") {
+ do.call(predict, pred_args)[, , 1]
+ } else if (damodel == "linear") {
+ do.call(predict, pred_args)$probabilities
+ } else if (damodel == "naive bayes") {
+ do.call(predict, pred_args)
+ } else if (damodel == "knn") {
+ tmp <- do.call(class::knn, pred_args)
+ ## A bit flaky here: probs of non-best classes uniformly share the rest,
+ ## and distance is euc
+ tmpvalues <- sort(unique(pred_args$cl))
+ res <- matrix(0, nrow(pred_args$test), length(tmpvalues),
+ dimnames = list(NULL, tmpvalues))
+ res[cbind(1:nrow(res), tmp)] <- attr(tmp, "prob") -
+ ((1 - attr(tmp, "prob")) / (ncol(res) - 1))
+ res <- res + (1 - attr(tmp, "prob")) / (ncol(res) - 1)
+ res
+ }
+ }) %...>% ok.pred.async()
+ pred_async <- catch(pred_async, function(error) {
+ values$modelDiagno <- paste0("Error in prediction: ", error)
+ })
+ pred_async <- finally(pred_async, function() {
+ if (verbose)
+ cat("DEBUG ok.pred() out of async\n")
+ ok.pred(ok.pred.async())
+ ok.pred.async(NULL)
+ # predlabTrigger(predlabTrigger() + 1)
+ diagTrigger(diagTrigger() + 1)
+ queryNext(queryNext() + 1)
+ })
+ })
+
+ ## Best predicted label for full dataset
+ # ok.predlab1 <- eventReactive(predlabTrigger(), {
+ # if (is.null(ok.pred())) return(NULL)
+ # if (verbose)
+ # cat("DEBUG ok.predlab1\n")
+ # # queryTrigger(queryTrigger() + 1)
+ # colnames(ok.pred())[max.col(ok.pred())]
+ # })
+ ok.predlab1 <- reactive({
+ if (is.null(ok.pred())) return(NULL)
+ if (verbose)
+ cat("DEBUG ok.predlab1\n")
+ colnames(ok.pred())[max.col(ok.pred())]
+ })
+
+ ## On scheme change, forget existing model, queue, query and hist
+ ## load scheme description and predictor-regex
+ observeEvent(ok.labelcol(), {
+ if (verbose)
+ cat("DEBUG event selectScheme", ok.labelcol(), "\n")
+ ok.pred(NULL)
+ ok.train(NULL)
+ values$queryqueue <- NULL
+ values$modelDiagno <- "No model"
+ values$histSaveStack <- NULL
+
+ ## Load description file
+ descr <- ""
+ if (file.exists(paste0(values$conf$datadir, values$conf$projectid,
+ "/", input$selectScheme, ".txt"))) {
+ descr <- paste(collapse = "\n", readLines(paste0(
+ values$conf$datadir, values$conf$projectid,
+ "/", input$selectScheme, ".txt")))
+ }
+ updateTextAreaInput(session, "schemeDescr", value = descr)
+
+ ## Load regex from json
+ regexfile <- paste0(values$projectdir, input$selectScheme, "_regex.json")
+ if (file.exists(regexfile)) {
+ if (verbose)
+ cat("DEBUG enter regex.json\n")
+ tmp <- RJSONIO::fromJSON(paste(collapse = "\n", readLines(regexfile)))
+ if (is.null(tmp$regex)) return(NULL)
+ values$extraRegex <- tmp$regex
+ names(values$extraRegex) <- sapply(tmp$regex, label2hash)
+ for (iregex in 1:length(tmp$regex)) {
+ values[[paste0("useregex_", names(values$extraRegex)[iregex])]] <-
+ tmp$use[iregex]
+ values[[paste0("caseregex_", names(values$extraRegex)[iregex])]] <-
+ tmp$case[iregex]
+ }
+ } else
+ values$extraRegex <- NULL
+
+ })
+
+
+
+ ## queryNext event: update values$newQuery ; called by queryNext(), or by
+ ## queryQueue or regex change
+ ## Requires :
+ ## - queryQueue (reactive: indices of current queue), and
+ ## - wasTagged (boolean vector indicating whether each element has been tagged this session, used in "on untagged")
+ ## - wasRetagged (boolean vector indicating whether each element has been tagged this session, used in "on tagged")
+ ## TODO: make a scheme-list of wasTagged, now it will be wiped at each scheme change?
+ observeEvent(
+ list(queryNext(), values$queryQueue,
+ input$sampleChoice, input$taggedWhich, input$maxprobWhich,
+ input$regexFilter, input$regexCaseSens,
+ input$visuLock, values$visuZoom$xlim, values$visuZoom$ylim),
+ {
+ if (is.null(values$queryQueue)) return(NULL)
+ if (verbose)
+ cat("DEBUG queryNext\n")
+
+ queue <- values$queryQueue
+
+ if (input$sampleChoice == "untagged") {
+ queue <- queue[!values$wasTagged[queue]]
+ } else if (input$sampleChoice == "tagged") {
+ queue <- queue[!is.na(values$label)[queue] & !values$wasRetagged[queue]]
+ if (input$taggedWhich != "all") {
+ queue <- queue[values$label[queue] == input$taggedWhich]
+ }
+ } else if (input$sampleChoice == "all") {
+ queue <- queue[!values$wasRetagged[queue]]
+ }
+
+ if (input$visuLock) if (!is.null(values$visuZoom$xlim)) {
+ if (!is.null(ok.visu()))
+ queue <- queue[ok.visu()[queue, 1] >= values$visuZoom$xlim[1] &
+ ok.visu()[queue, 1] <= values$visuZoom$xlim[2] &
+ ok.visu()[queue, 2] >= values$visuZoom$ylim[1] &
+ ok.visu()[queue, 2] <= values$visuZoom$ylim[2]]
+ }
+
+ if (length(queue)) if (nchar(input$regexFilter)) {
+ try_regex <- try(silent = TRUE, {
+ queue <- queue[
+ stringi::stri_detect_regex(
+ ok.nicetext()[queue], input$regexFilter,
+ opts_regex = list(case_insensitive = !input$regexCaseSens))]
+ })
+ if (inherits(try_regex, "try-error")) {
+ showNotification("Invalid regex", duration = 2, type = "error")
+ values$newQuery <- NULL
+ return(NULL)
+ }
+ }
+
+ if (!length(queue)) {
+ if (nchar(input$regexFilter)) {
+ showNotification("No texts matching the regex",
+ duration = 2, type = "warning")
+ } else
+ showNotification("No more texts to be tagged in this set",
+ duration = 2, type = "warning")
+ values$newQuery <- NULL
+ return(NULL)
+ }
+
+ values$lastQuery <- queue[1] # ??
+ values$newQuery <- queue[1]
+ ready2tag(1)
+ }
+ )
+
+ ## queryQueue event: fix values$queryQueue, (indices of current queue)
+ observeEvent(
+ list(ok.data(), ok.pred(), input$strategy, input$maxprobWhich,
+ input$selectProject, input$selectScheme), {
+ if (input$strategy %in% c("entropy", "maxprob") & is.null(ok.pred())) {
+ values$queryQueue <- NULL
+ values$lastQuery <- NULL
+ values$newQuery <- NULL
+ return(NULL)
+ }
+ if (is.null(ok.text())) return(NULL)
+ if (verbose)
+ cat("DEBUG queryQueue change\n")
+
+ if (input$strategy == "sequential") {
+ values$queryQueue <- 1:length(ok.text())
+ } else if (input$strategy == "random") {
+ values$queryQueue <- sample(length(ok.text()))
+ } else {
+ dapred <- ok.pred()
+ if (input$strategy == "entropy") {
+ tominimize <- rowSums(dapred * log(dapred))
+ if (any(dapred == 0)) {
+ tominimize[dapred == 0] <- 0
+ }
+ }
+ if (input$strategy == "maxprob") {
+ if (! input$maxprobWhich %in% colnames(dapred)) return(NULL)
+ tominimize <- -dapred[, input$maxprobWhich];
+ }
+ values$queryQueue <- order(tominimize)
+ }
+ }
+ )
+
+
+ ## Update uniqueLabels and uniqueLabelsAll if necessary on values$label change
+ observeEvent(values$label, {
+ if (is.null(values$label)) return(NULL)
+ if (verbose)
+ cat("DEBUG uniqueLabels\n")
+ dalabs <- sort(na.omit(unique(values$label)))
+ # if (!length(dalabs)) return(NULL)
+ if (!identical(dalabs, values$uniqueLabels))
+ values$uniqueLabels <- dalabs
+ if (any(! dalabs %in% values$uniqueLabelsAll))
+ values$uniqueLabelsAll <- c(values$uniqueLabelsAll,
+ dalabs[! dalabs %in% values$uniqueLabelsAll])
+ })
+
+
+
+ ##############################################################################
+ ## Project management
+ ##############################################################################
+
+ output$cpDataMessage <- renderUI({
+ req(values$cpDataMessage)
+ p(values$cpDataMessage)
+ })
+
+ output$cpConfirmMessage <- renderUI({
+ req(values$cpConfirmMessage)
+ p(strong(paste0("Error: ", values$cpConfirmMessage)))
+ })
+
+ output$cpIdcolUI <- renderUI({
+ req(values$cpNewData)
+ if (!is.data.frame(values$cpNewData)) return(NULL)
+ selectInput("cpIdcol", NULL, colnames(values$cpNewData))
+ })
+
+ output$cpTextcolUI <- renderUI({
+ req(values$cpNewData)
+ if (!is.data.frame(values$cpNewData)) return(NULL)
+ selectInput("cpTextcol", NULL, colnames(values$cpNewData), multiple = TRUE)
+ })
+
+ output$cpContextcolUI <- renderUI({
+ req(values$cpNewData)
+ if (!is.data.frame(values$cpNewData)) return(NULL)
+ selectInput("cpContextcol", NULL, colnames(values$cpNewData), multiple = TRUE)
+ })
+
+ output$cpTagcolUI <- renderUI({
+ req(values$cpNewData)
+ if (!is.data.frame(values$cpNewData)) return(NULL)
+ selectInput("cpTagcol", NULL, colnames(values$cpNewData), multiple = TRUE)
+ })
+
+ output$cpCommcolUI <- renderUI({
+ req(values$cpNewData)
+ if (!is.data.frame(values$cpNewData)) return(NULL)
+ selectInput("cpCommcol", NULL, c("none", colnames(values$cpNewData)))
+ })
+
+ output$cp_spacyUI <- renderUI(
+ textInput("cp_use_spacy_model", NULL,
+ modelnames$spacy_name[modelnames$short == input$cp_lang],
+ placeholder = "(spacy model name)")
+ )
+
+ output$cp_spacyDlUI <- renderUI(HTML(paste(
+ "Model must be downloaded once, from python:
",
+ input$cp_which_python, "-m spacy download",
+ modelnames$spacy_name[modelnames$short == input$cp_lang], "
"
+ )))
+
+ output$cp_ftUI <- renderUI(
+ textInput("cp_use_ft_model", NULL,
+ modelnames$fasttext_name[modelnames$short == input$cp_lang],
+ placeholder = "(fasttext model path)")
+ )
+
+ output$cp_ftDlUI <- renderUI(list(HTML(paste(
+ "Model can be downloaded here")),
+ br(), br()))
+
+ output$cp_sbUI <- renderUI(
+ textInput("cp_use_sb_model", NULL, ifelse(
+ input$cp_lang %in% c("ar", "zh", "nl", "en", "fr", "de",
+ "it", "ko", "pl", "pt", "ru", "es", "tr"),
+ "distiluse-base-multilingual-cased-v1",
+ "distiluse-base-multilingual-cased-v2"),
+ placeholder = "(custom sentence_transformers model)")
+ )
+
+
+
+ ## Create new project, with modal dialog
+ observeEvent(input$createProject, {
+ if (verbose)
+ cat("DEBUG create project\n")
+ showModal(modalDialog(
+ title = "New project",
+
+ p("Create a new text tagging project."),
+ p("This mainly involves choosing a dataset, and how to process it."),
+ p("Items marked by a star ", strong("*"),
+ " cannot be changed once the project is created."),
+
+ hr(),
+ h4("Files and folders"),
+
+ fluidRow(
+ column(3, p(strong("*Project name"))),
+ column(5, textInput("cpName", NULL, placeholder = "(Required)")),
+ column(4, p("Tags will be stored in a new directory by this name."))
+ ),
+ fluidRow(
+ column(3, p(strong("*Data directory"))),
+ column(5, textInput("cpDatadir", NULL, placeholder = "(Required)")),
+ column(4, p("Place (on the server) where the data and project are stored"))
+ ),
+ fluidRow(
+ column(3, p(strong("*Data filename"))),
+ column(5, checkboxInput("cpDoImport", "New data", TRUE)),
+ column(4, p("Main file, containing id and text columns"))
+ ),
+
+ conditionalPanel("!input.cpDoImport", list(
+ ## Existing data
+ fluidRow(
+ column(3, p(strong("Existing file"))),
+ column(5, textInput("cpDatafile", NULL, placeholder = "(Required)")),
+ column(4, actionButton("cpDatafileGo", "Import"))
+ )
+ )),
+ conditionalPanel("input.cpDoImport", {
+ ## Import data
+ wellPanel(
+ selectInput('cpNDfiletype', NULL,
+ choices = c("Text file (.csv, .txt)" = "csv_txt",
+ "Feather" = "feather",
+ "Parquet" = "parquet",
+ "OpenDocument (.ods)" = "ods",
+ "Microsoft Excel (.xls, .xlsx)" = "excel",
+ "SPSS (.sav, .por)" = "spss",
+ "SAS (.sas7bdat)" = "sas_data",
+ "Stata v5-v12 (.dta)" = "stata"),
+ selected = "csv_txt"
+ ),
+ fileInput('cpNDfile', 'Choose File'),
+
+ checkboxInput("cpNDparams", "File parameters"),
+
+ conditionalPanel(
+ "input.cpNDparams",
+ conditionalPanel(
+ "input.cpNDfiletype == 'csv_txt'",
+
+ fluidRow(column(4, p('Header')),
+ column(8, selectInput('cpCSVheader', NULL,
+ c("Header" = TRUE,
+ "No Header" = FALSE),
+ "Auto"))),
+ fluidRow(column(4, p('Separator')),
+ column(8, selectInput('cpCSVsep', NULL,
+ c("Comma ','" = ",",
+ "Semicolon ';'" = ";",
+ "Tab" = "\t",
+ "Space" = " "),
+ "Auto"))),
+ fluidRow(column(4, p('Quote')),
+ column(8, selectInput('cpCSVquote', NULL,
+ c("Double Quote \"" = "\"",
+ "Single Quote '" = "'",
+ "None" = ""),
+ "Double Quote \""))),
+ fluidRow(column(4, p('Decimal mark')),
+ column(8, selectInput('cpCSVdec', NULL,
+ c('Period "."' = ".",
+ 'Comma ","' = ","),
+ 'Period "."'))),
+ fluidRow(column(4, p('File Encoding')),
+ column(8, selectInput('cpCSVencoding', NULL,
+ c("unknown", "UTF-8", "Latin-1"),
+ "unknown")))),
+
+ ## Options for ods files
+ conditionalPanel(
+ "input.cpNDfiletype == 'ods'",
+ fluidRow(column(4, p('Sheet')),
+ column(8, numericInput('cpODSsheet', NULL, 1, 1,
+ step = 1))),
+ checkboxInput("cpODScolnames", "Column names", TRUE),
+ fluidRow(column(4, p('NA symbol')),
+ column(8, textInput('cpODSna', NULL, value = ""))),
+ fluidRow(column(4, p('Skip rows')),
+ column(8, numericInput('cpODSskip', NULL, 0, 0,
+ step = 1))),
+ fluidRow(column(4, p('Specify range')),
+ column(8, textInput('cpODSrange', NULL, value = "")))
+ ),
+
+ ## Options for xls / xlsx files
+ conditionalPanel(
+ "input.cpNDfiletype == 'excel'",
+ fluidRow(column(4, p('Column names')),
+ column(8, selectInput('cpXLScolnames', NULL,
+ choices = c("Yes" = "TRUE",
+ "No" = "FALSE"),
+ selected = TRUE ))),
+ fluidRow(column(4, p('Skip rows')),
+ column(8, numericInput('cpXLSskip', NULL, 0, 0,
+ step = 1))),
+ fluidRow(column(4, p('Trim whitespaces')),
+ column(8, selectInput('cpXLStrim', NULL,
+ choices = c("Yes" = "TRUE",
+ "No" = "FALSE"),
+ selected = TRUE ))),
+ checkboxInput("cpXLSsheet",
+ "Specify Worksheet", FALSE),
+ conditionalPanel("input.cpXLSsheet",
+ textInput("cpXLSsheet_specs", NULL, "")),
+ checkboxInput("cpXLSrange", "Specify Range", FALSE),
+ conditionalPanel("input.cpXLSrange",
+ textInput("cpXLSrange_specs", NULL, "")
+ )),
+
+ ## Options for SPSS files
+ conditionalPanel(
+ "input.cpNDfiletype == 'spss'",
+ fluidRow(
+ column(4, p('Skip rows')),
+ column(8, numericInput('cpSPSSskip', NULL, 0, 0, step = 1))),
+ checkboxInput("cpSPSSna", "User-defined NA", FALSE)),
+
+ ## Options for SAS files
+ conditionalPanel(
+ "input.cpNDfiletype == 'sas_data'",
+ fluidRow(
+ column(4, p('Skip rows')),
+ column(8, numericInput("cpSASskip", NULL, 0, 0, step = 1))),
+ checkboxInput("cpSAScatalog", "Use catalog file", FALSE),
+ conditionalPanel(
+ "input.cpSAScatalog",
+ fileInput("cpSAScatalog_file", "Catalog file"))
+ ),
+
+ ## Options for STATA dta files
+ conditionalPanel(
+ "input.cpNDfiletype == 'stata'",
+ checkboxInput("cpSTATAfactors", "Convert factors", FALSE))
+ )
+ )
+ }),
+
+ uiOutput("cpDataMessage"),
+ hr(),
+ h4("Data"),
+
+ fluidRow(
+ column(6, numericInput(
+ "cpNrows", "N. rows for working sample", 500, 1, step = 1)),
+ column(6, numericInput(
+ "cpNskip", "Rows to skip before working sample", 0, 0, step = 1))
+ ),
+
+ fluidRow(
+ column(3, p(strong("*ID"))),
+ column(5, uiOutput("cpIdcolUI")),
+ column(4, p("Name of the id variable, unique identifier of each text"))
+ ),
+ fluidRow(
+ column(3, p(strong("*Text"))),
+ column(5, uiOutput("cpTextcolUI")),
+ column(4, p("Name of the text variables: if more than one, texts are concatenated in the specified order"))
+ ),
+ fluidRow(
+ column(3, p(strong("*Tags"))),
+ column(5, uiOutput("cpTagcolUI")),
+ column(4, p("(optional) Names of variables that are already tagged: each will create a new scheme"))
+ ),
+ fluidRow(
+ column(3, p(strong("Comments"))),
+ column(5, uiOutput("cpCommcolUI")),
+ column(4, p("(optional) Name of the comments variable"))
+ ),
+ fluidRow(
+ column(3, p(strong("Context"))),
+ column(5, uiOutput("cpContextcolUI")),
+ column(4, p("(optional) Names of variables not used in the models, but may be displayed during tagging"))
+ ),
+
+
+ hr(),
+ h4("System"),
+
+ fluidRow(
+ column(3, checkboxInput("cp_use_python", "Python backend", FALSE)),
+ column(5, conditionalPanel(
+ "input.cp_use_python",
+ textInput("cp_which_python", NULL, value = "python3",
+ placeholder = "(custom python path)"))),
+ column(4, conditionalPanel(
+ "input.cp_use_python",
+ p("This must be a working python3 environment, with the required modules installed (see documentation)")))
+ ),
+ conditionalPanel("input.cp_use_python", list(
+ fluidRow(
+ column(3, p("Language")),
+ column(5, selectInput("cp_lang", NULL, modelnames_labels, "en")),
+ column(4, p("Used to preset tokenization and embedding models"))
+ ),
+ fluidRow(
+ column(3, checkboxInput("cp_use_spacy", "SpaCy tokenization", FALSE)),
+ column(5, conditionalPanel("input.cp_use_spacy", uiOutput("cp_spacyUI"))),
+ column(4, p("Name of the spacy tokenizer model, used in DTM and word embeddings"))
+ ),
+ conditionalPanel("input.cp_use_spacy", fluidRow(
+ column(3),
+ column(9, uiOutput("cp_spacyDlUI")))
+ ),
+ fluidRow(
+ column(3, checkboxInput("cp_use_ft", "FastText word embeddings", FALSE)),
+ column(5, conditionalPanel("input.cp_use_ft", uiOutput("cp_ftUI"))),
+ column(4, p("Path to the local fasttext model binary"))
+ ),
+ conditionalPanel("input.cp_use_ft", fluidRow(
+ column(3),
+ column(9, uiOutput("cp_ftDlUI")))),
+ fluidRow(
+ column(3, checkboxInput("cp_use_sb", "SBERT sentence embeddings", FALSE)),
+ column(5, conditionalPanel("input.cp_use_sb", uiOutput("cp_sbUI"))),
+ column(4, p("(GPU recommended) Name or path of the sentence-transformers model"))
+ ),
+
+ conditionalPanel("input.cp_use_python", {
+ checkboxInput("cp_use_gpu", "GPU support (CUDA, for SBERT and BERT)", FALSE)
+ })
+ )),
+
+ uiOutput("cpConfirmMessage"),
+ footer = tagList(actionButton("cpConfirm", "Create"),
+ modalButton("Cancel"))))
+ })
+
+ observeEvent(input$cpDatafileGo, {
+ dataname <- input$cpDatafile
+ dataname <- gsub("[.]feather$", "", dataname)
+ tmpdat <- try(arrow::read_feather(
+ paste0(gsub("/+$", "/", paste0(input$cpDatadir, "/")),
+ dataname, ".feather")))
+ if (inherits(tmpdat, "try-error")) {
+ values$cpDataMessage <- paste0(
+ "Data import error: ", as.character(tmpdat))
+ values$cpNewData <- NULL
+ return(NULL)
+ }
+ if (!is.data.frame(tmpdat)) {
+ values$cpDataMessage <- "Data import error: object not a data.frame"
+ values$cpNewData <- NULL
+ return(NULL)
+ }
+ if (ncol(tmpdat) < 2) {
+ values$cpDataMessage <- "Data import error: only one column detected"
+ values$cpNewData <- NULL
+ return(NULL)
+ }
+
+ values$cpDataMessage <- paste0(
+ "Data import success: ", nrow(tmpdat), " rows, ", ncol(tmpdat), " cols.")
+ values$cpNewData <- tmpdat
+ values$cpNewDataname <- dataname
+
+ })
+
+ ## Data import mechanism
+ observeEvent(input$cpNDfile, {
+ req(input$cpNDfile)
+ if(input$cpNDfiletype == "csv_txt") {
+ res <- try(read.csv(
+ input$cpNDfile$datapath,
+ header = as.logical(input$cpCSVheader), sep = input$cpCSVsep,
+ quote = input$cpCSVquote, dec = input$cpCSVdec, stringsAsFactors = FALSE,
+ encoding = input$cpCSVencoding, check.names = TRUE))
+
+ } else if(input$cpNDfiletype == "feather") {
+ res <- try(arrow::read_feather(input$cpNDfile$datapath))
+
+ } else if(input$cpNDfiletype == "parquet") {
+ res <- try(arrow::read_parquet(input$cpNDfile$datapath))
+
+ } else if (input$cpNDfiletype == "ods") {
+ range <- input$cpODSrange
+ if (range == "") range <- NULL
+ res <- try(readODS::read_ods(
+ path = input$cpNDfile$datapath, sheet = input$cpODSsheet,
+ col_names = input$cpODScolnames, na = input$cpODSna,
+ skip = input$cpODSskip, range = input$cpODSrange))
+
+ } else if(input$cpNDfiletype == "excel"){
+ column_names <- as.logical(input$cpXLScolnames)
+ trim_spaces <- as.logical(input$cpXLStrim)
+ the.range <- NULL
+ if(input$cpXLSrange == TRUE & input$cpXLSrange_specs != "")
+ the.range <- input$cpXLSrange_specs
+ the.sheet <- NULL
+ if(input$cpXLSsheet == TRUE & input$cpXLSsheet_specs != "")
+ the.sheet <- input$cpXLSsheet_specs
+ res <- try(data.frame(readxl::read_excel(
+ dataFile$datapath, col_names = column_names, range = the.range,
+ sheet= the.sheet, trim_ws = trim_spaces, skip = rows_to_skip)))
+
+ } else if(input$cpNDfiletype == "spss"){
+ res <- try(data.frame(haven::read_spss(
+ file = input$cpNDfile$datapath,
+ skip= input$cpSPSSskip, user_na = input$cpSPSSna)))
+ if (!inherits(res, "try-error")) {
+ res <- data.frame(lapply(res, function(x) {
+ attr(x, "format.spss") <- NULL
+ if ("haven_labelled" %in% class(x))
+ x <- haven::as_factor(x, levels= "labels")
+ x
+ }))
+ }
+
+ } else if(input$cpNDfiletype == "sas_data") {
+ res <- try(data.frame(haven::read_sas(
+ data_file = input$cpNDfile$datapath,
+ catalog_file = input$cpSAScatalog_file$datapath,
+ skip = input$cpSASskip)))
+
+ } else if(input$cpNDfiletype == "stata") {
+ res <- try(data.frame(foreign::read.dta(
+ file = input$cpNDfile$datapath, convert.factors = input$cpSTATAfactors)))
+ }
+
+ if(inherits(res, "try-error")) {
+ values$cpDataMessage <- paste0("Data import error: ", res)
+ values$cpNewData <- NULL
+ return(NULL)
+ }
+ if (ncol(res) < 2) {
+ values$cpDataMessage <- "Data import error: only one column detected"
+ values$cpNewData <- NULL
+ return(NULL)
+ }
+ values$cpDataMessage <- paste0(
+ "Data import success: ", nrow(res), " rows, ", ncol(res), " columns.")
+ values$cpNewData <- res
+ })
+
+ ## New project confirm
+ observeEvent(input$cpConfirm, {
+ if (verbose)
+ cat("DEBUG cpConfirm\n")
+ newconf <- list()
+
+ tmpdir <- gsub("/+$", "/", paste0(input$cpDatadir, "/"))
+ if (!dir.exists(tmpdir))
+ dir.create(tmpdir)
+ newconf$datadir <- tmpdir
+
+ tmpname <- cleanFileName(input$cpName)
+ if (!nchar(tmpname)) {
+ values$cpConfirmMessage <- "Project name is required"
+ return(NULL)
+ }
+ if (dir.exists(paste0(newconf$datadir, tmpname))) {
+ values$cpConfirmMessage <- paste0(
+ "A project with the same name (", tmpname,
+ ") already exists in the data directory.")
+ return(NULL)
+ }
+ newconf$projectid <- tmpname
+
+ if (!is.data.frame(values$cpNewData)) {
+ values$cpConfirmMessage <- "Import data first"
+ return(NULL)
+ }
+
+ if (input$cpDoImport) {
+ newconf$dataname <- newconf$projectid
+ } else
+ newconf$dataname <- values$cpNewDataname
+
+ if (any(is.na(values$cpNewData[[input$cpIdcol]]))) {
+ values$cpConfirmMessage <- "ID variable contains missing values"
+ return(NULL)
+ }
+ if (any(duplicated(values$cpNewData[[input$cpIdcol]]))) {
+ values$cpConfirmMessage <- "ID variable contains duplicates"
+ return(NULL)
+ }
+ newconf$idcol <- input$cpIdcol
+
+ if (!length(input$cpTextcol)) {
+ values$cpConfirmMessage <- "Select at least one text variable"
+ return(NULL)
+ }
+ newconf$textcols <- input$cpTextcol
+
+ if (input$cpCommcol == "none") {
+ newconf$commcol <- paste0("comm_", newconf$projectid)
+ } else
+ newconf$commcol <- input$cpCommcol
+
+ if (!length(input$cpTagcol)) {
+ newconf$tagcols <- "scheme0"
+ } else
+ newconf$tagcols <- input$cpTagcol
+
+ newconf$contextcols <- input$cpContextcol
+
+ newconf$dataNrows <- input$cpNrows
+ newconf$dataSkipRows <- input$cpNskip
+
+ if (input$cp_use_python) {
+ pytest <- try(system(paste(input$cp_which_python, "--version"),
+ intern = TRUE))
+ if (inherits(pytest, "try-error")) {
+ values$cpConfirmMessage <- paste(
+ "Python path `", input$cp_which_python, "` not valid")
+ return(NULL)
+ }
+ }
+ newconf$use_python <- input$cp_use_python
+ newconf$python <- input$cp_which_python
+ newconf$use_gpu <- input$cp_use_gpu
+
+ if (input$cp_use_spacy) {
+ sptest <- system(paste0(
+ newconf$python, " -m spacy info ", input$cp_use_spacy_model), intern = TRUE)
+ if (length(sptest) == 0) {
+ values$cpConfirmMessage <- paste(
+ "Error loading spacy, check that it is installed in the specified python env")
+ return(NULL)
+ }
+ if (length(attr(sptest, "status"))) {
+ values$cpConfirmMessage <- paste(
+ "Error loading spacy model, check that it has been downloaded")
+ return(NULL)
+ }
+ }
+ newconf$use_spacy <- input$cp_use_spacy
+ newconf$use_spacy_model <- input$cp_use_spacy_model
+
+ if (input$cp_use_ft) if (!file.exists(input$cp_use_ft_model)) {
+ values$cpConfirmMessage <- paste(
+ "Error loading fasttext model, check the specified path")
+ return(NULL)
+ }
+ newconf$use_ft <- input$cp_use_ft
+ newconf$use_ft_model <- input$cp_use_ft_model
+
+ newconf$use_sb <- input$cp_use_sb
+ newconf$use_sb_model <- input$cp_use_sb_model
+
+ writeLines(RJSONIO::toJSON(newconf),
+ paste0("tigger_", newconf$projectid, ".json"))
+
+ values$confnames <- c(newconf$projectid, values$confnames)
+ values$conf <- newconf
+
+ ## On new data import, write a feather file with the name of the project
+ if (input$cpDoImport)
+ arrow::write_feather(values$cpNewData, paste0(
+ newconf$datadir, newconf$dataname, ".feather"
+ ))
+
+ removeModal()
+ })
+
+ ##############################################################################
+ ## Scheme management
+ ##############################################################################
+
+ ## Print active scheme in scheme management panel
+ output$printScheme <- renderUI(h4(input$selectScheme))
+
+ ## Create new scheme on request, with modal dialog
+ observeEvent(input$createScheme, {
+ if (verbose)
+ cat("DEBUG create scheme\n")
+ showModal(modalDialog(
+ title = "Create tagging scheme",
+ p(paste0("This will create a new scheme for this project")),
+
+ textInput("newScheme", label = NULL, placeholder = "Required: new scheme name"),
+ textAreaInput("newSchemeDescr", label = NULL,
+ placeholder = "Optional: scheme description"),
+ checkboxInput("schemeDuplicate", "Duplicate existing scheme:"),
+ conditionalPanel(
+ "input.schemeDuplicate",
+ selectInput("schemeDuplicateFrom", NULL, rev(values$conf$tagcols))),
+
+ footer = tagList(actionButton("createSchemeConfirm", "Create"),
+ modalButton("Cancel"))))
+ })
+
+ observeEvent(input$createSchemeConfirm, {
+ if (is.null(ok.data())) return(NULL)
+ if (verbose)
+ cat("DEBUG create scheme event\n")
+
+ ## Make sure new scheme name is a correct file name
+ if (!grepl("\\S", input$newScheme)) {
+ showNotification(type = "error",
+ "Cannot create scheme: name empty.")
+ return(NULL)
+ }
+
+ ## Check that new scheme name doesn't exist
+ ## Re-read tagcols from conf file, to prevent scheme disappearances
+ tmpname <- cleanFileName(input$newScheme)
+ tagpath <- paste0(values$projectdir, tmpname, ".csv")
+ confpath <- paste0("tigger_", values$conf$projectid, ".json")
+ if (verbose)
+ cat("DEBUG create scheme check json:", confpath,"\n")
+ tmp_tagcols <- RJSONIO::fromJSON(confpath)$tagcols
+ if (verbose)
+ cat("DEBUG create scheme json OK\n")
+ if (tmpname %in% tmp_tagcols | file.exists(tagpath)) {
+ showNotification(type = "error",
+ "Cannot create scheme: name already exists.")
+ return(NULL)
+ }
+
+ ## Create new scheme description
+ if (nchar(input$newSchemeDescr))
+ writeLines(input$newSchemeDescr,
+ paste0(values$projectdir, tmpname, ".txt"))
+
+ ## Update conf file and reactives
+ values$conf$tagcols <- c(tmp_tagcols, tmpname)
+ # values$confs[[values$conf$projectid]]$tagcols <- c(tmp_tagcols, tmpname)
+ writeLines(RJSONIO::toJSON(values$conf), confpath)
+
+ if (input$schemeDuplicate) {
+ tmpmat <- matrix(values$tagmat[, input$schemeDuplicateFrom])
+ tmpmathist <- matrix(values$tagmathist[, input$schemeDuplicateFrom])
+ } else {
+ tmpmat <- matrix(rep(NA, nrow(ok.data())))
+ tmpmathist <- matrix(rep(NA, nrow(ok.data())))
+ }
+ colnames(tmpmat) <- tmpname
+ colnames(tmpmathist) <- tmpname
+
+ values$tagmat <- cbind(values$tagmat, tmpmat)
+ values$tagmathist <- cbind(values$tagmat, tmpmathist)
+
+ ## Create new tagging file if duplicating
+ if (input$schemeDuplicate) {
+ export <- data.frame(ok.data()[, values$conf$idcol],
+ values$tagmat[, tmpname])
+ colnames(export) <- c(values$conf$idcol, tmpname)
+ export <- na.omit(export)
+ write.table(export, tagpath, sep = ",", qmethod = "double",
+ fileEncoding = "UTF-8", row.names = FALSE,
+ append = FALSE, col.names = TRUE)
+ }
+
+ cat("DEBUG create scheme exit\n")
+ removeModal()
+ })
+
+ ## Button action: delete scheme
+ observeEvent(input$schemeDelete, {
+ if (verbose)
+ cat("DEBUG delete scheme\n")
+ if (length(values$conf$tagcols) < 2) {
+ showNotification(type = "error",
+ "Cannot delete: at least one scheme must remain.")
+ return(NULL)
+ }
+ showModal(modalDialog(
+ title = "Delete tagging scheme",
+ paste0("Confirm delete '", input$selectScheme,
+ "': ", sum(!is.na(values$label)), " tags will be deleted",
+ ", along with scheme description."),
+ footer = tagList(actionButton("schemeDeleteConfirm", "Delete"),
+ modalButton("Cancel"))))
+ })
+
+ observeEvent(input$schemeDeleteConfirm, {
+ file.remove(paste0(values$projectdir, input$selectScheme, ".csv"))
+ file.remove(paste0(values$projectdir, input$selectScheme, ".txt"))
+ file.remove(paste0(values$projectdir, input$selectScheme, "_regex.json"))
+ values$conf$tagcols <-
+ values$conf$tagcols[values$conf$tagcols != input$selectScheme]
+ writeLines(RJSONIO::toJSON(values$conf),
+ paste0("tigger_", input$selectProject, ".json"))
+ removeModal()
+ })
+
+
+ observeEvent(input$schemeDescrSave, {
+ req(input$selectScheme)
+ writeLines(input$schemeDescr,
+ paste0(values$conf$datadir, values$conf$projectid,
+ "/", input$selectScheme, ".txt"))
+ })
+
+
+ ##############################################################################
+ ##############################################################################
+
+ ## Tag selection : populate
+ observe({
+ values$conf$tagcols
+ updateSelectInput(
+ # session, "selectScheme", choices = c(rev(values$conf$tagcols), ""))
+ session, "selectScheme", choices = rev(values$conf$tagcols))
+ })
+
+
+ ## Populate oracle buttons
+ output$oracleButtons <- renderUI({
+ if (is.null(ok.data())) return(NULL)
+ # if (is.null(values$label)) return(NULL)
+ if (is.null(values$uniqueLabels)) return(NULL)
+ if (verbose)
+ cat("DEBUG populate oracle buttons\n")
+
+ do.call(fluidRow, lapply(values$uniqueLabels, function(ilab)
+ actionButton(paste0("oracle_", label2hash(ilab)),
+ strong(ilab))))
+ })
+
+ ## Populate maxprob variable selector
+ observe({
+ updateSelectInput(session, "maxprobWhich", choices = values$uniqueLabels)
+ updateSelectInput(session, "taggedWhich",
+ choices = c("All" = "all", values$uniqueLabels))
+ })
+
+ ## Data panel message
+ observeEvent(ok.data(), {
+ if (verbose)
+ cat("DEBUG data panel message\n")
+ if (is.null(ok.data())) {
+ output$dataMessage <- renderUI(p("Importing data..."))
+ }
+ output$dataMessage <- NULL
+ })
+
+ ## On config change, update dataView and retag options
+ observeEvent(list(values$conf), {
+
+ if (is.null(values$conf)) return(NULL)
+ output$panelData <- renderUI(list(
+ DT::dataTableOutput("dataView")
+ ))
+
+ output$panelRetag <- renderUI(c(
+ list(
+ br(),
+ h4("Rename tags"),
+ fluidRow(
+ column(4, strong("Current")),
+ column(2, strong("Count")),
+ column(6, strong("New"))
+ ),
+ br()),
+ lapply(values$uniqueLabels, function(iLab) {
+ iHash <- label2hash(iLab)
+ fluidRow(
+ column(4, p(iLab)),
+ column(2, p(sum(values$label == iLab, na.rm = TRUE))),
+ column(4, textInput(
+ paste0("newtag_", iHash), NULL,
+ placeholder = "(new name, leave empty to untag)")),
+ column(2, actionButton(paste0("retag_", iHash), "Rename")))
+ })
+ ))
+ })
+
+ output$panelExtraRegex <- renderUI(list(
+ p(strong("Regex predictors")),
+ fluidRow(
+ column(8, textInput("extraRegexText", NULL,
+ placeholder = "(new regex)", width = "100%")),
+ column(4, actionButton(
+ "extraRegexSearch", strong("+"), width = "100%"))
+ ),
+ fluidRow(
+ style="overflow-y:scroll; max-height: 25vh",
+ fluidRow(
+ column(2, strong("")),
+ column(1, strong("Use")),
+ column(1, strong("Case")),
+ column(1, p(strong("N"))),
+ column(7, p(strong("Regex")))
+ ),
+
+ lapply(values$extraRegex, function(iRegex) {
+ iHash <- label2hash(iRegex)
+ tmp_cased <- values[[paste0("caseregex_", iHash)]]
+ if (is.null(tmp_cased)) tmp_cased <- FALSE
+ fluidRow(
+ column(2, actionButton(paste0("delregex_", iHash), "🗑️", width="100%")),
+ column(1, checkboxInput(
+ paste0("useregex_", iHash), NULL,
+ value = isolate(!isFALSE(values[[paste0("useregex_", iHash)]])))),
+ column(1, checkboxInput(
+ paste0("caseregex_", iHash), NULL,
+ value = isolate(isTRUE(values[[paste0("caseregex_", iHash)]])))),
+ column(1, br(), p(sum(
+ stringi::stri_detect_regex(
+ ok.nicetext(), iRegex,
+ opts_regex = list(case_insensitive = !tmp_cased)),
+ na.rm = TRUE))),
+ column(7, br(), p(iRegex))
+ )
+ })
+ )
+
+ ))
+
+ ## Process new regex
+ observeEvent(input$extraRegexSearch, {
+ if (is.null(ok.nicetext())) return(NULL)
+ req(nchar(input$extraRegexText) > 0)
+ if (input$extraRegexText %in% values$extraRegex) return(NULL)
+ try_regex <- try(stringi::stri_detect_regex(
+ ok.nicetext(), input$extraRegexText,
+ opts_regex = list(case_insensitive = TRUE)))
+ if (inherits(try_regex, "try-error")) return(NULL)
+ new_regex <- input$extraRegexText
+ new_hash <- label2hash(new_regex)
+ values$extraRegex <- c(new_regex, values$extraRegex)
+ names(values$extraRegex)[1] <- new_hash
+ if (verbose)
+ cat("DEBUG regex search", input$extraRegexText, "\n")
+ })
+
+ ## Regex predictors: populate delete and use events on first creation
+ useregex_trigger <- reactiveVal(0)
+ delregex_trigger <- reactiveVal(0)
+ caseregex_trigger <- reactiveVal(0)
+ observeEvent(list(values$extraRegex), {
+ if (!length(values$extraRegex)) return(NULL)
+ if (any(! values$extraRegex %in% values$extraRegexHist)) {
+ if (verbose)
+ cat("DEBUG regex predictor names updates\n")
+ tmpregex <- values$extraRegex[! values$extraRegex %in% values$extraRegexHist]
+ values$extraRegexHist <- c(tmpregex, values$extraRegexHist)
+
+ c(lapply(1:length(tmpregex), function(iregex) {
+ observeEvent(
+ input[[paste0("delregex_", names(tmpregex)[iregex])]], {
+ if (verbose)
+ cat("DEBUG regex delete", names(tmpregex)[iregex], "\n")
+ values$extraRegex <- values$extraRegex[values$extraRegex != tmpregex[iregex]]
+ delregex_trigger(delregex_trigger() + 1)
+ })
+ }),
+ lapply(1:length(tmpregex), function(iregex) {
+ tmpname <- paste0("useregex_", names(tmpregex)[iregex])
+ observeEvent(
+ input[[tmpname]], {
+ values[[tmpname]] <- input[[tmpname]]
+ useregex_trigger(useregex_trigger() + 1)
+ })
+ }),
+ lapply(1:length(tmpregex), function(iregex) {
+ tmpname <- paste0("caseregex_", names(tmpregex)[iregex])
+ observeEvent(
+ input[[tmpname]], {
+ values[[tmpname]] <- input[[tmpname]]
+ caseregex_trigger(caseregex_trigger() + 1)
+ })
+ })
+ )
+ }
+ })
+
+ observeEvent(list(delregex_trigger(), useregex_trigger(), caseregex_trigger()), {
+ req(ok.data(), input$selectScheme)
+ regex_all <- values$extraRegex
+ values$regex_inuse <- sapply(names(regex_all), function(iname)
+ values[[paste0("useregex_", iname)]])
+ values$regex_case <- sapply(names(regex_all), function(iname)
+ values[[paste0("caseregex_", iname)]])
+ if (verbose) if (any(values$regex_inuse))
+ cat("DEBUG regex in use", paste0(collapse = " ", regex_all[values$regex_inuse]), "\n")
+ writeLines(
+ RJSONIO::toJSON(lapply(FUN = unname, list(
+ regex = regex_all, use = values$regex_inuse, case = values$regex_case))),
+ paste0(values$projectdir, input$selectScheme, "_regex.json"))
+ })
+
+ output$dataView <- DT::renderDataTable({
+ # if (is.null(ok.text()) | ok.data.running()) return(NULL)
+ req(ok.text())
+ if (verbose)
+ cat("DEBUG enter dataView\n")
+ if (verbose)
+ cat("DEBUG render dataView\n")
+ DT::datatable(
+ rownames = F,
+ data.frame(
+ row = 1:nrow(ok.data()),
+ id = ok.data()[, values$conf$idcol],
+ text = ok.text()),
+ options = list(
+ pageLength = 3,
+ columnDefs = list(list(
+ targets = 2,
+ render = DT::JS(
+ "function(data, type, row, meta) {",
+ "return type === 'display' && data.length > 200 ?",
+ "'' + data.substr(0, 200) + '...' : data;",
+ "}")
+ ))
+ ),
+ callback = DT::JS('table.page(0).draw(false);')
+ )
+ })
+
+ ## Triggers for mass rename, created with each new label, with confirm
+ observeEvent(values$uniqueLabelsAll, {
+ if (is.null(values$uniqueLabelsAll)) return(NULL)
+ notthere <- unique(na.omit(
+ values$uniqueLabelsAll[! values$uniqueLabelsAll %in% values$retagLabels]))
+ lapply(
+ notthere,
+ function(iTag) {
+ iHash <- label2hash(iTag)
+
+ observeEvent(input[[paste0("retag_", iHash)]], {
+ tmp_oldtag <- iTag
+ tmp_retagged <- which(values$label == tmp_oldtag)
+ tmp_newtag <- gsub(
+ "^\\s+|\\s+$", "", input[[paste0("newtag_", iHash)]])
+ showModal(modalDialog(
+ title = "Mass retag",
+ paste0("Confirm retag: ", length(tmp_retagged), " ",
+ tmp_oldtag, " tags will be ",
+ ifelse(nchar(tmp_newtag),
+ paste("renamed to", tmp_newtag) , "deleted")),
+ footer = tagList(actionButton(paste0("retag_confirm_", iTag), "Retag"),
+ modalButton("Cancel"))))
+ })
+
+ observeEvent(input[[paste0("retag_confirm_", iTag)]], {
+ tmp_oldtag <- iTag
+ tmp_retagged <- which(values$label == tmp_oldtag)
+ tmp_newtag <- gsub(
+ "^\\s+|\\s+$", "", input[[paste0("newtag_", iHash)]])
+ if (!nchar(tmp_newtag)) tmp_newtag <- NA
+ values$tagTrigger <- list(
+ id = ok.data()[[values$conf$idcol]][tmp_retagged],
+ tag = rep(tmp_newtag, length(tmp_retagged)))
+ values$queries <- c(values$queries, tmp_retagged)
+
+ removeModal()
+ })
+
+ })
+ values$retagLabels <- c(values$retagLabels, notthere)
+ })
+
+
+
+ ##############################################################################
+ ## UI rendering of Panel Tagging / Visualization
+ ##############################################################################
+
+ ####################
+ ## Tagging left panel
+
+
+ # ## Trigger from Oops
+ # observeEvent(input$oops, {
+ # if (is.null(ok.data()))
+ # return(NULL)
+ # if (verbose)
+ # cat("DEBUG enter Oops\n")
+ # if (!is.null(values$retagged)) if (values$queries[length(values$queries)] == values$retagged[length(values$retagged)])
+ # values$retagged <- values$retagged[-length(values$retagged)]
+ # values$label[values$queries[length(values$queries)]] <- NA
+ # if (ok.commentCol() != "(None)")
+ # values$comment[values$queries[length(values$queries)]] <- ok.data()[values$queries[length(values$queries)], ok.commentCol()]
+ # values$queries <- values$queries[-length(values$queries)]
+ # ## tagTrigger ?
+ #
+ # ready2tag(0)
+ # queryNext(queryNext() + 1)
+ # })
+
+
+ ## Tagging triggers for Oracle buttons, created with each new label
+ observeEvent(values$uniqueLabelsAll, {
+ if (is.null(values$label)) return(NULL)
+ if (is.null(values$uniqueLabelsAll)) return(NULL)
+ if (all(is.na(values$label))) return(NULL)
+ notthere <- unique(na.omit(
+ values$uniqueLabelsAll[! values$uniqueLabelsAll %in% values$oracleLabels]
+ ))
+ if (!length(notthere)) return(NULL)
+ if (verbose)
+ cat("DEBUG triggers for oracle buttons\n")
+ lapply(
+ notthere,
+ function(iLab) {
+ observeEvent(
+ input[[paste0("oracle_", label2hash(iLab))]], {
+ if (is.null(ok.text())) return(NULL)
+ if (is.null(values$newQuery)) return(NULL)
+ if (!ready2tag()) {cat("DEBUG not ready to tag in Button\n"); return(NULL)}
+ # ready2tag(0)
+ # if (input$allowRetag | sum(is.na(values$label)) == 0)
+ if (!is.na(values$label[values$lastQuery]))
+ values$retagged <- c(values$retagged, values$lastQuery)
+ # values$label[values$lastQuery] <- iLab
+ values$comment[values$lastQuery] <- input$currentComment
+ # values$queries <- c(values$queries, values$lastQuery)
+ values$tagTrigger <- list(
+ id = ok.data()[values$lastQuery, values$conf$idcol],
+ tag = iLab,
+ comment = input$currentComment)
+ })
+ })
+ values$oracleLabels <- c(values$oracleLabels, notthere)
+ })
+
+ ## Trigger from Oracle Create value button
+ observeEvent(input$currentAction, {
+ if (is.null(ok.data())) return(NULL)
+ if (is.null(values$newQuery)) return(NULL)
+ if (verbose)
+ cat("DEBUG enter create value\n")
+ # if (input$allowRetag | sum(is.na(values$label)) == 0)
+ if (!is.na(values$label[values$lastQuery]))
+ values$retagged <- c(values$retagged, values$lastQuery)
+ if (input$newLab == "") {
+ return(NULL)
+ } else {
+ # values$label[values$lastQuery] <- input$newLab
+ updateTextInput(session, "newLab", value= "")
+ }
+ values$comment[values$lastQuery] <- input$currentComment
+ # values$queries <- c(values$queries, values$lastQuery)
+ values$tagTrigger <- list(
+ id = ok.data()[values$lastQuery, values$conf$idcol],
+ tag = input$newLab,
+ comment = input$currentComment)
+ })
+
+ ## Trigger from Oracle Confirm button
+ observeEvent(input$oracleConfirm, {
+ req(ok.data(), ok.train(), values$newQuery)
+ # if (is.null(ok.data())) return(NULL)
+ # if (is.null(values$newQuery)) return(NULL)
+ if (!ready2tag()) {cat("DEBUG not ready to tag in OC\n"); return(NULL)}
+ # ready2tag(0)
+ if (!ok.train.possible()) return(NULL)
+ if (verbose)
+ cat("DEBUG oracle confirm\n")
+ # if (input$allowRetag | sum(is.na(values$label)) == 0)
+ if (!is.na(values$label[values$lastQuery]))
+ values$retagged <- c(values$retagged, values$lastQuery)
+ # values$label[values$lastQuery] <- ok.predlab1()[values$lastQuery]
+ # values$queries <- c(values$queries, values$lastQuery)
+ values$comment[values$lastQuery] <- input$currentComment
+ values$tagTrigger <- list(
+ id = ok.data()[values$lastQuery, values$conf$idcol],
+ tag = ok.predlab1()[values$lastQuery],
+ comment = input$currentComment)
+ })
+
+ ## Trigger for CV glm (async)
+ glmCV_nclicks <- reactiveVal(0)
+ observeEvent(input$glmCV, {
+ if (is.null(ok.predictor())) return(NULL)
+ if (glmCV_nclicks() != 0) {
+ showNotification("LASSO CV already running")
+ return(NULL)
+ }
+ glmCV_nclicks(1)
+ glmCV_future <- reactiveVal()
+
+ tmpvalues <- values$label
+ tmppred <- ok.predictor()
+ for (ival in na.omit(unique(tmpvalues)))
+ if (sum(tmpvalues == ival, na.rm = T) <= 8)
+ tmpvalues[which(tmpvalues == ival)] <- NA
+ glmCV_async <- future(seed = TRUE, {
+ glmnet::cv.glmnet(
+ x= tmppred[!is.na(tmpvalues), ],
+ y= as.factor(tmpvalues[!is.na(tmpvalues)]),
+ family= "multinomial")
+ }) %...>% glmCV_future()
+
+ glmCV_async <- catch(glmCV_async, function(e) {
+ showNotification(type = "error", paste0("LASSO CV error: ", e))
+ })
+ glmCV_async <- finally(glmCV_async, function() {
+ glmCV_nclicks(0)
+ if (is.null(glmCV_future())) return(NULL)
+ updateNumericInput(session, "glmLambda",
+ value = glmCV_future()$lambda.min)
+ })
+ NULL
+ })
+
+ ## Trigger for CV liblinear (async)
+ liblinCV_nclicks <- reactiveVal(0)
+ observeEvent(input$liblinCV, {
+ if (is.null(ok.predictor())) return(NULL)
+ if (liblinCV_nclicks() != 0) {
+ showNotification("LibLineaR CV already running")
+ return(NULL)
+ }
+ liblinCV_nclicks(1)
+ liblinCV_future <- reactiveVal()
+
+ tmpvalues <- values$label
+ tmppred <- ok.predictor()
+ for (ival in na.omit(unique(tmpvalues)))
+ if (sum(tmpvalues == ival, na.rm = T) <= 3)
+ tmpvalues[which(tmpvalues == ival)] <- NA
+
+ liblinCV_async <- future(seed = TRUE, {
+ LiblineaR::LiblineaR(
+ data= tmppred[!is.na(tmpvalues), ],
+ target= as.factor(tmpvalues[!is.na(tmpvalues)]),
+ type= 0, findC= T)
+ }) %...>% liblinCV_future()
+ liblinCV_async <- catch(liblinCV_async, function(e) {
+ showNotification(type = "error", paste0("LibLineaR CV error: ", e))
+ })
+ liblinCV_async <- finally(liblinCV_async, function() {
+ liblinCV_nclicks(0)
+ if (is.null(liblinCV_future())) return(NULL)
+ updateNumericInput(session, "liblinCost", value= liblinCV_future())
+ })
+ NULL
+ })
+
+
+ output$currentContext <- renderUI({
+ if (verbose)
+ cat("DEBUG 1rentContext\n")
+ if ((! is.null(ok.text())) & (length(ok.contextCol()) > 0))
+ HTML(paste(paste(ok.contextCol(),
+ as.character(ok.data()[values$newQuery, ok.contextCol()]),
+ sep = " : "),
+ collapse= "
"))
+ })
+
+ output$makeOracleConfirm <- renderUI({
+ if (verbose)
+ cat("DEBUG update printPredicted\n")
+ if (is.null(ok.pred())) {
+ tmp <- "No prediction"
+ } else if (is.null(values$newQuery)) {
+ tmp <- "No query"
+ } else {
+ tmp <- paste0(
+ ifelse(is.na(values$label[values$newQuery]), "", "Pred: "),
+ ok.predlab1()[values$newQuery],
+ " (", round(max(ok.pred()[values$newQuery, ]), 3), ")")
+ }
+ res <- list(actionButton("oracleConfirm", label= paste("🤖", tmp), width = "100%"))
+ if (!is.null(values$label) & !is.null(values$newQuery)) {
+ if (!is.na(values$label[values$newQuery]))
+ res <- list(res[[1]], HTML(paste0(
+ " Current: ", values$label[values$newQuery],
+ "")))
+ }
+ res
+ })
+
+
+ ## Show training diagnostics on new model train
+ output$trainDiagno <-
+ renderText(paste0(ok.msgtagged(), "\n", values$modelDiagno))
+
+ ## Diagnostics event
+ observeEvent(diagTrigger(), {
+ if (verbose)
+ cat("DEBUG update modelDiagno\n")
+ if (is.null(ok.data())) {
+ values$modelDiagno <- "No model"
+ return(NULL)
+ }
+ if (!ok.train.possible()) {
+ values$modelDiagno <- "No model (not enough labelled data)."
+ return(NULL)
+ }
+
+ if (input$predModel == "random forest") {
+ tmp <- paste0(
+ "Train Accuracy: ",
+ round(100 * mean(ok.predlab1()[!is.na(values$label)] ==
+ values$label[!is.na(values$label)]), 1),
+ "% ; Wtd. F1: ",
+ sprintf("%f", round(wtdF1(values$label[!is.na(values$label)],
+ ok.predlab1()[!is.na(values$label)]), 3)),
+ "%\nOOB Accuracy : ",
+ round(100 * mean(
+ colnames(ok.train()$predictions)[apply(ok.train()$predictions, 1, which.max)] ==
+ values$label[!is.na(values$label)]), 1),
+ "% ; Wtd. F1: ",
+ sprintf("%.3f", wtdF1(
+ values$label[!is.na(values$label)],
+ colnames(ok.train()$predictions)[apply(ok.train()$predictions, 1, which.max)]))
+ )
+ } else if (input$predModel %in% c("lasso", "linear", "naive bayes", "knn")) {
+ tmp <- paste0(
+ "Train Accuracy: ",
+ round(100 * mean(ok.predlab1()[!is.na(values$label)] ==
+ values$label[!is.na(values$label)]), 1),
+ "% ; Wtd. F1: ",
+ sprintf("%.3f", wtdF1(
+ values$label[!is.na(values$label)],
+ ok.predlab1()[!is.na(values$label)])))
+ }
+ values$modelDiagno <- tmp
+ })
+
+ ## Update comment input field
+ observeEvent(values$newQuery, {
+ if (is.null(ok.text())) return(NULL)
+ if (verbose)
+ cat("DEBUG update comment field\n")
+ updateTextInput(session, "currentComment",
+ value = values$comment[values$newQuery])
+ })
+
+
+ ## Save event: tag + comment in dedicated csv
+ observeEvent(values$tagTrigger, {
+ if (is.null(values$tagTrigger)) return(NULL)
+ if (verbose)
+ cat("DEBUG enter event tagTrigger", length(values$tagTrigger$id), "\n")
+ ready2tag(0) # Prevent new tagging until next query
+ dapath <- paste0(values$projectdir, ok.labelcol(), ".csv")
+ darows <- match(values$tagTrigger$id, ok.data()[, values$conf$idcol])
+
+ export <- data.frame(values$tagTrigger$id, values$tagTrigger$tag)
+ colnames(export) <- c(values$conf$idcol, ok.labelcol())
+ if (verbose)
+ cat("DEBUG tagTrigger tag",
+ paste(export[, 1], export[, 2], collapse = " ; "), "\n")
+ write.table(export, dapath, sep = ",", qmethod = "double",
+ fileEncoding = "UTF-8", row.names = F,
+ append = ifelse(file.exists(dapath), T, F),
+ col.names = ifelse(file.exists(dapath), F, T))
+
+ if (any(nchar(values$tagTrigger$comment))) {
+ tmp_filter <- which(nchar(values$tagTrigger$comment) > 0)
+ dapath <- paste0(values$projectdir, values$conf$commcol, ".csv")
+ export <- data.frame(values$tagTrigger$id[tmp_filter],
+ values$tagTrigger$comment[tmp_filter])
+ colnames(export) <- c(values$conf$idcol, values$conf$commcol)
+ if (verbose)
+ cat("DEBUG tagTrigger comment", export[1,1], export[1, 2])
+ write.table(export, dapath, sep = ",", qmethod = "double",
+ fileEncoding = "UTF-8", row.names = F,
+ append = ifelse(file.exists(dapath), T, F),
+ col.names = ifelse(file.exists(dapath), F, T))
+ }
+
+ ## Update label hist
+ values$labelhist[match(values$tagTrigger$id, ok.data()[, values$conf$idcol])] <-
+ max(values$labelhist, na.rm = T) + 1
+ ## Update wasTagged and wasRetagged
+ itag <- match(values$tagTrigger$id, ok.data()[, values$conf$idcol])
+ if (input$sampleChoice == "untagged") {
+ values$wasTagged[itag] <- TRUE
+ } else if (input$sampleChoice == "tagged") {
+ values$wasRetagged[itag] <- TRUE
+ } else if (input$sampleChoice == "all") {
+ if (is.na(values$label[itag])) {
+ values$wasTagged[itag] <- TRUE
+ } else
+ values$wasRetagged[itag] <- TRUE
+ }
+
+ ## Call queryNext event, or launch training
+ if (trainCountdown() == 1) {
+ trainCountdown(input$trainCountdown)
+ trainTrigger(trainTrigger() + 1)
+ } else if (trainCountdown() > 1) {
+ trainCountdown(trainCountdown() - 1)
+ queryNext(queryNext() + 1)
+ } else {
+ queryNext(queryNext() + 1)
+ }
+
+ ## Update values$label etc
+ values$label[darows] <- values$tagTrigger$tag
+ values$queries <- c(values$queries, darows)
+
+ if (verbose)
+ cat("DEBUG exit event tagTrigger", nrow(export), "rows\n")
+ })
+
+
+ ####################
+ ## Tagging main panel
+
+ output$textVisuCols <- renderUI({
+ if (verbose)
+ cat("DEBUG render text/visu panel\n")
+ leftsize <- 6
+ if (!input$panelText) leftsize <- 1
+ if (input$panelText & !input$panelVisu) leftsize <- 11
+
+ fluidRow(
+ column(
+ leftsize,
+ conditionalPanel(
+ "input.panelText",
+ wellPanel(htmlOutput("currentText"))
+ )
+ ),
+ column(
+ 12 - leftsize,
+ conditionalPanel(
+ "input.panelVisu",
+ fluidRow(
+ column(2, HTML(paste0('',
+ actionButton("visuCompute", "🎲"),
+ '
'))),
+ column(2, HTML(paste0('',
+ actionButton("visuGoZoom", "🔍±"),
+ '
'))),
+ column(2, HTML(paste0('',
+ checkboxInput("visuLock", "🔒", value = FALSE),
+ '
'))),
+ column(2, HTML(paste0('',
+ checkboxInput("visuOptions", "🔧"),
+ '
'))),
+ column(3, HTML(paste0('',
+ numericInput("visuHeight", NULL, 400, 20, 2e12, 10),
+ '
')))
+ ),
+ conditionalPanel(
+ 'input.visuOptions',
+ flowLayout(
+ selectInput(
+ "visuAlgo", "Visu. method", c("umap", "t-sne"), "umap", width = "100%"),
+ selectizeInput(
+ "visuSource", "Visu. source",
+ c("Word counts (DFM)" = "dfm",
+ "Word embeddings (FastText)" = "ft",
+ "Sentence embeddings (SBERT)" = "sb",
+ "BERT predictions" = "bertpred",
+ "Regex" = "regex"),
+ c("sb", "ft"), multiple = TRUE)
+ )
+ ),
+ conditionalPanel( # umap options
+ 'input.visuAlgo == "umap" & input.visuOptions',
+ flowLayout(
+ numericInput(
+ "umapNeighb", "Nb. Neighbors", 15, 1, 500, 1),
+ numericInput(
+ "umapSpread", "Spread", 1, 0, 16, 1e-2),
+ numericInput(
+ "umapMindist", "Min dist.", .001, 0, 16, 1e-3),
+ selectInput(
+ "umapMetric", "Metric", selected = "cosine",
+ c("euclidean", "cosine", "manhattan", "hamming",
+ "correlation", "categorical")
+ )
+ )
+ ),
+ conditionalPanel( # t-SNE options
+ 'input.visuAlgo == "t-sne" & input.visuOptions',
+ flowLayout(
+ numericInput(
+ "tsnePerplex", "Perplexity", 15, 1, 500, 1),
+ numericInput(
+ "tsneTheta", "Theta", .5, 0, 1, 1e-2),
+ numericInput(
+ "tsnePcaDim", "PCA dims", 50, 1, 100, 1)
+ )
+ ),
+ uiOutput("visuMsg"),
+ plotOutput(
+ "visuPlot", width = "100%",
+ dblclick = "visuDblclick",
+ brush = brushOpts("visuBrush", resetOnNew = T))
+ )
+ )
+ )
+ })
+
+ ##################
+ ## Subpanel "Text"
+ ##################
+
+ output$currentText <- renderUI({
+ if (verbose)
+ cat("DEBUG render currentText\n")
+ if (is.null(ok.data()) | is.null(ok.nicetext()))
+ return(HTML("Import data to start training."))
+
+ if (input$strategy %in% c("entropy", "maxprob") & is.null(ok.train()))
+ return(HTML("No model: train one, or try 'random' or 'sequential' strategy"))
+
+ if (is.null(values$newQuery))
+ return(HTML("No query"))
+
+ bastext <- ok.nicetext()[values$newQuery]
+ if (nchar(input$regexFilter) > 0)
+ bastext <- gsub(paste0("(", isolate(input$regexFilter), ")"),
+ "\\1", bastext, ignore.case = isolate(!input$regexCaseSens))
+ baslen <- nchar(bastext)
+ if (!length(baslen)) {
+ if (nchar(input$regexFilter) > 0)
+ return(HTML("Regex filter: no match"))
+ return(HTML("No text matches the filters"))
+ }
+ if (is.na(baslen))
+ return(HTML("No query"))
+ if (baslen > 1200) {
+ bastext <- paste0(substring(bastext, 1, 1200), '',
+ substring(bastext, 1201), "")
+ }
+ bastext <- gsub("\n", "
", bastext)
+ HTML(bastext)
+ })
+
+
+
+ ##############################################################################
+ ## Panel "Visualize"
+ ##############################################################################
+
+ output$visuMsg <- renderUI({
+ if (!is.null(values$visuMsg))
+ # strong(values$visuMsg)
+ h5(values$visuMsg, style = "text-align:center")
+ })
+
+ ## Compute visualization points (async)
+ visu_nclicks <- reactiveVal(0)
+ ok.visu <- reactiveVal()
+ observe({
+ if (!input$panelVisu)
+ values$visuGo <- FALSE
+ if (input$panelVisu & !values$visuGo)
+ values$visuGo <- TRUE
+ })
+ observeEvent(list(values$visuGo, input$visuCompute), {
+ if (is.null(input$visuSource)) return(NULL)
+ if (!values$visuGo) return(NULL)
+ if (is.null(ok.data())) {
+ values$visuMsg <- "No data imported."
+ return(NULL)
+ }
+
+ if (visu_nclicks() > 0) {
+ values$visuMsg <- "Already computing visualization"
+ return(NULL)
+ }
+ visu_nclicks(1)
+
+ values$visuMsg <- "Computing visualization..."
+
+ ## Prepare options and objects for async computation
+ dasources <- input$visuSource
+ daalgo <- input$visuAlgo
+
+ if (verbose)
+ cat("DEBUG compute visu embedding: ", daalgo, paste(dasources, collapse = " "), "\n")
+
+ visudat <- NULL
+ if ("dfm" %in% dasources) dadfm <- ok.dfm()
+ if ("ft" %in% dasources) {
+ visudat <- ok.data()[, values$cols_ft]
+ }
+ if ("sb" %in% dasources) {
+ if (is.null(visudat)) {
+ visudat <- ok.data()[, values$cols_sb]
+ } else
+ visudat <- cbind(visudat, ok.data()[, values$cols_sb])
+ }
+ if ("bertpred" %in% dasources) {
+ if (is.null(visudat)) {
+ visudat <- ok.data()[, do.call(c, values$cols_bertpred)]
+ } else
+ visudat <- cbind(visudat, ok.data()[, do.call(c, values$cols_bertpred)])
+ }
+ if ("regex" %in% dasources) {
+ if (is.null(visudat)) {
+ visudat <- ok.extraRegex.matrix()
+ } else
+ visudat <- cbind(visudat, ok.extraRegex.matrix())
+ }
+ if (daalgo == "umap") {
+ visu_args <- list(
+ n_neighbors = input$umapNeighb, metric = input$umapMetric,
+ spread = input$umapSpread, min_dist = input$umapMindist)
+ } else if (daalgo == "t-sne") {
+ visu_args <- list(
+ perplexity = input$tsnePerplex, theta = input$tsneTheta,
+ initial_dims = input$tsnePcaDim, check_duplicates = FALSE)
+ }
+
+ visu_async <- future(seed = TRUE, {
+ if ("dfm" %in% dasources) {
+ ## TODO: CA treatment buggy, switch to SVD?
+ # da_ca <- quanteda.textmodels::textmodel_ca(
+ # quanteda::as.dfm(dadfm), nd = min(100, nrow(dadfm)))
+ # da_ca <- (da_ca$rowcoord %*% diag(da_ca$sv))[, !is.na(da_ca$sv)]
+ da_ca <- as.matrix(dadfm)
+ if (is.null(visudat)) {
+ visudat <- da_ca
+ } else
+ visudat <- cbind(da_ca, visudat)
+ }
+
+ if (daalgo == "umap") {
+ do.call(uwot::umap, c(list(visudat), visu_args))
+ } else if (daalgo == "t-sne") {
+ visudat_dupli <- duplicated(visudat)
+ if (any(visudat_dupli))
+ visudat[visudat_dupli, ] <- jitter(visudat[visudat_dupli, ])
+ do.call(Rtsne::Rtsne, c(list(visudat), visu_args))$Y
+ }
+ }) %...>% ok.visu()
+
+ visu_async <- catch(visu_async, function(error) {
+ values$visuMsg <- paste0("Error: ", error)
+ })
+ visu_async <- finally(visu_async, function() {
+ visu_nclicks(0)
+ if (input$visuGoZoom == 0) {
+ values$visuMsg <- "Double-click to select, drag to select zooming zone."
+ } else
+ values$visuMsg <- NULL
+
+ nice_choices <- c("FastText" = "ft", "SBERT" = "sb", "visu" = daalgo)
+ names(nice_choices)[3] <- ifelse(daalgo == "t-sne", "t-SNE", "UMAP")
+ updateSelectInput(
+ session, "dlEmbedSelect", choices = nice_choices, selected = daalgo)
+
+ if (verbose)
+ cat("DEBUG Visu async out\n")
+ })
+
+ NULL
+ })
+
+ ## Compute visualization plot (async)
+ ok.ggvisu <- reactiveVal()
+ observe({
+ # if (is.null(ok.visu())) return(NULL)
+ if (is.null(ok.visu())) {
+ ok.ggvisu(NULL)
+ return(NULL)
+ }
+ # if (!isTRUE(values$visugo)) return(NULL)
+ if (!input$panelVisu) return(NULL)
+
+ if (verbose)
+ cat("DEBUG compute visu plot\n")
+ ggdat <- data.frame(ok.visu(), tag = values$label)
+ if (!is.null(ok.predlab1()))
+ ggdat$tag[is.na(values$label)] <- ok.predlab1()[is.na(values$label)]
+ da_predlabs <- ok.predlab1()
+ da_tagged <- ! is.na(values$label)
+ da_query <- values$newQuery
+
+ ggvisu_async <- future({
+ ggres <- ggplot(ggdat, aes(X1, X2)) + theme_bw() +
+ xlab("") + ylab("") +
+ theme(legend.position = "bottom") +
+ scale_shape_manual(values = rep(1:8, 40)) # TODO : adapt to tags
+
+ ## Tagged points
+ if (any(da_tagged)) {
+ ggres <- ggres +
+ geom_point(data = ggdat[da_tagged, ],
+ aes(X1, X2, color = tag, shape = tag),
+ size = 1.5, stroke = 1.5, alpha = .8)
+ }
+
+ ## Not tagged points
+ if (!is.null(da_predlabs)) {
+ ggres <- ggres +
+ geom_point(data = ggdat[!da_tagged, ],
+ aes(color = tag, shape = tag), show.legend = FALSE)
+ } else {
+ ggres <- ggres +
+ geom_point(alpha = .8, data = ggdat[is.na(ggdat$tag), ])
+ }
+
+ ## Current query
+ if (!is.null(da_query)) {
+ if (is.null(da_predlabs)) {
+ ggres <- ggres +
+ geom_point(data = ggdat[da_query, ], color = 2,
+ stroke = 3, shape = 3, show.legend = FALSE)
+ } else {
+ ggres <- ggres +
+ geom_point(data = ggdat[da_query, ],
+ stroke = 3, shape = 3, show.legend = FALSE)
+ }
+ }
+ ggres
+ }) %...>% ok.ggvisu()
+
+ ggvisu_async <- catch(ggvisu_async, function(error) {
+ values$visuMsg <- paste0("Error in ggplot: ", error)
+ })
+
+ })
+
+ observe({
+ if (verbose)
+ cat("DEBUG update visuPlot\n")
+ if (is.null(ok.ggvisu())) {
+ values$visuMsg <- "No visualization yet."
+ output$visuPlot <- NULL
+ values$visuZoom$xlim <- NULL
+ values$visuZoom$ylim <- NULL
+ return(NULL)
+ }
+ output$visuPlot <- renderPlot(height = input$visuHeight, {
+ # if (is.null(ok.ggvisu())) return(NULL)
+ ok.ggvisu() +
+ coord_fixed(xlim = values$visuZoom$xlim, ylim = values$visuZoom$ylim)
+ })
+ })
+
+ observeEvent(input$visuDblclick, {
+ if (!is.null(values$visuMsg))
+ values$visuMsg <- NULL
+ tmp <- input$visuDblclick
+ bmu <- t(ok.visu()) - c(tmp$x, tmp$y)
+ bmu <- which.min(colSums(bmu^2))
+ values$lastQuery <- bmu
+ values$newQuery <- bmu
+ ready2tag(1)
+ })
+
+ observeEvent(input$visuGoZoom, {
+ if (!is.null(values$visuMsg))
+ values$visuMsg <- NULL
+ values$visuZoom$xlim <- c(input$visuBrush$xmin, input$visuBrush$xmax)
+ values$visuZoom$ylim <- c(input$visuBrush$ymin, input$visuBrush$ymax)
+ })
+
+
+ ##############################################################################
+ ## Panel History
+ ##############################################################################
+
+ ## Create history link and retag events as they appear
+ observeEvent(input$histDTable_rows_current, {
+ histrows <- order(values$labelhist)
+ histrows <- rev(histrows[!is.na(values$labelhist[histrows])])
+ histrows <- histrows[!is.na(values$label[histrows])]
+ histrows <- histrows[input$histDTable_rows_current]
+ notthere <- histrows[! histrows %in% values$histQueries]
+
+ if (!length(notthere)) return(NULL)
+ lapply(notthere, function(daquery) {
+ if (verbose) cat("DEBUG create history events", daquery, "\n")
+ observeEvent(input[[paste0("hist", daquery)]], {
+ if (verbose) cat("DEBUG history link", daquery, "\n")
+ values$lastQuery <- daquery
+ values$newQuery <- daquery
+ updateTabsetPanel(session, "mainPanelset", selected = "Tagging")
+ ready2tag(1)
+ })
+
+ observeEvent(input[[paste0("histTag", daquery)]], {
+ if (verbose) cat("DEBUG history retag", daquery, "\n")
+ values$histSaveStack <- rbind(
+ values$histSaveStack,
+ data.frame(id = ok.data()[daquery, values$conf$idcol],
+ tag = input[[paste0("histTag", daquery)]],
+ comment = values$comment[daquery])
+ )
+ })
+ })
+ values$histQueries <- c(values$histQueries, notthere)
+ })
+
+ ## History save action
+ observeEvent(input$histSave, {
+ req(is.data.frame(values$histSaveStack))
+ values$tagTrigger <- as.list(values$histSaveStack)
+ values$label[match(values$histSaveStack$id, ok.data()[[values$conf$idcol]])] <-
+ values$histSaveStack$tag
+ values$histSaveStack <- NULL
+ })
+
+ ## Update save name with number of unsaved changes
+ observeEvent(length(values$histSaveStack$id), {
+ updateActionButton(
+ session, "histSave",
+ label = paste0("Save changes (", length(unique(values$histSaveStack$id)),")"))
+ })
+
+ ## History table, with clickable links and retagging
+ output$histDTable <- DT::renderDataTable({
+ histrows <- order(values$labelhist)
+ histrows <- rev(histrows[!is.na(values$labelhist[histrows])])
+ histrows <- histrows[!is.na(values$label[histrows])]
+
+ # histrows <- rev(values$queries)
+ hist_df <- data.frame(
+ # history = length(histrows):1,
+ history = values$labelhist[histrows],
+ id = sapply(histrows, function(irow) {
+ as.character(actionLink(
+ paste0("hist", irow),
+ label = ok.data()[irow, values$conf$idcol],
+ onclick = paste0('Shiny.onInputChange(\"hist', irow, '\", this.id)')))
+ }),
+ Tag = values$label[histrows],
+ Retag = sapply(histrows, function(irow) {
+ paste0(
+ ""
+ )
+ }),
+ Comment = values$comment[histrows],
+ Text = ok.nicetext()[histrows])
+ if (!is.null(values$conf$contextcols))
+ hist_df <- cbind(hist_df,
+ ok.data()[histrows, values$conf$contextcols])
+
+ DT::datatable(hist_df, escape = FALSE, rownames = FALSE, filter = "top",
+ options = list(
+ columnDefs = list(list(
+ targets = 5,
+ render = DT::JS(
+ "function(data, type, row, meta) {",
+ "return type === 'display' && data.length > 200 ?",
+ "'' + data.substr(0, 200) + '...' : data;",
+ "}")))
+ ))
+ })
+
+ ##############################################################################
+ ## Panel "Stats"
+ ##############################################################################
+
+ output$statsTagTable <- renderTable(rownames = F, {
+ table(tag = values$label)
+ })
+
+
+ ## Model diagnostics by CV
+ output$statsCVoutput <- reactive({
+ if (is.null(values$modelcv))
+ return("Model diagnostics will appear here")
+ values$modelcv
+ })
+
+ output$statsCVtable <- DT::renderDataTable({
+ if (is.null(values$cvtable))
+ return(NULL)
+ DT::datatable(values$cvtable, list(pageLength = 25), rownames = F)
+ })
+
+ ## Button action: compute CV (async)
+ cv_nclicks <- reactiveVal(0)
+ observeEvent(input$statsCVgo, {
+ if (is.null(ok.train())) {
+ values$modelcv <- "Error in model training"
+ return(NULL)
+ }
+ if (cv_nclicks() != 0) {
+ values$modelcv <- "CV already running"
+ return(NULL)
+ }
+ cv_nclicks(cv_nclicks() + 1)
+
+ predmat <- ok.predictor()[!is.na(values$label), ]
+ predlab <- values$label[!is.na(values$label)]
+ cvsamp <- sample(10, nrow(predmat), replace = TRUE)
+ cvres <- rep(NA, nrow(predmat))
+ cvres_future <- reactiveVal()
+
+ tmp_model <- input$predModel
+ if (tmp_model == "random forest") {
+ cv_args <- list(
+ num.trees = input$rfNumTrees, mtry = if (input$rfMtry > 0) input$rfMtry,
+ sample.fraction = input$rfSampleFrac, probability = FALSE)
+ } else if (tmp_model == "linear") {
+ cv_args <- list(type= 0, cost = input$liblinCost)
+ } else if (tmp_model == "lasso") {
+ cv_args <- input$glmLambda
+ } else if (tmp_model == "naive bayes") {
+ cv_args <- list(smooth = input$naiveSmooth, prior = input$naivePrior,
+ distribution = input$naiveDistri)
+ } else if (tmp_model == "knn") {
+ cv_args <- list(k = input$knnK)
+ }
+
+ cv_status_file <- paste0(session$token, "_cv")
+
+ cv_async <- future(seed = TRUE, {
+ for (icv in 1:10) {
+ set_status(cv_status_file, paste0("Computing CV fold ", icv, " / 10"))
+ if (tmp_model == "random forest") {
+ cvmodel <- do.call(ranger::ranger, c(cv_args, list(
+ x= predmat[cvsamp != icv, ],
+ y= as.factor(predlab[cvsamp != icv]))))
+ cvres[cvsamp == icv] <- as.character(
+ predict(cvmodel, data= predmat[cvsamp == icv, ])$predictions)
+ } else {
+ tmpvalues <- predlab[cvsamp != icv]
+ # for (ival in na.omit(unique(tmpvalues)))
+ # if (sum(tmpvalues == ival, na.rm = T) == 1)
+ # tmpvalues[which(tmpvalues == ival)] <- NA
+
+ if (tmp_model == "lasso") {
+ cvmodel <- suppressWarnings(
+ glmnet::glmnet(x= predmat[cvsamp != icv, ][!is.na(tmpvalues), ],
+ y= as.factor(tmpvalues[!is.na(tmpvalues)]),
+ family= "multinomial"))
+ cvres[cvsamp == icv] <- predict(
+ cvmodel, newx = predmat[cvsamp == icv, ],
+ type = "class", s= cv_args)
+ } else if (tmp_model == "linear") {
+ cvmodel <- do.call(LiblineaR::LiblineaR, c(
+ list(data= predmat[cvsamp != icv, ][!is.na(tmpvalues), ],
+ target= as.factor(tmpvalues[!is.na(tmpvalues)])),
+ cv_args))
+ cvres[cvsamp == icv] <- as.character(
+ predict(cvmodel, newx= predmat[cvsamp == icv, ])$predictions)
+ } else if (tmp_model == "naive bayes") {
+ cvmodel <- do.call(quanteda.textmodels::textmodel_nb, c(
+ list(x= predmat[cvsamp != icv, ][!is.na(tmpvalues), ],
+ y= as.factor(tmpvalues[!is.na(tmpvalues)])),
+ cv_args))
+ cvres[cvsamp == icv] <- as.character(
+ predict(
+ cvmodel, newdata= predmat[cvsamp == icv, ], type= "class"))
+ } else if (isolate(tmp_model) == "knn") {
+ cvres[cvsamp == icv] <-
+ as.character(do.call(class::knn, c(cv_args, list(
+ train = predmat[cvsamp != icv, ],
+ test = predmat[cvsamp == icv, ],
+ cl = predlab[cvsamp != icv]))))
+ }
+ }
+ }
+ cvres
+ }) %...>% cvres_future()
+
+ cv_async <- catch(cv_async, function(e) {
+ cvres_future(NULL)
+ values$modelcv <- e$message
+ })
+
+ cv_async <- finally(cv_async, function() {
+ cv_nclicks(0)
+ if (file.exists(cv_status_file)) unlink(cv_status_file)
+ if (is.null(cvres_future())) return(NULL)
+ cvres <- cvres_future()
+ predlab <- values$label[!is.na(values$label)]
+ dametrics <- metricsTable(predlab, cvres)
+ values$cvtable <- dametrics
+ res <- "10-CV OOB evaluation:\n"
+ # if (any(grepl("^bertpred", input$use_regressors)))
+ if (any(grepl("^bertpred", values$new_use_regressors)))
+ res <- paste0(
+ res,
+ "⚠️ Scores are biased upwards if BERT predictions were trained on current data ⚠️ \n")
+ res <- paste0(
+ res,
+ "* Accuracy : ", round(100 * mean(cvres == predlab), 2), "%\n",
+ "* Macro F1 : ", sprintf("%f", mean(as.numeric(dametrics$F1))), "\n",
+ "* Weighted F1 : ", sprintf("%f", weighted.mean(as.numeric(dametrics$F1),
+ dametrics$N.cases)), "\n"
+ )
+ values$modelcv <- res
+ })
+
+ NULL
+ })
+
+ ## Automatic message update during CV computation
+ observe({
+ if (cv_nclicks() == 0) return(NULL)
+ invalidateLater(200)
+ if (!file.exists(paste0(session$token, "_cv"))) return(NULL)
+ values$modelcv <- get_status(paste0(session$token, "_cv"))
+ values$cvtable <- NULL
+ })
+
+
+ ##############################################################################
+ ## Panel BERT
+ ##############################################################################
+
+ ##########
+ ## Bert UI left panel
+
+ ## Update list of saved models and dlSelect on init and on saveTrigger
+
+ observeEvent(list(ok.data(), values$bertSaveTrigger), {
+ damodels <- dir(pattern = "^tiggerbert_saved_")
+ if (!length(damodels)) return(NULL)
+ damodels <- gsub("^tiggerbert_saved_", "", damodels)
+ values$bertSaved <- damodels
+ updateSelectInput(session, "dlBPSelect", choices = damodels)
+ })
+
+ ## Populate saved models list, with buttons
+
+ output$bertSaved <- renderUI({
+ if (verbose)
+ cat("DEBUG bert update saved list\n")
+ HTML(paste0(
+ "",
+ paste0(sapply(values$bertSaved, function(dabert) {
+ bertshort <- dabert
+ if (nchar(bertshort) > 23)
+ bertshort <- paste0(substr(bertshort, 1, 20), "...")
+ paste0(
+ "",
+ actionLink(paste0("saved_", dabert), bertshort, title = dabert,
+ style="margin-right: 5px;"), " | ",
+ actionButton(paste0("bert_copypars_", dabert), "📋",
+ title = "Copy parameters"), " | ",
+ actionButton(paste0("bert_infer_", dabert), "🤖",
+ title = "Infer predictions on current data"), " | ",
+ actionButton(paste0("bert_delete_", dabert), "🗑",
+ title = "Delete model"),
+ " |
")
+ }), collapse = "\n"),
+ "
"
+ ))
+ })
+
+ ##########
+ ## Bert UI main panel
+
+ ## UI outputs : message and stats table and plot
+ output$bertMsg <- reactive(
+ ifelse(is.null(values$bertMsg),
+ "Train new BERT, or select saved model.",
+ values$bertMsg))
+
+ output$bertValstats <- DT::renderDataTable(
+ DT::datatable(values$bertValstats, list(pageLength = 25)))
+
+ output$bertValPlot <- renderPlot(values$bertValPlot)
+
+ output$bertMsgHyperpar <- reactive(values$bertMsgHyperpar)
+
+ ##################################
+ ## Async handling of BERT training
+
+ ## Status File
+ bert_status_file <- paste0("tiggerbert_", session$token, "_trainstatus")
+
+ fire_interrupt <- function(dafile) set_status(dafile, "interrupt")
+ fire_ready <- function(dafile) set_status(dafile, "Ready")
+ fire_running <- function(dafile, perc_complete = NULL){
+ if(is.null(perc_complete))
+ msg <- "Training..."
+ else
+ msg <- paste0("Training... ", perc_complete, "% complete")
+ set_status(dafile, msg)
+ }
+ interrupted <- function(dafile) get_status(dafile) == "interrupt"
+
+ ## Create Status File
+ fire_ready(bert_status_file)
+
+
+ ## Button action preparation
+ bert_nclicks <- reactiveVal(0)
+ bert_result_val <- reactiveVal()
+ bert_hyperpars <- reactiveVal()
+
+ ## Button appearance: write "Stop" if training is ongoing
+ observeEvent(bert_nclicks(), {
+ updateActionButton(session, "bertTrain",
+ label = ifelse(bert_nclicks() > 0, "Stop", "Train BERT"))
+ })
+
+ ## Button action: Train new BERT
+ observeEvent(input$bertTrain,{
+
+ ## Don't do anything if analysis is already being run
+ # if(bert_nclicks() != 0){
+ # values$bertMsg <- "BERT is already training"
+ # return(NULL)
+ # }
+ tmpsession <- paste0("tiggerbert_", ok.labelcol(), "_", session$token)
+ if(bert_nclicks() != 0){
+ write("stop", paste0(tmpsession, "_stop"))
+ bert_nclicks(0)
+ return(NULL)
+ }
+ if (file.exists(paste0(tmpsession, "_stop")))
+ file.remove(paste0(tmpsession, "_stop"))
+
+ ## Increment clicks and prevent concurrent analyses
+ bert_nclicks(bert_nclicks() + 1)
+
+ bert_result_val(data.frame(Status="Running..."))
+ fire_running(bert_status_file)
+
+ ## Prepare async training
+ values$bertLoadSaved <- NULL
+ # tmpsession <- paste0("tiggerbert_", ok.labelcol(), "_", session$token)
+ tmpmodel <- input$bertModel
+ system(paste0("rm ", tmpsession, "_done"))
+
+ tmpdat <- data.frame(
+ id = ok.data()[, values$conf$idcol],
+ text = ok.nicetext(),
+ label = values$label
+ )
+ tmpdat <- tmpdat[!is.na(tmpdat$label), ]
+ tmpdat <- tmpdat[grepl("\\S", tmpdat$text), ]
+
+ ## Filter data: min occurences, balance classes, validation split
+ tmplabtable <- table(tmpdat$label)
+ if (is.numeric(input$bertMinOccur)) if (input$bertMinOccur > 1) {
+ tmpdat <- tmpdat[tmpdat$label %in%
+ names(which(tmplabtable >= input$bertMinOccur)), ]
+ tmplabtable <- table(tmpdat$label)
+ }
+ set.seed(input$bertValidSeed)
+ if (input$bertBalance) {
+ tmpminocc <- min(tmplabtable)
+ tmptrain <- sample(do.call(c, lapply(unique(tmpdat$label), function(dalabel)
+ sample(which(tmpdat$label == dalabel), round((1-input$bertValidFrac) * tmpminocc)))))
+ tmpvalid <- sample(do.call(c, lapply(unique(tmpdat$label), function(dalabel) {
+ dasamp <- which(tmpdat$label == dalabel)
+ dasamp <- dasamp[! dasamp %in% tmptrain]
+ sample(dasamp, round(input$bertValidFrac * tmpminocc))
+ })))
+ } else {
+ tmptrain <- sample(1:nrow(tmpdat), round((1 - input$bertValidFrac) * nrow(tmpdat)))
+ tmpvalid <- (1:nrow(tmpdat))[-tmptrain]
+ }
+
+ write.csv(tmpdat[tmptrain, ], paste0(tmpsession, "_dat_train.csv"), row.names = FALSE)
+ write.csv(tmpdat[tmpvalid, ], paste0(tmpsession, "_dat_valid.csv"), row.names = FALSE)
+
+ iwait <- 0
+ while (iwait < 10) {
+ if (!file.exists(paste0(tmpsession, "_dat_valid.csv"))) Sys.sleep(500)
+ iwait <- iwait + 1
+ }
+
+ tmp_cmd <- paste0(
+ values$conf$python, " gobert.py -m ", tmpmodel,
+ " -t ", tmpsession, "_dat_train.csv",
+ " -v ", tmpsession, "_dat_valid.csv",
+ " -b ", input$bertBatchsize,
+ " -g ", input$bertGradacc,
+ " -e ", input$bertEpochs,
+ " -l ", input$bertLrate,
+ " -w ", input$bertWdecay,
+ " -B ", ifelse(input$bertBest, 1, 0),
+ " -E ", input$bertNeval,
+ " -s ", tmpsession,
+ " -G ", as.numeric(values$conf$use_gpu),
+ " -A ", as.numeric(input$bertAdapt),
+ " 2>&1 ; touch ", tmpsession, "_done"
+ )
+
+ ## Save hyperpars
+ bert_hyperpars(list(bertModel = input$bertModel,
+ bertEpochs = input$bertEpochs,
+ bertLrate = input$bertLrate,
+ bertWdecay = input$bertWdecay,
+ bertBatchsize = input$bertBatchsize,
+ bertGradacc = input$bertGradacc,
+ bertNeval = input$bertNeval,
+ bertBest = input$bertBest,
+ bertValidFrac = input$bertValidFrac,
+ bertValidSeed = input$bertValidSeed,
+ bertNtrain = length(tmptrain),
+ bertTrainTable = as.list(table(tmpdat$label[tmptrain])),
+ bertScheme = ok.labelcol()))
+
+ ## Launch async training
+ bert_async <- future({
+ if (file.exists(tmpsession))
+ system(paste0("rm -R ", tmpsession, "*"))
+
+ # system(tmp_cmd, wait = FALSE, intern = FALSE)
+ tmp_exec <- system(tmp_cmd, wait = FALSE, intern = TRUE)
+ writeLines(tmp_exec, paste0(tmpsession, "_log"))
+
+ ## After training, get trainer state from last checkpoint for diagnostics
+ chkpt <- dir(paste0(tmpsession, "_train"), pattern = "^checkpoint")
+ chkpt <-
+ chkpt[order(as.numeric(gsub("\\D", "", chkpt)), decreasing = T)[1]]
+ system(paste0(
+ "cp ", tmpsession, "_train/", chkpt, "/trainer_state.json ",
+ tmpsession, "/"))
+ }) %...>% bert_result_val()
+
+ ## Catch interrupt (or any other error) and notify user
+ bert_async <- catch(bert_async, function(e){
+ bert_result_val(NULL)
+ showNotification(e$message)
+ values$bertMsg <- e$message
+ })
+
+ ## After the promise has been evaluated set nclicks to 0 to allow another Run
+ bert_async <- finally(bert_async, function() {
+ system(paste0("rm -R ", tmpsession, "_train"))
+ system(paste0("rm ", tmpsession, "_done"))
+
+ fire_ready(bert_status_file)
+ bert_nclicks(0)
+
+ if (is.null(bert_result_val())) return(NULL)
+
+ bertpreds <- try(read.csv(paste0(tmpsession, "_predval.csv")))
+ if (inherits(bertpreds, "try-error")) {
+ # values$bertMsg <- "BERT error - try again"
+ values$bertMsg <- try(paste(readLines(paste0(tmpsession, "_log")), collapse = "\n"))
+ values$bertValstats <- NULL
+ values$bertValPlot <- NULL
+ values$bertLastUnsaved <- NULL
+ values$bertMsgHyperpar <- NULL
+ values$bertLastHyperpar <- NULL
+ return(NULL)
+ }
+
+ datruth <- tmpdat[tmpvalid, "label"]
+ dapred <- bertpreds$bertpred
+ dametrics <- metricsTable(datruth, dapred)
+ damsg <- paste0(
+ "Unsaved model (", tmpmodel, ")\n\n",
+ "BERT validation stats:\n",
+ "* Accuracy : ", round(100 * mean(datruth == dapred), 2), "%\n",
+ "* Macro F1 : ", sprintf("%f", mean(as.numeric(dametrics$F1))), "\n",
+ "* Weighted F1 : ", sprintf("%f", weighted.mean(as.numeric(dametrics$F1),
+ dametrics$N.cases)), "\n")
+ hypermsg <- bertHyperToMsg(bert_hyperpars())
+ writeLines(RJSONIO::toJSON(bert_hyperpars()),
+ paste0(tmpsession, "/tiggertrain.json"))
+
+ values$bertValstats <- dametrics
+ values$bertMsg <- damsg
+ values$bertValPlot <- bertPlotFromPath(tmpsession)
+ values$bertLastUnsaved <- list(
+ metrics = dametrics, msg = damsg, session = tmpsession)
+ values$bertMsgHyperpar <- hypermsg
+ values$bertLastHyperpar <- bert_hyperpars()
+ })
+
+ values$bertMsg <- "BERT is training"
+ values$bertValstats <- NULL
+ values$bertValPlot <- NULL
+ values$bertLastUnsaved <- NULL
+ values$bertMsgHyperpar <- NULL
+ values$bertLastHyperpar <- NULL
+
+ ## Return something other than the promise so shiny remains responsive
+ NULL
+ })
+
+ ## Automatic check of BERT training status
+ observe({
+ if (bert_nclicks() == 0) return(NULL)
+ isolate({
+ tmpsession <- paste0("tiggerbert_", ok.labelcol(), "_", session$token)
+ ## Check for checkpoint dirs
+ checkdirs <-
+ length(dir(paste0(tmpsession, "_train"), pattern = "^checkpoint"))
+ # fire_running(bert_status_file,
+ # min(100, round(100 * checkdirs / input$bertNeval, 1)))
+ # dastate <- try(RJSONIO::fromJSON(paste0(tmpsession, "_train/trainer_state.json")))
+ # if (!inherits(dastate, "try-error"))
+ # fire_running(bert_status_file,
+ # round(100 * as.numeric(dastate$epoch) / input$bertEpochs, 1))
+ try({
+ dapoints <- dir(paste0(tmpsession, "_train"),
+ pattern = "trainer_state[.]json$", recursive = T)
+ dapoints_num <- as.numeric(gsub("^checkpoint-(\\d+)/.*$", "\\1", dapoints))
+ statefile <- paste0(
+ tmpsession, "_train/", dapoints[which.max(dapoints_num)])
+ if (file.exists(statefile)) {
+ state <- RJSONIO::fromJSON(statefile)
+ fire_running(bert_status_file,
+ round(100 * as.numeric(state$epoch) / input$bertEpochs, 1))
+ }
+ })
+ })
+ if (checkdirs < isolate(input$bertNeval) & file.exists(paste0(tmpsession, "_done"))) {
+ values$bertMsg <- "BERT Error - try again"
+ # values$bertMsg <- paste(readLines(paste0(tmpsession, "_log")), collapse = "\n")
+ } else {
+ invalidateLater(1e3)
+ loss_progress <- ""
+ if (checkdirs > 0) {
+ ## Get training diagnostics
+ try({
+ dapoints <- dir(paste0(tmpsession, "_train"),
+ pattern = "trainer_state[.]json$", recursive = T)
+ dapoints_num <- as.numeric(gsub("^checkpoint-(\\d+)/.*$", "\\1", dapoints))
+ statefile <- paste0(
+ tmpsession, "_train/", dapoints[which.max(dapoints_num)])
+ if (file.exists(statefile)) {
+ state <- RJSONIO::fromJSON(statefile)
+ logs <- data.table::rbindlist(lapply(state$log_history, as.list),
+ fill = TRUE)
+ logs <- logs[, .(loss = max(loss, na.rm = T),
+ eval_loss = max(eval_loss, na.rm = T)), by = epoch]
+ loss_progress <- paste0(
+ "\n* Epoch : ", sprintf("%.02f", logs$epoch),
+ "\ttrain loss : ", sprintf("%.05f",logs$loss),
+ "\teval loss : ", sprintf("%.05f", logs$eval_loss), collapse = "")
+ }
+ })
+ }
+ values$bertMsg <-
+ ifelse(checkdirs == 0, "Pre-processing...",
+ paste0(get_status(bert_status_file), loss_progress))
+ }
+ values$bertValstats <- NULL
+ values$bertValPlot <- NULL
+ values$bertLoadSaved <- NULL
+ values$bertMsgHyperpar <- NULL
+ })
+
+
+ ## Action: show last BERT model
+ observeEvent(input$bertLast, {
+ values$bertMsg <- values$bertLastUnsaved$msg
+ values$bertValstats <- values$bertLastUnsaved$metrics
+ values$bertValPlot <- bertPlotFromPath(values$bertLastUnsaved$session)
+ values$bertMsgHyperpar <- bertHyperToMsg(values$bertLastHyperpar)
+ values$bertLoadSaved <- NULL
+ })
+
+
+ ## Action: save BERT model
+ observeEvent(input$bertSave, {
+ tmpsession <- paste0("tiggerbert_", ok.labelcol(), "_", session$token)
+ if (!file.exists(paste0(tmpsession)))
+ return(NULL)
+
+ if (!grepl("\\S", input$bertSaveName)) {
+ tmpname <- paste0(
+ ok.labelcol(), "_", gsub("\\D+$", "", gsub("\\D", "-", Sys.time())))
+ } else
+ tmpname <- cleanFileName(input$bertSaveName)
+
+ savedir <- paste0("tiggerbert_saved_", tmpname)
+ system(paste("cp -R", tmpsession, savedir))
+ system(paste0("cp ", tmpsession, "_dat_valid.csv ",
+ savedir, "/tiggerbert_dat_valid.csv"))
+ system(paste0("cp ", tmpsession, "_predval.csv ",
+ savedir, "/tiggerbert_pred_valid.csv"))
+
+ system(paste0("rm -R ", tmpsession, "*"))
+ values$bertMsg <- paste0("BERT model successfully saved: ", tmpname)
+ values$bertValstats <- NULL
+ values$bertValPlot <- NULL
+ values$bertMsgHyperpar <- NULL
+
+ if (is.null(values$bertSaveTrigger)) {
+ values$bertSaveTrigger <- 0
+ } else
+ values$bertSaveTrigger <- values$bertSaveTrigger + 1
+ values$bertLastUnsaved <- NULL
+ updateTextInput(session, "bertSaveName", value = "")
+ })
+
+
+ ## Actions: load saved model (list updated only with new models)
+ observeEvent(values$bertSaved, {
+ if (!length(values$bertSaved)) return(NULL)
+ tmpsaved <- values$bertSaved[! values$bertSaved %in% values$bertSavedAll]
+
+ lapply(tmpsaved, function(dabert) {
+ ############
+ ## Actions to load saved model
+ observeEvent(input[[paste0("saved_", dabert)]], {
+ bertconf <- RJSONIO::fromJSON(paste0(
+ "tiggerbert_saved_", dabert, "/config.json"))
+ truth <- read.csv(paste0(
+ "tiggerbert_saved_", dabert, "/tiggerbert_dat_valid.csv"))$label
+ pred <- read.csv(paste0(
+ "tiggerbert_saved_", dabert, "/tiggerbert_pred_valid.csv"))$bertpred
+
+ dametrics <- metricsTable(truth, pred)
+ values$bertValstats <- dametrics
+ values$bertMsg <- paste0(
+ "Saved model ", dabert, " (", bertconf[["_name_or_path"]], ")\n\n",
+ "BERT validation stats:\n",
+ "* Accuracy : ", round(100 * mean(truth == pred), 2), "%\n",
+ "* Macro F1 : ", sprintf("%f", mean(as.numeric(dametrics$F1))), "\n",
+ "* Weighted F1 : ", sprintf("%f", weighted.mean(as.numeric(dametrics$F1),
+ dametrics$N.cases)), "\n")
+
+ ## Hyperparameters
+ tiggertrain_file <- paste0("tiggerbert_saved_", dabert, "/tiggertrain.json")
+ if (file.exists(tiggertrain_file)) {
+ try({
+ values$bertMsgHyperpar <- bertHyperToMsg(RJSONIO::fromJSON(tiggertrain_file))
+ })
+ } else
+ values$bertMsgHyperpar <- "Training parameters not saved."
+
+ values$bertValPlot <-
+ bertPlotFromPath(paste0("tiggerbert_saved_", dabert))
+ values$bertLoadSaved <- dabert
+ })
+
+ ############
+ ## Actions to copy hyperparameters
+ observeEvent(input[[paste0("bert_copypars_", dabert)]], {
+ tiggertrain_file <- paste0("tiggerbert_saved_", dabert, "/tiggertrain.json")
+ if (!file.exists(tiggertrain_file)) return(NULL)
+ try({
+ tmppars <- RJSONIO::fromJSON(tiggertrain_file)
+ updateSelectInput(session, "bertModel", selected = tmppars$bertModel)
+ updateCheckboxInput(session, "bertBest", value = tmppars$bertBest)
+ numinputs <- c("bertEpochs", "bertLrate", "bertWdecay",
+ "bertBatchsize", "bertGradacc")
+ for (iinput in numinputs)
+ updateNumericInput(session, iinput, value = tmppars[[iinput]])
+ })
+ })
+
+ ############
+ ## Actions to delete saved model (with confirm)
+ observeEvent(input[[paste0("bert_delete_", dabert)]], {
+ showModal(modalDialog(
+ title = "Delete BERT model",
+ paste0("Confirm delete: ", dabert,
+ " (model and predictions will be deleted)."),
+ footer = tagList(actionButton(paste0("bert_delete_confirm_", dabert), "Delete"),
+ modalButton("Cancel"))))
+ })
+
+ observeEvent(input[[paste0("bert_delete_confirm_", dabert)]], {
+ system(paste0("rm -R tiggerbert_saved_", dabert))
+ system(paste0(
+ "rm ", values$conf$datadir, values$conf$dataname, "_bertpred_",
+ dabert, ".feather"))
+ # values$bertLoadSaved <- NULL
+ if (is.null(values$bertSaveTrigger)) {
+ values$bertSaveTrigger <- 0
+ } else
+ values$bertSaveTrigger <- values$bertSaveTrigger + 1
+ removeModal()
+ values$bertMsg <- NULL
+ values$bertValstats <- NULL
+ values$bertMsgHyperpar <- NULL
+ })
+
+ ############
+ ## Actions to infer on current data from saved BERT (async)
+ observeEvent(input[[paste0("bert_infer_", dabert)]], {
+ if (bertpred_nclicks() > 0 | bpall_nclicks() > 0) {
+ values$bertMsg <- "Inference already running"
+ values$bertMsgHyperpar <- NULL
+ values$bertValPlot <- NULL
+ values$bertValstats <- NULL
+ return(NULL)
+ }
+ bertpred_nclicks(1)
+
+ saved_model <- paste0("tiggerbert_saved_", dabert)
+ out_path <- paste0(
+ values$conf$datadir, values$conf$dataname, "_bertpred_",
+ dabert, ".feather")
+
+ if (file.exists(out_path)) {
+ old_infer <- arrow::read_feather(out_path)
+ } else
+ old_infer <- NULL
+
+ to_infer <- ok.data()[[values$conf$idcol]]
+ to_infer <- to_infer[! to_infer %in% old_infer[[values$conf$idcol]]]
+ to_infer_row <- match(to_infer, ok.data()[[values$conf$idcol]])
+
+ if (!length(to_infer)) {
+ values$bertMsg <- paste0(
+ "All current ", nrow(ok.data()),
+ " texts already infered by ", dabert)
+ values$bertMsgHyperpar <- NULL
+ values$bertValPlot <- NULL
+ values$bertValstats <- NULL
+ bertpred_nclicks(0)
+ return(NULL)
+ }
+
+ values$bertValPlot <- NULL
+ values$bertValstats <- NULL
+ values$bertMsgHyperpar <- NULL
+ values$bertMsg <- paste0(
+ "Preparing inference on ", length(to_infer), " new texts")
+ tmp_dat <- paste0("tiggerbert_infer_", session$token, ".feather")
+ arrow::write_feather(
+ data.frame(
+ ok.data()[to_infer_row, values$conf$idcol, drop = FALSE],
+ text = ok.nicetext()[to_infer_row]),
+ tmp_dat)
+
+ values$bertMsg <- paste0(
+ "Infering ", dabert, " predictions on ",
+ length(to_infer), " new texts")
+ infer_cmd <- paste0(
+ values$conf$python, " gobert_infer.py -m ", saved_model,
+ " -o ", out_path,
+ " -d ", tmp_dat,
+ " -i ", values$conf$idcol,
+ " -l ", paste0("tiggerbert_infer_", session$token, ".log"),
+ " -G ", as.numeric(values$conf$use_gpu),
+ " -b ", 128)
+ bertpred_res <- reactiveVal()
+ bertpred_async <- future({
+ system(infer_cmd)
+ system(paste0("rm ", tmp_dat))
+
+ if (!is.null(old_infer)) {
+ new_infer <- arrow::read_feather(out_path)
+ arrow::write_feather(rbind(old_infer, new_infer), out_path)
+ }
+ }) %...>% bertpred_res
+ bertpred_async <- catch(bertpred_async, function(error) {
+ bertpred_nclicks(0)
+ values$bertMsg <- paste0("BERT inference error: ", error)
+ system(paste0("rm ", tmp_dat))
+ })
+ bertpred_async <- finally(bertpred_async, function() {
+ bertpred_nclicks(0)
+ values$bertMsg <-
+ "Inference complete!\nRe-import data in Project/Sample panel to use the predictions for tagging."
+ values$bertValPlot <- NULL
+ values$bertValstats <- NULL
+ values$bertMsgHyperpar <- NULL
+ })
+ NULL
+ })
+ })
+ values$bertSavedAll <- c(values$bertSavedAll, tmpsaved)
+ })
+
+ ## Automatic check of BERT inference progress
+ observe({
+ if (bertpred_nclicks() == 0) return(NULL)
+ invalidateLater(1e3)
+ logfile <- paste0("tiggerbert_infer_", isolate(session$token), ".log")
+ if (file.exists(logfile)) {
+ values$bertMsg <- readLines(logfile)[1]
+ values$bertValPlot <- NULL
+ values$bertValstats <- NULL
+ values$bertMsgHyperpar <- NULL
+ }
+ })
+
+
+
+
+ ##############################################################################
+ ## Export panel
+ ##############################################################################
+
+ ## Export tags and full model predictions
+ output$dlTagSave <-
+ downloadHandler(
+ function()
+ paste0(Sys.Date(), "-activeTigger-", ok.labelcol(),".", input$dlTagFormat),
+ function(con) {
+ if (is.null(ok.data())) return(NULL)
+ dadat <- ok.data()[, values$conf$idcol, drop = FALSE]
+ if ("tags" %in% input$dlTagSelect)
+ dadat[[ok.labelcol()]] <- values$label
+ if ("comments" %in% input$dlTagSelect) if (ok.commentCol() != "(None)")
+ dadat[, ok.commentCol()] <- values$comment
+ if ("predictions" %in% input$dlTagSelect) if (ok.train.possible()) {
+ dadat$tigger.predict <- ok.predlab1()
+ dadat$tigger.predprob <- apply(ok.pred(), 1, max)
+ tmp_pred <- ok.pred()
+ colnames(tmp_pred) <- paste0("tiggerpred_", colnames(tmp_pred))
+ dadat <- cbind(dadat, tmp_pred)
+ }
+ othercols <- input$dlTagSelect
+ othercols <- othercols[! othercols %in% c("tags", "comments", "predictions")]
+ if (length(othercols))
+ dadat <- cbind(dadat, ok.data()[, othercols, drop = FALSE])
+
+ if (input$dlTagFormat == "csv") {
+ write.csv(dadat, row.names = F, con)
+ } else
+ arrow::write_feather(dadat, con)
+ })
+
+
+ ## Export embeddings (ft, sb, umap, t-sne)
+ output$dlEmbedSave <- downloadHandler(
+ function()
+ paste0(Sys.Date(), "-activeTigger-emb-",
+ paste(input$dlEmbedSelect, collapse = "-"),
+ ".", input$dlEmbedFormat),
+ function (file) {
+ if (is.null(input$dlEmbedSelect)) return(NULL)
+ export <- ok.data()[, values$conf$idcol, drop = FALSE]
+ if ("ft" %in% input$dlEmbedSelect)
+ export <- data.frame(export, ok.data()[, values$cols_ft])
+ if ("sb" %in% input$dlEmbedSelect)
+ export <- data.frame(export, ok.data()[, values$cols_sb])
+ if (any(c("t-sne", "umap") %in% input$dlEmbedSelect)) {
+ tmpvisu <- ok.visu()
+ tmpalgo <- input$dlEmbedSelect[input$dlEmbedSelect %in% c("t-sne", "umap")]
+ colnames(tmpvisu) <- paste0(gsub("-", "", tmpalgo), 1:2)
+ export <- data.frame(export, tmpvisu)
+ }
+ if (input$dlEmbedFormat == "csv") {
+ write.csv(export, file, row.names = FALSE)
+ } else if (input$dlEmbedFormat == "feather") {
+ arrow::write_feather(export, file)
+ }
+ })
+
+ ## Export BERT predictions
+
+ output$dlBPMsg <- reactive(values$dlBPMsg)
+
+
+
+ # output$dlBertPredCsv <-
+ # downloadHandler(paste0(Sys.Date(), "-bertpred-", ok.labelcol(),".csv"),
+ # content= function(con) {
+ # if (is.null(ok.data())) return(NULL)
+ # dadat <- ok.data()[, c(values$conf$idcol, ok.labelcol())]
+ # dadat[[ok.labelcol()]] <- values$label
+ # if (ok.commentCol() != "(None)") {
+ # dadat[, ok.commentCol()] <- values$comment
+ # }
+ # if (ok.train.possible()) {
+ # dadat$tigger.predict <- ok.predlab1()
+ # dadat$tigger.predprob <- apply(ok.pred(), 1, max)
+ # }
+ # dadat <- cbind(dadat, ok.pred())
+ # write.csv(dadat, row.names = F, con)
+ # })
+
+ ############
+ ## Action to infer on ALL data from saved BERT (async)
+ bpall_res <- reactiveVal()
+ observeEvent(input$dlBPInfer, {
+ if (bpall_nclicks() > 0 | bertpred_nclicks() > 0) {
+ values$dlBPMsg <- "Inference already running"
+ bpall_res(NULL)
+ return(NULL)
+ }
+ bpall_nclicks(1)
+ bpall_res_async <- reactiveVal()
+ dabert <- input$dlBPSelect
+
+ saved_model <- paste0("tiggerbert_saved_", dabert)
+ out_path <- paste0(
+ values$conf$datadir, values$conf$dataname, "_bertpredall_",
+ dabert, ".feather")
+ tmp_dat <- paste0("tiggerbert_inferall_", session$token, ".feather")
+ tmp_log <- paste0("tiggerbert_inferall_", session$token, ".log")
+ tmp_done <- paste0("tiggerbert_inferall_", session$token, "_done")
+ if (file.exists(tmp_done))
+ system(paste0("rm ", tmp_done))
+
+ if (file.exists(out_path)) {
+ values$dlBPMsg <- "Inference already complete, ready for download"
+ bpall_nclicks(0)
+ bpall_res(out_path)
+ return(NULL)
+ # old_infer <- arrow::read_feather(out_path)
+ }
+
+ fulldat <- arrow::read_feather(paste0(
+ values$conf$datadir, values$conf$dataname, ".feather"))
+ values$dlBPMsg <- paste0(
+ "Preparing inference on ", nrow(fulldat), " texts")
+ if (length(values$conf$textcols) > 1) {
+ fulldat$text <- do.call(paste, c(
+ fulldat[, values$conf$textcols, drop = FALSE], sep = "\n\n"))
+ } else {
+ fulldat$text <- fulldat[, values$conf$textcols]
+ }
+ arrow::write_feather(fulldat[, c(values$conf$idcol, "text")], tmp_dat)
+
+ infer_cmd <- paste0(
+ values$conf$python, " gobert_infer.py -m ", saved_model,
+ " -o ", out_path,
+ " -d ", tmp_dat,
+ " -i ", values$conf$idcol,
+ " -l ", tmp_log,
+ " -G ", as.numeric(values$conf$use_gpu),
+ " -b ", 128,
+ "; touch ", tmp_done)
+
+ bpall_async <- future({
+ system(infer_cmd)
+ system(paste0("rm ", tmp_dat))
+ }) %...>% bpall_res_async
+
+ bpall_async <- catch(bpall_async, function(error) {
+ values$dlBPMsg <- paste0("BERT inference error: ", error)
+ system(paste0("rm ", tmp_dat))
+ system(paste0("rm ", tmp_done))
+ })
+
+ bpall_async <- finally(bpall_async, function() {
+ bpall_nclicks(0)
+ values$dlBPMsg <-
+ "Inference complete, ready for download"
+ bpall_res(out_path)
+ })
+ NULL
+ })
+
+ ## Automatic check of BERT all prediction progress
+ observe({
+ if (bpall_nclicks() == 0) return(NULL)
+ invalidateLater(1e3)
+ logfile <- paste0("tiggerbert_inferall_", isolate(session$token), ".log")
+ if (file.exists(logfile))
+ values$dlBPMsg <- readLines(logfile)[1]
+ })
+
+ ## Show BERTpred download button when data is ready
+ output$dlBPDlButton <- renderUI({
+ if (is.null(bpall_res())) return(NULL)
+ downloadButton("dlBPDl", NULL)
+ })
+
+ output$dlBPDl <- downloadHandler(
+ function()
+ paste0(Sys.Date(), "-tiggerbert-", values$conf$dataname,
+ input$dlBPSelect, ".", input$dlBPFormat),
+ function(file) {
+ if (is.null(bpall_res())) return(NULL)
+ tmp_bert <- gsub("^.*_bertpredall_(.*)[.]feather$", "\\1", bpall_res())
+ if (tmp_bert != input$dlBPSelect) {
+ values$dlBPMsg <- "BERT model changed - predict again or select current model"
+ return(NULL)
+ }
+ tmp <- arrow::read_feather(bpall_res())
+ if (input$dlBPFormat == "csv") {
+ write.csv(tmp, file, row.names = FALSE)
+ } else if (input$dlBPFormat == "feather") {
+ arrow::write_feather(tmp, file)
+ }
+ }
+ )
+
+
+ ##############################################################################
+ ## Delete files at end of session
+ onStop(function() {
+ if(file.exists(bert_status_file)) unlink(bert_status_file)
+ if(length(dir(pattern = paste(session$token))))
+ system(paste0("rm -R *", session$token, "*"))
+ })
+
+})
diff --git a/docker-images-datalab/activetigger/activetigger/tokenize_spacy.py b/docker-images-datalab/activetigger/activetigger/tokenize_spacy.py
new file mode 100644
index 0000000..a7143ce
--- /dev/null
+++ b/docker-images-datalab/activetigger/activetigger/tokenize_spacy.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+## Spacy tokenize texts
+## Requires data file with columns id and text
+
+import argparse
+from os.path import expanduser
+import pandas as pd
+import pyarrow as pa
+import pyarrow.feather as feather
+import spacy
+import re
+
+
+def main(args):
+ print("Tokenizer: Importing data")
+ datapath = expanduser(args.data)
+ dat = feather.read_feather(datapath)
+ outfile = re.sub("[.]feather$", "_spa.feather", datapath)
+
+ print("Tokenizer: Loading model")
+ spa = spacy.load(expanduser(args.model))
+ print("Tokenizer: Tokenizing sentences")
+ tok = [" ".join([str(token) for token in spa.tokenizer(text)]) for text in dat["text"]]
+
+ print("Tokenizer: Exporting")
+ tok = pd.concat([dat["id"], pd.DataFrame(tok)], axis=1)
+ tok.columns = ["id", "text_spa"]
+ feather.write_feather(tok, outfile)
+ print("Tokenizer: Done")
+
+
+if __name__ == "__main__":
+ argParser = argparse.ArgumentParser()
+ argParser.add_argument("-m", "--model", help="Model name", default="fr_core_news_sm")
+ argParser.add_argument("-d", "--data", help="Path to data (feather)")
+ args = argParser.parse_args()
+ main(args)
+
diff --git a/docker-images-datalab/activetigger/activetigger/ui.R b/docker-images-datalab/activetigger/activetigger/ui.R
new file mode 100644
index 0000000..5f788a2
--- /dev/null
+++ b/docker-images-datalab/activetigger/activetigger/ui.R
@@ -0,0 +1,631 @@
+## 21/04/2020 : shiny pour active learning étiquetage de textes
+
+shinyUI(fluidPage(
+ title = "Active Tigger",
+
+ ############################################################################
+ ## Top panel: Title, strategy, model options
+ ############################################################################
+ fluidRow(
+ column(
+ 4,
+ fluidRow(
+ column(4, br(),
+ HTML('
')),
+ column(
+ 8, br(),
+ p(strong("Project / Scheme")),
+ fluidRow(
+ column(2, HTML(paste0(
+ '',
+ actionButton("createProject", "+"),
+ '
'))),
+ column(10, selectInput("selectProject", NULL, NULL, NULL))
+ ),
+ fluidRow(
+ column(2, HTML(paste0(
+ '',
+ actionButton("createScheme", "+"),
+ '
'))),
+ column(10, selectInput("selectScheme", NULL, NULL, NULL))
+ )
+ ))
+ ),
+ column(
+ 2, br(),
+
+ HTML(paste0(
+ '',
+ p(strong("Strategy")),
+ selectInput("strategy", NULL,
+ choices= c("Active" = "entropy",
+ "MaxProb" = "maxprob",
+ "Random" = "random",
+ "Deterministic" = "sequential"),
+ selected = "sequential"),
+ conditionalPanel("input.strategy == 'maxprob'",
+ selectInput("maxprobWhich", NULL, NULL)),
+ '
'))
+ ),
+ column(
+ 2, br(),
+ HTML(paste0(
+ '',
+ p(strong("On")),
+ selectInput("sampleChoice", NULL,
+ choices = c("Untagged" = "untagged",
+ "Tagged" = "tagged",
+ "All" = "all")),
+ conditionalPanel("input.sampleChoice == 'tagged'",
+ selectInput("taggedWhich", NULL, NULL)),
+ '
'))
+ ),
+
+ column(4, p(br()),
+ verbatimTextOutput("trainDiagno"),
+ fluidRow(
+ column(8, HTML(paste0(
+ '',
+ actionButton("modelTrain", "🤖 Train", width = "100%"),
+ '
'))),
+ column(4, HTML(paste0(
+ '',
+ checkboxInput("showTrainOptions", "🔧"),
+ '
')))
+ ))
+ ),
+
+
+ ## General training options
+ conditionalPanel(
+ "input.showTrainOptions",
+ hr(),
+ fluidRow(
+ column(
+ 4,
+ conditionalPanel(
+ "input.showTrainOptions & input.use_regressors.includes('regex')",
+ uiOutput("panelExtraRegex")
+ )
+ ),
+ column(
+ 4,
+ p(strong("Predictors")),
+ selectizeInput("use_regressors", NULL, "",
+ multiple = TRUE, width = "100%"),
+ conditionalPanel(
+ "input.showTrainOptions & input.use_regressors.includes('extra')",
+ selectizeInput("use_ootregnum", "Extra predictors: continuous", "",
+ multiple = TRUE, width = "100%"),
+ selectizeInput("use_ootregcat", "Extra predictors: categorical", "",
+ multiple = TRUE, width = "100%")
+ ),
+
+ conditionalPanel(
+ "input.showTrainOptions & input.use_regressors.includes('dfm')",
+ hr(),
+ fluidRow(
+ column(
+ 6, numericInput("dfmMinTermfreq", "DFM : Min Termfreq",
+ min= 1, max= 1e3, value= 5, step= 1)),
+ column(
+ 6, numericInput("dfmMinDocfreq", "DFM : Min Docfreq",
+ min= 1, max= 1e3, value= 5, step= 1))),
+ fluidRow(
+ column(
+ 4, checkboxInput("dfmTfIdf", "Tf-Idf", TRUE)),
+ column(
+ 4, selectInput("dfmTfScheme", label = NULL,
+ choices= c("logcount", "count", "prop", "propmax",
+ "boolean", "augmented", "logave"),
+ selected= "logcount")),
+ column(
+ 4, selectInput("dfmDfScheme", label = NULL,
+ choices= c("inverse", "count",
+ "inversemax", "inverseprob", "unary"),
+ selected= "inverse"))
+ ),
+ numericInput("dfmNgrams", "N-grams", value= 1,
+ min= 1, max= 10, step = 1)
+ )
+ ),
+ column(
+ 4,
+ HTML(paste0(
+ '',
+ fluidRow(
+ column(6, strong("Auto train every")),
+ column(6, numericInput("trainCountdown", NULL, 0, 0, 1e6, 1))),
+ '
'
+ )),
+ HTML(paste0(
+ '',
+ fluidRow(
+ column(6, strong("Model")),
+ column(6, selectInput("predModel", NULL, selected = "linear",
+ choices = c("Naive Bayes" = "naive bayes",
+ "KNN" = "knn",
+ "Liblinear" = "linear",
+ "LASSO" = "lasso",
+ "Random Forest" = "random forest")))
+ ),
+ '
'
+ )),
+
+ ## Model-specific training options
+ ### Random forest options
+ conditionalPanel(
+ "input.showTrainOptions & input.predModel == 'random forest'",
+ fluidRow(
+ numericInput("rfNumTrees", label = "Num. trees",
+ min = 1, max = 2e3, value = 500, step = 1),
+ numericInput("rfMtry", label = "mtry",
+ min = 0, max = 1e5, value = 0, step = 1),
+ numericInput("rfSampleFrac", label = "Sample fraction",
+ min = 0, max = 1, value = 1, step = .01)
+ )
+ ),
+ ### Naive Bayes options
+ conditionalPanel(
+ "input.showTrainOptions & input.predModel == 'naive bayes'",
+ flowLayout(
+ numericInput("naiveSmooth", label = "Smooth",
+ min = 0, max = 2e3,
+ value = 1, step = 1e-3),
+ selectInput("naivePrior", "Prior",
+ c("uniform", "docfreq", "termfreq")),
+ selectInput("naiveDistri", "Distribution",
+ c("multinomial", "Bernoulli"))
+ )
+ ),
+ ### Lasso options
+ conditionalPanel(
+ "input.showTrainOptions & input.predModel == 'lasso'",
+ strong("Lasso penalty"),
+ fluidRow(
+ column(
+ 6, numericInput("glmLambda", label = NULL, min = 0, max = 2e3,
+ value = 0, step = 1e-6)),
+ column(
+ 6, actionButton("glmCV", label= "Find best (CV)")))
+ ),
+ ### Linear options
+ conditionalPanel(
+ "input.showTrainOptions & input.predModel == 'linear'",
+ strong("Liblinear Cost"),
+ fluidRow(
+ column(
+ 6, numericInput("liblinCost", label= NULL, min= 0, max= 2e10,
+ value= 32, step= 1)),
+ column(
+ 6, actionButton("liblinCV", label= "Find best (CV)")))
+ ),
+ ### KNN options
+ conditionalPanel(
+ "input.showTrainOptions & input.predModel == 'knn'",
+ flowLayout(
+ strong("N. Neighbours"),
+ numericInput("knnK", label = NULL, min = 1, max = 1e2,
+ value = 3, step = 1)
+ )
+ )
+ )
+ ),
+ hr()
+ ),
+
+
+ ############################################################################
+ ## Main panel set
+ ############################################################################
+ tabsetPanel(
+ id = "mainPanelset",
+ selected = "Tagging",
+
+ ########################################################################
+ ## Project panel
+ ########################################################################
+ tabPanel(
+ "Project",
+ br(),
+ tabsetPanel(
+ id = "tabsetProject",
+ selected = "Sample",
+
+ tabPanel(
+ "Settings",
+ br(),
+ actionButton("saveSystem", "Save changes"),
+
+ h4("Files"),
+ fluidRow(
+ column(2, p("Data directory")),
+ column(4, uiOutput("sys_datadir")),
+ column(6, p("Place (on the server) where the data and project are stored"))
+ ),
+ fluidRow(
+ column(2, p("Data filename")),
+ column(4, uiOutput("sys_datafile")),
+ column(6, p("Main file, containing id and text columns"))
+ ),
+
+ h4("Variables"),
+ fluidRow(
+ column(2, p("ID")),
+ column(4, uiOutput("sys_var_id")),
+ column(6, p("Name of the id variable, unique identifier of each text"))
+ ),
+ fluidRow(
+ column(2, p("Text")),
+ column(4, uiOutput("sys_var_text")),
+ column(6, p("Name of the text variables: if more than one, texts are concatenated in the specified order"))
+ ),
+ fluidRow(
+ column(2, p("Tags")),
+ column(4, uiOutput("sys_var_tag")),
+ column(6, p("Names of scheme variables"))
+ ),
+ fluidRow(
+ column(2, p("Comments")),
+ column(4, uiOutput("sys_var_comm_ui")),
+ column(6, p("Name of the comments variable"))
+ ),
+ fluidRow(
+ column(2, p("Context")),
+ column(4, uiOutput("sys_var_context_ui")),
+ column(6, p("Names of variables not used in the models, but may be displayed during tagging"))
+ ),
+
+ h4("System"),
+ fluidRow(
+ column(2, checkboxInput("sys_use_python", "Python backend", FALSE)),
+ column(4, conditionalPanel(
+ "input.sys_use_python",
+ textInput("sys_which_python", NULL, value = "python3",
+ placeholder = "(custom python path)"))),
+ column(6, conditionalPanel(
+ "input.sys_use_python",
+ p("This must be a working python3 environment, with the required modules installed (see documentation)")))
+ ),
+
+ conditionalPanel("input.sys_use_python", list(
+ fluidRow(
+ column(2, checkboxInput("sys_use_spacy", "SpaCy tokenization", FALSE)),
+ column(4, conditionalPanel("input.sys_use_spacy", textInput(
+ "sys_use_spacy_model", NULL, NULL, placeholder = "(spacy model name)"))),
+ column(6, p("Name of the spacy tokenizer model, used in DTM and word embeddings"))
+ ),
+ conditionalPanel("input.sys_use_spacy", fluidRow(
+ column(2),
+ column(9, uiOutput("sys_spacyDlUI")))
+ ),
+ fluidRow(
+ column(2, checkboxInput("sys_use_ft", "FastText word embeddings", FALSE)),
+ column(4, conditionalPanel("input.sys_use_ft", textInput(
+ "sys_use_ft_model", NULL, NULL, placeholder = "(fasttext model path)"))),
+ column(6, p("Path to the local fasttext model binary"))
+ ),
+ conditionalPanel("input.sys_use_ft", fluidRow(
+ column(2),
+ column(9, uiOutput("sys_ftDlUI")))
+ ),
+ fluidRow(
+ column(2, checkboxInput("sys_use_sb", "SBERT sentence embeddings", FALSE)),
+ column(4, conditionalPanel("input.sys_use_sb", textInput(
+ "sys_use_sb_model", NULL, NULL,
+ placeholder = "(custom sentence_transformers model)"))),
+ column(6, p("(GPU recommended) Name or path of the sentence-transformers model"))
+ ),
+
+ conditionalPanel("input.sys_use_python", list(
+ checkboxInput("sys_use_gpu", "GPU support (CUDA, for SBERT and BERT)", FALSE),
+
+ br(),
+ wellPanel(
+ h4("Model picker"),
+ fluidRow(
+ column(2, p("Language")),
+ column(4, uiOutput("sys_ex_lang_ui")),
+ column(6, p("Used to preset tokenization and embedding models"))
+ ),
+ fluidRow(
+ column(2),
+ column(4, strong("Recommended model")),
+ column(6, strong("Download instructions"))
+ ),
+ fluidRow(
+ column(2, p("SpaCy tokenization")),
+ column(4, uiOutput("sys_ex_spacy")),
+ column(6, uiOutput("sys_ex_spacy_dl"))
+ ),
+ fluidRow(
+ column(2, p("FastText word embeddings")),
+ column(4, uiOutput("sys_ex_ft")),
+ column(6, uiOutput("sys_ex_ft_dl"))
+ ),
+ fluidRow(
+ column(2, p("SBERT sentence embeddings")),
+ column(4, uiOutput("sys_ex_sb")),
+ column(6, p("(Auto download by python module)"))
+ )
+ )
+ ))
+ ))
+ ),
+
+ tabPanel(
+ "Sample",
+ br(),
+ fluidRow(
+ column(
+ 4,
+ wellPanel(
+ fluidRow(
+ column(8, h4("Sample")),
+ column(4, actionButton("dataImport", "Import", width = "100%"))),
+ fluidRow(
+ column(6, numericInput("dataNrows", "N. rows", 500, 10, 1e4, 1)),
+ column(6, numericInput("dataSkipRows", "Skip rows", 0, 0, step = 1))
+ )
+ )
+ ),
+ column(8, uiOutput("dataMessage"), uiOutput("panelData"))
+ )
+ ),
+
+ tabPanel(
+ "Scheme",
+ br(),
+ fluidRow(
+ # column(4, uiOutput("panelScheme")),
+ column(
+ 4,
+ wellPanel(
+ h4("Current scheme"),
+ fluidRow(
+ column(2, HTML(paste0(
+ "",
+ actionButton("schemeDelete", "🗑", width = "100%"),
+ "
"))),
+ column(6, uiOutput("printScheme")),
+ column(4, HTML(paste0(
+ "",
+ actionButton("schemeDescrSave", "Save", width = "100%"),
+ "
")))
+ ),
+ br(),
+ textAreaInput("schemeDescr", NULL, width = "100%", rows = 10,
+ placeholder = "Write scheme description here"),
+ hr()
+ )
+ ),
+ column(8, uiOutput("panelRetag"))
+ )
+ )
+ )
+ ),
+
+
+ ########################################################################
+ ## Text / visualization panel
+ ########################################################################
+ tabPanel(
+ "Tagging",
+ fluidRow(
+ column(
+ 3,
+ br(),
+ fluidRow(
+ column(8, textInput("regexFilter", label = NULL,
+ placeholder = "(Regex filter)")),
+ column(4, checkboxInput("regexCaseSens", "Case"))),
+
+ wellPanel(
+ ## Tagging buttons
+ fluidRow(
+ column(8, textInput("newLab", label = NULL,
+ placeholder = "(New label)")),
+ column(4, actionButton("currentAction", "Create"))
+ ),
+
+ # fluidRow(uiOutput("oracleButtons")),
+ uiOutput("oracleButtons"),
+
+ br(),
+ textInput("currentComment", NULL, "", width = "100%",
+ placeholder = "(Comment)"),
+ br(),
+ uiOutput("makeOracleConfirm")
+ ),
+
+ # fluidRow(
+ # column(6, checkboxInput("showContext", "Context")),
+ # column(6, actionButton("oops", strong("Oops")))
+ # ),
+ checkboxInput("showContext", "Context"),
+ conditionalPanel("input.showContext", htmlOutput("currentContext"))
+ ),
+ column(
+ 9,
+ fluidRow(
+ column(2, checkboxInput("panelText", "Text", TRUE)),
+ column(2, checkboxInput("panelVisu", "Visualization", FALSE),
+ offset = 8)
+ ),
+ uiOutput("textVisuCols") # Handled in server.R for adaptive columns
+ )
+ )
+ ),
+
+ ########################################################################
+ ## History panel
+ ########################################################################
+ tabPanel(
+ "History",
+ br(),
+ actionButton("histSave", "Save changes"),
+ br(),
+ br(),
+ DT::dataTableOutput("histDTable")
+ ),
+
+
+ ########################################################################
+ ## Stats panel
+ ########################################################################
+ tabPanel(
+ "Stats",
+ br(),
+ fluidRow(
+ column(
+ 3,
+ h3("Counts"),
+ tableOutput("statsTagTable")
+ ),
+ column(
+ 9,
+ h3("10-CV diagnostics"),
+ actionButton("statsCVgo", "Compute 10-CV"),
+ br(),
+ verbatimTextOutput("statsCVoutput"),
+ DT::dataTableOutput("statsCVtable")
+ )
+ ),
+ hr(),
+ h3("Gold Standard")
+ ),
+
+ ########################################################################
+ ## BERT panel
+ ########################################################################
+ tabPanel(
+ "BERT",
+
+ fluidRow(
+ column(
+ 3,
+ br(),
+ h3("Train new BERT"),
+ fluidRow(
+ column(6, actionButton("bertTrain", "Train BERT", width = "100%")),
+ column(6, checkboxInput("bertOptions", "Options"))),
+ fluidRow(
+ column(6, textInput(
+ "bertSaveName", NULL, placeholder = "(save name)")),
+ column(6, actionButton("bertSave", "Save", width = "100%"))),
+ actionLink("bertLast", "Last trained model"),
+ h3("Saved models"),
+ uiOutput("bertSaved")
+ ),
+ column(
+ 9,
+ br(),
+ conditionalPanel(
+ "input.bertOptions",
+ fluidRow(
+ column(6, selectInput(
+ "bertModel", "Model",
+ c("(Fr) CamemBERT-base" = "camembert/camembert-base",
+ "(Fr) CamemBERT-large" = "camembert/camembert-large",
+ "(Fr) FlauBERT-small" = "flaubert/flaubert_small_cased",
+ "(Fr) FlauBERT-base" = "flaubert/flaubert_base_cased",
+ "(Fr) FlauBERT-large" = "flaubert/flaubert_large_cased",
+ "(En) DistilBERT-base" = "distilbert-base-cased",
+ "(En) RoBERTa-base" = "roberta-base",
+ "(En) DeBERTa-base" = "microsoft/deberta-base",
+ "(Multi) DistilmBERT-base" = "distilbert-base-multilingual-cased",
+ "(Multi) MiniLM" = "microsoft/Multilingual-MiniLM-L12-H384",
+ "(Multi) XLM-RoBERTa-base" = "xlm-roberta-base"))),
+ column(6)
+ ),
+ fluidRow(
+ column(3, numericInput("bertEpochs", "Epochs", 3, 1, 20, 1)),
+ column(3, numericInput("bertLrate", "Learning rate", 2e-5, 1e-6, 1, 1e-6)),
+ column(3, numericInput("bertWdecay", "Weight decay", 0.01, 0, 10, 1e-6)),
+ column(3)
+ ),
+ fluidRow(
+ column(3, numericInput("bertBatchsize", "Batch size", 4, 1, 32, 1)),
+ column(3, numericInput("bertGradacc", "Gradient accum.", 4, 1, 32, 1)),
+ column(3, br(), checkboxInput("bertAdapt", "Adapt token length to batch", TRUE)),
+ column(3)
+ ),
+ fluidRow(
+ column(3, numericInput("bertValidFrac", "Validation fraction", .2, 0, .9)),
+ column(3, numericInput("bertValidSeed", "Validation seed", 1234, 1, 9e8)),
+ column(3, numericInput("bertNeval", "N. validation evals", 10, 1, 100, 1)),
+ column(3, br(), checkboxInput("bertBest", "Keep best", TRUE))
+ ),
+ fluidRow(
+ column(3, numericInput("bertMinOccur", "Min. class occurences", 1, 1, 1e4, 1)),
+ column(3, br(), checkboxInput("bertBalance", "Balance classes", FALSE)),
+ column(3),
+ column(3)
+ )
+ ),
+
+
+ fluidRow(
+ column(
+ 6,
+ # flowLayout(
+ # actionButton(
+ # "bertGoPred", "Infer on current data", width = "100%"),
+ # actionButton(
+ # "bertDelete", "Delete saved model", width = "100%")),
+ verbatimTextOutput("bertMsg")),
+ column(6, plotOutput("bertValPlot", height = 200))),
+
+ verbatimTextOutput("bertMsgHyperpar"),
+
+ DT::dataTableOutput("bertValstats")
+ )
+ )
+ ),
+
+ ########################################################################
+ ## Export panel
+ ########################################################################
+ tabPanel(
+ "Export",
+ h4("Export tagged data"),
+ p("Download the tags and predicted probabilities from the complete model, on the current data sample."),
+ # downloadButton("downloadCsv", "Save csv"),
+ flowLayout(
+ selectInput(
+ "dlTagSelect", NULL, c("tags", "comments", "predictions"),
+ c("tags", "comments", "predictions"), multiple = TRUE),
+ selectInput("dlTagFormat", NULL, c("csv", "feather"), "csv"),
+ downloadButton("dlTagSave", NULL, title = "Save tags")
+ ),
+
+ hr(),
+ h4("Export embeddings"),
+ p("Download the embeddings (incl. from visualization if present), on the current data sample."),
+ flowLayout(
+ selectInput(
+ "dlEmbedSelect", NULL, c("FastText" = "ft", "SBERT" = "sb"),
+ selected = "sb", multiple = TRUE),
+ selectInput("dlEmbedFormat", NULL, c("csv", "feather"), "feather"),
+ downloadButton("dlEmbedSave", NULL, title = "Save embeddings")
+ ),
+
+ hr(),
+ h4("Export BERT predictions"),
+ p("Download the predicted probabilities from the chosen BERT model, on the complete dataset."),
+ flowLayout(
+ selectInput("dlBPSelect", NULL, NULL, NULL),
+ selectInput("dlBPFormat", NULL, c("csv", "feather"), "feather"),
+ actionButton("dlBPInfer", "Predict"),
+ verbatimTextOutput("dlBPMsg"),
+ uiOutput("dlBPDlButton")
+ ),
+
+ hr(),
+ h4("Export BERT models")
+ )
+
+ ),
+ br(), br(), br(), br(), br(), br(), br(), br(), br(), br(), br(), br(), br(),
+ br(), br(), br(), br(), br(), br()
+))
diff --git a/docker-images-datalab/activetigger/activetigger/www/active_tigger.jpeg b/docker-images-datalab/activetigger/activetigger/www/active_tigger.jpeg
new file mode 100644
index 0000000000000000000000000000000000000000..c2e913e5cd6d212c85b26daad10e74236b54348e
GIT binary patch
literal 106990
zcmbTccQ{;M^gcQwYLt-ZWs(fhqxUjNL=U5PCV~hxSatg
z0*DCj-@i{tbax>lA|fWCASJmo>IV_^EjSUC{
zK4N&r@r<2=l^w|bKQAG;dz6Hjgo>1uiv2OoWA^{gcH0J^A-xB<2OuPP2DnEa^bCwl%$(1;xOsTR#3dx9q-7M}C@HI`
zs;TSg8yFfHo0wYL*nY74Xz$?e;pycK_wfx04GWKm{2Y~-^ffsp^;=r{&)mHHg2G=#
z#eb@*ku|k-^$i`JUEMvX-oJfg=<$h3%+$Z>rR9~?we^k7t!?bl@yY4gIqu@}e{d1p
z;ru`Ce+Tyez(sS1>)u^h?vwlvE`odBcPAmueWItr#I&z;NUU5RJrfBcef&Bhr?Q<4
zB&vHz_ug%koSs8$i4*%j(Eb;)|98NG|9>I-zkvPUxTXOVgamiNBcuU<0aqOCuAEWvxH_
z`;meZPBI5pYZ_kWy)=uk7pV6$^uMo%E+VP81wfIXp#I9Zw4ZZb^PHm#IczetHJ2pCc5^!-xs$7~Adzi3UUYL;vYfvJdy^f^lX
zD&}bxKhB
zX$Xn&wQdv+BW2sWhVs5EzM`l_$qDck#0ko
zl3czgLe&P0_AJm`e2jzNNk)-cZNW9a<414T?V1g(%z1&=Q(xS!Gy;k9(wF(nzFtAT
z(QCg?y`PZPQ;CNWRMmxa`TCXQ{SbXjavuRzuPt;)Y3Tm`O19@uZwQQNSU&bsG1q=o
zFx=0KhHk$Z|E<<E#_J|k>PPCl
zZ(CQB$l$sfZaeekhYO>heimIr%Cy-6wFRG#w@x;HqHtY+!}C+UctxHc-6=ROANuOHc=Pu(5PGby(T2cxWaJe4RZKqPDU5)z0{hZ;43`r
zDF5yw5d;@?a;y9NEwkXy*NPV~wmRglKJ7%O!3KM)>tD0kkTV}piWdBxj7(N8T`i*U
zRpCD~n2b|@@$036P+G!w8(V@g;`3F~8>7}VcT`!)JYANQB+ycr>TA5CpS$Y--
zpHDt(jNj7^OBCPqt}wVfg7u$dDw>QSSFHKRq6JWF|LN@vJFFuGWXvEB=RY>C)9Y%z
z0DWe>&+Ef=Tux>+o((ozxIB_^fwP+&9h*+V2DuQ;%)VN+@~y#?t5Rl>rcAVkJ1oS#
zQk%XWWo(a@gV3H{lg_0!^=j=D)^DGsFmedKBv~`4hUxv`9M%IDmR!
zlC33XA5m3qLwZ%?1WHC}2gvi5=w?im70;f0>Y>__BBQeE3w3>a1|Y(Yqdo7{9PqOE1W9q)Y#}4
zP1G3ICC%-?-+-Blh1=+b?J@XHH%6pCaJ8%&^o8O6yFkxRdK>BzH<<&6-+1%_$4q+$
z11{N9v4R$(O|O0;s~NxzFi*gvqkSFB@t}Jvfw)q%_GG>%yKf>$)=h*;lHlaVbB%f6
zyr=-wk*Li08=hJJAdX7@>O-xEAj2)7Ltv?~f9K;!^AvfM@HoN3HdwtO>hY$G(Wry1
z8DH`5u~NDrKYfq%YU@P7>!a5wS<&lVc4<(&`r#FuU{5QPzxHM9
zUS|^x&Er%z3PoqeEmqh?hW8MpO!IK2!{`=3>#yP(P&CXlX7yDqz&nw74SV!6>B5gN
z>2F10^`Jt(209mgGGkm;_JqYcinERIGTRrg5)0qC`zGcu{@|MyvTe_^iw+?KW4W5|
z`L-8Uw*Ce3Z^+Rs&OK^3HJh=oEKY3PsQ>Ce9Jki`Ws|QxP!g~Hy$eJAV@o%A^amm>
z!eolQaEd*78(ba#Ia}%Q{Z>t4wUKGk#k-<*8Y;7kAYA&z%F=nbv%l4HBip@RI?)!}
zFE9M-Q2eM(u(?tHVGo&pOqHWY`Kf)Yz`_O4s*4e&iF(mdVu`8E>cuda{!gH^4CDBbgzLIMe(Lo3kHd|K>Q;))
z-&@QAziW{(gmv-hhWi74NxBX<&ql;p+@DvL{F@42CCdk7|Je4!4A}i!^WSdee0)Lw
z_Y;00d)2qz&`g6rXOQbaeqn19YOJ;h2q@V&m~J#{xEVp?MOlLc3=}35s3e7@v;#vf
zF!gY=u6Z`B^fx%dI7IuS>w2Vnz39kd5Q+`fqacoBhSK6a$4;EULysI%)Leb|SWCI{
zLxfI%+<4a=QQm8MfGs3qumt`e9HSFT
zDXHluK>I4Y8WLk;_s53ya)pg=`_~dmAVj}OI(KT5S)d@k;Pvl+(>7Odz8}Mi
zOxp28{)j8f*>1tEx_7#7Q_i?6aFy@CrdL!tDAu6q7N+aGPd6BBDpY#*S@JXB;bR^A
zG4uz{TP0ig>n$Kf5Npq_<)@cQ)<2hvM%a|p7aq1C3sauU!t&FMf9t25RI%X9
zgcDYUj6a(^UI^D?RC{YUGqN!5`M8`?ft*D82mb`A-n3zIm2whrvQ6MvAbqMjRxa>q
zu(+Ai8>@~N_tby5;$nbp7Z|aS%D#T<-?BkBllM=_tgsK}!;t$~ot%*}UtnvHIC@$fY&owsg?~h@jBS-F
zg=txexGdCOSL^M+h%6E2PMJ*0=Mc964+>|?Gqu1(0c1C(iN6vj?7;rIcSDxVkSgJOY5
z@0ptB7ekHvow)l{!yr<7=H-=T^0l1;T4$D@%F0%2Kt=Z0N|j~x8_)hA_YxW1h;AlM
z+$Q+5PnPxJgJJLkkI8SVq60=->$RtYUd%0N^tpwD_PpUH315OmNI`TYAs%5Pa$NbF
z{pV#)6HvCM8BHmo}
ze7&tGMG_|u-#eJD#fz@w`YfG|oigTJ`STj}3;h!Lb;3a_)w?rY4~r$LM$ojEZ(<&i
z_6pKd6R54Jo)VR8Z7pr1<2hGN9f_JZRd
z4r_wcm{o7K$O`Q!v^s9*a6m35rJ<}yih%0M$F+Ck0E7_C7a5Xev-G2H(?d9&g{qi4
zeo8Fl8mHwO5`!|w6ZorU>yNS(qNgr=V@Tm1!}EmW2&nHy1&Qvlshan*m~F%0P(8i4
zqVg8#+BtIzb9Ix2#6gEl!#8ArWW+YJ(qGI0(bQq!mBwl7l`n{J2~-O8MAOJuN=;y`*bs
zGQX*a`fbcgw&Ip$$YQ_X?)pTNG`|f2LmDGja6KD|mIwl5p4Y7rYJ
zHD1I?v5)rKffa%3u&y#}oW%5~b=r5Dr8PoWQTknkPAGO~(0#2K*8m%2W6JyL;>oh>
z-^>;5i1g9S-5g#YYqO}*(zF#
z(BxVoZt>8aBkAYCEIIDE7o0yb-%dsVx$nFE@W7W0fhrq*kLMu`ti;}`Q=9^qEaMzs
z(nOs@zI=TxG&Nl7@E02!ZO#Ir*}|^&w13j8XF?U0+3QL==#0Z1hQjP1yMqD4
zH)Nx@*Bjd7)!z!Ld^qYVXKiW;)82pLFt@{^%A6G!``KQb!N%)k?R3u)-Oo{n86H++
zb(AFf-#e#wWx}dM-{9Vp=obD0$3r1Kjo*-d79uOYkVf?h>NFa?5SY}4!tD6?Sq3I#
z!FlI$+f6?-S5`$MEl~9ETjO{Z>6dnYE84ah*QSuZN&UoO_uM=quR+S|1fuW-#ZCeY
z<4Jk{vgd_NVCpjW+UFNfzDTTX&c$jhvoTXzc3`^Bdc^Hw$Rt)-*}GQef6~#edCJ__
zXxZyUR8PEUZe29qIF%(84|qmtIXAlglCJN#_jKj3luwoB)4+GR-i31`8k%0Nn3b|u
z1>3dr3|L=d^YDNdmzsI1qSYA`cKFi@RjgrfI?-HKVVg;$XZsTgX+YAp>;!Mt-y_ME
z9s_GRQEf=NfQ4ve1KJcz)XRYPx)QBdJ#^#GN%KD&^>s
z$aSxW+7ZXuish0w#ZiXAL6xRPibW
zLiXP`>U7!{g6c5VzwluAi&bPz-9NT=5wje}f~9jbSI(JKulVcEjx5f@(pax_rOBLv
zKGJkj#N->FPM(8czA}U8^RGz!ur6byz8V^rgqJCJkMr&_5?(O1RN^z8Rl2ITv6+=%
zXQY`8fc?fo|3d;Fb~VIh@;_2~7EGb8rUj-_$ZA?{s_3pw__rPYF(+|eP3x^DjW&WR
ztvq)L6OTlYzaHxZ&N$W!I4Yl+djtF?Cvteo9lX{p*ayZ6*z;&hR9mLs
zCETIsMY$TxZVK;U@XJ;FCr;EB^(Kx;3_69@()fi=7301_GpxMnIv4qm#Wo_mY|lpe
zdMQ5TAclFdVijxDCGd8edAuei#8n6R#)i6+2@i{6$E4MiZF-{%EoUbJ7Qi^>bcLtc
z8k5MqS00=THc<($vM=n9PHzF;a)gsjRb7?C?iHw|8S&M=%3|V`0WQLdAL4th)Oe9|
z=f}kV_Dah-_v*Y*8F~U4XG{R24EzhEh26>@g;6or*+FTpg2xZ36+&+Tycah-sB6fJ
z?-1DaUkSe^VKr;v?bz#?{PU{Kh5K;d;f@=Is!{&b2lw{9+Xmv4P2!6Nop&$XM3Y7?
z^|do%aXaG%A?GfBrxmNEO7@e#wl(K!y%2A>l{568l_nfP_t;9*e)+34R?80f!i_2aKE6wi>Y9(K`-;Znc;sgt`=YOM#KU4CF=xOK0>tJ-+ho`Gf
z7p*TaMp*uB)5(&h_%Pr5PN1oo&c~H;gtK&i1$K+wAoUefyi3XEHQ`i(DI!sB0Y4L+
z54^x~XE(z@=p)X7<=xcpUeI8PQ|iwO^0?TtmXO19t@Qa9;kIoqnUb!U6gE97#)%05K&|YJNW5#kTxWaC1Y%4dtkLU}n9P&PI&|Mw@xZNl&J`b8RbH@
z`HmcCIJ?&j0#cq!>|TPE0}L7HH0Y>&KdpTQF&5I^3z__>7t&iuVJE&)?J)9hPM0B}
zj#QHMk{o}E9h^;Hh&zS#DypA*A2GHu38c2&YZ=FFEz>MjgfkEQDEpi)Sk*afTGdtm>&7fi@Qa)Q6+J1M6qf$+9r2KYQ3ge|5gdyhmZp3Fxg>RU2Egtl6^-kH#)rkwZ!ZG_KhDEH1Vj2b|4GV>BYABW_rx8
zvDNI^D(G;K&J)btDZVs2o+2)(VQ#&Oa1ZDuKu1@p0`&q@Eo347(KWeMP?{k#u6cg_JM`R5r
zdy&*O&1@+Q3$g`%Q~NtKI$3jx8G6sUC2e(4!T9$UkhoEH4x12{g-%4Ei
zWnm|!CHIIb_SukdOl%ApYAJnJ<#GRx@j>;>cc#_}^Ead5I;$vS=39Vaw_a&WKLpFg
zpk$cK8-P}cxaslz9MWaFo`O1Pr)vZ5ODo<2a)|=G30BJd<*yWG!4Ws&vC^H?@^;lp
zamos=mpy#Dd;y-c69{jRDzOlJ(3kzmyS!sg!v`x-h{8>OF9YR#bLJ@1ZscX=d|W?q+V!pF8W
z*pLxPGTXZesaVl2^)q5zbcM?Qm-jotZQgkwdWZmR{!a7FKNy@lV{ip2d|mWA859rd
zkul2l|A?K)4r`K?M>h9m8m`IJfk?iR1aGhjXUXq05(SsURxpbt&)k%IF7u%~$2=Wm
zIpC7re4WeC7)j;sw>P${UcZaKNG3n^Jz+hsu$idvumGF9iPmEWPLvt9|5;+8U|=*M
zk-m>$P(Zc?4(r10I`3E{CqcZ+k*OHj`t+K}PkX?+GHZ-4dzv{FJ(@B5WRrOIosR
zK_6I|&3!L(ekU6w58&`*T5kPyM9NQi#arQyTO=A9Z^+
zi6@yDq}Ch?wV!|Tr|zRgXnqjiKTHluT()T^R^fArrn~%2GE;~tdgxP`j`F{kH=Km{
z^eZL9OjQ{z@7kri33S0crS5&EZkg_R3jn$I{8q>=w@;b&>^JKa@98GIDr*E4vPzW`
znjRui;dn)u+cOcf-gTa%K)tNY064zH7t=L)ITXv;JTOSP`r?mb6YXekbDSS5aHw&H
zz-pzH8uIX&?AMones$7GEk%rsa`h!;&7riK-#(^WSsfD9(1djIyhw9)mBArQUcV=b
zb9x~!ALh>Nn1nOxVvsS+ixK$rEgW7=l^nJOJxI-W7W7hZS4>{2jo)uFFqQ^H}XBlUy5iT
zuQZdhDxJBnG+QR0Ao}&?OIUh@!)iw5-f=Ppuf5NVw+O4Txm>)XQW;^UnH{tCV+;&l
zj@Dm<_t33lY0DDXkhmSK28(HnWqfz)zBDN5WJgm&lPSYxKNev#H;bUd98R4d{0j%P
zh-22ihSi&KFY(n9eU*Ot8UL~(hsYm$C8FZ#yfXM8+sm$j`be^QGA5{g=64Y`YXMgoIDlqyQh`y9dU+Bc~FLQ`M
zyM>;tBoFa@7q|uZMih;v|6APVLdMHCq4!=?&+r5XQFbsEQnqarf?8C3k2W3mJ(}$d
z4okONZN7bqA2iSI%s!4fX{JoSS{ff|2y%U}_G(wrF2o$s?Z;$2Dc1a6np~B5tWVlS
znHb`GO&uxe9HP?Q#r}V`)1IY-bARW
zS4ocAbBibkL+DKRc!&h2An9JcO~C50(f%r_Y(79Y0j`y
z4clQlPIm4SYe?hrdgXg!r9>JMGAK1_h;X=70C(uXyWs$N4y7{Tt6CKKaS~To(Js
zoyGQU$qA(G7k-!*1_p*dwloUjPvrS6X=%M7!v{O(Sqa#Pw&NK-c!&x_Vp?LLH}|K|
z1_<2(`*s*Rk$+vCuAKXn{+KcfKHx?bF=bSOe~Qg7s9M79iPMI^0w%bzB)(h}WxA
zWthv5e{zw_;ZSQzbe)B?cIE%Y;+S6*tYR6MvD^&nW}ZD)Ai0?x6C_njHia*fxCw1`
zk}&NCltkk=y{#qc0~T9p%z6SRvuvySh&f4qkzlX+3M^!R5vDWt47*1kWuo7cxTwa)
zzjy7ni!5OO?Jv
zLwN(Yi*EssYNr|+N(3P|uK?*{yxgH!D%iduW&W9}z~?sLfDxZhfj>JspR5F?ZJrEn
ztc`lTtK~-|zB(NkZC$EW;?@Ehw|;Rie|!qS>YfGWJ)vY-8th5sjzYge@BLi-oMJJi
z9H2sSF}WSOlDFO3yI#btsHRraaxi?tHQuC|r^afnet2>={Z`w2Cf9uJZFE%;dq##G
zI_e1cFVzmuzh@}XH!KkPQ+Q-wq!5_wSJ6dHyj{!A6g|+Cs1Xg
c%Arg4db%=6Rsi60_?4RI2Eb!YFUE(xJrJ;s7%wz{zleslhp0UH<>c9CRO
zXW326+_mVIESLpua%xf?QZ|hWxF7rYOWzFXI~Nrxl5F+uXSVQ((GZ?}s#oO?mVq4y
zWVk-}qEIJ=8(XSRdw{NbCW`)%jdEWM0d=qfKb%=PsY7NACc!uS54Y;qvvCV}C0(&m
zXKUgmLZ)5uSFhCWXY1F5E9Bb`FTQ>+(nx1pX&nTQ+-1#=8sb&Bc5i`R#Iu#ZX~Czw
z`h@Ylm_LDi3akN8Jxn0?_Q?8%c0-CRr`(?IOdjY&=Vq%y77K|iy?HupyOhnmMT@iU
zs_X*=<9gSaT4Q{h5Qn4xATCkU&tENF
zzisv8@Q>jyj9@D7TwzU(%7AVT`P9G3kaK5**n;P()t29W*DD>z
z_a-JF>dn{PlzIQ+jDX#D8%KBB>o*TwsDz_%2WOFo6!T=*jI5`XwLW5OhOf<>Rc$|+
zkl}mru2oqM(9=kcxD|FJi620?rq^iHfWPZ1m@<)^KieoMc=h&p=4Bm3u^7;V-_d#s#
zv+TmUB_TLNNa*x7g4~{g;op^hjZo7C>j5{}HB5;ZJ=XedXY9|KP@CXL?T^i4B4-(QA1haLjC%V@AM|3RSSZK!u3pRxL|Z(CpLVB2_bt9G5#Ibi}0lW{WyrOzUXXm8=Ivu1ze#UOGTc|ucpa&oeTE}UHa
zBZHgGf6r~O?(*qyZ@t<377#Ag(#Wd7qnBPQG2EIk=y}^GrVm=6hLj`SKg!-JB#Xkv_RF;pFMpc%@hi9?K)ON=|hBs>0
zB#Koxe&nA(5gi93hzM^k%l%uaaEM2rlT|z!w>G>B@sB3D)a;~uS)+YmYqmu@UXyo%
zUrp4EY*XEY=m}%tSrPj;NSXiRy!DH0y&c0g67gr);T$^V8*%P6__hOMoxShj()d5S%&IKc*&~9pQWtjC6_}cziT(BX3|{v)*4#GqXRB8^ep=&HJ_!Y7
zJDGt>pgU9c0;HCuXK5JmGFh$pKFX;%xDZs})0A`LhL1MiA`n%=L=bFK}1{II4R_gHCn+h0~=qYvG0v8Ek&*n
zLFUW
zaP@FUv`0Ld;Mw>Rw3*);piT@Kj9-J`N|rVBZ1erq4+q}!SUnX?_6p*+i7AQKx$)Jv
zB*B0tXf=K)E*Lj*j0&n5$;J4sfK9hvIG$*HWadtQs$-H!Ri%tjyeg2K9b@$jkls=i
zp5K1&;(5jj%z4ua9E_L7n!QorJVXw7`4%Z|JS*=Qj5z`~4=W(o5)mDcMZ{y42XzIt
zE#`X(A|)bT93G%T^~U#E&vBwFy$36aK4N^I4_9N#MdJdx1Ay2^oy?GUIZEduv#+qa
zTn33k7S-M%f)j=0#BTM_al6CTPW#-}X5jNH!RROBr$j<3MP_2|
zpGd-W+RsZN5m}O{otE;gW8!42Tj`;@QRD%W)-l(7pO!mNCL^?yudUOWBy#pyNTSv}
zo!F2Tgxbc-Rsn^Q>Es!-g|~omzR?oKJ{kX8fMPW1FDdm$)26<%o5R|N)Im>xkHlxz
zAt&|*6mJMa>ZQ4kyRC3{G>30ZWhw4G<3KzG2k4$mLNfSAS@ysFuJhFN7(?$*`-AkI
ztwnqm$k54}TL6paw{K3OCd!LRvo=PlGRI+R0w=A@KFBpw#?R`I`PkGsYv|d#bjsH~
z7~PLCrZHZ?cddq39md8K*>2Jm9&}5s1l4EgKu&l8caWJ`c3lnPejo~`YoV-ywNx%FaD2GAFIpW!w0u0W9ypkg?8VZA@S!OOx`#|)dZ
z!NA}gF5O$eBx>!ky+Kvtn?O))`jtwwPAukJHP5~8%PZ6>LVe5AtWht2D}6B$^lVq)
zkAj>2JyC5qLQQ5PBCn?L>3eRD7eDZQ`qqUjI-BXX$l*}ulisP+ps29-10?8MKt{@XHc@)o4TY&%96^|o@eY6Yrtyof6g8V`ikf#Su%J9i_cI}%8{4YzfH!daw`i|gH;Kmo{qZMRag7e_K9+vK+Y&xH}p61
z8Y!s=`Da-@fWTtXBsZs^|xg6eEy~Pu|l6HN-kk<&w}dv
za{L2W&E%Fph5L(p`yDB4XEmgHyKu1v_QoY=wyE|!+)wzPC%1m7*+f%&5804klqPhE
zDxt8=6OQ~GmL5CrP}?Vrj=|8DXmXELc#31{OQ%a%%nWddwqrhjEp&BBcDNghY~cqa
ze=o;OlbrF7PLN=d9%CkO#QbH`rT1$bur4JkK}Ax~~EGg@FGPI93Jf
zdwiE%owX04QDu0Of|3Se;x%p7#|JngrRD!2PyOGZ7qNm7;wocE+qY$1Kpw6TWLuA~
zXSp1#&{W;~&*qh=EvK&fe*a~CKRm(^BL{Mqi2pTfwlhs7{AusWG3aFD*Rx+Zh8_uq
ztS8msm-A+_YUYeD)pzb26vpf~F(>Doa4x=~>)R&yFs7u@v{V8IFls)4~E*0f=M^sj)J3nVr#!@wLn3
zEz3#o<61o7g4i5$yGis+DdU?V2dLey?{0ui{)}fTl;3>NkGWpZQ$v#Gdo0a#Y?4BcWbmR);NPq6W26Hzm@V
z-S##MMW<{-RaSvNCp~o62)l>oW&@E&Dtm%Uva)I{#%SwltMBK1RIV(GCDzxwQ709V
zwGD%)ki`kdf>)erl>BhsO=Z!ALf?VOL;+a)Swrkj-f#0aBXiv5OVFaZo4@PzylbPE
zO*sK~MR<+OQIqFec?Wo!Y{Bj=VDTorwGH|ZNb4{5+Xw17pQZV=>zthUj_CCN0w&*Q
zgK!ZloE}5l_S_(Z{_5j$JS79VlNb2Ry%BXPYeX(#JnmsdfyxuiQ7zqCQvWeeiK)m$
zV9Dz6wts|>@c`?G%<0ivfTbM>tJrDS!gm-pnB3)nap@VG8f^QRPY5DmYVV;&dS>6p
zt8hgHGHyE<2&jhA+>~^ff883ASl`WKM2@XF5?`_
zl8w&I4gf53oX-{0x2@yb_g&ZWyT%FP$-u`Race`q0N~bTR?RF06*9A8%hWkiOrx2_
zxJKhUOCAru)7Dh{Yw9O)$^O~uFxi+3yLlMsOI~1ggl2F-j#(
zQ4Kt*yGVaEYX!%@JTOL(g6%4(Yr~wxq3mWB77nQipToXo{xq`zdq|9ty%$p58*BCI
zlN!|N14W}sWRJGa$|eMd2XJ_=xA{?L9)8e~i;TJHvVZmDLl;A{HjgYD4`r8>D_1DJ
zFkS4NRVl+SYh19M+`OMuH;bud-H0K96DJiBEvz`|SDs1ghXtt=Mdu&jO(>wUV}gY;
z0AO9X1r^PNC-5ur=(e}_TthcgFEZ=+qr?gd4d&;Fg)DOP2wt*MO|Ve?TO2PXU8Jh+
zAm%&juh|da_juZVc2<$rYJU(8wn~Sx!o3z89cNBSt)X?O8`JprzE{Ukh$Dyld`<&0
ze1BTQ>{p7ht+E!KP&$6_lt)j?I~I->bE;;lZEwpswPMBV)+TRG)T
zs89oX6;x_-6uT3r9}g*W2R?36&l1EAo<}~5{Ta8b+#w--)ElcPI3gr|MQrd@r|=u%
z;}Ud#TluavV)R&Q)_)`H66qW#;E!=vT+B&RxNE<1)To^%ea8Ui(WMIb4cnDUu(l|P
zvEvBB%(r-7CpRhBjw7jQhM9!hf=NZB__d602!WXaRN1_Hd(fvnQ@+IILInDR_U*}z4s8H$c=NLSqc^cYgYh3o>T~Z}uA_2aNEG`$?>{cD|BGhrug)@q}@QpNs=%UhKTUK6
z8%r0t+fwHDm_?gK<||+K>lF(I{G+u>UeWJ*UdY6omSaW5hQ#UqU2l+wk?gWlCz=B|
zmdhMk^|8GDZ-9u4`<$|$etJoon3)Mfe8#(#`dh&1q;i~HO{(Q@_sr37aAeu)&l#`T
zjWeU@U+2&b>jo^xt~i(y9;%h4zY0?Dk!EUNu5sQMtj>P?s_19-@44=@do;nyWGjcj
zOgBraajOYAu0m5Nk2iXMG#oMyxXCc8cc^J#O&4!UwN)5k%1l&V`Ouz_UD)hCQCuWd
zP@cAmi76TqyRr!68Z!NgnPC$GT8*>
z=w#KXr%b~CN%|zjwXv_9bbHNHV%{CrqS&CC2{A6)wW=2t^Am
zbV`9sr_5~9zwPYa%|KGI$w2YS>#eEdNwnE!PoZ)<9&tLHl^3+03bM4^mD^?H1sfJ9|Uh
z8^27crhm^m$mifrdfhQ9BtH@iW+3B=M2gLq2_uzVWGPX=p=J-GuDe2`ND$I{^@7p6
z-|86O6LkccZsGdAWS-Cy)Nk>IJS+4=v*VpeGod54KyDk$SS0!7h(rrUQF~W36m5Ng
z?pK{CcpdtCc}>$77S1hm=f&(1#ZeKf4!b9rNYYOl&XYFMbp5AUch~4oevWt7hFbFb
zGS-;1!7cs-38``bk56;ojtk6aMfFa9elRr9*yBXpm0f-mqRL{mg4Z
z?IJpZ=S4-FNORQH`RN-qRRt^yO-b+J^c!5&E$!r0$KtDxr;19OhdwX!6n=j5uyW=Z
z@1mdNSZbF{d9E|F#lJZO02zXn
zcP3c^Keft?or5W3Uqlp%%)N4Dv@}S0W}S4{BYI$2yHr61%VMD&%&{n&t6%Mk{}TAE
zykL15qYr%nQ*-(h_21~*3}KX<2OdTa|E%N0H3;k`;%wE~X*n(C<3`2@K
zGZ~Xtn(+8!YUhjwgazkWz5Lg{#Prx%gf>s;bmL1vEAOOh!2&X6PHThOOWYc*a%1-g
z$I{h+h{4ORTA;`p47&bSt(zahPsIO1?M+etiPw7yQfmrVdWy(rJt)DBBeEveE}QpT
z?>a1K9+V7=ptbKhi^^Z?eAtzwiV)upe2A-B8h+SLpIzQ}ruQF%d-Bv^+>f&ZOaH?I
zYQE0-kgc0nyq{*TPz{)aTYzZp;X*tfv;qU=Z)=|UP3pA_t*O_7GT24<)`uf~1zN)c
zpG8;P+pd++m8XX!q@ie_;u*4Y^z=NmeW)#k(g$ExwNh!pDhaIpBG#epO3uWEkGnZm!xaY`cj&eokTg8*&>@2H6>h@1s!+v8E=_Ufa5{ApXft33P`_@hp%Zkpw-Ym;~>S!Tb
z0R{EIYDSxYORaRTWM4M@tk(lfBx
z(~WM}j+0g|({jL$nW-TxhGOq@5gd#r@~2s}c4CRkOeLmPydMufyai1AOBTbuwvA}b
zRC^y3GNLf)C)5$%BALp^YE20FpVl=-Gxb#?G+A|L#o
zGyc_HFTi|A+ZeUn;NAE4!KQY&5C6OcIGgqK|N7Wvworj>dU><|skwn2*Iw}kM}wsY
zKK_W^l8M1hmObSABscqHb7lPU#&eThW;7ptdVjJ^z`D^$k#beUhOc{cJZ>?53(ALA
zJRHv=!OEE~gq^-pNafFRN*<@R{Q*jPoY`6avVB|rh@{Iko=SJfWxv#*qBtP?0=#2=
zYDu=NX2rRicd#~xNZ$;A|6qrO<7%xC_I9=_@)VDXalVRfzrbrvB-f61GtS_kQs>YG
z`jdr6ujn&N<-APW2wdQ5!^M|C=l+@T&uR}#xNSlA*+bbU@K?N28L5oT)#7I5i4c|T
zbTG_-$NF#&os7limI(#Jo>SW-{S4kZW*Jn-hpk`x5Qtt$6dyNt)LoqYFQYASJ7D!oX+i>r*C38xt8n~
zWLka5FzA2nLKGk<>XVp)l3H(Z|1;-wW&ie%LZ_>P7ZxMY-g;(w$#ZCYypX=RTqLF4
zJ{p&xkG^Og5K=$o9XuGgqXdSnUMhU;m=fb@tB9@KoN~{I`rNRdJj3RPVZhpT%fyG^
zdAK4SOP89W#%!+4b%pPDp`80v{1YxQhSX$wbf#kVcwH~c93Ea5_mti};#Y)8T4aBv
zOApAmD`E5OXwUC1hAcZD_jTMebH1_$D7%`r-E%jdER+#vlP(d@Wspg1*!DlZaOp7=03J+gsZW6{n@yPxg}tshOq^P8fEOu~ncmd{EPP8X3<6A~8Gee+T!%BjY*3>Xl#ZYY@xivnL5vF;T~eY3j~&s=`G96_eID
zzJS*~0*B_mIz{awhtEUo*e6cYoaqC+Tsy3{czDFhQv-MK@!36xmz_$O4$U4NqXk8j
zxIiG76>mT0S+^wA&0X_8A~VZ$?cyNZaAUezV>v9N$n~d+NR#7;noFPikk2B}_lP-b%3>h(_8
zp)+N8C=Z_sw4M)y4Sd!mXDF@(ncvwS0(REcrnp;{2J8$?uqI;@d8sckR`=JIkgKjo*vvQl3%|2XUBW9#!@;Di
zx*O!s)?|+oeXsjd8}5?vWKiLXg+Wi5t)I26Lt^Flv)QD!o$#0qXYdfxy=m;tiheF1
z>QjMA#}2i$`0Mtl&w-Y{?`{EMUSP&14T04Ea>iD;UsPB;iwOx~{%SEn-Erjy$C&)4
zAQ~vQ`>eXd)I(Y>xZEU}%Iq12)g7Ig`}Uo%P|ir+CKDfHFNGPX;4FynJWSF|oLRc4
z{7i5hKM=!^YlW_mzmsM^qXj%mAdl3ne@?6MDY>vej9^h?>Oqj7Ktgo$YJd0av8YI7
zKaOOjppQLEINhLgwwx445IdmRBvpwOn=RG!ZOYxqv#$MC@})Eq`ZByX^-IJJ?JGW?
zN>P6v*=F{R@!0cPe*UOiPJov%vnye1CYTShoEpvVxhEl69VfMRq0JkoPhpdEVZEcR
zvsVa>z~K9
zUC*K|n!OFnVb8t$d9L&Poad$3NT3LFFyT3XXbAWG{Qdw(
znh$c}y=SlaRzDq20KI>&ek$lckX;*oM9wL9PAzV$3r06Tx~Y~6QoJv{y*l1yPe
zVub9*arD3MTLSRU?VNqO=ZPNaSx_G&0zV|K?v5qyIZ^Am^fAKX6?IPFzruo6uQ^A!
zu+Jj7e67THUkbq8Vnosar?ghUc*L>Ee4DtH$e-(lXaN~MuDdO{=OsP~t6!3obLug-
zYhR~O50ZK;(tViZ&DSJvXxH%8zoBRJOLLPu!sSv`ce0waVc2xWzNDn
zEMT+dNDokx+rk@e#OvO-zZGlrkLcT}tjJNmSh?BHI}>8Uq>>YUldln&_NWTU$M4kL
zZN(42zj%9>bZ=W{qFwzwnnrgQ;?0PO(>CT5R}{xX$-sEgF}1+5C&!hmWI5Z4!R{8Q
zFPmeo-!kVGkj#@RRg;Cr-XzOO?{5UAPb&lsnh4{+o|e)<%i!G=x>U1L*43|nO53R+
zY)fG2WJf%MG!M;^Q{6|mBACWns5xv}Lpv4dNKho-Bo}ZSnb%CrLsO(7%#P=C(8!{(
zLt$w%3wi%{ZqI!6UYfKs=hsaj)Zkv*`7qUv#QOtxw*8Pw2>sQmE2I47PcXF5!G+i9WS&TT#kx8=whF*{?(N%~0^Zs*#Ps_|qCEYzG0
zj|h70=%F(UFT~tlGU!OwcRoKouN)cedp7s^`!R0tozd6jPZslE#&dh@=sF#IVDv3u
zjyAIdN-Ow4zXZ@_ZCO#wjB0id##9wQ$D%y1KOqddeQq*6dAlN&e5XP{SzKKbel`_B
z${$!#*4o~Gb_t8fUm~y|K7(}e@JHM0#nC<_u!hZ+e?(}K5NK7QGe{+ey)mwOk12%o
z6zSj_31Ks{z!_;-bsLShc2gnuD+jPiD29$5%a*>aeo26WSxnw{DNM
z8K36t4D6N4yUDoP&+FBv?6y*>PI(6|TD|J+%e+^1tfo;SIPaziw9MHXtz`k|5FB@IGuB0px^CK{#@NwTu%GgUWrALM
zx9J|@kBq2>!JVWI*|SRfmDbo?q02zPWL1Gc&CkZ|W;3q~f*Ep|cq5vE?vW~7?por5
zjR|jEzo;TJ_(#-4G8zMYRW@ITWg_P|!g;rh=GE#atR>71r3cm}eI1PF@#tk0dE%
zdhg1++Kdp#wNQBU%?uxMH4{By%9o`BXjj}TL@ZMR!o(bF1UB#zK8s$t`
z0HGLlTGGAltmXD+N(MlrmVje3n0}zdxDd&UIH!
z3pe_f+d83Ji3gF4x`;W~tYvF<6D5*mW+uo!yIvwxOE{oGOT+j^ivD=ZXy()Rz3*WW
z4C|g2%#&URI^L6waTP9e&TrY0pNu!m)mDu%na6Jo*;F?-tbkrt<{=;Vb
zT+L)zdly;$RWtn7f8s(M#>JN5kFG;J);;B~*l3O|*QbmA%%4aJNBB>t#{t8HO>Z1P
zzV_^QH#@0_Y{FFvtY5K;m1_?xz;|LRQs-~8!l8(MM0jmvMy$6>ldV_#gf)X#Q9q%P&KGi+&tL@?^4qLkszg+Z8nk
zUx0LW9m#2C3R>+!a!ahvLvT?){YKR%U)0L-xbhgK1#cVNRh9X(`3t965G0H3jjIn2
zQf2F&yUM8x$Z0;%`~{<%x@nGhhtXl446?;cDo;S3@37L=PgSzV=+||bTYT8hUVr}F
zO>~IpC~msD4u+@08e~excTapCaunJ266ad*cd+aToA9;r
z)zhZ$_cGWmvy5Se5n2hS56b=ORdJHo*L^js2~THnu2w$%CJBEp>aW#i@BJgHEjocG
z3=^JF@#N6TvT#y`mW2pE())^~!8AJMb`XIdgr@u>iU5v_T=?@O21H*LF84y8Dm`-w
zH47J^v(Mpd4AB)H-{1T5kLWA$861eo?fXY`ch9I4W%}Qa^m7l4<=q0E8ZSvKZs`f~
zeM0Sjsm6-g-q$nRXFJwl_2d5I!J0`tvEuWScEx5;0%XdPGhT!YxKg1c=3n7Bfn+w?
zmX>38)3mNrBZ4TUy7uNWO5{q|x7|f#^(wJ9a6gY;;yJcR(tjX;?{#J*Vq(fU@?*
zalj)L9-|-gJ?Li8k8h14Vmv2fv5tcoJuDShm+AHM?aht5@`<*a2vQO{6Ra+e0gGv6XjLqc
z)L_+z*?a7E3SsM8dvnd?Ix)+SZoY>vn_esE78{RO@<{O`{lqmy0TLTX4_)RGeNitjrYr#wKUtFfH7V
zF0)T*E=T5i@f4adlMqed3ZiZN(w2*EP6!0dwxk|3mW`6rNjmanNSlfsU?tN9+90#Yo6j4p9RR!yE+2yfh38xmmv{K9+3J*o-c1zbc05w6~O?aYRS
z!_1~%Y@=t2>Dsu|6-|sn0<8&!dz5g{-=9Z;_4WA
zhD+h}To?BpLf(H>(f4gU_~=vg?0f@I*DI{sef)L*9!wr&QDflZ1n3YrI&B`e&iFAn|##cM9l;sG^)%@=ZCHjL0}WSipDS>#NKUTWIcuvYt4n>2
zaQgj$vR+$08WDVV*)Ks2Gj@5Q0CA_k2?&V(+ZxS(V^(ZAf7k1lRlIn}b5j~Hl{tj*
zI^m6i2Z20E4nK8wl#}o>Lj`WHDK#YAZW6cK0ee^gMTx0*sYd?A^5(gs{915)6mNH#
zpTfT7tc2q|!4B?7`kTi8(H2eZSSN|atZAFZd!gfzYZab&nVDR}xXMa~Iq)G>FHQD0
z@@2M&J%cvEz0dRArcz&6=G3f3com+>a1#8U$vgqw@MfvOJT|c2!+B_jzzt8mTzbrb
z%X(Z`VHSsuC%b~c5uNhIdqgvCw7lN|WHb8Py3pVl(gLas;+}B!Vhf$%ipjd}x%U3c
zd5;%Uv~vZD!3C-{)J~_Hd5H@R?PFoDLsaKHCpyq`Cp?Ly{UKo_$lg4
zI$SDy&Ko3nRM;M117($@3=fE(A0?5T+s(LdXsu*&b!A9N%u@pgW9A$n?0%;`OL31h
z!xIlyXaemRN=~-t6~nNhMOCKlVK`zXfgLH3MTxE*=!kK7gd
zgUq>Tuh#j~g`*+?L;h=-cF7v3F!D9}a^@I)50RvjT5OznJ5I2HMgI}mFnc5D$K+W-ny~aCp@W{{MpCe68(v9L5*2JkNIj?_uv7YVL5#}pQ
zT+qvFj_@y{pCLWT4}ewcQ>a_C?z`1KBi+1xjb>CArWbdGY#IKud%EKtZx}
zSG?!`^!k5--n@_Q5a-U6{N<+47A_4jK!L7uiuN6wwvZ6^mJ!Ybt>`D_UaX%zFE`tY
z=uR&7tMY|Y;CpTy2Uh#n)_*#=(neQ%gYP0Rje~8%R-C6!@w#a=;_)LZa;P7WNz&vX
zIkoi$sa+Hnf}u|IM_RUPoMJ7luC&aYLP85_anPmA(p|8k7{m@38UysoeMcw
z#HgHVddXjhyJsRr$5d>>J5(Qayc4~914O<_9#Hl&8e>MIAtUuP#PoldnAez6BNhR1
z9sn|L3l6iGTuMuGb=Mc`rG3jCvs+;N&R%pQ8bE9TmeyPBq-u(NbP&r@spGyqeeomg
zVYS!h{>;!#6ODSkA|oEb7o?2SEcLTdDq7uP4J2*zI{u#V@#CM-;wz)&?u3}?7w%vs
zq0qtXpmv8>Qm_`b0YnS=+M=e$SN3N0GiO-{@Y2)MIerKb8;DYYcM1i_#xU%
zKb)dmV{0tL??8VSN{jgQpC(w55W#sQ#`wLfQHF&~!LTp3H2;^R44&^~SASad3chD>
zt|($Ot{{yzd!gg#=uZxRTc3uBf)BP(
zdXr3%aStl6F3FFdU&I;T?)9lr(Qd6U#;#=0^~tecIBAFSl1b;j
zn|mhuTv*S=qiH9Y@$&34JtC%pT$SQh=QZ}C(}L47;#%jmC)mqC!sBhg6)5#D5)FUg
zIQAD8p&i%|n)=Ml57}^5*5rC{KhLD^hpoD&6wbUu|v78;icTsr4Fi)2Xu01x^ri(;DVGITdFjqJOAcL!s8;XLeK0aIgD(Lm~&fDzwa5NRfPAvmZs*^(0lWyUT>;I
zQ32$`*()qC@^rqHInGYC`H`W!H)9{8l>Q%1&Q>o$c^UiMH$&@xd2Pl!9j+uy!+J7p
z4}KdN4lsYnJ|+be_uBgDo3vnj%FjWVmMgC!n^ok{mlfh&Al+Yw&r6ztX1Ibd7tRj9
zt^@ACwMtO7^`0^8<@n=a_`KAiAyfAQJnbGn5Cs%Wt*w#YMvJi7M&c}Cv3o+o!a>}{
zZ_t28<~4JFh-nCy=nny())`+L_Ut{=8vj%f5gj
z0w>X_7}IA(_N(1wbKGiwQlm?m#e=%N<-Q-hr~U438%s?ZArgJ
z>TT+J@W$JYG{FQj*3d+KyUq6T10bsPEFD^o(%MFE-5Vd$}JkQDDr)iVcbpdJd27HoF|F3h8Cq
zQZ@*)2wGj$!>H}rQhxF6<|6$eqtP0hh%IYZ
zv&6*gll5M&8-14172RVxY^SJ@>@@#?y>~+OB?xHFWS~1?mWlrZ~g&NNquZHO-
zinLUdHI#sZ0EFbYPLxqUIa<^4Z8z~B`KSl2Bt4d_wYW}9nI`I2hQ3AkG8sSp3Do$9aY5_*BK+kmsOhd3{97)C
z#CA}#D+yR6UfR5StA2~eaebI_4bYSGlp0JK$(`}!xZa-Y<3SojC2aXT2KSBAegPco
zULV+ZhXhsO->}4lcBou7-LyhGVycFLEMMZsYEFUEO*~(JZBzBL#^mobebVrisjXM5
z5BB+**@w-Izu{k%_sKGuK3ENIfAPFtV+$57dlLZqleyfd6M3~wCDp6|`3xGUjeIVv
zDE`3lO(z>kkS;nyiy#?oW~m`~efGkH+{F~K`8nD2j`vu+!jOfItNUA2Jy
z$s$K?ni-ISK9I;4l6pF2mj>zz1_}2FXjZ1VZtlOexBiW2Z4(GBzB1WvQ-8TP^r|F0
zP4c1WgVnEwhX@vd3GoZYoNR#y>fkBsGk9EXC#&2D3bR!AT7lT*)?qlIn~lw)Znj0R
zgyPjuBqv)Ifjb9Y<%Gu9^uh^DY_cNdS;W&*V-pDAChD}U3JdCBR5fy?njd9nAa|;e
z-u;;{)8OoT9x;I)3J?zU_6SrQ3XvSO{+hdEULTGp!WEu!WwsR*?NDFnUhe=`8d!6C
zmzobKS42Wzj9ck7PMxX0+VTpa@4pfO;LbF=+-C0MkMoLeT`x@_oacXM1Xn^rPL{Fl
z>wcrrZ2~xCj}unvr&GnkKceCqLQwc@3h$k>NF8
zgT<`^$hSKmr2GWW^iml1JkM1~N^?q?nmIfMk+eIMU<=vEDeb=gOg@>!SZ(rQ7B!By
z`V2jS)6cG0NeH3Gzh`;XjlodEaI=!<^E766jd<2JN157EyDaAYQdV^$56fMsS_%4I
zmvITTb}T8y%Rb#45y_n2WLFBfIyqfUdQL4bHPMUh%|vy--x>puxhN{V
zVc9iebh6=di&z-^o7ax&Fhr?+CIW_&@||wsEiy4xgF*#jGSoI2?ro;`st9@0dku;&
zf377GW5Mq2S%Ov$JT5xhxN|jH>eKj4ZZh;#JEOH_Z;%_OaL}a0uJbBrnb0^9n&-Vj2zSCpe>vmVg0Q5$Qp&Ar<*hP)4tZH^=Fcpmm$8}`PF`kGUG=|=V>JaqIOJ(
z3-Mc3QFA=fH;lK}^9@}&R~PE?Re`5U&cl3GRsVo&r%YwDFd
z+=vJSspoI-gmm}bW*E5z*lk|BxGekerS)m~&eJXP*6|=2GO6=O;mUKil)#$~2yyf&
z^o|vHI1e|XX$S&_34I7&P^?xwCjuSV{?KpFXl%1FQ&7E7yytS&_@Xd~*}#gC=D3`+N^B{($^^zkVvLI@
z(E`iZPw?DTVWyEsX{GdN&`6d%)Z5Y9
zJLG;c`>~#y^v&c^KGEy@?t8`)R+_#mZ
R;YYFV?yV{zQy@Na!XDERGPQI(aJU6Y=5Y2q4v~jcszJ83*#h^-NkScO-3ZL(?47
z{(G;XwQR;DP`lGvDzNx>Y}>6j-!WY%kj6hS#I%0kNn9TLoyp90tiy>~Lbo#bXVUXK
zYepvOTg6f*5_d)5D3f7ohAFGH;xhbf6A(=2@
zSB#r+N)CxxyyRiuH`dRtP-
z^j)(&6eL_~pUc!%rw~~UpQu$l880;_Nn>?^>xjdtwR?HNs)s
z_(ybPaI5<_LvYnWgmXREazZn(;mC<4*1;DjaJ!-S^=utfbfbXFlY5m0^or>5U`
zW{ff}Jt_;6)3sY6c6qowVC@u#6_uar274QkXezTa=UDvTO6zx6pCY>R1JJ9dc*(
zOk~5>kw&)WBn)(*H}MvnK}}l`pAiPieLFIb@KFYH+^ZIF#8$|q@gjVwcP-oLFFOS#TxM;@03oELLk>Q0E;qZaSIc63*C%c%>e;t%7rLijpk891
z@^^XG;Q@Xc6eMOrG%%P=2ig_e4(=83;(OgV5#I`c1q1g$D
zk>WIwmlEr?{XP^hM-_}-E6BUD`a#Cd~{LanotzO!;bf`5lE
zX#C&>V;-GcT$cqw#RX8PMtM66;j`so+=l$}!9Kivlt_K{Vqtx$%)`{VjK!gbcP)yo#Z<73uGMrz=H;5+i>6?o@n2qw}60#*UQ$
zr7`9VpY}~qJ$vZ?<{uFX&U0;4gQe^-nxpt0$4nz?@g>`ttq4`P4&Uecu6Av$VwL$0O6x-mpJ)rgsbHY*Ft9~4EX@DpaGU7qPG97X<7xC_)rg9o9EZxA+!6y-HPc(xfYR*_Gxr2(xL0ES&;AM0rdZ;a23DWmcM!n+3%=C+dPvc_~v;a1Ym8C|%EUBjs%KzCKUec}_&|yJin&
z+Af~2AQFoWQ$f6XD5J^7!NlcZMM9FH)P5dl(n_8>
z0}%JErHs86h1=d++k>MZ@S?=80!e1aYRcKuns|Q2phF
zocR6`ZHR{8Qo`pT@@rsoZZz!zlz6^x{DQtuu=qB~))l9Y`142Jw3mL}y?cd#nGK?s
zXGehzgWIfy}^NxP&q_gTMwrvNejYqRk=FTQku|$JJANTLsv63;erSh7emZn%d$M4`
zaVNztD47Nlnmh4(A1}1ya}_z!Vv)bwMmI%!wFl$8jRjLk~f*cL(XmtR4kt8zhknQVe3>wgPA@Us`!mD
zZt@!ttI+u@{$0Al&T(!t;g>05rlR;Yn0{-^MQ6-gvhS}S(hwUw_erN3`}8w|)GiJ1m@u;JOUlO05@ah@l9gxQ{;EM(kx7cBgR
zha;qpa*0g?Oi~fM`Q}
zPO%PUH_M#~^2Y4o>P_>KhY**g*1driivlxfmq`&0su8tmY)#!BTrIN1mh<68zU?Wi
z$drUD%>uH>G2yw`ZoP*r83)xh({1|vm!+cR1iP^yf%7)1tlq2aE(_D^q2^k#-vYYd
zAM~?kj3}0!JF&{e+4H|Xy2?#n6Gez$wbcC$a3pA18^;zC9ill?ZZ~wfjKHd
z$W<~&QOJ2@GEktKN|x7NM)wj_qam7IxfG-SCs#;A%e$?-sO*h`{TskN;}g@TOzwM=2sb=KoxcLLryJRVZQAoo=FgSi
zZ)?qhmp4_^xFaAQR#Eln!AU}I`nA=|Bjgvw$h{-WudDVPHJCfK!uQ7KV{#hKRM|%9
z*6T0}yK}Wv0|1Xl?#e|)pP;WT!_bseDusJZbudG>Opbg3K)lxcD>@j5Q15Fm6jnS(
z(3JWTrcXFDi;kwSuZnvlj)7gz6bIFFcO9iIU+w~)71@us>DeS6u5GYDejw?zotbpU
zyv*b?CIT=OKRKf~7WB$yZAuTiB?AghUN1a`?t%{l8V8RD_
zlomNoL$xNA<#U)hRcog?f!6iyzOJ2C@GW*Qvhy?nE1BATDeo@15qx;K(%!u&xsfP)
zE&sNI>VfKnpymp{0R|P>@{03=@TQs9{l=~MP&iK-Cr-@5Wqf1Wa*$H*y2!N+Io@7#
zN=}jP7}Dr0%;A0mbmwlT(F*8*3*&TTbe=4NojEduB>tp3Bs4V>2DgAjc<8V&R@@Hd
zcHQQ8xK1-^gOZSQ=@Wy2yn*Ep^
zw}4Mgvpk3{+V!>mN3>Yvf*iGch&qxGoEk^e1^S5JVQTs;`*qvD&=1)&oaY#Gn-ffi
z@O{i(Q_Pot8^|`%3I&cSNTP4=Ja39$GNGm*tfeuJN7f_!?c$W~wV=YaXic-rt$gN!
zL|44vWBh1>a4J4tAqkKy&`wpq)@Tw6n*K+mJchYhQX0=shEgv6CGC4-X62$f`g5_@
z-?DkN!n!M;cd)#q#q3m`*^l1Xw0xqv3I_+X?nE9bPp*5jY5h=OU_P-en65F!v+nOArB*b1E@^11m(nh@x(IzYkmLW}g&N1EX#qG^NO+5ztbCm(6^G8@
zU{N0LUpQv}L$XU+V)Zt~{CA8MM*Bb1)14$K*y{n-T_8jN%7UXAe3!jwq5@Sc0qkbK
z_zGCGWlpd*PYZJ{!3FF{9z9-YGJ_7v
zYZ=@jATAXidFuOi`k<4hsiPoAV>Pwo(k$UUWvjnZ`*PU<&00mw0pn|DTi&eF&vf0Yey;JuYt1sS$XuOX8C@?sGHT_{
zB3d5?3)q9aJUjAxJBaHwW+!i;E1H0?(UP45&q7?dP*u5`tlqKv#TR-3pr=ub7#4!w
z(poMBqmS9Y{b7}qVdy+KGM-EY`YW>pGo75WfQ;NVDi-u7uHghdzB_v{zMOq}ZR#}e
zw0;_tUu8F#GA4SVU_{x|{>(2FDgAzDZfJO;C##aP-nlrMQkH%lhFHc$sA6F7yhU6>
zcdyf-weHJ#IME3qcZC?n4=u5V_DV1DO}Re#vq#(0+{{{QP@;~UlXOXF91ly@Fw|={
zy1rsIS0b6Ld>$hI>C=yM5J%HcK>
z`S&z!{TfE~vO{n7-T)szttvLlZR9w*cV4l?8WY#}eny^FTTk1}A@;E#aO#wy_Ez)V
z^kMC#a!zudtA3w*?fHR4YI^k9aKOEYp7R-$0F}RQafF-umDk#2=J3px3T8Si>mO0M
z+8-C$K`*u>xc0b$$T=WXb>Y9Pc0Tjm^v#Fk{;d)BS-V8Dm
z*ZnqFdq9ozbsJ@-VqR#{O<5x59W8h7g3HKLzS!m;>rQ_dww79z=MFMQ=Dd^MwP~XS
z8iZTC;rzI6=szOTlgpRWy@X)nfb20+mr$H*Wx59VoJ$<(
zG#cZ2(3$+MBGI-VNoeMJc)=mwvvP$xv17)nyz}8Pc5q#^dUhEgGK7y_RS1C*Q!-Xt
zp4A>(ZT3d6P_m=gu!C>399QEj7$Sx;mjJb0uln^Oc-eH5EWShehwc2zdLV3Lry2i<
z>JUt3_UmyeHt@l9R-#?UmE)__l21D$(JtymHo?MjqB#tz0V)+b4<=snemT7^)Dh-Z
z8Mp;12JijAv3)|dGu4tYXd$aL~<&Txc3=g2K
zL9m+MI4IFp@
zhIchQ#dRb1(Hox?s?Uv|ePs8RqsMhZ+sS)ko?~2otTV@LtBs=FjR(n6Z!sWa~W{eI!
zF%fE8Drbf59(eYeP>p(ul1
zt-up#;Qz{&DG2Ih0;A)Btta{kzBjHw$W3@%gwL{Fz`7FXxzs|@4Fm`IKTYw=;K20B
zY;+_=ygrq@DzXl?_8R1F!48P4uKZ=&%Dfqt&b7JyDMHxxk;(GeE|NN_;U)F0V-Ppg
z*;HTvWy&cK)s>xd_44<`%}47Mf|#8?mpQ1QZ}Ma9Eramh^A3`F;)?fJAc66u-J=Ok
z=)UsU+0_3=s_^#*q4mf7w9*$%ktxYW2iDaL`dzT`Ctfp(SiH;TLr*s4c~c;09QFhQ
zh)`f~NxM>vW(p%7YEAXkdXD4;bSZ`fAY+s$
z_2^PG+Di6gS{1IUTtLEuUN<#3k2aVpWF+Bsw|29>wnkAPfXCSgou5Md*)Dl
zWv~vr*(wB+4ns^b?dW6se2Wk0YK{I}-%wZq?^75HSsonr49j|Ey0-Gc`HpP1RRy~N
z_Bpbr2~X@H61A_mqHRDfy?Gx7Gtbv0{{(>vnJEOGdAI6LdN`*$Ta~uXrXXtd1~lzS
zDqY)(+CEhn8hw0#nTTF=Xf-OLyncmypap#w+Gk2bR$yl1ZzMKU<^1Pke7@arW$3vd
zHra1}l~6&`J*>AW@2sOz@4x?<{m3!sW>*m&oi0zvSvos*hgU+P76dJa*sguaZAzbyTH
z2=#u~62cahlY{1L>5oINKFq*7DgxhN!*f0szxiBuk)|tDJF|P;NPPQo6d*;2{!AY3
z9~?>Q@*9D^T}G3wFjJ&n*Q3^$JSrI5mmrU$&VTsJX*uotgJN3bya-|6VRt!(N`=lB
z$ryF#zc&J?-aUqd(2B~BI*F!VVGl@F&nlch@f`)dEEP~|R)@$Pzr;(Hp`AtdmLH()
z^YsV|YmAIY=w(Qb(G^DS+1%1INX3VhT1v>=8|A6{MT9>JWjE8ISJfj?9XiL=oVYM#
zr=rnD&U$0a`xF6#5Qqu^C6w!&*+)3*t4uzxoBSN;U2l-sc>TrA{a}XhfOS}t=im`s
zDNepKW23GNvV&Z0`qr2Sh!P58mDl@+_>-yZl3T4_hYLRYqlbV9p0|d^aISbe7WH)s
zNGfHoP$I5ft=Yr)&p2Uw#2=ZPUVW^x=6~BKb2LWiNh9^3YSDL2&yp?~t)ft5?uGXC
z4%SJlFexqywZ&{ZY{g=Zw#_Nw#VQWgul|;9mWCGxV=euZ&b_QRyErmZes#VFoSQb=
zBsu^d1Cw5SHaVcKYwR<-$5?@W9DvgQojKLZIv8^=D*EJ)Woz(WOjUiGE1B_p!42L-
zo|P|ZiSpIUSR!}h$*@b818Z}s@6(HoR%e&Yh{_^=ir7prjgCZ$)T=#4m{@|om8Ku>
zy9iZW@ULxU#gt~Dsdb8ua9R4z7L^tzG>g2|7`m~>AamE7uW?Y0P(6&4l{_-tihN!>
zzS%C&=9A-mu4nM*!3Raa!%@0nX;urhVaLAzG#CrL*=iO~Gb4JIpJ^den5pvpu96*>
z?gw35^URHcy^Dj(IQ;k5QctFG7NO>#v5+=FS)<JNu=qC$egh`TR1Jbzm!L&C9H6$%(Yt;>^P0)q
zvB5O&TmE;RR9Jq2x~kf9^P6A+s>R~sySx_e#v~TqNqFDmb5SYXJI5MdudWy!byzU?
z!i{n%VBNYHZ`j3~aKt_}bI!mO#r*kr#!HZScp*&TF3^3ZiG`&1DP9d+nS=
zzV(oyIdnI`dQZ88S>D4`xcAKqIAgso*aXi)k-j?e%hM6wm);bM?*E-KZ=SI9`s4#M
zjktKgO2S05NHmNhRGDqQ?zvXp%^__erB!M6))a^qi&A%{)y+I4`*f|>XhU5;^o(yVQGbVWW+Kc)NeY1B6!K6
z3#-G6Md7Y52-Xg7TG!TvuhG&+c9s_Lq$`jtmVU={AK)}8in^CZ|Gpf@&}lh*v%B1V
zUEoU97Ez`lJ-2YF#T08}^Ysjkz)Rq?C$(mrTsS=)T|GGt(l~pZCyN~RJPM2Yr!$qm
z+kfJ?x;XTF+D7qvs*PsJamHQ3ccw!uNp>XIk+Q=leJ(!T3p@Dx4e?9X7B+_~+Aaul
z^#p{}VRuDcB=OJ!NRwb5fJ(XH4`FI0H${BAm$4#{??&&UU?kyAk_--0
z5#E7oBCZ%L$!07^mK;^}uF+|tcmA+NtT;B`rW9ImoaonNbRkau434>UY%yF^s*b*m
z=~iN%Mv(V_k_@=+UGT~xw@hZ{lu9UKgH(#l6)5w5Hc)@fxs;E>?9%2d|%
z$mOhdc3iB`1TZ++j^UpNH$DGR=q>U!zI7+vbfYa7J+CfI#bn}~{pZPwU0e|$&PPco
zt-IHfW?ZtlNSCYZJTPUu=MkIsWB=S}=faR9ZIi@51CO*|VF8c+5y24sa>-%*>aMq(
zd8EH0v-u6j^T#J~`udnd1bR^s;W+;IyRh%0GbBfA3fd})up;y9er=M0#T?SrmEKl9
zF@Pv|Zvam}yQ8};{@UH~(IDMCXRknP{owL(<;$LlJ8Lyunp{Q&uj(TLMV0+N+&So|
zu+y4(Unb;y!5S^azyN9EHhj?O1A6YA6I^J($$8
z-?)1ErB%TOGCDcWB=_bb<7WnrjFR6H-c5Bcs>E%So)r?lbh$B`OV_$s+j66agh~f#
z8qxYBCf78bC1{7;3EhgjkAT7N?2>sP{6Bog=>m0lSP-x_xvc~u2b;%&(2EhF1v+Fn
zxjFLY7}B1Xn)PDJptw9XzpHb)tr;lOjqB!RwNmCdU-yFux)SUzG~OQ!~e9>b0xPs9b5Plg<9{;GyP3Vr6yi~0mPx!{z6Fj^3sDH5b_nl
zI}RIL8@|;5)80=t3~P`I)qrzTZoJ)pejv0}yn@*$=UTSMf=e{SOY(H}k*r&QIQ@3q
z^Ge4W^%B4IKHb4Ukry>?C6}swAtH)7FlTA8qMU!)m4K#$tg_G!Y<7$rlEmHc2dT
zw?WF_PAT}(;`ZnmI9*;SxsoQV?HrW8P=U*pby$8JWW6d#{>ujA$mnlG@jHYof&FG)
z=XBVY?RPSG9CvM2)zA_zq>3IBDut1MF>X`5+`-fBs)|C4h+;prXs(e&90f6<$pMjV
zbiRm&`W#az-e1zn=IiOFns^UYH>y5~0)$!%gBfUIr<5&&^!f~+ie7}tZ
z)4DOZFq;1>wST!;A!Q21iO#+6r{9%SSW$#i-P3P)Z}>5)x|qasJ{b(Viz^CV1BB06
zMSE%S^Cm5H7WXpQ%nuNaJP4q9ebzbbV2+zN!jFV_Pp%>wVA8WZ`XRe_7*6$vgA)`D
znfT=caw(?e=jN`M2@9!Q0!N9F&cadN_f{1K`Of*&zJDPT(4pUYJZd@J=f~N5l@L)#
zUN9%Ym_PqrQ3+Q|m)dsvWi
z$h(}#f5vr#ln(jYpju_V1-Wg1Or>!0L
z*vjoXY+EbBNusx~cHLPlp1Oi`1IP`Fr!x0ZA&XV8h6}h{JuV4LZiHop3nvtHcx9=I
z)l}Pl>T^?7HCtw=1e#z_J-H9T>6!eYLp5n1*)yl^-0Kh-s?2G6ME!@OoY45Y313;1
z8-1IimU1g@^>Vl|Ve=vKSAV~iYN?m0w(d0TfmwCx_#!T}FQBjhLvf@(1TWSh&rj}X
zmimyJ&HBxKuBrXOt}pQ_Pne&``z01xGXEbmL$vo!P?_rh`+G@2lttan@~>HXw~jdU
zO6ZM<6)0Lg6U=9n8iOU`(WX=5pvf
zxKYKX-|&|Fa*%#wLcTbD@^Qt`tL0RPCyol}UAZ*;^7mr1qsT?~`lI-e
zHgDX?qg+g^u?54EP^n%dFHsT>^lK0LL;~Io)P)=D6+_i+#p>7}~i7
z^NhYsp!L|D9hjmcZPaw#J5(zA>i+%rlt7?~iG2Vk+YjPCmCw7d=NSBVQ$-!~$?}Cq
z`{37GodvMN?bGhr)jvyZ~+$;TG?
z*Gc_*2UZL^p*v$CFr}slf)ON*e|MFEWzjfFzppZ~7yz}bQwjr6-HI4>>%XSIHjJaFAP9&E(jX}v(lr%n
z1*JxZG^2Cm1O%j|kyMG%NQa}lo6)h+xse0LzR#XN;PcB~XXo5;UGGaauV2<79ZrYB2~KY?}vh0RLLg;0AVL
z$<%_JnO4Gk%QF_loEZr5ZQ-VXYd5nLN`A=9+;DZ_Ie>FSWs|6J1j^Jxe_j;#IVCieUebD9+Ktb0u#7j{)6X%+c%iQCA%+C$8iLW|7ss0=6qNOIr8bA
z1RS%ZGpFtVo?wFloUPl)=X7z~do+?iEP5mE4p-0FJU`
zxu-vw>nfgGJurrt0@!B*g+#pPj*eGSyQ$xE7w;sRws*6P^Vsd@+uND=P9D{-^Xov=1oR@9+GCipbjs-XZP(86COK%l!dX!4B+5t&Z{n8I
zt}_m(`By6lFsdkboHAOr$GtRUpgDV(80jXwHh@G`X`#pn+zsr9v~1g{akeRlJ#~DklX;z{pn!4J4C{K4{&dmr>WZc#xhb?9%Fgn$>JcnK
zXBI1Ds!^~AZ`$zv71D&ogt(@OCmMNIO8rKAlTd~lX
zCuy*9bH_#ZCtIIFbFas`$t*WrpvKLeW~?9^JQP2>E)(E&kW;uyjtpmv0TZxaeM_Zf
z&f&9DnmRhFhmc|Ph1cw1ZECKYOlhyX621^r$g6y~Rfx>zoV$qTYfOIVRuHOWh2{0*
z!V4Y`fSM0v^y+HD4(6@SDof*$9odDVy(XXA6;0nY
zfT`AH;4q+?Y^Cpa8R#Z_%-7VIos~PMbRJN02in)GiXU^TPSO}YRalD@9w+-6$0M%;
z$sn)aF|RXAE(WnYsdV;Q_8WG89+|g6HV)r2i&@1pN6Ip9Si=96GuuEKyUd5)_1kk~
zhs=0!H46hygfHaz{oCk411P?{C*y2dPbRyAIb%fod7bQ+ed&(Of_yie6}u!4JVBQc
zjfR9`0+bfbIMxm~m3=-VWFIim{H?E@4{KY9dX4b4t@*oxi%Y~me7M7~Dq8I6f|+WA
zQD{<|`8|$nk2iyR=6{-o)Wj?Pc>%=3o%Bfrb>h~~K^OlKgen#;fi-&lJvi@RLhh8O
zg+JcuKK+kimG*&XE}kD^9@9qGeHmf?#4IRXyAC4qnc>Ge8lNQ_8|yvl&kNR<4YIq{
zQ2x2`v!=F3c{fMv-J8Aft=}-8+O#9bBF(hEg9nL=#Tdr@)#vD~w0y^@
zu*+|Tl>VY37dP<274yAZc~rT7YdN~Ne5prYkNq(ZQ)fRB^igw3(v6tL53!>r)acYIF-9A;u|Y`g=oEjZ=#pm252lB`%d-^tt>`-%dk
zBSkru9X17B6l7}zU#8KW)n*1yVZE-JYsKH`w8dkV>MN#SYc!)hMDt9MG?2B-(I
zm@Q@ldWyO7x~7J{Q9B5Cx4+fF_#-s_Ejbfw&@VZqV(i%opuxj_tHT>^(;qg-pg)7^>7>Wg2eD!#rlZ6
zKl?-Ca%5L>Udc*uEc)kN5$m{sxQh1JhdcA|qT9
zv+3Gq84s@oXe9n18;H)UOfYoNq7alnU$YK{=ks(ANS&}cE(2SJKB20mu#32XF)h^v
z-fD;DC$889NorcpT-F4O({%LWP%a!rDtQrRA3rFjo
z-T=tNlFxkZD5ga}P{ri(LVG14N)yD4lO4~$MSp}g}}Zh
zct+o=N{{}n)A>lP!26nS?w`lu-B~>VK6fIB7-rk
zPpzMjBeTB=le~Uyx`Pk&sL5bEdn%uFB0Ww*QE!rtpa>0=We|1nG+^giKd{^4kAbtE
zv#%z9205$HOfRpZSz@q&!UM|P%tGvE6V0Ex9`}*x6T?JM+k=)Va;SIl!k{Z7PI1^h
zq(P~3DLaUw&{I+dU+2&qfJ#h_m&d`oCGEpRgvAO!k_SuKZa}>J8Ms>Cb1d2DSQj|num$}5(xR5pb^jK
zooXlYRoKLe?1p3$3k+BndbC@CC^;U>DO&7evm~^%)l^A|x@xqWh8kT{A`=v(XunrdM9ESpT+KS-Oi5<+d9Ry>+A^;2Fjt%tpB2wE|4)ga+)pn$J+Qm29DHNpkj1Ib?n21;BOV_l#L*Ft?E-(1jD*T69Y9blS
zo|IfrV|?5rMjU_;v-Z~qEy0odKW*7b^`md<%UgV&Ti|nnwX{`&d4B$)M`zmonH}Ve
zUOmMdWs8xSJkQJWi(iHuDg|Dl^VSDyqLC)3KeF$d{hYoIex6Ce|1LMgj2<%6)~V4?
zMRHU3x1`&+`OBedd$V44k0b>P<03hgaQ4?!>yh7%lMHO2c)z
z0=(()1R>CYGk!7j9+e)EX?LrT1vOIXVsq9re0Gozqm8nm@8NYe`Q`3z$!TjxF+m86KJKd#3YQZeI9`_bx=K2Gt
zqqHV7RJ2uztJV32Nvxd``w=-FEhFC+xxKKc~zakAP$k
zCAhLnG|^PF-~I2=W0|jd{}C`kvf-8}qK%Q<=Tmr@_#e{pHI)V_)$K*WCcu#--8rK_
z+yM!oFvcrkBwLjF?<3^+DAp~9^2VBW@-}Klrc&G-c)jlC;Ks+;qwBTi-N_+Guq
zx)bUBs_7bO*C&{+1Jfz~RZ-MWex&WfkLFK$k=taBD?u>^DAUCcy}7f&E=_{E+3rVX
z45$fYgRrBjjWb6d-+2%t%+a@~kC==(iqKW8m1g~O+$oHyvj
z#7jOV`Rl*ecrlN2A45?MqdlOm@A9I&3<@wnGAv1Rhaa`Fuqu=1+G?xur6Z&c!&AAS
z_T^T~Oc-!;(GNBo^-c(v{6Pbxc(!g=MD!m)X)*RW`r}-P-yi$8Ut_e}?3bB;;h98-
zj_&AGBdSpUBFm3vpFcXC+y(Ul$X7L>fy#5|e{?wyYT~-tOFFR!qzIFk7YTI%A#%3s
zCjA*59Gh1aTHh%LEzvHZ??A_(UyNSdcsHtvUIGKyhe-uEdHnKBVVV<>?xIEtCqz~J
znszjuR4=eJRlCr_#<4`vMXFshi>ecaKM&dtv_6Zaghy{