copy files from dsit/datalab

This commit is contained in:
Alexis GUYOT 2024-02-29 09:49:04 +01:00
parent 804248da22
commit 9c5c71336d
67 changed files with 7385 additions and 1 deletions

View File

@ -0,0 +1,48 @@
# Drone CI pipeline: build and publish the ActiveTigger image, then upgrade the
# Helm release in the "activetigger" namespace. Both steps are skipped on
# pull_request events.
kind: pipeline
name: Build & publish main

steps:
  # Build the image with Kaniko and push it to the private registry.
  - name: publish-image
    pull: always
    image: plugins/kaniko:1.7.1-kaniko1.9.1
    settings:
      auto_tag: true
      auto_tag_suffix: latest
      registry: code.groupe-genes.fr
      repo: code.groupe-genes.fr/datalab/docker-images-datalab/activetigger
      username:
        from_secret: docker_username
      password:
        from_secret: docker_password
    when:
      event:
        exclude:
          - pull_request

  # Install kubectl + helm in a throwaway Alpine container and upgrade the release.
  - name: deploy
    # Pinned so the deploy tooling does not change silently between runs.
    image: alpine:3.19
    environment:
      kubernetes_server:
        from_secret: kubernetes_server
      # NOTE(review): kubernetes_cert is injected but never used below, and the
      # cluster is configured with --insecure-skip-tls-verify=true. Consider
      # passing it via --certificate-authority once its format is confirmed.
      kubernetes_cert:
        from_secret: kubernetes_cert
      kubernetes_token:
        from_secret: kubernetes_token
    commands:
      - apk add --no-cache curl
      # -f makes curl fail on HTTP errors instead of saving an error page as the binary.
      - curl -fsSL -o /usr/bin/kubectl "https://dl.k8s.io/release/v1.28.2/bin/linux/amd64/kubectl"
      - curl -fsSL -o helm.tar.gz "https://get.helm.sh/helm-v3.14.0-linux-amd64.tar.gz"
      - tar xf "helm.tar.gz" && mv ./linux-amd64/helm /usr/bin/helm
      - chmod +x /usr/bin/kubectl
      - chmod +x /usr/bin/helm
      # Secrets are quoted so unusual characters cannot be word-split by the shell.
      - kubectl config set-cluster default --server="$kubernetes_server" --insecure-skip-tls-verify=true
      - kubectl config set-credentials user --token="$kubernetes_token"
      - kubectl config set-context default --user=user --cluster=default --namespace=activetigger
      - kubectl config use-context default
      - kubectl get pods
      - helm ls -n activetigger --debug
      - helm dependency build ./helm-chart
      - helm upgrade activetigger ./helm-chart -f ./helm-chart/values.yaml -n activetigger
    when:
      event:
        exclude:
          - pull_request

View File

@ -0,0 +1,111 @@
# ActiveTigger image: Ubuntu 22.04 with R + Shiny Server, Miniconda and the
# Python NLP stack (PyTorch, transformers, spaCy, fastText) used by the app.
FROM ubuntu:22.04

# Build-time only, so apt never prompts; ARG keeps it out of the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

COPY requirements.r /requirements.r
# NOTE(review): requirementspython.txt is copied but no instruction below reads it.
# TODO: check whether the cu118 install should be driven from that file instead.
COPY requirementspython.txt /requirementspython.txt

# Python, R and build/download tooling. `update` and `install` share one layer so
# the package index is never stale, and the index is removed in that same layer.
RUN apt-get update && apt-get install -y \
        build-essential \
        curl \
        python3-pip \
        python3.10 \
        python3.10-distutils \
        python3.10-venv \
        r-base \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# R "shiny" package, then Shiny Server itself.
RUN R -e "install.packages('shiny', repos='https://cran.rstudio.com/')"

# gdebi resolves the .deb's dependencies through apt, so the index is refreshed
# in this layer; the installer and the index are removed once it is installed.
RUN apt-get update \
    && apt-get install -y gdebi-core \
    && wget https://download3.rstudio.org/ubuntu-18.04/x86_64/shiny-server-1.5.21.1012-amd64.deb \
    && gdebi --non-interactive shiny-server-1.5.21.1012-amd64.deb \
    && rm shiny-server-1.5.21.1012-amd64.deb \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# R packages (to be installed with the R executable used by Shiny Server).
RUN Rscript /requirements.r

# Python environment: install Miniconda under /opt/conda.
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
    && bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \
    && rm Miniconda3-latest-Linux-x86_64.sh

# Add Conda binaries to PATH.
ENV PATH="/opt/conda/bin:${PATH}"

# Create the "tigger" Conda environment; -y so the build never waits on the
# confirmation prompt.
RUN conda create -y -n tigger python==3.10 \
    && echo "conda activate tigger" >> ~/.bashrc

# NOTE(review): with /opt/conda/bin first on PATH, the pip3/python calls below
# presumably target the Conda base install rather than the "tigger" env —
# confirm which interpreter the application is meant to use.
RUN pip3 install --no-cache-dir \
        torch torchvision torchaudio \
        -f https://download.pytorch.org/whl/cu118/torch_stable.html
RUN pip3 install --no-cache-dir six

# Install Rust using rustup; pipefail makes a failed download fail the build
# instead of piping nothing into sh.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

# Add Cargo's bin directory to the PATH environment variable.
ENV PATH="/root/.cargo/bin:${PATH}"

RUN pip3 install --no-cache-dir --upgrade setuptools

# Other Python libraries. Kept as sequential installs (one layer) so the pinned
# typing-* versions are still applied after the packages installed before them.
RUN pip3 install --no-cache-dir argparse \
    && pip3 install --no-cache-dir datasets \
    && pip3 install --no-cache-dir fasttext \
    && pip3 install --no-cache-dir numpy \
    && pip3 install --no-cache-dir pandas \
    && pip3 install --no-cache-dir pyarrow \
    && pip3 install --no-cache-dir scikit-learn \
    && pip3 install --no-cache-dir sentence-transformers \
    && pip3 install --no-cache-dir transformers \
    && pip3 install --no-cache-dir typing-inspect==0.8.0 \
    && pip3 install --no-cache-dir typing-extensions==4.6.1 \
    && pip3 install --no-cache-dir spacy

# Download the spaCy and fastText models (French).
# Docker does not expand "~" in WORKDIR or COPY, so root's home is spelled out;
# the fastText model therefore lands at /root/cc.fr.300.bin.
WORKDIR /root
RUN python -m spacy download fr_core_news_sm
RUN wget https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.fr.300.bin.gz \
    && gunzip cc.fr.300.bin.gz

# For each new instance: copy the application into the Shiny publish directory
# (replace "tigger-name" with the name the instance will take, i.e.
# https://analytics.huma-num.fr/Prenom.Nom/tigger-name/).
# COPY creates the destination directory, so no separate mkdir is needed.
COPY activetigger/ /root/zPublish/shiny/tigger-name/

# In the application:
# Top left, "+" button for "create project". Then, in the fields:
# - data directory: e.g. ~/tagging/<domain> (such as ~/tagging/radio or
#   ~/tagging/journaux); choose where data and tags should be stored on the server
# - ticking every box is recommended: python, spacy, fasttext, sbert, gpu
# - python: "~/conda/envs/tigger/bin/python"
#   NOTE(review): in this image Conda is installed under /opt/conda, so the env is
#   presumably at /opt/conda/envs/tigger/bin/python — confirm.
# - fasttext: "~/cc.fr.300.bin" (give the model's path on the server, not just its name)
# - spacy and SBERT: keep the default values for the chosen language

# Default command to run when the container starts.
CMD ["/bin/bash"]

View File

@ -0,0 +1,41 @@
#!/usr/bin/env python
# coding: utf-8
## FastText embed sentences
## Requires data file with columns id and text
import argparse
import fasttext
from os.path import expanduser
import pandas as pd
import pyarrow as pa
import pyarrow.feather as feather
import re
def main(args):
    """Embed every row of a feather file with a fastText model.

    Reads the feather file at ``args.data`` (expects columns ``id`` and
    ``text``), computes one sentence vector per row with the model at
    ``args.model``, and writes a feather file holding ``id`` followed by
    columns ``ft001``, ``ft002``, ...

    The output path is the input path with ``.feather`` replaced by
    ``_ft.feather``. If the input path has no ``.feather`` suffix, the suffix
    is appended instead, so the input file is never overwritten.

    Args:
        args: Parsed command-line arguments with ``data`` and ``model``
            attributes (both paths; ``~`` is expanded).
    """
    print("FastText: Importing data")
    datapath = expanduser(args.data)
    dat = feather.read_feather(datapath)
    outfile = re.sub("[.]feather$", "_ft.feather", datapath)
    if outfile == datapath:
        # Nothing was substituted: writing to `outfile` would replace the
        # input data with the embeddings.
        outfile = datapath + "_ft.feather"

    print("FastText: Loading model")
    ft = fasttext.load_model(expanduser(args.model))

    print("FastText: Embedding sentences")
    # Newlines are replaced by spaces so each text is embedded as one line.
    emb = [ft.get_sentence_vector(re.sub("\n", " ", x)) for x in dat["text"]]

    print("FastText: Exporting")
    emb = pd.DataFrame(emb)
    emb.columns = ["ft%03d" % (x + 1) for x in range(len(emb.columns))]
    # concat(axis=1) aligns on index labels; `emb` has a fresh 0..n-1 index, so
    # the ids are re-indexed by position to keep each id next to its own vector.
    emb = pd.concat([dat["id"].reset_index(drop=True), emb], axis=1)
    feather.write_feather(emb, outfile)
    print("FastText: Done")
if __name__ == "__main__":
    # Command-line entry point: parse the model and data paths, then embed.
    argParser = argparse.ArgumentParser()
    argParser.add_argument("-m", "--model", help="Model path", default="/data/user/b/jboelaert/cc.fr.100.bin")
    # Required: without it main() fails later inside expanduser(None) with an
    # unhelpful TypeError; argparse now reports the missing option instead.
    argParser.add_argument("-d", "--data", help="Path to data (feather)", required=True)
    args = argParser.parse_args()
    main(args)

View File

@ -0,0 +1,42 @@
#!/usr/bin/env python
# coding: utf-8
## SBERT embed sentences
## Requires data file with columns id and text
import argparse
from os.path import expanduser
import pandas as pd
import pyarrow as pa
import pyarrow.feather as feather
import re
from sentence_transformers import SentenceTransformer
def main(args):
    """Embed every row of a feather file with a SentenceTransformer model.

    Reads the feather file at ``args.data`` (expects columns ``id`` and
    ``text``), encodes the texts with the model named by ``args.model``, and
    writes a feather file holding ``id`` followed by columns ``sb001``,
    ``sb002``, ...

    The output path is the input path with ``.feather`` replaced by
    ``_sb.feather``. If the input path has no ``.feather`` suffix, the suffix
    is appended instead, so the input file is never overwritten.

    Args:
        args: Parsed command-line arguments with ``data`` (path) and ``model``
            (model name or path) attributes; ``~`` is expanded in both.
    """
    print("SBERT: Importing data")
    datapath = expanduser(args.data)
    dat = feather.read_feather(datapath)
    outfile = re.sub("[.]feather$", "_sb.feather", datapath)
    if outfile == datapath:
        # Nothing was substituted: writing to `outfile` would replace the
        # input data with the embeddings.
        outfile = datapath + "_sb.feather"

    print("SBERT: Loading model")
    sbert = SentenceTransformer(expanduser(args.model))
    sbert.max_seq_length = 512

    print("SBERT: Embedding sentences")
    # Pass a plain list: a pandas Series would be indexed by label inside
    # encode(), which only matches row position for a default 0..n-1 index.
    emb = sbert.encode(dat["text"].tolist())

    print("SBERT: Exporting")
    emb = pd.DataFrame(emb)
    emb.columns = ["sb%03d" % (x + 1) for x in range(len(emb.columns))]
    # concat(axis=1) aligns on index labels; `emb` has a fresh 0..n-1 index, so
    # the ids are re-indexed by position to keep each id next to its own vector.
    emb = pd.concat([dat["id"].reset_index(drop=True), emb], axis=1)
    feather.write_feather(emb, outfile)
    print("SBERT: Done")
if __name__ == "__main__":
    # Command-line entry point: parse the model and data paths, then embed.
    argParser = argparse.ArgumentParser()
    argParser.add_argument("-m", "--model", help="Model name or path", default="distiluse-base-multilingual-cased-v1")
    # Required: without it main() fails later inside expanduser(None) with an
    # unhelpful TypeError; argparse now reports the missing option instead.
    argParser.add_argument("-d", "--data", help="Path to data (feather)", required=True)
    args = argParser.parse_args()
    main(args)

View File

@ -0,0 +1,174 @@
#!/usr/bin/env python
# coding: utf-8
## BERT trainer to be called by server.R
## Requires two data files with columns id, label and text
import argparse
import datasets
from datasets import load_metric
import numpy as np
from os.path import expanduser
import os
import pandas as pd
import re
from sklearn import metrics
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import Trainer, TrainingArguments, TrainerCallback
os.environ["TOKENIZERS_PARALLELISM"] = "false"
def main(args):
    """Fine-tune a sequence classifier and export validation predictions.

    Reads the training and validation CSV files (``args.traindat``,
    ``args.valdat``; columns ``id``, ``label`` and ``text``), fine-tunes
    ``args.model`` with the Hugging Face ``Trainer``, saves the model to
    ``args.session`` and writes the predicted label of every validation row to
    ``<session>_predval.csv`` (columns ``id`` and ``bertpred``).

    Class ids follow the order in which labels first appear in the training
    data. A validation label that never occurs in the training data raises
    ``ValueError``.

    Training stops early once a file named ``<session>_stop`` exists.

    Args:
        args: Parsed command-line arguments (see the argument parser at the
            bottom of this script).
    """
    print("Importing data")
    dattrain = pd.read_csv(expanduser(args.traindat))
    datval = pd.read_csv(expanduser(args.valdat))
    datval_id = datval["id"]
    classcolname = "label"

    ## Make class_names
    class_names = [x for x in dattrain[classcolname].unique()]
    ## Labels to class number
    dattrain[classcolname] = [class_names.index(x) for x in dattrain[classcolname].to_list()]
    datval[classcolname] = [class_names.index(x) for x in datval[classcolname].to_list()]
    ## Transform to datasets
    dattrain = datasets.Dataset.from_pandas(dattrain[['text', 'label']])
    datval = datasets.Dataset.from_pandas(datval[['text', 'label']])

    # Model choice
    modelname = expanduser(args.model)

    ## Tokenizer
    print("Tokenizing")
    tokenizer = AutoTokenizer.from_pretrained(modelname)
    # --adapt: pad to the longest text of each tokenized batch; otherwise pad
    # everything to the full 512-token maximum.
    padding = True if args.adapt else "max_length"

    def tokenize(batch):
        return tokenizer(batch['text'], truncation=True, padding=padding, max_length=512)

    toktrain = dattrain.map(tokenize, batched=True)
    toktest = datval.map(tokenize, batched=True)
    del dattrain

    ## Model
    print("Loading model")
    model = AutoModelForSequenceClassification.from_pretrained(modelname, num_labels=len(class_names))
    if args.gpu:
        model.cuda()

    ## Train using Trainer interface
    print("Training...")
    BATCH_SIZE = args.batchsize
    GRAD_ACC = args.gradacc
    epochs = args.epochs
    # `epochs` is a float, so the floor division yields a float; step counts
    # are converted to int before being handed to TrainingArguments.
    total_steps = int((epochs * len(toktrain)) // (BATCH_SIZE * GRAD_ACC))
    warmup_steps = total_steps // 10
    # On small datasets total_steps can be lower than the requested number of
    # evaluations; clamp to 1 so the eval/save/logging interval is never zero.
    eval_steps = max(1, total_steps // max(1, args.eval))
    training_args = TrainingArguments(
        output_dir=args.session + "_train",
        learning_rate=args.lrate,
        weight_decay=args.wdecay,
        num_train_epochs=epochs,
        gradient_accumulation_steps=GRAD_ACC,
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=32,
        warmup_steps=warmup_steps,
        eval_steps=eval_steps,
        evaluation_strategy="steps",
        save_strategy="steps",
        save_steps=eval_steps,
        logging_steps=eval_steps,
        do_eval=True,
        greater_is_better=False,
        load_best_model_at_end=bool(args.best),
        metric_for_best_model="eval_loss"
    )
    trainer = Trainer(model=model, args=training_args,
                      train_dataset=toktrain, eval_dataset=toktest)

    the_session = args.session

    class HaltCallback(TrainerCallback):
        "A callback that checks for _stop file to interrupt training"

        def on_step_begin(self, args, state, control, **kwargs):
            if os.path.exists(the_session + "_stop"):
                print("\nHalted by user.\n")
                control.should_training_stop = True
                return control
            else:
                print("\nNot halted by user.\n")

    trainer.add_callback(HaltCallback)
    trainer.train()

    ## Add class names to model
    label_to_id = {v: i for i, v in enumerate(class_names)}
    model.config.label2id = label_to_id
    model.config.id2label = {idx: label for label, idx in model.config.label2id.items()}

    ## Save model
    model.save_pretrained(args.session)

    ## Prediction functions
    # Imported here because this script has no module-level torch import.
    from torch import no_grad

    # Training may leave the model in train mode (dropout active); switch to
    # eval mode so the exported predictions are deterministic.
    model.eval()

    def get_predprobs(text):
        inputs = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors="pt")
        # Follow the model's actual device rather than args.gpu: the Trainer
        # may have moved the model to an accelerator on its own.
        inputs = inputs.to(model.device)
        with no_grad():
            outputs = model(**inputs)
        res = outputs[0].cpu()
        res = res.softmax(1).detach().numpy()
        return res

    def get_prediction(text):
        return class_names[get_predprobs(text).argmax()]

    ## Predictions on validation set
    print("Computing predictions")
    testpred = [get_prediction(txt) for txt in datval["text"]]
    exportpred = pd.DataFrame(datval_id)
    exportpred.columns = ["id"]
    exportpred["bertpred"] = testpred
    exportpred.to_csv(args.session + "_predval.csv", index=False)
if __name__ == "__main__":
    # Command-line entry point for the trainer.
    argParser = argparse.ArgumentParser()
    argParser.add_argument("-m", "--model", help="Model name or path", default="microsoft/Multilingual-MiniLM-L12-H384")
    # The data paths and the session name are used unconditionally by main();
    # marking them required turns a late TypeError on None into a clear usage error.
    argParser.add_argument("-t", "--traindat", help="Path to training data", required=True)
    argParser.add_argument("-v", "--valdat", help="Path to validation data", required=True)
    argParser.add_argument("-b", "--batchsize", help="Batch size for training", type=int, default=4)
    argParser.add_argument("-g", "--gradacc", help="Gradient accumulation for training", type=int, default=1)
    argParser.add_argument("-e", "--epochs", help="Number of training epochs", type=float, default=3)
    argParser.add_argument("-l", "--lrate", help="Learning rate", type=float, default=5e-05)
    argParser.add_argument("-w", "--wdecay", help="Weight decay", type=float, default=.01)
    argParser.add_argument("-B", "--best", help="Load best model instead of last", type=int, choices=[0,1], default=1)
    argParser.add_argument("-E", "--eval", help="Number of intermediary evaluations", type=int, default=10)
    argParser.add_argument("-s", "--session", help="Session name (used to save results)", required=True)
    argParser.add_argument("-G", "--gpu", help="Use GPU (CUDA)", type=int, choices=[0,1], default=0)
    argParser.add_argument("-A", "--adapt", help="Adapt token length to batch", type=int, choices=[0,1], default=1)
    args = argParser.parse_args()
    main(args)

View File

@ -0,0 +1,94 @@
#!/usr/bin/env python
# coding: utf-8
## BERT inference to be called by server.R
import argparse
import datasets
import json
import numpy as np
from os import path, remove
import pandas as pd
import pyarrow.feather as feather
import re
from torch import no_grad
from transformers import AutoModelForSequenceClassification, AutoTokenizer
def chunker(seq, batch_size):
    """Lazily split ``seq`` into consecutive slices of at most ``batch_size`` items.

    The last slice is shorter when ``len(seq)`` is not a multiple of
    ``batch_size``. Returns a generator of slices of ``seq``.
    """
    # The start offsets are computed up front, so an invalid step is reported
    # when chunker() is called rather than when the result is first iterated.
    starts = range(0, len(seq), batch_size)
    return (seq[start:start + batch_size] for start in starts)
def main(args):
    """Predict class probabilities for every row of a feather file.

    Loads the fine-tuned classifier stored in ``args.model``, and the tokenizer
    named by ``_name_or_path`` in that directory's ``config.json``. Texts from
    column ``args.txtname`` are scored in batches of ``args.batch``. The output
    feather file ``args.output`` holds the ``args.idname`` column followed by
    one ``bertpred_<label>`` probability column per class.

    While running, the current status is written to ``args.logfile`` (each
    write replaces the previous content); the file is removed on success.

    Args:
        args: Parsed command-line arguments (see the argument parser at the
            bottom of this script). ``~`` is expanded in every path.
    """
    logpath = path.expanduser(args.logfile)

    def write_progress(message):
        # Overwrite rather than append: the file only ever holds the latest status.
        with open(logpath, "w") as progfile:
            progfile.write(message)

    print("Importing data")
    write_progress("Importing data")
    dat = feather.read_feather(path.expanduser(args.dat))

    ## Tokenizer
    write_progress("Tokenizing")
    print("Tokenizing")
    with open(path.join(path.expanduser(args.model), "config.json"), "r") as jsonfile:
        modeltype = json.load(jsonfile)["_name_or_path"]
    tokenizer = AutoTokenizer.from_pretrained(modeltype)

    ## Model
    print("Loading model")
    model = AutoModelForSequenceClassification.from_pretrained(path.expanduser(args.model))
    if args.gpu:
        model.cuda()

    ## Prediction functions
    def get_predprobs(text):
        inputs = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors="pt")
        if args.gpu:
            inputs = inputs.to("cuda")
        with no_grad():
            outputs = model(**inputs)
        res = outputs[0]
        if args.gpu:
            res = res.cpu()
        res = res.softmax(1).detach().numpy()
        return res

    print("Computing predictions")
    chunks = chunker([str(x) for x in dat[args.txtname]], args.batch)
    pred = []
    for i, x in enumerate(chunks):
        # Refresh the progress file every fifth batch.
        if i % 5 == 0:
            percent = round(100 * i * args.batch / len(dat), 1)
            logmsg = "Computing: " + str(percent) + "% (" + str(i * args.batch) + "/" + str(len(dat)) + ")"
            write_progress(logmsg)
        pred.append(get_predprobs(x))

    labels = list(model.config.id2label.values())
    # np.concatenate rejects an empty list; an empty input yields an empty
    # table that still has one column per class.
    pred = np.concatenate(pred) if pred else np.empty((0, len(labels)))
    pred = pd.DataFrame(pred)
    # str() so labels that are not strings can still be used in a column name.
    pred.columns = ["bertpred_" + str(v) for v in labels]
    # concat(axis=1) aligns on index labels; `pred` has a fresh 0..n-1 index, so
    # the ids are re-indexed by position to keep each id next to its own row.
    pred = pd.concat([dat[args.idname].reset_index(drop=True), pred], axis=1)
    # Expand "~" like every other path argument of this script.
    feather.write_feather(pred, path.abspath(path.expanduser(args.output)))
    remove(logpath)
if __name__ == "__main__":
    # Command-line entry point for batch inference.
    argParser = argparse.ArgumentParser()
    # The model and data paths are used unconditionally by main(); marking them
    # required turns a late TypeError on None into a clear usage error.
    argParser.add_argument("-m", "--model", help="Trained model path", required=True)
    argParser.add_argument("-d", "--dat", help="Path to data (feather file)", required=True)
    argParser.add_argument("-o", "--output", help="Output path of predictions", default="tiggerbert.feather")
    argParser.add_argument("-i", "--idname", help="Name of id variable", default="id")
    argParser.add_argument("-x", "--txtname", help="Name of text variable", default="text")
    argParser.add_argument("-l", "--logfile", help="Path to log file", default="tiggerbert-progress.txt")
    argParser.add_argument("-G", "--gpu", help="Use GPU (CUDA)", type=int, choices=[0,1], default=1)
    argParser.add_argument("-b", "--batch", help="Batch size", type=int, default=128)
    args = argParser.parse_args()
    main(args)

View File

@ -0,0 +1,159 @@
"short","spacy_name","fasttext_name","fasttext_url","language","short_lang"
"af","xx_ent_wiki_sm","cc.af.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.af.300.bin.gz","Afrikaans","(af) Afrikaans"
"als","xx_ent_wiki_sm","cc.als.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.als.300.bin.gz","Alemannic","(als) Alemannic"
"am","xx_ent_wiki_sm","cc.am.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.am.300.bin.gz","Amharic","(am) Amharic"
"an","xx_ent_wiki_sm","cc.an.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.an.300.bin.gz","Aragonese","(an) Aragonese"
"ar","xx_ent_wiki_sm","cc.ar.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ar.300.bin.gz","Arabic","(ar) Arabic"
"arz","xx_ent_wiki_sm","cc.arz.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.arz.300.bin.gz","Egyptian Arabic","(arz) Egyptian Arabic"
"as","xx_ent_wiki_sm","cc.as.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.as.300.bin.gz","Assamese","(as) Assamese"
"ast","xx_ent_wiki_sm","cc.ast.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ast.300.bin.gz","Asturian","(ast) Asturian"
"az","xx_ent_wiki_sm","cc.az.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.az.300.bin.gz","Azerbaijani","(az) Azerbaijani"
"azb","xx_ent_wiki_sm","cc.azb.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.azb.300.bin.gz","Southern Azerbaijani","(azb) Southern Azerbaijani"
"ba","xx_ent_wiki_sm","cc.ba.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ba.300.bin.gz","Bashkir","(ba) Bashkir"
"bar","xx_ent_wiki_sm","cc.bar.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bar.300.bin.gz","Bavarian","(bar) Bavarian"
"bcl","xx_ent_wiki_sm","cc.bcl.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bcl.300.bin.gz","Central Bicolano","(bcl) Central Bicolano"
"be","xx_ent_wiki_sm","cc.be.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.be.300.bin.gz","Belarusian","(be) Belarusian"
"bg","xx_ent_wiki_sm","cc.bg.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bg.300.bin.gz","Bulgarian","(bg) Bulgarian"
"bh","xx_ent_wiki_sm","cc.bh.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bh.300.bin.gz","Bihari","(bh) Bihari"
"bn","xx_ent_wiki_sm","cc.bn.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bn.300.bin.gz","Bengali","(bn) Bengali"
"bo","xx_ent_wiki_sm","cc.bo.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bo.300.bin.gz","Tibetan","(bo) Tibetan"
"bpy","xx_ent_wiki_sm","cc.bpy.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bpy.300.bin.gz","Bishnupriya Manipuri","(bpy) Bishnupriya Manipuri"
"br","xx_ent_wiki_sm","cc.br.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.br.300.bin.gz","Breton","(br) Breton"
"bs","xx_ent_wiki_sm","cc.bs.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bs.300.bin.gz","Bosnian","(bs) Bosnian"
"ca","ca_core_news_sm","cc.ca.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ca.300.bin.gz","Catalan","(ca) Catalan"
"ce","xx_ent_wiki_sm","cc.ce.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ce.300.bin.gz","Chechen","(ce) Chechen"
"ceb","xx_ent_wiki_sm","cc.ceb.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ceb.300.bin.gz","Cebuano","(ceb) Cebuano"
"ckb","xx_ent_wiki_sm","cc.ckb.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ckb.300.bin.gz","Kurdish (Sorani)","(ckb) Kurdish (Sorani)"
"co","xx_ent_wiki_sm","cc.co.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.co.300.bin.gz","Corsican","(co) Corsican"
"cs","xx_ent_wiki_sm","cc.cs.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.cs.300.bin.gz","Czech","(cs) Czech"
"cv","xx_ent_wiki_sm","cc.cv.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.cv.300.bin.gz","Chuvash","(cv) Chuvash"
"cy","xx_ent_wiki_sm","cc.cy.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.cy.300.bin.gz","Welsh","(cy) Welsh"
"da","da_core_news_sm","cc.da.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.da.300.bin.gz","Danish","(da) Danish"
"de","de_core_news_sm","cc.de.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.de.300.bin.gz","German","(de) German"
"diq","xx_ent_wiki_sm","cc.diq.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.diq.300.bin.gz","Zazaki","(diq) Zazaki"
"dv","xx_ent_wiki_sm","cc.dv.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.dv.300.bin.gz","Divehi","(dv) Divehi"
"el","el_core_news_sm","cc.el.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.el.300.bin.gz","Greek","(el) Greek"
"eml","xx_ent_wiki_sm","cc.eml.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.eml.300.bin.gz","Emilian-Romagnol","(eml) Emilian-Romagnol"
"en","en_core_web_sm","cc.en.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz","English","(en) English"
"eo","xx_ent_wiki_sm","cc.eo.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.eo.300.bin.gz","Esperanto","(eo) Esperanto"
"es","es_core_news_sm","cc.es.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.es.300.bin.gz","Spanish","(es) Spanish"
"et","xx_ent_wiki_sm","cc.et.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.et.300.bin.gz","Estonian","(et) Estonian"
"eu","xx_ent_wiki_sm","cc.eu.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.eu.300.bin.gz","Basque","(eu) Basque"
"fa","xx_ent_wiki_sm","cc.fa.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.fa.300.bin.gz","Persian","(fa) Persian"
"fi","fi_core_news_sm","cc.fi.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.fi.300.bin.gz","Finnish","(fi) Finnish"
"fr","fr_core_news_sm","cc.fr.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.fr.300.bin.gz","French","(fr) French"
"frr","xx_ent_wiki_sm","cc.frr.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.frr.300.bin.gz","North Frisian","(frr) North Frisian"
"fy","xx_ent_wiki_sm","cc.fy.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.fy.300.bin.gz","West Frisian","(fy) West Frisian"
"ga","xx_ent_wiki_sm","cc.ga.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ga.300.bin.gz","Irish","(ga) Irish"
"gd","xx_ent_wiki_sm","cc.gd.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.gd.300.bin.gz","Scottish Gaelic","(gd) Scottish Gaelic"
"gl","xx_ent_wiki_sm","cc.gl.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.gl.300.bin.gz","Galician","(gl) Galician"
"gom","xx_ent_wiki_sm","cc.gom.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.gom.300.bin.gz","Goan Konkani","(gom) Goan Konkani"
"gu","xx_ent_wiki_sm","cc.gu.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.gu.300.bin.gz","Gujarati","(gu) Gujarati"
"gv","xx_ent_wiki_sm","cc.gv.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.gv.300.bin.gz","Manx","(gv) Manx"
"he","xx_ent_wiki_sm","cc.he.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.he.300.bin.gz","Hebrew","(he) Hebrew"
"hi","xx_ent_wiki_sm","cc.hi.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.hi.300.bin.gz","Hindi","(hi) Hindi"
"hif","xx_ent_wiki_sm","cc.hif.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.hif.300.bin.gz","Fiji Hindi","(hif) Fiji Hindi"
"hr","hr_core_news_sm","cc.hr.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.hr.300.bin.gz","Croatian","(hr) Croatian"
"hsb","xx_ent_wiki_sm","cc.hsb.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.hsb.300.bin.gz","Upper Sorbian","(hsb) Upper Sorbian"
"ht","xx_ent_wiki_sm","cc.ht.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ht.300.bin.gz","Haitian","(ht) Haitian"
"hu","xx_ent_wiki_sm","cc.hu.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.hu.300.bin.gz","Hungarian","(hu) Hungarian"
"hy","xx_ent_wiki_sm","cc.hy.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.hy.300.bin.gz","Armenian","(hy) Armenian"
"ia","xx_ent_wiki_sm","cc.ia.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ia.300.bin.gz","Interlingua","(ia) Interlingua"
"id","xx_ent_wiki_sm","cc.id.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.id.300.bin.gz","Indonesian","(id) Indonesian"
"ilo","xx_ent_wiki_sm","cc.ilo.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ilo.300.bin.gz","Ilokano","(ilo) Ilokano"
"io","xx_ent_wiki_sm","cc.io.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.io.300.bin.gz","Ido","(io) Ido"
"is","xx_ent_wiki_sm","cc.is.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.is.300.bin.gz","Icelandic","(is) Icelandic"
"it","it_core_news_sm","cc.it.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.it.300.bin.gz","Italian","(it) Italian"
"ja","ja_core_news_sm","cc.ja.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ja.300.bin.gz","Japanese","(ja) Japanese"
"jv","xx_ent_wiki_sm","cc.jv.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.jv.300.bin.gz","Javanese","(jv) Javanese"
"ka","xx_ent_wiki_sm","cc.ka.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ka.300.bin.gz","Georgian","(ka) Georgian"
"kk","xx_ent_wiki_sm","cc.kk.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.kk.300.bin.gz","Kazakh","(kk) Kazakh"
"km","xx_ent_wiki_sm","cc.km.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.km.300.bin.gz","Khmer","(km) Khmer"
"kn","xx_ent_wiki_sm","cc.kn.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.kn.300.bin.gz","Kannada","(kn) Kannada"
"ko","ko_core_news_sm","cc.ko.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ko.300.bin.gz","Korean","(ko) Korean"
"ku","xx_ent_wiki_sm","cc.ku.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ku.300.bin.gz","Kurdish (Kurmanji)","(ku) Kurdish (Kurmanji)"
"ky","xx_ent_wiki_sm","cc.ky.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ky.300.bin.gz","Kirghiz","(ky) Kirghiz"
"la","xx_ent_wiki_sm","cc.la.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.la.300.bin.gz","Latin","(la) Latin"
"lb","xx_ent_wiki_sm","cc.lb.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.lb.300.bin.gz","Luxembourgish","(lb) Luxembourgish"
"li","xx_ent_wiki_sm","cc.li.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.li.300.bin.gz","Limburgish","(li) Limburgish"
"lmo","xx_ent_wiki_sm","cc.lmo.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.lmo.300.bin.gz","Lombard","(lmo) Lombard"
"lt","lt_core_news_sm","cc.lt.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.lt.300.bin.gz","Lithuanian","(lt) Lithuanian"
"lv","xx_ent_wiki_sm","cc.lv.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.lv.300.bin.gz","Latvian","(lv) Latvian"
"mai","xx_ent_wiki_sm","cc.mai.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mai.300.bin.gz","Maithili","(mai) Maithili"
"mg","xx_ent_wiki_sm","cc.mg.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mg.300.bin.gz","Malagasy","(mg) Malagasy"
"mhr","xx_ent_wiki_sm","cc.mhr.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mhr.300.bin.gz","Meadow Mari","(mhr) Meadow Mari"
"min","xx_ent_wiki_sm","cc.min.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.min.300.bin.gz","Minangkabau","(min) Minangkabau"
"mk","mk_core_news_sm","cc.mk.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mk.300.bin.gz","Macedonian","(mk) Macedonian"
"ml","xx_ent_wiki_sm","cc.ml.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ml.300.bin.gz","Malayalam","(ml) Malayalam"
"mn","xx_ent_wiki_sm","cc.mn.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mn.300.bin.gz","Mongolian","(mn) Mongolian"
"mr","xx_ent_wiki_sm","cc.mr.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mr.300.bin.gz","Marathi","(mr) Marathi"
"mrj","xx_ent_wiki_sm","cc.mrj.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mrj.300.bin.gz","Hill Mari","(mrj) Hill Mari"
"ms","xx_ent_wiki_sm","cc.ms.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ms.300.bin.gz","Malay","(ms) Malay"
"mt","xx_ent_wiki_sm","cc.mt.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mt.300.bin.gz","Maltese","(mt) Maltese"
"mwl","xx_ent_wiki_sm","cc.mwl.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mwl.300.bin.gz","Mirandese","(mwl) Mirandese"
"my","xx_ent_wiki_sm","cc.my.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.my.300.bin.gz","Burmese","(my) Burmese"
"myv","xx_ent_wiki_sm","cc.myv.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.myv.300.bin.gz","Erzya","(myv) Erzya"
"mzn","xx_ent_wiki_sm","cc.mzn.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mzn.300.bin.gz","Mazandarani","(mzn) Mazandarani"
"nah","xx_ent_wiki_sm","cc.nah.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nah.300.bin.gz","Nahuatl","(nah) Nahuatl"
"nap","xx_ent_wiki_sm","cc.nap.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nap.300.bin.gz","Neapolitan","(nap) Neapolitan"
"nds","xx_ent_wiki_sm","cc.nds.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nds.300.bin.gz","Low Saxon","(nds) Low Saxon"
"ne","xx_ent_wiki_sm","cc.ne.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ne.300.bin.gz","Nepali","(ne) Nepali"
"new","xx_ent_wiki_sm","cc.new.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.new.300.bin.gz","Newar","(new) Newar"
"nl","nl_core_news_sm","cc.nl.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nl.300.bin.gz","Dutch","(nl) Dutch"
"nn","xx_ent_wiki_sm","cc.nn.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nn.300.bin.gz","Norwegian (Nynorsk)","(nn) Norwegian (Nynorsk)"
"no","xx_ent_wiki_sm","cc.no.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.no.300.bin.gz","Norwegian (Bokmål)","(no) Norwegian (Bokmål)"
"nso","xx_ent_wiki_sm","cc.nso.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nso.300.bin.gz","Northern Sotho","(nso) Northern Sotho"
"oc","xx_ent_wiki_sm","cc.oc.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.oc.300.bin.gz","Occitan","(oc) Occitan"
"or","xx_ent_wiki_sm","cc.or.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.or.300.bin.gz","Oriya","(or) Oriya"
"os","xx_ent_wiki_sm","cc.os.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.os.300.bin.gz","Ossetian","(os) Ossetian"
"pa","xx_ent_wiki_sm","cc.pa.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pa.300.bin.gz","Eastern Punjabi","(pa) Eastern Punjabi"
"pam","xx_ent_wiki_sm","cc.pam.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pam.300.bin.gz","Kapampangan","(pam) Kapampangan"
"pfl","xx_ent_wiki_sm","cc.pfl.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pfl.300.bin.gz","Palatinate German","(pfl) Palatinate German"
"pl","pl_core_news_sm","cc.pl.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pl.300.bin.gz","Polish","(pl) Polish"
"pms","xx_ent_wiki_sm","cc.pms.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pms.300.bin.gz","Piedmontese","(pms) Piedmontese"
"pnb","xx_ent_wiki_sm","cc.pnb.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pnb.300.bin.gz","Western Punjabi","(pnb) Western Punjabi"
"ps","xx_ent_wiki_sm","cc.ps.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ps.300.bin.gz","Pashto","(ps) Pashto"
"pt","pt_core_news_sm","cc.pt.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pt.300.bin.gz","Portuguese","(pt) Portuguese"
"qu","xx_ent_wiki_sm","cc.qu.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.qu.300.bin.gz","Quechua","(qu) Quechua"
"rm","xx_ent_wiki_sm","cc.rm.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.rm.300.bin.gz","Romansh","(rm) Romansh"
"ro","ro_core_news_sm","cc.ro.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ro.300.bin.gz","Romanian","(ro) Romanian"
"ru","ru_core_news_sm","cc.ru.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ru.300.bin.gz","Russian","(ru) Russian"
"sa","xx_ent_wiki_sm","cc.sa.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sa.300.bin.gz","Sanskrit","(sa) Sanskrit"
"sah","xx_ent_wiki_sm","cc.sah.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sah.300.bin.gz","Sakha","(sah) Sakha"
"sc","xx_ent_wiki_sm","cc.sc.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sc.300.bin.gz","Sardinian","(sc) Sardinian"
"scn","xx_ent_wiki_sm","cc.scn.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.scn.300.bin.gz","Sicilian","(scn) Sicilian"
"sco","xx_ent_wiki_sm","cc.sco.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sco.300.bin.gz","Scots","(sco) Scots"
"sd","xx_ent_wiki_sm","cc.sd.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sd.300.bin.gz","Sindhi","(sd) Sindhi"
"sh","xx_ent_wiki_sm","cc.sh.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sh.300.bin.gz","Serbo-Croatian","(sh) Serbo-Croatian"
"si","xx_ent_wiki_sm","cc.si.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.si.300.bin.gz","Sinhalese","(si) Sinhalese"
"sk","xx_ent_wiki_sm","cc.sk.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sk.300.bin.gz","Slovak","(sk) Slovak"
"sl","sl_core_news_sm","cc.sl.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sl.300.bin.gz","Slovenian","(sl) Slovenian"
"so","xx_ent_wiki_sm","cc.so.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.so.300.bin.gz","Somali","(so) Somali"
"sq","xx_ent_wiki_sm","cc.sq.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sq.300.bin.gz","Albanian","(sq) Albanian"
"sr","xx_ent_wiki_sm","cc.sr.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sr.300.bin.gz","Serbian","(sr) Serbian"
"su","xx_ent_wiki_sm","cc.su.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.su.300.bin.gz","Sundanese","(su) Sundanese"
"sv","sv_core_news_sm","cc.sv.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sv.300.bin.gz","Swedish","(sv) Swedish"
"sw","xx_ent_wiki_sm","cc.sw.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sw.300.bin.gz","Swahili","(sw) Swahili"
"ta","xx_ent_wiki_sm","cc.ta.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ta.300.bin.gz","Tamil","(ta) Tamil"
"te","xx_ent_wiki_sm","cc.te.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.te.300.bin.gz","Telugu","(te) Telugu"
"tg","xx_ent_wiki_sm","cc.tg.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.tg.300.bin.gz","Tajik","(tg) Tajik"
"th","xx_ent_wiki_sm","cc.th.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.th.300.bin.gz","Thai","(th) Thai"
"tk","xx_ent_wiki_sm","cc.tk.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.tk.300.bin.gz","Turkmen","(tk) Turkmen"
"tl","xx_ent_wiki_sm","cc.tl.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.tl.300.bin.gz","Tagalog","(tl) Tagalog"
"tr","xx_ent_wiki_sm","cc.tr.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.tr.300.bin.gz","Turkish","(tr) Turkish"
"tt","xx_ent_wiki_sm","cc.tt.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.tt.300.bin.gz","Tatar","(tt) Tatar"
"ug","xx_ent_wiki_sm","cc.ug.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ug.300.bin.gz","Uyghur","(ug) Uyghur"
"uk","uk_core_news_sm","cc.uk.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.uk.300.bin.gz","Ukrainian","(uk) Ukrainian"
"ur","xx_ent_wiki_sm","cc.ur.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ur.300.bin.gz","Urdu","(ur) Urdu"
"uz","xx_ent_wiki_sm","cc.uz.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.uz.300.bin.gz","Uzbek","(uz) Uzbek"
"vec","xx_ent_wiki_sm","cc.vec.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.vec.300.bin.gz","Venetian","(vec) Venetian"
"vi","xx_ent_wiki_sm","cc.vi.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.vi.300.bin.gz","Vietnamese","(vi) Vietnamese"
"vls","xx_ent_wiki_sm","cc.vls.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.vls.300.bin.gz","West Flemish","(vls) West Flemish"
"vo","xx_ent_wiki_sm","cc.vo.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.vo.300.bin.gz","Volapük","(vo) Volapük"
"wa","xx_ent_wiki_sm","cc.wa.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.wa.300.bin.gz","Walloon","(wa) Walloon"
"war","xx_ent_wiki_sm","cc.war.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.war.300.bin.gz","Waray","(war) Waray"
"xmf","xx_ent_wiki_sm","cc.xmf.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.xmf.300.bin.gz","Mingrelian","(xmf) Mingrelian"
"yi","xx_ent_wiki_sm","cc.yi.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.yi.300.bin.gz","Yiddish","(yi) Yiddish"
"yo","xx_ent_wiki_sm","cc.yo.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.yo.300.bin.gz","Yoruba","(yo) Yoruba"
"zea","xx_ent_wiki_sm","cc.zea.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.zea.300.bin.gz","Zeelandic","(zea) Zeelandic"
"zh","zh_core_web_sm","cc.zh.300.bin","https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.zh.300.bin.gz","Chinese","(zh) Chinese"
1 short spacy_name fasttext_name fasttext_url language short_lang
2 af xx_ent_wiki_sm cc.af.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.af.300.bin.gz Afrikaans (af) Afrikaans
3 als xx_ent_wiki_sm cc.als.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.als.300.bin.gz Alemannic (als) Alemannic
4 am xx_ent_wiki_sm cc.am.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.am.300.bin.gz Amharic (am) Amharic
5 an xx_ent_wiki_sm cc.an.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.an.300.bin.gz Aragonese (an) Aragonese
6 ar xx_ent_wiki_sm cc.ar.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ar.300.bin.gz Arabic (ar) Arabic
7 arz xx_ent_wiki_sm cc.arz.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.arz.300.bin.gz Egyptian Arabic (arz) Egyptian Arabic
8 as xx_ent_wiki_sm cc.as.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.as.300.bin.gz Assamese (as) Assamese
9 ast xx_ent_wiki_sm cc.ast.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ast.300.bin.gz Asturian (ast) Asturian
10 az xx_ent_wiki_sm cc.az.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.az.300.bin.gz Azerbaijani (az) Azerbaijani
11 azb xx_ent_wiki_sm cc.azb.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.azb.300.bin.gz Southern Azerbaijani (azb) Southern Azerbaijani
12 ba xx_ent_wiki_sm cc.ba.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ba.300.bin.gz Bashkir (ba) Bashkir
13 bar xx_ent_wiki_sm cc.bar.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bar.300.bin.gz Bavarian (bar) Bavarian
14 bcl xx_ent_wiki_sm cc.bcl.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bcl.300.bin.gz Central Bicolano (bcl) Central Bicolano
15 be xx_ent_wiki_sm cc.be.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.be.300.bin.gz Belarusian (be) Belarusian
16 bg xx_ent_wiki_sm cc.bg.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bg.300.bin.gz Bulgarian (bg) Bulgarian
17 bh xx_ent_wiki_sm cc.bh.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bh.300.bin.gz Bihari (bh) Bihari
18 bn xx_ent_wiki_sm cc.bn.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bn.300.bin.gz Bengali (bn) Bengali
19 bo xx_ent_wiki_sm cc.bo.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bo.300.bin.gz Tibetan (bo) Tibetan
20 bpy xx_ent_wiki_sm cc.bpy.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bpy.300.bin.gz Bishnupriya Manipuri (bpy) Bishnupriya Manipuri
21 br xx_ent_wiki_sm cc.br.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.br.300.bin.gz Breton (br) Breton
22 bs xx_ent_wiki_sm cc.bs.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bs.300.bin.gz Bosnian (bs) Bosnian
23 ca ca_core_news_sm cc.ca.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ca.300.bin.gz Catalan (ca) Catalan
24 ce xx_ent_wiki_sm cc.ce.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ce.300.bin.gz Chechen (ce) Chechen
25 ceb xx_ent_wiki_sm cc.ceb.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ceb.300.bin.gz Cebuano (ceb) Cebuano
26 ckb xx_ent_wiki_sm cc.ckb.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ckb.300.bin.gz Kurdish (Sorani) (ckb) Kurdish (Sorani)
27 co xx_ent_wiki_sm cc.co.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.co.300.bin.gz Corsican (co) Corsican
28 cs xx_ent_wiki_sm cc.cs.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.cs.300.bin.gz Czech (cs) Czech
29 cv xx_ent_wiki_sm cc.cv.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.cv.300.bin.gz Chuvash (cv) Chuvash
30 cy xx_ent_wiki_sm cc.cy.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.cy.300.bin.gz Welsh (cy) Welsh
31 da da_core_news_sm cc.da.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.da.300.bin.gz Danish (da) Danish
32 de de_core_news_sm cc.de.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.de.300.bin.gz German (de) German
33 diq xx_ent_wiki_sm cc.diq.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.diq.300.bin.gz Zazaki (diq) Zazaki
34 dv xx_ent_wiki_sm cc.dv.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.dv.300.bin.gz Divehi (dv) Divehi
35 el el_core_news_sm cc.el.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.el.300.bin.gz Greek (el) Greek
36 eml xx_ent_wiki_sm cc.eml.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.eml.300.bin.gz Emilian-Romagnol (eml) Emilian-Romagnol
37 en en_core_web_sm cc.en.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz English (en) English
38 eo xx_ent_wiki_sm cc.eo.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.eo.300.bin.gz Esperanto (eo) Esperanto
39 es es_core_news_sm cc.es.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.es.300.bin.gz Spanish (es) Spanish
40 et xx_ent_wiki_sm cc.et.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.et.300.bin.gz Estonian (et) Estonian
41 eu xx_ent_wiki_sm cc.eu.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.eu.300.bin.gz Basque (eu) Basque
42 fa xx_ent_wiki_sm cc.fa.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.fa.300.bin.gz Persian (fa) Persian
43 fi fi_core_news_sm cc.fi.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.fi.300.bin.gz Finnish (fi) Finnish
44 fr fr_core_news_sm cc.fr.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.fr.300.bin.gz French (fr) French
45 frr xx_ent_wiki_sm cc.frr.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.frr.300.bin.gz North Frisian (frr) North Frisian
46 fy xx_ent_wiki_sm cc.fy.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.fy.300.bin.gz West Frisian (fy) West Frisian
47 ga xx_ent_wiki_sm cc.ga.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ga.300.bin.gz Irish (ga) Irish
48 gd xx_ent_wiki_sm cc.gd.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.gd.300.bin.gz Scottish Gaelic (gd) Scottish Gaelic
49 gl xx_ent_wiki_sm cc.gl.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.gl.300.bin.gz Galician (gl) Galician
50 gom xx_ent_wiki_sm cc.gom.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.gom.300.bin.gz Goan Konkani (gom) Goan Konkani
51 gu xx_ent_wiki_sm cc.gu.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.gu.300.bin.gz Gujarati (gu) Gujarati
52 gv xx_ent_wiki_sm cc.gv.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.gv.300.bin.gz Manx (gv) Manx
53 he xx_ent_wiki_sm cc.he.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.he.300.bin.gz Hebrew (he) Hebrew
54 hi xx_ent_wiki_sm cc.hi.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.hi.300.bin.gz Hindi (hi) Hindi
55 hif xx_ent_wiki_sm cc.hif.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.hif.300.bin.gz Fiji Hindi (hif) Fiji Hindi
56 hr hr_core_news_sm cc.hr.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.hr.300.bin.gz Croatian (hr) Croatian
57 hsb xx_ent_wiki_sm cc.hsb.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.hsb.300.bin.gz Upper Sorbian (hsb) Upper Sorbian
58 ht xx_ent_wiki_sm cc.ht.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ht.300.bin.gz Haitian (ht) Haitian
59 hu xx_ent_wiki_sm cc.hu.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.hu.300.bin.gz Hungarian (hu) Hungarian
60 hy xx_ent_wiki_sm cc.hy.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.hy.300.bin.gz Armenian (hy) Armenian
61 ia xx_ent_wiki_sm cc.ia.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ia.300.bin.gz Interlingua (ia) Interlingua
62 id xx_ent_wiki_sm cc.id.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.id.300.bin.gz Indonesian (id) Indonesian
63 ilo xx_ent_wiki_sm cc.ilo.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ilo.300.bin.gz Ilokano (ilo) Ilokano
64 io xx_ent_wiki_sm cc.io.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.io.300.bin.gz Ido (io) Ido
65 is xx_ent_wiki_sm cc.is.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.is.300.bin.gz Icelandic (is) Icelandic
66 it it_core_news_sm cc.it.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.it.300.bin.gz Italian (it) Italian
67 ja ja_core_news_sm cc.ja.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ja.300.bin.gz Japanese (ja) Japanese
68 jv xx_ent_wiki_sm cc.jv.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.jv.300.bin.gz Javanese (jv) Javanese
69 ka xx_ent_wiki_sm cc.ka.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ka.300.bin.gz Georgian (ka) Georgian
70 kk xx_ent_wiki_sm cc.kk.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.kk.300.bin.gz Kazakh (kk) Kazakh
71 km xx_ent_wiki_sm cc.km.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.km.300.bin.gz Khmer (km) Khmer
72 kn xx_ent_wiki_sm cc.kn.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.kn.300.bin.gz Kannada (kn) Kannada
73 ko ko_core_news_sm cc.ko.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ko.300.bin.gz Korean (ko) Korean
74 ku xx_ent_wiki_sm cc.ku.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ku.300.bin.gz Kurdish (Kurmanji) (ku) Kurdish (Kurmanji)
75 ky xx_ent_wiki_sm cc.ky.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ky.300.bin.gz Kirghiz (ky) Kirghiz
76 la xx_ent_wiki_sm cc.la.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.la.300.bin.gz Latin (la) Latin
77 lb xx_ent_wiki_sm cc.lb.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.lb.300.bin.gz Luxembourgish (lb) Luxembourgish
78 li xx_ent_wiki_sm cc.li.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.li.300.bin.gz Limburgish (li) Limburgish
79 lmo xx_ent_wiki_sm cc.lmo.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.lmo.300.bin.gz Lombard (lmo) Lombard
80 lt lt_core_news_sm cc.lt.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.lt.300.bin.gz Lithuanian (lt) Lithuanian
81 lv xx_ent_wiki_sm cc.lv.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.lv.300.bin.gz Latvian (lv) Latvian
82 mai xx_ent_wiki_sm cc.mai.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mai.300.bin.gz Maithili (mai) Maithili
83 mg xx_ent_wiki_sm cc.mg.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mg.300.bin.gz Malagasy (mg) Malagasy
84 mhr xx_ent_wiki_sm cc.mhr.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mhr.300.bin.gz Meadow Mari (mhr) Meadow Mari
85 min xx_ent_wiki_sm cc.min.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.min.300.bin.gz Minangkabau (min) Minangkabau
86 mk mk_core_news_sm cc.mk.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mk.300.bin.gz Macedonian (mk) Macedonian
87 ml xx_ent_wiki_sm cc.ml.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ml.300.bin.gz Malayalam (ml) Malayalam
88 mn xx_ent_wiki_sm cc.mn.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mn.300.bin.gz Mongolian (mn) Mongolian
89 mr xx_ent_wiki_sm cc.mr.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mr.300.bin.gz Marathi (mr) Marathi
90 mrj xx_ent_wiki_sm cc.mrj.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mrj.300.bin.gz Hill Mari (mrj) Hill Mari
91 ms xx_ent_wiki_sm cc.ms.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ms.300.bin.gz Malay (ms) Malay
92 mt xx_ent_wiki_sm cc.mt.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mt.300.bin.gz Maltese (mt) Maltese
93 mwl xx_ent_wiki_sm cc.mwl.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mwl.300.bin.gz Mirandese (mwl) Mirandese
94 my xx_ent_wiki_sm cc.my.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.my.300.bin.gz Burmese (my) Burmese
95 myv xx_ent_wiki_sm cc.myv.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.myv.300.bin.gz Erzya (myv) Erzya
96 mzn xx_ent_wiki_sm cc.mzn.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.mzn.300.bin.gz Mazandarani (mzn) Mazandarani
97 nah xx_ent_wiki_sm cc.nah.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nah.300.bin.gz Nahuatl (nah) Nahuatl
98 nap xx_ent_wiki_sm cc.nap.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nap.300.bin.gz Neapolitan (nap) Neapolitan
99 nds xx_ent_wiki_sm cc.nds.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nds.300.bin.gz Low Saxon (nds) Low Saxon
100 ne xx_ent_wiki_sm cc.ne.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ne.300.bin.gz Nepali (ne) Nepali
101 new xx_ent_wiki_sm cc.new.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.new.300.bin.gz Newar (new) Newar
102 nl nl_core_news_sm cc.nl.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nl.300.bin.gz Dutch (nl) Dutch
103 nn xx_ent_wiki_sm cc.nn.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nn.300.bin.gz Norwegian (Nynorsk) (nn) Norwegian (Nynorsk)
104 no xx_ent_wiki_sm cc.no.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.no.300.bin.gz Norwegian (Bokmål) (no) Norwegian (Bokmål)
105 nso xx_ent_wiki_sm cc.nso.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nso.300.bin.gz Northern Sotho (nso) Northern Sotho
106 oc xx_ent_wiki_sm cc.oc.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.oc.300.bin.gz Occitan (oc) Occitan
107 or xx_ent_wiki_sm cc.or.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.or.300.bin.gz Oriya (or) Oriya
108 os xx_ent_wiki_sm cc.os.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.os.300.bin.gz Ossetian (os) Ossetian
109 pa xx_ent_wiki_sm cc.pa.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pa.300.bin.gz Eastern Punjabi (pa) Eastern Punjabi
110 pam xx_ent_wiki_sm cc.pam.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pam.300.bin.gz Kapampangan (pam) Kapampangan
111 pfl xx_ent_wiki_sm cc.pfl.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pfl.300.bin.gz Palatinate German (pfl) Palatinate German
112 pl pl_core_news_sm cc.pl.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pl.300.bin.gz Polish (pl) Polish
113 pms xx_ent_wiki_sm cc.pms.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pms.300.bin.gz Piedmontese (pms) Piedmontese
114 pnb xx_ent_wiki_sm cc.pnb.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pnb.300.bin.gz Western Punjabi (pnb) Western Punjabi
115 ps xx_ent_wiki_sm cc.ps.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ps.300.bin.gz Pashto (ps) Pashto
116 pt pt_core_news_sm cc.pt.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.pt.300.bin.gz Portuguese (pt) Portuguese
117 qu xx_ent_wiki_sm cc.qu.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.qu.300.bin.gz Quechua (qu) Quechua
118 rm xx_ent_wiki_sm cc.rm.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.rm.300.bin.gz Romansh (rm) Romansh
119 ro ro_core_news_sm cc.ro.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ro.300.bin.gz Romanian (ro) Romanian
120 ru ru_core_news_sm cc.ru.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ru.300.bin.gz Russian (ru) Russian
121 sa xx_ent_wiki_sm cc.sa.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sa.300.bin.gz Sanskrit (sa) Sanskrit
122 sah xx_ent_wiki_sm cc.sah.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sah.300.bin.gz Sakha (sah) Sakha
123 sc xx_ent_wiki_sm cc.sc.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sc.300.bin.gz Sardinian (sc) Sardinian
124 scn xx_ent_wiki_sm cc.scn.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.scn.300.bin.gz Sicilian (scn) Sicilian
125 sco xx_ent_wiki_sm cc.sco.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sco.300.bin.gz Scots (sco) Scots
126 sd xx_ent_wiki_sm cc.sd.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sd.300.bin.gz Sindhi (sd) Sindhi
127 sh xx_ent_wiki_sm cc.sh.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sh.300.bin.gz Serbo-Croatian (sh) Serbo-Croatian
128 si xx_ent_wiki_sm cc.si.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.si.300.bin.gz Sinhalese (si) Sinhalese
129 sk xx_ent_wiki_sm cc.sk.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sk.300.bin.gz Slovak (sk) Slovak
130 sl sl_core_news_sm cc.sl.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sl.300.bin.gz Slovenian (sl) Slovenian
131 so xx_ent_wiki_sm cc.so.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.so.300.bin.gz Somali (so) Somali
132 sq xx_ent_wiki_sm cc.sq.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sq.300.bin.gz Albanian (sq) Albanian
133 sr xx_ent_wiki_sm cc.sr.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sr.300.bin.gz Serbian (sr) Serbian
134 su xx_ent_wiki_sm cc.su.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.su.300.bin.gz Sundanese (su) Sundanese
135 sv sv_core_news_sm cc.sv.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sv.300.bin.gz Swedish (sv) Swedish
136 sw xx_ent_wiki_sm cc.sw.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.sw.300.bin.gz Swahili (sw) Swahili
137 ta xx_ent_wiki_sm cc.ta.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ta.300.bin.gz Tamil (ta) Tamil
138 te xx_ent_wiki_sm cc.te.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.te.300.bin.gz Telugu (te) Telugu
139 tg xx_ent_wiki_sm cc.tg.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.tg.300.bin.gz Tajik (tg) Tajik
140 th xx_ent_wiki_sm cc.th.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.th.300.bin.gz Thai (th) Thai
141 tk xx_ent_wiki_sm cc.tk.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.tk.300.bin.gz Turkmen (tk) Turkmen
142 tl xx_ent_wiki_sm cc.tl.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.tl.300.bin.gz Tagalog (tl) Tagalog
143 tr xx_ent_wiki_sm cc.tr.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.tr.300.bin.gz Turkish (tr) Turkish
144 tt xx_ent_wiki_sm cc.tt.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.tt.300.bin.gz Tatar (tt) Tatar
145 ug xx_ent_wiki_sm cc.ug.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ug.300.bin.gz Uyghur (ug) Uyghur
146 uk uk_core_news_sm cc.uk.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.uk.300.bin.gz Ukrainian (uk) Ukrainian
147 ur xx_ent_wiki_sm cc.ur.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ur.300.bin.gz Urdu (ur) Urdu
148 uz xx_ent_wiki_sm cc.uz.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.uz.300.bin.gz Uzbek (uz) Uzbek
149 vec xx_ent_wiki_sm cc.vec.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.vec.300.bin.gz Venetian (vec) Venetian
150 vi xx_ent_wiki_sm cc.vi.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.vi.300.bin.gz Vietnamese (vi) Vietnamese
151 vls xx_ent_wiki_sm cc.vls.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.vls.300.bin.gz West Flemish (vls) West Flemish
152 vo xx_ent_wiki_sm cc.vo.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.vo.300.bin.gz Volapük (vo) Volapük
153 wa xx_ent_wiki_sm cc.wa.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.wa.300.bin.gz Walloon (wa) Walloon
154 war xx_ent_wiki_sm cc.war.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.war.300.bin.gz Waray (war) Waray
155 xmf xx_ent_wiki_sm cc.xmf.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.xmf.300.bin.gz Mingrelian (xmf) Mingrelian
156 yi xx_ent_wiki_sm cc.yi.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.yi.300.bin.gz Yiddish (yi) Yiddish
157 yo xx_ent_wiki_sm cc.yo.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.yo.300.bin.gz Yoruba (yo) Yoruba
158 zea xx_ent_wiki_sm cc.zea.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.zea.300.bin.gz Zeelandic (zea) Zeelandic
159 zh zh_core_web_sm cc.zh.300.bin https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.zh.300.bin.gz Chinese (zh) Chinese

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,40 @@
#!/usr/bin/env python
# coding: utf-8
## Spacy tokenize texts
## Requires data file with columns id and text
import argparse
from os.path import expanduser
import pandas as pd
import pyarrow as pa
import pyarrow.feather as feather
import spacy
import re
def main(args):
    """Tokenize the ``text`` column of a feather file with a spaCy tokenizer.

    Args:
        args: Parsed command-line namespace with two attributes:
            ``data`` -- path to a feather file containing ``id`` and ``text``
            columns (``~`` is expanded);
            ``model`` -- name or path of the spaCy model to load.

    Side effects:
        Writes a feather file with columns ``id`` and ``text_spa`` (the tokens
        of each text joined by single spaces). The output path is the input
        path with ``.feather`` replaced by ``_spa.feather``; if the input path
        has no ``.feather`` suffix, ``_spa.feather`` is appended instead so the
        input file is never overwritten.
    """
    print("Tokenizer: Importing data")
    datapath = expanduser(args.data)
    dat = feather.read_feather(datapath)

    # re.sub alone would return the path unchanged when the suffix is missing,
    # making the output path equal to the input path and clobbering the data.
    outfile, n_subs = re.subn("[.]feather$", "_spa.feather", datapath)
    if n_subs == 0:
        outfile = datapath + "_spa.feather"

    print("Tokenizer: Loading model")
    spa = spacy.load(expanduser(args.model))

    print("Tokenizer: Tokenizing sentences")
    # Only the tokenizer component is applied, not the full pipeline.
    tok = [" ".join([str(token) for token in spa.tokenizer(text)]) for text in dat["text"]]

    print("Tokenizer: Exporting")
    # Pair ids and tokenized texts by position: concat(axis=1) aligns on the
    # index and would mismatch rows if the input carries a non-default index.
    tok = pd.DataFrame({"id": dat["id"].reset_index(drop=True), "text_spa": tok})
    feather.write_feather(tok, outfile)
    print("Tokenizer: Done")
if __name__ == "__main__":
    # Command-line entry point: parse the model name and data path, then run
    # the tokenizer.
    argParser = argparse.ArgumentParser()
    argParser.add_argument("-m", "--model", help="Model name", default="fr_core_news_sm")
    # --data has no meaningful default; requiring it yields a clear usage error
    # instead of a TypeError from expanduser(None) inside main().
    argParser.add_argument("-d", "--data", help="Path to data (feather)", required=True)
    args = argParser.parse_args()
    main(args)

View File

@ -0,0 +1,631 @@
## 21/04/2020: Shiny UI for active-learning text tagging
shinyUI(fluidPage(
title = "Active Tigger",
############################################################################
## Top panel: Title, strategy, model options
############################################################################
fluidRow(
column(
4,
fluidRow(
column(4, br(),
HTML('<img src="active_tigger.png" width="100%">')),
column(
8, br(),
p(strong("Project / Scheme")),
fluidRow(
column(2, HTML(paste0(
'<div title="New project">',
actionButton("createProject", "+"),
'</div>'))),
column(10, selectInput("selectProject", NULL, NULL, NULL))
),
fluidRow(
column(2, HTML(paste0(
'<div title="New scheme">',
actionButton("createScheme", "+"),
'</div>'))),
column(10, selectInput("selectScheme", NULL, NULL, NULL))
)
))
),
column(
2, br(),
HTML(paste0(
'<div title="Querying strategy">',
p(strong("Strategy")),
selectInput("strategy", NULL,
choices= c("Active" = "entropy",
"MaxProb" = "maxprob",
"Random" = "random",
"Deterministic" = "sequential"),
selected = "sequential"),
conditionalPanel("input.strategy == 'maxprob'",
selectInput("maxprobWhich", NULL, NULL)),
'</div>'))
),
column(
2, br(),
HTML(paste0(
'<div title="Query from which?">',
p(strong("On")),
selectInput("sampleChoice", NULL,
choices = c("Untagged" = "untagged",
"Tagged" = "tagged",
"All" = "all")),
conditionalPanel("input.sampleChoice == 'tagged'",
selectInput("taggedWhich", NULL, NULL)),
'</div>'))
),
column(4, p(br()),
verbatimTextOutput("trainDiagno"),
fluidRow(
column(8, HTML(paste0(
'<div title="Train prediction model 🤖">',
actionButton("modelTrain", "🤖 Train", width = "100%"),
'</div>'))),
column(4, HTML(paste0(
'<div title="🤖 Prediction model options">',
checkboxInput("showTrainOptions", "🔧"),
'</div>')))
))
),
## General training options
conditionalPanel(
"input.showTrainOptions",
hr(),
fluidRow(
column(
4,
conditionalPanel(
"input.showTrainOptions & input.use_regressors.includes('regex')",
uiOutput("panelExtraRegex")
)
),
column(
4,
p(strong("Predictors")),
selectizeInput("use_regressors", NULL, "",
multiple = TRUE, width = "100%"),
conditionalPanel(
"input.showTrainOptions & input.use_regressors.includes('extra')",
selectizeInput("use_ootregnum", "Extra predictors: continuous", "",
multiple = TRUE, width = "100%"),
selectizeInput("use_ootregcat", "Extra predictors: categorical", "",
multiple = TRUE, width = "100%")
),
conditionalPanel(
"input.showTrainOptions & input.use_regressors.includes('dfm')",
hr(),
fluidRow(
column(
6, numericInput("dfmMinTermfreq", "DFM : Min Termfreq",
min= 1, max= 1e3, value= 5, step= 1)),
column(
6, numericInput("dfmMinDocfreq", "DFM : Min Docfreq",
min= 1, max= 1e3, value= 5, step= 1))),
fluidRow(
column(
4, checkboxInput("dfmTfIdf", "Tf-Idf", TRUE)),
column(
4, selectInput("dfmTfScheme", label = NULL,
choices= c("logcount", "count", "prop", "propmax",
"boolean", "augmented", "logave"),
selected= "logcount")),
column(
4, selectInput("dfmDfScheme", label = NULL,
choices= c("inverse", "count",
"inversemax", "inverseprob", "unary"),
selected= "inverse"))
),
numericInput("dfmNgrams", "N-grams", value= 1,
min= 1, max= 10, step = 1)
)
),
column(
4,
HTML(paste0(
'<div title="Auto train after # tags (0=never)">',
fluidRow(
column(6, strong("Auto train every")),
column(6, numericInput("trainCountdown", NULL, 0, 0, 1e6, 1))),
'</div>'
)),
HTML(paste0(
'<div title="🤖 model (recommended: Liblinear)">',
fluidRow(
column(6, strong("Model")),
column(6, selectInput("predModel", NULL, selected = "linear",
choices = c("Naive Bayes" = "naive bayes",
"KNN" = "knn",
"Liblinear" = "linear",
"LASSO" = "lasso",
"Random Forest" = "random forest")))
),
'</div>'
)),
## Model-specific training options
### Random forest options
conditionalPanel(
"input.showTrainOptions & input.predModel == 'random forest'",
fluidRow(
numericInput("rfNumTrees", label = "Num. trees",
min = 1, max = 2e3, value = 500, step = 1),
numericInput("rfMtry", label = "mtry",
min = 0, max = 1e5, value = 0, step = 1),
numericInput("rfSampleFrac", label = "Sample fraction",
min = 0, max = 1, value = 1, step = .01)
)
),
### Naive Bayes options
conditionalPanel(
"input.showTrainOptions & input.predModel == 'naive bayes'",
flowLayout(
numericInput("naiveSmooth", label = "Smooth",
min = 0, max = 2e3,
value = 1, step = 1e-3),
selectInput("naivePrior", "Prior",
c("uniform", "docfreq", "termfreq")),
selectInput("naiveDistri", "Distribution",
c("multinomial", "Bernoulli"))
)
),
### Lasso options
conditionalPanel(
"input.showTrainOptions & input.predModel == 'lasso'",
strong("Lasso penalty"),
fluidRow(
column(
6, numericInput("glmLambda", label = NULL, min = 0, max = 2e3,
value = 0, step = 1e-6)),
column(
6, actionButton("glmCV", label= "Find best (CV)")))
),
### Linear options
conditionalPanel(
"input.showTrainOptions & input.predModel == 'linear'",
strong("Liblinear Cost"),
fluidRow(
column(
6, numericInput("liblinCost", label= NULL, min= 0, max= 2e10,
value= 32, step= 1)),
column(
6, actionButton("liblinCV", label= "Find best (CV)")))
),
### KNN options
conditionalPanel(
"input.showTrainOptions & input.predModel == 'knn'",
flowLayout(
strong("N. Neighbours"),
numericInput("knnK", label = NULL, min = 1, max = 1e2,
value = 3, step = 1)
)
)
)
),
hr()
),
############################################################################
## Main panel set
############################################################################
tabsetPanel(
id = "mainPanelset",
selected = "Tagging",
########################################################################
## Project panel
########################################################################
tabPanel(
"Project",
br(),
tabsetPanel(
id = "tabsetProject",
selected = "Sample",
tabPanel(
"Settings",
br(),
actionButton("saveSystem", "Save changes"),
h4("Files"),
fluidRow(
column(2, p("Data directory")),
column(4, uiOutput("sys_datadir")),
column(6, p("Place (on the server) where the data and project are stored"))
),
fluidRow(
column(2, p("Data filename")),
column(4, uiOutput("sys_datafile")),
column(6, p("Main file, containing id and text columns"))
),
h4("Variables"),
fluidRow(
column(2, p("ID")),
column(4, uiOutput("sys_var_id")),
column(6, p("Name of the id variable, unique identifier of each text"))
),
fluidRow(
column(2, p("Text")),
column(4, uiOutput("sys_var_text")),
column(6, p("Name of the text variables: if more than one, texts are concatenated in the specified order"))
),
fluidRow(
column(2, p("Tags")),
column(4, uiOutput("sys_var_tag")),
column(6, p("Names of scheme variables"))
),
fluidRow(
column(2, p("Comments")),
column(4, uiOutput("sys_var_comm_ui")),
column(6, p("Name of the comments variable"))
),
fluidRow(
column(2, p("Context")),
column(4, uiOutput("sys_var_context_ui")),
column(6, p("Names of variables not used in the models, but may be displayed during tagging"))
),
h4("System"),
fluidRow(
column(2, checkboxInput("sys_use_python", "Python backend", FALSE)),
column(4, conditionalPanel(
"input.sys_use_python",
textInput("sys_which_python", NULL, value = "python3",
placeholder = "(custom python path)"))),
column(6, conditionalPanel(
"input.sys_use_python",
p("This must be a working python3 environment, with the required modules installed (see documentation)")))
),
conditionalPanel("input.sys_use_python", list(
fluidRow(
column(2, checkboxInput("sys_use_spacy", "SpaCy tokenization", FALSE)),
column(4, conditionalPanel("input.sys_use_spacy", textInput(
"sys_use_spacy_model", NULL, NULL, placeholder = "(spacy model name)"))),
column(6, p("Name of the spacy tokenizer model, used in DTM and word embeddings"))
),
conditionalPanel("input.sys_use_spacy", fluidRow(
column(2),
column(9, uiOutput("sys_spacyDlUI")))
),
fluidRow(
column(2, checkboxInput("sys_use_ft", "FastText word embeddings", FALSE)),
column(4, conditionalPanel("input.sys_use_ft", textInput(
"sys_use_ft_model", NULL, NULL, placeholder = "(fasttext model path)"))),
column(6, p("Path to the local fasttext model binary"))
),
conditionalPanel("input.sys_use_ft", fluidRow(
column(2),
column(9, uiOutput("sys_ftDlUI")))
),
fluidRow(
column(2, checkboxInput("sys_use_sb", "SBERT sentence embeddings", FALSE)),
column(4, conditionalPanel("input.sys_use_sb", textInput(
"sys_use_sb_model", NULL, NULL,
placeholder = "(custom sentence_transformers model)"))),
column(6, p("(GPU recommended) Name or path of the sentence-transformers model"))
),
conditionalPanel("input.sys_use_python", list(
checkboxInput("sys_use_gpu", "GPU support (CUDA, for SBERT and BERT)", FALSE),
br(),
wellPanel(
h4("Model picker"),
fluidRow(
column(2, p("Language")),
column(4, uiOutput("sys_ex_lang_ui")),
column(6, p("Used to preset tokenization and embedding models"))
),
fluidRow(
column(2),
column(4, strong("Recommended model")),
column(6, strong("Download instructions"))
),
fluidRow(
column(2, p("SpaCy tokenization")),
column(4, uiOutput("sys_ex_spacy")),
column(6, uiOutput("sys_ex_spacy_dl"))
),
fluidRow(
column(2, p("FastText word embeddings")),
column(4, uiOutput("sys_ex_ft")),
column(6, uiOutput("sys_ex_ft_dl"))
),
fluidRow(
column(2, p("SBERT sentence embeddings")),
column(4, uiOutput("sys_ex_sb")),
column(6, p("(Auto download by python module)"))
)
)
))
))
),
tabPanel(
"Sample",
br(),
fluidRow(
column(
4,
wellPanel(
fluidRow(
column(8, h4("Sample")),
column(4, actionButton("dataImport", "Import", width = "100%"))),
fluidRow(
column(6, numericInput("dataNrows", "N. rows", 500, 10, 1e4, 1)),
column(6, numericInput("dataSkipRows", "Skip rows", 0, 0, step = 1))
)
)
),
column(8, uiOutput("dataMessage"), uiOutput("panelData"))
)
),
tabPanel(
"Scheme",
br(),
fluidRow(
# column(4, uiOutput("panelScheme")),
column(
4,
wellPanel(
h4("Current scheme"),
fluidRow(
column(2, HTML(paste0(
"<div title='Delete scheme'>",
actionButton("schemeDelete", "🗑", width = "100%"),
"</div>"))),
column(6, uiOutput("printScheme")),
column(4, HTML(paste0(
"<div title='Save scheme description'>",
actionButton("schemeDescrSave", "Save", width = "100%"),
"</div>")))
),
br(),
textAreaInput("schemeDescr", NULL, width = "100%", rows = 10,
placeholder = "Write scheme description here"),
hr()
)
),
column(8, uiOutput("panelRetag"))
)
)
)
),
########################################################################
## Text / visualization panel
########################################################################
tabPanel(
"Tagging",
fluidRow(
column(
3,
br(),
fluidRow(
column(8, textInput("regexFilter", label = NULL,
placeholder = "(Regex filter)")),
column(4, checkboxInput("regexCaseSens", "Case"))),
wellPanel(
## Tagging buttons
fluidRow(
column(8, textInput("newLab", label = NULL,
placeholder = "(New label)")),
column(4, actionButton("currentAction", "Create"))
),
# fluidRow(uiOutput("oracleButtons")),
uiOutput("oracleButtons"),
br(),
textInput("currentComment", NULL, "", width = "100%",
placeholder = "(Comment)"),
br(),
uiOutput("makeOracleConfirm")
),
# fluidRow(
# column(6, checkboxInput("showContext", "Context")),
# column(6, actionButton("oops", strong("Oops")))
# ),
checkboxInput("showContext", "Context"),
conditionalPanel("input.showContext", htmlOutput("currentContext"))
),
column(
9,
fluidRow(
column(2, checkboxInput("panelText", "Text", TRUE)),
column(2, checkboxInput("panelVisu", "Visualization", FALSE),
offset = 8)
),
uiOutput("textVisuCols") # Handled in server.R for adaptive columns
)
)
),
########################################################################
## History panel
########################################################################
## "History" tab: a save button above a DT table (output id "histDTable").
tabPanel(
"History",
br(),
## Button with input id "histSave"
actionButton("histSave", "Save changes"),
br(),
br(),
## Table rendered through the DT package
DT::dataTableOutput("histDTable")
),
########################################################################
## Stats panel
########################################################################
## "Stats" tab: tag counts on the left (3/12 width), 10-CV diagnostics on the
## right (9/12 width), followed by a "Gold Standard" heading.
tabPanel(
"Stats",
br(),
fluidRow(
## Left column: table output "statsTagTable" under the "Counts" heading
column(
3,
h3("Counts"),
tableOutput("statsTagTable")
),
## Right column: button "statsCVgo", then a text output and a DT table
column(
9,
h3("10-CV diagnostics"),
actionButton("statsCVgo", "Compute 10-CV"),
br(),
verbatimTextOutput("statsCVoutput"),
DT::dataTableOutput("statsCVtable")
)
),
hr(),
## Heading only: no inputs or outputs are defined under it in this panel
h3("Gold Standard")
),
########################################################################
## BERT panel
########################################################################
tabPanel(
"BERT",
fluidRow(
column(
3,
br(),
h3("Train new BERT"),
fluidRow(
column(6, actionButton("bertTrain", "Train BERT", width = "100%")),
column(6, checkboxInput("bertOptions", "Options"))),
fluidRow(
column(6, textInput(
"bertSaveName", NULL, placeholder = "(save name)")),
column(6, actionButton("bertSave", "Save", width = "100%"))),
actionLink("bertLast", "Last trained model"),
h3("Saved models"),
uiOutput("bertSaved")
),
column(
9,
br(),
conditionalPanel(
"input.bertOptions",
fluidRow(
column(6, selectInput(
"bertModel", "Model",
c("(Fr) CamemBERT-base" = "camembert/camembert-base",
"(Fr) CamemBERT-large" = "camembert/camembert-large",
"(Fr) FlauBERT-small" = "flaubert/flaubert_small_cased",
"(Fr) FlauBERT-base" = "flaubert/flaubert_base_cased",
"(Fr) FlauBERT-large" = "flaubert/flaubert_large_cased",
"(En) DistilBERT-base" = "distilbert-base-cased",
"(En) RoBERTa-base" = "roberta-base",
"(En) DeBERTa-base" = "microsoft/deberta-base",
"(Multi) DistilmBERT-base" = "distilbert-base-multilingual-cased",
"(Multi) MiniLM" = "microsoft/Multilingual-MiniLM-L12-H384",
"(Multi) XLM-RoBERTa-base" = "xlm-roberta-base"))),
column(6)
),
fluidRow(
column(3, numericInput("bertEpochs", "Epochs", 3, 1, 20, 1)),
column(3, numericInput("bertLrate", "Learning rate", 2e-5, 1e-6, 1, 1e-6)),
column(3, numericInput("bertWdecay", "Weight decay", 0.01, 0, 10, 1e-6)),
column(3)
),
fluidRow(
column(3, numericInput("bertBatchsize", "Batch size", 4, 1, 32, 1)),
column(3, numericInput("bertGradacc", "Gradient accum.", 4, 1, 32, 1)),
column(3, br(), checkboxInput("bertAdapt", "Adapt token length to batch", TRUE)),
column(3)
),
fluidRow(
column(3, numericInput("bertValidFrac", "Validation fraction", .2, 0, .9)),
column(3, numericInput("bertValidSeed", "Validation seed", 1234, 1, 9e8)),
column(3, numericInput("bertNeval", "N. validation evals", 10, 1, 100, 1)),
column(3, br(), checkboxInput("bertBest", "Keep best", TRUE))
),
## Class filtering / balancing options for BERT training.
## Row of four 3/12-width columns; the last two are empty spacers.
fluidRow(
  ## Numeric input: default 1, min 1, max 1e4, step 1 (label spelling fixed)
  column(3, numericInput("bertMinOccur", "Min. class occurrences", 1, 1, 1e4, 1)),
  ## br() pushes the checkbox down to line up with the labelled numeric input
  column(3, br(), checkboxInput("bertBalance", "Balance classes", FALSE)),
  column(3),
  column(3)
)
),
fluidRow(
column(
6,
# flowLayout(
# actionButton(
# "bertGoPred", "Infer on current data", width = "100%"),
# actionButton(
# "bertDelete", "Delete saved model", width = "100%")),
verbatimTextOutput("bertMsg")),
column(6, plotOutput("bertValPlot", height = 200))),
verbatimTextOutput("bertMsgHyperpar"),
DT::dataTableOutput("bertValstats")
)
)
),
########################################################################
## Export panel
########################################################################
## "Export" tab: four sections (tagged data, embeddings, BERT predictions,
## BERT models), separated by horizontal rules.
tabPanel(
"Export",
## Section 1: tags / comments / predictions for the current data sample
h4("Export tagged data"),
p("Download the tags and predicted probabilities from the complete model, on the current data sample."),
# downloadButton("downloadCsv", "Save csv"),
flowLayout(
## Multi-select of what to export; all three choices are preselected
selectInput(
"dlTagSelect", NULL, c("tags", "comments", "predictions"),
c("tags", "comments", "predictions"), multiple = TRUE),
## Output format, "csv" preselected
selectInput("dlTagFormat", NULL, c("csv", "feather"), "csv"),
downloadButton("dlTagSave", NULL, title = "Save tags")
),
hr(),
## Section 2: embeddings for the current data sample
h4("Export embeddings"),
p("Download the embeddings (incl. from visualization if present), on the current data sample."),
flowLayout(
## Multi-select of embedding types; "sb" (SBERT) preselected
selectInput(
"dlEmbedSelect", NULL, c("FastText" = "ft", "SBERT" = "sb"),
selected = "sb", multiple = TRUE),
## Output format, "feather" preselected
selectInput("dlEmbedFormat", NULL, c("csv", "feather"), "feather"),
downloadButton("dlEmbedSave", NULL, title = "Save embeddings")
),
hr(),
## Section 3: BERT predictions on the complete dataset
h4("Export BERT predictions"),
p("Download the predicted probabilities from the chosen BERT model, on the complete dataset."),
flowLayout(
## Choices are NULL here -- presumably filled in by the server; confirm in server.R
selectInput("dlBPSelect", NULL, NULL, NULL),
selectInput("dlBPFormat", NULL, c("csv", "feather"), "feather"),
actionButton("dlBPInfer", "Predict"),
verbatimTextOutput("dlBPMsg"),
## UI placeholder rendered from the server side (output id "dlBPDlButton")
uiOutput("dlBPDlButton")
),
hr(),
## Section 4: heading only, no controls are defined under it in this panel
h4("Export BERT models")
)
),
br(), br(), br(), br(), br(), br(), br(), br(), br(), br(), br(), br(), br(),
br(), br(), br(), br(), br(), br()
))

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 245 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.7 KiB

View File

@ -0,0 +1 @@
# Install, in a single call, the R packages listed below (arrow ... uwot).
# No `repos` argument is given, so R's configured default repository is used.
install.packages(c("arrow", "class", "data.table", "DT", "foreign", "glmnet", "haven", "LiblineaR", "Matrix", "Metrics", "quanteda", "quanteda.textmodels", "ranger", "readODS", "readxl", "RJSONIO", "rlang", "Rtsne", "shiny", "SparseM", "stringi", "uwot"))

View File

@ -0,0 +1,6 @@
# Python dependencies, installed step by step with pip.
# The PyPI distribution is named "scikit-learn"; the old "sklearn" alias is
# deprecated and its installation now fails on purpose.
pip install argparse datasets fasttext numpy pandas pyarrow scikit-learn
# PyTorch wheels built against CUDA 11.8, taken from the PyTorch wheel index.
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# Quoted so the shell cannot interpret [torch] as a glob pattern.
pip install "transformers[torch]"
pip install sentence_transformers
# Pinned versions, upgraded/downgraded in place (-U) after the packages above.
pip install -U typing-inspect==0.8.0 typing_extensions==4.6.1
pip install spacy

View File

@ -0,0 +1,3 @@
## Helm charts datalab du Genes
Dépôt regroupant l'ensemble des images Docker créées par le GENES et qui seront utilisées pour le déploiement de nouveaux services dans le catalogue datalab via Helm chart

View File

@ -0,0 +1,15 @@
# Python dependencies (pip requirements format).
# The "+cu118" (CUDA 11.8) builds are published on the PyTorch wheel index,
# not on PyPI, so that index must be added for pip to resolve them.
--extra-index-url https://download.pytorch.org/whl/cu118
argparse
datasets
fasttext
numpy
pandas
pyarrow
scikit-learn
# torch / torchvision / torchaudio must come from the same release set.
# CUDA 11.8 wheels only exist from torch 2.0 onwards (there is no 1.8.0+cu118).
torch==2.0.1+cu118
torchvision==0.15.2+cu118
torchaudio==2.0.2+cu118
transformers
sentence_transformers
typing-inspect==0.8.0
typing-extensions==4.6.1
spacy

View File

@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

View File

@ -0,0 +1,24 @@
apiVersion: v2
name: activetigger
description: A Helm chart for Kubernetes
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.0.0"

View File

@ -0,0 +1,63 @@
# dépôt pour déploiement automatique d'un serveur shiny (R + python) pour mise à disposition à des étudiants pour quelques mois
A voir la configuration de l'ingress dans le fichier values.yaml du helm chart activetigger
Image docker où les consignes de déploiement ont été ajoutées : https://code.groupe-genes.fr/DSIT/datalab/src/branch/main/docker-images-datalab/activetigger
# CONSIGNE DE DEPLOIEMENT
Voici le git d'Active Tigger : https://gitlab.univ-lille.fr/julien.boelaert/activetigger
Le programme nécessite python, R et shiny server.
## Packages R (à installer depuis l'exécutable R employé par shiny server)
```
install.packages(c("arrow", "class", "data.table", "DT", "foreign", "glmnet", "haven", "LiblineaR", "Matrix", "Metrics", "quanteda", "quanteda.textmodels", "ranger", "readODS", "readxl", "RJSONIO", "rlang", "Rtsne", "shiny", "SparseM", "stringi", "uwot"))
```
## Environnement python
```
conda create -n tigger python==3.10
conda activate tigger
```
```
pip install argparse datasets fasttext numpy pandas pyarrow scikit-learn
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
pip install transformers[torch]
pip install sentence_transformers
pip install -U typing-inspect==0.8.0 typing_extensions==4.6.1
pip install spacy
```
## Téléchargement des modèles spacy et fasttext
### Français
```
python -m spacy download fr_core_news_sm
cd ~
wget https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.fr.300.bin.gz
gunzip cc.fr.300.bin.gz
```
### Anglais
```
python -m spacy download en_core_web_sm
cd ~
wget https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz
gunzip cc.en.300.bin.gz
```
# A chaque création d'instance
## Clone git pour créer la nouvelle instance (remplacer "tigger-name" par le nom que prendra l'instance, ie https://analytics.huma-num.fr/Prenom.Nom/tigger-name/)
```
cd ~/zPublish/shiny
git clone https://gitlab.univ-lille.fr/julien.boelaert/activetigger.git tigger-name
```
## Dans l'application
Tout en haut à gauche, bouton "+" pour "create project". Puis dans les champs :
- data directory: moi j'utilise toujours ~/tagging/domaine (genre ~/tagging/radio ou ~/tagging/journaux), mais c'est à toi de voir où tu veux que les données et tags soient stockées sur ton serveur
- je conseille de cocher toutes les cases : python, spacy, fasttext, sbert, gpu
- python : "~/conda/envs/tigger/bin/python"
- fasttext : "~/cc.fr.300.bin" (càd qu'il faut donner le chemin du modèle sur ton serveur, pas juste le nom)
- spacy et SBERT : garder les valeurs par défaut pour la langue choisie

View File

@ -0,0 +1,22 @@
1. Get the application URL by running these commands:
{{- if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
{{- range .paths }}
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
{{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "test1.fullname" . }})
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "test1.fullname" . }}'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "test1.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "test1.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}

View File

@ -0,0 +1,62 @@
{{/*
Named templates shared by the manifests of this chart.
NOTE(review): the helpers carry the "test1." prefix although the chart appears to be named "activetigger";
the prefix is only a template identifier, the rendered names come from .Chart.Name / .Release.Name.
*/}}
{{/*
Expand the name of the chart.
Resolves to .Values.nameOverride when it is set, otherwise to .Chart.Name;
the result is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "test1.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
Precedence: .Values.fullnameOverride when set; otherwise the release name alone when it already
contains the chart name (or nameOverride); otherwise "<release name>-<name>".
*/}}
{{- define "test1.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
Any "+" in the version is replaced by "_" before the value is truncated.
*/}}
{{- define "test1.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
Chart label, the selector labels, the app version (only when .Chart.AppVersion is set)
and the managing service.
*/}}
{{- define "test1.labels" -}}
helm.sh/chart: {{ include "test1.chart" . }}
{{ include "test1.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
Name and instance labels; this is the set used for the Deployment's matchLabels and the Service's selector.
*/}}
{{- define "test1.selectorLabels" -}}
app.kubernetes.io/name: {{ include "test1.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Create the name of the service account to use
When serviceAccount.create is true: serviceAccount.name, defaulting to the fullname helper.
Otherwise: serviceAccount.name, defaulting to "default".
*/}}
{{- define "test1.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "test1.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,68 @@
{{/*
Deployment running the chart's single application container.
Image, probes, resources, security contexts, optional volumes and scheduling constraints
are all read from .Values; names and labels come from the "test1.*" helpers.
*/}}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "test1.fullname" . }}
labels:
{{- include "test1.labels" . | nindent 4 }}
spec:
# replicas is only rendered when autoscaling is disabled.
{{- if not .Values.autoscaling.enabled }}
replicas: {{ .Values.replicaCount }}
{{- end }}
selector:
matchLabels:
{{- include "test1.selectorLabels" . | nindent 6 }}
template:
metadata:
{{- with .Values.podAnnotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "test1.labels" . | nindent 8 }}
{{- with .Values.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
serviceAccountName: {{ include "test1.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
containers:
- name: {{ .Chart.Name }}
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
# The tag falls back to the chart appVersion when image.tag is empty.
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- name: http
# The container port is taken from service.port, i.e. the same number the Service exposes.
containerPort: {{ .Values.service.port }}
protocol: TCP
livenessProbe:
{{- toYaml .Values.livenessProbe | nindent 12 }}
readinessProbe:
{{- toYaml .Values.readinessProbe | nindent 12 }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
{{- with .Values.volumeMounts }}
volumeMounts:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.volumes }}
volumes:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}

View File

@ -0,0 +1,32 @@
{{/*
HorizontalPodAutoscaler targeting the chart's Deployment; rendered only when autoscaling.enabled is true.
A CPU and/or memory utilization metric is emitted for each target percentage that is set in the values.
NOTE(review): if neither target percentage is set, "metrics:" is rendered with no entries.
*/}}
{{- if .Values.autoscaling.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: {{ include "test1.fullname" . }}
labels:
{{- include "test1.labels" . | nindent 4 }}
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: {{ include "test1.fullname" . }}
minReplicas: {{ .Values.autoscaling.minReplicas }}
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
metrics:
{{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
{{- end }}
{{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,61 @@
{{/*
Ingress routing the configured hosts and paths to the chart's Service; rendered only when ingress.enabled is true.
The apiVersion and the backend syntax are chosen from the cluster version in .Capabilities.KubeVersion.GitVersion:
networking.k8s.io/v1 from 1.19, networking.k8s.io/v1beta1 from 1.14, extensions/v1beta1 below that.
On clusters older than 1.18, ingress.className is written as the "kubernetes.io/ingress.class" annotation
(unless that annotation is already present); from 1.18 on it is written as spec.ingressClassName.
*/}}
{{- if .Values.ingress.enabled -}}
{{- $fullName := include "test1.fullname" . -}}
{{- $svcPort := .Values.service.port -}}
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
{{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
{{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
{{- end }}
{{- end }}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1beta1
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
name: {{ $fullName }}
labels:
{{- include "test1.labels" . | nindent 4 }}
{{- with .Values.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
ingressClassName: {{ .Values.ingress.className }}
{{- end }}
{{- if .Values.ingress.tls }}
tls:
{{- range .Values.ingress.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
rules:
{{- range .Values.ingress.hosts }}
- host: {{ .host | quote }}
http:
paths:
{{- range .paths }}
- path: {{ .path }}
{{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
pathType: {{ .pathType }}
{{- end }}
backend:
{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
service:
name: {{ $fullName }}
port:
number: {{ $svcPort }}
{{- else }}
serviceName: {{ $fullName }}
servicePort: {{ $svcPort }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,15 @@
{{/*
Service exposing the application on service.port and forwarding to the container port named "http".
Pods are selected with the "test1.selectorLabels" helper.
*/}}
apiVersion: v1
kind: Service
metadata:
name: {{ include "test1.fullname" . }}
labels:
{{- include "test1.labels" . | nindent 4 }}
spec:
type: {{ .Values.service.type }}
ports:
- port: {{ .Values.service.port }}
targetPort: http
protocol: TCP
name: http
selector:
{{- include "test1.selectorLabels" . | nindent 4 }}

View File

@ -0,0 +1,13 @@
{{/*
ServiceAccount for the application pods; rendered only when serviceAccount.create is true.
Its name comes from the "test1.serviceAccountName" helper and token automounting from serviceAccount.automount.
*/}}
{{- if .Values.serviceAccount.create -}}
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "test1.serviceAccountName" . }}
labels:
{{- include "test1.labels" . | nindent 4 }}
{{- with .Values.serviceAccount.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
{{- end }}

View File

@ -0,0 +1,15 @@
{{/*
Helm test hook: a one-shot pod (restartPolicy Never) that runs wget against
"<fullname>:<service.port>" to check that the Service answers.
NOTE(review): the busybox image is referenced without a tag.
*/}}
apiVersion: v1
kind: Pod
metadata:
name: "{{ include "test1.fullname" . }}-test-connection"
labels:
{{- include "test1.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": test
spec:
containers:
- name: wget
image: busybox
command: ['wget']
args: ['{{ include "test1.fullname" . }}:{{ .Values.service.port }}']
restartPolicy: Never

View File

@ -0,0 +1,107 @@
# Default values for test1.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
replicaCount: 1
image:
  # Registry path of the image, without any URL scheme: the Deployment renders
  # "<repository>:<tag>", and an image reference cannot start with "https://" nor point at a
  # repository web page (".../src/branch/main/..."). This is the repo the CI pipeline publishes to.
  repository: code.groupe-genes.fr/datalab/docker-images-datalab/activetigger
  # NOTE(review): with the mutable "latest" tag, IfNotPresent keeps a node on the copy it
  # already pulled; consider "Always" or a versioned tag if every deploy must pick up a new build.
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: "latest"
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
serviceAccount:
# Specifies whether a service account should be created
create: true
# Automatically mount a ServiceAccount's API credentials?
automount: true
# Annotations to add to the service account
annotations: {}
# The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template
name: ""
podAnnotations: {}
podLabels: {}
podSecurityContext: {}
# fsGroup: 2000
securityContext: {}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
service:
type: ClusterIP
port: 8000
ingress:
enabled: false
className: ""
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
- host: chart-example.local
paths:
- path: /
pathType: ImplementationSpecific
tls: []
# - secretName: chart-example-tls
# hosts:
# - chart-example.local
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
livenessProbe:
httpGet:
path: /
port: http
readinessProbe:
httpGet:
path: /
port: http
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 100
targetCPUUtilizationPercentage: 80
# targetMemoryUtilizationPercentage: 80
# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
# secret:
# secretName: mysecret
# optional: false
# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
# mountPath: "/etc/foo"
# readOnly: true
nodeSelector: {}
tolerations: []
affinity: {}

View File

@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

View File

@ -0,0 +1,24 @@
apiVersion: v2
name: overleaf
description: A Helm chart for Kubernetes
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"

View File

@ -0,0 +1,20 @@
## Informations utiles
Exemple insee:
chart:
https://github.com/InseeFrLab/helm-charts-interactive-services/blob/main/charts/vscode-python/Chart.yaml
dockerfile:
https://github.com/InseeFrLab/images-datascience/blob/main/vscode/Dockerfile
OVERLEAF:
image:
https://github.com/overleaf/overleaf
https://github.com/overleaf/overleaf/blob/main/server-ce/Dockerfile
https://github.com/overleaf/toolkit/
https://github.com/overleaf/toolkit/blob/master/doc/quick-start-guide.md
helm chart exemple:
https://artifacthub.io/packages/helm/geek-cookbook/overleaf
https://github.com/geek-cookbook/charts/tree/main/charts/stable/overleaf

View File

@ -0,0 +1,22 @@
1. Get the application URL by running these commands:
{{- if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
{{- range .paths }}
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
{{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "overleaf.fullname" . }})
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "overleaf.fullname" . }}'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "overleaf.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "overleaf.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}

View File

@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
Resolves to .Values.nameOverride when it is set, otherwise to .Chart.Name;
the result is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "overleaf.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
Precedence: .Values.fullnameOverride when set; otherwise the release name alone when it already
contains the chart name (or nameOverride); otherwise "<release name>-<name>".
*/}}
{{- define "overleaf.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
Any "+" in the version is replaced by "_" before the value is truncated.
*/}}
{{- define "overleaf.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
Chart label, the selector labels, the app version (only when .Chart.AppVersion is set)
and the managing service.
*/}}
{{- define "overleaf.labels" -}}
helm.sh/chart: {{ include "overleaf.chart" . }}
{{ include "overleaf.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
Name and instance labels; this is the set used for the Deployment's matchLabels and the Service's selector.
*/}}
{{- define "overleaf.selectorLabels" -}}
app.kubernetes.io/name: {{ include "overleaf.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Create the name of the service account to use
When serviceAccount.create is true: serviceAccount.name, defaulting to the fullname helper.
Otherwise: serviceAccount.name, defaulting to "default".
*/}}
{{- define "overleaf.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "overleaf.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,68 @@
{{/*
Deployment running the chart's single application container.
Image, probes, resources, security contexts, optional volumes and scheduling constraints
are all read from .Values; names and labels come from the "overleaf.*" helpers.
*/}}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "overleaf.fullname" . }}
labels:
{{- include "overleaf.labels" . | nindent 4 }}
spec:
# replicas is only rendered when autoscaling is disabled.
{{- if not .Values.autoscaling.enabled }}
replicas: {{ .Values.replicaCount }}
{{- end }}
selector:
matchLabels:
{{- include "overleaf.selectorLabels" . | nindent 6 }}
template:
metadata:
{{- with .Values.podAnnotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "overleaf.labels" . | nindent 8 }}
{{- with .Values.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
serviceAccountName: {{ include "overleaf.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
containers:
- name: {{ .Chart.Name }}
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
# The tag falls back to the chart appVersion when image.tag is empty.
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- name: http
# The container port is taken from service.port, i.e. the same number the Service exposes.
containerPort: {{ .Values.service.port }}
protocol: TCP
livenessProbe:
{{- toYaml .Values.livenessProbe | nindent 12 }}
readinessProbe:
{{- toYaml .Values.readinessProbe | nindent 12 }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
{{- with .Values.volumeMounts }}
volumeMounts:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.volumes }}
volumes:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}

View File

@ -0,0 +1,32 @@
{{/*
HorizontalPodAutoscaler targeting the chart's Deployment; rendered only when autoscaling.enabled is true.
A CPU and/or memory utilization metric is emitted for each target percentage that is set in the values.
NOTE(review): if neither target percentage is set, "metrics:" is rendered with no entries.
*/}}
{{- if .Values.autoscaling.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: {{ include "overleaf.fullname" . }}
labels:
{{- include "overleaf.labels" . | nindent 4 }}
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: {{ include "overleaf.fullname" . }}
minReplicas: {{ .Values.autoscaling.minReplicas }}
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
metrics:
{{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
{{- end }}
{{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,61 @@
{{/*
Ingress routing the configured hosts and paths to the chart's Service; rendered only when ingress.enabled is true.
The apiVersion and the backend syntax are chosen from the cluster version in .Capabilities.KubeVersion.GitVersion:
networking.k8s.io/v1 from 1.19, networking.k8s.io/v1beta1 from 1.14, extensions/v1beta1 below that.
On clusters older than 1.18, ingress.className is written as the "kubernetes.io/ingress.class" annotation
(unless that annotation is already present); from 1.18 on it is written as spec.ingressClassName.
*/}}
{{- if .Values.ingress.enabled -}}
{{- $fullName := include "overleaf.fullname" . -}}
{{- $svcPort := .Values.service.port -}}
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
{{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
{{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
{{- end }}
{{- end }}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1beta1
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
name: {{ $fullName }}
labels:
{{- include "overleaf.labels" . | nindent 4 }}
{{- with .Values.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
ingressClassName: {{ .Values.ingress.className }}
{{- end }}
{{- if .Values.ingress.tls }}
tls:
{{- range .Values.ingress.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
rules:
{{- range .Values.ingress.hosts }}
- host: {{ .host | quote }}
http:
paths:
{{- range .paths }}
- path: {{ .path }}
{{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
pathType: {{ .pathType }}
{{- end }}
backend:
{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
service:
name: {{ $fullName }}
port:
number: {{ $svcPort }}
{{- else }}
serviceName: {{ $fullName }}
servicePort: {{ $svcPort }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,15 @@
{{/*
Service exposing the application on service.port and forwarding to the container port named "http".
Pods are selected with the "overleaf.selectorLabels" helper.
*/}}
apiVersion: v1
kind: Service
metadata:
name: {{ include "overleaf.fullname" . }}
labels:
{{- include "overleaf.labels" . | nindent 4 }}
spec:
type: {{ .Values.service.type }}
ports:
- port: {{ .Values.service.port }}
targetPort: http
protocol: TCP
name: http
selector:
{{- include "overleaf.selectorLabels" . | nindent 4 }}

View File

@ -0,0 +1,13 @@
{{/*
ServiceAccount for the application pods; rendered only when serviceAccount.create is true.
Its name comes from the "overleaf.serviceAccountName" helper and token automounting from serviceAccount.automount.
*/}}
{{- if .Values.serviceAccount.create -}}
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "overleaf.serviceAccountName" . }}
labels:
{{- include "overleaf.labels" . | nindent 4 }}
{{- with .Values.serviceAccount.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
{{- end }}

View File

@ -0,0 +1,15 @@
{{/*
Helm test hook: a one-shot pod (restartPolicy Never) that runs wget against
"<fullname>:<service.port>" to check that the Service answers.
NOTE(review): the busybox image is referenced without a tag.
*/}}
apiVersion: v1
kind: Pod
metadata:
name: "{{ include "overleaf.fullname" . }}-test-connection"
labels:
{{- include "overleaf.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": test
spec:
containers:
- name: wget
image: busybox
command: ['wget']
args: ['{{ include "overleaf.fullname" . }}:{{ .Values.service.port }}']
restartPolicy: Never

View File

@ -0,0 +1,107 @@
# Default values for overleaf.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
replicaCount: 1
image:
  repository: sharelatex/sharelatex
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  # Quoted so the tag stays a string: an unquoted number is parsed as a YAML number,
  # which would alter tags such as 4.10 or 1e3 when rendered.
  tag: "4"
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
serviceAccount:
# Specifies whether a service account should be created
create: true
# Automatically mount a ServiceAccount's API credentials?
automount: true
# Annotations to add to the service account
annotations: {}
# The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template
name: ""
podAnnotations: {}
podLabels: {}
podSecurityContext: {}
# fsGroup: 2000
securityContext: {}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
service:
type: ClusterIP
port: 80
ingress:
enabled: false
className: ""
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
- host: chart-example.local
paths:
- path: /
pathType: ImplementationSpecific
tls: []
# - secretName: chart-example-tls
# hosts:
# - chart-example.local
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
livenessProbe:
httpGet:
path: /
port: http
readinessProbe:
httpGet:
path: /
port: http
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 100
targetCPUUtilizationPercentage: 80
# targetMemoryUtilizationPercentage: 80
# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
# secret:
# secretName: mysecret
# optional: false
# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
# mountPath: "/etc/foo"
# readOnly: true
nodeSelector: {}
tolerations: []
affinity: {}

View File

@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

View File

@ -0,0 +1,24 @@
apiVersion: v2
name: test2
description: A Helm chart for Kubernetes
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"

View File

@ -0,0 +1,22 @@
1. Get the application URL by running these commands:
{{- if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
{{- range .paths }}
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
{{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "test2.fullname" . }})
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "test2.fullname" . }}'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "test2.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "test2.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}

View File

@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
Resolves to .Values.nameOverride when it is set, otherwise to .Chart.Name;
the result is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "test2.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
Precedence: .Values.fullnameOverride when set; otherwise the release name alone when it already
contains the chart name (or nameOverride); otherwise "<release name>-<name>".
*/}}
{{- define "test2.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
Any "+" in the version is replaced by "_" before the value is truncated.
*/}}
{{- define "test2.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
Chart label, the selector labels, the app version (only when .Chart.AppVersion is set)
and the managing service.
*/}}
{{- define "test2.labels" -}}
helm.sh/chart: {{ include "test2.chart" . }}
{{ include "test2.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
Name and instance labels; this is the set used for the Deployment's matchLabels.
*/}}
{{- define "test2.selectorLabels" -}}
app.kubernetes.io/name: {{ include "test2.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Create the name of the service account to use
When serviceAccount.create is true: serviceAccount.name, defaulting to the fullname helper.
Otherwise: serviceAccount.name, defaulting to "default".
*/}}
{{- define "test2.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "test2.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,68 @@
{{- /*
Deployment for the chart's single application container.

Values read: replicaCount, autoscaling.enabled, podAnnotations, podLabels,
imagePullSecrets, podSecurityContext, securityContext, image.*, service.port,
livenessProbe, readinessProbe, resources, volumeMounts, volumes, nodeSelector,
affinity, tolerations.
Named templates used: test2.fullname, test2.labels, test2.selectorLabels,
test2.serviceAccountName.
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "test2.fullname" . }}
  labels:
    {{- include "test2.labels" . | nindent 4 }}
spec:
  {{- /* replicas is omitted when autoscaling is enabled (the chart then renders an HPA). */}}
  {{- if not .Values.autoscaling.enabled }}
  replicas: {{ .Values.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "test2.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "test2.labels" . | nindent 8 }}
        {{- with .Values.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "test2.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      containers:
        - name: {{ .Chart.Name }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          {{- /* The image tag falls back to the chart appVersion when image.tag is empty. */}}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          ports:
            {{- /* The container port reuses service.port; the Service targets it by the name "http". */}}
            - name: http
              containerPort: {{ .Values.service.port }}
              protocol: TCP
          livenessProbe:
            {{- toYaml .Values.livenessProbe | nindent 12 }}
          readinessProbe:
            {{- toYaml .Values.readinessProbe | nindent 12 }}
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
          {{- with .Values.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      {{- with .Values.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}

View File

@ -0,0 +1,32 @@
{{- /*
HorizontalPodAutoscaler targeting the chart's Deployment.
Rendered only when autoscaling.enabled is true. A CPU and/or memory
utilization metric is added for each of targetCPUUtilizationPercentage and
targetMemoryUtilizationPercentage that is set.
*/ -}}
{{- if .Values.autoscaling.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "test2.fullname" . }}
  labels:
    {{- include "test2.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "test2.fullname" . }}
  minReplicas: {{ .Values.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
    {{- end }}
    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
    {{- end }}
{{- end }}

View File

@ -0,0 +1,61 @@
{{- /*
Ingress routing every configured host/path to the chart's Service.
Rendered only when ingress.enabled is true.

The apiVersion and the backend syntax are chosen from the cluster version
(.Capabilities.KubeVersion.GitVersion):
  >= 1.19  networking.k8s.io/v1       (backend.service.name / port.number)
  >= 1.14  networking.k8s.io/v1beta1  (backend.serviceName / servicePort)
  older    extensions/v1beta1         (backend.serviceName / servicePort)
*/ -}}
{{- if .Values.ingress.enabled -}}
{{- $fullName := include "test2.fullname" . -}}
{{- $svcPort := .Values.service.port -}}
{{- /*
On clusters older than 1.18, ingress.className is written into the
"kubernetes.io/ingress.class" annotation unless that annotation is already set.
*/ -}}
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
  {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
  {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
  {{- end }}
{{- end }}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1beta1
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
  name: {{ $fullName }}
  labels:
    {{- include "test2.labels" . | nindent 4 }}
  {{- with .Values.ingress.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
  ingressClassName: {{ .Values.ingress.className }}
  {{- end }}
  {{- if .Values.ingress.tls }}
  tls:
    {{- range .Values.ingress.tls }}
    - hosts:
        {{- range .hosts }}
        - {{ . | quote }}
        {{- end }}
      secretName: {{ .secretName }}
    {{- end }}
  {{- end }}
  rules:
    {{- range .Values.ingress.hosts }}
    - host: {{ .host | quote }}
      http:
        paths:
          {{- range .paths }}
          - path: {{ .path }}
            {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
            pathType: {{ .pathType }}
            {{- end }}
            backend:
              {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
              service:
                name: {{ $fullName }}
                port:
                  number: {{ $svcPort }}
              {{- else }}
              serviceName: {{ $fullName }}
              servicePort: {{ $svcPort }}
              {{- end }}
          {{- end }}
    {{- end }}
{{- end }}

View File

@ -0,0 +1,15 @@
{{- /*
Service exposing the chart's pods.
Listens on service.port and forwards to the container port named "http";
pods are selected with the "test2.selectorLabels" labels.
*/ -}}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "test2.fullname" . }}
  labels:
    {{- include "test2.labels" . | nindent 4 }}
spec:
  type: {{ .Values.service.type }}
  ports:
    - port: {{ .Values.service.port }}
      targetPort: http
      protocol: TCP
      name: http
  selector:
    {{- include "test2.selectorLabels" . | nindent 4 }}

View File

@ -0,0 +1,13 @@
{{- /*
ServiceAccount for the chart's pods.
Rendered only when serviceAccount.create is true; its name comes from
"test2.serviceAccountName" and annotations are added only when provided.
*/ -}}
{{- if .Values.serviceAccount.create -}}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "test2.serviceAccountName" . }}
  labels:
    {{- include "test2.labels" . | nindent 4 }}
  {{- with .Values.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
{{- end }}

View File

@ -0,0 +1,15 @@
{{- /*
Connection test pod, annotated as a Helm "test" hook.
Runs a single `wget` against "<fullname>:<service.port>" and never restarts.
NOTE(review): the busybox image is referenced without a tag; consider pinning one.
*/ -}}
apiVersion: v1
kind: Pod
metadata:
  name: "{{ include "test2.fullname" . }}-test-connection"
  labels:
    {{- include "test2.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": test
spec:
  containers:
    - name: wget
      image: busybox
      command: ['wget']
      args: ['{{ include "test2.fullname" . }}:{{ .Values.service.port }}']
  restartPolicy: Never

View File

@ -0,0 +1,107 @@
# Default values for test2.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Number of pod replicas; only applied when autoscaling.enabled is false.
replicaCount: 1

image:
  repository: nginx
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: ""

imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""

serviceAccount:
  # Specifies whether a service account should be created
  create: true
  # Automatically mount a ServiceAccount's API credentials?
  automount: true
  # Annotations to add to the service account
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""

podAnnotations: {}
podLabels: {}

podSecurityContext: {}
  # fsGroup: 2000

securityContext: {}
  # capabilities:
  #   drop:
  #   - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000

service:
  type: ClusterIP
  # Also used as the container port of the Deployment and by the test-connection pod.
  port: 80

ingress:
  enabled: false
  className: ""
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local

resources: {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

# Probes are copied verbatim into the container spec; "http" is the named container port.
livenessProbe:
  httpGet:
    path: /
    port: http
readinessProbe:
  httpGet:
    path: /
    port: http

autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80

# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
#   secret:
#     secretName: mysecret
#     optional: false

# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
#   mountPath: "/etc/foo"
#   readOnly: true

nodeSelector: {}

tolerations: []

affinity: {}

View File

@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

View File

@ -0,0 +1,24 @@
apiVersion: v2
# Chart name. The chart's named templates are prefixed with it
# (test3.name, test3.fullname, test3.labels, ...).
name: test3
# NOTE(review): this looks like the generic scaffold description; consider describing the actual service.
description: A Helm chart for Kubernetes
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
# The chart templates use it as the default image tag and as the app.kubernetes.io/version label.
appVersion: "1.16.0"

View File

@ -0,0 +1,22 @@
{{- /*
Post-install notes printed to the user.
Prints how to reach the application depending on the configuration:
  - ingress enabled: one URL per configured host/path (https when ingress.tls is set);
  - otherwise, shell commands matching service.type (NodePort, LoadBalancer or ClusterIP).
*/ -}}
1. Get the application URL by running these commands:
{{- if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
  {{- range .paths }}
  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
  {{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "test3.fullname" . }})
  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
  echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
           You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "test3.fullname" . }}'
  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "test3.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
  echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "test3.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  echo "Visit http://127.0.0.1:8080 to use your application"
  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}

View File

@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
Resolves to .Values.nameOverride when it is set, otherwise to .Chart.Name.
The result is truncated to 63 characters and a trailing "-" is trimmed.
*/}}
{{- define "test3.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
Resolution order:
  1. .Values.fullnameOverride, when set;
  2. .Release.Name alone, when it already contains the name
     (.Values.nameOverride, falling back to .Chart.Name);
  3. "<release name>-<name>" otherwise.
Every result is truncated to 63 characters and has a trailing "-" trimmed.
*/}}
{{- define "test3.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
Joins .Chart.Name and .Chart.Version with "-", replaces every "+" with "_",
then truncates to 63 characters and trims a trailing "-".
*/}}
{{- define "test3.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
Emits, in order: helm.sh/chart (from "test3.chart"), the selector labels
(from "test3.selectorLabels"), app.kubernetes.io/version (quoted, only when
.Chart.AppVersion is set) and app.kubernetes.io/managed-by (.Release.Service).
*/}}
{{- define "test3.labels" -}}
helm.sh/chart: {{ include "test3.chart" . }}
{{ include "test3.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
Emits app.kubernetes.io/name (from "test3.name") and
app.kubernetes.io/instance (.Release.Name).
*/}}
{{- define "test3.selectorLabels" -}}
app.kubernetes.io/name: {{ include "test3.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Create the name of the service account to use
When .Values.serviceAccount.create is true: .Values.serviceAccount.name,
falling back to "test3.fullname".
Otherwise: .Values.serviceAccount.name, falling back to the literal "default".
*/}}
{{- define "test3.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "test3.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,68 @@
{{- /*
Deployment for the chart's single application container.

Values read: replicaCount, autoscaling.enabled, podAnnotations, podLabels,
imagePullSecrets, podSecurityContext, securityContext, image.*, service.port,
livenessProbe, readinessProbe, resources, volumeMounts, volumes, nodeSelector,
affinity, tolerations.
Named templates used: test3.fullname, test3.labels, test3.selectorLabels,
test3.serviceAccountName.
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "test3.fullname" . }}
  labels:
    {{- include "test3.labels" . | nindent 4 }}
spec:
  {{- /* replicas is omitted when autoscaling is enabled (the chart then renders an HPA). */}}
  {{- if not .Values.autoscaling.enabled }}
  replicas: {{ .Values.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "test3.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "test3.labels" . | nindent 8 }}
        {{- with .Values.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "test3.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      containers:
        - name: {{ .Chart.Name }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          {{- /* The image tag falls back to the chart appVersion when image.tag is empty. */}}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          ports:
            {{- /* The container port reuses service.port; the Service targets it by the name "http". */}}
            - name: http
              containerPort: {{ .Values.service.port }}
              protocol: TCP
          livenessProbe:
            {{- toYaml .Values.livenessProbe | nindent 12 }}
          readinessProbe:
            {{- toYaml .Values.readinessProbe | nindent 12 }}
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
          {{- with .Values.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      {{- with .Values.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}

View File

@ -0,0 +1,32 @@
{{- /*
HorizontalPodAutoscaler targeting the chart's Deployment.
Rendered only when autoscaling.enabled is true. A CPU and/or memory
utilization metric is added for each of targetCPUUtilizationPercentage and
targetMemoryUtilizationPercentage that is set.
*/ -}}
{{- if .Values.autoscaling.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "test3.fullname" . }}
  labels:
    {{- include "test3.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "test3.fullname" . }}
  minReplicas: {{ .Values.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
    {{- end }}
    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
    {{- end }}
{{- end }}

View File

@ -0,0 +1,61 @@
{{- /*
Ingress routing every configured host/path to the chart's Service.
Rendered only when ingress.enabled is true.

The apiVersion and the backend syntax are chosen from the cluster version
(.Capabilities.KubeVersion.GitVersion):
  >= 1.19  networking.k8s.io/v1       (backend.service.name / port.number)
  >= 1.14  networking.k8s.io/v1beta1  (backend.serviceName / servicePort)
  older    extensions/v1beta1         (backend.serviceName / servicePort)
*/ -}}
{{- if .Values.ingress.enabled -}}
{{- $fullName := include "test3.fullname" . -}}
{{- $svcPort := .Values.service.port -}}
{{- /*
On clusters older than 1.18, ingress.className is written into the
"kubernetes.io/ingress.class" annotation unless that annotation is already set.
*/ -}}
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
  {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
  {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
  {{- end }}
{{- end }}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1beta1
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
  name: {{ $fullName }}
  labels:
    {{- include "test3.labels" . | nindent 4 }}
  {{- with .Values.ingress.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
  ingressClassName: {{ .Values.ingress.className }}
  {{- end }}
  {{- if .Values.ingress.tls }}
  tls:
    {{- range .Values.ingress.tls }}
    - hosts:
        {{- range .hosts }}
        - {{ . | quote }}
        {{- end }}
      secretName: {{ .secretName }}
    {{- end }}
  {{- end }}
  rules:
    {{- range .Values.ingress.hosts }}
    - host: {{ .host | quote }}
      http:
        paths:
          {{- range .paths }}
          - path: {{ .path }}
            {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
            pathType: {{ .pathType }}
            {{- end }}
            backend:
              {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
              service:
                name: {{ $fullName }}
                port:
                  number: {{ $svcPort }}
              {{- else }}
              serviceName: {{ $fullName }}
              servicePort: {{ $svcPort }}
              {{- end }}
          {{- end }}
    {{- end }}
{{- end }}

View File

@ -0,0 +1,15 @@
{{- /*
Service exposing the chart's pods.
Listens on service.port and forwards to the container port named "http";
pods are selected with the "test3.selectorLabels" labels.
*/ -}}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "test3.fullname" . }}
  labels:
    {{- include "test3.labels" . | nindent 4 }}
spec:
  type: {{ .Values.service.type }}
  ports:
    - port: {{ .Values.service.port }}
      targetPort: http
      protocol: TCP
      name: http
  selector:
    {{- include "test3.selectorLabels" . | nindent 4 }}

View File

@ -0,0 +1,13 @@
{{- /*
ServiceAccount for the chart's pods.
Rendered only when serviceAccount.create is true; its name comes from
"test3.serviceAccountName" and annotations are added only when provided.
*/ -}}
{{- if .Values.serviceAccount.create -}}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "test3.serviceAccountName" . }}
  labels:
    {{- include "test3.labels" . | nindent 4 }}
  {{- with .Values.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
{{- end }}

View File

@ -0,0 +1,15 @@
{{- /*
Connection test pod, annotated as a Helm "test" hook.
Runs a single `wget` against "<fullname>:<service.port>" and never restarts.
NOTE(review): the busybox image is referenced without a tag; consider pinning one.
*/ -}}
apiVersion: v1
kind: Pod
metadata:
  name: "{{ include "test3.fullname" . }}-test-connection"
  labels:
    {{- include "test3.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": test
spec:
  containers:
    - name: wget
      image: busybox
      command: ['wget']
      args: ['{{ include "test3.fullname" . }}:{{ .Values.service.port }}']
  restartPolicy: Never

View File

@ -0,0 +1,107 @@
# Default values for test3.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Number of pod replicas; only applied when autoscaling.enabled is false.
replicaCount: 1

image:
  repository: nginx
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: ""

imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""

serviceAccount:
  # Specifies whether a service account should be created
  create: true
  # Automatically mount a ServiceAccount's API credentials?
  automount: true
  # Annotations to add to the service account
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""

podAnnotations: {}
podLabels: {}

podSecurityContext: {}
  # fsGroup: 2000

securityContext: {}
  # capabilities:
  #   drop:
  #   - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000

service:
  type: ClusterIP
  # Also used as the container port of the Deployment and by the test-connection pod.
  port: 80

ingress:
  enabled: false
  className: ""
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local

resources: {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

# Probes are copied verbatim into the container spec; "http" is the named container port.
livenessProbe:
  httpGet:
    path: /
    port: http
readinessProbe:
  httpGet:
    path: /
    port: http

autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80

# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
#   secret:
#     secretName: mysecret
#     optional: false

# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
#   mountPath: "/etc/foo"
#   readOnly: true

nodeSelector: {}

tolerations: []

affinity: {}

View File

@ -0,0 +1,3 @@
## Helm charts test
Dépôt de test pour ajouter de nouveaux services au catalogue du datalab.

5
index.md Normal file
View File

@ -0,0 +1,5 @@
## Helm charts datalab du Genes
Dépôt regroupant l'ensemble des helm-charts utilisés pour le déploiement des services. Cette version est actuellement en test.
Voir le fichier "values.yaml" pour rajouter un nouveau dépôt au catalogue du datalab.

1
kk
View File

@ -1 +0,0 @@
hh

18
values.yaml Normal file
View File

@ -0,0 +1,18 @@
## Text to add to the values.yaml file of the onyxia app in order to register the "test" catalog (https://code.groupe-genes.fr/DSIT/kube-apps/src/branch/main/apps/onyxia/values.yaml)
## NOTE(review): "location" below still holds a placeholder; replace it with the repository URL once the repository exists.
## NOTE(review): "status" is set to "PROD" although the catalog is described as a test catalog; confirm this is intended.
{
"id": "test",
"name": "test",
"description": "test services.",
"maintainer": "support@groupe-genes.fr",
"location": "link vers dépots à rajouter une fois créé",
"status": "PROD",
"highlightedCharts":
[
"overleaf",
"activetigger",
"test2",
"test3",
],
"type": "helm",
},