# Base image: Ubuntu 22.04.
FROM ubuntu:22.04

# Build-time only setting: keeps apt/dpkg from asking questions during installs.
ARG DEBIAN_FRONTEND=noninteractive

# Dependency manifests, placed at the filesystem root as /requirements.r and
# /requirementspython.txt. requirements.r is executed further down with Rscript;
# requirementspython.txt is not referenced by any instruction visible in this file.
COPY requirements.r requirementspython.txt /


# Install Python 3.10 tooling, R and wget from the Ubuntu archive.
# The package index is refreshed, used and removed in the same layer so that it
# is never stale when used and does not persist in the image.
# NOTE(review): --no-install-recommends is deliberately not used; the R package
# builds later in this file presumably rely on toolchain packages pulled in as
# recommendations — confirm before slimming this step down.
RUN apt-get update && apt-get install -y \
        python3-pip \
        python3.10 \
        python3.10-distutils \
        python3.10-venv \
        r-base \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install the shiny R package from CRAN.
# r-base is already installed by the apt step above, so it is not installed a
# second time here.
# install.packages() only emits a warning when a package fails to install, so
# library(shiny) is called afterwards: if shiny is missing, R exits with an
# error and the image build fails instead of silently continuing.
RUN R -e "install.packages('shiny', repos='https://cran.rstudio.com/'); library(shiny)"

# Install gdebi-core, then use it to install the Shiny Server .deb together
# with its dependencies.
# Everything runs in a single layer so that the package index is fresh when it
# is used, and neither the downloaded .deb nor the apt lists stay in the image.
# The version can be overridden with --build-arg SHINY_SERVER_VERSION=...
ARG SHINY_SERVER_VERSION=1.5.21.1012
RUN apt-get update \
    && apt-get install -y gdebi-core \
    && wget -O /tmp/shiny-server.deb \
        "https://download3.rstudio.org/ubuntu-18.04/x86_64/shiny-server-${SHINY_SERVER_VERSION}-amd64.deb" \
    && gdebi --non-interactive /tmp/shiny-server.deb \
    && rm -f /tmp/shiny-server.deb \
    && rm -rf /var/lib/apt/lists/*


## R packages (to be installed with the same R executable that Shiny Server uses)
RUN Rscript /requirements.r

## Python environment | TODO: check in requirementspython.txt whether the cu118 builds are installed this way


# Install Miniconda into /opt/conda in batch mode (-b: no prompts) and delete
# the installer in the same layer.
# NOTE(review): "Miniconda3-latest" is unpinned, so rebuilds may pick up a
# different conda; recent installers may also require accepting the channel
# Terms of Service before non-interactive use — confirm against the installer
# version actually in use.
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
    bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda && \
    rm Miniconda3-latest-Linux-x86_64.sh

# Put the conda binaries first on PATH. From here on, python/pip/pip3 resolve
# to /opt/conda/bin before the system interpreters.
ENV PATH="/opt/conda/bin:${PATH}"

# Create the "tigger" environment and activate it in interactive bash sessions.
#  - "-y" is required: without it conda stops at a confirmation prompt, which
#    cannot be answered during an image build.
#  - "python=3.10" selects the 3.10 series; "python==3.10" is an exact match
#    on version 3.10 only.
#  - "conda init bash" installs the shell hook into ~/.bashrc; without it the
#    appended "conda activate" line cannot work.
#  - "conda clean" removes the package cache in the layer that created it.
RUN conda create -y -n tigger python=3.10 && \
    conda init bash && \
    echo "conda activate tigger" >> ~/.bashrc && \
    conda clean -afy

# System dependencies: curl (used by the rustup step below) and a C/C++
# toolchain. The apt lists are removed in the layer that created them; deleting
# them in a separate RUN adds a layer without shrinking the image.
RUN apt-get update \
    && apt-get install -y build-essential curl \
    && rm -rf /var/lib/apt/lists/*

# PyTorch, looked up through the CUDA 11.8 wheel listing.
# NOTE(review): /opt/conda/bin is first on PATH at this point, so pip3
# presumably resolves to the conda *base* environment's pip, not to the
# "tigger" environment or the system Python — confirm this is the intended
# install target.
RUN pip3 install --no-cache-dir \
    torch torchvision torchaudio \
    -f https://download.pytorch.org/whl/cu118/torch_stable.html

RUN pip3 install --no-cache-dir six

# Install Rust using rustup.
# The installer is downloaded to a file instead of being piped into sh: with
# the default shell a pipeline only reports the status of its last command, so
# a failed or truncated download would go unnoticed. Here a curl failure stops
# the build, and the script is removed in the same layer.
RUN curl --proto '=https' --tlsv1.2 -sSf -o /tmp/rustup-init.sh https://sh.rustup.rs \
    && sh /tmp/rustup-init.sh -y \
    && rm -f /tmp/rustup-init.sh

# Add Cargo's bin directory to the PATH environment variable
ENV PATH="/root/.cargo/bin:${PATH}"

RUN pip3 install --no-cache-dir --upgrade setuptools


# Other Python libraries.
# Packages are installed one at a time, in this exact order, inside a single
# layer: the typing-inspect / typing-extensions pins are applied after the big
# libraries and before spacy, and the final set of versions depends on that
# sequence. "set -e" aborts the build on the first failing install, and
# --no-cache-dir keeps pip's download cache out of the image.
# argparse is no longer installed from PyPI: it is part of the Python 3
# standard library.
# NOTE(review): the two typing-* pins presumably work around a compatibility
# issue — confirm they are still needed.
RUN set -e; \
    for pkg in \
        datasets \
        fasttext \
        numpy \
        pandas \
        pyarrow \
        scikit-learn \
        sentence-transformers \
        transformers \
        typing-inspect==0.8.0 \
        typing-extensions==4.6.1 \
        spacy \
    ; do \
        pip3 install --no-cache-dir "$pkg"; \
    done

# Add any extra configuration here if needed

# Default command executed when the container starts
CMD ["/bin/bash"]

## Download the spaCy and fastText models

### French
# Dockerfile instructions do not perform shell tilde expansion: "WORKDIR ~"
# creates a directory literally named "~" under the current working directory.
# root's home directory is therefore spelled out, so that the fastText model
# ends up at /root/cc.fr.300.bin, i.e. "~/cc.fr.300.bin" for root.
WORKDIR /root
RUN python -m spacy download fr_core_news_sm

# Download and decompress in one layer so the .gz archive is not kept.
RUN wget https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.fr.300.bin.gz \
    && gunzip cc.fr.300.bin.gz


# For every new instance

## Copy the application sources to create the new instance (replace "tigger-name" with the name the instance will have, i.e. https://analytics.huma-num.fr/Prenom.Nom/tigger-name/)
# COPY does not expand "~" (it would be taken as a literal directory name
# relative to WORKDIR), so the destination is given as an absolute path under
# root's home. COPY creates missing destination directories itself, so no
# separate mkdir step is needed.
COPY activetigger/ /root/zPublish/shiny/tigger-name/

## In the application

## At the very top left, use the "+" button to "create project". Then fill in the fields:

## - data directory: I always use ~/tagging/<domain> (e.g. ~/tagging/radio or ~/tagging/journaux), but it is up to you to decide where the data and tags should be stored on your server
## - I recommend ticking every box: python, spacy, fasttext, sbert, gpu
## - python: "~/conda/envs/tigger/bin/python" (NOTE: in this image conda is installed under /opt/conda, so the interpreter is presumably /opt/conda/envs/tigger/bin/python — confirm)
## - fasttext: "~/cc.fr.300.bin" (i.e. give the full path to the model on your server, not just its name)
## - spacy and SBERT: keep the default values for the chosen language