from typing import List, Tuple, Union
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
from django.core.files.base import File
|
|
from django.db.models import Q
|
|
from django.db.transaction import atomic
|
|
from django.http import Http404
|
|
from rest_framework import status
|
|
from rest_framework.exceptions import APIException
|
|
from rest_framework.parsers import MultiPartParser
|
|
from rest_framework.permissions import IsAdminUser, IsAuthenticated
|
|
from rest_framework.request import Request
|
|
from rest_framework.response import Response
|
|
from rest_framework.views import APIView
|
|
|
|
from ..models import Administre, Poste
|
|
from ..models.competence import Competence
|
|
from ..models.domaine import Domaine
|
|
from ..models.filiere import Filiere
|
|
from ..serializers.alimentation import ChargementCompetencesSerializer
|
|
from ..utils import cleanString
|
|
from ..utils.alimentation_decorators import (data_perf_logger_factory,
|
|
get_data_logger)
|
|
from ..utils.decorators import execution_time, query_count
|
|
from ..utils_extraction import open_excel
|
|
|
|
|
|
class _SkillFiles():

    """ Groups the column-name constants of the skill upload files. """

    class Ref():

        """ Column constants of the skill reference workbook (same layout on every sheet). """

        PK = 'Macro compétence libellé court (15 caractères MAXI)' # column E (all sheets)
        CATEGORIE = 'Catégorie' # column A (all sheets)
        DOMAINE = 'Domaine' # column B (all sheets)
        FILIERE = 'Filière' # column C (all sheets)
        LIBELLE = 'Macro compétence libellé long' # column D (all sheets)

    class Specific():

        """ Column constants of the specific-skills workbook (sheet 0: administres, sheet 1: postes). """

        ADMINISTRE_PK = 'N SAP' # column A
        ADMINISTRE_COMPETENCE_1 = 'COMPETENCE 1' # column Q
        ADMINISTRE_COMPETENCE_2 = 'COMPETENCE 2' # column R
        ADMINISTRE_COMPETENCE_3 = 'COMPETENCE 2.1' # column S -- the '.1' suffix is how pandas 'read_excel' disambiguates duplicated headers

        POSTE_DOMAINE = 'DOM EIP' # column N
        POSTE_FILIERE = 'FIL EIP' # column O
        POSTE_FE = 'CODE FE' # column C
        POSTE_FONCTION = 'CODE FONCTION' # column T
        POSTE_NF = 'NR EIP' # column P
        POSTE_COMPETENCE_1 = 'COMPETENCE 1' # column Q -- NOTE: same value as ADMINISTRE_COMPETENCE_1
        POSTE_COMPETENCE_2 = 'COMPETENCE 2' # column R
        POSTE_COMPETENCE_3 = 'COMPETENCE 2.1' # column S -- the '.1' suffix is how pandas 'read_excel' disambiguates duplicated headers
|
|
|
|
|
|
class ChargementCompetenceView(APIView):
    """
    View dedicated to loading the skill files.

    - Loads and processes the skill workbooks (reference + specific).
    - Assigns the skills found to the matching administres and postes.
    """

    permission_classes = [IsAuthenticated, IsAdminUser]
    parser_classes = [MultiPartParser]
    serializer_class = ChargementCompetencesSerializer

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Dedicated data-load logger, named after this view.
        self.logger = get_data_logger(self)

    def get(self, request):
        """ Returns a placeholder message for the upload form. """
        return Response("Formulaire de chargement des référentiels de compétences")

    def _read_ref_skills(self, file: File) -> pd.DataFrame:
        """
        Extracts the data of the skill reference workbook.

        :param file: skill reference workbook
        :type file: class:`django.core.files.base.File`

        :return: DataFrame
        :rtype: class:`pandas.DataFrame`
        """
        COLS = _SkillFiles.Ref
        col_mapping = {
            COLS.PK: Competence.Cols.PK,
            COLS.LIBELLE: Competence.Cols.LIBELLE,
            COLS.CATEGORIE: Competence.Cols.CATEGORIE,
            COLS.DOMAINE: Competence.Cols.REL_DOMAINE + '_id',
            COLS.FILIERE: Competence.Cols.REL_FILIERE + '_id'
        }
        # The workbook holds 3 sheets with the same layout: read and stack them all,
        # then normalize missing values to None so the ORM receives proper NULLs.
        df = (pd.concat([open_excel(file, sheetname=i, usecols=col_mapping.keys(), engine='openpyxl') for i in range(3)])
              .dropna(subset=[COLS.PK])
              .fillna(np.nan)
              .astype(str)
              .replace([np.nan, 'nan'], [None, None]))
        # Sanitize the primary key: every non-alphanumeric character becomes '_'.
        df[COLS.PK] = df[COLS.PK].str.replace('[^a-zA-Z0-9]', '_', regex=True)
        return (df.drop_duplicates(subset=[COLS.PK])
                .rename(columns=col_mapping))

    def _read_specific_skills(self, administre: bool, file: File) -> pd.DataFrame:
        """
        Extracts the data of the specific-skills workbook.

        :param administre: True for administres, False for postes
        :type administre: bool

        :param file: specific-skills workbook
        :type file: class:`django.core.files.base.File`

        :return: DataFrame
        :rtype: class:`pandas.DataFrame`
        """
        COLS = _SkillFiles.Specific
        if administre:
            col_mapping = {COLS.ADMINISTRE_PK: Administre.Cols.PK}
            col_types = {COLS.ADMINISTRE_PK: 'int32'}
            cols_skill = [COLS.ADMINISTRE_COMPETENCE_1, COLS.ADMINISTRE_COMPETENCE_2, COLS.ADMINISTRE_COMPETENCE_3]
            col_skill = COLS.ADMINISTRE_COMPETENCE_1
            sheetname = 0
        else:
            col_mapping = {
                COLS.POSTE_DOMAINE: Poste.Cols.REL_DOMAINE + '_id',
                COLS.POSTE_FILIERE: Poste.Cols.REL_FILIERE + '_id',
                COLS.POSTE_FE: Poste.Cols.REL_FORMATION_EMPLOI + '_id',
                COLS.POSTE_FONCTION: Poste.Cols.FONCTION,
                COLS.POSTE_NF: Poste.Cols.NIVEAU_FONCTIONNEL,
            }
            col_types = {c: 'str' for c in col_mapping.keys()}
            cols_skill = [COLS.POSTE_COMPETENCE_1, COLS.POSTE_COMPETENCE_2, COLS.POSTE_COMPETENCE_3]
            col_skill = COLS.POSTE_COMPETENCE_1
            sheetname = 1

        # Read each of the 3 skill columns separately, rename them all to a single
        # common column ('col_skill') so the frames can be stacked vertically.
        dfs = []
        for temp_col_skill in cols_skill:
            dfs.append(open_excel(file, sheetname=sheetname, usecols=[*col_mapping.keys(), temp_col_skill], engine='openpyxl')
                       .rename(columns={temp_col_skill: col_skill}))
        df = (pd.concat(dfs)
              .dropna(subset=[col_skill])
              .fillna(np.nan)
              # BUGFIX: cast 'col_skill' rather than COLS.ADMINISTRE_COMPETENCE_1 — the
              # old code only worked in the 'poste' branch because POSTE_COMPETENCE_1
              # happens to share the same string value.
              .astype({**col_types, col_skill: 'str'})
              .replace([np.nan, 'nan'], [None, None]))
        # Sanitize the skill key exactly like the reference file does.
        df[col_skill] = df[col_skill].str.replace('[^a-zA-Z0-9]', '_', regex=True)
        return (df.drop_duplicates()
                .rename(columns=col_mapping))

    @atomic
    def _update_ref(self, df: pd.DataFrame, domaines_in_db: Union[Tuple[str], List[str]], filieres_in_db: Union[Tuple[str], List[str]]) -> None:
        """
        Updates the Competence table from the reference-data DataFrame.

        Rows referencing an unknown Domaine/Filiere are ignored (and logged);
        Competence rows absent from the file are deleted.

        :param df: reference data
        :type df: class:`pandas.DataFrame`

        :param domaines_in_db: keys of all Domaine rows in database
        :type domaines_in_db: Union[Tuple[str], List[str]]

        :param filieres_in_db: keys of all Filiere rows in database
        :type filieres_in_db: Union[Tuple[str], List[str]]
        """
        ModelType = Competence
        Cols = ModelType.Cols
        col_pk = Cols.PK
        fields_to_update = (Cols.LIBELLE, Cols.CATEGORIE, Cols.REL_DOMAINE + '_id', Cols.REL_FILIERE + '_id')
        # Snapshot of existing rows, keyed by pk, to decide create/update/no-op.
        models_in_db = {m.pk: m for m in ModelType.objects.only(col_pk, *fields_to_update)}

        batch_size = 100
        dict_create = {}
        dict_update = {}
        dict_up_to_date = {}
        error_count = 0
        to_ignore = {}
        for idx, rec in enumerate(df.to_dict('records')):
            pk = rec.get(col_pk)
            try:
                # Skip rows whose foreign keys are absent from the referentials.
                domaine = rec.get(Cols.REL_DOMAINE + '_id')
                if domaine is not None and domaine not in domaines_in_db:
                    to_ignore.setdefault(pk, {}).setdefault(Domaine, domaine)
                    continue
                filiere = rec.get(Cols.REL_FILIERE + '_id')
                if filiere is not None and filiere not in filieres_in_db:
                    to_ignore.setdefault(pk, {}).setdefault(Filiere, filiere)
                    continue

                in_db = models_in_db.get(pk)
                model = ModelType(pk=pk, **{f: rec.get(f) for f in fields_to_update})
                if not in_db:
                    model.full_clean(validate_unique=False)
                    dict_create.setdefault(pk, model)
                elif any(getattr(in_db, f) != getattr(model, f) for f in fields_to_update):
                    model.full_clean(validate_unique=False)
                    dict_update.setdefault(pk, model)
                else:
                    dict_up_to_date.setdefault(pk, model)
            except Exception:
                # Best effort: a bad row must not abort the whole load.
                error_count += 1
                self.logger.exception('%s une erreur est survenue à la ligne : %s (pk=%s)', ModelType.__name__, idx, pk)

        if error_count:
            self.logger.warning("%s(s) en erreur : %s", ModelType.__name__, error_count)

        if to_ignore:
            self.logger.warning('%s(s) ignorée(s) : %s', ModelType.__name__, len(to_ignore))
            for _pk, _dict in to_ignore.items():
                self.logger.warning('- %s car :', _pk)
                for _type, v in _dict.items():
                    self.logger.warning(' - %s absent(e) du référentiel : %s', _type.__name__, v)

        if dict_create:
            ModelType.objects.bulk_create(dict_create.values(), batch_size=batch_size)
            self.logger.info('%s(s) créée(s) : %s', ModelType.__name__, len(dict_create))

        # NOTE: the old 'if fields_to_update:' guard was removed — it is a non-empty
        # constant tuple, so the check was always true.
        if dict_update:
            ModelType.objects.bulk_update(dict_update.values(), batch_size=batch_size, fields=fields_to_update)
            self.logger.info('%s(s) mise(s) à jour : %s', ModelType.__name__, len(dict_update))

        if dict_up_to_date:
            self.logger.info('%s(s) déjà à jour : %s', ModelType.__name__, len(dict_up_to_date))

        # Anything not seen in the file (nor already in error/ignored sets) is stale.
        deleted = ModelType.objects.filter(~Q(pk__in={*dict_create.keys(), *dict_update.keys(), *dict_up_to_date.keys()})).delete()[0]
        if deleted:
            self.logger.info('%s(s) supprimée(s) : %s', ModelType.__name__, deleted)

    @atomic
    def _update_specific(self, administre: bool, df: pd.DataFrame, skills_in_db: Union[Tuple[str], List[str]]) -> None:
        """
        Rebuilds the M2M links between the model and the skills.

        All existing links are deleted, then recreated from the DataFrame.

        :param administre: True for administres, False for postes
        :type administre: bool

        :param df: reference data
        :type df: class:`pandas.DataFrame`

        :param skills_in_db: keys of all skills in database, other skills are ignored
        :type skills_in_db: Union[Tuple[str], List[str]]
        """
        if administre:
            ModelType = Administre
            Cols = ModelType.Cols
            LinkModelType = getattr(ModelType, Cols.M2M_COMPETENCES).through
            # Administres are matched by primary key only.
            fields_to_filter = (Cols.PK,)
            col_skill = _SkillFiles.Specific.ADMINISTRE_COMPETENCE_1
        else:
            ModelType = Poste
            Cols = ModelType.Cols
            LinkModelType = getattr(ModelType, Cols.M2M_COMPETENCES).through
            # Postes are matched by the (domaine, filiere, FE, fonction, NF) tuple.
            fields_to_filter = (Cols.REL_DOMAINE + '_id', Cols.REL_FILIERE + '_id', Cols.REL_FORMATION_EMPLOI + '_id', Cols.FONCTION, Cols.NIVEAU_FONCTIONNEL)
            col_skill = _SkillFiles.Specific.POSTE_COMPETENCE_1

        # Map each matching-key tuple to the set of skills it should be linked to.
        link_dict = {}
        to_ignore = set()
        for rec in df.to_dict('records'):
            skill = rec.get(col_skill)
            if skill not in skills_in_db:
                to_ignore.add(skill)
            else:
                key = tuple(rec.get(f) for f in fields_to_filter)
                link_dict.setdefault(key, set()).add(skill)
        if to_ignore:
            self.logger.warning('%s(s) ignorée(s) car absente(s) du référentiel : %s (%s)', Competence.__name__, len(to_ignore), to_ignore)

        batch_size = 100
        error_count = 0
        to_create = []
        for in_db in ModelType.objects.only('pk', *fields_to_filter):
            try:
                links = link_dict.get(tuple(getattr(in_db, f) for f in fields_to_filter)) or ()
                for link in links:
                    to_create.append(LinkModelType(**{f'{ModelType.__name__.lower()}_id': in_db.pk, 'competence_id': link}))
            except Exception:
                error_count += 1
                self.logger.exception("une erreur est survenue lors de l'ajout de lien(s) %s[pk=%s]/%s", ModelType.__name__, in_db.pk, Competence.__name__)

        if error_count:
            self.logger.warning("lien(s) %s/%s en erreur : %s", ModelType.__name__, Competence.__name__, error_count)

        # Full refresh: wipe all links then bulk-create the new ones (inside @atomic).
        deleted = LinkModelType.objects.all().delete()[0]
        if deleted:
            self.logger.info('lien(s) %s/%s supprimé(s) : %s', ModelType.__name__, Competence.__name__, deleted)

        if to_create:
            LinkModelType.objects.bulk_create(to_create, batch_size=batch_size)
            self.logger.info('lien(s) %s/%s créé(s) : %s', ModelType.__name__, Competence.__name__, len(to_create))

    @execution_time(warn_after=30000, logger_factory=data_perf_logger_factory)
    @query_count(warn_after=50, logger_factory=data_perf_logger_factory)
    def post(self, request: Request) -> Response:
        """
        Loads the skills, updates the Competence table and the M2M links with administres and postes.

        Each of the two phases (reference file, specific files) is best-effort:
        a failure is logged and the other phase still runs.

        :param request: Request holding the skill files
        :type request: rest_framework.request.Request

        :return: a message
        :rtype: class:`rest_framework.response.Response`

        :raises APIException: on any unexpected failure
        """
        try:
            # Validate and retrieve the uploaded files.
            ser = self.serializer_class(data=request.data)
            ser.is_valid(raise_exception=True)
            ref_skill_file = ser.validated_data.get('ref_skills')
            specific_skill_file = ser.validated_data.get('specific_skills')

            try:
                df_ref = self._read_ref_skills(ref_skill_file)
                self.logger.info('Lecture du fichier de référentiel de compétences ------> Succès')
                self._update_ref(df_ref,
                                 domaines_in_db=list(Domaine.objects.values_list('pk', flat=True)),
                                 filieres_in_db=list(Filiere.objects.values_list('pk', flat=True)))
                self.logger.info('Mise à jour du référentiel de compétences ------> Succès')
            except Exception as e:
                # Deliberate best-effort: skip the reference phase and continue.
                self.logger.info('Lecture du fichier de référentiel de compétences ------> Ignoré')
                self.logger.info(e)

            try:
                df_specific_administre = self._read_specific_skills(True, specific_skill_file)
                df_specific_poste = self._read_specific_skills(False, specific_skill_file)
                self.logger.info("Lecture des compétences particulières d'administrés ------> Succès")
                self.logger.info('Lecture des compétences particulières de postes ------> Succès')
                ref_data = list(Competence.objects.values_list('pk', flat=True))
                self._update_specific(True, df_specific_administre, ref_data)
                self.logger.info("Mise à jour des compétences particulières d'administrés ------> Succès")
                self._update_specific(False, df_specific_poste, ref_data)
                # BUGFIX: message said '------> Success' (English) unlike every sibling log line.
                self.logger.info('Mise à jour des compétences particulières de postes ------> Succès')
            except Exception as e:
                self.logger.info("Mise à jour des compétences particulières d'administrés et postes ------> Ignoré")
                self.logger.info(e)

            # NOTE(review): set literal — DRF's encoder renders it as a JSON list; confirm intended shape.
            return Response({'Insertion réussie'})
        except (Http404, APIException):
            raise
        except Exception:
            # BUGFIX: was 'except BaseException', which would also swallow
            # SystemExit/KeyboardInterrupt and convert them into APIException.
            message = "Impossible d'alimenter le référentiel de compétences"
            self.logger.exception(message)
            raise APIException(message)