init
This commit is contained in:
340
backend-django/backend/views/chargement_competences.py
Normal file
340
backend-django/backend/views/chargement_competences.py
Normal file
@@ -0,0 +1,340 @@
|
||||
from typing import List, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from django.core.files.base import File
|
||||
from django.db.models import Q
|
||||
from django.db.transaction import atomic
|
||||
from django.http import Http404
|
||||
from rest_framework import status
|
||||
from rest_framework.exceptions import APIException
|
||||
from rest_framework.parsers import MultiPartParser
|
||||
from rest_framework.permissions import IsAdminUser, IsAuthenticated
|
||||
from rest_framework.request import Request
|
||||
from rest_framework.response import Response
|
||||
from rest_framework.views import APIView
|
||||
|
||||
from ..models import Administre, Poste
|
||||
from ..models.competence import Competence
|
||||
from ..models.domaine import Domaine
|
||||
from ..models.filiere import Filiere
|
||||
from ..serializers.alimentation import ChargementCompetencesSerializer
|
||||
from ..utils import cleanString
|
||||
from ..utils.alimentation_decorators import (data_perf_logger_factory,
|
||||
get_data_logger)
|
||||
from ..utils.decorators import execution_time, query_count
|
||||
from ..utils_extraction import open_excel
|
||||
|
||||
|
||||
class _SkillFiles():
    """ Groups the constants describing the skill (competence) Excel files. """

    class Ref():
        """ Constants for the columns of the skill reference file. """

        # Column letters refer to the Excel sheets; each applies to all sheets.
        PK = 'Macro compétence libellé court (15 caractères MAXI)'  # column E (all sheets)
        CATEGORIE = 'Catégorie'  # column A (all sheets)
        DOMAINE = 'Domaine'  # column B (all sheets)
        FILIERE = 'Filière'  # column C (all sheets)
        LIBELLE = 'Macro compétence libellé long'  # column D (all sheets)

    class Specific():
        """ Constants for the columns of the specific-skills file. """

        ADMINISTRE_PK = 'N SAP'  # column A
        ADMINISTRE_COMPETENCE_1 = 'COMPETENCE 1'  # column Q
        ADMINISTRE_COMPETENCE_2 = 'COMPETENCE 2'  # column R
        ADMINISTRE_COMPETENCE_3 = 'COMPETENCE 2.1'  # column S — the '.1' suffix is documented in pandas 'read_excel' (duplicate header de-duplication)

        POSTE_DOMAINE = 'DOM EIP'  # column N
        POSTE_FILIERE = 'FIL EIP'  # column O
        POSTE_FE = 'CODE FE'  # column C
        POSTE_FONCTION = 'CODE FONCTION'  # column T
        POSTE_NF = 'NR EIP'  # column P
        POSTE_COMPETENCE_1 = 'COMPETENCE 1'  # column Q
        POSTE_COMPETENCE_2 = 'COMPETENCE 2'  # column R
        POSTE_COMPETENCE_3 = 'COMPETENCE 2.1'  # column S — the '.1' suffix is documented in pandas 'read_excel' (duplicate header de-duplication)
|
||||
|
||||
|
||||
class ChargementCompetenceView(APIView):
    """
    View dedicated to loading the skill (competence) files.

    - Loads and processes the skill files.
    - Assigns the skills found to the matching administres and postes.
    """

    permission_classes = [IsAuthenticated, IsAdminUser]
    parser_classes = [MultiPartParser]
    serializer_class = ChargementCompetencesSerializer

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Dedicated data-load logger, named after this view.
        self.logger = get_data_logger(self)

    def get(self, request):
        """Simple landing message describing the upload endpoint."""
        return Response("Formulaire de chargement des référentiels de compétences")

    def _read_ref_skills(self, file: File) -> pd.DataFrame:
        """
        Extract the data of the skill reference file.

        :param file: skill reference workbook
        :type file: class:`django.core.files.base.File`

        :return: one row per unique skill, columns renamed to Competence field names
        :rtype: class:`pandas.DataFrame`
        """
        COLS = _SkillFiles.Ref
        col_mapping = {
            COLS.PK: Competence.Cols.PK,
            COLS.LIBELLE: Competence.Cols.LIBELLE,
            COLS.CATEGORIE: Competence.Cols.CATEGORIE,
            COLS.DOMAINE: Competence.Cols.REL_DOMAINE + '_id',
            COLS.FILIERE: Competence.Cols.REL_FILIERE + '_id'
        }
        # The reference data is spread over the first three sheets of the workbook.
        df = (pd.concat([open_excel(file, sheetname=i, usecols=col_mapping.keys(), engine='openpyxl') for i in range(3)])
              .dropna(subset=[COLS.PK])
              .fillna(np.nan)
              .astype(str)
              .replace([np.nan, 'nan'], [None, None]))
        # Normalize primary keys: every non-alphanumeric character becomes '_'.
        df[COLS.PK] = df[COLS.PK].str.replace('[^a-zA-Z0-9]', '_', regex=True)
        return (df.drop_duplicates(subset=[COLS.PK])
                .rename(columns=col_mapping))

    def _read_specific_skills(self, administre: bool, file: File) -> pd.DataFrame:
        """
        Extract the data of the specific-skills file.

        :param administre: True for administres, False for postes
        :type administre: bool

        :param file: specific-skills workbook
        :type file: class:`django.core.files.base.File`

        :return: one row per (target, skill) pair, columns renamed to model field names
        :rtype: class:`pandas.DataFrame`
        """
        COLS = _SkillFiles.Specific
        if administre:
            col_mapping = {COLS.ADMINISTRE_PK: Administre.Cols.PK}
            col_types = {COLS.ADMINISTRE_PK: 'int32'}
            cols_skill = [COLS.ADMINISTRE_COMPETENCE_1, COLS.ADMINISTRE_COMPETENCE_2, COLS.ADMINISTRE_COMPETENCE_3]
            col_skill = COLS.ADMINISTRE_COMPETENCE_1
            sheetname = 0
        else:
            col_mapping = {
                COLS.POSTE_DOMAINE: Poste.Cols.REL_DOMAINE + '_id',
                COLS.POSTE_FILIERE: Poste.Cols.REL_FILIERE + '_id',
                COLS.POSTE_FE: Poste.Cols.REL_FORMATION_EMPLOI + '_id',
                COLS.POSTE_FONCTION: Poste.Cols.FONCTION,
                COLS.POSTE_NF: Poste.Cols.NIVEAU_FONCTIONNEL,
            }
            col_types = {c: 'str' for c in col_mapping.keys()}
            cols_skill = [COLS.POSTE_COMPETENCE_1, COLS.POSTE_COMPETENCE_2, COLS.POSTE_COMPETENCE_3]
            col_skill = COLS.POSTE_COMPETENCE_1
            sheetname = 1

        # Read each of the three skill columns separately, then stack them under
        # a single column name: one row per (target, skill) pair.
        dfs = []
        for temp_col_skill in cols_skill:
            dfs.append(open_excel(file, sheetname=sheetname, usecols=[*col_mapping.keys(), temp_col_skill], engine='openpyxl')
                       .rename(columns={temp_col_skill: col_skill}))
        # BUGFIX: cast the branch-dependent `col_skill`, not unconditionally
        # ADMINISTRE_COMPETENCE_1 — the original only worked because both
        # constants happen to share the same header text ('COMPETENCE 1').
        df = (pd.concat(dfs)
              .dropna(subset=[col_skill])
              .fillna(np.nan)
              .astype({**col_types, col_skill: 'str'})
              .replace([np.nan, 'nan'], [None, None]))
        # Normalize skill keys the same way as in the reference file.
        df[col_skill] = df[col_skill].str.replace('[^a-zA-Z0-9]', '_', regex=True)
        return (df.drop_duplicates()
                .rename(columns=col_mapping))

    @atomic
    def _update_ref(self, df: pd.DataFrame, domaines_in_db: Union[Tuple[str], List[str]], filieres_in_db: Union[Tuple[str], List[str]]) -> None:
        """
        Synchronize the Competence table with the reference DataFrame.

        Rows pointing to an unknown Domaine/Filiere are ignored; remaining rows
        are created, updated or left untouched; every Competence absent from the
        file is deleted (full-refresh semantics).

        :param df: reference data
        :type df: class:`pandas.DataFrame`

        :param domaines_in_db: keys of every Domaine in database
        :type domaines_in_db: Union[Tuple[str], List[str]]

        :param filieres_in_db: keys of every Filiere in database
        :type filieres_in_db: Union[Tuple[str], List[str]]
        """
        ModelType = Competence
        Cols = ModelType.Cols
        col_pk = Cols.PK
        fields_to_update = (Cols.LIBELLE, Cols.CATEGORIE, Cols.REL_DOMAINE + '_id', Cols.REL_FILIERE + '_id')
        models_in_db = {m.pk: m for m in ModelType.objects.only(col_pk, *fields_to_update)}

        batch_size = 100
        dict_create = {}
        dict_update = {}
        dict_up_to_date = {}
        error_count = 0
        to_ignore = {}
        # Hoisted to sets: O(1) membership tests inside the per-row loop
        # instead of O(n) list scans.
        domaines_set = set(domaines_in_db)
        filieres_set = set(filieres_in_db)
        for idx, rec in enumerate(df.to_dict('records')):
            pk = rec.get(col_pk)
            try:
                domaine = rec.get(Cols.REL_DOMAINE + '_id')
                if domaine is not None and domaine not in domaines_set:
                    to_ignore.setdefault(pk, {}).setdefault(Domaine, domaine)
                    continue
                filiere = rec.get(Cols.REL_FILIERE + '_id')
                if filiere is not None and filiere not in filieres_set:
                    to_ignore.setdefault(pk, {}).setdefault(Filiere, filiere)
                    continue

                in_db = models_in_db.get(pk)
                model = ModelType(pk=pk, **{f: rec.get(f) for f in fields_to_update})
                if not in_db:
                    model.full_clean(validate_unique=False)
                    dict_create.setdefault(pk, model)
                elif any(getattr(in_db, f) != getattr(model, f) for f in fields_to_update):
                    model.full_clean(validate_unique=False)
                    dict_update.setdefault(pk, model)
                else:
                    dict_up_to_date.setdefault(pk, model)
            except Exception:
                error_count += 1
                self.logger.exception('%s une erreur est survenue à la ligne : %s (pk=%s)', ModelType.__name__, idx, pk)

        if error_count:
            self.logger.warning("%s(s) en erreur : %s", ModelType.__name__, error_count)

        if to_ignore:
            self.logger.warning('%s(s) ignorée(s) : %s', ModelType.__name__, len(to_ignore))
            for _pk, _dict in to_ignore.items():
                self.logger.warning('- %s car :', _pk)
                for _type, v in _dict.items():
                    self.logger.warning(' - %s absent(e) du référentiel : %s', _type.__name__, v)

        if dict_create:
            ModelType.objects.bulk_create(dict_create.values(), batch_size=batch_size)
            self.logger.info('%s(s) créée(s) : %s', ModelType.__name__, len(dict_create))

        # NOTE: the original guarded this with `if fields_to_update:` — always
        # true for a non-empty tuple literal, so the dead guard was removed.
        if dict_update:
            ModelType.objects.bulk_update(dict_update.values(), batch_size=batch_size, fields=fields_to_update)
            self.logger.info('%s(s) mise(s) à jour : %s', ModelType.__name__, len(dict_update))

        if dict_up_to_date:
            self.logger.info('%s(s) déjà à jour : %s', ModelType.__name__, len(dict_up_to_date))

        # Any Competence not present in the file is removed.
        deleted = ModelType.objects.filter(~Q(pk__in={*dict_create.keys(), *dict_update.keys(), *dict_up_to_date.keys()})).delete()[0]
        if deleted:
            self.logger.info('%s(s) supprimée(s) : %s', ModelType.__name__, deleted)

    @atomic
    def _update_specific(self, administre: bool, df: pd.DataFrame, skills_in_db: Union[Tuple[str], List[str]]) -> None:
        """
        Rebuild the M2M links between the model and the skills.

        All existing links are deleted, then recreated from the DataFrame.

        :param administre: True for administres, False for postes
        :type administre: bool

        :param df: specific-skills data
        :type df: class:`pandas.DataFrame`

        :param skills_in_db: keys of every skill in database; other skills are ignored
        :type skills_in_db: Union[Tuple[str], List[str]]
        """
        if administre:
            ModelType = Administre
            Cols = ModelType.Cols
            LinkModelType = getattr(ModelType, Cols.M2M_COMPETENCES).through
            fields_to_filter = (Cols.PK,)
            col_skill = _SkillFiles.Specific.ADMINISTRE_COMPETENCE_1
        else:
            ModelType = Poste
            Cols = ModelType.Cols
            LinkModelType = getattr(ModelType, Cols.M2M_COMPETENCES).through
            fields_to_filter = (Cols.REL_DOMAINE + '_id', Cols.REL_FILIERE + '_id', Cols.REL_FORMATION_EMPLOI + '_id', Cols.FONCTION, Cols.NIVEAU_FONCTIONNEL)
            col_skill = _SkillFiles.Specific.POSTE_COMPETENCE_1

        # Hoisted to a set: O(1) membership tests inside the per-row loop.
        skills_set = set(skills_in_db)
        # Maps a tuple of filter-field values to the set of skill keys to link.
        link_dict = {}
        to_ignore = set()
        for rec in df.to_dict('records'):
            skill = rec.get(col_skill)
            if skill not in skills_set:
                to_ignore.add(skill)
            else:
                key = tuple(rec.get(f) for f in fields_to_filter)
                link_dict.setdefault(key, set()).add(skill)
        if to_ignore:
            self.logger.warning('%s(s) ignorée(s) car absente(s) du référentiel : %s (%s)', Competence.__name__, len(to_ignore), to_ignore)

        batch_size = 100
        error_count = 0
        to_create = []
        for in_db in ModelType.objects.only('pk', *fields_to_filter):
            try:
                links = link_dict.get(tuple(getattr(in_db, f) for f in fields_to_filter)) or ()
                for link in links:
                    to_create.append(LinkModelType(**{f'{ModelType.__name__.lower()}_id': in_db.pk, 'competence_id': link}))
            except Exception:
                error_count += 1
                self.logger.exception("une erreur est survenue lors de l'ajout de lien(s) %s[pk=%s]/%s", ModelType.__name__, in_db.pk, Competence.__name__)

        if error_count:
            self.logger.warning("lien(s) %s/%s en erreur : %s", ModelType.__name__, Competence.__name__, error_count)

        # Full refresh: delete every existing link before recreating them.
        deleted = LinkModelType.objects.all().delete()[0]
        if deleted:
            self.logger.info('lien(s) %s/%s supprimé(s) : %s', ModelType.__name__, Competence.__name__, deleted)

        if to_create:
            LinkModelType.objects.bulk_create(to_create, batch_size=batch_size)
            self.logger.info('lien(s) %s/%s créé(s) : %s', ModelType.__name__, Competence.__name__, len(to_create))

    @execution_time(warn_after=30000, logger_factory=data_perf_logger_factory)
    @query_count(warn_after=50, logger_factory=data_perf_logger_factory)
    def post(self, request: Request) -> Response:
        """
        Load the skills: refresh the Competence table and the M2M links with
        administres and postes.

        Each of the two phases (reference file, specific-skills file) is
        best-effort: a failure is logged and the other phase still runs.

        :param request: request carrying the skill files
        :type request: rest_framework.request.Request

        :return: a message
        :rtype: class:`rest_framework.response.Response`
        """
        try:
            # Validate and fetch the uploaded files.
            ser = self.serializer_class(data=request.data)
            ser.is_valid(raise_exception=True)
            ref_skill_file = ser.validated_data.get('ref_skills')
            specific_skill_file = ser.validated_data.get('specific_skills')

            try:
                df_ref = self._read_ref_skills(ref_skill_file)
                self.logger.info('Lecture du fichier de référentiel de compétences ------> Succès')
                self._update_ref(df_ref,
                                 domaines_in_db=list(Domaine.objects.values_list('pk', flat=True)),
                                 filieres_in_db=list(Filiere.objects.values_list('pk', flat=True)))
                self.logger.info('Mise à jour du référentiel de compétences ------> Succès')
            except Exception as e:
                self.logger.info('Lecture du fichier de référentiel de compétences ------> Ignoré')
                # BUGFIX: log with traceback instead of a bare info(e) that lost it.
                self.logger.exception(e)

            try:
                df_specific_administre = self._read_specific_skills(True, specific_skill_file)
                df_specific_poste = self._read_specific_skills(False, specific_skill_file)
                self.logger.info("Lecture des compétences particulières d'administrés ------> Succès")
                self.logger.info('Lecture des compétences particulières de postes ------> Succès')
                ref_data = list(Competence.objects.values_list('pk', flat=True))
                self._update_specific(True, df_specific_administre, ref_data)
                self.logger.info("Mise à jour des compétences particulières d'administrés ------> Succès")
                self._update_specific(False, df_specific_poste, ref_data)
                # BUGFIX: 'Success' -> 'Succès', consistent with every other message.
                self.logger.info('Mise à jour des compétences particulières de postes ------> Succès')
            except Exception as e:
                self.logger.info("Mise à jour des compétences particulières d'administrés et postes ------> Ignoré")
                # BUGFIX: log with traceback instead of a bare info(e) that lost it.
                self.logger.exception(e)

            # BUGFIX: the original returned Response({'Insertion réussie'}) — a
            # *set* literal, which DRF's JSON renderer cannot serialize.
            return Response('Insertion réussie')
        except (Http404, APIException):
            raise
        except Exception:
            # BUGFIX: Exception, not BaseException — never trap SystemExit /
            # KeyboardInterrupt in a request handler.
            message = "Impossible d'alimenter le référentiel de compétences"
            self.logger.exception(message)
            raise APIException(message)
|
||||
Reference in New Issue
Block a user