diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..3419e72a --- /dev/null +++ b/.gitignore @@ -0,0 +1,76 @@ +.DS_Store +.ipynb_checkpoints/ +docs/node_modules +*/.pyc +.vscode + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + + +#Pydoc generated_file for OpenDBM Python API Documentation +docs/website/api/*api.md + +#docker dependencies for mac +.github/brew-colima +.github/brew-docker +# Distribution / packaging +speech/ +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg + + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + + +#pycharm +.idea/ + +#documentation +docs/.docusaurus +.docusaurus +docs/.nvmrc +.nvmrc +node_modules +docs/website/build/ +docs/sync-api-docs/generatedComponentApiDocs.js +docs/sync-api-docs/extracted.json \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 0951d4ec..e8ae4c6f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,12 @@ -FROM python:3.6 +FROM python:3.7 FROM ubuntu:18.04 MAINTAINER fnndsc "vijay.yadav@aicure.com" + +ENV LANG C.UTF-8 +ENV LC_ALL C.UTF-8 + RUN apt-get update && apt-get install -y python3-pip \ && apt-get install -y wget \ && apt-get install -y automake --upgrade \ @@ -20,21 +24,21 @@ RUN ln -sfn /usr/bin/pip3 /usr/bin/pip COPY . /app #cloning openface -WORKDIR /app/pkg +WORKDIR /app/opendbm/pkg RUN git clone https://github.com/AiCure/open_dbm.git -b openface RUN echo "Installing OpenFace..." -WORKDIR /app/pkg/open_dbm/OpenFace +WORKDIR /app/opendbm/pkg/open_dbm/OpenFace RUN bash ./download_models.sh RUN dpkg --configure -a RUN su -c ./install.sh RUN echo "Done OpenFace!" RUN echo "Cloning DeepSpeech..." 
-WORKDIR /app/pkg +WORKDIR /app/opendbm/pkg RUN git clone https://github.com/mozilla/DeepSpeech.git -WORKDIR /app/pkg/DeepSpeech +WORKDIR /app/opendbm/pkg/DeepSpeech RUN wget https://github.com/mozilla/DeepSpeech/releases/download/v0.9.1/deepspeech-0.9.1-models.pbmm RUN wget https://github.com/mozilla/DeepSpeech/releases/download/v0.9.1/deepspeech-0.9.1-models.scorer @@ -43,4 +47,4 @@ RUN pip install --upgrade pip RUN pip install -r requirements.txt RUN echo "Requirement txt done!" -CMD [ "python", "./process_data.py" ] +CMD [ "python", "./process_data.py" ] \ No newline at end of file diff --git a/dbm_lib/__init__.py b/dbm_lib/__init__.py deleted file mode 100644 index fc44a229..00000000 --- a/dbm_lib/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -file_name: init -project_name: DBM -created: 2020-20-07 -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -DBMLIB_PATH = os.path.dirname(__file__) -DBMLIB_SERVICE_CONFIG = os.path.abspath(os.path.join(DBMLIB_PATH, '../resources/services/services.yml')) -DBMLIB_FEATURE_CONFIG = os.path.abspath(os.path.join(DBMLIB_PATH, '../resources/features/raw_feature.yml')) -DBMLIB_DERIVE_FEATURE_CONFIG = os.path.abspath(os.path.join(DBMLIB_PATH, '../resources/features/derived_feature.yml')) \ No newline at end of file diff --git a/dbm_lib/config/config_raw_feature.py b/dbm_lib/config/config_raw_feature.py deleted file mode 100644 index b0fc683f..00000000 --- a/dbm_lib/config/config_raw_feature.py +++ /dev/null @@ -1,277 +0,0 @@ -""" -file_name: config_raw_feature -project_name: DBM -created: 2020-20-07 -""" - -import yaml -from dbm_lib import DBMLIB_FEATURE_CONFIG - -class ConfigRawReader(object): - """Summary - Read sevice end ponit - """ - def __init__(self, - feature_config_yml=None): - """Summary - Args: - feature_config_yml (None, optional): yml file defined service configuration - """ - - if feature_config_yml is None: - feature_config = 
DBMLIB_FEATURE_CONFIG - else: - feature_config = feature_config_yml - - with open(feature_config, 'r') as ymlfile: - config = yaml.load(ymlfile) - - #Verbal features - self.base_raw = config - self.err_reason = config['raw_feature']['error_reason'] - - #Output range - self.mov_headvel_start = config['raw_feature']['mov_headvel_start'] - self.mov_headvel_end = config['raw_feature']['mov_headvel_end'] - - #Acoustic variable - self.aco_int = config['raw_feature']['aco_int'] - self.aco_ff = config['raw_feature']['aco_ff'] - self.aco_voiceLabel = config['raw_feature']['aco_voiceLabel'] - self.aco_hnr = config['raw_feature']['aco_hnr'] - self.aco_gne = config['raw_feature']['aco_gne'] - self.aco_fm1 = config['raw_feature']['aco_fm1'] - self.aco_fm2 = config['raw_feature']['aco_fm2'] - self.aco_fm3 = config['raw_feature']['aco_fm3'] - self.aco_fm4 = config['raw_feature']['aco_fm4'] - self.aco_jitter = config['raw_feature']['aco_jitter'] - self.aco_shimmer = config['raw_feature']['aco_shimmer'] - self.aco_mfcc1 = config['raw_feature']['aco_mfcc1'] - self.aco_mfcc2 = config['raw_feature']['aco_mfcc2'] - self.aco_mfcc3 = config['raw_feature']['aco_mfcc3'] - self.aco_mfcc4 = config['raw_feature']['aco_mfcc4'] - self.aco_mfcc5 = config['raw_feature']['aco_mfcc5'] - self.aco_mfcc6 = config['raw_feature']['aco_mfcc6'] - self.aco_mfcc7 = config['raw_feature']['aco_mfcc7'] - self.aco_mfcc8 = config['raw_feature']['aco_mfcc8'] - self.aco_mfcc9 = config['raw_feature']['aco_mfcc9'] - self.aco_mfcc10 = config['raw_feature']['aco_mfcc10'] - self.aco_mfcc11 = config['raw_feature']['aco_mfcc11'] - self.aco_mfcc12 = config['raw_feature']['aco_mfcc12'] - self.aco_voiceFrame = config['raw_feature']['aco_voiceFrame'] - self.aco_totVoiceFrame = config['raw_feature']['aco_totVoiceFrame'] - self.aco_voicePct = config['raw_feature']['aco_voicePct'] - self.aco_pausetime = config['raw_feature']['aco_pausetime'] - self.aco_totaltime = config['raw_feature']['aco_totaltime'] - self.aco_speakingtime = 
config['raw_feature']['aco_speakingtime'] - self.aco_numpauses = config['raw_feature']['aco_numpauses'] - self.aco_pausefrac = config['raw_feature']['aco_pausefrac'] - - #Facial Action Unit (for consistency) - self.fac_AU01int = config['raw_feature']['fac_AU01int'] - self.fac_AU02int = config['raw_feature']['fac_AU02int'] - self.fac_AU04int = config['raw_feature']['fac_AU04int'] - self.fac_AU05int = config['raw_feature']['fac_AU05int'] - self.fac_AU06int = config['raw_feature']['fac_AU06int'] - self.fac_AU07int = config['raw_feature']['fac_AU07int'] - self.fac_AU09int = config['raw_feature']['fac_AU09int'] - self.fac_AU10int = config['raw_feature']['fac_AU10int'] - self.fac_AU12int = config['raw_feature']['fac_AU12int'] - self.fac_AU14int = config['raw_feature']['fac_AU14int'] - self.fac_AU15int = config['raw_feature']['fac_AU15int'] - self.fac_AU17int = config['raw_feature']['fac_AU17int'] - self.fac_AU20int = config['raw_feature']['fac_AU20int'] - self.fac_AU23int = config['raw_feature']['fac_AU23int'] - self.fac_AU25int = config['raw_feature']['fac_AU25int'] - self.fac_AU26int = config['raw_feature']['fac_AU26int'] - self.fac_AU45int = config['raw_feature']['fac_AU45int'] - self.fac_AU01pres = config['raw_feature']['fac_AU01pres'] - self.fac_AU02pres = config['raw_feature']['fac_AU02pres'] - self.fac_AU04pres = config['raw_feature']['fac_AU04pres'] - self.fac_AU05pres = config['raw_feature']['fac_AU05pres'] - self.fac_AU06pres = config['raw_feature']['fac_AU06pres'] - self.fac_AU07pres = config['raw_feature']['fac_AU07pres'] - self.fac_AU09pres = config['raw_feature']['fac_AU09pres'] - self.fac_AU10pres = config['raw_feature']['fac_AU10pres'] - self.fac_AU12pres = config['raw_feature']['fac_AU12pres'] - self.fac_AU14pres = config['raw_feature']['fac_AU14pres'] - self.fac_AU15pres = config['raw_feature']['fac_AU15pres'] - self.fac_AU17pres = config['raw_feature']['fac_AU17pres'] - self.fac_AU20pres = config['raw_feature']['fac_AU20pres'] - self.fac_AU23pres = 
config['raw_feature']['fac_AU23pres'] - self.fac_AU25pres = config['raw_feature']['fac_AU25pres'] - self.fac_AU26pres = config['raw_feature']['fac_AU26pres'] - self.fac_AU28pres = config['raw_feature']['fac_AU28pres'] - self.fac_AU45pres = config['raw_feature']['fac_AU45pres'] - - #Facial Landmarks (for consistency) - self.fac_LMK00disp = config['raw_feature']['fac_LMK00disp'] - self.fac_LMK01disp = config['raw_feature']['fac_LMK01disp'] - self.fac_LMK02disp = config['raw_feature']['fac_LMK02disp'] - self.fac_LMK03disp = config['raw_feature']['fac_LMK03disp'] - self.fac_LMK04disp = config['raw_feature']['fac_LMK04disp'] - self.fac_LMK05disp = config['raw_feature']['fac_LMK05disp'] - self.fac_LMK06disp = config['raw_feature']['fac_LMK06disp'] - self.fac_LMK07disp = config['raw_feature']['fac_LMK07disp'] - self.fac_LMK08disp = config['raw_feature']['fac_LMK08disp'] - self.fac_LMK09disp = config['raw_feature']['fac_LMK09disp'] - self.fac_LMK10disp = config['raw_feature']['fac_LMK10disp'] - self.fac_LMK11disp = config['raw_feature']['fac_LMK11disp'] - self.fac_LMK12disp = config['raw_feature']['fac_LMK12disp'] - self.fac_LMK13disp = config['raw_feature']['fac_LMK13disp'] - self.fac_LMK14disp = config['raw_feature']['fac_LMK14disp'] - self.fac_LMK15disp = config['raw_feature']['fac_LMK15disp'] - self.fac_LMK16disp = config['raw_feature']['fac_LMK16disp'] - self.fac_LMK17disp = config['raw_feature']['fac_LMK17disp'] - self.fac_LMK18disp = config['raw_feature']['fac_LMK18disp'] - self.fac_LMK19disp = config['raw_feature']['fac_LMK19disp'] - self.fac_LMK20disp = config['raw_feature']['fac_LMK20disp'] - self.fac_LMK21disp = config['raw_feature']['fac_LMK21disp'] - self.fac_LMK22disp = config['raw_feature']['fac_LMK22disp'] - self.fac_LMK23disp = config['raw_feature']['fac_LMK23disp'] - self.fac_LMK24disp = config['raw_feature']['fac_LMK24disp'] - self.fac_LMK25disp = config['raw_feature']['fac_LMK25disp'] - self.fac_LMK26disp = config['raw_feature']['fac_LMK26disp'] - 
self.fac_LMK27disp = config['raw_feature']['fac_LMK27disp'] - self.fac_LMK28disp = config['raw_feature']['fac_LMK28disp'] - self.fac_LMK29disp = config['raw_feature']['fac_LMK29disp'] - self.fac_LMK30disp = config['raw_feature']['fac_LMK30disp'] - self.fac_LMK31disp = config['raw_feature']['fac_LMK31disp'] - self.fac_LMK32disp = config['raw_feature']['fac_LMK32disp'] - self.fac_LMK33disp = config['raw_feature']['fac_LMK33disp'] - self.fac_LMK34disp = config['raw_feature']['fac_LMK34disp'] - self.fac_LMK35disp = config['raw_feature']['fac_LMK35disp'] - self.fac_LMK36disp = config['raw_feature']['fac_LMK36disp'] - self.fac_LMK37disp = config['raw_feature']['fac_LMK37disp'] - self.fac_LMK38disp = config['raw_feature']['fac_LMK38disp'] - self.fac_LMK39disp = config['raw_feature']['fac_LMK39disp'] - self.fac_LMK40disp = config['raw_feature']['fac_LMK40disp'] - self.fac_LMK41disp = config['raw_feature']['fac_LMK41disp'] - self.fac_LMK42disp = config['raw_feature']['fac_LMK42disp'] - self.fac_LMK43disp = config['raw_feature']['fac_LMK43disp'] - self.fac_LMK44disp = config['raw_feature']['fac_LMK44disp'] - self.fac_LMK45disp = config['raw_feature']['fac_LMK45disp'] - self.fac_LMK46disp = config['raw_feature']['fac_LMK46disp'] - self.fac_LMK47disp = config['raw_feature']['fac_LMK47disp'] - self.fac_LMK48disp = config['raw_feature']['fac_LMK48disp'] - self.fac_LMK49disp = config['raw_feature']['fac_LMK49disp'] - self.fac_LMK50disp = config['raw_feature']['fac_LMK50disp'] - self.fac_LMK51disp = config['raw_feature']['fac_LMK51disp'] - self.fac_LMK52disp = config['raw_feature']['fac_LMK52disp'] - self.fac_LMK53disp = config['raw_feature']['fac_LMK53disp'] - self.fac_LMK54disp = config['raw_feature']['fac_LMK54disp'] - self.fac_LMK55disp = config['raw_feature']['fac_LMK55disp'] - self.fac_LMK56disp = config['raw_feature']['fac_LMK56disp'] - self.fac_LMK57disp = config['raw_feature']['fac_LMK57disp'] - self.fac_LMK58disp = config['raw_feature']['fac_LMK58disp'] - 
self.fac_LMK59disp = config['raw_feature']['fac_LMK59disp'] - self.fac_LMK60disp = config['raw_feature']['fac_LMK60disp'] - self.fac_LMK61disp = config['raw_feature']['fac_LMK61disp'] - self.fac_LMK62disp = config['raw_feature']['fac_LMK62disp'] - self.fac_LMK63disp = config['raw_feature']['fac_LMK63disp'] - self.fac_LMK64disp = config['raw_feature']['fac_LMK64disp'] - self.fac_LMK65disp = config['raw_feature']['fac_LMK65disp'] - self.fac_LMK66disp = config['raw_feature']['fac_LMK66disp'] - self.fac_LMK67disp = config['raw_feature']['fac_LMK67disp'] - - #Facial features - self.hap_exp = config['raw_feature']['hap_exp'] - self.sad_exp = config['raw_feature']['sad_exp'] - self.sur_exp = config['raw_feature']['sur_exp'] - self.fea_exp = config['raw_feature']['fea_exp'] - self.ang_exp = config['raw_feature']['ang_exp'] - self.dis_exp = config['raw_feature']['dis_exp'] - self.con_exp = config['raw_feature']['con_exp'] - self.happ_occ = config['raw_feature']['happ_occ'] - self.sad_occ = config['raw_feature']['sad_occ'] - self.sur_occ = config['raw_feature']['sur_occ'] - self.fea_occ = config['raw_feature']['fea_occ'] - self.ang_occ = config['raw_feature']['ang_occ'] - self.dis_occ = config['raw_feature']['dis_occ'] - self.con_occ = config['raw_feature']['con_occ'] - self.pos_exp = config['raw_feature']['pos_exp'] - self.neg_exp = config['raw_feature']['neg_exp'] - self.neu_exp = config['raw_feature']['neu_exp'] - self.cai_exp = config['raw_feature']['cai_exp'] - self.com_exp = config['raw_feature']['com_exp'] - self.com_lower_exp = config['raw_feature']['com_lower_exp'] - self.com_upper_exp = config['raw_feature']['com_upper_exp'] - self.pai_exp = config['raw_feature']['pai_exp'] - self.hap_exp_full = config['raw_feature']['hap_exp_full'] - self.sad_exp_full = config['raw_feature']['sad_exp_full'] - self.sur_exp_full = config['raw_feature']['sur_exp_full'] - self.fea_exp_full = config['raw_feature']['fea_exp_full'] - self.ang_exp_full = 
config['raw_feature']['ang_exp_full'] - self.dis_exp_full = config['raw_feature']['dis_exp_full'] - self.con_exp_full = config['raw_feature']['con_exp_full'] - self.pos_exp_full = config['raw_feature']['pos_exp_full'] - self.neg_exp_full = config['raw_feature']['neg_exp_full'] - self.neu_exp_full = config['raw_feature']['neu_exp_full'] - self.cai_exp_full = config['raw_feature']['cai_exp_full'] - self.com_exp_full = config['raw_feature']['com_exp_full'] - self.com_lower_exp_full = config['raw_feature']['com_lower_exp_full'] - self.com_upper_exp_full = config['raw_feature']['com_upper_exp_full'] - self.pai_exp_full = config['raw_feature']['pai_exp_full'] - self.fac_AsymMaskMouth = config['raw_feature']['fac_AsymMaskMouth'] - self.fac_AsymMaskEye = config['raw_feature']['fac_AsymMaskEye'] - self.fac_AsymMaskEyebrow = config['raw_feature']['fac_AsymMaskEyebrow'] - self.fac_AsymMaskCom = config['raw_feature']['fac_AsymMaskCom'] - - #Movement features - self.head_vel = config['raw_feature']['head_vel'] - self.mov_blink_ear = config['raw_feature']['mov_blink_ear'] - self.vid_dur = config['raw_feature']['vid_dur'] - self.fps = config['raw_feature']['fps'] - self.mov_blinkframes = config['raw_feature']['mov_blinkframes'] - self.mov_blinkdur = config['raw_feature']['mov_blinkdur'] - self.mov_Hpose_Pitch = config['raw_feature']['mov_Hpose_Pitch'] - self.mov_Hpose_Yaw = config['raw_feature']['mov_Hpose_Yaw'] - self.mov_Hpose_Roll = config['raw_feature']['mov_Hpose_Roll'] - self.mov_Hpose_Dist = config['raw_feature']['mov_Hpose_Dist'] - - self.mov_freq_trem_freq = config['raw_feature']['mov_freq_trem_freq'] - self.mov_freq_trem_index = config['raw_feature']['mov_freq_trem_index'] - self.mov_freq_trem_pindex = config['raw_feature']['mov_freq_trem_pindex'] - self.mov_amp_trem_freq = config['raw_feature']['mov_amp_trem_freq'] - self.mov_amp_trem_index = config['raw_feature']['mov_amp_trem_index'] - self.mov_amp_trem_pindex = config['raw_feature']['mov_amp_trem_pindex'] - - 
self.fac_tremor_median_5 = config['raw_feature']['fac_tremor_median_5'] - self.fac_tremor_median_12 = config['raw_feature']['fac_tremor_median_12'] - self.fac_tremor_median_8 = config['raw_feature']['fac_tremor_median_8'] - self.fac_tremor_median_48 = config['raw_feature']['fac_tremor_median_48'] - self.fac_tremor_median_54 = config['raw_feature']['fac_tremor_median_54'] - self.fac_tremor_median_28 = config['raw_feature']['fac_tremor_median_28'] - self.fac_tremor_median_51 = config['raw_feature']['fac_tremor_median_51'] - self.fac_tremor_median_66 = config['raw_feature']['fac_tremor_median_66'] - self.fac_tremor_median_57 = config['raw_feature']['fac_tremor_median_57'] - - self.mov_leye_x = config['raw_feature']['mov_leye_x'] - self.mov_leye_y = config['raw_feature']['mov_leye_y'] - self.mov_leye_z = config['raw_feature']['mov_leye_z'] - self.mov_reye_x = config['raw_feature']['mov_reye_x'] - self.mov_reye_y = config['raw_feature']['mov_reye_y'] - self.mov_reye_z = config['raw_feature']['mov_reye_z'] - self.mov_eleft_disp = config['raw_feature']['mov_eleft_disp'] - self.mov_eright_disp = config['raw_feature']['mov_eright_disp'] - - #NLP features - self.nlp_transcribe = config['raw_feature']['nlp_transcribe'] - self.nlp_numSentences = config['raw_feature']['nlp_numSentences'] - self.nlp_singPronPerAns = config['raw_feature']['nlp_singPronPerAns'] - self.nlp_singPronPerSen = config['raw_feature']['nlp_singPronPerSen'] - self.nlp_pastTensePerAns = config['raw_feature']['nlp_pastTensePerAns'] - self.nlp_pastTensePerSen = config['raw_feature']['nlp_pastTensePerSen'] - self.nlp_pronounsPerAns = config['raw_feature']['nlp_pronounsPerAns'] - self.nlp_pronounsPerSen = config['raw_feature']['nlp_pronounsPerSen'] - self.nlp_verbsPerAns = config['raw_feature']['nlp_verbsPerAns'] - self.nlp_verbsPerSen = config['raw_feature']['nlp_verbsPerSen'] - self.nlp_adjectivesPerAns = config['raw_feature']['nlp_adjectivesPerAns'] - self.nlp_adjectivesPerSen = 
config['raw_feature']['nlp_adjectivesPerSen'] - self.nlp_nounsPerAns = config['raw_feature']['nlp_nounsPerAns'] - self.nlp_nounsPerSen = config['raw_feature']['nlp_nounsPerSen'] - self.nlp_sentiment_mean = config['raw_feature']['nlp_sentiment_mean'] - self.nlp_mattr = config['raw_feature']['nlp_mattr'] - self.nlp_wordsPerMin = config['raw_feature']['nlp_wordsPerMin'] - self.nlp_totalTime = config['raw_feature']['nlp_totalTime'] - diff --git a/dbm_lib/controller/process_feature.py b/dbm_lib/controller/process_feature.py deleted file mode 100644 index 630709aa..00000000 --- a/dbm_lib/controller/process_feature.py +++ /dev/null @@ -1,164 +0,0 @@ -""" -file_name: process_features -project_name: DBM -created: 2020-20-07 -""" - -from dbm_lib.dbm_features.raw_features.audio import intensity, pitch_freq, hnr, gne, voice_frame_score, formant_freq -from dbm_lib.dbm_features.raw_features.audio import pause_segment, jitter, shimmer, mfcc -from dbm_lib.dbm_features.raw_features.video import face_asymmetry, face_au, face_emotion_expressivity, face_landmark -from dbm_lib.dbm_features.raw_features.movement import head_motion, eye_blink, eye_gaze, voice_tremor, facial_tremor -from dbm_lib.dbm_features.raw_features.nlp import transcribe, speech_features - - -import subprocess -import logging -from os.path import isfile, splitext, basename, dirname, join -import glob -import os - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -def audio_to_wav(input_filepath): - """ Extracts a video's audio file and saves it to wav - Args: - input_filepath: (str) - Returns: - """ - try: - - fname, _ = splitext(input_filepath) - output_filepath = fname + '.wav' - - if not isfile(output_filepath): - call = ['ffmpeg', '-i', input_filepath, '-vn', '-acodec', 'pcm_s16le', '-ar', '44100', output_filepath] - - logger.info('Converting audio from {} to wav'.format(input_filepath)) - subprocess.check_output(call) - logger.info('wav output saved in {}'.format(output_filepath)) - else: - 
logger.info('Output file {} already exists'.format(output_filepath)) - - except Exception as e: - logger.error('Failed to extract audio from Video') - -def process_acoustic(video_uri, out_dir, dbm_group, r_config): - """ - processing acoustic features - Args: - video_uri: video path; out_dir: raw variable output dir - dbm_group: list of features group to process; r_config: raw feature config object - """ - if dbm_group != None and len(dbm_group)>0 and 'acoustic' not in dbm_group: - return - - logger.info('Processing acoustic variables from data in {}'.format(video_uri)) - logger.info('processing audio intensity....') - intensity.run_intensity(video_uri, out_dir, r_config) - - logger.info('processing audio pitch freq....') - pitch_freq.run_pitch(video_uri, out_dir, r_config) - - logger.info('processing HNR....') - hnr.run_hnr(video_uri, out_dir, r_config) - - logger.info('processing GNE....') - gne.run_gne(video_uri, out_dir, r_config) - - logger.info('processing voice frame score....') - voice_frame_score.run_vfs(video_uri, out_dir, r_config) - - logger.info('processing formant frequency....') - formant_freq.run_formant(video_uri, out_dir, r_config) - - logger.info('processing pause segment....') - pause_segment.run_pause_segment(video_uri, out_dir, r_config) - - logger.info('processing jitter....') - jitter.run_jitter(video_uri, out_dir, r_config) - - logger.info('processing shimmer....') - shimmer.run_shimmer(video_uri, out_dir, r_config) - - logger.info('processing mfcc....') - mfcc.run_mfcc(video_uri, out_dir, r_config) - -def process_facial(video_uri, out_dir, dbm_group, r_config): - """ - processing facial features - Args: - video_uri: video path; out_dir: raw variable output dir - dbm_group: list of features to process; r_config: raw feature config object - """ - if dbm_group != None and len(dbm_group)>0 and 'facial' not in dbm_group: - return - - logger.info('Processing facial variables from data in {}'.format(video_uri)) - logger.info('processing facial 
asymmetry....') - face_asymmetry.run_face_asymmetry(video_uri, out_dir, r_config) - - logger.info('processing facial Action Unit....') - face_au.run_face_au(video_uri, out_dir, r_config) - - logger.info('processing facial expressivity....') - face_emotion_expressivity.run_face_expressivity(video_uri, out_dir, r_config) - - logger.info('processing facial landmark....') - face_landmark.run_face_landmark(video_uri, out_dir, r_config) - -def process_movement(video_uri, out_dir, dbm_group, r_config, dlib_model): - """ - processing facial features - Args: - video_uri: video path; out_dir: raw variable output dir - dbm_group: list of features to process; r_config: raw feature config object - dlib_model: shape predictor model path - """ - if dbm_group != None and len(dbm_group)>0 and 'movement' not in dbm_group: - return - - logger.info('Processing movement variables from data in {}'.format(video_uri)) - - logger.info('processing head movement....') - head_motion.run_head_movement(video_uri, out_dir, r_config) - - logger.info('processing eye blink....') - eye_blink.run_eye_blink(video_uri, out_dir, r_config, dlib_model) - - logger.info('processing eye gaze....') - eye_gaze.run_eye_gaze(video_uri, out_dir, r_config) - - logger.info('processing voice tremor....') - voice_tremor.run_vtremor(video_uri, out_dir, r_config) - - logger.info('processing facial tremor....') - facial_tremor.fac_tremor_process(video_uri, out_dir, r_config, model_output=True) - - -def process_nlp(video_uri, out_dir, dbm_group, tran_tog, r_config, deep_path): - """ - processing nlp features - Args: - video_uri: video path; out_dir: raw variable output dir - dbm_group: list of features to process; r_config: raw feature config object - deep_path: deep speech build path - """ - if dbm_group != None and len(dbm_group)>0 and 'speech' not in dbm_group: - return - - logger.info('Processing nlp variables from data in {}'.format(video_uri)) - transcribe.run_transcribe(video_uri, out_dir, r_config, deep_path) - 
speech_features.run_speech_feature(video_uri, out_dir, r_config, tran_tog) - - -def remove_file(file_path, file_ext = '.wav'): - """ - removing wav file - """ - file_dir = dirname(file_path) - file_name, _ = splitext(basename(file_path)) - wav_file = glob.glob(join(file_dir, file_name + file_ext)) - - if len(wav_file)> 0: - os.remove(wav_file[0]) diff --git a/dbm_lib/dbm_features/derived_features/__init__.py b/dbm_lib/dbm_features/derived_features/__init__.py deleted file mode 100644 index 4a214911..00000000 --- a/dbm_lib/dbm_features/derived_features/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -file_name: init -project_name: DBM -created: 2020-20-07 -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - diff --git a/dbm_lib/dbm_features/raw_features/audio/formant_freq.py b/dbm_lib/dbm_features/raw_features/audio/formant_freq.py deleted file mode 100644 index 64568ac7..00000000 --- a/dbm_lib/dbm_features/raw_features/audio/formant_freq.py +++ /dev/null @@ -1,133 +0,0 @@ -""" -file_name: formant_freq -project_name: DBM -created: 2020-20-07 -""" - -import pandas as pd -import parselmouth -import numpy as np -import parselmouth -import librosa -import glob -from os.path import join -import logging - -from dbm_lib.dbm_features.raw_features.util import util as ut - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -formant_dir = 'acoustic/formant_freq' -csv_ext = '_formant.csv' -error_txt = 'error: length less than 0.064' - -def formant_list(formant,snd): - """ - Getting formant frequency per second - Args: - formant: Formant object for sound wave - snd: Parselmouth sound object - Returns: - List of first through fourth formant for each frame - """ - f1_list = [] - f2_list = [] - f3_list = [] - f4_list = [] - - dur = snd.duration-0.02 - dur_round = round(dur, 2) - - time_list = np.arange(0.001, dur_round, 0.001) - for time in time_list: - - f1 = formant.get_value_at_time(1,time) - 
f2 = formant.get_value_at_time(2,time) - f3 = formant.get_value_at_time(3,time) - f4 = formant.get_value_at_time(4,time) - - f1_list.append(f1) - f2_list.append(f2) - f3_list.append(f3) - f4_list.append(f4) - return f1_list,f2_list,f3_list,f4_list - -def formant_score(path): - """ - Using parselmouth library fetching Formant Frequency - Args: - path: (.wav) audio file location - Returns: - (list) list of Formant freq for each voice frame - """ - sound_pat = parselmouth.Sound(path) - formant = sound_pat.to_formant_burg(time_step=.001) - f_score = formant_list(formant,sound_pat) - return f_score - -def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config): - """ - Preparing Formant freq matrix - Args: - audio_file: (.wav) parsed audio file; fl_name: input file name - out_loc: (str) Output directory; r_config: raw variable config - """ - - f1_list,f2_list,f3_list,f4_list = formant_score(audio_file) - df_formant = pd.DataFrame(f1_list, columns=[r_config.aco_fm1]) - - df_formant[r_config.aco_fm2] = f2_list - df_formant[r_config.aco_fm3] = f3_list - df_formant[r_config.aco_fm4] = f4_list - - df_formant.replace('', np.nan, regex=True,inplace=True) - df_formant[r_config.err_reason] = 'Pass'# will replace with threshold in future release - - df_formant['Frames'] = df_formant.index - df_formant['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_formant, out_loc, fl_name, formant_dir, csv_ext) - -def empty_fm(video_uri, out_loc, fl_name, r_config): - - """ - Preparing empty formant frequency matrix if something fails - """ - cols = ['Frames', r_config.aco_fm1, r_config.aco_fm2, r_config.aco_fm3, r_config.aco_fm4, r_config.err_reason] - out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, error_txt]] - df_fm = pd.DataFrame(out_val, columns = cols) - df_fm['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext) - -def 
run_formant(video_uri, out_dir, r_config): - - """ - Processing all patient's for fetching Formant freq - --------------- - --------------- - Args: - video_uri: video path; r_config: raw variable config object - out_dir: (str) Output directory for processed output - """ - try: - - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: - - audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) - - if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) - - empty_fm(video_uri, out_loc, fl_name, r_config) - return - - calc_formant(video_uri, audio_file, out_loc, fl_name, r_config) - except Exception as e: - logger.error('Failed to process audio file') diff --git a/dbm_lib/dbm_features/raw_features/audio/gne.py b/dbm_lib/dbm_features/raw_features/audio/gne.py deleted file mode 100644 index 00a98f81..00000000 --- a/dbm_lib/dbm_features/raw_features/audio/gne.py +++ /dev/null @@ -1,161 +0,0 @@ -""" -file_name: gne -project_name: DBM -created: 2020-20-07 -""" - -import pandas as pd -import numpy as np -import os -import glob -import parselmouth -import librosa -import more_itertools as mit -from os.path import join -import logging - -from dbm_lib.dbm_features.raw_features.util import util as ut - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -gne_dir = 'acoustic/glottal_noise' -ff_dir = 'acoustic/pitch' -csv_ext = '_gne.csv' - -def gne_ratio(sound): - """ - Using parselmouth library fetching glottal noise excitation ratio - Args: - sound: parselmouth object - Returns: - (list) list of gne ratio for each voice frame - """ - harmonicity_gne = sound.to_harmonicity_gne() - gne_all_bands = harmonicity_gne.values - gne_all_bands = np.where(gne_all_bands==-200, np.NaN, gne_all_bands) - - gne = np.nanmax(gne_all_bands) # following 
http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf - return gne - -def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt): - """ - Preparing empty GNE matrix if something fails - """ - cols = ['Frames', r_config.aco_gne, r_config.err_reason] - out_val = [[np.nan, np.nan, error_txt]] - - df_gne = pd.DataFrame(out_val, columns = cols) - df_gne['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext) - -def segment_pitch(dir_path, r_config): - """ - segmenting pitch freq for each voice segment - """ - com_speech_sort, voiced_yes, voiced_no = ([], ) * 3 - for file in os.listdir(dir_path): - try: - - if file.endswith('_pitch.csv'): - - ff_df = pd.read_csv((dir_path+'/'+file)) - voice_label = ff_df[r_config.aco_voiceLabel] - - indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"] - voiced_yes = [list(group) for group in mit.consecutive_groups(indices_yes)] - - indices_no = [i for i, x in enumerate(voice_label) if x == "no"] - voiced_no = [list(group) for group in mit.consecutive_groups(indices_no)] - - com_speech = voiced_yes + voiced_no - com_speech_sort = sorted(com_speech, key=lambda x: x[0]) - except: - pass - - return com_speech_sort, voiced_yes, voiced_no - -def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file): - """ - calculating gne for each voice segment - """ - snd = parselmouth.Sound(audio_file) - pitch = snd.to_pitch(time_step=.001) - - for idx, vs in enumerate(com_speech_sort): - try: - - max_gne = np.NaN - if vs in voiced_yes and len(vs)>1: - - start_time = pitch.get_time_from_frame_number(vs[0]) - end_time = pitch.get_time_from_frame_number(vs[-1]) - - snd_start = int(snd.get_frame_number_from_time(start_time)) - snd_end = int(snd.get_frame_number_from_time(end_time)) - - samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end]) - max_gne = gne_ratio(samples) - except: - pass - - 
gne_all_frames[idx] = max_gne - return gne_all_frames - -def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config): - """ - Preparing gne matrix - Args: - audio_file: (.wav) parsed audio file - out_loc: (str) Output directory for csv's - """ - dir_path = os.path.join(out_loc, ff_dir) - if os.path.isdir(dir_path): - voice_seg = segment_pitch(dir_path, r_config) - - gne_all_frames = [np.NaN] * len(voice_seg[0]) - gne_segment_frames = segment_gne(voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file) - - df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne]) - df_gne[r_config.err_reason] = 'Pass'# will replace with threshold in future release - - df_gne['Frames'] = df_gne.index - df_gne['dbm_master_url'] = video_uri - - logger.info('Processing Output file {} '.format(out_loc)) - ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext) - - else: - error_txt = 'error: pitch freq not available' - empty_gne(video_uri, out_loc, fl_name, r_config, error_txt) - -def run_gne(video_uri, out_dir, r_config): - """ - Processing all patient's for fetching glottal noise ratio - --------------- - --------------- - Args: - video_uri: video path; r_config: raw variable config object - out_dir: (str) Output directory for processed output - """ - try: - - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: - - audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) - - if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) - - error_txt = 'error: length less than 0.064' - empty_gne(video_uri, out_loc, fl_name, r_config, error_txt) - return - - calc_gne(video_uri, audio_file, out_loc, fl_name, r_config) - except Exception as e: - logger.error('Failed to process audio file') \ No newline at end of file diff --git a/dbm_lib/dbm_features/raw_features/audio/hnr.py 
b/dbm_lib/dbm_features/raw_features/audio/hnr.py deleted file mode 100644 index d3dfec72..00000000 --- a/dbm_lib/dbm_features/raw_features/audio/hnr.py +++ /dev/null @@ -1,96 +0,0 @@ -""" -file_name: hnr -project_name: DBM -created: 2020-20-07 -""" - -import pandas as pd -import numpy as np -import os -import glob -import parselmouth -import librosa -from os.path import join -import logging - -from dbm_lib.dbm_features.raw_features.util import util as ut - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -hnr_dir = 'acoustic/harmonic_noise' -csv_ext = '_hnr.csv' -error_txt = 'error: length less than 0.064' - -def hnr_ratio(filepath): - """ - Using parselmouth library fetching harmonic noise ratio ratio - Args: - path: (.wav) audio file location - Returns: - (list) list of hnr ratio for each voice frame, min,max and mean hnr - """ - sound = parselmouth.Sound(filepath) - harmonicity = sound.to_harmonicity_ac(time_step=.001) - - hnr_all_frames = harmonicity.values#[harmonicity.values != -200] nan it (****) - hnr_all_frames = np.where(hnr_all_frames==-200, np.NaN, hnr_all_frames) - return hnr_all_frames.transpose() - -def calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config): - """ - Preparing harmonic noise matrix - Args: - audio_file: (.wav) parsed audio file - out_loc: (str) Output directory for csv's - """ - - hnr_all_frames = hnr_ratio(audio_file) - df_hnr = pd.DataFrame(hnr_all_frames, columns=[r_config.aco_hnr]) - - df_hnr['Frames'] = df_hnr.index - df_hnr['dbm_master_url'] = video_uri - df_hnr[r_config.err_reason] = 'Pass'# will replace with threshold in future release - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext) - -def empty_hnr(video_uri, out_loc, fl_name, r_config): - """ - Preparing empty HNR matrix if something fails - """ - cols = ['Frames', r_config.aco_hnr, r_config.err_reason] - out_val = [[np.nan, np.nan, error_txt]] - df_hnr = pd.DataFrame(out_val, 
columns = cols) - df_hnr['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext) - -def run_hnr(video_uri, out_dir, r_config): - """ - Processing all patient's for fetching harmonic noise ratio - ------------------- - ------------------- - Args: - video_uri: video path; r_config: raw variable config object - out_dir: (str) Output directory for processed output - """ - try: - - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: - - audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) - - if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) - - empty_hnr(video_uri, out_loc, fl_name, r_config) - return - - calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config) - except Exception as e: - logger.error('Failed to process audio file') \ No newline at end of file diff --git a/dbm_lib/dbm_features/raw_features/audio/intensity.py b/dbm_lib/dbm_features/raw_features/audio/intensity.py deleted file mode 100644 index 383267f8..00000000 --- a/dbm_lib/dbm_features/raw_features/audio/intensity.py +++ /dev/null @@ -1,92 +0,0 @@ -""" -file_name: intensity -project_name: DBM -created: 2020-20-07 -""" - -import pandas as pd -import numpy as np -import glob -import parselmouth -import librosa -from os.path import join -import logging - -from dbm_lib.dbm_features.raw_features.util import util as ut - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -intensity_dir = 'acoustic/intensity' -csv_ext = '_intensity.csv' -error_txt = 'error: length less than 0.064' - -def intensity_score(path): - """ - Using parselmouth library fetching Intensity - Args: - path: (.wav) audio file location - Returns: - (list) list of Intensity for each voice frame - """ - sound_pat = parselmouth.Sound(path) - intensity 
= sound_pat.to_intensity(time_step=.001) - return intensity.values[0] - -def calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config): - """ - Preparing Intensity matrix - Args: - audio_file: (.wav) parsed audio file - out_loc: (str) Output directory for csv's - """ - - intensity_frames = intensity_score(audio_file) - df_intensity = pd.DataFrame(intensity_frames, columns=[r_config.aco_int]) - - df_intensity['Frames'] = df_intensity.index - df_intensity['dbm_master_url'] = video_uri - df_intensity[r_config.err_reason] = 'Pass'# will replace with threshold in future release - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_intensity, out_loc, fl_name, intensity_dir, csv_ext) - -def empty_intensity(video_uri, out_loc, fl_name, r_config): - """ - Preparing empty Intensity matrix if something fails - """ - cols = ['Frames', r_config.aco_int, r_config.err_reason] - out_val = [[np.nan, np.nan, error_txt]] - df_int = pd.DataFrame(out_val, columns = cols) - df_int['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_int, out_loc, fl_name, intensity_dir, csv_ext) - -def run_intensity(video_uri, out_dir, r_config): - """ - Processing all patient's for fetching Intensity - ------------------- - ------------------- - Args: - video_uri: video path; r_config: raw variable config object - out_dir: (str) Output directory for processed output - """ - try: - - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: - - audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) - - if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) - - empty_intensity(video_uri, out_loc, fl_name, r_config) - return - - calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config) - except Exception as e: - logger.error('Failed to process 
audio file') \ No newline at end of file diff --git a/dbm_lib/dbm_features/raw_features/audio/jitter.py b/dbm_lib/dbm_features/raw_features/audio/jitter.py deleted file mode 100644 index 443e15ae..00000000 --- a/dbm_lib/dbm_features/raw_features/audio/jitter.py +++ /dev/null @@ -1,159 +0,0 @@ -""" -file_name: jitter_processing -project_name: DBM -created: 2020-20-07 -""" - -import pandas as pd -import numpy as np -import os -import glob -import parselmouth -import librosa -import numpy as np -import more_itertools as mit -from os.path import join -import logging - -from dbm_lib.dbm_features.raw_features.util import util as ut - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -jitter_dir = 'acoustic/jitter' -ff_dir = 'acoustic/pitch' -csv_ext = '_jitter.csv' - -def audio_jitter(sound): - """ - Using parselmouth library fetching jitter - Args: - sound: parselmouth object - Returns: - (list) list of jitters for each voice frame - """ - pointProcess = parselmouth.praat.call(sound, "To PointProcess (periodic, cc)...", 80, 500) - jitter = parselmouth.praat.call(pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3) - return jitter - -def empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt): - """ - Preparing empty jitter matrix if something fails - """ - cols = ['Frames', r_config.aco_jitter, r_config.err_reason] - out_val = [[np.nan, np.nan, error_txt]] - df_jitter = pd.DataFrame(out_val, columns = cols) - df_jitter['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext) - -def segment_pitch(dir_path, r_config): - """ - segmenting pitch freq for each voice segment - """ - com_speech_sort, voiced_yes, voiced_no = ([], ) * 3 - for file in os.listdir(dir_path): - try: - - if file.endswith('_pitch.csv'): - - ff_df = pd.read_csv((dir_path+'/'+file)) - voice_label = ff_df[r_config.aco_voiceLabel] - - indices_yes = [i for i, x in 
enumerate(voice_label) if x == "yes"] - voiced_yes = [list(group) for group in mit.consecutive_groups(indices_yes)] - - indices_no = [i for i, x in enumerate(voice_label) if x == "no"] - voiced_no = [list(group) for group in mit.consecutive_groups(indices_no)] - - com_speech = voiced_yes + voiced_no - com_speech_sort = sorted(com_speech, key=lambda x: x[0]) - except: - pass - - return com_speech_sort, voiced_yes, voiced_no - -def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_file): - """ - calculating jitter for each voice segment - """ - snd = parselmouth.Sound(audio_file) - pitch = snd.to_pitch(time_step=.001) - - for idx, vs in enumerate(com_speech_sort): - try: - - jitter = np.NaN - if vs in voiced_yes and len(vs)>1: - - start_time = pitch.get_time_from_frame_number(vs[0]) - end_time = pitch.get_time_from_frame_number(vs[-1]) - - snd_start = int(snd.get_frame_number_from_time(start_time)) - snd_end = int(snd.get_frame_number_from_time(end_time)) - - samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end]) - jitter = audio_jitter(samples) - except: - pass - - jitter_frames[idx] = jitter - return jitter_frames - -def calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config): - """ - Preparing jitter matrix - Args: - audio_file: (.wav) parsed audio file - out_loc: (str) Output directory for csv - r_config: config.config_raw_feature.pyConfigFeatureNmReader object - """ - dir_path = os.path.join(out_loc, ff_dir) - if os.path.isdir(dir_path): - voice_seg = segment_pitch(dir_path, r_config) - - jitter_frames = [np.NaN] * len(voice_seg[0]) - jitter_segment_frames = segment_jitter(voice_seg[0], voice_seg[1], voice_seg[2], jitter_frames, audio_file) - - df_jitter = pd.DataFrame(jitter_segment_frames, columns=[r_config.aco_jitter]) - df_jitter[r_config.err_reason] = 'Pass'# will replace with threshold in future release - - df_jitter['Frames'] = df_jitter.index - df_jitter['dbm_master_url'] = video_uri - - logger.info('Processing 
Output file {} '.format(out_loc)) - ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext) - - else: - error_txt = 'error: fundamental freq not available' - empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt) - -def run_jitter(video_uri, out_dir, r_config): - """ - Processing all patient's videos for fetching jitter - ------------------- - ------------------- - Args: - video_uri: video path; r_config: raw variable config object - out_dir: (str) Output directory for processed output - """ - try: - - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: - - audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) - - if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) - - error_txt = 'error: length less than 0.064' - empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt) - return - - calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config) - except Exception as e: - logger.error('Failed to process audio file') \ No newline at end of file diff --git a/dbm_lib/dbm_features/raw_features/audio/mfcc.py b/dbm_lib/dbm_features/raw_features/audio/mfcc.py deleted file mode 100644 index 505b3205..00000000 --- a/dbm_lib/dbm_features/raw_features/audio/mfcc.py +++ /dev/null @@ -1,105 +0,0 @@ -""" -file_name: mfcc -project_name: DBM -created: 2020-20-07 -""" - -import pandas as pd -import os -import glob -import parselmouth -import librosa -import numpy as np -import librosa -from os.path import join -import logging - -from dbm_lib.dbm_features.raw_features.util import util as ut - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -mfcc_dir = 'acoustic/mfcc' -csv_ext = '_mfcc.csv' -error_txt = 'error: length less than 0.064' - -def empty_mfcc(video_uri, out_loc, fl_name, r_config): - - """ - Preparing empty empty_mfcc matrix if something fails - """ - cols 
= ['Frames', r_config.aco_mfcc1, r_config.aco_mfcc2, r_config.aco_mfcc3, r_config.aco_mfcc4, r_config.aco_mfcc5, - r_config.aco_mfcc6, r_config.aco_mfcc7, r_config.aco_mfcc8, r_config.aco_mfcc9, r_config.aco_mfcc10, - r_config.aco_mfcc11, r_config.aco_mfcc12, r_config.err_reason] - out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, - error_txt]] - df_mfcc = pd.DataFrame(out_val, columns = cols) - df_mfcc['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_mfcc, out_loc, fl_name, mfcc_dir, csv_ext) - -def audio_mfcc(path): - """ - Using parselmouth library fetching mfccs - Args: - path: (.wav) audio file location - Returns: - (list) list of mfccs for each voice frame - """ - sound = parselmouth.Sound(path) - mfcc_object = sound.to_mfcc(time_step=.001,number_of_coefficients=12) - mfccs = mfcc_object.to_array() - mfccs = np.delete(mfccs, (0), axis=0) - return mfccs - -def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config): - """ - Preparing mfcc matrix - Args: - audio_file: (.wav) parsed audio file - out_loc: output location to save csv - fl_name: (str) name of audio file - r_config: config.config_raw_feature.pyConfigFeatureNmReader object - """ - dict_ = {} - mfccs = audio_mfcc(audio_file) - - for i in range(1,13): - conf_str = r_config.base_raw['raw_feature'] - dict_[conf_str['aco_mfcc' + str(i)]] = mfccs[i-1, :] - - df = pd.DataFrame(dict_) - df['Frames'] = df.index - - df[r_config.err_reason] = 'Pass'# may replace based on threshold in future release - df['dbm_master_url'] = video_uri - - ut.save_output(df, out_loc, fl_name, mfcc_dir, csv_ext) - -def run_mfcc(video_uri, out_dir, r_config): - """ - Processing all patients to fetch mfccs - - Args: - video_uri: video path; r_config: raw variable config object - out_dir: (str) Output directory for processed output - """ - try: - - input_loc, out_loc, fl_name = ut.filter_path(video_uri, 
out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: - - audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) - - if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) - - empty_mfcc(video_uri, out_loc, fl_name, r_config) - return - - calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config) - except Exception as e: - logger.error('Failed to process audio file') \ No newline at end of file diff --git a/dbm_lib/dbm_features/raw_features/audio/pitch_freq.py b/dbm_lib/dbm_features/raw_features/audio/pitch_freq.py deleted file mode 100644 index 66f35718..00000000 --- a/dbm_lib/dbm_features/raw_features/audio/pitch_freq.py +++ /dev/null @@ -1,113 +0,0 @@ -""" -file_name: pitch_freq -project_name: DBM -created: 2020-20-07 -""" - -import pandas as pd -import os -import glob -import parselmouth -import librosa -import numpy as np -from os.path import join -import logging - -from dbm_lib.dbm_features.raw_features.util import util as ut - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -ff_dir = 'acoustic/pitch' -csv_ext = '_pitch.csv' -error_txt = 'error: length less than 0.064' - -def audio_pitch(path): - """ - Using parselmouth library fetching pitch/fundamental frequency - Args: - path: (.wav) audio file location - Returns: - (list) list of pitch/fundamental frequency for each voice frame - """ - sound_pat = parselmouth.Sound(path) - pitch = sound_pat.to_pitch(time_step=.001) - pitch_values = pitch.selected_array['frequency'] - - return list(pitch_values) - -def label_speech(row,fd_freq): - """ - identify whether frame is voiced or not - Args: - row: (item) pitch frequency value - Returns: - (str) yes or no indicator for voice - """ - if row[fd_freq] > 0 : - return 'yes' - else: - return 'no' - -def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config): - - """ - Preparing pitch frequency matrix - Args: - audio_file: 
(.wav) parsed audio file - row: (dataframe) subject details from master csv - new_out_base_dir: (str) Output directory for csv - """ - - ff_frames = audio_pitch(audio_file) - df_ffreq = pd.DataFrame(ff_frames, columns=[r_config.aco_ff]) - - df_ffreq['Frames'] = df_ffreq.index - df_ffreq[r_config.aco_voiceLabel] = df_ffreq.apply(lambda row: label_speech(row, r_config.aco_ff),axis=1) - - df_ffreq[r_config.err_reason] = 'Pass'# will replace with threshold in future release - df_ffreq['dbm_master_url'] = video_uri - - logger.info('Processing Output file {} '.format(out_loc)) - ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext) - -def empty_pitch(video_uri, out_loc, fl_name, r_config): - """ - Preparing empty pitch frequency matrix if something fails - """ - - df_ffreq = pd.DataFrame([[np.nan, np.nan, 'no', error_txt]], - columns=['Frames', r_config.aco_ff, r_config.aco_voiceLabel, r_config.err_reason]) - df_ffreq['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext) - -def run_pitch(video_uri, out_dir, r_config): - - """ - Processing audio for fetching pitch - ------------------- - ------------------- - Args: - video_uri: video path; r_config: raw variable config object - out_dir: (str) Output directory for processed output - """ - try: - - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: - - audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) - - if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) - - empty_pitch(video_uri, out_loc, fl_name, r_config) - return - - calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config) - except Exception as e: - logger.error('Failed to process audio file') \ No newline at end of file diff --git a/dbm_lib/dbm_features/raw_features/audio/shimmer.py 
b/dbm_lib/dbm_features/raw_features/audio/shimmer.py deleted file mode 100644 index c33f163b..00000000 --- a/dbm_lib/dbm_features/raw_features/audio/shimmer.py +++ /dev/null @@ -1,160 +0,0 @@ -""" -file_name: shimmer_processing -project_name: DBM -created: 2020-20-07 -""" - -import pandas as pd -import numpy as np -import os -import glob -import parselmouth -import librosa -import numpy as np -import more_itertools as mit -from os.path import join - -import logging - -from dbm_lib.dbm_features.raw_features.util import util as ut - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -shimmer_dir = 'acoustic/shimmer' -ff_dir = 'acoustic/pitch' -csv_ext = '_shimmer.csv' - -def audio_shimmer(sound): - """ - Using parselmouth library fetching shimmer - Args: - sound: parselmouth object - Returns: - (list) list of shimmers for each voice frame - """ - pointProcess = parselmouth.praat.call(sound, "To PointProcess (periodic, cc)...", 80, 500) - shimmer = parselmouth.praat.call([sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6) - return shimmer - -def empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt): - """ - Preparing empty shimmer matrix if something fails - """ - cols = ['Frames', r_config.aco_shimmer, r_config.err_reason] - out_val = [[np.nan, np.nan, error_txt]] - df_shimmer = pd.DataFrame(out_val, columns = cols) - df_shimmer['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext) - -def segment_pitch(dir_path, r_config): - """ - segmenting pitch freq for each voice segment - """ - com_speech_sort, voiced_yes, voiced_no = ([], ) * 3 - for file in os.listdir(dir_path): - try: - - if file.endswith('_pitch.csv'): - - ff_df = pd.read_csv((dir_path+'/'+file)) - voice_label = ff_df[r_config.aco_voiceLabel] - - indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"] - voiced_yes = [list(group) for group in 
mit.consecutive_groups(indices_yes)] - - indices_no = [i for i, x in enumerate(voice_label) if x == "no"] - voiced_no = [list(group) for group in mit.consecutive_groups(indices_no)] - - com_speech = voiced_yes + voiced_no - com_speech_sort = sorted(com_speech, key=lambda x: x[0]) - except: - pass - - return com_speech_sort, voiced_yes, voiced_no - -def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audio_file): - """ - calculating shimmer for each voice segment - """ - snd = parselmouth.Sound(audio_file) - pitch = snd.to_pitch(time_step=.001) - - for idx, vs in enumerate(com_speech_sort): - try: - - shimmer = np.NaN - if vs in voiced_yes and len(vs)>1: - - start_time = pitch.get_time_from_frame_number(vs[0]) - end_time = pitch.get_time_from_frame_number(vs[-1]) - - snd_start = int(snd.get_frame_number_from_time(start_time)) - snd_end = int(snd.get_frame_number_from_time(end_time)) - - samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end]) - shimmer = audio_shimmer(samples) - except: - pass - - shimmer_frames[idx] = shimmer - return shimmer_frames - -def calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config): - """ - Preparing shimmer matrix - Args: - audio_file: (.wav) parsed audio file - out_loc: (str) Output directory for csv - r_config: config.config_raw_feature.pyConfigFeatureNmReader object - """ - dir_path = os.path.join(out_loc, ff_dir) - if os.path.isdir(dir_path): - voice_seg = segment_pitch(dir_path, r_config) - - shimmer_frames = [np.NaN] * len(voice_seg[0]) - shimmer_segment_frames = segment_shimmer(voice_seg[0], voice_seg[1], voice_seg[2], shimmer_frames, audio_file) - - df_shimmer = pd.DataFrame(shimmer_segment_frames, columns=[r_config.aco_shimmer]) - df_shimmer[r_config.err_reason] = 'Pass'# will replace with threshold in future release - - df_shimmer['Frames'] = df_shimmer.index - df_shimmer['dbm_master_url'] = video_uri - - logger.info('Processing Output file {} '.format(out_loc)) - 
ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext) - - else: - error_txt = 'error: fundamental freq not available' - empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt) - -def run_shimmer(video_uri, out_dir, r_config): - """ - Processing all patients to fetch shimmer - --------------- - --------------- - Args: - video_uri: video path; r_config: raw variable config object - out_dir: (str) Output directory for processed output - """ - try: - - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: - - audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) - - if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) - - error_txt = 'error: length less than 0.064' - empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt) - return - - calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config) - except Exception as e: - logger.error('Failed to process audio file') \ No newline at end of file diff --git a/dbm_lib/dbm_features/raw_features/audio/voice_frame_score.py b/dbm_lib/dbm_features/raw_features/audio/voice_frame_score.py deleted file mode 100644 index 1d8d0bb3..00000000 --- a/dbm_lib/dbm_features/raw_features/audio/voice_frame_score.py +++ /dev/null @@ -1,111 +0,0 @@ -""" -file_name: voice_frame_score -project_name: DBM -created: 2020-20-07 -""" - -import parselmouth -import pandas as pd -import numpy as np -import glob -import librosa -from os.path import join -import logging - -from dbm_lib.dbm_features.raw_features.util import util as ut - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -vfs_dir = 'acoustic/voice_frame_score' -csv_ext = '_voiceprev.csv' -error_txt = 'error: length less than 0.064' - -def audio_pitch_frame(pitch): - """ - Computing total number of speech and participant voiced frames - Args: - pitch: speech pitch - 
Returns: - (float) total voice frames and participant voiced frames - """ - total_frames = pitch.get_number_of_frames() - voiced_frames = pitch.count_voiced_frames() - return total_frames, voiced_frames - -def voice_segment(path): - """ - Using parselmouth library for fundamental frequency - Args: - path: (.wav) audio file location - Returns: - (float) total voice frames, participant voiced frames and voiced frames percentage - """ - sound_pat = parselmouth.Sound(path) - pitch = sound_pat.to_pitch() - total_frames,voiced_frames = audio_pitch_frame(pitch) - - voiced_percentage = (voiced_frames/total_frames)*100 - return voiced_percentage, voiced_frames, total_frames - -def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config): - """ - creating dataframe matrix for voice frame score - Args: - audio_file: Audio file path - new_out_base_dir: AWS instance output base directory path - f_nm_config: Config file object - """ - - voice_percentage,voiced_frames, total_frames = voice_segment(audio_file) - df_vfs = pd.DataFrame([voiced_frames], columns=[r_config.aco_voiceFrame]) - - df_vfs[r_config.aco_totVoiceFrame] = [total_frames] - df_vfs[r_config.aco_voicePct] = [voice_percentage] - df_vfs[r_config.err_reason] = 'Pass'# will replace with threshold in future release - - df_vfs['Frames'] = df_vfs.index - df_vfs['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext) - -def empty_vfs(video_uri, out_loc, fl_name, r_config): - """ - Preparing empty VFS matrix if something fails - """ - cols = ['Frames', r_config.aco_voiceFrame, r_config.aco_totVoiceFrame, r_config.aco_voicePct, r_config.err_reason] - out_val = [[np.nan, np.nan, np.nan, np.nan, error_txt]] - df_vfs = pd.DataFrame(out_val, columns = cols) - df_vfs['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext) - -def 
run_vfs(video_uri, out_dir, r_config): - """ - Processing all participants for fetching voice frame score - --------------- - --------------- - Args: - video_uri: video path; r_config: raw variable config object - out_dir: (str) Output directory for processed output - """ - try: - - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: - - audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) - - if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) - - empty_vfs(video_uri, out_loc, fl_name, r_config) - return - - calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config) - except Exception as e: - logger.error('Failed to process audio file') \ No newline at end of file diff --git a/dbm_lib/dbm_features/raw_features/movement/__init__.py b/dbm_lib/dbm_features/raw_features/movement/__init__.py deleted file mode 100644 index 2ed5b79a..00000000 --- a/dbm_lib/dbm_features/raw_features/movement/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -""" -file_name: init -project_name: DBM -created: 2020-20-07 -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -DBMLIB_PATH = os.path.dirname(__file__) -DBMLIB_VTREMOR_LIB = os.path.abspath(os.path.join(DBMLIB_PATH, - '../../../../resources/libraries/voice_tremor.praat')) -DBMLIB_FTREMOR_CONFIG = os.path.abspath(os.path.join(DBMLIB_PATH, '../../../../resources/features/facial/config.json')) - diff --git a/dbm_lib/dbm_features/raw_features/movement/eye_blink.py b/dbm_lib/dbm_features/raw_features/movement/eye_blink.py deleted file mode 100644 index 297bb06f..00000000 --- a/dbm_lib/dbm_features/raw_features/movement/eye_blink.py +++ /dev/null @@ -1,160 +0,0 @@ -""" -file_name: eye_blink -project_name: DBM -created: 2020-20-07 -""" - -import os -import glob -from scipy.spatial import 
distance as dist -from scipy.signal import find_peaks -from imutils.video import FileVideoStream -from imutils.video import VideoStream -from imutils import face_utils -from moviepy.editor import VideoFileClip -import numpy as np -import pandas as pd -import imutils -import time -import dlib -import cv2 -import logging - -from dbm_lib.dbm_features.raw_features.util import util as ut - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -movement_expr_dir = 'movement/eye_blink' -csv_ext = '_eyeblinks.csv' - -def eye_aspect_ratio(eye): - """ - Computing eye aspect ratio for an individual frame - Args: - eye: Eye landmarks - Return: - Eye aspect ratio for a frame - """ - # euclidean distance for vertical eye landmarks - dist_cor1 = dist.euclidean(eye[1], eye[5]) - dist_cor2 = dist.euclidean(eye[2], eye[4]) - - # euclidean distance for horizontal eye landmark - dist_cor3 = dist.euclidean(eye[0], eye[3]) - - ear = (dist_cor1 + dist_cor2) / (2.0 * dist_cor3) - return ear - -def blink_detection(video_path,facial_landmarks,raw_config): - """ - Blink detection for each frame - Args: - video_path: MP4 file location - facial_landmarks: Facial landmark pre-trained model path - raw_config: Raw configuration file object - Return: - Dataframe with blink informatiom like blink frame, duration etc. 
- """ - TOT_FRAME = 1 - blink_frame = [] - ear_frame = [] - - clip = VideoFileClip(video_path, has_mask=True) - vid_length = clip.duration - - identifier = dlib.get_frontal_face_detector() #dlib's face detector (HOG-based) - forecaster = dlib.shape_predictor(facial_landmarks) # the facial landmark predictor - - #left and right eye landmarks - (left_beg, left_end) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"] - (right_beg, right_end) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"] - - f_stream = True - vid_stream = FileVideoStream(video_path).start() - - while True: - try: - #check if stream/frame available in video - if f_stream and not vid_stream.more(): - break - - #reading & converting frame into grayscale - vid_frame = vid_stream.read() - vid_frame = imutils.resize(vid_frame, width=450) - gray = cv2.cvtColor(vid_frame, cv2.COLOR_BGR2GRAY) - - #detecting face - rects = identifier(gray, 0) - for rect in rects: - - lmk = forecaster(gray, rect) - lmk = face_utils.shape_to_np(lmk) - - l_eye = lmk[left_beg:left_end] #Extracting left eye ratio - r_eye = lmk[right_beg:right_end] #Extracting right eye ratio - l_ear = eye_aspect_ratio(l_eye) # eye aspect ratio for left eye - r_ear = eye_aspect_ratio(r_eye) # eye aspect ratio for right eye - - ear = (l_ear + r_ear) / 2.0 # average the eye aspect ratio - blink_frame.append(TOT_FRAME) - ear_frame.append(ear) - - TOT_FRAME += 1 - except Exception as e: - #logger.error("blink detection processing failed for: {}".format(video_path)) - continue - - blink_df = pd.DataFrame(ear_frame, columns =[raw_config.mov_blink_ear]) - blink_df[raw_config.vid_dur] = vid_length - blink_df[raw_config.fps] = int(TOT_FRAME/vid_length) - blink_df[raw_config.mov_blinkframes] = blink_frame - - peaks, _ = find_peaks(blink_df[raw_config.mov_blink_ear]*-1, prominence=0.1)#prominence = 0.1 based on tuning - final_blink_df = blink_df.iloc[peaks,:].reset_index(drop=True) - - u_blink_df = blink_dur(final_blink_df,raw_config) - u_blink_df['dbm_master_url'] 
= video_path - return u_blink_df - -def blink_dur(blink_df,raw_config): - """ - Computing blink duration between each blink - Args: - blink_df : Dataframe with blink informatiom like blink frame - raw_config: Raw configuration file object - Returns: - Updated dataframe with blink duration - """ - dur_list = [] - if len(blink_df)>0: - blink_df[raw_config.mov_blinkdur] = blink_df[raw_config.mov_blinkframes].diff().fillna( - blink_df[raw_config.mov_blinkframes]) - else: - blink_df[raw_config.mov_blinkdur] = np.nan - blink_df[raw_config.mov_blinkdur] = blink_df[raw_config.mov_blinkdur]/blink_df[raw_config.fps] - return blink_df - -def run_eye_blink(video_uri, out_dir, r_config, facial_landmarks): - """ - Processing all patient's for getting eye blink artifacts - --------------- - --------------- - Args: - video_uri: video path; input_dir : input directory for video's - out_dir: (str) Output directory for processed output; r_config: raw variable config object; - facial_landmarks: landmark model path - """ - try: - - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - vid_file_path = os.path.exists(video_uri) - if vid_file_path==True: - - logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name))) - df_blink = blink_detection(video_uri, facial_landmarks, r_config) - ut.save_output(df_blink, out_loc, fl_name, movement_expr_dir, csv_ext) - - except Exception as e: - logger.error('Failed to process video file') - \ No newline at end of file diff --git a/dbm_lib/dbm_features/raw_features/movement/eye_gaze.py b/dbm_lib/dbm_features/raw_features/movement/eye_gaze.py deleted file mode 100644 index c52893db..00000000 --- a/dbm_lib/dbm_features/raw_features/movement/eye_gaze.py +++ /dev/null @@ -1,148 +0,0 @@ -""" -file_name: eye_gaze -project_name: DBM -created: 2020-30-11 -""" - -import os -import glob -import pandas as pd -import numpy as np -from scipy.spatial import distance -from os.path import join -import logging - -from 
dbm_lib.dbm_features.raw_features.util import util as ut - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -eye_pose_dir = 'movement/gaze' -eye_pose_ext = '_eyegaze.csv' - -def eye_motion_df(l_disp, r_disp, error_list, r_config): - """ - Generating eye movement dataframe - - Args: - l_disp: displacement list(left eye); l_disp: displacement list(right eye) - r_config: raw variable config file object - - Reutrns: - Final eye displacement dataframe - """ - df_eye_left = pd.DataFrame(l_disp, columns=[r_config.mov_eleft_disp]) - df_eye_right = pd.DataFrame(r_disp, columns=[r_config.mov_eright_disp]) - - df_eye_motion = pd.concat([df_eye_left, df_eye_right], axis=1, sort=False) - df_eye_motion[r_config.err_reason] = error_list - return df_eye_motion - -def filter_motion(df_of, df_disp, col_l, col_r, r_config): - """ - Filtering final eye movement dataframe - - Args: - df_of: Openface raw out dataframe; col_r: right eye column - col_l: left eye column; r_config: raw variable config file object - """ - - df_of = df_of[col_l + col_r + [' confidence']] - df_of.loc[(df_of[' confidence'].astype(float) < 0.8), col_l + col_r] = np.nan - - df_filter = df_of[col_l + col_r] - df_filter.columns = [r_config.mov_leye_x, r_config.mov_leye_y, r_config.mov_leye_z, - r_config.mov_reye_x, r_config.mov_reye_y, r_config.mov_reye_z] - - df_motion = pd.concat([df_filter, df_disp], axis=1, sort=False) - return df_motion - -def eye_disp(of_results, col, r_config): - """ - Computing head velocity frame by frame - - Args: - of_results: Openface raw out dataframe - r_config: Face config file object - - Reutrns: - Final head velocity frame by frame output - """ - distance_list = [] - error_list = [] - - of_results = of_results[col+ [' confidence']] - for index, row in of_results.iterrows(): - dst = np.nan - - if index == 0 or float(row[' confidence']) < 0.8: #Threshold < 0.8 - distance_list.append(dst) - - if float(row[' confidence']) < 0.8: - error_list.append('confidence 
less than 80%') - - else: - error_list.append('Pass') - continue - - if index > 0: - - point_x = (of_results[col[0]][index-1], of_results[col[1]][index-1], of_results[col[2]][index-1]) - point_y = (row[col[0]],row[col[1]],row[col[2]]) - try: - dst = distance.euclidean(point_x, point_y) - except: - pass - - distance_list.append(abs(dst)) - error_list.append('Pass') - - return distance_list, error_list - -def calc_eye_mov(video_uri, df_of, out_loc, fl_name, r_config): - """ - Computing eye motion variables - Args: - df_of: Openface dataframe - out_loc: Output path for saving output csv's - fl_name: file name for output csv - r_config: raw variable config file object - - """ - - col_l = [ ' gaze_0_x', ' gaze_0_y', ' gaze_0_z'] - col_r = [ ' gaze_1_x', ' gaze_1_y', ' gaze_1_z'] - - gazel_disp, err_l = eye_disp(df_of, col_l, r_config) - gazer_disp, err_r = eye_disp(df_of, col_r, r_config) - - df_disp = eye_motion_df(gazel_disp, gazer_disp, err_l, r_config) - df_disp['dbm_master_url'] = video_uri - - df_motion = filter_motion(df_of, df_disp, col_l, col_r, r_config) - ut.save_output(df_motion, out_loc, fl_name, eye_pose_dir, eye_pose_ext) - -def run_eye_gaze(video_uri, out_dir, r_config): - """ - Processing all patient's for getting eye movement artifacts - -------------------------------- - -------------------------------- - Args: - video_uri: video path; input_dir : input directory for video's - out_dir: (str) Output directory for processed output; r_config: raw variable config object - """ - try: - - #filtering path to generate input & output path - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - of_csv_path = glob.glob(join(out_loc, fl_name + '_openface/*.csv')) - - if len(of_csv_path)>0: - - of_csv = of_csv_path[0] - df_of = pd.read_csv(of_csv, error_bad_lines=False) - - logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name))) - calc_eye_mov(video_uri, df_of, out_loc, fl_name, r_config) - except Exception as e: - 
logger.error('Failed to process video file') \ No newline at end of file diff --git a/dbm_lib/dbm_features/raw_features/movement/facial_tremor.py b/dbm_lib/dbm_features/raw_features/movement/facial_tremor.py deleted file mode 100644 index 929cc19f..00000000 --- a/dbm_lib/dbm_features/raw_features/movement/facial_tremor.py +++ /dev/null @@ -1,164 +0,0 @@ -import sys, os, glob, cv2, re -import pickle, json -import pandas as pd -import numpy as np -import numpy.ma as ma -import logging -from os.path import join - -from dbm_lib.dbm_features.raw_features.util import util as ut -from dbm_lib.dbm_features.raw_features.util.math_util import * - -from dbm_lib.dbm_features.raw_features.movement import DBMLIB_FTREMOR_CONFIG - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -ft_dir = 'movement/facial_tremor' -csv_ext = '_fac_tremor.csv' -model_ext = '_fac_model.csv' -fac_features_ext = '_fac_features.csv' - -def compute_features(out_dir, df_of, r_config): - """ Computes features - - Returns: features in vector format - """ - config = json.loads(open(DBMLIB_FTREMOR_CONFIG,'r').read()) - - pattern_x = re.compile("l\d+_x") - pattern_y = re.compile("l\d+_y") - - # assumption: distance of face to camera remains at roughly static - - # logic break - landmark_columns = [] - for col in df_of.columns: - if pattern_x.match(col) or pattern_y.match(col): - landmark_columns.append(col) - - df_of= df_of[(df_of[landmark_columns]!= 0).any(axis=1)] - df_of.reset_index(inplace=True) - - num_frames = len(df_of) - logger.info("Number of frames to be processed: {}".format(str(num_frames))) - landmarks = config['landmarks'] - - try: - if num_frames == 0: - error_reason = "No frames with visible face." - logger.error(error_reason) - return empty_frame(landmarks, r_config, error_reason) - -# if num_frames < 60: -# error_reason = 'Number of frames with visible face < 60. 
Video too short' -# logger.error(error_reason) -# return empty_frame(landmarks, f_cfg, error_reason) - - first_row = df_of.iloc[0] - - facew = abs(first_row[config['face_width_left']] - first_row[config['face_width_right']]) - faceh = abs(first_row[config['face_height_left']] - first_row[config['face_height_right']]) - - if facew == 0 or faceh == 0: - error_reason = 'face width or height = 0. Check landmark values' - logger.error(error_reason) - return empty_frame(landmarks, r_config) - - fac_disp = calc_displacement_vec(df_of, landmarks, num_frames) - - # if verbose: - # logger.info("Displacement output: {}".format(str(fac_disp))) - - fac_disp_median = np.median(fac_disp, axis = 1) - fac_disp_mean = np.mean(fac_disp, axis = 1) - - if len(fac_disp.shape)!=2: - error_reason = 'fac_disp is not 2D. smth went wrong with disp calc' - logger.error(error_reason) - return empty_frame(landmarks, r_config, error_reason) - - if len(fac_disp[0])<=1: - error_reason = 'Video too short. smth went wrong with disp calc' - logger.error(error_reason) - return empty_frame(landmarks, r_config, error_reason) - - fac_corr_mat = np.corrcoef(fac_disp, rowvar = True) - # extract relevant row from cov matrix - ref_lmk_index = [i for i, lmk in enumerate(landmarks) if config['ref_lmk']==lmk] - fac_corr = fac_corr_mat[ref_lmk_index][0] - - fac_area = config['ref_area'] / (facew * faceh) - - # if verbose: - # logger.info("Face area: {}".format(fac_area)) - # logger.info("Face Displacement Median: {}".format(str(fac_disp_median))) - # logger.info("Face Displacement Mean: {}".format(str(fac_disp_mean))) - - fac_features1 = np.multiply(fac_area * fac_disp_median, (1. - fac_corr)) - fac_features2 = np.multiply(fac_area * fac_disp_mean, (1. - fac_corr)) - -# base_fac_features = np.dot(fac_area * fac_disp_median, (1. 
- fac_corr)) - - fac_features_dict = {} - for i, landmark in enumerate(landmarks): - fac_features_dict['fac_features_mean_{}'.format(landmark)] = [fac_features2[i]] - raw_variable_map = 'fac_tremor_median_{}'.format(landmark) - fac_features_dict[r_config.base_raw['raw_feature'][raw_variable_map]] = [fac_features1[i]] - - fac_features_dict['fac_disp_median_{}'.format(landmark)] = [fac_disp_median[i]] - fac_features_dict['fac_corr_{}'.format(landmark)] = [fac_corr[i]] - - fac_features_dict[r_config.err_reason] = [''] - data = pd.DataFrame.from_dict(fac_features_dict) - logger.info('Concluded computing tremor features') - - return data - - except Exception as e: - logger.error('Error computing tremor features: {}'.format(str(e))) - return empty_frame(landmarks, r_config, str(e)) - -def empty_frame(landmarks, r_config, error_reason): - fac_features_dict = {} - for i, landmark in enumerate(landmarks): - raw_variable_map = 'fac_tremor_median_{}'.format(landmark) - fac_features_dict[r_config.base_raw['raw_feature'][raw_variable_map]] = [np.nan] - - fac_features_dict['fac_features_mean_{}'.format(landmark)] = [np.nan] - fac_features_dict['fac_disp_median_{}'.format(landmark)] = [np.nan] - fac_features_dict['fac_corr_{}'.format(landmark)] = [np.nan] - - fac_features_dict[r_config.err_reason] = [error_reason] - empty_frame = pd.DataFrame.from_dict(fac_features_dict) - return empty_frame - -def fac_tremor_process(video_uri, out_dir, r_config, model_output=False): - """ - processing input videos - - - """ -# try: - - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - of_csv_path = glob.glob(join(out_loc, fl_name + '_openface_lmk/*.csv')) - - if len(of_csv_path)>0: - of_csv = of_csv_path[0] - df_of = pd.read_csv(of_csv, error_bad_lines=False) - - logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name))) - - feats = compute_features(of_csv_path , df_of, r_config) - -# if model_output: -# result = score(feats, r_config) -# feats = 
pd.concat([feats, result], axis=1) - - ut.save_output(feats, out_loc, fl_name, ft_dir, csv_ext) - - - -# except Exception as e: - logger.error('Failed to process video file') diff --git a/dbm_lib/dbm_features/raw_features/movement/head_motion.py b/dbm_lib/dbm_features/raw_features/movement/head_motion.py deleted file mode 100644 index 709b4f6c..00000000 --- a/dbm_lib/dbm_features/raw_features/movement/head_motion.py +++ /dev/null @@ -1,196 +0,0 @@ -""" -file_name: head_mov -project_name: DBM -created: 2020-20-07 -""" - -import os -import glob -import pandas as pd -import numpy as np -from scipy.spatial import distance -from os.path import join -import logging - -from dbm_lib.dbm_features.raw_features.util import util as ut - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -h_mov_dir = 'movement/head_movement' -h_pose_dir = 'movement/head_pose' -h_mov_ext = '_headmov.csv' -h_pose_ext = '_headpose.csv' - -def head_pose_dist(of_results): - """ - Computing head pose distance frame by frame - - Args: - of_results: Openface raw out dataframe - f_nm_config: Face config file object - - Reutrns: - Final head pose distance frame by frame output - """ - distance_list = [] - error_list = [] - for index, row in of_results.iterrows(): - dst = np.nan - - if index == 0 or float(row[' confidence']) < 0.2: #Threshold < 0.2 - distance_list.append(dst) - - if float(row[' confidence']) < 0.2: - error_list.append('confidence less than 20%') - - else: - error_list.append('Pass') - continue - - if index > 0: - - point_x = (of_results[' pose_Rx'][index-1], of_results[' pose_Ry'][index-1], of_results[' pose_Rz'][index-1]) - point_y = (row[' pose_Rx'],row[' pose_Ry'],row[' pose_Rz']) - try: - dst = distance.euclidean(point_x, point_y) - except: - pass - distance_list.append(abs(dst)) - error_list.append('Pass') - return distance_list, error_list - -def head_pose(of_results,r_config): - """ - Generating head pose estimation dataframe - - Args: - distance_val: distance 
list - f_nm_config: raw variable config file object - - Reutrns: - Final head pose estimation dataframe - """ - pose_dist_list, error_list = head_pose_dist(of_results) - of_results.loc[(of_results[' confidence'].astype(float) < 0.2), [' pose_Rx',' pose_Ry',' pose_Rz']] = np.nan - pose_of = of_results[[' pose_Rx',' pose_Ry',' pose_Rz']] - pose_of.columns = [r_config.mov_Hpose_Pitch, r_config.mov_Hpose_Yaw, r_config.mov_Hpose_Roll] - pose_of[r_config.mov_Hpose_Dist] = pose_dist_list - pose_of[r_config.err_reason] = error_list - - return pose_of - -def head_motion_df(distance_val, error_list, r_config): - """ - Generating head movement dataframe - - Args: - distance_val: distance list - r_config: raw variable config file object - - Reutrns: - Final head velocity dataframe - """ - head_motion = r_config.head_vel - df_head_motion = pd.DataFrame(distance_val, columns=[head_motion]) - df_head_motion['Frames'] = df_head_motion.index - - new_df_intensity = df_head_motion[['Frames', head_motion]] - new_df_intensity[r_config.err_reason] = error_list - - return new_df_intensity - -def head_vel(of_results, r_config): - """ - Computing head velocity frame by frame - - Args: - of_results: Openface raw out dataframe - r_config: Face config file object - - Reutrns: - Final head velocity frame by frame output - """ - distance_list = [] - error_list = [] - for index, row in of_results.iterrows(): - dst = np.nan - - if index == 0 or float(row[' confidence']) < 0.2: #Threshold < 0.2 - distance_list.append(dst) - - if float(row[' confidence']) < 0.2: - error_list.append('confidence less than 20%') - - else: - error_list.append('Pass') - continue - - if index > 0: - - point_x = (of_results[' pose_Tx'][index-1], of_results[' pose_Ty'][index-1], of_results[' pose_Tz'][index-1]) - point_y = (row[' pose_Tx'],row[' pose_Ty'],row[' pose_Tz']) - try: - dst = distance.euclidean(point_x, point_y) - except: - pass - - if abs(dst)>200: - dst = np.nan - error_list.append('Out of range') - - else: - 
error_list.append('Pass') - distance_list.append(dst) - df_velocity = head_motion_df(distance_list, error_list, r_config) - - return df_velocity - -def calc_head_mov(video_uri, df_of, out_loc, fl_name, r_config): - """ - Computing head motion and head pose variables - Args: - df_of: Openface dataframe - out_loc: Output path for saving output csv's - fl_name: file name for output csv - r_config: raw variable config file object - - """ - - col = [' confidence',' pose_Rx',' pose_Ry',' pose_Rz',' pose_Tx', ' pose_Ty', ' pose_Tz'] - df_of = df_of[col] - - df_hmotion = head_vel(df_of, r_config) - df_hmotion['dbm_master_url'] = video_uri - - df_pose = head_pose(df_of, r_config) - df_pose['dbm_master_url'] = video_uri - - ut.save_output(df_hmotion, out_loc, fl_name, h_mov_dir, h_mov_ext) - ut.save_output(df_pose, out_loc, fl_name, h_pose_dir, h_pose_ext) - -def run_head_movement(video_uri, out_dir, r_config): - """ - Processing all patient's for getting movement artifacts for cdx_analysis workflow - -------------------------------- - -------------------------------- - Args: - video_uri: video path; input_dir : input directory for video's - out_dir: (str) Output directory for processed output; r_config: raw variable config object - """ - try: - - #filtering path to generate input & output path - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - of_csv_path = glob.glob(join(out_loc, fl_name + '_openface/*.csv')) - - - if len(of_csv_path)>0: - - of_csv = of_csv_path[0] - df_of = pd.read_csv(of_csv, error_bad_lines=False) - - logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name))) - calc_head_mov(video_uri, df_of, out_loc, fl_name, r_config) - except Exception as e: - logger.error('Failed to process video file') diff --git a/dbm_lib/dbm_features/raw_features/movement/voice_tremor.py b/dbm_lib/dbm_features/raw_features/movement/voice_tremor.py deleted file mode 100644 index 868fa152..00000000 --- 
a/dbm_lib/dbm_features/raw_features/movement/voice_tremor.py +++ /dev/null @@ -1,94 +0,0 @@ -import pandas as pd -import os -import glob -from os.path import join -import parselmouth -from parselmouth.praat import call, run_file -import numpy as np -import librosa -import json -import re -import logging - -from dbm_lib.dbm_features.raw_features.util import util as ut -from dbm_lib.dbm_features.raw_features.movement import DBMLIB_VTREMOR_LIB - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -vt_dir = 'movement/voice_tremor' -csv_ext = '_vtremor.csv' - -#Executing praat script using parselmouth function -def tremor_praat(snd_file,r_cfg): - """ - Generating Voice tremor endpoint dataframe - Args: - snd_file: (.wav) parsed audio file - r_cfg: Raw variable configuration file - Returns tremor endpoint dataframe - """ - snd = parselmouth.Sound(snd_file) - tremor_var = run_file(snd,DBMLIB_VTREMOR_LIB,capture_output=True) - new_tremor_var = re.sub('--undefined--', '0', tremor_var[1]) - res = json.loads(new_tremor_var) - tremor_df = pd.DataFrame(res,index=['0',]) - tremor_df.columns = [r_cfg.mov_freq_trem_freq,r_cfg.mov_amp_trem_freq,r_cfg.mov_freq_trem_index, - r_cfg.mov_amp_trem_index,r_cfg.mov_freq_trem_pindex,r_cfg.mov_amp_trem_pindex] - return tremor_df - -def prepare_vtrem_output(audio_file, out_loc, r_config, fl_name): - """ - Preparing voice tremor matrix - Args: - audio_file: (.wav) parsed audio file ; r_config: raw config object - out_loc: (str) Output directory for csv ; fl_name: file name - """ - df_tremor = tremor_praat(audio_file, r_config) - df_tremor[r_config.err_reason] = 'Pass'# will replace with threshold in future release - - logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name))) - ut.save_output(df_tremor, out_loc, fl_name, vt_dir, csv_ext) - -def prepare_empty_vt(out_loc, fl_name, r_config, error_txt): - - """ - Preparing empty voice tremor matrix - """ - cols = [r_config.mov_freq_trem_freq, 
r_config.mov_amp_trem_freq, r_config.mov_freq_trem_index, - r_config.mov_amp_trem_index, r_config.mov_freq_trem_pindex, r_config.mov_amp_trem_pindex, r_config.err_reason] - - out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, error_txt]] - df_tremor = pd.DataFrame(out_val, columns = cols) - - logger.info('Saving Output file {} '.format(os.path.join(out_loc, fl_name))) - ut.save_output(df_tremor, out_loc, fl_name, vt_dir, csv_ext) - -def run_vtremor(video_uri, out_dir, r_config): - """ - Processing all patient's for fetching Formant freq - --------------- - --------------- - Args: - video_uri: video path; r_config: raw variable config object - out_dir: (str) Output directory for processed output - """ - try: - - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: - - audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) - - if float(aud_dur) < 0.5: - logger.info('Output file {} size is less than 0.5sec'.format(audio_file)) - - error_txt = 'error: length less than 0.5 sec' - prepare_empty_vt(video_uri, out_loc, fl_name, error_txt) - return - prepare_vtrem_output(audio_file, out_loc, r_config, fl_name) - except Exception as e: - logger.error('Failed to compute Voice Tremor {} for {}'.format(e,video_uri)) - prepare_empty_vt(out_loc, fl_name, r_config, e) diff --git a/dbm_lib/dbm_features/raw_features/nlp/speech_features.py b/dbm_lib/dbm_features/raw_features/nlp/speech_features.py deleted file mode 100644 index 4217a968..00000000 --- a/dbm_lib/dbm_features/raw_features/nlp/speech_features.py +++ /dev/null @@ -1,51 +0,0 @@ -""" -file_name: speech_features -project_name: DBM -created: 2020-13-11 -""" - -import os -import numpy as np -import pandas as pd -import glob -from os.path import join -import logging -import shutil - -from dbm_lib.dbm_features.raw_features.util import util as ut -from dbm_lib.dbm_features.raw_features.util import 
nlp_util as n_util - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -speech_dir = 'speech/speech_feature' -speech_ext = '_nlp.csv' -transcribe_ext = 'speech/deepspeech/*_transcribe.csv' - -def run_speech_feature(video_uri, out_dir, r_config, tran_tog): - """ - Processing all patient's for fetching nlp features - ------------------- - ------------------- - Args: - video_uri: video path; r_config: raw variable config object - out_dir: (str) Output directory for processed output - """ - try: - - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - - transcribe_path = glob.glob(join(out_loc, transcribe_ext)) - if len(transcribe_path)>0: - - transcribe_df = pd.read_csv(transcribe_path[0]) - df_speech= n_util.process_speech(transcribe_df, r_config) - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_speech, out_loc, fl_name, speech_dir, speech_ext) - - if (tran_tog == None) or (tran_tog != 'on'): - shutil.rmtree(os.path.dirname(transcribe_path[0])) - - except Exception as e: - logger.error('Failed to process video file') diff --git a/dbm_lib/dbm_features/raw_features/nlp/transcribe.py b/dbm_lib/dbm_features/raw_features/nlp/transcribe.py deleted file mode 100644 index 0e23d824..00000000 --- a/dbm_lib/dbm_features/raw_features/nlp/transcribe.py +++ /dev/null @@ -1,84 +0,0 @@ -""" -file_name: transcribe -project_name: DBM -created: 2020-10-11 -""" - -import pandas as pd -import numpy as np -import librosa -import glob -from os.path import join -import logging - -from dbm_lib.dbm_features.raw_features.util import util as ut -from dbm_lib.dbm_features.raw_features.util import nlp_util as n_util - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -formant_dir = 'speech/deepspeech' -csv_ext = '_transcribe.csv' -error_txt = 'error: length less than 0.1' - -def calc_transcribe(video_uri, audio_file, out_loc, fl_name, r_config, deep_path, aud_dur): - """ - Preparing Formant freq matrix - Args: - 
audio_file: (.wav) parsed audio file; fl_name: input file name - out_loc: (str) Output directory; r_config: raw variable config - """ - - text = n_util.process_deepspeech(audio_file, deep_path) - df_formant = pd.DataFrame([text], columns=[r_config.nlp_transcribe]) - - df_formant.replace('', np.nan, regex=True,inplace=True) - df_formant[r_config.nlp_totalTime] = aud_dur - df_formant[r_config.err_reason] = 'Pass'# will replace with threshold in future release - df_formant['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_formant, out_loc, fl_name, formant_dir, csv_ext) - -def empty_transcribe(video_uri, out_loc, fl_name, r_config): - - """ - Preparing empty formant frequency matrix if something fails - """ - cols = [r_config.nlp_transcribe, r_config.nlp_totalTime, r_config.err_reason] - out_val = [[np.nan, np.nan, error_txt]] - df_fm = pd.DataFrame(out_val, columns = cols) - df_fm['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext) - -def run_transcribe(video_uri, out_dir, r_config, deep_path): - - """ - Processing all patient's for fetching Formant freq - --------------- - --------------- - Args: - video_uri: video path; r_config: raw variable config object - out_dir: (str) Output directory for processed output; deep_path: deepspeech build path - """ - try: - - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: - - audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) - - if float(aud_dur) < 0.1: - logger.info('Output file {} size is less than 0.1 sec'.format(audio_file)) - - empty_transcribe(video_uri, out_loc, fl_name, r_config) - return - - calc_transcribe(video_uri, audio_file, out_loc, fl_name, r_config, deep_path, aud_dur) - except Exception as e: - logger.error('Failed to process 
audio file') - \ No newline at end of file diff --git a/dbm_lib/dbm_features/raw_features/util/nlp_util.py b/dbm_lib/dbm_features/raw_features/util/nlp_util.py deleted file mode 100644 index a13edc9b..00000000 --- a/dbm_lib/dbm_features/raw_features/util/nlp_util.py +++ /dev/null @@ -1,212 +0,0 @@ -""" -file_name: nlp_util -project_name: DBM -created: 2020-10-11 -""" - -import subprocess -import json -import numpy as np -import pandas as pd -import os -import logging - -import nltk -import re -from lexicalrichness import LexicalRichness -from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -#Speech to text using Deepspeech 0.9.1 -def deepspeech(AUDIO_FILE,deep_path): - """ - Extracting text from audio using Deep Speech neural network trained model - Returns: - Text: text which is extracted from audio - """ - api = 'deepspeech' - arg_speech0 = '--model' - arg_speech_path0 = os.path.join(deep_path, 'deepspeech-0.9.1-models.pbmm') - arg_speech1 = '--scorer' - arg_speech_path1 = os.path.join(deep_path, 'deepspeech-0.9.1-models.scorer') - arg_audio = "--audio" - - out = subprocess.Popen([api, arg_speech0, arg_speech_path0, arg_speech1, arg_speech_path1, arg_audio, AUDIO_FILE], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) - logger.info('Deepspeech output...... 
{}'.format(out)) - try: - stdout,stderr = out.communicate() - except: - return "error", "error" - #print(stderr) - return stdout,stderr - -def deep_speech_output_clean(result): - """ - Parsing deep speech output(text) - Return: - Text from speech - """ - text = "" - if len(result)>0: - res_split = str(result[0]).split('\\n') - - if len(res_split)>0: - for i in range(len(res_split)): - if 'Inference took' in res_split[i]: - text = res_split[i + 1] - return text - return text - -def process_deepspeech(audio_file,deep_path): - """ - Transcribing audio to extract text from speech - """ - deep_output = deepspeech(audio_file,deep_path) - deep_text= deep_speech_output_clean(deep_output) - - return deep_text - -def nltk_download(): - - try: - nltk.data.find('tokenizers/punkt') - - except LookupError: - logger.info('punkt is not available') - nltk.download('punkt') - - try: - nltk.data.find('averaged_perceptron_tagger') - - except LookupError: - logger.info('averaged_perceptron_tagger is not available') - nltk.download('averaged_perceptron_tagger') - -def empty_speech(r_config, master_url, error_txt): - """ - Preparing empty speech matrix with error - Args: - r_config: raw config file object - error_txt: Error message during transcription - - Returns: - Empty dataframe for speech features with error - """ - - col = [r_config.nlp_numSentences, r_config.nlp_singPronPerAns, r_config.nlp_singPronPerSen, r_config.nlp_pastTensePerAns, - r_config.nlp_pastTensePerSen, r_config.nlp_pronounsPerAns, r_config.nlp_pronounsPerSen, r_config.nlp_verbsPerAns, - r_config.nlp_verbsPerSen, r_config.nlp_adjectivesPerAns, r_config.nlp_adjectivesPerSen, r_config.nlp_nounsPerAns, - r_config.nlp_nounsPerSen, r_config.nlp_sentiment_mean, r_config.nlp_mattr, r_config.nlp_wordsPerMin, - r_config.nlp_totalTime, r_config.err_reason] - - df_speech = pd.DataFrame([[np.nan] * len(col) + [error_txt]], columns = col) - df_speech['dbm_master_url'] = master_url - - return df_speech - -def 
divide_var(speech_var1, spech_var2): - """ - divide variables - """ - speech_var = np.nan - if spech_var2!=0: - speech_var = speech_var1/spech_var2 - return speech_var - -def process_speech(transcribe_df,r_config): - """ - Preparing speech features - Args: - transcribe_df: Transcribed dataframe - r_config: raw config file object - Returns: - Dataframe for speech features - """ - transcribe_df = transcribe_df.replace(np.nan, '', regex=True) - err_transcribe = transcribe_df[r_config.err_reason].iloc[0] - transcribe = transcribe_df[r_config.nlp_transcribe].iloc[0] - total_time = transcribe_df[r_config.nlp_totalTime].iloc[0] - master_url = transcribe_df['dbm_master_url'].iloc[0] - - #clean transcribe - transcribe = transcribe.replace(",", "") - transcribe = " ".join(re.findall(r"[\w']+|[.!?]", transcribe)) - - if err_transcribe != 'Pass': - df_speech = empty_speech(r_config, master_url, error_txt) - - return df_speech - - speech_dict = {} - nltk_download() - - sentences = nltk.tokenize.sent_tokenize(transcribe) - words_all = nltk.tokenize.word_tokenize(transcribe) - num_sentences = len(sentences) - - speech_dict[r_config.nlp_numSentences] = num_sentences - - #nlp_singPron - i_s = transcribe.count('I') - me_s = transcribe.count('me') - my_s = transcribe.count('my') - sing_count = i_s + me_s + my_s - - speech_dict[r_config.nlp_singPronPerAns] = sing_count if len(words_all)>0 else np.nan - speech_dict[r_config.nlp_singPronPerSen] = divide_var(speech_dict[r_config.nlp_singPronPerAns], num_sentences) - - tagged = nltk.pos_tag(transcribe.split()) - tagged_df = pd.DataFrame(tagged, columns=['word', 'pos_tag']) - - #Past tense per answer - all_POSs = tagged_df['pos_tag'].tolist() - speech_dict[r_config.nlp_pastTensePerAns] = all_POSs.count('VBD') if len(words_all)>0 else np.nan - speech_dict[r_config.nlp_pastTensePerSen] = divide_var(speech_dict[r_config.nlp_pastTensePerAns], num_sentences) - - #Pronoun per answer - pronounsPerAns = all_POSs.count('PRP') + 
all_POSs.count('PRP$') - speech_dict[r_config.nlp_pronounsPerAns] = pronounsPerAns if len(words_all)>0 else np.nan - speech_dict[r_config.nlp_pronounsPerSen] = divide_var(speech_dict[r_config.nlp_pronounsPerAns], num_sentences) - - #Verb per answer - verbPerAns = all_POSs.count('VB') + all_POSs.count('VBD') + all_POSs.count('VBG') \ - + all_POSs.count('VBN') + all_POSs.count('VBP') + all_POSs.count('VBZ') - speech_dict[r_config.nlp_verbsPerAns] = verbPerAns if len(words_all) > 0 else np.nan - speech_dict[r_config.nlp_verbsPerSen] = divide_var(speech_dict[r_config.nlp_verbsPerAns], num_sentences) - - #Adjective per answer - adjectivesAns = all_POSs.count('JJ') + all_POSs.count('JJR') + all_POSs.count('JJS') - speech_dict[r_config.nlp_adjectivesPerAns] = adjectivesAns if len(words_all) > 0 else np.nan - speech_dict[r_config.nlp_adjectivesPerSen] = divide_var(speech_dict[r_config.nlp_adjectivesPerAns], num_sentences) - - #Noun per answer - nounsAns = all_POSs.count('NN') + all_POSs.count('NNP') + all_POSs.count('NNS') - speech_dict[r_config.nlp_nounsPerAns] = nounsAns if len(words_all) > 0 else np.nan - speech_dict[r_config.nlp_nounsPerSen] = divide_var(speech_dict[r_config.nlp_nounsPerAns], num_sentences) - - #Sentiment analysis - vader = SentimentIntensityAnalyzer() - sentence_valences = [] - - for s in sentences: - sentiment_dict = vader.polarity_scores(s) - sentence_valences.append(sentiment_dict['compound']) - - speech_dict[r_config.nlp_sentiment_mean] = np.mean(sentence_valences) if len(sentence_valences) > 0 else np.nan - non_punc = list(value for value in words_all if value not in ['.','!','?']) - - non_punc_as_str = " ".join(str(non_punc)) - lex = LexicalRichness(non_punc_as_str) - speech_dict[r_config.nlp_mattr] = lex.mattr(window_size=lex.words) if lex.words > 0 else np.nan - - #Number of words per minute - speech_dict[r_config.nlp_wordsPerMin] = divide_var(len(non_punc), total_time)*60 - speech_dict[r_config.nlp_totalTime] = total_time - 
speech_dict['dbm_master_url'] = master_url - - df_speech = pd.DataFrame([speech_dict]) - return df_speech diff --git a/dbm_lib/dbm_features/raw_features/util/util.py b/dbm_lib/dbm_features/raw_features/util/util.py deleted file mode 100644 index 7ed15c11..00000000 --- a/dbm_lib/dbm_features/raw_features/util/util.py +++ /dev/null @@ -1,112 +0,0 @@ -""" -file_name: util -project_name: DBM -created: 2020-20-07 -""" - -import os -import glob -import numpy as np -import subprocess - -def filter_path(video_url, out_dir): - - """ - Filtering video uri path to prepare input and ouptut location - - Args: - video_url: S3 bucket path for video - out_dir: Output directory path - - """ - - fl_name,_ = os.path.splitext(os.path.basename(video_url)) - input_loc = os.path.dirname(video_url) - out_loc = os.path.join(out_dir, fl_name) - return input_loc, out_loc, fl_name - -def save_output(df, out_loc, fl_name, f_dir, f_ext): - """ - creating output directory for Audio features - Args: - df: (dataframe) feature dataframe[ex: Formant freq, pitch] - out_loc: (dir) Output location where we want to save raw output - fl_name: file name - f_dir: directory name for a feature - f_ext: extension for a feature [ex: '_pose.csv'] - """ - full_f_name = fl_name + f_ext - dir_path = os.path.join(out_loc, f_dir) - - if not os.path.exists(dir_path): - os.makedirs(dir_path) - - sav_path = os.path.join(dir_path,full_f_name) - df.to_csv(sav_path, index=False) - -def audio_process(base_dir,video_url): - """ - Parsing cleaned audio files(Audio files without IMA voice) - Args: - base_dir: Base path for raw data - video_url: Raw video file path - """ - new_video_url = base_dir+'/'.join(video_url[2:]) - split_val = new_video_url.split('/') - wav_path = '/'.join(split_val[0:len(split_val)-1]) - audio_split_check = glob.glob(wav_path + '/*_split.wav') - return audio_split_check - -def compute_open_face_features(input_filepath, - output_directory, - open_face_executable, - au_static=False, - 
tracked_visualization=False, - clobber=False, - verbose=True): - """ - Runs OpenFace on an input video. - See https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments - Args: - input_filepath: - output_directory: - au_static: - tracked_visualization: - open_face_executable: - clobber: (bool) if True existing files will be overwritten - verbose: - Returns: - (str) path to output csv file - Raises: - IOError if OpenFace executable is missing - """ - - if not os.path.isfile(open_face_executable): - raise IOError("OpenFace executable {} could not be found.".format(open_face_executable)) - - bn, _ = os.path.splitext(os.path.basename(input_filepath)) - if not output_directory: - output_directory = os.path.join(os.path.dirname(input_filepath), bn + '_openface') - - output_csv = os.path.join(output_directory, bn + '.csv') - if not os.path.isfile(output_csv) or clobber: - call = [open_face_executable, ] - if au_static: - call += ['-au_static', ] - - if tracked_visualization: - call += ['-tracked', ] - - call += ['-q', '-2Dfp', '-3Dfp', '-pdmparams', '-pose', '-aus', '-gaze'] - call += ['-f', input_filepath, '-out_dir', output_directory] - - if verbose: - print('Computing OpenFace features {} from video file'.format(input_filepath)) - subprocess.check_output(call) - if verbose: - print('OpenFace features saved to {}'.format(output_directory)) - else: - if verbose: - print('Output file {} already exists'.format(output_csv)) - - return os.path.join(output_directory, bn + '.csv') diff --git a/dbm_lib/dbm_features/raw_features/util/video_util.py b/dbm_lib/dbm_features/raw_features/util/video_util.py deleted file mode 100644 index 2a60480e..00000000 --- a/dbm_lib/dbm_features/raw_features/util/video_util.py +++ /dev/null @@ -1,190 +0,0 @@ -""" -file_name: video_util -project_name: DBM -created: 2020-20-07 -""" - -import pandas as pd -import numpy as np -import glob -from dbm_lib.dbm_features.raw_features.util import util as ut - -def 
smooth(x,window_len=11,window='hanning'): - """smooth the data using a window with requested size. - - This method is based on the convolution of a scaled window with the signal. - The signal is prepared by introducing reflected copies of the signal - (with the window size) in both ends so that transient parts are minimized - in the begining and end part of the output signal. - - input: - x: the input signal - window_len: the dimension of the smoothing window; should be an odd integer - window: the type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman' - flat window will produce a moving average smoothing. - - output: - the smoothed signal - - example: - - t=linspace(-2,2,0.1) - x=sin(t)+randn(len(t))*0.1 - y=smooth(x) - - see also: - - numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve - scipy.signal.lfilter - - TODO: the window parameter could be the window itself if an array instead of a string - NOTE: length(output) != length(input), to correct this: return y[(window_len/2-1):-(window_len/2)] instead of just y. 
- """ - if x.ndim != 1: - raise (ValueError, "smooth only accepts 1 dimension arrays.") - if x.size < window_len: - raise (ValueError, "Input vector needs to be bigger than window size.") - if window_len<3: - return x - if not window in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']: - raise (ValueError, "Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'") - s=np.r_[x[window_len-1:0:-1],x,x[-2:-window_len-1:-1]] - #print(len(s)) - if window == 'flat': #moving average - w=np.ones(window_len,'d') - else: - w=eval('np.'+window+'(window_len)') - y=np.convolve(w/w.sum(),s,mode='valid') - return y[int(window_len/2):-int(window_len/2)] - -def filter_by_confidence_and_thresh(x, fea, thresh): - if x['s_confidence'] > 0.2 and np.fabs(x[fea]) < thresh: - return x[fea] - else: - return np.NaN - -def add_au_emotion(x, emotion,emotion_type,exp_type): - """ - computing individula emotion expressivity matrix - Args: - emotion: Action Unit - """ - error_reason = 'Pass' - if x['s_confidence'] > 0.8: #if using smooth, no need for 'success' - sum_r = 0 - cnt = 0 - for au in emotion: - au_c_label = " AU{:02d}_c".format(au) - au_r_label = " AU{:02d}_r".format(au) - if x[au_c_label]==1 and (not np.isnan(x[au_r_label])): #there are data with face in, but au_c=0 - sum_r += x[au_r_label] - cnt += 6 - if exp_type=='full' and x[au_c_label]==0: #Logic to compute emotion expressivity when all AU's are present - cnt = 0 - break - if cnt > 0: - sum_r /= cnt - else: - sum_r = 0 - v_emo = x[emotion_type] + sum_r - else: - v_emo = np.NaN - error_reason = 'confidence less than 80%' - - return v_emo, error_reason - -def add_au_occ(x, emotion,emotion_type): - """ - computing individula emotion presence - Args: - emotion: Action Unit - """ - au_pres = [] - em_pres = 0 - error_reason = 'Pass' - if x['s_confidence'] > 0.8: #if using smooth, no need for 'success' - for au in emotion: - au_c_label = " AU{:02d}_c".format(au) - if x[au_c_label]==1: #there are data with face in, but 
au_c=0 - au_pres.append(1) - - if len(au_pres) == len(emotion): - em_pres = 1 - else: - em_pres = np.NaN - error_reason = 'confidence less than 80%' - return em_pres, error_reason - -def emotion_exp(em_au,of,em_col,err_col): - """ - Computing individual emotion expressivity and adding it to dataframe - """ - for emotion in em_au: - of[[em_col[0],err_col]]=of.apply(add_au_emotion, args=(emotion,em_col[0],'partial',), axis=1, result_type='expand') - of[[em_col[1],err_col]]=of.apply(add_au_emotion, args=(emotion,em_col[1],'full',), axis=1, result_type='expand') - -def emotion_pres(em_au,of,em_col,err_col): - """ - Computing individual emotion expressivity and adding it to dataframe - """ - for emotion in em_au: - of[[em_col,err_col]]=of.apply(add_au_occ, args=(emotion,em_col,), axis=1, result_type='expand') - -def calc_of_for_video(of,face_cfg,fe_cfg): - """ - Creating dataframe for emotion expressivity - """ - new_cols = [fe_cfg.hap_exp,fe_cfg.sad_exp,fe_cfg.sur_exp,fe_cfg.fea_exp,fe_cfg.ang_exp,fe_cfg.dis_exp,fe_cfg.con_exp, - fe_cfg.pai_exp,fe_cfg.neg_exp,fe_cfg.pos_exp,fe_cfg.neu_exp,fe_cfg.com_lower_exp,fe_cfg.com_upper_exp, - fe_cfg.cai_exp,fe_cfg.com_exp,fe_cfg.happ_occ,fe_cfg.sad_occ,fe_cfg.sur_occ,fe_cfg.fea_occ,fe_cfg.ang_occ, - fe_cfg.dis_occ,fe_cfg.con_occ,fe_cfg.hap_exp_full,fe_cfg.sad_exp_full,fe_cfg.sur_exp_full,fe_cfg.fea_exp_full, - fe_cfg.ang_exp_full,fe_cfg.dis_exp_full,fe_cfg.con_exp_full,fe_cfg.pai_exp_full,fe_cfg.neg_exp_full, - fe_cfg.pos_exp_full,fe_cfg.neu_exp_full,fe_cfg.cai_exp_full,fe_cfg.com_lower_exp_full,fe_cfg.com_upper_exp_full, - fe_cfg.com_exp_full] - of[new_cols] = pd.DataFrame([[0] * len(new_cols)], index=of.index) - of[fe_cfg.err_reason] = 'Pass' - - #Composite happiness expressivity - emotion_exp(face_cfg.happiness,of,[fe_cfg.hap_exp,fe_cfg.hap_exp_full],fe_cfg.err_reason) - #Composite sadness expressivity - emotion_exp(face_cfg.sadness,of,[fe_cfg.sad_exp,fe_cfg.sad_exp_full],fe_cfg.err_reason) - #Composite surprise expressivity 
- emotion_exp(face_cfg.surprise,of,[fe_cfg.sur_exp,fe_cfg.sur_exp_full],fe_cfg.err_reason) - #Composite fear expressivity - emotion_exp(face_cfg.fear,of,[fe_cfg.fea_exp,fe_cfg.fea_exp_full],fe_cfg.err_reason) - #Composite anger expressivity - emotion_exp(face_cfg.anger,of,[fe_cfg.ang_exp,fe_cfg.ang_exp_full],fe_cfg.err_reason) - #Composite disgust expressivity - emotion_exp(face_cfg.disgust,of,[fe_cfg.dis_exp,fe_cfg.dis_exp_full],fe_cfg.err_reason) - #Composite contempt expressivity - emotion_exp(face_cfg.contempt,of,[fe_cfg.con_exp,fe_cfg.con_exp_full],fe_cfg.err_reason) - #Composite Negative Expressivity - emotion_exp(face_cfg.NEG_ACTION_UNITS,of,[fe_cfg.neg_exp,fe_cfg.neg_exp_full],fe_cfg.err_reason) - #Composite Positive Expressivity - emotion_exp(face_cfg.POS_ACTION_UNITS,of,[fe_cfg.pos_exp,fe_cfg.pos_exp_full],fe_cfg.err_reason) - #Composite Neutral Expressivity - emotion_exp(face_cfg.NET_ACTION_UNITS,of,[fe_cfg.neu_exp,fe_cfg.neu_exp_full],fe_cfg.err_reason) - #Composite Activation Expressivity - emotion_exp(face_cfg.cai,of,[fe_cfg.cai_exp,fe_cfg.cai_exp_full],fe_cfg.err_reason) - #Composite Expressivity - emotion_exp(face_cfg.ACTION_UNITS,of,[fe_cfg.com_exp,fe_cfg.com_exp_full],fe_cfg.err_reason) - #Composite lower face expressivity - emotion_exp(face_cfg.LOWER_ACTION_UNITS,of,[fe_cfg.com_lower_exp,fe_cfg.com_lower_exp_full],fe_cfg.err_reason) - #Composite upper face Expressivity - emotion_exp(face_cfg.UPPER_ACTION_UNITS,of,[fe_cfg.com_upper_exp,fe_cfg.com_upper_exp_full],fe_cfg.err_reason) - #Composite pain expressivity - emotion_exp(face_cfg.pain,of,[fe_cfg.pai_exp,fe_cfg.pai_exp_full],fe_cfg.err_reason) - #AU happiness presence - emotion_pres(face_cfg.happiness,of,fe_cfg.happ_occ,fe_cfg.err_reason) - #AU Sad presence - emotion_pres(face_cfg.sadness,of,fe_cfg.sad_occ,fe_cfg.err_reason) - #AU Surprise presence - emotion_pres(face_cfg.surprise,of,fe_cfg.sur_occ,fe_cfg.err_reason) - #AU fear presence - 
emotion_pres(face_cfg.fear,of,fe_cfg.fea_occ,fe_cfg.err_reason) - #AU anger presence - emotion_pres(face_cfg.anger,of,fe_cfg.ang_occ,fe_cfg.err_reason) - #AU disgust presence - emotion_pres(face_cfg.disgust,of,fe_cfg.dis_occ,fe_cfg.err_reason) - #AU contempt presence - emotion_pres(face_cfg.contempt,of,fe_cfg.con_occ,fe_cfg.err_reason) diff --git a/dbm_lib/dbm_features/raw_features/video/__init__.py b/dbm_lib/dbm_features/raw_features/video/__init__.py deleted file mode 100644 index b5ca7da1..00000000 --- a/dbm_lib/dbm_features/raw_features/video/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -""" -file_name: __init__ -project_name: DBM -created: 2020-20-07 -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -DBMLIB_PATH = os.path.dirname(__file__) -DBMLIB_FACE_CONFIG = os.path.abspath(os.path.join(DBMLIB_PATH, '../../../../resources/services/face_util.yml')) \ No newline at end of file diff --git a/dbm_lib/dbm_features/raw_features/video/face_asymmetry.py b/dbm_lib/dbm_features/raw_features/video/face_asymmetry.py deleted file mode 100644 index 4249abcb..00000000 --- a/dbm_lib/dbm_features/raw_features/video/face_asymmetry.py +++ /dev/null @@ -1,355 +0,0 @@ -""" -file_name: face_asymmetry.py -project_name: DBM -created: 2020-20-07 -""" - -from mpl_toolkits import mplot3d -from matplotlib import pyplot as plt -import time -import numpy as np -import os -import datetime -import glob -import cv2 -from scipy.spatial.transform import Rotation as R -import subprocess -import pandas as pd -from os.path import join -import logging - -from dbm_lib.dbm_features.raw_features.video.face_config.face_config_reader import ConfigFaceReader -from dbm_lib.dbm_features.raw_features.util import video_util as vu -from dbm_lib.dbm_features.raw_features.util import util as ut - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -face_asym_dir = 'facial/face_asymmetry' -csv_ext = 
'_facasym.csv' - -cv2_color_purple = (254,19,188) -color_blue = (0,0,1.0) -color_green = (0,1.0,0) -color_red = (1.0,0,0) -color_y = (1.0,1.0,0) - -error_code_message = { - 0: 'pass', - 1: 'confidence less than 80%', - } -error_message_code = {y:x for x,y in error_code_message.items()} - -def visualize_vid(fn, attr=None, write_out=False): - - vid = cv2.VideoCapture(fn) - tot = int(vid.get(cv2.CAP_PROP_FRAME_COUNT)) - fps = vid.get(cv2.CAP_PROP_FPS) - frame_width = int(vid.get(3)) - frame_height = int(vid.get(4)) - - if write_out: - fig_w = 680 #680 667 676 #frame_width in order of Ali, Vennessa, synthesis - fig_h = 659 #659 659 659 #frame_height - out_vid = cv2.VideoWriter('out.mp4',cv2.VideoWriter_fourcc(*'MP4V'), fps, (fig_w,fig_h)) - - plt.figure(figsize=(8, 8)) - try: - frameid = 0 - while(True): - ret, frame = vid.read() - if not ret: - # Release the Video Device if ret is false - vid.release() - print('Released Video Resource') - break - frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - frameid += 1 - logger.info(frameid, frame.shape) - - if 'lmks_frms' in attr: - lmks_frms = attr['lmks_frms'] - for i in range(lmks_frms[frameid].shape[0]): - cv2.circle(frame,(int(lmks_frms[frameid][i,0]),int(lmks_frms[frameid][i,1])), 2, cv2_color_purple, -1) - - if write_out: - cv2.putText(frame,'Frame: '+str(frameid), (10,50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 3) - - plt.subplot(211) - plt.imshow(frame) - plt.axis('off'); plt.pause(0.2); - - if 'score_asym' in attr: - ax = plt.subplot(212) - ax.cla() - ax.set_xlim(0,140) #ax.set_xlim(0,300) - ax.set_ylim(0,10) - - sa = attr['score_asym'] - s = sa[np.where(sa[:,0] <= frameid),:][0,:,:] - - for i in range(1,s.shape[1]): - plt.plot(s[:,0], s[:,i]) - - plt.legend(['mouth', 'eyebrow', 'eye', 'mouth+eye+eyebrow']) - plt.minorticks_on() - plt.grid(b=True, which='major', color='r', linestyle='-') - plt.grid(b=True, which='minor', color='r', linestyle='--') - - plt.savefig('tmp.png', bbox_inches='tight') - 
print(cv2.imread('tmp.png').shape) - - plt.clf() - if write_out: - out_vid.write(cv2.imread('tmp.png')) - - except KeyboardInterrupt: - # Release the Video Device - vid.release() - if write_out: - out_vid.release() - logger.info('Exception, and Video Resource Released') - - if write_out: - out_vid.release() - - -def retrieve_attr(of_df): - ''' - Retrieve landmarks and pose_translation for each frame from openface output - Args: - of_df: dataframe output from openface, including detected landmark coordinates - Returns: - lmks_frms: dictionary, with frame id as key and 68 landmark set as value - pose_p: dictionary, with frame id as key and pose param as value - ''' - tot_lmks = 68 # openface specific - if len([i for i in of_df.columns.to_list() if ' x_' in i]) != tot_lmks: - return {} - - lmks_frms = {} - pose_p = {} - - for fi in sorted(of_df['frame'].to_list()): - lmks = np.zeros((tot_lmks,6)) - r = of_df[of_df['frame']==fi] - - for i in range(tot_lmks): - lmk_y = r[' y_'+str(i)].iloc[0] - lmk_x = r[' x_'+str(i)].iloc[0] - lmk_X = r[' X_'+str(i)].iloc[0] - lmk_Y = r[' Y_'+str(i)].iloc[0] - lmk_Z = r[' Z_'+str(i)].iloc[0] - - confi = r[' confidence'] - lmks[i,:] = [lmk_x, lmk_y, lmk_X, lmk_Y, lmk_Z, confi] - - lmks_frms[fi] = lmks - pose_p[fi] = [r[' pose_Tx'].iloc[0], r[' pose_Ty'].iloc[0], r[' pose_Tz'].iloc[0], - r[' pose_Rx'].iloc[0], r[' pose_Ry'].iloc[0], r[' pose_Rz'].iloc[0]] - - return lmks_frms, pose_p - - -def mirror_point(a, b, c, d, x1, y1, z1): - # mirror a point w.r.t a 3D plane - k =(-a * x1-b * y1-c * z1-d)/float((a * a + b * b + c * c)) - - x2 = a * k + x1 - y2 = b * k + y1 - z2 = c * k + z1 - - x3 = 2 * x2-x1 - y3 = 2 * y2-y1 - z3 = 2 * z2-z1 - return [x3, y3, z3] - - -def dist_vec2plane(vec, nrm): - # Calculate the projected length of a vector (vec) to a plane defined by its normal (nrm) - return np.sqrt(np.dot(vec, vec) - np.dot(vec, nrm)**2) - - -def vis_lmks3d(lmks_frms, vis_idx): - """ - Visualizing facial landmarks - """ - fig = plt.figure() 
- color_type = ['b','g','r','y','c'] - assert len(color_type) > len(vis_idx) - - for fi in sorted(list(lmks_frms.keys())): - ax = plt.axes(projection="3d") - for i,vi in enumerate(vis_idx): - ax.scatter(lmks_frms[fi][vi,2], lmks_frms[fi][vi,3], lmks_frms[fi][vi,4], c=color_type[i]) - - ax.axes.set_xlim3d(left=-75, right=100) - ax.axes.set_ylim3d(bottom=-200, top=25) - ax.axes.set_zlim3d(bottom=440, top=560) - ax.view_init(-89, -90) #elev, ariz - plt.title(str(fi)); ax.set_xlabel('X'); ax.set_ylabel('Y'); ax.set_zlabel('Z') - plt.pause(0.2) - plt.cla() - plt.draw() - -def calc_fac_asymmetry(attr, is_vis=False): - ''' - Quantify facial asymmetry - Args: - attr: attribute dictionary containing necessary features for calculation, e.g., - lmks_frms: dictionary, with frame id as key and 68 landmark set (OpenFace) as value - pose_param: dictionary, with frame id as key and pose param as value - Returns: - score_asym: 2D array of size (num_frms, num_asymm_fea), with frame id as the 0th column, and each remaining column as one asymmetry feature - ''' - # openface landmark indices - lmks_ref_idx = list(range(0,17)) + list(range(27,36)) - lmks_mid_idx = [27,28,29,30,33,51,62,66,57,8] - lmks_rgt_idx = [0,1,2,3,4,5,6,7, - 17,18,19,20,21, - 36,37,38,39,40,41, - 48,49,50, - 59,58, - 60,61, - 67] - lmks_lft_idx = [16,15,14,13,12,11,10,9, - 26,25,24,23,22, - 45,44,43,42,47,46, - 54,53,52, - 55,56, - 64,63, - 65] - - lmks_mth_idx = list(range(48,68)) - lmks_ebr_idx = list(range(17,27)) - lmks_eye_idx = list(range(36,48)) - assert len(lmks_lft_idx)==len(lmks_rgt_idx) - - fea_list = ['mouth', 'eyebrow', 'eye', 'composite'] - score_asym = np.empty(shape=(0, 0)) - - if ('lmks_frms' in attr) and ('pose_param' in attr): - lmks_frms = attr['lmks_frms'] - pose_p = attr['pose_param'] - - if is_vis: - vis_lmks3d(lmks_frms, [lmks_lft_idx, lmks_rgt_idx, lmks_mid_idx, lmks_ref_idx]) - - score_asym = np.zeros((len(lmks_frms),len(fea_list)+1+1)) # +1: extra column for error code - if is_vis: - fig 
= plt.figure() - ax = plt.axes(projection="3d") - - for s,fi in enumerate(sorted(list(lmks_frms.keys()))): - lmks_3d = lmks_frms[fi][:,2:5] - pose = pose_p[fi] - err_code = error_message_code['pass'] - - if lmks_frms[fi][0,5] < 0.8: - err_code = error_message_code['confidence less than 80%'] - score_asym[s,:] = [fi,np.NaN,np.NaN,np.NaN,np.NaN,err_code] - continue - - rx = R.from_euler('x', pose[3]) - ry = R.from_euler('y', pose[4]) - rz = R.from_euler('z', pose[5]) - - vec_pose = rz.apply(ry.apply(rx.apply([0,0,1]))) - anc_idx = [30, 27, 8] # for central plane estimation - nrm = np.cross(lmks_3d[anc_idx[2],:] - lmks_3d[anc_idx[0],:], - lmks_3d[anc_idx[1],:] - lmks_3d[anc_idx[0],:]) - - nrm = nrm / np.linalg.norm(nrm) - a,b,c = nrm - d = np.dot(nrm, lmks_3d[anc_idx[0],:]) - - dist_L2R_mth = [] - dist_L2R_ebr = [] - dist_L2R_eye = [] - dist_com = [] - - lmks_rfl = np.empty((0,3)) - src_idx = lmks_lft_idx - - for k,idx in enumerate(src_idx): - p_rfl = np.array(mirror_point(a, b, c, -d, lmks_3d[idx,0], lmks_3d[idx,1], lmks_3d[idx,2])) - lmks_rfl = np.vstack((lmks_rfl, p_rfl)) - dist = dist_vec2plane((p_rfl-lmks_3d[lmks_rgt_idx[k],:]), vec_pose) - - if idx in lmks_mth_idx: - dist_L2R_mth.append(dist) - if idx in lmks_ebr_idx: - dist_L2R_ebr.append(dist) - if idx in lmks_eye_idx: - dist_L2R_eye.append(dist) - if (idx in lmks_mth_idx) or (idx in lmks_ebr_idx) or (idx in lmks_eye_idx): - dist_com.append(dist) - score_asym[s,:] = [fi,np.mean(dist_L2R_mth),np.mean(dist_L2R_ebr),np.mean(dist_L2R_eye),np.mean(dist_com),err_code] - - if is_vis: - ax.scatter(lmks_3d[:,0], lmks_3d[:,1], lmks_3d[:,2]) - ax.scatter(lmks_rfl[:,0], lmks_rfl[:,1], lmks_rfl[:,2], c='y') - ax.scatter(pose_p[fi][0], pose_p[fi][1], pose_p[fi][2], c='c') - plt.title('mirrored landmarks, frame: '+str(fi)); ax.set_xlabel('X'); ax.set_ylabel('Y'); ax.set_zlabel('Z') - plt.pause(0.2) - plt.cla() - plt.draw() - - return score_asym - - -def calc_asym_feature(open_face_csv, f_cfg): - """ - Calculating facial 
asymmetry features and preparing final df - """ - df_list = [] - - of_df = pd.read_csv(open_face_csv, error_bad_lines=False) - lmks_frms, pose_p = retrieve_attr(of_df) - - attr = {'lmks_frms': lmks_frms, 'pose_param': pose_p} - score_asym = calc_fac_asymmetry(attr) - - df_score_asym = pd.DataFrame(score_asym, columns=['frame', f_cfg.fac_AsymMaskMouth, f_cfg.fac_AsymMaskEyebrow, - f_cfg.fac_AsymMaskEye, f_cfg.fac_AsymMaskCom, f_cfg.err_reason]) - df_score_asym[f_cfg.err_reason] = df_score_asym[f_cfg.err_reason].apply(lambda x: error_code_message[x]) - - df_score_asym['frame'] = of_df['frame'] - df_score_asym['face_id'] = of_df[' face_id'] - df_score_asym['timestamp'] = of_df[' timestamp'] - df_score_asym['confidence'] = of_df[' confidence'] - df_score_asym['success'] = of_df[' success'] - - df_list.append(df_score_asym) - return df_list - - -def run_face_asymmetry(video_uri, out_dir, f_cfg): - """ - Processing all patient's for calculating facial asymmetry - --------------- - --------------- - Args: - video_uri: video path; f_cfg: face config object - out_dir: (str) Output directory for processed output - """ - try: - - #Baseline logic - cfr = ConfigFaceReader() - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - - of_csv_path = glob.glob(join(out_loc, fl_name + '_openface/*.csv')) - if len(of_csv_path)>0: - - of_csv = of_csv_path[0] - asym_df_list = calc_asym_feature(of_csv, f_cfg) - - asym_final_df = pd.concat(asym_df_list, ignore_index=True) - asym_final_df['dbm_master_url'] = video_uri - - logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name))) - ut.save_output(asym_final_df, out_loc, fl_name, face_asym_dir, csv_ext) - - except Exception as e: - logger.error('Failed to process video file') \ No newline at end of file diff --git a/dbm_lib/dbm_features/raw_features/video/face_au.py b/dbm_lib/dbm_features/raw_features/video/face_au.py deleted file mode 100644 index 3a3f3338..00000000 --- 
a/dbm_lib/dbm_features/raw_features/video/face_au.py +++ /dev/null @@ -1,98 +0,0 @@ -""" -file_name: face_au.py -project_name: DBM -created: 2020-20-07 -""" - -import os -import numpy as np -import pandas as pd -import datetime -import glob -from os.path import join -import logging - -from dbm_lib.dbm_features.raw_features.video.face_config.face_config_reader import ConfigFaceReader -from dbm_lib.dbm_features.raw_features.util import video_util as vu -from dbm_lib.dbm_features.raw_features.util import util as ut - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -face_au_dir = 'facial/face_au' -csv_ext = '_facau.csv' - - -def extract_col_nm_au(cols): - """ - Extract action unit (au) column names from openface output (csv) - Args: - cols: column names from open face output (csv) - Returns: - (list) list of au column names - """ - cols_lmk = [] - au_tags = ' AU' - cols_au = [c for c in cols if au_tags in c] - return cols_au - - -def au_col_nm_map(df): - """ - Rename dataframe action unit column names to match functional specifications v1.0 - Args: - df: dataframe - Returns: - dataframe with mapped variables - """ - dict_au_cols = {} - for col in list(df): - if ' AU' in col: - idx = col.rfind('_') - if idx > -1: - au_id = col[idx-2:idx] - if '_r' in col: - dict_au_cols[col] = 'fac_AU' + au_id + 'int' - if '_c' in col: - dict_au_cols[col] = 'fac_AU' + au_id + 'pres' - df.rename(columns=dict_au_cols, inplace=True) - return df - - -def run_face_au(video_uri, out_dir, f_cfg): - """ - Processing all patient's for fetching action units - --------------- - --------------- - Args: - video_uri: video path; f_cfg: face config object - out_dir: (str) Output directory for processed output - """ - try: - - #Baseline logic - cfr = ConfigFaceReader() - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - - of_csv_path = glob.glob(join(out_loc, fl_name + '_openface/*.csv')) - if len(of_csv_path)>0: - - df_of = pd.read_csv(of_csv_path[0], 
error_bad_lines=False) - df_au = df_of[extract_col_nm_au(df_of)] - df_au = df_au.copy() - - df_au['frame'] = df_of['frame'] - df_au['face_id'] = df_of[' face_id'] - df_au['timestamp'] = df_of[' timestamp'] - df_au['confidence'] = df_of[' confidence'] - df_au['success'] = df_of[' success'] - - df_au = au_col_nm_map(df_au) - df_au['dbm_master_url'] = video_uri - - logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name))) - ut.save_output(df_au, out_loc, fl_name, face_au_dir, csv_ext) - - except Exception as e: - logger.error('Failed to process video file') - \ No newline at end of file diff --git a/dbm_lib/dbm_features/raw_features/video/face_emotion_expressivity.py b/dbm_lib/dbm_features/raw_features/video/face_emotion_expressivity.py deleted file mode 100644 index d47966cf..00000000 --- a/dbm_lib/dbm_features/raw_features/video/face_emotion_expressivity.py +++ /dev/null @@ -1,86 +0,0 @@ -""" -file_name: process_emotion_expressivity -project_name: DBM -created: 2020-20-07 -""" - -import os -import numpy as np -import pandas as pd -import datetime -import glob -from os.path import join -import logging - -from dbm_lib.dbm_features.raw_features.video.face_config.face_config_reader import ConfigFaceReader -from dbm_lib.dbm_features.raw_features.util import video_util as vu -from dbm_lib.dbm_features.raw_features.util import util as ut - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -face_expr_dir = 'facial/face_expressivity' -csv_ext = '_facemo.csv' - -#Openface feature extraction -def of_feature(df_of, cfr, f_cfg): - """ - Creating dataframe for face expressivity - Args: - of: open face attributes - Returns: - (list) list of expressivity score for emotions - """ - df_list = [] - df_of['s_confidence'] = vu.smooth(df_of[' confidence'].values, window='flat').tolist() - - if 'AU' in cfr.SELECTED_FEATURES : - vu.calc_of_for_video(df_of, cfr, f_cfg) - #Normalizing facial expressivity for Composite and Negative expr(Range 0 to 
1) - - if len(df_of[f_cfg.neg_exp])>0: - df_of[f_cfg.neg_exp] = df_of[f_cfg.neg_exp]/5 - - if len(df_of[f_cfg.neg_exp_full])>0: - df_of[f_cfg.neg_exp_full] = df_of[f_cfg.neg_exp_full]/5 - - if len(df_of[f_cfg.com_exp])>0: - df_of[f_cfg.com_exp] = df_of[f_cfg.com_exp]/7 - - if len(df_of[f_cfg.com_exp_full])>0: - df_of[f_cfg.com_exp_full] = df_of[f_cfg.com_exp_full]/7 - - df_list.append(df_of) - return df_list - - -def run_face_expressivity(video_uri, out_dir, f_cfg): - """ - Processing all patient's for fetching facial landmarks - --------------- - --------------- - Args: - video_uri: video path; f_cfg: raw variable config object - out_dir: (str) Output directory for processed output - """ - try: - - #Baseline logic - cfr = ConfigFaceReader() - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - - of_csv_path = glob.glob(join(out_loc, fl_name + '_openface/*.csv')) - if len(of_csv_path)>0: - - df_of = pd.read_csv(of_csv_path[0], error_bad_lines=False) - df_of = df_of[cfr.AU_fl] - expr_df_list = of_feature(df_of, cfr, f_cfg) - - exp_final_df = pd.concat(expr_df_list, ignore_index=True) - exp_final_df['dbm_master_url'] = video_uri - - logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name))) - ut.save_output(exp_final_df, out_loc, fl_name, face_expr_dir, csv_ext) - - except Exception as e: - logger.error('Failed to process video file') diff --git a/dbm_lib/dbm_features/raw_features/video/face_landmark.py b/dbm_lib/dbm_features/raw_features/video/face_landmark.py deleted file mode 100644 index cc309508..00000000 --- a/dbm_lib/dbm_features/raw_features/video/face_landmark.py +++ /dev/null @@ -1,122 +0,0 @@ -""" -file_name: face_landmark -project_name: DBM -created: 2020-20-07 -""" - -import os -import numpy as np -import pandas as pd -import datetime -import glob -from os.path import join -import logging - -from dbm_lib.dbm_features.raw_features.video.face_config.face_config_reader import ConfigFaceReader -from 
dbm_lib.dbm_features.raw_features.util import video_util as vu -from dbm_lib.dbm_features.raw_features.util import util as ut - -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() - -face_lmk_dir = 'facial/face_landmark' -csv_ext = '_faclmk.csv' - -def extract_col_nm_lmk(cols): - """ - Extract landmark column names from openface output (csv) - Args: - cols: column names from open face output (csv) - Returns: - (list) list of landmark column names - """ - cols_lmk = [] - lmk_tags = [' y_', ' x_', ' X_', ' Y_', ' Z_'] - for c in cols: - if any(t in c for t in lmk_tags): - cols_lmk.append(c) - return cols_lmk - - -def lmk_col_nm_map(df): - """ - Rename dataframe landmark column names to match functional specifications v1.0 - Args: - df: dataframe - """ - dict_lmk_cols = {} - for col in list(df): - idx = col.rfind('_')+1 - if idx > 0: - lmk_id = col[idx:] if len(col[idx:])>1 else '0'+col[idx:] - if ' y_' in col: - dict_lmk_cols[col] = 'fac_LMK' + lmk_id + 'r' - if ' x_' in col: - dict_lmk_cols[col] = 'fac_LMK' + lmk_id + 'c' - if ' X_' in col: - dict_lmk_cols[col] = 'fac_LMK' + lmk_id + 'X' - if ' Y_' in col: - dict_lmk_cols[col] = 'fac_LMK' + lmk_id + 'Y' - if ' Z_' in col: - dict_lmk_cols[col] = 'fac_LMK' + lmk_id + 'Z' - df.rename(columns=dict_lmk_cols, inplace=True) - return df - - -def add_disp_3D(df): - """ - Add 3D displacement for each landmark - Args: - df: landmark dataframe - """ - df = df.sort_values(by=['frame'], ascending=False) - cols_lmk = [col for col in list(df) if 'fac_LMK' in col] - df_t = df[cols_lmk] - df_diff = df_t.diff() - df_diff = df_diff.pow(2) - - tot_lmk = 68 # 68 landmark model - for i in range(tot_lmk): - lmk_id = '{:02d}'.format(i) - df['fac_LMK'+lmk_id+'disp'] = df_diff[['fac_LMK'+lmk_id+'X', 'fac_LMK'+lmk_id+'Y', 'fac_LMK'+lmk_id+'Z']].sum(axis=1).apply(np.sqrt) - - return df - - -def run_face_landmark(video_uri, out_dir, f_cfg): - """ - Processing all patient's for fetching facial landmarks - --------------- - 
--------------- - Args: - video_uri: video path; f_cfg: raw variable config object - out_dir: (str) Output directory for processed output - """ - try: - - #Baseline logic - cfr = ConfigFaceReader() - input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - - of_csv_path = glob.glob(join(out_loc, fl_name + '_openface/*.csv')) - if len(of_csv_path)>0: - - df_of = pd.read_csv(of_csv_path[0], error_bad_lines=False) - df_lmk = df_of[extract_col_nm_lmk(df_of)] - df_lmk = df_lmk.copy() - - df_lmk['frame'] = df_of['frame'] - df_lmk['face_id'] = df_of[' face_id'] - df_lmk['timestamp'] = df_of[' timestamp'] - df_lmk['confidence'] = df_of[' confidence'] - df_lmk['success'] = df_of[' success'] - - df_lmk = lmk_col_nm_map(df_lmk) - df_lmk = add_disp_3D(df_lmk) - df_lmk['dbm_master_url'] = video_uri - - logger.info('Processing Output file {} '.format(join(out_loc, fl_name))) - ut.save_output(df_lmk, out_loc, fl_name, face_lmk_dir, csv_ext) - - except Exception as e: - logger.error('Failed to process video file') \ No newline at end of file diff --git a/opendbm/__init__.py b/opendbm/__init__.py new file mode 100644 index 00000000..c8528dad --- /dev/null +++ b/opendbm/__init__.py @@ -0,0 +1,4 @@ +from opendbm.api_lib.facial_activity import FacialActivity +from opendbm.api_lib.movement import Movement +from opendbm.api_lib.speech import Speech +from opendbm.api_lib.verbal_acoustics import VerbalAcoustics diff --git a/opendbm/api_lib/__init__.py b/opendbm/api_lib/__init__.py new file mode 100644 index 00000000..a7d49822 --- /dev/null +++ b/opendbm/api_lib/__init__.py @@ -0,0 +1 @@ +from .model import DEEEPSPEECH_URL, DEEPSPEECH_MODELS, OPENDBM_DATA, AudioModel diff --git a/opendbm/api_lib/facial_activity/__init__.py b/opendbm/api_lib/facial_activity/__init__.py new file mode 100644 index 00000000..a20716c8 --- /dev/null +++ b/opendbm/api_lib/facial_activity/__init__.py @@ -0,0 +1 @@ +from opendbm.api_lib.facial_activity.api import FacialActivity diff --git 
a/opendbm/api_lib/facial_activity/_action_unit.py b/opendbm/api_lib/facial_activity/_action_unit.py new file mode 100644 index 00000000..35b75b09 --- /dev/null +++ b/opendbm/api_lib/facial_activity/_action_unit.py @@ -0,0 +1,49 @@ +import tempfile + +from opendbm.api_lib.model import VideoModel +from opendbm.dbm_lib.dbm_features.raw_features.video.face_au import run_face_au + + +class ActionUnit(VideoModel): + def __init__(self): + super().__init__() + self._params = [ + "fac_AU01int", + "fac_AU02int", + "fac_AU04int", + "fac_AU05int", + "fac_AU06int", + "fac_AU07int", + "fac_AU09int", + "fac_AU10int", + "fac_AU12int", + "fac_AU14int", + "fac_AU15int", + "fac_AU17int", + "fac_AU20int", + "fac_AU23int", + "fac_AU25int", + "fac_AU26int", + "fac_AU45int", + "fac_AU01pres", + "fac_AU02pres", + "fac_AU04pres", + "fac_AU05pres", + "fac_AU06pres", + "fac_AU07pres", + "fac_AU09pres", + "fac_AU10pres", + "fac_AU12pres", + "fac_AU14pres", + "fac_AU15pres", + "fac_AU17pres", + "fac_AU20pres", + "fac_AU23pres", + "fac_AU25pres", + "fac_AU26pres", + "fac_AU28pres", + "fac_AU45pres", + ] + + def _fit_transform(self, path): + return run_face_au(path, f"{tempfile.gettempdir()}/", self.r_config, save=False) diff --git a/opendbm/api_lib/facial_activity/_asymmetry.py b/opendbm/api_lib/facial_activity/_asymmetry.py new file mode 100644 index 00000000..04ae78e8 --- /dev/null +++ b/opendbm/api_lib/facial_activity/_asymmetry.py @@ -0,0 +1,20 @@ +import tempfile + +from opendbm.api_lib.model import VideoModel +from opendbm.dbm_lib import run_face_asymmetry + + +class Asymmetry(VideoModel): + def __init__(self): + super().__init__() + self._params = [ + "fac_asymmaskmouth", + "fac_asymmaskeye", + "fac_asymmaskeyebrow", + "fac_asymmaskcom", + ] + + def _fit_transform(self, path): + return run_face_asymmetry( + path, f"{tempfile.gettempdir()}/", self.r_config, save=False + ) diff --git a/opendbm/api_lib/facial_activity/_expressivity.py b/opendbm/api_lib/facial_activity/_expressivity.py new 
file mode 100644 index 00000000..f69da6e2 --- /dev/null +++ b/opendbm/api_lib/facial_activity/_expressivity.py @@ -0,0 +1,89 @@ +import tempfile + +from opendbm.api_lib.model import VideoModel +from opendbm.dbm_lib.dbm_features.raw_features.video import face_emotion_expressivity + + +class Expressivity(VideoModel): + def __init__(self): + super().__init__() + self._params = [ + " AU01_r", + " AU02_r", + " AU04_r", + " AU05_r", + " AU06_r", + " AU07_r", + " AU09_r", + " AU10_r", + " AU12_r", + " AU14_r", + " AU15_r", + " AU17_r", + " AU20_r", + " AU25_r", + " AU26_r", + " AU45_r", + " AU01_c", + " AU02_c", + " AU04_c", + " AU05_c", + " AU06_c", + " AU07_c", + " AU10_c", + " AU12_c", + " AU14_c", + " AU15_c", + " AU17_c", + " AU20_c", + " AU23_c", + " AU25_c", + " AU26_c", + " AU28_c", + " AU45_c", + " AU09_c", + " AU23_r", + "s_confidence", + "fac_hapintsoft", + "fac_sadintsoft", + "fac_surintsoft", + "fac_feaintsoft", + "fac_angintsoft", + "fac_disintsoft", + "fac_conintsoft", + "fac_paiintsoft", + "fac_negintsoft", + "fac_posintsoft", + "neu_exp", + "fac_comlowintsoft", + "fac_comuppintsoft", + "cai_exp", + "fac_comintsoft", + "fac_happres", + "fac_sadpres", + "fac_surpres", + "fac_feapres", + "fac_angpres", + "fac_dispres", + "fac_conpres", + "fac_hapinthard", + "fac_sadinthard", + "fac_surinthard", + "fac_feainthard", + "fac_anginthard", + "fac_disinthard", + "fac_coninthard", + "fac_paiinthard", + "fac_neginthard", + "fac_posinthard", + "neu_exp_full", + "cai_exp_full", + "fac_comlowinthard", + "fac_comuppinthard", + "fac_cominthard", + ] + + def _fit_transform(self, path): + return face_emotion_expressivity.run_face_expressivity( + path, f"{tempfile.gettempdir()}/", self.r_config, save=False + ) diff --git a/opendbm/api_lib/facial_activity/_landmark.py b/opendbm/api_lib/facial_activity/_landmark.py new file mode 100644 index 00000000..9ed9f538 --- /dev/null +++ b/opendbm/api_lib/facial_activity/_landmark.py @@ -0,0 +1,24 @@ +import tempfile + +from 
opendbm.api_lib.model import VideoModel +from opendbm.dbm_lib import run_face_landmark + + +def r_num_fmt(fmt, rnum): + return list(map(lambda x: fmt.format(i="%02d" % x), rnum)) + + +lcols = [] +for vr in ["r", "c", "X", "Y", "Z"]: + lcols += r_num_fmt(f"fac_LMK{{i}}{vr}", range(68)) + + +class Landmark(VideoModel): + def __init__(self): + super().__init__() + self._params = lcols + + def _fit_transform(self, path): + return run_face_landmark( + path, f"{tempfile.gettempdir()}/", self.r_config, save=False + ) diff --git a/opendbm/api_lib/facial_activity/api.py b/opendbm/api_lib/facial_activity/api.py new file mode 100644 index 00000000..83a06753 --- /dev/null +++ b/opendbm/api_lib/facial_activity/api.py @@ -0,0 +1,84 @@ +import shutil +import tempfile +from collections import OrderedDict + +from opendbm.api_lib.model import VideoModel +from opendbm.api_lib.util import check_isfile + +from ._action_unit import ActionUnit +from ._asymmetry import Asymmetry +from ._expressivity import Expressivity +from ._landmark import Landmark + + +class FacialActivity(VideoModel): + def __init__(self): + super().__init__() + + self._landmark = Landmark() + self._action_unit = ActionUnit() + self._asymmetry = Asymmetry() + self._expressivity = Expressivity() + + self._models = OrderedDict( + { + "landmark": self._landmark, + "action_unit": self._action_unit, + "asymmetry": self._asymmetry, + "expressivity": self._expressivity, + } + ) + + def fit(self, path): + """Fit a file in filepath to OpenFace Model. Make sure to set the Docker to be active first. + For installation, see https://aicure.github.io/open_dbm/docs/openface-docker-installation + + Parameters + ---------- + path : string, + File Path of MP4/MOV file. 
+ """ + + check_isfile(path) + result_path, bn = super()._fit(path, "facial") + + for k, v in self._models.items(): + v._df = v._fit_transform(result_path) + + shutil.rmtree(f"{tempfile.gettempdir()}/{bn}/") + + def get_landmark(self): + """ + Get the model object of Landmark + Returns: + self: object + Model Object + """ + return self._landmark + + def get_action_unit(self): + """ + Get the model object of Action Unit + Returns: + self: object + Model Object + """ + return self._action_unit + + def get_asymmetry(self): + """ + Get the model object of Facial Asymmetry + Returns: + self: object + Model Object + """ + return self._asymmetry + + def get_expressivity(self): + """ + Get the model object of Facial Expressivity + Returns: + self: object + Model Object + """ + return self._expressivity diff --git a/opendbm/api_lib/model.py b/opendbm/api_lib/model.py new file mode 100644 index 00000000..7ce54505 --- /dev/null +++ b/opendbm/api_lib/model.py @@ -0,0 +1,205 @@ +import os +import platform +import subprocess +import tempfile +from pathlib import Path + +from opendbm.api_lib.util import docker_command_dec, wsllize +from opendbm.dbm_lib import config_derive_feature, config_raw_feature, config_reader + +OPENFACE_PATH_VIDEO = "pkg/open_dbm/OpenFace/build/bin/FaceLandmarkVid" +OPENFACE_PATH = "pkg/open_dbm/OpenFace/build/bin/FeatureExtraction" +DEEEPSPEECH_URL = "https://github.com/mozilla/DeepSpeech/releases/download/v0.9.1" +DEEPSPEECH_MODELS = ["deepspeech-0.9.1-models.pbmm", "deepspeech-0.9.1-models.scorer"] +MODEL_PATH = os.path.dirname(__file__) +OPENDBM_DATA = Path.home() / ".opendbm" +DLIB_SHAPE_MODEL = os.path.abspath( + os.path.join( + MODEL_PATH, "../pkg/shape_detector/shape_predictor_68_face_landmarks.dat" + ) +) +FACIAL_ACTIVITY_ARGS = [ + "-q", + "-2Dfp", + "-3Dfp", + "-pdmparams", + "-pose", + "-aus", + "-gaze", + "-f", +] + + +class Model(object): + def __init__(self): + self.s_config = config_reader.ConfigReader() + self.r_config = 
config_raw_feature.ConfigRawReader() + self.d_config = config_derive_feature.ConfigDeriveReader() + self._df = None + self._params = [] + + def to_dataframe(self): + """ + Convert the result of the processed data into dataframe. + Returns: + pandas dataframe + """ + if self._df is None: + raise Exception("Model has not been fit yet") + else: + return self._df + + def mean(self): + """ + get mean/average of data + Returns: + pandas.Series + """ + return self._df[self._params].mean() + + def std(self): + """ + get std of data + Returns: + pandas.Series + """ + return self._df[self._params].std() + + +class VideoModel(Model): + """ + A class to process the data of facial and Movement. + """ + + def __init__(self): + super().__init__() + + @docker_command_dec + def _fit(self, path, dbm_group): + """ + A function where the model is processing the data. + The model lived in the docker image, + where the full path of the model is stated in a variable named openface_call + Args: + path: input path of the file + dbm_group: self-explanatory. This function only accept dbm_group of facial and movement. + + Returns: + output path of the processed file by the model. 
+ """ + docker_temp_dir = "/app/tmp/" + wsl_cmd, temp_dir = wsllize((tempfile.gettempdir())) + filename = os.path.basename(path) + bn, _ = os.path.splitext(filename) + + facial_args = " ".join(FACIAL_ACTIVITY_ARGS) + docker_call = wsl_cmd + ["docker", "exec", "dbm_container", "/bin/bash", "-c"] + + openface_call = [ + docker_call + + [f"{OPENFACE_PATH} {facial_args} {path} -out_dir {docker_temp_dir}"], + docker_call + + [ + f"{OPENFACE_PATH_VIDEO} {facial_args} {path} -out_dir {docker_temp_dir}" + ], + ] + + out_dir_openface = [ + f"{temp_dir}/{bn}/{bn}_openface/", + f"{temp_dir}/{bn}_landmark_output/{bn}_landmark_output_openface_lmk/", + ] + result_path = [ + docker_temp_dir + bn + ".csv", + docker_temp_dir + bn + "_landmark_output.csv", + ] + + if dbm_group == "facial": + openface_csv = self._processing_video( + dbm_group, + openface_call[0], + out_dir_openface[0], + result_path[0], + wsl_cmd, + temp_dir, + bn, + ) + + return openface_csv, bn + else: + + openface_csv = self._processing_video( + "facial", + openface_call[0], + out_dir_openface[0], + result_path[0], + wsl_cmd, + temp_dir, + bn, + ) + openface_lmk_csv = self._processing_video( + "movement", + openface_call[1], + out_dir_openface[1], + result_path[1], + wsl_cmd, + temp_dir, + bn, + ) + + return openface_csv, openface_lmk_csv, bn + + def _processing_video( + self, dbm_group, call, out_dir, result_path, wsl_cmd, temp_dir, bn + ): + """ + Helper function for _fit method + """ + + subprocess.Popen( + call, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + stdin=subprocess.PIPE, + ).wait() + mkdir_cmd = wsl_cmd + ["mkdir", "-p", out_dir] + + copy_cmd = wsl_cmd + ["docker", "cp", f"dbm_container:/{result_path}", out_dir] + subprocess.Popen( + mkdir_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + stdin=subprocess.PIPE, + ).wait() + subprocess.Popen( + copy_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + stdin=subprocess.PIPE, + ).wait() + + if platform.system() == "Windows": 
+ path_in_temp = out_dir[len(temp_dir) :] + out_dir = (tempfile.gettempdir()) + path_in_temp + + if dbm_group == "facial": + return out_dir + bn + ".csv" + else: + return out_dir + bn + "_landmark_output.csv" + + +class AudioModel(Model): + """ + A class to process the data of speech and acoustic + """ + + def __init__(self): + super().__init__() + + def prep_func(func): + def wrapper(self, *args, **kwargs): + path = args[0] + + df = func(self, path, **kwargs) + return df + + return wrapper diff --git a/opendbm/api_lib/movement/__init__.py b/opendbm/api_lib/movement/__init__.py new file mode 100644 index 00000000..4fb13e1e --- /dev/null +++ b/opendbm/api_lib/movement/__init__.py @@ -0,0 +1 @@ +from opendbm.api_lib.movement.api import Movement diff --git a/opendbm/api_lib/movement/_eye_blink.py b/opendbm/api_lib/movement/_eye_blink.py new file mode 100644 index 00000000..fd9d9458 --- /dev/null +++ b/opendbm/api_lib/movement/_eye_blink.py @@ -0,0 +1,11 @@ +from opendbm.api_lib.model import DLIB_SHAPE_MODEL, VideoModel +from opendbm.dbm_lib.dbm_features.raw_features.movement.eye_blink import run_eye_blink + + +class EyeBlink(VideoModel): + def __init__(self): + super().__init__() + self._params = ["mov_blink_ear", "mov_blinkframes", "mov_blinkdur", "fps"] + + def _fit_transform(self, path): + return run_eye_blink(path, ".", self.r_config, DLIB_SHAPE_MODEL, save=False) diff --git a/opendbm/api_lib/movement/_eye_gaze.py b/opendbm/api_lib/movement/_eye_gaze.py new file mode 100644 index 00000000..770da45d --- /dev/null +++ b/opendbm/api_lib/movement/_eye_gaze.py @@ -0,0 +1,24 @@ +import tempfile + +from opendbm.api_lib.model import VideoModel +from opendbm.dbm_lib.dbm_features.raw_features.movement.eye_gaze import run_eye_gaze + + +class EyeGaze(VideoModel): + def __init__(self): + super().__init__() + self._params = [ + "mov_lefteyex", + "mov_lefteyey", + "mov_lefteyez", + "mov_righteyex", + "mov_righteyey", + "mov_righteyez", + "mov_leyedisp", + "mov_reyedisp", + ] + + 
def _fit_transform(self, path): + return run_eye_gaze( + path, f"{tempfile.gettempdir()}/", self.r_config, save=False + ) diff --git a/opendbm/api_lib/movement/_facial_tremor.py b/opendbm/api_lib/movement/_facial_tremor.py new file mode 100644 index 00000000..2582e984 --- /dev/null +++ b/opendbm/api_lib/movement/_facial_tremor.py @@ -0,0 +1,52 @@ +import tempfile + +from opendbm.api_lib.model import VideoModel +from opendbm.dbm_lib import fac_tremor_process + + +class FacialTremor(VideoModel): + def __init__(self): + super().__init__() + self._params = [ + "fac_features_mean_5", + "fac_tremor_median_5", + "fac_disp_median_5", + "fac_corr_5", + "fac_features_mean_12", + "fac_tremor_median_12", + "fac_disp_median_12", + "fac_corr_12", + "fac_features_mean_8", + "fac_tremor_median_8", + "fac_disp_median_8", + "fac_corr_8", + "fac_features_mean_48", + "fac_tremor_median_48", + "fac_disp_median_48", + "fac_corr_48", + "fac_features_mean_54", + "fac_tremor_median_54", + "fac_disp_median_54", + "fac_corr_54", + "fac_features_mean_28", + "fac_tremor_median_28", + "fac_disp_median_28", + "fac_corr_28", + "fac_features_mean_51", + "fac_tremor_median_51", + "fac_disp_median_51", + "fac_corr_51", + "fac_features_mean_66", + "fac_tremor_median_66", + "fac_disp_median_66", + "fac_corr_66", + "fac_features_mean_57", + "fac_tremor_median_57", + "fac_disp_median_57", + "fac_corr_57", + ] + + def _fit_transform(self, path): + return fac_tremor_process( + path, f"{tempfile.gettempdir()}/", self.r_config, save=False + ) diff --git a/opendbm/api_lib/movement/_head_movement.py b/opendbm/api_lib/movement/_head_movement.py new file mode 100644 index 00000000..25875db3 --- /dev/null +++ b/opendbm/api_lib/movement/_head_movement.py @@ -0,0 +1,19 @@ +import tempfile + +from opendbm.api_lib.model import VideoModel +from opendbm.dbm_lib import run_head_movement + + +class HeadMovement(VideoModel): + def __init__(self): + super().__init__() + self._params = [ + "mov_headvel", + 
"mov_hposepitch", + "mov_hposeyaw", + "mov_hposeroll", + "mov_hposedist", + ] + + def _fit_transform(self, path): + return run_head_movement(path, f"{tempfile.gettempdir()}/", self.r_config) diff --git a/opendbm/api_lib/movement/_vocal_tremor.py b/opendbm/api_lib/movement/_vocal_tremor.py new file mode 100644 index 00000000..e99f0c26 --- /dev/null +++ b/opendbm/api_lib/movement/_vocal_tremor.py @@ -0,0 +1,18 @@ +from opendbm.api_lib.model import VideoModel +from opendbm.dbm_lib.dbm_features.raw_features.movement.voice_tremor import run_vtremor + + +class VocalTremor(VideoModel): + def __init__(self): + super().__init__() + self._params = [ + "mov_freqtremfreq", + "mov_freqtremindex", + "mov_freqtrempindex", + "mov_amptremfreq", + "mov_amptremindex", + "mov_amptrempindex", + ] + + def _fit_transform(self, path): + return run_vtremor(path, ".", self.r_config, save=False) diff --git a/opendbm/api_lib/movement/api.py b/opendbm/api_lib/movement/api.py new file mode 100644 index 00000000..b1045807 --- /dev/null +++ b/opendbm/api_lib/movement/api.py @@ -0,0 +1,105 @@ +import os +import shutil +import tempfile +from collections import OrderedDict + +from opendbm.api_lib.model import VideoModel +from opendbm.api_lib.util import check_isfile +from opendbm.dbm_lib.controller import process_feature as pf + +from ._eye_blink import EyeBlink +from ._eye_gaze import EyeGaze +from ._facial_tremor import FacialTremor +from ._head_movement import HeadMovement +from ._vocal_tremor import VocalTremor + + +class Movement(VideoModel): + def __init__(self): + super().__init__() + self._eye_blink = EyeBlink() + self._eye_gaze = EyeGaze() + self._facial_tremor = FacialTremor() + self._head_movement = HeadMovement() + self._vocal_tremor = VocalTremor() + + self._models = OrderedDict( + { + "eye_blink": self._eye_blink, + "eye_gaze": self._eye_gaze, + "facial_tremor": self._facial_tremor, + "head_movement": self._head_movement, + "vocal_tremor": self._vocal_tremor, + } + ) + + def fit(self, 
path): + """Fit a file in filepath to OpenFace Model. Make sure to set the Docker to be active first. + For installation, see https://aicure.github.io/open_dbm/docs/openface-docker-installation + + Parameters + ---------- + path : string, + File Path of MP4/MOV file. + + """ + check_isfile(path) + result_path, result_path_lmk, bn = super()._fit(path, "movement") + wav_path = pf.audio_to_wav(path, tmp=True) + + for k, v in self._models.items(): + if k in ["eye_gaze", "head_movement"]: + v._df = v._fit_transform(result_path) + elif k == "facial_tremor": + v._df = v._fit_transform(result_path_lmk) + elif k == "vocal_tremor": + v._df = v._fit_transform(wav_path) + else: + v._df = v._fit_transform(path) + shutil.rmtree(f"{tempfile.gettempdir()}/{bn}/") + shutil.rmtree(f"{tempfile.gettempdir()}/{bn}_landmark_output/") + os.remove(wav_path) + + def get_eye_blink(self): + """ + Get the model object of Eye Blink + Returns: + self: object + """ + return self._eye_blink + + def get_eye_gaze(self): + """ + Get the model object of Eye Gaze + Returns: + self: object + Model Object + """ + return self._eye_gaze + + def get_facial_tremor(self): + """ + Get the model object of Facial Tremor + Returns: + self: object + Model Object + """ + return self._facial_tremor + + def get_head_movement(self): + """ + Get the model object of Head Movement + Returns: + self: object + Model Object + """ + return self._head_movement + + def get_vocal_tremor(self): + """ + Get the model object of Vocal Tremor + Returns: + self: object + Model Object + """ + return self._vocal_tremor diff --git a/opendbm/api_lib/speech/__init__.py b/opendbm/api_lib/speech/__init__.py new file mode 100644 index 00000000..d3803da2 --- /dev/null +++ b/opendbm/api_lib/speech/__init__.py @@ -0,0 +1 @@ +from opendbm.api_lib.speech.api import Speech diff --git a/opendbm/api_lib/speech/_speech_features.py b/opendbm/api_lib/speech/_speech_features.py new file mode 100644 index 00000000..6e21357b --- /dev/null +++ 
b/opendbm/api_lib/speech/_speech_features.py @@ -0,0 +1,34 @@ +import tempfile + +from opendbm.api_lib.model import OPENDBM_DATA, AudioModel +from opendbm.dbm_lib import run_speech_feature + + +class SpeechFeature(AudioModel): + def __init__(self): + super().__init__() + self._params = [ + "nlp_numSentences", + "nlp_singPronPerAns", + "nlp_singPronPerSen", + "nlp_pastTensePerAns", + "nlp_pastTensePerSen", + "nlp_pronounsPerAns", + "nlp_pronounsPerSen", + "nlp_verbsPerAns", + "nlp_verbsPerSen", + "nlp_adjectivesPerAns", + "nlp_adjectivesPerSen", + "nlp_nounsPerAns", + "nlp_nounsPerSen", + "nlp_sentiment_mean", + "nlp_mattr", + "nlp_wordsPerMin", + "nlp_totalTime", + ] + + @AudioModel.prep_func + def _fit_transform(self, path): + return run_speech_feature( + path, f"{tempfile.gettempdir()}/", self.r_config, OPENDBM_DATA, save=False + ) diff --git a/opendbm/api_lib/speech/_transcribe.py b/opendbm/api_lib/speech/_transcribe.py new file mode 100644 index 00000000..ba201d58 --- /dev/null +++ b/opendbm/api_lib/speech/_transcribe.py @@ -0,0 +1,16 @@ +import tempfile + +from opendbm.api_lib.model import OPENDBM_DATA, AudioModel +from opendbm.dbm_lib.dbm_features.raw_features.nlp.transcribe import run_transcribe + + +class Transcribe(AudioModel): + def __init__(self): + super().__init__() + self._params = ["nlp_transcribe", "nlp_totalTime"] + + @AudioModel.prep_func + def _fit_transform(self, path): + return run_transcribe( + path, f"{tempfile.gettempdir()}/", self.r_config, OPENDBM_DATA, save=False + ) diff --git a/opendbm/api_lib/speech/api.py b/opendbm/api_lib/speech/api.py new file mode 100644 index 00000000..d4dd1dc7 --- /dev/null +++ b/opendbm/api_lib/speech/api.py @@ -0,0 +1,73 @@ +import logging +import os + +from opendbm.api_lib import DEEEPSPEECH_URL, DEEPSPEECH_MODELS, OPENDBM_DATA, AudioModel +from opendbm.api_lib.util import check_file, check_isfile, download_url + +from ._speech_features import SpeechFeature +from ._transcribe import Transcribe + 
+logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + + +class Speech(AudioModel): + def __init__(self): + super().__init__() + self._transcribe = Transcribe() + self._speech_features = SpeechFeature() + self._models = { + "transcribe": self._transcribe, + "speech_features": self._speech_features, + } + + def fit(self, path): + """Fit a file in filepath to Deepspeech Model. + + Parameters + ---------- + path : string, + File Path of Video/Sound file format. + """ + check_isfile(path) + self._check_model_exists() + path, is_wav = check_file(path) + for v in self._models.values(): + v._df = v._fit_transform(path) + if not is_wav: + os.remove(path) + + @staticmethod + def _check_model_exists(): + """ + Check if deepspeech model is exists. if not, download to OPENDBM Directory. + """ + if not OPENDBM_DATA.exists(): + os.mkdir(OPENDBM_DATA) + for dm in DEEPSPEECH_MODELS: + pt = OPENDBM_DATA / dm + if not pt.exists(): + logger.info( + f"{dm} not exists. Automatically downloading to {OPENDBM_DATA}/" + ) + download_url(f"{DEEEPSPEECH_URL}/{dm}", pt) + else: + continue + + def get_transcribe(self): + """ + Get the model object of Transcribe + Returns: + self: object + Model Object + """ + return self._transcribe + + def get_speech_features(self): + """ + Get the model object of Speech Features + Returns: + self: object + Model Object + """ + return self._speech_features diff --git a/opendbm/api_lib/util.py b/opendbm/api_lib/util.py new file mode 100644 index 00000000..bf40b771 --- /dev/null +++ b/opendbm/api_lib/util.py @@ -0,0 +1,196 @@ +# import urllib, os +import logging +import os +import platform +import subprocess +import tempfile +import urllib.request as ur + +from tqdm import tqdm + +from opendbm.dbm_lib.controller import process_feature as pf + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + + +# urllib = getattr(urllib, 'request', urllib) + + +class TqdmUpTo(tqdm): + """Provides `update_to(n)` which uses 
`tqdm.update(delta_n)`.""" + + def update_to(self, b=1, bsize=1, tsize=None): + """ + b : int, optional + Number of blocks transferred so far [default: 1]. + bsize : int, optional + Size of each block (in tqdm units) [default: 1]. + tsize : int, optional + Total size (in tqdm units). If [default: None] remains unchanged. + """ + if tsize is not None: + self.total = tsize + return self.update(b * bsize - self.n) # also sets self.n = b * bsize + + +def download_url(url, local_path): + """ + Function to download url and drop it to the local path + """ + with TqdmUpTo( + unit="B", + unit_scale=True, + unit_divisor=1024, + miniters=1, + desc=url.split("/")[-1], + ) as t: # all optional kwargs + ur.urlretrieve(url, filename=local_path, reporthook=t.update_to, data=None) + t.total = t.n + + +def wsllize(path): + """ + Add WSL prefix if the platform using windows. + This function will also convert input path to wsl structure based on given path. + Args: + path: path of the input data + + Returns: + wsl prefix + """ + if platform.system() == "Windows": + wsl_cmd = ["wsl"] + path = subprocess.check_output(["wsl", "wslpath", repr(path)]).decode("utf-8") + if path.endswith("\n"): + path = path[:-1] + return wsl_cmd, path + else: + return [], path + + +def check_isfile(path): + if not os.path.isfile(path): + raise FileNotFoundError("File not found. Make sure specify the correct path") + + +def check_file(path): + """ + Check if file is in wav format. if not, convert to wav. + Args: + path: Input path + + Returns: + path: output path of the new wav file + bool: returns True if file is wav format + """ + return ( + (pf.audio_to_wav(path, tmp=True), False) + if not path.endswith(".wav") + else (path, True) + ) + + +def check_docker_model_exist(wsl_cmd, model_name): + """ + check if docker model is present or not. 
+ + Args: + wsl_cmd: wsl prefix is platform is Windows + model_name: self-explanatory + + """ + + try: + check_docker_model_exist = subprocess.check_output( + wsl_cmd + ["docker", "image", "ls"] + ).decode("utf-8") + if model_name not in check_docker_model_exist: + raise FileNotFoundError( + f""" + {model_name} model not found. Make sure to + download the model first. For further instruction about download, + please see our web documentation. + """ + ) + except subprocess.CalledProcessError: + raise EnvironmentError("Make sure to set the Docker to be active") + + +def docker_command_dec(fn): + """ + Decorator to execute model in Docker environment. + Starting the container and exit state is handled here + Args: + fn: any fn that need to access docker + + Returns: + decorated fn + """ + import os + + def inner(*args, **kwargs): + wsl_cmd, path = wsllize((args[1])) + + check_docker_model_exist(wsl_cmd, "dbm-openface") + + create_docker = wsl_cmd + [ + "docker", + "create", + "-ti", + "--name", + "dbm_container", + "dbm-openface", + "bash", + ] + + copy_file_to_docker = wsl_cmd + ["docker", "cp", path, "dbm_container:/app/"] + start_container = wsl_cmd + ["docker", "start", "dbm_container"] + terminate_container = wsl_cmd + ["docker", "stop", "dbm_container"] + remove_container = wsl_cmd + ["docker", "rm", "dbm_container"] + + subprocess.Popen( + create_docker, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + stdin=subprocess.PIPE, + ).wait() + subprocess.Popen( + copy_file_to_docker, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + stdin=subprocess.PIPE, + ).wait() + subprocess.Popen( + start_container, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + stdin=subprocess.PIPE, + ).wait() + + try: + + args = args[0], "/app/" + os.path.basename(args[1]), args[2] + + result = fn(*args, **kwargs) + + return result + except Exception as e: + + logger.info(f"Failed: {e}") + + finally: + subprocess.Popen( + terminate_container, + stdout=subprocess.PIPE, + 
stderr=subprocess.STDOUT, + stdin=subprocess.PIPE, + ).wait() + subprocess.Popen( + remove_container, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + stdin=subprocess.PIPE, + ).wait() + + return inner diff --git a/opendbm/api_lib/verbal_acoustics/__init__.py b/opendbm/api_lib/verbal_acoustics/__init__.py new file mode 100644 index 00000000..a6383641 --- /dev/null +++ b/opendbm/api_lib/verbal_acoustics/__init__.py @@ -0,0 +1 @@ +from opendbm.api_lib.verbal_acoustics.api import VerbalAcoustics diff --git a/opendbm/api_lib/verbal_acoustics/_audio_intensity.py b/opendbm/api_lib/verbal_acoustics/_audio_intensity.py new file mode 100644 index 00000000..15d1a731 --- /dev/null +++ b/opendbm/api_lib/verbal_acoustics/_audio_intensity.py @@ -0,0 +1,12 @@ +from opendbm.api_lib.model import AudioModel +from opendbm.dbm_lib import run_intensity + + +class AudioIntensity(AudioModel): + def __init__(self): + super().__init__() + self._params = ["aco_int"] + + @AudioModel.prep_func + def _fit_transform(self, path, **kwargs): + return run_intensity(path, ".", self.r_config, save=False) diff --git a/opendbm/api_lib/verbal_acoustics/_formant_frequency.py b/opendbm/api_lib/verbal_acoustics/_formant_frequency.py new file mode 100644 index 00000000..348ffc0e --- /dev/null +++ b/opendbm/api_lib/verbal_acoustics/_formant_frequency.py @@ -0,0 +1,14 @@ +import pandas as pd + +from opendbm.api_lib.model import AudioModel +from opendbm.dbm_lib import run_formant + + +class FormantFrequency(AudioModel): + def __init__(self): + super().__init__() + self._params = ["aco_fm1", "aco_fm2", "aco_fm3", "aco_fm4"] + + @AudioModel.prep_func + def _fit_transform(self, path, **kwargs): + return run_formant(path, ".", self.r_config, save=False) diff --git a/opendbm/api_lib/verbal_acoustics/_glottal_noise.py b/opendbm/api_lib/verbal_acoustics/_glottal_noise.py new file mode 100644 index 00000000..6d0256c6 --- /dev/null +++ b/opendbm/api_lib/verbal_acoustics/_glottal_noise.py @@ -0,0 +1,14 @@ +import 
pandas as pd + +from opendbm.api_lib.model import AudioModel +from opendbm.dbm_lib.dbm_features.raw_features.audio.gne import run_gne + + +class GlottalNoiseRatio(AudioModel): + def __init__(self): + super().__init__() + self._params = ["aco_gne"] + + @AudioModel.prep_func + def _fit_transform(self, path, **kwargs): + return run_gne(path, ".", self.r_config, save=False, ff_df=kwargs["ff_df"]) diff --git a/opendbm/api_lib/verbal_acoustics/_harmonic_noise.py b/opendbm/api_lib/verbal_acoustics/_harmonic_noise.py new file mode 100644 index 00000000..82280564 --- /dev/null +++ b/opendbm/api_lib/verbal_acoustics/_harmonic_noise.py @@ -0,0 +1,14 @@ +import pandas as pd + +from opendbm.api_lib.model import AudioModel +from opendbm.dbm_lib.dbm_features.raw_features.audio.hnr import run_hnr + + +class HarmonicsNoiseRatio(AudioModel): + def __init__(self): + super().__init__() + self._params = ["aco_hnr"] + + @AudioModel.prep_func + def _fit_transform(self, path, **kwargs): + return run_hnr(path, ".", self.r_config, save=False) diff --git a/opendbm/api_lib/verbal_acoustics/_jitter.py b/opendbm/api_lib/verbal_acoustics/_jitter.py new file mode 100644 index 00000000..46ff8856 --- /dev/null +++ b/opendbm/api_lib/verbal_acoustics/_jitter.py @@ -0,0 +1,14 @@ +import pandas as pd + +from opendbm.api_lib.model import AudioModel +from opendbm.dbm_lib.dbm_features.raw_features.audio.jitter import run_jitter + + +class Jitter(AudioModel): + def __init__(self): + super().__init__() + self._params = ["aco_jitter"] + + @AudioModel.prep_func + def _fit_transform(self, path, **kwargs): + return run_jitter(path, ".", self.r_config, save=False, ff_df=kwargs["ff_df"]) diff --git a/opendbm/api_lib/verbal_acoustics/_mfcc.py b/opendbm/api_lib/verbal_acoustics/_mfcc.py new file mode 100644 index 00000000..31caa022 --- /dev/null +++ b/opendbm/api_lib/verbal_acoustics/_mfcc.py @@ -0,0 +1,12 @@ +from opendbm.api_lib.model import AudioModel +from opendbm.dbm_lib import run_mfcc + + +class 
MFCC(AudioModel): + def __init__(self): + super().__init__() + self._params = ["aco_mfcc" + str(i) for i in range(1, 13)] + + @AudioModel.prep_func + def _fit_transform(self, path, **kwargs): + return run_mfcc(path, ".", self.r_config, save=False) diff --git a/opendbm/api_lib/verbal_acoustics/_pause_characteristics.py b/opendbm/api_lib/verbal_acoustics/_pause_characteristics.py new file mode 100644 index 00000000..4bb29438 --- /dev/null +++ b/opendbm/api_lib/verbal_acoustics/_pause_characteristics.py @@ -0,0 +1,20 @@ +import pandas as pd + +from opendbm.api_lib.model import AudioModel +from opendbm.dbm_lib import run_pause_segment + + +class PauseCharacteristics(AudioModel): + def __init__(self): + super().__init__() + self._params = [ + "aco_totaltime", + "aco_speakingtime", + "aco_numpauses", + "aco_pausetime", + "aco_pausefrac", + ] + + @AudioModel.prep_func + def _fit_transform(self, path, **kwargs): + return run_pause_segment(path, ".", self.r_config, save=False) diff --git a/opendbm/api_lib/verbal_acoustics/_pitch_frequency.py b/opendbm/api_lib/verbal_acoustics/_pitch_frequency.py new file mode 100644 index 00000000..cda595b5 --- /dev/null +++ b/opendbm/api_lib/verbal_acoustics/_pitch_frequency.py @@ -0,0 +1,14 @@ +import pandas as pd + +from opendbm.api_lib.model import AudioModel +from opendbm.dbm_lib.dbm_features.raw_features.audio.pitch_freq import run_pitch + + +class PitchFrequency(AudioModel): + def __init__(self): + super().__init__() + self._params = ["aco_ff"] + + @AudioModel.prep_func + def _fit_transform(self, path, **kwargs): + return run_pitch(path, ".", self.r_config, save=False) diff --git a/opendbm/api_lib/verbal_acoustics/_shimmer.py b/opendbm/api_lib/verbal_acoustics/_shimmer.py new file mode 100644 index 00000000..b1da3723 --- /dev/null +++ b/opendbm/api_lib/verbal_acoustics/_shimmer.py @@ -0,0 +1,14 @@ +import pandas as pd + +from opendbm.api_lib.model import AudioModel +from opendbm.dbm_lib.dbm_features.raw_features.audio.shimmer import 
run_shimmer + + +class Shimmer(AudioModel): + def __init__(self): + super().__init__() + self._params = ["aco_shimmer"] + + @AudioModel.prep_func + def _fit_transform(self, path, **kwargs): + return run_shimmer(path, ".", self.r_config, save=False, ff_df=kwargs["ff_df"]) diff --git a/opendbm/api_lib/verbal_acoustics/_voice_prevalence.py b/opendbm/api_lib/verbal_acoustics/_voice_prevalence.py new file mode 100644 index 00000000..2094ff38 --- /dev/null +++ b/opendbm/api_lib/verbal_acoustics/_voice_prevalence.py @@ -0,0 +1,14 @@ +import pandas as pd + +from opendbm.api_lib.model import AudioModel +from opendbm.dbm_lib.dbm_features.raw_features.audio.voice_frame_score import run_vfs + + +class VoicePrevalence(AudioModel): + def __init__(self): + super().__init__() + self._params = ["aco_voiceframe", "aco_totvoiceframe", "aco_voicepct"] + + @AudioModel.prep_func + def _fit_transform(self, path, **kwargs): + return run_vfs(path, ".", self.r_config, save=False) diff --git a/opendbm/api_lib/verbal_acoustics/api.py b/opendbm/api_lib/verbal_acoustics/api.py new file mode 100644 index 00000000..a9d345ab --- /dev/null +++ b/opendbm/api_lib/verbal_acoustics/api.py @@ -0,0 +1,154 @@ +import os +from collections import OrderedDict + +from opendbm.api_lib.model import AudioModel +from opendbm.api_lib.util import check_file, check_isfile +from opendbm.dbm_lib.controller import process_feature as pf + +from ._audio_intensity import AudioIntensity +from ._formant_frequency import FormantFrequency +from ._glottal_noise import GlottalNoiseRatio +from ._harmonic_noise import HarmonicsNoiseRatio +from ._jitter import Jitter +from ._mfcc import MFCC +from ._pause_characteristics import PauseCharacteristics +from ._pitch_frequency import PitchFrequency +from ._shimmer import Shimmer +from ._voice_prevalence import VoicePrevalence + + +class VerbalAcoustics(AudioModel): + def __init__(self): + super().__init__() + self._auint = AudioIntensity() + self._pitchfreq = PitchFrequency() + 
self._forfreq = FormantFrequency() + self._hnr = HarmonicsNoiseRatio() + self._gne = GlottalNoiseRatio() + self._jitter = Jitter() + self._shimmer = Shimmer() + self._pchar = PauseCharacteristics() + self._vopre = VoicePrevalence() + self._mfcc = MFCC() + self._models = OrderedDict( + { + "audio_intensity": self._auint, + "pitch_frequency": self._pitchfreq, + "formant_frequency": self._forfreq, + "harmonic_noise": self._hnr, + "glottal_noise": self._gne, + "jitter": self._jitter, + "shimmer": self._shimmer, + "pause_characteristics": self._pchar, + "voice_prevalence": self._vopre, + "mfcc": self._mfcc, + } + ) + + def fit(self, path): + """Fit a file in filepath to parselmouth Model. + + Parameters + ---------- + path : string, + File Path of Video/Sound file format. + """ + check_isfile(path) + path, is_wav = check_file(path) + for k, v in self._models.items(): + if k in ["glottal_noise", "jitter", "shimmer"]: + v._df = v._fit_transform(path, ff_df=self._pitchfreq._df) + else: + v._df = v._fit_transform(path) + if not is_wav: + os.remove(path) + + def get_audio_intensity(self): + """ + Get the model object of Audio Intensity + Returns: + self: object + Model Object + """ + return self._auint + + def get_pitch_frequency(self): + """ + Get the model object of Pitch Frequency + Returns: + self: object + Model Object + """ + return self._pitchfreq + + def get_formant_frequency(self): + """ + Get the model object of Formant Frequency + Returns: + self: object + Model Object + """ + return self._forfreq + + def get_harmonic_noise(self): + """ + Get the model object of Harmonic Noise + Returns: + self: object + Model Object + """ + return self._hnr + + def get_glottal_noise(self): + """ + Get the model object of Glottal Noise + Returns: + self: object + Model Object + """ + return self._gne + + def get_jitter(self): + """ + Get the model object of Jitter + Returns: + self: object + Model Object + """ + return self._jitter + + def get_shimmer(self): + """ + Get the model 
object of Shimmer + Returns: + self: object + Model Object + """ + return self._shimmer + + def get_pause_characteristics(self): + """ + Get the model object of Pause Characteristics + Returns: + self: object + Model Object + """ + return self._pchar + + def get_voice_prevalence(self): + """ + Get the model object of Vocal Prevalence + Returns: + self: object + Model Object + """ + return self._vopre + + def get_mfcc(self): + """ + Get the model object of MFCC + Returns: + self: object + Model Object + """ + return self._mfcc diff --git a/opendbm/dbm_lib/__init__.py b/opendbm/dbm_lib/__init__.py new file mode 100644 index 00000000..aee1920c --- /dev/null +++ b/opendbm/dbm_lib/__init__.py @@ -0,0 +1,19 @@ +""" +file_name: init +project_name: DBM +created: 2020-20-07 +""" + +from __future__ import absolute_import, division, print_function + +from .config import config_derive_feature, config_raw_feature, config_reader +from .dbm_features.raw_features.audio.formant_freq import run_formant +from .dbm_features.raw_features.audio.intensity import run_intensity +from .dbm_features.raw_features.audio.mfcc import run_mfcc +from .dbm_features.raw_features.audio.pause_segment import run_pause_segment +from .dbm_features.raw_features.movement.facial_tremor import fac_tremor_process +from .dbm_features.raw_features.movement.head_motion import run_head_movement +from .dbm_features.raw_features.nlp.speech_features import run_speech_feature +from .dbm_features.raw_features.video import ConfigFaceReader +from .dbm_features.raw_features.video.face_asymmetry import run_face_asymmetry +from .dbm_features.raw_features.video.face_landmark import run_face_landmark diff --git a/opendbm/dbm_lib/config/__init__.py b/opendbm/dbm_lib/config/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/dbm_lib/config/config_derive_feature.py b/opendbm/dbm_lib/config/config_derive_feature.py similarity index 59% rename from dbm_lib/config/config_derive_feature.py rename to 
opendbm/dbm_lib/config/config_derive_feature.py index b448df74..5562bf41 100644 --- a/dbm_lib/config/config_derive_feature.py +++ b/opendbm/dbm_lib/config/config_derive_feature.py @@ -4,26 +4,32 @@ project_name: DBM created: 2020-20-07 """ +import os + import yaml -from dbm_lib import DBMLIB_DERIVE_FEATURE_CONFIG + +DBMLIB_PATH = os.path.dirname(__file__) +DBMLIB_DERIVE_FEATURE_CONFIG = os.path.abspath( + os.path.join(DBMLIB_PATH, "../../resources/features/derived_feature.yml") +) + class ConfigDeriveReader(object): """Summary Read sevice end ponit """ - def __init__(self, - feature_config_yml=None): + + def __init__(self, feature_config_yml=None): """Summary Args: feature_config_yml (None, optional): yml file defined service configuration """ - + if feature_config_yml is None: feature_config = DBMLIB_DERIVE_FEATURE_CONFIG else: feature_config = feature_config_yml - with open(feature_config, 'r') as ymlfile: - config = yaml.load(ymlfile) + with open(feature_config, "r") as ymlfile: + config = yaml.load(ymlfile, Loader=yaml.CLoader) self.base_derive = config - \ No newline at end of file diff --git a/opendbm/dbm_lib/config/config_raw_feature.py b/opendbm/dbm_lib/config/config_raw_feature.py new file mode 100644 index 00000000..147d7010 --- /dev/null +++ b/opendbm/dbm_lib/config/config_raw_feature.py @@ -0,0 +1,283 @@ +""" +file_name: config_raw_feature +project_name: DBM +created: 2020-20-07 +""" + +import os + +import yaml + +DBMLIB_PATH = os.path.dirname(__file__) +DBMLIB_FEATURE_CONFIG = os.path.abspath( + os.path.join(DBMLIB_PATH, "../../resources/features/raw_feature.yml") +) + + +class ConfigRawReader(object): + """Summary + Read sevice end ponit + """ + + def __init__(self, feature_config_yml=None): + """Summary + Args: + feature_config_yml (None, optional): yml file defined service configuration + """ + + if feature_config_yml is None: + feature_config = DBMLIB_FEATURE_CONFIG + else: + feature_config = feature_config_yml + + with open(feature_config, "r") as 
ymlfile: + config = yaml.load(ymlfile, Loader=yaml.CLoader) + + # Verbal features + self.base_raw = config + self.err_reason = config["raw_feature"]["error_reason"] + + # Output range + self.mov_headvel_start = config["raw_feature"]["mov_headvel_start"] + self.mov_headvel_end = config["raw_feature"]["mov_headvel_end"] + + # Acoustic variable + self.aco_int = config["raw_feature"]["aco_int"] + self.aco_ff = config["raw_feature"]["aco_ff"] + self.aco_voiceLabel = config["raw_feature"]["aco_voiceLabel"] + self.aco_hnr = config["raw_feature"]["aco_hnr"] + self.aco_gne = config["raw_feature"]["aco_gne"] + self.aco_fm1 = config["raw_feature"]["aco_fm1"] + self.aco_fm2 = config["raw_feature"]["aco_fm2"] + self.aco_fm3 = config["raw_feature"]["aco_fm3"] + self.aco_fm4 = config["raw_feature"]["aco_fm4"] + self.aco_jitter = config["raw_feature"]["aco_jitter"] + self.aco_shimmer = config["raw_feature"]["aco_shimmer"] + self.aco_mfcc1 = config["raw_feature"]["aco_mfcc1"] + self.aco_mfcc2 = config["raw_feature"]["aco_mfcc2"] + self.aco_mfcc3 = config["raw_feature"]["aco_mfcc3"] + self.aco_mfcc4 = config["raw_feature"]["aco_mfcc4"] + self.aco_mfcc5 = config["raw_feature"]["aco_mfcc5"] + self.aco_mfcc6 = config["raw_feature"]["aco_mfcc6"] + self.aco_mfcc7 = config["raw_feature"]["aco_mfcc7"] + self.aco_mfcc8 = config["raw_feature"]["aco_mfcc8"] + self.aco_mfcc9 = config["raw_feature"]["aco_mfcc9"] + self.aco_mfcc10 = config["raw_feature"]["aco_mfcc10"] + self.aco_mfcc11 = config["raw_feature"]["aco_mfcc11"] + self.aco_mfcc12 = config["raw_feature"]["aco_mfcc12"] + self.aco_voiceFrame = config["raw_feature"]["aco_voiceFrame"] + self.aco_totVoiceFrame = config["raw_feature"]["aco_totVoiceFrame"] + self.aco_voicePct = config["raw_feature"]["aco_voicePct"] + self.aco_pausetime = config["raw_feature"]["aco_pausetime"] + self.aco_totaltime = config["raw_feature"]["aco_totaltime"] + self.aco_speakingtime = config["raw_feature"]["aco_speakingtime"] + self.aco_numpauses = 
config["raw_feature"]["aco_numpauses"] + self.aco_pausefrac = config["raw_feature"]["aco_pausefrac"] + + # Facial Action Unit (for consistency) + self.fac_AU01int = config["raw_feature"]["fac_AU01int"] + self.fac_AU02int = config["raw_feature"]["fac_AU02int"] + self.fac_AU04int = config["raw_feature"]["fac_AU04int"] + self.fac_AU05int = config["raw_feature"]["fac_AU05int"] + self.fac_AU06int = config["raw_feature"]["fac_AU06int"] + self.fac_AU07int = config["raw_feature"]["fac_AU07int"] + self.fac_AU09int = config["raw_feature"]["fac_AU09int"] + self.fac_AU10int = config["raw_feature"]["fac_AU10int"] + self.fac_AU12int = config["raw_feature"]["fac_AU12int"] + self.fac_AU14int = config["raw_feature"]["fac_AU14int"] + self.fac_AU15int = config["raw_feature"]["fac_AU15int"] + self.fac_AU17int = config["raw_feature"]["fac_AU17int"] + self.fac_AU20int = config["raw_feature"]["fac_AU20int"] + self.fac_AU23int = config["raw_feature"]["fac_AU23int"] + self.fac_AU25int = config["raw_feature"]["fac_AU25int"] + self.fac_AU26int = config["raw_feature"]["fac_AU26int"] + self.fac_AU45int = config["raw_feature"]["fac_AU45int"] + self.fac_AU01pres = config["raw_feature"]["fac_AU01pres"] + self.fac_AU02pres = config["raw_feature"]["fac_AU02pres"] + self.fac_AU04pres = config["raw_feature"]["fac_AU04pres"] + self.fac_AU05pres = config["raw_feature"]["fac_AU05pres"] + self.fac_AU06pres = config["raw_feature"]["fac_AU06pres"] + self.fac_AU07pres = config["raw_feature"]["fac_AU07pres"] + self.fac_AU09pres = config["raw_feature"]["fac_AU09pres"] + self.fac_AU10pres = config["raw_feature"]["fac_AU10pres"] + self.fac_AU12pres = config["raw_feature"]["fac_AU12pres"] + self.fac_AU14pres = config["raw_feature"]["fac_AU14pres"] + self.fac_AU15pres = config["raw_feature"]["fac_AU15pres"] + self.fac_AU17pres = config["raw_feature"]["fac_AU17pres"] + self.fac_AU20pres = config["raw_feature"]["fac_AU20pres"] + self.fac_AU23pres = config["raw_feature"]["fac_AU23pres"] + self.fac_AU25pres = 
config["raw_feature"]["fac_AU25pres"] + self.fac_AU26pres = config["raw_feature"]["fac_AU26pres"] + self.fac_AU28pres = config["raw_feature"]["fac_AU28pres"] + self.fac_AU45pres = config["raw_feature"]["fac_AU45pres"] + + # Facial Landmarks (for consistency) + self.fac_LMK00disp = config["raw_feature"]["fac_LMK00disp"] + self.fac_LMK01disp = config["raw_feature"]["fac_LMK01disp"] + self.fac_LMK02disp = config["raw_feature"]["fac_LMK02disp"] + self.fac_LMK03disp = config["raw_feature"]["fac_LMK03disp"] + self.fac_LMK04disp = config["raw_feature"]["fac_LMK04disp"] + self.fac_LMK05disp = config["raw_feature"]["fac_LMK05disp"] + self.fac_LMK06disp = config["raw_feature"]["fac_LMK06disp"] + self.fac_LMK07disp = config["raw_feature"]["fac_LMK07disp"] + self.fac_LMK08disp = config["raw_feature"]["fac_LMK08disp"] + self.fac_LMK09disp = config["raw_feature"]["fac_LMK09disp"] + self.fac_LMK10disp = config["raw_feature"]["fac_LMK10disp"] + self.fac_LMK11disp = config["raw_feature"]["fac_LMK11disp"] + self.fac_LMK12disp = config["raw_feature"]["fac_LMK12disp"] + self.fac_LMK13disp = config["raw_feature"]["fac_LMK13disp"] + self.fac_LMK14disp = config["raw_feature"]["fac_LMK14disp"] + self.fac_LMK15disp = config["raw_feature"]["fac_LMK15disp"] + self.fac_LMK16disp = config["raw_feature"]["fac_LMK16disp"] + self.fac_LMK17disp = config["raw_feature"]["fac_LMK17disp"] + self.fac_LMK18disp = config["raw_feature"]["fac_LMK18disp"] + self.fac_LMK19disp = config["raw_feature"]["fac_LMK19disp"] + self.fac_LMK20disp = config["raw_feature"]["fac_LMK20disp"] + self.fac_LMK21disp = config["raw_feature"]["fac_LMK21disp"] + self.fac_LMK22disp = config["raw_feature"]["fac_LMK22disp"] + self.fac_LMK23disp = config["raw_feature"]["fac_LMK23disp"] + self.fac_LMK24disp = config["raw_feature"]["fac_LMK24disp"] + self.fac_LMK25disp = config["raw_feature"]["fac_LMK25disp"] + self.fac_LMK26disp = config["raw_feature"]["fac_LMK26disp"] + self.fac_LMK27disp = config["raw_feature"]["fac_LMK27disp"] + 
self.fac_LMK28disp = config["raw_feature"]["fac_LMK28disp"] + self.fac_LMK29disp = config["raw_feature"]["fac_LMK29disp"] + self.fac_LMK30disp = config["raw_feature"]["fac_LMK30disp"] + self.fac_LMK31disp = config["raw_feature"]["fac_LMK31disp"] + self.fac_LMK32disp = config["raw_feature"]["fac_LMK32disp"] + self.fac_LMK33disp = config["raw_feature"]["fac_LMK33disp"] + self.fac_LMK34disp = config["raw_feature"]["fac_LMK34disp"] + self.fac_LMK35disp = config["raw_feature"]["fac_LMK35disp"] + self.fac_LMK36disp = config["raw_feature"]["fac_LMK36disp"] + self.fac_LMK37disp = config["raw_feature"]["fac_LMK37disp"] + self.fac_LMK38disp = config["raw_feature"]["fac_LMK38disp"] + self.fac_LMK39disp = config["raw_feature"]["fac_LMK39disp"] + self.fac_LMK40disp = config["raw_feature"]["fac_LMK40disp"] + self.fac_LMK41disp = config["raw_feature"]["fac_LMK41disp"] + self.fac_LMK42disp = config["raw_feature"]["fac_LMK42disp"] + self.fac_LMK43disp = config["raw_feature"]["fac_LMK43disp"] + self.fac_LMK44disp = config["raw_feature"]["fac_LMK44disp"] + self.fac_LMK45disp = config["raw_feature"]["fac_LMK45disp"] + self.fac_LMK46disp = config["raw_feature"]["fac_LMK46disp"] + self.fac_LMK47disp = config["raw_feature"]["fac_LMK47disp"] + self.fac_LMK48disp = config["raw_feature"]["fac_LMK48disp"] + self.fac_LMK49disp = config["raw_feature"]["fac_LMK49disp"] + self.fac_LMK50disp = config["raw_feature"]["fac_LMK50disp"] + self.fac_LMK51disp = config["raw_feature"]["fac_LMK51disp"] + self.fac_LMK52disp = config["raw_feature"]["fac_LMK52disp"] + self.fac_LMK53disp = config["raw_feature"]["fac_LMK53disp"] + self.fac_LMK54disp = config["raw_feature"]["fac_LMK54disp"] + self.fac_LMK55disp = config["raw_feature"]["fac_LMK55disp"] + self.fac_LMK56disp = config["raw_feature"]["fac_LMK56disp"] + self.fac_LMK57disp = config["raw_feature"]["fac_LMK57disp"] + self.fac_LMK58disp = config["raw_feature"]["fac_LMK58disp"] + self.fac_LMK59disp = config["raw_feature"]["fac_LMK59disp"] + 
self.fac_LMK60disp = config["raw_feature"]["fac_LMK60disp"] + self.fac_LMK61disp = config["raw_feature"]["fac_LMK61disp"] + self.fac_LMK62disp = config["raw_feature"]["fac_LMK62disp"] + self.fac_LMK63disp = config["raw_feature"]["fac_LMK63disp"] + self.fac_LMK64disp = config["raw_feature"]["fac_LMK64disp"] + self.fac_LMK65disp = config["raw_feature"]["fac_LMK65disp"] + self.fac_LMK66disp = config["raw_feature"]["fac_LMK66disp"] + self.fac_LMK67disp = config["raw_feature"]["fac_LMK67disp"] + + # Facial features + self.hap_exp = config["raw_feature"]["hap_exp"] + self.sad_exp = config["raw_feature"]["sad_exp"] + self.sur_exp = config["raw_feature"]["sur_exp"] + self.fea_exp = config["raw_feature"]["fea_exp"] + self.ang_exp = config["raw_feature"]["ang_exp"] + self.dis_exp = config["raw_feature"]["dis_exp"] + self.con_exp = config["raw_feature"]["con_exp"] + self.happ_occ = config["raw_feature"]["happ_occ"] + self.sad_occ = config["raw_feature"]["sad_occ"] + self.sur_occ = config["raw_feature"]["sur_occ"] + self.fea_occ = config["raw_feature"]["fea_occ"] + self.ang_occ = config["raw_feature"]["ang_occ"] + self.dis_occ = config["raw_feature"]["dis_occ"] + self.con_occ = config["raw_feature"]["con_occ"] + self.pos_exp = config["raw_feature"]["pos_exp"] + self.neg_exp = config["raw_feature"]["neg_exp"] + self.neu_exp = config["raw_feature"]["neu_exp"] + self.cai_exp = config["raw_feature"]["cai_exp"] + self.com_exp = config["raw_feature"]["com_exp"] + self.com_lower_exp = config["raw_feature"]["com_lower_exp"] + self.com_upper_exp = config["raw_feature"]["com_upper_exp"] + self.pai_exp = config["raw_feature"]["pai_exp"] + self.hap_exp_full = config["raw_feature"]["hap_exp_full"] + self.sad_exp_full = config["raw_feature"]["sad_exp_full"] + self.sur_exp_full = config["raw_feature"]["sur_exp_full"] + self.fea_exp_full = config["raw_feature"]["fea_exp_full"] + self.ang_exp_full = config["raw_feature"]["ang_exp_full"] + self.dis_exp_full = 
config["raw_feature"]["dis_exp_full"] + self.con_exp_full = config["raw_feature"]["con_exp_full"] + self.pos_exp_full = config["raw_feature"]["pos_exp_full"] + self.neg_exp_full = config["raw_feature"]["neg_exp_full"] + self.neu_exp_full = config["raw_feature"]["neu_exp_full"] + self.cai_exp_full = config["raw_feature"]["cai_exp_full"] + self.com_exp_full = config["raw_feature"]["com_exp_full"] + self.com_lower_exp_full = config["raw_feature"]["com_lower_exp_full"] + self.com_upper_exp_full = config["raw_feature"]["com_upper_exp_full"] + self.pai_exp_full = config["raw_feature"]["pai_exp_full"] + self.fac_AsymMaskMouth = config["raw_feature"]["fac_AsymMaskMouth"] + self.fac_AsymMaskEye = config["raw_feature"]["fac_AsymMaskEye"] + self.fac_AsymMaskEyebrow = config["raw_feature"]["fac_AsymMaskEyebrow"] + self.fac_AsymMaskCom = config["raw_feature"]["fac_AsymMaskCom"] + + # Movement features + self.head_vel = config["raw_feature"]["head_vel"] + self.mov_blink_ear = config["raw_feature"]["mov_blink_ear"] + self.vid_dur = config["raw_feature"]["vid_dur"] + self.fps = config["raw_feature"]["fps"] + self.mov_blinkframes = config["raw_feature"]["mov_blinkframes"] + self.mov_blinkdur = config["raw_feature"]["mov_blinkdur"] + self.mov_Hpose_Pitch = config["raw_feature"]["mov_Hpose_Pitch"] + self.mov_Hpose_Yaw = config["raw_feature"]["mov_Hpose_Yaw"] + self.mov_Hpose_Roll = config["raw_feature"]["mov_Hpose_Roll"] + self.mov_Hpose_Dist = config["raw_feature"]["mov_Hpose_Dist"] + + self.mov_freq_trem_freq = config["raw_feature"]["mov_freq_trem_freq"] + self.mov_freq_trem_index = config["raw_feature"]["mov_freq_trem_index"] + self.mov_freq_trem_pindex = config["raw_feature"]["mov_freq_trem_pindex"] + self.mov_amp_trem_freq = config["raw_feature"]["mov_amp_trem_freq"] + self.mov_amp_trem_index = config["raw_feature"]["mov_amp_trem_index"] + self.mov_amp_trem_pindex = config["raw_feature"]["mov_amp_trem_pindex"] + + self.fac_tremor_median_5 = 
config["raw_feature"]["fac_tremor_median_5"] + self.fac_tremor_median_12 = config["raw_feature"]["fac_tremor_median_12"] + self.fac_tremor_median_8 = config["raw_feature"]["fac_tremor_median_8"] + self.fac_tremor_median_48 = config["raw_feature"]["fac_tremor_median_48"] + self.fac_tremor_median_54 = config["raw_feature"]["fac_tremor_median_54"] + self.fac_tremor_median_28 = config["raw_feature"]["fac_tremor_median_28"] + self.fac_tremor_median_51 = config["raw_feature"]["fac_tremor_median_51"] + self.fac_tremor_median_66 = config["raw_feature"]["fac_tremor_median_66"] + self.fac_tremor_median_57 = config["raw_feature"]["fac_tremor_median_57"] + + self.mov_leye_x = config["raw_feature"]["mov_leye_x"] + self.mov_leye_y = config["raw_feature"]["mov_leye_y"] + self.mov_leye_z = config["raw_feature"]["mov_leye_z"] + self.mov_reye_x = config["raw_feature"]["mov_reye_x"] + self.mov_reye_y = config["raw_feature"]["mov_reye_y"] + self.mov_reye_z = config["raw_feature"]["mov_reye_z"] + self.mov_eleft_disp = config["raw_feature"]["mov_eleft_disp"] + self.mov_eright_disp = config["raw_feature"]["mov_eright_disp"] + + # NLP features + self.nlp_transcribe = config["raw_feature"]["nlp_transcribe"] + self.nlp_numSentences = config["raw_feature"]["nlp_numSentences"] + self.nlp_singPronPerAns = config["raw_feature"]["nlp_singPronPerAns"] + self.nlp_singPronPerSen = config["raw_feature"]["nlp_singPronPerSen"] + self.nlp_pastTensePerAns = config["raw_feature"]["nlp_pastTensePerAns"] + self.nlp_pastTensePerSen = config["raw_feature"]["nlp_pastTensePerSen"] + self.nlp_pronounsPerAns = config["raw_feature"]["nlp_pronounsPerAns"] + self.nlp_pronounsPerSen = config["raw_feature"]["nlp_pronounsPerSen"] + self.nlp_verbsPerAns = config["raw_feature"]["nlp_verbsPerAns"] + self.nlp_verbsPerSen = config["raw_feature"]["nlp_verbsPerSen"] + self.nlp_adjectivesPerAns = config["raw_feature"]["nlp_adjectivesPerAns"] + self.nlp_adjectivesPerSen = config["raw_feature"]["nlp_adjectivesPerSen"] + 
self.nlp_nounsPerAns = config["raw_feature"]["nlp_nounsPerAns"] + self.nlp_nounsPerSen = config["raw_feature"]["nlp_nounsPerSen"] + self.nlp_sentiment_mean = config["raw_feature"]["nlp_sentiment_mean"] + self.nlp_mattr = config["raw_feature"]["nlp_mattr"] + self.nlp_wordsPerMin = config["raw_feature"]["nlp_wordsPerMin"] + self.nlp_totalTime = config["raw_feature"]["nlp_totalTime"] diff --git a/dbm_lib/config/config_reader.py b/opendbm/dbm_lib/config/config_reader.py similarity index 57% rename from dbm_lib/config/config_reader.py rename to opendbm/dbm_lib/config/config_reader.py index be23309b..923454ba 100644 --- a/dbm_lib/config/config_reader.py +++ b/opendbm/dbm_lib/config/config_reader.py @@ -4,15 +4,22 @@ project_name: DBM created: 2020-20-07 """ +import os + import yaml -from dbm_lib import DBMLIB_SERVICE_CONFIG + +DBMLIB_PATH = os.path.dirname(__file__) +DBMLIB_SERVICE_CONFIG = os.path.abspath( + os.path.join(DBMLIB_PATH, "../../resources/services/services.yml") +) + class ConfigReader(object): """Summary Read sevice end ponit """ - def __init__(self, - service_config_yml=None): + + def __init__(self, service_config_yml=None): """Summary Args: service_config_yml (None, optional): yml file defined service configuration @@ -21,47 +28,47 @@ class ConfigReader(object): service_config = DBMLIB_SERVICE_CONFIG else: service_config = service_config_yml - - with open(service_config, 'r') as ymlfile: - config = yaml.load(ymlfile) - self.input_dir = config['cdx_configuration']['input_dir'] - self.output_dir = config['cdx_configuration']['output_dir'] - self.out_derived_dir = config['cdx_configuration']['out_derived_dir'] - self.of_path = config['cdx_configuration']['open_face_path'] - self.facial_landmarks = config['cdx_configuration']['facial_landmarks'] - self.feature_group = config['cdx_configuration']['feature_group'] - + + with open(service_config, "r") as ymlfile: + config = yaml.load(ymlfile, Loader=yaml.CLoader) + self.input_dir = 
config["cdx_configuration"]["input_dir"] + self.output_dir = config["cdx_configuration"]["output_dir"] + self.out_derived_dir = config["cdx_configuration"]["out_derived_dir"] + self.of_path = config["cdx_configuration"]["open_face_path"] + self.facial_landmarks = config["cdx_configuration"]["facial_landmarks"] + self.feature_group = config["cdx_configuration"]["feature_group"] + def get_open_face_path(self): """Summary Returns: TYPE: end point """ return self.of_path - + def get_input_dir(self): """Summary Returns: TYPE: end point """ return self.input_dir - + def get_output_dir(self): """Summary Returns: TYPE: end point """ return self.output_dir - + def get_out_derived_dir(self): """Summary Returns: TYPE: end point """ return self.out_derived_dir - + def get_fac_landmark_path(self): """Summary Returns: TYPE: end point """ - return self.facial_landmarks \ No newline at end of file + return self.facial_landmarks diff --git a/opendbm/dbm_lib/controller/__init__.py b/opendbm/dbm_lib/controller/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/opendbm/dbm_lib/controller/process_feature.py b/opendbm/dbm_lib/controller/process_feature.py new file mode 100644 index 00000000..e5f450af --- /dev/null +++ b/opendbm/dbm_lib/controller/process_feature.py @@ -0,0 +1,187 @@ +""" +file_name: process_features +project_name: DBM +created: 2020-20-07 +""" + +import glob +import logging +import os +import subprocess +import tempfile +from os.path import basename, dirname, isfile, join, splitext + +from opendbm.dbm_lib.dbm_features.raw_features import audio, movement, nlp, video + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + + +def audio_to_wav(input_filepath, tmp=False): + """Extracts a video's audio file and saves it to wav + Args: + input_filepath: (str) + Returns: + """ + try: + + fname, _ = splitext(input_filepath) + if tmp: + fname = os.path.basename(input_filepath) + output_filepath = f"{tempfile.gettempdir()}/{fname}.wav" + 
else: + output_filepath = fname + ".wav" + + if not isfile(output_filepath): + call = [ + "ffmpeg", + "-i", + input_filepath, + "-vn", + "-acodec", + "pcm_s16le", + "-ar", + "44100", + output_filepath, + ] + + logger.info("Converting audio from {} to wav".format(input_filepath)) + subprocess.Popen( + call, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + stdin=subprocess.PIPE, + ).wait() + # subprocess.check_output(call) + logger.info("wav output saved in {}".format(output_filepath)) + else: + logger.info("Output file {} already exists".format(output_filepath)) + return output_filepath + + except Exception as e: + logger.error("Failed to extract audio from Video", e) + + +def process_acoustic(video_uri, out_dir, dbm_group, r_config): + """ + processing acoustic features + Args: + video_uri: video path; out_dir: raw variable output dir + dbm_group: list of features group to process; r_config: raw feature config object + """ + if dbm_group is not None and len(dbm_group) > 0 and "acoustic" not in dbm_group: + return + + logger.info("Processing acoustic variables from data in {}".format(video_uri)) + logger.info("processing audio intensity....") + audio.intensity.run_intensity(video_uri, out_dir, r_config) + + logger.info("processing audio pitch freq....") + audio.pitch_freq.run_pitch(video_uri, out_dir, r_config) + + logger.info("processing HNR....") + audio.hnr.run_hnr(video_uri, out_dir, r_config) + + logger.info("processing GNE....") + audio.gne.run_gne(video_uri, out_dir, r_config) + + logger.info("processing voice frame score....") + audio.voice_frame_score.run_vfs(video_uri, out_dir, r_config) + + logger.info("processing formant frequency....") + audio.formant_freq.run_formant(video_uri, out_dir, r_config) + + logger.info("processing pause segment....") + audio.pause_segment.run_pause_segment(video_uri, out_dir, r_config) + + logger.info("processing jitter....") + audio.jitter.run_jitter(video_uri, out_dir, r_config) + + logger.info("processing 
shimmer....") + audio.shimmer.run_shimmer(video_uri, out_dir, r_config) + + logger.info("processing mfcc....") + audio.mfcc.run_mfcc(video_uri, out_dir, r_config) + + +def process_facial(video_uri, out_dir, dbm_group, r_config): + """ + processing facial features + Args: + video_uri: video path; out_dir: raw variable output dir + dbm_group: list of features to process; r_config: raw feature config object + """ + if dbm_group is not None and len(dbm_group) > 0 and "facial" not in dbm_group: + return + + logger.info("Processing facial variables from data in {}".format(video_uri)) + logger.info("processing facial asymmetry....") + video.face_asymmetry.run_face_asymmetry(video_uri, out_dir, r_config) + + logger.info("processing facial Action Unit....") + video.face_au.run_face_au(video_uri, out_dir, r_config) + + logger.info("processing facial expressivity....") + video.face_emotion_expressivity.run_face_expressivity(video_uri, out_dir, r_config) + + logger.info("processing facial landmark....") + video.face_landmark.run_face_landmark(video_uri, out_dir, r_config) + + +def process_movement(video_uri, out_dir, dbm_group, r_config, dlib_model): + """ + processing facial features + Args: + video_uri: video path; out_dir: raw variable output dir + dbm_group: list of features to process; r_config: raw feature config object + dlib_model: shape predictor model path + """ + if dbm_group is not None and len(dbm_group) > 0 and "movement" not in dbm_group: + return + + logger.info("Processing movement variables from data in {}".format(video_uri)) + + logger.info("processing head movement....") + movement.head_motion.run_head_movement(video_uri, out_dir, r_config) + + logger.info("processing eye blink....") + movement.eye_blink.run_eye_blink(video_uri, out_dir, r_config, dlib_model) + + logger.info("processing eye gaze....") + movement.eye_gaze.run_eye_gaze(video_uri, out_dir, r_config) + + logger.info("processing voice tremor....") + movement.voice_tremor.run_vtremor(video_uri, 
out_dir, r_config) + + logger.info("processing facial tremor....") + movement.facial_tremor.fac_tremor_process( + video_uri, out_dir, r_config, model_output=True + ) + + +def process_nlp(video_uri, out_dir, dbm_group, tran_tog, r_config, deep_path): + """ + processing nlp features + Args: + video_uri: video path; out_dir: raw variable output dir + dbm_group: list of features to process; r_config: raw feature config object + deep_path: deep speech build path + """ + if dbm_group is not None and len(dbm_group) > 0 and "speech" not in dbm_group: + return + + logger.info("Processing nlp variables from data in {}".format(video_uri)) + nlp.transcribe.run_transcribe(video_uri, out_dir, r_config, deep_path) + nlp.speech_features.run_speech_feature(video_uri, out_dir, r_config, tran_tog) + + +def remove_file(file_path, file_ext=".wav"): + """ + removing wav file + """ + file_dir = dirname(file_path) + file_name, _ = splitext(basename(file_path)) + wav_file = glob.glob(join(file_dir, file_name + file_ext)) + + if len(wav_file) > 0: + os.remove(wav_file[0]) diff --git a/opendbm/dbm_lib/dbm_features/__init__.py b/opendbm/dbm_lib/dbm_features/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/opendbm/dbm_lib/dbm_features/derived_features/__init__.py b/opendbm/dbm_lib/dbm_features/derived_features/__init__.py new file mode 100644 index 00000000..5b391f0f --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/derived_features/__init__.py @@ -0,0 +1,7 @@ +""" +file_name: init +project_name: DBM +created: 2020-20-07 +""" + +from __future__ import absolute_import, division, print_function diff --git a/dbm_lib/dbm_features/derived_features/derive.py b/opendbm/dbm_lib/dbm_features/derived_features/derive.py similarity index 55% rename from dbm_lib/dbm_features/derived_features/derive.py rename to opendbm/dbm_lib/dbm_features/derived_features/derive.py index 55f699c1..597532ba 100644 --- a/dbm_lib/dbm_features/derived_features/derive.py +++ 
b/opendbm/dbm_lib/dbm_features/derived_features/derive.py @@ -4,15 +4,17 @@ project_name: DBM created: 2020-20-07 """ -import pandas as pd -import numpy as np import glob -import os import logging +import os from datetime import datetime +import numpy as np +import pandas as pd + logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() +logger = logging.getLogger() + def dict_to_df(feature_dict, file): """ @@ -21,92 +23,106 @@ def dict_to_df(feature_dict, file): final_dict = {k: v for d in feature_dict for k, v in d.items()} feature_df = pd.DataFrame([final_dict]) - feature_df['Filename'] = file - + feature_df["Filename"] = file + return feature_df + def save_derive_output(df_list, out_loc): """ Saving derive variable output """ try: - if len(df_list)>0: + if len(df_list) > 0: df = df_list[0] - - file_name = os.path.join(out_loc, 'derived_output.csv') + + file_name = os.path.join(out_loc, "derived_output.csv") if not os.path.exists(out_loc): - + os.makedirs(out_loc) df.to_csv(file_name, index=False) - + except Exception as e: - logger.error('Failed to save derived variable csv') + e + logger.error("Failed to save derived variable csv") + def feature_output(df_fea, exp_var, cal_type): """ - Computing mean value of dataframe columns + Computing mean value of dataframe columns """ exp_val = np.nan try: - + df_ = df_fea[exp_var].astype(float).copy() df_ = df_.dropna().reset_index(drop=True) - - if len(df_)>0: - if cal_type == 'mean': - exp_val = df_.mean(axis = 0, skipna = True) + if len(df_) > 0: - elif cal_type == 'std': - exp_val = df_.std(axis = 0, skipna = True) + if cal_type == "mean": + exp_val = df_.mean(axis=0, skipna=True) - elif cal_type == 'count':#use case for eye blink - exp_var = 'mov_blink' - exp_val = (len(df_)/df_[0])*60 + elif cal_type == "std": + exp_val = df_.std(axis=0, skipna=True) - elif cal_type == 'pct': - if len(df_)>0: - exp_val = len(df_[df_ > 0])/len(df_) + elif cal_type == "count": # use case for eye blink + exp_var = 
"mov_blink" + exp_val = (len(df_) / df_[0]) * 60 - elif cal_type == 'range': + elif cal_type == "pct": + if len(df_) > 0: + exp_val = len(df_[df_ > 0]) / len(df_) + + elif cal_type == "range": exp_val = max(df_) - min(df_) except Exception as e: - logger.error('Failed to compute calculation: {}'.format(e)) + logger.error("Failed to compute calculation: {}".format(e)) pass - - var_name = exp_var + '_' + cal_type + + var_name = exp_var + "_" + cal_type exp_val = float("{0:.4f}".format(exp_val)) var_val = (var_name, exp_val) - + return var_val + def cal_type_dict(var_df, raw_df, d_cfg_Obj, r_cfg_Obj): - - var_name = str(var_df['var_id']) - - #fetching key based on variable name from raw config + + var_name = str(var_df["var_id"]) + + # fetching key based on variable name from raw config var_key = list(r_cfg_Obj.keys())[list(r_cfg_Obj.values()).index(var_name)] - cal_type = d_cfg_Obj[var_key] # calculation type from config - + cal_type = d_cfg_Obj[var_key] # calculation type from config + var_val = [feature_output(raw_df, var_name, cal) for cal in cal_type] var_val_dict = dict(var_val) - + return var_val_dict + def compute_feature(raw_df, var_cols, d_cfg_Obj, r_cfg_Obj): """ Computing features """ - #Variable data frame for each feature group - var_df = pd.DataFrame(var_cols,columns=['var_id']) + # Variable data frame for each feature group + var_df = pd.DataFrame(var_cols, columns=["var_id"]) feature_dict = {} - - if len(raw_df)>0: - feature_dict = var_df.apply(cal_type_dict, args=(raw_df, d_cfg_Obj, r_cfg_Obj, ), axis=1) + + if len(raw_df) > 0: + feature_dict = var_df.apply( + cal_type_dict, + args=( + raw_df, + d_cfg_Obj, + r_cfg_Obj, + ), + axis=1, + ) return feature_dict - + + def calc_derive(input_file, input_dir, r_cfg_Obj, d_cfg_Obj, feature): """ Calculating derived variable @@ -114,51 +130,53 @@ def calc_derive(input_file, input_dir, r_cfg_Obj, d_cfg_Obj, feature): df_list = [] df = pd.DataFrame() for file in input_file: - + file_name, _ = 
os.path.splitext(os.path.basename(file)) input_loc = os.path.join(input_dir, file_name) - + var_cols = [r_cfg_Obj[x] for x in d_cfg_Obj[feature]] - - fea_loc = d_cfg_Obj[feature + '_LOC'] - fea_res = glob.glob(os.path.join(input_loc, '*/*/*' + fea_loc + '.csv')) - - if len(fea_res)>0: + + fea_loc = d_cfg_Obj[feature + "_LOC"] + fea_res = glob.glob(os.path.join(input_loc, "*/*/*" + fea_loc + ".csv")) + + if len(fea_res) > 0: raw_df = pd.read_csv(fea_res[0]) feature_dict = compute_feature(raw_df, var_cols, d_cfg_Obj, r_cfg_Obj) - - if len(feature_dict)>0: + + if len(feature_dict) > 0: feature_df = dict_to_df(feature_dict, file) df_list.append(feature_df) - - if len(df_list)>0: + + if len(df_list) > 0: df = pd.concat(df_list, ignore_index=True) return df + def run_derive(input_file, input_dir, output_dir, r_config, d_config): """ Processing derived variable """ - d_cfg_Obj = d_config.base_derive['derive_feature'] - r_cfg_Obj = r_config.base_raw['raw_feature'] - feature_group = d_cfg_Obj['FEATURE_GROUP'] - - #Iterating over feature group + d_cfg_Obj = d_config.base_derive["derive_feature"] + r_cfg_Obj = r_config.base_raw["raw_feature"] + feature_group = d_cfg_Obj["FEATURE_GROUP"] + + # Iterating over feature group df_list = [] for feature in feature_group: try: - + df_fea = calc_derive(input_file, input_dir, r_cfg_Obj, d_cfg_Obj, feature) - if len(df_fea)>0: - + if len(df_fea) > 0: + if len(df_list) == 0: df_list.append(df_fea) else: - result = pd.merge(df_list[0], df_fea, how='outer', on=['Filename']) + result = pd.merge(df_list[0], df_fea, how="outer", on=["Filename"]) df_list = [result] - + except Exception as e: - logger.error('Failed to process derived variables {}'.format(feature)) - + e + logger.error("Failed to process derived variables {}".format(feature)) + logger.info("Saving derived variable output...") - save_derive_output(df_list, output_dir) \ No newline at end of file + save_derive_output(df_list, output_dir) diff --git 
a/opendbm/dbm_lib/dbm_features/raw_features/__init__.py b/opendbm/dbm_lib/dbm_features/raw_features/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/__init__.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/__init__.py new file mode 100644 index 00000000..9ad82f4b --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/__init__.py @@ -0,0 +1,10 @@ +from .formant_freq import run_formant +from .gne import run_gne +from .hnr import run_hnr +from .intensity import run_intensity +from .jitter import run_jitter +from .mfcc import run_mfcc +from .pause_segment import run_pause_segment +from .pitch_freq import run_pitch +from .shimmer import run_shimmer +from .voice_frame_score import run_vfs diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/formant_freq.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/formant_freq.py new file mode 100644 index 00000000..7cd5346f --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/formant_freq.py @@ -0,0 +1,155 @@ +""" +file_name: formant_freq +project_name: DBM +created: 2020-20-07 +""" + +import glob +import logging +from os.path import join + +import numpy as np +import pandas as pd +import parselmouth + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +formant_dir = "acoustic/formant_freq" +csv_ext = "_formant.csv" +error_txt = "error: length less than 0.064" + + +def formant_list(formant, snd): + """ + Getting formant frequency per second + Args: + formant: Formant object for sound wave + snd: Parselmouth sound object + Returns: + List of first through fourth formant for each frame + """ + f1_list = [] + f2_list = [] + f3_list = [] + f4_list = [] + + dur = snd.duration - 0.02 + dur_round = round(dur, 2) + + time_list = np.arange(0.001, dur_round, 0.001) + for time in time_list: + + f1 = formant.get_value_at_time(1, time) + 
f2 = formant.get_value_at_time(2, time) + f3 = formant.get_value_at_time(3, time) + f4 = formant.get_value_at_time(4, time) + + f1_list.append(f1) + f2_list.append(f2) + f3_list.append(f3) + f4_list.append(f4) + return f1_list, f2_list, f3_list, f4_list + + +def formant_score(path): + """ + Using parselmouth library fetching Formant Frequency + Args: + path: (.wav) audio file location + Returns: + (list) list of Formant freq for each voice frame + """ + sound_pat = parselmouth.Sound(path) + formant = sound_pat.to_formant_burg(time_step=0.001) + f_score = formant_list(formant, sound_pat) + return f_score + + +def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config, save=True): + """ + Preparing Formant freq matrix + Args: + audio_file: (.wav) parsed audio file; fl_name: input file name + out_loc: (str) Output directory; r_config: raw variable config + """ + + f1_list, f2_list, f3_list, f4_list = formant_score(audio_file) + df_formant = pd.DataFrame(f1_list, columns=[r_config.aco_fm1]) + + df_formant[r_config.aco_fm2] = f2_list + df_formant[r_config.aco_fm3] = f3_list + df_formant[r_config.aco_fm4] = f4_list + + df_formant.replace("", np.nan, regex=True, inplace=True) + df_formant[ + r_config.err_reason + ] = "Pass" # will replace with threshold in future release + + df_formant["Frames"] = df_formant.index + df_formant["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_formant, out_loc, fl_name, formant_dir, csv_ext) + return df_formant + + +def empty_fm(video_uri, out_loc, fl_name, r_config, save=True): + + """ + Preparing empty formant frequency matrix if something fails + """ + cols = [ + "Frames", + r_config.aco_fm1, + r_config.aco_fm2, + r_config.aco_fm3, + r_config.aco_fm4, + r_config.err_reason, + ] + out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, error_txt]] + df_fm = pd.DataFrame(out_val, columns=cols) + df_fm["dbm_master_url"] = video_uri + + if save: + logger.info("Saving 
Output file {} ".format(out_loc)) + ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext) + return df_fm + + +def run_formant(video_uri, out_dir, r_config, save=True): + + """ + Processing all patient's for fetching Formant freq + --------------- + --------------- + Args: + video_uri: video path; r_config: raw variable config object + out_dir: (str) Output directory for processed output + """ + try: + + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: + + audio_file = aud_filter[0] + aud_dur = ut.get_length(audio_file) + + if float(aud_dur) < 0.064: + logger.info( + "Output file {} size is less than 0.064sec".format(audio_file) + ) + + df = empty_fm(video_uri, out_loc, fl_name, r_config, save=save) + else: + df = calc_formant( + video_uri, audio_file, out_loc, fl_name, r_config, save=save + ) + return df + except Exception as e: + e + logger.error("Failed to process audio file") diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/gne.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/gne.py new file mode 100644 index 00000000..7601da14 --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/gne.py @@ -0,0 +1,166 @@ +""" +file_name: gne +project_name: DBM +created: 2020-20-07 +""" + +import glob +import logging +import os +from os.path import join + +import more_itertools as mit +import numpy as np +import pandas as pd +import parselmouth + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +gne_dir = "acoustic/glottal_noise" +ff_dir = "acoustic/pitch" +csv_ext = "_gne.csv" + + +def gne_ratio(sound): + """ + Using parselmouth library fetching glottal noise excitation ratio + Args: + sound: parselmouth object + Returns: + (list) list of gne ratio for each voice frame + """ + harmonicity_gne = sound.to_harmonicity_gne() + gne_all_bands = 
harmonicity_gne.values + gne_all_bands = np.where(gne_all_bands == -200, np.NaN, gne_all_bands) + + gne = np.nanmax( + gne_all_bands + ) # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf + return gne + + +def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True): + """ + Preparing empty GNE matrix if something fails + """ + cols = ["Frames", r_config.aco_gne, r_config.err_reason] + out_val = [[np.nan, np.nan, error_txt]] + + df_gne = pd.DataFrame(out_val, columns=cols) + df_gne["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext) + return df_gne + + +def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file): + """ + calculating gne for each voice segment + """ + snd = parselmouth.Sound(audio_file) + pitch = snd.to_pitch(time_step=0.001) + + for idx, vs in enumerate(com_speech_sort): + try: + + max_gne = np.NaN + if vs in voiced_yes and len(vs) > 1: + + start_time = pitch.get_time_from_frame_number(vs[0]) + end_time = pitch.get_time_from_frame_number(vs[-1]) + + snd_start = int(snd.get_frame_number_from_time(start_time)) + snd_end = int(snd.get_frame_number_from_time(end_time)) + + samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end]) + max_gne = gne_ratio(samples) + except: + pass + + gne_all_frames[idx] = max_gne + return gne_all_frames + + +def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None): + """ + Preparing gne matrix + Args: + audio_file: (.wav) parsed audio file + out_loc: (str) Output directory for csv's + """ + dir_path = os.path.join(out_loc, ff_dir) + if os.path.isdir(dir_path) or ff_df is not None: + if ff_df is not None: + voice_seg = ut.process_segment_pitch(ff_df, r_config) + else: + voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df) + + gne_all_frames = [np.NaN] * len(voice_seg[0]) + gne_segment_frames = segment_gne( + 
voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file + ) + + df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne]) + df_gne[ + r_config.err_reason + ] = "Pass" # will replace with threshold in future release + + df_gne["Frames"] = df_gne.index + df_gne["dbm_master_url"] = video_uri + + if save: + logger.info("Processing Output file {} ".format(out_loc)) + ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext) + return df_gne + + else: + error_txt = "error: pitch freq not available" + return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save) + + +def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None): + """ + Processing all patient's for fetching glottal noise ratio + --------------- + --------------- + Args: + video_uri: video path; r_config: raw variable config object + out_dir: (str) Output directory for processed output + """ + try: + + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: + + audio_file = aud_filter[0] + aud_dur = ut.get_length(audio_file) + + if float(aud_dur) < 0.064: + logger.info( + "Output file {} size is less than 0.064sec".format(audio_file) + ) + + error_txt = "error: length less than 0.064" + df = empty_gne( + video_uri, out_loc, fl_name, r_config, error_txt, save=save + ) + else: + df = calc_gne( + video_uri, + audio_file, + out_loc, + fl_name, + r_config, + save=save, + ff_df=ff_df, + ) + return df + except Exception as e: + e + logger.error("Failed to process audio file") diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/hnr.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/hnr.py new file mode 100644 index 00000000..89bb26b1 --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/hnr.py @@ -0,0 +1,112 @@ +""" +file_name: hnr +project_name: DBM +created: 2020-20-07 +""" + +import glob +import logging +import os +from os.path import join + +import 
numpy as np +import pandas as pd +import parselmouth + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +hnr_dir = "acoustic/harmonic_noise" +csv_ext = "_hnr.csv" +error_txt = "error: length less than 0.064" + + +def hnr_ratio(filepath): + """ + Using parselmouth library fetching harmonic noise ratio ratio + Args: + path: (.wav) audio file location + Returns: + (list) list of hnr ratio for each voice frame, min,max and mean hnr + """ + sound = parselmouth.Sound(filepath) + harmonicity = sound.to_harmonicity_ac(time_step=0.001) + + hnr_all_frames = harmonicity.values # [harmonicity.values != -200] nan it (****) + hnr_all_frames = np.where(hnr_all_frames == -200, np.NaN, hnr_all_frames) + return hnr_all_frames.transpose() + + +def calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config, save=True): + """ + Preparing harmonic noise matrix + Args: + audio_file: (.wav) parsed audio file + out_loc: (str) Output directory for csv's + """ + + hnr_all_frames = hnr_ratio(audio_file) + df_hnr = pd.DataFrame(hnr_all_frames, columns=[r_config.aco_hnr]) + + df_hnr["Frames"] = df_hnr.index + df_hnr["dbm_master_url"] = video_uri + df_hnr[ + r_config.err_reason + ] = "Pass" # will replace with threshold in future release + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext) + return df_hnr + + +def empty_hnr(video_uri, out_loc, fl_name, r_config, save=True): + """ + Preparing empty HNR matrix if something fails + """ + cols = ["Frames", r_config.aco_hnr, r_config.err_reason] + out_val = [[np.nan, np.nan, error_txt]] + df_hnr = pd.DataFrame(out_val, columns=cols) + df_hnr["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext) + return df_hnr + + +def run_hnr(video_uri, out_dir, r_config, save=True): + """ + 
Processing all patient's for fetching harmonic noise ratio + ------------------- + ------------------- + Args: + video_uri: video path; r_config: raw variable config object + out_dir: (str) Output directory for processed output + """ + try: + + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: + + audio_file = aud_filter[0] + aud_dur = ut.get_length(audio_file) + + if float(aud_dur) < 0.064: + logger.info( + "Output file {} size is less than 0.064sec".format(audio_file) + ) + + df = empty_hnr(video_uri, out_loc, fl_name, r_config, save=save) + + else: + df = calc_hnr( + video_uri, audio_file, out_loc, fl_name, r_config, save=save + ) + return df + except Exception as e: + e + logger.error("Failed to process audio file") diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/intensity.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/intensity.py new file mode 100644 index 00000000..33d3b18a --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/intensity.py @@ -0,0 +1,107 @@ +""" +file_name: intensity +project_name: DBM +created: 2020-20-07 +""" + +import glob +import logging +from os.path import join + +import numpy as np +import pandas as pd +import parselmouth + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +intensity_dir = "acoustic/intensity" +csv_ext = "_intensity.csv" +error_txt = "error: length less than 0.064" + + +def intensity_score(path): + """ + Using parselmouth library fetching Intensity + Args: + path: (.wav) audio file location + Returns: + (list) list of Intensity for each voice frame + """ + sound_pat = parselmouth.Sound(path) + intensity = sound_pat.to_intensity(time_step=0.001) + return intensity.values[0] + + +def calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config, save=True): + """ + Preparing Intensity matrix 
+ Args: + audio_file: (.wav) parsed audio file + out_loc: (str) Output directory for csv's + """ + + intensity_frames = intensity_score(audio_file) + df_intensity = pd.DataFrame(intensity_frames, columns=[r_config.aco_int]) + + df_intensity["Frames"] = df_intensity.index + df_intensity["dbm_master_url"] = video_uri + df_intensity[ + r_config.err_reason + ] = "Pass" # will replace with threshold in future release + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_intensity, out_loc, fl_name, intensity_dir, csv_ext) + return df_intensity + + +def empty_intensity(video_uri, out_loc, fl_name, r_config, save=True): + """ + Preparing empty Intensity matrix if something fails + """ + cols = ["Frames", r_config.aco_int, r_config.err_reason] + out_val = [[np.nan, np.nan, error_txt]] + df_int = pd.DataFrame(out_val, columns=cols) + df_int["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_int, out_loc, fl_name, intensity_dir, csv_ext) + return df_int + + +def run_intensity(video_uri, out_dir, r_config, save=True): + """ + Processing all patient's for fetching Intensity + ------------------- + ------------------- + Args: + video_uri: video path; r_config: raw variable config object + out_dir: (str) Output directory for processed output + """ + try: + + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: + + audio_file = aud_filter[0] + aud_dur = ut.get_length(audio_file) + + if float(aud_dur) < 0.064: + logger.info( + "Output file {} size is less than 0.064sec".format(audio_file) + ) + + df = empty_intensity(video_uri, out_loc, fl_name, r_config, save=save) + else: + df = calc_intensity( + video_uri, audio_file, out_loc, fl_name, r_config, save=save + ) + return df + except Exception as e: + e + logger.error("Failed to process audio file") diff --git 
a/opendbm/dbm_lib/dbm_features/raw_features/audio/jitter.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/jitter.py new file mode 100644 index 00000000..c2cd08e6 --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/jitter.py @@ -0,0 +1,166 @@ +""" +file_name: jitter_processing +project_name: DBM +created: 2020-20-07 +""" + +import glob +import logging +import os +from os.path import join + +import numpy as np +import pandas as pd +import parselmouth + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +jitter_dir = "acoustic/jitter" +ff_dir = "acoustic/pitch" +csv_ext = "_jitter.csv" + + +def audio_jitter(sound): + """ + Using parselmouth library fetching jitter + Args: + sound: parselmouth object + Returns: + (list) list of jitters for each voice frame + """ + pointProcess = parselmouth.praat.call( + sound, "To PointProcess (periodic, cc)...", 80, 500 + ) + jitter = parselmouth.praat.call( + pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3 + ) + return jitter + + +def empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt, save=True): + """ + Preparing empty jitter matrix if something fails + """ + cols = ["Frames", r_config.aco_jitter, r_config.err_reason] + out_val = [[np.nan, np.nan, error_txt]] + df_jitter = pd.DataFrame(out_val, columns=cols) + df_jitter["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext) + return df_jitter + + +def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_file): + """ + calculating jitter for each voice segment + """ + snd = parselmouth.Sound(audio_file) + pitch = snd.to_pitch(time_step=0.001) + + for idx, vs in enumerate(com_speech_sort): + try: + + jitter = np.NaN + if vs in voiced_yes and len(vs) > 1: + + start_time = pitch.get_time_from_frame_number(vs[0]) + 
end_time = pitch.get_time_from_frame_number(vs[-1]) + + snd_start = int(snd.get_frame_number_from_time(start_time)) + snd_end = int(snd.get_frame_number_from_time(end_time)) + + samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end]) + jitter = audio_jitter(samples) + except: + pass + + jitter_frames[idx] = jitter + return jitter_frames + + +def calc_jitter( + video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None +): + """ + Preparing jitter matrix + Args: + audio_file: (.wav) parsed audio file + out_loc: (str) Output directory for csv + r_config: config.config_raw_feature.pyConfigFeatureNmReader object + """ + dir_path = os.path.join(out_loc, ff_dir) + if os.path.isdir(dir_path) or ff_df is not None: + + if ff_df is not None: + voice_seg = ut.process_segment_pitch(ff_df, r_config) + else: + voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df) + + jitter_frames = [np.NaN] * len(voice_seg[0]) + jitter_segment_frames = segment_jitter( + voice_seg[0], voice_seg[1], voice_seg[2], jitter_frames, audio_file + ) + + df_jitter = pd.DataFrame(jitter_segment_frames, columns=[r_config.aco_jitter]) + df_jitter[ + r_config.err_reason + ] = "Pass" # will replace with threshold in future release + + df_jitter["Frames"] = df_jitter.index + df_jitter["dbm_master_url"] = video_uri + if save: + logger.info("Processing Output file {} ".format(out_loc)) + ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext) + df = df_jitter + else: + error_txt = "error: fundamental freq not available" + df = empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt, save=save) + return df + + +def run_jitter(video_uri, out_dir, r_config, save=True, ff_df=None): + """ + Processing all patient's videos for fetching jitter + ------------------- + ------------------- + Args: + video_uri: video path; r_config: raw variable config object + out_dir: (str) Output directory for processed output + """ + try: + + input_loc, out_loc, fl_name = 
ut.filter_path(video_uri, out_dir) + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: + + audio_file = aud_filter[0] + aud_dur = ut.get_length(audio_file) + + if float(aud_dur) < 0.064: + logger.info( + "Output file {} size is less than 0.064sec".format(audio_file) + ) + + error_txt = "error: length less than 0.064" + df = empty_jitter( + video_uri, out_loc, fl_name, r_config, error_txt, save=save + ) + else: + df = calc_jitter( + video_uri, + audio_file, + out_loc, + fl_name, + r_config, + save=save, + ff_df=ff_df, + ) + return df + except Exception as e: + logger.error("Error in jitter: {}".format(e)) + logger.error("Failed to process audio file") diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/mfcc.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/mfcc.py new file mode 100644 index 00000000..16047910 --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/mfcc.py @@ -0,0 +1,147 @@ +""" +file_name: mfcc +project_name: DBM +created: 2020-20-07 +""" + +import glob +import logging +import os +from os.path import join + +import numpy as np +import pandas as pd +import parselmouth + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +mfcc_dir = "acoustic/mfcc" +csv_ext = "_mfcc.csv" +error_txt = "error: length less than 0.064" + + +def empty_mfcc(video_uri, out_loc, fl_name, r_config, save=True): + + """ + Preparing empty empty_mfcc matrix if something fails + """ + cols = [ + "Frames", + r_config.aco_mfcc1, + r_config.aco_mfcc2, + r_config.aco_mfcc3, + r_config.aco_mfcc4, + r_config.aco_mfcc5, + r_config.aco_mfcc6, + r_config.aco_mfcc7, + r_config.aco_mfcc8, + r_config.aco_mfcc9, + r_config.aco_mfcc10, + r_config.aco_mfcc11, + r_config.aco_mfcc12, + r_config.err_reason, + ] + out_val = [ + [ + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + 
np.nan, + error_txt, + ] + ] + df_mfcc = pd.DataFrame(out_val, columns=cols) + df_mfcc["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_mfcc, out_loc, fl_name, mfcc_dir, csv_ext) + + return df_mfcc + + +def audio_mfcc(path): + """ + Using parselmouth library fetching mfccs + Args: + path: (.wav) audio file location + Returns: + (list) list of mfccs for each voice frame + """ + sound = parselmouth.Sound(path) + mfcc_object = sound.to_mfcc(time_step=0.001, number_of_coefficients=12) + mfccs = mfcc_object.to_array() + mfccs = np.delete(mfccs, (0), axis=0) + return mfccs + + +def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config, save=True): + """ + Preparing mfcc matrix + Args: + audio_file: (.wav) parsed audio file + out_loc: output location to save csv + fl_name: (str) name of audio file + r_config: config.config_raw_feature.pyConfigFeatureNmReader object + """ + dict_ = {} + mfccs = audio_mfcc(audio_file) + + for i in range(1, 13): + conf_str = r_config.base_raw["raw_feature"] + dict_[conf_str["aco_mfcc" + str(i)]] = mfccs[i - 1, :] + + df = pd.DataFrame(dict_) + df["Frames"] = df.index + + df[r_config.err_reason] = "Pass" # may replace based on threshold in future release + df["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df, out_loc, fl_name, mfcc_dir, csv_ext) + return df + + +def run_mfcc(video_uri, out_dir, r_config, save=True): + """ + Processing all patients to fetch mfccs + + Args: + video_uri: video path; r_config: raw variable config object + out_dir: (str) Output directory for processed output + """ + try: + + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: + + audio_file = aud_filter[0] + aud_dur = ut.get_length(audio_file) + + if float(aud_dur) < 0.064: + logger.info( + "Output file {} size is less than 
0.064sec".format(audio_file) + ) + + return empty_mfcc(video_uri, out_loc, fl_name, r_config, save=save) + + return calc_mfcc( + video_uri, audio_file, out_loc, fl_name, r_config, save=save + ) + except Exception as e: + e + logger.error("Failed to process audio file") diff --git a/dbm_lib/dbm_features/raw_features/audio/pause_segment.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/pause_segment.py similarity index 53% rename from dbm_lib/dbm_features/raw_features/audio/pause_segment.py rename to opendbm/dbm_lib/dbm_features/raw_features/audio/pause_segment.py index b5c43534..1ce1e048 100644 --- a/dbm_lib/dbm_features/raw_features/audio/pause_segment.py +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/pause_segment.py @@ -4,24 +4,25 @@ project_name: DBM created: 2020-20-07 """ -import os import glob -from pydub import AudioSegment -import librosa -import pandas as pd -import numpy as np -import webrtcvad -from os.path import join import logging +import os +from os.path import join -from dbm_lib.dbm_features.raw_features.util import vad_utilities as vu -from dbm_lib.dbm_features.raw_features.util import util as ut +import numpy as np +import pandas as pd +import webrtcvad +from pydub import AudioSegment + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut +from opendbm.dbm_lib.dbm_features.raw_features.util import vad_utilities as vu logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() +logger = logging.getLogger() + +pause_seg_dir = "acoustic/pause_segment" +csv_ext = "_pausechar.csv" -pause_seg_dir = 'acoustic/pause_segment' -csv_ext = '_pausechar.csv' def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config): """ @@ -36,25 +37,27 @@ def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config): speaking_time = np.sum(np.asarray(seg_ends_sec) - np.asarray(seg_starts_sec)) num_pauses = len(seg_starts_sec) - 1 pause_len = np.zeros(num_pauses) - + for p in range(num_pauses): - pause_len[p] = seg_starts_sec[p+1] - 
seg_ends_sec[p] - - if len(pause_len)>0: - pause_len_mean = np.mean(pause_len) - pause_len_std = np.std(pause_len) + pause_len[p] = seg_starts_sec[p + 1] - seg_ends_sec[p] + + if len(pause_len) > 0: pause_time = np.sum(pause_len) - + else: - pause_len_mean = 0 - pause_len_std = 0 pause_time = 0 - + pause_frac = pause_time / total_time - timing_dict = {r_config.aco_totaltime: total_time, r_config.aco_speakingtime: speaking_time, - r_config.aco_numpauses: num_pauses, r_config.aco_pausetime: pause_time, r_config.aco_pausefrac: pause_frac} + timing_dict = { + r_config.aco_totaltime: total_time, + r_config.aco_speakingtime: speaking_time, + r_config.aco_numpauses: num_pauses, + r_config.aco_pausetime: pause_time, + r_config.aco_pausefrac: pause_frac, + } return timing_dict + def process_silence(audio_file, r_config): """ Returns dataframe for pause between words using voice activity detection @@ -65,64 +68,80 @@ def process_silence(audio_file, r_config): """ feat_dict_list = [] y, sr = vu.read_wave(audio_file) - + # 3 is most aggressive (splits most), 0 least (better for low snr) aggressiveness = 3 frame_dur_ms = 20 - - #pause segment(long & short pad) + + # pause segment(long & short pad) long_pad_around_voice_ms = 200 short_pad_around_voice_ms = 100 - - if len(y)>0: + + if len(y) > 0: vad = webrtcvad.Vad(aggressiveness) - + frames = vu.frame_generator(frame_dur_ms, y, sr) frames = list(frames) - - #longer pad time screens out little blips, but misses short silences - long_seg_starts, long_seg_ends = vu.vad_get_segment_times(sr, frame_dur_ms, long_pad_around_voice_ms, vad, frames) - - #Logic to handle blank audio file + + # longer pad time screens out little blips, but misses short silences + long_seg_starts, long_seg_ends = vu.vad_get_segment_times( + sr, frame_dur_ms, long_pad_around_voice_ms, vad, frames + ) + + # Logic to handle blank audio file if len(long_seg_starts) == 0 or len(long_seg_ends) == 0: - return '' - + return "" + t_start = long_seg_starts[0] t_end = 
long_seg_ends[-1] # shorter pad time captures short silences (but misfires on little blips) - short_seg_starts, short_seg_ends = vu.vad_get_segment_times(sr, frame_dur_ms, short_pad_around_voice_ms, vad, frames) - + short_seg_starts, short_seg_ends = vu.vad_get_segment_times( + sr, frame_dur_ms, short_pad_around_voice_ms, vad, frames + ) + seg_starts = [] seg_ends = [] - for k in range(len(short_seg_starts)): # logic to clean up some typical misfires - if (short_seg_starts[k] >=t_start) and (short_seg_starts[k] <= t_end): - + for k in range( + len(short_seg_starts) + ): # logic to clean up some typical misfires + if (short_seg_starts[k] >= t_start) and (short_seg_starts[k] <= t_end): + seg_starts.append(short_seg_starts[k]) seg_ends.append(short_seg_ends[k]) if len(seg_starts) == 0 or len(seg_ends) == 0: - return '' - + return "" + timing_dict = get_timing_cues(seg_starts, seg_ends, r_config) feat_dict_list.append(timing_dict) - + df = pd.DataFrame(feat_dict_list) - df[r_config.err_reason] = 'Pass'# will replace with threshold in future release + df[r_config.err_reason] = "Pass" # will replace with threshold in future release return df -def empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt): + +def empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt, save=True): """ Preparing empty Pause Segment matrix if something fails """ - cols = [r_config.aco_totaltime, r_config.aco_speakingtime, r_config.aco_numpauses, r_config.aco_pausetime, - r_config.aco_pausefrac, r_config.err_reason] + cols = [ + r_config.aco_totaltime, + r_config.aco_speakingtime, + r_config.aco_numpauses, + r_config.aco_pausetime, + r_config.aco_pausefrac, + r_config.err_reason, + ] out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, error_txt]] - df_pause = pd.DataFrame(out_val, columns = cols) - df_pause['dbm_master_url'] = video_uri - - logger.info('Saving Output file {} '.format(out_loc)) - ut.save_output(df_pause, out_loc, fl_name, pause_seg_dir, csv_ext) + 
df_pause = pd.DataFrame(out_val, columns=cols) + df_pause["dbm_master_url"] = video_uri -def run_pause_segment(video_uri, out_dir, r_config): + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_pause, out_loc, fl_name, pause_seg_dir, csv_ext) + return df_pause + + +def run_pause_segment(video_uri, out_dir, r_config, save=True): """ Processing all patient's for getting Pause Segment --------------- @@ -132,41 +151,50 @@ def run_pause_segment(video_uri, out_dir, r_config): out_dir: (str) Output directory for processed output """ try: - + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) - aud_filter = glob.glob(join(input_loc, fl_name + '.wav')) - if len(aud_filter)>0: + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: audio_file = aud_filter[0] - aud_dur = librosa.get_duration(filename=audio_file) + aud_dur = ut.get_length(audio_file) if float(aud_dur) < 0.064: - logger.info('Output file {} size is less than 0.064sec'.format(audio_file)) + logger.info( + "Output file {} size is less than 0.064sec".format(audio_file) + ) - error_txt = 'error: length less than 0.064' + error_txt = "error: length less than 0.064" empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt) return - logger.info('Converting stereo sound to mono-lD') + logger.info("Converting stereo sound to mono-lD") sound_mono = AudioSegment.from_wav(audio_file) sound_mono = sound_mono.set_channels(1) sound_mono = sound_mono.set_frame_rate(48000) - mono_wav = os.path.join(input_loc, fl_name + '_mono.wav') + mono_wav = os.path.join(input_loc, fl_name + "_mono.wav") sound_mono.export(mono_wav, format="wav") df_pause_seg = process_silence(mono_wav, r_config) - os.remove(mono_wav)#removing mono wav file + os.remove(mono_wav) # removing mono wav file - if isinstance(df_pause_seg, pd.DataFrame) and len(df_pause_seg)>0: - logger.info('Processing Output file {} '.format(out_loc)) - - df_pause_seg['dbm_master_url'] = 
video_uri - ut.save_output(df_pause_seg, out_loc, fl_name, pause_seg_dir, csv_ext) + if isinstance(df_pause_seg, pd.DataFrame) and len(df_pause_seg) > 0: + df_pause_seg["dbm_master_url"] = video_uri + if save: + logger.info("Processing Output file {} ".format(out_loc)) + ut.save_output( + df_pause_seg, out_loc, fl_name, pause_seg_dir, csv_ext + ) + df = df_pause_seg else: - error_txt = 'error: webrtcvad returns no segment' - empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt) - + error_txt = "error: webrtcvad returns no segment" + df = empty_pause_segment( + video_uri, out_loc, fl_name, r_config, error_txt, save=save + ) + return df + except Exception as e: - logger.error('Failed to process audio file') \ No newline at end of file + e + logger.error("Failed to process audio file", str(e)) diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/pitch_freq.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/pitch_freq.py new file mode 100644 index 00000000..3b4a23ad --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/pitch_freq.py @@ -0,0 +1,139 @@ +""" +file_name: pitch_freq +project_name: DBM +created: 2020-20-07 +""" + +import glob +import logging +import os +from os.path import join + +import numpy as np +import pandas as pd +import parselmouth + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +ff_dir = "acoustic/pitch" +csv_ext = "_pitch.csv" +error_txt = "error: length less than 0.064" + + +def audio_pitch(path): + """ + Using parselmouth library fetching pitch/fundamental frequency + Args: + path: (.wav) audio file location + Returns: + (list) list of pitch/fundamental frequency for each voice frame + """ + sound_pat = parselmouth.Sound(path) + pitch = sound_pat.to_pitch(time_step=0.001) + pitch_values = pitch.selected_array["frequency"] + + return list(pitch_values) + + +def label_speech(row, fd_freq): + """ + identify 
whether frame is voiced or not + Args: + row: (item) pitch frequency value + Returns: + (str) yes or no indicator for voice + """ + if row[fd_freq] > 0: + return "yes" + else: + return "no" + + +def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config, save=True): + + """ + Preparing pitch frequency matrix + Args: + audio_file: (.wav) parsed audio file + row: (dataframe) subject details from master csv + new_out_base_dir: (str) Output directory for csv + """ + + ff_frames = audio_pitch(audio_file) + df_ffreq = pd.DataFrame(ff_frames, columns=[r_config.aco_ff]) + + df_ffreq["Frames"] = df_ffreq.index + df_ffreq[r_config.aco_voiceLabel] = df_ffreq.apply( + lambda row: label_speech(row, r_config.aco_ff), axis=1 + ) + + df_ffreq[ + r_config.err_reason + ] = "Pass" # will replace with threshold in future release + df_ffreq["dbm_master_url"] = video_uri + + if save: + logger.info("Processing Output file {} ".format(out_loc)) + ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext) + return df_ffreq + + +def empty_pitch(video_uri, out_loc, fl_name, r_config, save=True): + """ + Preparing empty pitch frequency matrix if something fails + """ + + df_ffreq = pd.DataFrame( + [[np.nan, np.nan, "no", error_txt]], + columns=[ + "Frames", + r_config.aco_ff, + r_config.aco_voiceLabel, + r_config.err_reason, + ], + ) + df_ffreq["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext) + return df_ffreq + + +def run_pitch(video_uri, out_dir, r_config, save=True): + + """ + Processing audio for fetching pitch + ------------------- + ------------------- + Args: + video_uri: video path; r_config: raw variable config object + out_dir: (str) Output directory for processed output + """ + try: + + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: + + audio_file = aud_filter[0] 
+ aud_dur = ut.get_length(audio_file) + + if float(aud_dur) < 0.064: + logger.info( + "Output file {} size is less than 0.064sec".format(audio_file) + ) + + df = empty_pitch(video_uri, out_loc, fl_name, r_config, save=save) + else: + df = calc_pitch( + video_uri, audio_file, out_loc, fl_name, r_config, save=save + ) + return df + + except Exception as e: + e + logger.error("Failed to process audio file") diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/shimmer.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/shimmer.py new file mode 100644 index 00000000..d0d74b98 --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/shimmer.py @@ -0,0 +1,166 @@ +""" +file_name: shimmer_processing +project_name: DBM +created: 2020-20-07 +""" + +import glob +import logging +import os +from os.path import join + +import more_itertools as mit +import numpy as np +import pandas as pd +import parselmouth + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +shimmer_dir = "acoustic/shimmer" +ff_dir = "acoustic/pitch" +csv_ext = "_shimmer.csv" + + +def audio_shimmer(sound): + """ + Using parselmouth library fetching shimmer + Args: + sound: parselmouth object + Returns: + (list) list of shimmers for each voice frame + """ + pointProcess = parselmouth.praat.call( + sound, "To PointProcess (periodic, cc)...", 80, 500 + ) + shimmer = parselmouth.praat.call( + [sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6 + ) + return shimmer + + +def empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt, save=True): + """ + Preparing empty shimmer matrix if something fails + """ + cols = ["Frames", r_config.aco_shimmer, r_config.err_reason] + out_val = [[np.nan, np.nan, error_txt]] + df_shimmer = pd.DataFrame(out_val, columns=cols) + df_shimmer["dbm_master_url"] = video_uri + + if save: + logger.info("Saving Output file {} ".format(out_loc)) + 
ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext) + return df_shimmer + + +def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audio_file): + """ + calculating shimmer for each voice segment + """ + snd = parselmouth.Sound(audio_file) + pitch = snd.to_pitch(time_step=0.001) + + for idx, vs in enumerate(com_speech_sort): + try: + + shimmer = np.NaN + if vs in voiced_yes and len(vs) > 1: + + start_time = pitch.get_time_from_frame_number(vs[0]) + end_time = pitch.get_time_from_frame_number(vs[-1]) + + snd_start = int(snd.get_frame_number_from_time(start_time)) + snd_end = int(snd.get_frame_number_from_time(end_time)) + + samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end]) + shimmer = audio_shimmer(samples) + except: + pass + + shimmer_frames[idx] = shimmer + return shimmer_frames + + +def calc_shimmer( + video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None +): + """ + Preparing shimmer matrix + Args: + audio_file: (.wav) parsed audio file + out_loc: (str) Output directory for csv + r_config: config.config_raw_feature.pyConfigFeatureNmReader object + """ + dir_path = os.path.join(out_loc, ff_dir) + if os.path.isdir(dir_path) or ff_df is not None: + if ff_df is not None: + voice_seg = ut.process_segment_pitch(ff_df, r_config) + else: + voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df) + + shimmer_frames = [np.NaN] * len(voice_seg[0]) + shimmer_segment_frames = segment_shimmer( + voice_seg[0], voice_seg[1], voice_seg[2], shimmer_frames, audio_file + ) + + df_shimmer = pd.DataFrame( + shimmer_segment_frames, columns=[r_config.aco_shimmer] + ) + df_shimmer[ + r_config.err_reason + ] = "Pass" # will replace with threshold in future release + + df_shimmer["Frames"] = df_shimmer.index + df_shimmer["dbm_master_url"] = video_uri + if save: + logger.info("Processing Output file {} ".format(out_loc)) + ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext) + df = df_shimmer + else: 
+ error_txt = "error: fundamental freq not available" + df = empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt, save=save) + return df + + +def run_shimmer(video_uri, out_dir, r_config, save=True, ff_df=None): + """ + Processing all patients to fetch shimmer + --------------- + --------------- + Args: + video_uri: video path; r_config: raw variable config object + out_dir: (str) Output directory for processed output + """ + # try: + + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: + + audio_file = aud_filter[0] + aud_dur = ut.get_length(audio_file) + + if float(aud_dur) < 0.064: + logger.info("Output file {} size is less than 0.064sec".format(audio_file)) + + error_txt = "error: length less than 0.064" + df = empty_shimmer( + video_uri, out_loc, fl_name, r_config, error_txt, save=save + ) + else: + df = calc_shimmer( + video_uri, + audio_file, + out_loc, + fl_name, + r_config, + save=save, + ff_df=ff_df, + ) + return df + # except Exception as e: + # logger.error('Error in shimmer: {}'.format(e)) + # logger.error('Failed to process audio file') diff --git a/opendbm/dbm_lib/dbm_features/raw_features/audio/voice_frame_score.py b/opendbm/dbm_lib/dbm_features/raw_features/audio/voice_frame_score.py new file mode 100644 index 00000000..5126a156 --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/audio/voice_frame_score.py @@ -0,0 +1,131 @@ +""" +file_name: voice_frame_score +project_name: DBM +created: 2020-20-07 +""" + +import glob +import logging +from os.path import join + +import numpy as np +import pandas as pd +import parselmouth + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +vfs_dir = "acoustic/voice_frame_score" +csv_ext = "_voiceprev.csv" +error_txt = "error: length less than 0.064" + + +def audio_pitch_frame(pitch): + """ + Computing 
total number of speech and participant voiced frames + Args: + pitch: speech pitch + Returns: + (float) total voice frames and participant voiced frames + """ + total_frames = pitch.get_number_of_frames() + voiced_frames = pitch.count_voiced_frames() + return total_frames, voiced_frames + + +def voice_segment(path): + """ + Using parselmouth library for fundamental frequency + Args: + path: (.wav) audio file location + Returns: + (float) total voice frames, participant voiced frames and voiced frames percentage + """ + sound_pat = parselmouth.Sound(path) + pitch = sound_pat.to_pitch() + total_frames, voiced_frames = audio_pitch_frame(pitch) + + voiced_percentage = (voiced_frames / total_frames) * 100 + return voiced_percentage, voiced_frames, total_frames + + +def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config, save=True): + """ + creating dataframe matrix for voice frame score + Args: + audio_file: Audio file path + new_out_base_dir: AWS instance output base directory path + f_nm_config: Config file object + """ + + voice_percentage, voiced_frames, total_frames = voice_segment(audio_file) + df_vfs = pd.DataFrame([voiced_frames], columns=[r_config.aco_voiceFrame]) + + df_vfs[r_config.aco_totVoiceFrame] = [total_frames] + df_vfs[r_config.aco_voicePct] = [voice_percentage] + df_vfs[ + r_config.err_reason + ] = "Pass" # will replace with threshold in future release + + df_vfs["Frames"] = df_vfs.index + df_vfs["dbm_master_url"] = video_uri + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext) + return df_vfs + + +def empty_vfs(video_uri, out_loc, fl_name, r_config, save=True): + """ + Preparing empty VFS matrix if something fails + """ + cols = [ + "Frames", + r_config.aco_voiceFrame, + r_config.aco_totVoiceFrame, + r_config.aco_voicePct, + r_config.err_reason, + ] + out_val = [[np.nan, np.nan, np.nan, np.nan, error_txt]] + df_vfs = pd.DataFrame(out_val, columns=cols) + 
df_vfs["dbm_master_url"] = video_uri + if save: + logger.info("Saving Output file {} ".format(out_loc)) + ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext) + return df_vfs + + +def run_vfs(video_uri, out_dir, r_config, save=True): + """ + Processing all participants for fetching voice frame score + --------------- + --------------- + Args: + video_uri: video path; r_config: raw variable config object + out_dir: (str) Output directory for processed output + """ + try: + + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) + aud_filter = glob.glob(join(input_loc, fl_name + ".wav")) + if len(aud_filter) > 0: + + audio_file = aud_filter[0] + aud_dur = ut.get_length(audio_file) + + if float(aud_dur) < 0.064: + logger.info( + "Output file {} size is less than 0.064sec".format(audio_file) + ) + + df = empty_vfs(video_uri, out_loc, fl_name, r_config, save=save) + else: + df = calc_vfs( + video_uri, audio_file, out_loc, fl_name, r_config, save=save + ) + return df + except Exception as e: + e + logger.error("Failed to process audio file") diff --git a/opendbm/dbm_lib/dbm_features/raw_features/movement/__init__.py b/opendbm/dbm_lib/dbm_features/raw_features/movement/__init__.py new file mode 100644 index 00000000..c506189e --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/movement/__init__.py @@ -0,0 +1,12 @@ +""" +file_name: init +project_name: DBM +created: 2020-20-07 +""" + +from __future__ import absolute_import, division, print_function + +from .eye_blink import run_eye_blink +from .eye_gaze import run_eye_gaze +from .facial_tremor import fac_tremor_process +from .head_motion import run_head_movement diff --git a/opendbm/dbm_lib/dbm_features/raw_features/movement/eye_blink.py b/opendbm/dbm_lib/dbm_features/raw_features/movement/eye_blink.py new file mode 100644 index 00000000..e8f2833e --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/movement/eye_blink.py @@ -0,0 +1,195 @@ +""" +file_name: eye_blink +project_name: DBM 
+created: 2020-20-07 +""" + +import logging +import os +import subprocess + +import cv2 +import dlib +import imutils +import numpy as np +import pandas as pd +from imutils import face_utils +from imutils.video import FileVideoStream +from scipy.signal import find_peaks +from scipy.spatial import distance as dist + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +movement_expr_dir = "movement/eye_blink" +csv_ext = "_eyeblinks.csv" + + +def get_length(filename): + result = subprocess.run( + [ + "ffprobe", + "-v", + "error", + "-show_entries", + "format=duration", + "-of", + "default=noprint_wrappers=1:nokey=1", + filename, + ], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + stdin=subprocess.DEVNULL, + ) + return float(result.stdout) + + +def eye_aspect_ratio(eye): + """ + Computing eye aspect ratio for an individual frame + Args: + eye: Eye landmarks + Return: + Eye aspect ratio for a frame + """ + # euclidean distance for vertical eye landmarks + dist_cor1 = dist.euclidean(eye[1], eye[5]) + dist_cor2 = dist.euclidean(eye[2], eye[4]) + + # euclidean distance for horizontal eye landmark + dist_cor3 = dist.euclidean(eye[0], eye[3]) + + ear = (dist_cor1 + dist_cor2) / (2.0 * dist_cor3) + return ear + + +def blink_detection(video_path, facial_landmarks, raw_config): + """ + Blink detection for each frame + Args: + video_path: MP4 file location + facial_landmarks: Facial landmark pre-trained model path + raw_config: Raw configuration file object + Return: + Dataframe with blink informatiom like blink frame, duration etc. 
+ """ + tot_frame = 1 + blink_frame = [] + ear_frame = [] + + # clip = VideoFileClip(video_path, has_mask=True) + vid_length = get_length(video_path) + + identifier = dlib.get_frontal_face_detector() # dlib's face detector (HOG-based) + forecaster = dlib.shape_predictor(facial_landmarks) # the facial landmark predictor + + # left and right eye landmarks + (left_beg, left_end) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"] + (right_beg, right_end) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"] + + f_stream = True + vid_stream = FileVideoStream(video_path).start() + + while True: + try: + # check if stream/frame available in video + if f_stream and not vid_stream.more(): + break + + # reading & converting frame into grayscale + vid_frame = vid_stream.read() + vid_frame = imutils.resize(vid_frame, width=450) + gray = cv2.cvtColor(vid_frame, cv2.COLOR_BGR2GRAY) + + # detecting face + rects = identifier(gray, 0) + for rect in rects: + lmk = forecaster(gray, rect) + lmk = face_utils.shape_to_np(lmk) + + l_eye = lmk[left_beg:left_end] # Extracting left eye ratio + r_eye = lmk[right_beg:right_end] # Extracting right eye ratio + l_ear = eye_aspect_ratio(l_eye) # eye aspect ratio for left eye + r_ear = eye_aspect_ratio(r_eye) # eye aspect ratio for right eye + + ear = (l_ear + r_ear) / 2.0 # average the eye aspect ratio + blink_frame.append(tot_frame) + ear_frame.append(ear) + + tot_frame += 1 + except Exception as e: + e + logger.info( + "blink detection processing finished in frame: {}".format(tot_frame - 1) + ) + continue + vid_stream.stop() + blink_df = pd.DataFrame(ear_frame, columns=[raw_config.mov_blink_ear]) + blink_df[raw_config.vid_dur] = vid_length + blink_df[raw_config.fps] = int(tot_frame / vid_length) + blink_df[raw_config.mov_blinkframes] = blink_frame + + peaks, _ = find_peaks( + blink_df[raw_config.mov_blink_ear] * -1, prominence=0.1 + ) # prominence = 0.1 based on tuning + final_blink_df = blink_df.iloc[peaks, :].reset_index(drop=True) + + u_blink_df = 
blink_dur(final_blink_df, raw_config) + u_blink_df["dbm_master_url"] = video_path + return u_blink_df + + +def blink_dur(blink_df, raw_config): + """ + Computing blink duration between each blink + Args: + blink_df : Dataframe with blink informatiom like blink frame + raw_config: Raw configuration file object + Returns: + Updated dataframe with blink duration + """ + if len(blink_df) > 0: + blink_df[raw_config.mov_blinkdur] = ( + blink_df[raw_config.mov_blinkframes] + .diff() + .fillna(blink_df[raw_config.mov_blinkframes]) + ) + else: + blink_df[raw_config.mov_blinkdur] = np.nan + blink_df[raw_config.mov_blinkdur] = ( + blink_df[raw_config.mov_blinkdur] / blink_df[raw_config.fps] + ) + return blink_df + + +def run_eye_blink(video_uri, out_dir, r_config, facial_landmarks, save=True): + """ + Processing all patient's for getting eye blink artifacts + --------------- + --------------- + Args: + video_uri: video path; input_dir : input directory for video's + out_dir: (str) Output directory for processed output; + r_config: raw variable config object; + facial_landmarks: landmark model path + save: whether to save in csv or not + """ + try: + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) + + vid_file_path = os.path.exists(video_uri) + if vid_file_path: + + logger.info( + "Processing Output file {} ".format(os.path.join(out_loc, fl_name)) + ) + df_blink = blink_detection(video_uri, facial_landmarks, r_config) + if save: + ut.save_output(df_blink, out_loc, fl_name, movement_expr_dir, csv_ext) + + return df_blink + + except Exception as e: + logger.error(f"Failed to process video file: {e}") diff --git a/opendbm/dbm_lib/dbm_features/raw_features/movement/eye_gaze.py b/opendbm/dbm_lib/dbm_features/raw_features/movement/eye_gaze.py new file mode 100644 index 00000000..16470c5c --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/movement/eye_gaze.py @@ -0,0 +1,182 @@ +""" +file_name: eye_gaze +project_name: DBM +created: 2020-30-11 +""" + +import 
glob +import logging +import os +from os.path import join + +import numpy as np +import pandas as pd +from scipy.spatial import distance + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +eye_pose_dir = "movement/gaze" +eye_pose_ext = "_eyegaze.csv" + + +def eye_motion_df(l_disp, r_disp, error_list, r_config): + """ + Generating eye movement dataframe + + Args: + error_list: + l_disp: displacement list(left eye); + r_disp: displacement list(right eye) + r_config: raw variable config file object + + Reutrns: + Final eye displacement dataframe + """ + df_eye_left = pd.DataFrame(l_disp, columns=[r_config.mov_eleft_disp]) + df_eye_right = pd.DataFrame(r_disp, columns=[r_config.mov_eright_disp]) + + df_eye_motion = pd.concat([df_eye_left, df_eye_right], axis=1, sort=False) + df_eye_motion[r_config.err_reason] = error_list + return df_eye_motion + + +def filter_motion(df_of, df_disp, col_l, col_r, r_config): + """ + Filtering final eye movement dataframe + + Args: + df_of: Openface raw out dataframe; + df_disp: displacement dataframe + col_r: right eye column + col_l: left eye column; + r_config: raw variable config file object + """ + + df_of = df_of[col_l + col_r + [" confidence"]].copy() + df_of.loc[(df_of[" confidence"].astype(float) < 0.8), col_l + col_r] = np.nan + + df_filter = df_of[col_l + col_r] + df_filter.columns = [ + r_config.mov_leye_x, + r_config.mov_leye_y, + r_config.mov_leye_z, + r_config.mov_reye_x, + r_config.mov_reye_y, + r_config.mov_reye_z, + ] + + df_motion = pd.concat([df_filter, df_disp], axis=1, sort=False) + return df_motion + + +def eye_disp(of_results, col, r_config): + """ + Computing head velocity frame by frame + + Args: + of_results: Openface raw out dataframe + col: col of eye_disp + r_config: Face config file object + + Reutrns: + Final head velocity frame by frame output + """ + distance_list = [] + error_list = [] + + of_results = 
of_results[col + [" confidence"]] + for index, row in of_results.iterrows(): + dst = np.nan + + if index == 0 or float(row[" confidence"]) < 0.8: # Threshold < 0.8 + distance_list.append(dst) + + if float(row[" confidence"]) < 0.8: + error_list.append("confidence less than 80%") + + else: + error_list.append("Pass") + continue + + if index > 0: + + point_x = ( + of_results[col[0]][index - 1], + of_results[col[1]][index - 1], + of_results[col[2]][index - 1], + ) + point_y = (row[col[0]], row[col[1]], row[col[2]]) + try: + dst = distance.euclidean(point_x, point_y) + except Exception as e: + logger.info("Exception on eye_disp method", e) + pass + + distance_list.append(abs(dst)) + error_list.append("Pass") + + return distance_list, error_list + + +def calc_eye_mov(video_uri, df_of, out_loc, fl_name, r_config, save=True): + """ + Computing eye motion variables + Args: + video_uri: self explanatory + df_of: Openface dataframe + out_loc: Output path for saving output csv's + fl_name: file name for output csv + r_config: raw variable config file object + save: whether to save result to csv or not + + """ + + col_l = [" gaze_0_x", " gaze_0_y", " gaze_0_z"] + col_r = [" gaze_1_x", " gaze_1_y", " gaze_1_z"] + + gazel_disp, err_l = eye_disp(df_of, col_l, r_config) + gazer_disp, err_r = eye_disp(df_of, col_r, r_config) + + df_disp = eye_motion_df(gazel_disp, gazer_disp, err_l, r_config) + df_disp["dbm_master_url"] = video_uri + + df_motion = filter_motion(df_of, df_disp, col_l, col_r, r_config) + if save: + ut.save_output(df_motion, out_loc, fl_name, eye_pose_dir, eye_pose_ext) + return df_motion + + +def run_eye_gaze(video_uri, out_dir, r_config, save=True): + """ + Processing all patient's for getting eye movement artifacts + -------------------------------- + -------------------------------- + Args: + video_uri: video path; input_dir : input directory for video's + out_dir: (str) Output directory for processed output; + r_config: raw variable config object + save: whether 
to save result to csv or not + """ + try: + + # filtering path to generate input & output path + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) + of_csv_path = glob.glob(join(out_loc, fl_name + "_openface/*.csv")) + + if len(of_csv_path) > 0: + of_csv = of_csv_path[0] + df_of = pd.read_csv(of_csv) + + logger.info( + "Processing Output file {} ".format(os.path.join(out_loc, fl_name)) + ) + df_motion = calc_eye_mov( + video_uri, df_of, out_loc, fl_name, r_config, save=save + ) + return df_motion + + except Exception as e: + logger.error("Failed to process video file", e) diff --git a/opendbm/dbm_lib/dbm_features/raw_features/movement/facial_tremor.py b/opendbm/dbm_lib/dbm_features/raw_features/movement/facial_tremor.py new file mode 100644 index 00000000..3459573c --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/movement/facial_tremor.py @@ -0,0 +1,194 @@ +import glob +import json +import logging +import os +import pickle +import re +import sys +from os.path import join + +import cv2 +import numpy as np +import numpy.ma as ma +import pandas as pd + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut + +from ..util.math_util import calc_displacement_vec + +DBMLIB_PATH = os.path.dirname(__file__) + +DBMLIB_FTREMOR_CONFIG = os.path.abspath( + os.path.join(DBMLIB_PATH, "../../../../resources/features/facial/config.json") +) + + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +ft_dir = "movement/facial_tremor" +csv_ext = "_fac_tremor.csv" +model_ext = "_fac_model.csv" +fac_features_ext = "_fac_features.csv" + + +def compute_features(out_dir, df_of, r_config): + """Computes features + + Returns: features in vector format + """ + config = json.loads(open(DBMLIB_FTREMOR_CONFIG, "r").read()) + + pattern_x = re.compile(r"l\d+_x") + pattern_y = re.compile(r"l\d+_y") + + # assumption: distance of face to camera remains at roughly static + + # logic break + landmark_columns = [] + for col in df_of.columns: 
+ if pattern_x.match(col) or pattern_y.match(col): + landmark_columns.append(col) + + df_of = df_of[(df_of[landmark_columns] != 0).any(axis=1)] + df_of.reset_index(inplace=True) + + num_frames = len(df_of) + logger.info("Number of frames to be processed: {}".format(str(num_frames))) + landmarks = config["landmarks"] + + try: + if num_frames == 0: + error_reason = "No frames with visible face." + logger.error(error_reason) + return empty_frame(landmarks, r_config, error_reason) + + # if num_frames < 60: + # error_reason = 'Number of frames with visible face < 60. Video too short' + # logger.error(error_reason) + # return empty_frame(landmarks, f_cfg, error_reason) + + first_row = df_of.iloc[0] + + facew = abs( + first_row[config["face_width_left"]] - first_row[config["face_width_right"]] + ) + faceh = abs( + first_row[config["face_height_left"]] + - first_row[config["face_height_right"]] + ) + + if facew == 0 or faceh == 0: + error_reason = "face width or height = 0. Check landmark values" + logger.error(error_reason) + return empty_frame(landmarks, r_config) + + fac_disp = calc_displacement_vec(df_of, landmarks, num_frames) + + # if verbose: + # logger.info("Displacement output: {}".format(str(fac_disp))) + + fac_disp_median = np.median(fac_disp, axis=1) + fac_disp_mean = np.mean(fac_disp, axis=1) + + if len(fac_disp.shape) != 2: + error_reason = "fac_disp is not 2D. smth went wrong with disp calc" + logger.error(error_reason) + return empty_frame(landmarks, r_config, error_reason) + + if len(fac_disp[0]) <= 1: + error_reason = "Video too short. 
smth went wrong with disp calc" + logger.error(error_reason) + return empty_frame(landmarks, r_config, error_reason) + + fac_corr_mat = np.corrcoef(fac_disp, rowvar=True) + # extract relevant row from cov matrix + ref_lmk_index = [ + i for i, lmk in enumerate(landmarks) if config["ref_lmk"] == lmk + ] + fac_corr = fac_corr_mat[ref_lmk_index][0] + + fac_area = config["ref_area"] / (facew * faceh) + + # if verbose: + # logger.info("Face area: {}".format(fac_area)) + # logger.info("Face Displacement Median: {}".format(str(fac_disp_median))) + # logger.info("Face Displacement Mean: {}".format(str(fac_disp_mean))) + + fac_features1 = np.multiply(fac_area * fac_disp_median, (1.0 - fac_corr)) + fac_features2 = np.multiply(fac_area * fac_disp_mean, (1.0 - fac_corr)) + + # base_fac_features = np.dot(fac_area * fac_disp_median, (1. - fac_corr)) + + fac_features_dict = {} + for i, landmark in enumerate(landmarks): + fac_features_dict["fac_features_mean_{}".format(landmark)] = [ + fac_features2[i] + ] + raw_variable_map = "fac_tremor_median_{}".format(landmark) + fac_features_dict[r_config.base_raw["raw_feature"][raw_variable_map]] = [ + fac_features1[i] + ] + + fac_features_dict["fac_disp_median_{}".format(landmark)] = [ + fac_disp_median[i] + ] + fac_features_dict["fac_corr_{}".format(landmark)] = [fac_corr[i]] + + fac_features_dict[r_config.err_reason] = [""] + data = pd.DataFrame.from_dict(fac_features_dict) + logger.info("Concluded computing tremor features") + + return data + + except Exception as e: + logger.error("Error computing tremor features: {}".format(str(e))) + return empty_frame(landmarks, r_config, str(e)) + + +def empty_frame(landmarks, r_config, error_reason): + fac_features_dict = {} + for i, landmark in enumerate(landmarks): + raw_variable_map = "fac_tremor_median_{}".format(landmark) + fac_features_dict[r_config.base_raw["raw_feature"][raw_variable_map]] = [np.nan] + + fac_features_dict["fac_features_mean_{}".format(landmark)] = [np.nan] + 
fac_features_dict["fac_disp_median_{}".format(landmark)] = [np.nan] + fac_features_dict["fac_corr_{}".format(landmark)] = [np.nan] + + fac_features_dict[r_config.err_reason] = [error_reason] + empty_frame = pd.DataFrame.from_dict(fac_features_dict) + return empty_frame + + +def fac_tremor_process(video_uri, out_dir, r_config, model_output=False, save=True): + """ + processing input videos + + + """ + try: + + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) + of_csv_path = glob.glob(join(out_loc, fl_name + "_openface_lmk/*_output.csv")) + if len(of_csv_path) > 0: + of_csv = of_csv_path[0] + df_of = pd.read_csv(of_csv) + + logger.info( + "Processing Output file for facial_tremor {} ".format( + os.path.join(out_loc, fl_name) + ) + ) + + feats = compute_features(of_csv_path, df_of, r_config) + + # if model_output: + # result = score(feats, r_config) + # feats = pd.concat([feats, result], axis=1) + if save: + ut.save_output(feats, out_loc, fl_name, ft_dir, csv_ext) + return feats + + except Exception as e: + logger.error("Failed to process video file for facial_tremor", str(e)) diff --git a/opendbm/dbm_lib/dbm_features/raw_features/movement/head_motion.py b/opendbm/dbm_lib/dbm_features/raw_features/movement/head_motion.py new file mode 100644 index 00000000..b1b28dea --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/movement/head_motion.py @@ -0,0 +1,240 @@ +""" +file_name: head_mov +project_name: DBM +created: 2020-20-07 +""" + +import glob +import logging +import os +from os.path import join + +import numpy as np +import pandas as pd +from scipy.spatial import distance + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +h_mov_dir = "movement/head_movement" +h_pose_dir = "movement/head_pose" +h_mov_ext = "_headmov.csv" +h_pose_ext = "_headpose.csv" + + +def head_pose_dist(of_results): + """ + Computing head pose distance frame by frame + + Args: 
+ of_results: Openface raw out dataframe + + Returns: + Final head pose distance frame by frame output + """ + distance_list = [] + error_list = [] + for index, row in of_results.iterrows(): + dst = np.nan + + if index == 0 or float(row[" confidence"]) < 0.2: # Threshold < 0.2 + distance_list.append(dst) + + if float(row[" confidence"]) < 0.2: + error_list.append("confidence less than 20%") + + else: + error_list.append("Pass") + continue + + if index > 0: + + point_x = ( + of_results[" pose_Rx"][index - 1], + of_results[" pose_Ry"][index - 1], + of_results[" pose_Rz"][index - 1], + ) + point_y = (row[" pose_Rx"], row[" pose_Ry"], row[" pose_Rz"]) + try: + dst = distance.euclidean(point_x, point_y) + except Exception as e: + logger.info("Exception met on head_pose_dist method", e) + pass + distance_list.append(abs(dst)) + error_list.append("Pass") + return distance_list, error_list + + +def head_pose(of_results, r_config): + """ + Generating head pose estimation dataframe + + Args: + of_results: openface results as dataframe + r_config: raw variable config file object + + Returns: + Final head pose estimation dataframe + """ + pose_dist_list, error_list = head_pose_dist(of_results) + of_results = of_results.copy() + of_results.loc[ + (of_results[" confidence"].astype(float) < 0.2), + [" pose_Rx", " pose_Ry", " pose_Rz"], + ] = np.nan + pose_of = of_results[[" pose_Rx", " pose_Ry", " pose_Rz"]] + pose_of.columns = [ + r_config.mov_Hpose_Pitch, + r_config.mov_Hpose_Yaw, + r_config.mov_Hpose_Roll, + ] + pose_of = pose_of.copy() + pose_of[r_config.mov_Hpose_Dist] = pose_dist_list + pose_of[r_config.err_reason] = error_list + + return pose_of + + +def head_motion_df(distance_val, error_list, r_config): + """ + Generating head movement dataframe + + Args: + distance_val: distance list + error_list: Error reason + r_config: raw variable config file object + + Returns: + Final head velocity dataframe + """ + head_motion = r_config.head_vel + df_head_motion = 
def head_vel(of_results, r_config):
    """
    Computing head velocity frame by frame

    For every frame the Euclidean distance between its translation vector
    (" pose_Tx", " pose_Ty", " pose_Tz") and the translation vector of the
    previous frame is computed.

    Args:
        of_results: Openface raw out dataframe; must contain the columns
            " confidence", " pose_Tx", " pose_Ty" and " pose_Tz"
        r_config: Face config file object (passed on to head_motion_df)

    Returns:
        Final head velocity frame by frame output, as built by
        head_motion_df. The value is NaN for the first frame, for frames
        with confidence below 0.2 ("confidence less than 20%") and for
        displacements above 200 ("Out of range").
    """
    position_cols = (" pose_Tx", " pose_Ty", " pose_Tz")
    distance_list = []
    error_list = []
    prev_position = None  # translation of the previous row, by position

    for _, row in of_results.iterrows():
        current_position = tuple(row[col] for col in position_cols)
        low_confidence = float(row[" confidence"]) < 0.2  # Threshold < 0.2

        if prev_position is None or low_confidence:
            distance_list.append(np.nan)
            error_list.append(
                "confidence less than 20%" if low_confidence else "Pass"
            )
        else:
            dst = np.nan
            try:
                dst = distance.euclidean(prev_position, current_position)
            except Exception as e:
                # Best effort: keep NaN for this frame but report the reason.
                logger.info("Exception met on head_vel method: %s", e)

            if abs(dst) > 200:
                # Implausibly large jump between consecutive frames.
                dst = np.nan
                error_list.append("Out of range")
            else:
                error_list.append("Pass")
            distance_list.append(dst)

        prev_position = current_position

    df_velocity = head_motion_df(distance_list, error_list, r_config)

    return df_velocity
def run_head_movement(video_uri, out_dir, r_config):
    """
    Processing all patient's for getting movement artifacts for cdx_analysis workflow

    Looks for the first CSV under "<out_loc>/<fl_name>_openface/" and, when
    one exists, computes head motion and head pose variables from it.

    Args:
        video_uri: video path; input_dir : input directory for video's
        out_dir: (str) Output directory for processed output;
        r_config: raw variable config object

    Returns:
        The dataframe produced by calc_head_mov, or None when no OpenFace
        CSV is found or processing fails (the failure is logged).
    """
    try:
        # filtering path to generate input & output path
        _, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        of_csv_path = glob.glob(join(out_loc, fl_name + "_openface/*.csv"))

        if not of_csv_path:
            return None

        df_of = pd.read_csv(of_csv_path[0])

        logger.info(
            "Processing Output file {} ".format(os.path.join(out_loc, fl_name))
        )

        return calc_head_mov(video_uri, df_of, out_loc, fl_name, r_config)

    except Exception as e:
        # Best effort per video: report the reason instead of crashing the run.
        logger.error("Failed to process video file: %s", e)
        return None
# Executing praat script using parselmouth function
def tremor_praat(snd_file, r_cfg):
    """
    Build the voice tremor dataframe for one audio file.

    The praat script at DBMLIB_VTREMOR_LIB is run on the sound, every
    "--undefined--" token in the second element of its result is replaced
    by "0", and the resulting text is parsed as JSON into a one-row frame.

    Args:
        snd_file: (.wav) parsed audio file
        r_cfg: Raw variable configuration file; supplies the six output
            column names

    Returns:
        One-row dataframe (index "0") whose columns are renamed, in order,
        to the tremor variable names taken from r_cfg.
    """
    sound = parselmouth.Sound(snd_file)
    praat_result = run_file(sound, DBMLIB_VTREMOR_LIB, capture_output=True)

    # json.loads cannot parse praat's "--undefined--" marker, so it is
    # substituted with a numeric zero before parsing.
    cleaned_json = re.sub("--undefined--", "0", praat_result[1])
    parsed = json.loads(cleaned_json)

    result_df = pd.DataFrame(parsed, index=["0"])
    # NOTE(review): renaming is positional, so it presumably relies on the
    # key order of the script's JSON output - confirm against the script.
    result_df.columns = [
        r_cfg.mov_freq_trem_freq,
        r_cfg.mov_amp_trem_freq,
        r_cfg.mov_freq_trem_index,
        r_cfg.mov_amp_trem_index,
        r_cfg.mov_freq_trem_pindex,
        r_cfg.mov_amp_trem_pindex,
    ]
    return result_df
def run_vtremor(video_uri, out_dir, r_config, save=True):
    """
    Processing all patient's for fetching voice tremor variables

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: Raw variable configuration file
        save: whether to write results to csv or not

    Returns:
        The voice tremor dataframe; an empty (NaN) dataframe carrying an
        error reason when the audio is shorter than 0.5 sec; None when no
        matching .wav file exists or when processing fails (in the failure
        case an empty dataframe is still handed to prepare_empty_vt).
    """
    # Pure path handling, kept outside the try block so that out_loc and
    # fl_name are always bound when the except handler needs them.
    input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)

    try:
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if not aud_filter:
            return None

        audio_file = aud_filter[0]
        aud_dur = ut.get_length(audio_file)

        if float(aud_dur) < 0.5:
            logger.info("Output file {} size is less than 0.5sec".format(audio_file))

            error_txt = "error: length less than 0.5 sec"
            # Argument order follows prepare_empty_vt's signature:
            # (out_loc, fl_name, r_config, error_txt, save).
            df_trem = prepare_empty_vt(out_loc, fl_name, r_config, error_txt, save)
        else:
            df_trem = prepare_vtrem_output(
                audio_file, out_loc, r_config, fl_name, save
            )

        return df_trem
    except Exception as e:
        logger.error("Failed to compute Voice Tremor {} for {}".format(e, video_uri))
        # Record the failure reason as text in the empty output.
        prepare_empty_vt(out_loc, fl_name, r_config, str(e), save)
        return None
def run_speech_feature(video_uri, out_dir, r_config, tran_tog, save=True):
    """
    Processing all patient's for fetching nlp features

    Reads the first transcript CSV matching transcribe_ext under the
    output location, derives the speech features from it and, unless
    tran_tog is "on", removes transcript output afterwards.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        tran_tog: transcript toggle; any value other than "on" triggers
            the clean-up step
        save: whether to write the speech features to csv or not

    Returns:
        Dataframe of speech features produced by n_util.process_speech

    Raises:
        IndexError: if no transcript CSV is found under the output location
    """

    input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)

    transcribe_path = glob.glob(join(out_loc, transcribe_ext))
    transcribe_df = pd.read_csv(transcribe_path[0])
    df_speech = n_util.process_speech(transcribe_df, r_config)

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        logger.info("filename {} ".format(fl_name))
        ut.save_output(df_speech, out_loc, fl_name, speech_dir, speech_ext)

    if tran_tog != "on":
        if os.getcwd() == "/app":  # docker version
            shutil.rmtree(os.path.dirname(transcribe_path[0]))
        else:  # api_lib version
            # fl_name comes from ut.filter_path; it is used verbatim here.
            # str.strip(".mp4") must not be used: it removes any leading or
            # trailing '.', 'm', 'p', '4' characters rather than a suffix,
            # which would point rmtree at the wrong directory.
            shutil.rmtree((out_dir + "/" + fl_name).replace("//", "/"))

    return df_speech
def run_transcribe(video_uri, out_dir, r_config, deep_path, save=True):
    """
    Transcribe the .wav file that sits next to a video.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        deep_path: deepspeech build path
        save: accepted for interface compatibility.
            NOTE(review): it is not forwarded to calc_transcribe or
            empty_transcribe, so both run with their own default - confirm
            whether that is intended before changing it.

    Returns:
        The transcript dataframe; the empty transcript dataframe when the
        audio is shorter than 0.1 sec; None when no matching .wav exists.
    """
    input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
    wav_matches = glob.glob(join(input_loc, fl_name + ".wav"))

    # Nothing to transcribe when the audio track is missing.
    if len(wav_matches) == 0:
        return None

    audio_file = wav_matches[0]
    aud_dur = ut.get_length(audio_file)

    if float(aud_dur) < 0.1:
        logger.info("Output file {} size is less than 0.1 sec".format(audio_file))
        return empty_transcribe(video_uri, out_loc, fl_name, r_config)

    return calc_transcribe(
        video_uri, audio_file, out_loc, fl_name, r_config, deep_path, aud_dur
    )
'l1_y'], ['l2_x', 'l2_y']] """ - return [['l{}_x'.format(l), 'l{}_y'.format(l)] for l in landmarks] - + return [["l{}_x".format(point), "l{}_y".format(point)] for point in landmarks] def calc_displacement_vec(df, landmarks, num_frames): @@ -44,13 +47,12 @@ def calc_displacement_vec(df, landmarks, num_frames): first_row = df.iloc[0] prev_point[j] = (first_row[pair[0]], first_row[pair[1]]) - for i in range(num_frames): frame_row = df.iloc[i] for j, pair in enumerate(landmarks): x, y = pair[0], pair[1] current = (frame_row[x], frame_row[y]) - deviation = euclidean_distance( current, prev_point[j]) + deviation = euclidean_distance(current, prev_point[j]) disp_vec[j][i] = deviation prev_point[j] = current diff --git a/opendbm/dbm_lib/dbm_features/raw_features/util/nlp_util.py b/opendbm/dbm_lib/dbm_features/raw_features/util/nlp_util.py new file mode 100644 index 00000000..2dfc6124 --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/util/nlp_util.py @@ -0,0 +1,273 @@ +""" +file_name: nlp_util +project_name: DBM +created: 2020-10-11 +""" + +import json +import logging +import os +import re +import subprocess + +import nltk +import numpy as np +import pandas as pd +from lexicalrichness import LexicalRichness +from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +# Speech to text using Deepspeech 0.9.1 +def deepspeech(AUDIO_FILE, deep_path): + """ + Extracting text from audio using Deep Speech neural network trained model + Returns: + Text: text which is extracted from audio + """ + api = "deepspeech" + arg_speech0 = "--model" + arg_speech_path0 = os.path.join(deep_path, "deepspeech-0.9.1-models.pbmm") + arg_speech1 = "--scorer" + arg_speech_path1 = os.path.join(deep_path, "deepspeech-0.9.1-models.scorer") + arg_audio = "--audio" + + out = subprocess.Popen( + [ + api, + arg_speech0, + arg_speech_path0, + arg_speech1, + arg_speech_path1, + arg_audio, + AUDIO_FILE, + ], + 
def empty_speech(r_config, master_url, error_txt):
    """
    Preparing empty speech matrix with error

    Args:
        r_config: raw config file object; supplies the column names
        master_url: value stored in the "dbm_master_url" column
        error_txt: Error message during transcription

    Returns:
        One-row dataframe for speech features in which every feature is
        NaN and the error column holds error_txt
    """

    feature_cols = [
        r_config.nlp_numSentences,
        r_config.nlp_singPronPerAns,
        r_config.nlp_singPronPerSen,
        r_config.nlp_pastTensePerAns,
        r_config.nlp_pastTensePerSen,
        r_config.nlp_pronounsPerAns,
        r_config.nlp_pronounsPerSen,
        r_config.nlp_verbsPerAns,
        r_config.nlp_verbsPerSen,
        r_config.nlp_adjectivesPerAns,
        r_config.nlp_adjectivesPerSen,
        r_config.nlp_nounsPerAns,
        r_config.nlp_nounsPerSen,
        r_config.nlp_sentiment_mean,
        r_config.nlp_mattr,
        r_config.nlp_wordsPerMin,
        r_config.nlp_totalTime,
    ]
    col = feature_cols + [r_config.err_reason]

    # One NaN per feature column plus the error text: the row must have
    # exactly len(col) values or the DataFrame constructor raises.
    row = [np.nan] * len(feature_cols) + [error_txt]
    df_speech = pd.DataFrame([row], columns=col)
    df_speech["dbm_master_url"] = master_url

    return df_speech
speech_dict[r_config.nlp_pastTensePerSen] = divide_var( + speech_dict[r_config.nlp_pastTensePerAns], num_sentences + ) + + # Pronoun per answer + pronounsPerAns = all_POSs.count("PRP") + all_POSs.count("PRP$") + speech_dict[r_config.nlp_pronounsPerAns] = ( + pronounsPerAns if len(words_all) > 0 else np.nan + ) + speech_dict[r_config.nlp_pronounsPerSen] = divide_var( + speech_dict[r_config.nlp_pronounsPerAns], num_sentences + ) + + # Verb per answer + verbPerAns = ( + all_POSs.count("VB") + + all_POSs.count("VBD") + + all_POSs.count("VBG") + + all_POSs.count("VBN") + + all_POSs.count("VBP") + + all_POSs.count("VBZ") + ) + speech_dict[r_config.nlp_verbsPerAns] = verbPerAns if len(words_all) > 0 else np.nan + speech_dict[r_config.nlp_verbsPerSen] = divide_var( + speech_dict[r_config.nlp_verbsPerAns], num_sentences + ) + + # Adjective per answer + adjectivesAns = all_POSs.count("JJ") + all_POSs.count("JJR") + all_POSs.count("JJS") + speech_dict[r_config.nlp_adjectivesPerAns] = ( + adjectivesAns if len(words_all) > 0 else np.nan + ) + speech_dict[r_config.nlp_adjectivesPerSen] = divide_var( + speech_dict[r_config.nlp_adjectivesPerAns], num_sentences + ) + + # Noun per answer + nounsAns = all_POSs.count("NN") + all_POSs.count("NNP") + all_POSs.count("NNS") + speech_dict[r_config.nlp_nounsPerAns] = nounsAns if len(words_all) > 0 else np.nan + speech_dict[r_config.nlp_nounsPerSen] = divide_var( + speech_dict[r_config.nlp_nounsPerAns], num_sentences + ) + + # Sentiment analysis + vader = SentimentIntensityAnalyzer() + sentence_valences = [] + + for s in sentences: + sentiment_dict = vader.polarity_scores(s) + sentence_valences.append(sentiment_dict["compound"]) + + speech_dict[r_config.nlp_sentiment_mean] = ( + np.mean(sentence_valences) if len(sentence_valences) > 0 else np.nan + ) + non_punc = list(value for value in words_all if value not in [".", "!", "?"]) + + non_punc_as_str = " ".join(str(non_punc)) + lex = LexicalRichness(non_punc_as_str) + 
def segment_pitch(dir_path, r_config, ff_df=None):
    """
    segmenting pitch freq for each voice segment

    Args:
        dir_path: directory searched for a "*_pitch.csv" file when ff_df
            is not supplied
        r_config: raw variable config object, passed on to
            process_segment_pitch
        ff_df: optional pitch dataframe; when given, no file is read

    Returns:
        (com_speech_sort, voiced_yes, voiced_no) as produced by
        process_segment_pitch, or three independent empty lists when no
        pitch data is available or segmentation fails.
    """
    import logging

    log = logging.getLogger()

    if ff_df is None:
        for file in os.listdir(dir_path):
            if not file.endswith("_pitch.csv"):
                continue
            try:
                ff_df = pd.read_csv(os.path.join(dir_path, file))
            except Exception as e:
                # Best effort: an unreadable file is skipped, but reported.
                log.warning("Could not read pitch file %s: %s", file, e)
                continue
            break  # the first readable pitch file is used

    if ff_df is None:
        # Fresh lists, so callers mutating one result do not affect the others.
        return [], [], []

    try:
        return process_segment_pitch(ff_df, r_config)
    except Exception as e:
        log.warning("Pitch segmentation failed: %s", e)
        return [], [], []
+ See https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments + Args: + input_filepath: + output_directory: + au_static: + tracked_visualization: + open_face_executable: + clobber: (bool) if True existing files will be overwritten + verbose: + Returns: + (str) path to output csv file + Raises: + IOError if OpenFace executable is missing + """ + + if not os.path.isfile(open_face_executable): + raise IOError( + "OpenFace executable {} could not be found.".format(open_face_executable) + ) + + bn, _ = os.path.splitext(os.path.basename(input_filepath)) + if not output_directory: + output_directory = os.path.join( + os.path.dirname(input_filepath), bn + "_openface" + ) + + output_csv = os.path.join(output_directory, bn + ".csv") + if not os.path.isfile(output_csv) or clobber: + call = [ + open_face_executable, + ] + if au_static: + call += [ + "-au_static", + ] + + if tracked_visualization: + call += [ + "-tracked", + ] + + call += ["-q", "-2Dfp", "-3Dfp", "-pdmparams", "-pose", "-aus", "-gaze"] + call += ["-f", input_filepath, "-out_dir", output_directory] + + if verbose: + print( + "Computing OpenFace features {} from video file".format(input_filepath) + ) + subprocess.check_output(call) + if verbose: + print("OpenFace features saved to {}".format(output_directory)) + else: + if verbose: + print("Output file {} already exists".format(output_csv)) + + return os.path.join(output_directory, bn + ".csv") diff --git a/dbm_lib/dbm_features/raw_features/util/vad_utilities.py b/opendbm/dbm_lib/dbm_features/raw_features/util/vad_utilities.py similarity index 83% rename from dbm_lib/dbm_features/raw_features/util/vad_utilities.py rename to opendbm/dbm_lib/dbm_features/raw_features/util/vad_utilities.py index 8f11a2e8..cef9afe6 100644 --- a/dbm_lib/dbm_features/raw_features/util/vad_utilities.py +++ b/opendbm/dbm_lib/dbm_features/raw_features/util/vad_utilities.py @@ -10,11 +10,12 @@ import contextlib import sys import wave + def read_wave(path): """Reads a .wav 
file. Takes the path, and returns (PCM audio data, sample rate). """ - with contextlib.closing(wave.open(path, 'rb')) as wf: + with contextlib.closing(wave.open(path, "rb")) as wf: num_channels = wf.getnchannels() assert num_channels == 1 sample_width = wf.getsampwidth() @@ -27,11 +28,13 @@ def read_wave(path): class Frame(object): """Represents a "frame" of audio data.""" + def __init__(self, bytes, timestamp, duration): self.bytes = bytes self.timestamp = timestamp self.duration = duration + def frame_generator(frame_duration_ms, audio, sample_rate): """Generates audio frames from PCM audio data. Takes the desired frame duration in milliseconds, the PCM data, and @@ -43,13 +46,12 @@ def frame_generator(frame_duration_ms, audio, sample_rate): timestamp = 0.0 duration = (float(n) / sample_rate) / 2.0 while offset + n < len(audio): - yield Frame(audio[offset:offset + n], timestamp, duration) + yield Frame(audio[offset : offset + n], timestamp, duration) timestamp += duration offset += n -def vad_collector(sample_rate, frame_duration_ms, - padding_duration_ms, vad, frames): +def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, frames): """Filters out non-voiced audio frames. Given a webrtcvad.Vad and a source of audio frames, yields only the voiced audio. @@ -80,7 +82,7 @@ def vad_collector(sample_rate, frame_duration_ms, for frame in frames: is_speech = vad.is_speech(frame.bytes, sample_rate) - sys.stdout.write('1' if is_speech else '0') + sys.stdout.write("1" if is_speech else "0") if not triggered: ring_buffer.append((frame, is_speech)) num_voiced = len([f for f, speech in ring_buffer if speech]) @@ -89,7 +91,7 @@ def vad_collector(sample_rate, frame_duration_ms, # TRIGGERED state. 
if num_voiced > 0.9 * ring_buffer.maxlen: triggered = True - sys.stdout.write('+(%s)' % (ring_buffer[0][0].timestamp,)) + sys.stdout.write("+(%s)" % (ring_buffer[0][0].timestamp,)) # We want to yield all the audio we see from now until # we are NOTTRIGGERED, but we have to start with the # audio that's already in the ring buffer. @@ -106,23 +108,23 @@ def vad_collector(sample_rate, frame_duration_ms, # unvoiced, then enter NOTTRIGGERED and yield whatever # audio we've collected. if num_unvoiced > 0.9 * ring_buffer.maxlen: - sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration)) + sys.stdout.write("-(%s)" % (frame.timestamp + frame.duration)) triggered = False - yield b''.join([f.bytes for f in voiced_frames]) + yield b"".join([f.bytes for f in voiced_frames]) ring_buffer.clear() voiced_frames = [] if triggered: # BT if were in triggered state at end of signal, set output time - sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration)) - sys.stdout.write('\n') + sys.stdout.write("-(%s)" % (frame.timestamp + frame.duration)) + sys.stdout.write("\n") # If we have any leftover voiced audio when we run out of input, # yield it. if voiced_frames: - yield b''.join([f.bytes for f in voiced_frames]) + yield b"".join([f.bytes for f in voiced_frames]) - -def vad_get_segment_times(sample_rate, frame_duration_ms, - padding_duration_ms, vad, frames): +def vad_get_segment_times( + sample_rate, frame_duration_ms, padding_duration_ms, vad, frames +): """Filters out non-voiced audio frames. 
BT: based on vad_collector, but returns start and end times for voiced segs @@ -158,7 +160,7 @@ def vad_get_segment_times(sample_rate, frame_duration_ms, for frame in frames: is_speech = vad.is_speech(frame.bytes, sample_rate) - #sys.stdout.write('1' if is_speech else '0') + # sys.stdout.write('1' if is_speech else '0') if not triggered: ring_buffer.append((frame, is_speech)) num_voiced = len([f for f, speech in ring_buffer if speech]) @@ -167,7 +169,7 @@ def vad_get_segment_times(sample_rate, frame_duration_ms, # TRIGGERED state. if num_voiced > 0.9 * ring_buffer.maxlen: triggered = True - #sys.stdout.write('+(%s)' % (ring_buffer[0][0].timestamp,)) + # sys.stdout.write('+(%s)' % (ring_buffer[0][0].timestamp,)) start_times.append(ring_buffer[0][0].timestamp) # BT ring_buffer.clear() else: @@ -179,23 +181,23 @@ def vad_get_segment_times(sample_rate, frame_duration_ms, # unvoiced, then enter NOTTRIGGERED and yield whatever # audio we've collected. if num_unvoiced > 0.9 * ring_buffer.maxlen: - #sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration)) + # sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration)) end_times.append(ring_buffer[0][0].timestamp + frame.duration) # BT triggered = False if triggered: # BT if were in triggered state at end of signal, set output time - #sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration)) - if len(ring_buffer)>0: - end_times.append(ring_buffer[0][0].timestamp ) # BT + # sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration)) + if len(ring_buffer) > 0: + end_times.append(ring_buffer[0][0].timestamp) # BT else: # only get here in very rare case that we triggered on 2nd-to-last frame end_times.append(frame.timestamp + frame.duration) - #sys.stdout.write('\n') + # sys.stdout.write('\n') - return(start_times, end_times) + return (start_times, end_times) -def filter_seg_times(seg_starts, seg_ends, pad_at_start = 0.5, len_to_keep=2.5 ): +def filter_seg_times(seg_starts, seg_ends, pad_at_start=0.5, 
len_to_keep=2.5): """ do some filtering on the segments found to select part for analysis rule: find the first segment that is at least (pad_at_start+len_to_keep sec long. @@ -210,12 +212,14 @@ def filter_seg_times(seg_starts, seg_ends, pad_at_start = 0.5, len_to_keep=2.5 ) not_found = True for iseg in range(len(seg_starts)): - seg_dur = seg_ends[iseg]-seg_starts[iseg] - if (not_found & (seg_dur > (pad_at_start + len_to_keep))): + seg_dur = seg_ends[iseg] - seg_starts[iseg] + if not_found & (seg_dur > (pad_at_start + len_to_keep)): t_start = seg_starts[iseg] + pad_at_start sel_start.append(t_start) sel_end.append(t_start + len_to_keep) - sel_end_longer.append(max(t_start + len_to_keep, seg_ends[iseg]-pad_at_start)) + sel_end_longer.append( + max(t_start + len_to_keep, seg_ends[iseg] - pad_at_start) + ) not_found = False - return sel_start, sel_end, sel_end_longer \ No newline at end of file + return sel_start, sel_end, sel_end_longer diff --git a/opendbm/dbm_lib/dbm_features/raw_features/util/video_util.py b/opendbm/dbm_lib/dbm_features/raw_features/util/video_util.py new file mode 100644 index 00000000..21150b5e --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/util/video_util.py @@ -0,0 +1,312 @@ +""" +file_name: video_util +project_name: DBM +created: 2020-20-07 +""" + +import glob + +import numpy as np +import pandas as pd + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut + + +def smooth(x, window_len=11, window="hanning"): + """smooth the data using a window with requested size. + + This method is based on the convolution of a scaled window with the signal. + The signal is prepared by introducing reflected copies of the signal + (with the window size) in both ends so that transient parts are minimized + in the begining and end part of the output signal. 
+ + input: + x: the input signal + window_len: the dimension of the smoothing window; should be an odd integer + window: the type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman' + flat window will produce a moving average smoothing. + + output: + the smoothed signal + + example: + + t=linspace(-2,2,0.1) + x=sin(t)+randn(len(t))*0.1 + y=smooth(x) + + see also: + + numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve + scipy.signal.lfilter + + TODO: the window parameter could be the window itself if an array instead of a string + NOTE: length(output) != length(input), to correct this: return y[(window_len/2-1):-(window_len/2)] instead of just y. + """ + if x.ndim != 1: + raise (ValueError, "smooth only accepts 1 dimension arrays.") + if x.size < window_len: + raise (ValueError, "Input vector needs to be bigger than window size.") + if window_len < 3: + return x + if window not in ["flat", "hanning", "hamming", "bartlett", "blackman"]: + raise ( + ValueError, + "Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'", + ) + s = np.r_[x[window_len - 1 : 0 : -1], x, x[-2 : -window_len - 1 : -1]] + # print(len(s)) + if window == "flat": # moving average + w = np.ones(window_len, "d") + else: + w = eval("np." 
+ window + "(window_len)") + y = np.convolve(w / w.sum(), s, mode="valid") + return y[int(window_len / 2) : -int(window_len / 2)] + + +def filter_by_confidence_and_thresh(x, fea, thresh): + if x["s_confidence"] > 0.2 and np.fabs(x[fea]) < thresh: + return x[fea] + else: + return np.NaN + + +def add_au_emotion(x, emotion, emotion_type, exp_type): + """ + computing individula emotion expressivity matrix + Args: + emotion: Action Unit + """ + error_reason = "Pass" + if x["s_confidence"] > 0.8: # if using smooth, no need for 'success' + sum_r = 0 + cnt = 0 + for au in emotion: + au_c_label = " AU{:02d}_c".format(au) + au_r_label = " AU{:02d}_r".format(au) + if x[au_c_label] == 1 and ( + not np.isnan(x[au_r_label]) + ): # there are data with face in, but au_c=0 + sum_r += x[au_r_label] + cnt += 6 + if ( + exp_type == "full" and x[au_c_label] == 0 + ): # Logic to compute emotion expressivity when all AU's are present + cnt = 0 + break + if cnt > 0: + sum_r /= cnt + else: + sum_r = 0 + v_emo = x[emotion_type] + sum_r + else: + v_emo = np.NaN + error_reason = "confidence less than 80%" + + return v_emo, error_reason + + +def add_au_occ(x, emotion, emotion_type): + """ + computing individula emotion presence + Args: + emotion: Action Unit + """ + au_pres = [] + em_pres = 0 + error_reason = "Pass" + if x["s_confidence"] > 0.8: # if using smooth, no need for 'success' + for au in emotion: + au_c_label = " AU{:02d}_c".format(au) + if x[au_c_label] == 1: # there are data with face in, but au_c=0 + au_pres.append(1) + + if len(au_pres) == len(emotion): + em_pres = 1 + else: + em_pres = np.NaN + error_reason = "confidence less than 80%" + return em_pres, error_reason + + +def emotion_exp(em_au, of, em_col, err_col): + """ + Computing individual emotion expressivity and adding it to dataframe + """ + for emotion in em_au: + of[[em_col[0], err_col]] = of.apply( + add_au_emotion, + args=( + emotion, + em_col[0], + "partial", + ), + axis=1, + result_type="expand", + ) + 
of[[em_col[1], err_col]] = of.apply( + add_au_emotion, + args=( + emotion, + em_col[1], + "full", + ), + axis=1, + result_type="expand", + ) + + +def emotion_pres(em_au, of, em_col, err_col): + """ + Computing individual emotion expressivity and adding it to dataframe + """ + for emotion in em_au: + of[[em_col, err_col]] = of.apply( + add_au_occ, + args=( + emotion, + em_col, + ), + axis=1, + result_type="expand", + ) + + +def calc_of_for_video(of, face_cfg, fe_cfg): + """ + Creating dataframe for emotion expressivity + """ + new_cols = [ + fe_cfg.hap_exp, + fe_cfg.sad_exp, + fe_cfg.sur_exp, + fe_cfg.fea_exp, + fe_cfg.ang_exp, + fe_cfg.dis_exp, + fe_cfg.con_exp, + fe_cfg.pai_exp, + fe_cfg.neg_exp, + fe_cfg.pos_exp, + fe_cfg.neu_exp, + fe_cfg.com_lower_exp, + fe_cfg.com_upper_exp, + fe_cfg.cai_exp, + fe_cfg.com_exp, + fe_cfg.happ_occ, + fe_cfg.sad_occ, + fe_cfg.sur_occ, + fe_cfg.fea_occ, + fe_cfg.ang_occ, + fe_cfg.dis_occ, + fe_cfg.con_occ, + fe_cfg.hap_exp_full, + fe_cfg.sad_exp_full, + fe_cfg.sur_exp_full, + fe_cfg.fea_exp_full, + fe_cfg.ang_exp_full, + fe_cfg.dis_exp_full, + fe_cfg.con_exp_full, + fe_cfg.pai_exp_full, + fe_cfg.neg_exp_full, + fe_cfg.pos_exp_full, + fe_cfg.neu_exp_full, + fe_cfg.cai_exp_full, + fe_cfg.com_lower_exp_full, + fe_cfg.com_upper_exp_full, + fe_cfg.com_exp_full, + ] + of[new_cols] = pd.DataFrame([[0] * len(new_cols)], index=of.index) + of[fe_cfg.err_reason] = "Pass" + + # Composite happiness expressivity + emotion_exp( + face_cfg.happiness, of, [fe_cfg.hap_exp, fe_cfg.hap_exp_full], fe_cfg.err_reason + ) + # Composite sadness expressivity + emotion_exp( + face_cfg.sadness, of, [fe_cfg.sad_exp, fe_cfg.sad_exp_full], fe_cfg.err_reason + ) + # Composite surprise expressivity + emotion_exp( + face_cfg.surprise, of, [fe_cfg.sur_exp, fe_cfg.sur_exp_full], fe_cfg.err_reason + ) + # Composite fear expressivity + emotion_exp( + face_cfg.fear, of, [fe_cfg.fea_exp, fe_cfg.fea_exp_full], fe_cfg.err_reason + ) + # Composite anger expressivity + 
emotion_exp( + face_cfg.anger, of, [fe_cfg.ang_exp, fe_cfg.ang_exp_full], fe_cfg.err_reason + ) + # Composite disgust expressivity + emotion_exp( + face_cfg.disgust, of, [fe_cfg.dis_exp, fe_cfg.dis_exp_full], fe_cfg.err_reason + ) + # Composite contempt expressivity + emotion_exp( + face_cfg.contempt, of, [fe_cfg.con_exp, fe_cfg.con_exp_full], fe_cfg.err_reason + ) + # Composite Negative Expressivity + emotion_exp( + face_cfg.NEG_ACTION_UNITS, + of, + [fe_cfg.neg_exp, fe_cfg.neg_exp_full], + fe_cfg.err_reason, + ) + # Composite Positive Expressivity + emotion_exp( + face_cfg.POS_ACTION_UNITS, + of, + [fe_cfg.pos_exp, fe_cfg.pos_exp_full], + fe_cfg.err_reason, + ) + # Composite Neutral Expressivity + emotion_exp( + face_cfg.NET_ACTION_UNITS, + of, + [fe_cfg.neu_exp, fe_cfg.neu_exp_full], + fe_cfg.err_reason, + ) + # Composite Activation Expressivity + emotion_exp( + face_cfg.cai, of, [fe_cfg.cai_exp, fe_cfg.cai_exp_full], fe_cfg.err_reason + ) + # Composite Expressivity + emotion_exp( + face_cfg.ACTION_UNITS, + of, + [fe_cfg.com_exp, fe_cfg.com_exp_full], + fe_cfg.err_reason, + ) + # Composite lower face expressivity + emotion_exp( + face_cfg.LOWER_ACTION_UNITS, + of, + [fe_cfg.com_lower_exp, fe_cfg.com_lower_exp_full], + fe_cfg.err_reason, + ) + # Composite upper face Expressivity + emotion_exp( + face_cfg.UPPER_ACTION_UNITS, + of, + [fe_cfg.com_upper_exp, fe_cfg.com_upper_exp_full], + fe_cfg.err_reason, + ) + # Composite pain expressivity + emotion_exp( + face_cfg.pain, of, [fe_cfg.pai_exp, fe_cfg.pai_exp_full], fe_cfg.err_reason + ) + # AU happiness presence + emotion_pres(face_cfg.happiness, of, fe_cfg.happ_occ, fe_cfg.err_reason) + # AU Sad presence + emotion_pres(face_cfg.sadness, of, fe_cfg.sad_occ, fe_cfg.err_reason) + # AU Surprise presence + emotion_pres(face_cfg.surprise, of, fe_cfg.sur_occ, fe_cfg.err_reason) + # AU fear presence + emotion_pres(face_cfg.fear, of, fe_cfg.fea_occ, fe_cfg.err_reason) + # AU anger presence + emotion_pres(face_cfg.anger, of, 
fe_cfg.ang_occ, fe_cfg.err_reason) + # AU disgust presence + emotion_pres(face_cfg.disgust, of, fe_cfg.dis_occ, fe_cfg.err_reason) + # AU contempt presence + emotion_pres(face_cfg.contempt, of, fe_cfg.con_occ, fe_cfg.err_reason) diff --git a/opendbm/dbm_lib/dbm_features/raw_features/video/__init__.py b/opendbm/dbm_lib/dbm_features/raw_features/video/__init__.py new file mode 100644 index 00000000..b55d2d25 --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/video/__init__.py @@ -0,0 +1,13 @@ +""" +file_name: __init__ +project_name: DBM +created: 2020-20-07 +""" + +from __future__ import absolute_import, division, print_function + +from .face_asymmetry import run_face_asymmetry +from .face_au import run_face_au +from .face_config.face_config_reader import ConfigFaceReader +from .face_emotion_expressivity import run_face_expressivity +from .face_landmark import run_face_landmark diff --git a/opendbm/dbm_lib/dbm_features/raw_features/video/face_asymmetry.py b/opendbm/dbm_lib/dbm_features/raw_features/video/face_asymmetry.py new file mode 100644 index 00000000..fbddd96c --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/video/face_asymmetry.py @@ -0,0 +1,474 @@ +""" +file_name: face_asymmetry.py +project_name: DBM +created: 2020-20-07 +""" + +import datetime +import glob +import logging +import os +import subprocess +import time +from os.path import join + +import cv2 +import numpy as np +import pandas as pd +from matplotlib import pyplot as plt +from mpl_toolkits import mplot3d +from scipy.spatial.transform import Rotation as R + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut +from opendbm.dbm_lib.dbm_features.raw_features.util import video_util as vu + +from .face_config.face_config_reader import ConfigFaceReader + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +face_asym_dir = "facial/face_asymmetry" +csv_ext = "_facasym.csv" + +cv2_color_purple = (254, 19, 188) +color_blue = (0, 0, 1.0) 
+color_green = (0, 1.0, 0) +color_red = (1.0, 0, 0) +color_y = (1.0, 1.0, 0) + +error_code_message = { + 0: "pass", + 1: "confidence less than 80%", +} +error_message_code = {y: x for x, y in error_code_message.items()} + + +def visualize_vid(fn, attr=None, write_out=False): + + vid = cv2.VideoCapture(fn) + # tot = int(vid.get(cv2.CAP_PROP_FRAME_COUNT)) + fps = vid.get(cv2.CAP_PROP_FPS) + # frame_width = int(vid.get(3)) + # frame_height = int(vid.get(4)) + + if write_out: + fig_w = 680 # 680 667 676 #frame_width in order of Ali, Vennessa, synthesis + fig_h = 659 # 659 659 659 #frame_height + out_vid = cv2.VideoWriter( + "out.mp4", cv2.VideoWriter_fourcc(*"MP4V"), fps, (fig_w, fig_h) + ) + + plt.figure(figsize=(8, 8)) + try: + frameid = 0 + while True: + ret, frame = vid.read() + if not ret: + # Release the Video Device if ret is false + vid.release() + print("Released Video Resource") + break + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + frameid += 1 + logger.info(frameid, frame.shape) + + if "lmks_frms" in attr: + lmks_frms = attr["lmks_frms"] + for i in range(lmks_frms[frameid].shape[0]): + cv2.circle( + frame, + (int(lmks_frms[frameid][i, 0]), int(lmks_frms[frameid][i, 1])), + 2, + cv2_color_purple, + -1, + ) + + if write_out: + cv2.putText( + frame, + "Frame: " + str(frameid), + (10, 50), + cv2.FONT_HERSHEY_SIMPLEX, + 1, + (255, 255, 255), + 3, + ) + + plt.subplot(211) + plt.imshow(frame) + plt.axis("off") + plt.pause(0.2) + + if "score_asym" in attr: + ax = plt.subplot(212) + ax.cla() + ax.set_xlim(0, 140) # ax.set_xlim(0,300) + ax.set_ylim(0, 10) + + sa = attr["score_asym"] + s = sa[np.where(sa[:, 0] <= frameid), :][0, :, :] + + for i in range(1, s.shape[1]): + plt.plot(s[:, 0], s[:, i]) + + plt.legend(["mouth", "eyebrow", "eye", "mouth+eye+eyebrow"]) + plt.minorticks_on() + plt.grid(b=True, which="major", color="r", linestyle="-") + plt.grid(b=True, which="minor", color="r", linestyle="--") + + plt.savefig("tmp.png", bbox_inches="tight") + 
print(cv2.imread("tmp.png").shape) + + plt.clf() + if write_out: + out_vid.write(cv2.imread("tmp.png")) + + except KeyboardInterrupt: + # Release the Video Device + vid.release() + if write_out: + out_vid.release() + logger.info("Exception, and Video Resource Released") + + if write_out: + out_vid.release() + + +def retrieve_attr(of_df): + """ + Retrieve landmarks and pose_translation for each frame from openface output + Args: + of_df: dataframe output from openface, including detected landmark coordinates + Returns: + lmks_frms: dictionary, with frame id as key and 68 landmark set as value + pose_p: dictionary, with frame id as key and pose param as value + """ + tot_lmks = 68 # openface specific + if len([i for i in of_df.columns.to_list() if " x_" in i]) != tot_lmks: + return {} + + lmks_frms = {} + pose_p = {} + + for fi in sorted(of_df["frame"].to_list()): + lmks = np.zeros((tot_lmks, 6)) + r = of_df[of_df["frame"] == fi] + + for i in range(tot_lmks): + lmk_y = r[" y_" + str(i)].iloc[0] + lmk_x = r[" x_" + str(i)].iloc[0] + lmk_X = r[" X_" + str(i)].iloc[0] + lmk_Y = r[" Y_" + str(i)].iloc[0] + lmk_Z = r[" Z_" + str(i)].iloc[0] + + confi = r[" confidence"] + lmks[i, :] = [lmk_x, lmk_y, lmk_X, lmk_Y, lmk_Z, confi] + + lmks_frms[fi] = lmks + pose_p[fi] = [ + r[" pose_Tx"].iloc[0], + r[" pose_Ty"].iloc[0], + r[" pose_Tz"].iloc[0], + r[" pose_Rx"].iloc[0], + r[" pose_Ry"].iloc[0], + r[" pose_Rz"].iloc[0], + ] + + return lmks_frms, pose_p + + +def mirror_point(a, b, c, d, x1, y1, z1): + # mirror a point w.r.t a 3D plane + k = (-a * x1 - b * y1 - c * z1 - d) / float((a * a + b * b + c * c)) + + x2 = a * k + x1 + y2 = b * k + y1 + z2 = c * k + z1 + + x3 = 2 * x2 - x1 + y3 = 2 * y2 - y1 + z3 = 2 * z2 - z1 + return [x3, y3, z3] + + +def dist_vec2plane(vec, nrm): + # Calculate the projected length of a vector (vec) to a plane defined by its normal (nrm) + return np.sqrt(np.dot(vec, vec) - np.dot(vec, nrm) ** 2) + + +def vis_lmks3d(lmks_frms, vis_idx): + """ + 
Visualizing facial landmarks + """ + # fig = plt.figure() + color_type = ["b", "g", "r", "y", "c"] + assert len(color_type) > len(vis_idx) + + for fi in sorted(list(lmks_frms.keys())): + ax = plt.axes(projection="3d") + for i, vi in enumerate(vis_idx): + ax.scatter( + lmks_frms[fi][vi, 2], + lmks_frms[fi][vi, 3], + lmks_frms[fi][vi, 4], + c=color_type[i], + ) + + ax.axes.set_xlim3d(left=-75, right=100) + ax.axes.set_ylim3d(bottom=-200, top=25) + ax.axes.set_zlim3d(bottom=440, top=560) + ax.view_init(-89, -90) # elev, ariz + plt.title(str(fi)) + ax.set_xlabel("X") + ax.set_ylabel("Y") + ax.set_zlabel("Z") + plt.pause(0.2) + plt.cla() + plt.draw() + + +def calc_fac_asymmetry(attr, is_vis=False): + """ + Quantify facial asymmetry + Args: + attr: attribute dictionary containing necessary features for calculation, e.g., + lmks_frms: dictionary, with frame id as key and 68 landmark set (OpenFace) as value + pose_param: dictionary, with frame id as key and pose param as value + Returns: + score_asym: 2D array of size (num_frms, num_asymm_fea), with frame id as the 0th column, and each remaining column as one asymmetry feature + """ + # openface landmark indices + lmks_ref_idx = list(range(0, 17)) + list(range(27, 36)) + lmks_mid_idx = [27, 28, 29, 30, 33, 51, 62, 66, 57, 8] + lmks_rgt_idx = [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 17, + 18, + 19, + 20, + 21, + 36, + 37, + 38, + 39, + 40, + 41, + 48, + 49, + 50, + 59, + 58, + 60, + 61, + 67, + ] + lmks_lft_idx = [ + 16, + 15, + 14, + 13, + 12, + 11, + 10, + 9, + 26, + 25, + 24, + 23, + 22, + 45, + 44, + 43, + 42, + 47, + 46, + 54, + 53, + 52, + 55, + 56, + 64, + 63, + 65, + ] + + lmks_mth_idx = list(range(48, 68)) + lmks_ebr_idx = list(range(17, 27)) + lmks_eye_idx = list(range(36, 48)) + assert len(lmks_lft_idx) == len(lmks_rgt_idx) + + fea_list = ["mouth", "eyebrow", "eye", "composite"] + score_asym = np.empty(shape=(0, 0)) + + if ("lmks_frms" in attr) and ("pose_param" in attr): + lmks_frms = attr["lmks_frms"] + 
pose_p = attr["pose_param"] + + if is_vis: + vis_lmks3d( + lmks_frms, [lmks_lft_idx, lmks_rgt_idx, lmks_mid_idx, lmks_ref_idx] + ) + + score_asym = np.zeros( + (len(lmks_frms), len(fea_list) + 1 + 1) + ) # +1: extra column for error code + if is_vis: + # fig = plt.figure() + ax = plt.axes(projection="3d") + + for s, fi in enumerate(sorted(list(lmks_frms.keys()))): + lmks_3d = lmks_frms[fi][:, 2:5] + pose = pose_p[fi] + err_code = error_message_code["pass"] + + if lmks_frms[fi][0, 5] < 0.8: + err_code = error_message_code["confidence less than 80%"] + score_asym[s, :] = [fi, np.NaN, np.NaN, np.NaN, np.NaN, err_code] + continue + + rx = R.from_euler("x", pose[3]) + ry = R.from_euler("y", pose[4]) + rz = R.from_euler("z", pose[5]) + + vec_pose = rz.apply(ry.apply(rx.apply([0, 0, 1]))) + anc_idx = [30, 27, 8] # for central plane estimation + nrm = np.cross( + lmks_3d[anc_idx[2], :] - lmks_3d[anc_idx[0], :], + lmks_3d[anc_idx[1], :] - lmks_3d[anc_idx[0], :], + ) + + nrm = nrm / np.linalg.norm(nrm) + a, b, c = nrm + d = np.dot(nrm, lmks_3d[anc_idx[0], :]) + + dist_L2R_mth = [] + dist_L2R_ebr = [] + dist_L2R_eye = [] + dist_com = [] + + lmks_rfl = np.empty((0, 3)) + src_idx = lmks_lft_idx + + for k, idx in enumerate(src_idx): + p_rfl = np.array( + mirror_point( + a, b, c, -d, lmks_3d[idx, 0], lmks_3d[idx, 1], lmks_3d[idx, 2] + ) + ) + lmks_rfl = np.vstack((lmks_rfl, p_rfl)) + dist = dist_vec2plane((p_rfl - lmks_3d[lmks_rgt_idx[k], :]), vec_pose) + + if idx in lmks_mth_idx: + dist_L2R_mth.append(dist) + if idx in lmks_ebr_idx: + dist_L2R_ebr.append(dist) + if idx in lmks_eye_idx: + dist_L2R_eye.append(dist) + if ( + (idx in lmks_mth_idx) + or (idx in lmks_ebr_idx) + or (idx in lmks_eye_idx) + ): + dist_com.append(dist) + score_asym[s, :] = [ + fi, + np.mean(dist_L2R_mth), + np.mean(dist_L2R_ebr), + np.mean(dist_L2R_eye), + np.mean(dist_com), + err_code, + ] + + if is_vis: + ax.scatter(lmks_3d[:, 0], lmks_3d[:, 1], lmks_3d[:, 2]) + ax.scatter(lmks_rfl[:, 0], lmks_rfl[:, 1], 
lmks_rfl[:, 2], c="y") + ax.scatter(pose_p[fi][0], pose_p[fi][1], pose_p[fi][2], c="c") + plt.title("mirrored landmarks, frame: " + str(fi)) + ax.set_xlabel("X") + ax.set_ylabel("Y") + ax.set_zlabel("Z") + plt.pause(0.2) + plt.cla() + plt.draw() + + return score_asym + + +def calc_asym_feature(open_face_csv, f_cfg): + """ + Calculating facial asymmetry features and preparing final df + """ + df_list = [] + + of_df = pd.read_csv(open_face_csv) + lmks_frms, pose_p = retrieve_attr(of_df) + + attr = {"lmks_frms": lmks_frms, "pose_param": pose_p} + score_asym = calc_fac_asymmetry(attr) + + df_score_asym = pd.DataFrame( + score_asym, + columns=[ + "frame", + f_cfg.fac_AsymMaskMouth, + f_cfg.fac_AsymMaskEyebrow, + f_cfg.fac_AsymMaskEye, + f_cfg.fac_AsymMaskCom, + f_cfg.err_reason, + ], + ) + df_score_asym[f_cfg.err_reason] = df_score_asym[f_cfg.err_reason].apply( + lambda x: error_code_message[x] + ) + + df_score_asym["frame"] = of_df["frame"] + df_score_asym["face_id"] = of_df[" face_id"] + df_score_asym["timestamp"] = of_df[" timestamp"] + df_score_asym["confidence"] = of_df[" confidence"] + df_score_asym["success"] = of_df[" success"] + + df_list.append(df_score_asym) + return df_list + + +def run_face_asymmetry(video_uri, out_dir, f_cfg, save=True): + """ + Processing all patient's for calculating facial asymmetry + --------------- + --------------- + Args: + video_uri: video path; f_cfg: face config object + out_dir: (str) Output directory for processed output + """ + try: + + # Baseline logic + ConfigFaceReader() + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) + + of_csv_path = glob.glob(join(out_loc, fl_name + "_openface/*.csv")) + if len(of_csv_path) > 0: + + of_csv = of_csv_path[0] + asym_df_list = calc_asym_feature(of_csv, f_cfg) + + asym_final_df = pd.concat(asym_df_list, ignore_index=True) + asym_final_df["dbm_master_url"] = video_uri + + if save: + logger.info( + "Processing Output file {} ".format(os.path.join(out_loc, fl_name)) + ) + 
ut.save_output(asym_final_df, out_loc, fl_name, face_asym_dir, csv_ext) + return asym_final_df + + except Exception as e: + e + logger.error("Failed to process video file") diff --git a/opendbm/dbm_lib/dbm_features/raw_features/video/face_au.py b/opendbm/dbm_lib/dbm_features/raw_features/video/face_au.py new file mode 100644 index 00000000..a313e097 --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/video/face_au.py @@ -0,0 +1,103 @@ +""" +file_name: face_au.py +project_name: DBM +created: 2020-20-07 +""" + +import datetime +import glob +import logging +import os +from os.path import join + +import numpy as np +import pandas as pd + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut +from opendbm.dbm_lib.dbm_features.raw_features.util import video_util as vu + +from .face_config.face_config_reader import ConfigFaceReader + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +face_au_dir = "facial/face_au" +csv_ext = "_facau.csv" + + +def extract_col_nm_au(cols): + """ + Extract action unit (au) column names from openface output (csv) + Args: + cols: column names from open face output (csv) + Returns: + (list) list of au column names + """ + # cols_lmk = [] + au_tags = " AU" + cols_au = [c for c in cols if au_tags in c] + return cols_au + + +def au_col_nm_map(df): + """ + Rename dataframe action unit column names to match functional specifications v1.0 + Args: + df: dataframe + Returns: + dataframe with mapped variables + """ + dict_au_cols = {} + for col in list(df): + if " AU" in col: + idx = col.rfind("_") + if idx > -1: + au_id = col[idx - 2 : idx] + if "_r" in col: + dict_au_cols[col] = "fac_AU" + au_id + "int" + if "_c" in col: + dict_au_cols[col] = "fac_AU" + au_id + "pres" + df.rename(columns=dict_au_cols, inplace=True) + return df + + +def run_face_au(video_uri, out_dir, f_cfg, save=True): + """ + Processing all patient's for fetching action units + --------------- + --------------- + Args: + video_uri: 
video path; f_cfg: face config object + out_dir: (str) Output directory for processed output + """ + try: + + # Baseline logic + ConfigFaceReader() + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) + + of_csv_path = glob.glob(join(out_loc, fl_name + "_openface/*.csv")) + if len(of_csv_path) > 0: + + df_of = pd.read_csv(of_csv_path[0]) + df_au = df_of[extract_col_nm_au(df_of)] + df_au = df_au.copy() + + df_au["frame"] = df_of["frame"] + df_au["face_id"] = df_of[" face_id"] + df_au["timestamp"] = df_of[" timestamp"] + df_au["confidence"] = df_of[" confidence"] + df_au["success"] = df_of[" success"] + + df_au = au_col_nm_map(df_au) + df_au["dbm_master_url"] = video_uri + if save: + logger.info( + "Processing Output file {} ".format(os.path.join(out_loc, fl_name)) + ) + ut.save_output(df_au, out_loc, fl_name, face_au_dir, csv_ext) + return df_au + + except Exception as e: + e + logger.error("Failed to process video file") diff --git a/opendbm/dbm_lib/dbm_features/raw_features/video/face_config/__init__.py b/opendbm/dbm_lib/dbm_features/raw_features/video/face_config/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/dbm_lib/dbm_features/raw_features/video/face_config/face_config_reader.py b/opendbm/dbm_lib/dbm_features/raw_features/video/face_config/face_config_reader.py similarity index 54% rename from dbm_lib/dbm_features/raw_features/video/face_config/face_config_reader.py rename to opendbm/dbm_lib/dbm_features/raw_features/video/face_config/face_config_reader.py index 54cf9bcd..f7e97c4f 100644 --- a/dbm_lib/dbm_features/raw_features/video/face_config/face_config_reader.py +++ b/opendbm/dbm_lib/dbm_features/raw_features/video/face_config/face_config_reader.py @@ -4,50 +4,58 @@ project_name: DBM created: 2020-20-07 """ +import os + import yaml -import boto3 -from dbm_lib.dbm_features.raw_features.video import DBMLIB_FACE_CONFIG + +DBMLIB_PATH = os.path.dirname(__file__) +DBMLIB_FACE_CONFIG = os.path.abspath( + 
os.path.join(DBMLIB_PATH, "../../../../../resources/services/face_util.yml") +) + class ConfigFaceReader(object): """Summary Read sevice end ponit """ - def __init__(self, - service_config_yml=None): + + def __init__(self, service_config_yml=None): """Summary Args: service_config_yml (None, optional): yml file defined service configuration """ - + if service_config_yml is None: service_config = DBMLIB_FACE_CONFIG else: service_config = service_config_yml - - with open(service_config, 'r') as ymlfile: - config = yaml.load(ymlfile) - self.ACTION_UNITS = config['cdx_face_config']['ACTION_UNITS'] - self.NEG_ACTION_UNITS = config['cdx_face_config']['NEG_ACTION_UNITS'] - self.POS_ACTION_UNITS = config['cdx_face_config']['POS_ACTION_UNITS'] - self.NET_ACTION_UNITS = config['cdx_face_config']['NET_ACTION_UNITS'] - self.LOWER_ACTION_UNITS = config['cdx_face_config']['LOWER_ACTION_UNITS'] - self.UPPER_ACTION_UNITS = config['cdx_face_config']['UPPER_ACTION_UNITS'] - self.happiness = config['cdx_face_config']['happiness'] - self.sadness = config['cdx_face_config']['sadness'] - self.surprise = config['cdx_face_config']['surprise'] - self.fear = config['cdx_face_config']['fear'] - self.anger = config['cdx_face_config']['anger'] - self.disgust = config['cdx_face_config']['disgust'] - self.contempt = config['cdx_face_config']['contempt'] - self.pain = config['cdx_face_config']['pain'] - self.cai = config['cdx_face_config']['CAI'] - self.SELECTED_FEATURES = config['cdx_face_config']['SELECTED_FEATURES'].split(',') - self.face_expr_dir = config['cdx_face_config']['face_expr_dir'] - self.face_asym_dir = config['cdx_face_config']['face_asym_dir'] - self.AU_fl = config['cdx_face_config']['AU_filters'] - self.au_int = config['cdx_face_config']['au_intensity'] - self.au_prs = config['cdx_face_config']['au_presence'] - + + with open(service_config, "r") as ymlfile: + config = yaml.load(ymlfile, Loader=yaml.CLoader) + self.ACTION_UNITS = config["cdx_face_config"]["ACTION_UNITS"] + 
self.NEG_ACTION_UNITS = config["cdx_face_config"]["NEG_ACTION_UNITS"] + self.POS_ACTION_UNITS = config["cdx_face_config"]["POS_ACTION_UNITS"] + self.NET_ACTION_UNITS = config["cdx_face_config"]["NET_ACTION_UNITS"] + self.LOWER_ACTION_UNITS = config["cdx_face_config"]["LOWER_ACTION_UNITS"] + self.UPPER_ACTION_UNITS = config["cdx_face_config"]["UPPER_ACTION_UNITS"] + self.happiness = config["cdx_face_config"]["happiness"] + self.sadness = config["cdx_face_config"]["sadness"] + self.surprise = config["cdx_face_config"]["surprise"] + self.fear = config["cdx_face_config"]["fear"] + self.anger = config["cdx_face_config"]["anger"] + self.disgust = config["cdx_face_config"]["disgust"] + self.contempt = config["cdx_face_config"]["contempt"] + self.pain = config["cdx_face_config"]["pain"] + self.cai = config["cdx_face_config"]["CAI"] + self.SELECTED_FEATURES = config["cdx_face_config"][ + "SELECTED_FEATURES" + ].split(",") + self.face_expr_dir = config["cdx_face_config"]["face_expr_dir"] + self.face_asym_dir = config["cdx_face_config"]["face_asym_dir"] + self.AU_fl = config["cdx_face_config"]["AU_filters"] + self.au_int = config["cdx_face_config"]["au_intensity"] + self.au_prs = config["cdx_face_config"]["au_presence"] + def get_action_unit(self): """Summary Returns: @@ -137,4 +145,4 @@ class ConfigFaceReader(object): Returns: TYPE: end point """ - return self.cai \ No newline at end of file + return self.cai diff --git a/opendbm/dbm_lib/dbm_features/raw_features/video/face_emotion_expressivity.py b/opendbm/dbm_lib/dbm_features/raw_features/video/face_emotion_expressivity.py new file mode 100644 index 00000000..3a63c7eb --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/video/face_emotion_expressivity.py @@ -0,0 +1,95 @@ +""" +file_name: process_emotion_expressivity +project_name: DBM +created: 2020-20-07 +""" + +import datetime +import glob +import logging +import os +from os.path import join + +import numpy as np +import pandas as pd + +from 
opendbm.dbm_lib.dbm_features.raw_features.util import util as ut +from opendbm.dbm_lib.dbm_features.raw_features.util import video_util as vu + +from .face_config.face_config_reader import ConfigFaceReader + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +face_expr_dir = "facial/face_expressivity" +csv_ext = "_facemo.csv" + +# Openface feature extraction +def of_feature(df_of, cfr, f_cfg): + """ + Creating dataframe for face expressivity + Args: + of: open face attributes + Returns: + (list) list of expressivity score for emotions + """ + df_list = [] + df_of["s_confidence"] = vu.smooth( + df_of[" confidence"].values, window="flat" + ).tolist() + + if "AU" in cfr.SELECTED_FEATURES: + vu.calc_of_for_video(df_of, cfr, f_cfg) + # Normalizing facial expressivity for Composite and Negative expr(Range 0 to 1) + + if len(df_of[f_cfg.neg_exp]) > 0: + df_of[f_cfg.neg_exp] = df_of[f_cfg.neg_exp] / 5 + + if len(df_of[f_cfg.neg_exp_full]) > 0: + df_of[f_cfg.neg_exp_full] = df_of[f_cfg.neg_exp_full] / 5 + + if len(df_of[f_cfg.com_exp]) > 0: + df_of[f_cfg.com_exp] = df_of[f_cfg.com_exp] / 7 + + if len(df_of[f_cfg.com_exp_full]) > 0: + df_of[f_cfg.com_exp_full] = df_of[f_cfg.com_exp_full] / 7 + + df_list.append(df_of) + return df_list + + +def run_face_expressivity(video_uri, out_dir, f_cfg, save=True): + """ + Processing all patient's for fetching facial landmarks + --------------- + --------------- + Args: + video_uri: video path; f_cfg: raw variable config object + out_dir: (str) Output directory for processed output + """ + try: + + # Baseline logic + cfr = ConfigFaceReader() + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) + + of_csv_path = glob.glob(join(out_loc, fl_name + "_openface/*.csv")) + if len(of_csv_path) > 0: + + df_of = pd.read_csv(of_csv_path[0]) + df_of = df_of[cfr.AU_fl] + expr_df_list = of_feature(df_of, cfr, f_cfg) + + exp_final_df = pd.concat(expr_df_list, ignore_index=True) + exp_final_df["dbm_master_url"] = 
video_uri + + if save: + logger.info( + "Processing Output file {} ".format(os.path.join(out_loc, fl_name)) + ) + ut.save_output(exp_final_df, out_loc, fl_name, face_expr_dir, csv_ext) + return exp_final_df + + except Exception as e: + e + logger.error("Failed to process video file") diff --git a/opendbm/dbm_lib/dbm_features/raw_features/video/face_landmark.py b/opendbm/dbm_lib/dbm_features/raw_features/video/face_landmark.py new file mode 100644 index 00000000..6b92f9f4 --- /dev/null +++ b/opendbm/dbm_lib/dbm_features/raw_features/video/face_landmark.py @@ -0,0 +1,138 @@ +""" +file_name: face_landmark +project_name: DBM +created: 2020-20-07 +""" + +import datetime +import glob +import logging +import os +from os.path import join + +import numpy as np +import pandas as pd + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut +from opendbm.dbm_lib.dbm_features.raw_features.util import video_util as vu + +from .face_config.face_config_reader import ConfigFaceReader + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +face_lmk_dir = "facial/face_landmark" +csv_ext = "_faclmk.csv" + + +def extract_col_nm_lmk(cols): + """ + Extract landmark column names from openface output (csv) + Args: + cols: column names from open face output (csv) + Returns: + (list) list of landmark column names + """ + cols_lmk = [] + lmk_tags = [" y_", " x_", " X_", " Y_", " Z_"] + for c in cols: + if any(t in c for t in lmk_tags): + cols_lmk.append(c) + return cols_lmk + + +def lmk_col_nm_map(df): + """ + Rename dataframe landmark column names to match functional specifications v1.0 + Args: + df: dataframe + """ + dict_lmk_cols = {} + for col in list(df): + idx = col.rfind("_") + 1 + if idx > 0: + lmk_id = col[idx:] if len(col[idx:]) > 1 else "0" + col[idx:] + if " y_" in col: + dict_lmk_cols[col] = "fac_LMK" + lmk_id + "r" + if " x_" in col: + dict_lmk_cols[col] = "fac_LMK" + lmk_id + "c" + if " X_" in col: + dict_lmk_cols[col] = "fac_LMK" + lmk_id + 
"X" + if " Y_" in col: + dict_lmk_cols[col] = "fac_LMK" + lmk_id + "Y" + if " Z_" in col: + dict_lmk_cols[col] = "fac_LMK" + lmk_id + "Z" + df.rename(columns=dict_lmk_cols, inplace=True) + return df + + +def add_disp_3D(df): + """ + Add 3D displacement for each landmark + Args: + df: landmark dataframe + """ + df = df.sort_values(by=["frame"], ascending=False) + cols_lmk = [col for col in list(df) if "fac_LMK" in col] + df_t = df[cols_lmk] + df_diff = df_t.diff() + df_diff = df_diff.pow(2) + + tot_lmk = 68 # 68 landmark model + for i in range(tot_lmk): + lmk_id = "{:02d}".format(i) + df["fac_LMK" + lmk_id + "disp"] = ( + df_diff[ + [ + "fac_LMK" + lmk_id + "X", + "fac_LMK" + lmk_id + "Y", + "fac_LMK" + lmk_id + "Z", + ] + ] + .sum(axis=1) + .apply(np.sqrt) + ) + + return df + + +def run_face_landmark(video_uri, out_dir, f_cfg, save=True): + """ + Processing all patient's for fetching facial landmarks + --------------- + --------------- + Args: + video_uri: video path; f_cfg: raw variable config object + out_dir: (str) Output directory for processed output + """ + try: + + # Baseline logic + ConfigFaceReader() + input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir) + + of_csv_path = glob.glob(join(out_loc, fl_name + "_openface/*.csv")) + if len(of_csv_path) > 0: + + df_of = pd.read_csv(of_csv_path[0]) + df_lmk = df_of[extract_col_nm_lmk(df_of)] + df_lmk = df_lmk.copy() + + df_lmk["frame"] = df_of["frame"] + df_lmk["face_id"] = df_of[" face_id"] + df_lmk["timestamp"] = df_of[" timestamp"] + df_lmk["confidence"] = df_of[" confidence"] + df_lmk["success"] = df_of[" success"] + + df_lmk = lmk_col_nm_map(df_lmk) + df_lmk = add_disp_3D(df_lmk) + df_lmk["dbm_master_url"] = video_uri + + if save: + logger.info("Processing Output file {} ".format(join(out_loc, fl_name))) + ut.save_output(df_lmk, out_loc, fl_name, face_lmk_dir, csv_ext) + return df_lmk + + except Exception as e: + e + logger.error("Failed to process video file") diff --git 
a/dbm_lib/dbm_features/raw_features/video/open_face_process.py b/opendbm/dbm_lib/dbm_features/raw_features/video/open_face_process.py similarity index 62% rename from dbm_lib/dbm_features/raw_features/video/open_face_process.py rename to opendbm/dbm_lib/dbm_features/raw_features/video/open_face_process.py index 6f2e3847..fa55b98c 100644 --- a/dbm_lib/dbm_features/raw_features/video/open_face_process.py +++ b/opendbm/dbm_lib/dbm_features/raw_features/video/open_face_process.py @@ -4,19 +4,23 @@ project_name: DBM created: 2020-20-07 """ -import os -import numpy as np -import pandas as pd import glob import logging +import os -from dbm_lib.dbm_features.raw_features.util import util as ut +import numpy as np +import pandas as pd + +from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() +logger = logging.getLogger() -def batch_open_face(filepaths,video_url, input_dir, out_dir, of_path, video_tracking=False): - """ Computes open_face features for the files in filepaths + +def batch_open_face( + filepaths, video_url, input_dir, out_dir, of_path, video_tracking=False +): + """Computes open_face features for the files in filepaths Args: ----- @@ -31,31 +35,36 @@ def batch_open_face(filepaths,video_url, input_dir, out_dir, of_path, video_trac Returns: -------- (itreable[str]) list of .csv files - """ + """ if video_tracking: - suffix = '_openface_lmk' + suffix = "_openface_lmk" else: - suffix = '_openface' + suffix = "_openface" csv_files = [] - + for fp in filepaths: try: _, out_loc, fl_name = ut.filter_path(video_url, out_dir) full_f_name = fl_name + suffix output_directory = os.path.join(out_loc, full_f_name) - + if video_tracking and not os.path.exists(os.path.abspath(output_directory)): os.makedirs(os.path.abspath(output_directory)) - csv_files.append(ut.compute_open_face_features(fp,output_directory,of_path)) + csv_files.append( + ut.compute_open_face_features(fp, output_directory, 
of_path) + ) except Exception as e: - logger.error('Failed to run OpenFace on {}\n{}'.format(fp, e)) + logger.error("Failed to run OpenFace on {}\n{}".format(fp, e)) return csv_files -def process_open_face(video_uri, input_dir, out_dir, of_path, dbm_group,video_tracking): + +def process_open_face( + video_uri, input_dir, out_dir, of_path, dbm_group, video_tracking +): """ Processing all patient's for fetching emotion expressivity ------------------- @@ -66,15 +75,21 @@ def process_open_face(video_uri, input_dir, out_dir, of_path, dbm_group,video_tr """ try: - - if dbm_group != None: - check_group = ['facial','movement'] #add group here: if you want to use openface output for raw variable calculation + + if dbm_group is not None: + check_group = [ + "facial", + "movement", + ] # add group here: if you want to use openface output for raw variable calculation check_val = bool(len({*check_group} & {*dbm_group})) if not check_val: return - + filepaths = [video_uri] - csv_filepaths = batch_open_face(filepaths, video_uri, input_dir, out_dir, of_path, video_tracking) + batch_open_face( + filepaths, video_uri, input_dir, out_dir, of_path, video_tracking + ) except Exception as e: - logger.error('Failed to process video file') + e + logger.error("Failed to process video file") diff --git a/opendbm/pkg/__init__.py b/opendbm/pkg/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/opendbm/pkg/shape_detector/__init__.py b/opendbm/pkg/shape_detector/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pkg/shape_detector/shape_predictor_68_face_landmarks.dat b/opendbm/pkg/shape_detector/shape_predictor_68_face_landmarks.dat similarity index 100% rename from pkg/shape_detector/shape_predictor_68_face_landmarks.dat rename to opendbm/pkg/shape_detector/shape_predictor_68_face_landmarks.dat diff --git a/opendbm/pkg/v_tremor/__init__.py b/opendbm/pkg/v_tremor/__init__.py new file mode 100644 index 00000000..e69de29b diff --git 
a/pkg/v_tremor/xgb_bin_vtrem.sav b/opendbm/pkg/v_tremor/xgb_bin_vtrem.sav similarity index 100% rename from pkg/v_tremor/xgb_bin_vtrem.sav rename to opendbm/pkg/v_tremor/xgb_bin_vtrem.sav diff --git a/opendbm/resources/__init__.py b/opendbm/resources/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/opendbm/resources/features/__init__.py b/opendbm/resources/features/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/resources/features/derived_feature.yml b/opendbm/resources/features/derived_feature.yml similarity index 100% rename from resources/features/derived_feature.yml rename to opendbm/resources/features/derived_feature.yml diff --git a/opendbm/resources/features/facial/__init__.py b/opendbm/resources/features/facial/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/resources/features/facial/config.json b/opendbm/resources/features/facial/config.json similarity index 100% rename from resources/features/facial/config.json rename to opendbm/resources/features/facial/config.json diff --git a/resources/features/raw_feature.yml b/opendbm/resources/features/raw_feature.yml similarity index 100% rename from resources/features/raw_feature.yml rename to opendbm/resources/features/raw_feature.yml diff --git a/opendbm/resources/libraries/__init__.py b/opendbm/resources/libraries/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/resources/libraries/voice_tremor.praat b/opendbm/resources/libraries/voice_tremor.praat similarity index 100% rename from resources/libraries/voice_tremor.praat rename to opendbm/resources/libraries/voice_tremor.praat diff --git a/opendbm/resources/services/__init__.py b/opendbm/resources/services/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/resources/services/face_util.yml b/opendbm/resources/services/face_util.yml similarity index 100% rename from resources/services/face_util.yml rename to opendbm/resources/services/face_util.yml diff 
--git a/resources/services/services.yml b/opendbm/resources/services/services.yml similarity index 100% rename from resources/services/services.yml rename to opendbm/resources/services/services.yml diff --git a/process_data.py b/process_data.py index ac60f4f5..efa8007b 100644 --- a/process_data.py +++ b/process_data.py @@ -4,27 +4,29 @@ project_name: DBM created: 2020-20-07 """ -from dbm_lib.config import config_reader, config_raw_feature, config_derive_feature -from dbm_lib.controller import process_feature as pf -from dbm_lib.dbm_features.raw_features.video import open_face_process as of -from dbm_lib.dbm_features.derived_features import derive as der - -import pandas as pd -import os import argparse -import logging import glob -import time +import logging +import os import subprocess +import time from os.path import splitext -logging.basicConfig(level=logging.INFO) -logger=logging.getLogger() +import pandas as pd + +from opendbm.dbm_lib import config_derive_feature, config_raw_feature, config_reader +from opendbm.dbm_lib.controller import process_feature as pf +from opendbm.dbm_lib.dbm_features.derived_features import derive as der +from opendbm.dbm_lib.dbm_features.raw_features.video import open_face_process as of + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +OPENFACE_PATH_VIDEO = "opendbm/pkg/open_dbm/OpenFace/build/bin/FaceLandmarkVid" +OPENFACE_PATH = "opendbm/pkg/open_dbm/OpenFace/build/bin/FeatureExtraction" +DEEP_SPEECH = "opendbm/pkg/DeepSpeech" +DLIB_SHAPE_MODEL = "opendbm/pkg/shape_detector/shape_predictor_68_face_landmarks.dat" -OPENFACE_PATH_VIDEO = 'pkg/open_dbm/OpenFace/build/bin/FaceLandmarkVid' -OPENFACE_PATH = 'pkg/open_dbm/OpenFace/build/bin/FeatureExtraction' -DEEP_SPEECH = 'pkg/DeepSpeech' -DLIB_SHAPE_MODEL = 'pkg/shape_detector/shape_predictor_68_face_landmarks.dat' def common_video(video_file, args, r_config): """ @@ -34,18 +36,41 @@ def common_video(video_file, args, r_config): args: user supplied arguments 
r_config: raw feature config object """ - out_path = os.path.join(args.output_path, 'raw_variables') + out_path = os.path.join(args.output_path, "raw_variables") pf.audio_to_wav(video_file) - of.process_open_face(video_file, os.path.dirname(video_file), out_path, OPENFACE_PATH, args.dbm_group,video_tracking=False) + of.process_open_face( + video_file, + os.path.dirname(video_file), + out_path, + OPENFACE_PATH, + args.dbm_group, + video_tracking=False, + ) pf.process_facial(video_file, out_path, args.dbm_group, r_config) pf.process_acoustic(video_file, out_path, args.dbm_group, r_config) - pf.process_nlp(video_file, out_path, args.dbm_group, args.tr, r_config, DEEP_SPEECH) - if args.dbm_group == None or len(args.dbm_group)>0 and 'movement' in args.dbm_group: - of.process_open_face(video_file, os.path.dirname(video_file), out_path, OPENFACE_PATH_VIDEO, args.dbm_group, video_tracking=True) - pf.process_movement(video_file, out_path, args.dbm_group, r_config, DLIB_SHAPE_MODEL) + pf.process_nlp(video_file, out_path, args.dbm_group, args.tr, r_config, DEEP_SPEECH) + + if ( + args.dbm_group is None + or len(args.dbm_group) > 0 + and "movement" in args.dbm_group + ): + + of.process_open_face( + video_file, + os.path.dirname(video_file), + out_path, + OPENFACE_PATH_VIDEO, + args.dbm_group, + video_tracking=True, + ) + pf.process_movement( + video_file, out_path, args.dbm_group, r_config, DLIB_SHAPE_MODEL + ) pf.remove_file(video_file) + def process_raw_video_file(args, s_config, r_config): """ Processing video file @@ -55,20 +80,22 @@ def process_raw_video_file(args, s_config, r_config): r_config: raw feature config object """ try: - if args.output_path != None: + if args.output_path is not None: video_file = glob.glob(args.input_path) - if len(video_file)>0: - logger.info('Calculating raw variables...') + if len(video_file) > 0: + logger.info("Calculating raw variables...") common_video(video_file[0], args, r_config) else: - logger.info('Enter correct video(*.mp4) file 
path.') + logger.info("Enter correct video(*.mp4) file path.") except Exception as e: - logger.error('Failed to process mp4 file.') + e + logger.error("Failed to process mp4 file.", str(e)) pf.remove_file(video_file[0]) + def process_raw_audio_file(args, s_config, r_config): """ Processing audio file @@ -78,19 +105,28 @@ def process_raw_audio_file(args, s_config, r_config): r_config: raw feature config object """ try: - if args.output_path != None: + if args.output_path is not None: audio_file = glob.glob(args.input_path) - if len(audio_file)>0: - logger.info('Calculating raw variables...') + if len(audio_file) > 0: + logger.info("Calculating raw variables...") - out_path = os.path.join(args.output_path, 'raw_variables') + out_path = os.path.join(args.output_path, "raw_variables") pf.process_acoustic(audio_file[0], out_path, args.dbm_group, r_config) - pf.process_nlp(audio_file[0], out_path, args.dbm_group, args.tr, r_config, DEEP_SPEECH) + pf.process_nlp( + audio_file[0], + out_path, + args.dbm_group, + args.tr, + r_config, + DEEP_SPEECH, + ) else: - logger.info('Enter correct audio(*.wav) file path.') + logger.info("Enter correct audio(*.wav) file path.") except Exception as e: - logger.error('Failed to process wav file.') + # e + logger.error("Failed to process wav file.", str(e)) + def process_raw_video_dir(args, s_config, r_config): """ @@ -100,27 +136,35 @@ def process_raw_video_dir(args, s_config, r_config): s_config: service config object r_config: raw feature config object """ - if args.output_path != None: - vid_loc = glob.glob(args.input_path + '/*.mp4') + glob.glob(args.input_path + '/*.mov') + glob.glob(args.input_path + '/*.MOV') + if args.output_path is not None: + vid_loc = ( + glob.glob(args.input_path + "/*.mp4") + + glob.glob(args.input_path + "/*.mov") + + glob.glob(args.input_path + "/*.MOV") + ) if len(vid_loc) == 0: - logger.info('Directory does not have any MP4 files.') + logger.info("Directory does not have any MP4 files.") return - 
logger.info('Calculating raw variables...') + logger.info("Calculating raw variables...") for vid_file in vid_loc: try: fname, file_ext = os.path.splitext(vid_file) - - if file_ext.lower() == '.mov': + + if file_ext.lower() == ".mov": convert_file(vid_file) - common_video(fname+'.mp4', args, r_config) - - remove_convert(vid_file, '.mp4') #removing files(ffmpeg converted ) after processing + common_video(fname + ".mp4", args, r_config) + + remove_convert( + vid_file, ".mp4" + ) # removing files(ffmpeg converted ) after processing except Exception as e: - logger.error('Failed to process mp4 file.') + e + logger.error("Failed to process mp4 file.", str(e)) pf.remove_file(vid_file) + def process_raw_audio_dir(args, s_config, r_config): """ Processing audio file @@ -129,27 +173,42 @@ def process_raw_audio_dir(args, s_config, r_config): s_config: service config object r_config: raw feature config object """ - if args.output_path != None: - audio_loc = glob.glob(args.input_path + '/*.wav') + glob.glob(args.input_path + '/*.mp3') + glob.glob(args.input_path + '/*.MP3') + if args.output_path is not None: + audio_loc = ( + glob.glob(args.input_path + "/*.wav") + + glob.glob(args.input_path + "/*.mp3") + + glob.glob(args.input_path + "/*.MP3") + ) if len(audio_loc) == 0: - logger.info('Directory does not have any WAV files.') + logger.info("Directory does not have any WAV files.") return - logger.info('Calculating raw variables...') + logger.info("Calculating raw variables...") for audio in audio_loc: try: fname, file_ext = os.path.splitext(audio) - if file_ext.lower() == '.mp3': + if file_ext.lower() == ".mp3": convert_file(audio) - - out_path = os.path.join(args.output_path, 'raw_variables') - pf.process_acoustic(fname+'.wav', out_path, args.dbm_group, r_config) - pf.process_nlp(fname +'.wav', out_path, args.dbm_group, args.tr, r_config, DEEP_SPEECH) - - remove_convert(audio, '.wav') #removing files(ffmpeg converted) after processing + + out_path = 
os.path.join(args.output_path, "raw_variables") + pf.process_acoustic(fname + ".wav", out_path, args.dbm_group, r_config) + pf.process_nlp( + fname + ".wav", + out_path, + args.dbm_group, + args.tr, + r_config, + DEEP_SPEECH, + ) + + remove_convert( + audio, ".wav" + ) # removing files(ffmpeg converted) after processing except Exception as e: - logger.error('Failed to process wav file.') + e + logger.error("Failed to process wav file.") + def convert_file(input_filepath): """ @@ -158,52 +217,70 @@ def convert_file(input_filepath): _, file_ext = os.path.splitext(os.path.basename(input_filepath)) fname, _ = splitext(input_filepath) call = [] - - if file_ext.lower() == '.mp3': - output_filepath = fname + '.wav' - logger.info('Converting audio from {} to wav'.format(input_filepath)) - call = ['ffmpeg', '-i', input_filepath, output_filepath] - if file_ext.lower() == '.mov': - output_filepath = fname + '.mp4' - logger.info('Converting video from {} to mp4'.format(input_filepath)) - call = ['ffmpeg', '-i', input_filepath, '-vcodec', 'h264','-acodec','aac', '-strict', '-2', output_filepath] + if file_ext.lower() == ".mp3": + output_filepath = fname + ".wav" + logger.info("Converting audio from {} to wav".format(input_filepath)) + call = ["ffmpeg", "-i", input_filepath, output_filepath] - if len(call)>0: + if file_ext.lower() == ".mov": + output_filepath = fname + ".mp4" + logger.info("Converting video from {} to mp4".format(input_filepath)) + call = [ + "ffmpeg", + "-i", + input_filepath, + "-vcodec", + "h264", + "-acodec", + "aac", + "-strict", + "-2", + output_filepath, + ] + + if len(call) > 0: subprocess.check_output(call) - + + def remove_convert(input_filepath, file_ext): """ removing converted files after processing """ - expected_ext = ['.mp3', '.mov'] + expected_ext = [".mp3", ".mov"] input_loc, inp_ext = os.path.splitext(input_filepath) - + if inp_ext.lower() in expected_ext: pf.remove_file(input_loc + file_ext, file_ext) + def process_derive(args, r_config, 
d_config, input_type): """ Processing dbm derived variables """ - if input_type == 'file': + if input_type == "file": input_file = glob.glob(args.input_path) else: - input_file = glob.glob(args.input_path + '/*') + input_file = glob.glob(args.input_path + "/*") - out_raw_path = os.path.join(args.output_path, 'raw_variables') - out_derive_path = os.path.join(args.output_path, 'derived_variables') + out_raw_path = os.path.join(args.output_path, "raw_variables") + out_derive_path = os.path.join(args.output_path, "derived_variables") - logger.info('Calculating derived variables...') - feature_df = der.run_derive(input_file, out_raw_path, out_derive_path, r_config, d_config) + logger.info("Calculating derived variables...") + der.run_derive(input_file, out_raw_path, out_derive_path, r_config, d_config) -if __name__=="__main__": + +if __name__ == "__main__": start_time = time.time() parser = argparse.ArgumentParser(description="Process video/audio......") parser.add_argument("--input_path", help="path to the input files", required=True) - parser.add_argument("--output_path", help="path to the raw and derived variable output", required=True) - parser.add_argument("--dbm_group", help="list of feature groups", nargs='+') + parser.add_argument( + "--output_path", + help="path to the raw and derived variable output", + required=True, + ) + parser.add_argument("--dbm_group", help="list of feature groups", nargs="+") parser.add_argument("--tr", help="speech transcription toogle") args = parser.parse_args() @@ -214,28 +291,28 @@ if __name__=="__main__": _, file_ext = os.path.splitext(os.path.basename(args.input_path)) if file_ext: - input_type = 'file' + input_type = "file" - if file_ext.lower() in ['.mp4','.mov']: - if file_ext.lower() == '.mov': + if file_ext.lower() in [".mp4", ".mov"]: + if file_ext.lower() == ".mov": convert_file(args.input_path) - + process_raw_video_file(args, s_config, r_config) - remove_convert(args.input_path, '.mp4') + remove_convert(args.input_path, 
".mp4") - elif file_ext.lower() in ['.wav','.mp3']: - if file_ext.lower() == '.mp3': + elif file_ext.lower() in [".wav", ".mp3"]: + if file_ext.lower() == ".mp3": convert_file(args.input_path) - + process_raw_audio_file(args, s_config, r_config) - remove_convert(args.input_path, '.wav') + remove_convert(args.input_path, ".wav") else: - logger.error('No WAV/MP3 or MOV/MP4 files detected in input path') + logger.error("No WAV/MP3 or MOV/MP4 files detected in input path") else: - input_type = 'dir' + input_type = "dir" process_raw_video_dir(args, s_config, r_config) process_raw_audio_dir(args, s_config, r_config) process_derive(args, r_config, d_config, input_type) exec_time = time.time() - start_time - logger.info('Done! Processing time: {} seconds'.format(exec_time)) + logger.info("Done! Processing time: {} seconds".format(exec_time)) diff --git a/process_dbm.sh b/process_dbm.sh index 10991adf..507a88f5 100644 --- a/process_dbm.sh +++ b/process_dbm.sh @@ -80,4 +80,4 @@ docker cp dbm_container:/app/output $output_path docker stop dbm_container docker rm dbm_container -exit +exit \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 9dbe1428..a615c9ca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,27 +1,39 @@ #Python (3+) -numpy==1.17.3 -pandas==0.25.0 +Cython +llvmlite +numpy>=1.17.0 +pandas==1.1.5 praat-parselmouth moviepy scikit-image sk-video watchtower -opencv-python webrtcvad imutils -dlib==19.13.0 coloredlogs h5py ujson numba==0.48.0 -librosa +librosa==0.9.2 more_itertools -scipy==1.3.2 +scipy scikit-learn +pytest==7.0.1 pyyaml==5.4.1 pydub -deepspeech +deepspeech==0.9.3 nltk lexicalrichness vaderSentiment textblob +opencv-python>=4.5.5 +pre-commit +ffmpeg +git+https://github.com/cmusatyalab/openface.git +cmake; "Windows" not in platform_system +dlib>=19.13.0; "Windows" not in platform_system +https://github.com/sachadee/Dlib/blob/main/dlib-19.22.99-cp37-cp37m-win_amd64.whl?raw=true; "Windows" in platform_system and 
python_version == "3.7" +https://github.com/sachadee/Dlib/blob/main/dlib-19.22.99-cp38-cp38-win_amd64.whl?raw=true; "Windows" in platform_system and python_version == "3.8" +https://github.com/sachadee/Dlib/blob/main/dlib-19.22.99-cp39-cp39-win_amd64.whl?raw=true; "Windows" in platform_system and python_version == "3.9" +matplotlib +pydoc-markdown