Merge pull request #40 from jordihasianta/master

Refactoring OpenDBM Core Module
This commit is contained in:
Andre Daniel Paredes
2022-09-18 16:33:37 -05:00
committed by GitHub
128 changed files with 6435 additions and 4055 deletions

76
.gitignore vendored Normal file
View File

@@ -0,0 +1,76 @@
.DS_Store
.ipynb_checkpoints/
docs/node_modules
*/.pyc
.vscode
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
#Pydoc generated_file for OpenDBM Python API Documentation
docs/website/api/*api.md
#docker dependencies for mac
.github/brew-colima
.github/brew-docker
# Distribution / packaging
speech/
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
#pycharm
.idea/
#documentation
docs/.docusaurus
.docusaurus
docs/.nvmrc
.nvmrc
node_modules
docs/website/build/
docs/sync-api-docs/generatedComponentApiDocs.js
docs/sync-api-docs/extracted.json

View File

@@ -1,8 +1,12 @@
FROM python:3.6
FROM python:3.7
FROM ubuntu:18.04
MAINTAINER fnndsc "vijay.yadav@aicure.com"
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
RUN apt-get update && apt-get install -y python3-pip \
&& apt-get install -y wget \
&& apt-get install -y automake --upgrade \
@@ -20,21 +24,21 @@ RUN ln -sfn /usr/bin/pip3 /usr/bin/pip
COPY . /app
#cloning openface
WORKDIR /app/pkg
WORKDIR /app/opendbm/pkg
RUN git clone https://github.com/AiCure/open_dbm.git -b openface
RUN echo "Installing OpenFace..."
WORKDIR /app/pkg/open_dbm/OpenFace
WORKDIR /app/opendbm/pkg/open_dbm/OpenFace
RUN bash ./download_models.sh
RUN dpkg --configure -a
RUN su -c ./install.sh
RUN echo "Done OpenFace!"
RUN echo "Cloning DeepSpeech..."
WORKDIR /app/pkg
WORKDIR /app/opendbm/pkg
RUN git clone https://github.com/mozilla/DeepSpeech.git
WORKDIR /app/pkg/DeepSpeech
WORKDIR /app/opendbm/pkg/DeepSpeech
RUN wget https://github.com/mozilla/DeepSpeech/releases/download/v0.9.1/deepspeech-0.9.1-models.pbmm
RUN wget https://github.com/mozilla/DeepSpeech/releases/download/v0.9.1/deepspeech-0.9.1-models.scorer
@@ -43,4 +47,4 @@ RUN pip install --upgrade pip
RUN pip install -r requirements.txt
RUN echo "Requirement txt done!"
CMD [ "python", "./process_data.py" ]
CMD [ "python", "./process_data.py" ]

View File

@@ -1,16 +0,0 @@
"""
file_name: init
project_name: DBM
created: 2020-20-07
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
# Directory that contains this package; every bundled resource is located
# relative to it (one level up, under ``resources/``).
DBMLIB_PATH = os.path.dirname(__file__)
_DBMLIB_RESOURCES = os.path.join(DBMLIB_PATH, '..', 'resources')

# Absolute paths of the YAML files shipped with the library.
DBMLIB_SERVICE_CONFIG = os.path.abspath(
    os.path.join(_DBMLIB_RESOURCES, 'services', 'services.yml'))
DBMLIB_FEATURE_CONFIG = os.path.abspath(
    os.path.join(_DBMLIB_RESOURCES, 'features', 'raw_feature.yml'))
DBMLIB_DERIVE_FEATURE_CONFIG = os.path.abspath(
    os.path.join(_DBMLIB_RESOURCES, 'features', 'derived_feature.yml'))

View File

@@ -1,277 +0,0 @@
"""
file_name: config_raw_feature
project_name: DBM
created: 2020-20-07
"""
import yaml
from dbm_lib import DBMLIB_FEATURE_CONFIG
class ConfigRawReader(object):
    """Load the raw-feature YAML file and expose its entries as attributes.

    The parsed document is kept in ``base_raw``. The ``error_reason`` entry of
    the ``raw_feature`` section is stored as ``err_reason``; every other entry
    named by ``_feature_names`` is stored under an attribute of the same name
    (for example ``self.aco_int`` holds ``config['raw_feature']['aco_int']``).
    """

    @staticmethod
    def _feature_names():
        """Return the ``raw_feature`` keys to load, in the order they are read."""
        # Facial action units that have an intensity entry / a presence entry.
        # Presence additionally covers AU28.
        au_intensity = (1, 2, 4, 5, 6, 7, 9, 10, 12, 14, 15, 17, 20, 23, 25, 26, 45)
        au_presence = (1, 2, 4, 5, 6, 7, 9, 10, 12, 14, 15, 17, 20, 23, 25, 26, 28, 45)

        # Output range
        names = ['mov_headvel_start', 'mov_headvel_end']

        # Acoustic variables
        names += ['aco_int', 'aco_ff', 'aco_voiceLabel', 'aco_hnr', 'aco_gne']
        names += ['aco_fm%d' % idx for idx in range(1, 5)]
        names += ['aco_jitter', 'aco_shimmer']
        names += ['aco_mfcc%d' % idx for idx in range(1, 13)]
        names += [
            'aco_voiceFrame', 'aco_totVoiceFrame', 'aco_voicePct',
            'aco_pausetime', 'aco_totaltime', 'aco_speakingtime',
            'aco_numpauses', 'aco_pausefrac',
        ]

        # Facial Action Units
        names += ['fac_AU%02dint' % au for au in au_intensity]
        names += ['fac_AU%02dpres' % au for au in au_presence]

        # Facial landmarks: fac_LMK00disp .. fac_LMK67disp
        names += ['fac_LMK%02ddisp' % lmk for lmk in range(68)]

        # Facial features
        basic_exp = ['hap_exp', 'sad_exp', 'sur_exp', 'fea_exp',
                     'ang_exp', 'dis_exp', 'con_exp']
        composite_exp = ['pos_exp', 'neg_exp', 'neu_exp', 'cai_exp', 'com_exp',
                         'com_lower_exp', 'com_upper_exp', 'pai_exp']
        names += basic_exp
        # NOTE: the happiness occurrence key really is spelled 'happ_occ'.
        names += ['happ_occ', 'sad_occ', 'sur_occ', 'fea_occ',
                  'ang_occ', 'dis_occ', 'con_occ']
        names += composite_exp
        names += [name + '_full' for name in basic_exp + composite_exp]
        names += ['fac_AsymMaskMouth', 'fac_AsymMaskEye',
                  'fac_AsymMaskEyebrow', 'fac_AsymMaskCom']

        # Movement features
        names += [
            'head_vel', 'mov_blink_ear', 'vid_dur', 'fps',
            'mov_blinkframes', 'mov_blinkdur',
            'mov_Hpose_Pitch', 'mov_Hpose_Yaw', 'mov_Hpose_Roll', 'mov_Hpose_Dist',
            'mov_freq_trem_freq', 'mov_freq_trem_index', 'mov_freq_trem_pindex',
            'mov_amp_trem_freq', 'mov_amp_trem_index', 'mov_amp_trem_pindex',
        ]
        names += ['fac_tremor_median_%d' % lmk
                  for lmk in (5, 12, 8, 48, 54, 28, 51, 66, 57)]
        names += [
            'mov_leye_x', 'mov_leye_y', 'mov_leye_z',
            'mov_reye_x', 'mov_reye_y', 'mov_reye_z',
            'mov_eleft_disp', 'mov_eright_disp',
        ]

        # NLP features
        names += ['nlp_transcribe', 'nlp_numSentences']
        for stem in ('singPron', 'pastTense', 'pronouns',
                     'verbs', 'adjectives', 'nouns'):
            names += ['nlp_%sPerAns' % stem, 'nlp_%sPerSen' % stem]
        names += ['nlp_sentiment_mean', 'nlp_mattr',
                  'nlp_wordsPerMin', 'nlp_totalTime']
        return names

    def __init__(self,
                 feature_config_yml=None):
        """Read the feature configuration and populate the instance.

        Args:
            feature_config_yml (None, optional): path of the YAML file that
                defines the raw features; ``DBMLIB_FEATURE_CONFIG`` is used
                when omitted.

        Raises:
            KeyError: if the ``raw_feature`` section or one of the expected
                entries is missing from the parsed document.
        """
        if feature_config_yml is None:
            feature_config = DBMLIB_FEATURE_CONFIG
        else:
            feature_config = feature_config_yml

        with open(feature_config, 'r') as ymlfile:
            # safe_load: yaml.load() without an explicit Loader is rejected by
            # PyYAML >= 6 and can build arbitrary objects on older releases.
            config = yaml.safe_load(ymlfile)

        self.base_raw = config
        raw = config['raw_feature']
        self.err_reason = raw['error_reason']
        for name in self._feature_names():
            setattr(self, name, raw[name])

View File

@@ -1,164 +0,0 @@
"""
file_name: process_features
project_name: DBM
created: 2020-20-07
"""
from dbm_lib.dbm_features.raw_features.audio import intensity, pitch_freq, hnr, gne, voice_frame_score, formant_freq
from dbm_lib.dbm_features.raw_features.audio import pause_segment, jitter, shimmer, mfcc
from dbm_lib.dbm_features.raw_features.video import face_asymmetry, face_au, face_emotion_expressivity, face_landmark
from dbm_lib.dbm_features.raw_features.movement import head_motion, eye_blink, eye_gaze, voice_tremor, facial_tremor
from dbm_lib.dbm_features.raw_features.nlp import transcribe, speech_features
import subprocess
import logging
from os.path import isfile, splitext, basename, dirname, join
import glob
import os
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
def audio_to_wav(input_filepath):
    """Extract a video's audio track into a wav file next to the input.

    The output path is the input path with its extension replaced by
    ``.wav``. Nothing is converted when that file already exists. The
    conversion is best-effort: any failure is logged (with traceback) and
    swallowed so that the caller can carry on.

    Args:
        input_filepath: (str) path of the media file handed to ffmpeg

    Returns:
        None
    """
    try:
        fname, _ = splitext(input_filepath)
        output_filepath = fname + '.wav'

        if not isfile(output_filepath):
            # -vn drops the video stream; audio is written as 16-bit PCM at 44.1 kHz.
            call = ['ffmpeg', '-i', input_filepath, '-vn', '-acodec', 'pcm_s16le', '-ar', '44100', output_filepath]
            logger.info('Converting audio from {} to wav'.format(input_filepath))
            subprocess.check_output(call)
            logger.info('wav output saved in {}'.format(output_filepath))
        else:
            logger.info('Output file {} already exists'.format(output_filepath))
    except Exception:
        # logger.exception keeps the original message and appends the traceback,
        # so a missing ffmpeg binary or a bad input can actually be diagnosed.
        logger.exception('Failed to extract audio from Video')
def process_acoustic(video_uri, out_dir, dbm_group, r_config):
    """
    Run every acoustic feature extractor on one input.

    The call is a no-op when ``dbm_group`` is a non-empty collection that
    does not contain 'acoustic'. With ``None`` or an empty collection all
    extractors run.

    Args:
        video_uri: video path; out_dir: raw variable output dir
        dbm_group: list of features group to process; r_config: raw feature config object
    """
    group_filter_active = dbm_group is not None and len(dbm_group) > 0
    if group_filter_active and 'acoustic' not in dbm_group:
        return

    logger.info('Processing acoustic variables from data in {}'.format(video_uri))

    # (log message, extractor) pairs, executed in this order.
    steps = (
        ('processing audio intensity....', intensity.run_intensity),
        ('processing audio pitch freq....', pitch_freq.run_pitch),
        ('processing HNR....', hnr.run_hnr),
        ('processing GNE....', gne.run_gne),
        ('processing voice frame score....', voice_frame_score.run_vfs),
        ('processing formant frequency....', formant_freq.run_formant),
        ('processing pause segment....', pause_segment.run_pause_segment),
        ('processing jitter....', jitter.run_jitter),
        ('processing shimmer....', shimmer.run_shimmer),
        ('processing mfcc....', mfcc.run_mfcc),
    )
    for message, extractor in steps:
        logger.info(message)
        extractor(video_uri, out_dir, r_config)
def process_facial(video_uri, out_dir, dbm_group, r_config):
    """
    Run every facial feature extractor on one input.

    The call is a no-op when ``dbm_group`` is a non-empty collection that
    does not contain 'facial'.

    Args:
        video_uri: video path; out_dir: raw variable output dir
        dbm_group: list of features to process; r_config: raw feature config object
    """
    group_filter_active = dbm_group is not None and len(dbm_group) > 0
    if group_filter_active and 'facial' not in dbm_group:
        return

    logger.info('Processing facial variables from data in {}'.format(video_uri))

    # (log message, extractor) pairs, executed in this order.
    steps = (
        ('processing facial asymmetry....', face_asymmetry.run_face_asymmetry),
        ('processing facial Action Unit....', face_au.run_face_au),
        ('processing facial expressivity....', face_emotion_expressivity.run_face_expressivity),
        ('processing facial landmark....', face_landmark.run_face_landmark),
    )
    for message, extractor in steps:
        logger.info(message)
        extractor(video_uri, out_dir, r_config)
def process_movement(video_uri, out_dir, dbm_group, r_config, dlib_model):
    """
    Run every movement feature extractor on one input.

    The call is a no-op when ``dbm_group`` is a non-empty collection that
    does not contain 'movement'.

    Args:
        video_uri: video path; out_dir: raw variable output dir
        dbm_group: list of features to process; r_config: raw feature config object
        dlib_model: shape predictor model path (only passed to the eye blink step)
    """
    restricted = dbm_group is not None and len(dbm_group) > 0
    if restricted and 'movement' not in dbm_group:
        return

    # Arguments shared by every extractor below.
    shared_args = (video_uri, out_dir, r_config)
    logger.info('Processing movement variables from data in {}'.format(video_uri))

    logger.info('processing head movement....')
    head_motion.run_head_movement(*shared_args)

    logger.info('processing eye blink....')
    eye_blink.run_eye_blink(*shared_args, dlib_model)

    logger.info('processing eye gaze....')
    eye_gaze.run_eye_gaze(*shared_args)

    logger.info('processing voice tremor....')
    voice_tremor.run_vtremor(*shared_args)

    logger.info('processing facial tremor....')
    facial_tremor.fac_tremor_process(*shared_args, model_output=True)
def process_nlp(video_uri, out_dir, dbm_group, tran_tog, r_config, deep_path):
    """
    Transcribe one input and derive the speech features from it.

    The call is a no-op when ``dbm_group`` is a non-empty collection that
    does not contain 'speech'.

    Args:
        video_uri: video path; out_dir: raw variable output dir
        dbm_group: list of features to process; r_config: raw feature config object
        tran_tog: forwarded unchanged to speech_features.run_speech_feature
        deep_path: deep speech build path
    """
    if dbm_group is not None and len(dbm_group) > 0:
        if 'speech' not in dbm_group:
            return

    logger.info('Processing nlp variables from data in {}'.format(video_uri))

    # Transcription runs first; the speech-feature step follows it.
    transcribe.run_transcribe(video_uri, out_dir, r_config, deep_path)
    speech_features.run_speech_feature(video_uri, out_dir, r_config, tran_tog)
def remove_file(file_path, file_ext = '.wav'):
    """
    Remove the sibling of ``file_path`` that carries ``file_ext``.

    The extension of ``file_path`` is swapped for ``file_ext`` and the first
    file matching the resulting path (if any) is deleted. Nothing happens
    when no such file exists.

    Args:
        file_path: (str) path whose directory and base name locate the file
        file_ext: (str) extension of the file to delete, '.wav' by default
    """
    file_dir = dirname(file_path)
    file_name, _ = splitext(basename(file_path))

    # Escape the directory and base name so glob metacharacters in real file
    # names (e.g. "take[1].mp4") are matched literally; file_ext is left
    # untouched in case a caller passes a pattern there.
    pattern = join(glob.escape(file_dir), glob.escape(file_name) + file_ext)
    matches = glob.glob(pattern)
    if matches:
        os.remove(matches[0])

View File

@@ -1,10 +0,0 @@
"""
file_name: init
project_name: DBM
created: 2020-20-07
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

View File

@@ -1,133 +0,0 @@
"""
file_name: formant_freq
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import parselmouth
import numpy as np
import parselmouth
import librosa
import glob
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
formant_dir = 'acoustic/formant_freq'
csv_ext = '_formant.csv'
error_txt = 'error: length less than 0.064'
def formant_list(formant,snd):
    """
    Sample the first four formants every 0.001 time units.

    Sampling starts at 0.001 and stops before ``snd.duration - 0.02``
    (rounded to two decimals).

    Args:
        formant: object exposing ``get_value_at_time(formant_number, time)``
        snd: object exposing ``duration`` (a Parselmouth sound in this module)
    Returns:
        Tuple of four lists (formants 1 to 4), one value per sampled time
    """
    sampling_end = round(snd.duration - 0.02, 2)
    sample_times = np.arange(0.001, sampling_end, 0.001)

    # One (f1, f2, f3, f4) row per sampled instant, queried in that order.
    rows = [
        tuple(formant.get_value_at_time(number, instant) for number in (1, 2, 3, 4))
        for instant in sample_times
    ]

    first = [row[0] for row in rows]
    second = [row[1] for row in rows]
    third = [row[2] for row in rows]
    fourth = [row[3] for row in rows]
    return first, second, third, fourth
def formant_score(path):
    """
    Compute the formant tracks of an audio file with parselmouth.

    Args:
        path: (.wav) audio file location
    Returns:
        The four formant lists produced by ``formant_list`` for this sound
    """
    recording = parselmouth.Sound(path)
    # Burg formant analysis with a 0.001 time step.
    burg_formants = recording.to_formant_burg(time_step=.001)
    return formant_list(burg_formants, recording)
def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the formant frequency table for one audio file and save it.

    Args:
        video_uri: value written to the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file; fl_name: input file name
        out_loc: (str) Output directory; r_config: raw variable config
    """
    first, second, third, fourth = formant_score(audio_file)

    df_formant = pd.DataFrame(first, columns=[r_config.aco_fm1])
    remaining = (
        (r_config.aco_fm2, second),
        (r_config.aco_fm3, third),
        (r_config.aco_fm4, fourth),
    )
    for column, values in remaining:
        df_formant[column] = values

    # Must run before the text columns below are added: the empty regex
    # pattern would otherwise also hit their string values.
    df_formant.replace('', np.nan, regex=True, inplace=True)

    df_formant[r_config.err_reason] = 'Pass'  # will replace with threshold in future release
    df_formant['Frames'] = df_formant.index
    df_formant['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_formant, out_loc, fl_name, formant_dir, csv_ext)
def empty_fm(video_uri, out_loc, fl_name, r_config):
    """
    Save a one-row placeholder formant table.

    Every value is NaN and the error column carries the module-level
    ``error_txt``; used when the formants cannot be computed.
    """
    header = [
        'Frames',
        r_config.aco_fm1,
        r_config.aco_fm2,
        r_config.aco_fm3,
        r_config.aco_fm4,
        r_config.err_reason,
    ]
    # Frames plus the four formant columns are NaN; the last cell is the reason.
    placeholder_row = [np.nan] * 5 + [error_txt]

    df_fm = pd.DataFrame([placeholder_row], columns=header)
    df_fm['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext)
def run_formant(video_uri, out_dir, r_config):
    """
    Compute and save the formant frequencies for one input.

    Looks for ``<fl_name>.wav`` in the input location returned by
    ``ut.filter_path``. When the file exists, either the real formant table or
    (for audio shorter than 0.064 s) a placeholder table is written. When no
    wav file is found nothing is written. The step is best-effort: failures
    are logged with their traceback and not propagated.

    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if len(aud_filter) > 0:
            audio_file = aud_filter[0]
            aud_dur = librosa.get_duration(filename=audio_file)

            # Audio shorter than 0.064 s gets a placeholder row instead.
            if float(aud_dur) < 0.064:
                logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
                empty_fm(video_uri, out_loc, fl_name, r_config)
                return

            calc_formant(video_uri, audio_file, out_loc, fl_name, r_config)
    except Exception:
        # Same message as before, but with the traceback so the cause is visible.
        logger.exception('Failed to process audio file')

View File

@@ -1,161 +0,0 @@
"""
file_name: gne
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import os
import glob
import parselmouth
import librosa
import more_itertools as mit
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
gne_dir = 'acoustic/glottal_noise'
ff_dir = 'acoustic/pitch'
csv_ext = '_gne.csv'
def gne_ratio(sound):
    """
    Compute the glottal-to-noise excitation ratio of a sound.

    Args:
        sound: parselmouth object (anything exposing ``to_harmonicity_gne()``
            whose result has a ``values`` array)
    Returns:
        The largest GNE value over all entries, ignoring those equal to -200
    """
    harmonicity_gne = sound.to_harmonicity_gne()
    gne_all_bands = harmonicity_gne.values
    # -200 entries are masked out (presumably the "undefined" marker -- confirm).
    # np.nan rather than np.NaN: the upper-case alias was removed in NumPy 2.0.
    gne_all_bands = np.where(gne_all_bands == -200, np.nan, gne_all_bands)
    gne = np.nanmax(gne_all_bands)  # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
    return gne
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt):
    """
    Save a one-row placeholder GNE table.

    The frame and GNE cells are NaN and the error column carries
    ``error_txt``; used when the GNE cannot be computed.
    """
    header = ['Frames', r_config.aco_gne, r_config.err_reason]
    placeholder_row = [np.nan, np.nan, error_txt]

    df_gne = pd.DataFrame([placeholder_row], columns=header)
    df_gne['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
def segment_pitch(dir_path, r_config):
    """
    Split the pitch output into runs of consecutive voiced / unvoiced frames.

    Every ``*_pitch.csv`` file in ``dir_path`` is read; if several exist, the
    result reflects the last one that could be processed. A file that cannot
    be read or parsed is logged and skipped.

    Args:
        dir_path: directory holding the pitch csv output
        r_config: raw variable config object (``aco_voiceLabel`` names the
            column holding the "yes"/"no" voice label)
    Returns:
        Tuple ``(com_speech_sort, voiced_yes, voiced_no)``: all runs sorted by
        their first frame index, the "yes" runs, and the "no" runs. Each run
        is a list of consecutive row indices. All three are empty lists when
        no pitch file was processed.
    """
    # Three independent lists (the old ``([], ) * 3`` aliased a single object).
    com_speech_sort, voiced_yes, voiced_no = [], [], []

    for file in os.listdir(dir_path):
        if not file.endswith('_pitch.csv'):
            continue
        try:
            ff_df = pd.read_csv(os.path.join(dir_path, file))
            voice_label = ff_df[r_config.aco_voiceLabel]

            indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
            voiced_yes = [list(group) for group in mit.consecutive_groups(indices_yes)]

            indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
            voiced_no = [list(group) for group in mit.consecutive_groups(indices_no)]

            com_speech = voiced_yes + voiced_no
            com_speech_sort = sorted(com_speech, key=lambda x: x[0])
        except Exception:
            # Still best-effort, but no longer silent, and no longer catching
            # KeyboardInterrupt / SystemExit as the bare ``except`` did.
            logger.exception('Failed to segment pitch file {}'.format(file))
    return com_speech_sort, voiced_yes, voiced_no
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
    """
    Compute the GNE of every voiced segment.

    Args:
        com_speech_sort: all segments (lists of frame indices), in output order
        voiced_yes: the segments labelled as voiced
        voiced_no: the segments labelled as unvoiced (not used here)
        gne_all_frames: list to fill, one slot per entry of com_speech_sort;
            it is modified in place
        audio_file: (.wav) audio file the segments refer to
    Returns:
        ``gne_all_frames``, holding the GNE of each voiced segment longer than
        one frame and NaN for every other segment (including segments whose
        GNE computation failed)
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=.001)

    for idx, vs in enumerate(com_speech_sort):
        # Set before the try block so the slot is always assignable;
        # np.nan because the np.NaN alias no longer exists in NumPy 2.0.
        max_gne = np.nan
        try:
            if vs in voiced_yes and len(vs) > 1:
                # Map the segment's first/last pitch frame to sample positions.
                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])

                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))

                # NOTE(review): the new Sound is built from raw samples without
                # passing the source sampling frequency -- confirm the library
                # default matches the wav files produced upstream.
                samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end])
                max_gne = gne_ratio(samples)
        except Exception:
            # Best-effort per segment: keep NaN, but leave a trace for debugging.
            logger.debug('GNE failed for segment %s', idx, exc_info=True)

        gne_all_frames[idx] = max_gne
    return gne_all_frames
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the GNE table for one audio file and save it.

    The computation relies on the pitch output found under
    ``<out_loc>/acoustic/pitch``; when that directory is missing a
    placeholder table with an error reason is saved instead.

    Args:
        video_uri: value written to the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv's
        fl_name: input file name; r_config: raw variable config object
    """
    dir_path = os.path.join(out_loc, ff_dir)
    if os.path.isdir(dir_path):
        voice_seg = segment_pitch(dir_path, r_config)

        # One slot per segment, NaN until segment_gne fills it.
        # np.nan: the np.NaN alias was removed in NumPy 2.0.
        gne_all_frames = [np.nan] * len(voice_seg[0])
        gne_segment_frames = segment_gne(voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file)

        df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
        df_gne[r_config.err_reason] = 'Pass'  # will replace with threshold in future release

        df_gne['Frames'] = df_gne.index
        df_gne['dbm_master_url'] = video_uri

        logger.info('Processing Output file {} '.format(out_loc))
        ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
    else:
        error_txt = 'error: pitch freq not available'
        empty_gne(video_uri, out_loc, fl_name, r_config, error_txt)
def run_gne(video_uri, out_dir, r_config):
    """
    Compute and save the glottal noise ratio for one input.

    Looks for ``<fl_name>.wav`` in the input location returned by
    ``ut.filter_path``. When the file exists, either the real GNE table or
    (for audio shorter than 0.064 s) a placeholder table is written. When no
    wav file is found nothing is written. The step is best-effort: failures
    are logged with their traceback and not propagated.

    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if len(aud_filter) > 0:
            audio_file = aud_filter[0]
            aud_dur = librosa.get_duration(filename=audio_file)

            # Audio shorter than 0.064 s gets a placeholder row instead.
            if float(aud_dur) < 0.064:
                logger.info('Output file {} size is less than 0.064sec'.format(audio_file))

                error_txt = 'error: length less than 0.064'
                empty_gne(video_uri, out_loc, fl_name, r_config, error_txt)
                return

            calc_gne(video_uri, audio_file, out_loc, fl_name, r_config)
    except Exception:
        # Same message as before, but with the traceback so the cause is visible.
        logger.exception('Failed to process audio file')

View File

@@ -1,96 +0,0 @@
"""
file_name: hnr
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import os
import glob
import parselmouth
import librosa
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
hnr_dir = 'acoustic/harmonic_noise'
csv_ext = '_hnr.csv'
error_txt = 'error: length less than 0.064'
def hnr_ratio(filepath):
    """
    Compute the harmonics-to-noise ratio of an audio file with parselmouth.

    Args:
        filepath: (.wav) audio file location
    Returns:
        Transposed array of the harmonicity values (autocorrelation method,
        0.001 time step) in which every -200 entry is replaced by NaN
    """
    sound = parselmouth.Sound(filepath)
    harmonicity = sound.to_harmonicity_ac(time_step=.001)

    hnr_all_frames = harmonicity.values
    # -200 entries are masked out (presumably the "undefined" marker -- confirm).
    # np.nan rather than np.NaN: the upper-case alias was removed in NumPy 2.0.
    hnr_all_frames = np.where(hnr_all_frames == -200, np.nan, hnr_all_frames)
    return hnr_all_frames.transpose()
def calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the harmonic noise ratio table for one audio file and save it.
    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv's
        fl_name: (str) file name passed on to ut.save_output
        r_config: raw variable config object (column names)
    """
    frames = hnr_ratio(audio_file)

    hnr_table = pd.DataFrame(frames, columns=[r_config.aco_hnr])
    hnr_table['Frames'] = hnr_table.index
    hnr_table['dbm_master_url'] = video_uri
    # will replace with threshold in future release
    hnr_table[r_config.err_reason] = 'Pass'

    save_msg = 'Saving Output file {} '.format(out_loc)
    logger.info(save_msg)
    ut.save_output(hnr_table, out_loc, fl_name, hnr_dir, csv_ext)
def empty_hnr(video_uri, out_loc, fl_name, r_config):
    """
    Save a one-row placeholder HNR table (NaN values plus the module-level
    error text) for audio that could not be processed.
    """
    header = ['Frames', r_config.aco_hnr, r_config.err_reason]
    placeholder_row = [np.nan, np.nan, error_txt]

    empty_table = pd.DataFrame([placeholder_row], columns=header)
    empty_table['dbm_master_url'] = video_uri

    save_msg = 'Saving Output file {} '.format(out_loc)
    logger.info(save_msg)
    ut.save_output(empty_table, out_loc, fl_name, hnr_dir, csv_ext)
def run_hnr(video_uri, out_dir, r_config):
    """
    Produce the harmonic noise ratio output for the audio of one video.

    The audio is expected as '<fl_name>.wav' inside the input location
    returned by ut.filter_path. Audio shorter than 0.064 sec is reported
    through empty_hnr; otherwise calc_hnr is run. When no matching .wav
    file is found this function does nothing.
    -------------------
    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if not aud_filter:
            return

        audio_file = aud_filter[0]
        aud_dur = librosa.get_duration(filename=audio_file)
        if float(aud_dur) < 0.064:
            logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
            empty_hnr(video_uri, out_loc, fl_name, r_config)
            return

        calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config)
    except Exception:
        # Best effort per file: never propagate, but record the traceback so
        # the cause of the failure is not lost.
        logger.exception('Failed to process audio file')

View File

@@ -1,92 +0,0 @@
"""
file_name: intensity
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import glob
import parselmouth
import librosa
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
intensity_dir = 'acoustic/intensity'
csv_ext = '_intensity.csv'
error_txt = 'error: length less than 0.064'
def intensity_score(path):
    """
    Fetch the intensity track of an audio file with parselmouth.
    Args:
        path: (.wav) audio file location
    Returns:
        First row of the intensity values computed with a 0.001 time step
    """
    snd = parselmouth.Sound(path)
    return snd.to_intensity(time_step=.001).values[0]
def calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the intensity table for one audio file and save it.
    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv's
        fl_name: (str) file name passed on to ut.save_output
        r_config: raw variable config object (column names)
    """
    frames = intensity_score(audio_file)

    intensity_table = pd.DataFrame(frames, columns=[r_config.aco_int])
    intensity_table['Frames'] = intensity_table.index
    intensity_table['dbm_master_url'] = video_uri
    # will replace with threshold in future release
    intensity_table[r_config.err_reason] = 'Pass'

    save_msg = 'Saving Output file {} '.format(out_loc)
    logger.info(save_msg)
    ut.save_output(intensity_table, out_loc, fl_name, intensity_dir, csv_ext)
def empty_intensity(video_uri, out_loc, fl_name, r_config):
    """
    Save a one-row placeholder Intensity table (NaN values plus the
    module-level error text) for audio that could not be processed.
    """
    header = ['Frames', r_config.aco_int, r_config.err_reason]
    placeholder_row = [np.nan, np.nan, error_txt]

    empty_table = pd.DataFrame([placeholder_row], columns=header)
    empty_table['dbm_master_url'] = video_uri

    save_msg = 'Saving Output file {} '.format(out_loc)
    logger.info(save_msg)
    ut.save_output(empty_table, out_loc, fl_name, intensity_dir, csv_ext)
def run_intensity(video_uri, out_dir, r_config):
    """
    Produce the Intensity output for the audio of one video.

    The audio is expected as '<fl_name>.wav' inside the input location
    returned by ut.filter_path. Audio shorter than 0.064 sec is reported
    through empty_intensity; otherwise calc_intensity is run. When no
    matching .wav file is found this function does nothing.
    -------------------
    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if not aud_filter:
            return

        audio_file = aud_filter[0]
        aud_dur = librosa.get_duration(filename=audio_file)
        if float(aud_dur) < 0.064:
            logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
            empty_intensity(video_uri, out_loc, fl_name, r_config)
            return

        calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config)
    except Exception:
        # Best effort per file: never propagate, but record the traceback so
        # the cause of the failure is not lost.
        logger.exception('Failed to process audio file')

View File

@@ -1,159 +0,0 @@
"""
file_name: jitter_processing
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import os
import glob
import parselmouth
import librosa
import numpy as np
import more_itertools as mit
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
jitter_dir = 'acoustic/jitter'
ff_dir = 'acoustic/pitch'
csv_ext = '_jitter.csv'
def audio_jitter(sound):
    """
    Compute local jitter for a sound through Praat (via parselmouth).
    Args:
        sound: parselmouth object
    Returns:
        The result of Praat's "Get jitter (local)" on the periodic (cc)
        point process built from the sound
    """
    # 80 / 500 are presumably the pitch floor / ceiling in Hz - confirm.
    point_process = parselmouth.praat.call(sound, "To PointProcess (periodic, cc)...", 80, 500)
    return parselmouth.praat.call(point_process, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
def empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt):
    """
    Save a one-row placeholder jitter table (NaN values plus the given
    error text) for audio that could not be processed.
    """
    header = ['Frames', r_config.aco_jitter, r_config.err_reason]
    placeholder_row = [np.nan, np.nan, error_txt]

    empty_table = pd.DataFrame([placeholder_row], columns=header)
    empty_table['dbm_master_url'] = video_uri

    save_msg = 'Saving Output file {} '.format(out_loc)
    logger.info(save_msg)
    ut.save_output(empty_table, out_loc, fl_name, jitter_dir, csv_ext)
def segment_pitch(dir_path, r_config):
    """
    Split the rows of a pitch csv into runs of consecutive voiced and
    unvoiced frames.

    Every file in dir_path ending in '_pitch.csv' is read. If several are
    present, the last one (in os.listdir order) that could be segmented
    determines the result. Files that cannot be read or segmented are
    logged and skipped.
    Args:
        dir_path: (str) directory containing the pitch csv
        r_config: raw variable config object; aco_voiceLabel names the
            column holding the "yes"/"no" voice label
    Returns:
        (com_speech_sort, voiced_yes, voiced_no) where voiced_yes and
        voiced_no are lists of runs of consecutive row positions labelled
        "yes" / "no", and com_speech_sort is both combined, sorted by the
        first position of each run. All three are empty lists when no
        pitch csv was segmented.
    """
    # Three independent lists: `([], ) * 3` would bind one shared list to
    # all three names.
    com_speech_sort, voiced_yes, voiced_no = [], [], []
    for file in os.listdir(dir_path):
        if not file.endswith('_pitch.csv'):
            continue
        pitch_csv = os.path.join(dir_path, file)
        try:
            ff_df = pd.read_csv(pitch_csv)
            voice_label = ff_df[r_config.aco_voiceLabel]
            indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
            indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
            yes_runs = [list(group) for group in mit.consecutive_groups(indices_yes)]
            no_runs = [list(group) for group in mit.consecutive_groups(indices_no)]
            merged = sorted(yes_runs + no_runs, key=lambda run: run[0])
        except Exception:
            # Stay best effort, but do not hide the failure.
            logger.warning('Could not segment pitch file {}'.format(pitch_csv), exc_info=True)
            continue
        # Commit only complete results so the three lists always agree.
        com_speech_sort, voiced_yes, voiced_no = merged, yes_runs, no_runs
    return com_speech_sort, voiced_yes, voiced_no
def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_file):
    """
    calculating jitter for each voice segment
    Args:
        com_speech_sort: list of segments, each a list of pitch frame indices
        voiced_yes: the segments that are voiced
        voiced_no: the unvoiced segments (not used; kept for the interface)
        jitter_frames: list with one slot per segment; filled in place
        audio_file: (.wav) audio file location
    Returns:
        jitter_frames, where entry i is the jitter of segment i, or NaN when
        the segment is unvoiced, has a single frame, or could not be processed
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=.001)
    # Hashed lookup instead of scanning the list of lists per segment.
    voiced = {tuple(seg) for seg in voiced_yes}
    for idx, vs in enumerate(com_speech_sort):
        # Assigned before the try so a failure can never leave it undefined.
        jitter = np.nan
        if len(vs) > 1 and tuple(vs) in voiced:
            try:
                # NOTE(review): vs holds 0-based csv row positions, while Praat
                # frame numbers are presumably 1-based - confirm the offset.
                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])
                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))
                # NOTE(review): no sampling frequency is passed, so the
                # library default applies - confirm it matches the audio.
                samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end])
                jitter = audio_jitter(samples)
            except Exception:
                # One bad segment must not abort the file; it stays NaN.
                logger.debug('jitter failed for segment {}'.format(idx), exc_info=True)
        jitter_frames[idx] = jitter
    return jitter_frames
def calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Preparing jitter matrix

    Relies on the pitch output directory ('acoustic/pitch' under out_loc):
    if it does not exist, an empty jitter output with an error text is
    written instead.
    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv
        fl_name: (str) file name passed on to ut.save_output
        r_config: config.config_raw_feature.pyConfigFeatureNmReader object
    """
    dir_path = os.path.join(out_loc, ff_dir)
    if not os.path.isdir(dir_path):
        error_txt = 'error: fundamental freq not available'
        empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt)
        return

    com_speech_sort, voiced_yes, voiced_no = segment_pitch(dir_path, r_config)
    # np.nan: the np.NaN alias no longer exists in NumPy 2.0.
    jitter_frames = [np.nan] * len(com_speech_sort)
    jitter_segment_frames = segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_file)

    df_jitter = pd.DataFrame(jitter_segment_frames, columns=[r_config.aco_jitter])
    df_jitter[r_config.err_reason] = 'Pass'  # will replace with threshold in future release
    df_jitter['Frames'] = df_jitter.index
    df_jitter['dbm_master_url'] = video_uri
    logger.info('Processing Output file {} '.format(out_loc))
    ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
def run_jitter(video_uri, out_dir, r_config):
    """
    Produce the jitter output for the audio of one video.

    The audio is expected as '<fl_name>.wav' inside the input location
    returned by ut.filter_path. Audio shorter than 0.064 sec is reported
    through empty_jitter with an error text; otherwise calc_jitter is run.
    When no matching .wav file is found this function does nothing.
    -------------------
    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if not aud_filter:
            return

        audio_file = aud_filter[0]
        aud_dur = librosa.get_duration(filename=audio_file)
        if float(aud_dur) < 0.064:
            logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
            error_txt = 'error: length less than 0.064'
            empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt)
            return

        calc_jitter(video_uri, audio_file, out_loc, fl_name, r_config)
    except Exception:
        # Best effort per file: never propagate, but record the traceback so
        # the cause of the failure is not lost.
        logger.exception('Failed to process audio file')

View File

@@ -1,105 +0,0 @@
"""
file_name: mfcc
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import os
import glob
import parselmouth
import librosa
import numpy as np
import librosa
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
mfcc_dir = 'acoustic/mfcc'
csv_ext = '_mfcc.csv'
error_txt = 'error: length less than 0.064'
def empty_mfcc(video_uri, out_loc, fl_name, r_config):
    """
    Save a one-row placeholder mfcc table (NaN for 'Frames' and the twelve
    mfcc columns, plus the module-level error text) for audio that could
    not be processed.
    """
    mfcc_cols = [getattr(r_config, 'aco_mfcc' + str(k)) for k in range(1, 13)]
    header = ['Frames'] + mfcc_cols + [r_config.err_reason]
    # One NaN for 'Frames' and one per coefficient column.
    placeholder_row = [np.nan] * 13 + [error_txt]

    empty_table = pd.DataFrame([placeholder_row], columns=header)
    empty_table['dbm_master_url'] = video_uri

    save_msg = 'Saving Output file {} '.format(out_loc)
    logger.info(save_msg)
    ut.save_output(empty_table, out_loc, fl_name, mfcc_dir, csv_ext)
def audio_mfcc(path):
    """
    Fetch mfccs of an audio file with parselmouth.
    Args:
        path: (.wav) audio file location
    Returns:
        The mfcc array (12 coefficients requested, 0.001 time step) with
        its first row removed
    """
    snd = parselmouth.Sound(path)
    coefficients = snd.to_mfcc(time_step=.001, number_of_coefficients=12).to_array()
    # Row 0 is dropped (presumably the c0 term - confirm), leaving the
    # rows that calc_mfcc maps to aco_mfcc1..aco_mfcc12.
    return np.delete(coefficients, 0, axis=0)
def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the mfcc table for one audio file and save it.
    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: output location to save csv
        fl_name: (str) name of audio file
        r_config: config.config_raw_feature.pyConfigFeatureNmReader object
    """
    coefficients = audio_mfcc(audio_file)
    names = r_config.base_raw['raw_feature']
    # Column k (1..12) takes row k-1 of the coefficient array.
    by_column = {names['aco_mfcc' + str(k)]: coefficients[k - 1, :] for k in range(1, 13)}

    mfcc_table = pd.DataFrame(by_column)
    mfcc_table['Frames'] = mfcc_table.index
    # may replace based on threshold in future release
    mfcc_table[r_config.err_reason] = 'Pass'
    mfcc_table['dbm_master_url'] = video_uri
    ut.save_output(mfcc_table, out_loc, fl_name, mfcc_dir, csv_ext)
def run_mfcc(video_uri, out_dir, r_config):
    """
    Produce the mfcc output for the audio of one video.

    The audio is expected as '<fl_name>.wav' inside the input location
    returned by ut.filter_path. Audio shorter than 0.064 sec is reported
    through empty_mfcc; otherwise calc_mfcc is run. When no matching .wav
    file is found this function does nothing.
    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if not aud_filter:
            return

        audio_file = aud_filter[0]
        aud_dur = librosa.get_duration(filename=audio_file)
        if float(aud_dur) < 0.064:
            logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
            empty_mfcc(video_uri, out_loc, fl_name, r_config)
            return

        calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config)
    except Exception:
        # Best effort per file: never propagate, but record the traceback so
        # the cause of the failure is not lost.
        logger.exception('Failed to process audio file')

View File

@@ -1,113 +0,0 @@
"""
file_name: pitch_freq
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import os
import glob
import parselmouth
import librosa
import numpy as np
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
ff_dir = 'acoustic/pitch'
csv_ext = '_pitch.csv'
error_txt = 'error: length less than 0.064'
def audio_pitch(path):
    """
    Fetch pitch/fundamental frequency of an audio file with parselmouth.
    Args:
        path: (.wav) audio file location
    Returns:
        (list) the 'frequency' field of the pitch track computed with a
        0.001 time step, one value per frame
    """
    pitch_track = parselmouth.Sound(path).to_pitch(time_step=.001)
    return list(pitch_track.selected_array['frequency'])
def label_speech(row,fd_freq):
    """
    Tell whether a frame is voiced from its pitch frequency.
    Args:
        row: mapping-like frame record indexable by fd_freq
        fd_freq: key/column under which the pitch frequency is stored
    Returns:
        (str) 'yes' when the frequency is greater than 0, otherwise 'no'
    """
    return 'yes' if row[fd_freq] > 0 else 'no'
def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Preparing pitch frequency matrix
    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv
        fl_name: (str) file name passed on to ut.save_output
        r_config: raw variable config object (column names)
    """
    ff_frames = audio_pitch(audio_file)
    df_ffreq = pd.DataFrame(ff_frames, columns=[r_config.aco_ff])
    df_ffreq['Frames'] = df_ffreq.index
    # Same rule as label_speech (frequency > 0 -> 'yes', else 'no'), applied
    # to the whole column at once instead of one Python call per row.
    df_ffreq[r_config.aco_voiceLabel] = np.where(df_ffreq[r_config.aco_ff] > 0, 'yes', 'no')
    df_ffreq[r_config.err_reason] = 'Pass'  # will replace with threshold in future release
    df_ffreq['dbm_master_url'] = video_uri
    logger.info('Processing Output file {} '.format(out_loc))
    ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
def empty_pitch(video_uri, out_loc, fl_name, r_config):
    """
    Save a one-row placeholder pitch table (NaN values, voice label 'no'
    and the module-level error text) for audio that could not be processed.
    """
    header = ['Frames', r_config.aco_ff, r_config.aco_voiceLabel, r_config.err_reason]
    placeholder_row = [np.nan, np.nan, 'no', error_txt]

    empty_table = pd.DataFrame([placeholder_row], columns=header)
    empty_table['dbm_master_url'] = video_uri

    save_msg = 'Saving Output file {} '.format(out_loc)
    logger.info(save_msg)
    ut.save_output(empty_table, out_loc, fl_name, ff_dir, csv_ext)
def run_pitch(video_uri, out_dir, r_config):
    """
    Produce the pitch output for the audio of one video.

    The audio is expected as '<fl_name>.wav' inside the input location
    returned by ut.filter_path. Audio shorter than 0.064 sec is reported
    through empty_pitch; otherwise calc_pitch is run. When no matching
    .wav file is found this function does nothing.
    -------------------
    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if not aud_filter:
            return

        audio_file = aud_filter[0]
        aud_dur = librosa.get_duration(filename=audio_file)
        if float(aud_dur) < 0.064:
            logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
            empty_pitch(video_uri, out_loc, fl_name, r_config)
            return

        calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config)
    except Exception:
        # Best effort per file: never propagate, but record the traceback so
        # the cause of the failure is not lost.
        logger.exception('Failed to process audio file')

View File

@@ -1,160 +0,0 @@
"""
file_name: shimmer_processing
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import os
import glob
import parselmouth
import librosa
import numpy as np
import more_itertools as mit
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
shimmer_dir = 'acoustic/shimmer'
ff_dir = 'acoustic/pitch'
csv_ext = '_shimmer.csv'
def audio_shimmer(sound):
    """
    Compute local shimmer for a sound through Praat (via parselmouth).
    Args:
        sound: parselmouth object
    Returns:
        The result of Praat's "Get shimmer (local)" on the sound and the
        periodic (cc) point process built from it
    """
    # 80 / 500 are presumably the pitch floor / ceiling in Hz - confirm.
    point_process = parselmouth.praat.call(sound, "To PointProcess (periodic, cc)...", 80, 500)
    return parselmouth.praat.call([sound, point_process], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
def empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt):
    """
    Save a one-row placeholder shimmer table (NaN values plus the given
    error text) for audio that could not be processed.
    """
    header = ['Frames', r_config.aco_shimmer, r_config.err_reason]
    placeholder_row = [np.nan, np.nan, error_txt]

    empty_table = pd.DataFrame([placeholder_row], columns=header)
    empty_table['dbm_master_url'] = video_uri

    save_msg = 'Saving Output file {} '.format(out_loc)
    logger.info(save_msg)
    ut.save_output(empty_table, out_loc, fl_name, shimmer_dir, csv_ext)
def segment_pitch(dir_path, r_config):
    """
    Split the rows of a pitch csv into runs of consecutive voiced and
    unvoiced frames.

    Every file in dir_path ending in '_pitch.csv' is read. If several are
    present, the last one (in os.listdir order) that could be segmented
    determines the result. Files that cannot be read or segmented are
    logged and skipped.
    Args:
        dir_path: (str) directory containing the pitch csv
        r_config: raw variable config object; aco_voiceLabel names the
            column holding the "yes"/"no" voice label
    Returns:
        (com_speech_sort, voiced_yes, voiced_no) where voiced_yes and
        voiced_no are lists of runs of consecutive row positions labelled
        "yes" / "no", and com_speech_sort is both combined, sorted by the
        first position of each run. All three are empty lists when no
        pitch csv was segmented.
    """
    # Three independent lists: `([], ) * 3` would bind one shared list to
    # all three names.
    com_speech_sort, voiced_yes, voiced_no = [], [], []
    for file in os.listdir(dir_path):
        if not file.endswith('_pitch.csv'):
            continue
        pitch_csv = os.path.join(dir_path, file)
        try:
            ff_df = pd.read_csv(pitch_csv)
            voice_label = ff_df[r_config.aco_voiceLabel]
            indices_yes = [i for i, x in enumerate(voice_label) if x == "yes"]
            indices_no = [i for i, x in enumerate(voice_label) if x == "no"]
            yes_runs = [list(group) for group in mit.consecutive_groups(indices_yes)]
            no_runs = [list(group) for group in mit.consecutive_groups(indices_no)]
            merged = sorted(yes_runs + no_runs, key=lambda run: run[0])
        except Exception:
            # Stay best effort, but do not hide the failure.
            logger.warning('Could not segment pitch file {}'.format(pitch_csv), exc_info=True)
            continue
        # Commit only complete results so the three lists always agree.
        com_speech_sort, voiced_yes, voiced_no = merged, yes_runs, no_runs
    return com_speech_sort, voiced_yes, voiced_no
def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audio_file):
    """
    calculating shimmer for each voice segment
    Args:
        com_speech_sort: list of segments, each a list of pitch frame indices
        voiced_yes: the segments that are voiced
        voiced_no: the unvoiced segments (not used; kept for the interface)
        shimmer_frames: list with one slot per segment; filled in place
        audio_file: (.wav) audio file location
    Returns:
        shimmer_frames, where entry i is the shimmer of segment i, or NaN when
        the segment is unvoiced, has a single frame, or could not be processed
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=.001)
    # Hashed lookup instead of scanning the list of lists per segment.
    voiced = {tuple(seg) for seg in voiced_yes}
    for idx, vs in enumerate(com_speech_sort):
        # Assigned before the try so a failure can never leave it undefined.
        shimmer = np.nan
        if len(vs) > 1 and tuple(vs) in voiced:
            try:
                # NOTE(review): vs holds 0-based csv row positions, while Praat
                # frame numbers are presumably 1-based - confirm the offset.
                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])
                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))
                # NOTE(review): no sampling frequency is passed, so the
                # library default applies - confirm it matches the audio.
                samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end])
                shimmer = audio_shimmer(samples)
            except Exception:
                # One bad segment must not abort the file; it stays NaN.
                logger.debug('shimmer failed for segment {}'.format(idx), exc_info=True)
        shimmer_frames[idx] = shimmer
    return shimmer_frames
def calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Preparing shimmer matrix

    Relies on the pitch output directory ('acoustic/pitch' under out_loc):
    if it does not exist, an empty shimmer output with an error text is
    written instead.
    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv
        fl_name: (str) file name passed on to ut.save_output
        r_config: config.config_raw_feature.pyConfigFeatureNmReader object
    """
    dir_path = os.path.join(out_loc, ff_dir)
    if not os.path.isdir(dir_path):
        error_txt = 'error: fundamental freq not available'
        empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt)
        return

    com_speech_sort, voiced_yes, voiced_no = segment_pitch(dir_path, r_config)
    # np.nan: the np.NaN alias no longer exists in NumPy 2.0.
    shimmer_frames = [np.nan] * len(com_speech_sort)
    shimmer_segment_frames = segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audio_file)

    df_shimmer = pd.DataFrame(shimmer_segment_frames, columns=[r_config.aco_shimmer])
    df_shimmer[r_config.err_reason] = 'Pass'  # will replace with threshold in future release
    df_shimmer['Frames'] = df_shimmer.index
    df_shimmer['dbm_master_url'] = video_uri
    logger.info('Processing Output file {} '.format(out_loc))
    ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
def run_shimmer(video_uri, out_dir, r_config):
    """
    Produce the shimmer output for the audio of one video.

    The audio is expected as '<fl_name>.wav' inside the input location
    returned by ut.filter_path. Audio shorter than 0.064 sec is reported
    through empty_shimmer with an error text; otherwise calc_shimmer is
    run. When no matching .wav file is found this function does nothing.
    ---------------
    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if not aud_filter:
            return

        audio_file = aud_filter[0]
        aud_dur = librosa.get_duration(filename=audio_file)
        if float(aud_dur) < 0.064:
            logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
            error_txt = 'error: length less than 0.064'
            empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt)
            return

        calc_shimmer(video_uri, audio_file, out_loc, fl_name, r_config)
    except Exception:
        # Best effort per file: never propagate, but record the traceback so
        # the cause of the failure is not lost.
        logger.exception('Failed to process audio file')

View File

@@ -1,111 +0,0 @@
"""
file_name: voice_frame_score
project_name: DBM
created: 2020-20-07
"""
import parselmouth
import pandas as pd
import numpy as np
import glob
import librosa
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
vfs_dir = 'acoustic/voice_frame_score'
csv_ext = '_voiceprev.csv'
error_txt = 'error: length less than 0.064'
def audio_pitch_frame(pitch):
    """
    Read the frame counts of a pitch object.
    Args:
        pitch: speech pitch
    Returns:
        (total_frames, voiced_frames) as reported by the pitch object's
        get_number_of_frames() and count_voiced_frames()
    """
    return pitch.get_number_of_frames(), pitch.count_voiced_frames()
def voice_segment(path):
    """
    Compute the share of voiced frames in an audio file from its
    parselmouth pitch track.
    Args:
        path: (.wav) audio file location
    Returns:
        (voiced_percentage, voiced_frames, total_frames), where
        voiced_percentage is voiced_frames / total_frames * 100
    """
    pitch_track = parselmouth.Sound(path).to_pitch()
    n_total, n_voiced = audio_pitch_frame(pitch_track)
    pct_voiced = (n_voiced/n_total)*100
    return pct_voiced, n_voiced, n_total
def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config):
    """
    Build the single-row voice frame score table for one audio file and
    save it.
    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        audio_file: Audio file path
        out_loc: (str) Output directory for csv
        fl_name: (str) file name passed on to ut.save_output
        r_config: raw variable config object (column names)
    """
    pct_voiced, n_voiced, n_total = voice_segment(audio_file)

    vfs_table = pd.DataFrame([n_voiced], columns=[r_config.aco_voiceFrame])
    vfs_table[r_config.aco_totVoiceFrame] = [n_total]
    vfs_table[r_config.aco_voicePct] = [pct_voiced]
    # will replace with threshold in future release
    vfs_table[r_config.err_reason] = 'Pass'
    vfs_table['Frames'] = vfs_table.index
    vfs_table['dbm_master_url'] = video_uri

    save_msg = 'Saving Output file {} '.format(out_loc)
    logger.info(save_msg)
    ut.save_output(vfs_table, out_loc, fl_name, vfs_dir, csv_ext)
def empty_vfs(video_uri, out_loc, fl_name, r_config):
    """
    Save a one-row placeholder VFS table (NaN values plus the module-level
    error text) for audio that could not be processed.
    """
    header = ['Frames', r_config.aco_voiceFrame, r_config.aco_totVoiceFrame, r_config.aco_voicePct,
              r_config.err_reason]
    placeholder_row = [np.nan] * 4 + [error_txt]

    empty_table = pd.DataFrame([placeholder_row], columns=header)
    empty_table['dbm_master_url'] = video_uri

    save_msg = 'Saving Output file {} '.format(out_loc)
    logger.info(save_msg)
    ut.save_output(empty_table, out_loc, fl_name, vfs_dir, csv_ext)
def run_vfs(video_uri, out_dir, r_config):
    """
    Produce the voice frame score output for the audio of one video.

    The audio is expected as '<fl_name>.wav' inside the input location
    returned by ut.filter_path. Audio shorter than 0.064 sec is reported
    through empty_vfs; otherwise calc_vfs is run. When no matching .wav
    file is found this function does nothing.
    ---------------
    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if not aud_filter:
            return

        audio_file = aud_filter[0]
        aud_dur = librosa.get_duration(filename=audio_file)
        if float(aud_dur) < 0.064:
            logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
            empty_vfs(video_uri, out_loc, fl_name, r_config)
            return

        calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config)
    except Exception:
        # Best effort per file: never propagate, but record the traceback so
        # the cause of the failure is not lost.
        logger.exception('Failed to process audio file')

View File

@@ -1,17 +0,0 @@
"""
file_name: init
project_name: DBM
created: 2020-20-07
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
DBMLIB_PATH = os.path.dirname(__file__)
DBMLIB_VTREMOR_LIB = os.path.abspath(os.path.join(DBMLIB_PATH,
'../../../../resources/libraries/voice_tremor.praat'))
DBMLIB_FTREMOR_CONFIG = os.path.abspath(os.path.join(DBMLIB_PATH, '../../../../resources/features/facial/config.json'))

View File

@@ -1,160 +0,0 @@
"""
file_name: eye_blink
project_name: DBM
created: 2020-20-07
"""
import os
import glob
from scipy.spatial import distance as dist
from scipy.signal import find_peaks
from imutils.video import FileVideoStream
from imutils.video import VideoStream
from imutils import face_utils
from moviepy.editor import VideoFileClip
import numpy as np
import pandas as pd
import imutils
import time
import dlib
import cv2
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
movement_expr_dir = 'movement/eye_blink'
csv_ext = '_eyeblinks.csv'
def eye_aspect_ratio(eye):
    """
    Compute the eye aspect ratio for a single frame.
    Args:
        eye: Eye landmarks (six points, indexed 0..5)
    Return:
        Sum of the two vertical landmark distances (1-5 and 2-4) divided
        by twice the horizontal distance (0-3)
    """
    # vertical eye landmark pairs
    vertical_sum = dist.euclidean(eye[1], eye[5]) + dist.euclidean(eye[2], eye[4])
    # horizontal eye landmark pair
    horizontal = dist.euclidean(eye[0], eye[3])
    return vertical_sum / (2.0 * horizontal)
def blink_detection(video_path,facial_landmarks,raw_config):
    """
    Blink detection for each frame

    The eye aspect ratio (EAR) is computed for every face detected in every
    readable frame; blinks are the prominent minima of that EAR series.
    Args:
        video_path: MP4 file location
        facial_landmarks: Facial landmark pre-trained model path
        raw_config: Raw configuration file object
    Return:
        Dataframe with blink information like blink frame, duration etc.
    """
    # NOTE(review): the counter starts at 1, so after the loop it is one more
    # than the number of frames processed; it feeds the fps estimate below.
    TOT_FRAME = 1
    blink_frame = []
    ear_frame = []

    # The clip is only needed for its duration; close it right away so the
    # underlying reader is released.
    clip = VideoFileClip(video_path, has_mask=True)
    try:
        vid_length = clip.duration
    finally:
        clip.close()

    identifier = dlib.get_frontal_face_detector()  # dlib's face detector (HOG-based)
    forecaster = dlib.shape_predictor(facial_landmarks)  # the facial landmark predictor

    # left and right eye landmarks
    (left_beg, left_end) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
    (right_beg, right_end) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]

    vid_stream = FileVideoStream(video_path).start()
    try:
        while True:
            try:
                # check if stream/frame available in video
                if not vid_stream.more():
                    break

                # reading & converting frame into grayscale
                vid_frame = vid_stream.read()
                vid_frame = imutils.resize(vid_frame, width=450)
                gray = cv2.cvtColor(vid_frame, cv2.COLOR_BGR2GRAY)

                # detecting face
                rects = identifier(gray, 0)
                for rect in rects:
                    lmk = forecaster(gray, rect)
                    lmk = face_utils.shape_to_np(lmk)

                    l_eye = lmk[left_beg:left_end]  # left eye landmarks
                    r_eye = lmk[right_beg:right_end]  # right eye landmarks
                    l_ear = eye_aspect_ratio(l_eye)
                    r_ear = eye_aspect_ratio(r_eye)
                    ear = (l_ear + r_ear) / 2.0  # average the eye aspect ratio

                    blink_frame.append(TOT_FRAME)
                    ear_frame.append(ear)
                TOT_FRAME += 1
            except Exception:
                # A frame that cannot be read or processed is skipped (and not
                # counted); the loop ends once the stream reports no more frames.
                continue
    finally:
        # Always stop the reader, even if processing is interrupted.
        vid_stream.stop()

    blink_df = pd.DataFrame(ear_frame, columns =[raw_config.mov_blink_ear])
    blink_df[raw_config.vid_dur] = vid_length
    blink_df[raw_config.fps] = int(TOT_FRAME/vid_length)
    blink_df[raw_config.mov_blinkframes] = blink_frame

    # Blinks are minima of the EAR, hence the sign flip; prominence = 0.1 based on tuning
    peaks, _ = find_peaks(blink_df[raw_config.mov_blink_ear]*-1, prominence=0.1)
    final_blink_df = blink_df.iloc[peaks,:].reset_index(drop=True)

    u_blink_df = blink_dur(final_blink_df,raw_config)
    u_blink_df['dbm_master_url'] = video_path
    return u_blink_df
def blink_dur(blink_df,raw_config):
    """
    Add the blink duration column to a blink dataframe (modified in place).

    The duration is the difference between consecutive blink frame numbers
    (the first blink keeps its own frame number), divided by the fps column.
    Args:
        blink_df : Dataframe with blink information like blink frame
        raw_config: Raw configuration file object
    Returns:
        Updated dataframe with blink duration
    """
    dur_col = raw_config.mov_blinkdur
    frame_col = raw_config.mov_blinkframes

    if len(blink_df) == 0:
        blink_df[dur_col] = np.nan
    else:
        frames = blink_df[frame_col]
        # The first row has no predecessor: diff() is NaN there, so fall
        # back to the frame number itself.
        blink_df[dur_col] = frames.diff().fillna(frames)

    blink_df[dur_col] = blink_df[dur_col] / blink_df[raw_config.fps]
    return blink_df
def run_eye_blink(video_uri, out_dir, r_config, facial_landmarks):
    """
    Produce the eye blink output for one video.

    When video_uri exists, blink_detection is run on it and the result is
    passed to ut.save_output; otherwise this function does nothing.
    ---------------
    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        facial_landmarks: landmark model path
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        if os.path.exists(video_uri):
            logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name)))
            df_blink = blink_detection(video_uri, facial_landmarks, r_config)
            ut.save_output(df_blink, out_loc, fl_name, movement_expr_dir, csv_ext)
    except Exception:
        # Best effort per file: never propagate, but record the traceback so
        # the cause of the failure is not lost.
        logger.exception('Failed to process video file')

View File

@@ -1,148 +0,0 @@
"""
file_name: eye_gaze
project_name: DBM
created: 2020-30-11
"""
import os
import glob
import pandas as pd
import numpy as np
from scipy.spatial import distance
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
eye_pose_dir = 'movement/gaze'
eye_pose_ext = '_eyegaze.csv'
def eye_motion_df(l_disp, r_disp, error_list, r_config):
    """
    Assemble the eye displacement dataframe.
    Args:
        l_disp: displacement list (left eye)
        r_disp: displacement list (right eye)
        error_list: per-row error reason values
        r_config: raw variable config file object
    Returns:
        Dataframe with the left and right displacement columns side by
        side, followed by the error reason column
    """
    left = pd.DataFrame(l_disp, columns=[r_config.mov_eleft_disp])
    right = pd.DataFrame(r_disp, columns=[r_config.mov_eright_disp])

    motion = pd.concat([left, right], axis=1, sort=False)
    motion[r_config.err_reason] = error_list
    return motion
def filter_motion(df_of, df_disp, col_l, col_r, r_config):
    """
    Filtering final eye movement dataframe

    Gaze values of rows whose ' confidence' is below 0.8 are replaced by
    NaN, the gaze columns are renamed to the configured names, and the
    displacement dataframe is appended column-wise. df_of is not modified.
    Args:
        df_of: Openface raw out dataframe
        df_disp: eye displacement dataframe
        col_l: left eye columns (x, y, z)
        col_r: right eye columns (x, y, z)
        r_config: raw variable config file object
    Returns:
        Dataframe with the six renamed gaze columns followed by the columns
        of df_disp
    """
    gaze_cols = col_l + col_r
    low_conf = df_of[' confidence'].astype(float) < 0.8

    # Work on an explicit copy: assigning through .loc into a slice of
    # df_of triggers SettingWithCopyWarning and is ambiguous.
    df_filter = df_of[gaze_cols].copy()
    df_filter.loc[low_conf, gaze_cols] = np.nan
    df_filter.columns = [r_config.mov_leye_x, r_config.mov_leye_y, r_config.mov_leye_z,
                         r_config.mov_reye_x, r_config.mov_reye_y, r_config.mov_reye_z]

    df_motion = pd.concat([df_filter, df_disp], axis=1, sort=False)
    return df_motion
def eye_disp(of_results, col, r_config):
    """
    Computing gaze displacement frame by frame

    For every row the euclidean distance between its (x, y, z) gaze values
    and those of the previous row is computed. The first row and rows with
    ' confidence' below 0.8 get NaN.
    Args:
        of_results: Openface raw out dataframe
        col: list of the three gaze columns (x, y, z) of one eye
        r_config: raw variable config file object (not used here)
    Returns:
        (distance_list, error_list): per-row displacement and per-row error
        reason ('Pass' or 'confidence less than 80%')
    """
    conf_col = ' confidence'
    frame = of_results[col + [conf_col]]

    distance_list = []
    error_list = []
    prev_point = None
    # Rows are walked by position, so the previous row is found correctly
    # whatever index labels the dataframe carries.
    for pos, (_, row) in enumerate(frame.iterrows()):
        point = (row[col[0]], row[col[1]], row[col[2]])
        dst = np.nan
        if float(row[conf_col]) < 0.8:  # Threshold < 0.8
            error_list.append('confidence less than 80%')
        else:
            error_list.append('Pass')
            if pos > 0:
                try:
                    dst = abs(distance.euclidean(prev_point, point))
                except Exception:
                    # A row that cannot be measured keeps NaN.
                    dst = np.nan
        distance_list.append(dst)
        # The previous point is tracked even for low-confidence rows.
        prev_point = point
    return distance_list, error_list
def calc_eye_mov(video_uri, df_of, out_loc, fl_name, r_config):
    """
    Compute the eye motion variables for one video and save them.

    Only the error reasons of the left eye are written to the output; the
    ones computed for the right eye are discarded.
    Args:
        video_uri: video path, stored in the 'dbm_master_url' column
        df_of: Openface dataframe
        out_loc: Output path for saving output csv's
        fl_name: file name for output csv
        r_config: raw variable config file object
    """
    left_cols = [' gaze_0_x', ' gaze_0_y', ' gaze_0_z']
    right_cols = [' gaze_1_x', ' gaze_1_y', ' gaze_1_z']

    left_disp, left_err = eye_disp(df_of, left_cols, r_config)
    right_disp, _ = eye_disp(df_of, right_cols, r_config)

    displacement = eye_motion_df(left_disp, right_disp, left_err, r_config)
    displacement['dbm_master_url'] = video_uri

    motion = filter_motion(df_of, displacement, left_cols, right_cols, r_config)
    ut.save_output(motion, out_loc, fl_name, eye_pose_dir, eye_pose_ext)
def _read_openface_csv(of_csv):
    """Read an OpenFace csv, skipping malformed lines on old and new pandas."""
    try:
        return pd.read_csv(of_csv, on_bad_lines='warn')
    except TypeError:
        # pandas < 1.3 does not know on_bad_lines; use the legacy flag there.
        return pd.read_csv(of_csv, error_bad_lines=False)


def run_eye_gaze(video_uri, out_dir, r_config):
    """
    Produce the eye gaze output for one video.

    Uses the first csv found under '<out_loc>/<fl_name>_openface/'. When no
    such csv exists this function does nothing.
    --------------------------------
    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
    """
    try:
        # filtering path to generate input & output path
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        of_csv_path = glob.glob(join(out_loc, fl_name + '_openface/*.csv'))
        if not of_csv_path:
            return

        df_of = _read_openface_csv(of_csv_path[0])
        logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name)))
        calc_eye_mov(video_uri, df_of, out_loc, fl_name, r_config)
    except Exception:
        # Best effort per file: never propagate, but record the traceback so
        # the cause of the failure is not lost.
        logger.exception('Failed to process video file')

View File

@@ -1,164 +0,0 @@
import sys, os, glob, cv2, re
import pickle, json
import pandas as pd
import numpy as np
import numpy.ma as ma
import logging
from os.path import join
from dbm_lib.dbm_features.raw_features.util import util as ut
from dbm_lib.dbm_features.raw_features.util.math_util import *
from dbm_lib.dbm_features.raw_features.movement import DBMLIB_FTREMOR_CONFIG
# Configure root logging at import time and use the root logger for this module.
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
# Sub-directory and file suffix passed to ut.save_output for the tremor csv.
ft_dir = 'movement/facial_tremor'
csv_ext = '_fac_tremor.csv'
# NOTE(review): the two suffixes below are not referenced by the code visible
# in this module - confirm they are still needed.
model_ext = '_fac_model.csv'
fac_features_ext = '_fac_features.csv'
def compute_features(out_dir, df_of, r_config):
    """
    Compute facial tremor features from OpenFace landmark output.

    Args:
        out_dir: unused; kept for interface compatibility
        df_of: OpenFace landmark dataframe (columns matching 'l<N>_x'/'l<N>_y')
        r_config: raw variable config object

    Returns:
        One-row dataframe with the per-landmark features. When the features
        cannot be computed, the frame built by empty_frame (NaN values plus
        the error reason) is returned instead.
    """
    # Close the config file deterministically instead of leaking the handle.
    with open(DBMLIB_FTREMOR_CONFIG, 'r') as config_file:
        config = json.load(config_file)
    landmarks = config['landmarks']

    pattern_x = re.compile(r"l\d+_x")
    pattern_y = re.compile(r"l\d+_y")

    # assumption: distance of face to camera remains roughly static
    landmark_columns = [col for col in df_of.columns
                        if pattern_x.match(col) or pattern_y.match(col)]

    # Keep only frames where at least one landmark coordinate is non-zero.
    df_of = df_of[(df_of[landmark_columns] != 0).any(axis=1)].reset_index()
    num_frames = len(df_of)
    logger.info("Number of frames to be processed: {}".format(str(num_frames)))

    try:
        if num_frames == 0:
            error_reason = "No frames with visible face."
            logger.error(error_reason)
            return empty_frame(landmarks, r_config, error_reason)
        # NOTE: a minimum-frame-count check (< 60 frames) was disabled upstream.

        first_row = df_of.iloc[0]
        facew = abs(first_row[config['face_width_left']] - first_row[config['face_width_right']])
        faceh = abs(first_row[config['face_height_left']] - first_row[config['face_height_right']])
        if facew == 0 or faceh == 0:
            error_reason = 'face width or height = 0. Check landmark values'
            logger.error(error_reason)
            # The original call omitted error_reason, which raised TypeError.
            return empty_frame(landmarks, r_config, error_reason)

        fac_disp = calc_displacement_vec(df_of, landmarks, num_frames)

        # Validate the shape before reducing along axis 1; otherwise these
        # checks are unreachable because the reduction raises first.
        if len(fac_disp.shape) != 2:
            error_reason = 'fac_disp is not 2D. smth went wrong with disp calc'
            logger.error(error_reason)
            return empty_frame(landmarks, r_config, error_reason)
        if len(fac_disp[0]) <= 1:
            error_reason = 'Video too short. smth went wrong with disp calc'
            logger.error(error_reason)
            return empty_frame(landmarks, r_config, error_reason)

        fac_disp_median = np.median(fac_disp, axis=1)
        fac_disp_mean = np.mean(fac_disp, axis=1)

        fac_corr_mat = np.corrcoef(fac_disp, rowvar=True)
        # extract the row of the correlation matrix for the reference landmark
        ref_lmk_index = landmarks.index(config['ref_lmk'])
        fac_corr = fac_corr_mat[ref_lmk_index]

        fac_area = config['ref_area'] / (facew * faceh)
        fac_features1 = np.multiply(fac_area * fac_disp_median, (1. - fac_corr))
        fac_features2 = np.multiply(fac_area * fac_disp_mean, (1. - fac_corr))

        fac_features_dict = {}
        for i, landmark in enumerate(landmarks):
            fac_features_dict['fac_features_mean_{}'.format(landmark)] = [fac_features2[i]]
            raw_variable_map = 'fac_tremor_median_{}'.format(landmark)
            fac_features_dict[r_config.base_raw['raw_feature'][raw_variable_map]] = [fac_features1[i]]
            fac_features_dict['fac_disp_median_{}'.format(landmark)] = [fac_disp_median[i]]
            fac_features_dict['fac_corr_{}'.format(landmark)] = [fac_corr[i]]
        fac_features_dict[r_config.err_reason] = ['']

        data = pd.DataFrame.from_dict(fac_features_dict)
        logger.info('Concluded computing tremor features')
        return data

    except Exception as e:
        logger.error('Error computing tremor features: {}'.format(str(e)))
        return empty_frame(landmarks, r_config, str(e))
def empty_frame(landmarks, r_config, error_reason):
    """
    Build the one-row placeholder frame used when tremor features fail.

    Args:
        landmarks: landmark identifiers; four NaN columns are created per landmark
        r_config: raw variable config object (reads base_raw and err_reason)
        error_reason: text stored in the error-reason column

    Returns:
        One-row dataframe of NaN feature values plus the error reason.
    """
    placeholder = {}
    for landmark in landmarks:
        median_key = r_config.base_raw['raw_feature']['fac_tremor_median_{}'.format(landmark)]
        placeholder[median_key] = [np.nan]
        for prefix in ('fac_features_mean', 'fac_disp_median', 'fac_corr'):
            placeholder['{}_{}'.format(prefix, landmark)] = [np.nan]
    placeholder[r_config.err_reason] = [error_reason]
    return pd.DataFrame.from_dict(placeholder)
def fac_tremor_process(video_uri, out_dir, r_config, model_output=False):
    """
    Compute and save facial tremor features for one video.

    The first csv found under '<out_dir>/<file name>/<file name>_openface_lmk/'
    is read and passed to compute_features. Nothing is done when no csv exists.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        model_output: currently unused (model scoring is disabled)

    Raises:
        Any exception from reading, computing or saving is logged and re-raised.
    """
    try:
        _, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        of_csv_path = glob.glob(join(out_loc, fl_name + '_openface_lmk/*.csv'))
        if not of_csv_path:
            return

        df_of = pd.read_csv(of_csv_path[0], error_bad_lines=False)
        logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name)))
        # compute_features ignores its first argument; keep passing the same value.
        feats = compute_features(of_csv_path, df_of, r_config)
        ut.save_output(feats, out_loc, fl_name, ft_dir, csv_ext)
    except Exception as e:
        # Previously this error was logged on every call, success included,
        # because the surrounding try/except had been commented out.
        # Exceptions still propagate to the caller, as before.
        logger.error('Failed to process video file {}: {}'.format(video_uri, e))
        raise

View File

@@ -1,196 +0,0 @@
"""
file_name: head_mov
project_name: DBM
created: 2020-20-07
"""
import os
import glob
import pandas as pd
import numpy as np
from scipy.spatial import distance
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
# Configure root logging at import time and use the root logger for this module.
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
# Sub-directories and file suffixes passed to ut.save_output in calc_head_mov:
# head movement (velocity) output and head pose output respectively.
h_mov_dir = 'movement/head_movement'
h_pose_dir = 'movement/head_pose'
h_mov_ext = '_headmov.csv'
h_pose_ext = '_headpose.csv'
def head_pose_dist(of_results):
    """
    Compute the frame-to-frame head pose (rotation) distance.

    Args:
        of_results: Openface raw output dataframe with ' confidence',
            ' pose_Rx', ' pose_Ry' and ' pose_Rz' columns

    Returns:
        (distance_list, error_list): one entry per row. The distance is NaN
        for the first row and for rows whose confidence is below 0.2;
        error_list holds 'Pass' or 'confidence less than 20%'.
    """
    confidence = of_results[' confidence'].astype(float).tolist()
    # Work on positional lists so the previous frame is found by position,
    # not by index label (label lookup fails on a non-default index).
    rotations = of_results[[' pose_Rx', ' pose_Ry', ' pose_Rz']].values.tolist()

    distance_list = []
    error_list = []
    for pos, (conf, rotation) in enumerate(zip(confidence, rotations)):
        if conf < 0.2:  # Threshold < 0.2
            distance_list.append(np.nan)
            error_list.append('confidence less than 20%')
            continue

        dst = np.nan
        if pos > 0:
            try:
                dst = distance.euclidean(rotations[pos - 1], rotation)
            except (TypeError, ValueError):
                # Non-numeric pose values: keep NaN for this frame.
                dst = np.nan
        distance_list.append(dst)
        error_list.append('Pass')
    return distance_list, error_list
def head_pose(of_results, r_config):
    """
    Build the head pose estimation dataframe.

    Args:
        of_results: Openface raw output dataframe (not modified)
        r_config: raw variable config file object

    Returns:
        Dataframe with pitch, yaw and roll (NaN where confidence < 0.2),
        the frame-to-frame pose distance and the error reason per frame.
    """
    pose_cols = [' pose_Rx', ' pose_Ry', ' pose_Rz']

    # Distances are computed from the unmasked pose values, as before.
    pose_dist_list, error_list = head_pose_dist(of_results)

    # Mask on a copy: the caller's frame is left untouched and no write
    # goes through a slice.
    pose_of = of_results[pose_cols].copy()
    low_confidence = of_results[' confidence'].astype(float) < 0.2
    pose_of.loc[low_confidence, pose_cols] = np.nan

    pose_of.columns = [r_config.mov_Hpose_Pitch, r_config.mov_Hpose_Yaw, r_config.mov_Hpose_Roll]
    pose_of[r_config.mov_Hpose_Dist] = pose_dist_list
    pose_of[r_config.err_reason] = error_list
    return pose_of
def head_motion_df(distance_val, error_list, r_config):
    """
    Build the head movement dataframe.

    Args:
        distance_val: per-frame distance list
        error_list: per-frame error reason list (same length as distance_val)
        r_config: raw variable config file object

    Returns:
        Dataframe with columns 'Frames' (0-based row number), the head
        velocity column and the error reason column, in that order.
    """
    head_motion = r_config.head_vel
    err_reason = r_config.err_reason
    # Build the frame in one step rather than adding a column to a slice,
    # which triggered SettingWithCopyWarning.
    return pd.DataFrame(
        {
            'Frames': range(len(distance_val)),
            head_motion: distance_val,
            err_reason: error_list,
        },
        columns=['Frames', head_motion, err_reason],
    )
def head_vel(of_results, r_config):
    """
    Compute head velocity (frame-to-frame translation distance).

    Args:
        of_results: Openface raw output dataframe with ' confidence',
            ' pose_Tx', ' pose_Ty' and ' pose_Tz' columns
        r_config: raw variable config file object

    Returns:
        Dataframe from head_motion_df. The distance is NaN for the first row,
        for rows with confidence below 0.2 and for distances above 200
        (flagged 'Out of range').
    """
    confidence = of_results[' confidence'].astype(float).tolist()
    # Positional lists: the previous frame is found by position, not by
    # index label (label lookup fails on a non-default index).
    positions = of_results[[' pose_Tx', ' pose_Ty', ' pose_Tz']].values.tolist()

    distance_list = []
    error_list = []
    for pos, (conf, point) in enumerate(zip(confidence, positions)):
        if conf < 0.2:  # Threshold < 0.2
            distance_list.append(np.nan)
            error_list.append('confidence less than 20%')
            continue
        if pos == 0:
            # No previous frame to compare against.
            distance_list.append(np.nan)
            error_list.append('Pass')
            continue

        try:
            dst = distance.euclidean(positions[pos - 1], point)
        except (TypeError, ValueError):
            # Non-numeric position values: keep NaN for this frame.
            dst = np.nan

        if abs(dst) > 200:
            dst = np.nan
            error_list.append('Out of range')
        else:
            error_list.append('Pass')
        distance_list.append(dst)

    return head_motion_df(distance_list, error_list, r_config)
def calc_head_mov(video_uri, df_of, out_loc, fl_name, r_config):
    """
    Compute head motion and head pose variables and save both as csv.

    Args:
        video_uri: video path; written to the 'dbm_master_url' column
        df_of: Openface dataframe
        out_loc: Output path for saving output csv's
        fl_name: file name for output csv
        r_config: raw variable config file object
    """
    col = [' confidence', ' pose_Rx', ' pose_Ry', ' pose_Rz', ' pose_Tx', ' pose_Ty', ' pose_Tz']
    # Copy the column subset: head_pose writes NaNs into the frame it is
    # given, and that write must not go through a slice of the caller's data.
    df_of = df_of[col].copy()

    # Velocity is computed first, before head_pose masks low-confidence rows.
    df_hmotion = head_vel(df_of, r_config)
    df_hmotion['dbm_master_url'] = video_uri

    df_pose = head_pose(df_of, r_config)
    df_pose['dbm_master_url'] = video_uri

    ut.save_output(df_hmotion, out_loc, fl_name, h_mov_dir, h_mov_ext)
    ut.save_output(df_pose, out_loc, fl_name, h_pose_dir, h_pose_ext)
def run_head_movement(video_uri, out_dir, r_config):
    """
    Compute head movement and head pose features for a single video.

    The first csv found under '<out_dir>/<file name>/<file name>_openface/'
    is read and handed to calc_head_mov. Nothing is done when no csv exists.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
    """
    try:
        # filtering path to generate input & output path
        _, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        of_csv_path = glob.glob(join(out_loc, fl_name + '_openface/*.csv'))
        if not of_csv_path:
            return

        df_of = pd.read_csv(of_csv_path[0], error_bad_lines=False)
        logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name)))
        calc_head_mov(video_uri, df_of, out_loc, fl_name, r_config)
    except Exception as e:
        # Best-effort per video: keep going, but record which video failed
        # and why instead of dropping the exception.
        logger.error('Failed to process video file {}: {}'.format(video_uri, e))

View File

@@ -1,94 +0,0 @@
import pandas as pd
import os
import glob
from os.path import join
import parselmouth
from parselmouth.praat import call, run_file
import numpy as np
import librosa
import json
import re
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
from dbm_lib.dbm_features.raw_features.movement import DBMLIB_VTREMOR_LIB
# Configure root logging at import time and use the root logger for this module.
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
# Sub-directory and file suffix passed to ut.save_output for voice tremor csv's.
vt_dir = 'movement/voice_tremor'
csv_ext = '_vtremor.csv'
#Executing praat script using parselmouth function
def tremor_praat(snd_file, r_cfg):
    """
    Run the voice tremor praat script on an audio file.

    Args:
        snd_file: (.wav) parsed audio file
        r_cfg: Raw variable configuration file

    Returns:
        One-row tremor dataframe whose six columns are renamed to the
        tremor variable names taken from r_cfg.
    """
    sound = parselmouth.Sound(snd_file)
    praat_result = run_file(sound, DBMLIB_VTREMOR_LIB, capture_output=True)

    # Element 1 of the result is parsed as JSON; '--undefined--' is not
    # valid JSON, so it is replaced with 0 first.
    tremor_json = praat_result[1].replace('--undefined--', '0')
    tremor_values = json.loads(tremor_json)

    tremor_df = pd.DataFrame(tremor_values, index=['0'])
    tremor_df.columns = [
        r_cfg.mov_freq_trem_freq,
        r_cfg.mov_amp_trem_freq,
        r_cfg.mov_freq_trem_index,
        r_cfg.mov_amp_trem_index,
        r_cfg.mov_freq_trem_pindex,
        r_cfg.mov_amp_trem_pindex,
    ]
    return tremor_df
def prepare_vtrem_output(audio_file, out_loc, r_config, fl_name):
    """
    Compute the voice tremor features for an audio file and save them.

    Args:
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv
        r_config: raw config object
        fl_name: file name
    """
    tremor_features = tremor_praat(audio_file, r_config)
    # will replace with threshold in future release
    tremor_features[r_config.err_reason] = 'Pass'

    output_base = os.path.join(out_loc, fl_name)
    logger.info('Processing Output file {} '.format(output_base))
    ut.save_output(tremor_features, out_loc, fl_name, vt_dir, csv_ext)
def prepare_empty_vt(out_loc, fl_name, r_config, error_txt):
    """
    Save a voice tremor csv whose feature values are all NaN.

    Args:
        out_loc: (str) Output directory for csv
        fl_name: file name
        r_config: raw config object
        error_txt: value stored in the error-reason column
    """
    feature_cols = [
        r_config.mov_freq_trem_freq,
        r_config.mov_amp_trem_freq,
        r_config.mov_freq_trem_index,
        r_config.mov_amp_trem_index,
        r_config.mov_freq_trem_pindex,
        r_config.mov_amp_trem_pindex,
    ]
    empty_row = [np.nan] * len(feature_cols) + [error_txt]
    empty_df = pd.DataFrame([empty_row], columns=feature_cols + [r_config.err_reason])

    output_base = os.path.join(out_loc, fl_name)
    logger.info('Saving Output file {} '.format(output_base))
    ut.save_output(empty_df, out_loc, fl_name, vt_dir, csv_ext)
def run_vtremor(video_uri, out_dir, r_config):
    """
    Compute voice tremor features for a single video's audio track.

    Looks for '<file name>.wav' next to the video. Audio shorter than 0.5 sec,
    or any failure, produces an empty (NaN) output csv with the error text.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
    """
    # Pre-bind so the exception handler can tell whether the paths were resolved.
    out_loc = fl_name = None
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if not aud_filter:
            return

        audio_file = aud_filter[0]
        aud_dur = librosa.get_duration(filename=audio_file)
        if float(aud_dur) < 0.5:
            logger.info('Output file {} size is less than 0.5sec'.format(audio_file))
            error_txt = 'error: length less than 0.5 sec'
            # Arguments follow prepare_empty_vt(out_loc, fl_name, r_config, error_txt);
            # the previous call passed video_uri first and omitted r_config.
            prepare_empty_vt(out_loc, fl_name, r_config, error_txt)
            return

        prepare_vtrem_output(audio_file, out_loc, r_config, fl_name)
    except Exception as e:
        logger.error('Failed to compute Voice Tremor {} for {}'.format(e, video_uri))
        # Only write the placeholder when the output location is known;
        # otherwise the handler itself would fail on unbound names.
        if out_loc is not None and fl_name is not None:
            prepare_empty_vt(out_loc, fl_name, r_config, str(e))

View File

@@ -1,51 +0,0 @@
"""
file_name: speech_features
project_name: DBM
created: 2020-13-11
"""
import os
import numpy as np
import pandas as pd
import glob
from os.path import join
import logging
import shutil
from dbm_lib.dbm_features.raw_features.util import util as ut
from dbm_lib.dbm_features.raw_features.util import nlp_util as n_util
# Configure root logging at import time and use the root logger for this module.
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
# Sub-directory and file suffix passed to ut.save_output for the nlp csv.
speech_dir = 'speech/speech_feature'
speech_ext = '_nlp.csv'
# Glob pattern, joined to the per-video output location, used to find the
# transcription csv.
transcribe_ext = 'speech/deepspeech/*_transcribe.csv'
def run_speech_feature(video_uri, out_dir, r_config, tran_tog):
    """
    Compute nlp (speech) features from a video's transcription csv.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        tran_tog: transcription toggle; unless it equals 'on', the directory
            holding the transcription csv is deleted after the features are saved
    """
    try:
        _, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        transcribe_path = glob.glob(join(out_loc, transcribe_ext))
        if not transcribe_path:
            return

        transcribe_df = pd.read_csv(transcribe_path[0])
        df_speech = n_util.process_speech(transcribe_df, r_config)

        logger.info('Saving Output file {} '.format(out_loc))
        ut.save_output(df_speech, out_loc, fl_name, speech_dir, speech_ext)

        # Covers None as well: anything other than 'on' removes the transcript.
        if tran_tog != 'on':
            shutil.rmtree(os.path.dirname(transcribe_path[0]))
    except Exception as e:
        # Best-effort per video: keep going, but record which video failed and why.
        logger.error('Failed to process video file {}: {}'.format(video_uri, e))

View File

@@ -1,84 +0,0 @@
"""
file_name: transcribe
project_name: DBM
created: 2020-10-11
"""
import pandas as pd
import numpy as np
import librosa
import glob
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.util import util as ut
from dbm_lib.dbm_features.raw_features.util import nlp_util as n_util
# Configure root logging at import time and use the root logger for this module.
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
# Sub-directory and file suffix passed to ut.save_output for the transcription
# csv (the 'formant' name is historical; the content is the transcript).
formant_dir = 'speech/deepspeech'
csv_ext = '_transcribe.csv'
# Error text stored by empty_transcribe.
error_txt = 'error: length less than 0.1'
def calc_transcribe(video_uri, audio_file, out_loc, fl_name, r_config, deep_path, aud_dur):
    """
    Transcribe an audio file and save the transcript as a one-row csv.

    Args:
        video_uri: video path; written to the 'dbm_master_url' column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory
        fl_name: input file name
        r_config: raw variable config
        deep_path: deepspeech build path
        aud_dur: audio duration, stored in the total-time column
    """
    transcript = n_util.process_deepspeech(audio_file, deep_path)

    transcript_df = pd.DataFrame([transcript], columns=[r_config.nlp_transcribe])
    transcript_df.replace('', np.nan, regex=True, inplace=True)

    extra_columns = (
        (r_config.nlp_totalTime, aud_dur),
        (r_config.err_reason, 'Pass'),  # will replace with threshold in future release
        ('dbm_master_url', video_uri),
    )
    for column, value in extra_columns:
        transcript_df[column] = value

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(transcript_df, out_loc, fl_name, formant_dir, csv_ext)
def empty_transcribe(video_uri, out_loc, fl_name, r_config):
    """
    Save a placeholder transcription csv (NaN transcript and duration)
    carrying the module-level error text.

    Args:
        video_uri: video path; written to the 'dbm_master_url' column
        out_loc: (str) Output directory
        fl_name: input file name
        r_config: raw variable config
    """
    columns = [r_config.nlp_transcribe, r_config.nlp_totalTime, r_config.err_reason]
    placeholder_row = [np.nan, np.nan, error_txt]

    placeholder_df = pd.DataFrame([placeholder_row], columns=columns)
    placeholder_df['dbm_master_url'] = video_uri

    logger.info('Saving Output file {} '.format(out_loc))
    ut.save_output(placeholder_df, out_loc, fl_name, formant_dir, csv_ext)
def run_transcribe(video_uri, out_dir, r_config, deep_path):
    """
    Transcribe the audio track of a single video.

    Looks for '<file name>.wav' next to the video. Audio shorter than 0.1 sec
    produces a placeholder csv instead of a transcript.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        deep_path: deepspeech build path
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
        if not aud_filter:
            return

        audio_file = aud_filter[0]
        aud_dur = librosa.get_duration(filename=audio_file)
        if float(aud_dur) < 0.1:
            logger.info('Output file {} size is less than 0.1 sec'.format(audio_file))
            empty_transcribe(video_uri, out_loc, fl_name, r_config)
            return

        calc_transcribe(video_uri, audio_file, out_loc, fl_name, r_config, deep_path, aud_dur)
    except Exception as e:
        # Best-effort per video: keep going, but record which file failed and why.
        logger.error('Failed to process audio file {}: {}'.format(video_uri, e))

View File

@@ -1,212 +0,0 @@
"""
file_name: nlp_util
project_name: DBM
created: 2020-10-11
"""
import subprocess
import json
import numpy as np
import pandas as pd
import os
import logging
import nltk
import re
from lexicalrichness import LexicalRichness
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Configure root logging at import time and use the root logger for this module.
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
#Speech to text using Deepspeech 0.9.1
def deepspeech(AUDIO_FILE,deep_path):
    """
    Run the 'deepspeech' command line tool (0.9.1 model files) on an audio file.

    Args:
        AUDIO_FILE: path of the audio file to transcribe
        deep_path: directory holding the model (.pbmm) and scorer files

    Returns:
        (stdout, stderr) as returned by Popen.communicate(); stderr is merged
        into stdout. ("error", "error") is returned if reading the output fails.
    """
    command = [
        'deepspeech',
        '--model', os.path.join(deep_path, 'deepspeech-0.9.1-models.pbmm'),
        '--scorer', os.path.join(deep_path, 'deepspeech-0.9.1-models.scorer'),
        '--audio', AUDIO_FILE,
    ]
    out = subprocess.Popen(command,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.STDOUT)
    logger.info('Deepspeech output...... {}'.format(out))
    try:
        stdout, stderr = out.communicate()
    except Exception:
        # Do not leave the child process running, and record the failure.
        # KeyboardInterrupt/SystemExit are deliberately not caught here.
        out.kill()
        out.wait()
        logger.exception('Deepspeech transcription failed for {}'.format(AUDIO_FILE))
        return "error", "error"
    return stdout, stderr
def deep_speech_output_clean(result):
    """
    Extract the transcript from the deepspeech command output.

    The transcript is taken to be the line that follows the first line
    containing 'Inference took'.

    Args:
        result: (stdout, stderr) pair as returned by deepspeech()

    Returns:
        Transcript text, or "" when no transcript line is found.
    """
    if not result:
        return ""

    raw_output = result[0]
    # Decode bytes properly instead of parsing their repr(), which left
    # escape sequences and a stray closing quote in the text.
    if isinstance(raw_output, bytes):
        raw_output = raw_output.decode('utf-8', errors='replace')
    lines = str(raw_output).splitlines()

    # Stop one short of the end so lines[i + 1] always exists.
    for i, line in enumerate(lines[:-1]):
        if 'Inference took' in line:
            return lines[i + 1]
    return ""
def process_deepspeech(audio_file, deep_path):
    """
    Transcribe an audio file and return the recognised text.

    Args:
        audio_file: path of the audio file
        deep_path: deepspeech build path (model and scorer location)
    """
    raw_output = deepspeech(audio_file, deep_path)
    return deep_speech_output_clean(raw_output)
def nltk_download():
    """
    Make sure the NLTK resources used by process_speech are installed,
    downloading any that are missing.
    """
    # (resource path for nltk.data.find, package id for nltk.download)
    # The tagger lives under 'taggers/'; looking it up without the category
    # never succeeds, so it was re-downloaded on every call.
    required = (
        ('tokenizers/punkt', 'punkt'),
        ('taggers/averaged_perceptron_tagger', 'averaged_perceptron_tagger'),
    )
    for resource_path, package in required:
        try:
            nltk.data.find(resource_path)
        except LookupError:
            logger.info('{} is not available'.format(package))
            nltk.download(package)
def empty_speech(r_config, master_url, error_txt):
    """
    Build the one-row speech feature frame used when transcription failed.

    Args:
        r_config: raw config file object
        master_url: value for the 'dbm_master_url' column
        error_txt: Error message during transcription

    Returns:
        Dataframe with NaN for every speech feature, error_txt in the
        error-reason column and the master url.
    """
    col = [r_config.nlp_numSentences, r_config.nlp_singPronPerAns, r_config.nlp_singPronPerSen, r_config.nlp_pastTensePerAns,
           r_config.nlp_pastTensePerSen, r_config.nlp_pronounsPerAns, r_config.nlp_pronounsPerSen, r_config.nlp_verbsPerAns,
           r_config.nlp_verbsPerSen, r_config.nlp_adjectivesPerAns, r_config.nlp_adjectivesPerSen, r_config.nlp_nounsPerAns,
           r_config.nlp_nounsPerSen, r_config.nlp_sentiment_mean, r_config.nlp_mattr, r_config.nlp_wordsPerMin,
           r_config.nlp_totalTime, r_config.err_reason]
    # err_reason is the last column, so only len(col) - 1 NaNs are needed;
    # one NaN per column plus the error text made the row one value too long.
    row = [np.nan] * (len(col) - 1) + [error_txt]
    df_speech = pd.DataFrame([row], columns=col)
    df_speech['dbm_master_url'] = master_url
    return df_speech
def divide_var(speech_var1, spech_var2):
    """
    Divide speech_var1 by spech_var2, giving NaN when the divisor is 0.
    """
    return speech_var1 / spech_var2 if spech_var2 != 0 else np.nan
def process_speech(transcribe_df,r_config):
    """
    Compute speech (nlp) features from a transcription dataframe.

    Args:
        transcribe_df: Transcribed dataframe (first row is used)
        r_config: raw config file object

    Returns:
        One-row dataframe of speech features. When the transcription row is
        not marked 'Pass', the frame built by empty_speech is returned.
    """
    transcribe_df = transcribe_df.replace(np.nan, '', regex=True)
    err_transcribe = transcribe_df[r_config.err_reason].iloc[0]
    transcribe = transcribe_df[r_config.nlp_transcribe].iloc[0]
    total_time = transcribe_df[r_config.nlp_totalTime].iloc[0]
    master_url = transcribe_df['dbm_master_url'].iloc[0]

    if err_transcribe != 'Pass':
        # Forward the transcription error; the name used here before
        # (error_txt) was not defined in this function.
        return empty_speech(r_config, master_url, err_transcribe)

    # clean transcribe: drop commas, keep words and sentence-ending punctuation
    transcribe = transcribe.replace(",", "")
    transcribe = " ".join(re.findall(r"[\w']+|[.!?]", transcribe))

    nltk_download()
    sentences = nltk.tokenize.sent_tokenize(transcribe)
    words_all = nltk.tokenize.word_tokenize(transcribe)
    num_sentences = len(sentences)
    has_words = len(words_all) > 0

    speech_dict = {r_config.nlp_numSentences: num_sentences}

    def add_count(count, per_ans_key, per_sen_key):
        # Store a count per answer (NaN for an empty transcript) and per sentence.
        per_ans = count if has_words else np.nan
        speech_dict[per_ans_key] = per_ans
        speech_dict[per_sen_key] = divide_var(per_ans, num_sentences)

    # First-person singular pronouns, counted as whole tokens. Substring
    # counting also matched e.g. 'me' inside 'some' or 'my' inside 'army'.
    # NOTE(review): 'I' is matched case-sensitively, as before; if the
    # transcripts are all lower-case it will never match - confirm intent.
    sing_count = sum(1 for word in words_all if word in ('I', 'me', 'my'))
    add_count(sing_count, r_config.nlp_singPronPerAns, r_config.nlp_singPronPerSen)

    all_POSs = [tag for _word, tag in nltk.pos_tag(transcribe.split())]

    def count_tags(*tags):
        # Number of tokens whose POS tag is one of the given tags.
        return sum(all_POSs.count(tag) for tag in tags)

    # Past tense, pronouns, verbs, adjectives and nouns per answer / sentence
    add_count(count_tags('VBD'),
              r_config.nlp_pastTensePerAns, r_config.nlp_pastTensePerSen)
    add_count(count_tags('PRP', 'PRP$'),
              r_config.nlp_pronounsPerAns, r_config.nlp_pronounsPerSen)
    add_count(count_tags('VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'),
              r_config.nlp_verbsPerAns, r_config.nlp_verbsPerSen)
    add_count(count_tags('JJ', 'JJR', 'JJS'),
              r_config.nlp_adjectivesPerAns, r_config.nlp_adjectivesPerSen)
    add_count(count_tags('NN', 'NNP', 'NNS'),
              r_config.nlp_nounsPerAns, r_config.nlp_nounsPerSen)

    # Sentiment analysis: mean compound valence over sentences
    vader = SentimentIntensityAnalyzer()
    sentence_valences = [vader.polarity_scores(s)['compound'] for s in sentences]
    speech_dict[r_config.nlp_sentiment_mean] = np.mean(sentence_valences) if len(sentence_valences) > 0 else np.nan

    # Lexical richness over the words themselves. Joining str(list) spread
    # the list's repr into single characters, so richness was measured on
    # letters rather than words.
    non_punc = [value for value in words_all if value not in ['.', '!', '?']]
    lex = LexicalRichness(" ".join(non_punc))
    speech_dict[r_config.nlp_mattr] = lex.mattr(window_size=lex.words) if lex.words > 0 else np.nan

    # Number of words per minute. A missing duration was turned into '' by
    # the NaN replacement above; treat it as NaN so the division is defined.
    if total_time == '':
        total_time = np.nan
    speech_dict[r_config.nlp_wordsPerMin] = divide_var(len(non_punc), total_time) * 60
    speech_dict[r_config.nlp_totalTime] = total_time
    speech_dict['dbm_master_url'] = master_url

    df_speech = pd.DataFrame([speech_dict])
    return df_speech

View File

@@ -1,112 +0,0 @@
"""
file_name: util
project_name: DBM
created: 2020-20-07
"""
import os
import glob
import numpy as np
import subprocess
def filter_path(video_url, out_dir):
    """
    Split a video path into the pieces used to locate inputs and outputs.

    Args:
        video_url: path of the video
        out_dir: Output directory path

    Returns:
        (input_loc, out_loc, fl_name): the video's directory, the per-video
        output directory '<out_dir>/<fl_name>', and the file name without
        its extension.
    """
    input_loc, base_name = os.path.split(video_url)
    fl_name = os.path.splitext(base_name)[0]
    return input_loc, os.path.join(out_dir, fl_name), fl_name
def save_output(df, out_loc, fl_name, f_dir, f_ext):
    """
    Write a feature dataframe to '<out_loc>/<f_dir>/<fl_name><f_ext>'.

    Args:
        df: (dataframe) feature dataframe[ex: Formant freq, pitch]
        out_loc: (dir) Output location where we want to save raw output
        fl_name: file name
        f_dir: directory name for a feature
        f_ext: extension for a feature [ex: '_pose.csv']
    """
    dir_path = os.path.join(out_loc, f_dir)
    # exist_ok avoids the check-then-create race when several workers
    # create the same directory at once.
    os.makedirs(dir_path, exist_ok=True)
    df.to_csv(os.path.join(dir_path, fl_name + f_ext), index=False)
def audio_process(base_dir, video_url):
    """
    Find the '*_split.wav' files that sit next to a raw video.

    Args:
        base_dir: Base path for raw data; prepended as-is (no separator added)
        video_url: sequence of path components; the first two are dropped and
            the rest joined with '/'
            # NOTE(review): the [2:] slice suggests a pre-split path - confirm
            # against callers.

    Returns:
        List of matching '*_split.wav' paths (empty when none exist).
    """
    relative_path = '/'.join(video_url[2:])
    path_parts = (base_dir + relative_path).split('/')
    # Drop the final component (the video file) to get its directory.
    wav_dir = '/'.join(path_parts[:-1])
    return glob.glob(wav_dir + '/*_split.wav')
def compute_open_face_features(input_filepath,
                               output_directory,
                               open_face_executable,
                               au_static=False,
                               tracked_visualization=False,
                               clobber=False,
                               verbose=True):
    """
    Run the OpenFace executable on an input video.
    See https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments

    Args:
        input_filepath: path of the video to process
        output_directory: where OpenFace writes its output; when empty,
            '<video dir>/<video name>_openface' is used
        open_face_executable: path of the OpenFace executable
        au_static: add the '-au_static' flag
        tracked_visualization: add the '-tracked' flag
        clobber: (bool) if True existing files will be overwritten
        verbose: print progress messages

    Returns:
        (str) path to output csv file

    Raises:
        IOError if OpenFace executable is missing
    """
    if not os.path.isfile(open_face_executable):
        raise IOError("OpenFace executable {} could not be found.".format(open_face_executable))

    stem = os.path.splitext(os.path.basename(input_filepath))[0]
    if not output_directory:
        output_directory = os.path.join(os.path.dirname(input_filepath), stem + '_openface')
    output_csv = os.path.join(output_directory, stem + '.csv')

    # Existing output is reused unless the caller asks to overwrite it.
    if os.path.isfile(output_csv) and not clobber:
        if verbose:
            print('Output file {} already exists'.format(output_csv))
        return output_csv

    command = [open_face_executable]
    if au_static:
        command.append('-au_static')
    if tracked_visualization:
        command.append('-tracked')
    command.extend(['-q', '-2Dfp', '-3Dfp', '-pdmparams', '-pose', '-aus', '-gaze'])
    command.extend(['-f', input_filepath, '-out_dir', output_directory])

    if verbose:
        print('Computing OpenFace features {} from video file'.format(input_filepath))
    subprocess.check_output(command)
    if verbose:
        print('OpenFace features saved to {}'.format(output_directory))
    return output_csv

View File

@@ -1,190 +0,0 @@
"""
file_name: video_util
project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import glob
from dbm_lib.dbm_features.raw_features.util import util as ut
def smooth(x, window_len=11, window='hanning'):
    """
    Smooth a 1-D signal by convolving it with a normalised window.

    The signal is extended at both ends with reflected copies of itself
    so that transients at the edges are reduced, then convolved with the
    window and trimmed. For an odd window_len the result has the same length
    as the input.

    Args:
        x: the input signal (1-D numpy array)
        window_len: the dimension of the smoothing window; should be an odd
            integer. Values below 3 return x unchanged.
        window: one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman';
            'flat' produces a moving average.

    Returns:
        The smoothed signal.

    Raises:
        ValueError: if x is not 1-D, is shorter than window_len, or the
            window name is not supported.
    """
    # These used to be 'raise (ValueError, "...")', which on Python 3 raises
    # TypeError instead of the intended ValueError.
    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")
    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")
    if window_len < 3:
        return x
    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
        raise ValueError("Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    # Reflect the signal around both ends.
    s = np.r_[x[window_len-1:0:-1], x, x[-2:-window_len-1:-1]]
    if window == 'flat':  # moving average
        w = np.ones(window_len, 'd')
    else:
        # The name was validated above, so a plain attribute lookup replaces eval().
        w = getattr(np, window)(window_len)

    y = np.convolve(w / w.sum(), s, mode='valid')
    half = int(window_len / 2)
    return y[half:-half]
def filter_by_confidence_and_thresh(x, fea, thresh):
    """
    Return x[fea] when the row is trustworthy, otherwise NaN.

    A row passes when its 's_confidence' is above 0.2 and the absolute
    value of the feature is below thresh.

    Args:
        x: row (mapping/Series) holding 's_confidence' and the feature
        fea: name of the feature to read
        thresh: exclusive upper bound on the feature's absolute value
    """
    if x['s_confidence'] > 0.2 and np.fabs(x[fea]) < thresh:
        return x[fea]
    # np.nan instead of the np.NaN alias, which NumPy 2.0 removed.
    return np.nan
def add_au_emotion(x, emotion, emotion_type, exp_type):
    """
    Compute the expressivity contribution of one action-unit group for a row.

    Args:
        x: row (mapping/Series) with 's_confidence', ' AUxx_c'/' AUxx_r'
            values and the running value in x[emotion_type]
        emotion: iterable of action unit numbers
        emotion_type: name of the column holding the running expressivity
        exp_type: 'full' requires every action unit of the group to be
            present (otherwise the contribution is 0); any other value uses
            whichever action units are present

    Returns:
        (value, error_reason): x[emotion_type] plus the normalised intensity,
        or (NaN, 'confidence less than 80%') for low-confidence rows.
    """
    if not x['s_confidence'] > 0.8:  # if using smooth, no need for 'success'
        # np.nan instead of the np.NaN alias, which NumPy 2.0 removed.
        return np.nan, 'confidence less than 80%'

    # AU intensity is on a 0-5 scale, so each present AU adds 5 to the
    # normaliser and a fully expressed group scores 1.0. The previous
    # increment of 6 capped the score at 5/6.
    # NOTE(review): changes stored feature values - confirm against the
    # feature definition before merging.
    max_intensity = 5

    sum_r = 0
    cnt = 0
    for au in emotion:
        au_c_label = " AU{:02d}_c".format(au)
        au_r_label = " AU{:02d}_r".format(au)

        # there are data with face in, but au_c=0
        if x[au_c_label] == 1 and (not np.isnan(x[au_r_label])):
            sum_r += x[au_r_label]
            cnt += max_intensity

        # 'full' expressivity only counts when all AU's are present
        if exp_type == 'full' and x[au_c_label] == 0:
            cnt = 0
            break

    sum_r = sum_r / cnt if cnt > 0 else 0
    return x[emotion_type] + sum_r, 'Pass'
def add_au_occ(x, emotion, emotion_type):
    """
    Compute whether an emotion is present in a row.

    Args:
        x: row (mapping/Series) with 's_confidence' and ' AUxx_c' values
        emotion: iterable of action unit numbers
        emotion_type: unused; kept for interface compatibility

    Returns:
        (presence, error_reason): presence is 1 when every action unit of the
        group is active, else 0; (NaN, 'confidence less than 80%') for
        low-confidence rows.
    """
    if not x['s_confidence'] > 0.8:  # if using smooth, no need for 'success'
        # np.nan instead of the np.NaN alias, which NumPy 2.0 removed.
        return np.nan, 'confidence less than 80%'

    active = sum(1 for au in emotion if x[" AU{:02d}_c".format(au)] == 1)
    em_pres = 1 if active == len(emotion) else 0
    return em_pres, 'Pass'
def emotion_exp(em_au, of, em_col, err_col):
    """
    Accumulate emotion expressivity into the dataframe, in place.

    For every action-unit group in em_au, the 'partial' expressivity is
    written to em_col[0] and the 'full' expressivity to em_col[1]; err_col
    receives the error reason of the most recent pass.
    """
    for au_group in em_au:
        for position, mode in enumerate(('partial', 'full')):
            target_col = em_col[position]
            of[[target_col, err_col]] = of.apply(
                add_au_emotion,
                args=(au_group, target_col, mode,),
                axis=1,
                result_type='expand')
def emotion_pres(em_au, of, em_col, err_col):
    """
    Compute emotion presence and write it into the dataframe, in place.

    For every action-unit group in em_au, em_col receives the presence flag
    and err_col the error reason; later groups overwrite earlier ones.
    """
    for au_group in em_au:
        presence = of.apply(add_au_occ, args=(au_group, em_col,), axis=1, result_type='expand')
        of[[em_col, err_col]] = presence
def calc_of_for_video(of,face_cfg,fe_cfg):
    """
    Add the emotion expressivity and presence columns to the OpenFace
    dataframe, in place.

    Args:
        of: OpenFace dataframe; modified in place
        face_cfg: config object holding the action-unit groups
        fe_cfg: config object holding the output column names
    """
    new_cols = [fe_cfg.hap_exp,fe_cfg.sad_exp,fe_cfg.sur_exp,fe_cfg.fea_exp,fe_cfg.ang_exp,fe_cfg.dis_exp,fe_cfg.con_exp,
                fe_cfg.pai_exp,fe_cfg.neg_exp,fe_cfg.pos_exp,fe_cfg.neu_exp,fe_cfg.com_lower_exp,fe_cfg.com_upper_exp,
                fe_cfg.cai_exp,fe_cfg.com_exp,fe_cfg.happ_occ,fe_cfg.sad_occ,fe_cfg.sur_occ,fe_cfg.fea_occ,fe_cfg.ang_occ,
                fe_cfg.dis_occ,fe_cfg.con_occ,fe_cfg.hap_exp_full,fe_cfg.sad_exp_full,fe_cfg.sur_exp_full,fe_cfg.fea_exp_full,
                fe_cfg.ang_exp_full,fe_cfg.dis_exp_full,fe_cfg.con_exp_full,fe_cfg.pai_exp_full,fe_cfg.neg_exp_full,
                fe_cfg.pos_exp_full,fe_cfg.neu_exp_full,fe_cfg.cai_exp_full,fe_cfg.com_lower_exp_full,fe_cfg.com_upper_exp_full,
                fe_cfg.com_exp_full]
    # Every new column starts at 0 for every row.
    of[new_cols] = pd.DataFrame([[0] * len(new_cols)], index=of.index)
    of[fe_cfg.err_reason] = 'Pass'

    # (action-unit groups, partial expressivity column, full expressivity column)
    # Order is kept: each pass overwrites the error-reason column.
    expressivity_specs = [
        (face_cfg.happiness, fe_cfg.hap_exp, fe_cfg.hap_exp_full),
        (face_cfg.sadness, fe_cfg.sad_exp, fe_cfg.sad_exp_full),
        (face_cfg.surprise, fe_cfg.sur_exp, fe_cfg.sur_exp_full),
        (face_cfg.fear, fe_cfg.fea_exp, fe_cfg.fea_exp_full),
        (face_cfg.anger, fe_cfg.ang_exp, fe_cfg.ang_exp_full),
        (face_cfg.disgust, fe_cfg.dis_exp, fe_cfg.dis_exp_full),
        (face_cfg.contempt, fe_cfg.con_exp, fe_cfg.con_exp_full),
        (face_cfg.NEG_ACTION_UNITS, fe_cfg.neg_exp, fe_cfg.neg_exp_full),
        (face_cfg.POS_ACTION_UNITS, fe_cfg.pos_exp, fe_cfg.pos_exp_full),
        (face_cfg.NET_ACTION_UNITS, fe_cfg.neu_exp, fe_cfg.neu_exp_full),
        (face_cfg.cai, fe_cfg.cai_exp, fe_cfg.cai_exp_full),
        (face_cfg.ACTION_UNITS, fe_cfg.com_exp, fe_cfg.com_exp_full),
        (face_cfg.LOWER_ACTION_UNITS, fe_cfg.com_lower_exp, fe_cfg.com_lower_exp_full),
        (face_cfg.UPPER_ACTION_UNITS, fe_cfg.com_upper_exp, fe_cfg.com_upper_exp_full),
        (face_cfg.pain, fe_cfg.pai_exp, fe_cfg.pai_exp_full),
    ]
    for au_groups, partial_col, full_col in expressivity_specs:
        emotion_exp(au_groups, of, [partial_col, full_col], fe_cfg.err_reason)

    # (action-unit groups, presence column)
    presence_specs = [
        (face_cfg.happiness, fe_cfg.happ_occ),
        (face_cfg.sadness, fe_cfg.sad_occ),
        (face_cfg.surprise, fe_cfg.sur_occ),
        (face_cfg.fear, fe_cfg.fea_occ),
        (face_cfg.anger, fe_cfg.ang_occ),
        (face_cfg.disgust, fe_cfg.dis_occ),
        (face_cfg.contempt, fe_cfg.con_occ),
    ]
    for au_groups, presence_col in presence_specs:
        emotion_pres(au_groups, of, presence_col, fe_cfg.err_reason)

View File

@@ -1,14 +0,0 @@
"""
file_name: __init__
project_name: DBM
created: 2020-20-07
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
# Directory that contains this __init__ module.
DBMLIB_PATH = os.path.dirname(__file__)
# Absolute path of face_util.yml, resolved four directory levels above DBMLIB_PATH.
DBMLIB_FACE_CONFIG = os.path.abspath(os.path.join(DBMLIB_PATH, '../../../../resources/services/face_util.yml'))

View File

@@ -1,355 +0,0 @@
"""
file_name: face_asymmetry.py
project_name: DBM
created: 2020-20-07
"""
from mpl_toolkits import mplot3d
from matplotlib import pyplot as plt
import time
import numpy as np
import os
import datetime
import glob
import cv2
from scipy.spatial.transform import Rotation as R
import subprocess
import pandas as pd
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.video.face_config.face_config_reader import ConfigFaceReader
from dbm_lib.dbm_features.raw_features.util import video_util as vu
from dbm_lib.dbm_features.raw_features.util import util as ut
# Configures the root logger at import time; `logger` below is the root logger.
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
# Output sub-directory and file suffix handed to ut.save_output by run_face_asymmetry.
face_asym_dir = 'facial/face_asymmetry'
csv_ext = '_facasym.csv'
# Colour passed to cv2.circle when visualize_vid draws landmarks.
cv2_color_purple = (254,19,188)
# Colour triples; none of the functions in this module reference them.
color_blue = (0,0,1.0)
color_green = (0,1.0,0)
color_red = (1.0,0,0)
color_y = (1.0,1.0,0)
# Numeric error code -> reason text stored in the error-reason column.
error_code_message = {
0: 'pass',
1: 'confidence less than 80%',
}
# Inverse lookup: reason text -> numeric error code.
error_message_code = {y:x for x,y in error_code_message.items()}
def visualize_vid(fn, attr=None, write_out=False):
"""
Play a video frame by frame with optional landmark and asymmetry overlays.

Args:
fn: path handed to cv2.VideoCapture
attr: mapping that may hold 'lmks_frms' (frame id -> landmark array) and
'score_asym' (2D array whose column 0 is the frame id)
write_out: when true, every rendered figure is also appended to 'out.mp4'

NOTE(review): indentation was lost in this listing; block nesting has to be
restored before this can run.
NOTE(review): attr defaults to None, yet it is used with `in` below, which
raises TypeError for None -- callers presumably always pass a dict.
"""
vid = cv2.VideoCapture(fn)
# tot, frame_width and frame_height are read but not used further down.
tot = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
fps = vid.get(cv2.CAP_PROP_FPS)
frame_width = int(vid.get(3))
frame_height = int(vid.get(4))
if write_out:
# Hard-coded writer size; it is not derived from the input video.
fig_w = 680 #680 667 676 #frame_width in order of Ali, Vennessa, synthesis
fig_h = 659 #659 659 659 #frame_height
out_vid = cv2.VideoWriter('out.mp4',cv2.VideoWriter_fourcc(*'MP4V'), fps, (fig_w,fig_h))
plt.figure(figsize=(8, 8))
try:
frameid = 0
while(True):
ret, frame = vid.read()
if not ret:
# Release the Video Device if ret is false
vid.release()
print('Released Video Resource')
break
# OpenCV decodes as BGR; matplotlib expects RGB.
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# Incremented before use, so landmark lookups below are 1-based.
frameid += 1
# NOTE(review): frameid is passed as the log message and frame.shape as a
# format argument without a placeholder -- looks unintended.
logger.info(frameid, frame.shape)
if 'lmks_frms' in attr:
lmks_frms = attr['lmks_frms']
# Draw every landmark using columns 0 and 1 as pixel x and y.
for i in range(lmks_frms[frameid].shape[0]):
cv2.circle(frame,(int(lmks_frms[frameid][i,0]),int(lmks_frms[frameid][i,1])), 2, cv2_color_purple, -1)
if write_out:
cv2.putText(frame,'Frame: '+str(frameid), (10,50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 3)
plt.subplot(211)
plt.imshow(frame)
plt.axis('off'); plt.pause(0.2);
if 'score_asym' in attr:
ax = plt.subplot(212)
ax.cla()
ax.set_xlim(0,140) #ax.set_xlim(0,300)
ax.set_ylim(0,10)
sa = attr['score_asym']
# Keep only the score rows whose frame id is at or before the current frame.
s = sa[np.where(sa[:,0] <= frameid),:][0,:,:]
for i in range(1,s.shape[1]):
plt.plot(s[:,0], s[:,i])
plt.legend(['mouth', 'eyebrow', 'eye', 'mouth+eye+eyebrow'])
plt.minorticks_on()
plt.grid(b=True, which='major', color='r', linestyle='-')
plt.grid(b=True, which='minor', color='r', linestyle='--')
# The figure is round-tripped through 'tmp.png' in the working directory.
plt.savefig('tmp.png', bbox_inches='tight')
print(cv2.imread('tmp.png').shape)
plt.clf()
if write_out:
out_vid.write(cv2.imread('tmp.png'))
except KeyboardInterrupt:
# Release the Video Device
vid.release()
if write_out:
out_vid.release()
logger.info('Exception, and Video Resource Released')
if write_out:
out_vid.release()
def retrieve_attr(of_df):
    '''
    Retrieve landmarks and pose parameters for each frame from openface output

    Args:
        of_df: dataframe output from openface, including detected landmark coordinates
    Returns:
        lmks_frms: dictionary, with frame id as key and a (68, 6) array as value;
                   columns are x, y, X, Y, Z and the frame confidence
        pose_p: dictionary, with frame id as key and
                [pose_Tx, pose_Ty, pose_Tz, pose_Rx, pose_Ry, pose_Rz] as value
        Two empty dictionaries are returned when the dataframe does not hold
        exactly 68 ' x_' landmark columns, so callers can always unpack a pair.
    '''
    tot_lmks = 68 # openface specific
    if sum(' x_' in col for col in of_df.columns.to_list()) != tot_lmks:
        return {}, {}

    # Column names are loop-invariant, so build them once.
    axes = ('x', 'y', 'X', 'Y', 'Z')
    lmk_cols = [[' {}_{}'.format(axis, i) for axis in axes] for i in range(tot_lmks)]
    pose_cols = [' pose_Tx', ' pose_Ty', ' pose_Tz', ' pose_Rx', ' pose_Ry', ' pose_Rz']

    lmks_frms = {}
    pose_p = {}
    for fi in sorted(of_df['frame'].to_list()):
        r = of_df[of_df['frame']==fi]
        lmks = np.zeros((tot_lmks,6))
        for i, cols in enumerate(lmk_cols):
            lmks[i,:5] = [r[col].iloc[0] for col in cols]
        # Take the scalar of the first matching row, like every other field;
        # assigning the whole Series relied on implicit size-1 conversion.
        lmks[:,5] = r[' confidence'].iloc[0]
        lmks_frms[fi] = lmks
        pose_p[fi] = [r[col].iloc[0] for col in pose_cols]
    return lmks_frms, pose_p
def mirror_point(a, b, c, d, x1, y1, z1):
    """
    Reflect the point (x1, y1, z1) across the plane a*x + b*y + c*z + d = 0.

    Returns:
        [x, y, z] of the mirrored point
    """
    normal = (a, b, c)
    point = (x1, y1, z1)
    # Signed step along the normal that lands on the plane.
    k =(-a * x1-b * y1-c * z1-d)/float((a * a + b * b + c * c))
    # foot = n * k + p lies on the plane; the mirror image is 2 * foot - p.
    return [2 * (n * k + p)-p for n, p in zip(normal, point)]
def dist_vec2plane(vec, nrm):
    """
    Length of ``vec`` after removing its component along ``nrm``.

    Computed as sqrt(vec.vec - (vec.nrm)**2), which is the length of the
    projection onto the plane with normal ``nrm`` when ``nrm`` has unit length.
    """
    sq_len = np.dot(vec, vec) - np.dot(vec, nrm)**2
    # Rounding can leave a tiny negative value when vec is (almost) parallel
    # to nrm; clamp it so the result is 0 instead of NaN.
    return np.sqrt(np.maximum(sq_len, 0.0))
def vis_lmks3d(lmks_frms, vis_idx):
"""
Visualizing facial landmarks as a 3D scatter plot, one frame after another

Args:
lmks_frms: dictionary, frame id -> landmark array; columns 2, 3 and 4 are
plotted as X, Y and Z
vis_idx: list of landmark index groups; each group gets its own colour

NOTE(review): indentation was lost in this listing; block nesting has to be
restored before this can run.
"""
fig = plt.figure()
color_type = ['b','g','r','y','c']
# One colour per group is needed; the check requires strictly fewer groups than colours.
assert len(color_type) > len(vis_idx)
for fi in sorted(list(lmks_frms.keys())):
ax = plt.axes(projection="3d")
for i,vi in enumerate(vis_idx):
ax.scatter(lmks_frms[fi][vi,2], lmks_frms[fi][vi,3], lmks_frms[fi][vi,4], c=color_type[i])
# Fixed axis limits and camera angle so consecutive frames are comparable.
ax.axes.set_xlim3d(left=-75, right=100)
ax.axes.set_ylim3d(bottom=-200, top=25)
ax.axes.set_zlim3d(bottom=440, top=560)
ax.view_init(-89, -90) #elev, ariz
plt.title(str(fi)); ax.set_xlabel('X'); ax.set_ylabel('Y'); ax.set_zlabel('Z')
plt.pause(0.2)
plt.cla()
plt.draw()
def calc_fac_asymmetry(attr, is_vis=False):
    '''
    Quantify facial asymmetry

    Left-side landmarks are mirrored across the plane through landmarks 30, 27
    and 8; each mirrored point is compared with its right-side counterpart and
    the difference is measured after removing the component along the head
    pose direction.

    Args:
        attr: attribute dictionary containing necessary features for calculation, e.g.,
            lmks_frms: dictionary, with frame id as key and 68 landmark set (OpenFace) as value
            pose_param: dictionary, with frame id as key and pose param as value
        is_vis: when true, landmarks and their mirrored copies are plotted per frame
    Returns:
        score_asym: 2D array of size (num_frms, 6): frame id, mouth, eyebrow,
                    eye and composite asymmetry, error code. Frames whose
                    confidence is below 0.8 get NaN scores. An empty (0, 0)
                    array is returned when either key is missing from attr.
    '''
    # openface landmark indices
    lmks_ref_idx = list(range(0,17)) + list(range(27,36))
    lmks_mid_idx = [27,28,29,30,33,51,62,66,57,8]
    lmks_rgt_idx = [0,1,2,3,4,5,6,7,
                    17,18,19,20,21,
                    36,37,38,39,40,41,
                    48,49,50,
                    59,58,
                    60,61,
                    67]
    lmks_lft_idx = [16,15,14,13,12,11,10,9,
                    26,25,24,23,22,
                    45,44,43,42,47,46,
                    54,53,52,
                    55,56,
                    64,63,
                    65]
    # Sets give constant-time region membership checks in the inner loop.
    lmks_mth_idx = set(range(48,68))
    lmks_ebr_idx = set(range(17,27))
    lmks_eye_idx = set(range(36,48))
    assert len(lmks_lft_idx)==len(lmks_rgt_idx)

    fea_list = ['mouth', 'eyebrow', 'eye', 'composite']
    score_asym = np.empty(shape=(0, 0))
    if ('lmks_frms' not in attr) or ('pose_param' not in attr):
        return score_asym

    lmks_frms = attr['lmks_frms']
    pose_p = attr['pose_param']
    if is_vis:
        vis_lmks3d(lmks_frms, [lmks_lft_idx, lmks_rgt_idx, lmks_mid_idx, lmks_ref_idx])
    score_asym = np.zeros((len(lmks_frms),len(fea_list)+1+1)) # +1: extra column for error code
    if is_vis:
        plt.figure()
        ax = plt.axes(projection="3d")

    anc_idx = [30, 27, 8] # for central plane estimation
    for s,fi in enumerate(sorted(lmks_frms)):
        lmks_3d = lmks_frms[fi][:,2:5]
        pose = pose_p[fi]
        if lmks_frms[fi][0,5] < 0.8:
            # np.nan rather than np.NaN: the upper-case alias was removed in NumPy 2.0.
            err_code = error_message_code['confidence less than 80%']
            score_asym[s,:] = [fi,np.nan,np.nan,np.nan,np.nan,err_code]
            continue
        err_code = error_message_code['pass']

        # Head direction: unit z axis rotated by the x, y, z pose angles.
        rx = R.from_euler('x', pose[3])
        ry = R.from_euler('y', pose[4])
        rz = R.from_euler('z', pose[5])
        vec_pose = rz.apply(ry.apply(rx.apply([0,0,1])))

        # Plane through the three anchor landmarks: nrm . p = d
        origin = lmks_3d[anc_idx[0],:]
        nrm = np.cross(lmks_3d[anc_idx[2],:] - origin,
                       lmks_3d[anc_idx[1],:] - origin)
        nrm = nrm / np.linalg.norm(nrm)
        a,b,c = nrm
        d = np.dot(nrm, origin)

        dist_L2R_mth = []
        dist_L2R_ebr = []
        dist_L2R_eye = []
        dist_com = []
        reflected = []
        for k,idx in enumerate(lmks_lft_idx):
            p_rfl = np.array(mirror_point(a, b, c, -d, lmks_3d[idx,0], lmks_3d[idx,1], lmks_3d[idx,2]))
            reflected.append(p_rfl)
            dist = dist_vec2plane((p_rfl-lmks_3d[lmks_rgt_idx[k],:]), vec_pose)
            in_mth = idx in lmks_mth_idx
            in_ebr = idx in lmks_ebr_idx
            in_eye = idx in lmks_eye_idx
            if in_mth:
                dist_L2R_mth.append(dist)
            if in_ebr:
                dist_L2R_ebr.append(dist)
            if in_eye:
                dist_L2R_eye.append(dist)
            if in_mth or in_ebr or in_eye:
                dist_com.append(dist)
        score_asym[s,:] = [fi,np.mean(dist_L2R_mth),np.mean(dist_L2R_ebr),np.mean(dist_L2R_eye),np.mean(dist_com),err_code]

        if is_vis:
            lmks_rfl = np.array(reflected).reshape(-1, 3)
            ax.scatter(lmks_3d[:,0], lmks_3d[:,1], lmks_3d[:,2])
            ax.scatter(lmks_rfl[:,0], lmks_rfl[:,1], lmks_rfl[:,2], c='y')
            ax.scatter(pose_p[fi][0], pose_p[fi][1], pose_p[fi][2], c='c')
            plt.title('mirrored landmarks, frame: '+str(fi)); ax.set_xlabel('X'); ax.set_ylabel('Y'); ax.set_zlabel('Z')
            plt.pause(0.2)
            plt.cla()
    # Only touch matplotlib when a visualisation was requested.
    if is_vis:
        plt.draw()
    return score_asym
def calc_asym_feature(open_face_csv, f_cfg):
    """
    Build the per-frame facial asymmetry dataframe for one OpenFace csv.

    Args:
        open_face_csv: path of the OpenFace csv output
        f_cfg: config object providing the output column names
    Returns:
        (list) single-element list holding the asymmetry dataframe
    """
    # NOTE(review): `error_bad_lines` was removed in pandas 2.0 -- confirm the pinned pandas version.
    of_df = pd.read_csv(open_face_csv, error_bad_lines=False)
    lmks_frms, pose_p = retrieve_attr(of_df)
    score_asym = calc_fac_asymmetry({'lmks_frms': lmks_frms, 'pose_param': pose_p})

    score_cols = [
        'frame',
        f_cfg.fac_AsymMaskMouth,
        f_cfg.fac_AsymMaskEyebrow,
        f_cfg.fac_AsymMaskEye,
        f_cfg.fac_AsymMaskCom,
        f_cfg.err_reason,
    ]
    df_score_asym = pd.DataFrame(score_asym, columns=score_cols)
    # Translate numeric error codes back into their reason text.
    df_score_asym[f_cfg.err_reason] = df_score_asym[f_cfg.err_reason].apply(lambda code: error_code_message[code])

    # Bookkeeping columns are copied over from the OpenFace output.
    passthrough = (
        ('frame', 'frame'),
        ('face_id', ' face_id'),
        ('timestamp', ' timestamp'),
        ('confidence', ' confidence'),
        ('success', ' success'),
    )
    for out_col, of_col in passthrough:
        df_score_asym[out_col] = of_df[of_col]
    return [df_score_asym]
def run_face_asymmetry(video_uri, out_dir, f_cfg):
    """
    Compute facial asymmetry for one video and save it as csv

    The first csv matching '<out_loc>/<fl_name>_openface/*.csv' is used as
    input; nothing happens when no such file exists. Any failure is logged
    together with its traceback and is not re-raised.

    Args:
        video_uri: video path; f_cfg: face config object
        out_dir: (str) Output directory for processed output
    """
    try:
        #Baseline logic
        ConfigFaceReader()  # instantiated as before; the instance itself is not used
        _, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        of_csv_path = glob.glob(join(out_loc, fl_name + '_openface/*.csv'))
        if not of_csv_path:
            return

        asym_df_list = calc_asym_feature(of_csv_path[0], f_cfg)
        asym_final_df = pd.concat(asym_df_list, ignore_index=True)
        asym_final_df['dbm_master_url'] = video_uri
        logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name)))
        ut.save_output(asym_final_df, out_loc, fl_name, face_asym_dir, csv_ext)
    except Exception:
        # Still best-effort, but keep the traceback so failures can be diagnosed.
        logger.exception('Failed to process video file')

View File

@@ -1,98 +0,0 @@
"""
file_name: face_au.py
project_name: DBM
created: 2020-20-07
"""
import os
import numpy as np
import pandas as pd
import datetime
import glob
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.video.face_config.face_config_reader import ConfigFaceReader
from dbm_lib.dbm_features.raw_features.util import video_util as vu
from dbm_lib.dbm_features.raw_features.util import util as ut
# Configures the root logger at import time; `logger` below is the root logger.
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
# Output sub-directory and file suffix handed to ut.save_output by run_face_au.
face_au_dir = 'facial/face_au'
csv_ext = '_facau.csv'
def extract_col_nm_au(cols):
    """
    Pick the action unit (au) column names out of openface column names

    Args:
        cols: iterable of column names from open face output (csv)
    Returns:
        (list) names containing ' AU', in their original order
    """
    au_marker = ' AU'
    selected = []
    for name in cols:
        if au_marker in name:
            selected.append(name)
    return selected
def au_col_nm_map(df):
    """
    Rename openface action unit columns to the functional specification v1.0 names

    ' AUxx_r' becomes 'fac_AUxxint' and ' AUxx_c' becomes 'fac_AUxxpres',
    where xx are the two characters in front of the last underscore.

    Args:
        df: dataframe; renamed in place
    Returns:
        the same dataframe object
    """
    suffix_by_tag = (('_r', 'int'), ('_c', 'pres'))
    renamed = {}
    for name in list(df):
        if ' AU' not in name:
            continue
        cut = name.rfind('_')
        if cut == -1:
            continue
        au_id = name[cut-2:cut]
        # Tags are checked in order, so a later match overrides an earlier one.
        for tag, suffix in suffix_by_tag:
            if tag in name:
                renamed[name] = 'fac_AU' + au_id + suffix
    df.rename(columns=renamed, inplace=True)
    return df
def run_face_au(video_uri, out_dir, f_cfg):
    """
    Extract the action unit columns of one video and save them as csv

    The first csv matching '<out_loc>/<fl_name>_openface/*.csv' is used as
    input; nothing happens when no such file exists. Any failure is logged
    together with its traceback and is not re-raised.

    Args:
        video_uri: video path; f_cfg: face config object
        out_dir: (str) Output directory for processed output
    """
    try:
        #Baseline logic
        ConfigFaceReader()  # instantiated as before; the instance itself is not used
        _, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        of_csv_path = glob.glob(join(out_loc, fl_name + '_openface/*.csv'))
        if not of_csv_path:
            return

        df_of = pd.read_csv(of_csv_path[0], error_bad_lines=False)
        # Copy so the bookkeeping columns are added to an independent frame.
        df_au = df_of[extract_col_nm_au(df_of)].copy()
        df_au['frame'] = df_of['frame']
        df_au['face_id'] = df_of[' face_id']
        df_au['timestamp'] = df_of[' timestamp']
        df_au['confidence'] = df_of[' confidence']
        df_au['success'] = df_of[' success']
        df_au = au_col_nm_map(df_au)
        df_au['dbm_master_url'] = video_uri
        logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name)))
        ut.save_output(df_au, out_loc, fl_name, face_au_dir, csv_ext)
    except Exception:
        # Still best-effort, but keep the traceback so failures can be diagnosed.
        logger.exception('Failed to process video file')

View File

@@ -1,86 +0,0 @@
"""
file_name: process_emotion_expressivity
project_name: DBM
created: 2020-20-07
"""
import os
import numpy as np
import pandas as pd
import datetime
import glob
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.video.face_config.face_config_reader import ConfigFaceReader
from dbm_lib.dbm_features.raw_features.util import video_util as vu
from dbm_lib.dbm_features.raw_features.util import util as ut
# Configures the root logger at import time; `logger` below is the root logger.
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
# Output sub-directory and file suffix handed to ut.save_output by run_face_expressivity.
face_expr_dir = 'facial/face_expressivity'
csv_ext = '_facemo.csv'
#Openface feature extraction
def of_feature(df_of, cfr, f_cfg):
"""
Creating dataframe for face expressivity

Adds a smoothed confidence column, lets vu.calc_of_for_video add the
expressivity columns when 'AU' is a selected feature, and rescales the
negative (divided by 5) and composite (divided by 7) scores.

Args:
df_of: open face attributes (dataframe, modified in place)
cfr: face config reader providing SELECTED_FEATURES
f_cfg: config object providing the output column names
Returns:
(list) single-element list holding df_of

NOTE(review): indentation was lost in this listing; whether the
normalisation below sits inside the 'AU' branch cannot be told from here.
"""
df_list = []
df_of['s_confidence'] = vu.smooth(df_of[' confidence'].values, window='flat').tolist()
if 'AU' in cfr.SELECTED_FEATURES :
vu.calc_of_for_video(df_of, cfr, f_cfg)
#Normalizing facial expressivity for Composite and Negative expr(Range 0 to 1)
# Each length check only skips the division for an empty column.
if len(df_of[f_cfg.neg_exp])>0:
df_of[f_cfg.neg_exp] = df_of[f_cfg.neg_exp]/5
if len(df_of[f_cfg.neg_exp_full])>0:
df_of[f_cfg.neg_exp_full] = df_of[f_cfg.neg_exp_full]/5
if len(df_of[f_cfg.com_exp])>0:
df_of[f_cfg.com_exp] = df_of[f_cfg.com_exp]/7
if len(df_of[f_cfg.com_exp_full])>0:
df_of[f_cfg.com_exp_full] = df_of[f_cfg.com_exp_full]/7
df_list.append(df_of)
return df_list
def run_face_expressivity(video_uri, out_dir, f_cfg):
    """
    Compute facial expressivity for one video and save it as csv

    The first csv matching '<out_loc>/<fl_name>_openface/*.csv' is used as
    input, restricted to the columns listed in the face config (AU_fl);
    nothing happens when no such file exists. Any failure is logged together
    with its traceback and is not re-raised.

    Args:
        video_uri: video path; f_cfg: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    try:
        #Baseline logic
        cfr = ConfigFaceReader()
        _, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        of_csv_path = glob.glob(join(out_loc, fl_name + '_openface/*.csv'))
        if not of_csv_path:
            return

        df_of = pd.read_csv(of_csv_path[0], error_bad_lines=False)
        df_of = df_of[cfr.AU_fl]
        expr_df_list = of_feature(df_of, cfr, f_cfg)
        exp_final_df = pd.concat(expr_df_list, ignore_index=True)
        exp_final_df['dbm_master_url'] = video_uri
        logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name)))
        ut.save_output(exp_final_df, out_loc, fl_name, face_expr_dir, csv_ext)
    except Exception:
        # Still best-effort, but keep the traceback so failures can be diagnosed.
        logger.exception('Failed to process video file')

View File

@@ -1,122 +0,0 @@
"""
file_name: face_landmark
project_name: DBM
created: 2020-20-07
"""
import os
import numpy as np
import pandas as pd
import datetime
import glob
from os.path import join
import logging
from dbm_lib.dbm_features.raw_features.video.face_config.face_config_reader import ConfigFaceReader
from dbm_lib.dbm_features.raw_features.util import video_util as vu
from dbm_lib.dbm_features.raw_features.util import util as ut
# Configures the root logger at import time; `logger` below is the root logger.
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
# Output sub-directory and file suffix handed to ut.save_output by run_face_landmark.
face_lmk_dir = 'facial/face_landmark'
csv_ext = '_faclmk.csv'
def extract_col_nm_lmk(cols):
    """
    Pick the landmark column names out of openface column names

    Args:
        cols: iterable of column names from open face output (csv)
    Returns:
        (list) names containing one of ' y_', ' x_', ' X_', ' Y_', ' Z_',
        in their original order
    """
    lmk_tags = (' y_', ' x_', ' X_', ' Y_', ' Z_')
    return [name for name in cols if any(tag in name for tag in lmk_tags)]
def lmk_col_nm_map(df):
    """
    Rename openface landmark columns to the functional specification v1.0 names

    ' y_N' -> 'fac_LMKNNr', ' x_N' -> 'fac_LMKNNc' and ' X_N' / ' Y_N' / ' Z_N'
    -> 'fac_LMKNNX' / 'fac_LMKNNY' / 'fac_LMKNNZ'; single-character ids are
    left-padded with '0'.

    Args:
        df: dataframe; renamed in place
    Returns:
        the same dataframe object
    """
    suffix_by_tag = (
        (' y_', 'r'),
        (' x_', 'c'),
        (' X_', 'X'),
        (' Y_', 'Y'),
        (' Z_', 'Z'),
    )
    renamed = {}
    for name in list(df):
        start = name.rfind('_')+1
        if start <= 0:
            continue
        tail = name[start:]
        lmk_id = tail if len(tail)>1 else '0'+tail
        # Tags are checked in order, so a later match overrides an earlier one.
        for tag, suffix in suffix_by_tag:
            if tag in name:
                renamed[name] = 'fac_LMK' + lmk_id + suffix
    df.rename(columns=renamed, inplace=True)
    return df
def add_disp_3D(df):
    """
    Add a 3D displacement column for each of the 68 landmarks

    Rows are sorted by 'frame' in descending order; 'fac_LMKNNdisp' is the
    Euclidean distance between a row's X/Y/Z landmark position and that of the
    row before it in this order. The first row has no predecessor and gets 0.

    Args:
        df: landmark dataframe
    Returns:
        the sorted dataframe with the displacement columns added
    """
    df = df.sort_values(by=['frame'], ascending=False)
    lmk_cols = [name for name in list(df) if 'fac_LMK' in name]
    sq_step = df[lmk_cols].diff().pow(2)
    tot_lmk = 68 # 68 landmark model
    for n in range(tot_lmk):
        stem = 'fac_LMK' + '{:02d}'.format(n)
        xyz = [stem + 'X', stem + 'Y', stem + 'Z']
        sq_dist = sq_step[xyz].sum(axis=1)
        df[stem + 'disp'] = sq_dist.apply(np.sqrt)
    return df
def run_face_landmark(video_uri, out_dir, f_cfg):
    """
    Extract facial landmarks of one video, add displacements and save as csv

    The first csv matching '<out_loc>/<fl_name>_openface/*.csv' is used as
    input; nothing happens when no such file exists. Any failure is logged
    together with its traceback and is not re-raised.

    Args:
        video_uri: video path; f_cfg: raw variable config object
        out_dir: (str) Output directory for processed output
    """
    try:
        #Baseline logic
        ConfigFaceReader()  # instantiated as before; the instance itself is not used
        _, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        of_csv_path = glob.glob(join(out_loc, fl_name + '_openface/*.csv'))
        if not of_csv_path:
            return

        df_of = pd.read_csv(of_csv_path[0], error_bad_lines=False)
        # Copy so the bookkeeping columns are added to an independent frame.
        df_lmk = df_of[extract_col_nm_lmk(df_of)].copy()
        df_lmk['frame'] = df_of['frame']
        df_lmk['face_id'] = df_of[' face_id']
        df_lmk['timestamp'] = df_of[' timestamp']
        df_lmk['confidence'] = df_of[' confidence']
        df_lmk['success'] = df_of[' success']
        df_lmk = lmk_col_nm_map(df_lmk)
        df_lmk = add_disp_3D(df_lmk)
        df_lmk['dbm_master_url'] = video_uri
        logger.info('Processing Output file {} '.format(join(out_loc, fl_name)))
        ut.save_output(df_lmk, out_loc, fl_name, face_lmk_dir, csv_ext)
    except Exception:
        # Still best-effort, but keep the traceback so failures can be diagnosed.
        logger.exception('Failed to process video file')

4
opendbm/__init__.py Normal file
View File

@@ -0,0 +1,4 @@
from opendbm.api_lib.facial_activity import FacialActivity
from opendbm.api_lib.movement import Movement
from opendbm.api_lib.speech import Speech
from opendbm.api_lib.verbal_acoustics import VerbalAcoustics

View File

@@ -0,0 +1 @@
from .model import DEEEPSPEECH_URL, DEEPSPEECH_MODELS, OPENDBM_DATA, AudioModel

View File

@@ -0,0 +1 @@
from opendbm.api_lib.facial_activity.api import FacialActivity

View File

@@ -0,0 +1,49 @@
import tempfile
from opendbm.api_lib.model import VideoModel
from opendbm.dbm_lib.dbm_features.raw_features.video.face_au import run_face_au
class ActionUnit(VideoModel):
    """Facial action unit model; wraps ``run_face_au``."""

    def __init__(self):
        super().__init__()
        # Action units that have an intensity ("int") column.
        intensity_ids = (1, 2, 4, 5, 6, 7, 9, 10, 12, 14, 15, 17, 20, 23, 25, 26, 45)
        # Presence ("pres") columns additionally include AU28.
        presence_ids = (1, 2, 4, 5, 6, 7, 9, 10, 12, 14, 15, 17, 20, 23, 25, 26, 28, 45)
        intensity_cols = [f"fac_AU{au:02d}int" for au in intensity_ids]
        presence_cols = [f"fac_AU{au:02d}pres" for au in presence_ids]
        self._params = intensity_cols + presence_cols

    def _fit_transform(self, path):
        """Run ``run_face_au`` on ``path`` with the system temp dir as output dir and ``save=False``."""
        out_dir = f"{tempfile.gettempdir()}/"
        return run_face_au(path, out_dir, self.r_config, save=False)

View File

@@ -0,0 +1,20 @@
import tempfile
from opendbm.api_lib.model import VideoModel
from opendbm.dbm_lib import run_face_asymmetry
class Asymmetry(VideoModel):
    """Facial asymmetry model; wraps ``run_face_asymmetry``."""

    def __init__(self):
        super().__init__()
        regions = ("mouth", "eye", "eyebrow", "com")
        self._params = [f"fac_asymmask{region}" for region in regions]

    def _fit_transform(self, path):
        """Run ``run_face_asymmetry`` on ``path`` with the system temp dir as output dir and ``save=False``."""
        out_dir = f"{tempfile.gettempdir()}/"
        return run_face_asymmetry(path, out_dir, self.r_config, save=False)

View File

@@ -0,0 +1,89 @@
import tempfile
from opendbm.api_lib.model import VideoModel
from opendbm.dbm_lib.dbm_features.raw_features.video import face_emotion_expressivity
class Expressivity(VideoModel):
    """Facial expressivity model; wraps ``face_emotion_expressivity.run_face_expressivity``."""

    def __init__(self):
        super().__init__()
        # Raw OpenFace action unit columns (names keep their leading space).
        intensity_ids = (1, 2, 4, 5, 6, 7, 9, 10, 12, 14, 15, 17, 20, 25, 26, 45)
        presence_ids = (1, 2, 4, 5, 6, 7, 10, 12, 14, 15, 17, 20, 23, 25, 26, 28, 45)
        raw_cols = [f" AU{au:02d}_r" for au in intensity_ids]
        raw_cols += [f" AU{au:02d}_c" for au in presence_ids]
        raw_cols += [" AU09_c", " AU23_r", "s_confidence"]

        emotions = ("hap", "sad", "sur", "fea", "ang", "dis", "con", "pai", "neg", "pos")
        soft_cols = [f"fac_{emo}intsoft" for emo in emotions] + [
            "neu_exp",
            "fac_comlowintsoft",
            "fac_comuppintsoft",
            "cai_exp",
            "fac_comintsoft",
        ]
        # Presence columns exist only for the first seven emotions.
        pres_cols = [f"fac_{emo}pres" for emo in emotions[:7]]
        hard_cols = [f"fac_{emo}inthard" for emo in emotions] + [
            "neu_exp_full",
            "cai_exp_full",
            "fac_comlowinthard",
            "fac_comuppinthard",
            "fac_cominthard",
        ]
        self._params = raw_cols + soft_cols + pres_cols + hard_cols

    def _fit_transform(self, path):
        """Run ``run_face_expressivity`` on ``path`` with the system temp dir as output dir and ``save=False``."""
        out_dir = f"{tempfile.gettempdir()}/"
        return face_emotion_expressivity.run_face_expressivity(
            path, out_dir, self.r_config, save=False
        )

View File

@@ -0,0 +1,24 @@
import tempfile
from opendbm.api_lib.model import VideoModel
from opendbm.dbm_lib import run_face_landmark
def r_num_fmt(fmt, rnum):
    """Return ``fmt`` formatted once per number in ``rnum``, with ``i`` set to the number zero-padded to two digits."""
    return [fmt.format(i="%02d" % number) for number in rnum]
# Landmark column names: all 68 "r" names first, then "c", "X", "Y" and "Z".
lcols = []
for vr in ("r", "c", "X", "Y", "Z"):
    lcols.extend(r_num_fmt(f"fac_LMK{{i}}{vr}", range(68)))
class Landmark(VideoModel):
    """Facial landmark model; wraps ``run_face_landmark``."""

    def __init__(self):
        super().__init__()
        # Same list object as the module-level ``lcols`` (not a copy).
        self._params = lcols

    def _fit_transform(self, path):
        """Run ``run_face_landmark`` on ``path`` with the system temp dir as output dir and ``save=False``."""
        out_dir = f"{tempfile.gettempdir()}/"
        return run_face_landmark(path, out_dir, self.r_config, save=False)

View File

@@ -0,0 +1,84 @@
import shutil
import tempfile
from collections import OrderedDict
from opendbm.api_lib.model import VideoModel
from opendbm.api_lib.util import check_isfile
from ._action_unit import ActionUnit
from ._asymmetry import Asymmetry
from ._expressivity import Expressivity
from ._landmark import Landmark
class FacialActivity(VideoModel):
    """Facade that fits the landmark, action unit, asymmetry and expressivity models on one video."""

    def __init__(self):
        super().__init__()
        self._landmark = Landmark()
        self._action_unit = ActionUnit()
        self._asymmetry = Asymmetry()
        self._expressivity = Expressivity()
        self._models = OrderedDict(
            {
                "landmark": self._landmark,
                "action_unit": self._action_unit,
                "asymmetry": self._asymmetry,
                "expressivity": self._expressivity,
            }
        )

    def fit(self, path):
        """Fit a file in filepath to OpenFace Model. Make sure to set the Docker to be active first.
        For installation, see https://aicure.github.io/open_dbm/docs/openface-docker-installation

        Every sub-model's dataframe is replaced by the result of its
        ``_fit_transform``. The per-video folder in the system temp directory
        is removed afterwards, also when a sub-model raises.

        Parameters
        ----------
        path : string,
            File Path of MP4/MOV file.
        """
        check_isfile(path)
        result_path, bn = super()._fit(path, "facial")
        try:
            for model in self._models.values():
                model._df = model._fit_transform(result_path)
        finally:
            # Clean up even on failure so aborted runs do not leave files behind.
            shutil.rmtree(f"{tempfile.gettempdir()}/{bn}/")

    def get_landmark(self):
        """
        Get the model object of Landmark
        Returns:
            self: object
                Model Object
        """
        return self._landmark

    def get_action_unit(self):
        """
        Get the model object of Action Unit
        Returns:
            self: object
                Model Object
        """
        return self._action_unit

    def get_asymmetry(self):
        """
        Get the model object of Facial Asymmetry
        Returns:
            self: object
                Model Object
        """
        return self._asymmetry

    def get_expressivity(self):
        """
        Get the model object of Facial Expressivity
        Returns:
            self: object
                Model Object
        """
        return self._expressivity

205
opendbm/api_lib/model.py Normal file
View File

@@ -0,0 +1,205 @@
import os
import platform
import subprocess
import tempfile
from pathlib import Path
from opendbm.api_lib.util import docker_command_dec, wsllize
from opendbm.dbm_lib import config_derive_feature, config_raw_feature, config_reader
# OpenFace executables as invoked inside the docker container by VideoModel._fit:
# FaceLandmarkVid for the second (landmark output) run, FeatureExtraction for the first.
OPENFACE_PATH_VIDEO = "pkg/open_dbm/OpenFace/build/bin/FaceLandmarkVid"
OPENFACE_PATH = "pkg/open_dbm/OpenFace/build/bin/FeatureExtraction"
# DeepSpeech v0.9.1 release URL and model file names.
DEEEPSPEECH_URL = "https://github.com/mozilla/DeepSpeech/releases/download/v0.9.1"
DEEPSPEECH_MODELS = ["deepspeech-0.9.1-models.pbmm", "deepspeech-0.9.1-models.scorer"]
# Directory that contains this module.
MODEL_PATH = os.path.dirname(__file__)
# ".opendbm" folder in the user's home directory.
OPENDBM_DATA = Path.home() / ".opendbm"
# Absolute path of the dlib 68-landmark shape predictor, one level above MODEL_PATH.
DLIB_SHAPE_MODEL = os.path.abspath(
os.path.join(
MODEL_PATH, "../pkg/shape_detector/shape_predictor_68_face_landmarks.dat"
)
)
# Command-line flags joined with spaces and passed to both OpenFace executables.
FACIAL_ACTIVITY_ARGS = [
"-q",
"-2Dfp",
"-3Dfp",
"-pdmparams",
"-pose",
"-aus",
"-gaze",
"-f",
]
class Model(object):
    """
    Base class of all models: holds the config readers, the result dataframe
    (``_df``, None until fitted) and the result columns to summarise (``_params``).
    """

    def __init__(self):
        self.s_config = config_reader.ConfigReader()
        self.r_config = config_raw_feature.ConfigRawReader()
        self.d_config = config_derive_feature.ConfigDeriveReader()
        self._df = None
        self._params = []

    def _require_fitted(self):
        """Return the result dataframe, raising when the model has not been fit."""
        if self._df is None:
            # RuntimeError is an Exception subclass, so `except Exception` callers still work.
            raise RuntimeError("Model has not been fit yet")
        return self._df

    def to_dataframe(self):
        """
        Convert the result of the processed data into dataframe.
        Returns:
            pandas dataframe
        Raises:
            RuntimeError: when the model has not been fit yet
        """
        return self._require_fitted()

    def mean(self):
        """
        get mean/average of data
        Returns:
            pandas.Series
        Raises:
            RuntimeError: when the model has not been fit yet
        """
        return self._require_fitted()[self._params].mean()

    def std(self):
        """
        get std of data
        Returns:
            pandas.Series
        Raises:
            RuntimeError: when the model has not been fit yet
        """
        return self._require_fitted()[self._params].std()
class VideoModel(Model):
    """
    A class to process the data of facial and Movement.
    """

    def __init__(self):
        super().__init__()

    @docker_command_dec
    def _fit(self, path, dbm_group):
        """
        A function where the model is processing the data.
        The model lived in the docker image and is started through
        ``docker exec dbm_container``.

        Args:
            path: input path of the file
            dbm_group: "facial" runs FeatureExtraction only; any other value
                additionally runs FaceLandmarkVid (movement).
        Returns:
            (openface_csv, bn) for "facial", otherwise
            (openface_csv, openface_lmk_csv, bn), where bn is the input file
            name without extension.
        """
        docker_temp_dir = "/app/tmp/"
        wsl_cmd, temp_dir = wsllize((tempfile.gettempdir()))
        filename = os.path.basename(path)
        bn, _ = os.path.splitext(filename)
        facial_args = " ".join(FACIAL_ACTIVITY_ARGS)
        docker_call = wsl_cmd + ["docker", "exec", "dbm_container", "/bin/bash", "-c"]

        # NOTE(review): `path` is interpolated unquoted into the bash command
        # string; paths containing spaces or shell metacharacters will not
        # survive -- confirm how docker_command_dec prepares `path`.
        openface_csv = self._processing_video(
            "facial",
            docker_call
            + [f"{OPENFACE_PATH} {facial_args} {path} -out_dir {docker_temp_dir}"],
            f"{temp_dir}/{bn}/{bn}_openface/",
            docker_temp_dir + bn + ".csv",
            wsl_cmd,
            temp_dir,
            bn,
        )
        if dbm_group == "facial":
            return openface_csv, bn

        openface_lmk_csv = self._processing_video(
            "movement",
            docker_call
            + [
                f"{OPENFACE_PATH_VIDEO} {facial_args} {path} -out_dir {docker_temp_dir}"
            ],
            f"{temp_dir}/{bn}_landmark_output/{bn}_landmark_output_openface_lmk/",
            docker_temp_dir + bn + "_landmark_output.csv",
            wsl_cmd,
            temp_dir,
            bn,
        )
        return openface_csv, openface_lmk_csv, bn

    @staticmethod
    def _run_quietly(cmd):
        """Run ``cmd`` to completion, capturing (and discarding) its output."""
        # subprocess.run drains the pipes while waiting. Popen(...).wait() with
        # stdout=PIPE can block forever once the child fills the pipe buffer.
        subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            stdin=subprocess.PIPE,
        )

    def _processing_video(
        self, dbm_group, call, out_dir, result_path, wsl_cmd, temp_dir, bn
    ):
        """
        Helper function for _fit method: runs one OpenFace command in the
        container, creates ``out_dir`` and copies ``result_path`` out of the
        container into it.

        Returns:
            path of the copied csv: ``<out_dir><bn>.csv`` for "facial",
            ``<out_dir><bn>_landmark_output.csv`` otherwise. On Windows the
            ``temp_dir`` prefix is replaced by the native temp directory.
        """
        self._run_quietly(call)
        self._run_quietly(wsl_cmd + ["mkdir", "-p", out_dir])
        self._run_quietly(
            wsl_cmd + ["docker", "cp", f"dbm_container:/{result_path}", out_dir]
        )

        if platform.system() == "Windows":
            path_in_temp = out_dir[len(temp_dir) :]
            out_dir = (tempfile.gettempdir()) + path_in_temp

        if dbm_group == "facial":
            return out_dir + bn + ".csv"
        return out_dir + bn + "_landmark_output.csv"
class AudioModel(Model):
"""
A class to process the data of speech and acoustic
"""
def __init__(self):
super().__init__()
# Decorator: the wrapped function is called with the first positional argument
# as `path` plus all keyword arguments; any further positional arguments are dropped.
# NOTE(review): indentation was lost in this listing, so it cannot be told from
# here whether prep_func is defined inside AudioModel or at module level.
def prep_func(func):
def wrapper(self, *args, **kwargs):
path = args[0]
df = func(self, path, **kwargs)
return df
return wrapper

View File

@@ -0,0 +1 @@
from opendbm.api_lib.movement.api import Movement

View File

@@ -0,0 +1,11 @@
from opendbm.api_lib.model import DLIB_SHAPE_MODEL, VideoModel
from opendbm.dbm_lib.dbm_features.raw_features.movement.eye_blink import run_eye_blink
class EyeBlink(VideoModel):
    """Eye blink model; wraps ``run_eye_blink``."""

    def __init__(self):
        super().__init__()
        blink_cols = ("mov_blink_ear", "mov_blinkframes", "mov_blinkdur", "fps")
        self._params = list(blink_cols)

    def _fit_transform(self, path):
        """Run ``run_eye_blink`` on ``path`` with "." as output dir, the dlib shape model and ``save=False``."""
        out_dir = "."
        return run_eye_blink(path, out_dir, self.r_config, DLIB_SHAPE_MODEL, save=False)

View File

@@ -0,0 +1,24 @@
import tempfile
from opendbm.api_lib.model import VideoModel
from opendbm.dbm_lib.dbm_features.raw_features.movement.eye_gaze import run_eye_gaze
class EyeGaze(VideoModel):
    """Eye gaze model; wraps ``run_eye_gaze``."""

    def __init__(self):
        super().__init__()
        gaze_cols = [
            f"mov_{side}eye{axis}" for side in ("left", "right") for axis in "xyz"
        ]
        self._params = gaze_cols + ["mov_leyedisp", "mov_reyedisp"]

    def _fit_transform(self, path):
        """Run ``run_eye_gaze`` on ``path`` with the system temp dir as output dir and ``save=False``."""
        out_dir = f"{tempfile.gettempdir()}/"
        return run_eye_gaze(path, out_dir, self.r_config, save=False)

View File

@@ -0,0 +1,52 @@
import tempfile
from opendbm.api_lib.model import VideoModel
from opendbm.dbm_lib import fac_tremor_process
class FacialTremor(VideoModel):
    """Facial tremor model; wraps ``fac_tremor_process``."""

    def __init__(self):
        super().__init__()
        # Numeric suffixes of the result columns, in output order.
        landmark_ids = (5, 12, 8, 48, 54, 28, 51, 66, 57)
        stems = (
            "fac_features_mean",
            "fac_tremor_median",
            "fac_disp_median",
            "fac_corr",
        )
        self._params = [f"{stem}_{lmk}" for lmk in landmark_ids for stem in stems]

    def _fit_transform(self, path):
        """Run ``fac_tremor_process`` on ``path`` with the system temp dir as output dir and ``save=False``."""
        out_dir = f"{tempfile.gettempdir()}/"
        return fac_tremor_process(path, out_dir, self.r_config, save=False)

View File

@@ -0,0 +1,19 @@
import tempfile
from opendbm.api_lib.model import VideoModel
from opendbm.dbm_lib import run_head_movement
class HeadMovement(VideoModel):
    """Head movement model; wraps ``run_head_movement``."""

    def __init__(self):
        super().__init__()
        head_cols = (
            "mov_headvel",
            "mov_hposepitch",
            "mov_hposeyaw",
            "mov_hposeroll",
            "mov_hposedist",
        )
        self._params = list(head_cols)

    def _fit_transform(self, path):
        """Run ``run_head_movement`` on ``path`` with the system temp dir as output dir."""
        # NOTE(review): unlike the sibling models no save=False is passed here --
        # confirm against run_head_movement's signature.
        out_dir = f"{tempfile.gettempdir()}/"
        return run_head_movement(path, out_dir, self.r_config)

View File

@@ -0,0 +1,18 @@
from opendbm.api_lib.model import VideoModel
from opendbm.dbm_lib.dbm_features.raw_features.movement.voice_tremor import run_vtremor
class VocalTremor(VideoModel):
    """Model that delegates to ``run_vtremor``."""

    def __init__(self):
        """Initialise the parent model and set the vocal-tremor feature names."""
        super().__init__()
        # Frequency-tremor names first, then amplitude-tremor names; each group
        # lists freq, index, pindex in that order.
        self._params = [
            f"mov_{kind}trem{measure}"
            for kind in ("freq", "amp")
            for measure in ("freq", "index", "pindex")
        ]

    def _fit_transform(self, path):
        """Call ``run_vtremor`` on *path* with ``"."`` as output dir and ``save=False``."""
        out_dir = "."
        return run_vtremor(path, out_dir, self.r_config, save=False)

View File

@@ -0,0 +1,105 @@
import os
import shutil
import tempfile
from collections import OrderedDict
from opendbm.api_lib.model import VideoModel
from opendbm.api_lib.util import check_isfile
from opendbm.dbm_lib.controller import process_feature as pf
from ._eye_blink import EyeBlink
from ._eye_gaze import EyeGaze
from ._facial_tremor import FacialTremor
from ._head_movement import HeadMovement
from ._vocal_tremor import VocalTremor
class Movement(VideoModel):
    """Facade that fits all movement sub-models from one video file."""

    def __init__(self):
        """Create one instance of every movement sub-model and index them by name."""
        super().__init__()
        self._eye_blink = EyeBlink()
        self._eye_gaze = EyeGaze()
        self._facial_tremor = FacialTremor()
        self._head_movement = HeadMovement()
        self._vocal_tremor = VocalTremor()
        self._models = OrderedDict(
            {
                "eye_blink": self._eye_blink,
                "eye_gaze": self._eye_gaze,
                "facial_tremor": self._facial_tremor,
                "head_movement": self._head_movement,
                "vocal_tremor": self._vocal_tremor,
            }
        )

    def fit(self, path):
        """Fit a file in filepath to OpenFace Model. Make sure to set the Docker to be active first.

        For installation, see https://aicure.github.io/open_dbm/docs/openface-docker-installation

        Each sub-model stores its result in its own ``_df`` attribute. The
        temporary output directories and the temporary wav file are removed
        even when one of the sub-models raises.

        Parameters
        ----------
        path : string,
            File Path of MP4/MOV file.

        Raises
        ------
        FileNotFoundError
            If *path* is not an existing file (raised by ``check_isfile``).
        """
        check_isfile(path)
        result_path, result_path_lmk, bn = super()._fit(path, "movement")
        tmp_root = tempfile.gettempdir()
        wav_path = None
        try:
            wav_path = pf.audio_to_wav(path, tmp=True)
            # Input handed to each sub-model; anything not listed here
            # (currently only "eye_blink") receives the original video path.
            model_inputs = {
                "eye_gaze": result_path,
                "head_movement": result_path,
                "facial_tremor": result_path_lmk,
                "vocal_tremor": wav_path,
            }
            for k, v in self._models.items():
                v._df = v._fit_transform(model_inputs.get(k, path))
        finally:
            # Best-effort cleanup: a failure to delete temp data must not mask
            # the exception (if any) raised by a sub-model above.
            shutil.rmtree(f"{tmp_root}/{bn}/", ignore_errors=True)
            shutil.rmtree(f"{tmp_root}/{bn}_landmark_output/", ignore_errors=True)
            if wav_path is not None and os.path.exists(wav_path):
                os.remove(wav_path)

    def get_eye_blink(self):
        """
        Get the model object of Eye Blink
        Returns:
            self: object
                Model Object
        """
        return self._eye_blink

    def get_eye_gaze(self):
        """
        Get the model object of Eye Gaze
        Returns:
            self: object
                Model Object
        """
        return self._eye_gaze

    def get_facial_tremor(self):
        """
        Get the model object of Facial Tremor
        Returns:
            self: object
                Model Object
        """
        return self._facial_tremor

    def get_head_movement(self):
        """
        Get the model object of Head Movement
        Returns:
            self: object
                Model Object
        """
        return self._head_movement

    def get_vocal_tremor(self):
        """
        Get the model object of Vocal Tremor
        Returns:
            self: object
                Model Object
        """
        return self._vocal_tremor

View File

@@ -0,0 +1 @@
from opendbm.api_lib.speech.api import Speech

View File

@@ -0,0 +1,34 @@
import tempfile
from opendbm.api_lib.model import OPENDBM_DATA, AudioModel
from opendbm.dbm_lib import run_speech_feature
class SpeechFeature(AudioModel):
    """Audio model that delegates to ``run_speech_feature``."""

    def __init__(self):
        """Initialise the parent model and set the NLP speech-feature names."""
        super().__init__()
        # Each stem yields a "...PerAns" name immediately followed by its
        # "...PerSen" counterpart.
        stems = ("singPron", "pastTense", "pronouns", "verbs", "adjectives", "nouns")
        per_unit = [
            f"nlp_{stem}Per{unit}" for stem in stems for unit in ("Ans", "Sen")
        ]
        trailing = [
            "nlp_sentiment_mean",
            "nlp_mattr",
            "nlp_wordsPerMin",
            "nlp_totalTime",
        ]
        self._params = ["nlp_numSentences"] + per_unit + trailing

    @AudioModel.prep_func
    def _fit_transform(self, path):
        """Call ``run_speech_feature`` on *path* with the system temp dir as output dir."""
        out_dir = f"{tempfile.gettempdir()}/"
        return run_speech_feature(
            path, out_dir, self.r_config, OPENDBM_DATA, save=False
        )

View File

@@ -0,0 +1,16 @@
import tempfile
from opendbm.api_lib.model import OPENDBM_DATA, AudioModel
from opendbm.dbm_lib.dbm_features.raw_features.nlp.transcribe import run_transcribe
class Transcribe(AudioModel):
    """Audio model that delegates to ``run_transcribe``."""

    def __init__(self):
        """Initialise the parent model and set the transcription feature names."""
        super().__init__()
        names = ("nlp_transcribe", "nlp_totalTime")
        self._params = list(names)

    @AudioModel.prep_func
    def _fit_transform(self, path):
        """Call ``run_transcribe`` on *path* with the system temp dir as output dir."""
        out_dir = f"{tempfile.gettempdir()}/"
        return run_transcribe(path, out_dir, self.r_config, OPENDBM_DATA, save=False)

View File

@@ -0,0 +1,73 @@
import logging
import os
from opendbm.api_lib import DEEEPSPEECH_URL, DEEPSPEECH_MODELS, OPENDBM_DATA, AudioModel
from opendbm.api_lib.util import check_file, check_isfile, download_url
from ._speech_features import SpeechFeature
from ._transcribe import Transcribe
# Runs at import time: calls logging.basicConfig with level INFO.
logging.basicConfig(level=logging.INFO)
# getLogger() without a name returns the root logger; it is used below to
# report model downloads.
logger = logging.getLogger()
class Speech(AudioModel):
    """Facade that fits the transcription and speech-feature sub-models."""

    def __init__(self):
        """Create the sub-models and index them by name."""
        super().__init__()
        self._transcribe = Transcribe()
        self._speech_features = SpeechFeature()
        self._models = {
            "transcribe": self._transcribe,
            "speech_features": self._speech_features,
        }

    def fit(self, path):
        """Fit a file in filepath to Deepspeech Model.

        Each sub-model stores its result in its own ``_df`` attribute. When the
        input is not a ``.wav`` file a temporary wav is produced by
        ``check_file``; it is removed afterwards even if a sub-model raises.

        Parameters
        ----------
        path : string,
            File Path of Video/Sound file format.

        Raises
        ------
        FileNotFoundError
            If *path* is not an existing file (raised by ``check_isfile``).
        """
        check_isfile(path)
        self._check_model_exists()
        path, is_wav = check_file(path)
        try:
            for v in self._models.values():
                v._df = v._fit_transform(path)
        finally:
            # Only delete the file when it is the temporary conversion,
            # never the caller's own wav.
            if not is_wav:
                os.remove(path)

    @staticmethod
    def _check_model_exists():
        """
        Check if deepspeech model is exists. if not, download to OPENDBM Directory.
        """
        # makedirs(exist_ok=True) also creates missing parents and does not
        # fail when another process created the directory in the meantime.
        os.makedirs(OPENDBM_DATA, exist_ok=True)
        for dm in DEEPSPEECH_MODELS:
            pt = OPENDBM_DATA / dm
            if not pt.exists():
                logger.info(
                    f"{dm} not exists. Automatically downloading to {OPENDBM_DATA}/"
                )
                download_url(f"{DEEEPSPEECH_URL}/{dm}", pt)

    def get_transcribe(self):
        """
        Get the model object of Transcribe
        Returns:
            self: object
                Model Object
        """
        return self._transcribe

    def get_speech_features(self):
        """
        Get the model object of Speech Features
        Returns:
            self: object
                Model Object
        """
        return self._speech_features

196
opendbm/api_lib/util.py Normal file
View File

@@ -0,0 +1,196 @@
# import urllib, os
import logging
import os
import platform
import subprocess
import tempfile
import urllib.request as ur
from tqdm import tqdm
from opendbm.dbm_lib.controller import process_feature as pf
# Runs at import time: calls logging.basicConfig with level INFO.
logging.basicConfig(level=logging.INFO)
# getLogger() without a name returns the root logger; docker_command_dec logs
# failures through it.
logger = logging.getLogger()
# urllib = getattr(urllib, 'request', urllib)
class TqdmUpTo(tqdm):
    """Progress bar exposing `update_to(n)`, implemented via `tqdm.update(delta_n)`."""

    def update_to(self, b=1, bsize=1, tsize=None):
        """Advance the bar to an absolute position of ``b * bsize``.

        b : int, optional
            Number of blocks transferred so far [default: 1].
        bsize : int, optional
            Size of each block (in tqdm units) [default: 1].
        tsize : int, optional
            Total size (in tqdm units). If [default: None] remains unchanged.
        """
        if tsize is not None:
            self.total = tsize
        transferred = b * bsize
        # update() takes a delta, so subtract what has been counted already.
        return self.update(transferred - self.n)
def download_url(url, local_path):
    """
    Function to download url and drop it to the local path

    The data is first written to ``<local_path>.part`` and only renamed to
    ``local_path`` once the transfer finished, so an interrupted download never
    leaves a truncated file at the final location.

    Args:
        url: address of the resource to fetch
        local_path: destination file (string or path-like)
    """
    part_path = f"{local_path}.part"
    try:
        with TqdmUpTo(
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            miniters=1,
            desc=url.split("/")[-1],
        ) as t:  # all optional kwargs
            ur.urlretrieve(url, filename=part_path, reporthook=t.update_to, data=None)
            t.total = t.n
        os.replace(part_path, local_path)
    finally:
        # After a successful rename the partial file is gone; anything still
        # here is the remainder of a failed or interrupted transfer.
        if os.path.exists(part_path):
            os.remove(part_path)
def wsllize(path):
    """
    Add WSL prefix if the platform using windows.
    On Windows the input path is also converted with ``wsl wslpath``.

    Args:
        path: path of the input data
    Returns:
        tuple ``(wsl_cmd, path)``: ``(["wsl"], converted_path)`` on Windows,
        ``([], path)`` with the path untouched everywhere else
    """
    if platform.system() != "Windows":
        return [], path

    raw_output = subprocess.check_output(["wsl", "wslpath", repr(path)])
    converted = raw_output.decode("utf-8")
    # Drop a single trailing newline from the command output.
    if converted.endswith("\n"):
        converted = converted[:-1]
    return ["wsl"], converted
def check_isfile(path):
    """Raise ``FileNotFoundError`` unless *path* is an existing regular file."""
    if os.path.isfile(path):
        return
    raise FileNotFoundError("File not found. Make sure specify the correct path")
def check_file(path):
    """
    Check if file is in wav format. if not, convert to wav.

    Args:
        path: Input path
    Returns:
        path: *path* itself when it ends with ".wav", otherwise the value
            returned by ``pf.audio_to_wav(path, tmp=True)``
        bool: returns True if file is wav format
    """
    if path.endswith(".wav"):
        return path, True
    return pf.audio_to_wav(path, tmp=True), False
def check_docker_model_exist(wsl_cmd, model_name):
    """
    check if docker model is present or not.

    Args:
        wsl_cmd: wsl prefix is platform is Windows
        model_name: self-explanatory
    Raises:
        EnvironmentError: ``docker image ls`` could not be executed or exited
            with a non-zero status.
        FileNotFoundError: the command ran but *model_name* does not occur in
            its output.
    """
    try:
        image_listing = subprocess.check_output(
            wsl_cmd + ["docker", "image", "ls"]
        ).decode("utf-8")
    except (subprocess.CalledProcessError, OSError) as err:
        # OSError covers a missing docker/wsl executable, which would otherwise
        # surface as a FileNotFoundError indistinguishable from "model missing".
        raise EnvironmentError("Make sure to set the Docker to be active") from err

    if model_name not in image_listing:
        raise FileNotFoundError(
            f"""
                {model_name} model not found. Make sure to
                download the model first. For further instruction about download,
                please see our web documentation.
                """
        )
def docker_command_dec(fn):
    """
    Decorator to execute model in Docker environment.
    Starting the container and exit state is handled here

    The wrapped callable is invoked with exactly three positional arguments:
    ``args[0]``, ``"/app/" + basename(args[1])`` and ``args[2]``. Any exception
    raised while doing so is logged and swallowed, in which case the wrapper
    returns ``None``.

    Args:
        fn: any fn that need to access docker
    Returns:
        decorated fn
    """
    import functools
    import os

    def _run_quiet(cmd):
        """Run *cmd* to completion while draining its output.

        ``communicate()`` is used instead of ``wait()`` because waiting on a
        process whose stdout is a pipe can block forever once the pipe fills.
        """
        subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            stdin=subprocess.PIPE,
        ).communicate()

    @functools.wraps(fn)
    def inner(*args, **kwargs):
        wsl_cmd, path = wsllize((args[1]))
        check_docker_model_exist(wsl_cmd, "dbm-openface")

        create_docker = wsl_cmd + [
            "docker",
            "create",
            "-ti",
            "--name",
            "dbm_container",
            "dbm-openface",
            "bash",
        ]
        copy_file_to_docker = wsl_cmd + ["docker", "cp", path, "dbm_container:/app/"]
        start_container = wsl_cmd + ["docker", "start", "dbm_container"]
        terminate_container = wsl_cmd + ["docker", "stop", "dbm_container"]
        remove_container = wsl_cmd + ["docker", "rm", "dbm_container"]

        # Exit codes of the docker commands are intentionally not inspected.
        for setup_cmd in (create_docker, copy_file_to_docker, start_container):
            _run_quiet(setup_cmd)

        try:
            # The file was copied into /app/ inside the container, so the
            # wrapped function receives the in-container path.
            args = args[0], "/app/" + os.path.basename(args[1]), args[2]
            result = fn(*args, **kwargs)
            return result
        except Exception as e:
            # NOTE(review): failures are only logged, so callers receive None;
            # kept as-is because callers may rely on it.
            logger.info(f"Failed: {e}")
        finally:
            for teardown_cmd in (terminate_container, remove_container):
                _run_quiet(teardown_cmd)

    return inner

View File

@@ -0,0 +1 @@
from opendbm.api_lib.verbal_acoustics.api import VerbalAcoustics

View File

@@ -0,0 +1,12 @@
from opendbm.api_lib.model import AudioModel
from opendbm.dbm_lib import run_intensity
class AudioIntensity(AudioModel):
    """Audio model that delegates to ``run_intensity``."""

    def __init__(self):
        """Initialise the parent model and set the intensity feature name."""
        super().__init__()
        feature_name = "aco_int"
        self._params = [feature_name]

    @AudioModel.prep_func
    def _fit_transform(self, path, **kwargs):
        """Call ``run_intensity`` on *path*; extra keyword arguments are ignored."""
        out_dir = "."
        return run_intensity(path, out_dir, self.r_config, save=False)

View File

@@ -0,0 +1,14 @@
import pandas as pd
from opendbm.api_lib.model import AudioModel
from opendbm.dbm_lib import run_formant
class FormantFrequency(AudioModel):
    """Audio model that delegates to ``run_formant``."""

    def __init__(self):
        """Initialise the parent model and set the four formant feature names."""
        super().__init__()
        self._params = [f"aco_fm{idx}" for idx in range(1, 5)]

    @AudioModel.prep_func
    def _fit_transform(self, path, **kwargs):
        """Call ``run_formant`` on *path*; extra keyword arguments are ignored."""
        out_dir = "."
        return run_formant(path, out_dir, self.r_config, save=False)

View File

@@ -0,0 +1,14 @@
import pandas as pd
from opendbm.api_lib.model import AudioModel
from opendbm.dbm_lib.dbm_features.raw_features.audio.gne import run_gne
class GlottalNoiseRatio(AudioModel):
    """Audio model that delegates to ``run_gne``."""

    def __init__(self):
        """Initialise the parent model and set the GNE feature name."""
        super().__init__()
        feature_name = "aco_gne"
        self._params = [feature_name]

    @AudioModel.prep_func
    def _fit_transform(self, path, **kwargs):
        """Call ``run_gne`` on *path*.

        The keyword argument ``ff_df`` is required (``KeyError`` when absent)
        and is forwarded unchanged.
        """
        ff_df = kwargs["ff_df"]
        return run_gne(path, ".", self.r_config, save=False, ff_df=ff_df)

View File

@@ -0,0 +1,14 @@
import pandas as pd
from opendbm.api_lib.model import AudioModel
from opendbm.dbm_lib.dbm_features.raw_features.audio.hnr import run_hnr
class HarmonicsNoiseRatio(AudioModel):
    """Audio model that delegates to ``run_hnr``."""

    def __init__(self):
        """Initialise the parent model and set the HNR feature name."""
        super().__init__()
        feature_name = "aco_hnr"
        self._params = [feature_name]

    @AudioModel.prep_func
    def _fit_transform(self, path, **kwargs):
        """Call ``run_hnr`` on *path*; extra keyword arguments are ignored."""
        out_dir = "."
        return run_hnr(path, out_dir, self.r_config, save=False)

View File

@@ -0,0 +1,14 @@
import pandas as pd
from opendbm.api_lib.model import AudioModel
from opendbm.dbm_lib.dbm_features.raw_features.audio.jitter import run_jitter
class Jitter(AudioModel):
    """Audio model that delegates to ``run_jitter``."""

    def __init__(self):
        """Initialise the parent model and set the jitter feature name."""
        super().__init__()
        feature_name = "aco_jitter"
        self._params = [feature_name]

    @AudioModel.prep_func
    def _fit_transform(self, path, **kwargs):
        """Call ``run_jitter`` on *path*.

        The keyword argument ``ff_df`` is required (``KeyError`` when absent)
        and is forwarded unchanged.
        """
        ff_df = kwargs["ff_df"]
        return run_jitter(path, ".", self.r_config, save=False, ff_df=ff_df)

View File

@@ -0,0 +1,12 @@
from opendbm.api_lib.model import AudioModel
from opendbm.dbm_lib import run_mfcc
class MFCC(AudioModel):
    """Audio model that delegates to ``run_mfcc``."""

    def __init__(self):
        """Initialise the parent model and set the names ``aco_mfcc1`` .. ``aco_mfcc12``."""
        super().__init__()
        self._params = [f"aco_mfcc{coef}" for coef in range(1, 13)]

    @AudioModel.prep_func
    def _fit_transform(self, path, **kwargs):
        """Call ``run_mfcc`` on *path*; extra keyword arguments are ignored."""
        out_dir = "."
        return run_mfcc(path, out_dir, self.r_config, save=False)

View File

@@ -0,0 +1,20 @@
import pandas as pd
from opendbm.api_lib.model import AudioModel
from opendbm.dbm_lib import run_pause_segment
class PauseCharacteristics(AudioModel):
    """Audio model that delegates to ``run_pause_segment``."""

    def __init__(self):
        """Initialise the parent model and set the pause feature names."""
        super().__init__()
        suffixes = ("totaltime", "speakingtime", "numpauses", "pausetime", "pausefrac")
        self._params = ["aco_" + suffix for suffix in suffixes]

    @AudioModel.prep_func
    def _fit_transform(self, path, **kwargs):
        """Call ``run_pause_segment`` on *path*; extra keyword arguments are ignored."""
        out_dir = "."
        return run_pause_segment(path, out_dir, self.r_config, save=False)

View File

@@ -0,0 +1,14 @@
import pandas as pd
from opendbm.api_lib.model import AudioModel
from opendbm.dbm_lib.dbm_features.raw_features.audio.pitch_freq import run_pitch
class PitchFrequency(AudioModel):
    """Audio model that delegates to ``run_pitch``."""

    def __init__(self):
        """Initialise the parent model and set the pitch feature name."""
        super().__init__()
        feature_name = "aco_ff"
        self._params = [feature_name]

    @AudioModel.prep_func
    def _fit_transform(self, path, **kwargs):
        """Call ``run_pitch`` on *path*; extra keyword arguments are ignored."""
        out_dir = "."
        return run_pitch(path, out_dir, self.r_config, save=False)

View File

@@ -0,0 +1,14 @@
import pandas as pd
from opendbm.api_lib.model import AudioModel
from opendbm.dbm_lib.dbm_features.raw_features.audio.shimmer import run_shimmer
class Shimmer(AudioModel):
    """Audio model that delegates to ``run_shimmer``."""

    def __init__(self):
        """Initialise the parent model and set the shimmer feature name."""
        super().__init__()
        feature_name = "aco_shimmer"
        self._params = [feature_name]

    @AudioModel.prep_func
    def _fit_transform(self, path, **kwargs):
        """Call ``run_shimmer`` on *path*.

        The keyword argument ``ff_df`` is required (``KeyError`` when absent)
        and is forwarded unchanged.
        """
        ff_df = kwargs["ff_df"]
        return run_shimmer(path, ".", self.r_config, save=False, ff_df=ff_df)

View File

@@ -0,0 +1,14 @@
import pandas as pd
from opendbm.api_lib.model import AudioModel
from opendbm.dbm_lib.dbm_features.raw_features.audio.voice_frame_score import run_vfs
class VoicePrevalence(AudioModel):
    """Audio model that delegates to ``run_vfs``."""

    def __init__(self):
        """Initialise the parent model and set the voice-frame feature names."""
        super().__init__()
        names = ("aco_voiceframe", "aco_totvoiceframe", "aco_voicepct")
        self._params = list(names)

    @AudioModel.prep_func
    def _fit_transform(self, path, **kwargs):
        """Call ``run_vfs`` on *path*; extra keyword arguments are ignored."""
        out_dir = "."
        return run_vfs(path, out_dir, self.r_config, save=False)

View File

@@ -0,0 +1,154 @@
import os
from collections import OrderedDict
from opendbm.api_lib.model import AudioModel
from opendbm.api_lib.util import check_file, check_isfile
from opendbm.dbm_lib.controller import process_feature as pf
from ._audio_intensity import AudioIntensity
from ._formant_frequency import FormantFrequency
from ._glottal_noise import GlottalNoiseRatio
from ._harmonic_noise import HarmonicsNoiseRatio
from ._jitter import Jitter
from ._mfcc import MFCC
from ._pause_characteristics import PauseCharacteristics
from ._pitch_frequency import PitchFrequency
from ._shimmer import Shimmer
from ._voice_prevalence import VoicePrevalence
class VerbalAcoustics(AudioModel):
    """Facade that fits all verbal-acoustics sub-models from one file."""

    def __init__(self):
        """Create one instance of every acoustic sub-model and index them by name."""
        super().__init__()
        self._auint = AudioIntensity()
        self._pitchfreq = PitchFrequency()
        self._forfreq = FormantFrequency()
        self._hnr = HarmonicsNoiseRatio()
        self._gne = GlottalNoiseRatio()
        self._jitter = Jitter()
        self._shimmer = Shimmer()
        self._pchar = PauseCharacteristics()
        self._vopre = VoicePrevalence()
        self._mfcc = MFCC()
        # Order matters: "pitch_frequency" must be fitted before the models
        # that consume its result in fit().
        self._models = OrderedDict(
            {
                "audio_intensity": self._auint,
                "pitch_frequency": self._pitchfreq,
                "formant_frequency": self._forfreq,
                "harmonic_noise": self._hnr,
                "glottal_noise": self._gne,
                "jitter": self._jitter,
                "shimmer": self._shimmer,
                "pause_characteristics": self._pchar,
                "voice_prevalence": self._vopre,
                "mfcc": self._mfcc,
            }
        )

    def fit(self, path):
        """Fit a file in filepath to parselmouth Model.

        Each sub-model stores its result in its own ``_df`` attribute. When the
        input is not a ``.wav`` file a temporary wav is produced by
        ``check_file``; it is removed afterwards even if a sub-model raises.

        Parameters
        ----------
        path : string,
            File Path of Video/Sound file format.

        Raises
        ------
        FileNotFoundError
            If *path* is not an existing file (raised by ``check_isfile``).
        """
        check_isfile(path)
        path, is_wav = check_file(path)
        try:
            for k, v in self._models.items():
                if k in ["glottal_noise", "jitter", "shimmer"]:
                    # These three receive the already computed pitch result.
                    v._df = v._fit_transform(path, ff_df=self._pitchfreq._df)
                else:
                    v._df = v._fit_transform(path)
        finally:
            # Only delete the file when it is the temporary conversion,
            # never the caller's own wav.
            if not is_wav:
                os.remove(path)

    def get_audio_intensity(self):
        """
        Get the model object of Audio Intensity
        Returns:
            self: object
                Model Object
        """
        return self._auint

    def get_pitch_frequency(self):
        """
        Get the model object of Pitch Frequency
        Returns:
            self: object
                Model Object
        """
        return self._pitchfreq

    def get_formant_frequency(self):
        """
        Get the model object of Formant Frequency
        Returns:
            self: object
                Model Object
        """
        return self._forfreq

    def get_harmonic_noise(self):
        """
        Get the model object of Harmonic Noise
        Returns:
            self: object
                Model Object
        """
        return self._hnr

    def get_glottal_noise(self):
        """
        Get the model object of Glottal Noise
        Returns:
            self: object
                Model Object
        """
        return self._gne

    def get_jitter(self):
        """
        Get the model object of Jitter
        Returns:
            self: object
                Model Object
        """
        return self._jitter

    def get_shimmer(self):
        """
        Get the model object of Shimmer
        Returns:
            self: object
                Model Object
        """
        return self._shimmer

    def get_pause_characteristics(self):
        """
        Get the model object of Pause Characteristics
        Returns:
            self: object
                Model Object
        """
        return self._pchar

    def get_voice_prevalence(self):
        """
        Get the model object of Vocal Prevalence
        Returns:
            self: object
                Model Object
        """
        return self._vopre

    def get_mfcc(self):
        """
        Get the model object of MFCC
        Returns:
            self: object
                Model Object
        """
        return self._mfcc

View File

@@ -0,0 +1,19 @@
"""
file_name: init
project_name: DBM
created: 2020-20-07
"""
from __future__ import absolute_import, division, print_function
from .config import config_derive_feature, config_raw_feature, config_reader
from .dbm_features.raw_features.audio.formant_freq import run_formant
from .dbm_features.raw_features.audio.intensity import run_intensity
from .dbm_features.raw_features.audio.mfcc import run_mfcc
from .dbm_features.raw_features.audio.pause_segment import run_pause_segment
from .dbm_features.raw_features.movement.facial_tremor import fac_tremor_process
from .dbm_features.raw_features.movement.head_motion import run_head_movement
from .dbm_features.raw_features.nlp.speech_features import run_speech_feature
from .dbm_features.raw_features.video import ConfigFaceReader
from .dbm_features.raw_features.video.face_asymmetry import run_face_asymmetry
from .dbm_features.raw_features.video.face_landmark import run_face_landmark

View File

View File

@@ -4,26 +4,32 @@ project_name: DBM
created: 2020-20-07
"""
import os
import yaml
from dbm_lib import DBMLIB_DERIVE_FEATURE_CONFIG
# Directory containing this module.
DBMLIB_PATH = os.path.dirname(__file__)
# Absolute path of the default derived-feature YAML, resolved relative to
# this module's directory.
DBMLIB_DERIVE_FEATURE_CONFIG = os.path.abspath(
os.path.join(DBMLIB_PATH, "../../resources/features/derived_feature.yml")
)
class ConfigDeriveReader(object):
    """Read the derived-feature YAML configuration."""

    def __init__(self, feature_config_yml=None):
        """Load the configuration and store it in ``self.base_derive``.

        Args:
            feature_config_yml (None, optional): path of the YAML file to load;
                when ``None`` the default ``DBMLIB_DERIVE_FEATURE_CONFIG`` is used.
        """
        if feature_config_yml is None:
            feature_config = DBMLIB_DERIVE_FEATURE_CONFIG
        else:
            feature_config = feature_config_yml

        # CLoader only exists when PyYAML was built against LibYAML; fall back
        # to the pure-Python loader of the same family instead of crashing.
        # NOTE(review): this loader family can construct arbitrary Python
        # objects -- switch to a safe loader if configs may be untrusted.
        loader = getattr(yaml, "CLoader", yaml.Loader)
        with open(feature_config, "r") as ymlfile:
            config = yaml.load(ymlfile, Loader=loader)

        self.base_derive = config

View File

@@ -0,0 +1,283 @@
"""
file_name: config_raw_feature
project_name: DBM
created: 2020-20-07
"""
import os
import yaml
# Directory containing this module.
DBMLIB_PATH = os.path.dirname(__file__)
# Absolute path of the default raw-feature YAML, resolved relative to this
# module's directory.
DBMLIB_FEATURE_CONFIG = os.path.abspath(
os.path.join(DBMLIB_PATH, "../../resources/features/raw_feature.yml")
)
class ConfigRawReader(object):
    """Read the raw-feature YAML configuration.

    Every entry listed in ``_FEATURE_KEYS`` is copied from the ``raw_feature``
    section of the YAML onto the instance as an attribute of the same name.
    """

    # Number parts of the "fac_AU..int" / "fac_AU..pres" keys, in assignment order.
    _AU_INT_IDS = (
        "01", "02", "04", "05", "06", "07", "09", "10", "12",
        "14", "15", "17", "20", "23", "25", "26", "45",
    )
    _AU_PRES_IDS = (
        "01", "02", "04", "05", "06", "07", "09", "10", "12",
        "14", "15", "17", "20", "23", "25", "26", "28", "45",
    )
    # Numeric suffixes of the "fac_tremor_median_*" keys, in assignment order.
    _TREMOR_IDS = (5, 12, 8, 48, 54, 28, 51, 66, 57)

    # Output range
    _RANGE_KEYS = ("mov_headvel_start", "mov_headvel_end")

    # Acoustic variable
    _ACOUSTIC_KEYS = (
        (
            "aco_int",
            "aco_ff",
            "aco_voiceLabel",
            "aco_hnr",
            "aco_gne",
            "aco_fm1",
            "aco_fm2",
            "aco_fm3",
            "aco_fm4",
            "aco_jitter",
            "aco_shimmer",
        )
        + tuple(f"aco_mfcc{i}" for i in range(1, 13))
        + (
            "aco_voiceFrame",
            "aco_totVoiceFrame",
            "aco_voicePct",
            "aco_pausetime",
            "aco_totaltime",
            "aco_speakingtime",
            "aco_numpauses",
            "aco_pausefrac",
        )
    )

    # Facial Action Unit (for consistency)
    _ACTION_UNIT_KEYS = tuple(f"fac_AU{au}int" for au in _AU_INT_IDS) + tuple(
        f"fac_AU{au}pres" for au in _AU_PRES_IDS
    )

    # Facial Landmarks (for consistency): fac_LMK00disp .. fac_LMK67disp
    _LANDMARK_KEYS = tuple(f"fac_LMK{i:02d}disp" for i in range(68))

    # Facial features
    _FACIAL_KEYS = (
        "hap_exp",
        "sad_exp",
        "sur_exp",
        "fea_exp",
        "ang_exp",
        "dis_exp",
        "con_exp",
        "happ_occ",
        "sad_occ",
        "sur_occ",
        "fea_occ",
        "ang_occ",
        "dis_occ",
        "con_occ",
        "pos_exp",
        "neg_exp",
        "neu_exp",
        "cai_exp",
        "com_exp",
        "com_lower_exp",
        "com_upper_exp",
        "pai_exp",
        "hap_exp_full",
        "sad_exp_full",
        "sur_exp_full",
        "fea_exp_full",
        "ang_exp_full",
        "dis_exp_full",
        "con_exp_full",
        "pos_exp_full",
        "neg_exp_full",
        "neu_exp_full",
        "cai_exp_full",
        "com_exp_full",
        "com_lower_exp_full",
        "com_upper_exp_full",
        "pai_exp_full",
        "fac_AsymMaskMouth",
        "fac_AsymMaskEye",
        "fac_AsymMaskEyebrow",
        "fac_AsymMaskCom",
    )

    # Movement features
    _MOVEMENT_KEYS = (
        (
            "head_vel",
            "mov_blink_ear",
            "vid_dur",
            "fps",
            "mov_blinkframes",
            "mov_blinkdur",
            "mov_Hpose_Pitch",
            "mov_Hpose_Yaw",
            "mov_Hpose_Roll",
            "mov_Hpose_Dist",
            "mov_freq_trem_freq",
            "mov_freq_trem_index",
            "mov_freq_trem_pindex",
            "mov_amp_trem_freq",
            "mov_amp_trem_index",
            "mov_amp_trem_pindex",
        )
        + tuple(f"fac_tremor_median_{n}" for n in _TREMOR_IDS)
        + (
            "mov_leye_x",
            "mov_leye_y",
            "mov_leye_z",
            "mov_reye_x",
            "mov_reye_y",
            "mov_reye_z",
            "mov_eleft_disp",
            "mov_eright_disp",
        )
    )

    # NLP features
    _NLP_KEYS = (
        "nlp_transcribe",
        "nlp_numSentences",
        "nlp_singPronPerAns",
        "nlp_singPronPerSen",
        "nlp_pastTensePerAns",
        "nlp_pastTensePerSen",
        "nlp_pronounsPerAns",
        "nlp_pronounsPerSen",
        "nlp_verbsPerAns",
        "nlp_verbsPerSen",
        "nlp_adjectivesPerAns",
        "nlp_adjectivesPerSen",
        "nlp_nounsPerAns",
        "nlp_nounsPerSen",
        "nlp_sentiment_mean",
        "nlp_mattr",
        "nlp_wordsPerMin",
        "nlp_totalTime",
    )

    # Every key whose attribute name equals its YAML key, in assignment order.
    _FEATURE_KEYS = (
        _RANGE_KEYS
        + _ACOUSTIC_KEYS
        + _ACTION_UNIT_KEYS
        + _LANDMARK_KEYS
        + _FACIAL_KEYS
        + _MOVEMENT_KEYS
        + _NLP_KEYS
    )

    def __init__(self, feature_config_yml=None):
        """Load the configuration and expose its entries as attributes.

        Args:
            feature_config_yml (None, optional): path of the YAML file to load;
                when ``None`` the default ``DBMLIB_FEATURE_CONFIG`` is used.

        Raises:
            KeyError: the ``raw_feature`` section or one of the expected keys
                is missing from the file.
        """
        if feature_config_yml is None:
            feature_config = DBMLIB_FEATURE_CONFIG
        else:
            feature_config = feature_config_yml

        # CLoader only exists when PyYAML was built against LibYAML; fall back
        # to the pure-Python loader of the same family instead of crashing.
        # NOTE(review): this loader family can construct arbitrary Python
        # objects -- switch to a safe loader if configs may be untrusted.
        loader = getattr(yaml, "CLoader", yaml.Loader)
        with open(feature_config, "r") as ymlfile:
            config = yaml.load(ymlfile, Loader=loader)

        # Whole parsed document.
        self.base_raw = config

        raw = config["raw_feature"]
        # The only attribute whose name differs from its YAML key.
        self.err_reason = raw["error_reason"]
        for key in self._FEATURE_KEYS:
            setattr(self, key, raw[key])

View File

@@ -4,15 +4,22 @@ project_name: DBM
created: 2020-20-07
"""
import os
import yaml
from dbm_lib import DBMLIB_SERVICE_CONFIG
DBMLIB_PATH = os.path.dirname(__file__)
DBMLIB_SERVICE_CONFIG = os.path.abspath(
os.path.join(DBMLIB_PATH, "../../resources/services/services.yml")
)
class ConfigReader(object):
"""Summary
Read sevice end ponit
"""
def __init__(self,
service_config_yml=None):
def __init__(self, service_config_yml=None):
"""Summary
Args:
service_config_yml (None, optional): yml file defined service configuration
@@ -21,47 +28,47 @@ class ConfigReader(object):
service_config = DBMLIB_SERVICE_CONFIG
else:
service_config = service_config_yml
with open(service_config, 'r') as ymlfile:
config = yaml.load(ymlfile)
self.input_dir = config['cdx_configuration']['input_dir']
self.output_dir = config['cdx_configuration']['output_dir']
self.out_derived_dir = config['cdx_configuration']['out_derived_dir']
self.of_path = config['cdx_configuration']['open_face_path']
self.facial_landmarks = config['cdx_configuration']['facial_landmarks']
self.feature_group = config['cdx_configuration']['feature_group']
with open(service_config, "r") as ymlfile:
config = yaml.load(ymlfile, Loader=yaml.CLoader)
self.input_dir = config["cdx_configuration"]["input_dir"]
self.output_dir = config["cdx_configuration"]["output_dir"]
self.out_derived_dir = config["cdx_configuration"]["out_derived_dir"]
self.of_path = config["cdx_configuration"]["open_face_path"]
self.facial_landmarks = config["cdx_configuration"]["facial_landmarks"]
self.feature_group = config["cdx_configuration"]["feature_group"]
def get_open_face_path(self):
"""Summary
Returns:
TYPE: end point
"""
return self.of_path
def get_input_dir(self):
"""Summary
Returns:
TYPE: end point
"""
return self.input_dir
def get_output_dir(self):
"""Summary
Returns:
TYPE: end point
"""
return self.output_dir
def get_out_derived_dir(self):
"""Summary
Returns:
TYPE: end point
"""
return self.out_derived_dir
def get_fac_landmark_path(self):
"""Summary
Returns:
TYPE: end point
"""
return self.facial_landmarks
return self.facial_landmarks

View File

View File

@@ -0,0 +1,187 @@
"""
file_name: process_features
project_name: DBM
created: 2020-20-07
"""
import glob
import logging
import os
import subprocess
import tempfile
from os.path import basename, dirname, isfile, join, splitext
from opendbm.dbm_lib.dbm_features.raw_features import audio, movement, nlp, video
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
def audio_to_wav(input_filepath, tmp=False):
    """Extract a media file's audio track into a 44.1 kHz 16-bit PCM wav file.

    Args:
        input_filepath: (str) path of the media file handed to ffmpeg.
        tmp: (bool) when True the wav is written to the system temp directory,
            named after the input's base name (extension included); otherwise
            it is written next to the input file.
    Returns:
        (str) path of the wav file, or None when an unexpected error occurred.
        The path is also returned when ffmpeg exits with an error; that case
        is only logged.
    """
    try:
        fname, _ = splitext(input_filepath)
        if tmp:
            fname = os.path.basename(input_filepath)
            output_filepath = f"{tempfile.gettempdir()}/{fname}.wav"
        else:
            output_filepath = fname + ".wav"

        if isfile(output_filepath):
            logger.info("Output file {} already exists".format(output_filepath))
            return output_filepath

        call = [
            "ffmpeg",
            "-i",
            input_filepath,
            "-vn",
            "-acodec",
            "pcm_s16le",
            "-ar",
            "44100",
            output_filepath,
        ]
        logger.info("Converting audio from {} to wav".format(input_filepath))
        # ffmpeg's output is never read here. Sending it to DEVNULL instead of
        # an undrained PIPE avoids blocking forever once the pipe buffer fills.
        completed = subprocess.run(
            call,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        if completed.returncode != 0:
            logger.error(
                "ffmpeg exited with code {} while converting {}".format(
                    completed.returncode, input_filepath
                )
            )
        else:
            logger.info("wav output saved in {}".format(output_filepath))
        return output_filepath
    except Exception as e:
        logger.error("Failed to extract audio from Video: {}".format(e))
        return None
def process_acoustic(video_uri, out_dir, dbm_group, r_config):
    """
    Run every acoustic raw-feature extractor for one input, in a fixed order.
    Nothing runs when dbm_group is a non-empty collection without "acoustic".
    Args:
        video_uri: video path; out_dir: raw variable output dir
        dbm_group: list of features group to process; r_config: raw feature config object
    """
    run_everything = dbm_group is None or len(dbm_group) == 0
    if not (run_everything or "acoustic" in dbm_group):
        return

    logger.info(f"Processing acoustic variables from data in {video_uri}")

    # (log message, submodule of `audio`, entry point). The entry point is
    # looked up just before each call, after its log line is emitted.
    pipeline = (
        ("processing audio intensity....", "intensity", "run_intensity"),
        ("processing audio pitch freq....", "pitch_freq", "run_pitch"),
        ("processing HNR....", "hnr", "run_hnr"),
        ("processing GNE....", "gne", "run_gne"),
        ("processing voice frame score....", "voice_frame_score", "run_vfs"),
        ("processing formant frequency....", "formant_freq", "run_formant"),
        ("processing pause segment....", "pause_segment", "run_pause_segment"),
        ("processing jitter....", "jitter", "run_jitter"),
        ("processing shimmer....", "shimmer", "run_shimmer"),
        ("processing mfcc....", "mfcc", "run_mfcc"),
    )
    for message, module_name, runner_name in pipeline:
        logger.info(message)
        runner = getattr(getattr(audio, module_name), runner_name)
        runner(video_uri, out_dir, r_config)
def process_facial(video_uri, out_dir, dbm_group, r_config):
    """
    Run every facial raw-feature extractor for one input, in a fixed order.
    Nothing runs when dbm_group is a non-empty collection without "facial".
    Args:
        video_uri: video path; out_dir: raw variable output dir
        dbm_group: list of features to process; r_config: raw feature config object
    """
    run_everything = dbm_group is None or len(dbm_group) == 0
    if not (run_everything or "facial" in dbm_group):
        return

    logger.info(f"Processing facial variables from data in {video_uri}")

    # (log message, submodule of `video`, entry point), resolved lazily so each
    # lookup happens after its log line, right before the call.
    pipeline = (
        ("processing facial asymmetry....", "face_asymmetry", "run_face_asymmetry"),
        ("processing facial Action Unit....", "face_au", "run_face_au"),
        (
            "processing facial expressivity....",
            "face_emotion_expressivity",
            "run_face_expressivity",
        ),
        ("processing facial landmark....", "face_landmark", "run_face_landmark"),
    )
    for message, module_name, runner_name in pipeline:
        logger.info(message)
        runner = getattr(getattr(video, module_name), runner_name)
        runner(video_uri, out_dir, r_config)
def process_movement(video_uri, out_dir, dbm_group, r_config, dlib_model):
    """
    Run the movement raw-feature extractors (head movement, eye blink, eye gaze,
    voice tremor, facial tremor) for one input.
    Nothing runs when dbm_group is a non-empty collection without "movement".
    Args:
        video_uri: video path; out_dir: raw variable output dir
        dbm_group: list of features to process; r_config: raw feature config object
        dlib_model: shape predictor model path (passed to the eye blink step only)
    """
    run_everything = dbm_group is None or len(dbm_group) == 0
    if not (run_everything or "movement" in dbm_group):
        return

    logger.info(f"Processing movement variables from data in {video_uri}")

    common_args = (video_uri, out_dir, r_config)

    logger.info("processing head movement....")
    movement.head_motion.run_head_movement(*common_args)

    logger.info("processing eye blink....")
    movement.eye_blink.run_eye_blink(*common_args, dlib_model)

    logger.info("processing eye gaze....")
    movement.eye_gaze.run_eye_gaze(*common_args)

    logger.info("processing voice tremor....")
    movement.voice_tremor.run_vtremor(*common_args)

    logger.info("processing facial tremor....")
    movement.facial_tremor.fac_tremor_process(*common_args, model_output=True)
def process_nlp(video_uri, out_dir, dbm_group, tran_tog, r_config, deep_path):
    """
    Run transcription followed by speech-feature extraction for one input.
    Nothing runs when dbm_group is a non-empty collection without "speech".
    Args:
        video_uri: video path; out_dir: raw variable output dir
        dbm_group: list of features to process; r_config: raw feature config object
        tran_tog: forwarded unchanged to the speech feature step
        deep_path: deep speech build path (forwarded to the transcription step)
    """
    run_everything = dbm_group is None or len(dbm_group) == 0
    if not (run_everything or "speech" in dbm_group):
        return

    logger.info(f"Processing nlp variables from data in {video_uri}")
    # Transcription runs first; the speech feature step follows it.
    nlp.transcribe.run_transcribe(video_uri, out_dir, r_config, deep_path)
    nlp.speech_features.run_speech_feature(video_uri, out_dir, r_config, tran_tog)
def remove_file(file_path, file_ext=".wav"):
    """
    Remove the sibling of file_path that has the same stem and file_ext.
    Args:
        file_path: path whose directory and stem identify the file to delete
        file_ext: extension (or glob suffix) appended to the stem; default ".wav"
    Only the first match is deleted; nothing happens when there is no match.
    """
    file_dir = dirname(file_path)
    file_name, _ = splitext(basename(file_path))
    # Directory and stem are literal text: escape them so names such as
    # "take[1]" are not read as glob patterns. file_ext is left as given.
    pattern = join(glob.escape(file_dir), glob.escape(file_name) + file_ext)
    matches = glob.glob(pattern)
    if matches:
        os.remove(matches[0])

View File

View File

@@ -0,0 +1,7 @@
"""
file_name: init
project_name: DBM
created: 2020-20-07
"""
from __future__ import absolute_import, division, print_function

View File

@@ -4,15 +4,17 @@ project_name: DBM
created: 2020-20-07
"""
import pandas as pd
import numpy as np
import glob
import os
import logging
import os
from datetime import datetime
import numpy as np
import pandas as pd
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
logger = logging.getLogger()
def dict_to_df(feature_dict, file):
"""
@@ -21,92 +23,106 @@ def dict_to_df(feature_dict, file):
final_dict = {k: v for d in feature_dict for k, v in d.items()}
feature_df = pd.DataFrame([final_dict])
feature_df['Filename'] = file
feature_df["Filename"] = file
return feature_df
def save_derive_output(df_list, out_loc):
"""
Saving derive variable output
"""
try:
if len(df_list)>0:
if len(df_list) > 0:
df = df_list[0]
file_name = os.path.join(out_loc, 'derived_output.csv')
file_name = os.path.join(out_loc, "derived_output.csv")
if not os.path.exists(out_loc):
os.makedirs(out_loc)
df.to_csv(file_name, index=False)
except Exception as e:
logger.error('Failed to save derived variable csv')
e
logger.error("Failed to save derived variable csv")
def feature_output(df_fea, exp_var, cal_type):
"""
Computing mean value of dataframe columns
Computing mean value of dataframe columns
"""
exp_val = np.nan
try:
df_ = df_fea[exp_var].astype(float).copy()
df_ = df_.dropna().reset_index(drop=True)
if len(df_)>0:
if cal_type == 'mean':
exp_val = df_.mean(axis = 0, skipna = True)
if len(df_) > 0:
elif cal_type == 'std':
exp_val = df_.std(axis = 0, skipna = True)
if cal_type == "mean":
exp_val = df_.mean(axis=0, skipna=True)
elif cal_type == 'count':#use case for eye blink
exp_var = 'mov_blink'
exp_val = (len(df_)/df_[0])*60
elif cal_type == "std":
exp_val = df_.std(axis=0, skipna=True)
elif cal_type == 'pct':
if len(df_)>0:
exp_val = len(df_[df_ > 0])/len(df_)
elif cal_type == "count": # use case for eye blink
exp_var = "mov_blink"
exp_val = (len(df_) / df_[0]) * 60
elif cal_type == 'range':
elif cal_type == "pct":
if len(df_) > 0:
exp_val = len(df_[df_ > 0]) / len(df_)
elif cal_type == "range":
exp_val = max(df_) - min(df_)
except Exception as e:
logger.error('Failed to compute calculation: {}'.format(e))
logger.error("Failed to compute calculation: {}".format(e))
pass
var_name = exp_var + '_' + cal_type
var_name = exp_var + "_" + cal_type
exp_val = float("{0:.4f}".format(exp_val))
var_val = (var_name, exp_val)
return var_val
def cal_type_dict(var_df, raw_df, d_cfg_Obj, r_cfg_Obj):
var_name = str(var_df['var_id'])
#fetching key based on variable name from raw config
var_name = str(var_df["var_id"])
# fetching key based on variable name from raw config
var_key = list(r_cfg_Obj.keys())[list(r_cfg_Obj.values()).index(var_name)]
cal_type = d_cfg_Obj[var_key] # calculation type from config
cal_type = d_cfg_Obj[var_key] # calculation type from config
var_val = [feature_output(raw_df, var_name, cal) for cal in cal_type]
var_val_dict = dict(var_val)
return var_val_dict
def compute_feature(raw_df, var_cols, d_cfg_Obj, r_cfg_Obj):
"""
Computing features
"""
#Variable data frame for each feature group
var_df = pd.DataFrame(var_cols,columns=['var_id'])
# Variable data frame for each feature group
var_df = pd.DataFrame(var_cols, columns=["var_id"])
feature_dict = {}
if len(raw_df)>0:
feature_dict = var_df.apply(cal_type_dict, args=(raw_df, d_cfg_Obj, r_cfg_Obj, ), axis=1)
if len(raw_df) > 0:
feature_dict = var_df.apply(
cal_type_dict,
args=(
raw_df,
d_cfg_Obj,
r_cfg_Obj,
),
axis=1,
)
return feature_dict
def calc_derive(input_file, input_dir, r_cfg_Obj, d_cfg_Obj, feature):
"""
Calculating derived variable
@@ -114,51 +130,53 @@ def calc_derive(input_file, input_dir, r_cfg_Obj, d_cfg_Obj, feature):
df_list = []
df = pd.DataFrame()
for file in input_file:
file_name, _ = os.path.splitext(os.path.basename(file))
input_loc = os.path.join(input_dir, file_name)
var_cols = [r_cfg_Obj[x] for x in d_cfg_Obj[feature]]
fea_loc = d_cfg_Obj[feature + '_LOC']
fea_res = glob.glob(os.path.join(input_loc, '*/*/*' + fea_loc + '.csv'))
if len(fea_res)>0:
fea_loc = d_cfg_Obj[feature + "_LOC"]
fea_res = glob.glob(os.path.join(input_loc, "*/*/*" + fea_loc + ".csv"))
if len(fea_res) > 0:
raw_df = pd.read_csv(fea_res[0])
feature_dict = compute_feature(raw_df, var_cols, d_cfg_Obj, r_cfg_Obj)
if len(feature_dict)>0:
if len(feature_dict) > 0:
feature_df = dict_to_df(feature_dict, file)
df_list.append(feature_df)
if len(df_list)>0:
if len(df_list) > 0:
df = pd.concat(df_list, ignore_index=True)
return df
def run_derive(input_file, input_dir, output_dir, r_config, d_config):
"""
Processing derived variable
"""
d_cfg_Obj = d_config.base_derive['derive_feature']
r_cfg_Obj = r_config.base_raw['raw_feature']
feature_group = d_cfg_Obj['FEATURE_GROUP']
#Iterating over feature group
d_cfg_Obj = d_config.base_derive["derive_feature"]
r_cfg_Obj = r_config.base_raw["raw_feature"]
feature_group = d_cfg_Obj["FEATURE_GROUP"]
# Iterating over feature group
df_list = []
for feature in feature_group:
try:
df_fea = calc_derive(input_file, input_dir, r_cfg_Obj, d_cfg_Obj, feature)
if len(df_fea)>0:
if len(df_fea) > 0:
if len(df_list) == 0:
df_list.append(df_fea)
else:
result = pd.merge(df_list[0], df_fea, how='outer', on=['Filename'])
result = pd.merge(df_list[0], df_fea, how="outer", on=["Filename"])
df_list = [result]
except Exception as e:
logger.error('Failed to process derived variables {}'.format(feature))
e
logger.error("Failed to process derived variables {}".format(feature))
logger.info("Saving derived variable output...")
save_derive_output(df_list, output_dir)
save_derive_output(df_list, output_dir)

View File

@@ -0,0 +1,10 @@
from .formant_freq import run_formant
from .gne import run_gne
from .hnr import run_hnr
from .intensity import run_intensity
from .jitter import run_jitter
from .mfcc import run_mfcc
from .pause_segment import run_pause_segment
from .pitch_freq import run_pitch
from .shimmer import run_shimmer
from .voice_frame_score import run_vfs

View File

@@ -0,0 +1,155 @@
"""
file_name: formant_freq
project_name: DBM
created: 2020-20-07
"""
import glob
import logging
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
formant_dir = "acoustic/formant_freq"
csv_ext = "_formant.csv"
error_txt = "error: length less than 0.064"
def formant_list(formant, snd):
    """
    Sample formants one to four on a 1 ms grid
    Args:
        formant: Formant object for sound wave
        snd: Parselmouth sound object
    Returns:
        Four lists (first through fourth formant), one value per sampled time
    """
    tracks = ([], [], [], [])
    # The grid starts at 1 ms and stops short of (duration - 0.02 s), rounded
    # to two decimals.
    grid_end = round(snd.duration - 0.02, 2)
    for instant in np.arange(0.001, grid_end, 0.001):
        for number, track in enumerate(tracks, start=1):
            track.append(formant.get_value_at_time(number, instant))
    return tracks
def formant_score(path):
    """
    Load a sound with parselmouth and sample its Burg formant tracks
    Args:
        path: (.wav) audio file location
    Returns:
        The four formant lists produced by formant_list for this sound
    """
    recording = parselmouth.Sound(path)
    burg_formants = recording.to_formant_burg(time_step=0.001)
    return formant_list(burg_formants, recording)
def calc_formant(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
    """
    Build the per-frame formant frequency table and optionally save it
    Args:
        video_uri: stored in the "dbm_master_url" column
        audio_file: (.wav) parsed audio file; fl_name: input file name
        out_loc: (str) Output directory; r_config: raw variable config
        save: write the table through ut.save_output when True
    Returns:
        DataFrame with the four formant columns, the error-reason column,
        "Frames" and "dbm_master_url"
    """
    first, second, third, fourth = formant_score(audio_file)

    table = pd.DataFrame(first, columns=[r_config.aco_fm1])
    table[r_config.aco_fm2] = second
    table[r_config.aco_fm3] = third
    table[r_config.aco_fm4] = fourth
    # Empty-string cells become NaN before the bookkeeping columns are added.
    table.replace("", np.nan, regex=True, inplace=True)

    # will replace with threshold in future release
    table[r_config.err_reason] = "Pass"
    table["Frames"] = table.index
    table["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(table, out_loc, fl_name, formant_dir, csv_ext)
    return table
def empty_fm(video_uri, out_loc, fl_name, r_config, save=True):
    """
    Build (and optionally save) a one-row placeholder formant table whose
    numeric cells are NaN and whose error-reason cell holds error_txt
    """
    header = ["Frames"]
    header += [
        r_config.aco_fm1,
        r_config.aco_fm2,
        r_config.aco_fm3,
        r_config.aco_fm4,
    ]
    header.append(r_config.err_reason)

    placeholder_row = [np.nan] * 5 + [error_txt]
    df_fm = pd.DataFrame([placeholder_row], columns=header)
    df_fm["dbm_master_url"] = video_uri

    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext)
    return df_fm
def run_formant(video_uri, out_dir, r_config, save=True):
    """
    Compute formant frequencies for the wav file that belongs to video_uri
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
        save: forwarded to the table builders
    Returns:
        The formant DataFrame, a placeholder DataFrame when the audio is
        shorter than 0.064 s, or None when no wav file is found or
        processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if not aud_filter:
            return None

        audio_file = aud_filter[0]
        if float(ut.get_length(audio_file)) < 0.064:
            logger.info("Output file {} size is less than 0.064sec".format(audio_file))
            return empty_fm(video_uri, out_loc, fl_name, r_config, save=save)
        return calc_formant(
            video_uri, audio_file, out_loc, fl_name, r_config, save=save
        )
    except Exception:
        # Keep the best-effort contract (return None), but record the
        # traceback so the failure can be diagnosed.
        logger.exception("Failed to process audio file")
        return None

View File

@@ -0,0 +1,166 @@
"""
file_name: gne
project_name: DBM
created: 2020-20-07
"""
import glob
import logging
import os
from os.path import join
import more_itertools as mit
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
gne_dir = "acoustic/glottal_noise"
ff_dir = "acoustic/pitch"
csv_ext = "_gne.csv"
def gne_ratio(sound):
    """
    Using parselmouth library fetching glottal noise excitation ratio
    Args:
        sound: parselmouth object
    Returns:
        Largest GNE value over all bands, ignoring cells equal to -200
    """
    harmonicity_gne = sound.to_harmonicity_gne()
    gne_all_bands = harmonicity_gne.values
    # -200 cells are excluded from the maximum. np.nan is used because the
    # np.NaN alias no longer exists in NumPy 2.0.
    gne_all_bands = np.where(gne_all_bands == -200, np.nan, gne_all_bands)
    # following http://www.fon.hum.uva.nl/rob/NKI_TEVA/TEVA/HTML/NKI_TEVA.pdf
    gne = np.nanmax(gne_all_bands)
    return gne
def empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
    """
    Build (and optionally save) a one-row placeholder GNE table: NaN values
    plus error_txt in the error-reason column
    """
    header = ["Frames", r_config.aco_gne, r_config.err_reason]
    placeholder_row = [np.nan, np.nan, error_txt]

    df_gne = pd.DataFrame([placeholder_row], columns=header)
    df_gne["dbm_master_url"] = video_uri

    if not save:
        return df_gne
    logger.info("Saving Output file {} ".format(out_loc))
    ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
    return df_gne
def segment_gne(com_speech_sort, voiced_yes, voiced_no, gne_all_frames, audio_file):
    """
    Calculate GNE for each voice segment
    Args:
        com_speech_sort: all segments, each a sequence of pitch frame numbers
        voiced_yes: segments for which GNE is computed
        voiced_no: unused; kept for interface compatibility
        gne_all_frames: list filled in place, one slot per segment
        audio_file: (.wav) audio file location
    Returns:
        gne_all_frames, with NaN for segments that are skipped or that fail
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=0.001)
    for idx, vs in enumerate(com_speech_sort):
        # Set the default before the try so a failure can never leave the
        # previous segment's value (or an unbound name) behind.
        max_gne = np.nan
        try:
            if vs in voiced_yes and len(vs) > 1:
                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])
                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))
                # Rebuild the excerpt at the recording's own sampling rate
                # instead of relying on the constructor default.
                samples = parselmouth.Sound(
                    snd.as_array()[0][snd_start:snd_end],
                    sampling_frequency=snd.sampling_frequency,
                )
                max_gne = gne_ratio(samples)
        except Exception as err:
            # Best effort per segment: keep NaN, but leave a trace.
            logger.debug("GNE failed for segment {}: {}".format(idx, err))
        gne_all_frames[idx] = max_gne
    return gne_all_frames
def calc_gne(video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None):
    """
    Preparing gne matrix
    Args:
        video_uri: stored in the "dbm_master_url" column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv's
        fl_name: input file name; r_config: raw variable config
        save: write the table through ut.save_output when True
        ff_df: optional pitch DataFrame; when None the pitch output directory
            under out_loc is used instead
    Returns:
        GNE DataFrame, or a placeholder DataFrame when no pitch data exists
    """
    dir_path = os.path.join(out_loc, ff_dir)
    if ff_df is None and not os.path.isdir(dir_path):
        error_txt = "error: pitch freq not available"
        return empty_gne(video_uri, out_loc, fl_name, r_config, error_txt, save=save)

    if ff_df is not None:
        voice_seg = ut.process_segment_pitch(ff_df, r_config)
    else:
        voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)

    # voice_seg[0..2] are forwarded as segment_gne's first three arguments.
    # np.nan replaces the np.NaN alias, which NumPy 2.0 removed.
    gne_all_frames = [np.nan] * len(voice_seg[0])
    gne_segment_frames = segment_gne(
        voice_seg[0], voice_seg[1], voice_seg[2], gne_all_frames, audio_file
    )

    df_gne = pd.DataFrame(gne_segment_frames, columns=[r_config.aco_gne])
    # will replace with threshold in future release
    df_gne[r_config.err_reason] = "Pass"
    df_gne["Frames"] = df_gne.index
    df_gne["dbm_master_url"] = video_uri

    if save:
        logger.info("Processing Output file {} ".format(out_loc))
        ut.save_output(df_gne, out_loc, fl_name, gne_dir, csv_ext)
    return df_gne
def run_gne(video_uri, out_dir, r_config, save=True, ff_df=None):
    """
    Compute the glottal noise ratio for the wav file that belongs to video_uri
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
        save: forwarded to the table builders
        ff_df: optional pitch DataFrame forwarded to calc_gne
    Returns:
        The GNE DataFrame, a placeholder DataFrame when the audio is shorter
        than 0.064 s, or None when no wav file is found or processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if not aud_filter:
            return None

        audio_file = aud_filter[0]
        if float(ut.get_length(audio_file)) < 0.064:
            logger.info("Output file {} size is less than 0.064sec".format(audio_file))
            error_txt = "error: length less than 0.064"
            return empty_gne(
                video_uri, out_loc, fl_name, r_config, error_txt, save=save
            )
        return calc_gne(
            video_uri,
            audio_file,
            out_loc,
            fl_name,
            r_config,
            save=save,
            ff_df=ff_df,
        )
    except Exception:
        # Keep the best-effort contract (return None), but record the
        # traceback so the failure can be diagnosed.
        logger.exception("Failed to process audio file")
        return None

View File

@@ -0,0 +1,112 @@
"""
file_name: hnr
project_name: DBM
created: 2020-20-07
"""
import glob
import logging
import os
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
hnr_dir = "acoustic/harmonic_noise"
csv_ext = "_hnr.csv"
error_txt = "error: length less than 0.064"
def hnr_ratio(filepath):
    """
    Using parselmouth library fetching harmonic noise ratio
    Args:
        filepath: (.wav) audio file location
    Returns:
        Transposed array of harmonicity values with -200 cells replaced by NaN
    """
    sound = parselmouth.Sound(filepath)
    harmonicity = sound.to_harmonicity_ac(time_step=0.001)
    hnr_all_frames = harmonicity.values
    # -200 cells are masked out. np.nan is used because the np.NaN alias no
    # longer exists in NumPy 2.0.
    hnr_all_frames = np.where(hnr_all_frames == -200, np.nan, hnr_all_frames)
    return hnr_all_frames.transpose()
def calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
    """
    Build the per-frame harmonic noise table and optionally save it
    Args:
        video_uri: stored in the "dbm_master_url" column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv's
        fl_name: input file name; r_config: raw variable config
        save: write the table through ut.save_output when True
    """
    table = pd.DataFrame(hnr_ratio(audio_file), columns=[r_config.aco_hnr])
    table["Frames"] = table.index
    table["dbm_master_url"] = video_uri
    # will replace with threshold in future release
    table[r_config.err_reason] = "Pass"

    if not save:
        return table
    logger.info("Saving Output file {} ".format(out_loc))
    ut.save_output(table, out_loc, fl_name, hnr_dir, csv_ext)
    return table
def empty_hnr(video_uri, out_loc, fl_name, r_config, save=True):
    """
    Build (and optionally save) a one-row placeholder HNR table: NaN values
    plus error_txt in the error-reason column
    """
    header = ["Frames", r_config.aco_hnr, r_config.err_reason]
    placeholder_row = [np.nan, np.nan, error_txt]

    df_hnr = pd.DataFrame([placeholder_row], columns=header)
    df_hnr["dbm_master_url"] = video_uri

    if not save:
        return df_hnr
    logger.info("Saving Output file {} ".format(out_loc))
    ut.save_output(df_hnr, out_loc, fl_name, hnr_dir, csv_ext)
    return df_hnr
def run_hnr(video_uri, out_dir, r_config, save=True):
    """
    Compute the harmonic noise ratio for the wav file that belongs to video_uri
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
        save: forwarded to the table builders
    Returns:
        The HNR DataFrame, a placeholder DataFrame when the audio is shorter
        than 0.064 s, or None when no wav file is found or processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if not aud_filter:
            return None

        audio_file = aud_filter[0]
        if float(ut.get_length(audio_file)) < 0.064:
            logger.info("Output file {} size is less than 0.064sec".format(audio_file))
            return empty_hnr(video_uri, out_loc, fl_name, r_config, save=save)
        return calc_hnr(video_uri, audio_file, out_loc, fl_name, r_config, save=save)
    except Exception:
        # Keep the best-effort contract (return None), but record the
        # traceback so the failure can be diagnosed.
        logger.exception("Failed to process audio file")
        return None

View File

@@ -0,0 +1,107 @@
"""
file_name: intensity
project_name: DBM
created: 2020-20-07
"""
import glob
import logging
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
intensity_dir = "acoustic/intensity"
csv_ext = "_intensity.csv"
error_txt = "error: length less than 0.064"
def intensity_score(path):
    """
    Load a sound with parselmouth and compute its intensity contour
    Args:
        path: (.wav) audio file location
    Returns:
        First row of the intensity object's value matrix (1 ms time step)
    """
    contour = parselmouth.Sound(path).to_intensity(time_step=0.001)
    return contour.values[0]
def calc_intensity(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
    """
    Build the per-frame intensity table and optionally save it
    Args:
        video_uri: stored in the "dbm_master_url" column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv's
        fl_name: input file name; r_config: raw variable config
        save: write the table through ut.save_output when True
    """
    table = pd.DataFrame(intensity_score(audio_file), columns=[r_config.aco_int])
    table["Frames"] = table.index
    table["dbm_master_url"] = video_uri
    # will replace with threshold in future release
    table[r_config.err_reason] = "Pass"

    if not save:
        return table
    logger.info("Saving Output file {} ".format(out_loc))
    ut.save_output(table, out_loc, fl_name, intensity_dir, csv_ext)
    return table
def empty_intensity(video_uri, out_loc, fl_name, r_config, save=True):
    """
    Build (and optionally save) a one-row placeholder intensity table: NaN
    values plus error_txt in the error-reason column
    """
    header = ["Frames", r_config.aco_int, r_config.err_reason]
    placeholder_row = [np.nan, np.nan, error_txt]

    df_int = pd.DataFrame([placeholder_row], columns=header)
    df_int["dbm_master_url"] = video_uri

    if not save:
        return df_int
    logger.info("Saving Output file {} ".format(out_loc))
    ut.save_output(df_int, out_loc, fl_name, intensity_dir, csv_ext)
    return df_int
def run_intensity(video_uri, out_dir, r_config, save=True):
    """
    Compute audio intensity for the wav file that belongs to video_uri
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
        save: forwarded to the table builders
    Returns:
        The intensity DataFrame, a placeholder DataFrame when the audio is
        shorter than 0.064 s, or None when no wav file is found or
        processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if not aud_filter:
            return None

        audio_file = aud_filter[0]
        if float(ut.get_length(audio_file)) < 0.064:
            logger.info("Output file {} size is less than 0.064sec".format(audio_file))
            return empty_intensity(video_uri, out_loc, fl_name, r_config, save=save)
        return calc_intensity(
            video_uri, audio_file, out_loc, fl_name, r_config, save=save
        )
    except Exception:
        # Keep the best-effort contract (return None), but record the
        # traceback so the failure can be diagnosed.
        logger.exception("Failed to process audio file")
        return None

View File

@@ -0,0 +1,166 @@
"""
file_name: jitter_processing
project_name: DBM
created: 2020-20-07
"""
import glob
import logging
import os
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
jitter_dir = "acoustic/jitter"
ff_dir = "acoustic/pitch"
csv_ext = "_jitter.csv"
def audio_jitter(sound):
    """
    Compute local jitter through two Praat commands
    Args:
        sound: parselmouth object
    Returns:
        Result of Praat's "Get jitter (local)" on the sound's point process
    """
    praat_call = parselmouth.praat.call
    # Step 1: periodic (cc) point process, called with 80 and 500.
    pulses = praat_call(sound, "To PointProcess (periodic, cc)...", 80, 500)
    # Step 2: local jitter of that point process.
    return praat_call(pulses, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
def empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
    """
    Build (and optionally save) a one-row placeholder jitter table: NaN values
    plus error_txt in the error-reason column
    """
    header = ["Frames", r_config.aco_jitter, r_config.err_reason]
    placeholder_row = [np.nan, np.nan, error_txt]

    df_jitter = pd.DataFrame([placeholder_row], columns=header)
    df_jitter["dbm_master_url"] = video_uri

    if not save:
        return df_jitter
    logger.info("Saving Output file {} ".format(out_loc))
    ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
    return df_jitter
def segment_jitter(com_speech_sort, voiced_yes, voiced_no, jitter_frames, audio_file):
    """
    Calculate jitter for each voice segment
    Args:
        com_speech_sort: all segments, each a sequence of pitch frame numbers
        voiced_yes: segments for which jitter is computed
        voiced_no: unused; kept for interface compatibility
        jitter_frames: list filled in place, one slot per segment
        audio_file: (.wav) audio file location
    Returns:
        jitter_frames, with NaN for segments that are skipped or that fail
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=0.001)
    for idx, vs in enumerate(com_speech_sort):
        # Set the default before the try so a failure can never leave the
        # previous segment's value (or an unbound name) behind.
        jitter = np.nan
        try:
            if vs in voiced_yes and len(vs) > 1:
                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])
                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))
                # Rebuild the excerpt at the recording's own sampling rate
                # instead of relying on the constructor default.
                samples = parselmouth.Sound(
                    snd.as_array()[0][snd_start:snd_end],
                    sampling_frequency=snd.sampling_frequency,
                )
                jitter = audio_jitter(samples)
        except Exception as err:
            # Best effort per segment: keep NaN, but leave a trace.
            logger.debug("Jitter failed for segment {}: {}".format(idx, err))
        jitter_frames[idx] = jitter
    return jitter_frames
def calc_jitter(
    video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None
):
    """
    Preparing jitter matrix
    Args:
        video_uri: stored in the "dbm_master_url" column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv
        fl_name: input file name
        r_config: config.config_raw_feature.pyConfigFeatureNmReader object
        save: write the table through ut.save_output when True
        ff_df: optional pitch DataFrame; when None the pitch output directory
            under out_loc is used instead
    Returns:
        Jitter DataFrame, or a placeholder DataFrame when no pitch data exists
    """
    dir_path = os.path.join(out_loc, ff_dir)
    if ff_df is None and not os.path.isdir(dir_path):
        error_txt = "error: fundamental freq not available"
        return empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt, save=save)

    if ff_df is not None:
        voice_seg = ut.process_segment_pitch(ff_df, r_config)
    else:
        voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)

    # voice_seg[0..2] are forwarded as segment_jitter's first three arguments.
    # np.nan replaces the np.NaN alias, which NumPy 2.0 removed.
    jitter_frames = [np.nan] * len(voice_seg[0])
    jitter_segment_frames = segment_jitter(
        voice_seg[0], voice_seg[1], voice_seg[2], jitter_frames, audio_file
    )

    df_jitter = pd.DataFrame(jitter_segment_frames, columns=[r_config.aco_jitter])
    # will replace with threshold in future release
    df_jitter[r_config.err_reason] = "Pass"
    df_jitter["Frames"] = df_jitter.index
    df_jitter["dbm_master_url"] = video_uri

    if save:
        logger.info("Processing Output file {} ".format(out_loc))
        ut.save_output(df_jitter, out_loc, fl_name, jitter_dir, csv_ext)
    return df_jitter
def run_jitter(video_uri, out_dir, r_config, save=True, ff_df=None):
    """
    Compute jitter for the wav file that belongs to video_uri
    Args:
        video_uri: video path; r_config: raw variable config object
        out_dir: (str) Output directory for processed output
        save: forwarded to the table builders
        ff_df: optional pitch DataFrame forwarded to calc_jitter
    Returns:
        The jitter DataFrame, a placeholder DataFrame when the audio is
        shorter than 0.064 s, or None when no wav file is found or
        processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        candidates = glob.glob(join(input_loc, fl_name + ".wav"))
        if len(candidates) == 0:
            return None

        audio_file = candidates[0]
        duration = ut.get_length(audio_file)
        if not float(duration) < 0.064:
            return calc_jitter(
                video_uri,
                audio_file,
                out_loc,
                fl_name,
                r_config,
                save=save,
                ff_df=ff_df,
            )

        logger.info("Output file {} size is less than 0.064sec".format(audio_file))
        error_txt = "error: length less than 0.064"
        return empty_jitter(video_uri, out_loc, fl_name, r_config, error_txt, save=save)
    except Exception as e:
        logger.error("Error in jitter: {}".format(e))
        logger.error("Failed to process audio file")

View File

@@ -0,0 +1,147 @@
"""
file_name: mfcc
project_name: DBM
created: 2020-20-07
"""
import glob
import logging
import os
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
mfcc_dir = "acoustic/mfcc"
csv_ext = "_mfcc.csv"
error_txt = "error: length less than 0.064"
def empty_mfcc(video_uri, out_loc, fl_name, r_config, save=True):
    """
    Build (and optionally save) a one-row placeholder MFCC table: NaN for
    "Frames" and the twelve coefficient columns, error_txt as the error reason
    """
    coefficient_cols = [
        getattr(r_config, "aco_mfcc{}".format(number)) for number in range(1, 13)
    ]
    header = ["Frames"] + coefficient_cols + [r_config.err_reason]
    # One NaN for "Frames" plus one per coefficient, then the error text.
    placeholder_row = [np.nan] * 13 + [error_txt]

    df_mfcc = pd.DataFrame([placeholder_row], columns=header)
    df_mfcc["dbm_master_url"] = video_uri

    if not save:
        return df_mfcc
    logger.info("Saving Output file {} ".format(out_loc))
    ut.save_output(df_mfcc, out_loc, fl_name, mfcc_dir, csv_ext)
    return df_mfcc
def audio_mfcc(path):
    """
    Extract MFCCs from an audio file with parselmouth.

    Args:
        path: (.wav) audio file location
    Returns:
        (ndarray) the MFCC array with its first row removed
    """
    mfcc_matrix = (
        parselmouth.Sound(path)
        .to_mfcc(time_step=0.001, number_of_coefficients=12)
        .to_array()
    )
    # Row 0 is discarded; presumably it is the 0th (energy) coefficient,
    # leaving coefficients 1-12 -- TODO confirm against the parselmouth docs.
    return np.delete(mfcc_matrix, 0, axis=0)
def calc_mfcc(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
    """
    Assemble the per-frame MFCC dataframe for one audio file.

    Args:
        video_uri: source video path, stored in the ``dbm_master_url`` column
        audio_file: (.wav) parsed audio file
        out_loc: output location to save csv
        fl_name: (str) name of audio file
        r_config: config object; column names are read from
            ``r_config.base_raw["raw_feature"]`` and ``r_config.err_reason``
        save: (bool) when True the frame is also written via ``ut.save_output``
    Returns:
        (DataFrame) one row per MFCC frame
    """
    coeffs = audio_mfcc(audio_file)
    name_map = r_config.base_raw["raw_feature"]
    # Row k of the coefficient array becomes the column configured as aco_mfcc<k+1>.
    columns = {name_map["aco_mfcc" + str(k + 1)]: coeffs[k, :] for k in range(12)}

    df = pd.DataFrame(columns)
    df["Frames"] = df.index
    df[r_config.err_reason] = "Pass"  # may replace based on threshold in future release
    df["dbm_master_url"] = video_uri

    if not save:
        return df
    logger.info("Saving Output file {} ".format(out_loc))
    ut.save_output(df, out_loc, fl_name, mfcc_dir, csv_ext)
    return df
def run_mfcc(video_uri, out_dir, r_config, save=True):
    """
    Entry point: compute MFCCs for the audio that belongs to one video.

    The audio is looked up as ``<input_loc>/<fl_name>.wav`` using the paths
    returned by ``ut.filter_path``.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        save: (bool) forwarded to the helpers; when True results are written to csv
    Returns:
        (DataFrame) MFCC frame, or the placeholder frame when the audio is
        shorter than 0.064 sec; None when no matching .wav exists or when
        processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if len(aud_filter) > 0:
            audio_file = aud_filter[0]
            aud_dur = ut.get_length(audio_file)
            if float(aud_dur) < 0.064:
                logger.info(
                    "Output file {} size is less than 0.064sec".format(audio_file)
                )
                return empty_mfcc(video_uri, out_loc, fl_name, r_config, save=save)
            return calc_mfcc(
                video_uri, audio_file, out_loc, fl_name, r_config, save=save
            )
    except Exception as e:
        # Best-effort boundary: report the cause instead of discarding it,
        # then fall through and return None as before.
        logger.error("Error in mfcc: {}".format(e))
        logger.error("Failed to process audio file")
    return None

View File

@@ -4,24 +4,25 @@ project_name: DBM
created: 2020-20-07
"""
import os
import glob
from pydub import AudioSegment
import librosa
import pandas as pd
import numpy as np
import webrtcvad
from os.path import join
import logging
import os
from os.path import join
from dbm_lib.dbm_features.raw_features.util import vad_utilities as vu
from dbm_lib.dbm_features.raw_features.util import util as ut
import numpy as np
import pandas as pd
import webrtcvad
from pydub import AudioSegment
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
from opendbm.dbm_lib.dbm_features.raw_features.util import vad_utilities as vu
logging.basicConfig(level=logging.INFO)
logger=logging.getLogger()
logger = logging.getLogger()
pause_seg_dir = "acoustic/pause_segment"
csv_ext = "_pausechar.csv"
pause_seg_dir = 'acoustic/pause_segment'
csv_ext = '_pausechar.csv'
def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config):
"""
@@ -36,25 +37,27 @@ def get_timing_cues(seg_starts_sec, seg_ends_sec, r_config):
speaking_time = np.sum(np.asarray(seg_ends_sec) - np.asarray(seg_starts_sec))
num_pauses = len(seg_starts_sec) - 1
pause_len = np.zeros(num_pauses)
for p in range(num_pauses):
pause_len[p] = seg_starts_sec[p+1] - seg_ends_sec[p]
if len(pause_len)>0:
pause_len_mean = np.mean(pause_len)
pause_len_std = np.std(pause_len)
pause_len[p] = seg_starts_sec[p + 1] - seg_ends_sec[p]
if len(pause_len) > 0:
pause_time = np.sum(pause_len)
else:
pause_len_mean = 0
pause_len_std = 0
pause_time = 0
pause_frac = pause_time / total_time
timing_dict = {r_config.aco_totaltime: total_time, r_config.aco_speakingtime: speaking_time,
r_config.aco_numpauses: num_pauses, r_config.aco_pausetime: pause_time, r_config.aco_pausefrac: pause_frac}
timing_dict = {
r_config.aco_totaltime: total_time,
r_config.aco_speakingtime: speaking_time,
r_config.aco_numpauses: num_pauses,
r_config.aco_pausetime: pause_time,
r_config.aco_pausefrac: pause_frac,
}
return timing_dict
def process_silence(audio_file, r_config):
"""
Returns dataframe for pause between words using voice activity detection
@@ -65,64 +68,80 @@ def process_silence(audio_file, r_config):
"""
feat_dict_list = []
y, sr = vu.read_wave(audio_file)
# 3 is most aggressive (splits most), 0 least (better for low snr)
aggressiveness = 3
frame_dur_ms = 20
#pause segment(long & short pad)
# pause segment(long & short pad)
long_pad_around_voice_ms = 200
short_pad_around_voice_ms = 100
if len(y)>0:
if len(y) > 0:
vad = webrtcvad.Vad(aggressiveness)
frames = vu.frame_generator(frame_dur_ms, y, sr)
frames = list(frames)
#longer pad time screens out little blips, but misses short silences
long_seg_starts, long_seg_ends = vu.vad_get_segment_times(sr, frame_dur_ms, long_pad_around_voice_ms, vad, frames)
#Logic to handle blank audio file
# longer pad time screens out little blips, but misses short silences
long_seg_starts, long_seg_ends = vu.vad_get_segment_times(
sr, frame_dur_ms, long_pad_around_voice_ms, vad, frames
)
# Logic to handle blank audio file
if len(long_seg_starts) == 0 or len(long_seg_ends) == 0:
return ''
return ""
t_start = long_seg_starts[0]
t_end = long_seg_ends[-1]
# shorter pad time captures short silences (but misfires on little blips)
short_seg_starts, short_seg_ends = vu.vad_get_segment_times(sr, frame_dur_ms, short_pad_around_voice_ms, vad, frames)
short_seg_starts, short_seg_ends = vu.vad_get_segment_times(
sr, frame_dur_ms, short_pad_around_voice_ms, vad, frames
)
seg_starts = []
seg_ends = []
for k in range(len(short_seg_starts)): # logic to clean up some typical misfires
if (short_seg_starts[k] >=t_start) and (short_seg_starts[k] <= t_end):
for k in range(
len(short_seg_starts)
): # logic to clean up some typical misfires
if (short_seg_starts[k] >= t_start) and (short_seg_starts[k] <= t_end):
seg_starts.append(short_seg_starts[k])
seg_ends.append(short_seg_ends[k])
if len(seg_starts) == 0 or len(seg_ends) == 0:
return ''
return ""
timing_dict = get_timing_cues(seg_starts, seg_ends, r_config)
feat_dict_list.append(timing_dict)
df = pd.DataFrame(feat_dict_list)
df[r_config.err_reason] = 'Pass'# will replace with threshold in future release
df[r_config.err_reason] = "Pass" # will replace with threshold in future release
return df
def empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt):
def empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
"""
Preparing empty Pause Segment matrix if something fails
"""
cols = [r_config.aco_totaltime, r_config.aco_speakingtime, r_config.aco_numpauses, r_config.aco_pausetime,
r_config.aco_pausefrac, r_config.err_reason]
cols = [
r_config.aco_totaltime,
r_config.aco_speakingtime,
r_config.aco_numpauses,
r_config.aco_pausetime,
r_config.aco_pausefrac,
r_config.err_reason,
]
out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, error_txt]]
df_pause = pd.DataFrame(out_val, columns = cols)
df_pause['dbm_master_url'] = video_uri
logger.info('Saving Output file {} '.format(out_loc))
ut.save_output(df_pause, out_loc, fl_name, pause_seg_dir, csv_ext)
df_pause = pd.DataFrame(out_val, columns=cols)
df_pause["dbm_master_url"] = video_uri
def run_pause_segment(video_uri, out_dir, r_config):
if save:
logger.info("Saving Output file {} ".format(out_loc))
ut.save_output(df_pause, out_loc, fl_name, pause_seg_dir, csv_ext)
return df_pause
def run_pause_segment(video_uri, out_dir, r_config, save=True):
"""
Processing all patient's for getting Pause Segment
---------------
@@ -132,41 +151,50 @@ def run_pause_segment(video_uri, out_dir, r_config):
out_dir: (str) Output directory for processed output
"""
try:
input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
if len(aud_filter)>0:
aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
if len(aud_filter) > 0:
audio_file = aud_filter[0]
aud_dur = librosa.get_duration(filename=audio_file)
aud_dur = ut.get_length(audio_file)
if float(aud_dur) < 0.064:
logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
logger.info(
"Output file {} size is less than 0.064sec".format(audio_file)
)
error_txt = 'error: length less than 0.064'
error_txt = "error: length less than 0.064"
empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt)
return
logger.info('Converting stereo sound to mono-lD')
logger.info("Converting stereo sound to mono-lD")
sound_mono = AudioSegment.from_wav(audio_file)
sound_mono = sound_mono.set_channels(1)
sound_mono = sound_mono.set_frame_rate(48000)
mono_wav = os.path.join(input_loc, fl_name + '_mono.wav')
mono_wav = os.path.join(input_loc, fl_name + "_mono.wav")
sound_mono.export(mono_wav, format="wav")
df_pause_seg = process_silence(mono_wav, r_config)
os.remove(mono_wav)#removing mono wav file
os.remove(mono_wav) # removing mono wav file
if isinstance(df_pause_seg, pd.DataFrame) and len(df_pause_seg)>0:
logger.info('Processing Output file {} '.format(out_loc))
df_pause_seg['dbm_master_url'] = video_uri
ut.save_output(df_pause_seg, out_loc, fl_name, pause_seg_dir, csv_ext)
if isinstance(df_pause_seg, pd.DataFrame) and len(df_pause_seg) > 0:
df_pause_seg["dbm_master_url"] = video_uri
if save:
logger.info("Processing Output file {} ".format(out_loc))
ut.save_output(
df_pause_seg, out_loc, fl_name, pause_seg_dir, csv_ext
)
df = df_pause_seg
else:
error_txt = 'error: webrtcvad returns no segment'
empty_pause_segment(video_uri, out_loc, fl_name, r_config, error_txt)
error_txt = "error: webrtcvad returns no segment"
df = empty_pause_segment(
video_uri, out_loc, fl_name, r_config, error_txt, save=save
)
return df
except Exception as e:
logger.error('Failed to process audio file')
e
logger.error("Failed to process audio file", str(e))

View File

@@ -0,0 +1,139 @@
"""
file_name: pitch_freq
project_name: DBM
created: 2020-20-07
"""
import glob
import logging
import os
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
# Configure logging at import time so INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)
# getLogger() with no name returns the root logger.
logger = logging.getLogger()
# Sub-directory and file suffix handed to ut.save_output for pitch results.
ff_dir = "acoustic/pitch"
csv_ext = "_pitch.csv"
# Error reason written into the placeholder row built by empty_pitch.
error_txt = "error: length less than 0.064"
def audio_pitch(path):
    """
    Read the pitch (fundamental frequency) track of an audio file with parselmouth.

    Args:
        path: (.wav) audio file location
    Returns:
        (list) the "frequency" values of the pitch track, one per pitch frame
    """
    pitch_track = parselmouth.Sound(path).to_pitch(time_step=0.001)
    return list(pitch_track.selected_array["frequency"])
def label_speech(row, fd_freq):
    """
    Tell whether a frame is voiced, based on its pitch frequency.

    Args:
        row: (item) mapping-like row holding the pitch frequency value
        fd_freq: key/column name of the pitch frequency inside ``row``
    Returns:
        (str) "yes" when the frequency is greater than 0, otherwise "no"
    """
    return "yes" if row[fd_freq] > 0 else "no"
def calc_pitch(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
    """
    Assemble the per-frame pitch dataframe for one audio file.

    Args:
        video_uri: source video path, stored in the ``dbm_master_url`` column
        audio_file: (.wav) parsed audio file
        out_loc: output location passed on to ``ut.save_output``
        fl_name: (str) file name passed on to ``ut.save_output``
        r_config: raw variable config object supplying the column names
        save: (bool) when True the frame is also written via ``ut.save_output``
    Returns:
        (DataFrame) one row per pitch frame with frequency, frame number,
        voiced label ("yes"/"no"), error reason and master url
    """
    ff_frames = audio_pitch(audio_file)
    df_ffreq = pd.DataFrame(ff_frames, columns=[r_config.aco_ff])
    df_ffreq["Frames"] = df_ffreq.index
    # Vectorized form of the per-row rule "frequency > 0 means voiced": same
    # labels, without a Python-level call per frame of the 1 ms-step track.
    df_ffreq[r_config.aco_voiceLabel] = np.where(
        df_ffreq[r_config.aco_ff] > 0, "yes", "no"
    )
    df_ffreq[
        r_config.err_reason
    ] = "Pass"  # will replace with threshold in future release
    df_ffreq["dbm_master_url"] = video_uri

    if save:
        logger.info("Processing Output file {} ".format(out_loc))
        ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
    return df_ffreq
def empty_pitch(video_uri, out_loc, fl_name, r_config, save=True):
    """
    Build the one-row placeholder pitch frame used when the audio is unusable.

    Args:
        video_uri: source video path, stored in the ``dbm_master_url`` column
        out_loc: output location passed on to ``ut.save_output``
        fl_name: (str) file name passed on to ``ut.save_output``
        r_config: raw variable config object supplying the column names
        save: (bool) when True the frame is also written via ``ut.save_output``
    Returns:
        (DataFrame) a single row: NaN frame, NaN frequency, "no" voiced label
        and the module-level error reason
    """
    header = [
        "Frames",
        r_config.aco_ff,
        r_config.aco_voiceLabel,
        r_config.err_reason,
    ]
    placeholder_row = [np.nan, np.nan, "no", error_txt]
    df_ffreq = pd.DataFrame([placeholder_row], columns=header)
    df_ffreq["dbm_master_url"] = video_uri

    if not save:
        return df_ffreq
    logger.info("Saving Output file {} ".format(out_loc))
    ut.save_output(df_ffreq, out_loc, fl_name, ff_dir, csv_ext)
    return df_ffreq
def run_pitch(video_uri, out_dir, r_config, save=True):
    """
    Entry point: compute the pitch track for the audio that belongs to one video.

    The audio is looked up as ``<input_loc>/<fl_name>.wav`` using the paths
    returned by ``ut.filter_path``.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        save: (bool) forwarded to the helpers; when True results are written to csv
    Returns:
        (DataFrame) pitch frame, or the placeholder frame when the audio is
        shorter than 0.064 sec; None when no matching .wav exists or when
        processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if len(aud_filter) > 0:
            audio_file = aud_filter[0]
            aud_dur = ut.get_length(audio_file)
            if float(aud_dur) < 0.064:
                logger.info(
                    "Output file {} size is less than 0.064sec".format(audio_file)
                )
                df = empty_pitch(video_uri, out_loc, fl_name, r_config, save=save)
            else:
                df = calc_pitch(
                    video_uri, audio_file, out_loc, fl_name, r_config, save=save
                )
            return df
    except Exception as e:
        # Best-effort boundary: report the cause instead of discarding it,
        # then fall through and return None as before.
        logger.error("Error in pitch: {}".format(e))
        logger.error("Failed to process audio file")
    return None

View File

@@ -0,0 +1,166 @@
"""
file_name: shimmer_processing
project_name: DBM
created: 2020-20-07
"""
import glob
import logging
import os
from os.path import join
import more_itertools as mit
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
# Configure logging at import time so INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)
# getLogger() with no name returns the root logger.
logger = logging.getLogger()
# Sub-directory handed to ut.save_output for shimmer results.
shimmer_dir = "acoustic/shimmer"
# Sub-directory joined onto out_loc by calc_shimmer to locate pitch output.
ff_dir = "acoustic/pitch"
# File suffix handed to ut.save_output for shimmer results.
csv_ext = "_shimmer.csv"
def audio_shimmer(sound):
    """
    Compute local shimmer for a sound through Praat calls.

    Args:
        sound: parselmouth object
    Returns:
        the value returned by Praat's "Get shimmer (local)" for the whole sound
    """
    praat_call = parselmouth.praat.call
    # Periodic point process, queried with the arguments 80 and 500.
    point_process = praat_call(sound, "To PointProcess (periodic, cc)...", 80, 500)
    return praat_call(
        [sound, point_process],
        "Get shimmer (local)",
        0,
        0,
        0.0001,
        0.02,
        1.3,
        1.6,
    )
def empty_shimmer(video_uri, out_loc, fl_name, r_config, error_txt, save=True):
    """
    Build the one-row placeholder shimmer frame used when processing is not possible.

    Args:
        video_uri: source video path, stored in the ``dbm_master_url`` column
        out_loc: output location passed on to ``ut.save_output``
        fl_name: (str) file name passed on to ``ut.save_output``
        r_config: raw variable config object supplying the column names
        error_txt: (str) reason stored in the error column
        save: (bool) when True the frame is also written via ``ut.save_output``
    Returns:
        (DataFrame) a single row: NaN frame, NaN shimmer and the error reason
    """
    header = ["Frames", r_config.aco_shimmer, r_config.err_reason]
    placeholder_row = [np.nan, np.nan, error_txt]
    df_shimmer = pd.DataFrame([placeholder_row], columns=header)
    df_shimmer["dbm_master_url"] = video_uri

    if not save:
        return df_shimmer
    logger.info("Saving Output file {} ".format(out_loc))
    ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
    return df_shimmer
def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames, audio_file):
    """
    Compute shimmer for each voiced segment of an audio file.

    Args:
        com_speech_sort: sequence of segments; each segment is a sequence of
            pitch frame numbers
        voiced_yes: collection of the segments that count as voiced
        voiced_no: not used here; kept so the signature stays unchanged
        shimmer_frames: (list) pre-sized output list, filled in place by index
        audio_file: (.wav) audio file location
    Returns:
        (list) ``shimmer_frames`` with one value per segment; NaN for segments
        that are not voiced, have a single frame, or fail to process
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=0.001)

    for idx, vs in enumerate(com_speech_sort):
        shimmer = np.nan
        try:
            if vs in voiced_yes and len(vs) > 1:
                # Map the segment's first/last pitch frame to sound sample indices.
                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])
                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))
                # NOTE(review): the slice is wrapped in a new Sound without
                # passing the source sampling frequency -- confirm this is intended.
                samples = parselmouth.Sound(snd.as_array()[0][snd_start:snd_end])
                shimmer = audio_shimmer(samples)
        except Exception as exc:
            # Best-effort per segment: a failing segment keeps NaN. Only
            # ordinary errors are absorbed, so interrupts still propagate.
            logger.debug("Shimmer failed for segment %s: %s", idx, exc)
        shimmer_frames[idx] = shimmer
    return shimmer_frames
def calc_shimmer(
    video_uri, audio_file, out_loc, fl_name, r_config, save=True, ff_df=None
):
    """
    Assemble the per-segment shimmer dataframe for one audio file.

    Voice segments come from ``ff_df`` when it is given, otherwise from the
    pitch output directory ``<out_loc>/acoustic/pitch``. When neither is
    available a placeholder frame is returned.

    Args:
        video_uri: source video path, stored in the ``dbm_master_url`` column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv
        fl_name: (str) file name passed on to ``ut.save_output``
        r_config: config.config_raw_feature.pyConfigFeatureNmReader object
        save: (bool) when True the frame is also written via ``ut.save_output``
        ff_df: optional pitch dataframe used instead of the pitch directory
    Returns:
        (DataFrame) one row per voice segment, or the one-row placeholder frame
    """
    dir_path = os.path.join(out_loc, ff_dir)
    if ff_df is None and not os.path.isdir(dir_path):
        error_txt = "error: fundamental freq not available"
        return empty_shimmer(
            video_uri, out_loc, fl_name, r_config, error_txt, save=save
        )

    if ff_df is not None:
        voice_seg = ut.process_segment_pitch(ff_df, r_config)
    else:
        voice_seg = ut.segment_pitch(dir_path, r_config, ff_df=ff_df)

    # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
    shimmer_frames = [np.nan] * len(voice_seg[0])
    shimmer_segment_frames = segment_shimmer(
        voice_seg[0], voice_seg[1], voice_seg[2], shimmer_frames, audio_file
    )

    df_shimmer = pd.DataFrame(shimmer_segment_frames, columns=[r_config.aco_shimmer])
    df_shimmer[
        r_config.err_reason
    ] = "Pass"  # will replace with threshold in future release
    df_shimmer["Frames"] = df_shimmer.index
    df_shimmer["dbm_master_url"] = video_uri

    if save:
        logger.info("Processing Output file {} ".format(out_loc))
        ut.save_output(df_shimmer, out_loc, fl_name, shimmer_dir, csv_ext)
    return df_shimmer
def run_shimmer(video_uri, out_dir, r_config, save=True, ff_df=None):
    """
    Entry point: compute shimmer for the audio that belongs to one video.

    The audio is looked up as ``<input_loc>/<fl_name>.wav`` using the paths
    returned by ``ut.filter_path``. Exceptions are not caught here; they
    propagate to the caller.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        save: (bool) forwarded to the helpers; when True results are written to csv
        ff_df: optional pitch dataframe forwarded to ``calc_shimmer``
    Returns:
        (DataFrame) shimmer frame, or the placeholder frame when the audio is
        shorter than 0.064 sec; None when no matching .wav exists
    """
    input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
    aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
    if not aud_filter:
        # No matching audio file: nothing to compute.
        return None

    audio_file = aud_filter[0]
    aud_dur = ut.get_length(audio_file)
    if float(aud_dur) < 0.064:
        logger.info("Output file {} size is less than 0.064sec".format(audio_file))
        error_txt = "error: length less than 0.064"
        return empty_shimmer(
            video_uri, out_loc, fl_name, r_config, error_txt, save=save
        )

    return calc_shimmer(
        video_uri,
        audio_file,
        out_loc,
        fl_name,
        r_config,
        save=save,
        ff_df=ff_df,
    )

View File

@@ -0,0 +1,131 @@
"""
file_name: voice_frame_score
project_name: DBM
created: 2020-20-07
"""
import glob
import logging
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
# Configure logging at import time so INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)
# getLogger() with no name returns the root logger.
logger = logging.getLogger()
# Sub-directory and file suffix handed to ut.save_output for voice frame score results.
vfs_dir = "acoustic/voice_frame_score"
csv_ext = "_voiceprev.csv"
# Error reason written into the placeholder row built by empty_vfs.
error_txt = "error: length less than 0.064"
def audio_pitch_frame(pitch):
    """
    Count the total and the voiced frames of a pitch object.

    Args:
        pitch: speech pitch object exposing ``get_number_of_frames`` and
            ``count_voiced_frames``
    Returns:
        (tuple) total number of frames, number of voiced frames
    """
    return pitch.get_number_of_frames(), pitch.count_voiced_frames()
def voice_segment(path):
    """
    Measure how much of an audio file is voiced, using parselmouth's pitch track.

    Args:
        path: (.wav) audio file location
    Returns:
        (tuple) voiced frames percentage, voiced frame count, total frame count
    """
    pitch = parselmouth.Sound(path).to_pitch()
    n_total = pitch.get_number_of_frames()
    n_voiced = pitch.count_voiced_frames()
    pct_voiced = (n_voiced / n_total) * 100
    return pct_voiced, n_voiced, n_total
def calc_vfs(video_uri, audio_file, out_loc, fl_name, r_config, save=True):
    """
    Assemble the one-row voice frame score dataframe for one audio file.

    Args:
        video_uri: source video path, stored in the ``dbm_master_url`` column
        audio_file: Audio file path
        out_loc: output location passed on to ``ut.save_output``
        fl_name: (str) file name passed on to ``ut.save_output``
        r_config: raw variable config object supplying the column names
        save: (bool) when True the frame is also written via ``ut.save_output``
    Returns:
        (DataFrame) voiced frames, total frames, voiced percentage, error
        reason, frame index and master url
    """
    pct_voiced, n_voiced, n_total = voice_segment(audio_file)

    df_vfs = pd.DataFrame([n_voiced], columns=[r_config.aco_voiceFrame])
    df_vfs[r_config.aco_totVoiceFrame] = [n_total]
    df_vfs[r_config.aco_voicePct] = [pct_voiced]
    df_vfs[
        r_config.err_reason
    ] = "Pass"  # will replace with threshold in future release
    df_vfs["Frames"] = df_vfs.index
    df_vfs["dbm_master_url"] = video_uri

    if not save:
        return df_vfs
    logger.info("Saving Output file {} ".format(out_loc))
    ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
    return df_vfs
def empty_vfs(video_uri, out_loc, fl_name, r_config, save=True):
    """
    Build the one-row placeholder voice frame score frame used when the audio is unusable.

    Args:
        video_uri: source video path, stored in the ``dbm_master_url`` column
        out_loc: output location passed on to ``ut.save_output``
        fl_name: (str) file name passed on to ``ut.save_output``
        r_config: raw variable config object supplying the column names
        save: (bool) when True the frame is also written via ``ut.save_output``
    Returns:
        (DataFrame) a single row of NaN values plus the module-level error reason
    """
    header = [
        "Frames",
        r_config.aco_voiceFrame,
        r_config.aco_totVoiceFrame,
        r_config.aco_voicePct,
        r_config.err_reason,
    ]
    placeholder_row = [np.nan] * 4 + [error_txt]
    df_vfs = pd.DataFrame([placeholder_row], columns=header)
    df_vfs["dbm_master_url"] = video_uri

    if not save:
        return df_vfs
    logger.info("Saving Output file {} ".format(out_loc))
    ut.save_output(df_vfs, out_loc, fl_name, vfs_dir, csv_ext)
    return df_vfs
def run_vfs(video_uri, out_dir, r_config, save=True):
    """
    Entry point: compute the voice frame score for the audio of one video.

    The audio is looked up as ``<input_loc>/<fl_name>.wav`` using the paths
    returned by ``ut.filter_path``.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        save: (bool) forwarded to the helpers; when True results are written to csv
    Returns:
        (DataFrame) voice frame score frame, or the placeholder frame when the
        audio is shorter than 0.064 sec; None when no matching .wav exists or
        when processing fails
    """
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if len(aud_filter) > 0:
            audio_file = aud_filter[0]
            aud_dur = ut.get_length(audio_file)
            if float(aud_dur) < 0.064:
                logger.info(
                    "Output file {} size is less than 0.064sec".format(audio_file)
                )
                df = empty_vfs(video_uri, out_loc, fl_name, r_config, save=save)
            else:
                df = calc_vfs(
                    video_uri, audio_file, out_loc, fl_name, r_config, save=save
                )
            return df
    except Exception as e:
        # Best-effort boundary: report the cause instead of discarding it,
        # then fall through and return None as before.
        logger.error("Error in voice frame score: {}".format(e))
        logger.error("Failed to process audio file")
    return None

View File

@@ -0,0 +1,12 @@
"""
file_name: init
project_name: DBM
created: 2020-20-07
"""
from __future__ import absolute_import, division, print_function
from .eye_blink import run_eye_blink
from .eye_gaze import run_eye_gaze
from .facial_tremor import fac_tremor_process
from .head_motion import run_head_movement

View File

@@ -0,0 +1,195 @@
"""
file_name: eye_blink
project_name: DBM
created: 2020-20-07
"""
import logging
import os
import subprocess
import cv2
import dlib
import imutils
import numpy as np
import pandas as pd
from imutils import face_utils
from imutils.video import FileVideoStream
from scipy.signal import find_peaks
from scipy.spatial import distance as dist
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
# Configure logging at import time so INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)
# getLogger() with no name returns the root logger.
logger = logging.getLogger()
# Sub-directory and file suffix handed to ut.save_output for eye blink results.
movement_expr_dir = "movement/eye_blink"
csv_ext = "_eyeblinks.csv"
def get_length(filename):
    """
    Ask ffprobe for the duration of a media file.

    Args:
        filename: path of the media file to probe
    Returns:
        (float) the ``format=duration`` value printed by ffprobe
    """
    probe_cmd = [
        "ffprobe",
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        filename,
    ]
    # stderr is merged into stdout, so any ffprobe error text ends up in the
    # value handed to float().
    completed = subprocess.run(
        probe_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        stdin=subprocess.DEVNULL,
    )
    return float(completed.stdout)
def eye_aspect_ratio(eye):
    """
    Compute the eye aspect ratio (EAR) of one eye for a single frame.

    Args:
        eye: Eye landmarks, indexable 0-5
    Return:
        Sum of the two vertical landmark distances divided by twice the
        horizontal landmark distance
    """
    # vertical landmark pairs: (1, 5) and (2, 4)
    upper_lower_a = dist.euclidean(eye[1], eye[5])
    upper_lower_b = dist.euclidean(eye[2], eye[4])
    # horizontal landmark pair: (0, 3)
    corner_to_corner = dist.euclidean(eye[0], eye[3])
    return (upper_lower_a + upper_lower_b) / (2.0 * corner_to_corner)
def blink_detection(video_path, facial_landmarks, raw_config):
    """
    Blink detection for each frame

    Reads the video frame by frame, records the averaged eye aspect ratio
    (EAR) for every detected face, then keeps the local minima of the EAR
    series as blinks.

    Args:
        video_path: MP4 file location
        facial_landmarks: Facial landmark pre-trained model path
        raw_config: Raw configuration file object
    Return:
        Dataframe with blink information like blink frame, duration etc.
    """
    # Frame counter is 1-based; it is stored as the frame number of each EAR sample.
    tot_frame = 1
    blink_frame = []
    ear_frame = []
    # clip = VideoFileClip(video_path, has_mask=True)
    vid_length = get_length(video_path)

    identifier = dlib.get_frontal_face_detector()  # dlib's face detector (HOG-based)
    forecaster = dlib.shape_predictor(facial_landmarks)  # the facial landmark predictor

    # left and right eye landmarks
    (left_beg, left_end) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
    (right_beg, right_end) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]
    f_stream = True
    vid_stream = FileVideoStream(video_path).start()

    # The loop only ends through the `more()` check below; any exception in
    # the body is logged and the loop continues with the next iteration.
    while True:
        try:
            # check if stream/frame available in video
            if f_stream and not vid_stream.more():
                break

            # reading & converting frame into grayscale
            vid_frame = vid_stream.read()
            vid_frame = imutils.resize(vid_frame, width=450)
            gray = cv2.cvtColor(vid_frame, cv2.COLOR_BGR2GRAY)

            # detecting face
            rects = identifier(gray, 0)
            # One EAR sample is appended per detected face, so a frame with
            # several faces contributes several samples with the same frame number.
            for rect in rects:
                lmk = forecaster(gray, rect)
                lmk = face_utils.shape_to_np(lmk)

                l_eye = lmk[left_beg:left_end]  # Extracting left eye landmarks
                r_eye = lmk[right_beg:right_end]  # Extracting right eye landmarks
                l_ear = eye_aspect_ratio(l_eye)  # eye aspect ratio for left eye
                r_ear = eye_aspect_ratio(r_eye)  # eye aspect ratio for right eye
                ear = (l_ear + r_ear) / 2.0  # average the eye aspect ratio

                blink_frame.append(tot_frame)
                ear_frame.append(ear)
            # NOTE(review): counted once per frame that was read without error
            # (nesting reconstructed from a flattened listing -- confirm).
            tot_frame += 1
        except Exception as e:
            # The bare `e` below is a no-op; the exception itself is not logged.
            e
            logger.info(
                "blink detection processing finished in frame: {}".format(tot_frame - 1)
            )
            continue
    vid_stream.stop()

    blink_df = pd.DataFrame(ear_frame, columns=[raw_config.mov_blink_ear])
    blink_df[raw_config.vid_dur] = vid_length
    # Frame rate estimate: counter value divided by the probed video length.
    blink_df[raw_config.fps] = int(tot_frame / vid_length)
    blink_df[raw_config.mov_blinkframes] = blink_frame

    # Peaks of the negated EAR series are the local minima of EAR.
    peaks, _ = find_peaks(
        blink_df[raw_config.mov_blink_ear] * -1, prominence=0.1
    )  # prominence = 0.1 based on tuning
    final_blink_df = blink_df.iloc[peaks, :].reset_index(drop=True)
    u_blink_df = blink_dur(final_blink_df, raw_config)
    u_blink_df["dbm_master_url"] = video_path
    return u_blink_df
def blink_dur(blink_df, raw_config):
    """
    Add the time between consecutive blinks to the blink dataframe.

    The frame gap to the previous blink (the blink's own frame number for the
    first row) is divided by the fps column. The dataframe is modified in
    place and also returned.

    Args:
        blink_df : Dataframe with blink information like blink frame
        raw_config: Raw configuration file object
    Returns:
        Updated dataframe with blink duration
    """
    dur_col = raw_config.mov_blinkdur
    if len(blink_df) == 0:
        blink_df[dur_col] = np.nan
    else:
        frame_no = blink_df[raw_config.mov_blinkframes]
        # The first blink has no predecessor, so its gap is its own frame number.
        blink_df[dur_col] = frame_no.diff().fillna(frame_no)

    blink_df[dur_col] = blink_df[dur_col] / blink_df[raw_config.fps]
    return blink_df
def run_eye_blink(video_uri, out_dir, r_config, facial_landmarks, save=True):
    """
    Entry point: extract eye blink features for one video.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output;
        r_config: raw variable config object;
        facial_landmarks: landmark model path
        save: whether to save in csv or not
    Returns:
        (DataFrame) blink dataframe; None when the video file does not exist
        or when processing fails
    """
    try:
        _, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        if not os.path.exists(video_uri):
            return None

        target = os.path.join(out_loc, fl_name)
        logger.info("Processing Output file {} ".format(target))
        df_blink = blink_detection(video_uri, facial_landmarks, r_config)
        if save:
            ut.save_output(df_blink, out_loc, fl_name, movement_expr_dir, csv_ext)
        return df_blink
    except Exception as e:
        logger.error(f"Failed to process video file: {e}")

View File

@@ -0,0 +1,182 @@
"""
file_name: eye_gaze
project_name: DBM
created: 2020-30-11
"""
import glob
import logging
import os
from os.path import join
import numpy as np
import pandas as pd
from scipy.spatial import distance
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
# Configure logging at import time so INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)
# getLogger() with no name returns the root logger.
logger = logging.getLogger()
# Sub-directory and file suffix handed to ut.save_output for eye gaze results.
eye_pose_dir = "movement/gaze"
eye_pose_ext = "_eyegaze.csv"
def eye_motion_df(l_disp, r_disp, error_list, r_config):
    """
    Combine left and right eye displacements into one dataframe.

    Args:
        l_disp: displacement list (left eye)
        r_disp: displacement list (right eye)
        error_list: per-frame error reasons, stored in the error column
        r_config: raw variable config file object
    Returns:
        Final eye displacement dataframe
    """
    per_eye = [
        pd.DataFrame(l_disp, columns=[r_config.mov_eleft_disp]),
        pd.DataFrame(r_disp, columns=[r_config.mov_eright_disp]),
    ]
    motion = pd.concat(per_eye, axis=1, sort=False)
    motion[r_config.err_reason] = error_list
    return motion
def filter_motion(df_of, df_disp, col_l, col_r, r_config):
    """
    Build the final eye movement dataframe: gaze vectors plus displacements.

    Gaze values of frames whose " confidence" is below 0.8 are replaced by
    NaN. The input dataframe is not modified.

    Args:
        df_of: Openface raw out dataframe
        df_disp: displacement dataframe
        col_l: left eye columns
        col_r: right eye columns
        r_config: raw variable config file object
    Returns:
        (DataFrame) renamed gaze columns followed by the columns of ``df_disp``
    """
    gaze_cols = col_l + col_r
    trimmed = df_of[gaze_cols + [" confidence"]].copy()
    unreliable = trimmed[" confidence"].astype(float) < 0.8
    trimmed.loc[unreliable, gaze_cols] = np.nan

    gaze = trimmed[gaze_cols]
    gaze.columns = [
        r_config.mov_leye_x,
        r_config.mov_leye_y,
        r_config.mov_leye_z,
        r_config.mov_reye_x,
        r_config.mov_reye_y,
        r_config.mov_reye_z,
    ]
    return pd.concat([gaze, df_disp], axis=1, sort=False)
def eye_disp(of_results, col, r_config):
    """
    Compute the frame-to-frame displacement of one eye's gaze vector.

    For every frame the Euclidean distance to the previous frame's gaze
    vector is calculated. The first frame and frames whose " confidence" is
    below 0.8 get NaN.

    Args:
        of_results: Openface raw out dataframe; must contain the three
            columns named in ``col`` and a " confidence" column. The index is
            assumed to be the default 0..n-1 range.
        col: list with the three gaze component column names of one eye
        r_config: config file object (not used here; kept so the signature
            stays unchanged)
    Returns:
        (list, list) per-frame displacement and per-frame error reason
    """
    distance_list = []
    error_list = []
    of_results = of_results[col + [" confidence"]]

    for index, row in of_results.iterrows():
        low_confidence = float(row[" confidence"]) < 0.8  # Threshold < 0.8
        if index == 0 or low_confidence:
            # No usable predecessor (first frame) or an unreliable frame.
            distance_list.append(np.nan)
            error_list.append(
                "confidence less than 80%" if low_confidence else "Pass"
            )
            continue

        previous_point = tuple(of_results.at[index - 1, name] for name in col)
        current_point = (row[col[0]], row[col[1]], row[col[2]])
        dst = np.nan
        try:
            dst = distance.euclidean(previous_point, current_point)
        except Exception as e:
            # Best-effort: keep NaN for this frame. The exception goes through
            # a %s placeholder so logging can actually format it.
            logger.info("Exception on eye_disp method: %s", e)
        distance_list.append(abs(dst))
        error_list.append("Pass")
    return distance_list, error_list
def calc_eye_mov(video_uri, df_of, out_loc, fl_name, r_config, save=True):
    """
    Compute the eye motion variables for one video.

    Args:
        video_uri: source video path, stored in the ``dbm_master_url`` column
        df_of: Openface dataframe
        out_loc: Output path for saving output csv's
        fl_name: file name for output csv
        r_config: raw variable config file object
        save: whether to save result to csv or not
    Returns:
        (DataFrame) filtered gaze vectors together with both eye displacements
    """
    left_cols = [" gaze_0_x", " gaze_0_y", " gaze_0_z"]
    right_cols = [" gaze_1_x", " gaze_1_y", " gaze_1_z"]

    left_disp, left_err = eye_disp(df_of, left_cols, r_config)
    # Only the left eye's error list is stored in the output frame.
    right_disp, _right_err = eye_disp(df_of, right_cols, r_config)

    df_disp = eye_motion_df(left_disp, right_disp, left_err, r_config)
    df_disp["dbm_master_url"] = video_uri
    df_motion = filter_motion(df_of, df_disp, left_cols, right_cols, r_config)

    if not save:
        return df_motion
    ut.save_output(df_motion, out_loc, fl_name, eye_pose_dir, eye_pose_ext)
    return df_motion
def run_eye_gaze(video_uri, out_dir, r_config, save=True):
    """
    Entry point: extract eye movement features for one video.

    The OpenFace output is looked up as ``<out_loc>/<fl_name>_openface/*.csv``
    using the paths returned by ``ut.filter_path``; the first match is used.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output;
        r_config: raw variable config object
        save: whether to save result to csv or not
    Returns:
        (DataFrame) eye motion dataframe; None when no OpenFace csv is found
        or when processing fails
    """
    try:
        # filtering path to generate input & output path
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        of_csv_path = glob.glob(join(out_loc, fl_name + "_openface/*.csv"))
        if len(of_csv_path) > 0:
            of_csv = of_csv_path[0]
            df_of = pd.read_csv(of_csv)
            logger.info(
                "Processing Output file {} ".format(os.path.join(out_loc, fl_name))
            )
            df_motion = calc_eye_mov(
                video_uri, df_of, out_loc, fl_name, r_config, save=save
            )
            return df_motion
    except Exception as e:
        # The exception goes through a %s placeholder; without one, logging
        # cannot merge the extra argument into the message.
        logger.error("Failed to process video file: %s", e)
    return None

View File

@@ -0,0 +1,194 @@
import glob
import json
import logging
import os
import pickle
import re
import sys
from os.path import join
import cv2
import numpy as np
import numpy.ma as ma
import pandas as pd
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
from ..util.math_util import calc_displacement_vec
# Directory that contains this module.
DBMLIB_PATH = os.path.dirname(__file__)
# Absolute path of the facial feature config, resolved relative to this module.
DBMLIB_FTREMOR_CONFIG = os.path.abspath(
    os.path.join(DBMLIB_PATH, "../../../../resources/features/facial/config.json")
)
# Configure logging at import time so INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)
# getLogger() with no name returns the root logger.
logger = logging.getLogger()
# Output sub-directory and file suffixes for facial tremor results.
ft_dir = "movement/facial_tremor"
csv_ext = "_fac_tremor.csv"
model_ext = "_fac_model.csv"
fac_features_ext = "_fac_features.csv"
def compute_features(out_dir, df_of, r_config):
    """
    Compute facial tremor features from OpenFace landmark output.

    Args:
        out_dir: not used by this function; kept so existing callers still work
        df_of: OpenFace landmark dataframe (columns named ``l<N>_x`` / ``l<N>_y``)
        r_config: raw variable config object
    Returns:
        One-row dataframe with the per-landmark features and an error-reason
        column. When the features cannot be computed, the all-NaN row built
        by ``empty_frame`` is returned with the reason filled in.
    """
    # Context manager so the config file handle is always closed.
    with open(DBMLIB_FTREMOR_CONFIG, "r") as config_file:
        config = json.load(config_file)

    landmark_pattern = re.compile(r"l\d+_[xy]")

    # assumption: distance of face to camera remains at roughly static
    landmark_columns = [col for col in df_of.columns if landmark_pattern.match(col)]

    # Keep only frames where at least one landmark coordinate is non-zero.
    df_of = df_of[(df_of[landmark_columns] != 0).any(axis=1)].reset_index()

    num_frames = len(df_of)
    logger.info("Number of frames to be processed: {}".format(str(num_frames)))
    landmarks = config["landmarks"]

    try:
        if num_frames == 0:
            error_reason = "No frames with visible face."
            logger.error(error_reason)
            return empty_frame(landmarks, r_config, error_reason)

        first_row = df_of.iloc[0]
        facew = abs(
            first_row[config["face_width_left"]] - first_row[config["face_width_right"]]
        )
        faceh = abs(
            first_row[config["face_height_left"]]
            - first_row[config["face_height_right"]]
        )
        if facew == 0 or faceh == 0:
            error_reason = "face width or height = 0. Check landmark values"
            logger.error(error_reason)
            # The reason must be passed: empty_frame requires it, and omitting
            # it raised a TypeError that replaced the real reason.
            return empty_frame(landmarks, r_config, error_reason)

        fac_disp = calc_displacement_vec(df_of, landmarks, num_frames)

        # Validate the shape before reducing along axis 1.
        if len(fac_disp.shape) != 2:
            error_reason = "fac_disp is not 2D. smth went wrong with disp calc"
            logger.error(error_reason)
            return empty_frame(landmarks, r_config, error_reason)

        if len(fac_disp[0]) <= 1:
            error_reason = "Video too short. smth went wrong with disp calc"
            logger.error(error_reason)
            return empty_frame(landmarks, r_config, error_reason)

        fac_disp_median = np.median(fac_disp, axis=1)
        fac_disp_mean = np.mean(fac_disp, axis=1)

        fac_corr_mat = np.corrcoef(fac_disp, rowvar=True)

        # extract the row of the correlation matrix for the reference landmark
        ref_lmk_index = [
            i for i, lmk in enumerate(landmarks) if config["ref_lmk"] == lmk
        ]
        fac_corr = fac_corr_mat[ref_lmk_index][0]
        # Scale factor: reference area over the face area of the first frame.
        fac_area = config["ref_area"] / (facew * faceh)

        fac_features1 = np.multiply(fac_area * fac_disp_median, (1.0 - fac_corr))
        fac_features2 = np.multiply(fac_area * fac_disp_mean, (1.0 - fac_corr))

        fac_features_dict = {}
        for i, landmark in enumerate(landmarks):
            fac_features_dict["fac_features_mean_{}".format(landmark)] = [
                fac_features2[i]
            ]
            raw_variable_map = "fac_tremor_median_{}".format(landmark)
            fac_features_dict[r_config.base_raw["raw_feature"][raw_variable_map]] = [
                fac_features1[i]
            ]
            fac_features_dict["fac_disp_median_{}".format(landmark)] = [
                fac_disp_median[i]
            ]
            fac_features_dict["fac_corr_{}".format(landmark)] = [fac_corr[i]]

        fac_features_dict[r_config.err_reason] = [""]
        data = pd.DataFrame.from_dict(fac_features_dict)
        logger.info("Concluded computing tremor features")
        return data

    except Exception as e:
        logger.error("Error computing tremor features: {}".format(str(e)))
        return empty_frame(landmarks, r_config, str(e))
def empty_frame(landmarks, r_config, error_reason):
    """
    Build the one-row placeholder used when tremor features cannot be computed.

    Args:
        landmarks: landmark identifiers; four NaN columns are created for each
        r_config: raw variable config object (``base_raw`` and ``err_reason``
            are read)
        error_reason: text stored in the error-reason column
    Returns:
        One-row dataframe with NaN feature columns plus the error reason
    """
    plain_prefixes = ("fac_features_mean", "fac_disp_median", "fac_corr")
    placeholder = {}
    for landmark in landmarks:
        config_key = "fac_tremor_median_{}".format(landmark)
        placeholder[r_config.base_raw["raw_feature"][config_key]] = [np.nan]
        for prefix in plain_prefixes:
            placeholder["{}_{}".format(prefix, landmark)] = [np.nan]
    placeholder[r_config.err_reason] = [error_reason]
    return pd.DataFrame.from_dict(placeholder)
def fac_tremor_process(video_uri, out_dir, r_config, model_output=False, save=True):
    """
    Compute facial tremor features for one video from its OpenFace landmarks.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        model_output: currently unused (the scoring step is disabled)
        save: whether to save result to csv or not
    Returns:
        Feature dataframe from ``compute_features``; None when no landmark
        CSV is found or when processing fails (the failure is logged).
    """
    try:
        _, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        of_csv_path = glob.glob(join(out_loc, fl_name + "_openface_lmk/*_output.csv"))
        if not of_csv_path:
            return None

        df_of = pd.read_csv(of_csv_path[0])
        logger.info(
            "Processing Output file for facial_tremor {} ".format(
                os.path.join(out_loc, fl_name)
            )
        )
        # compute_features ignores its first argument; pass what was passed before.
        feats = compute_features(of_csv_path, df_of, r_config)

        if save:
            ut.save_output(feats, out_loc, fl_name, ft_dir, csv_ext)
        return feats
    except Exception as e:
        # The exception needs its own placeholder, otherwise the logging
        # module reports a formatting error instead of the message.
        logger.error("Failed to process video file for facial_tremor: %s", e)
        return None

View File

@@ -0,0 +1,240 @@
"""
file_name: head_mov
project_name: DBM
created: 2020-20-07
"""
import glob
import logging
import os
from os.path import join
import numpy as np
import pandas as pd
from scipy.spatial import distance
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
# NOTE: this configures the root logger at import time and logs through it.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
# Output sub-directories and file suffixes that calc_head_mov passes to
# ut.save_output for the head-movement and head-pose CSVs.
h_mov_dir = "movement/head_movement"
h_pose_dir = "movement/head_pose"
h_mov_ext = "_headmov.csv"
h_pose_ext = "_headpose.csv"
def head_pose_dist(of_results):
    """
    Computing head pose distance frame by frame

    For every frame after the first, the Euclidean distance between the
    rotation triple (pose_Rx, pose_Ry, pose_Rz) of the frame and that of the
    row labelled ``index - 1`` is computed. Frames whose confidence is below
    0.2, and the first frame, get NaN.

    Args:
        of_results: Openface raw out dataframe; rows are looked up by label
            ``index - 1``, so a default 0..n-1 index is expected
    Returns:
        (distance_list, error_list): per-frame distances and per-frame status
        text ("Pass" or "confidence less than 20%")
    """
    distance_list = []
    error_list = []
    for index, row in of_results.iterrows():
        low_confidence = float(row[" confidence"]) < 0.2  # Threshold < 0.2
        if index == 0 or low_confidence:
            distance_list.append(np.nan)
            error_list.append("confidence less than 20%" if low_confidence else "Pass")
            continue

        dst = np.nan
        previous = (
            of_results[" pose_Rx"][index - 1],
            of_results[" pose_Ry"][index - 1],
            of_results[" pose_Rz"][index - 1],
        )
        current = (row[" pose_Rx"], row[" pose_Ry"], row[" pose_Rz"])
        try:
            dst = distance.euclidean(previous, current)
        except Exception as e:
            # Best effort: keep NaN for this frame. The exception needs a
            # placeholder, otherwise logging reports a formatting error.
            logger.info("Exception met on head_pose_dist method: %s", e)
        distance_list.append(abs(dst))
        error_list.append("Pass")
    return distance_list, error_list
def head_pose(of_results, r_config):
    """
    Build the head pose dataframe (pitch, yaw, roll, distance, error reason).

    Rotation values of frames whose confidence is below 0.2 are replaced by
    NaN; the input dataframe itself is not modified.

    Args:
        of_results: openface results as dataframe
        r_config: raw variable config file object
    Returns:
        Final head pose estimation dataframe
    """
    rotation_columns = [" pose_Rx", " pose_Ry", " pose_Rz"]
    pose_dist_list, error_list = head_pose_dist(of_results)

    masked = of_results.copy()
    below_threshold = masked[" confidence"].astype(float) < 0.2
    masked.loc[below_threshold, rotation_columns] = np.nan

    pose_of = masked[rotation_columns]
    pose_of.columns = [
        r_config.mov_Hpose_Pitch,
        r_config.mov_Hpose_Yaw,
        r_config.mov_Hpose_Roll,
    ]
    pose_of = pose_of.copy()
    pose_of[r_config.mov_Hpose_Dist] = pose_dist_list
    pose_of[r_config.err_reason] = error_list
    return pose_of
def head_motion_df(distance_val, error_list, r_config):
    """
    Assemble the head movement dataframe.

    Args:
        distance_val: per-frame distance list
        error_list: per-frame error reason list
        r_config: raw variable config file object (``head_vel`` names the
            value column, ``err_reason`` names the error column)
    Returns:
        Dataframe with columns "Frames", the head velocity column and the
        error reason column, in that order
    """
    velocity_column = r_config.head_vel
    motion = pd.DataFrame(distance_val, columns=[velocity_column])
    # The row index doubles as the frame number.
    motion["Frames"] = motion.index
    ordered = motion.loc[:, ["Frames", velocity_column]].copy()
    ordered[r_config.err_reason] = error_list
    return ordered
def head_vel(of_results, r_config):
    """
    Computing head velocity frame by frame

    For every frame after the first, the Euclidean distance between the
    translation triple (pose_Tx, pose_Ty, pose_Tz) of the frame and that of
    the row labelled ``index - 1`` is computed. Distances above 200 are
    replaced by NaN and marked "Out of range"; frames with confidence below
    0.2, and the first frame, get NaN.

    Args:
        of_results: Openface raw out dataframe; rows are looked up by label
            ``index - 1``, so a default 0..n-1 index is expected
        r_config: raw variable config file object
    Returns:
        Head velocity dataframe built by ``head_motion_df``
    """
    distance_list = []
    error_list = []
    for index, row in of_results.iterrows():
        low_confidence = float(row[" confidence"]) < 0.2  # Threshold < 0.2
        if index == 0 or low_confidence:
            distance_list.append(np.nan)
            error_list.append("confidence less than 20%" if low_confidence else "Pass")
            continue

        dst = np.nan
        previous = (
            of_results[" pose_Tx"][index - 1],
            of_results[" pose_Ty"][index - 1],
            of_results[" pose_Tz"][index - 1],
        )
        current = (row[" pose_Tx"], row[" pose_Ty"], row[" pose_Tz"])
        try:
            dst = distance.euclidean(previous, current)
        except Exception as e:
            # Best effort: keep NaN for this frame. The exception needs a
            # placeholder, otherwise logging reports a formatting error.
            logger.info("Exception met on head_vel method: %s", e)

        if abs(dst) > 200:
            dst = np.nan
            error_list.append("Out of range")
        else:
            error_list.append("Pass")
        distance_list.append(dst)
    return head_motion_df(distance_list, error_list, r_config)
def calc_head_mov(video_uri, df_of, out_loc, fl_name, r_config, save=True):
    """
    Computing head motion and head pose variables

    Args:
        video_uri: video path
        df_of: Openface dataframe
        out_loc: Output path for saving output csv's
        fl_name: file name for output csv
        r_config: raw variable config file object
        save: whether to save result to csv or not
    Returns:
        Dataframe with the frame number and head velocity columns followed by
        the head pose columns
    """
    col = [
        " confidence",
        " pose_Rx",
        " pose_Ry",
        " pose_Rz",
        " pose_Tx",
        " pose_Ty",
        " pose_Tz",
    ]
    df_of = df_of[col]

    df_hmotion = head_vel(df_of, r_config)
    df_hmotion["dbm_master_url"] = video_uri

    df_pose = head_pose(df_of, r_config)
    df_pose["dbm_master_url"] = video_uri

    if save:
        ut.save_output(df_hmotion, out_loc, fl_name, h_mov_dir, h_mov_ext)
        ut.save_output(df_pose, out_loc, fl_name, h_pose_dir, h_pose_ext)

    # head_motion_df names the velocity column from r_config.head_vel, so the
    # same attribute is used here instead of a hard-coded "mov_headvel".
    df_mot = pd.concat([df_hmotion[["Frames", r_config.head_vel]], df_pose], axis=1)
    return df_mot
def run_head_movement(video_uri, out_dir, r_config, save=True):
    """
    Compute head movement and head pose features for one video from its
    OpenFace output.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        save: whether to save result to csv or not (default True, which is
            what this function always did before the parameter existed)
    Returns:
        The dataframe returned by ``calc_head_mov``; None when no OpenFace
        CSV is found or when processing fails (the failure is logged).
    """
    try:
        # filtering path to generate input & output path
        _, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        of_csv_path = glob.glob(join(out_loc, fl_name + "_openface/*.csv"))
        if not of_csv_path:
            return None

        df_of = pd.read_csv(of_csv_path[0])
        logger.info(
            "Processing Output file {} ".format(os.path.join(out_loc, fl_name))
        )
        return calc_head_mov(video_uri, df_of, out_loc, fl_name, r_config, save=save)
    except Exception as e:
        # The exception needs its own placeholder, otherwise the logging
        # module reports a formatting error instead of the message.
        logger.error("Failed to process video file %s: %s", video_uri, e)
        return None

View File

@@ -0,0 +1,136 @@
import glob
import json
import logging
import os
import re
from os.path import join
import numpy as np
import pandas as pd
import parselmouth
from parselmouth.praat import run_file
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
# NOTE: this configures the root logger at import time and logs through it.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
# Sub-directory and file suffix passed to ut.save_output for voice tremor CSVs.
vt_dir = "movement/voice_tremor"
csv_ext = "_vtremor.csv"
# Directory that contains this module; anchor for the relative resource path below.
DBMLIB_PATH = os.path.dirname(__file__)
# Absolute path of the Praat script that tremor_praat runs through parselmouth.
DBMLIB_VTREMOR_LIB = os.path.abspath(
    os.path.join(DBMLIB_PATH, "../../../../resources/libraries/voice_tremor.praat")
)
# Executing praat script using parselmouth function
def tremor_praat(snd_file, r_cfg):
    """
    Run the voice tremor Praat script on a sound file and tabulate its output.

    The script's captured text output is parsed as JSON after every
    "--undefined--" token has been replaced by "0".

    Args:
        snd_file: (.wav) parsed audio file
        r_cfg: Raw variable configuration file
    Returns:
        One-row tremor dataframe whose six columns are named from r_cfg
    """
    sound = parselmouth.Sound(snd_file)
    praat_result = run_file(sound, DBMLIB_VTREMOR_LIB, capture_output=True)
    # Element 1 holds the captured output; the token is plain text, so a
    # literal replace is equivalent to the regex substitution.
    captured_text = praat_result[1].replace("--undefined--", "0")
    measures = json.loads(captured_text)

    tremor_df = pd.DataFrame(measures, index=["0"])
    tremor_df.columns = [
        r_cfg.mov_freq_trem_freq,
        r_cfg.mov_amp_trem_freq,
        r_cfg.mov_freq_trem_index,
        r_cfg.mov_amp_trem_index,
        r_cfg.mov_freq_trem_pindex,
        r_cfg.mov_amp_trem_pindex,
    ]
    return tremor_df
def prepare_vtrem_output(audio_file, out_loc, r_config, fl_name, save=True):
    """
    Compute the voice tremor dataframe for an audio file and optionally save it.

    Args:
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory for csv
        r_config: Raw variable configuration file
        fl_name: file name
        save: whether to write results to csv or not
    Returns:
        Voice tremor dataframe with the error-reason column set to "Pass"
    """
    tremor_features = tremor_praat(audio_file, r_config)
    # will replace with threshold in future release
    tremor_features[r_config.err_reason] = "Pass"

    if not save:
        return tremor_features

    logger.info("Processing Output file {} ".format(os.path.join(out_loc, fl_name)))
    ut.save_output(tremor_features, out_loc, fl_name, vt_dir, csv_ext)
    return tremor_features
def prepare_empty_vt(out_loc, fl_name, r_config, error_txt, save=True):
    """
    Build (and optionally save) the placeholder voice tremor dataframe.

    Args:
        out_loc: (str) Output directory for csv
        fl_name: file name
        r_config: Raw variable configuration file
        error_txt: value stored in the error-reason column
        save: whether to write results to csv or not
    Returns:
        One-row dataframe with NaN in the six tremor columns plus the error
        reason
    """
    tremor_columns = [
        r_config.mov_freq_trem_freq,
        r_config.mov_amp_trem_freq,
        r_config.mov_freq_trem_index,
        r_config.mov_amp_trem_index,
        r_config.mov_freq_trem_pindex,
        r_config.mov_amp_trem_pindex,
    ]
    placeholder_row = [np.nan] * len(tremor_columns) + [error_txt]
    df_tremor = pd.DataFrame(
        [placeholder_row], columns=tremor_columns + [r_config.err_reason]
    )

    if not save:
        return df_tremor

    logger.info("Saving Output file {} ".format(os.path.join(out_loc, fl_name)))
    ut.save_output(df_tremor, out_loc, fl_name, vt_dir, csv_ext)
    return df_tremor
def run_vtremor(video_uri, out_dir, r_config, save=True):
    """
    Compute voice tremor features for the audio track of one video.

    The ``.wav`` file next to the video (same base name) is used. Audio
    shorter than 0.5 sec yields the placeholder dataframe with an error text.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: Raw variable configuration file
        save: whether to write results to csv or not
    Returns:
        Voice tremor dataframe; None when no ``.wav`` file is found or when
        processing fails (the failure is logged and, when the output location
        is known, a placeholder dataframe is written according to ``save``).
    """
    # Pre-bound so the exception handler can tell whether the paths were resolved.
    out_loc = fl_name = None
    try:
        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
        aud_filter = glob.glob(join(input_loc, fl_name + ".wav"))
        if not aud_filter:
            return None

        audio_file = aud_filter[0]
        aud_dur = ut.get_length(audio_file)

        if float(aud_dur) < 0.5:
            logger.info("Output file {} size is less than 0.5sec".format(audio_file))
            error_txt = "error: length less than 0.5 sec"
            # Arguments follow prepare_empty_vt(out_loc, fl_name, r_config,
            # error_txt, save); the previous call passed them in the wrong order.
            return prepare_empty_vt(out_loc, fl_name, r_config, error_txt, save)

        return prepare_vtrem_output(audio_file, out_loc, r_config, fl_name, save)
    except Exception as e:
        logger.error("Failed to compute Voice Tremor {} for {}".format(e, video_uri))
        if out_loc is not None:
            # Store the message text, not the exception object.
            prepare_empty_vt(out_loc, fl_name, r_config, str(e), save)
        return None

View File

@@ -0,0 +1,2 @@
from .speech_features import run_speech_feature
from .transcribe import run_transcribe

View File

@@ -0,0 +1,58 @@
"""
file_name: speech_features
project_name: DBM
created: 2020-13-11
"""
import glob
import logging
import os
import shutil
from os.path import join
import pandas as pd
from opendbm.dbm_lib.dbm_features.raw_features.util import nlp_util as n_util
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
# NOTE: this configures the root logger at import time and logs through it.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
# Sub-directory and file suffix passed to ut.save_output for the NLP feature CSV.
speech_dir = "speech/speech_feature"
speech_ext = "_nlp.csv"
# Glob pattern (relative to the per-video output folder) used by
# run_speech_feature to find the transcript CSV.
transcribe_ext = "speech/deepspeech/*_transcribe.csv"
def run_speech_feature(video_uri, out_dir, r_config, tran_tog, save=True):
    """
    Compute NLP features for one video from its saved transcript CSV.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        tran_tog: transcription toggle; unless it equals "on", the transcript
            output is deleted after the features are computed
        save: whether to write the feature dataframe to csv or not
    Returns:
        Speech feature dataframe from ``n_util.process_speech``
    Raises:
        IndexError if no transcript CSV exists under the output folder
    """
    input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
    transcribe_path = glob.glob(join(out_loc, transcribe_ext))
    transcribe_df = pd.read_csv(transcribe_path[0])

    df_speech = n_util.process_speech(transcribe_df, r_config)
    if save:
        logger.info("Saving Output file {} ".format(out_loc))
        logger.info("filename {} ".format(fl_name))
        ut.save_output(df_speech, out_loc, fl_name, speech_dir, speech_ext)

    if tran_tog != "on":
        if os.getcwd() == "/app":  # docker version
            shutil.rmtree(os.path.dirname(transcribe_path[0]))
        else:  # api_lib version
            # fl_name is used as-is. The previous fl_name.strip(".mp4")
            # removed any of the characters '.', 'm', 'p', '4' from both ends
            # (e.g. "temp" -> "te"), so a different directory was targeted.
            shutil.rmtree((out_dir + "/" + fl_name).replace("//", "/"))

    return df_speech

View File

@@ -0,0 +1,94 @@
"""
file_name: transcribe
project_name: DBM
created: 2020-10-11
"""
import glob
import logging
from os.path import join
import numpy as np
import pandas as pd
from opendbm.dbm_lib.dbm_features.raw_features.util import nlp_util as n_util
from opendbm.dbm_lib.dbm_features.raw_features.util import util as ut
# NOTE: this configures the root logger at import time and logs through it.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
# Sub-directory and file suffix passed to ut.save_output for the transcript
# CSV (the variable is named "formant" but holds the deepspeech location).
formant_dir = "speech/deepspeech"
csv_ext = "_transcribe.csv"
# Error text stored by empty_transcribe in the error-reason column.
error_txt = "error: length less than 0.1"
def calc_transcribe(
    video_uri, audio_file, out_loc, fl_name, r_config, deep_path, aud_dur, save=True
):
    """
    Transcribe an audio file and build the one-row transcript dataframe.

    Args:
        video_uri: video path, stored in the "dbm_master_url" column
        audio_file: (.wav) parsed audio file
        out_loc: (str) Output directory
        fl_name: input file name
        r_config: raw variable config
        deep_path: deepspeech build path
        aud_dur: audio duration, stored in the total-time column
        save: whether to write the dataframe to csv or not
    Returns:
        Transcript dataframe (empty transcripts become NaN)
    """
    transcript_text = n_util.process_deepspeech(audio_file, deep_path)

    df_transcript = pd.DataFrame([transcript_text], columns=[r_config.nlp_transcribe])
    df_transcript = df_transcript.replace("", np.nan, regex=True)
    df_transcript[r_config.nlp_totalTime] = aud_dur
    # will replace with threshold in future release
    df_transcript[r_config.err_reason] = "Pass"
    df_transcript["dbm_master_url"] = video_uri

    if not save:
        return df_transcript

    logger.info("Saving Output file {} ".format(out_loc))
    ut.save_output(df_transcript, out_loc, fl_name, formant_dir, csv_ext)
    return df_transcript
def empty_transcribe(video_uri, out_loc, fl_name, r_config, save=True):
    """
    Build (and optionally save) the placeholder transcript dataframe.

    Args:
        video_uri: video path, stored in the "dbm_master_url" column
        out_loc: (str) Output directory
        fl_name: input file name
        r_config: raw variable config
        save: whether to write the dataframe to csv or not
    Returns:
        One-row dataframe with NaN transcript and total time, and the
        module-level ``error_txt`` as error reason
    """
    placeholder = {
        r_config.nlp_transcribe: np.nan,
        r_config.nlp_totalTime: np.nan,
        r_config.err_reason: error_txt,
    }
    df_fm = pd.DataFrame([list(placeholder.values())], columns=list(placeholder))
    df_fm["dbm_master_url"] = video_uri

    if not save:
        return df_fm

    logger.info("Saving Output file {} ".format(out_loc))
    ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext)
    return df_fm
def run_transcribe(video_uri, out_dir, r_config, deep_path, save=True):
    """
    Transcribe the audio track of one video.

    The ``.wav`` file next to the video (same base name) is used. Audio
    shorter than 0.1 sec yields the placeholder transcript.

    Args:
        video_uri: video path
        out_dir: (str) Output directory for processed output
        r_config: raw variable config object
        deep_path: deepspeech build path
        save: accepted but not forwarded to the helpers, so they run with
            their default ``save=True``.
            NOTE(review): confirm whether always saving is intentional.
    Returns:
        Transcript dataframe, or None when no ``.wav`` file is found
    """
    input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
    wav_matches = glob.glob(join(input_loc, fl_name + ".wav"))
    if not wav_matches:
        return None

    audio_file = wav_matches[0]
    aud_dur = ut.get_length(audio_file)

    if float(aud_dur) < 0.1:
        logger.info("Output file {} size is less than 0.1 sec".format(audio_file))
        return empty_transcribe(video_uri, out_loc, fl_name, r_config)

    return calc_transcribe(
        video_uri, audio_file, out_loc, fl_name, r_config, deep_path, aud_dur
    )

View File

@@ -4,9 +4,13 @@ project_name: cdx_analysis
created: 2019-03-16
author: Deshana Desai
"""
import sys, os, glob, cv2
import pandas as pd
import glob
import os
import sys
import cv2
import numpy as np
import pandas as pd
def euclidean_distance(point1, point2):
@@ -14,7 +18,7 @@ def euclidean_distance(point1, point2):
Compute euclidean distance between points
"""
return np.sqrt((point1[0] - point2[0])**2 + (point1[1] - point2[1])**2)
return np.sqrt((point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2)
# def detect_peaks()
@@ -25,8 +29,7 @@ def expand_landmarks(landmarks):
util method to expand landmark list:
eg: [1,2] -> [['l1_x', 'l1_y'], ['l2_x', 'l2_y']]
"""
return [['l{}_x'.format(l), 'l{}_y'.format(l)] for l in landmarks]
return [["l{}_x".format(point), "l{}_y".format(point)] for point in landmarks]
def calc_displacement_vec(df, landmarks, num_frames):
@@ -44,13 +47,12 @@ def calc_displacement_vec(df, landmarks, num_frames):
first_row = df.iloc[0]
prev_point[j] = (first_row[pair[0]], first_row[pair[1]])
for i in range(num_frames):
frame_row = df.iloc[i]
for j, pair in enumerate(landmarks):
x, y = pair[0], pair[1]
current = (frame_row[x], frame_row[y])
deviation = euclidean_distance( current, prev_point[j])
deviation = euclidean_distance(current, prev_point[j])
disp_vec[j][i] = deviation
prev_point[j] = current

View File

@@ -0,0 +1,273 @@
"""
file_name: nlp_util
project_name: DBM
created: 2020-10-11
"""
import json
import logging
import os
import re
import subprocess
import nltk
import numpy as np
import pandas as pd
from lexicalrichness import LexicalRichness
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# NOTE: this configures the root logger at import time and logs through it.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
# Speech to text using Deepspeech 0.9.1
def deepspeech(AUDIO_FILE, deep_path):
    """
    Run the ``deepspeech`` command-line tool on an audio file.

    Args:
        AUDIO_FILE: path of the audio file to transcribe
        deep_path: directory holding the 0.9.1 model (.pbmm) and scorer files
    Returns:
        (stdout, stderr) from the process. stderr is redirected into stdout,
        so the second element is None on success. ("error", "error") is
        returned when reading the process output fails.
    """
    command = [
        "deepspeech",
        "--model",
        os.path.join(deep_path, "deepspeech-0.9.1-models.pbmm"),
        "--scorer",
        os.path.join(deep_path, "deepspeech-0.9.1-models.scorer"),
        "--audio",
        AUDIO_FILE,
    ]
    out = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    logger.info("Deepspeech output...... {}".format(out))

    try:
        stdout, stderr = out.communicate()
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit still
        # propagate; the child is reaped so it is not left running.
        out.kill()
        out.wait()
        return "error", "error"
    return stdout, stderr
def deep_speech_output_clean(result):
    """
    Extract the transcript from the deepspeech command output.

    The first element of ``result`` is converted with ``str()`` and split on
    the two-character sequence backslash + "n" (how newlines appear in the
    text form of a bytes object). The segment that follows the first one
    containing "Inference took" is the transcript.

    Args:
        result: sequence whose first element is the process output
    Returns:
        The transcript text, or "" when ``result`` is empty, the marker is
        missing, or nothing follows the marker
    """
    if len(result) == 0:
        return ""

    segments = str(result[0]).split("\\n")
    # The last segment is skipped: nothing follows it, and indexing past it
    # used to raise IndexError.
    for position, segment in enumerate(segments[:-1]):
        if "Inference took" in segment:
            return segments[position + 1]
    return ""
def process_deepspeech(audio_file, deep_path):
    """
    Transcribe an audio file: run deepspeech, then extract the transcript
    from its output.

    Args:
        audio_file: path of the audio file
        deep_path: deepspeech build path
    Returns:
        Transcript text ("" when none could be extracted)
    """
    return deep_speech_output_clean(deepspeech(audio_file, deep_path))
def nltk_download():
    """
    Make sure the NLTK resources used for tokenizing and POS tagging exist,
    downloading the ones that are missing.
    """
    # (lookup path inside nltk_data, package name for nltk.download)
    # The tagger lookup needs its "taggers/" category prefix; without it the
    # lookup never succeeds and a download is attempted on every call.
    required = (
        ("tokenizers/punkt", "punkt"),
        ("taggers/averaged_perceptron_tagger", "averaged_perceptron_tagger"),
    )
    for resource_path, package in required:
        try:
            nltk.data.find(resource_path)
        except LookupError:
            logger.info("{} is not available".format(package))
            nltk.download(package)
def empty_speech(r_config, master_url, error_txt):
    """
    Preparing empty speech matrix with error

    Args:
        r_config: raw config file object
        master_url: value stored in the "dbm_master_url" column
        error_txt: Error message during transcription
    Returns:
        One-row dataframe with NaN speech features, the error text and the
        master url
    """
    feature_cols = [
        r_config.nlp_numSentences,
        r_config.nlp_singPronPerAns,
        r_config.nlp_singPronPerSen,
        r_config.nlp_pastTensePerAns,
        r_config.nlp_pastTensePerSen,
        r_config.nlp_pronounsPerAns,
        r_config.nlp_pronounsPerSen,
        r_config.nlp_verbsPerAns,
        r_config.nlp_verbsPerSen,
        r_config.nlp_adjectivesPerAns,
        r_config.nlp_adjectivesPerSen,
        r_config.nlp_nounsPerAns,
        r_config.nlp_nounsPerSen,
        r_config.nlp_sentiment_mean,
        r_config.nlp_mattr,
        r_config.nlp_wordsPerMin,
        r_config.nlp_totalTime,
    ]
    col = feature_cols + [r_config.err_reason]
    # One NaN per feature column plus the error text. The row used to carry
    # one value too many, so building the frame raised ValueError.
    row = [np.nan] * len(feature_cols) + [error_txt]
    df_speech = pd.DataFrame([row], columns=col)
    df_speech["dbm_master_url"] = master_url
    return df_speech
def divide_var(speech_var1, spech_var2):
    """
    Divide ``speech_var1`` by ``spech_var2``; NaN when the divisor equals 0.
    """
    if spech_var2 != 0:
        return speech_var1 / spech_var2
    return np.nan
def process_speech(transcribe_df, r_config):
    """
    Preparing speech features

    Args:
        transcribe_df: Transcribed dataframe (first row is used)
        r_config: raw config file object
    Returns:
        One-row dataframe of speech features; the placeholder from
        ``empty_speech`` when the transcript's error reason is not "Pass"
    """
    transcribe_df = transcribe_df.replace(np.nan, "", regex=True)
    err_transcribe = transcribe_df[r_config.err_reason].iloc[0]
    transcribe = transcribe_df[r_config.nlp_transcribe].iloc[0]
    total_time = transcribe_df[r_config.nlp_totalTime].iloc[0]
    master_url = transcribe_df["dbm_master_url"].iloc[0]

    # Checked before touching the text: a failed transcript needs no cleaning.
    if err_transcribe != "Pass":
        return empty_speech(r_config, master_url, "error")

    # clean transcribe: drop commas, keep words/apostrophes and sentence enders
    transcribe = transcribe.replace(",", "")
    transcribe = " ".join(re.findall(r"[\w']+|[.!?]", transcribe))

    speech_dict = {}
    nltk_download()

    sentences = nltk.tokenize.sent_tokenize(transcribe)
    words_all = nltk.tokenize.word_tokenize(transcribe)
    num_sentences = len(sentences)
    has_words = len(words_all) > 0
    speech_dict[r_config.nlp_numSentences] = num_sentences

    def add_counts(per_answer_key, per_sentence_key, count):
        """Store a count per answer (NaN without words) and per sentence."""
        per_answer = count if has_words else np.nan
        speech_dict[per_answer_key] = per_answer
        speech_dict[per_sentence_key] = divide_var(per_answer, num_sentences)

    # nlp_singPron
    # NOTE(review): these are case-sensitive substring counts ("me" also
    # matches inside "time"); left unchanged so the metric keeps its current
    # definition - confirm whether whole-word matching was intended.
    sing_count = transcribe.count("I") + transcribe.count("me") + transcribe.count("my")
    add_counts(r_config.nlp_singPronPerAns, r_config.nlp_singPronPerSen, sing_count)

    all_POSs = [tag for _, tag in nltk.pos_tag(transcribe.split())]

    def tag_total(*tags):
        """Number of tokens carrying any of the given POS tags."""
        return sum(all_POSs.count(tag) for tag in tags)

    # Past tense per answer
    add_counts(
        r_config.nlp_pastTensePerAns, r_config.nlp_pastTensePerSen, tag_total("VBD")
    )
    # Pronoun per answer
    add_counts(
        r_config.nlp_pronounsPerAns,
        r_config.nlp_pronounsPerSen,
        tag_total("PRP", "PRP$"),
    )
    # Verb per answer
    add_counts(
        r_config.nlp_verbsPerAns,
        r_config.nlp_verbsPerSen,
        tag_total("VB", "VBD", "VBG", "VBN", "VBP", "VBZ"),
    )
    # Adjective per answer
    add_counts(
        r_config.nlp_adjectivesPerAns,
        r_config.nlp_adjectivesPerSen,
        tag_total("JJ", "JJR", "JJS"),
    )
    # Noun per answer
    add_counts(
        r_config.nlp_nounsPerAns,
        r_config.nlp_nounsPerSen,
        tag_total("NN", "NNP", "NNS"),
    )

    # Sentiment analysis
    vader = SentimentIntensityAnalyzer()
    sentence_valences = [vader.polarity_scores(s)["compound"] for s in sentences]
    speech_dict[r_config.nlp_sentiment_mean] = (
        np.mean(sentence_valences) if len(sentence_valences) > 0 else np.nan
    )

    non_punc = [value for value in words_all if value not in (".", "!", "?")]
    # Join the words themselves. The previous " ".join(str(non_punc))
    # space-separated every character of the list's text form, so the
    # richness measure was computed over single letters.
    lex = LexicalRichness(" ".join(non_punc))
    speech_dict[r_config.nlp_mattr] = (
        lex.mattr(window_size=lex.words) if lex.words > 0 else np.nan
    )

    # Number of words per minute
    speech_dict[r_config.nlp_wordsPerMin] = divide_var(len(non_punc), total_time) * 60
    speech_dict[r_config.nlp_totalTime] = total_time
    speech_dict["dbm_master_url"] = master_url

    df_speech = pd.DataFrame([speech_dict])
    return df_speech

View File

@@ -0,0 +1,183 @@
"""
file_name: util
project_name: DBM
created: 2020-20-07
"""
import glob
import os
import subprocess
import more_itertools as mit
import numpy as np
import pandas as pd
def get_length(filename):
    """
    Return the duration ffprobe reports for a media file, as a float.

    ffprobe is asked to print only the ``format=duration`` value; its stderr
    is merged into stdout, so any ffprobe error text makes the float
    conversion raise ValueError.

    Args:
        filename: path of the media file
    Returns:
        (float) duration as printed by ffprobe
    """
    probe_command = [
        "ffprobe",
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        filename,
    ]
    completed = subprocess.run(
        probe_command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        stdin=subprocess.DEVNULL,
    )
    duration_text = completed.stdout
    return float(duration_text)
def process_segment_pitch(ff_df, r_config):
    """
    Group row positions into runs of consecutive "yes" / "no" voice labels.

    Args:
        ff_df: dataframe holding the voice label column
        r_config: raw variable config object (``aco_voiceLabel`` names the column)
    Returns:
        (com_speech_sort, voiced_yes, voiced_no): all runs sorted by their
        first position, the "yes" runs, and the "no" runs; each run is a list
        of consecutive positions
    """
    labels = ff_df[r_config.aco_voiceLabel]

    def runs_of(tag):
        # Positions carrying the tag, split wherever they stop being consecutive.
        positions = [pos for pos, value in enumerate(labels) if value == tag]
        return [list(run) for run in mit.consecutive_groups(positions)]

    voiced_yes = runs_of("yes")
    voiced_no = runs_of("no")
    com_speech_sort = sorted(voiced_yes + voiced_no, key=lambda run: run[0])
    return com_speech_sort, voiced_yes, voiced_no
def segment_pitch(dir_path, r_config, ff_df=None):
    """
    segmenting pitch freq for each voice segment

    When ``ff_df`` is not given, the first file in ``dir_path`` whose name
    ends with "_pitch.csv" and can be read is used. The dataframe is then
    split into voiced / unvoiced runs by ``process_segment_pitch``.

    Args:
        dir_path: directory searched for a "_pitch.csv" file
        r_config: raw variable config object
        ff_df: optional, already loaded pitch dataframe
    Returns:
        (com_speech_sort, voiced_yes, voiced_no); three empty lists when no
        usable pitch data is available
    """
    if ff_df is None:
        for file in os.listdir(dir_path):
            if not file.endswith("_pitch.csv"):
                continue
            try:
                ff_df = pd.read_csv((dir_path + "/" + file))
            except Exception:
                # Best effort, as before: an unreadable file is skipped.
                continue
            break

    if ff_df is not None:
        try:
            # Previously the result was never assigned (the line was a bare
            # tuple expression), so empty lists were always returned.
            return process_segment_pitch(ff_df, r_config)
        except Exception:
            # Best effort, as before: fall through to the empty result.
            pass

    # Three distinct lists, so callers mutating one do not affect the others.
    return [], [], []
def filter_path(video_url, out_dir):
    """
    Derive the input location, output location and base file name for a video.

    Args:
        video_url: path of the video
        out_dir: Output directory path
    Returns:
        (input_loc, out_loc, fl_name): directory of the video, ``out_dir``
        joined with the base name, and the base name without its extension
    """
    input_loc, base_name = os.path.split(video_url)
    fl_name = os.path.splitext(base_name)[0]
    return input_loc, os.path.join(out_dir, fl_name), fl_name
def save_output(df, out_loc, fl_name, f_dir, f_ext):
    """
    Write a feature dataframe to ``<out_loc>/<f_dir>/<fl_name><f_ext>``,
    creating the directory when needed.

    Args:
        df: (dataframe) feature dataframe[ex: Formant freq, pitch]
        out_loc: (dir) Output location where we want to save raw output
        fl_name: file name
        f_dir: directory name for a feature
        f_ext: extension for a feature [ex: '_pose.csv']
    """
    dir_path = os.path.join(out_loc, f_dir)
    # exist_ok avoids the check-then-create race when several processes
    # write features for the same video.
    os.makedirs(dir_path, exist_ok=True)
    df.to_csv(os.path.join(dir_path, fl_name + f_ext), index=False)
def audio_process(base_dir, video_url):
    """
    List the "*_split.wav" files that sit next to a raw video.

    Args:
        base_dir: Base path for raw data; used as a plain string prefix
        video_url: Raw video file path; elements from position 2 onward are
            joined with "/".
            NOTE(review): looks like this expects a sequence of path parts
            rather than one string - confirm against the caller.
    Returns:
        List of matching "*_split.wav" paths (empty when none exist)
    """
    relative_path = "/".join(video_url[2:])
    path_parts = (base_dir + relative_path).split("/")
    # Drop the last component (the file name) to get the containing folder.
    wav_folder = "/".join(path_parts[:-1])
    return glob.glob(wav_folder + "/*_split.wav")
def compute_open_face_features(
    input_filepath,
    output_directory,
    open_face_executable,
    au_static=False,
    tracked_visualization=False,
    clobber=False,
    verbose=True,
):
    """
    Runs OpenFace on an input video unless its output CSV already exists.
    See https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments

    Args:
        input_filepath: path of the video to process
        output_directory: where OpenFace writes its output; when empty,
            ``<video directory>/<video base name>_openface`` is used
        open_face_executable: path of the OpenFace executable
        au_static: (bool) adds the ``-au_static`` flag
        tracked_visualization: (bool) adds the ``-tracked`` flag
        clobber: (bool) if True existing files will be overwritten
        verbose: (bool) print progress messages
    Returns:
        (str) path to output csv file
    Raises:
        IOError if OpenFace executable is missing
    """
    if not os.path.isfile(open_face_executable):
        raise IOError(
            "OpenFace executable {} could not be found.".format(open_face_executable)
        )

    bn, _ = os.path.splitext(os.path.basename(input_filepath))
    if not output_directory:
        output_directory = os.path.join(
            os.path.dirname(input_filepath), bn + "_openface"
        )
    output_csv = os.path.join(output_directory, bn + ".csv")

    # Nothing to do when the CSV is already there and may not be overwritten.
    if os.path.isfile(output_csv) and not clobber:
        if verbose:
            print("Output file {} already exists".format(output_csv))
        return output_csv

    optional_flags = []
    if au_static:
        optional_flags.append("-au_static")
    if tracked_visualization:
        optional_flags.append("-tracked")

    call = (
        [open_face_executable]
        + optional_flags
        + ["-q", "-2Dfp", "-3Dfp", "-pdmparams", "-pose", "-aus", "-gaze"]
        + ["-f", input_filepath, "-out_dir", output_directory]
    )

    if verbose:
        print("Computing OpenFace features {} from video file".format(input_filepath))
    subprocess.check_output(call)
    if verbose:
        print("OpenFace features saved to {}".format(output_directory))
    return output_csv

View File

@@ -10,11 +10,12 @@ import contextlib
import sys
import wave
def read_wave(path):
"""Reads a .wav file.
Takes the path, and returns (PCM audio data, sample rate).
"""
with contextlib.closing(wave.open(path, 'rb')) as wf:
with contextlib.closing(wave.open(path, "rb")) as wf:
num_channels = wf.getnchannels()
assert num_channels == 1
sample_width = wf.getsampwidth()
@@ -27,11 +28,13 @@ def read_wave(path):
class Frame(object):
"""Represents a "frame" of audio data."""
def __init__(self, bytes, timestamp, duration):
self.bytes = bytes
self.timestamp = timestamp
self.duration = duration
def frame_generator(frame_duration_ms, audio, sample_rate):
"""Generates audio frames from PCM audio data.
Takes the desired frame duration in milliseconds, the PCM data, and
@@ -43,13 +46,12 @@ def frame_generator(frame_duration_ms, audio, sample_rate):
timestamp = 0.0
duration = (float(n) / sample_rate) / 2.0
while offset + n < len(audio):
yield Frame(audio[offset:offset + n], timestamp, duration)
yield Frame(audio[offset : offset + n], timestamp, duration)
timestamp += duration
offset += n
def vad_collector(sample_rate, frame_duration_ms,
padding_duration_ms, vad, frames):
def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, frames):
"""Filters out non-voiced audio frames.
Given a webrtcvad.Vad and a source of audio frames, yields only
the voiced audio.
@@ -80,7 +82,7 @@ def vad_collector(sample_rate, frame_duration_ms,
for frame in frames:
is_speech = vad.is_speech(frame.bytes, sample_rate)
sys.stdout.write('1' if is_speech else '0')
sys.stdout.write("1" if is_speech else "0")
if not triggered:
ring_buffer.append((frame, is_speech))
num_voiced = len([f for f, speech in ring_buffer if speech])
@@ -89,7 +91,7 @@ def vad_collector(sample_rate, frame_duration_ms,
# TRIGGERED state.
if num_voiced > 0.9 * ring_buffer.maxlen:
triggered = True
sys.stdout.write('+(%s)' % (ring_buffer[0][0].timestamp,))
sys.stdout.write("+(%s)" % (ring_buffer[0][0].timestamp,))
# We want to yield all the audio we see from now until
# we are NOTTRIGGERED, but we have to start with the
# audio that's already in the ring buffer.
@@ -106,23 +108,23 @@ def vad_collector(sample_rate, frame_duration_ms,
# unvoiced, then enter NOTTRIGGERED and yield whatever
# audio we've collected.
if num_unvoiced > 0.9 * ring_buffer.maxlen:
sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
sys.stdout.write("-(%s)" % (frame.timestamp + frame.duration))
triggered = False
yield b''.join([f.bytes for f in voiced_frames])
yield b"".join([f.bytes for f in voiced_frames])
ring_buffer.clear()
voiced_frames = []
if triggered: # BT if were in triggered state at end of signal, set output time
sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
sys.stdout.write('\n')
sys.stdout.write("-(%s)" % (frame.timestamp + frame.duration))
sys.stdout.write("\n")
# If we have any leftover voiced audio when we run out of input,
# yield it.
if voiced_frames:
yield b''.join([f.bytes for f in voiced_frames])
yield b"".join([f.bytes for f in voiced_frames])
def vad_get_segment_times(sample_rate, frame_duration_ms,
padding_duration_ms, vad, frames):
def vad_get_segment_times(
sample_rate, frame_duration_ms, padding_duration_ms, vad, frames
):
"""Filters out non-voiced audio frames.
BT: based on vad_collector, but returns start and end times for voiced segs
@@ -158,7 +160,7 @@ def vad_get_segment_times(sample_rate, frame_duration_ms,
for frame in frames:
is_speech = vad.is_speech(frame.bytes, sample_rate)
#sys.stdout.write('1' if is_speech else '0')
# sys.stdout.write('1' if is_speech else '0')
if not triggered:
ring_buffer.append((frame, is_speech))
num_voiced = len([f for f, speech in ring_buffer if speech])
@@ -167,7 +169,7 @@ def vad_get_segment_times(sample_rate, frame_duration_ms,
# TRIGGERED state.
if num_voiced > 0.9 * ring_buffer.maxlen:
triggered = True
#sys.stdout.write('+(%s)' % (ring_buffer[0][0].timestamp,))
# sys.stdout.write('+(%s)' % (ring_buffer[0][0].timestamp,))
start_times.append(ring_buffer[0][0].timestamp) # BT
ring_buffer.clear()
else:
@@ -179,23 +181,23 @@ def vad_get_segment_times(sample_rate, frame_duration_ms,
# unvoiced, then enter NOTTRIGGERED and yield whatever
# audio we've collected.
if num_unvoiced > 0.9 * ring_buffer.maxlen:
#sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
# sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
end_times.append(ring_buffer[0][0].timestamp + frame.duration) # BT
triggered = False
if triggered: # BT if were in triggered state at end of signal, set output time
#sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
if len(ring_buffer)>0:
end_times.append(ring_buffer[0][0].timestamp ) # BT
# sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
if len(ring_buffer) > 0:
end_times.append(ring_buffer[0][0].timestamp) # BT
else:
# only get here in very rare case that we triggered on 2nd-to-last frame
end_times.append(frame.timestamp + frame.duration)
#sys.stdout.write('\n')
# sys.stdout.write('\n')
return(start_times, end_times)
return (start_times, end_times)
def filter_seg_times(seg_starts, seg_ends, pad_at_start = 0.5, len_to_keep=2.5 ):
def filter_seg_times(seg_starts, seg_ends, pad_at_start=0.5, len_to_keep=2.5):
"""
do some filtering on the segments found to select part for analysis
rule: find the first segment that is at least (pad_at_start + len_to_keep) sec long.
@@ -210,12 +212,14 @@ def filter_seg_times(seg_starts, seg_ends, pad_at_start = 0.5, len_to_keep=2.5 )
not_found = True
for iseg in range(len(seg_starts)):
seg_dur = seg_ends[iseg]-seg_starts[iseg]
if (not_found & (seg_dur > (pad_at_start + len_to_keep))):
seg_dur = seg_ends[iseg] - seg_starts[iseg]
if not_found & (seg_dur > (pad_at_start + len_to_keep)):
t_start = seg_starts[iseg] + pad_at_start
sel_start.append(t_start)
sel_end.append(t_start + len_to_keep)
sel_end_longer.append(max(t_start + len_to_keep, seg_ends[iseg]-pad_at_start))
sel_end_longer.append(
max(t_start + len_to_keep, seg_ends[iseg] - pad_at_start)
)
not_found = False
return sel_start, sel_end, sel_end_longer
return sel_start, sel_end, sel_end_longer

Some files were not shown because too many files have changed in this diff Show More