diff --git a/dbm_lib/config/config_raw_feature.py b/dbm_lib/config/config_raw_feature.py index 5494e21d..e0878dcf 100644 --- a/dbm_lib/config/config_raw_feature.py +++ b/dbm_lib/config/config_raw_feature.py @@ -17,7 +17,7 @@ class ConfigRawReader(object): Args: feature_config_yml (None, optional): yml file defined service configuration """ - + if feature_config_yml is None: feature_config = DBMLIB_FEATURE_CONFIG else: @@ -25,15 +25,15 @@ class ConfigRawReader(object): with open(feature_config, 'r') as ymlfile: config = yaml.load(ymlfile) - + #Verbal features self.base_raw = config self.err_reason = config['raw_feature']['error_reason'] - + #Output range self.mov_headvel_start = config['raw_feature']['mov_headvel_start'] self.mov_headvel_end = config['raw_feature']['mov_headvel_end'] - + #Acoustic variable self.aco_int = config['raw_feature']['aco_int'] self.aco_ff = config['raw_feature']['aco_ff'] @@ -210,7 +210,7 @@ class ConfigRawReader(object): self.fac_AsymMaskEye = config['raw_feature']['fac_AsymMaskEye'] self.fac_AsymMaskEyebrow = config['raw_feature']['fac_AsymMaskEyebrow'] self.fac_AsymMaskCom = config['raw_feature']['fac_AsymMaskCom'] - + #Movement features self.head_vel = config['raw_feature']['head_vel'] self.mov_blink_ear = config['raw_feature']['mov_blink_ear'] @@ -222,4 +222,18 @@ class ConfigRawReader(object): self.mov_Hpose_Yaw = config['raw_feature']['mov_Hpose_Yaw'] self.mov_Hpose_Roll = config['raw_feature']['mov_Hpose_Roll'] self.mov_Hpose_Dist = config['raw_feature']['mov_Hpose_Dist'] - \ No newline at end of file + self.mov_freq_trem_freq = config['raw_feature']['mov_freq_trem_freq'] + self.mov_freq_trem_index = config['raw_feature']['mov_freq_trem_index'] + self.mov_freq_trem_pindex = config['raw_feature']['mov_freq_trem_pindex'] + self.mov_amp_trem_freq = config['raw_feature']['mov_amp_trem_freq'] + self.mov_amp_trem_index = config['raw_feature']['mov_amp_trem_index'] + self.mov_amp_trem_pindex = config['raw_feature']['mov_amp_trem_pindex'] 
+ self.fac_tremor_median_5 = config['raw_feature']['fac_tremor_median_5'] + self.fac_tremor_median_12 = config['raw_feature']['fac_tremor_median_12'] + self.fac_tremor_median_8 = config['raw_feature']['fac_tremor_median_8'] + self.fac_tremor_median_48 = config['raw_feature']['fac_tremor_median_48'] + self.fac_tremor_median_54 = config['raw_feature']['fac_tremor_median_54'] + self.fac_tremor_median_28 = config['raw_feature']['fac_tremor_median_28'] + self.fac_tremor_median_51 = config['raw_feature']['fac_tremor_median_51'] + self.fac_tremor_median_66 = config['raw_feature']['fac_tremor_median_66'] + self.fac_tremor_median_57 = config['raw_feature']['fac_tremor_median_57'] diff --git a/dbm_lib/controller/process_feature.py b/dbm_lib/controller/process_feature.py index 3282edc2..f5112da2 100644 --- a/dbm_lib/controller/process_feature.py +++ b/dbm_lib/controller/process_feature.py @@ -7,7 +7,7 @@ created: 2020-20-07 from dbm_lib.dbm_features.raw_features.audio import intensity, pitch_freq, hnr, gne, voice_frame_score, formant_freq from dbm_lib.dbm_features.raw_features.audio import pause_segment, jitter, shimmer, mfcc from dbm_lib.dbm_features.raw_features.video import face_asymmetry, face_au, face_emotion_expressivity, face_landmark -from dbm_lib.dbm_features.raw_features.movement import head_motion, eye_blink +from dbm_lib.dbm_features.raw_features.movement import head_motion, eye_blink, voice_tremor import subprocess import logging @@ -19,13 +19,13 @@ logging.basicConfig(level=logging.INFO) logger=logging.getLogger() def audio_to_wav(input_filepath): - """ Extracts a video's audio file and saves it to wav + """ Extracts a video's audio file and saves it to wav Args: - input_filepath: (str) + input_filepath: (str) Returns: """ try: - + fname, _ = splitext(input_filepath) output_filepath = fname + '.wav' @@ -37,7 +37,7 @@ def audio_to_wav(input_filepath): logger.info('wav output saved in {}'.format(output_filepath)) else: logger.info('Output file {} already 
exists'.format(output_filepath)) - + except Exception as e: logger.error('Failed to extract audio from Video') @@ -50,38 +50,41 @@ def process_acoustic(video_uri, out_dir, dbm_group, r_config): """ if dbm_group != None and len(dbm_group)>0 and 'acoustic' not in dbm_group: return - + logger.info('Processing acoustic variables from data in {}'.format(video_uri)) logger.info('processing audio intensity....') intensity.run_intensity(video_uri, out_dir, r_config) - + logger.info('processing audio pitch freq....') pitch_freq.run_pitch(video_uri, out_dir, r_config) - + logger.info('processing HNR....') hnr.run_hnr(video_uri, out_dir, r_config) - + logger.info('processing GNE....') gne.run_gne(video_uri, out_dir, r_config) - + logger.info('processing voice frame score....') voice_frame_score.run_vfs(video_uri, out_dir, r_config) - + logger.info('processing formant frequency....') formant_freq.run_formant(video_uri, out_dir, r_config) - + logger.info('processing pause segment....') pause_segment.run_pause_segment(video_uri, out_dir, r_config) - + logger.info('processing jitter....') jitter.run_jitter(video_uri, out_dir, r_config) - + logger.info('processing shimmer....') shimmer.run_shimmer(video_uri, out_dir, r_config) - + logger.info('processing mfcc....') mfcc.run_mfcc(video_uri, out_dir, r_config) - + + logger.info('processing voice tremor....') + voice_tremor.run_vtremor(video_uri, out_dir, r_config) + def process_facial(video_uri, out_dir, dbm_group, r_config): """ processing facial features @@ -91,20 +94,20 @@ def process_facial(video_uri, out_dir, dbm_group, r_config): """ if dbm_group != None and len(dbm_group)>0 and 'facial' not in dbm_group: return - + logger.info('Processing facial variables from data in {}'.format(video_uri)) logger.info('processing facial asymmetry....') face_asymmetry.run_face_asymmetry(video_uri, out_dir, r_config) - + logger.info('processing facial Action Unit....') face_au.run_face_au(video_uri, out_dir, r_config) - + 
logger.info('processing facial expressivity....') face_emotion_expressivity.run_face_expressivity(video_uri, out_dir, r_config) - + logger.info('processing facial landmark....') face_landmark.run_face_landmark(video_uri, out_dir, r_config) - + def process_movement(video_uri, out_dir, dbm_group, r_config, dlib_model): """ processing facial features @@ -115,23 +118,24 @@ def process_movement(video_uri, out_dir, dbm_group, r_config, dlib_model): """ if dbm_group != None and len(dbm_group)>0 and 'movement' not in dbm_group: return - + logger.info('Processing movement variables from data in {}'.format(video_uri)) logger.info('processing head movement....') head_motion.run_head_movement(video_uri, out_dir, r_config) - + logger.info('processing eye blink....') eye_blink.run_eye_blink(video_uri, out_dir, r_config, dlib_model) - + + logger.info('processing voice tremor....') + voice_tremor.run_vtremor(video_uri, out_dir, r_config) + def remove_file(file_path): """ - removing wav file + removing wav file """ file_dir = dirname(file_path) file_name, _ = splitext(basename(file_path)) wav_file = glob.glob(join(file_dir, file_name + '.wav')) - + if len(wav_file)> 0: os.remove(wav_file[0]) - - \ No newline at end of file diff --git a/dbm_lib/dbm_features/raw_features/movement/__init__.py b/dbm_lib/dbm_features/raw_features/movement/__init__.py index f2a2d4b9..dca2f624 100644 --- a/dbm_lib/dbm_features/raw_features/movement/__init__.py +++ b/dbm_lib/dbm_features/raw_features/movement/__init__.py @@ -11,3 +11,5 @@ from __future__ import print_function import os DBMLIB_PATH = os.path.dirname(__file__) +DBMLIB_VTREMOR_LIB = os.path.abspath(os.path.join(DBMLIB_PATH, + '../../../../resources/libraries/voice_tremor.praat')) diff --git a/dbm_lib/dbm_features/raw_features/movement/voice_tremor.py b/dbm_lib/dbm_features/raw_features/movement/voice_tremor.py new file mode 100644 index 00000000..52f9c398 --- /dev/null +++ b/dbm_lib/dbm_features/raw_features/movement/voice_tremor.py @@ -0,0 
+1,102 @@
+import pandas as pd
+import os
+import glob
+from os.path import join
+import parselmouth
+from parselmouth.praat import call, run_file
+import numpy as np
+import librosa
+import json
+import re
+import logging
+
+from dbm_lib.dbm_features.raw_features.util import util as ut
+from dbm_lib.dbm_features.raw_features.movement import DBMLIB_VTREMOR_LIB
+
+logging.basicConfig(level=logging.INFO)
+logger=logging.getLogger()
+
+vt_dir = 'movement/voice_tremor'
+csv_ext = '_vtremor.csv'
+
+#Executing praat script using parselmouth function
+def tremor_praat(snd_file,r_cfg):
+    """
+    Generating Voice tremor endpoint dataframe
+    Args:
+        snd_file: (.wav) parsed audio file
+        r_cfg: Raw variable configuration file
+    Returns tremor endpoint dataframe
+    """
+    snd = parselmouth.Sound(snd_file)
+    tremor_var = run_file(snd,DBMLIB_VTREMOR_LIB,capture_output=True)
+    #Replace the '--undefined--' placeholders in the captured script output with 0 before JSON parsing
+    new_tremor_var = re.sub('--undefined--', '0', tremor_var[1])
+    res = json.loads(new_tremor_var)
+    tremor_df = pd.DataFrame(res,index=['0',])
+    #Columns are assigned by position, so this order must match the key order of the parsed JSON
+    tremor_df.columns = [r_cfg.mov_freq_trem_freq,r_cfg.mov_amp_trem_freq,r_cfg.mov_freq_trem_index,
+                         r_cfg.mov_amp_trem_index,r_cfg.mov_freq_trem_pindex,r_cfg.mov_amp_trem_pindex]
+    return tremor_df
+
+def prepare_vtrem_output(audio_file, out_loc, r_config, fl_name):
+    """
+    Preparing voice tremor matrix
+    Args:
+        audio_file: (.wav) parsed audio file ; r_config: raw config object
+        out_loc: (str) Output directory for csv ; fl_name: file name
+    """
+    df_tremor = tremor_praat(audio_file, r_config)
+    df_tremor[r_config.err_reason] = 'Pass'# will replace with threshold in future release
+
+    logger.info('Processing Output file {} '.format(out_loc))
+    ut.save_output(df_tremor, out_loc, fl_name, vt_dir, csv_ext)
+
+def prepare_empty_vt(out_loc, fl_name, r_config, error_txt):
+
+    """
+    Preparing empty voice tremor matrix
+    Args: out_loc: (str) Output directory for csv ; fl_name: file name
+        r_config: raw config object ; error_txt: value stored in the error reason column
+    """
+    cols = [r_config.mov_freq_trem_freq, r_config.mov_amp_trem_freq, r_config.mov_freq_trem_index,
+            r_config.mov_amp_trem_index, r_config.mov_freq_trem_pindex, r_config.mov_amp_trem_pindex, r_config.err_reason]
+
+    out_val = [[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, error_txt]]
+    df_tremor = pd.DataFrame(out_val, columns = cols)
+
+    logger.info('Saving Output file {} '.format(out_loc))
+    ut.save_output(df_tremor, out_loc, fl_name, vt_dir, csv_ext)
+
+def run_vtremor(video_uri, out_dir, r_config):
+    """
+    Computing voice tremor variables for one recording and saving them as csv
+    ---------------
+    ---------------
+    Args:
+        video_uri: video path; r_config: raw variable config object
+        out_dir: (str) Output directory for processed output
+    """
+    #Pre-bind so the except block can tell whether the output path was resolved
+    out_loc = fl_name = None
+    try:
+        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
+        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
+
+        if len(aud_filter)>0:
+
+            audio_file = aud_filter[0]
+            aud_dur = librosa.get_duration(filename=audio_file)
+
+            if float(aud_dur) < 0.064:
+                logger.info('Output file {} size is less than 0.064sec'.format(audio_file))
+
+                error_txt = 'error: length less than 0.064'
+                prepare_empty_vt(out_loc, fl_name, r_config, error_txt)
+                return
+            prepare_vtrem_output(audio_file, out_loc, r_config, fl_name)
+    except Exception as e:
+        logger.error('Failed to compute Voice Tremor {} for {}'.format(e,video_uri))
+        #Without a resolved output location there is nowhere to write the empty csv
+        if out_loc is not None:
+            prepare_empty_vt(out_loc, fl_name, r_config, e)
diff --git a/process_data.py b/process_data.py index 0283f9f2..31f85fdb 100644 --- a/process_data.py +++ b/process_data.py @@ -33,11 +33,10 @@ def common_video(video_file, args, r_config): out_path = os.path.join(args.output_path, 'raw_variables') pf.audio_to_wav(video_file) of.process_open_face(video_file, os.path.dirname(video_file), out_path, OPENFACE_PATH, args.dbm_group) - pf.process_facial(video_file, out_path, args.dbm_group, r_config) pf.process_acoustic(video_file, out_path, args.dbm_group, r_config) - pf.remove_file(video_file) pf.process_movement(video_file, out_path, args.dbm_group, r_config, DLIB_SHAPE_MODEL) + pf.remove_file(video_file) def process_raw_video_file(args, s_config, r_config): """ @@ -57,7 +56,7 @@ def process_raw_video_file(args, s_config, 
r_config): else: logger.info('Enter correct video(*.mp4) file path.') - + except Exception as e: logger.error('Failed to process mp4 file.') pf.remove_file(video_file[0]) @@ -76,15 +75,15 @@ def process_raw_audio_file(args, s_config, r_config): if len(audio_file)>0: logger.info('Calculating raw variables...') - + out_path = os.path.join(args.output_path, 'raw_variables') pf.process_acoustic(audio_file[0], out_path, args.dbm_group, r_config) - + else: logger.info('Enter correct audio(*.wav) file path.') except Exception as e: logger.error('Failed to process wav file.') - + def process_raw_video_dir(args, s_config, r_config): """ Processing video file @@ -95,16 +94,15 @@ def process_raw_video_dir(args, s_config, r_config): """ if args.output_path != None: vid_loc = glob.glob(args.input_path + '/*.mp4') - + if len(vid_loc) == 0: logger.info('Directory does not have any MP4 files.') return - + logger.info('Calculating raw variables...') for vid_file in vid_loc: try: - - common_video(vid_file, args, r_config) + common_video(vid_file, args, r_config) except Exception as e: logger.error('Failed to process mp4 file.') pf.remove_file(vid_file) @@ -119,20 +117,20 @@ def process_raw_audio_dir(args, s_config, r_config): """ if args.output_path != None: audio_loc = glob.glob(args.input_path + '/*.wav') - + if len(audio_loc) == 0: logger.info('Directory does not have any WAV files.') return - + logger.info('Calculating raw variables...') for audio in audio_loc: try: - + out_path = os.path.join(args.output_path, 'raw_variables') pf.process_acoustic(audio, out_path, args.dbm_group, r_config) except Exception as e: logger.error('Failed to process wav file.') - + def process_derive(args, r_config, d_config, input_type): """ Processing dbm derived variables @@ -141,28 +139,28 @@ def process_derive(args, r_config, d_config, input_type): input_file = glob.glob(args.input_path) else: input_file = glob.glob(args.input_path + '/*') - + out_raw_path = os.path.join(args.output_path, 
'raw_variables') out_derive_path = os.path.join(args.output_path, 'derived_variables') - + logger.info('Calculating derived variables...') feature_df = der.run_derive(input_file, out_raw_path, out_derive_path, r_config, d_config) - + if __name__=="__main__": start_time = time.time() parser = argparse.ArgumentParser(description="Process video/audio......") - + parser.add_argument("--input_path", help="path to the input files", required=True) parser.add_argument("--output_path", help="path to the raw and derived variable output", required=True) parser.add_argument("--dbm_group", help="list of feature groups", nargs='+') - + args = parser.parse_args() s_config = config_reader.ConfigReader() r_config = config_raw_feature.ConfigRawReader() d_config = config_derive_feature.ConfigDeriveReader() - + _, file_ext = os.path.splitext(os.path.basename(args.input_path)) - + if file_ext: input_type = 'file' if file_ext.lower() == '.mp4': @@ -170,14 +168,14 @@ if __name__=="__main__": elif file_ext.lower() == '.wav': process_raw_audio_file(args, s_config, r_config) - + else: logger.error('No WAV or MP4 files detected in input path') else: input_type = 'dir' process_raw_video_dir(args, s_config, r_config) process_raw_audio_dir(args, s_config, r_config) - + process_derive(args, r_config, d_config, input_type) exec_time = time.time() - start_time - logger.info('Done! Processing time: {} seconds'.format(exec_time)) \ No newline at end of file + logger.info('Done! 
Processing time: {} seconds'.format(exec_time)) diff --git a/process_dbm.sh b/process_dbm.sh index cad71b7f..424154bf 100644 --- a/process_dbm.sh +++ b/process_dbm.sh @@ -4,9 +4,9 @@ helpFunction() { echo "" echo "Usage: $0 --input_path parameterA --output_path parameterB --dbm_group parameterC" - echo -e "\t--input_path Description of what is parameterA" - echo -e "\t--output_path Description of what is parameterB" - echo -e "\t-dbm_group Description of what is parameterC" + echo -e "\t--input_path: path to the input files" + echo -e "\t--output_path: path to the raw and derived variable output" + echo -e "\t--dbm_group: list of feature groups" exit 1 # Exit script after printing help } @@ -72,4 +72,4 @@ docker cp dbm_container:/app/output $output_path docker stop dbm_container docker rm dbm_container -exit \ No newline at end of file +exit diff --git a/resources/features/derived_feature.yml b/resources/features/derived_feature.yml index a0759d79..43db29dc 100644 --- a/resources/features/derived_feature.yml +++ b/resources/features/derived_feature.yml @@ -1,9 +1,9 @@ derive_feature: - + #DBM Feature Group - FEATURE_GROUP: ['FAC_ASYM', 'FAC_AU', 'FAC_EXP', 'FAC_LMK', 'ACO_INT', 'ACO_FF', 'ACO_HNR', 'ACO_GNE', 'ACO_FM', - 'ACO_JITTER','ACO_SHIMMER', 'ACO_PAUSE', 'ACO_VFS', 'ACO_MFCC', 'MOV_HM', 'MOV_HP', 'EYE_BLINK'] - + FEATURE_GROUP: ['FAC_ASYM', 'FAC_AU', 'FAC_EXP', 'FAC_LMK', 'ACO_INT', 'ACO_FF', 'ACO_HNR', 'ACO_GNE', 'ACO_FM', + 'ACO_JITTER','ACO_SHIMMER', 'ACO_PAUSE', 'ACO_VFS', 'ACO_MFCC', 'MOV_HM', 'MOV_HP', 'EYE_BLINK', 'MOV_VT'] + #Feature group output file extensions FAC_ASYM_LOC: _facasym FAC_AU_LOC: _facau @@ -22,31 +22,33 @@ derive_feature: MOV_HM_LOC: _headmov MOV_HP_LOC: _headpose EYE_BLINK_LOC: _eyeblinks - + MOV_VT_LOC: _vtremor + + #Facial category feature group FAC_ASYM: ['fac_AsymMaskMouth', 'fac_AsymMaskEyebrow', 'fac_AsymMaskEye', 'fac_AsymMaskCom'] - FAC_AU: ['fac_AU01int', 'fac_AU02int', 'fac_AU04int', 'fac_AU05int', 'fac_AU06int', 
'fac_AU07int', 'fac_AU09int', - 'fac_AU10int', 'fac_AU12int', 'fac_AU14int', 'fac_AU15int', 'fac_AU17int', 'fac_AU20int', 'fac_AU23int', - 'fac_AU25int', 'fac_AU26int', 'fac_AU45int', 'fac_AU01pres', 'fac_AU02pres', 'fac_AU04pres', 'fac_AU05pres', - 'fac_AU06pres', 'fac_AU07pres', 'fac_AU09pres', 'fac_AU10pres', 'fac_AU12pres', 'fac_AU14pres', 'fac_AU15pres', + FAC_AU: ['fac_AU01int', 'fac_AU02int', 'fac_AU04int', 'fac_AU05int', 'fac_AU06int', 'fac_AU07int', 'fac_AU09int', + 'fac_AU10int', 'fac_AU12int', 'fac_AU14int', 'fac_AU15int', 'fac_AU17int', 'fac_AU20int', 'fac_AU23int', + 'fac_AU25int', 'fac_AU26int', 'fac_AU45int', 'fac_AU01pres', 'fac_AU02pres', 'fac_AU04pres', 'fac_AU05pres', + 'fac_AU06pres', 'fac_AU07pres', 'fac_AU09pres', 'fac_AU10pres', 'fac_AU12pres', 'fac_AU14pres', 'fac_AU15pres', 'fac_AU17pres', 'fac_AU20pres', 'fac_AU23pres', 'fac_AU25pres', 'fac_AU26pres', 'fac_AU28pres', 'fac_AU45pres'] FAC_EXP: ['hap_exp', 'sad_exp', 'sur_exp', 'fea_exp', 'ang_exp', 'dis_exp', 'con_exp', 'happ_occ', 'sad_occ', 'sur_occ', 'fea_occ', 'ang_occ', 'dis_occ', 'con_occ', 'pos_exp', 'neg_exp', 'com_exp', 'hap_exp_full', 'sad_exp_full', 'sur_exp_full','fea_exp_full', 'ang_exp_full', 'dis_exp_full', 'con_exp_full', 'pos_exp_full', 'neg_exp_full', 'com_exp_full'] - FAC_LMK: ['fac_LMK00disp', 'fac_LMK01disp', 'fac_LMK02disp', 'fac_LMK03disp', 'fac_LMK04disp', - 'fac_LMK05disp', 'fac_LMK06disp', 'fac_LMK07disp', 'fac_LMK08disp', 'fac_LMK09disp', 'fac_LMK10disp', - 'fac_LMK11disp', 'fac_LMK12disp', 'fac_LMK13disp', 'fac_LMK14disp', 'fac_LMK15disp', 'fac_LMK16disp', - 'fac_LMK17disp', 'fac_LMK18disp', 'fac_LMK19disp', 'fac_LMK20disp', 'fac_LMK21disp', 'fac_LMK22disp', - 'fac_LMK23disp', 'fac_LMK24disp', 'fac_LMK25disp', 'fac_LMK26disp', 'fac_LMK27disp', 'fac_LMK28disp', - 'fac_LMK29disp', 'fac_LMK30disp', 'fac_LMK31disp', 'fac_LMK32disp', 'fac_LMK33disp', 'fac_LMK34disp', - 'fac_LMK35disp', 'fac_LMK36disp', 'fac_LMK37disp', 'fac_LMK38disp', 'fac_LMK39disp', 'fac_LMK40disp', 
- 'fac_LMK41disp', 'fac_LMK42disp', 'fac_LMK43disp', 'fac_LMK44disp', 'fac_LMK45disp', 'fac_LMK46disp', - 'fac_LMK47disp', 'fac_LMK48disp', 'fac_LMK49disp', 'fac_LMK50disp', 'fac_LMK51disp', 'fac_LMK52disp', - 'fac_LMK53disp', 'fac_LMK54disp', 'fac_LMK55disp', 'fac_LMK56disp', 'fac_LMK57disp', 'fac_LMK58disp', - 'fac_LMK59disp', 'fac_LMK60disp', 'fac_LMK61disp', 'fac_LMK62disp', 'fac_LMK63disp', 'fac_LMK64disp', + FAC_LMK: ['fac_LMK00disp', 'fac_LMK01disp', 'fac_LMK02disp', 'fac_LMK03disp', 'fac_LMK04disp', + 'fac_LMK05disp', 'fac_LMK06disp', 'fac_LMK07disp', 'fac_LMK08disp', 'fac_LMK09disp', 'fac_LMK10disp', + 'fac_LMK11disp', 'fac_LMK12disp', 'fac_LMK13disp', 'fac_LMK14disp', 'fac_LMK15disp', 'fac_LMK16disp', + 'fac_LMK17disp', 'fac_LMK18disp', 'fac_LMK19disp', 'fac_LMK20disp', 'fac_LMK21disp', 'fac_LMK22disp', + 'fac_LMK23disp', 'fac_LMK24disp', 'fac_LMK25disp', 'fac_LMK26disp', 'fac_LMK27disp', 'fac_LMK28disp', + 'fac_LMK29disp', 'fac_LMK30disp', 'fac_LMK31disp', 'fac_LMK32disp', 'fac_LMK33disp', 'fac_LMK34disp', + 'fac_LMK35disp', 'fac_LMK36disp', 'fac_LMK37disp', 'fac_LMK38disp', 'fac_LMK39disp', 'fac_LMK40disp', + 'fac_LMK41disp', 'fac_LMK42disp', 'fac_LMK43disp', 'fac_LMK44disp', 'fac_LMK45disp', 'fac_LMK46disp', + 'fac_LMK47disp', 'fac_LMK48disp', 'fac_LMK49disp', 'fac_LMK50disp', 'fac_LMK51disp', 'fac_LMK52disp', + 'fac_LMK53disp', 'fac_LMK54disp', 'fac_LMK55disp', 'fac_LMK56disp', 'fac_LMK57disp', 'fac_LMK58disp', + 'fac_LMK59disp', 'fac_LMK60disp', 'fac_LMK61disp', 'fac_LMK62disp', 'fac_LMK63disp', 'fac_LMK64disp', 'fac_LMK65disp', 'fac_LMK66disp', 'fac_LMK67disp'] - + #Acoustic category feature group ACO_INT: ['aco_int'] ACO_FF: ['aco_ff'] @@ -59,19 +61,21 @@ derive_feature: ACO_VFS: ['aco_voicePct'] ACO_MFCC: ['aco_mfcc1','aco_mfcc2','aco_mfcc3','aco_mfcc4','aco_mfcc5','aco_mfcc6','aco_mfcc7','aco_mfcc8','aco_mfcc9', 'aco_mfcc10','aco_mfcc11','aco_mfcc12'] - + #Movement category feature group MOV_HM: ['head_vel'] MOV_HP: 
['mov_Hpose_Dist','mov_Hpose_Pitch','mov_Hpose_Yaw','mov_Hpose_Roll'] EYE_BLINK: ['mov_blink_ear', 'vid_dur', 'mov_blinkdur'] - + MOV_VT: ['mov_freq_trem_freq', 'mov_freq_trem_index', 'mov_freq_trem_pindex', 'mov_amp_trem_freq', + 'mov_amp_trem_index', 'mov_amp_trem_pindex'] + #Calculation for variables # Facial Asymmetry fac_AsymMaskMouth: ['mean', 'std'] fac_AsymMaskEyebrow: ['mean', 'std'] fac_AsymMaskEye: ['mean', 'std'] fac_AsymMaskCom: ['mean', 'std'] - + #Facial Action Unit fac_AU01int: ['mean', 'std'] fac_AU02int: ['mean', 'std'] @@ -103,12 +107,12 @@ derive_feature: fac_AU15pres: ['pct'] fac_AU17pres: ['pct'] fac_AU20pres: ['pct'] - fac_AU23pres: ['pct'] + fac_AU23pres: ['pct'] fac_AU25pres: ['pct'] fac_AU26pres: ['pct'] fac_AU28pres: ['pct'] fac_AU45pres: ['pct'] - + #Facial Expressivity hap_exp: ['mean', 'std'] sad_exp: ['mean', 'std'] @@ -129,7 +133,7 @@ derive_feature: neu_exp: ['mean', 'std', 'pct'] com_exp: ['mean', 'std', 'pct'] hap_exp_full: ['mean', 'std'] - sad_exp_full: ['mean', 'std'] + sad_exp_full: ['mean', 'std'] sur_exp_full: ['mean', 'std'] fea_exp_full: ['mean', 'std'] ang_exp_full: ['mean', 'std'] @@ -139,7 +143,7 @@ derive_feature: neg_exp_full: ['mean', 'std'] neu_exp_full: ['mean', 'std'] com_exp_full: ['mean', 'std'] - + #Facial Landmarks fac_LMK00disp: ['mean', 'std'] fac_LMK01disp: ['mean', 'std'] @@ -151,7 +155,7 @@ derive_feature: fac_LMK07disp: ['mean', 'std'] fac_LMK08disp: ['mean', 'std'] fac_LMK09disp: ['mean', 'std'] - fac_LMK10disp: ['mean', 'std'] + fac_LMK10disp: ['mean', 'std'] fac_LMK11disp: ['mean', 'std'] fac_LMK12disp: ['mean', 'std'] fac_LMK13disp: ['mean', 'std'] @@ -163,22 +167,22 @@ derive_feature: fac_LMK19disp: ['mean', 'std'] fac_LMK20disp: ['mean', 'std'] fac_LMK21disp: ['mean', 'std'] - fac_LMK22disp: ['mean', 'std'] + fac_LMK22disp: ['mean', 'std'] fac_LMK23disp: ['mean', 'std'] fac_LMK24disp: ['mean', 'std'] fac_LMK25disp: ['mean', 'std'] fac_LMK26disp: ['mean', 'std'] fac_LMK27disp: ['mean', 'std'] 
fac_LMK28disp: ['mean', 'std'] - fac_LMK29disp: ['mean', 'std'] + fac_LMK29disp: ['mean', 'std'] fac_LMK30disp: ['mean', 'std'] fac_LMK31disp: ['mean', 'std'] fac_LMK32disp: ['mean', 'std'] fac_LMK33disp: ['mean', 'std'] - fac_LMK34disp: ['mean', 'std'] + fac_LMK34disp: ['mean', 'std'] fac_LMK35disp: ['mean', 'std'] fac_LMK36disp: ['mean', 'std'] - fac_LMK37disp: ['mean', 'std'] + fac_LMK37disp: ['mean', 'std'] fac_LMK38disp: ['mean', 'std'] fac_LMK39disp: ['mean', 'std'] fac_LMK40disp: ['mean', 'std'] @@ -193,7 +197,7 @@ derive_feature: fac_LMK49disp: ['mean', 'std'] fac_LMK50disp: ['mean', 'std'] fac_LMK51disp: ['mean', 'std'] - fac_LMK52disp: ['mean', 'std'] + fac_LMK52disp: ['mean', 'std'] fac_LMK53disp: ['mean', 'std'] fac_LMK54disp: ['mean', 'std'] fac_LMK55disp: ['mean', 'std'] @@ -209,7 +213,7 @@ derive_feature: fac_LMK65disp: ['mean', 'std'] fac_LMK66disp: ['mean', 'std'] fac_LMK67disp: ['mean', 'std'] - + #Acoustic feature aco_int: ['mean', 'std', 'range'] aco_ff: ['mean', 'std', 'range'] @@ -238,7 +242,7 @@ derive_feature: aco_mfcc10: ['mean'] aco_mfcc11: ['mean'] aco_mfcc12: ['mean'] - + #Movement feature head_vel: ['mean', 'std'] mov_Hpose_Dist: ['mean', 'std'] @@ -248,3 +252,9 @@ derive_feature: mov_blink_ear: ['mean', 'std'] vid_dur: ['count'] mov_blinkdur: ['mean', 'std'] + mov_freq_trem_freq: ['mean'] + mov_freq_trem_index: ['mean'] + mov_freq_trem_pindex: ['mean'] + mov_amp_trem_freq: ['mean'] + mov_amp_trem_index: ['mean'] + mov_amp_trem_pindex: ['mean'] diff --git a/resources/features/raw_feature.yml b/resources/features/raw_feature.yml index 982bf631..fc61ff99 100644 --- a/resources/features/raw_feature.yml +++ b/resources/features/raw_feature.yml @@ -1,11 +1,11 @@ raw_feature: #error reason error_reason: error_reason - + #Output range mov_headvel_start: 0 mov_headvel_end: 200 - + #Facial markers hap_exp: fac_hapintsoft sad_exp: fac_sadintsoft @@ -38,13 +38,13 @@ raw_feature: neu_exp_full: neu_exp_full cai_exp_full: cai_exp_full com_exp_full: 
fac_cominthard - + #Facial asymmetry fac_AsymMaskMouth: fac_asymmaskmouth fac_AsymMaskEye: fac_asymmaskeye fac_AsymMaskEyebrow: fac_asymmaskeyebrow fac_AsymMaskCom: fac_asymmaskcom - + #Facial landmark fac_LMK00disp: fac_LMK00disp fac_LMK01disp: fac_LMK01disp @@ -114,7 +114,7 @@ raw_feature: fac_LMK65disp: fac_LMK65disp fac_LMK66disp: fac_LMK66disp fac_LMK67disp: fac_LMK67disp - + #Facial action unit fac_AU01int: fac_AU01int fac_AU02int: fac_AU02int @@ -151,7 +151,7 @@ raw_feature: fac_AU26pres: fac_AU26pres fac_AU28pres: fac_AU28pres fac_AU45pres: fac_AU45pres - + #Verbal markers aco_int: aco_int aco_ff: aco_ff @@ -184,7 +184,7 @@ raw_feature: aco_speakingtime: aco_speakingtime aco_numpauses: aco_numpauses aco_pausefrac: aco_pausefrac - + #Movement markers head_vel: mov_headvel mov_blink_ear: mov_blink_ear @@ -196,3 +196,18 @@ raw_feature: mov_Hpose_Yaw: mov_hposeyaw mov_Hpose_Roll: mov_hposeroll mov_Hpose_Dist: mov_hposedist + mov_freq_trem_freq: mov_freqtremfreq + mov_freq_trem_index: mov_freqtremindex + mov_freq_trem_pindex: mov_freqtrempindex + mov_amp_trem_freq: mov_amptremfreq + mov_amp_trem_index: mov_amptremindex + mov_amp_trem_pindex: mov_amptrempindex + fac_tremor_median_5: fac_tremor_median_5 + fac_tremor_median_12: fac_tremor_median_12 + fac_tremor_median_8: fac_tremor_median_8 + fac_tremor_median_48: fac_tremor_median_48 + fac_tremor_median_54: fac_tremor_median_54 + fac_tremor_median_28: fac_tremor_median_28 + fac_tremor_median_51: fac_tremor_median_51 + fac_tremor_median_66: fac_tremor_median_66 + fac_tremor_median_57: fac_tremor_median_57 diff --git a/resources/libraries/voice_tremor.praat b/resources/libraries/voice_tremor.praat new file mode 100644 index 00000000..efeca086 --- /dev/null +++ b/resources/libraries/voice_tremor.praat @@ -0,0 +1,422 @@ + +###################################### +# Global Settings +###################################### +sourcedirec$ = "./"; directory of sounds to be analyzed +minPi = 60; minimal Pitch [Hz] +maxPi = 
350; maximal Pitch [Hz] +ts = 0.015; analysis time step [s] +tremthresh = 0.15; minimal autocorr.-coefficient to assume "tremor" +minTr = 1.5; minimal tremor frequency [Hz] +maxTr = 15; maximal tremor frequency [Hz] + + + +###################################### +# Sound (.wav) in, results (.txt) out +###################################### + +# record/load and select the sound to be analyzed!!! + +info$ = Info +name$ = extractWord$(info$, "Object name: ") + +slength = Get total duration +call ftrem +call atrem + +echo +...{"FTrF": 'ftrf:2#', "ATrF":'atrf:2',"FTrI":'ftri:3',"ATrI":'atri:3',"FTrP":'ftrp:3',"ATrP":'atrp:3'} + + + +###################################### +# Frequency Tremor Analysis +###################################### +procedure ftrem + To Pitch (cc)... ts minPi 15 yes 0.03 0.3 0.01 0.35 0.14 maxPi + +#Edit +#pause + +# because PRAAT only runs "Subtract linear fit" if the last frame is "voiceless" (!?): +# numberOfFrames+1 (1) + numberOfFrames = Get number of frames + x1 = Get time from frame number... 1 + am_F0 = Get mean... 0 0 Hertz + + Create Matrix... ftrem_0 0 slength numberOfFrames+1 ts x1 1 1 1 1 1 0 + for i from 1 to numberOfFrames + select Pitch 'name$' + f0 = Get value in frame... i Hertz + select Matrix ftrem_0 +# write zeros to matrix where frames are voiceless + if f0 = undefined + Set value... 1 i 0 + else + Set value... 1 i f0 + endif + endfor + +# remove the linear F0 trend (F0 declination) + To Pitch + Subtract linear fit... Hertz + Rename... ftrem_0_lin + +# undo (1) + Create Matrix... ftrem 0 slength numberOfFrames ts x1 1 1 1 1 1 0 + for i from 1 to numberOfFrames + select Pitch ftrem_0_lin + f0 = Get value in frame... i Hertz + select Matrix ftrem +# write zeros to matrix where frames are voiceless + if f0 = undefined + Set value... 1 i 0 + else + Set value... 1 i f0 + endif + endfor + + To Pitch + +# normalize F0-contour by mean F0 + select Matrix ftrem + Formula... 
(self-am_F0)/am_F0 + +# since zeros in the Matrix (unvoiced frames) become normalized to -1 but +# unvoiced frames should be zero (if anything) +# write zeros to matrix where frames are voiceless + for i from 1 to numberOfFrames + select Pitch ftrem + f0 = Get value in frame... i Hertz + if f0 = undefined + select Matrix ftrem + Set value... 1 i 0 + endif + endfor + +# to calculate autocorrelation (cc-method): + select Matrix ftrem + To Sound (slice)... 1 +# calculate Frequency of Frequency Tremor [Hz] + To Pitch (cc)... slength minTr 15 yes 0.01 tremthresh 0.01 0.35 0.14 maxTr + Rename... ftrem_norm + + ftrf = Get mean... 0 0 Hertz + +# calculate Intensity Index of Frequency Tremor [%] + select Sound ftrem + plus Pitch ftrem_norm + To PointProcess (peaks)... yes no + Rename... Maxima + numberofMaxPoints = Get number of points + ftri_max = 0 + noFMax = 0 + for iPoint from 1 to numberofMaxPoints + select PointProcess Maxima + ti = Get time from index... iPoint + select Sound ftrem + ftri_Point = Get value at time... Average ti Sinc70 + if ftri_Point = undefined + ftri_Point = 0 + noFMax += 1 + endif + ftri_max += abs(ftri_Point) + endfor + +select Sound ftrem +plus PointProcess Maxima +#Edit +#pause + +# ftri_max:= (mean) procentual deviation of F0-maxima from mean F0 at ftrf + numberofMaxima = numberofMaxPoints - noFMax + ftri_max = 100 * ftri_max/numberofMaxima + + select Sound ftrem + plus Pitch ftrem_norm + To PointProcess (peaks)... no yes + Rename... Minima + numberofMinPoints = Get number of points + ftri_min = 0 + noFMin = 0 + for iPoint from 1 to numberofMinPoints + select PointProcess Minima + ti = Get time from index... iPoint + select Sound ftrem + ftri_Point = Get value at time... 
Average ti Sinc70
+      if ftri_Point = undefined
+         ftri_Point = 0
+         noFMin += 1
+      endif
+      ftri_min += abs(ftri_Point)
+   endfor
+
+select Sound ftrem
+plus PointProcess Minima
+#Edit
+#pause
+
+
+# ftri_min:= (mean) percentage deviation of F0-minima from mean F0 at ftrf
+   numberofMinima = numberofMinPoints - noFMin
+   ftri_min = 100 * ftri_min/numberofMinima
+
+   ftri = (ftri_max + ftri_min) / 2
+
+   ftrp = ftri * ftrf/(ftrf+1)
+
+# uncomment to inspect frequency tremor objects:
+# pause
+
+   select Pitch ftrem
+# uncomment if only frequency tremor is to be analyzed:
+# plus Pitch 'name$'
+   plus Matrix ftrem_0
+   plus Pitch ftrem_0
+   plus Pitch ftrem_0_lin
+   plus Matrix ftrem
+   plus Sound ftrem
+   plus Pitch ftrem_norm
+   plus PointProcess Maxima
+   plus PointProcess Minima
+   Remove
+
+endproc
+
+
+######################################
+# Amplitude Tremor Analysis
+######################################
+# Procedure atrem: measures amplitude tremor of Sound 'name$'.
+#
+# Reads (script-level variables set outside this procedure):
+#   name$       name shared by the Sound and Pitch objects to analyse
+#   ts          time step used to resample the amplitude contour
+#   slength     upper time bound used for the Matrix objects created here
+#   minTr, maxTr, tremthresh   arguments passed to "To Pitch (cc)..."
+#
+# Requires objects: Sound 'name$' and Pitch 'name$'.
+#
+# Sets:
+#   am_Int      mean of the resampled amplitude contour
+#   atrf        mean "pitch" of the normalized contour (tremor frequency) [Hz]
+#   atri        mean absolute deviation at contour maxima and minima [%]
+#   atrp        atri * atrf/(atrf+1)
+#
+# Side effects: every intermediate object AND Pitch 'name$' is removed at the
+# end; the statement order matters because each command acts on the current
+# selection.
+procedure atrem
+   select Sound 'name$'
+# uncomment if only amplitude tremor is to be analyzed:
+# To Pitch (cc)... ts minPi 15 yes 0.03 0.3 0.01 0.35 0.14 maxPi
+# select Sound 'name$'
+   plus Pitch 'name$'
+   To PointProcess (cc)
+   select Sound 'name$'
+   plus PointProcess 'name$'_'name$'
+
+# amplitudes are integrals of intensity over periods -- not intensity maxima
+   To AmplitudeTier (period)... 0 0 0.0001 0.02 1.7
+
+#Edit
+#pause
+
+# from here on out: prepare to autocorrelate AmplitudeTier-data
+# sample AmplitudeTier at (constant) rate ts
+   numbOfAmpPoints = Get number of points
+   first_ampP = Get time from index... 1
+   last_ampP = Get time from index... numbOfAmpPoints
+
+# to be able to -- automatically -- read Amp. values...
+   Down to TableOfReal
+
+# first Pitch frame at/after the first amp. point, last frame at/before the last one
+   select Pitch 'name$'
+   frameNo1 = Get frame number from time... first_ampP
+   hiframe1 = ceiling(frameNo1)
+   t_hiframe1 = Get time from frame number... hiframe1
+
+   frameNoN = Get frame number from time... last_ampP
+   loframeN = floor(frameNoN)
+
+# number of Amp. points if (re-)sampled at ts
+   numbOfPoints_neu = loframeN - hiframe1 + 1
+
+# to enable autocorrelation of the Amp.-contour: ->Matrix->Sound
+
+# one extra column is allocated; it is zeroed further down, see (2)
+   Create Matrix... atrem_nlc 0 slength numbOfPoints_neu+1 ts t_hiframe1 1 1 1 1 1 2
+# get the mean of the amplitude contour in time windows of constant duration
+   for point_neu from 1 to numbOfPoints_neu
+# window [tl, tu] of width ts centred on the resampling instant t
+      t = (point_neu-1) * ts + t_hiframe1
+      tl = t - ts/2
+      tu = t + ts/2
+
+# indices of the amp. points surrounding both window edges
+      select AmplitudeTier 'name$'_'name$'_'name$'
+      loil = Get low index from time... tl
+      hiil = Get high index from time... tl
+      loiu = Get low index from time... tu
+      hiiu = Get high index from time... tu
+
+# TableOfReal columns as used here: 1 = time, 2 = amplitude
+      select TableOfReal 'name$'_'name$'_'name$'
+      if loil = 0
+         lotl = 0; time before the first amp. point
+         druck_lol = Get value... hiil 2; amplitude value before the first amp. point
+      else
+         lotl = Get value... loil 1; time value of Amp.Point before tl in the PointProcess [s]
+         druck_lol = Get value... loil 2; amplitude value before tl in the PointProcess [Pa, ranged from 0 to 1]
+      endif
+
+      hitl = Get value... hiil 1
+      druck_hil = Get value... hiil 2; amplitude value after tl in the PointProcess
+
+      lotu = Get value... loiu 1
+      druck_lou = Get value... loiu 2; amplitude value before tu in the PointProcess
+
+      if hiiu = numbOfAmpPoints + 1
+         hitu = slength; time after the last amp. point
+# fix: there is no point after tu, so reuse the last existing point (row loiu),
+# mirroring the loil = 0 branch above; row hiil is only the last point when
+# exactly one amp. point lies between tl and tu
+         druck_hiu = Get value... loiu 2; amplitude value after the last amp. point
+      else
+         hitu = Get value... hiiu 1; time value after tu in the PointProcess
+         druck_hiu = Get value... hiiu 2; amplitude value after tu in the PointProcess
+      endif
+
+      nPinter = loiu - loil; = hiiu - hiil; number of amp.-points between tl and tu
+      if nPinter > 0
+         itinter = 0
+         tinter = 0
+         druck_tin = 0
+         deltat = 0
+# accumulate a weighted amplitude (druck_tin) and the mean time (tin) of the
+# amp. points lying inside the window
+# NOTE(review): on the first pass hilft is 0, so ideltat equals the absolute
+# time of the first interior point rather than an interval -- confirm that
+# this weighting is intended
+         for iinter from 1 to nPinter
+            hilft = itinter
+            itinter = Get value... loil+iinter 1
+            idruck_tin = Get value... loil+iinter 2
+
+            ideltat = itinter - hilft
+            druck_tin += idruck_tin * ideltat
+            tinter += itinter
+            deltat += ideltat
+         endfor
+
+         tin = tinter/nPinter
+         druck_tin = druck_tin/deltat
+      endif
+
+# amplitudes at the window edges by linear interpolation between neighbours
+      druck_tl = ((hitl-tl)*druck_lol + (tl-lotl)*druck_hil) / (hitl-lotl)
+      druck_tu = ((hitu-tu)*druck_lou + (tu-lotu)*druck_hiu) / (hitu-lotu)
+
+# window mean: plain average of both edges, or two trapezoids meeting at tin
+      if nPinter = 0; loil = loiu; hiil = hiiu
+         druck_mean = (druck_tl + druck_tu) / 2
+      else
+         druck_mean = ((tin-tl)*(druck_tl + druck_tin)/2 + (tu-tin)*(druck_tin + druck_tu)/2) / (tu-tl)
+      endif
+
+      select Matrix atrem_nlc
+      Set value... 1 point_neu druck_mean
+   endfor
+
+# mean amplitude of the resampled contour (taken via a Pitch object)
+   To Pitch
+   am_Int = Get mean... 0 0 Hertz
+
+# because PRAAT classifies frequencies in Pitch objects <=0 as "voiceless" and
+# therefore parts with extreme INTENSITIES would be considered as "voiceless"
+# (irrelevant) after "Subtract linear fit" (1)
+# "1" is added to the original Pa-values (ranged from 0 to 1)
+   select Matrix atrem_nlc
+   Formula... self+1
+
+# because PRAAT only runs "Subtract linear fit" if the last frame is "voiceless"...?(2)
+   Set value... 1 numbOfPoints_neu+1 0
+
+# remove the linear amp.-trend (amplitude declination)
+#Formula... self*1000; better for viewing
+   To Pitch
+   Rename... hilf_lincorr
+   Subtract linear fit... Hertz
+   Rename... atrem
+
+# undo (1)...
+   To Matrix
+   Formula... self-1
+
+# normalize Amp. contour by mean Amp.
+   Formula... (self-am_Int)/am_Int
+
+# remove last frame, undo (2)
+   Create Matrix... atrem_besser 0 slength numbOfPoints_neu ts t_hiframe1 1 1 1 1 1 0
+   for point_neu from 1 to numbOfPoints_neu
+      select Matrix atrem
+      spring = Get value in cell... 1 point_neu
+      select Matrix atrem_besser
+      Set value... 1 point_neu spring
+   endfor
+
+# to calculate autocorrelation (cc-method)
+   To Sound (slice)... 1
+# calculate Frequency of Amplitude Tremor [Hz]
+   To Pitch (cc)... slength minTr 15 yes 0.01 tremthresh 0.01 0.35 0.14 maxTr
+   Rename... atrem_norm
+
+   atrf = Get mean... 0 0 Hertz
+
+# calculate Intensity Index of Amplitude Tremor [%]
+   select Sound atrem_besser
+   plus Pitch atrem_norm
+   To PointProcess (peaks)... yes no
+   Rename... Maxima
+   numberofMaxPoints = Get number of points
+   atri_max = 0
+   noAMax = 0
+# sum |contour value| at every maximum; undefined samples are counted in
+# noAMax and excluded from the divisor below
+   for iPoint from 1 to numberofMaxPoints
+      select PointProcess Maxima
+      ti = Get time from index... iPoint
+      select Sound atrem_besser
+      atri_Point = Get value at time... 0 ti Sinc70
+      if atri_Point = undefined
+         atri_Point = 0
+         noAMax += 1
+      endif
+      atri_max += abs(atri_Point)
+   endfor
+
+select Sound atrem_besser
+plus PointProcess Maxima
+#Edit
+#pause
+
+# atri_max:= (mean) percentage deviation of Amp. maxima from mean Amp.[Pa] at atrf
+   numberofMaxima = numberofMaxPoints - noAMax
+   atri_max = 100 * atri_max / numberofMaxima
+
+   select Sound atrem_besser
+   plus Pitch atrem_norm
+   To PointProcess (peaks)... no yes
+   Rename... Minima
+   numberofMinPoints = Get number of points
+   atri_min = 0
+   noAMin = 0
+# same accumulation as for the maxima, now over the minima
+   for iPoint from 1 to numberofMinPoints
+      select PointProcess Minima
+      ti = Get time from index... iPoint
+      select Sound atrem_besser
+      atri_Point = Get value at time... 0 ti Sinc70
+      if atri_Point = undefined
+         atri_Point = 0
+         noAMin += 1
+      endif
+      atri_min += abs(atri_Point)
+   endfor
+
+select Sound atrem_besser
+plus PointProcess Minima
+#Edit
+#pause
+
+# atri_min:= (mean) percentage deviation of Amp. minima from mean Amp.[Pa] at atrf
+   numberofMinima = numberofMinPoints - noAMin
+   atri_min = 100 * atri_min / numberofMinima
+
+# intensity index: average of the maxima- and minima-based deviations
+   atri = (atri_max + atri_min) / 2
+
+   atrp = atri * atrf/(atrf+1)
+
+# uncomment to inspect amplitude tremor objects:
+# pause
+
+# clean up: remove every object created above, plus Pitch 'name$'
+   select Pitch 'name$'
+   plus PointProcess 'name$'_'name$'
+   plus AmplitudeTier 'name$'_'name$'_'name$'
+   plus TableOfReal 'name$'_'name$'_'name$'
+   plus Matrix atrem_nlc
+   plus Pitch atrem_nlc
+   plus Pitch hilf_lincorr
+   plus Pitch atrem
+   plus Matrix atrem
+   plus Matrix atrem_besser
+   plus Sound atrem_besser
+   plus Pitch atrem_norm
+   plus PointProcess Maxima
+   plus PointProcess Minima
+   Remove
+endproc
\ No newline at end of file