From 117ffb3d57e649aabad9b4eb0aef13c957a07bcc Mon Sep 17 00:00:00 2001 From: Vijay Yadev Date: Mon, 7 Dec 2020 20:32:08 -0500 Subject: [PATCH] transcription logs --- dbm_lib/controller/process_feature.py | 6 +++--- .../dbm_features/raw_features/nlp/speech_features.py | 6 +++++- dbm_lib/dbm_features/raw_features/util/nlp_util.py | 2 +- .../dbm_features/raw_features/util/vad_utilities.py | 10 +++++----- process_data.py | 7 ++++--- process_dbm.sh | 9 +++++++-- 6 files changed, 25 insertions(+), 15 deletions(-) diff --git a/dbm_lib/controller/process_feature.py b/dbm_lib/controller/process_feature.py index d91d479d..271b0d49 100644 --- a/dbm_lib/controller/process_feature.py +++ b/dbm_lib/controller/process_feature.py @@ -131,7 +131,7 @@ def process_movement(video_uri, out_dir, dbm_group, r_config, dlib_model): logger.info('processing voice tremor....') voice_tremor.run_vtremor(video_uri, out_dir, r_config) -def process_nlp(video_uri, out_dir, dbm_group, r_config, deep_path): +def process_nlp(video_uri, out_dir, dbm_group, tran_tog, r_config, deep_path): """ processing nlp features Args: @@ -139,12 +139,12 @@ def process_nlp(video_uri, out_dir, dbm_group, r_config, deep_path): dbm_group: list of features to process; r_config: raw feature config object deep_path: deep speech build path """ - if dbm_group != None and len(dbm_group)>0 and 'nlp' not in dbm_group: + if dbm_group != None and len(dbm_group)>0 and 'speech' not in dbm_group: return logger.info('Processing nlp variables from data in {}'.format(video_uri)) transcribe.run_transcribe(video_uri, out_dir, r_config, deep_path) - speech_features.run_speech_feature(video_uri, out_dir, r_config) + speech_features.run_speech_feature(video_uri, out_dir, r_config, tran_tog) def remove_file(file_path): """ diff --git a/dbm_lib/dbm_features/raw_features/nlp/speech_features.py b/dbm_lib/dbm_features/raw_features/nlp/speech_features.py index 6aebd547..3f0cfba7 100644 --- a/dbm_lib/dbm_features/raw_features/nlp/speech_features.py +++ b/dbm_lib/dbm_features/raw_features/nlp/speech_features.py @@ -10,6 +10,7 @@ import pandas as pd import glob from os.path import join import logging +import shutil from dbm_lib.dbm_features.raw_features.util import util as ut from dbm_lib.dbm_features.raw_features.util import nlp_util as n_util @@ -21,7 +22,7 @@ speech_dir = 'nlp/speech_feature' speech_ext = '_nlp.csv' transcribe_ext = 'nlp/transcribe/*_transcribe.csv' -def run_speech_feature(video_uri, out_dir, r_config): +def run_speech_feature(video_uri, out_dir, r_config, tran_tog): """ Processing all patient's for fetching nlp features ------------------- @@ -42,6 +43,9 @@ def run_speech_feature(video_uri, out_dir, r_config): logger.info('Saving Output file {} '.format(out_loc)) ut.save_output(df_speech, out_loc, fl_name, speech_dir, speech_ext) + + if (tran_tog == None) or (tran_tog != 'on'): + shutil.rmtree(os.path.dirname(transcribe_path[0])) except Exception as e: logger.error('Failed to process video file') diff --git a/dbm_lib/dbm_features/raw_features/util/nlp_util.py b/dbm_lib/dbm_features/raw_features/util/nlp_util.py index fc1ac3d1..6a07141e 100644 --- a/dbm_lib/dbm_features/raw_features/util/nlp_util.py +++ b/dbm_lib/dbm_features/raw_features/util/nlp_util.py @@ -126,7 +126,7 @@ def process_speech(transcribe_df,r_config): Returns: Dataframe for speech features """ - + transcribe_df = transcribe_df.replace(np.nan, '', regex=True) err_transcribe = transcribe_df[r_config.err_reason].iloc[0] transcribe = transcribe_df[r_config.nlp_transcribe].iloc[0] total_time = transcribe_df[r_config.nlp_totalTime].iloc[0] diff --git a/dbm_lib/dbm_features/raw_features/util/vad_utilities.py b/dbm_lib/dbm_features/raw_features/util/vad_utilities.py index b769ba19..8f11a2e8 100644 --- a/dbm_lib/dbm_features/raw_features/util/vad_utilities.py +++ b/dbm_lib/dbm_features/raw_features/util/vad_utilities.py @@ -158,7 +158,7 @@ def vad_get_segment_times(sample_rate, frame_duration_ms, for frame in frames: is_speech = vad.is_speech(frame.bytes, sample_rate) - sys.stdout.write('1' if is_speech else '0') + #sys.stdout.write('1' if is_speech else '0') if not triggered: ring_buffer.append((frame, is_speech)) num_voiced = len([f for f, speech in ring_buffer if speech]) @@ -167,7 +167,7 @@ def vad_get_segment_times(sample_rate, frame_duration_ms, # TRIGGERED state. if num_voiced > 0.9 * ring_buffer.maxlen: triggered = True - sys.stdout.write('+(%s)' % (ring_buffer[0][0].timestamp,)) + #sys.stdout.write('+(%s)' % (ring_buffer[0][0].timestamp,)) start_times.append(ring_buffer[0][0].timestamp) # BT ring_buffer.clear() else: @@ -179,18 +179,18 @@ def vad_get_segment_times(sample_rate, frame_duration_ms, # unvoiced, then enter NOTTRIGGERED and yield whatever # audio we've collected. if num_unvoiced > 0.9 * ring_buffer.maxlen: - sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration)) + #sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration)) end_times.append(ring_buffer[0][0].timestamp + frame.duration) # BT triggered = False if triggered: # BT if were in triggered state at end of signal, set output time - sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration)) + #sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration)) if len(ring_buffer)>0: end_times.append(ring_buffer[0][0].timestamp ) # BT else: # only get here in very rare case that we triggered on 2nd-to-last frame end_times.append(frame.timestamp + frame.duration) - sys.stdout.write('\n') + #sys.stdout.write('\n') return(start_times, end_times) diff --git a/process_data.py b/process_data.py index 746ac18d..57b5a52f 100644 --- a/process_data.py +++ b/process_data.py @@ -36,7 +36,7 @@ def common_video(video_file, args, r_config): of.process_open_face(video_file, os.path.dirname(video_file), out_path, OPENFACE_PATH, args.dbm_group) pf.process_facial(video_file, out_path, args.dbm_group, r_config) pf.process_acoustic(video_file, out_path, args.dbm_group, r_config) - pf.process_nlp(video_file, out_path, args.dbm_group, r_config, DEEP_SPEECH) + pf.process_nlp(video_file, out_path, args.dbm_group, args.tr, r_config, DEEP_SPEECH) pf.process_movement(video_file, out_path, args.dbm_group, r_config, DLIB_SHAPE_MODEL) pf.remove_file(video_file) @@ -81,7 +81,7 @@ def process_raw_audio_file(args, s_config, r_config): out_path = os.path.join(args.output_path, 'raw_variables') pf.process_acoustic(audio_file[0], out_path, args.dbm_group, r_config) - pf.process_nlp(audio_file[0], out_path, args.dbm_group, r_config, DEEP_SPEECH) + pf.process_nlp(audio_file[0], out_path, args.dbm_group, args.tr, r_config, DEEP_SPEECH) else: logger.info('Enter correct audio(*.wav) file path.') @@ -132,7 +132,7 @@ def process_raw_audio_dir(args, s_config, r_config): out_path = os.path.join(args.output_path, 'raw_variables') pf.process_acoustic(audio, out_path, args.dbm_group, r_config) - pf.process_nlp(audio, out_path, args.dbm_group, r_config, DEEP_SPEECH) + pf.process_nlp(audio, out_path, args.dbm_group, args.tr, r_config, DEEP_SPEECH) except Exception as e: logger.error('Failed to process wav file.') @@ -159,6 +159,7 @@ if __name__=="__main__": parser.add_argument("--input_path", help="path to the input files", required=True) parser.add_argument("--output_path", help="path to the raw and derived variable output", required=True) parser.add_argument("--dbm_group", help="list of feature groups", nargs='+') + parser.add_argument("--tr", help="speech transcription toogle") args = parser.parse_args() s_config = config_reader.ConfigReader() diff --git a/process_dbm.sh b/process_dbm.sh index c573eccc..a74f50ea 100644 --- a/process_dbm.sh +++ b/process_dbm.sh @@ -7,6 +7,7 @@ helpFunction() echo -e "\t--input_path: path to the input files" echo -e "\t--output_path: path to the raw and derived variable output" echo -e "\t--dbm_group: list of feature groups" + echo -e "\t--tr: Toggle for speech transcription(optional)" exit 1 # Exit script after printing help } @@ -15,6 +16,7 @@ while [ $# -gt 0 ]; do --input_path=*) input_path="${1#*=}" ;; --output_path=*) output_path="${1#*=}" ;; --dbm_group=*) dbm_group="${1#*=}" ;; + --tr=*) tr="${1#*=}" ;; *) helpFunction ;; esac shift @@ -55,8 +57,11 @@ fi if [[ $dbm_group == *"movement"* ]]; then dbm_new="$dbm_new movement" fi -if [[ $dbm_group == *"nlp"* ]]; then - dbm_new="$dbm_new nlp" +if [[ $dbm_group == *"speech"* ]]; then + dbm_new="$dbm_new speech" +fi +if [[ $dbm_group == *"speech"* ]] && [[ ${tr,,} == "on" ]]; then + dbm_new="$dbm_new --tr ${tr,,}" fi #docker commands to run container