From 117ffb3d57e649aabad9b4eb0aef13c957a07bcc Mon Sep 17 00:00:00 2001
From: Vijay Yadev <vijay.yadav@aicure.com>
Date: Mon, 7 Dec 2020 20:32:08 -0500
Subject: [PATCH] Add --tr toggle to keep transcription output; silence VAD debug logs

---
 dbm_lib/controller/process_feature.py                  |  6 +++---
 .../dbm_features/raw_features/nlp/speech_features.py   |  6 +++++-
 dbm_lib/dbm_features/raw_features/util/nlp_util.py     |  2 +-
 .../dbm_features/raw_features/util/vad_utilities.py    | 10 +++++-----
 process_data.py                                        |  7 ++++---
 process_dbm.sh                                         |  9 +++++++--
 6 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/dbm_lib/controller/process_feature.py b/dbm_lib/controller/process_feature.py
index d91d479d..271b0d49 100644
--- a/dbm_lib/controller/process_feature.py
+++ b/dbm_lib/controller/process_feature.py
@@ -131,7 +131,7 @@ def process_movement(video_uri, out_dir, dbm_group, r_config, dlib_model):
     logger.info('processing voice tremor....')
     voice_tremor.run_vtremor(video_uri, out_dir, r_config)
     
-def process_nlp(video_uri, out_dir, dbm_group, r_config, deep_path):
+def process_nlp(video_uri, out_dir, dbm_group, tran_tog, r_config, deep_path):
     """
     processing nlp features
     Args:
@@ -139,12 +139,12 @@ def process_nlp(video_uri, out_dir, dbm_group, r_config, deep_path):
         dbm_group: list of features to process; r_config: raw feature config object
         deep_path: deep speech build path
     """
-    if dbm_group != None and len(dbm_group)>0 and 'nlp' not in dbm_group:
+    if dbm_group != None and len(dbm_group)>0 and 'speech' not in dbm_group:
         return
     
     logger.info('Processing nlp variables from data in {}'.format(video_uri))
     transcribe.run_transcribe(video_uri, out_dir, r_config, deep_path)
-    speech_features.run_speech_feature(video_uri, out_dir, r_config)
+    speech_features.run_speech_feature(video_uri, out_dir, r_config, tran_tog)
     
 def remove_file(file_path):
     """
diff --git a/dbm_lib/dbm_features/raw_features/nlp/speech_features.py b/dbm_lib/dbm_features/raw_features/nlp/speech_features.py
index 6aebd547..3f0cfba7 100644
--- a/dbm_lib/dbm_features/raw_features/nlp/speech_features.py
+++ b/dbm_lib/dbm_features/raw_features/nlp/speech_features.py
@@ -10,6 +10,7 @@ import pandas as pd
 import glob
 from os.path import join
 import logging
+import shutil
 
 from dbm_lib.dbm_features.raw_features.util import util as ut
 from dbm_lib.dbm_features.raw_features.util import nlp_util as n_util
@@ -21,7 +22,7 @@ speech_dir = 'nlp/speech_feature'
 speech_ext = '_nlp.csv'
 transcribe_ext = 'nlp/transcribe/*_transcribe.csv'
 
-def run_speech_feature(video_uri, out_dir, r_config):
+def run_speech_feature(video_uri, out_dir, r_config, tran_tog):
     """
     Processing all patient's for fetching nlp features
     -------------------
@@ -42,6 +43,9 @@ def run_speech_feature(video_uri, out_dir, r_config):
 
             logger.info('Saving Output file {} '.format(out_loc))
             ut.save_output(df_speech, out_loc, fl_name, speech_dir, speech_ext)
+
+            if (tran_tog == None) or (tran_tog != 'on'):
+                shutil.rmtree(os.path.dirname(transcribe_path[0]))
             
     except Exception as e:
         logger.error('Failed to process video file')
diff --git a/dbm_lib/dbm_features/raw_features/util/nlp_util.py b/dbm_lib/dbm_features/raw_features/util/nlp_util.py
index fc1ac3d1..6a07141e 100644
--- a/dbm_lib/dbm_features/raw_features/util/nlp_util.py
+++ b/dbm_lib/dbm_features/raw_features/util/nlp_util.py
@@ -126,7 +126,7 @@ def process_speech(transcribe_df,r_config):
         Returns:
             Dataframe for speech features
     """
-    
+    transcribe_df = transcribe_df.replace(np.nan, '', regex=True)
     err_transcribe = transcribe_df[r_config.err_reason].iloc[0]
     transcribe = transcribe_df[r_config.nlp_transcribe].iloc[0]
     total_time = transcribe_df[r_config.nlp_totalTime].iloc[0]
diff --git a/dbm_lib/dbm_features/raw_features/util/vad_utilities.py b/dbm_lib/dbm_features/raw_features/util/vad_utilities.py
index b769ba19..8f11a2e8 100644
--- a/dbm_lib/dbm_features/raw_features/util/vad_utilities.py
+++ b/dbm_lib/dbm_features/raw_features/util/vad_utilities.py
@@ -158,7 +158,7 @@ def vad_get_segment_times(sample_rate, frame_duration_ms,
     for frame in frames:
         is_speech = vad.is_speech(frame.bytes, sample_rate)
 
-        sys.stdout.write('1' if is_speech else '0')
+        #sys.stdout.write('1' if is_speech else '0')
         if not triggered:
             ring_buffer.append((frame, is_speech))
             num_voiced = len([f for f, speech in ring_buffer if speech])
@@ -167,7 +167,7 @@ def vad_get_segment_times(sample_rate, frame_duration_ms,
             # TRIGGERED state.
             if num_voiced > 0.9 * ring_buffer.maxlen:
                 triggered = True
-                sys.stdout.write('+(%s)' % (ring_buffer[0][0].timestamp,))
+                #sys.stdout.write('+(%s)' % (ring_buffer[0][0].timestamp,))
                 start_times.append(ring_buffer[0][0].timestamp)  # BT
                 ring_buffer.clear()
         else:
@@ -179,18 +179,18 @@ def vad_get_segment_times(sample_rate, frame_duration_ms,
             # unvoiced, then enter NOTTRIGGERED and yield whatever
             # audio we've collected.
             if num_unvoiced > 0.9 * ring_buffer.maxlen:
-                sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
+                #sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
                 end_times.append(ring_buffer[0][0].timestamp + frame.duration)  # BT
                 triggered = False
 
     if triggered:  # BT if were in triggered state at end of signal, set output time
-        sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
+        #sys.stdout.write('-(%s)' % (frame.timestamp + frame.duration))
         if len(ring_buffer)>0:
             end_times.append(ring_buffer[0][0].timestamp )  # BT
         else:
             # only get here in very rare case that we triggered on 2nd-to-last frame
             end_times.append(frame.timestamp + frame.duration)
-    sys.stdout.write('\n')
+    #sys.stdout.write('\n')
 
     return(start_times, end_times)
 
diff --git a/process_data.py b/process_data.py
index 746ac18d..57b5a52f 100644
--- a/process_data.py
+++ b/process_data.py
@@ -36,7 +36,7 @@ def common_video(video_file, args, r_config):
     of.process_open_face(video_file, os.path.dirname(video_file), out_path, OPENFACE_PATH, args.dbm_group)
     pf.process_facial(video_file, out_path, args.dbm_group, r_config)
     pf.process_acoustic(video_file, out_path, args.dbm_group, r_config)
-    pf.process_nlp(video_file, out_path, args.dbm_group, r_config, DEEP_SPEECH)
+    pf.process_nlp(video_file, out_path, args.dbm_group, args.tr, r_config, DEEP_SPEECH)
     
     pf.process_movement(video_file, out_path, args.dbm_group, r_config, DLIB_SHAPE_MODEL)
     pf.remove_file(video_file)
@@ -81,7 +81,7 @@ def process_raw_audio_file(args, s_config, r_config):
 
                 out_path = os.path.join(args.output_path, 'raw_variables')
                 pf.process_acoustic(audio_file[0], out_path, args.dbm_group, r_config)
-                pf.process_nlp(audio_file[0], out_path, args.dbm_group, r_config, DEEP_SPEECH)
+                pf.process_nlp(audio_file[0], out_path, args.dbm_group, args.tr, r_config, DEEP_SPEECH)
                 
             else:
                 logger.info('Enter correct audio(*.wav) file path.')
@@ -132,7 +132,7 @@ def process_raw_audio_dir(args, s_config, r_config):
 
                 out_path = os.path.join(args.output_path, 'raw_variables')
                 pf.process_acoustic(audio, out_path, args.dbm_group, r_config)
-                pf.process_nlp(audio, out_path, args.dbm_group, r_config, DEEP_SPEECH)
+                pf.process_nlp(audio, out_path, args.dbm_group, args.tr, r_config, DEEP_SPEECH)
                 
             except Exception as e:
                 logger.error('Failed to process wav file.')
@@ -159,6 +159,7 @@ if __name__=="__main__":
     parser.add_argument("--input_path", help="path to the input files", required=True)
     parser.add_argument("--output_path", help="path to the raw and derived variable output", required=True)
     parser.add_argument("--dbm_group", help="list of feature groups", nargs='+')
+    parser.add_argument("--tr", help="speech transcription toggle")
 
     args = parser.parse_args()
     s_config = config_reader.ConfigReader()
diff --git a/process_dbm.sh b/process_dbm.sh
index c573eccc..a74f50ea 100644
--- a/process_dbm.sh
+++ b/process_dbm.sh
@@ -7,6 +7,7 @@ helpFunction()
    echo -e "\t--input_path: path to the input files"
    echo -e "\t--output_path: path to the raw and derived variable output"
    echo -e "\t--dbm_group: list of feature groups"
+   echo -e "\t--tr: Toggle for speech transcription(optional)"
    exit 1 # Exit script after printing help
 }
 
@@ -15,6 +16,7 @@ while [ $# -gt 0 ]; do
     --input_path=*) input_path="${1#*=}" ;;
     --output_path=*) output_path="${1#*=}" ;;
     --dbm_group=*) dbm_group="${1#*=}" ;;
+    --tr=*) tr="${1#*=}" ;;
     *) helpFunction ;;
   esac
   shift
@@ -55,8 +57,11 @@ fi
 if [[ $dbm_group == *"movement"* ]]; then
     dbm_new="$dbm_new movement"
 fi
-if [[ $dbm_group == *"nlp"* ]]; then
-    dbm_new="$dbm_new nlp"
+if [[ $dbm_group == *"speech"* ]]; then
+    dbm_new="$dbm_new speech"
+fi
+if [[ $dbm_group == *"speech"* ]] && [[ ${tr,,} == "on" ]]; then
+    dbm_new="$dbm_new --tr ${tr,,}"
 fi
 
 #docker commands to run container