From 920a7633cdf261846cc55bc31bc3c75142edc90c Mon Sep 17 00:00:00 2001
From: Vijay Yadev <vijay.yadav@aicure.com>
Date: Wed, 11 Nov 2020 21:57:04 -0500
Subject: [PATCH 01/12] nlp_transcribe

---
 Dockerfile                                    | 13 ++-
 dbm_lib/config/config_raw_feature.py          |  3 +
 dbm_lib/controller/process_feature.py         | 15 ++++
 .../raw_features/nlp/transcribe.py            | 82 +++++++++++++++++++
 .../raw_features/util/nlp_util.py             | 66 +++++++++++++++
 process_data.py                               |  6 ++
 process_dbm.sh                                |  3 +
 requirements.txt                              |  1 +
 resources/features/raw_feature.yml            |  3 +
 9 files changed, 190 insertions(+), 2 deletions(-)
 create mode 100644 dbm_lib/dbm_features/raw_features/nlp/transcribe.py
 create mode 100644 dbm_lib/dbm_features/raw_features/util/nlp_util.py

diff --git a/Dockerfile b/Dockerfile
index c3dfd352..5d425d61 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,7 +12,9 @@ RUN apt-get update && apt-get install -y python3-pip \
                    && apt-get install -y libavcodec-dev \
                    && apt-get install -y libavformat-dev \
                    && apt-get install -y libavdevice-dev \
-                   && apt-get install -y libboost-all-dev
+                   && apt-get install -y libboost-all-dev \
+                   && apt-get install -y git \
+                   && apt-get install -y sox
 RUN ln -sfn /usr/bin/pip3 /usr/bin/pip
 
 COPY . /app
@@ -24,8 +26,15 @@ RUN dpkg --configure -a
 RUN su -c ./install.sh
 RUN echo "Done OpenFace!"
 
-WORKDIR /app
+RUN echo "Cloning DeepSpeech..."
+WORKDIR /app/pkg
+RUN git clone https://github.com/mozilla/DeepSpeech.git
 
+WORKDIR /app/pkg/DeepSpeech
+RUN wget https://github.com/mozilla/DeepSpeech/releases/download/v0.9.1/deepspeech-0.9.1-models.pbmm
+RUN wget https://github.com/mozilla/DeepSpeech/releases/download/v0.9.1/deepspeech-0.9.1-models.scorer
+
+WORKDIR /app
 RUN pip install --upgrade pip
 RUN pip install -r requirements.txt
 RUN echo "Requirement txt done!"
diff --git a/dbm_lib/config/config_raw_feature.py b/dbm_lib/config/config_raw_feature.py
index 5494e21d..679c0845 100644
--- a/dbm_lib/config/config_raw_feature.py
+++ b/dbm_lib/config/config_raw_feature.py
@@ -222,4 +222,7 @@ class ConfigRawReader(object):
             self.mov_Hpose_Yaw = config['raw_feature']['mov_Hpose_Yaw']
             self.mov_Hpose_Roll = config['raw_feature']['mov_Hpose_Roll']
             self.mov_Hpose_Dist = config['raw_feature']['mov_Hpose_Dist']
+
+            #NLP features
+            self.nlp_transcribe = config['raw_feature']['nlp_transcribe']
             
\ No newline at end of file
diff --git a/dbm_lib/controller/process_feature.py b/dbm_lib/controller/process_feature.py
index 3282edc2..902ef2c6 100644
--- a/dbm_lib/controller/process_feature.py
+++ b/dbm_lib/controller/process_feature.py
@@ -8,6 +8,7 @@ from dbm_lib.dbm_features.raw_features.audio import intensity, pitch_freq, hnr,
 from dbm_lib.dbm_features.raw_features.audio import pause_segment, jitter, shimmer, mfcc
 from dbm_lib.dbm_features.raw_features.video import face_asymmetry, face_au, face_emotion_expressivity, face_landmark
 from dbm_lib.dbm_features.raw_features.movement import head_motion, eye_blink
+from dbm_lib.dbm_features.raw_features.nlp import transcribe
 
 import subprocess
 import logging
@@ -123,6 +124,20 @@ def process_movement(video_uri, out_dir, dbm_group, r_config, dlib_model):
     logger.info('processing eye blink....')
     eye_blink.run_eye_blink(video_uri, out_dir, r_config, dlib_model)
     
+def process_nlp(video_uri, out_dir, dbm_group, r_config, deep_path):
+    """
+    processing nlp features
+    Args:
+        video_uri: video path; out_dir: raw variable output dir
+        dbm_group: list of features to process; r_config: raw feature config object
+        deep_path: deep speech build path
+    """
+    if dbm_group != None and len(dbm_group)>0 and 'nlp' not in dbm_group:
+        return
+    
+    logger.info('Processing nlp variables from data in {}'.format(video_uri))
+    transcribe.run_transcribe(video_uri, out_dir, r_config, deep_path)
+    
 def remove_file(file_path):
     """
     removing wav file 
diff --git a/dbm_lib/dbm_features/raw_features/nlp/transcribe.py b/dbm_lib/dbm_features/raw_features/nlp/transcribe.py
new file mode 100644
index 00000000..3914f78a
--- /dev/null
+++ b/dbm_lib/dbm_features/raw_features/nlp/transcribe.py
@@ -0,0 +1,82 @@
+"""
+file_name: transcribe
+project_name: DBM
+created: 2020-10-11
+"""
+
+import pandas as pd
+import numpy as np
+import librosa
+import glob
+from os.path import join
+import logging
+
+from dbm_lib.dbm_features.raw_features.util import util as ut
+from dbm_lib.dbm_features.raw_features.util import nlp_util as n_util
+
+logging.basicConfig(level=logging.INFO)
+logger=logging.getLogger()
+
+formant_dir = 'nlp/transcribe'
+csv_ext = '_transcribe.csv'
+error_txt = 'error: length less than 0.1'
+
+def calc_transcribe(video_uri, audio_file, out_loc, fl_name, r_config, deep_path):
+    """
+    Preparing Formant freq matrix
+    Args:
+        audio_file: (.wav) parsed audio file; fl_name: input file name
+        out_loc: (str) Output directory; r_config: raw variable config
+    """
+    
+    text = n_util.process_deepspeech(audio_file, deep_path)
+    df_formant = pd.DataFrame([text], columns=[r_config.nlp_transcribe])
+    
+    df_formant.replace('', np.nan, regex=True,inplace=True)
+    df_formant[r_config.err_reason] = 'Pass'# will replace with threshold in future release
+    df_formant['dbm_master_url'] = video_uri
+    
+    logger.info('Saving Output file {} '.format(out_loc))
+    ut.save_output(df_formant, out_loc, fl_name, formant_dir, csv_ext)
+    
+def empty_transcribe(video_uri, out_loc, fl_name, r_config):
+    
+    """
+    Preparing empty formant frequency matrix if something fails
+    """
+    cols = [r_config.nlp_transcribe, r_config.err_reason]
+    out_val = [[np.nan, error_txt]]
+    df_fm = pd.DataFrame(out_val, columns = cols)
+    df_fm['dbm_master_url'] = video_uri
+    
+    logger.info('Saving Output file {} '.format(out_loc))
+    ut.save_output(df_fm, out_loc, fl_name, formant_dir, csv_ext)
+
+def run_transcribe(video_uri, out_dir, r_config, deep_path):
+    
+    """
+    Transcribing the <fl_name>.wav found in the input location of video_uri
+    Audio shorter than 0.1 sec gets an empty (error) output instead; when no .wav
+    is found nothing is written, and any exception is logged and swallowed
+    Args:
+        video_uri: video path; r_config: raw variable config object
+        out_dir: (str) Output directory for processed output; deep_path: deepspeech build path
+    """
+    try:
+        
+        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
+        aud_filter = glob.glob(join(input_loc, fl_name + '.wav'))
+        if len(aud_filter)>0:
+
+            audio_file = aud_filter[0]
+            aud_dur = librosa.get_duration(filename=audio_file)
+
+            if float(aud_dur) < 0.1:
+                logger.info('Output file {} size is less than 0.1 sec'.format(audio_file))
+
+                empty_transcribe(video_uri, out_loc, fl_name, r_config)
+                return
+
+            calc_transcribe(video_uri, audio_file, out_loc, fl_name, r_config, deep_path)
+    except Exception as e:
+        logger.error('Failed to process audio file')
\ No newline at end of file
diff --git a/dbm_lib/dbm_features/raw_features/util/nlp_util.py b/dbm_lib/dbm_features/raw_features/util/nlp_util.py
new file mode 100644
index 00000000..3288240b
--- /dev/null
+++ b/dbm_lib/dbm_features/raw_features/util/nlp_util.py
@@ -0,0 +1,66 @@
+"""
+file_name: nlp_util
+project_name: DBM
+created: 2020-10-11
+"""
+
+import subprocess
+import json
+import numpy as np
+import pandas as pd
+import os
+import logging
+
+logging.basicConfig(level=logging.INFO)
+logger=logging.getLogger()
+
+#Speech to text using Deepspeech 0.9.1
+def deepspeech(AUDIO_FILE,deep_path):
+    """
+    Run the deepspeech command line client on one audio file.
+    Args:
+        AUDIO_FILE: path of the audio file passed to --audio
+        deep_path: directory holding the 0.9.1 .pbmm model and .scorer files
+    Returns:
+        (stdout, stderr) of the client; stderr is merged into stdout, so the second
+        item is None. ("error", "error") when reading the client output fails.
+    """
+    cmd = ['deepspeech',
+           '--model', os.path.join(deep_path, 'deepspeech-0.9.1-models.pbmm'),
+           '--scorer', os.path.join(deep_path, 'deepspeech-0.9.1-models.scorer'),
+           '--audio', AUDIO_FILE]
+    # Argument list (no shell), so paths containing spaces are passed through intact.
+    out = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    try:
+        stdout, stderr = out.communicate()
+    except Exception as err:  # not a bare except: KeyboardInterrupt/SystemExit propagate
+        logger.error('Deepspeech failed: {}'.format(err))
+        return "error", "error"
+    logger.info('Deepspeech finished with return code {}'.format(out.returncode))
+    return stdout,stderr
+
+def deep_speech_output_clean(result):
+    """
+    Extract the transcript from the (stdout, stderr) pair returned by deepspeech().
+    The transcript is taken from the line that follows the 'Inference took' line.
+    Returns: transcript text, or "" when that marker or the following line is missing.
+    """
+    if not result or not result[0]:
+        return ""
+    raw = result[0]
+    # Decode the bytes instead of parsing the "b'...\\n...'" repr of the buffer.
+    output = raw.decode('utf-8', errors='replace') if isinstance(raw, bytes) else str(raw)
+    lines = output.splitlines()
+    for idx, line in enumerate(lines[:-1]):
+        if 'Inference took' in line:
+            return lines[idx + 1].strip()
+    return ""
+
+def process_deepspeech(audio_file,deep_path):
+    """
+    Transcribing audio to extract text from speech
+    """
+    deep_output = deepspeech(audio_file,deep_path)
+    deep_text= deep_speech_output_clean(deep_output)
+    
+    return deep_text
diff --git a/process_data.py b/process_data.py
index 0283f9f2..90b37d43 100644
--- a/process_data.py
+++ b/process_data.py
@@ -20,6 +20,7 @@ logging.basicConfig(level=logging.INFO)
 logger=logging.getLogger()
 
 OPENFACE_PATH = 'pkg/OpenFace/build/bin/FeatureExtraction'
+DEEP_SPEECH = 'pkg/DeepSpeech'
 DLIB_SHAPE_MODEL = 'pkg/shape_detector/shape_predictor_68_face_landmarks.dat'
 
 def common_video(video_file, args, r_config):
@@ -36,6 +37,8 @@ def common_video(video_file, args, r_config):
 
     pf.process_facial(video_file, out_path, args.dbm_group, r_config)
     pf.process_acoustic(video_file, out_path, args.dbm_group, r_config)
+    pf.process_nlp(video_file, out_path, args.dbm_group, r_config, DEEP_SPEECH)
+
     pf.remove_file(video_file)
     pf.process_movement(video_file, out_path, args.dbm_group, r_config, DLIB_SHAPE_MODEL)
 
@@ -79,6 +82,7 @@ def process_raw_audio_file(args, s_config, r_config):
                 
                 out_path = os.path.join(args.output_path, 'raw_variables')
                 pf.process_acoustic(audio_file[0], out_path, args.dbm_group, r_config)
+                pf.process_nlp(audio_file[0], out_path, args.dbm_group, r_config, DEEP_SPEECH)
                 
             else:
                 logger.info('Enter correct audio(*.wav) file path.')
@@ -130,6 +134,8 @@ def process_raw_audio_dir(args, s_config, r_config):
                 
                 out_path = os.path.join(args.output_path, 'raw_variables')
                 pf.process_acoustic(audio, out_path, args.dbm_group, r_config)
+                pf.process_nlp(audio, out_path, args.dbm_group, r_config, DEEP_SPEECH)
+                
             except Exception as e:
                 logger.error('Failed to process wav file.')
         
diff --git a/process_dbm.sh b/process_dbm.sh
index cad71b7f..361d2aed 100644
--- a/process_dbm.sh
+++ b/process_dbm.sh
@@ -55,6 +55,9 @@ fi
 if [[ $dbm_group == *"movement"* ]]; then
     dbm_new="$dbm_new movement"
 fi
+if [[ $dbm_group == *"nlp"* ]]; then
+    dbm_new="$dbm_new nlp"
+fi
 
 #docker commands to run container
 docker create -ti --name dbm_container dbm bash
diff --git a/requirements.txt b/requirements.txt
index 01f03057..1ecea7d7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,3 +20,4 @@ more_itertools
 scipy==1.2.0
 pyyaml
 pydub
+deepspeech
\ No newline at end of file
diff --git a/resources/features/raw_feature.yml b/resources/features/raw_feature.yml
index 982bf631..f8b00883 100644
--- a/resources/features/raw_feature.yml
+++ b/resources/features/raw_feature.yml
@@ -196,3 +196,6 @@ raw_feature:
     mov_Hpose_Yaw: mov_hposeyaw
     mov_Hpose_Roll: mov_hposeroll
     mov_Hpose_Dist: mov_hposedist
+
+    #NLP markers
+    nlp_transcribe: nlp_transcribe

From dae5eb3cd4799d25c1bdad19d9a7e5a598a25ce3 Mon Sep 17 00:00:00 2001
From: Vijay Yadev <vijay.yadav@aicure.com>
Date: Fri, 13 Nov 2020 01:03:23 -0500
Subject: [PATCH 02/12] nlp feature

---
 dbm_lib/config/config_raw_feature.py          |  17 ++
 dbm_lib/controller/process_feature.py         |   3 +-
 .../raw_features/nlp/speech_features.py       |  47 ++++++
 .../raw_features/nlp/transcribe.py            |  12 +-
 .../raw_features/util/nlp_util.py             | 146 ++++++++++++++++++
 resources/features/derived_feature.yml        |  28 +++-
 resources/features/raw_feature.yml            |  18 +++
 7 files changed, 264 insertions(+), 7 deletions(-)
 create mode 100644 dbm_lib/dbm_features/raw_features/nlp/speech_features.py

diff --git a/dbm_lib/config/config_raw_feature.py b/dbm_lib/config/config_raw_feature.py
index 679c0845..3dc4834b 100644
--- a/dbm_lib/config/config_raw_feature.py
+++ b/dbm_lib/config/config_raw_feature.py
@@ -225,4 +225,21 @@ class ConfigRawReader(object):
 
             #NLP features
             self.nlp_transcribe = config['raw_feature']['nlp_transcribe']
+            self.nlp_numSentences = config['raw_feature']['nlp_numSentences']
+            self.nlp_singPronPerAns = config['raw_feature']['nlp_singPronPerAns']
+            self.nlp_singPronPerSen = config['raw_feature']['nlp_singPronPerSen']
+            self.nlp_pastTensePerAns = config['raw_feature']['nlp_pastTensePerAns']
+            self.nlp_pastTensePerSen = config['raw_feature']['nlp_pastTensePerSen']
+            self.nlp_pronounsPerAns = config['raw_feature']['nlp_pronounsPerAns']
+            self.nlp_pronounsPerSen = config['raw_feature']['nlp_pronounsPerSen']
+            self.nlp_verbsPerAns = config['raw_feature']['nlp_verbsPerAns']
+            self.nlp_verbsPerSen = config['raw_feature']['nlp_verbsPerSen']
+            self.nlp_adjectivesPerAns = config['raw_feature']['nlp_adjectivesPerAns']
+            self.nlp_adjectivesPerSen = config['raw_feature']['nlp_adjectivesPerSen']
+            self.nlp_nounsPerAns = config['raw_feature']['nlp_nounsPerAns']
+            self.nlp_nounsPerSen = config['raw_feature']['nlp_nounsPerSen']
+            self.nlp_sentiment_mean = config['raw_feature']['nlp_sentiment_mean']
+            self.nlp_mattr = config['raw_feature']['nlp_mattr']
+            self.nlp_wordsPerMin = config['raw_feature']['nlp_wordsPerMin']
+            self.nlp_totalTime = config['raw_feature']['nlp_totalTime']
             
\ No newline at end of file
diff --git a/dbm_lib/controller/process_feature.py b/dbm_lib/controller/process_feature.py
index 902ef2c6..be781a7f 100644
--- a/dbm_lib/controller/process_feature.py
+++ b/dbm_lib/controller/process_feature.py
@@ -8,7 +8,7 @@ from dbm_lib.dbm_features.raw_features.audio import intensity, pitch_freq, hnr,
 from dbm_lib.dbm_features.raw_features.audio import pause_segment, jitter, shimmer, mfcc
 from dbm_lib.dbm_features.raw_features.video import face_asymmetry, face_au, face_emotion_expressivity, face_landmark
 from dbm_lib.dbm_features.raw_features.movement import head_motion, eye_blink
-from dbm_lib.dbm_features.raw_features.nlp import transcribe
+from dbm_lib.dbm_features.raw_features.nlp import transcribe, speech_features
 
 import subprocess
 import logging
@@ -137,6 +137,7 @@ def process_nlp(video_uri, out_dir, dbm_group, r_config, deep_path):
     
     logger.info('Processing nlp variables from data in {}'.format(video_uri))
     transcribe.run_transcribe(video_uri, out_dir, r_config, deep_path)
+    speech_features.run_speech_feature(video_uri, out_dir, r_config)
     
 def remove_file(file_path):
     """
diff --git a/dbm_lib/dbm_features/raw_features/nlp/speech_features.py b/dbm_lib/dbm_features/raw_features/nlp/speech_features.py
new file mode 100644
index 00000000..6aebd547
--- /dev/null
+++ b/dbm_lib/dbm_features/raw_features/nlp/speech_features.py
@@ -0,0 +1,47 @@
+"""
+file_name: speech_features
+project_name: DBM
+created: 2020-13-11
+"""
+
+import os
+import numpy as np
+import pandas as pd
+import glob
+from os.path import join
+import logging
+
+from dbm_lib.dbm_features.raw_features.util import util as ut
+from dbm_lib.dbm_features.raw_features.util import nlp_util as n_util
+
+logging.basicConfig(level=logging.INFO)
+logger=logging.getLogger()
+
+speech_dir = 'nlp/speech_feature'
+speech_ext = '_nlp.csv'
+transcribe_ext = 'nlp/transcribe/*_transcribe.csv'
+
+def run_speech_feature(video_uri, out_dir, r_config):
+    """
+    Derive nlp speech features from the transcription csv of one input file.
+    Reads the first file matching nlp/transcribe/*_transcribe.csv under the output
+    location and hands the result to ut.save_output with the '_nlp.csv' extension.
+    Args:
+        video_uri: video path; r_config: raw variable config object
+        out_dir: (str) Output directory for processed output
+    """
+    try:
+        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
+
+        transcribe_path = glob.glob(join(out_loc, transcribe_ext))
+        if not transcribe_path:
+            # Nothing was transcribed for this input, so there is nothing to derive.
+            logger.info('No transcription output found in {}'.format(out_loc))
+            return
+
+        transcribe_df = pd.read_csv(transcribe_path[0])
+        df_speech = n_util.process_speech(transcribe_df, r_config)
+        logger.info('Saving Output file {} '.format(out_loc))
+        ut.save_output(df_speech, out_loc, fl_name, speech_dir, speech_ext)
+    except Exception:
+        logger.exception('Failed to process nlp speech features for {}'.format(video_uri))
diff --git a/dbm_lib/dbm_features/raw_features/nlp/transcribe.py b/dbm_lib/dbm_features/raw_features/nlp/transcribe.py
index 3914f78a..f567e967 100644
--- a/dbm_lib/dbm_features/raw_features/nlp/transcribe.py
+++ b/dbm_lib/dbm_features/raw_features/nlp/transcribe.py
@@ -21,7 +21,7 @@ formant_dir = 'nlp/transcribe'
 csv_ext = '_transcribe.csv'
 error_txt = 'error: length less than 0.1'
 
-def calc_transcribe(video_uri, audio_file, out_loc, fl_name, r_config, deep_path):
+def calc_transcribe(video_uri, audio_file, out_loc, fl_name, r_config, deep_path, aud_dur):
     """
     Preparing Formant freq matrix
     Args:
@@ -33,6 +33,7 @@ def calc_transcribe(video_uri, audio_file, out_loc, fl_name, r_config, deep_path
     df_formant = pd.DataFrame([text], columns=[r_config.nlp_transcribe])
     
     df_formant.replace('', np.nan, regex=True,inplace=True)
+    df_formant[r_config.nlp_totalTime] = aud_dur
     df_formant[r_config.err_reason] = 'Pass'# will replace with threshold in future release
     df_formant['dbm_master_url'] = video_uri
     
@@ -44,8 +45,8 @@ def empty_transcribe(video_uri, out_loc, fl_name, r_config):
     """
     Preparing empty formant frequency matrix if something fails
     """
-    cols = [r_config.nlp_transcribe, r_config.err_reason]
-    out_val = [[np.nan, error_txt]]
+    cols = [r_config.nlp_transcribe, r_config.nlp_totalTime, r_config.err_reason]
+    out_val = [[np.nan, np.nan, error_txt]]
     df_fm = pd.DataFrame(out_val, columns = cols)
     df_fm['dbm_master_url'] = video_uri
     
@@ -77,6 +78,7 @@ def run_transcribe(video_uri, out_dir, r_config, deep_path):
                 empty_transcribe(video_uri, out_loc, fl_name, r_config)
                 return
 
-            calc_transcribe(video_uri, audio_file, out_loc, fl_name, r_config, deep_path)
+            calc_transcribe(video_uri, audio_file, out_loc, fl_name, r_config, deep_path, aud_dur)
     except Exception as e:
-        logger.error('Failed to process audio file')
\ No newline at end of file
+        logger.error('Failed to process audio file')
+        
\ No newline at end of file
diff --git a/dbm_lib/dbm_features/raw_features/util/nlp_util.py b/dbm_lib/dbm_features/raw_features/util/nlp_util.py
index 3288240b..fc1ac3d1 100644
--- a/dbm_lib/dbm_features/raw_features/util/nlp_util.py
+++ b/dbm_lib/dbm_features/raw_features/util/nlp_util.py
@@ -11,6 +11,11 @@ import pandas as pd
 import os
 import logging
 
+import nltk
+import re
+from lexicalrichness import LexicalRichness
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer 
+
 logging.basicConfig(level=logging.INFO)
 logger=logging.getLogger()
 
@@ -64,3 +69,144 @@ def process_deepspeech(audio_file,deep_path):
     deep_text= deep_speech_output_clean(deep_output)
     
     return deep_text
+
+def nltk_download():
+    """
+    Download the NLTK resources used by process_speech when they are not installed.
+    """
+    # nltk.data.find expects the '<category>/<name>' path; a bare package name is
+    # never found, which would trigger a download attempt on every call.
+    resources = (('punkt', 'tokenizers/punkt'),
+                 ('averaged_perceptron_tagger', 'taggers/averaged_perceptron_tagger'))
+    
+    for package, resource_path in resources:
+        try:
+            nltk.data.find(resource_path)
+        except LookupError:
+            logger.info('{} is not available'.format(package))
+            nltk.download(package)
+
+def empty_speech(r_config, master_url, error_txt):
+    """
+    Preparing empty speech matrix with error
+    Args:
+        r_config: raw config file object
+        master_url: source file url stored in the 'dbm_master_url' column
+        error_txt: Error message during transcription
+    Returns:
+            Single row dataframe: NaN for every speech feature, error_txt as error reason
+    """
+    
+    col = [r_config.nlp_numSentences, r_config.nlp_singPronPerAns, r_config.nlp_singPronPerSen, r_config.nlp_pastTensePerAns,
+          r_config.nlp_pastTensePerSen, r_config.nlp_pronounsPerAns, r_config.nlp_pronounsPerSen, r_config.nlp_verbsPerAns,
+          r_config.nlp_verbsPerSen, r_config.nlp_adjectivesPerAns, r_config.nlp_adjectivesPerSen, r_config.nlp_nounsPerAns,
+          r_config.nlp_nounsPerSen, r_config.nlp_sentiment_mean, r_config.nlp_mattr, r_config.nlp_wordsPerMin,
+          r_config.nlp_totalTime, r_config.err_reason]
+    
+    # err_reason is the last column, so only the len(col) - 1 feature columns are NaN.
+    df_speech = pd.DataFrame([[np.nan] * (len(col) - 1) + [error_txt]], columns = col)
+    df_speech['dbm_master_url'] = master_url
+    return df_speech
+
+def divide_var(speech_var1, spech_var2):
+    """
+    Ratio of speech_var1 to spech_var2, or NaN for a zero denominator
+    """
+    if spech_var2 == 0:
+        # no meaningful ratio when dividing by zero
+        return np.nan
+    return speech_var1/spech_var2
+
+def process_speech(transcribe_df,r_config):
+    """
+        Preparing speech features from the first row of the transcription output
+        Args:
+            transcribe_df: Transcribed dataframe
+            r_config: raw config file object
+        Returns:
+            Dataframe for speech features
+    """
+    
+    err_transcribe = transcribe_df[r_config.err_reason].iloc[0]
+    transcribe = transcribe_df[r_config.nlp_transcribe].iloc[0]
+    total_time = transcribe_df[r_config.nlp_totalTime].iloc[0]
+    master_url = transcribe_df['dbm_master_url'].iloc[0]
+    
+    # Decide on failure before touching the transcript, and pass on the reason that
+    # the transcription step recorded (this module defines no error text of its own).
+    if err_transcribe != 'Pass':
+        return empty_speech(r_config, master_url, err_transcribe)
+    
+    #clean transcribe; a missing transcript arrives as NaN (not a str) from the csv
+    transcribe = transcribe.replace(",", "") if isinstance(transcribe, str) else ""
+    transcribe = " ".join(re.findall(r"[\w']+|[.!?]", transcribe))
+    
+    speech_dict = {}
+    nltk_download()
+    
+    sentences = nltk.tokenize.sent_tokenize(transcribe)
+    words_all = nltk.tokenize.word_tokenize(transcribe)
+    num_sentences = len(sentences)
+    
+    speech_dict[r_config.nlp_numSentences] = num_sentences
+    
+    #nlp_singPron: whole-token, case-insensitive match (substring counting hits 'me'
+    # in "some" and misses a lower-case 'i'); "i'm" counts through its first part
+    tokens = transcribe.split()
+    sing_count = sum(1 for tok in tokens if tok.lower().split("'")[0] in ('i', 'me', 'my'))
+    
+    speech_dict[r_config.nlp_singPronPerAns] = sing_count if len(words_all)>0 else np.nan
+    speech_dict[r_config.nlp_singPronPerSen] = divide_var(speech_dict[r_config.nlp_singPronPerAns], num_sentences)
+    
+    tagged = nltk.pos_tag(tokens)
+    tagged_df = pd.DataFrame(tagged, columns=['word', 'pos_tag'])
+    
+    #Past tense per answer
+    all_POSs = tagged_df['pos_tag'].tolist()
+    speech_dict[r_config.nlp_pastTensePerAns] = all_POSs.count('VBD') if len(words_all)>0 else np.nan
+    speech_dict[r_config.nlp_pastTensePerSen] = divide_var(speech_dict[r_config.nlp_pastTensePerAns], num_sentences)
+    
+    #Pronoun per answer
+    pronounsPerAns = all_POSs.count('PRP') + all_POSs.count('PRP$')
+    speech_dict[r_config.nlp_pronounsPerAns] = pronounsPerAns if len(words_all)>0 else np.nan
+    speech_dict[r_config.nlp_pronounsPerSen] = divide_var(speech_dict[r_config.nlp_pronounsPerAns], num_sentences)
+    
+    #Verb per answer
+    verbPerAns = all_POSs.count('VB') + all_POSs.count('VBD') + all_POSs.count('VBG') \
+                      + all_POSs.count('VBN') + all_POSs.count('VBP') + all_POSs.count('VBZ')
+    speech_dict[r_config.nlp_verbsPerAns] = verbPerAns if len(words_all) > 0 else np.nan
+    speech_dict[r_config.nlp_verbsPerSen] = divide_var(speech_dict[r_config.nlp_verbsPerAns], num_sentences)
+    
+    #Adjective per answer
+    adjectivesAns = all_POSs.count('JJ') + all_POSs.count('JJR') + all_POSs.count('JJS')
+    speech_dict[r_config.nlp_adjectivesPerAns] = adjectivesAns if len(words_all) > 0 else np.nan
+    speech_dict[r_config.nlp_adjectivesPerSen] = divide_var(speech_dict[r_config.nlp_adjectivesPerAns], num_sentences)
+    
+    #Noun per answer
+    nounsAns = all_POSs.count('NN') + all_POSs.count('NNP') + all_POSs.count('NNS')
+    speech_dict[r_config.nlp_nounsPerAns] = nounsAns if len(words_all) > 0 else np.nan
+    speech_dict[r_config.nlp_nounsPerSen] = divide_var(speech_dict[r_config.nlp_nounsPerAns], num_sentences)
+    
+    #Sentiment analysis
+    vader = SentimentIntensityAnalyzer()
+    sentence_valences = []
+    
+    for s in sentences:
+        sentiment_dict = vader.polarity_scores(s) 
+        sentence_valences.append(sentiment_dict['compound'])
+        
+    speech_dict[r_config.nlp_sentiment_mean] = np.mean(sentence_valences) if len(sentence_valences) > 0 else np.nan
+    non_punc = [value for value in words_all if value not in ['.','!','?']]
+    
+    # Join the tokens themselves; joining str(non_punc) would space out the list repr.
+    non_punc_as_str = " ".join(non_punc)
+    lex = LexicalRichness(non_punc_as_str)
+    speech_dict[r_config.nlp_mattr] = lex.mattr(window_size=lex.words) if lex.words > 0 else np.nan
+    
+    #Number of words per minute
+    speech_dict[r_config.nlp_wordsPerMin] = divide_var(len(non_punc), total_time)*60
+    speech_dict[r_config.nlp_totalTime] = total_time
+    speech_dict['dbm_master_url'] = master_url
+    
+    df_speech = pd.DataFrame([speech_dict])
+    return df_speech
\ No newline at end of file
diff --git a/resources/features/derived_feature.yml b/resources/features/derived_feature.yml
index a0759d79..7346eb36 100644
--- a/resources/features/derived_feature.yml
+++ b/resources/features/derived_feature.yml
@@ -2,7 +2,7 @@ derive_feature:
     
     #DBM Feature Group
     FEATURE_GROUP: ['FAC_ASYM', 'FAC_AU', 'FAC_EXP', 'FAC_LMK', 'ACO_INT', 'ACO_FF', 'ACO_HNR', 'ACO_GNE', 'ACO_FM', 
-    'ACO_JITTER','ACO_SHIMMER', 'ACO_PAUSE', 'ACO_VFS', 'ACO_MFCC', 'MOV_HM', 'MOV_HP', 'EYE_BLINK']
+    'ACO_JITTER','ACO_SHIMMER', 'ACO_PAUSE', 'ACO_VFS', 'ACO_MFCC', 'MOV_HM', 'MOV_HP', 'EYE_BLINK', 'NLP_SPEECH']
     
     #Feature group output file extensions
     FAC_ASYM_LOC: _facasym
@@ -22,6 +22,7 @@ derive_feature:
     MOV_HM_LOC: _headmov
     MOV_HP_LOC: _headpose
     EYE_BLINK_LOC: _eyeblinks
+    NLP_SPEECH_LOC: _nlp
     
     #Facial category feature group
     FAC_ASYM: ['fac_AsymMaskMouth', 'fac_AsymMaskEyebrow', 'fac_AsymMaskEye', 'fac_AsymMaskCom']
@@ -65,6 +66,12 @@ derive_feature:
     MOV_HP: ['mov_Hpose_Dist','mov_Hpose_Pitch','mov_Hpose_Yaw','mov_Hpose_Roll']
     EYE_BLINK: ['mov_blink_ear', 'vid_dur', 'mov_blinkdur']
     
+    #NLP category feature group
+    NLP_SPEECH: ['nlp_numSentences', 'nlp_singPronPerAns', 'nlp_singPronPerSen', 'nlp_pastTensePerAns', 'nlp_pastTensePerSen',
+    'nlp_pronounsPerAns', 'nlp_pronounsPerSen', 'nlp_verbsPerAns', 'nlp_verbsPerSen', 'nlp_adjectivesPerAns', 
+    'nlp_adjectivesPerSen', 'nlp_nounsPerAns', 'nlp_nounsPerSen', 'nlp_sentiment_mean', 'nlp_mattr', 'nlp_wordsPerMin',
+    'nlp_totalTime']
+    
     #Calculation for variables
     # Facial Asymmetry
     fac_AsymMaskMouth: ['mean', 'std']
@@ -248,3 +255,22 @@ derive_feature:
     mov_blink_ear: ['mean', 'std']
     vid_dur: ['count']
     mov_blinkdur: ['mean', 'std']
+    
+    #NLP feature
+    nlp_numSentences: ['mean']
+    nlp_singPronPerAns: ['mean']
+    nlp_singPronPerSen: ['mean']
+    nlp_pastTensePerAns: ['mean']
+    nlp_pastTensePerSen: ['mean']
+    nlp_pronounsPerAns: ['mean']
+    nlp_pronounsPerSen: ['mean']
+    nlp_verbsPerAns: ['mean']
+    nlp_verbsPerSen: ['mean']
+    nlp_adjectivesPerAns: ['mean']
+    nlp_adjectivesPerSen: ['mean']
+    nlp_nounsPerAns: ['mean']
+    nlp_nounsPerSen: ['mean']
+    nlp_sentiment_mean: ['mean']
+    nlp_mattr: ['mean']
+    nlp_wordsPerMin: ['mean']
+    nlp_totalTime: ['mean']
diff --git a/resources/features/raw_feature.yml b/resources/features/raw_feature.yml
index f8b00883..b9c673b8 100644
--- a/resources/features/raw_feature.yml
+++ b/resources/features/raw_feature.yml
@@ -199,3 +199,21 @@ raw_feature:
 
     #NLP markers
     nlp_transcribe: nlp_transcribe
+    nlp_numSentences: nlp_numSentences
+    nlp_singPronPerAns: nlp_singPronPerAns
+    nlp_singPronPerSen: nlp_singPronPerSen
+    nlp_pastTensePerAns: nlp_pastTensePerAns
+    nlp_pastTensePerSen: nlp_pastTensePerSen
+    nlp_pronounsPerAns: nlp_pronounsPerAns
+    nlp_pronounsPerSen: nlp_pronounsPerSen
+    nlp_verbsPerAns: nlp_verbsPerAns
+    nlp_verbsPerSen: nlp_verbsPerSen
+    nlp_adjectivesPerAns: nlp_adjectivesPerAns
+    nlp_adjectivesPerSen: nlp_adjectivesPerSen
+    nlp_nounsPerAns: nlp_nounsPerAns
+    nlp_nounsPerSen: nlp_nounsPerSen
+    nlp_sentiment_mean: nlp_sentiment_mean
+    nlp_mattr: nlp_mattr
+    nlp_wordsPerMin: nlp_wordsPerMin
+    nlp_totalTime: nlp_totalTime
+    
\ No newline at end of file

From a5f50210f9f38d1b8628851c6a6ef6134bff270b Mon Sep 17 00:00:00 2001
From: vjbytes102 <vijay.yadav@aicure.com>
Date: Fri, 13 Nov 2020 02:01:49 -0500
Subject: [PATCH 03/12] nlp requirement

---
 requirements.txt | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 1ecea7d7..278428b1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,4 +20,7 @@ more_itertools
 scipy==1.2.0
 pyyaml
 pydub
-deepspeech
\ No newline at end of file
+deepspeech
+nltk
+lexicalrichness
+vaderSentiment

From bfa42917b9328c1e37fbfd87746d1a6fd9f2aae2 Mon Sep 17 00:00:00 2001
From: vjbytes102 <vijay.yadav@aicure.com>
Date: Fri, 13 Nov 2020 20:14:59 -0500
Subject: [PATCH 04/12] requirement updates

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 278428b1..6c90cf09 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -24,3 +24,4 @@ deepspeech
 nltk
 lexicalrichness
 vaderSentiment
+textblob

From 394bdfd5c460db1fb4a711a21a08e73d93656bcc Mon Sep 17 00:00:00 2001
From: vjbytes102 <vijay.yadav@aicure.com>
Date: Wed, 25 Nov 2020 13:51:06 -0500
Subject: [PATCH 05/12] contribution guidelines

---
 CONTRIBUTING.md | 42 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 09bfde42..28e6ba48 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1 +1,41 @@
-OpenDBM welcomes contributions from anyone. Please see our guidelines((CONTRIBUTING.md)).
+# Contributing guidelines to openDBM
+
+Please visit [openDBM](https://aicure.com/opendbm/) page if you have not seen it. If you would like to contribute to this toolkit (bug fixes, tutorials, new feature development, enhancements to existing features, etc.), everything should be managed by submitting a pull request on GitHub.
+
+## What you should know
+
+- Read code of conduct.
+- Read License.
+- Agree to contribute code under openDBM(GPL v3.0).
+- Before adding new feature/algorithmn make sure it's not patented.
+- Before fixing any bug make sure it's still exists and reproducable in master branch.
+- If you see any issue in existing features make sure to report the issue on openDBM issues page.
+- After adding new code make sure everything is working as expected.
+
+## How to contribute 
+
+1. Install Git.
+2. Register and sign in to GitHub.
+3. Fork openDBM repository https://github.com/AiCure/open_dbm.git (https://help.github.com/articles/fork-a-repo for details)
+4. Assign a task for yourself. It could be a bugfix or adding new functionality.
+5. Clone your fork into your local system.
+6. Navigate to local repository.
+7. Check that your fork is the 'origin' remote.
+	- Use 'git remote -v' to show current remote
+	- If you do not see any remote, add it using git remote add origin <url of fork branch>
+8. Add openDBM master repository as 'upstream' remote.
+	- Use 'git remote add upstream https://github.com/AiCure/open_dbm.git' command
+	- Check remote using 'git remote -v'
+9. Before making any changes, synchronize your local repository with openDBM master
+	- git pull upstream master
+10. Create new branch where you are going to add bugfix or new features
+	- git checkout -b branch_name
+11. Make and commit your changes into local repository
+12. Validate all your commits and make sure everything is working as expected.
+13. Push your changes to the new branch (which is a branch of your forked repository)
+	-  git push origin branch_name
+14. Create a pull request and add brief information about all your commits.(see https://help.github.com/articles/using-pull-requests for details)
+
+## Request Approval
+
+Once the reviewer is happy with the code changes, they will approve the pull request and merge it into the master branch.

From 651e88e056f38f42b08b714d5c58e59c4ca1c1eb Mon Sep 17 00:00:00 2001
From: vjbytes102 <vijay.yadav@aicure.com>
Date: Wed, 25 Nov 2020 13:52:58 -0500
Subject: [PATCH 06/12] adding links

---
 CONTRIBUTING.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 28e6ba48..88b6051a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -4,8 +4,8 @@ Please visit [openDBM](https://aicure.com/opendbm/) page if you have not seen it
 
 ## What you should know
 
-- Read code of conduct.
-- Read License.
+- Read [code of conduct](https://github.com/AiCure/open_dbm/blob/master/CODE_OF_CONDUCT.md).
+- Read [License](https://github.com/AiCure/open_dbm/blob/master/license.txt).
 - Agree to contribute code under openDBM(GPL v3.0).
 - Before adding new feature/algorithmn make sure it's not patented.
 - Before fixing any bug make sure it's still exists and reproducable in master branch.

From eae3e2a1f50f68a2738026539ea4b24742d953ab Mon Sep 17 00:00:00 2001
From: vjbytes102 <vijay.yadav@aicure.com>
Date: Wed, 25 Nov 2020 14:57:26 -0500
Subject: [PATCH 07/12] Update issue templates

---
 .github/ISSUE_TEMPLATE/bug_report.md | 38 ++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 00000000..dd84ea78
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,38 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+1. Go to '...'
+2. Click on '....'
+3. Scroll down to '....'
+4. See error
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Screenshots**
+If applicable, add screenshots to help explain your problem.
+
+**Desktop (please complete the following information):**
+ - OS: [e.g. iOS]
+ - Browser [e.g. chrome, safari]
+ - Version [e.g. 22]
+
+**Smartphone (please complete the following information):**
+ - Device: [e.g. iPhone6]
+ - OS: [e.g. iOS8.1]
+ - Browser [e.g. stock browser, safari]
+ - Version [e.g. 22]
+
+**Additional context**
+Add any other context about the problem here.

From 51dec41be321f75ab362816c4aea09d4fd578a80 Mon Sep 17 00:00:00 2001
From: vjbytes102 <vijay.yadav@aicure.com>
Date: Wed, 25 Nov 2020 14:58:58 -0500
Subject: [PATCH 08/12] Update issue templates

---
 .github/ISSUE_TEMPLATE/bug_report.md | 38 ----------------------------
 1 file changed, 38 deletions(-)
 delete mode 100644 .github/ISSUE_TEMPLATE/bug_report.md

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
deleted file mode 100644
index dd84ea78..00000000
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-name: Bug report
-about: Create a report to help us improve
-title: ''
-labels: ''
-assignees: ''
-
----
-
-**Describe the bug**
-A clear and concise description of what the bug is.
-
-**To Reproduce**
-Steps to reproduce the behavior:
-1. Go to '...'
-2. Click on '....'
-3. Scroll down to '....'
-4. See error
-
-**Expected behavior**
-A clear and concise description of what you expected to happen.
-
-**Screenshots**
-If applicable, add screenshots to help explain your problem.
-
-**Desktop (please complete the following information):**
- - OS: [e.g. iOS]
- - Browser [e.g. chrome, safari]
- - Version [e.g. 22]
-
-**Smartphone (please complete the following information):**
- - Device: [e.g. iPhone6]
- - OS: [e.g. iOS8.1]
- - Browser [e.g. stock browser, safari]
- - Version [e.g. 22]
-
-**Additional context**
-Add any other context about the problem here.

From c190d6f86b0d90c055a2c6efccb1825d8e182a57 Mon Sep 17 00:00:00 2001
From: vjbytes102 <vijay.yadav@aicure.com>
Date: Wed, 25 Nov 2020 15:00:28 -0500
Subject: [PATCH 09/12] Update issue templates

---
 .github/ISSUE_TEMPLATE/bug_report.md | 38 ++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 00000000..dd84ea78
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,38 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+1. Go to '...'
+2. Click on '....'
+3. Scroll down to '....'
+4. See error
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Screenshots**
+If applicable, add screenshots to help explain your problem.
+
+**Desktop (please complete the following information):**
+ - OS: [e.g. iOS]
+ - Browser [e.g. chrome, safari]
+ - Version [e.g. 22]
+
+**Smartphone (please complete the following information):**
+ - Device: [e.g. iPhone6]
+ - OS: [e.g. iOS8.1]
+ - Browser [e.g. stock browser, safari]
+ - Version [e.g. 22]
+
+**Additional context**
+Add any other context about the problem here.

From 993bd860e46aba5fbf548fb6d360e7c6e506866f Mon Sep 17 00:00:00 2001
From: Vijay Yadev <vijay.yadav@aicure.com>
Date: Mon, 30 Nov 2020 20:31:33 -0500
Subject: [PATCH 10/12] eye gaze

---
 dbm_lib/config/config_raw_feature.py          |   8 +
 dbm_lib/controller/process_feature.py         |   5 +-
 .../raw_features/movement/eye_gaze.py         | 148 ++++++++++++++++++
 resources/features/derived_feature.yml        |  14 +-
 resources/features/raw_feature.yml            |   8 +
 5 files changed, 181 insertions(+), 2 deletions(-)
 create mode 100644 dbm_lib/dbm_features/raw_features/movement/eye_gaze.py

diff --git a/dbm_lib/config/config_raw_feature.py b/dbm_lib/config/config_raw_feature.py
index 3dc4834b..955780b7 100644
--- a/dbm_lib/config/config_raw_feature.py
+++ b/dbm_lib/config/config_raw_feature.py
@@ -222,6 +222,14 @@ class ConfigRawReader(object):
             self.mov_Hpose_Yaw = config['raw_feature']['mov_Hpose_Yaw']
             self.mov_Hpose_Roll = config['raw_feature']['mov_Hpose_Roll']
             self.mov_Hpose_Dist = config['raw_feature']['mov_Hpose_Dist']
+            self.mov_leye_x = config['raw_feature']['mov_leye_x']
+            self.mov_leye_y = config['raw_feature']['mov_leye_y']
+            self.mov_leye_z = config['raw_feature']['mov_leye_z']
+            self.mov_reye_x = config['raw_feature']['mov_reye_x']
+            self.mov_reye_y = config['raw_feature']['mov_reye_y']
+            self.mov_reye_z = config['raw_feature']['mov_reye_z']
+            self.mov_eleft_disp = config['raw_feature']['mov_eleft_disp']
+            self.mov_eright_disp = config['raw_feature']['mov_eright_disp']
 
             #NLP features
             self.nlp_transcribe = config['raw_feature']['nlp_transcribe']
diff --git a/dbm_lib/controller/process_feature.py b/dbm_lib/controller/process_feature.py
index be781a7f..555f4cc9 100644
--- a/dbm_lib/controller/process_feature.py
+++ b/dbm_lib/controller/process_feature.py
@@ -7,7 +7,7 @@ created: 2020-20-07
 from dbm_lib.dbm_features.raw_features.audio import intensity, pitch_freq, hnr, gne, voice_frame_score, formant_freq
 from dbm_lib.dbm_features.raw_features.audio import pause_segment, jitter, shimmer, mfcc
 from dbm_lib.dbm_features.raw_features.video import face_asymmetry, face_au, face_emotion_expressivity, face_landmark
-from dbm_lib.dbm_features.raw_features.movement import head_motion, eye_blink
+from dbm_lib.dbm_features.raw_features.movement import head_motion, eye_blink, eye_gaze
 from dbm_lib.dbm_features.raw_features.nlp import transcribe, speech_features
 
 import subprocess
@@ -123,6 +123,9 @@ def process_movement(video_uri, out_dir, dbm_group, r_config, dlib_model):
     
     logger.info('processing eye blink....')
     eye_blink.run_eye_blink(video_uri, out_dir, r_config, dlib_model)
+
+    logger.info('processing eye gaze....')
+    eye_gaze.run_eye_gaze(video_uri, out_dir, r_config)
     
 def process_nlp(video_uri, out_dir, dbm_group, r_config, deep_path):
     """
diff --git a/dbm_lib/dbm_features/raw_features/movement/eye_gaze.py b/dbm_lib/dbm_features/raw_features/movement/eye_gaze.py
new file mode 100644
index 00000000..a574a2d9
--- /dev/null
+++ b/dbm_lib/dbm_features/raw_features/movement/eye_gaze.py
@@ -0,0 +1,148 @@
+"""
+file_name: eye_gaze
+project_name: DBM
+created: 2020-30-11
+"""
+
+import os
+import glob
+import pandas as pd
+import numpy as np
+from scipy.spatial import distance
+from os.path import join
+import logging
+
+from dbm_lib.dbm_features.raw_features.util import util as ut
+
+logging.basicConfig(level=logging.INFO)
+logger=logging.getLogger()
+
+eye_pose_dir = 'movement/gaze'
+eye_pose_ext = '_eyegaze.csv'
+
+def eye_motion_df(l_disp, r_disp, error_list, r_config):
+    """
+    Generating eye movement dataframe
+
+    Args:
+        l_disp: displacement list(left eye); r_disp: displacement list(right eye)
+        error_list: error reason list; r_config: raw variable config file object
+
+    Returns:
+        Final eye displacement dataframe
+    """
+    df_eye_left = pd.DataFrame(l_disp, columns=[r_config.mov_eleft_disp])
+    df_eye_right = pd.DataFrame(r_disp, columns=[r_config.mov_eright_disp])
+    
+    df_eye_motion = pd.concat([df_eye_left, df_eye_right], axis=1, sort=False)
+    df_eye_motion[r_config.err_reason] = error_list
+    return df_eye_motion
+
+def filter_motion(df_of, df_disp, col_l, col_r, r_config):
+    """
+    Filtering final eye movement dataframe
+    
+    Args:
+        df_of: Openface raw out dataframe; col_r: right eye column
+        col_l: left eye column; r_config: raw variable config file object
+    """
+    
+    df_of = df_of[col_l + col_r + [' confidence']]
+    df_of.loc[(df_of[' confidence'].astype(float) < 0.8), col_l + col_r] = np.nan
+    
+    df_filter =  df_of[col_l + col_r]
+    df_filter.columns = [r_config.mov_leye_x, r_config.mov_leye_y, r_config.mov_leye_z,
+                      r_config.mov_reye_x, r_config.mov_reye_y, r_config.mov_reye_z]
+    
+    df_motion = pd.concat([df_filter, df_disp], axis=1, sort=False)
+    return df_motion
+
+def eye_disp(of_results, col, r_config):
+    """
+    Computing eye gaze displacement frame by frame
+
+    Args:
+        of_results: Openface raw out dataframe; col: gaze x, y, z column names
+        r_config: raw variable config file object
+
+    Returns:
+        Frame by frame gaze displacement list and matching error reason list
+    """
+    distance_list = []
+    error_list = []
+    
+    of_results = of_results[col+ [' confidence']]
+    for index, row in of_results.iterrows():
+        dst = np.nan
+        
+        if index == 0 or float(row[' confidence']) < 0.8:  #Threshold < 0.8
+            distance_list.append(dst)
+            
+            if float(row[' confidence']) < 0.8:
+                error_list.append('confidence less than 80%')
+                
+            else:
+                error_list.append('Pass')
+            continue
+            
+        if index > 0:
+            
+            point_x = (of_results[col[0]][index-1], of_results[col[1]][index-1], of_results[col[2]][index-1])
+            point_y = (row[col[0]],row[col[1]],row[col[2]])
+            try:
+                dst = distance.euclidean(point_x, point_y)
+            except:
+                pass
+            
+            distance_list.append(abs(dst))
+            error_list.append('Pass')
+            
+    return distance_list, error_list
+        
+def calc_eye_mov(video_uri, df_of, out_loc, fl_name, r_config):
+    """
+    Computing eye motion variables
+    Args:
+        df_of: Openface dataframe
+        out_loc: Output path for saving output csv's
+        fl_name: file name for output csv
+        r_config: raw variable config file object
+        
+    """
+    
+    col_l = [ ' gaze_0_x', ' gaze_0_y', ' gaze_0_z'] 
+    col_r = [ ' gaze_1_x', ' gaze_1_y', ' gaze_1_z']
+    
+    gazel_disp, err_l = eye_disp(df_of, col_l, r_config)
+    gazer_disp, err_r = eye_disp(df_of, col_r, r_config)
+    
+    df_disp = eye_motion_df(gazel_disp, gazer_disp, err_l, r_config)
+    df_disp['dbm_master_url'] = video_uri
+    
+    df_motion = filter_motion(df_of, df_disp, col_l, col_r, r_config)
+    ut.save_output(df_motion, out_loc, fl_name, eye_pose_dir, eye_pose_ext)
+    
+def run_eye_gaze(video_uri, out_dir, r_config):
+    """
+    Processing a video's OpenFace output to compute eye gaze movement features
+    --------------------------------
+    --------------------------------
+    Args:
+        video_uri: video path
+        out_dir: (str) Output directory for processed output; r_config: raw variable config object
+    """
+    try:
+        
+        #filtering path to generate input & output path
+        input_loc, out_loc, fl_name = ut.filter_path(video_uri, out_dir)
+        of_csv_path = glob.glob(join(out_loc, fl_name + '_OF_features/*.csv'))
+
+        if len(of_csv_path)>0:
+
+            of_csv = of_csv_path[0]
+            df_of = pd.read_csv(of_csv, error_bad_lines=False)
+
+            logger.info('Processing Output file {} '.format(os.path.join(out_loc, fl_name)))
+            calc_eye_mov(video_uri, df_of, out_loc, fl_name, r_config)
+    except Exception as e:
+        logger.error('Failed to process video file')
\ No newline at end of file
diff --git a/resources/features/derived_feature.yml b/resources/features/derived_feature.yml
index 7346eb36..dd1751fd 100644
--- a/resources/features/derived_feature.yml
+++ b/resources/features/derived_feature.yml
@@ -2,7 +2,8 @@ derive_feature:
     
     #DBM Feature Group
     FEATURE_GROUP: ['FAC_ASYM', 'FAC_AU', 'FAC_EXP', 'FAC_LMK', 'ACO_INT', 'ACO_FF', 'ACO_HNR', 'ACO_GNE', 'ACO_FM', 
-    'ACO_JITTER','ACO_SHIMMER', 'ACO_PAUSE', 'ACO_VFS', 'ACO_MFCC', 'MOV_HM', 'MOV_HP', 'EYE_BLINK', 'NLP_SPEECH']
+    'ACO_JITTER','ACO_SHIMMER', 'ACO_PAUSE', 'ACO_VFS', 'ACO_MFCC', 'MOV_HM', 'MOV_HP', 'EYE_BLINK', 'NLP_SPEECH',
+    'EYE_GAZE']
     
     #Feature group output file extensions
     FAC_ASYM_LOC: _facasym
@@ -23,6 +24,7 @@ derive_feature:
     MOV_HP_LOC: _headpose
     EYE_BLINK_LOC: _eyeblinks
     NLP_SPEECH_LOC: _nlp
+    EYE_GAZE_LOC: _eyegaze
     
     #Facial category feature group
     FAC_ASYM: ['fac_AsymMaskMouth', 'fac_AsymMaskEyebrow', 'fac_AsymMaskEye', 'fac_AsymMaskCom']
@@ -65,6 +67,8 @@ derive_feature:
     MOV_HM: ['head_vel']
     MOV_HP: ['mov_Hpose_Dist','mov_Hpose_Pitch','mov_Hpose_Yaw','mov_Hpose_Roll']
     EYE_BLINK: ['mov_blink_ear', 'vid_dur', 'mov_blinkdur']
+    EYE_GAZE: ['mov_leye_x', 'mov_leye_y', 'mov_leye_z', 'mov_reye_x', 'mov_reye_y', 'mov_reye_z', 'mov_eleft_disp',
+    'mov_eright_disp']
     
     #NLP category feature group
     NLP_SPEECH: ['nlp_numSentences', 'nlp_singPronPerAns', 'nlp_singPronPerSen', 'nlp_pastTensePerAns', 'nlp_pastTensePerSen',
@@ -255,6 +259,14 @@ derive_feature:
     mov_blink_ear: ['mean', 'std']
     vid_dur: ['count']
     mov_blinkdur: ['mean', 'std']
+    mov_leye_x: ['mean', 'std']
+    mov_leye_y: ['mean', 'std']
+    mov_leye_z: ['mean', 'std']
+    mov_reye_x: ['mean', 'std']
+    mov_reye_y: ['mean', 'std']
+    mov_reye_z: ['mean', 'std']
+    mov_eleft_disp: ['mean', 'std']
+    mov_eright_disp: ['mean', 'std']
     
     #NLP feature
     nlp_numSentences: ['mean']
diff --git a/resources/features/raw_feature.yml b/resources/features/raw_feature.yml
index b9c673b8..3f15bd9b 100644
--- a/resources/features/raw_feature.yml
+++ b/resources/features/raw_feature.yml
@@ -196,6 +196,14 @@ raw_feature:
     mov_Hpose_Yaw: mov_hposeyaw
     mov_Hpose_Roll: mov_hposeroll
     mov_Hpose_Dist: mov_hposedist
+    mov_leye_x: mov_lefteyex
+    mov_leye_y: mov_lefteyey
+    mov_leye_z: mov_lefteyez
+    mov_reye_x: mov_righteyex
+    mov_reye_y: mov_righteyey
+    mov_reye_z: mov_righteyez
+    mov_eleft_disp: mov_leyedisp
+    mov_eright_disp: mov_reyedisp
 
     #NLP markers
     nlp_transcribe: nlp_transcribe

From ccde369840cfb984a41d9e733025d6af67aa03b4 Mon Sep 17 00:00:00 2001
From: vjbytes102 <vijay.yadav@aicure.com>
Date: Tue, 1 Dec 2020 12:49:56 -0500
Subject: [PATCH 11/12] handling openface execution

---
 .../raw_features/video/open_face_process.py           | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/dbm_lib/dbm_features/raw_features/video/open_face_process.py b/dbm_lib/dbm_features/raw_features/video/open_face_process.py
index 292e2a98..34c31257 100644
--- a/dbm_lib/dbm_features/raw_features/video/open_face_process.py
+++ b/dbm_lib/dbm_features/raw_features/video/open_face_process.py
@@ -62,11 +62,16 @@ def process_open_face(video_uri, input_dir, out_dir, of_path, dbm_group):
     """
     try:
         
-        if dbm_group != None and len(dbm_group) == 1 and 'acoustic' in dbm_group:
+        if dbm_group != None:
             return
-
+        
+        check_group = ['facial','movement']
+        check_val = bool(len({*check_group} & {*dbm_group}))
+        if not check_val:
+            return
+        
         filepaths = [video_uri]
         csv_filepaths = batch_open_face(filepaths, video_uri, input_dir, out_dir, of_path)
         
     except Exception as e:
-        logger.error('Failed to process video file')
\ No newline at end of file
+        logger.error('Failed to process video file')

From 5ab0d2f1e153f1a34695c8b4aabd9bcda988d941 Mon Sep 17 00:00:00 2001
From: vjbytes102 <vijay.yadav@aicure.com>
Date: Tue, 1 Dec 2020 14:58:26 -0500
Subject: [PATCH 12/12] update in dbm group check

---
 .../raw_features/video/open_face_process.py            | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/dbm_lib/dbm_features/raw_features/video/open_face_process.py b/dbm_lib/dbm_features/raw_features/video/open_face_process.py
index 34c31257..695b35fb 100644
--- a/dbm_lib/dbm_features/raw_features/video/open_face_process.py
+++ b/dbm_lib/dbm_features/raw_features/video/open_face_process.py
@@ -63,12 +63,10 @@ def process_open_face(video_uri, input_dir, out_dir, of_path, dbm_group):
     try:
         
         if dbm_group != None:
-            return
-        
-        check_group = ['facial','movement']
-        check_val = bool(len({*check_group} & {*dbm_group}))
-        if not check_val:
-            return
+            check_group = ['facial','movement'] #add group here: if you want to use openface output for raw variable calculation
+            check_val = bool(len({*check_group} & {*dbm_group}))
+            if not check_val:
+                return
         
         filepaths = [video_uri]
         csv_filepaths = batch_open_face(filepaths, video_uri, input_dir, out_dir, of_path)