# open_dbm/tests/speech/test_api_speech.py

import numpy as np
from pytest import mark


@mark.non_docker
@mark.speech
class SpeechTest:
    """Tests for the speech API, run against an mp4 and a wav input.

    Both test methods receive ``processing_speech_mp4`` and
    ``processing_speech_wav`` (presumably pytest fixtures defined elsewhere,
    e.g. in a conftest -- confirm) and apply the same checks to each.

    NOTE(review): pytest's default ``python_classes`` setting only collects
    classes whose name starts with ``Test``; presumably the project's pytest
    configuration overrides it so this class is collected -- confirm.
    """

    def test_dummy_3(self):
        """Trivial test that always passes."""
        assert True

    def test_get_transcribe(self, processing_speech_mp4, processing_speech_wav):
        """Check the reported duration and transcript of ``get_transcribe()``.

        For each input, the single ``nlp_totalTime`` value must be within 10%
        of the expected duration, the single ``nlp_transcribe`` value must be
        a string, and its word count must be within 50% of the expected count.
        """
        actual_totaltime = 87.978685
        len_words_count = 57

        inputs = (("mp4", processing_speech_mp4), ("wav", processing_speech_wav))
        for label, processing in inputs:
            res = processing.get_transcribe().to_dataframe()
            # ``.item()`` raises unless the column holds exactly one value.
            audio_duration = res["nlp_totalTime"].item()
            transcribed_text = res["nlp_transcribe"].item()

            # test if duration is matched
            assert np.isclose(
                audio_duration, actual_totaltime, rtol=0.1, atol=1e-8
            ), "{}: duration {!r} is not close to {!r}".format(
                label, audio_duration, actual_totaltime
            )

            # test if there is transcribed text or not
            assert isinstance(
                transcribed_text, str
            ), "{}: transcript is {!r}, expected str".format(
                label, type(transcribed_text)
            )

            # test the length of the text; ``split()`` without an argument
            # splits on any whitespace run, so repeated spaces or newlines do
            # not inflate or deflate the word count.
            word_count = len(transcribed_text.split())
            assert np.isclose(
                word_count, len_words_count, rtol=0.5, atol=1e-8
            ), "{}: word count {} is not close to {}".format(
                label, word_count, len_words_count
            )

    def test_get_speech_features(self, processing_speech_mp4, processing_speech_wav):
        """Check that every speech feature in the first row is truthy.

        For each input, ``get_speech_features()`` is converted to a dataframe,
        the ``dbm_master_url`` column is dropped, and every remaining value of
        the first row must be truthy (i.e. not zero / not empty). Each input is
        checked on its own so that a difference in the number of columns
        between the two cannot hide values from the check.
        """
        # Values kept from a commented-out ``actual`` list in an earlier
        # version of this test (presumably the expected features for the
        # sample recording -- confirm). They are not asserted:
        #     1.0, 2.0, 2.0, 1.0, 1.0, 6.0, 6.0, 11.0, 11.0, 5.0, 5.0,
        #     15.0, 15.0, -0.8256, 0.08860759493670886,
        #     38.873052120437336, 87.97868480725624

        inputs = (("mp4", processing_speech_mp4), ("wav", processing_speech_wav))
        for label, processing in inputs:
            features = (
                processing.get_speech_features()
                .to_dataframe()
                .drop(columns="dbm_master_url")
            )
            first_row = features.iloc[0]

            # Guard against a vacuous pass when no feature columns remain.
            assert len(first_row) > 0, "{}: no speech features returned".format(
                label
            )

            # check if there is any zero value or not
            for column, value in first_row.items():
                assert bool(value), "{}: feature {!r} is zero or empty".format(
                    label, column
                )