import re


# Number of whitespace-separated transcript tokens shown per subtitle cue.
_WORDS_PER_PHRASE = 10

# Display time (ms) given to a cue that holds a single word, for which no
# "next word" start time is used to derive an end time.
_SINGLE_WORD_PADDING_MS = 500

# Separator between seconds and milliseconds for each supported format.
_MS_SEPARATORS = {"srt": ",", "vtt": "."}


def _render_cues(phrases, timing_suffix=""):
    """
    Render phrases as numbered cues: number line, timing line, text, blank line.

    :param phrases: iterable of phrase dicts with "start_time", "end_time" and "words"
    :param timing_suffix: text appended verbatim to every timing line
    :return: the concatenated cue text ("" when there are no phrases)
    """
    cues = []
    for number, phrase in enumerate(phrases, start=1):
        cues.append(
            "%d\n%s --> %s%s\n%s\n\n" % (
                number,
                phrase["start_time"],
                phrase["end_time"],
                timing_suffix,
                get_phrase_text(phrase),
            )
        )
    return "".join(cues)


def write_srt(phrases):
    """
    This function will convert the input phrases into an .srt subtitle file formatted text.

    :param phrases: A list of phrases (batches of up to 10 words each) along with
        their already formatted start and end time codes
    :return: text blob of the .srt file
    """
    return _render_cues(phrases)


def write_web_vtt(phrases, style):
    """
    This function will convert the input phrases into .vtt cue formatted text.

    The time codes stored in the phrases are written as-is, and no "WEBVTT"
    header line is emitted by this function.

    :param phrases: A list of phrases (batches of up to 10 words each) along with
        their already formatted start and end time codes
    :param style: Cue settings appended (after a single space) to every timing line
    :return: text blob with the numbered cues of the .vtt file
    """
    return _render_cues(phrases, " " + style)


def new_phrase():
    """
    This is a utility function which creates a new phrase structure

    :return: a new phrase dict which stores the start time, end time and the
        words between the start and the end time.
    """
    return {'start_time': '', 'end_time': '', 'words': []}


def get_time_code(milliseconds, subtitle_format="srt"):
    """
    Format a millisecond offset as an HH:MM:SS time code in SRT/WebVTT format

    Integer arithmetic is used throughout so that the millisecond field is
    never truncated by floating point error, and offsets of one hour or more
    roll over into the hour field instead of producing minute values above 59.

    :param milliseconds: time information of when a word is spoken, in
        milliseconds. Negative values are clamped to 0.
    :param subtitle_format: Whether srt ("HH:MM:SS,mmm") or vtt ("HH:MM:SS.mmm")
        file format is needed. Default is srt
    :return: the formatted time code string
    :raises ValueError: if subtitle_format is neither "srt" nor "vtt"
    """
    if subtitle_format not in _MS_SEPARATORS:
        raise ValueError(
            "Unsupported subtitle format: %r (expected 'srt' or 'vtt')" % (subtitle_format,)
        )

    # An end time is computed as "next start - 1", which can dip below zero.
    total_ms = max(int(round(milliseconds)), 0)
    total_seconds, millis = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, mins = divmod(total_minutes, 60)
    return "%02d:%02d:%02d%s%03d" % (
        hours, mins, secs, _MS_SEPARATORS[subtitle_format], millis
    )


def get_speechmarks_to_webvtt(words, transcript):
    """
    This is the core function which combines the words with the timing information and the input
    text to a subtitle text

    Note that, despite its name, this function currently returns SRT formatted text.

    :param words: words list with timing information
    :param transcript: The actual text which was sent to polly
    :return: subtitle text in srt format
    """
    print("==> Creating WebVTT from Speechmarks")
    phrases = get_phrases_from_speechmarks(words, transcript)
    return write_srt(phrases)


def get_phrases_from_speechmarks(words, transcript, subtitle_format="srt"):
    """
    This function does the heavy lifting of mapping the words in the words list to the text in
    the transcript. It splits the transcript on whitespace into groups of 10 tokens so that each
    group fits the screen, and for each group creates a phrase holding the tokens together with
    the start and end time codes.

    Token i of the transcript is paired with words[i]. If the transcript has more tokens than
    there are entries in words, the surplus tokens reuse the timing of the last entry instead of
    failing with an IndexError.

    :param words: words list with timing information; each entry must provide a numeric
        "start_time" in milliseconds
    :param transcript: The actual text which was sent to polly
    :param subtitle_format: time code format passed to get_time_code ("srt" or "vtt")
    :return: List of phrases each with up to 10 words and their start and end time
    :raises ValueError: if the transcript is not empty but words is
    """
    items = transcript.split()
    len_items = len(items)
    len_sm = len(words)
    print("length len_items-len_sm", len_items, len_sm)

    if not items:
        return []
    if not words:
        raise ValueError("Cannot time a non-empty transcript without any speech marks")

    last_word = len_sm - 1
    phrases = []
    for first in range(0, len_items, _WORDS_PER_PHRASE):
        chunk = items[first:first + _WORDS_PER_PHRASE]

        # Clamp so that surplus transcript tokens fall back to the last speech mark.
        first_w = min(first, last_word)
        last_w = min(first + len(chunk) - 1, last_word)

        if len(chunk) == 1:
            # A lone word has no following word in its phrase: show it for a fixed time.
            end_ms = int(words[last_w]["start_time"]) + _SINGLE_WORD_PADDING_MS
        elif last_w == last_word:
            # No following speech mark exists; end at the last word's own start time.
            end_ms = words[last_w]["start_time"]
        else:
            # End just before the next word (the first word of the next phrase) starts.
            end_ms = words[last_w + 1]["start_time"] - 1

        phrase = new_phrase()
        phrase["start_time"] = get_time_code(words[first_w]["start_time"], subtitle_format)
        phrase["end_time"] = get_time_code(end_ms, subtitle_format)
        phrase["words"].extend(chunk)
        phrases.append(phrase)

    return phrases


def get_phrase_text(phrase):
    """
    This is a utility function which converts a list of words into a sentence by joining
    them with single spaces.

    :param phrase: a single phrase dict whose "words" entry is a list of strings
    :return: a sentence consisting of the phrase's words
    """
    return " ".join(phrase["words"])