Sign Language Part 2: Generating the Database
This blog post is about generating the database from the videos to train the GAN. I am using the German dataset mentioned in the previous blog post for this task. The dataset is available at the following link: meinedgs. I am taking a few sample videos and building on top of them. The dataset contains two types of videos: one shows the signer signing a single word, and the other shows the signer signing an entire sentence. I am using the entire-sentence videos for this task. The dataset contains the following files:
I am using Google Colab to generate the database. The following code is used to download the videos from the website.
# Notebook shell command: clone the repository that holds the meineDGS
# translation-protocol CSV files read below.
!git clone https://github.com/BenSaunders27/meineDGS-Translation-Protocols.git
import os
import subprocess

import pandas as pd
from tqdm import tqdm
# Load the training protocol from the cloned repository; the file uses '|'
# as its column separator.
data = pd.read_csv('/content/meineDGS-Translation-Protocols/mDGS-V/mDGS_Protocol_Train.csv', sep = '|')
from tqdm import tqdm
# data['gloss']
def MakeIndividualWords(dataframe):
    """Map every gloss word to the index of the first row that contains it.

    Each entry of ``dataframe`` is expected to be a space-separated string of
    ``word/startframe/endframe`` tokens.

    Args:
        dataframe: Object exposing ``tolist()``; the caller passes the
            ``gloss`` column of the protocol table.

    Returns:
        dict: ``{word: row_position}`` where ``row_position`` is the
        zero-based position of the first entry in which ``word`` appears.
    """
    words_info = dict()
    # Wrap the list itself (not the enumerate generator) so the progress bar
    # knows how many rows there are.
    for no, d in enumerate(tqdm(dataframe.tolist())):
        if not isinstance(d, str):
            # Non-string entries (such as missing values) carry no gloss.
            continue
        for word in d.split(' '):
            parts = word.split('/')
            if len(parts) != 3:
                # Skip only the malformed token (e.g. the empty string left by
                # a double space) instead of dropping the rest of the row.
                continue
            # Keep the first row in which the word was seen.
            words_info.setdefault(parts[0], no)
    return words_info
from pathlib import Path
import os
from pathlib import Path
import os
class PrepareData:
    """Download meineDGS source files and cut sentence clips out of them.

    On construction the protocol columns are cached, the set of source video
    ids is derived from the file names, and the optional download / cropping
    steps run immediately, depending on the flags.

    Args:
        dataframe: Protocol table providing the ``filename``, ``start_time``,
            ``stop_time`` and ``gloss`` columns. ``MakeDataset`` additionally
            unpacks every row into exactly six values.
        download_vids: Download every source video when true.
        downloadpath_vids: Directory that receives the downloaded videos.
        download_poses: Download the OpenPose archives when true.
        downloadpath_poses: Directory that receives the OpenPose archives.
        makedataset: Run ``MakeDataset`` when true.
        makedatasetpath: Root directory for the per-word folders; forwarded
            to ``MakeDataset``.
    """

    def __init__(self, dataframe,
                 download_vids=False,
                 downloadpath_vids='/content/dataset/vids',
                 download_poses=False,
                 downloadpath_poses='/content/dataset/poses',
                 makedataset=True,
                 makedatasetpath='/content/signvideos/'):
        self.df = dataframe
        self.downloadpath_vids = downloadpath_vids
        self.downloadpath_poses = downloadpath_poses
        # Historical misspelling, kept so existing attribute access still works.
        self.downlaodpath_poses = downloadpath_poses
        self.makedatasetpath = makedatasetpath
        self.MakeURL()
        self.GetVidDataset()
        if download_vids:
            self.DownloadVids()
        if download_poses:
            self.DownloadOpenPose()
        if makedataset:
            # Forward the constructor argument; it used to be ignored.
            self.MakeDataset(makedatasetpath=makedatasetpath)

    def MakeURL(self):
        """Cache the protocol columns as lists and build the word index."""
        self.fnames = self.df['filename'].tolist()
        self.start = self.df['start_time'].tolist()
        self.end = self.df['stop_time'].tolist()
        self.gloss = self.df['gloss'].tolist()
        self.individualwords = MakeIndividualWords(self.df['gloss'])

    def GetVidDataset(self):
        """Collect the distinct source-video ids.

        The id is the part of a file name before the first ``-`` with its
        last character removed.
        """
        self.vids_name = {name.split('-')[0][:-1] for name in self.fnames}

    def DownloadVids(self):
        """Fetch every source video into ``downloadpath_vids`` with ``wget``.

        Raises:
            FileNotFoundError: If the ``wget`` executable is not installed.
        """
        os.makedirs(self.downloadpath_vids, exist_ok=True)
        for vid in tqdm(self.vids_name):
            vidname = f'https://www.sign-lang.uni-hamburg.de/meinedgs/release2/videos/{vid}/{vid}.mp4'
            # Plain subprocess call so the class also works outside a notebook;
            # a failed download does not abort the loop.
            subprocess.run(['wget', vidname, '-P', self.downloadpath_vids], check=False)

    def DownloadOpenPose(self):
        """Fetch every OpenPose archive into ``downloadpath_poses`` with ``wget``.

        Raises:
            FileNotFoundError: If the ``wget`` executable is not installed.
        """
        os.makedirs(self.downloadpath_poses, exist_ok=True)
        for vid in tqdm(self.vids_name):
            vidname = f'https://www.sign-lang.uni-hamburg.de/meinedgs/openpose/{vid}_openpose.json.gz'
            subprocess.run(['wget', vidname, '-P', self.downloadpath_poses], check=False)

    def MakeDataset(self, fps=50.0,
                    makedatasetpath='/content/signvideos/',
                    cropped_imgs_path='/content/cropped_vids',
                    downloadpath='/content/drive/MyDrive/Teja_SignLangauge/GovernmentProject'):
        """Create one folder per word and cut out the sentence clip of its row.

        For every word in ``self.individualwords`` a folder is created under
        ``makedatasetpath``. The row in which the word first occurs is cut
        from its source video (stream copy) into ``cropped_imgs_path``; each
        row is cut at most once per call.

        Args:
            fps: Currently unused; ``convertmins`` is called with a fixed 50.
                NOTE(review): confirm whether ``fps`` should be passed on.
            makedatasetpath: Root directory of the per-word folders.
            cropped_imgs_path: Directory that receives the cut clips.
            downloadpath: Directory holding the source ``.mp4`` files.

        Raises:
            FileNotFoundError: If the ``ffmpeg`` executable is not installed.
        """
        # exist_ok so a second run does not crash on the existing directory.
        os.makedirs(cropped_imgs_path, exist_ok=True)
        cropped_rows = set()
        for word, rowno in tqdm(self.individualwords.items()):
            Path(os.path.join(makedatasetpath, word)).mkdir(parents=True, exist_ok=True)
            if rowno in cropped_rows:
                # Several words share a row; the clip is already written.
                continue
            cropped_rows.add(rowno)
            filename, _camera, ger_text, _gloss, start_time, stop_time = self.df.iloc[rowno].values
            ger_text = ger_text.replace(' ', '-')  # no spaces in the file name
            sh, sm, ss, sms = convertmins(start_time, 50)
            eh, em, es, ems = convertmins(stop_time, 50)
            vidname_org = filename.split('-')[0][:-1] + '.mp4'
            vidname = os.path.join(downloadpath, vidname_org)
            # stop_time is an end position, so it goes to -to; -t would treat
            # it as a duration. The argument list avoids shell interpretation
            # of punctuation in the sentence text.
            comands_crop = [
                'ffmpeg', '-i', vidname,
                '-ss', f'{sh:02d}:{sm:02d}:{ss:02d}.{sms:03d}',
                '-to', f'{eh:02d}:{em:02d}:{es:02d}.{ems:03d}',
                '-c', 'copy',
                f'{cropped_imgs_path}/{ger_text}_{vidname_org}',
            ]
            subprocess.run(comands_crop, check=False)
# Presumably a mounted Google Drive folder (verify); the same path is the
# default `downloadpath` from which MakeDataset reads the source videos.
downloadpath = '/content/drive/MyDrive/Teja_SignLangauge/GovernmentProject'
# Alternative run that downloads the OpenPose files into that folder:
# P1 = PrepareData(data, downloadpath_poses = downloadpath, download_poses = True)
# download_vids and download_poses keep their default (False), so no download
# step runs here and downloadpath_poses is not read.
P1 = PrepareData(data, downloadpath_poses = downloadpath, makedataset = True)