Added code

prasunroy · May 1, 2024 · 414242a · 414242a
1 parent 186bcbd
commit 414242a
Show file tree

Hide file tree

Showing 28 changed files with 2,314 additions and 0 deletions.
diff --git a/demo/TIPS_demo.ipynb b/demo/TIPS_demo.ipynb
diff --git a/demo/test_df2df.py b/demo/test_df2df.py
@@ -0,0 +1,101 @@
+"""TIPS: Text-Induced Pose Synthesis
+
+Test TIPS inference pipeline
+Created on Thu Nov 18 10:00:00 2021
+Author: Prasun Roy | https://prasunroy.github.io
+GitHub: https://github.com/prasunroy/tips
+
+"""
+
+
+import datetime
+import numpy as np
+import os
+import pandas as pd
+from PIL import Image
+from tips import TIPS
+from tips import visualize_skeletons, visualize
+
+
+# -----------------------------------------------------------------------------
+# Run configuration. All paths are relative to the current working directory.
+
+# Seeded generator, so the noise vector drawn from it is reproducible.
+prng = np.random.default_rng(1)
+
+# Checkpoint files handed to the TIPS constructor.
+ckpt_text2pose = './checkpoints/text2pose_75000.pth'
+ckpt_refinenet = './checkpoints/refinenet_100.pth'
+ckpt_pose2pose = './checkpoints/pose2pose_260500.pth'
+
+# Input directory and a timestamped output directory (a new one per run).
+data_root = './data'
+save_root = f'./output/df2df_{datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}'
+
+# Annotation tables are read from data_root rather than a repeated literal, so
+# changing data_root relocates images and tables together. keypoints and
+# encodings are indexed by file_id so rows can be fetched with .loc[file_id(path)].
+keypoints = pd.read_csv(f'{data_root}/keypoints.csv', index_col='file_id')
+encodings = pd.read_csv(f'{data_root}/encodings.csv', index_col='file_id')
+img_pairs = pd.read_csv(f'{data_root}/img_pairs_df2df.csv')
+
+# Font file passed to visualize() and the PIL crop box (left, upper, right,
+# lower) applied to every image before it is placed in the grid.
+font = f'{data_root}/FreeMono.ttf'
+bbox = (40, 0, 216, 256)
+# -----------------------------------------------------------------------------
+
+
+def file_id(path):
+    """Return the file name of ``path`` without directory or final extension."""
+    filename = os.path.basename(path)
+    stem, _extension = os.path.splitext(filename)
+    return stem
+
+
+# Create the output directory. exist_ok=True replaces the separate isdir()
+# check, so there is no window between checking and creating, and the call is
+# a no-op when the directory is already present.
+os.makedirs(save_root, exist_ok=True)
+
+
+# Build the inference wrapper from the three checkpoint paths.
+tips = TIPS(ckpt_text2pose, ckpt_refinenet, ckpt_pose2pose)
+
+
+# One 128-element float32 Gaussian sample, drawn once from the seeded
+# generator and passed unchanged to every pipeline call in the loop below.
+z = prng.normal(size=128).astype(np.float32)
+
+
+# Panel arrangement handed to visualize(). Every entry is a key of the
+# images_dict built for each image pair; presumably each inner list is one
+# row of the output grid (confirm against visualize()).
+layout = [
+    ['iA', 'kA',    'iB', 'kB',    'iB_k0'],
+    ['iA', 'kA',    'iB', 'kB_c1', 'iB_c1'],
+    ['iA', 'kA',    'iB', 'kB_f1', 'iB_f1'],
+    ['iA', 'kA_c2', 'iB', 'kB_c2', 'iB_c2'],
+    ['iA', 'kA_f2', 'iB', 'kB_f2', 'iB_f2']
+]
+
+
+# Head colors that distinguish the skeleton panels: keypoints read from the
+# CSV, '*_c' pipeline outputs and '*_f' pipeline outputs respectively.
+_HEAD_COLOR_CSV = (100, 255, 100)
+_HEAD_COLOR_C = (255, 100, 100)
+_HEAD_COLOR_F = (100, 100, 255)
+
+
+def _skeleton_panel(points, head_color):
+    """Draw one keypoint set with visualize_skeletons and crop it to bbox."""
+    canvas = visualize_skeletons([points], head_color=head_color)
+    return Image.fromarray(canvas).crop(bbox)
+
+
+num_pairs = len(img_pairs)
+
+# Process every (imgA, imgB) row: run the three inference variants, assemble
+# the comparison grid and save it as '<idA>____<idB>.png' under save_root.
+for done, pair in enumerate(img_pairs.itertuples(index=False), start=1):
+    fpA, fpB = pair.imgA, pair.imgB
+    idA, idB = file_id(fpA), file_id(fpB)
+
+    # NOTE(review): the 0:84 and 2:38 slices presumably select the encoding
+    # and keypoint-coordinate columns of each table; confirm against the CSVs.
+    source_text_encoding = encodings.loc[idA].values[0:84].astype(np.float32)
+    target_text_encoding = encodings.loc[idB].values[0:84].astype(np.float32)
+
+    source_keypoints = keypoints.loc[idA].values[2:38].astype(np.int32)
+    target_keypoints = keypoints.loc[idB].values[2:38].astype(np.int32)
+
+    # Image.open is lazy and keeps the file handle open; the with-block
+    # releases both handles at the end of each iteration instead of leaving
+    # that to garbage collection.
+    with Image.open(f'{data_root}/{fpA}') as source_image, \
+            Image.open(f'{data_root}/{fpB}') as target_image:
+        iB_k = tips.benchmark(source_image, source_keypoints, target_keypoints)
+        out1 = tips.pipeline(source_image, source_keypoints, target_text_encoding, z)
+        out2 = tips.pipeline_full(source_image, source_text_encoding, target_text_encoding, z)
+
+        images_dict = {
+            'iA': source_image.crop(bbox),
+            'iB': target_image.crop(bbox),
+            'iB_k0': iB_k.crop(bbox),
+            'iB_c1': out1['iB_c'].crop(bbox),
+            'iB_f1': out1['iB_f'].crop(bbox),
+            'iB_c2': out2['iB_c'].crop(bbox),
+            'iB_f2': out2['iB_f'].crop(bbox),
+            'kA': _skeleton_panel(source_keypoints, _HEAD_COLOR_CSV),
+            'kB': _skeleton_panel(target_keypoints, _HEAD_COLOR_CSV),
+            'kA_c2': _skeleton_panel(out2['kA_c'], _HEAD_COLOR_C),
+            'kA_f2': _skeleton_panel(out2['kA_f'], _HEAD_COLOR_F),
+            'kB_c1': _skeleton_panel(out1['kB_c'], _HEAD_COLOR_C),
+            'kB_f1': _skeleton_panel(out1['kB_f'], _HEAD_COLOR_F),
+            'kB_c2': _skeleton_panel(out2['kB_c'], _HEAD_COLOR_C),
+            'kB_f2': _skeleton_panel(out2['kB_f'], _HEAD_COLOR_F),
+        }
+
+        grid = visualize(images_dict, layout, True, font)
+        grid.save(f'{save_root}/{idA}____{idB}.png')
+
+    # '\r' with end='' rewrites the same console line as progress advances.
+    print(f'\r[TIPS] Testing inference pipeline... {done}/{num_pairs}', end='')
+
+# Terminate the progress line.
+print('')
diff --git a/demo/test_df2rw.py b/demo/test_df2rw.py
@@ -0,0 +1,101 @@
+"""TIPS: Text-Induced Pose Synthesis
+
+Test TIPS inference pipeline
+Created on Thu Nov 18 10:00:00 2021
+Author: Prasun Roy | https://prasunroy.github.io
+GitHub: https://github.com/prasunroy/tips
+
+"""
+
+
+import datetime
+import numpy as np
+import os
+import pandas as pd
+from PIL import Image
+from tips import TIPS
+from tips import visualize_skeletons, visualize
+
+
+# -----------------------------------------------------------------------------
+# Run configuration. All paths are relative to the current working directory.
+
+# Seeded generator, so the noise vector drawn from it is reproducible.
+prng = np.random.default_rng(1)
+
+# Checkpoint files handed to the TIPS constructor.
+ckpt_text2pose = './checkpoints/text2pose_75000.pth'
+ckpt_refinenet = './checkpoints/refinenet_100.pth'
+ckpt_pose2pose = './checkpoints/pose2pose_260500.pth'
+
+# Input directory and a timestamped output directory (a new one per run).
+data_root = './data'
+save_root = f'./output/df2rw_{datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}'
+
+# Annotation tables are read from data_root rather than a repeated literal, so
+# changing data_root relocates images and tables together. keypoints and
+# encodings are indexed by file_id so rows can be fetched with .loc[file_id(path)].
+keypoints = pd.read_csv(f'{data_root}/keypoints.csv', index_col='file_id')
+encodings = pd.read_csv(f'{data_root}/encodings.csv', index_col='file_id')
+img_pairs = pd.read_csv(f'{data_root}/img_pairs_df2rw.csv')
+
+# Font file passed to visualize() and the PIL crop box (left, upper, right,
+# lower) applied to every image before it is placed in the grid.
+font = f'{data_root}/FreeMono.ttf'
+bbox = (40, 0, 216, 256)
+# -----------------------------------------------------------------------------
+
+
+def file_id(path):
+    """Return the base name of ``path`` with its last extension removed."""
+    directory_free = os.path.basename(path)
+    return os.path.splitext(directory_free)[0]
+
+
+# Create the output directory. exist_ok=True replaces the separate isdir()
+# check, so there is no window between checking and creating, and the call is
+# a no-op when the directory is already present.
+os.makedirs(save_root, exist_ok=True)
+
+
+# Build the inference wrapper from the three checkpoint paths.
+tips = TIPS(ckpt_text2pose, ckpt_refinenet, ckpt_pose2pose)
+
+
+# One 128-element float32 Gaussian sample, drawn once from the seeded
+# generator and passed unchanged to every pipeline call in the loop below.
+z = prng.normal(size=128).astype(np.float32)
+
+
+# Panel arrangement handed to visualize(). Every entry is a key of the
+# images_dict built for each image pair; presumably each inner list is one
+# row of the output grid (confirm against visualize()).
+layout = [
+    ['iA', 'kA',    'iB', 'kB',    'iB_k0'],
+    ['iA', 'kA',    'iB', 'kB_c1', 'iB_c1'],
+    ['iA', 'kA',    'iB', 'kB_f1', 'iB_f1'],
+    ['iA', 'kA_c2', 'iB', 'kB_c2', 'iB_c2'],
+    ['iA', 'kA_f2', 'iB', 'kB_f2', 'iB_f2']
+]
+
+
+# Head colors that distinguish the skeleton panels: keypoints read from the
+# CSV, '*_c' pipeline outputs and '*_f' pipeline outputs respectively.
+_HEAD_COLOR_CSV = (100, 255, 100)
+_HEAD_COLOR_C = (255, 100, 100)
+_HEAD_COLOR_F = (100, 100, 255)
+
+
+def _skeleton_panel(points, head_color):
+    """Draw one keypoint set with visualize_skeletons and crop it to bbox."""
+    canvas = visualize_skeletons([points], head_color=head_color)
+    return Image.fromarray(canvas).crop(bbox)
+
+
+num_pairs = len(img_pairs)
+
+# Process every (imgA, imgB) row: run the three inference variants, assemble
+# the comparison grid and save it as '<idA>____<idB>.png' under save_root.
+for done, pair in enumerate(img_pairs.itertuples(index=False), start=1):
+    fpA, fpB = pair.imgA, pair.imgB
+    idA, idB = file_id(fpA), file_id(fpB)
+
+    # NOTE(review): the 0:84 and 2:38 slices presumably select the encoding
+    # and keypoint-coordinate columns of each table; confirm against the CSVs.
+    source_text_encoding = encodings.loc[idA].values[0:84].astype(np.float32)
+    target_text_encoding = encodings.loc[idB].values[0:84].astype(np.float32)
+
+    source_keypoints = keypoints.loc[idA].values[2:38].astype(np.int32)
+    target_keypoints = keypoints.loc[idB].values[2:38].astype(np.int32)
+
+    # Image.open is lazy and keeps the file handle open; the with-block
+    # releases both handles at the end of each iteration instead of leaving
+    # that to garbage collection.
+    with Image.open(f'{data_root}/{fpA}') as source_image, \
+            Image.open(f'{data_root}/{fpB}') as target_image:
+        iB_k = tips.benchmark(source_image, source_keypoints, target_keypoints)
+        out1 = tips.pipeline(source_image, source_keypoints, target_text_encoding, z)
+        out2 = tips.pipeline_full(source_image, source_text_encoding, target_text_encoding, z)
+
+        images_dict = {
+            'iA': source_image.crop(bbox),
+            'iB': target_image.crop(bbox),
+            'iB_k0': iB_k.crop(bbox),
+            'iB_c1': out1['iB_c'].crop(bbox),
+            'iB_f1': out1['iB_f'].crop(bbox),
+            'iB_c2': out2['iB_c'].crop(bbox),
+            'iB_f2': out2['iB_f'].crop(bbox),
+            'kA': _skeleton_panel(source_keypoints, _HEAD_COLOR_CSV),
+            'kB': _skeleton_panel(target_keypoints, _HEAD_COLOR_CSV),
+            'kA_c2': _skeleton_panel(out2['kA_c'], _HEAD_COLOR_C),
+            'kA_f2': _skeleton_panel(out2['kA_f'], _HEAD_COLOR_F),
+            'kB_c1': _skeleton_panel(out1['kB_c'], _HEAD_COLOR_C),
+            'kB_f1': _skeleton_panel(out1['kB_f'], _HEAD_COLOR_F),
+            'kB_c2': _skeleton_panel(out2['kB_c'], _HEAD_COLOR_C),
+            'kB_f2': _skeleton_panel(out2['kB_f'], _HEAD_COLOR_F),
+        }
+
+        grid = visualize(images_dict, layout, True, font)
+        grid.save(f'{save_root}/{idA}____{idB}.png')
+
+    # '\r' with end='' rewrites the same console line as progress advances.
+    print(f'\r[TIPS] Testing inference pipeline... {done}/{num_pairs}', end='')
+
+# Terminate the progress line.
+print('')
diff --git a/demo/tips/__init__.py b/demo/tips/__init__.py
@@ -0,0 +1,14 @@
+"""TIPS: Text-Induced Pose Synthesis
+
+Package initialization
+Created on Thu Nov 18 10:00:00 2021
+Author: Prasun Roy | https://prasunroy.github.io
+GitHub: https://github.com/prasunroy/tips
+
+"""
+
+
+# Package version string.
+__version__ = '1.0.0'
+
+# Re-export the public entry points at package level so callers can write
+# `from tips import TIPS` and `from tips import visualize_skeletons, visualize`.
+from .tips import TIPS
+from .visualization import visualize_skeletons, visualize
diff --git a/demo/tips/models/__init__.py b/demo/tips/models/__init__.py
@@ -0,0 +1,11 @@
+"""TIPS: Text-Induced Pose Synthesis
+
+Package initialization
+Created on Thu Nov 18 10:00:00 2021
+Author: Prasun Roy | https://prasunroy.github.io
+GitHub: https://github.com/prasunroy/tips
+
+"""
+
+
+# Version string of the models subpackage.
+__version__ = '1.0.0'
diff --git a/demo/tips/models/pose2pose.py b/demo/tips/models/pose2pose.py
@@ -0,0 +1,129 @@
+"""TIPS: Text-Induced Pose Synthesis
+
+Stage-3 network: Pose2Pose generator
+Created on Thu Nov 18 10:00:00 2021
+Author: Prasun Roy | https://prasunroy.github.io
+GitHub: https://github.com/prasunroy/tips
+
+"""
+
+
+import torch
+import torch.nn as nn
+
+
+def conv1x1(in_channels, out_channels):
+    """Build a bias-free 1x1 convolution (stride 1, no padding).
+
+    Only the channel count changes; the spatial size is preserved.
+    """
+    return nn.Conv2d(
+        in_channels,
+        out_channels,
+        kernel_size=1,
+        stride=1,
+        padding=0,
+        bias=False,
+    )
+
+
+def conv3x3(in_channels, out_channels):
+    """Build a bias-free 3x3 convolution with stride 1 and padding 1.
+
+    With these settings the spatial size of the input is preserved.
+    """
+    return nn.Conv2d(
+        in_channels,
+        out_channels,
+        kernel_size=3,
+        stride=1,
+        padding=1,
+        bias=False,
+    )
+
+
+def downconv2x(in_channels, out_channels):
+    """Build a bias-free 4x4 convolution with stride 2 and padding 1.
+
+    For even input sizes this halves height and width.
+    """
+    return nn.Conv2d(
+        in_channels,
+        out_channels,
+        kernel_size=4,
+        stride=2,
+        padding=1,
+        bias=False,
+    )
+
+
+def upconv2x(in_channels, out_channels):
+    """Build a bias-free 4x4 transposed convolution with stride 2 and padding 1.
+
+    With these settings height and width are doubled.
+    """
+    return nn.ConvTranspose2d(
+        in_channels,
+        out_channels,
+        kernel_size=4,
+        stride=2,
+        padding=1,
+        bias=False,
+    )
+
+
+class ResidualBlock(nn.Module):
+    """Two 3x3 conv + batch-norm stages with an identity shortcut.
+
+    The channel count is the same on input and output, so the input can be
+    added to the branch output directly.
+    """
+
+    def __init__(self, num_channels):
+        super().__init__()
+        # Kept as a Sequential named ``layers`` with the same ordering, so the
+        # parameter names in a state dict (layers.0.weight, ...) do not change.
+        self.layers = nn.Sequential(
+            conv3x3(num_channels, num_channels),
+            nn.BatchNorm2d(num_channels),
+            nn.ReLU(inplace=True),
+            conv3x3(num_channels, num_channels),
+            nn.BatchNorm2d(num_channels),
+        )
+
+    def forward(self, x):
+        """Return ``layers(x) + x``; no activation is applied after the sum."""
+        branch = self.layers(x)
+        return branch + x
+
+
+class NetG(nn.Module):
+    """Two-encoder, one-decoder generator.
+
+    ``x1`` and ``x2`` are encoded by separate stacks of identical layout: one
+    stride-1 conv block followed by four 2x downsampling stages. The decoder
+    upsamples four times; before each upsampling step the running feature map
+    is multiplied by the sigmoid of the ``x2`` feature map of the same
+    resolution. Four residual blocks, a 1x1 convolution and tanh then produce
+    the output.
+    """
+
+    def __init__(self, in1_channels, in2_channels, out_channels, ngf=64):
+        super().__init__()
+
+        # Channel widths per resolution level, from full size to 1/16 size.
+        w1, w2, w4, w8, w16 = (ngf * factor for factor in (1, 2, 4, 8, 16))
+
+        # Attribute names and construction order are kept as they were, so
+        # state-dict keys and seeded initialization are unaffected.
+
+        # Encoder for the first input.
+        self.in1_conv1 = self.inconv(in1_channels, w1)
+        self.in1_down1 = self.down2x(w1, w2)
+        self.in1_down2 = self.down2x(w2, w4)
+        self.in1_down3 = self.down2x(w4, w8)
+        self.in1_down4 = self.down2x(w8, w16)
+
+        # Encoder for the second input.
+        self.in2_conv1 = self.inconv(in2_channels, w1)
+        self.in2_down1 = self.down2x(w1, w2)
+        self.in2_down2 = self.down2x(w2, w4)
+        self.in2_down3 = self.down2x(w4, w8)
+        self.in2_down4 = self.down2x(w8, w16)
+
+        # Decoder.
+        self.out_up1 = self.up2x(w16, w8)
+        self.out_up2 = self.up2x(w8, w4)
+        self.out_up3 = self.up2x(w4, w2)
+        self.out_up4 = self.up2x(w2, w1)
+
+        self.out_conv1 = self.outconv(w1, out_channels)
+
+    def inconv(self, in_channels, out_channels):
+        """Input stem: 3x3 conv, batch norm, ReLU."""
+        stem = [
+            conv3x3(in_channels, out_channels),
+            nn.BatchNorm2d(out_channels),
+            nn.ReLU(inplace=True),
+        ]
+        return nn.Sequential(*stem)
+
+    def outconv(self, in_channels, out_channels):
+        """Output head: four residual blocks, 1x1 conv, tanh."""
+        head = [ResidualBlock(in_channels) for _ in range(4)]
+        head.append(conv1x1(in_channels, out_channels))
+        head.append(nn.Tanh())
+        return nn.Sequential(*head)
+
+    def down2x(self, in_channels, out_channels):
+        """Downsampling stage: stride-2 conv, batch norm, ReLU, residual block."""
+        stage = [
+            downconv2x(in_channels, out_channels),
+            nn.BatchNorm2d(out_channels),
+            nn.ReLU(inplace=True),
+            ResidualBlock(out_channels),
+        ]
+        return nn.Sequential(*stage)
+
+    def up2x(self, in_channels, out_channels):
+        """Upsampling stage: transposed conv, batch norm, ReLU, residual block."""
+        stage = [
+            upconv2x(in_channels, out_channels),
+            nn.BatchNorm2d(out_channels),
+            nn.ReLU(inplace=True),
+            ResidualBlock(out_channels),
+        ]
+        return nn.Sequential(*stage)
+
+    def forward(self, x1, x2):
+        """Run both encoders, then decode with ``x2``-derived sigmoid gates."""
+        # First encoder: only its deepest feature map is used afterwards.
+        feat1 = self.in1_conv1(x1)
+        for stage in (self.in1_down1, self.in1_down2, self.in1_down3, self.in1_down4):
+            feat1 = stage(feat1)
+
+        # Second encoder: keep the output of every downsampling stage.
+        feat2 = self.in2_conv1(x2)
+        gate_feats = []
+        for stage in (self.in2_down1, self.in2_down2, self.in2_down3, self.in2_down4):
+            feat2 = stage(feat2)
+            gate_feats.append(feat2)
+
+        # Decoder: walk from the deepest level up, gating before each upsample.
+        y = feat1
+        decoder = (self.out_up1, self.out_up2, self.out_up3, self.out_up4)
+        for gate, upsample in zip(reversed(gate_feats), decoder):
+            y = upsample(y * torch.sigmoid(gate))
+
+        return self.out_conv1(y)
diff --git a/demo/tips/models/refinenet.py b/demo/tips/models/refinenet.py
@@ -0,0 +1,29 @@
+"""TIPS: Text-Induced Pose Synthesis
+
+Stage-2 network: RefineNet regressor
+Created on Thu Nov 18 10:00:00 2021
+Author: Prasun Roy | https://prasunroy.github.io
+GitHub: https://github.com/prasunroy/tips
+
+"""
+
+
+import torch
+import torch.nn as nn
+
+
+class RefineNet(nn.Module):
+    """Four-layer fully connected network with 128-unit hidden layers.
+
+    ReLU follows each of the first three linear layers and tanh follows the
+    last one, so every output value lies in [-1, 1].
+    """
+
+    def __init__(self, in_features, out_features, bias=True):
+        super().__init__()
+        hidden = 128
+        # Attribute names linear1..linear4 and their creation order are kept,
+        # so state-dict keys and seeded initialization are unaffected.
+        self.linear1 = nn.Linear(in_features, hidden, bias=bias)
+        self.linear2 = nn.Linear(hidden, hidden, bias=bias)
+        self.linear3 = nn.Linear(hidden, hidden, bias=bias)
+        self.linear4 = nn.Linear(hidden, out_features, bias=bias)
+
+    def forward(self, x):
+        """Apply the three ReLU layers, then the tanh output layer."""
+        y = x
+        for layer in (self.linear1, self.linear2, self.linear3):
+            y = torch.relu(layer(y))
+        return torch.tanh(self.linear4(y))