commit 21ad625896
Author: wxchen
Date: 2022-12-29 23:08:25 +08:00
42 changed files with 2336 additions and 0 deletions

digit_depth/__init__.py (new file, 4 lines)

@@ -0,0 +1,4 @@
from .digit import *
from .third_party import *
from .train import *
from .handlers import *


@@ -0,0 +1,97 @@
"""
combine_A_and_B.py
Preprocessing for pix2pix
Copyright (c) 2016, Phillip Isola and Jun-Yan Zhu
All rights reserved.
BSD License
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
"""
import argparse
import os
from multiprocessing import Pool
import cv2
import numpy as np
def image_write(path_A, path_B, path_AB):
im_A = cv2.imread(
path_A, 1
) # python2: cv2.CV_LOAD_IMAGE_COLOR; python3: cv2.IMREAD_COLOR
im_B = cv2.imread(
path_B, 1
) # python2: cv2.CV_LOAD_IMAGE_COLOR; python3: cv2.IMREAD_COLOR
# im_A = cv2.transpose(im_A)
# im_B = cv2.transpose(im_B)
im_AB = np.concatenate([im_A, im_B], 1)
cv2.imwrite(path_AB, im_AB)
parser = argparse.ArgumentParser("create image pairs")
parser.add_argument("--fold_A", dest="fold_A", help="input directory for image A", type=str, default="../dataset/50kshoes_edges")
parser.add_argument("--fold_B", dest="fold_B", help="input directory for image B", type=str, default="../dataset/50kshoes_jpg")
parser.add_argument("--fold_AB", dest="fold_AB", help="output directory", type=str, default="../dataset/test_AB")
parser.add_argument("--num_imgs", dest="num_imgs", help="number of images", type=int, default=1000000)
parser.add_argument("--use_AB", dest="use_AB", help="if true: (0001_A, 0001_B) to (0001_AB)", action="store_true")
parser.add_argument("--no_multiprocessing", dest="no_multiprocessing", help="If used, chooses single CPU execution instead of parallel execution", action="store_true", default=False,
)
args = parser.parse_args()
for arg in vars(args):
print("[%s] = " % arg, getattr(args, arg))
splits = os.listdir(args.fold_A)
if not args.no_multiprocessing:
pool = Pool()
for sp in splits:
img_fold_A = os.path.join(args.fold_A, sp)
img_fold_B = os.path.join(args.fold_B, sp)
img_list = os.listdir(img_fold_A)
if args.use_AB:
img_list = [img_path for img_path in img_list if "_A." in img_path]
num_imgs = min(args.num_imgs, len(img_list))
print("split = %s, use %d/%d images" % (sp, num_imgs, len(img_list)))
img_fold_AB = os.path.join(args.fold_AB, sp)
if not os.path.isdir(img_fold_AB):
os.makedirs(img_fold_AB)
print("split = %s, number of images = %d" % (sp, num_imgs))
for n in range(num_imgs):
name_A = img_list[n]
path_A = os.path.join(img_fold_A, name_A)
if args.use_AB:
name_B = name_A.replace("_A.", "_B.")
else:
name_B = name_A
path_B = os.path.join(img_fold_B, name_B)
if os.path.isfile(path_A) and os.path.isfile(path_B):
name_AB = name_A
if args.use_AB:
name_AB = name_AB.replace("_A.", ".") # remove _A
path_AB = os.path.join(img_fold_AB, name_AB)
if not args.no_multiprocessing:
pool.apply_async(image_write, args=(path_A, path_B, path_AB))
else:
im_A = cv2.imread(path_A, 1) # python2: cv2.CV_LOAD_IMAGE_COLOR; python3: cv2.IMREAD_COLOR
im_B = cv2.imread(path_B, 1) # python2: cv2.CV_LOAD_IMAGE_COLOR; python3: cv2.IMREAD_COLOR
im_AB = np.concatenate([im_A, im_B], 1)
cv2.imwrite(path_AB, im_AB)
if not args.no_multiprocessing:
pool.close()
pool.join()
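For reference, a minimal sketch of stitching one A/B pair directly with the image_write helper defined above; the demo directory layout and the synthetic images are placeholders, not part of this commit:

import os
import cv2
import numpy as np

# hypothetical layout: fold_A/train/0001_A.png pairs with fold_B/train/0001_B.png
os.makedirs("demo/A/train", exist_ok=True)
os.makedirs("demo/B/train", exist_ok=True)
os.makedirs("demo/AB/train", exist_ok=True)
cv2.imwrite("demo/A/train/0001_A.png", np.zeros((240, 320, 3), dtype=np.uint8))
cv2.imwrite("demo/B/train/0001_B.png", np.full((240, 320, 3), 255, dtype=np.uint8))
# image_write is assumed to be in scope (it is defined in the script above)
image_write("demo/A/train/0001_A.png", "demo/B/train/0001_B.png", "demo/AB/train/0001.png")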


@@ -0,0 +1,109 @@
import glob
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
def create_pixel_csv(img_dir: str, save_dir: str, img_type: str):
"""
Creates a CSV file with the pixel coordinates of the image
:param img_dir: image directory containing the images
:param save_dir: directory for saving the CSV file per image
:param img_type: color or normal
:return: saves one CSV file per image
"""
imgs = sorted(glob.glob(f"{img_dir}/*.png"))
print(f"Found {len(imgs)} {img_type} images")
for idx, img_path in enumerate(imgs):
img = Image.open(img_path)
img_np = np.array(img)
xy_coords = np.flip(
np.column_stack(np.where(np.all(img_np >= 0, axis=2))), axis=1
)
rgb = np.reshape(img_np, (np.prod(img_np.shape[:2]), 3))
# Add pixel numbers in front
pixel_numbers = np.expand_dims(np.arange(1, xy_coords.shape[0] + 1), axis=1)
value = np.hstack([pixel_numbers, xy_coords, rgb])
# Properly save as CSV
if img_type == "color":
df = pd.DataFrame(value, columns=["pixel_number", "X", "Y", "R", "G", "B"])
del df["pixel_number"]
df.to_csv(f"{save_dir}/image_{idx}.csv", sep=",", index=False)
elif img_type == "normal":
df = pd.DataFrame(value, columns=["pixel_number", "X", "Y", "Nx", "Ny", "Nz"])
del df["pixel_number"]
df.to_csv(f"{save_dir}/image_{idx}.csv", sep=",", index=False)
print(f"{img_type} image CSV written for image {idx}")
def combine_csv(csv_dir: str, img_type: str):
"""
Combines all the CSV files in the directory into one CSV file for later use in training
:param csv_dir: directory containing the CSV files
:param img_type: color or normal
"""
csv_files = glob.glob(f"{csv_dir}/*.csv")
print(f"Found {len(csv_files)} {img_type} CSV files")
df = pd.concat([pd.read_csv(f, sep=",") for f in csv_files])
df.to_csv(f"{csv_dir}/combined.csv", sep=",", index=False)
print(f"Combined CSV written for {img_type} images")
check_nans(f"{csv_dir}/combined.csv")
print("----------------------------------------------------")
def create_train_test_csv(save_dir: str, normal_path: str, color_path: str):
"""
Creates a CSV file with the pixel coordinates of the image. Samples 4% from zeros.
Splits the cleaned dataset into train and test (80%/20%)
:param save_dir: path for train_test_split folder
:param normal_path: path to combined normal CSV file
:param color_path: path to combined color CSV file
"""
seed=42
rgb_df = pd.read_csv(color_path, sep=",")
normal_df = pd.read_csv(normal_path, sep=",")
rgb_df['Nx'] = normal_df['Nx']
rgb_df['Ny'] = normal_df['Ny']
rgb_df['Nz'] = normal_df['Nz']
# "zeros" are background pixels whose ground-truth normal is (127, 127, 127) in every channel
zeros_df = rgb_df[(rgb_df['Nx'] == 127) & (rgb_df['Ny'] == 127) & (rgb_df['Nz'] == 127)]
print(f"Found {len(zeros_df)} zeros in the dataset")
non_zeros_df = rgb_df[(rgb_df['Nx'] != 127) | (rgb_df['Ny'] != 127) | (rgb_df['Nz'] != 127)]
print(f"Found {len(non_zeros_df)} non-zeros in the dataset")
zeros_df.to_csv(f"{save_dir}/zeros.csv", sep=",", index=False)
non_zeros_df.to_csv(f"{save_dir}/non_zeros.csv", sep=",", index=False)
print("----------------------------------------------------")
# sampling 4% of zeros
clean_df = zeros_df.sample(frac=0.04, random_state=seed)
clean_df = pd.concat([clean_df, non_zeros_df])
clean_df.to_csv(f"{save_dir}/clean-data.csv", sep=",", index=False)
print(f"Clean data CSV of {len(clean_df)} written")
# split into train and test
train_df, test_df = train_test_split(clean_df, test_size=0.2, random_state=seed)
train_df.to_csv(f"{save_dir}/train.csv", sep=",", index=False)
test_df.to_csv(f"{save_dir}/test.csv", sep=",", index=False)
print(f"Train set of size {len(train_df)} and Test set of size {len(test_df)} written")
def check_nans(csv_path: str):
"""
Checks if there are any NaNs in the CSV file
:param csv_path: path to the CSV file
:return: True if there are no NaNs, False otherwise
"""
df = pd.read_csv(csv_path, sep=",")
if df.isnull().values.any():
nans=df.isnull().sum().sum()
print(f"Found {nans} NaNs in {csv_path}")
print("Replacing NaNs with mean")
df.fillna(df.mean(), inplace=True)
df.to_csv(csv_path, sep=",", index=False)
print(f"NaNs replaced in {csv_path}")
else:
print("No NaNs found.Perfect!")


@@ -0,0 +1,22 @@
"""
Data loader for the color-normal datasets
"""
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from digit_depth.dataio.digit_dataset import DigitRealImageAnnotDataset
def data_loader(dir_dataset, params):
"""A data loader for the color-normal datasets
Args:
dir_dataset: path to the dataset
params: a dict of parameters
"""
transform = transforms.Compose([transforms.ToTensor()])
dataset = DigitRealImageAnnotDataset( dir_dataset=dir_dataset, annot_file=params.annot_file,
transform=transform, annot_flag=params.annot_flag)
dataloader = DataLoader(dataset, batch_size=params.batch_size, shuffle=params.shuffle,
num_workers=params.num_workers)
return dataloader, dataset
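A minimal sketch of calling data_loader; the parameter values are assumptions that would normally come from the project config:

from types import SimpleNamespace

# hypothetical parameters and dataset path
params = SimpleNamespace(annot_file="datasets/annotations.csv", annot_flag=False,
                         batch_size=8, shuffle=True, num_workers=2)
loader, dataset = data_loader("datasets/imgs/color", params)
imgs = next(iter(loader))   # with annot_flag=False each batch is just an image tensor
print(imgs.shape)           # torch.Size([8, 3, 320, 240])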


@@ -0,0 +1,43 @@
import glob
import pandas as pd
import torch
from PIL import Image
from torch.utils.data import Dataset
class DigitRealImageAnnotDataset(Dataset):
def __init__( self, dir_dataset, annot_file, transform=None, annot_flag=True, img_type="png" ):
self.dir_dataset = dir_dataset
print(f"Loading dataset from {dir_dataset}")
self.transform = transform
self.annot_flag = annot_flag
# a list of image paths sorted. dir_dataset is the root dir of the datasets (color)
self.img_files = sorted(glob.glob(f"{self.dir_dataset}/*.{img_type}"))
print(f"Found {len(self.img_files)} images")
if self.annot_flag:
self.annot_dataframe = pd.read_csv(annot_file, sep=",")
def __getitem__(self, idx):
"""Returns a tuple of (img, annot) where annot is a tensor of shape (3,1)"""
# read in image
img = Image.open(self.img_files[idx])
img = self.transform(img)
img = img.permute(0, 2, 1) # (3,240,320) -> (3,320,240)
# read in region annotations
if self.annot_flag:
img_name = self.img_files[idx]
row_filter = self.annot_dataframe["img_names"] == img_name
region_attr = self.annot_dataframe.loc[
row_filter, ["center_x", "center_y", "radius"]
]
annot = (torch.tensor(region_attr.values, dtype=torch.int32) if (len(region_attr) > 0) else torch.tensor([]))
data = img
if self.annot_flag:
data = (img, annot)
return data
def __len__(self):
return len(self.img_files)


@@ -0,0 +1,47 @@
"""
Script for generating sphere ground truth normal images.
"""
import math
import numpy as np
def generate_sphere_gt_normals(img_mask, center_x, center_y, radius):
"""
Generates sphere ground truth normal images for an image.
Args:
img_mask: a numpy array of shape [H, W, 3]
center_x: x coordinate of the center of the sphere
center_y: y coordinate of the center of the sphere
radius: the radius of the sphere
Returns:
img_normal: a numpy array of shape [H, W, 3]
"""
img_normal = np.zeros(img_mask.shape, dtype="float64")
for y in range(img_mask.shape[0]):
for x in range(img_mask.shape[1]):
img_normal[y, x, 0] = 0.0
img_normal[y, x, 1] = 0.0
img_normal[y, x, 2] = 1.0
if np.sum(img_mask[y, x, :]) > 0:
dist = np.sqrt((x - center_x) ** 2 + (y - center_y) ** 2)
ang_xz = math.acos(dist / radius)
ang_xy = math.atan2(y - center_y, x - center_x)
nx = math.cos(ang_xz) * math.cos(ang_xy)
ny = math.cos(ang_xz) * math.sin(ang_xy)
nz = math.sin(ang_xz)
img_normal[y, x, 0] = nx
img_normal[y, x, 1] = -ny
img_normal[y, x, 2] = nz
norm_val = np.linalg.norm(img_normal[y, x, :])
img_normal[y, x, :] = img_normal[y, x, :] / norm_val
# img_normal between [-1., 1.], converting to [0., 1.]
img_normal = (img_normal + 1.0) * 0.5
return img_normal
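A small sketch of producing one ground-truth normal image from a circular contact mask with generate_sphere_gt_normals; the center and radius values are made up:

import numpy as np

H, W = 240, 320
center_x, center_y, radius = 160, 120, 40
yy, xx = np.mgrid[0:H, 0:W]
mask = ((xx - center_x) ** 2 + (yy - center_y) ** 2) <= radius ** 2
img_mask = np.repeat(mask[:, :, None].astype(np.uint8) * 255, 3, axis=2)      # (H, W, 3) binary mask
gt_normal = generate_sphere_gt_normals(img_mask, center_x, center_y, radius)  # values in [0, 1]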


@@ -0,0 +1 @@
from .digit_sensor import *


@@ -0,0 +1,37 @@
import time
from digit_interface import Digit
from digit_interface.digit import DigitDefaults
class DigitSensor:
def __init__(self, fps: int, resolution: str, serial_num: str):
self.fps = fps
self.resolution = resolution
self.serial_num = serial_num
def __call__(self, *args, **kwargs):
"""Calls the digit sensor."""
digit = self.setup_digit()
return digit
def __str__(self):
return f"DigitSensor(fps={self.fps}, resolution={self.resolution}, serial_num={self.serial_num})"
def setup_digit(self):
"""Sets up the digit sensor and returns it."""
digit = Digit(self.serial_num)
digit.connect()
rgb_list = [(15, 0, 0), (0, 15, 0), (0, 0, 15)]
for rgb in rgb_list:
digit.set_intensity_rgb(*rgb)
time.sleep(1)
digit.set_intensity(15)
resolution = DigitDefaults.STREAMS[self.resolution]
digit.set_resolution(resolution)
fps = Digit.STREAMS[self.resolution]["fps"][str(self.fps) + "fps"]
digit.set_fps(fps)
return digit
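A hedged connection sketch; the serial number is a placeholder and a physical DIGIT sensor must be attached for this to run:

digit_sensor = DigitSensor(fps=30, resolution="QVGA", serial_num="D00001")  # hypothetical serial
digit = digit_sensor()       # connects to the device and configures LEDs, resolution and FPS
frame = digit.get_frame()    # grab one RGB frame from the sensor
digit.disconnect()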


@@ -0,0 +1 @@
from .image import ImageHandler


@@ -0,0 +1,38 @@
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
import cv2
import numpy as np
from PIL import Image
from torchvision import transforms
class ImageHandler:
def __init__(self, img_path, convert="RGB"):
self.img = Image.open(img_path).convert(convert)
self.convert = convert
def tensor_to_PIL(self, img_tensor):
# instance method: it uses self.convert, so it cannot be a @staticmethod
img_tensor = img_tensor.squeeze_(0)
return transforms.ToPILImage()(img_tensor).convert(self.convert)
@property
def tensor(self):
return transforms.ToTensor()(self.img).unsqueeze_(0)
@property
def image(self):
return self.img
@property
def nparray(self):
return np.array(self.img)
@staticmethod
def save(file_name, img):
if isinstance(img, Image.Image):
# this is a PIL image
img.save(file_name)
else:
# cv2 image
cv2.imwrite(file_name, img)
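A short usage sketch for ImageHandler; the image path is an assumption:

handler = ImageHandler("datasets/imgs/color/image_0.png")   # hypothetical path
img_tensor = handler.tensor       # (1, 3, H, W) float tensor in [0, 1]
img_np = handler.nparray          # (H, W, 3) uint8 array
ImageHandler.save("image_0_copy.png", handler.image)        # saves the PIL image back to disk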

digit_depth/third_party/__init__.py (vendored, new file, 4 lines)

@@ -0,0 +1,4 @@
from .data_utils import *
from .geom_utils import *
from .vis_utils import *
from .poisson import *

digit_depth/third_party/data_utils.py (vendored, new file, 30 lines)

@@ -0,0 +1,30 @@
import numpy as np
import torch
"""
dataloader, logger helpers
"""
def pandas_col_to_numpy(df_col):
df_col = df_col.apply(lambda x: np.fromstring(x.replace("\n", "").replace("[", "").replace("]", "").replace("  ", " "), sep=", "))
df_col = np.stack(df_col)
return df_col
def pandas_string_to_numpy(arr_str):
arr_npy = np.fromstring(arr_str.replace("\n", "").replace("[", "").replace("]", "").replace("  ", " "), sep=", ")
return arr_npy
def interpolate_img(img, rows, cols):
"""
img: C x H x W
"""
img = torch.nn.functional.interpolate(img, size=cols)
img = torch.nn.functional.interpolate(img.permute(0, 2, 1), size=rows)
img = img.permute(0, 2, 1)
return img
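A quick sketch of these helpers on synthetic data; the shapes and the example string are illustrative only:

import torch

arr = pandas_string_to_numpy("[1.0, 2.0, 3.0]")         # -> array([1., 2., 3.])
img = torch.rand(3, 240, 320)                           # C x H x W
img_small = interpolate_img(img, rows=120, cols=160)    # -> 3 x 120 x 160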

digit_depth/third_party/geom_utils.py (vendored, new file, 531 lines)

@@ -0,0 +1,531 @@
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import numpy as np
import open3d as o3d
import torch
from scipy import ndimage
from digit_depth.third_party.poisson import poisson_reconstruct
from digit_depth.third_party.vis_utils import visualize_inlier_outlier
"""
Common functions
"""
def flip(x):
return torch.flip(x, dims=[0])
def min_clip(x, min_val):
return torch.max(x, min_val)
def max_clip(x, max_val):
return torch.min(x, max_val)
def normalize(x, min_val, max_val):
return (x - torch.min(x)) * (max_val - min_val) / (
torch.max(x) - torch.min(x)
) + min_val
def mask_background(x, bg_mask, bg_val=0.0):
if bg_mask is not None:
x[bg_mask] = bg_val
return x
def remove_background_pts(pts, bg_mask=None):
if bg_mask is not None:
fg_mask_pts = ~bg_mask.view(-1)
points3d_x = pts[0, fg_mask_pts].view(1, -1)
points3d_y = pts[1, fg_mask_pts].view(1, -1)
points3d_z = pts[2, fg_mask_pts].view(1, -1)
pts_fg = torch.cat((points3d_x, points3d_y, points3d_z), dim=0)
else:
pts_fg = pts
return pts_fg
"""
3D transform helper functions
"""
def Rt_to_T(R=None, t=None, device=None):
"""
:param R: rotation, (B, 3, 3) or (3, 3)
:param t: translation, (B, 3) or (3)
:return: T, (B, 4, 4) or (4, 4)
"""
T = torch.eye(4, device=device)
if (len(R.shape) > 2) & (len(t.shape) > 1): # batch version
B = R.shape[0]
T = T.repeat(B, 1, 1)
if R is not None:
T[:, 0:3, 0:3] = R
if t is not None:
T[:, 0:3, -1] = t
else:
if R is not None:
T[0:3, 0:3] = R
if t is not None:
T[0:3, -1] = t
return T
def transform_pts3d(T, pts):
"""
T: 4x4
pts: 3xN
returns 3xN
"""
D, N = pts.shape
if D == 3:
pts_tf = (
torch.cat((pts, torch.ones(1, N)), dim=0)
if torch.is_tensor(pts)
else np.concatenate((pts, torch.ones(1, N)), axis=0)
)
pts_tf = torch.matmul(T, pts_tf) if torch.is_tensor(pts) else np.matmul(T, pts_tf)
pts_tf = pts_tf[0:3, :]
return pts_tf
"""
3D-2D projection / conversion functions
OpenGL transforms reference: http://www.songho.ca/opengl/gl_transform.html
"""
def _vectorize_pixel_coords(rows, cols, device=None):
y_range = torch.arange(rows, device=device)
x_range = torch.arange(cols, device=device)
grid_x, grid_y = torch.meshgrid(x_range, y_range)
pixel_pos = torch.stack([grid_x.reshape(-1), grid_y.reshape(-1)], dim=0) # 2 x N
return pixel_pos
def _clip_to_pixel(clip_pos, img_shape, params):
H, W = img_shape
# clip -> ndc coords
x_ndc = clip_pos[0, :] / clip_pos[3, :]
y_ndc = clip_pos[1, :] / clip_pos[3, :]
# z_ndc = clip_pos[2, :] / clip_pos[3, :]
# ndc -> pixel coords
x_pix = (W - 1) / 2 * (x_ndc + 1) # [-1, 1] -> [0, W-1]
y_pix = (H - 1) / 2 * (y_ndc + 1) # [-1, 1] -> [0, H-1]
# z_pix = (f-n) / 2 * z_ndc + (f+n) / 2
pixel_pos = torch.stack((x_pix, y_pix), dim=0)
return pixel_pos
def _pixel_to_clip(pix_pos, depth_map, params):
"""
:param pix_pos: position in pixel space, (2, N)
:param depth_map: depth map, (H, W)
:return: clip_pos position in clip space, (4, N)
"""
x_pix = pix_pos[0, :]
y_pix = pix_pos[1, :]
H, W = depth_map.shape
f = params.z_far
n = params.z_near
# pixel -> ndc coords
x_ndc = 2 / (W - 1) * x_pix - 1 # [0, W-1] -> [-1, 1]
y_ndc = 2 / (H - 1) * y_pix - 1 # [0, H-1] -> [-1, 1]
z_buf = depth_map[y_pix, x_pix]
# ndc -> clip coords
z_eye = -z_buf
w_c = -z_eye
x_c = x_ndc * w_c
y_c = y_ndc * w_c
z_c = -(f + n) / (f - n) * z_eye - 2 * f * n / (f - n) * 1.0
clip_pos = torch.stack([x_c, y_c, z_c, w_c], dim=0)
return clip_pos
def _clip_to_eye(clip_pos, P):
P_inv = torch.inverse(P)
eye_pos = torch.matmul(P_inv, clip_pos)
return eye_pos
def _eye_to_clip(eye_pos, P):
clip_pos = torch.matmul(P, eye_pos)
return clip_pos
def _eye_to_world(eye_pos, V):
V_inv = torch.inverse(V)
world_pos = torch.matmul(V_inv, eye_pos)
world_pos = world_pos / world_pos[3, :]
return world_pos
def _world_to_eye(world_pos, V):
eye_pos = torch.matmul(V, world_pos)
return eye_pos
def _world_to_object(world_pos, M):
M_inv = torch.inverse(M)
obj_pos = torch.matmul(M_inv, world_pos)
obj_pos = obj_pos / obj_pos[3, :]
return obj_pos
def _object_to_world(obj_pos, M):
world_pos = torch.matmul(M, obj_pos)
world_pos = world_pos / world_pos[3, :]
return world_pos
def depth_to_pts3d(depth, P, V, params=None, ordered_pts=False):
"""
:param depth: depth map, (C, H, W) or (H, W)
:param P: projection matrix, (4, 4)
:param V: view matrix, (4, 4)
:return: world_pos position in 3d world coordinates, (3, H, W) or (3, N)
"""
assert 2 <= len(depth.shape) <= 3
assert P.shape == (4, 4)
assert V.shape == (4, 4)
depth_map = depth.squeeze(0) if (len(depth.shape) == 3) else depth
H, W = depth_map.shape
pixel_pos = _vectorize_pixel_coords(rows=H, cols=W)
clip_pos = _pixel_to_clip(pixel_pos, depth_map, params)
eye_pos = _clip_to_eye(clip_pos, P)
world_pos = _eye_to_world(eye_pos, V)
world_pos = world_pos[0:3, :] / world_pos[3, :]
if ordered_pts:
H, W = depth_map.shape
world_pos = world_pos.reshape(world_pos.shape[0], H, W)
return world_pos
"""
Optical flow functions
"""
def analytic_flow(img1, depth1, P, V1, V2, M1, M2, gel_depth, params):
C, H, W = img1.shape
depth_map = depth1.squeeze(0) if (len(depth1.shape) == 3) else depth1
pixel_pos = _vectorize_pixel_coords(rows=H, cols=W, device=img1.device)
clip_pos = _pixel_to_clip(pixel_pos, depth_map, params)
eye_pos = _clip_to_eye(clip_pos, P)
world_pos = _eye_to_world(eye_pos, V1)
obj_pos = _world_to_object(world_pos, M1)
world_pos = _object_to_world(obj_pos, M2)
eye_pos = _world_to_eye(world_pos, V2)
clip_pos = _eye_to_clip(eye_pos, P)
pixel_pos_proj = _clip_to_pixel(clip_pos, (H, W), params)
pixel_flow = pixel_pos - pixel_pos_proj
flow_map = pixel_flow.reshape(pixel_flow.shape[0], H, W)
# mask out background gel pixels
mask_idxs = depth_map >= gel_depth
flow_map[:, mask_idxs] = 0.0
return flow_map
"""
Normal to depth conversion / integration functions
"""
def _preproc_depth(img_depth, bg_mask=None):
if bg_mask is not None:
img_depth = mask_background(img_depth, bg_mask=bg_mask, bg_val=0.0)
return img_depth
def _preproc_normal(img_normal, bg_mask=None):
"""
img_normal: lies in range [0, 1]
"""
# 0.5 corresponds to 0
img_normal = img_normal - 0.5
# normalize
img_normal = img_normal / torch.linalg.norm(img_normal, dim=0)
# set background to have only z normals (flat, facing camera)
if bg_mask is not None:
img_normal[0:2, bg_mask] = 0.0
img_normal[2, bg_mask] = 1.0
return img_normal
def _depth_to_grad_depth(img_depth, bg_mask=None):
gradx = ndimage.sobel(img_depth.cpu().detach().numpy(), axis=1, mode="constant")
grady = ndimage.sobel(img_depth.cpu().detach().numpy(), axis=0, mode="constant")
# torch.FloatTensor(...) cannot place data on a CUDA device; build the tensors explicitly instead
gradx = torch.as_tensor(gradx, dtype=torch.float32, device=img_depth.device)
grady = torch.as_tensor(grady, dtype=torch.float32, device=img_depth.device)
if bg_mask is not None:
gradx = mask_background(gradx, bg_mask=bg_mask, bg_val=0.0)
grady = mask_background(grady, bg_mask=bg_mask, bg_val=0.0)
return gradx, grady
def _normal_to_grad_depth(img_normal, gel_width=1.0, gel_height=1.0, bg_mask=None):
# Ref: https://stackoverflow.com/questions/34644101/calculate-surface-normals-from-depth-image-using-neighboring-pixels-cross-produc/34644939#34644939
EPS = 1e-1
nz = torch.max(torch.tensor(EPS), img_normal[2, :])
dzdx = -(img_normal[0, :] / nz).squeeze()
dzdy = -(img_normal[1, :] / nz).squeeze()
# taking out negative sign as we are computing gradient of depth not z
# since z is pointed towards sensor, increase in z corresponds to decrease in depth
# i.e., dz/dx = -ddepth/dx
ddepthdx = -dzdx
ddepthdy = -dzdy
# sim: pixel axis v points in opposite dxn of camera axis y
ddepthdu = ddepthdx
ddepthdv = -ddepthdy
gradx = ddepthdu # cols
grady = ddepthdv # rows
# convert units from pixel to meters
C, H, W = img_normal.shape
gradx = gradx * (gel_width / W)
grady = grady * (gel_height / H)
if bg_mask is not None:
gradx = mask_background(gradx, bg_mask=bg_mask, bg_val=0.0)
grady = mask_background(grady, bg_mask=bg_mask, bg_val=0.0)
return gradx, grady
def _integrate_grad_depth(gradx, grady, boundary=None, bg_mask=None, max_depth=0.0):
if boundary is None:
boundary = torch.zeros((gradx.shape[0], gradx.shape[1]))
img_depth_recon = poisson_reconstruct(
grady.cpu().detach().numpy(),
gradx.cpu().detach().numpy(),
boundary.cpu().detach().numpy(),
)
img_depth_recon = torch.tensor(
img_depth_recon, device=gradx.device, dtype=torch.float32
)
if bg_mask is not None:
img_depth_recon = mask_background(img_depth_recon, bg_mask)
# after integration, img_depth_recon lies between 0. (bdry) and a -ve val (obj depth)
# rescale to make max depth as gel depth and obj depth as +ve values
img_depth_recon = max_clip(img_depth_recon, max_val=torch.tensor(0.0)) + max_depth
return img_depth_recon
def depth_to_depth(img_depth, bg_mask=None, boundary=None, params=None):
# preproc depth img
img_depth = _preproc_depth(img_depth=img_depth.squeeze(), bg_mask=bg_mask)
# get grad depth
gradx, grady = _depth_to_grad_depth(img_depth=img_depth.squeeze(), bg_mask=bg_mask)
# integrate grad depth
img_depth_recon = _integrate_grad_depth(
gradx, grady, boundary=boundary, bg_mask=bg_mask
)
return img_depth_recon
def normal_to_depth(
img_normal,
bg_mask=None,
boundary=None,
gel_width=0.02,
gel_height=0.03,
max_depth=0.02,
):
# preproc normal img
img_normal = _preproc_normal(img_normal=img_normal, bg_mask=bg_mask)
# get grad depth
gradx, grady = _normal_to_grad_depth(
img_normal=img_normal,
gel_width=gel_width,
gel_height=gel_height,
bg_mask=bg_mask,
)
# integrate grad depth
img_depth_recon = _integrate_grad_depth(
gradx, grady, boundary=boundary, bg_mask=bg_mask, max_depth=max_depth
)
return img_depth_recon
"""
3D registration functions
"""
def _fpfh(pcd, normals):
pcd.normals = o3d.utility.Vector3dVector(normals)
pcd_fpfh = o3d.pipelines.registration.compute_fpfh_feature(
pcd, o3d.geometry.KDTreeSearchParamHybrid(radius=0.3, max_nn=64)
)
return pcd_fpfh
def _fast_global_registration(source, target, source_fpfh, target_fpfh):
distance_threshold = 0.01
result = o3d.pipelines.registration.registration_fast_based_on_feature_matching(
source,
target,
source_fpfh,
target_fpfh,
o3d.pipelines.registration.FastGlobalRegistrationOption(
maximum_correspondence_distance=distance_threshold
),
)
transformation = result.transformation
metrics = [result.fitness, result.inlier_rmse, result.correspondence_set]
return transformation, metrics
def fgr(source, target, src_normals, tgt_normals):
source_fpfh = _fpfh(source, src_normals)
target_fpfh = _fpfh(target, tgt_normals)
transformation, metrics = _fast_global_registration(
source=source, target=target, source_fpfh=source_fpfh, target_fpfh=target_fpfh
)
return transformation, metrics
def icp(source, target, T_init=np.eye(4), mcd=0.1, max_iter=50, type="point_to_plane"):
if type == "point_to_point":
result = o3d.pipelines.registration.registration_icp(
source=source,
target=target,
max_correspondence_distance=mcd,
init=T_init,
estimation_method=o3d.pipelines.registration.TransformationEstimationPointToPoint(),
criteria=o3d.pipelines.registration.ICPConvergenceCriteria(
max_iteration=max_iter
),
)
else:
result = o3d.pipelines.registration.registration_icp(
source=source,
target=target,
max_correspondence_distance=mcd,
init=T_init,
estimation_method=o3d.pipelines.registration.TransformationEstimationPointToPlane(),
criteria=o3d.pipelines.registration.ICPConvergenceCriteria(
max_iteration=max_iter
),
)
transformation = result.transformation
metrics = [result.fitness, result.inlier_rmse, result.correspondence_set]
return transformation, metrics
"""
Open3D helper functions
"""
def remove_outlier_pts(points3d, nb_neighbors=20, std_ratio=10.0, vis=False):
points3d_np = (
points3d.cpu().detach().numpy() if torch.is_tensor(points3d) else points3d
)
cloud = o3d.geometry.PointCloud()
cloud.points = copy.deepcopy(o3d.utility.Vector3dVector(points3d_np.transpose()))
cloud_filt, ind_filt = cloud.remove_statistical_outlier(
nb_neighbors=nb_neighbors, std_ratio=std_ratio
)
if vis:
visualize_inlier_outlier(cloud, ind_filt)
points3d_filt = np.asarray(cloud_filt.points).transpose()
points3d_filt = (
torch.tensor(points3d_filt) if torch.is_tensor(points3d) else points3d_filt
)
return points3d_filt
def init_points_to_clouds(clouds, points3d, colors=None):
for idx, cloud in enumerate(clouds):
points3d_np = (
points3d[idx].cpu().detach().numpy()
if torch.is_tensor(points3d[idx])
else points3d[idx]
)
cloud.points = copy.deepcopy(
o3d.utility.Vector3dVector(points3d_np.transpose())
)
if colors is not None:
cloud.paint_uniform_color(colors[idx])
return clouds
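A hedged sketch of reconstructing depth from a normal image with normal_to_depth; the input here is a synthetic flat normal map and the gel dimensions simply repeat the defaults above:

import torch

# synthetic normal image in [0, 1]: every pixel faces the camera
img_normal = torch.zeros(3, 240, 320)
img_normal[2, :, :] = 1.0
img_normal = (img_normal + 1.0) * 0.5

depth = normal_to_depth(img_normal, gel_width=0.02, gel_height=0.03, max_depth=0.02)
print(depth.shape)   # torch.Size([240, 320]); roughly constant for a flat surface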

digit_depth/third_party/poisson.py (vendored, new file, 81 lines)

@@ -0,0 +1,81 @@
"""
poisson_reconstruct.py
Fast Poisson Reconstruction in Python
Copyright (c) 2014 Jack Doerner
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import copy
import math
import numpy
import scipy
import scipy.fftpack
def poisson_reconstruct(grady, gradx, boundarysrc):
# Thanks to Dr. Ramesh Raskar for providing the original matlab code from which this is derived
# Dr. Raskar's version is available here: http://web.media.mit.edu/~raskar/photo/code.pdf
# Laplacian
gyy = grady[1:, :-1] - grady[:-1, :-1]
gxx = gradx[:-1, 1:] - gradx[:-1, :-1]
f = numpy.zeros(boundarysrc.shape)
f[:-1, 1:] += gxx
f[1:, :-1] += gyy
# Boundary image
boundary = copy.deepcopy(boundarysrc) # .copy()
boundary[1:-1, 1:-1] = 0
# Subtract boundary contribution
f_bp = (
-4 * boundary[1:-1, 1:-1]
+ boundary[1:-1, 2:]
+ boundary[1:-1, 0:-2]
+ boundary[2:, 1:-1]
+ boundary[0:-2, 1:-1]
)
f = f[1:-1, 1:-1] - f_bp
# Discrete Sine Transform
tt = scipy.fftpack.dst(f, norm="ortho")
fsin = scipy.fftpack.dst(tt.T, norm="ortho").T
# Eigenvalues
(x, y) = numpy.meshgrid(
range(1, f.shape[1] + 1), range(1, f.shape[0] + 1), copy=True
)
denom = (2 * numpy.cos(math.pi * x / (f.shape[1] + 2)) - 2) + (
2 * numpy.cos(math.pi * y / (f.shape[0] + 2)) - 2
)
f = fsin / denom
# Inverse Discrete Sine Transform
tt = scipy.fftpack.idst(f, norm="ortho")
img_tt = scipy.fftpack.idst(tt.T, norm="ortho").T
# New center + old boundary
result = copy.deepcopy(boundary)
result[1:-1, 1:-1] = img_tt
return result
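A tiny self-contained check of poisson_reconstruct on synthetic data: integrating the gradients of a known surface, with the true values supplied as the boundary, should approximately recover that surface:

import numpy

y, x = numpy.mgrid[0:64, 0:64]
surface = numpy.sin(x / 10.0) + numpy.cos(y / 12.0)      # synthetic smooth height field
grady, gradx = numpy.gradient(surface)                   # row and column gradients
recon = poisson_reconstruct(grady, gradx, surface)       # boundary taken from the true surface
print(numpy.abs(recon - surface).max())                  # small reconstruction error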

digit_depth/third_party/vis_utils.py (vendored, new file, 372 lines)

@@ -0,0 +1,372 @@
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import logging
import time
import types
import cv2
import matplotlib.pyplot as plt
import numpy as np
import open3d as o3d
import torch
from attrdict import AttrDict
from matplotlib.patches import Circle, Rectangle
log = logging.getLogger(__name__)
"""
Open3d visualization functions
"""
class Visualizer3d:
def __init__(self, base_path="", view_params=None, tsleep=0.01):
self.vis = o3d.visualization.VisualizerWithKeyCallback()
self.vis.create_window(top=0, left=750, width=1080, height=1080)
self.tsleep = tsleep
self.base_path = base_path
self.view_params = AttrDict(
{
"fov": 0,
"front": [0.0, 0.0, 0.0],
"lookat": [0.0, 0.0, 0.0],
"up": [0.0, 0.0, 0.0],
"zoom": 0.5,
}
)
if view_params is not None:
self.view_params = view_params
self._init_options()
def _init_options(self):
opt = self.vis.get_render_option()
opt.show_coordinate_frame = True
opt.background_color = np.asarray([0.6, 0.6, 0.6])
# pause option
self.paused = types.SimpleNamespace()
self.paused.value = False
self.vis.register_key_action_callback(
ord("P"),
lambda a, b, c: b == 1
or setattr(self.paused, "value", not self.paused.value),
)
def _init_geom_cloud(self):
return o3d.geometry.PointCloud()
def _init_geom_frame(self, frame_size=0.01, frame_origin=[0, 0, 0]):
return o3d.geometry.TriangleMesh.create_coordinate_frame(
size=frame_size, origin=frame_origin
)
def _init_geom_mesh(self, mesh_name, color=None, wireframe=False):
mesh = o3d.io.read_triangle_mesh(
f"{self.base_path}/local/resources/meshes/{mesh_name}"
)
mesh.compute_vertex_normals()
if wireframe:
mesh = o3d.geometry.LineSet.create_from_triangle_mesh(mesh)
if color is not None:
mesh.paint_uniform_color(color)
return mesh
def init_geometry(
self,
geom_type,
num_items=1,
sizes=None,
file_names=None,
colors=None,
wireframes=None,
):
geom_list = []
for i in range(0, num_items):
if geom_type == "cloud":
geom = self._init_geom_cloud()
elif geom_type == "frame":
frame_size = sizes[i] if sizes is not None else 0.001
geom = self._init_geom_frame(frame_size=frame_size)
elif geom_type == "mesh":
color = colors[i] if colors is not None else None
wireframe = wireframes[i] if wireframes is not None else False
geom = self._init_geom_mesh(file_names[i], color, wireframe)
else:
log.error(
f"[Visualizer3d::init_geometry] geom_type {geom_type} not found."
)
geom_list.append(geom)
return geom_list
def add_geometry(self, geom_list):
if geom_list is None:
return
for geom in geom_list:
self.vis.add_geometry(geom)
def remove_geometry(self, geom_list, reset_bounding_box=False):
if geom_list is None:
return
for geom in geom_list:
self.vis.remove_geometry(geom, reset_bounding_box=reset_bounding_box)
def update_geometry(self, geom_list):
for geom in geom_list:
self.vis.update_geometry(geom)
def set_view(self):
ctr = self.vis.get_view_control()
ctr.change_field_of_view(self.view_params.fov)
ctr.set_front(self.view_params.front)
ctr.set_lookat(self.view_params.lookat)
ctr.set_up(self.view_params.up)
ctr.set_zoom(self.view_params.zoom)
def set_view_cam(self, T):
ctr = self.vis.get_view_control()
cam = ctr.convert_to_pinhole_camera_parameters()
cam.extrinsic = T
ctr.convert_from_pinhole_camera_parameters(cam)
def set_zoom(self):
ctr = self.vis.get_view_control()
ctr.set_zoom(1.5)
def rotate_view(self):
ctr = self.vis.get_view_control()
ctr.rotate(10.0, -0.0)
def pan_scene(self, max=300):
for i in range(0, max):
self.rotate_view()
self.render()
def render(self, T=None):
if T is not None:
self.set_view_cam(T)
else:
self.set_view()
self.vis.poll_events()
self.vis.update_renderer()
time.sleep(self.tsleep)
def transform_geometry_absolute(self, transform_list, geom_list):
for idx, geom in enumerate(geom_list):
T = transform_list[idx]
geom.transform(T)
def transform_geometry_relative(
self, transform_prev_list, transform_curr_list, geom_list
):
for idx, geom in enumerate(geom_list):
T_prev = transform_prev_list[idx]
T_curr = transform_curr_list[idx]
# a. rotate R1^{-1}*R2 about center t1
geom.rotate(
torch.matmul(torch.inverse(T_prev[0:3, 0:3]), T_curr[0:3, 0:3]),
center=(T_prev[0, -1], T_prev[1, -1], T_prev[2, -1]),
)
# b. translate by t2 - t1
geom.translate(
(
T_curr[0, -1] - T_prev[0, -1],
T_curr[1, -1] - T_prev[1, -1],
T_curr[2, -1] - T_prev[2, -1],
)
)
def clear_geometries(self):
self.vis.clear_geometries()
def destroy(self):
self.vis.destroy_window()
def visualize_registration(source, target, transformation, vis3d=None, colors=None):
source_copy = copy.deepcopy(source)
target_copy = copy.deepcopy(target)
source_copy.transform(transformation)
clouds = [target_copy, source_copy]
if colors is not None:
clouds[0].paint_uniform_color(colors[1])
clouds[1].paint_uniform_color(colors[0])
vis3d.add_geometry(clouds)
vis3d.render()
vis3d.remove_geometry(clouds)
def visualize_geometries_o3d(
vis3d, clouds=None, frames=None, meshes=None, transforms=None
):
if meshes is not None:
meshes = [copy.deepcopy(mesh) for mesh in meshes]
if transforms is not None:
vis3d.transform_geometry_absolute(transforms, meshes)
if frames is not None:
frames = [copy.deepcopy(frame) for frame in frames]
if transforms is not None:
vis3d.transform_geometry_absolute(transforms, frames)
if clouds is not None:
vis3d.add_geometry(clouds)
if meshes is not None:
vis3d.add_geometry(meshes)
if frames is not None:
vis3d.add_geometry(frames)
vis3d.render()
if clouds is not None:
vis3d.remove_geometry(clouds)
if meshes is not None:
vis3d.remove_geometry(meshes)
if frames is not None:
vis3d.remove_geometry(frames)
def visualize_inlier_outlier(cloud, ind):
inlier_cloud = cloud.select_by_index(ind)
outlier_cloud = cloud.select_by_index(ind, invert=True)
print("Showing outliers (red) and inliers (gray): ")
outlier_cloud.paint_uniform_color([1, 0, 0])
inlier_cloud.paint_uniform_color([0.8, 0.8, 0.8])
o3d.visualization.draw_geometries(
[inlier_cloud, outlier_cloud],
zoom=0.3412,
front=[0.4257, -0.2125, -0.8795],
lookat=[2.6172, 2.0475, 1.532],
up=[-0.0694, -0.9768, 0.2024],
)
"""
Optical flow visualization functions
"""
def flow_to_color(flow_uv, cvt=cv2.COLOR_HSV2BGR):
hsv = np.zeros((flow_uv.shape[0], flow_uv.shape[1], 3), dtype=np.uint8)
hsv[:, :, 0] = 255
hsv[:, :, 1] = 255
mag, ang = cv2.cartToPolar(flow_uv[..., 0], flow_uv[..., 1])
hsv[..., 0] = ang * 180 / np.pi / 2
hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
flow_color = cv2.cvtColor(hsv, cvt)
return flow_color
def flow_to_arrows(img, flow, step=8):
img = copy.deepcopy(img)
# img = (255 * img).astype(np.uint8)
h, w = img.shape[:2]
y, x = np.mgrid[step / 2 : h : step, step / 2 : w : step].reshape(2, -1).astype(int)
fx, fy = 5.0 * flow[y, x].T
lines = np.vstack([x, y, x + fx, y + fy]).T.reshape(-1, 2, 2)
lines = np.int32(lines + 0.5)
cv2.polylines(img, lines, 0, color=(0, 255, 0), thickness=1)
for (x1, y1), (x2, y2) in lines:
cv2.circle(img, (x1, y1), 1, (0, 255, 0), -1)
return img
def depth_to_color(depth):
gray = (
np.clip((depth - depth.min()) / (depth.max() - depth.min()), 0, 1) * 255
).astype(np.uint8)
return cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
def visualize_flow_cv2(
img1, img2, flow_arrow=None, flow_color=None, win_size=(360, 360)
):
img_disp1 = np.concatenate([img1, img2], axis=1)
cv2.namedWindow("img1, img2", cv2.WINDOW_NORMAL)
cv2.resizeWindow("img1, img2", 2 * win_size[0], win_size[1])
cv2.imshow("img1, img2", img_disp1)
if flow_arrow is not None:
cv2.namedWindow("flow_arrow", cv2.WINDOW_NORMAL)
cv2.resizeWindow("flow_arrow", win_size[0], win_size[1])
cv2.imshow("flow_arrow", flow_arrow)
if flow_color is not None:
cv2.namedWindow("flow_color", cv2.WINDOW_NORMAL)
cv2.resizeWindow("flow_color", win_size[0], win_size[1])
cv2.imshow("flow_color", flow_color)
cv2.waitKey(300)
"""
General visualization functions
"""
def draw_rectangle(
center_x,
center_y,
size_x,
size_y,
ang=0.0,
edgecolor="dimgray",
facecolor=None,
linewidth=2,
):
R = np.array([[np.cos(ang), -np.sin(ang)], [np.sin(ang), np.cos(ang)]])
offset = np.matmul(R, np.array([[0.5 * size_x], [0.5 * size_y]]))
anchor_x = center_x - offset[0]
anchor_y = center_y - offset[1]
rect = Rectangle(
(anchor_x, anchor_y),
size_x,
size_y,
angle=(np.rad2deg(ang)),
facecolor=facecolor,
edgecolor=edgecolor,
linewidth=linewidth,
)
plt.gca().add_patch(rect)
def draw_circle(center_x, center_y, radius):
circle = Circle((center_x, center_y), color="dimgray", radius=radius)
plt.gca().add_patch(circle)
def visualize_imgs(fig, axs, img_list, titles=None, cmap=None):
for idx, img in enumerate(img_list):
if img is None:
continue
im = axs[idx].imshow(img, cmap=cmap)
if cmap is not None:
fig.colorbar(im, ax=axs[idx])
if titles is not None:
axs[idx].set_title(titles[idx])
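A short sketch of the colour-mapping helpers on synthetic inputs; no sensor or display window is needed:

import numpy as np

flow = np.random.randn(240, 320, 2).astype(np.float32)   # synthetic (u, v) optical flow
flow_img = flow_to_color(flow)                            # flow encoded as a BGR image

depth = np.random.rand(240, 320)                          # synthetic depth map
depth_img = depth_to_color(depth)                         # min-max normalized grayscale as BGR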


@@ -0,0 +1,2 @@
from .color2normal_dataset import Color2NormalDataset
from .mlp_model import MLP


@@ -0,0 +1,21 @@
import pandas as pd
import torch
from torch.utils.data import Dataset
class Color2NormalDataset(Dataset):
def __init__(self, csv):
self.data = pd.read_csv(csv)
def __len__(self):
return self.data['X'].count()
def __getitem__(self,idx):
x = self.data['X'][idx]/120
y = self.data['Y'][idx]/160
r = self.data['R'][idx]/255
g = self.data['G'][idx]/255
b = self.data['B'][idx]/255
nx = self.data['Nx'][idx]/255
ny = self.data['Ny'][idx]/255
nz= self.data['Nz'][idx]/255
return torch.tensor((x, y, r, g, b), dtype=torch.float32), torch.tensor((nx, ny, nz), dtype=torch.float32)
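A minimal sketch of wrapping the CSV dataset in a DataLoader; the CSV path is an assumption (the train split produced by create_train_test_csv):

from torch.utils.data import DataLoader

train_ds = Color2NormalDataset("datasets/train_test_split/train.csv")   # hypothetical path
train_loader = DataLoader(train_ds, batch_size=256, shuffle=True)
features, targets = next(iter(train_loader))
print(features.shape, targets.shape)   # torch.Size([256, 5]) torch.Size([256, 3]) for a full batch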


@@ -0,0 +1,27 @@
import torch.nn as nn
import torch.nn.functional as F
class MLP(nn.Module):
dropout_p = 0.05
def __init__(
self, input_size=5, output_size=3, hidden_size=32):
super().__init__()
self.fc1 = nn.Linear(input_size, hidden_size)
self.fc2 = nn.Linear(hidden_size, hidden_size)
self.fc3 = nn.Linear(hidden_size, hidden_size)
self.fc4 = nn.Linear(hidden_size, output_size)
self.drop = nn.Dropout(p=self.dropout_p)
def forward(self, x):
x = F.relu(self.fc1(x))
x = self.drop(x)
x = F.relu(self.fc2(x))
x = self.drop(x)
x = self.fc3(x)
x = self.drop(x)
x = self.fc4(x)
return x
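A quick forward-pass sketch for the MLP; the batch size is arbitrary:

import torch

model = MLP()                   # 5 -> 32 -> 32 -> 32 -> 3 with dropout
batch = torch.rand(256, 5)      # (X, Y, R, G, B) features scaled to [0, 1]
pred_normals = model(batch)     # (256, 3) predicted (Nx, Ny, Nz)
print(pred_normals.shape)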


@@ -0,0 +1,49 @@
import cv2
import torch
import numpy as np
import pandas as pd
import copy
seed = 42
torch.manual_seed(seed)  # torch.seed is a function, not an attribute; use manual_seed to fix the RNG
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def preproc_mlp(image) -> torch.Tensor:
""" Preprocess image for input to model.
Args: image: OpenCV image in BGR format
Return: tensor of shape (R*C,5) where R=320 and C=240 for DIGIT images
5-columns are: X,Y,R,G,B
"""
img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
img = img / 255.0
xy_coords = np.flip(np.column_stack(np.where(np.all(img >= 0, axis=2))), axis=1)
rgb = np.reshape(img, (np.prod(img.shape[:2]), 3))
pixel_numbers = np.expand_dims(np.arange(1, xy_coords.shape[0] + 1), axis=1)
value_base = np.hstack([pixel_numbers, xy_coords, rgb])
df_base = pd.DataFrame(value_base, columns=['pixel_number', 'X', 'Y', 'R', 'G', 'B'])
df_base['X'] = df_base['X'] / 240
df_base['Y'] = df_base['Y'] / 320
del df_base['pixel_number']
test_tensor = torch.tensor(df_base[['X', 'Y', 'R', 'G', 'B']].values, dtype=torch.float32).to(device)
return test_tensor
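A hedged end-to-end inference sketch combining preproc_mlp, the MLP model, and the post_proc_mlp helper defined just below; the image path and checkpoint name are placeholders, and MLP is assumed to be importable (e.g. from digit_depth.train import MLP):

import cv2
import torch

frame = cv2.imread("datasets/imgs/color/image_0.png")    # hypothetical raw DIGIT frame (BGR)
model = MLP().to(device)
# model.load_state_dict(torch.load("models/mlp_color2normal.pt", map_location=device))  # hypothetical checkpoint
model.eval()

with torch.no_grad():
    features = preproc_mlp(frame)            # (320*240, 5) tensor on `device`
    pred = model(features)                   # (320*240, 3) predicted normals
normal_chw, normal_img = post_proc_mlp(pred) # (3, 320, 240) and (320, 240, 3)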
def post_proc_mlp(model_output: torch.Tensor):
""" Postprocess model output to get normal map.
Args: model_output: torch.Tensor of shape (1,3)
Return: two torch.Tensor of shape (1,3)
"""
test_np = model_output.reshape(320, 240, 3)
normal = copy.deepcopy(test_np) # surface normal image
test_np = test_np.clone().detach().to(dtype=torch.float32)  # keep a float tensor for later gradient computation
test_np = test_np.permute(2, 0, 1)  # swap axes to (3, 320, 240)
return test_np, normal