commit 21ad625896
Author: wxchen
Date: 2022-12-29 23:08:25 +08:00
42 changed files with 2336 additions and 0 deletions

digit_depth/__init__.py (new file, 4 lines)

@@ -0,0 +1,4 @@
from .digit import *
from .third_party import *
from .train import *
from .handlers import *


@@ -0,0 +1,97 @@
"""
combine_A_and_B.py
Preprocessing for pix2pix
Copyright (c) 2016, Phillip Isola and Jun-Yan Zhu
All rights reserved.
BSD License
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
"""
import argparse
import os
from multiprocessing import Pool
import cv2
import numpy as np
def image_write(path_A, path_B, path_AB):
im_A = cv2.imread(
path_A, 1
) # python2: cv2.CV_LOAD_IMAGE_COLOR; python3: cv2.IMREAD_COLOR
im_B = cv2.imread(
path_B, 1
) # python2: cv2.CV_LOAD_IMAGE_COLOR; python3: cv2.IMREAD_COLOR
# im_A = cv2.transpose(im_A)
# im_B = cv2.transpose(im_B)
im_AB = np.concatenate([im_A, im_B], 1)
cv2.imwrite(path_AB, im_AB)
parser = argparse.ArgumentParser("create image pairs")
parser.add_argument("--fold_A", dest="fold_A", help="input directory for image A", type=str, default="../dataset/50kshoes_edges")
parser.add_argument("--fold_B", dest="fold_B", help="input directory for image B", type=str, default="../dataset/50kshoes_jpg")
parser.add_argument("--fold_AB", dest="fold_AB", help="output directory", type=str, default="../dataset/test_AB")
parser.add_argument("--num_imgs", dest="num_imgs", help="number of images", type=int, default=1000000)
parser.add_argument("--use_AB", dest="use_AB", help="if true: (0001_A, 0001_B) to (0001_AB)", action="store_true")
parser.add_argument("--no_multiprocessing", dest="no_multiprocessing", help="If used, chooses single CPU execution instead of parallel execution", action="store_true", default=False,
)
args = parser.parse_args()
for arg in vars(args):
print("[%s] = " % arg, getattr(args, arg))
splits = os.listdir(args.fold_A)
if not args.no_multiprocessing:
pool = Pool()
for sp in splits:
img_fold_A = os.path.join(args.fold_A, sp)
img_fold_B = os.path.join(args.fold_B, sp)
img_list = os.listdir(img_fold_A)
if args.use_AB:
img_list = [img_path for img_path in img_list if "_A." in img_path]
num_imgs = min(args.num_imgs, len(img_list))
print("split = %s, use %d/%d images" % (sp, num_imgs, len(img_list)))
img_fold_AB = os.path.join(args.fold_AB, sp)
if not os.path.isdir(img_fold_AB):
os.makedirs(img_fold_AB)
print("split = %s, number of images = %d" % (sp, num_imgs))
for n in range(num_imgs):
name_A = img_list[n]
path_A = os.path.join(img_fold_A, name_A)
if args.use_AB:
name_B = name_A.replace("_A.", "_B.")
else:
name_B = name_A
path_B = os.path.join(img_fold_B, name_B)
if os.path.isfile(path_A) and os.path.isfile(path_B):
name_AB = name_A
if args.use_AB:
name_AB = name_AB.replace("_A.", ".") # remove _A
path_AB = os.path.join(img_fold_AB, name_AB)
if not args.no_multiprocessing:
pool.apply_async(image_write, args=(path_A, path_B, path_AB))
else:
im_A = cv2.imread(path_A, 1) # python2: cv2.CV_LOAD_IMAGE_COLOR; python3: cv2.IMREAD_COLOR
im_B = cv2.imread(path_B, 1) # python2: cv2.CV_LOAD_IMAGE_COLOR; python3: cv2.IMREAD_COLOR
im_AB = np.concatenate([im_A, im_B], 1)
cv2.imwrite(path_AB, im_AB)
if not args.no_multiprocessing:
pool.close()
pool.join()
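For reference, a minimal sketch of stitching one A/B pair directly with the image_write helper defined above; the demo directory layout and the synthetic images are placeholders, not part of this commit:

import os
import cv2
import numpy as np

# hypothetical layout: fold_A/train/0001_A.png pairs with fold_B/train/0001_B.png
os.makedirs("demo/A/train", exist_ok=True)
os.makedirs("demo/B/train", exist_ok=True)
os.makedirs("demo/AB/train", exist_ok=True)
cv2.imwrite("demo/A/train/0001_A.png", np.zeros((240, 320, 3), dtype=np.uint8))
cv2.imwrite("demo/B/train/0001_B.png", np.full((240, 320, 3), 255, dtype=np.uint8))
# image_write is assumed to be in scope (it is defined in the script above)
image_write("demo/A/train/0001_A.png", "demo/B/train/0001_B.png", "demo/AB/train/0001.png")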


@@ -0,0 +1,109 @@
import glob
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
def create_pixel_csv(img_dir: str, save_dir: str, img_type: str):
"""
Creates a CSV file with the pixel coordinates of the image
:param img_dir: image directory containing the images
:param save_dir: directory for saving the CSV file per image
:param img_type: color or normal
:return: saves one CSV file per image
"""
imgs = sorted(glob.glob(f"{img_dir}/*.png"))
print(f"Found {len(imgs)} {img_type} images")
for idx, img_path in enumerate(imgs):
img = Image.open(img_path)
img_np = np.array(img)
xy_coords = np.flip(
np.column_stack(np.where(np.all(img_np >= 0, axis=2))), axis=1
)
rgb = np.reshape(img_np, (np.prod(img_np.shape[:2]), 3))
# Add pixel numbers in front
pixel_numbers = np.expand_dims(np.arange(1, xy_coords.shape[0] + 1), axis=1)
value = np.hstack([pixel_numbers, xy_coords, rgb])
# Properly save as CSV
if img_type == "color":
df = pd.DataFrame(value, columns=["pixel_number", "X", "Y", "R", "G", "B"])
del df["pixel_number"]
df.to_csv(f"{save_dir}/image_{idx}.csv", sep=",", index=False)
elif img_type == "normal":
df = pd.DataFrame(value, columns=["pixel_number", "X", "Y", "Nx", "Ny", "Nz"])
del df["pixel_number"]
df.to_csv(f"{save_dir}/image_{idx}.csv", sep=",", index=False)
print(f"{img_type} image CSV written for image {idx}")
def combine_csv(csv_dir: str, img_type: str):
"""
Combines all the CSV files in the directory into one CSV file for later use in training
:param csv_dir: directory containing the CSV files
:param img_type: color or normal
"""
csv_files = glob.glob(f"{csv_dir}/*.csv")
print(f"Found {len(csv_files)} {img_type} CSV files")
df = pd.concat([pd.read_csv(f, sep=",") for f in csv_files])
df.to_csv(f"{csv_dir}/combined.csv", sep=",", index=False)
print(f"Combined CSV written for {img_type} images")
check_nans(f"{csv_dir}/combined.csv")
print("----------------------------------------------------")
def create_train_test_csv(save_dir: str, normal_path: str, color_path: str):
"""
Creates a CSV file with the pixel coordinates of the image. Samples 4% from zeros.
Splits the cleaned dataset into train and test (80%/20%)
:param save_dir: path for train_test_split folder
:param normal_path: path to combined normal CSV file
:param color_path: path to combined color CSV file
"""
seed=42
rgb_df = pd.read_csv(color_path, sep=",")
normal_df = pd.read_csv(normal_path, sep=",")
rgb_df['Nx'] = normal_df['Nx']
rgb_df['Ny'] = normal_df['Ny']
rgb_df['Nz'] = normal_df['Nz']
# "zeros" are background pixels whose ground-truth normal is (127, 127, 127) in every channel
zeros_df = rgb_df[(rgb_df['Nx'] == 127) & (rgb_df['Ny'] == 127) & (rgb_df['Nz'] == 127)]
print(f"Found {len(zeros_df)} zeros in the dataset")
non_zeros_df = rgb_df[(rgb_df['Nx'] != 127) | (rgb_df['Ny'] != 127) | (rgb_df['Nz'] != 127)]
print(f"Found {len(non_zeros_df)} non-zeros in the dataset")
zeros_df.to_csv(f"{save_dir}/zeros.csv", sep=",", index=False)
non_zeros_df.to_csv(f"{save_dir}/non_zeros.csv", sep=",", index=False)
print("----------------------------------------------------")
# sampling 4% of zeros
clean_df = zeros_df.sample(frac=0.04, random_state=seed)
clean_df = pd.concat([clean_df, non_zeros_df])
clean_df.to_csv(f"{save_dir}/clean-data.csv", sep=",", index=False)
print(f"Clean data CSV of {len(clean_df)} written")
# split into train and test
train_df, test_df = train_test_split(clean_df, test_size=0.2, random_state=seed)
train_df.to_csv(f"{save_dir}/train.csv", sep=",", index=False)
test_df.to_csv(f"{save_dir}/test.csv", sep=",", index=False)
print(f"Train set of size {len(train_df)} and Test set of size {len(test_df)} written")
def check_nans(csv_path: str):
"""
Checks if there are any NaNs in the CSV file
:param csv_path: path to the CSV file
:return: True if there are no NaNs, False otherwise
"""
df = pd.read_csv(csv_path, sep=",")
if df.isnull().values.any():
nans=df.isnull().sum().sum()
print(f"Found {nans} NaNs in {csv_path}")
print("Replacing NaNs with mean")
df.fillna(df.mean(), inplace=True)
df.to_csv(csv_path, sep=",", index=False)
print(f"NaNs replaced in {csv_path}")
else:
print("No NaNs found.Perfect!")


@@ -0,0 +1,22 @@
"""
Data loader for the color-normal datasets
"""
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from digit_depth.dataio.digit_dataset import DigitRealImageAnnotDataset
def data_loader(dir_dataset, params):
"""A data loader for the color-normal datasets
Args:
dir_dataset: path to the dataset
params: a dict of parameters
"""
transform = transforms.Compose([transforms.ToTensor()])
dataset = DigitRealImageAnnotDataset( dir_dataset=dir_dataset, annot_file=params.annot_file,
transform=transform, annot_flag=params.annot_flag)
dataloader = DataLoader(dataset, batch_size=params.batch_size, shuffle=params.shuffle,
num_workers=params.num_workers)
return dataloader, dataset
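A minimal sketch of calling data_loader; the parameter values are assumptions that would normally come from the project config:

from types import SimpleNamespace

# hypothetical parameters and dataset path
params = SimpleNamespace(annot_file="datasets/annotations.csv", annot_flag=False,
                         batch_size=8, shuffle=True, num_workers=2)
loader, dataset = data_loader("datasets/imgs/color", params)
imgs = next(iter(loader))   # with annot_flag=False each batch is just an image tensor
print(imgs.shape)           # torch.Size([8, 3, 320, 240])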


@@ -0,0 +1,43 @@
import glob
import pandas as pd
import torch
from PIL import Image
from torch.utils.data import Dataset
class DigitRealImageAnnotDataset(Dataset):
def __init__( self, dir_dataset, annot_file, transform=None, annot_flag=True, img_type="png" ):
self.dir_dataset = dir_dataset
print(f"Loading dataset from {dir_dataset}")
self.transform = transform
self.annot_flag = annot_flag
# a list of image paths sorted. dir_dataset is the root dir of the datasets (color)
self.img_files = sorted(glob.glob(f"{self.dir_dataset}/*.{img_type}"))
print(f"Found {len(self.img_files)} images")
if self.annot_flag:
self.annot_dataframe = pd.read_csv(annot_file, sep=",")
def __getitem__(self, idx):
"""Returns a tuple of (img, annot) where annot is a tensor of shape (3,1)"""
# read in image
img = Image.open(self.img_files[idx])
img = self.transform(img)
img = img.permute(0, 2, 1) # (3,240,320) -> (3,320,240)
# read in region annotations
if self.annot_flag:
img_name = self.img_files[idx]
row_filter = self.annot_dataframe["img_names"] == img_name
region_attr = self.annot_dataframe.loc[
row_filter, ["center_x", "center_y", "radius"]
]
annot = (torch.tensor(region_attr.values, dtype=torch.int32) if (len(region_attr) > 0) else torch.tensor([]))
data = img
if self.annot_flag:
data = (img, annot)
return data
def __len__(self):
return len(self.img_files)


@@ -0,0 +1,47 @@
"""
Script for generating sphere ground truth normal images.
"""
import math
import numpy as np
def generate_sphere_gt_normals(img_mask, center_x, center_y, radius):
"""
Generates sphere ground truth normal images for an image.
Args:
img_mask: a numpy array of shape [H, W, 3]
center_x: x coordinate of the center of the sphere
center_y: y coordinate of the center of the sphere
radius: the radius of the sphere
Returns:
img_normal: a numpy array of shape [H, W, 3]
"""
img_normal = np.zeros(img_mask.shape, dtype="float64")
for y in range(img_mask.shape[0]):
for x in range(img_mask.shape[1]):
img_normal[y, x, 0] = 0.0
img_normal[y, x, 1] = 0.0
img_normal[y, x, 2] = 1.0
if np.sum(img_mask[y, x, :]) > 0:
dist = np.sqrt((x - center_x) ** 2 + (y - center_y) ** 2)
ang_xz = math.acos(dist / radius)
ang_xy = math.atan2(y - center_y, x - center_x)
nx = math.cos(ang_xz) * math.cos(ang_xy)
ny = math.cos(ang_xz) * math.sin(ang_xy)
nz = math.sin(ang_xz)
img_normal[y, x, 0] = nx
img_normal[y, x, 1] = -ny
img_normal[y, x, 2] = nz
norm_val = np.linalg.norm(img_normal[y, x, :])
img_normal[y, x, :] = img_normal[y, x, :] / norm_val
# img_normal between [-1., 1.], converting to [0., 1.]
img_normal = (img_normal + 1.0) * 0.5
return img_normal
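A small sketch of producing one ground-truth normal image from a circular contact mask with generate_sphere_gt_normals; the center and radius values are made up:

import numpy as np

H, W = 240, 320
center_x, center_y, radius = 160, 120, 40
yy, xx = np.mgrid[0:H, 0:W]
mask = ((xx - center_x) ** 2 + (yy - center_y) ** 2) <= radius ** 2
img_mask = np.repeat(mask[:, :, None].astype(np.uint8) * 255, 3, axis=2)      # (H, W, 3) binary mask
gt_normal = generate_sphere_gt_normals(img_mask, center_x, center_y, radius)  # values in [0, 1]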


@@ -0,0 +1 @@
from .digit_sensor import *


@@ -0,0 +1,37 @@
import time
from digit_interface import Digit
from digit_interface.digit import DigitDefaults
class DigitSensor:
def __init__(self, fps: int, resolution: str, serial_num: str):
self.fps = fps
self.resolution = resolution
self.serial_num = serial_num
def __call__(self, *args, **kwargs):
"""Calls the digit sensor."""
digit = self.setup_digit()
return digit
def __str__(self):
return f"DigitSensor(fps={self.fps}, resolution={self.resolution}, serial_num={self.serial_num})"
def setup_digit(self):
"""Sets up the digit sensor and returns it."""
digit = Digit(self.serial_num)
digit.connect()
rgb_list = [(15, 0, 0), (0, 15, 0), (0, 0, 15)]
for rgb in rgb_list:
digit.set_intensity_rgb(*rgb)
time.sleep(1)
digit.set_intensity(15)
resolution = DigitDefaults.STREAMS[self.resolution]
digit.set_resolution(resolution)
fps = Digit.STREAMS[self.resolution]["fps"][str(self.fps) + "fps"]
digit.set_fps(fps)
return digit
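A hedged connection sketch; the serial number is a placeholder and a physical DIGIT sensor must be attached for this to run:

digit_sensor = DigitSensor(fps=30, resolution="QVGA", serial_num="D00001")  # hypothetical serial
digit = digit_sensor()       # connects to the device and configures LEDs, resolution and FPS
frame = digit.get_frame()    # grab one RGB frame from the sensor
digit.disconnect()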


@@ -0,0 +1 @@
from .image import ImageHandler


@@ -0,0 +1,38 @@
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
import cv2
import numpy as np
from PIL import Image
from torchvision import transforms
class ImageHandler:
def __init__(self, img_path, convert="RGB"):
self.img = Image.open(img_path).convert(convert)
self.convert = convert
def tensor_to_PIL(self, img_tensor):
# instance method: it uses self.convert, so it cannot be a @staticmethod
img_tensor = img_tensor.squeeze_(0)
return transforms.ToPILImage()(img_tensor).convert(self.convert)
@property
def tensor(self):
return transforms.ToTensor()(self.img).unsqueeze_(0)
@property
def image(self):
return self.img
@property
def nparray(self):
return np.array(self.img)
@staticmethod
def save(file_name, img):
if isinstance(img, Image.Image):
# this is a PIL image
img.save(file_name)
else:
# cv2 image
cv2.imwrite(file_name, img)
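A short usage sketch for ImageHandler; the image path is an assumption:

handler = ImageHandler("datasets/imgs/color/image_0.png")   # hypothetical path
img_tensor = handler.tensor       # (1, 3, H, W) float tensor in [0, 1]
img_np = handler.nparray          # (H, W, 3) uint8 array
ImageHandler.save("image_0_copy.png", handler.image)        # saves the PIL image back to disk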

digit_depth/third_party/__init__.py (vendored, new file, 4 lines)

@@ -0,0 +1,4 @@
from .data_utils import *
from .geom_utils import *
from .vis_utils import *
from .poisson import *

digit_depth/third_party/data_utils.py (vendored, new file, 30 lines)

@@ -0,0 +1,30 @@
import numpy as np
import torch
"""
dataloader, logger helpers
"""
def pandas_col_to_numpy(df_col):
df_col = df_col.apply(lambda x: np.fromstring(x.replace("\n", "").replace("[", "").replace("]", "").replace("  ", " "), sep=", "))
df_col = np.stack(df_col)
return df_col
def pandas_string_to_numpy(arr_str):
arr_npy = np.fromstring(arr_str.replace("\n", "").replace("[", "").replace("]", "").replace("  ", " "), sep=", ")
return arr_npy
def interpolate_img(img, rows, cols):
"""
img: C x H x W
"""
img = torch.nn.functional.interpolate(img, size=cols)
img = torch.nn.functional.interpolate(img.permute(0, 2, 1), size=rows)
img = img.permute(0, 2, 1)
return img
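A quick sketch of these helpers on synthetic data; the shapes and the example string are illustrative only:

import torch

arr = pandas_string_to_numpy("[1.0, 2.0, 3.0]")         # -> array([1., 2., 3.])
img = torch.rand(3, 240, 320)                           # C x H x W
img_small = interpolate_img(img, rows=120, cols=160)    # -> 3 x 120 x 160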

digit_depth/third_party/geom_utils.py (vendored, new file, 531 lines)

@@ -0,0 +1,531 @@
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import numpy as np
import open3d as o3d
import torch
from scipy import ndimage
from digit_depth.third_party.poisson import poisson_reconstruct
from digit_depth.third_party.vis_utils import visualize_inlier_outlier
"""
Common functions
"""
def flip(x):
return torch.flip(x, dims=[0])
def min_clip(x, min_val):
return torch.max(x, min_val)
def max_clip(x, max_val):
return torch.min(x, max_val)
def normalize(x, min_val, max_val):
return (x - torch.min(x)) * (max_val - min_val) / (
torch.max(x) - torch.min(x)
) + min_val
def mask_background(x, bg_mask, bg_val=0.0):
if bg_mask is not None:
x[bg_mask] = bg_val
return x
def remove_background_pts(pts, bg_mask=None):
if bg_mask is not None:
fg_mask_pts = ~bg_mask.view(-1)
points3d_x = pts[0, fg_mask_pts].view(1, -1)
points3d_y = pts[1, fg_mask_pts].view(1, -1)
points3d_z = pts[2, fg_mask_pts].view(1, -1)
pts_fg = torch.cat((points3d_x, points3d_y, points3d_z), dim=0)
else:
pts_fg = pts
return pts_fg
"""
3D transform helper functions
"""
def Rt_to_T(R=None, t=None, device=None):
"""
:param R: rotation, (B, 3, 3) or (3, 3)
:param t: translation, (B, 3) or (3)
:return: T, (B, 4, 4) or (4, 4)
"""
T = torch.eye(4, device=device)
if (len(R.shape) > 2) & (len(t.shape) > 1): # batch version
B = R.shape[0]
T = T.repeat(B, 1, 1)
if R is not None:
T[:, 0:3, 0:3] = R
if t is not None:
T[:, 0:3, -1] = t
else:
if R is not None:
T[0:3, 0:3] = R
if t is not None:
T[0:3, -1] = t
return T
def transform_pts3d(T, pts):
"""
T: 4x4
pts: 3xN
returns 3xN
"""
D, N = pts.shape
if D == 3:
pts_tf = (
torch.cat((pts, torch.ones(1, N)), dim=0)
if torch.is_tensor(pts)
else np.concatenate((pts, torch.ones(1, N)), axis=0)
)
pts_tf = torch.matmul(T, pts_tf) if torch.is_tensor(pts) else np.matmul(T, pts_tf)
pts_tf = pts_tf[0:3, :]
return pts_tf
"""
3D-2D projection / conversion functions
OpenGL transforms reference: http://www.songho.ca/opengl/gl_transform.html
"""
def _vectorize_pixel_coords(rows, cols, device=None):
y_range = torch.arange(rows, device=device)
x_range = torch.arange(cols, device=device)
grid_x, grid_y = torch.meshgrid(x_range, y_range)
pixel_pos = torch.stack([grid_x.reshape(-1), grid_y.reshape(-1)], dim=0) # 2 x N
return pixel_pos
def _clip_to_pixel(clip_pos, img_shape, params):
H, W = img_shape
# clip -> ndc coords
x_ndc = clip_pos[0, :] / clip_pos[3, :]
y_ndc = clip_pos[1, :] / clip_pos[3, :]
# z_ndc = clip_pos[2, :] / clip_pos[3, :]
# ndc -> pixel coords
x_pix = (W - 1) / 2 * (x_ndc + 1) # [-1, 1] -> [0, W-1]
y_pix = (H - 1) / 2 * (y_ndc + 1) # [-1, 1] -> [0, H-1]
# z_pix = (f-n) / 2 * z_ndc + (f+n) / 2
pixel_pos = torch.stack((x_pix, y_pix), dim=0)
return pixel_pos
def _pixel_to_clip(pix_pos, depth_map, params):
"""
:param pix_pos: position in pixel space, (2, N)
:param depth_map: depth map, (H, W)
:return: clip_pos position in clip space, (4, N)
"""
x_pix = pix_pos[0, :]
y_pix = pix_pos[1, :]
H, W = depth_map.shape
f = params.z_far
n = params.z_near
# pixel -> ndc coords
x_ndc = 2 / (W - 1) * x_pix - 1 # [0, W-1] -> [-1, 1]
y_ndc = 2 / (H - 1) * y_pix - 1 # [0, H-1] -> [-1, 1]
z_buf = depth_map[y_pix, x_pix]
# ndc -> clip coords
z_eye = -z_buf
w_c = -z_eye
x_c = x_ndc * w_c
y_c = y_ndc * w_c
z_c = -(f + n) / (f - n) * z_eye - 2 * f * n / (f - n) * 1.0
clip_pos = torch.stack([x_c, y_c, z_c, w_c], dim=0)
return clip_pos
def _clip_to_eye(clip_pos, P):
P_inv = torch.inverse(P)
eye_pos = torch.matmul(P_inv, clip_pos)
return eye_pos
def _eye_to_clip(eye_pos, P):
clip_pos = torch.matmul(P, eye_pos)
return clip_pos
def _eye_to_world(eye_pos, V):
V_inv = torch.inverse(V)
world_pos = torch.matmul(V_inv, eye_pos)
world_pos = world_pos / world_pos[3, :]
return world_pos
def _world_to_eye(world_pos, V):
eye_pos = torch.matmul(V, world_pos)
return eye_pos
def _world_to_object(world_pos, M):
M_inv = torch.inverse(M)
obj_pos = torch.matmul(M_inv, world_pos)
obj_pos = obj_pos / obj_pos[3, :]
return obj_pos
def _object_to_world(obj_pos, M):
world_pos = torch.matmul(M, obj_pos)
world_pos = world_pos / world_pos[3, :]
return world_pos
def depth_to_pts3d(depth, P, V, params=None, ordered_pts=False):
"""
:param depth: depth map, (C, H, W) or (H, W)
:param P: projection matrix, (4, 4)
:param V: view matrix, (4, 4)
:return: world_pos position in 3d world coordinates, (3, H, W) or (3, N)
"""
assert 2 <= len(depth.shape) <= 3
assert P.shape == (4, 4)
assert V.shape == (4, 4)
depth_map = depth.squeeze(0) if (len(depth.shape) == 3) else depth
H, W = depth_map.shape
pixel_pos = _vectorize_pixel_coords(rows=H, cols=W)
clip_pos = _pixel_to_clip(pixel_pos, depth_map, params)
eye_pos = _clip_to_eye(clip_pos, P)
world_pos = _eye_to_world(eye_pos, V)
world_pos = world_pos[0:3, :] / world_pos[3, :]
if ordered_pts:
H, W = depth_map.shape
world_pos = world_pos.reshape(world_pos.shape[0], H, W)
return world_pos
"""
Optical flow functions
"""
def analytic_flow(img1, depth1, P, V1, V2, M1, M2, gel_depth, params):
C, H, W = img1.shape
depth_map = depth1.squeeze(0) if (len(depth1.shape) == 3) else depth1
pixel_pos = _vectorize_pixel_coords(rows=H, cols=W, device=img1.device)
clip_pos = _pixel_to_clip(pixel_pos, depth_map, params)
eye_pos = _clip_to_eye(clip_pos, P)
world_pos = _eye_to_world(eye_pos, V1)
obj_pos = _world_to_object(world_pos, M1)
world_pos = _object_to_world(obj_pos, M2)
eye_pos = _world_to_eye(world_pos, V2)
clip_pos = _eye_to_clip(eye_pos, P)
pixel_pos_proj = _clip_to_pixel(clip_pos, (H, W), params)
pixel_flow = pixel_pos - pixel_pos_proj
flow_map = pixel_flow.reshape(pixel_flow.shape[0], H, W)
# mask out background gel pixels
mask_idxs = depth_map >= gel_depth
flow_map[:, mask_idxs] = 0.0
return flow_map
"""
Normal to depth conversion / integration functions
"""
def _preproc_depth(img_depth, bg_mask=None):
if bg_mask is not None:
img_depth = mask_background(img_depth, bg_mask=bg_mask, bg_val=0.0)
return img_depth
def _preproc_normal(img_normal, bg_mask=None):
"""
img_normal: lies in range [0, 1]
"""
# 0.5 corresponds to 0
img_normal = img_normal - 0.5
# normalize
img_normal = img_normal / torch.linalg.norm(img_normal, dim=0)
# set background to have only z normals (flat, facing camera)
if bg_mask is not None:
img_normal[0:2, bg_mask] = 0.0
img_normal[2, bg_mask] = 1.0
return img_normal
def _depth_to_grad_depth(img_depth, bg_mask=None):
gradx = ndimage.sobel(img_depth.cpu().detach().numpy(), axis=1, mode="constant")
grady = ndimage.sobel(img_depth.cpu().detach().numpy(), axis=0, mode="constant")
# torch.FloatTensor(...) cannot place data on a CUDA device; build the tensors explicitly instead
gradx = torch.as_tensor(gradx, dtype=torch.float32, device=img_depth.device)
grady = torch.as_tensor(grady, dtype=torch.float32, device=img_depth.device)
if bg_mask is not None:
gradx = mask_background(gradx, bg_mask=bg_mask, bg_val=0.0)
grady = mask_background(grady, bg_mask=bg_mask, bg_val=0.0)
return gradx, grady
def _normal_to_grad_depth(img_normal, gel_width=1.0, gel_height=1.0, bg_mask=None):
# Ref: https://stackoverflow.com/questions/34644101/calculate-surface-normals-from-depth-image-using-neighboring-pixels-cross-produc/34644939#34644939
EPS = 1e-1
nz = torch.max(torch.tensor(EPS), img_normal[2, :])
dzdx = -(img_normal[0, :] / nz).squeeze()
dzdy = -(img_normal[1, :] / nz).squeeze()
# taking out negative sign as we are computing gradient of depth not z
# since z is pointed towards sensor, increase in z corresponds to decrease in depth
# i.e., dz/dx = -ddepth/dx
ddepthdx = -dzdx
ddepthdy = -dzdy
# sim: pixel axis v points in opposite dxn of camera axis y
ddepthdu = ddepthdx
ddepthdv = -ddepthdy
gradx = ddepthdu # cols
grady = ddepthdv # rows
# convert units from pixel to meters
C, H, W = img_normal.shape
gradx = gradx * (gel_width / W)
grady = grady * (gel_height / H)
if bg_mask is not None:
gradx = mask_background(gradx, bg_mask=bg_mask, bg_val=0.0)
grady = mask_background(grady, bg_mask=bg_mask, bg_val=0.0)
return gradx, grady
def _integrate_grad_depth(gradx, grady, boundary=None, bg_mask=None, max_depth=0.0):
if boundary is None:
boundary = torch.zeros((gradx.shape[0], gradx.shape[1]))
img_depth_recon = poisson_reconstruct(
grady.cpu().detach().numpy(),
gradx.cpu().detach().numpy(),
boundary.cpu().detach().numpy(),
)
img_depth_recon = torch.tensor(
img_depth_recon, device=gradx.device, dtype=torch.float32
)
if bg_mask is not None:
img_depth_recon = mask_background(img_depth_recon, bg_mask)
# after integration, img_depth_recon lies between 0. (bdry) and a -ve val (obj depth)
# rescale to make max depth as gel depth and obj depth as +ve values
img_depth_recon = max_clip(img_depth_recon, max_val=torch.tensor(0.0)) + max_depth
return img_depth_recon
def depth_to_depth(img_depth, bg_mask=None, boundary=None, params=None):
# preproc depth img
img_depth = _preproc_depth(img_depth=img_depth.squeeze(), bg_mask=bg_mask)
# get grad depth
gradx, grady = _depth_to_grad_depth(img_depth=img_depth.squeeze(), bg_mask=bg_mask)
# integrate grad depth
img_depth_recon = _integrate_grad_depth(
gradx, grady, boundary=boundary, bg_mask=bg_mask
)
return img_depth_recon
def normal_to_depth(
img_normal,
bg_mask=None,
boundary=None,
gel_width=0.02,
gel_height=0.03,
max_depth=0.02,
):
# preproc normal img
img_normal = _preproc_normal(img_normal=img_normal, bg_mask=bg_mask)
# get grad depth
gradx, grady = _normal_to_grad_depth(
img_normal=img_normal,
gel_width=gel_width,
gel_height=gel_height,
bg_mask=bg_mask,
)
# integrate grad depth
img_depth_recon = _integrate_grad_depth(
gradx, grady, boundary=boundary, bg_mask=bg_mask, max_depth=max_depth
)
return img_depth_recon
"""
3D registration functions
"""
def _fpfh(pcd, normals):
pcd.normals = o3d.utility.Vector3dVector(normals)
pcd_fpfh = o3d.pipelines.registration.compute_fpfh_feature(
pcd, o3d.geometry.KDTreeSearchParamHybrid(radius=0.3, max_nn=64)
)
return pcd_fpfh
def _fast_global_registration(source, target, source_fpfh, target_fpfh):
distance_threshold = 0.01
result = o3d.pipelines.registration.registration_fast_based_on_feature_matching(
source,
target,
source_fpfh,
target_fpfh,
o3d.pipelines.registration.FastGlobalRegistrationOption(
maximum_correspondence_distance=distance_threshold
),
)
transformation = result.transformation
metrics = [result.fitness, result.inlier_rmse, result.correspondence_set]
return transformation, metrics
def fgr(source, target, src_normals, tgt_normals):
source_fpfh = _fpfh(source, src_normals)
target_fpfh = _fpfh(target, tgt_normals)
transformation, metrics = _fast_global_registration(
source=source, target=target, source_fpfh=source_fpfh, target_fpfh=target_fpfh
)
return transformation, metrics
def icp(source, target, T_init=np.eye(4), mcd=0.1, max_iter=50, type="point_to_plane"):
if type == "point_to_point":
result = o3d.pipelines.registration.registration_icp(
source=source,
target=target,
max_correspondence_distance=mcd,
init=T_init,
estimation_method=o3d.pipelines.registration.TransformationEstimationPointToPoint(),
criteria=o3d.pipelines.registration.ICPConvergenceCriteria(
max_iteration=max_iter
),
)
else:
result = o3d.pipelines.registration.registration_icp(
source=source,
target=target,
max_correspondence_distance=mcd,
init=T_init,
estimation_method=o3d.pipelines.registration.TransformationEstimationPointToPlane(),
criteria=o3d.pipelines.registration.ICPConvergenceCriteria(
max_iteration=max_iter
),
)
transformation = result.transformation
metrics = [result.fitness, result.inlier_rmse, result.correspondence_set]
return transformation, metrics
"""
Open3D helper functions
"""
def remove_outlier_pts(points3d, nb_neighbors=20, std_ratio=10.0, vis=False):
points3d_np = (
points3d.cpu().detach().numpy() if torch.is_tensor(points3d) else points3d
)
cloud = o3d.geometry.PointCloud()
cloud.points = copy.deepcopy(o3d.utility.Vector3dVector(points3d_np.transpose()))
cloud_filt, ind_filt = cloud.remove_statistical_outlier(
nb_neighbors=nb_neighbors, std_ratio=std_ratio
)
if vis:
visualize_inlier_outlier(cloud, ind_filt)
points3d_filt = np.asarray(cloud_filt.points).transpose()
points3d_filt = (
torch.tensor(points3d_filt) if torch.is_tensor(points3d) else points3d_filt
)
return points3d_filt
def init_points_to_clouds(clouds, points3d, colors=None):
for idx, cloud in enumerate(clouds):
points3d_np = (
points3d[idx].cpu().detach().numpy()
if torch.is_tensor(points3d[idx])
else points3d[idx]
)
cloud.points = copy.deepcopy(
o3d.utility.Vector3dVector(points3d_np.transpose())
)
if colors is not None:
cloud.paint_uniform_color(colors[idx])
return clouds
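A hedged sketch of reconstructing depth from a normal image with normal_to_depth; the input here is a synthetic flat normal map and the gel dimensions simply repeat the defaults above:

import torch

# synthetic normal image in [0, 1]: every pixel faces the camera
img_normal = torch.zeros(3, 240, 320)
img_normal[2, :, :] = 1.0
img_normal = (img_normal + 1.0) * 0.5

depth = normal_to_depth(img_normal, gel_width=0.02, gel_height=0.03, max_depth=0.02)
print(depth.shape)   # torch.Size([240, 320]); roughly constant for a flat surface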

digit_depth/third_party/poisson.py (vendored, new file, 81 lines)

@@ -0,0 +1,81 @@
"""
poisson_reconstruct.py
Fast Poisson Reconstruction in Python
Copyright (c) 2014 Jack Doerner
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import copy
import math
import numpy
import scipy
import scipy.fftpack
def poisson_reconstruct(grady, gradx, boundarysrc):
# Thanks to Dr. Ramesh Raskar for providing the original matlab code from which this is derived
# Dr. Raskar's version is available here: http://web.media.mit.edu/~raskar/photo/code.pdf
# Laplacian
gyy = grady[1:, :-1] - grady[:-1, :-1]
gxx = gradx[:-1, 1:] - gradx[:-1, :-1]
f = numpy.zeros(boundarysrc.shape)
f[:-1, 1:] += gxx
f[1:, :-1] += gyy
# Boundary image
boundary = copy.deepcopy(boundarysrc) # .copy()
boundary[1:-1, 1:-1] = 0
# Subtract boundary contribution
f_bp = (
-4 * boundary[1:-1, 1:-1]
+ boundary[1:-1, 2:]
+ boundary[1:-1, 0:-2]
+ boundary[2:, 1:-1]
+ boundary[0:-2, 1:-1]
)
f = f[1:-1, 1:-1] - f_bp
# Discrete Sine Transform
tt = scipy.fftpack.dst(f, norm="ortho")
fsin = scipy.fftpack.dst(tt.T, norm="ortho").T
# Eigenvalues
(x, y) = numpy.meshgrid(
range(1, f.shape[1] + 1), range(1, f.shape[0] + 1), copy=True
)
denom = (2 * numpy.cos(math.pi * x / (f.shape[1] + 2)) - 2) + (
2 * numpy.cos(math.pi * y / (f.shape[0] + 2)) - 2
)
f = fsin / denom
# Inverse Discrete Sine Transform
tt = scipy.fftpack.idst(f, norm="ortho")
img_tt = scipy.fftpack.idst(tt.T, norm="ortho").T
# New center + old boundary
result = copy.deepcopy(boundary)
result[1:-1, 1:-1] = img_tt
return result
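A tiny self-contained check of poisson_reconstruct on synthetic data: integrating the gradients of a known surface, with the true values supplied as the boundary, should approximately recover that surface:

import numpy

y, x = numpy.mgrid[0:64, 0:64]
surface = numpy.sin(x / 10.0) + numpy.cos(y / 12.0)      # synthetic smooth height field
grady, gradx = numpy.gradient(surface)                   # row and column gradients
recon = poisson_reconstruct(grady, gradx, surface)       # boundary taken from the true surface
print(numpy.abs(recon - surface).max())                  # small reconstruction error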

digit_depth/third_party/vis_utils.py (vendored, new file, 372 lines)

@@ -0,0 +1,372 @@
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import logging
import time
import types
import cv2
import matplotlib.pyplot as plt
import numpy as np
import open3d as o3d
import torch
from attrdict import AttrDict
from matplotlib.patches import Circle, Rectangle
log = logging.getLogger(__name__)
"""
Open3d visualization functions
"""
class Visualizer3d:
def __init__(self, base_path="", view_params=None, tsleep=0.01):
self.vis = o3d.visualization.VisualizerWithKeyCallback()
self.vis.create_window(top=0, left=750, width=1080, height=1080)
self.tsleep = tsleep
self.base_path = base_path
self.view_params = AttrDict(
{
"fov": 0,
"front": [0.0, 0.0, 0.0],
"lookat": [0.0, 0.0, 0.0],
"up": [0.0, 0.0, 0.0],
"zoom": 0.5,
}
)
if view_params is not None:
self.view_params = view_params
self._init_options()
def _init_options(self):
opt = self.vis.get_render_option()
opt.show_coordinate_frame = True
opt.background_color = np.asarray([0.6, 0.6, 0.6])
# pause option
self.paused = types.SimpleNamespace()
self.paused.value = False
self.vis.register_key_action_callback(
ord("P"),
lambda a, b, c: b == 1
or setattr(self.paused, "value", not self.paused.value),
)
def _init_geom_cloud(self):
return o3d.geometry.PointCloud()
def _init_geom_frame(self, frame_size=0.01, frame_origin=[0, 0, 0]):
return o3d.geometry.TriangleMesh.create_coordinate_frame(
size=frame_size, origin=frame_origin
)
def _init_geom_mesh(self, mesh_name, color=None, wireframe=False):
mesh = o3d.io.read_triangle_mesh(
f"{self.base_path}/local/resources/meshes/{mesh_name}"
)
mesh.compute_vertex_normals()
if wireframe:
mesh = o3d.geometry.LineSet.create_from_triangle_mesh(mesh)
if color is not None:
mesh.paint_uniform_color(color)
return mesh
def init_geometry(
self,
geom_type,
num_items=1,
sizes=None,
file_names=None,
colors=None,
wireframes=None,
):
geom_list = []
for i in range(0, num_items):
if geom_type == "cloud":
geom = self._init_geom_cloud()
elif geom_type == "frame":
frame_size = sizes[i] if sizes is not None else 0.001
geom = self._init_geom_frame(frame_size=frame_size)
elif geom_type == "mesh":
color = colors[i] if colors is not None else None
wireframe = wireframes[i] if wireframes is not None else False
geom = self._init_geom_mesh(file_names[i], color, wireframe)
else:
log.error(
f"[Visualizer3d::init_geometry] geom_type {geom_type} not found."
)
geom_list.append(geom)
return geom_list
def add_geometry(self, geom_list):
if geom_list is None:
return
for geom in geom_list:
self.vis.add_geometry(geom)
def remove_geometry(self, geom_list, reset_bounding_box=False):
if geom_list is None:
return
for geom in geom_list:
self.vis.remove_geometry(geom, reset_bounding_box=reset_bounding_box)
def update_geometry(self, geom_list):
for geom in geom_list:
self.vis.update_geometry(geom)
def set_view(self):
ctr = self.vis.get_view_control()
ctr.change_field_of_view(self.view_params.fov)
ctr.set_front(self.view_params.front)
ctr.set_lookat(self.view_params.lookat)
ctr.set_up(self.view_params.up)
ctr.set_zoom(self.view_params.zoom)
def set_view_cam(self, T):
ctr = self.vis.get_view_control()
cam = ctr.convert_to_pinhole_camera_parameters()
cam.extrinsic = T
ctr.convert_from_pinhole_camera_parameters(cam)
def set_zoom(self):
ctr = self.vis.get_view_control()
ctr.set_zoom(1.5)
def rotate_view(self):
ctr = self.vis.get_view_control()
ctr.rotate(10.0, -0.0)
def pan_scene(self, max=300):
for i in range(0, max):
self.rotate_view()
self.render()
def render(self, T=None):
if T is not None:
self.set_view_cam(T)
else:
self.set_view()
self.vis.poll_events()
self.vis.update_renderer()
time.sleep(self.tsleep)
def transform_geometry_absolute(self, transform_list, geom_list):
for idx, geom in enumerate(geom_list):
T = transform_list[idx]
geom.transform(T)
def transform_geometry_relative(
self, transform_prev_list, transform_curr_list, geom_list
):
for idx, geom in enumerate(geom_list):
T_prev = transform_prev_list[idx]
T_curr = transform_curr_list[idx]
# a. rotate R1^{-1}*R2 about center t1
geom.rotate(
torch.matmul(torch.inverse(T_prev[0:3, 0:3]), T_curr[0:3, 0:3]),
center=(T_prev[0, -1], T_prev[1, -1], T_prev[2, -1]),
)
# b. translate by t2 - t1
geom.translate(
(
T_curr[0, -1] - T_prev[0, -1],
T_curr[1, -1] - T_prev[1, -1],
T_curr[2, -1] - T_prev[2, -1],
)
)
def clear_geometries(self):
self.vis.clear_geometries()
def destroy(self):
self.vis.destroy_window()
def visualize_registration(source, target, transformation, vis3d=None, colors=None):
source_copy = copy.deepcopy(source)
target_copy = copy.deepcopy(target)
source_copy.transform(transformation)
clouds = [target_copy, source_copy]
if colors is not None:
clouds[0].paint_uniform_color(colors[1])
clouds[1].paint_uniform_color(colors[0])
vis3d.add_geometry(clouds)
vis3d.render()
vis3d.remove_geometry(clouds)
def visualize_geometries_o3d(
vis3d, clouds=None, frames=None, meshes=None, transforms=None
):
if meshes is not None:
meshes = [copy.deepcopy(mesh) for mesh in meshes]
if transforms is not None:
vis3d.transform_geometry_absolute(transforms, meshes)
if frames is not None:
frames = [copy.deepcopy(frame) for frame in frames]
if transforms is not None:
vis3d.transform_geometry_absolute(transforms, frames)
if clouds is not None:
vis3d.add_geometry(clouds)
if meshes is not None:
vis3d.add_geometry(meshes)
if frames is not None:
vis3d.add_geometry(frames)
vis3d.render()
if clouds is not None:
vis3d.remove_geometry(clouds)
if meshes is not None:
vis3d.remove_geometry(meshes)
if frames is not None:
vis3d.remove_geometry(frames)
def visualize_inlier_outlier(cloud, ind):
inlier_cloud = cloud.select_by_index(ind)
outlier_cloud = cloud.select_by_index(ind, invert=True)
print("Showing outliers (red) and inliers (gray): ")
outlier_cloud.paint_uniform_color([1, 0, 0])
inlier_cloud.paint_uniform_color([0.8, 0.8, 0.8])
o3d.visualization.draw_geometries(
[inlier_cloud, outlier_cloud],
zoom=0.3412,
front=[0.4257, -0.2125, -0.8795],
lookat=[2.6172, 2.0475, 1.532],
up=[-0.0694, -0.9768, 0.2024],
)
"""
Optical flow visualization functions
"""
def flow_to_color(flow_uv, cvt=cv2.COLOR_HSV2BGR):
hsv = np.zeros((flow_uv.shape[0], flow_uv.shape[1], 3), dtype=np.uint8)
hsv[:, :, 0] = 255
hsv[:, :, 1] = 255
mag, ang = cv2.cartToPolar(flow_uv[..., 0], flow_uv[..., 1])
hsv[..., 0] = ang * 180 / np.pi / 2
hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
flow_color = cv2.cvtColor(hsv, cvt)
return flow_color
def flow_to_arrows(img, flow, step=8):
img = copy.deepcopy(img)
# img = (255 * img).astype(np.uint8)
h, w = img.shape[:2]
y, x = np.mgrid[step / 2 : h : step, step / 2 : w : step].reshape(2, -1).astype(int)
fx, fy = 5.0 * flow[y, x].T
lines = np.vstack([x, y, x + fx, y + fy]).T.reshape(-1, 2, 2)
lines = np.int32(lines + 0.5)
cv2.polylines(img, lines, 0, color=(0, 255, 0), thickness=1)
for (x1, y1), (x2, y2) in lines:
cv2.circle(img, (x1, y1), 1, (0, 255, 0), -1)
return img
def depth_to_color(depth):
gray = (
np.clip((depth - depth.min()) / (depth.max() - depth.min()), 0, 1) * 255
).astype(np.uint8)
return cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
def visualize_flow_cv2(
img1, img2, flow_arrow=None, flow_color=None, win_size=(360, 360)
):
img_disp1 = np.concatenate([img1, img2], axis=1)
cv2.namedWindow("img1, img2", cv2.WINDOW_NORMAL)
cv2.resizeWindow("img1, img2", 2 * win_size[0], win_size[1])
cv2.imshow("img1, img2", img_disp1)
if flow_arrow is not None:
cv2.namedWindow("flow_arrow", cv2.WINDOW_NORMAL)
cv2.resizeWindow("flow_arrow", win_size[0], win_size[1])
cv2.imshow("flow_arrow", flow_arrow)
if flow_color is not None:
cv2.namedWindow("flow_color", cv2.WINDOW_NORMAL)
cv2.resizeWindow("flow_color", win_size[0], win_size[1])
cv2.imshow("flow_color", flow_color)
cv2.waitKey(300)
"""
General visualization functions
"""
def draw_rectangle(
center_x,
center_y,
size_x,
size_y,
ang=0.0,
edgecolor="dimgray",
facecolor=None,
linewidth=2,
):
R = np.array([[np.cos(ang), -np.sin(ang)], [np.sin(ang), np.cos(ang)]])
offset = np.matmul(R, np.array([[0.5 * size_x], [0.5 * size_y]]))
anchor_x = center_x - offset[0]
anchor_y = center_y - offset[1]
rect = Rectangle(
(anchor_x, anchor_y),
size_x,
size_y,
angle=(np.rad2deg(ang)),
facecolor=facecolor,
edgecolor=edgecolor,
linewidth=linewidth,
)
plt.gca().add_patch(rect)
def draw_circle(center_x, center_y, radius):
circle = Circle((center_x, center_y), color="dimgray", radius=radius)
plt.gca().add_patch(circle)
def visualize_imgs(fig, axs, img_list, titles=None, cmap=None):
for idx, img in enumerate(img_list):
if img is None:
continue
im = axs[idx].imshow(img, cmap=cmap)
if cmap is not None:
fig.colorbar(im, ax=axs[idx])
if titles is not None:
axs[idx].set_title(titles[idx])
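A short sketch of the colour-mapping helpers on synthetic inputs; no sensor or display window is needed:

import numpy as np

flow = np.random.randn(240, 320, 2).astype(np.float32)   # synthetic (u, v) optical flow
flow_img = flow_to_color(flow)                            # flow encoded as a BGR image

depth = np.random.rand(240, 320)                          # synthetic depth map
depth_img = depth_to_color(depth)                         # min-max normalized grayscale as BGR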


@@ -0,0 +1,2 @@
from .color2normal_dataset import Color2NormalDataset
from .mlp_model import MLP


@@ -0,0 +1,21 @@
import pandas as pd
import torch
from torch.utils.data import Dataset
class Color2NormalDataset(Dataset):
def __init__(self, csv):
self.data = pd.read_csv(csv)
def __len__(self):
return self.data['X'].count()
def __getitem__(self,idx):
x = self.data['X'][idx]/120
y = self.data['Y'][idx]/160
r = self.data['R'][idx]/255
g = self.data['G'][idx]/255
b = self.data['B'][idx]/255
nx = self.data['Nx'][idx]/255
ny = self.data['Ny'][idx]/255
nz= self.data['Nz'][idx]/255
return torch.tensor((x, y, r, g, b), dtype=torch.float32), torch.tensor((nx, ny, nz), dtype=torch.float32)
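A minimal sketch of wrapping the CSV dataset in a DataLoader; the CSV path is an assumption (the train split produced by create_train_test_csv):

from torch.utils.data import DataLoader

train_ds = Color2NormalDataset("datasets/train_test_split/train.csv")   # hypothetical path
train_loader = DataLoader(train_ds, batch_size=256, shuffle=True)
features, targets = next(iter(train_loader))
print(features.shape, targets.shape)   # torch.Size([256, 5]) torch.Size([256, 3]) for a full batch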


@@ -0,0 +1,27 @@
import torch.nn as nn
import torch.nn.functional as F
class MLP(nn.Module):
dropout_p = 0.05
def __init__(
self, input_size=5, output_size=3, hidden_size=32):
super().__init__()
self.fc1 = nn.Linear(input_size, hidden_size)
self.fc2 = nn.Linear(hidden_size, hidden_size)
self.fc3 = nn.Linear(hidden_size, hidden_size)
self.fc4 = nn.Linear(hidden_size, output_size)
self.drop = nn.Dropout(p=self.dropout_p)
def forward(self, x):
x = F.relu(self.fc1(x))
x = self.drop(x)
x = F.relu(self.fc2(x))
x = self.drop(x)
x = self.fc3(x)
x = self.drop(x)
x = self.fc4(x)
return x
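A quick forward-pass sketch for the MLP; the batch size is arbitrary:

import torch

model = MLP()                   # 5 -> 32 -> 32 -> 32 -> 3 with dropout
batch = torch.rand(256, 5)      # (X, Y, R, G, B) features scaled to [0, 1]
pred_normals = model(batch)     # (256, 3) predicted (Nx, Ny, Nz)
print(pred_normals.shape)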


@@ -0,0 +1,49 @@
import cv2
import torch
import numpy as np
import pandas as pd
import copy
seed = 42
torch.manual_seed(seed)  # torch.seed is a function, not an attribute; use manual_seed to fix the RNG
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def preproc_mlp(image) -> torch.Tensor:
""" Preprocess image for input to model.
Args: image: OpenCV image in BGR format
Return: tensor of shape (R*C,5) where R=320 and C=240 for DIGIT images
5-columns are: X,Y,R,G,B
"""
img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
img = img / 255.0
xy_coords = np.flip(np.column_stack(np.where(np.all(img >= 0, axis=2))), axis=1)
rgb = np.reshape(img, (np.prod(img.shape[:2]), 3))
pixel_numbers = np.expand_dims(np.arange(1, xy_coords.shape[0] + 1), axis=1)
value_base = np.hstack([pixel_numbers, xy_coords, rgb])
df_base = pd.DataFrame(value_base, columns=['pixel_number', 'X', 'Y', 'R', 'G', 'B'])
df_base['X'] = df_base['X'] / 240
df_base['Y'] = df_base['Y'] / 320
del df_base['pixel_number']
test_tensor = torch.tensor(df_base[['X', 'Y', 'R', 'G', 'B']].values, dtype=torch.float32).to(device)
return test_tensor
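A hedged end-to-end inference sketch combining preproc_mlp, the MLP model, and the post_proc_mlp helper defined just below; the image path and checkpoint name are placeholders, and MLP is assumed to be importable (e.g. from digit_depth.train import MLP):

import cv2
import torch

frame = cv2.imread("datasets/imgs/color/image_0.png")    # hypothetical raw DIGIT frame (BGR)
model = MLP().to(device)
# model.load_state_dict(torch.load("models/mlp_color2normal.pt", map_location=device))  # hypothetical checkpoint
model.eval()

with torch.no_grad():
    features = preproc_mlp(frame)            # (320*240, 5) tensor on `device`
    pred = model(features)                   # (320*240, 3) predicted normals
normal_chw, normal_img = post_proc_mlp(pred) # (3, 320, 240) and (320, 240, 3)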
def post_proc_mlp(model_output: torch.Tensor):
""" Postprocess model output to get normal map.
Args: model_output: torch.Tensor of shape (1,3)
Return: two torch.Tensor of shape (1,3)
"""
test_np = model_output.reshape(320, 240, 3)
normal = copy.deepcopy(test_np) # surface normal image
test_np = test_np.clone().detach().to(dtype=torch.float32)  # keep a float tensor for later gradient computation
test_np = test_np.permute(2, 0, 1)  # swap axes to (3, 320, 240)
return test_np, normal