diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..4d821b91
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,352 @@
+## Ignore Visual Studio temporary files, build results, and
+## files generated by popular Visual Studio add-ons.
+##
+## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
+
+# User-specific files
+*.rsuser
+*.suo
+*.user
+*.userosscache
+*.sln.docstates
+
+# User-specific files (MonoDevelop/Xamarin Studio)
+*.userprefs
+
+# Mono auto generated files
+mono_crash.*
+
+# Build results
+[Dd]ebug/
+[Dd]ebugPublic/
+[Rr]elease/
+[Rr]eleases/
+x64/
+x86/
+[Aa][Rr][Mm]/
+[Aa][Rr][Mm]64/
+bld/
+[Bb]in/
+[Oo]bj/
+[Ll]og/
+[Ll]ogs/
+
+# Visual Studio 2015/2017 cache/options directory
+.vs/
+# Uncomment if you have tasks that create the project's static files in wwwroot
+#wwwroot/
+
+# Visual Studio 2017 auto generated files
+Generated\ Files/
+
+# MSTest test Results
+[Tt]est[Rr]esult*/
+[Bb]uild[Ll]og.*
+
+# NUnit
+*.VisualState.xml
+TestResult.xml
+nunit-*.xml
+
+# Build Results of an ATL Project
+[Dd]ebugPS/
+[Rr]eleasePS/
+dlldata.c
+
+# Benchmark Results
+BenchmarkDotNet.Artifacts/
+
+# .NET Core
+project.lock.json
+project.fragment.lock.json
+artifacts/
+
+# StyleCop
+StyleCopReport.xml
+
+# Files built by Visual Studio
+*_i.c
+*_p.c
+*_h.h
+*.ilk
+*.meta
+*.obj
+*.iobj
+*.pch
+*.pdb
+*.ipdb
+*.pgc
+*.pgd
+*.rsp
+*.sbr
+*.tlb
+*.tli
+*.tlh
+*.tmp
+*.tmp_proj
+*_wpftmp.csproj
+*.log
+*.vspscc
+*.vssscc
+.builds
+*.pidb
+*.svclog
+*.scc
+
+# Chutzpah Test files
+_Chutzpah*
+
+# Visual C++ cache files
+ipch/
+*.aps
+*.ncb
+*.opendb
+*.opensdf
+*.sdf
+*.cachefile
+*.VC.db
+*.VC.VC.opendb
+
+# Visual Studio profiler
+*.psess
+*.vsp
+*.vspx
+*.sap
+
+# Visual Studio Trace Files
+*.e2e
+
+# TFS 2012 Local Workspace
+$tf/
+
+# Guidance Automation Toolkit
+*.gpState
+
+# ReSharper is a .NET coding add-in
+_ReSharper*/
+*.[Rr]e[Ss]harper
+*.DotSettings.user
+
+# TeamCity is a build add-in
+_TeamCity*
+
+# DotCover is a Code Coverage Tool
+*.dotCover
+
+# AxoCover is a Code Coverage Tool
+.axoCover/*
+!.axoCover/settings.json
+
+# Visual Studio code coverage results
+*.coverage
+*.coveragexml
+
+# NCrunch
+_NCrunch_*
+.*crunch*.local.xml
+nCrunchTemp_*
+
+# MightyMoose
+*.mm.*
+AutoTest.Net/
+
+# Web workbench (sass)
+.sass-cache/
+
+# Installshield output folder
+[Ee]xpress/
+
+# DocProject is a documentation generator add-in
+DocProject/buildhelp/
+DocProject/Help/*.HxT
+DocProject/Help/*.HxC
+DocProject/Help/*.hhc
+DocProject/Help/*.hhk
+DocProject/Help/*.hhp
+DocProject/Help/Html2
+DocProject/Help/html
+
+# Click-Once directory
+publish/
+
+# Publish Web Output
+*.[Pp]ublish.xml
+*.azurePubxml
+# Note: Comment the next line if you want to checkin your web deploy settings,
+# but database connection strings (with potential passwords) will be unencrypted
+*.pubxml
+*.publishproj
+
+# Microsoft Azure Web App publish settings. Comment the next line if you want to
+# checkin your Azure Web App publish settings, but sensitive information contained
+# in these scripts will be unencrypted
+PublishScripts/
+
+# NuGet Packages
+*.nupkg
+# NuGet Symbol Packages
+*.snupkg
+# The packages folder can be ignored because of Package Restore
+**/[Pp]ackages/*
+# except build/, which is used as an MSBuild target.
+!**/[Pp]ackages/build/
+# Uncomment if necessary however generally it will be regenerated when needed
+#!**/[Pp]ackages/repositories.config
+# NuGet v3's project.json files produces more ignorable files
+*.nuget.props
+*.nuget.targets
+
+# Microsoft Azure Build Output
+csx/
+*.build.csdef
+
+# Microsoft Azure Emulator
+ecf/
+rcf/
+
+# Windows Store app package directories and files
+AppPackages/
+BundleArtifacts/
+Package.StoreAssociation.xml
+_pkginfo.txt
+*.appx
+*.appxbundle
+*.appxupload
+
+# Visual Studio cache files
+# files ending in .cache can be ignored
+*.[Cc]ache
+# but keep track of directories ending in .cache
+!?*.[Cc]ache/
+
+# Others
+ClientBin/
+~$*
+*~
+*.dbmdl
+*.dbproj.schemaview
+*.jfm
+*.pfx
+*.publishsettings
+orleans.codegen.cs
+
+# Including strong name files can present a security risk
+# (https://github.com/github/gitignore/pull/2483#issue-259490424)
+#*.snk
+
+# Since there are multiple workflows, uncomment next line to ignore bower_components
+# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
+#bower_components/
+
+# RIA/Silverlight projects
+Generated_Code/
+
+# Backup & report files from converting an old project file
+# to a newer Visual Studio version. Backup files are not needed,
+# because we have git ;-)
+_UpgradeReport_Files/
+Backup*/
+UpgradeLog*.XML
+UpgradeLog*.htm
+ServiceFabricBackup/
+*.rptproj.bak
+
+# SQL Server files
+*.mdf
+*.ldf
+*.ndf
+
+# Business Intelligence projects
+*.rdl.data
+*.bim.layout
+*.bim_*.settings
+*.rptproj.rsuser
+*- [Bb]ackup.rdl
+*- [Bb]ackup ([0-9]).rdl
+*- [Bb]ackup ([0-9][0-9]).rdl
+
+# Microsoft Fakes
+FakesAssemblies/
+
+# GhostDoc plugin setting file
+*.GhostDoc.xml
+
+# Node.js Tools for Visual Studio
+.ntvs_analysis.dat
+node_modules/
+
+# Visual Studio 6 build log
+*.plg
+
+# Visual Studio 6 workspace options file
+*.opt
+
+# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
+*.vbw
+
+# Visual Studio LightSwitch build output
+**/*.HTMLClient/GeneratedArtifacts
+**/*.DesktopClient/GeneratedArtifacts
+**/*.DesktopClient/ModelManifest.xml
+**/*.Server/GeneratedArtifacts
+**/*.Server/ModelManifest.xml
+_Pvt_Extensions
+
+# Paket dependency manager
+.paket/paket.exe
+paket-files/
+
+# FAKE - F# Make
+.fake/
+
+# CodeRush personal settings
+.cr/personal
+
+# Python Tools for Visual Studio (PTVS)
+__pycache__/
+*.pyc
+
+# Cake - Uncomment if you are using it
+# tools/**
+# !tools/packages.config
+
+# Tabs Studio
+*.tss
+
+# Telerik's JustMock configuration file
+*.jmconfig
+
+# BizTalk build output
+*.btp.cs
+*.btm.cs
+*.odx.cs
+*.xsd.cs
+
+# OpenCover UI analysis results
+OpenCover/
+
+# Azure Stream Analytics local run output
+ASALocalRun/
+
+# MSBuild Binary and Structured Log
+*.binlog
+
+# NVidia Nsight GPU debugger configuration file
+*.nvuser
+
+# MFractors (Xamarin productivity tool) working folder
+.mfractor/
+
+# Local History for Visual Studio
+.localhistory/
+
+# BeatPulse healthcheck temp database
+healthchecksdb
+
+# Backup folder for Package Reference Convert tool in Visual Studio 2017
+MigrationBackup/
+
+# Ionide (cross platform F# VS Code tools) working folder
+.ionide/
+
+*.pth
\ No newline at end of file
diff --git a/README.md b/README.md
index 798e6c34..4e98f4be 100644
--- a/README.md
+++ b/README.md
@@ -80,6 +80,36 @@ python ./core/evaluation/eval_odom.py --gt_txt [path/to/your/groundtruth/poses/t
 ```
 You could evaluate on the sampled KITTI odometry dataset by simply sampling the raw image sequences and gt-pose txt.
 Then run *infer_vo.py* on the sampled image sequence and *eval_odom.py* with predicted txt and sampled gt txt to get results.
+5. To get the predictions for visual odometry on the EuRoC dataset (e.g. sequence `MH_01_easy`), run:
+```bash
+python infer_vo.py --config_file ./config/euroc.yaml \
+    --gpu [gpu_id] \
+    --traj_save_dir_txt [where/to/save/the/prediction/file] \
+    --sequences_root_dir [path/to/EuRoC/root] \
+    --sequence [sequence/name] \
+    --pretrained_model [path/to/your/model]
+```
+
+6. To get the predictions for visual odometry on the TUM-RGBD dataset (e.g. sequence `rgbd_dataset_freiburg1_360`), run:
+```bash
+python infer_vo.py --config_file ./config/tum.yaml \
+    --gpu [gpu_id] \
+    --traj_save_dir_txt [where/to/save/the/prediction/file] \
+    --sequences_root_dir [path/to/TUM/root] \
+    --sequence [sequence/name] \
+    --pretrained_model [path/to/your/model]
+```
+
+7. To get the predictions for visual odometry on the Aqualoc dataset, run:
+```bash
+python infer_vo.py --config_file ./config/aqualoc.yaml \
+    --gpu [gpu_id] \
+    --traj_save_dir_txt [where/to/save/the/prediction/file] \
+    --sequences_root_dir [path/to/Aqualoc/root] \
+    --sequence Archaeological_site_sequences/archaeo_sequence_[sequence_number]_raw_data/raw_data/images_sequence_[sequence_number] \
+    --pretrained_model [path/to/your/model]
+```
+
 ### Citation
 If you find our work useful in your research, please consider citing:
 ```
diff --git a/config/aqualoc.yaml b/config/aqualoc.yaml
new file mode 100644
index 00000000..0083bec5
--- /dev/null
+++ b/config/aqualoc.yaml
@@ -0,0 +1,36 @@
+cfg_name: 'aqualoc'
+
+# dataset
+raw_base_dir: '/home/olaya/Datasets/Aqualoc/Archaeological_site_sequences'
+dataset: 'aqualoc'
+num_scales: 3
+
+# training
+num_iterations: 200000
+
+
+w_ssim: 0.85 # w_pixel = 1 - w_ssim
+w_flow_smooth: 10.0
+w_flow_consis: 0.01
+w_geo: 0.1
+w_pt_depth: 1.0
+w_pj_depth: 0.1
+w_flow_error: 0.0
+w_depth_smooth: 0.0001
+
+
+h_flow_consist_alpha: 3.0
+h_flow_consist_beta: 0.05
+
+ransac_iters: 100
+ransac_points: 6000
+
+# Depth Setting
+depth_match_num: 6000
+depth_sample_ratio: 0.20
+depth_scale: 1
+
+# basic info
+img_hw: [256, 832]
+block_tri_grad: False
+
diff --git a/config/euroc.yaml b/config/euroc.yaml
new file mode 100644
index 00000000..2520de24
--- /dev/null
+++ b/config/euroc.yaml
@@ -0,0 +1,35 @@
+cfg_name: 'euroc'
+
+# dataset
+dataset: 'euroc'
+num_scales: 3
+
+# training
+num_iterations: 200000
+
+
+w_ssim: 0.85 # w_pixel = 1 - w_ssim
+w_flow_smooth: 10.0
+w_flow_consis: 0.01
+w_geo: 0.1
+w_pt_depth: 1.0
+w_pj_depth: 0.1
+w_flow_error: 0.0
+w_depth_smooth: 0.0001
+
+
+h_flow_consist_alpha: 3.0
+h_flow_consist_beta: 0.05
+
+ransac_iters: 100
+ransac_points: 6000
+
+# Depth Setting
+depth_match_num: 6000
+depth_sample_ratio: 0.20
+depth_scale: 1
+
+# basic info
+img_hw: [256, 832]
+block_tri_grad: False
+
diff --git a/config/tum.yaml b/config/tum.yaml
new file mode 100644
index 00000000..aa1c637b
--- /dev/null
+++ b/config/tum.yaml
@@ -0,0 +1,35 @@
+cfg_name: 'tum'
+
+# dataset
+dataset: 'tum'
+num_scales: 3
+
+# training
+num_iterations: 200000
+
+
+w_ssim: 0.85 # w_pixel = 1 - w_ssim
+w_flow_smooth: 10.0
+w_flow_consis: 0.01
+w_geo: 0.1
+w_pt_depth: 1.0
+w_pj_depth: 0.1
+w_flow_error: 0.0
+w_depth_smooth: 0.0001
+
+
+h_flow_consist_alpha: 3.0
+h_flow_consist_beta: 0.05
+
+ransac_iters: 100
+ransac_points: 6000
+
+# Depth Setting
+depth_match_num: 6000
+depth_sample_ratio: 0.20
+depth_scale: 1
+
+# basic info
+img_hw: [256, 832]
+block_tri_grad: False
+
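With one of these configs in place, inference and evaluation chain together through the two entry points (the next diff adds a `--dataset` switch to *eval_odom.py*). A minimal end-to-end sketch for EuRoC, assuming placeholder paths and a ground-truth txt already converted to the 12-numbers-per-line pose format that *eval_odom.py* reads:

```bash
# Sketch only: every path below is a placeholder.
python infer_vo.py --config_file ./config/euroc.yaml \
    --gpu 0 \
    --traj_save_dir_txt ./results/euroc/MH_01_easy.txt \
    --sequences_root_dir /data/EuRoC \
    --sequence MH_01_easy \
    --pretrained_model ./models/pretrained.pth

python ./core/evaluation/eval_odom.py --dataset euroc \
    --gt_txt ./gt/euroc/MH_01_easy.txt \
    --result_txt ./results/euroc/MH_01_easy.txt \
    --seq MH_01_easy
```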
diff --git a/core/evaluation/eval_odom.py b/core/evaluation/eval_odom.py
index b1cb3320..95c534ef 100644
--- a/core/evaluation/eval_odom.py
+++ b/core/evaluation/eval_odom.py
@@ -351,13 +351,299 @@ def eval(self, gt_txt, result_txt, seq=None):
+class EurocEvalOdom():
+    # ----------------------------------------------------------------------
+    # poses: N,4,4
+    # pose: 4,4
+    # ----------------------------------------------------------------------
+    def __init__(self):
+        # Segment lengths (in meters) over which errors are averaged
+        self.lengths = [100, 200, 300, 400, 500, 600, 700, 800]
+        self.num_lengths = len(self.lengths)
+
+    def loadPoses(self, file_name):
+        # ----------------------------------------------------------------------
+        # Each line in the file should follow one of the following structures
+        # (1) idx pose(3x4 matrix in terms of 12 numbers)
+        # (2) pose(3x4 matrix in terms of 12 numbers)
+        # ----------------------------------------------------------------------
+        with open(file_name, 'r') as f:
+            s = f.readlines()
+        poses = {}
+        for cnt, line in enumerate(s):
+            P = np.eye(4)
+            line_split = [float(i) for i in line.split(" ")]
+            withIdx = int(len(line_split) == 13)
+            for row in range(3):
+                for col in range(4):
+                    P[row, col] = line_split[row*4 + col + withIdx]
+            if withIdx:
+                frame_idx = line_split[0]
+            else:
+                frame_idx = cnt
+            poses[frame_idx] = P
+        return poses
+
+    def trajectory_distances(self, poses):
+        # ----------------------------------------------------------------------
+        # poses: dictionary: [frame_idx: pose]
+        # ----------------------------------------------------------------------
+        dist = [0]
+        sort_frame_idx = sorted(poses.keys())
+        for i in range(len(sort_frame_idx)-1):
+            cur_frame_idx = sort_frame_idx[i]
+            next_frame_idx = sort_frame_idx[i+1]
+            P1 = poses[cur_frame_idx]
+            P2 = poses[next_frame_idx]
+            dx = P1[0, 3] - P2[0, 3]
+            dy = P1[1, 3] - P2[1, 3]
+            dz = P1[2, 3] - P2[2, 3]
+            dist.append(dist[i]+np.sqrt(dx**2+dy**2+dz**2))
+        return dist
+
+    def rotation_error(self, pose_error):
+        a = pose_error[0, 0]
+        b = pose_error[1, 1]
+        c = pose_error[2, 2]
+        d = 0.5*(a+b+c-1.0)
+        rot_error = np.arccos(max(min(d, 1.0), -1.0))
+        return rot_error
+
+    def translation_error(self, pose_error):
+        dx = pose_error[0, 3]
+        dy = pose_error[1, 3]
+        dz = pose_error[2, 3]
+        return np.sqrt(dx**2+dy**2+dz**2)
+
+    def last_frame_from_segment_length(self, dist, first_frame, len_):
+        for i in range(first_frame, len(dist), 1):
+            if dist[i] > (dist[first_frame] + len_):
+                return i
+        return -1
+
+    def calc_sequence_errors(self, poses_gt, poses_result):
+        err = []
+        dist = self.trajectory_distances(poses_gt)
+        self.step_size = 10
+
+        for first_frame in range(0, len(poses_gt), self.step_size):
+            for i in range(self.num_lengths):
+                len_ = self.lengths[i]
+                last_frame = self.last_frame_from_segment_length(dist, first_frame, len_)
+
+                # ----------------------------------------------------------------------
+                # Continue if sequence not long enough
+                # ----------------------------------------------------------------------
+                if last_frame == -1 or not(last_frame in poses_result.keys()) or not(first_frame in poses_result.keys()):
+                    continue
+
+                # ----------------------------------------------------------------------
+                # compute rotational and translational errors
+                # ----------------------------------------------------------------------
+                pose_delta_gt = np.dot(np.linalg.inv(poses_gt[first_frame]), poses_gt[last_frame])
+                pose_delta_result = np.dot(np.linalg.inv(poses_result[first_frame]), poses_result[last_frame])
+                pose_error = np.dot(np.linalg.inv(pose_delta_result), pose_delta_gt)
+
+                r_err = self.rotation_error(pose_error)
+                t_err = self.translation_error(pose_error)
+
+                # ----------------------------------------------------------------------
+                # compute speed (assumes a 10 Hz frame rate, as in the KITTI devkit)
+                # ----------------------------------------------------------------------
+                num_frames = last_frame - first_frame + 1.0
+                speed = len_/(0.1*num_frames)
+
+                err.append([first_frame, r_err/len_, t_err/len_, len_, speed])
+        return err
+
+    def save_sequence_errors(self, err, file_name):
+        fp = open(file_name, 'w')
+        for i in err:
+            line_to_write = " ".join([str(j) for j in i])
+            fp.writelines(line_to_write+"\n")
+        fp.close()
+
+    def compute_overall_err(self, seq_err):
+        t_err = 0
+        r_err = 0
+
+        seq_len = len(seq_err)
+
+        for item in seq_err:
+            r_err += item[1]
+            t_err += item[2]
+        ave_t_err = t_err / seq_len
+        ave_r_err = r_err / seq_len
+        return ave_t_err, ave_r_err
+
+    def plotPath(self, seq, poses_gt, poses_result):
+        plot_keys = ["Ground Truth", "Ours"]
+        fontsize_ = 20
+
+        poses_dict = {}
+        poses_dict["Ground Truth"] = poses_gt
+        poses_dict["Ours"] = poses_result
+
+        fig = plt.figure()
+        ax = plt.gca()
+        ax.set_aspect('equal')
+
+        for key in plot_keys:
+            pos_xz = []
+            for frame_idx in sorted(poses_dict[key].keys()):
+                pose = poses_dict[key][frame_idx]
+                pos_xz.append([pose[0, 3], pose[2, 3]])
+            pos_xz = np.asarray(pos_xz)
+            plt.plot(pos_xz[:, 0], pos_xz[:, 1], label=key)
+
+        plt.legend(loc="upper right", prop={'size': fontsize_})
+        plt.xticks(fontsize=fontsize_)
+        plt.yticks(fontsize=fontsize_)
+        plt.xlabel('x (m)', fontsize=fontsize_)
+        plt.ylabel('z (m)', fontsize=fontsize_)
+        fig.set_size_inches(10, 10)
+        png_title = "sequence_" + seq
+        plt.savefig(self.plot_path_dir + "/" + png_title + ".pdf", bbox_inches='tight', pad_inches=0)
+
+    def compute_segment_error(self, seq_errs):
+        # ----------------------------------------------------------------------
+        # This function calculates average errors for different segment lengths.
+        # ----------------------------------------------------------------------
+
+        segment_errs = {}
+        avg_segment_errs = {}
+        for len_ in self.lengths:
+            segment_errs[len_] = []
+        # ----------------------------------------------------------------------
+        # Get errors
+        # ----------------------------------------------------------------------
+        for err in seq_errs:
+            len_ = err[3]
+            t_err = err[2]
+            r_err = err[1]
+            segment_errs[len_].append([t_err, r_err])
+        # ----------------------------------------------------------------------
+        # Compute average
+        # ----------------------------------------------------------------------
+        for len_ in self.lengths:
+            if segment_errs[len_] != []:
+                avg_t_err = np.mean(np.asarray(segment_errs[len_])[:, 0])
+                avg_r_err = np.mean(np.asarray(segment_errs[len_])[:, 1])
+                avg_segment_errs[len_] = [avg_t_err, avg_r_err]
+            else:
+                avg_segment_errs[len_] = []
+        return avg_segment_errs
+
+    def scale_optimization(self, gt, pred):
+        """ Optimize scaling factor
+        Args:
+            gt (4x4 array dict): ground-truth poses
+            pred (4x4 array dict): predicted poses
+        Returns:
+            new_pred (4x4 array dict): predicted poses after optimization
+        """
+        pred_updated = copy.deepcopy(pred)
+        xyz_pred = []
+        xyz_ref = []
+        for i in pred:
+            pose_pred = pred[i]
+            pose_ref = gt[i]
+            xyz_pred.append(pose_pred[:3, 3])
+            xyz_ref.append(pose_ref[:3, 3])
+        xyz_pred = np.asarray(xyz_pred)
+        xyz_ref = np.asarray(xyz_ref)
+        scale = scale_lse_solver(xyz_pred, xyz_ref)
+        for i in pred_updated:
+            pred_updated[i][:3, 3] *= scale
+        return pred_updated
+
+    def eval(self, gt_txt, result_txt, seq=None):
+        # gt_txt: path of the ground-truth poses txt
+        # result_txt: path of the predicted poses txt
+        self.plot_path_dir = os.path.dirname(result_txt) + "/plot_path"
+        if not os.path.exists(self.plot_path_dir):
+            os.makedirs(self.plot_path_dir)
+
+        self.gt_txt = gt_txt
+
+        ave_t_errs = []
+        ave_r_errs = []
+
+        poses_result = self.loadPoses(result_txt)
+        poses_gt = self.loadPoses(self.gt_txt)
+
+        # Pose alignment to first frame
+        idx_0 = sorted(list(poses_result.keys()))[0]
+        pred_0 = poses_result[idx_0]
+        gt_0 = poses_gt[idx_0]
+        for cnt in poses_result:
+            poses_result[cnt] = np.linalg.inv(pred_0) @ poses_result[cnt]
+            poses_gt[cnt] = np.linalg.inv(gt_0) @ poses_gt[cnt]
+
+        # get XYZ
+        xyz_gt = []
+        xyz_result = []
+        for cnt in poses_result:
+            xyz_gt.append([poses_gt[cnt][0, 3], poses_gt[cnt][1, 3], poses_gt[cnt][2, 3]])
+            xyz_result.append([poses_result[cnt][0, 3], poses_result[cnt][1, 3], poses_result[cnt][2, 3]])
+        xyz_gt = np.asarray(xyz_gt).transpose(1, 0)
+        xyz_result = np.asarray(xyz_result).transpose(1, 0)
+
+        # 7-DoF (Umeyama) alignment of the prediction to the ground truth
+        r, t, scale = umeyama_alignment(xyz_result, xyz_gt, True)
+
+        align_transformation = np.eye(4)
+        align_transformation[:3, :3] = r
+        align_transformation[:3, 3] = t
+
+        for cnt in poses_result:
+            poses_result[cnt][:3, 3] *= scale
+            poses_result[cnt] = align_transformation @ poses_result[cnt]
+
+        # ----------------------------------------------------------------------
+        # compute sequence errors
+        # ----------------------------------------------------------------------
+        seq_err = self.calc_sequence_errors(poses_gt, poses_result)
+
+        # ----------------------------------------------------------------------
+        # Compute segment errors
+        # ----------------------------------------------------------------------
+        avg_segment_errs = self.compute_segment_error(seq_err)
+
+        # ----------------------------------------------------------------------
+        # compute overall error
+        # ----------------------------------------------------------------------
+        ave_t_err, ave_r_err = self.compute_overall_err(seq_err)
+        print("Sequence: " + str(seq))
+        print("Translational error (%): ", ave_t_err*100)
+        print("Rotational error (deg/100m): ", ave_r_err/np.pi*180*100)
+        ave_t_errs.append(ave_t_err)
+        ave_r_errs.append(ave_r_err)
+
+        # Plotting
+        self.plotPath(seq, poses_gt, poses_result)
+
+        print("-------------------- For Copying ------------------------------")
+        for i in range(len(ave_t_errs)):
+            print("{0:.2f}".format(ave_t_errs[i]*100))
+            print("{0:.2f}".format(ave_r_errs[i]/np.pi*180*100))
+
+
 if __name__ == '__main__':
     import argparse
-    parser = argparse.ArgumentParser(description='KITTI evaluation')
+    parser = argparse.ArgumentParser(description='Odometry evaluation')
+    parser.add_argument('--dataset', type=str, required=True, help="dataset name, e.g. kitti or euroc")
     parser.add_argument('--gt_txt', type=str, required=True, help="Groundtruth directory")
     parser.add_argument('--result_txt', type=str, required=True, help="Result directory")
     parser.add_argument('--seq', type=str, help="sequences to be evaluated", default='09')
     args = parser.parse_args()
-
-    eval_tool = KittiEvalOdom()
-    eval_tool.eval(args.gt_txt, args.result_txt, seq=args.seq)
+    if 'kitti' in args.dataset:
+        eval_tool = KittiEvalOdom()
+        eval_tool.eval(args.gt_txt, args.result_txt, seq=args.seq)
+    elif 'euroc' in args.dataset:
+        eval_tool = EurocEvalOdom()
+        eval_tool.eval(args.gt_txt, args.result_txt, seq=args.seq)
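The evaluator added above can also be driven programmatically instead of through the CLI; a small sketch, with hypothetical file paths:

```python
# Hypothetical usage of EurocEvalOdom; all paths are placeholders.
from core.evaluation.eval_odom import EurocEvalOdom

evaluator = EurocEvalOdom()
# Both txt files hold one pose per line: 12 numbers (a flattened 3x4 matrix),
# optionally preceded by a frame index.
evaluator.eval('gt/MH_01_easy.txt', 'results/MH_01_easy.txt', seq='MH_01_easy')
```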
diff --git a/infer_vo.py b/infer_vo.py
index 7b83cc26..7540b38c 100644
--- a/infer_vo.py
+++ b/infer_vo.py
@@ -21,7 +21,8 @@ def save_traj(path, poses):
         path: file path of saved poses
         poses: list of global poses
     """
-    f = open(path, 'w')
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    f = open(path, 'w+')
     for i in range(len(poses)):
         pose = poses[i].flatten()[:12] # [3x4]
         line = " ".join([str(j) for j in pose])
@@ -74,17 +75,31 @@ def cv_triangulation(matches, pose):
     return points1, points2
 
 class infer_vo():
-    def __init__(self, seq_id, sequences_root_dir):
+    def __init__(self, dataset, seq_id, sequences_root_dir):
+        self.dataset = dataset
         self.img_dir = sequences_root_dir
         #self.img_dir = '/home4/zhaow/data/kitti_odometry/sampled_s4_sequences/'
         self.seq_id = seq_id
-        self.raw_img_h = 370.0#320
-        self.raw_img_w = 1226.0#1024
+        # Native image resolution of each supported dataset
+        if 'kitti' in self.dataset.lower():
+            self.raw_img_h = 370.0
+            self.raw_img_w = 1226.0
+        elif 'euroc' in self.dataset:
+            self.raw_img_h = 480.0
+            self.raw_img_w = 752.0
+        elif 'aqualoc' in self.dataset:
+            self.raw_img_h = 608.0
+            self.raw_img_w = 968.0
+        elif 'mimir' in self.dataset:
+            self.raw_img_h = 540.0
+            self.raw_img_w = 720.0
+        elif 'tum' in self.dataset:
+            self.raw_img_h = 480.0
+            self.raw_img_w = 640.0
         self.new_img_h = 256#320
         self.new_img_w = 832#1024
         self.max_depth = 50.0
         self.min_depth = 0.0
-        self.cam_intrinsics = self.read_rescale_camera_intrinsics(os.path.join(self.img_dir, seq_id) + '/calib.txt')
+        self.cam_intrinsics = self.read_rescale_camera_intrinsics(os.path.join(self.img_dir, seq_id))
         self.flow_pose_ransac_thre = 0.1 #0.2
         self.flow_pose_ransac_times = 10 #5
         self.flow_pose_min_flow = 5
@@ -97,19 +112,62 @@ def __init__(self, seq_id, sequences_root_dir):
         self.PnP_ransac_times = 5
 
     def read_rescale_camera_intrinsics(self, path):
-        raw_img_h = self.raw_img_h
-        raw_img_w = self.raw_img_w
-        new_img_h = self.new_img_h
-        new_img_w = self.new_img_w
-        with open(path, 'r') as f:
-            lines = f.readlines()
-        data = lines[-1].strip('\n').split(' ')[1:]
-        data = [float(k) for k in data]
-        data = np.array(data).reshape(3,4)
-        cam_intrinsics = data[:3,:3]
-        cam_intrinsics[0,:] = cam_intrinsics[0,:] * new_img_w / raw_img_w
-        cam_intrinsics[1,:] = cam_intrinsics[1,:] * new_img_h / raw_img_h
-        return cam_intrinsics
+        # Each branch loads the native 3x3 intrinsics; the shared tail then
+        # scales the x row of K by new_w / raw_w and the y row by new_h / raw_h
+        # to match the resized input images.
+        if 'kitti' in self.dataset.lower():
+            calib_path = os.path.join(path, 'calib.txt')
+            with open(calib_path, 'r') as f:
+                lines = f.readlines()
+            data = lines[-1].strip('\n').split(' ')[1:]
+            data = [float(k) for k in data]
+            data = np.array(data).reshape(3, 4)
+            cam_intrinsics = data[:3, :3]
+        elif 'aqualoc' in self.dataset:
+            # The calibration files live three directories above the image folder
+            calib_root = os.path.dirname(os.path.dirname(os.path.dirname(path)))
+            intrinsics_file = os.path.join(calib_root, 'archaeo_calibration_files', 'archaeo_camera_calib.yaml')
+            with open(intrinsics_file, 'r') as f:
+                sensor = yaml.safe_load(f)['cam0']
+            fx, fy, cx, cy = sensor['intrinsics'][:4]
+            cam_intrinsics = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
+        elif 'euroc' in self.dataset:
+            intrinsics_file = os.path.join(path, 'mav0', 'cam0', 'sensor.yaml')
+            with open(intrinsics_file, 'r') as f:
+                sensor = yaml.safe_load(f)
+            fx, fy, cx, cy = sensor['intrinsics'][:4]
+            cam_intrinsics = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
+        elif 'mimir' in self.dataset:
+            intrinsics_file = os.path.join(path, 'auv0', 'rgb', 'cam0', 'sensor.yaml')
+            with open(intrinsics_file, 'r') as f:
+                sensor = yaml.safe_load(f)
+            cam_intrinsics = np.asarray(sensor['intrinsics']).reshape((3, 3))
+        elif 'tum' in self.dataset:
+            # Default TUM RGB-D pinhole intrinsics
+            cam_intrinsics = np.array([[525.0, 0, 319.5], [0, 525.0, 239.5], [0, 0, 1]])
+        cam_intrinsics[0, :] = cam_intrinsics[0, :] * self.new_img_w / self.raw_img_w
+        cam_intrinsics[1, :] = cam_intrinsics[1, :] * self.new_img_h / self.raw_img_h
+        return cam_intrinsics
 
     def load_images(self):
         path = self.img_dir
@@ -117,14 +175,63 @@ def __init__(self, seq_id, sequences_root_dir):
         new_img_h = self.new_img_h
         new_img_w = self.new_img_w
         seq_dir = os.path.join(path, seq)
-        image_dir = os.path.join(seq_dir, 'image_2')
-        num = len(os.listdir(image_dir))
-        images = []
-        for i in range(num):
-            image = cv2.imread(os.path.join(image_dir, '%.6d'%i)+'.png')
-            image = cv2.resize(image, (new_img_w, new_img_h))
-            images.append(image)
-        return images
+
+        if 'kitti' in self.dataset.lower():
+            image_dir = os.path.join(seq_dir, 'image_2')
+            num = len(os.listdir(image_dir))
+            images = []
+            for i in range(num):
+                image = cv2.imread(os.path.join(image_dir, '%.6d' % i) + '.png')
+                image = cv2.resize(image, (new_img_w, new_img_h))
+                images.append(image)
+            return images
+
+        elif 'euroc' in self.dataset:
+            image_dir = os.path.join(seq_dir, 'mav0', 'cam0', 'data')
+            images = []
+            for img in sorted(os.listdir(image_dir)):
+                image = cv2.imread(os.path.join(image_dir, img), 1)
+                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+                image = cv2.resize(image, (new_img_w, new_img_h))
+                images.append(image)
+            return images
+
+        elif 'mimir' in self.dataset:
+            image_dir = os.path.join(seq_dir, 'auv0', 'rgb', 'cam0', 'data')
+            images = []
+            for img in sorted(os.listdir(image_dir)):
+                if 'png' in img:
+                    image = cv2.imread(os.path.join(image_dir, img), 1)
+                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+                    image = cv2.resize(image, (new_img_w, new_img_h))
+                    images.append(image)
+            return images
+
+        elif 'tum' in self.dataset:
+            image_dir = os.path.join(seq_dir, 'rgb')
+            images = []
+            for img in sorted(os.listdir(image_dir)):
+                if 'png' in img:
+                    image = cv2.imread(os.path.join(image_dir, img))
+                    image = cv2.resize(image, (new_img_w, new_img_h))
+                    images.append(image)
+            return images
+
+        elif 'aqualoc' in self.dataset:
+            image_dir = seq_dir
+            images = []
+            for img in sorted(os.listdir(image_dir)):
+                image = cv2.imread(os.path.join(image_dir, img), 1)
+                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+                image = cv2.resize(image, (new_img_w, new_img_h))
+                images.append(image)
+            return images
 
     def get_prediction(self, img1, img2, model, K, K_inv, match_num):
         # img1: [3,H,W] K: [3,3]
@@ -303,7 +410,8 @@ def solve_pose_flow(self, xy1, xy2):
 
     with open(args.config_file, 'r') as f:
         cfg = yaml.safe_load(f)
-    cfg['dataset'] = 'kitti_odo'
+    # The dataset is now read from the config instead of being hardcoded.
+    dataset = cfg['dataset']
     # copy attr into cfg
     for attr in dir(args):
         if attr[:2] != '__':
@@ -326,7 +434,7 @@ def __init__(self):
     print('Model Loaded.')
 
     print('Testing VO.')
-    vo_test = infer_vo(args.sequence, args.sequences_root_dir)
+    vo_test = infer_vo(dataset, args.sequence, args.sequences_root_dir)
    images = vo_test.load_images()
     print('Images Loaded. Total ' + str(len(images)) + ' images found.')
     poses = vo_test.process_video(images, model)
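Every dataset branch of `read_rescale_camera_intrinsics` applies the same resize rule to the pinhole matrix. A standalone sketch of that rule, not part of the patch (function name and the example values are illustrative):

```python
import numpy as np

def rescale_intrinsics(K, raw_hw, new_hw):
    """Rescale a 3x3 pinhole intrinsics matrix for resized images.

    The x row (fx, 0, cx) scales with the width ratio and the
    y row (0, fy, cy) with the height ratio.
    """
    K = K.astype(np.float64)  # astype returns a copy, so K is not modified in place
    K[0, :] *= new_hw[1] / raw_hw[1]  # new_w / raw_w
    K[1, :] *= new_hw[0] / raw_hw[0]  # new_h / raw_h
    return K

# Example: a EuRoC-style cam0 matrix (values illustrative), 752x480 resized to 832x256.
K_cam0 = np.array([[458.654, 0.0, 367.215],
                   [0.0, 457.296, 248.375],
                   [0.0, 0.0, 1.0]])
print(rescale_intrinsics(K_cam0, raw_hw=(480, 752), new_hw=(256, 832)))
```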