-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
405 lines (311 loc) · 15.2 KB
/
main.py
File metadata and controls
405 lines (311 loc) · 15.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Main entry point for the camera localization pipeline.
This script organizes the full workflow:
- setting up camera objects
- loading and processing map and frame data
- creating and visualizing the railway, generating keyframes
- running the optimization routine
Adjust parameters and interact with the pipeline here for experiments or evaluation.
"""
# External Libraries
import os
import pickle
import numpy as np
import cv2
import matplotlib.pyplot as plt
# Methods & Data
import data
from camera import Camera
from railway import Railway
from keyframe import Frame, Keyframe
from transformation import Transformation
import sys
src_directory = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, src_directory)
import cpp.optimization
def create_frames(ids: list[int], include_elevation: bool = True) -> list[Frame]:
    """
    Create a list of basic frames for processing of the Railway object.

    Frames whose GPS record is incomplete are skipped and simply not
    included in the returned list.

    Args:
        ids: List of frame IDs.
        include_elevation: Whether to include elevation data.

    Returns:
        List of Frame objects with complete GPS data.
    """
    frames: list[Frame] = []
    for frame_id in ids:
        # IDs may arrive as numpy integers; normalise to plain int.
        frame_id = int(frame_id)
        frame = Frame(frame_id, include_elevation)
        if frame.gps.is_missing_data:
            # FIX: the original called np.delete(ids, ...) here and discarded
            # the result (np.delete returns a NEW array), so it was a no-op.
            # Skipping the frame is sufficient; the input list is untouched.
            print("Frame #", frame_id, "missing data >> skipped")
            continue
        print("Frame #", frame_id, "created")
        frames.append(frame)
    return frames
def create_keyframes(ids: list[int], camera_0: Camera, camera_1: Camera, railway: Railway) -> list[Keyframe]:
    """
    Create a list of sophisticated keyframes from the same camera for optimisation.

    Requires an existing Railway object (to create an attribute of local points).
    Keyframes whose GPS record is incomplete are skipped and simply not
    included in the returned list.

    Args:
        ids: List of keyframe IDs.
        camera_0: Camera object for camera 0.
        camera_1: Camera object for camera 1.
        railway: Railway object.

    Returns:
        List of Keyframe objects with complete GPS data.
    """
    keyframes: list[Keyframe] = []
    for keyframe_id in ids:
        # IDs may arrive as numpy integers; normalise to plain int.
        keyframe_id = int(keyframe_id)
        keyframe = Keyframe(keyframe_id, camera_0, camera_1, railway)
        if keyframe.gps.is_missing_data:
            # FIX: the original called np.delete(ids, ...) here and discarded
            # the result (np.delete returns a NEW array), so it was a no-op.
            # Skipping the keyframe is sufficient; the input list is untouched.
            print("Keyframe #", keyframe_id, "missing data >> skipped")
            continue
        print("Keyframe #", keyframe_id, "created")
        keyframes.append(keyframe)
    return keyframes
def main():
    """
    Main workflow for the camera localization pipeline:
    1. Set up camera objects and initial pose
    2. Create/load Railway object from frames
    3. Visualize railway and frame data (optional)
    4. Create keyframes for optimization
    5. Visualize initial reprojections (before optimization)
    6. Optimize camera poses using C++ backend
    7. Compute and print stereo camera transformation & accuracy
    8. Visualize final reprojections (after optimization)
    """

    # ------------------------------------------------------------------
    # 1. Set up camera objects and initial pose
    # ------------------------------------------------------------------
    # Camera pose: initial guess.
    # Camera frame (camf) = position of GPS frame with rotation to
    # approximate camera frame.
    # Camera frame in GPS frame: fixed axis permutation (x->z, y->-x, z->-y).
    R_gps_camf = np.array([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
    t_gps_camf = np.array([[0], [0], [0]])
    H_gps_camf = Transformation.compile_homogeneous_transformation(R_gps_camf, t_gps_camf)

    # Alternative initial guesses kept for experimentation:
    # offset_camf_cam = [0, -1.70, 0]
    # angles_camf_cam = [-0.00, -0.01, 0.0]
    offset_camf_cam = [0., -1.5, -8.]
    angles_camf_cam = [0., 0., 0.]
    # Median estimates from report:
    # Camera 1
    # offset_camf_cam = [1.28, -1.05, 0.04]
    # angles_camf_cam = [-0.31, -0.48, 0.0]
    # Camera 2
    # offset_camf_cam = [0.97, -0.99, 0.48]
    # angles_camf_cam = [0.73, 1.57, 0.0]

    # Camera in camera frame
    t_camf_cam = np.array(offset_camf_cam).transpose()
    R_camf_cam = Transformation.convert_euler_angles(angles_camf_cam, to_type="rotation matrix")
    H_camf_cam = Transformation.compile_homogeneous_transformation(R_camf_cam, t_camf_cam)
    initial_H_gps_cam = H_gps_camf @ H_camf_cam

    # Camera parameters:
    # - specific intrinsics, distortion coefficients, initial guess of pose
    # - camera poses will automatically be optimised separately for each camera,
    #   assuming the keyframes are initialised with the correct corresponding cameras
    # NOTE: intrinsics are doubled — presumably calibrated at half resolution
    # and scaled up to the 1920x1200 images; TODO confirm against calibration data.
    camera_0 = Camera(0, image_size=(1920, 1200),
                      focal_length=[2*698.8258566937119, 2*698.6495855393557],
                      principal_point=[2*492.9705850660823, 2*293.3927615928415],
                      distortion_coefficients=[0.15677829213401465, 0.1193983032005652, -0.011287926707786692, 0.23013426724923478],
                      initial_H_gps_cam=initial_H_gps_cam)
    camera_1 = Camera(1, image_size=(1920, 1200),
                      focal_length=[2*698.5551622016458, 2*698.6714179616163],
                      principal_point=[2*489.7761220331873, 2*297.9103051604121],
                      distortion_coefficients=[0.16213733886648546, 0.11615082942281711, -0.05779296718987261, 0.324051803251489],
                      initial_H_gps_cam=initial_H_gps_cam)
    # cameras = [camera_0, camera_1]

    # ------------------------------------------------------------------
    # 2. Create/load Railway object from frames
    # ------------------------------------------------------------------
    # Dense frames for Railway processing.
    # FIX: the original assigned frame_ids twice in a row; the first
    # assignment was dead code and is kept here only as a commented option.
    # frame_ids = np.arange(0, 5495, 10)
    frame_ids = np.arange(4000, 4500, 1)

    # Railway object: process new (and save to file) or load existing
    max_gap = 20
    r_ahead = 100
    r_behind = 50
    if (not os.path.isfile(data.railway_object_file)) or (input("Create a new railway object? [y/n]: ") == 'y'):
        print("Creating new railway object")
        frames = create_frames(frame_ids, include_elevation=False)
        railway = Railway(frames, max_gap=max_gap, r_ahead=r_ahead, r_behind=r_behind)
        with open(data.railway_object_file, 'wb') as file:
            pickle.dump(railway, file)
        # NOTE: "sucessfully" typo preserved — message text is runtime output.
        print("Railway object sucessfully saved to file.")
    else:
        print("Loading railway object from file")
        with open(data.railway_object_file, 'rb') as file:
            railway = pickle.load(file)
    # railway.plot_map()

    # ------------------------------------------------------------------
    # 3. Visualize railway and frame data (optional)
    # ------------------------------------------------------------------
    if input("Visualize railway / frames? [y/n]: ") == 'y':
        if input("2D? [y/n] ") == 'y':
            frames = create_frames(frame_ids, include_elevation=False)
            # railway.visualize_2D(show_tracks=False, frames=[])
            # railway.visualize_2D(show_tracks=True, frames=[])
            railway.visualize_2D(show_tracks=True, frames=frames)
        if input("GPS Height vs. Elevation? [y/n] ") == 'y':
            frames = create_frames(frame_ids, include_elevation=True)
            # Earlier plot configuration kept for reference:
            # for frame in frames:
            #     point = frame.gps.t_w_gps
            #     plt.scatter(frame.id, point[2], s=3, c='black')
            #     plt.scatter(frame.id, frame.gps.elevation, s=3, c='red')
            # plt.grid(color='grey', linestyle='-', linewidth=0.5, alpha=0.5)
            # plt.xticks(np.arange(0, 5500, 500))
            # plt.yticks(np.arange(30, 45, 1))
            # plt.xlabel("Frame ID")
            # plt.ylabel("Height [m]")
            # plt.legend(["GPS height", "Local elevation"])
            # plt.show()
            for frame in frames:
                point = frame.gps.t_w_gps
                plt.scatter(frame.id, point[2], s=3, c='blue')
                plt.scatter(frame.id, frame.gps.elevation, s=3, c='red')
            plt.xticks(np.arange(4000, 4500, 100))
            # plt.yticks(np.arange(33, 37, 0.2))
            plt.xlabel("Frame ID")
            plt.ylabel("Height [m]")
            plt.legend(["GPS height", "Local elevation"])
            plt.show()
        if input("GPS Rotation? [y/n] ") == 'y':
            frames = create_frames(frame_ids, include_elevation=False)
            # FIX: the original read yaw_0/pitch_0/roll_0 from frames[0] and
            # then immediately overwrote them with 0 — dead reads, kept as a
            # commented option for plotting relative to the first frame.
            # yaw_0 = frames[0].gps.angles_deg[0]
            # pitch_0 = frames[0].gps.angles_deg[1]
            # roll_0 = frames[0].gps.angles_deg[2]
            yaw_0 = 0
            pitch_0 = 0
            roll_0 = 0
            for frame in frames:
                yaw = frame.gps.angles_deg[0] - yaw_0
                pitch = frame.gps.angles_deg[1] - pitch_0
                roll = frame.gps.angles_deg[2] - roll_0
                plt.scatter(frame.id, roll, s=3, c='red')
                plt.scatter(frame.id, pitch, s=3, c='green')
                plt.scatter(frame.id, yaw, s=3, c='blue')
            # plt.grid(color='grey', linestyle='-', linewidth=0.5, alpha=0.5)
            plt.xlabel("Frame ID")
            plt.ylabel("Rotation [deg]")
            plt.legend(["Roll", "Pitch", "Yaw"])
            plt.show()
        if input("3D? [y/n] ") == 'y':
            # FIX: the original passed a stray camera_0 argument here, which
            # does not match create_frames(ids, include_elevation) and would
            # raise a TypeError at runtime.
            frames = create_frames(frame_ids, include_elevation=True)
            railway.visualize_3D(frames)
    if input("Continue to optimize camera pose? [y/n]: ") == 'n':
        exit()

    # ------------------------------------------------------------------
    # 4. Create keyframes for optimization
    # ------------------------------------------------------------------
    # Sparse keyframes for optimization -- subset of prior frames
    # keyframe_ids = [780, 1090, 1430, 2770, 3110, 4500, 5410, 5420, 5430, 5440, 5450, 5460, 5470, 5480]
    # keyframe_ids = [780, 3110, 5410]
    # keyframe_ids = [5410, 5420, 5430, 5440, 5450, 5460, 5470, 5480]
    # keyframe_ids = [5410, 5420, 5430]
    # keyframe_ids = [780, 1090, 1430]
    # keyframe_ids = [490, 570, 950]
    # keyframe_ids = [570, 950]
    # keyframe_ids = [5410]
    keyframe_ids: list[int] = [490, 570, 950]
    # for keyframe_id in keyframe_ids:
    #     assert keyframe_id in frame_ids, "Keyframe ID not in frame IDs"
    keyframes = create_keyframes(keyframe_ids, camera_0, camera_1, railway)

    # ------------------------------------------------------------------
    # 5. Visualize initial reprojections (before optimization)
    # ------------------------------------------------------------------
    print("Reprojecting points using initial camera pose")
    for keyframe in keyframes:  # FIX: dropped unused enumerate() index
        annotated_visual_0 = keyframe.annotation_0.visualize_splines_and_points()
        annotated_visual_1 = keyframe.annotation_1.visualize_splines_and_points()
        cv2.imwrite(data.path_to_visualization_initial + keyframe.filename + "_annotated_0.jpg", annotated_visual_0)
        cv2.imwrite(data.path_to_visualization_initial + keyframe.filename + "_annotated_1.jpg", annotated_visual_1)
        reprojected_visual_0 = keyframe.visualize_reprojected_and_original_points(camera_0)
        reprojected_visual_1 = keyframe.visualize_reprojected_and_original_points(camera_1)
        cv2.imwrite(data.path_to_visualization_initial + keyframe.filename + "_reprojected_0.jpg", reprojected_visual_0)
        cv2.imwrite(data.path_to_visualization_initial + keyframe.filename + "_reprojected_1.jpg", reprojected_visual_1)
        combined_visual_0 = keyframe.visualize_reprojected_points(camera_0, keyframe.annotation_0.visualize_splines())
        combined_visual_1 = keyframe.visualize_reprojected_points(camera_1, keyframe.annotation_1.visualize_splines())
        cv2.imwrite(data.path_to_visualization_initial + keyframe.filename + "_combined_0.jpg", combined_visual_0)
        cv2.imwrite(data.path_to_visualization_initial + keyframe.filename + "_combined_1.jpg", combined_visual_1)

    # ------------------------------------------------------------------
    # 6. Optimize camera poses using C++ backend
    # ------------------------------------------------------------------
    print("Optimizing camera pose")
    iterations = 10
    # Camera 0: register keyframes with the backend, then optimize the pose.
    cpp.optimization.reset_keyframes()
    for keyframe in keyframes:
        assert keyframe.camera_0 == camera_0
        cpp.optimization.add_keyframe(
            keyframe.filename,
            str(keyframe.camera_0.id),
            np.asarray(keyframe.image_0),
            keyframe.annotation_0.array,
            keyframe.points_gps_array_0)
    new_camera_pose = cpp.optimization.update_camera_pose(camera_0.pose_vector, camera_0.intrinsics_vector, iterations, data.path_to_visualization_optimization)
    camera_0.update_pose(new_camera_pose)
    # Camera 1: same procedure with the second camera's data.
    cpp.optimization.reset_keyframes()
    for keyframe in keyframes:
        assert keyframe.camera_1 == camera_1
        cpp.optimization.add_keyframe(
            keyframe.filename,
            str(keyframe.camera_1.id),
            np.asarray(keyframe.image_1),
            keyframe.annotation_1.array,
            keyframe.points_gps_array_1)
    new_camera_pose = cpp.optimization.update_camera_pose(camera_1.pose_vector, camera_1.intrinsics_vector, iterations, data.path_to_visualization_optimization)
    camera_1.update_pose(new_camera_pose)

    # ------------------------------------------------------------------
    # 7. Compute and print stereo camera transformation & accuracy
    # ------------------------------------------------------------------
    for camera in [camera_0, camera_1]:
        print("Final pose of camera #", camera.id, ":\n")
        print("R_gps_cam:\n", camera.H_gps_cam[0:3, 0:3])
        print("t_gps_cam:\n", camera.H_gps_cam[0:3, 3])
        print("q_gps_cam:\n", Transformation.convert_rotation_matrix(camera.H_gps_cam[0:3, 0:3], to_type='quaternion'))
    # Transformation between cameras (camera 0 expressed in camera 1's frame)
    print("Transformation between stereo cameras:\n")
    H_cam1_cam0 = camera_1.H_cam_gps @ camera_0.H_gps_cam
    R_cam1_cam0, t_cam1_cam0 = Transformation.separate_homogeneous_transformation(H_cam1_cam0)
    q_cam1_cam0 = Transformation.convert_rotation_matrix(R_cam1_cam0, to_type='quaternion')
    print("R_cam1_cam0:\n", R_cam1_cam0)
    print("t_cam1_cam0:\n", t_cam1_cam0)
    print("q_cam1_cam0:\n", q_cam1_cam0)
    # Given calibration between cameras (reference values to compare against)
    print("Given calibration between stereo cameras")
    t_1_0 = np.array([0.30748946, 0.00190947, 0.00966052])
    q_1_0 = np.array([0.00666986, 0.02121604, -0.00106887, 0.99975209])
    R_1_0 = Transformation.convert_quaternions(q_1_0, to_type='rotation matrix')
    print("R_cam1_cam0:\n", R_1_0)
    print("t_cam1_cam0:\n", t_1_0)
    print("q_cam1_cam0:\n", q_1_0)
    # Absolute position error of the optimised extrinsics vs. the calibration
    print("Absolute position error:\n")
    t_error = t_cam1_cam0 - t_1_0
    print("t_error:\n", t_error)

    # ------------------------------------------------------------------
    # 8. Visualize final reprojections (after optimization)
    # ------------------------------------------------------------------
    print("Reprojecting points using final camera pose")
    for keyframe in keyframes:  # FIX: dropped unused enumerate() index
        # annotated_visual_0 = keyframe.annotation_0.visualize_splines_and_points()
        # annotated_visual_1 = keyframe.annotation_1.visualize_splines_and_points()
        # cv2.imwrite(data.path_to_visualization_final + keyframe.filename + "_annotated_0.jpg", annotated_visual_0)
        # cv2.imwrite(data.path_to_visualization_final + keyframe.filename + "_annotated_1.jpg", annotated_visual_1)
        # reprojected_visual_0 = keyframe.visualize_reprojected_and_original_points(camera_0)
        # reprojected_visual_1 = keyframe.visualize_reprojected_and_original_points(camera_1)
        # cv2.imwrite(data.path_to_visualization_final + keyframe.filename + "_reprojected_0.jpg", reprojected_visual_0)
        # cv2.imwrite(data.path_to_visualization_final + keyframe.filename + "_reprojected_1.jpg", reprojected_visual_1)
        combined_visual_0 = keyframe.visualize_reprojected_points(camera_0, keyframe.annotation_0.visualize_splines())
        combined_visual_1 = keyframe.visualize_reprojected_points(camera_1, keyframe.annotation_1.visualize_splines())
        cv2.imwrite(data.path_to_visualization_final + keyframe.filename + "_combined_0.jpg", combined_visual_0)
        cv2.imwrite(data.path_to_visualization_final + keyframe.filename + "_combined_1.jpg", combined_visual_1)
# Script entry point: run the full pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()