一、基本信息与AirSim图像API的简单demo

Image API.py

import airsim import numpy as np import cv2 # 连接到AirSim模拟器 client = airsim.MultirotorClient() client.confirmConnection() # 一次获取一张图片 # response = client.simGetImage(camera_name, image_type, vehicle_name='') # 无人机摄像头编号及含义(camera_name) ''' 摄像机0：无人机的前方视角 摄像机1：无人机的后方视角 摄像机2：无人机的底部视角，可以用于检测地面和障碍物 摄像机3：无人机的顶部视角，可以用于拍摄俯视图或进行目标跟踪 摄像机4：无人机的左侧视角 摄像机5：无人机的右侧视角 ''' # 使用图像API能够获取到的8种图像类型 ''' Scene：场景视图图片，即俯视图，可以看到整个场景的情况。 airsim.ImageType.Scene DepthPlanar：平面深度图片，可以获取场景中每个像素点到相机的距离。 airsim.ImageType.DepthPlanar DepthPerspective：透视深度图片，可以获取场景中每个像素点到相机的距离。 airsim.ImageType.DepthPerspective DepthVis：深度可视化图片，可以将深度图像转换为RGB图像，方便观察。 airsim.ImageType.DepthVis DisparityNormalized：视差归一化图片，可以获取场景中每个像素点的视差值，用于计算深度信息。 airsim.ImageType.DisparityNormalized Segmentation：分割图片，可以将场景中的不同物体或区域分别标记出来，方便进行目标检测和分割。 airsim.ImageType.Segmentation SurfaceNormals：表面法线图片，可以获取场景中每个像素点的法线方向，用于计算光照和阴影效果。 airsim.ImageType.SurfaceNormals Infrared：红外线图片，可以获取场景中的红外线图像，用于热成像和红外线探测等应用。 airsim.ImageType.Infrared ''' # 1.直接使用simGetImage获取PNG格式的彩色图，并保存成 .png 格式的图片文件。 # response = client.simGetImage('0', airsim.ImageType.Scene, vehicle_name='Drone') # f = open('screen/scene.png', 'wb') # f.write(response) # f.close() # 2.使用 simGetImages 获取PNG格式的分割图，并保存成 .png 格式的图片文件。 # responses = client.simGetImages( # [airsim.ImageRequest(0, airsim.ImageType.Segmentation, pixels_as_float=False, compress=True)]) # f = open('screen/seg.png', 'wb') # f.write(responses[0].image_data_uint8) # f.close() ''' 图像类型 compress pixels_as_float 适合保存的图片类型 PNG格式 True False 彩色图、分割图、表面法线图、红外图 Array格式 False False 彩色图、分割图、表面法线图、红外图 浮点型格式 False True 深度图 ''' # 3.使用 simGetImages 同时获取PNG格式的红外图和表面法线图，并保存成2个.png格式的图片文件。 # responses = client.simGetImages( # [airsim.ImageRequest(0, airsim.ImageType.Infrared, pixels_as_float=False, compress=True), # airsim.ImageRequest(0, airsim.ImageType.SurfaceNormals, pixels_as_float=False, compress=True)]) # # print(responses) # # 保存红外图 # f = open('screen/infrared.png', 'wb') # f.write(responses[0].image_data_uint8) # f.close() # # 保存表面法线图 # f = open('screen/surface.png', 'wb') # f.write(responses[1].image_data_uint8) # f.close() # 4.保存Array格式图像 # # 读取图像数据，array格式 # responses = client.simGetImages([ # airsim.ImageRequest(0, airsim.ImageType.Scene, pixels_as_float=False, compress=False)]) # # 将bytes格式转换为 array格式 fromstring # img_1d = np.frombuffer(responses[0].image_data_uint8, dtype=np.uint8) # img_bgr = img_1d.reshape(responses[0].height, responses[0].width, 3) # # 保存为图片文件 # cv2.imwrite('screen/scene2.png', img_bgr) # 保存为.png格式的图像文件 # cv2.imwrite('screen/scene2.jpg', img_bgr) # 保存为.jpg格式的图像文件 # cv2.imwrite('screen/scene2.tif', img_bgr) # 保存为.tif格式的图像文件 # cv2.imwrite('screen/scene2.bmp', img_bgr) # 保存为.bmp格式的图像文件 ''' .jpg 格式：不带透明通道的有损压缩格式，广泛应用于互联网和数码相机领域； .png 格式：便携式网络图形，无损压缩的位图，有较高的压缩比； .tif 格式：非失真的压缩格式，占用空间较大，通常用于书籍和海报等教专业的领域； .bmp 格式：是Windows操作系统中的标准图像文件格式，通常不压缩，文件所占空间较大。 '''

二、随机设置无人机位姿并获取图像

random_pictures_get_and_save_pose.py

import airsim import os import numpy as np import pandas as pd # 连接到AirSim模拟器 client = airsim.MultirotorClient() client.confirmConnection() # 获取图像路径 folder_path = "screen2" # 保存位姿信息的空DataFrame poses_df = pd.DataFrame(columns=['index', 'x', 'y', 'z', 'yaw', 'pitch', 'roll']) # 设置随机采样的范围和数量 num_samples = 50 # 需要采样的数量 x_min, x_max, y_min, y_max, z_min, z_max = -4, 4, -4, 4, -5, -2 # 位置范围 yaw_min, yaw_max, pitch_min, pitch_max, roll_min, roll_max = -90, 90, -45, 45, -45, 45 # 姿态范围 # 相机列表 camera_list = ["0", "1", "2", "3", "4"] # 随机采样并保存图像和位姿信息 poses_list = [] for i in range(num_samples): # 随机生成目标位置，并设置姿态朝向 x = np.random.uniform(x_min, x_max) y = np.random.uniform(y_min, y_max) z = np.random.uniform(z_min, z_max) yaw = np.random.uniform(yaw_min, yaw_max) pitch = np.random.uniform(pitch_min, pitch_max) roll = np.random.uniform(roll_min, roll_max) pose = airsim.Pose(airsim.Vector3r(x, y, z), airsim.to_quaternion(pitch, roll, yaw)) poses_list.append({'index': i, 'x': x, 'y': y, 'z': z, 'yaw': yaw, 'pitch': pitch, 'roll': roll}) # 移动到目标位置 client.simSetVehiclePose(pose, True) # # 获取相机图像 # responses = client.simGetImages([airsim.ImageRequest("1", airsim.ImageType.Scene, False, False)]) # img_raw = responses[0] # 遍历相机列表，获取每个相机的图像 for j, camera_name in enumerate(camera_list): # 获取相机图像 responses = client.simGetImages([airsim.ImageRequest(camera_name, airsim.ImageType.Scene, False, False)]) img_raw = responses[0] # 将字节流转换为PIL的Image对象 img1d = np.frombuffer(img_raw.image_data_uint8, dtype=np.uint8) img_rgb = img1d.reshape(img_raw.height, img_raw.width, 3) # 保存PNG格式的图像 img_filename = "pose_{0}_x_{1:.2f}_y_{2:.2f}_z_{3:.2f}_yaw_{4:.2f}_pitch_{5:.2f}_roll_{6:.2f}_camera_{4}.png".format(i, x, y, z, yaw, pitch, roll, j) img_filepath = os.path.join(folder_path, img_filename) airsim.write_png(os.path.normpath(img_filepath), img_rgb) print("全部图像和位姿信息均已保存到文件夹：", folder_path) # 将位姿信息保存到csv文件中 poses_df = pd.DataFrame(poses_list) poses_df.to_csv(os.path.join(folder_path, 'poses.csv'), index=False) ''' airsim.Vector3r函数用于创建一个三维向量，表示无人机在三个轴上的位置信息。 airsim.to_quaternion函数则用于将欧拉角（即pitch、roll、yaw）转换为四元数，表示无人机的姿态信息。 四元数是一种数学工具，用于描述三维空间中的旋转。它是由一个实部和三个虚部组成的，通常表示为q = a + bi + cj + dk， 其中a是实部，b、c、d是虚部，i、j、$k$是虚数单位。四元数可以用来表示旋转的方向和角度，它比欧拉角更加稳定和准确，避免了万向锁等问题。 在机器人、计算机图形学和游戏开发等领域中，四元数被广泛应用。在AirSim中，四元数用于表示无人机的姿态信息。 万向锁是一种旋转表示中常见的问题，它发生在使用欧拉角进行旋转时，当旋转角度过大或旋转轴与旋转顺序不当时，就会出现万向锁问题。 万向锁的表现形式是旋转轴和旋转角度不能被唯一确定，即旋转自由度丧失。这会导致旋转结果不可预测，甚至无法进行旋转。 为了避免万向锁问题，可以使用四元数等其他旋转表示方法。在AirSim中，使用欧拉角进行旋转时，也可能会出现万向锁问题，因此建议使用四元数进行姿态表示。 '''

三、利用保存好的位姿csv文件截取图像

get_pictures_with_poses_from_csv.py

import airsim import os import csv import numpy as np client = airsim.MultirotorClient() client.confirmConnection() # 设置相机和文件路径 camera_list = ["0", "1", "2", "3", "4"] folder_path = "/home/yehuo/python_learning/AirSim_learning/screen3" # 读取位姿信息文件（csv格式） poses_csv_file = open("/home/yehuo/python_learning/AirSim_learning/screen2/poses.csv", "r") pos_reader = csv.DictReader(poses_csv_file) # 循环采样并保存图像和位姿信息 for i, row in enumerate(pos_reader): # 获取姿态信息 x, y, z = float(row['x']), float(row['y']), float(row['z']) yaw, pitch, roll = float(row['yaw']), float(row['pitch']), float(row['roll']) pose = airsim.Pose(airsim.Vector3r(x, y, z), airsim.to_quaternion(pitch, roll, yaw)) # 移动到目标位置 client.simSetVehiclePose(pose, True) # 遍历相机列表，获取每个相机的图像 for j, camera_name in enumerate(camera_list): responses = client.simGetImages([airsim.ImageRequest(camera_name, airsim.ImageType.Scene, False, False)]) img_raw = responses[0] # 将字节流转换为PIL的Image对象 img1d = np.frombuffer(img_raw.image_data_uint8, dtype=np.uint8) img_rgb = img1d.reshape(img_raw.height, img_raw.width, 3) # 保存PNG格式的图像 img_filename = "pose_{0}_x_{1:.2f}_y_{2:.2f}_z_{3:.2f}_yaw_{4:.2f}_pitch_{5:.2f}_roll_{6:.2f}_camera_{7}.png".format(i, x, y, z, yaw, pitch, roll, j) img_filepath = os.path.join(folder_path, img_filename) airsim.write_png(os.path.normpath(img_filepath), img_rgb) print("图像和位姿信息均已保存到文件夹：", folder_path)