"""Geo170k data visualization.

OpenCV-based helpers for browsing the samples described by an
``alignment.json`` file, either one window per sample or as a tiled grid.
"""
import base64
import binascii
import json
import math
import os
import random

import cv2
import numpy as np

# Suffixes tried, in order, when looking an image up on disk.  The empty
# suffix comes first so ids that already carry an extension (or a relative
# path) are used as-is.
_IMAGE_EXTENSIONS = ("", ".jpg", ".jpeg", ".png")
# Dict keys probed, in order, for a sample's descriptive text.
_CAPTION_KEYS = ("conversations", "text", "description", "caption")
# Marker that introduces an inline base64 image ("base64,..." or a data URI).
_BASE64_MARKER = "base64,"
# Edge length (pixels) of one tile in the batch grid.
_TILE_SIZE = 300
# Caption layout inside the text strip under a single image.
_CAPTION_CHARS_PER_LINE = 60
_CAPTION_LINE_HEIGHT = 22
_CAPTION_MAX_LINES = 15
_MIN_TEXT_HEIGHT = 120


def load_alignment_data(json_path):
    """Load a UTF-8 encoded JSON file and return the parsed object."""
    with open(json_path, "r", encoding="utf-8") as f:
        return json.load(f)


def _as_items(data):
    """Return the samples in *data* as a list, or ``None`` if unsupported.

    A dict yields its ``(key, value)`` pairs; a list is returned unchanged.
    """
    if isinstance(data, dict):
        return list(data.items())
    if isinstance(data, list):
        return data
    return None


def _sample_items(items, num_samples):
    """Pick up to *num_samples* random items (never a negative count)."""
    count = max(0, min(int(num_samples), len(items)))
    return random.sample(items, count)


def _pick_caption(mapping):
    """Return the first truthy caption-like value of *mapping*, else ``None``."""
    for key in _CAPTION_KEYS:
        value = mapping.get(key)
        if value:
            return value
    return None


def _is_base64_payload(value):
    """Tell whether *value* is a string carrying an inline base64 image."""
    # Only the head is inspected so huge payloads are not scanned in full.
    return isinstance(value, str) and _BASE64_MARKER in value[:64]


def _parse_item(item):
    """Split one sample into ``(img_id, image_ref, caption)``.

    Returns ``None`` for items whose shape is not understood (they are
    skipped by the callers).  ``image_ref`` is the raw ``"image"`` field when
    there is one; it may be a relative path or an inline base64 payload.
    """
    if isinstance(item, (tuple, list)) and len(item) == 2:
        # (key, value) pair, e.g. produced by dict.items().
        img_id, payload = item
        if isinstance(payload, dict):
            return img_id, payload.get("image"), _pick_caption(payload) or payload
        return img_id, None, payload

    if isinstance(item, dict) and item:
        image_ref = item.get("image")
        # An inline payload makes a useless id/window title, so skip it here.
        path_like = None if _is_base64_payload(image_ref) else image_ref
        img_id = item.get("id") or path_like or next(iter(item))
        return img_id, image_ref, _pick_caption(item)

    return None


def _caption_to_text(caption):
    """Flatten a caption of any JSON shape into a single-line string."""
    if caption is None:
        return ""
    if isinstance(caption, str):
        text = caption
    elif isinstance(caption, dict):
        # NOTE(review): assumes conversation turns look like
        # {"from": ..., "value": ...}; other dicts fall back to str().
        body = caption.get("value")
        if body is None:
            text = str(caption)
        else:
            speaker = caption.get("from")
            text = f"{speaker}: {body}" if speaker else str(body)
    elif isinstance(caption, (list, tuple)):
        text = " | ".join(_caption_to_text(part) for part in caption)
    else:
        text = str(caption)
    # cv2.putText cannot render line breaks, so collapse all whitespace.
    return " ".join(text.split())


def _resolve_image_path(images_dir, img_id, image_ref=None):
    """Return the first existing image file for a sample, or ``None``.

    Both the id and the ``image`` field are tried, each as-is and with the
    common image extensions appended.
    """
    tried = set()
    for ref in (img_id, image_ref):
        if ref is None or _is_base64_payload(ref):
            continue
        name = str(ref)
        if not name or name in tried:
            continue
        tried.add(name)
        for ext in _IMAGE_EXTENSIONS:
            candidate = os.path.join(images_dir, name + ext)
            if os.path.isfile(candidate):
                return candidate
    return None


def _decode_base64_image(payload):
    """Decode an inline base64 image to a BGR array; ``None`` on failure."""
    encoded = payload.split(_BASE64_MARKER, 1)[1]
    try:
        raw = base64.b64decode(encoded)
    except (binascii.Error, ValueError):
        return None
    if not raw:
        # cv2.imdecode rejects an empty buffer.
        return None
    return cv2.imdecode(np.frombuffer(raw, np.uint8), cv2.IMREAD_COLOR)


def _load_sample_image(images_dir, img_id, image_ref):
    """Load a sample's image.

    Returns ``(image, None)`` on success or ``(None, message)`` where
    *message* explains why the image could not be obtained.
    """
    if _is_base64_payload(image_ref):
        img = _decode_base64_image(image_ref)
        if img is None:
            return None, f"无法解码 base64 图片: {img_id}"
        return img, None

    img_path = _resolve_image_path(images_dir, img_id, image_ref)
    if img_path is None:
        return None, f"未找到图片: {img_id} (目录: {images_dir})"

    img = cv2.imread(img_path)
    if img is None:
        return None, f"无法读取图片: {img_path}"
    return img, None


def visualize_geo170k_opencv(data, images_dir, num_samples=5, wait_key=0):
    """Show randomly chosen samples one at a time in an OpenCV window.

    Args:
        data: Parsed ``alignment.json`` content (a dict or a list).
        images_dir: Directory that contains the image files.
        num_samples: Maximum number of samples to show.
        wait_key: Delay passed to ``cv2.waitKey`` for every window
            (``0`` waits for a key press).
    """
    items = _as_items(data)
    if items is None:
        print(f"未知的数据格式: {type(data)}")
        return

    print(f"总共有 {len(items)} 个样本")

    for item in _sample_items(items, num_samples):
        parsed = _parse_item(item)
        if parsed is None:
            continue
        img_id, image_ref, caption = parsed

        img, problem = _load_sample_image(images_dir, img_id, image_ref)
        if img is None:
            print(problem)
            continue

        display_image(img, img_id, caption, wait_key)


def display_image(img, img_id, caption, wait_key=0):
    """Show an image with its id and caption printed underneath.

    Args:
        img: Image array as returned by ``cv2.imread`` / ``cv2.imdecode``.
        img_id: Identifier shown under the image and in the window title.
        caption: Description of the sample; strings, lists and dicts are
            all accepted and flattened to one line of text.
        wait_key: Delay passed to ``cv2.waitKey`` (``0`` waits for a key).
    """
    if img is None:
        print(f"无法显示图片: {img_id}")
        return

    # The canvas is 3-channel, so bring other layouts to BGR first.
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    elif img.shape[2] == 4:
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

    h, w = img.shape[:2]

    # The Hershey fonts only cover ASCII, so the raw caption is also echoed
    # to the console where any non-ASCII text stays readable.
    print("caption", caption)

    text = _caption_to_text(caption)
    lines = [
        text[start:start + _CAPTION_CHARS_PER_LINE]
        for start in range(0, len(text), _CAPTION_CHARS_PER_LINE)
    ]
    if len(lines) > _CAPTION_MAX_LINES:
        lines = lines[:_CAPTION_MAX_LINES]
        lines[-1] = lines[-1][:-3] + "..."

    # White strip below the image, tall enough for the id and every line.
    text_height = max(_MIN_TEXT_HEIGHT, 55 + _CAPTION_LINE_HEIGHT * len(lines))
    canvas = np.full((h + text_height, w, 3), 255, dtype=np.uint8)
    canvas[:h, :w] = img

    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(canvas, f"ID: {img_id}", (10, h + 25), font, 0.6, (0, 0, 0), 1)
    for i, line in enumerate(lines):
        y_pos = h + 55 + i * _CAPTION_LINE_HEIGHT
        cv2.putText(canvas, line, (10, y_pos), font, 0.5, (80, 80, 80), 1)

    window_name = f"Geo170K - {img_id}"
    cv2.imshow(window_name, canvas)
    cv2.waitKey(wait_key)
    cv2.destroyAllWindows()


def visualize_batch_opencv(data, images_dir, num_samples=9, save_dir=None):
    """Tile several randomly chosen images into one grid.

    Args:
        data: Parsed ``alignment.json`` content (a dict or a list).
        images_dir: Directory that contains the image files.
        num_samples: Number of samples to draw; square numbers
            (4, 9, 16, ...) fill the grid completely.
        save_dir: Optional output directory.  When given, the grid is
            written to ``geo170k_grid.png`` there instead of being shown.
    """
    items = _as_items(data)
    if items is None:
        return

    # Collect every sampled image that can actually be loaded.
    images = []
    for item in _sample_items(items, num_samples):
        parsed = _parse_item(item)
        if parsed is None:
            continue
        img_id, image_ref, _caption = parsed
        img, _problem = _load_sample_image(images_dir, img_id, image_ref)
        if img is not None:
            # Uniform tile size so the images fit the grid cells.
            images.append(cv2.resize(img, (_TILE_SIZE, _TILE_SIZE)))

    if not images:
        print("没有找到图片")
        return

    # Size the grid from the images that were found, so missing files do
    # not leave whole rows empty.
    cols = math.ceil(math.sqrt(len(images)))
    rows = math.ceil(len(images) / cols)

    grid = np.zeros((rows * _TILE_SIZE, cols * _TILE_SIZE, 3), dtype=np.uint8)
    for i, img in enumerate(images):
        r, c = divmod(i, cols)
        grid[
            r * _TILE_SIZE:(r + 1) * _TILE_SIZE,
            c * _TILE_SIZE:(c + 1) * _TILE_SIZE,
        ] = img

    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(grid, f"Geo170K Samples ({len(images)} images)",
                (10, 30), font, 1, (255, 255, 255), 2)

    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
        save_path = os.path.join(save_dir, "geo170k_grid.png")
        if cv2.imwrite(save_path, grid):
            print(f"已保存: {save_path}")
        else:
            print(f"保存失败: {save_path}")
    else:
        cv2.imshow("Geo170K Samples", grid)
        cv2.waitKey(0)
        cv2.destroyAllWindows()


def analyze_json_structure(json_path):
    """Print a short summary of the structure of a JSON file.

    Reports the top-level type, its size, and the first sample when the
    top level is a non-empty dict or list.
    """
    data = load_alignment_data(json_path)

    print("=" * 60)
    print("JSON 数据结构分析")
    print("=" * 60)

    print(f"数据类型: {type(data)}")

    if isinstance(data, dict):
        print(f"字典键数量: {len(data)}")
        print(f"前5个键: {list(data)[:5]}")
        if data:
            first_key = next(iter(data))
            print(f"\n第一个样本 (key={first_key}):")
            print(f"  类型: {type(data[first_key])}")
            print(f"  内容: {data[first_key]}")
    elif isinstance(data, list):
        print(f"列表长度: {len(data)}")
        if data:
            print("\n第一个样本:")
            print(f"  类型: {type(data[0])}")
            print(f"  内容: {data[0]}")
    else:
        print(f"内容: {data}")


def main():
    """Analyze the dataset file, then browse a few random samples."""
    # Paths: replace data_dir with your own data directory.
    data_dir = r"E:\data"
    json_path = os.path.join(data_dir, "alignment.json")
    images_dir = os.path.join(data_dir, "images")

    # 1. Inspect the data structure first.
    analyze_json_structure(json_path)

    # 2. Load the data.
    data = load_alignment_data(json_path)

    # 3. Visualize: one window per sample, advancing on a key press.
    visualize_geo170k_opencv(data, images_dir, num_samples=5, wait_key=0)

    # Alternative: render a grid and save it, e.g.
    #   visualize_batch_opencv(data, images_dir, num_samples=9,
    #                          save_dir="./output")


if __name__ == "__main__":
    main()