import os
import random
from itertools import combinations

# Lower-case file extensions that are treated as image frames.
_FRAME_EXTENSIONS = ('.jpg', '.png', '.jpeg')


def _collect_frames(root_dir):
    """Map each subject directory under *root_dir* to its frame paths.

    The expected layout is ``root_dir/<subject>/<subfolder>/<frame>``.
    Entries that are not directories at the subject or subfolder level are
    ignored, and only files whose name ends with one of
    ``_FRAME_EXTENSIONS`` (compared case-insensitively) are collected.

    Directory listings are sorted because ``os.listdir`` returns entries in
    arbitrary order; sorting keeps the result stable across runs/platforms.
    """
    subjects = {}
    for subject in sorted(os.listdir(root_dir)):
        subject_path = os.path.join(root_dir, subject)
        if not os.path.isdir(subject_path):
            continue
        frames = []
        for subfolder in sorted(os.listdir(subject_path)):
            subfolder_path = os.path.join(subject_path, subfolder)
            if not os.path.isdir(subfolder_path):
                continue
            frames.extend(
                os.path.join(subfolder_path, name)
                for name in sorted(os.listdir(subfolder_path))
                if name.lower().endswith(_FRAME_EXTENSIONS)
            )
        subjects[subject] = frames
    return subjects


def get_frame_pairs(root_dir, total_comparisons):
    """Build genuine and impostor frame pairs from a subject directory tree.

    A *genuine* pair is two distinct frames of the same subject; an
    *impostor* pair is one frame from each of two different subjects.
    Each class gets a quota of ``total_comparisons // 2`` pairs (a negative
    value is treated as 0).

    Args:
        root_dir: Directory laid out as ``<subject>/<subfolder>/<frame>``.
        total_comparisons: Total number of pairs wanted across both classes.

    Returns:
        A ``(genuine_pairs, impostor_pairs)`` tuple of lists of
        ``(frame_path, frame_path)`` tuples. ``genuine_pairs`` may be
        shorter than the quota when the data does not contain enough
        same-subject combinations. ``impostor_pairs`` always meets the
        quota; its pairs are drawn independently, so duplicates can occur.

    Raises:
        ValueError: If impostor pairs are requested but fewer than two
            subjects contain any frames.
        OSError: Propagated from ``os.listdir`` if a directory cannot be
            read (e.g. *root_dir* does not exist).
    """
    subjects = _collect_frames(root_dir)
    # Clamp at 0 so a negative request cannot turn the slice below into
    # "everything except the last k pairs".
    pairs_per_class = max(total_comparisons // 2, 0)

    # All same-subject combinations, shuffled, then truncated to the quota.
    genuine_pairs = []
    for frames in subjects.values():
        genuine_pairs.extend(combinations(frames, 2))
    random.shuffle(genuine_pairs)
    genuine_pairs = genuine_pairs[:pairs_per_class]

    impostor_pairs = []
    if pairs_per_class == 0:
        return genuine_pairs, impostor_pairs

    # Only subjects that actually have frames can contribute; checking this
    # up front avoids a loop that could never make progress.
    eligible = [name for name, frames in subjects.items() if frames]
    if len(eligible) < 2:
        raise ValueError(
            "Need at least two subjects with frames to build impostor "
            f"pairs; found {len(eligible)} under {root_dir!r}"
        )

    while len(impostor_pairs) < pairs_per_class:
        subj1, subj2 = random.sample(eligible, 2)
        frame1 = random.choice(subjects[subj1])
        frame2 = random.choice(subjects[subj2])
        impostor_pairs.append((frame1, frame2))

    return genuine_pairs, impostor_pairs


# Example usage (only runs when executed as a script, not on import):
if __name__ == "__main__":
    root_directory = "path/to/your/folder"
    total_instances = 2000
    genuine, impostor = get_frame_pairs(root_directory, total_instances)