一段简单的批量匿名化 DICOM 文件的代码:
将 INPUT_DIR 下每个子文件夹(每个子文件夹保存一个病例的 DICOM 数据)中的文件匿名化(改写或清空隐私相关字段),并输出到 OUTPUT_DIR。
if __name__ == "__main__":
    # Source root: expected to contain one subfolder per case.
    INPUT_DIR = r"D:\code\GYN_plans_nodose_153"
    # Destination root that receives the anonymized copies.
    OUTPUT_DIR = r"D:\code\GYN_Anonymized_153"

    main(INPUT_DIR, OUTPUT_DIR)
遍历子文件夹进行处理:
def main(input_directory, output_directory):
    """
    Run the anonymization over every case folder directly under the input directory.

    Each immediate subdirectory of ``input_directory`` is handed to
    ``anonymize_dicom_directory`` together with ``output_directory``; entries
    that are not directories are ignored. One progress line is printed for
    every subfolder that has been processed.
    """
    for entry_name in os.listdir(input_directory):
        case_dir = os.path.join(input_directory, entry_name)
        # Loose files at the top level are not case folders; skip them.
        if not os.path.isdir(case_dir):
            continue
        anonymize_dicom_directory(case_dir, output_directory)
        print(f"Anonymized DICOM files from: {case_dir}")
def anonymize_dicom_directory(input_directory, output_directory):
    """
    Anonymize every DICOM file found under an input directory.

    The directory tree rooted at ``input_directory`` is walked recursively and
    each file whose name ends in ``.dcm`` is passed to ``anonymize_dicom_file``
    together with ``output_directory``. The extension is compared
    case-insensitively, so ``IMG001.DCM`` is picked up as well as
    ``img001.dcm``.

    Args:
        input_directory: Root folder to search for DICOM files.
        output_directory: Destination root, forwarded unchanged to
            ``anonymize_dicom_file``.
    """
    for root, _dirs, files in os.walk(input_directory):
        for file in files:
            # DICOM files are recognised purely by their extension; anything
            # without a .dcm suffix is skipped.
            if not file.lower().endswith(".dcm"):
                continue
            anonymize_dicom_file(os.path.join(root, file), output_directory)
针对单个dcm文件,自定义新的ID和Name,处理需要匿名化的tag:
def anonymize_dicom_file(dicom_path, output_path):
    """
    Anonymize a single DICOM file and save it under a per-patient output folder.

    The file is read with ``pydicom.dcmread`` and then modified as follows:

    * ``PatientID`` (if present) becomes ``"Test"`` followed by the old ID;
    * ``PatientName`` (if present) becomes ``"Anonymized"`` followed by the
      initials returned by ``extract_initials``;
    * every tag listed in ``tags_to_anonymize`` that is present is set to ``""``.

    The result is written to
    ``<output_path>/<new PatientID>/<original file name>``. A file without a
    ``PatientID`` element is written to the ``UnknownPatient`` subfolder
    instead of failing.

    Args:
        dicom_path: Path of the DICOM file to read.
        output_path: Root directory under which the per-patient folder is
            created (it is created on demand).
    """
    dataset = pydicom.dcmread(dicom_path)

    # Rewrite the identifying tags.
    # NOTE(review): the new ID still embeds the original PatientID, so this is
    # a relabelling rather than a true de-identification - confirm that this
    # is acceptable for the intended use.
    if "PatientID" in dataset:
        id_element = dataset.data_element("PatientID")
        # An empty ID element must not break the string concatenation.
        new_id = "Test" + str(id_element.value or "")
        id_element.value = new_id
        patient_folder = new_id
    else:
        # No PatientID to derive a folder name from; use a fixed fallback.
        patient_folder = "UnknownPatient"

    if "PatientName" in dataset:
        name_element = dataset.data_element("PatientName")
        old_name = str(name_element.value or "")
        name_element.value = "Anonymized" + extract_initials(old_name)

    # Tags that are simply blanked out (extend as necessary). Candidates that
    # are currently left untouched: "PatientBirthDate", "PatientAddress".
    tags_to_anonymize = [
        "InstitutionName",
    ]
    for tag in tags_to_anonymize:
        if tag in dataset:
            dataset.data_element(tag).value = ""

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    output_dir = os.path.join(output_path, patient_folder)
    os.makedirs(output_dir, exist_ok=True)

    # NOTE(review): only the base file name is kept, so two input files with
    # the same name and the same PatientID end up overwriting each other.
    output_file_path = os.path.join(output_dir, os.path.basename(dicom_path))
    dataset.save_as(output_file_path)
# Build the replacement name from the first character of each word.
def extract_initials(name):
    """
    Return the upper-cased initials of every word in a name.

    Words are separated by whitespace or by the DICOM Person Name delimiters
    ``^`` (between name components) and ``=`` (between component groups), so
    ``"Doe^John"`` yields ``"DJ"`` just like ``"Doe John"`` does.

    Args:
        name: Name string; may be empty.

    Returns:
        The concatenated upper-cased first characters of all words, or ``""``
        when the name contains no words.
    """
    # Normalise the DICOM delimiters to spaces so that a single split()
    # handles both plain and DICOM-formatted names.
    for delimiter in "^=":
        name = name.replace(delimiter, " ")
    return "".join(word[0].upper() for word in name.split())