"""Flatten Markdown files into single-line documents.

Walks ``source_dir``, and for every file whose name ends with
``.<file_extension>`` writes a copy to the same relative location under
``target_dir``. In each copy, lines consisting solely of
``paragraph_delimiter`` are dropped, one trailing delimiter is stripped from
every remaining line, and the remaining lines are joined with ``replacement``.
"""
import os

source_dir = r"D:\code\repository\RAG资料库—开发"
target_dir = r"D:\code\repository\RAG资料库—上线"
file_extension = 'md'
paragraph_delimiter = '\n'  # paragraph separator that is about to be replaced
replacement = '。。'  # string the separator is replaced with


def find_files(root_dir, extension):
    """Return the paths of all files under *root_dir* ending in ``.extension``.

    The comparison is case-sensitive. A file whose whole name equals
    *extension* (no dot) is not treated as a match.
    """
    suffix = "." + extension
    matches = []
    for current_dir, _subdirs, names in os.walk(root_dir):
        matches.extend(
            os.path.join(current_dir, name)
            for name in names
            if name.endswith(suffix)
        )
    return matches


def join_paragraphs(lines, delimiter, joiner):
    """Join *lines* with *joiner*, dropping delimiter-only lines.

    Lines equal to *delimiter* are skipped; any other line has a single
    trailing *delimiter* removed before joining.
    """
    kept = []
    for line in lines:
        if line == delimiter:
            continue
        # Guard against an empty delimiter: ``line[:-0]`` would erase the line.
        if delimiter and line.endswith(delimiter):
            line = line[:-len(delimiter)]
        kept.append(line)
    return joiner.join(kept)


def convert_file(src_path, dst_path):
    """Read *src_path* as UTF-8 and write its flattened text to *dst_path*."""
    dst_dir = os.path.dirname(dst_path)
    if dst_dir:
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(dst_dir, exist_ok=True)

    with open(src_path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    with open(dst_path, "w", encoding="utf-8") as f:
        f.write(join_paragraphs(lines, paragraph_delimiter, replacement))


def main(src_root=None, dst_root=None):
    """Convert every matching file under *src_root* into *dst_root*.

    Both arguments default to the module-level ``source_dir`` and
    ``target_dir`` settings.
    """
    if src_root is None:
        src_root = source_dir
    if dst_root is None:
        dst_root = target_dir

    for src_path in find_files(src_root, file_extension):
        # Rebuild the path from its position relative to the source root; a
        # plain substring replace misbehaves when the root has a trailing
        # separator or its text reappears deeper in the path.
        relative_path = os.path.relpath(src_path, src_root)
        convert_file(src_path, os.path.join(dst_root, relative_path))


if __name__ == "__main__":
    main()