#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Configuration for QA generation.

Defines :class:`QAConfig`, which holds the tunable parameters of the QA
generator, plus a default instance and three ready-made presets.
"""

from typing import List, Dict


class QAConfig:
    """Container for all QA-generation settings.

    Every setting is a plain instance attribute written in UPPER_CASE and
    initialised in ``__init__``; use :meth:`update_config` to override them.
    """

    def __init__(self):
        # ========== Basic settings ==========
        # Random seed (keeps results reproducible).
        self.RANDOM_SEED = 42

        # Input / output directories.
        self.INPUT_DIR = "Data_Export_Json"
        self.OUTPUT_DIR = "Data_QA_Outputs"

        # ========== Question-count control ==========
        # Basic number of questions generated per data item (simple mode).
        self.BASIC_QUESTIONS_PER_ITEM = 1

        # Maximum number of questions generated per data item (complex mode).
        self.MAX_QUESTIONS_PER_ITEM = 10

        # Share of multi-column questions (0.0-1.0).
        # 0.0 means no multi-column questions, 1.0 means all questions are
        # multi-column questions.
        self.MULTI_COLUMN_RATIO = 0.3

        # ========== Complexity control ==========
        # Complexity level: 1-5
        # 1: simplest (basic Q&A only)
        # 3: medium (includes some multi-column Q&A)
        # 5: most complex (includes every kind of Q&A)
        self.COMPLEXITY_LEVEL = 3

        # ========== Question templates ==========
        # Question prefixes (the pool used depends on the complexity level).
        self.QUESTION_PREFIXES_SIMPLE = [
            "请告诉我",
            "查询",
            "请问",
        ]

        self.QUESTION_PREFIXES_NORMAL = [
            "请告诉我",
            "查询",
            "请问",
            "在",
            "请解释",
            "请输出",
            "请列举",
            "请说明",
            "请查找",
            "请确认",
        ]

        # ========== Answer templates ==========
        # Answer prefixes.
        self.ANSWER_PREFIXES_SIMPLE = [
            "根据表记录,该字段的",
            "查询结果显示,",
            "经查询,该字段的",
        ]

        self.ANSWER_PREFIXES_NORMAL = [
            "根据表记录,该字段的",
            "查询结果显示,",
            "经查询,该字段的",
            "根据数据库记录,",
            "在表中,此字段的",
            "查询结果:",
            "经系统查询,",
            "根据记录显示,",
            "在数据中,该字段的",
            "查询得知,该字段的",
        ]

        # Answer suffixes.
        self.ANSWER_SUFFIXES_SIMPLE = [
            "。",
            "。",
        ]

        self.ANSWER_SUFFIXES_NORMAL = [
            "。",
            ",请参考。",
            ",详情如上。",
            ",以上信息为准。",
            ",望知悉。",
            ",如需更多信息请联系。",
            ",希望能帮到您。",
            ",祝您工作顺利。",
            ",谢谢。",
            "。",
        ]

        # ========== Connectors ==========
        # Connectors used to join columns in multi-column questions.
        self.CONNECTORS_SIMPLE = ["和", "与"]
        self.CONNECTORS_NORMAL = ["和", "与", "及", "、", ",还有", "以及"]

        # ========== Data files ==========
        # Data files to process; each entry carries a display name, the input
        # file, the output file and an "enabled" flag.
        self.DATA_FILES = [
            {
                "name": "元素治理模板",
                "file": "元素治理模板.json",
                "output": "元素治理模板_QA.json",
                "enabled": True,
            },
            {
                "name": "物理模型",
                "file": "物理模型.json",
                "output": "物理模型_QA.json",
                "enabled": True,
            },
            {
                "name": "逻辑模型",
                "file": "逻辑模型.json",
                "output": "逻辑模型_QA.json",
                "enabled": True,
            },
        ]

        # ========== Single-column question templates ==========
        # Number of single-column templates enabled per complexity level.
        self.SINGLE_COLUMN_TEMPLATES = {
            1: 3,   # simple: first 3 templates only
            2: 6,   # simple+: first 6 templates
            3: 9,   # medium: first 9 templates
            4: 12,  # complex: first 12 templates (all)
            5: 12,  # most complex: all templates
        }

        # ========== Multi-column question templates ==========
        # Number of multi-column templates enabled per complexity level.
        self.MULTI_COLUMN_TEMPLATES = {
            1: 0,  # simple: no multi-column questions
            2: 1,  # simple+: 1 multi-column template
            3: 3,  # medium: 3 multi-column templates
            4: 4,  # complex: 4 multi-column templates
            5: 5,  # most complex: all multi-column templates
        }

        # ========== Output control ==========
        # Whether to shuffle the order of the Q&A pairs.
        self.SHUFFLE_OUTPUT = True

        # Whether to produce a QA generation report.
        self.GENERATE_REPORT = True

        # Whether to show verbose logs.
        self.VERBOSE_LOG = True

    def get_complexity_settings(self) -> Dict:
        """Return the settings that apply at the current ``COMPLEXITY_LEVEL``.

        Levels up to 2 use the ``*_SIMPLE`` pools, every other level uses the
        ``*_NORMAL`` pools. Template counts come from
        ``SINGLE_COLUMN_TEMPLATES`` / ``MULTI_COLUMN_TEMPLATES``; a level that
        is missing from those tables falls back to 3 / 0 on the simple branch
        and 9 / 3 on the normal branch.

        Returns:
            A dict with the keys ``question_prefixes``, ``answer_prefixes``,
            ``answer_suffixes``, ``connectors``, ``single_templates``,
            ``multi_templates`` and ``multi_ratio``. The list values are the
            instance's own lists, not copies.
        """
        level = self.COMPLEXITY_LEVEL

        if level <= 2:
            return {
                "question_prefixes": self.QUESTION_PREFIXES_SIMPLE,
                "answer_prefixes": self.ANSWER_PREFIXES_SIMPLE,
                "answer_suffixes": self.ANSWER_SUFFIXES_SIMPLE,
                "connectors": self.CONNECTORS_SIMPLE,
                "single_templates": self.SINGLE_COLUMN_TEMPLATES.get(level, 3),
                "multi_templates": self.MULTI_COLUMN_TEMPLATES.get(level, 0),
                # Levels below 1 fall back to zero multi-column templates, so
                # they must also get a zero ratio (hence `<=`, not `==`).
                "multi_ratio": 0.0 if level <= 1 else 0.1,
            }

        return {
            "question_prefixes": self.QUESTION_PREFIXES_NORMAL,
            "answer_prefixes": self.ANSWER_PREFIXES_NORMAL,
            "answer_suffixes": self.ANSWER_SUFFIXES_NORMAL,
            "connectors": self.CONNECTORS_NORMAL,
            "single_templates": self.SINGLE_COLUMN_TEMPLATES.get(level, 9),
            "multi_templates": self.MULTI_COLUMN_TEMPLATES.get(level, 3),
            "multi_ratio": self.MULTI_COLUMN_RATIO,
        }

    def update_config(self, **kwargs) -> None:
        """Override existing settings by keyword.

        Each keyword that names an existing, non-callable, non-underscore
        attribute is assigned and reported on stdout. Anything else (unknown
        names, methods, private/dunder attributes) is left untouched and
        reported as a warning.

        Args:
            **kwargs: ``SETTING_NAME=value`` pairs.
        """
        for key, value in kwargs.items():
            # hasattr() alone would also match methods such as print_config,
            # letting a caller overwrite them by accident.
            is_setting = (
                not key.startswith("_")
                and hasattr(self, key)
                and not callable(getattr(self, key))
            )
            if is_setting:
                setattr(self, key, value)
                print(f"[CONFIG] 已更新 {key} = {value}")
            else:
                print(f"[WARNING] 未找到配置项: {key}")

    def print_config(self) -> None:
        """Print the main settings and the enabled data files to stdout."""
        print("\n" + "="*60)
        print("当前QA生成配置")
        print("="*60)
        print(f"随机种子: {self.RANDOM_SEED}")
        print(f"输入目录: {self.INPUT_DIR}")
        print(f"输出目录: {self.OUTPUT_DIR}")
        print(f"复杂程度等级: {self.COMPLEXITY_LEVEL}")
        print(f"基本问题数量: {self.BASIC_QUESTIONS_PER_ITEM}")
        print(f"最大问题数量: {self.MAX_QUESTIONS_PER_ITEM}")
        print(f"多列查询占比: {self.MULTI_COLUMN_RATIO}")
        print(f"打乱输出: {self.SHUFFLE_OUTPUT}")
        print(f"生成报告: {self.GENERATE_REPORT}")
        print(f"详细日志: {self.VERBOSE_LOG}")

        print("\n启用处理的文件:")
        for file_info in self.DATA_FILES:
            if file_info["enabled"]:
                print(f" ✓ {file_info['name']} ({file_info['file']})")

        print("="*60 + "\n")


# Default configuration instance.
DEFAULT_CONFIG = QAConfig()


def create_custom_config(**kwargs) -> QAConfig:
    """Create a fresh ``QAConfig`` and apply ``kwargs`` via ``update_config``."""
    config = QAConfig()
    config.update_config(**kwargs)
    return config


# Configuration presets.
# NOTE: update_config() prints one line per applied setting, so building these
# presets writes to stdout when the module is imported.
SIMPLE_CONFIG = create_custom_config(
    COMPLEXITY_LEVEL=1,
    MULTI_COLUMN_RATIO=0.0,
    BASIC_QUESTIONS_PER_ITEM=1,
    MAX_QUESTIONS_PER_ITEM=3,
    VERBOSE_LOG=False
)

NORMAL_CONFIG = create_custom_config(
    COMPLEXITY_LEVEL=3,
    MULTI_COLUMN_RATIO=0.3,
    BASIC_QUESTIONS_PER_ITEM=3,
    MAX_QUESTIONS_PER_ITEM=8
)

COMPLEX_CONFIG = create_custom_config(
    COMPLEXITY_LEVEL=5,
    MULTI_COLUMN_RATIO=0.5,
    BASIC_QUESTIONS_PER_ITEM=5,
    MAX_QUESTIONS_PER_ITEM=10
)


if __name__ == "__main__":
    # Demonstrate how the configuration is used.
    config = QAConfig()
    config.print_config()

    settings = config.get_complexity_settings()
    print("\n根据当前复杂程度等级的设置:")
    print(f"问句前缀数量: {len(settings['question_prefixes'])}")
    print(f"答句前缀数量: {len(settings['answer_prefixes'])}")
    print(f"答句后缀数量: {len(settings['answer_suffixes'])}")
    print(f"连接词数量: {len(settings['connectors'])}")
    print(f"单列模板数: {settings['single_templates']}")
    print(f"多列模板数: {settings['multi_templates']}")
    print(f"多列占比: {settings['multi_ratio']}")