"""Unit tests for LLM reasoning cleanup and schema description handling.

NOTE(review): the ``<think>`` / ``</think>`` markers in the fixtures below were
restored because the previous literals contained no reasoning markup at all and
the ``assertNotIn("", ...)`` checks could never pass (the empty string is a
substring of every string). Confirm the tag spelling against ``strip_thinking``.
"""

import io
import os
import tempfile
import unittest

from app.utils.llm import strip_thinking
from app.utils.schema_storage import SchemaStorage

# Sample module definition shared by the description tests.
MODULE = {
    "module_name": "1、银行账户",
    "fields": [
        {"name": "所属集团", "marker": "groupName", "required": "必填", "rule": "填写集团名称"},
        {"name": "开户单位", "marker": "accountOrg", "required": "必填", "rule": "填写账户所属单位"},
        {"name": "银行账号", "marker": "accountNo", "required": "必填", "rule": "填写银行账号"},
        {"name": "开户银行", "marker": "bankName", "required": "必填", "rule": "填写开户银行"},
        {"name": "币种", "marker": "currency", "required": "必填", "rule": "填写账户币种"},
        {"name": "账户余额", "marker": "balance", "required": "必填", "rule": "填写账户余额"},
    ],
}


class LLMThinkingCleanupTest(unittest.TestCase):
    """Checks that ``strip_thinking`` removes reasoning blocks from LLM output."""

    def test_strip_complete_think_block(self):
        """A closed reasoning block is dropped and the text after it is kept."""
        text = "<think>这里是推理过程</think>银行账户表用于管理账户基础信息。"
        self.assertEqual(strip_thinking(text), "银行账户表用于管理账户基础信息。")

    def test_strip_unclosed_think_block(self):
        """An opening tag without a closing tag leaves an empty result."""
        self.assertEqual(strip_thinking("<think>只有推理过程,没有最终答案"), "")


class SchemaDescriptionSanitizingTest(unittest.TestCase):
    """Exercises description sanitizing, Excel parsing and file deletion."""

    def setUp(self):
        # Allocate the instance without running __init__, so the helper-method
        # tests do not depend on whatever the constructor does.
        self.storage = object.__new__(SchemaStorage)

    def test_extracts_final_quoted_description(self):
        """The quoted description is kept and the leading chatter is dropped."""
        response = (
            "好的,我先分析字段。\n"
            "\"银行账户表用于记录集团及成员单位银行账户基础信息,包含所属集团、开户单位、"
            "银行账号、开户银行、币种、账户余额等字段,支撑账户查询、余额校验和资金管理选表。\""
        )

        description = self.storage._sanitize_description(response, MODULE)

        self.assertNotIn("好的", description)
        self.assertIn("银行账户", description)
        self.assertIn("账户余额", description)

    def test_falls_back_when_only_thinking_is_returned(self):
        """A response made only of reasoning yields a description built from MODULE."""
        description = self.storage._sanitize_description(
            "<think>仅有思考过程</think>", MODULE
        )

        self.assertNotIn("<think>", description)
        self.assertIn("银行账户", description)
        self.assertIn("所属集团", description)

    def test_cleans_historical_schema_response(self):
        """Stored descriptions that are dirty or empty are cleaned on read."""
        data = {
            "processing_status": "done",
            "modules": [
                {**MODULE, "description": "<think>历史脏数据</think>"},
                {**MODULE, "module_name": "2、资金结算", "description": ""},
            ],
        }

        cleaned = self.storage._clean_descriptions_for_response(data)

        first_description = cleaned["modules"][0]["description"]
        second_description = cleaned["modules"][1]["description"]
        self.assertNotIn("<think>", first_description)
        self.assertIn("银行账户", first_description)
        self.assertIn("资金结算", second_description)

    def test_parse_excel_preserves_physical_table_name(self):
        """The table name and field marker from the sheet survive parsing."""
        # Imported here, as in the original, so the other tests in this module
        # do not need openpyxl to be importable.
        from openpyxl import Workbook

        workbook = Workbook()
        sheet = workbook.active
        sheet.append(
            ["序号", "表名", "数据名称", "数据标记", "数据类型", "数据长度", "数据填写规则", "数据填写要求", "强弱校验"]
        )
        # Single-cell row placed between the header and the field rows.
        sheet.append(["银行贷款"])
        sheet.append(
            [1, "bank_loan", "贷款余额", "STANDARDCURRENCYBALANCE", "数值", "18,2", "填写贷款余额", "必填", "强校验"]
        )

        # Save to an in-memory stream and rewind it before handing it over.
        stream = io.BytesIO()
        workbook.save(stream)
        stream.seek(0)

        modules = self.storage._parse_excel(stream)

        self.assertEqual(modules[0]["table_name"], "bank_loan")
        self.assertEqual(modules[0]["fields"][0]["marker"], "STANDARDCURRENCYBALANCE")

    def test_delete_file_removes_schema_json(self):
        """``delete_file`` removes the file once and returns False afterwards."""
        with tempfile.TemporaryDirectory() as temp_dir:
            schema_path = os.path.join(temp_dir, "schema.json")
            storage = SchemaStorage(schema_path)

            # The test expects construction alone to have created the file.
            self.assertTrue(os.path.exists(schema_path))
            self.assertTrue(storage.delete_file())
            self.assertFalse(os.path.exists(schema_path))
            # A second call has nothing left to delete.
            self.assertFalse(storage.delete_file())


if __name__ == "__main__":
    unittest.main()