import io
import unittest
from unittest.mock import patch

from app.utils.guidance_analysis import GuidanceAnalyzer


class GuidanceFileExtractionTest(unittest.TestCase):
    """Checks of ``GuidanceAnalyzer.extract_text`` for several file extensions.

    Every test reads the ``"text"`` and ``"method"`` entries of the value
    returned by ``extract_text(content_bytes, filename)``.
    """

    def test_extracts_plain_text_and_markdown(self):
        """``.txt`` and ``.md`` inputs keep their text and report their own method."""
        extractor = GuidanceAnalyzer()
        plain_bytes = "资产负债率预警。".encode("utf-8")
        markdown_bytes = "# 标题\n\n资产负债率预警。".encode("utf-8")

        plain_result = extractor.extract_text(plain_bytes, "guide.txt")
        markdown_result = extractor.extract_text(markdown_bytes, "guide.md")

        # The extracted text is checked first, then the reported method.
        self.assertIn("资产负债率预警", plain_result["text"])
        self.assertIn("资产负债率预警", markdown_result["text"])
        self.assertEqual(plain_result["method"], "plain_text")
        self.assertEqual(markdown_result["method"], "markdown")

    def test_extracts_docx_text(self):
        """A DOCX document built in memory is extracted with method ``docx``."""
        # Imported inside the test, so only this test requires the docx package.
        from docx import Document

        word_file = Document()
        word_file.add_paragraph("资产负债率预警。")
        buffer = io.BytesIO()
        word_file.save(buffer)
        docx_bytes = buffer.getvalue()

        extracted = GuidanceAnalyzer().extract_text(docx_bytes, "guide.docx")

        self.assertEqual(extracted["method"], "docx")
        self.assertIn("资产负债率预警", extracted["text"])

    def test_pdf_extractor_result_shape(self):
        """With the PDF helper stubbed, the result still has the expected shape."""
        # _extract_pdf_text is replaced with a stub, so the b"%PDF" payload is
        # never handed to the real helper.
        stubbed_pdf_reader = patch.object(
            GuidanceAnalyzer,
            "_extract_pdf_text",
            return_value="资产负债率预警。",
        )
        with stubbed_pdf_reader:
            extracted = GuidanceAnalyzer().extract_text(b"%PDF", "guide.pdf")

        self.assertEqual(extracted["method"], "pdf")
        self.assertIn("资产负债率预警", extracted["text"])