Files
YG-Rules/tests/test_guidance_file_extraction.py

39 lines
1.4 KiB
Python

import io
import unittest
from unittest.mock import patch
from app.utils.guidance_analysis import GuidanceAnalyzer
class GuidanceFileExtractionTest(unittest.TestCase):
    """Exercise ``GuidanceAnalyzer.extract_text`` with several file types.

    Every test hands raw bytes plus a filename to ``extract_text`` and then
    inspects the ``"text"`` and ``"method"`` entries of the returned mapping.
    """

    def test_extracts_plain_text_and_markdown(self):
        """Plain-text and Markdown inputs keep their text and report their method."""
        extractor = GuidanceAnalyzer()
        plain_payload = "资产负债率预警。".encode("utf-8")
        markdown_payload = "# 标题\n\n资产负债率预警。".encode("utf-8")

        plain_result = extractor.extract_text(plain_payload, "guide.txt")
        markdown_result = extractor.extract_text(markdown_payload, "guide.md")

        # The sentence must survive extraction in both formats.
        self.assertIn("资产负债率预警", plain_result["text"])
        self.assertIn("资产负债率预警", markdown_result["text"])
        # Each format is expected to be tagged with its own method name.
        self.assertEqual(plain_result["method"], "plain_text")
        self.assertEqual(markdown_result["method"], "markdown")

    def test_extracts_docx_text(self):
        """A DOCX built in memory is extracted with method ``"docx"``."""
        # Kept function-local: no other test shown here uses python-docx.
        from docx import Document

        # Build a one-paragraph document and serialize it without touching disk.
        source_document = Document()
        source_document.add_paragraph("资产负债率预警。")
        buffer = io.BytesIO()
        source_document.save(buffer)
        docx_payload = buffer.getvalue()

        extracted = GuidanceAnalyzer().extract_text(docx_payload, "guide.docx")

        self.assertEqual(extracted["method"], "docx")
        self.assertIn("资产负债率预警", extracted["text"])

    def test_pdf_extractor_result_shape(self):
        """With the PDF extractor stubbed, the result reports ``"pdf"`` and the stub text."""
        # ``_extract_pdf_text`` is replaced by a stub returning fixed text, so
        # only placeholder bytes are passed in as the PDF content.
        stubbed_extractor = patch.object(
            GuidanceAnalyzer,
            "_extract_pdf_text",
            return_value="资产负债率预警。",
        )
        with stubbed_extractor:
            extracted = GuidanceAnalyzer().extract_text(b"%PDF", "guide.pdf")

        self.assertEqual(extracted["method"], "pdf")
        self.assertIn("资产负债率预警", extracted["text"])