39 lines
1.4 KiB
Python
39 lines
1.4 KiB
Python
|
|
import io
|
||
|
|
import unittest
|
||
|
|
from unittest.mock import patch
|
||
|
|
|
||
|
|
from app.utils.guidance_analysis import GuidanceAnalyzer
|
||
|
|
|
||
|
|
|
||
|
|
class GuidanceFileExtractionTest(unittest.TestCase):
    """Exercise ``GuidanceAnalyzer.extract_text`` for each supported file suffix.

    Every test feeds raw bytes plus a filename to ``extract_text`` and checks
    the ``"method"`` and ``"text"`` entries of the mapping it returns.
    """

    def test_extracts_plain_text_and_markdown(self):
        """``.txt`` and ``.md`` payloads are decoded and tagged by their format."""
        expected_phrase = "资产负债率预警"
        extractor = GuidanceAnalyzer()

        plain_result = extractor.extract_text(
            "资产负债率预警。".encode("utf-8"),
            "guide.txt",
        )
        markdown_result = extractor.extract_text(
            "# 标题\n\n资产负债率预警。".encode("utf-8"),
            "guide.md",
        )

        # The body text must survive extraction for both formats.
        self.assertIn(expected_phrase, plain_result["text"])
        self.assertIn(expected_phrase, markdown_result["text"])
        # Each result reports which extraction path handled it.
        self.assertEqual(plain_result["method"], "plain_text")
        self.assertEqual(markdown_result["method"], "markdown")

    def test_extracts_docx_text(self):
        """A one-paragraph DOCX built in memory yields its paragraph text."""
        # Imported locally so the rest of the class does not depend on
        # python-docx at module import time.
        from docx import Document

        docx_file = Document()
        docx_file.add_paragraph("资产负债率预警。")
        buffer = io.BytesIO()
        docx_file.save(buffer)
        docx_bytes = buffer.getvalue()

        extracted = GuidanceAnalyzer().extract_text(docx_bytes, "guide.docx")

        self.assertEqual(extracted["method"], "docx")
        self.assertIn("资产负债率预警", extracted["text"])

    def test_pdf_extractor_result_shape(self):
        """With the PDF helper stubbed out, the result still has the usual keys."""
        # ``_extract_pdf_text`` is replaced, so the ``b"%PDF"`` payload is
        # never handed to the real PDF helper.
        stubbed_pdf_helper = patch.object(
            GuidanceAnalyzer,
            "_extract_pdf_text",
            return_value="资产负债率预警。",
        )
        with stubbed_pdf_helper:
            extracted = GuidanceAnalyzer().extract_text(b"%PDF", "guide.pdf")

        self.assertEqual(extracted["method"], "pdf")
        self.assertIn("资产负债率预警", extracted["text"])
|