""" Excel/CSV Text Extractor """ import asyncio from typing import Dict, List import pandas as pd class ExcelProcessor: """Extract text from Excel and CSV files""" def extract_csv(self, file_path: str) -> str: """Extract text from CSV file""" df = pd.read_csv(file_path) return self._dataframe_to_text(df) async def extract_csv_async(self, file_path: str) -> str: """Extract CSV asynchronously""" return await asyncio.get_event_loop().run_in_executor( None, self.extract_csv, file_path ) def extract_excel(self, file_path: str, sheet_name: str = None) -> str: """Extract text from Excel file""" if sheet_name: df = pd.read_excel(file_path, sheet_name=sheet_name) return self._dataframe_to_text(df) else: # Read all sheets sheets = pd.read_excel(file_path, sheet_name=None) text_parts = [] for sheet_name, df in sheets.items(): text_parts.append(f"=== Sheet: {sheet_name} ===\n") text_parts.append(self._dataframe_to_text(df)) return "\n\n".join(text_parts) async def extract_excel_async(self, file_path: str, sheet_name: str = None) -> str: """Extract Excel asynchronously""" return await asyncio.get_event_loop().run_in_executor( None, self.extract_excel, file_path, sheet_name ) def _dataframe_to_text(self, df: pd.DataFrame) -> str: """Convert DataFrame to readable text""" text_parts = [] # Add column headers if not df.empty: text_parts.append(" | ".join(str(col) for col in df.columns)) text_parts.append("-" * len(text_parts[-1])) # Add rows for _, row in df.iterrows(): row_text = " | ".join(str(val) for val in row.values) text_parts.append(row_text) return "\n".join(text_parts) def extract_all_sheets(self, file_path: str) -> Dict[str, str]: """Extract all sheets from Excel file""" sheets = pd.read_excel(file_path, sheet_name=None) return {name: self._dataframe_to_text(df) for name, df in sheets.items()} async def extract_all_sheets_async(self, file_path: str) -> Dict[str, str]: """Extract all sheets asynchronously""" return await asyncio.get_event_loop().run_in_executor( None, self.extract_all_sheets, file_path ) def get_sheet_names(self, file_path: str) -> List[str]: """Get all sheet names from Excel file""" xl = pd.ExcelFile(file_path) return xl.sheet_names async def process_csv(file_path: str) -> str: """Process CSV file and return text""" processor = ExcelProcessor() return await processor.extract_csv_async(file_path) async def process_excel(file_path: str) -> str: """Process Excel file and return text""" processor = ExcelProcessor() return await processor.extract_excel_async(file_path)