""" Excel/CSV Text Extractor """ import pandas as pd from typing import Dict, List class ExcelProcessor: """Extract text from Excel and CSV files""" def extract_csv(self, file_path: str) -> str: """Extract text from CSV file""" df = pd.read_csv(file_path) return self._dataframe_to_text(df) def extract_excel(self, file_path: str, sheet_name: str = None) -> str: """Extract text from Excel file""" if sheet_name: df = pd.read_excel(file_path, sheet_name=sheet_name) return self._dataframe_to_text(df) else: # Read all sheets sheets = pd.read_excel(file_path, sheet_name=None) text_parts = [] for sheet_name, df in sheets.items(): text_parts.append(f"=== Sheet: {sheet_name} ===\n") text_parts.append(self._dataframe_to_text(df)) return "\n\n".join(text_parts) def _dataframe_to_text(self, df: pd.DataFrame) -> str: """Convert DataFrame to readable text""" text_parts = [] # Add column headers if not df.empty: text_parts.append(" | ".join(str(col) for col in df.columns)) text_parts.append("-" * len(text_parts[-1])) # Add rows for _, row in df.iterrows(): row_text = " | ".join(str(val) for val in row.values) text_parts.append(row_text) return "\n".join(text_parts) def extract_all_sheets(self, file_path: str) -> Dict[str, str]: """Extract all sheets from Excel file""" sheets = pd.read_excel(file_path, sheet_name=None) return {name: self._dataframe_to_text(df) for name, df in sheets.items()} def get_sheet_names(self, file_path: str) -> List[str]: """Get all sheet names from Excel file""" xl = pd.ExcelFile(file_path) return xl.sheet_names def process_csv(file_path: str) -> str: """Process CSV file and return text""" processor = ExcelProcessor() return processor.extract_csv(file_path) def process_excel(file_path: str) -> str: """Process Excel file and return text""" processor = ExcelProcessor() return processor.extract_excel(file_path)