from __future__ import annotations

import re

# Municipalities that report directly to the central government.  Both the
# short and the suffixed spelling map to the suffixed display form.
DIRECT_MUNICIPALITY_DISPLAY = {
    "北京": "北京市",
    "北京市": "北京市",
    "上海": "上海市",
    "上海市": "上海市",
    "天津": "天津市",
    "天津市": "天津市",
    "重庆": "重庆市",
    "重庆市": "重庆市",
}

# Province spellings -> short display name.  The longer spelling of each
# province is listed before the shorter one so that prefix matching in
# insertion order consumes the suffix as well.
PROVINCE_ALIASES = {
    "新疆维吾尔自治区": "新疆",
    "新疆": "新疆",
    "广东省": "广东",
    "广东": "广东",
    "浙江省": "浙江",
    "浙江": "浙江",
    "江苏省": "江苏",
    "江苏": "江苏",
    "四川省": "四川",
    "四川": "四川",
    "湖北省": "湖北",
    "湖北": "湖北",
    "陕西省": "陕西",
    "陕西": "陕西",
    "山东省": "山东",
    "山东": "山东",
    "湖南省": "湖南",
    "湖南": "湖南",
    "河南省": "河南",
    "河南": "河南",
    "安徽省": "安徽",
    "安徽": "安徽",
    "福建省": "福建",
    "福建": "福建",
    "云南省": "云南",
    "云南": "云南",
    "江西省": "江西",
    "江西": "江西",
    "辽宁省": "辽宁",
    "辽宁": "辽宁",
}

# Known city spellings -> short display name of the owning province.
CITY_TO_PROVINCE = {
    "伊犁": "新疆",
    "伊犁哈萨克自治州": "新疆",
    "乌鲁木齐": "新疆",
    "克拉玛依": "新疆",
    "喀什": "新疆",
    "广州": "广东",
    "深圳": "广东",
    "佛山": "广东",
    "东莞": "广东",
    "杭州": "浙江",
    "宁波": "浙江",
    "南京": "江苏",
    "苏州": "江苏",
    "无锡": "江苏",
    "成都": "四川",
    "武汉": "湖北",
    "西安": "陕西",
    "青岛": "山东",
    "济南": "山东",
    "长沙": "湖南",
    "郑州": "河南",
    "合肥": "安徽",
    "福州": "福建",
    "厦门": "福建",
    "昆明": "云南",
    "南昌": "江西",
    "沈阳": "辽宁",
    "大连": "辽宁",
}

# Everything from the first activity word to the end of the string is
# treated as a description of the trip rather than part of the place name.
LOCATION_NOISE_PATTERN = re.compile(
    r"(?:出差|驻场|现场|支撑|支持|部署|上线|实施|拜访|验收|会议|采购|培训|协助|处理|办理|参加|进行).*$"
)

# Punctuation trimmed from both ends of a raw location string.  The
# full-width colon, comma and semicolon are written as escapes so that the
# set cannot silently collapse into duplicated ASCII characters.
_EDGE_PUNCTUATION = ":\uff1a,\uff0c。;\uff1b、"

# Separators that may precede a city name.
_CITY_SEPARATORS = ",\uff0c、"

# Administrative suffix characters and separators that may sit between a
# province alias and the city that follows it.
_PROVINCE_CITY_GAP = "省市地区自治州盟" + _CITY_SEPARATORS


def normalize_application_location(value: str) -> str:
    """Normalize a free-text location into a short display string.

    Resolution order: direct municipality (e.g. ``"北京市"``), then
    ``"province,city"`` or a bare province, and finally the first 12
    characters of the cleaned text when nothing is recognised.

    Args:
        value: Raw location text. Falsy values are treated as empty.

    Returns:
        The normalized location, or ``""`` if nothing remains after cleanup.
    """
    text = _cleanup_location_text(value)
    if not text:
        return ""
    resolved = _resolve_direct_municipality(text) or _resolve_province_city(text)
    if resolved:
        return resolved
    # Unknown place: keep a bounded prefix of the cleaned text.
    return text[:12]


def _cleanup_location_text(value: str) -> str:
    """Strip whitespace, labels, travel verbs and trailing activity text."""
    text = re.sub(r"\s+", "", str(value or ""))
    text = text.strip(_EDGE_PUNCTUATION)
    # Drop a leading field label such as "地点:" (ASCII or full-width colon).
    text = re.sub(r"^(?:地点|业务地点|发生地点)[:\uff1a]", "", text)
    # Drop a leading "go to" verb.
    text = re.sub(r"^(?:去|到|赴|前往)", "", text)
    text = LOCATION_NOISE_PATTERN.sub("", text)
    return text.strip(_EDGE_PUNCTUATION)


def _resolve_direct_municipality(text: str) -> str:
    """Return the municipality display name if *text* starts with one, else ``""``."""
    for key, display in DIRECT_MUNICIPALITY_DISPLAY.items():
        if text.startswith(key):
            return display
    return ""


def _resolve_province_city(text: str) -> str:
    """Resolve *text* to ``"province,city"``, a bare province, a bare city or ``""``."""
    for province_alias, province_display in PROVINCE_ALIASES.items():
        if not text.startswith(province_alias):
            continue
        # Only the leading side is trimmed: trimming the trailing side as
        # well would eat the final "州"/"市" of the city itself ("广州市" ->
        # "广") and lose the city.
        remainder = text[len(province_alias):].lstrip(_PROVINCE_CITY_GAP)
        if not remainder:
            return province_display
        city = _resolve_city_name(remainder)
        return f"{province_display},{city}" if city else province_display

    # No province prefix: try to recognise a city and infer its province.
    city = _resolve_city_name(text)
    if city:
        province = CITY_TO_PROVINCE.get(city)
        return f"{province},{city}" if province else city
    return ""


def _resolve_city_name(text: str) -> str:
    """Return the display name of the known city that *text* starts with, else ``""``."""
    normalized = text.strip(_CITY_SEPARATORS)
    if not normalized:
        return ""
    # Longest spelling first so "伊犁哈萨克自治州" wins over "伊犁".
    for city in sorted(CITY_TO_PROVINCE, key=len, reverse=True):
        if normalized.startswith(city):
            return _display_city_name(city)
    return ""


def _display_city_name(city: str) -> str:
    """Return the short display form of a known city spelling."""
    if city == "伊犁哈萨克自治州":
        return "伊犁"
    return city.removesuffix("市")