Files
X-Financial/server/src/app/services/user_agent_application_locations.py

149 lines
3.9 KiB
Python
Raw Normal View History

from __future__ import annotations
import re
# Maps every accepted spelling of a directly-administered municipality (bare
# name and "市"-suffixed name) to its "市"-suffixed display form.
DIRECT_MUNICIPALITY_DISPLAY = {
    alias: f"{name}市"
    for name in ("北京", "上海", "天津", "重庆")
    for alias in (name, f"{name}市")
}
# Maps each accepted province spelling to its short display name.
# Insertion order places the formal name before the short one for every
# province; _resolve_province_city scans this mapping in order with
# str.startswith, so the longer alias is tried first.
PROVINCE_ALIASES = {
    alias: short
    for short, formal in (
        ("新疆", "新疆维吾尔自治区"),
        ("广东", "广东省"),
        ("浙江", "浙江省"),
        ("江苏", "江苏省"),
        ("四川", "四川省"),
        ("湖北", "湖北省"),
        ("陕西", "陕西省"),
        ("山东", "山东省"),
        ("湖南", "湖南省"),
        ("河南", "河南省"),
        ("安徽", "安徽省"),
        ("福建", "福建省"),
        ("云南", "云南省"),
        ("江西", "江西省"),
        ("辽宁", "辽宁省"),
    )
    for alias in (formal, short)
}
# Maps each recognised city (or prefecture) name to the short name of the
# province it belongs to. Entries are grouped by province; the resulting key
# order is province by province, in the order listed here.
CITY_TO_PROVINCE = {
    city: province
    for province, cities in (
        ("新疆", ("伊犁", "伊犁哈萨克自治州", "乌鲁木齐", "克拉玛依", "喀什")),
        ("广东", ("广州", "深圳", "佛山", "东莞")),
        ("浙江", ("杭州", "宁波")),
        ("江苏", ("南京", "苏州", "无锡")),
        ("四川", ("成都",)),
        ("湖北", ("武汉",)),
        ("陕西", ("西安",)),
        ("山东", ("青岛", "济南")),
        ("湖南", ("长沙",)),
        ("河南", ("郑州",)),
        ("安徽", ("合肥",)),
        ("福建", ("福州", "厦门")),
        ("云南", ("昆明",)),
        ("江西", ("南昌",)),
        ("辽宁", ("沈阳", "大连")),
    )
    for city in cities
}
# Matches the first activity keyword (business trip, on-site work, deployment,
# meeting, training, ...) together with everything that follows it up to the
# end of the string, so that substituting "" keeps only the text before it.
LOCATION_NOISE_PATTERN = re.compile(
    "(?:"
    + "|".join(
        (
            "出差", "驻场", "现场", "支撑", "支持", "部署",
            "上线", "实施", "拜访", "验收", "会议", "采购",
            "培训", "协助", "处理", "办理", "参加", "进行",
        )
    )
    + r").*$"
)
def normalize_application_location(value: str) -> str:
    """Normalize a free-text application location into a short display name.

    The input is first cleaned by ``_cleanup_location_text``. The cleaned text
    is then offered to the municipality resolver and, failing that, to the
    province/city resolver; the first non-empty answer wins.

    Returns:
        ``""`` when nothing is left after cleaning, the resolved display name
        when a resolver recognises the text, and otherwise the first 12
        characters of the cleaned text.
    """
    cleaned = _cleanup_location_text(value)
    if not cleaned:
        return ""
    for resolver in (_resolve_direct_municipality, _resolve_province_city):
        resolved = resolver(cleaned)
        if resolved:
            return resolved
    # Nothing recognised: fall back to a truncated copy of the cleaned text.
    return cleaned[:12]
def _cleanup_location_text(value: str) -> str:
    """Strip whitespace, labels, travel verbs and activity noise from *value*.

    Steps, in order:
      1. Coerce a falsy *value* to ``""`` and remove all whitespace.
      2. Trim separator punctuation from both ends.
      3. Drop a leading field label ("地点", "业务地点", "发生地点") followed
         by a colon.
      4. Drop a leading travel verb ("去", "到", "赴", "前往").
      5. Remove everything from the first keyword matched by
         ``LOCATION_NOISE_PATTERN`` to the end of the string.
      6. Trim separator punctuation from both ends again.

    Returns:
        The cleaned text, possibly empty.
    """
    # Separators are accepted in both ASCII and fullwidth form, because
    # Chinese text normally uses the fullwidth colon/comma/semicolon. The
    # fullwidth characters are written as escapes so they cannot be silently
    # folded to their ASCII look-alikes by tooling:
    #   \uff1a = fullwidth colon, \uff0c = fullwidth comma,
    #   \uff1b = fullwidth semicolon.
    edge_punctuation = ":,;\uff1a\uff0c\uff1b。、"

    text = re.sub(r"\s+", "", str(value or ""))
    text = text.strip(edge_punctuation)
    text = re.sub(r"^(?:地点|业务地点|发生地点)[:\uff1a]", "", text)
    text = re.sub(r"^(?:去|到|赴|前往)", "", text)
    text = LOCATION_NOISE_PATTERN.sub("", text)
    return text.strip(edge_punctuation)
def _resolve_direct_municipality(text: str) -> str:
    """Return the municipality display name that *text* starts with.

    Scans ``DIRECT_MUNICIPALITY_DISPLAY`` in insertion order and returns the
    display value of the first key that is a prefix of *text*, or ``""`` when
    no key matches.
    """
    return next(
        (
            label
            for prefix, label in DIRECT_MUNICIPALITY_DISPLAY.items()
            if text.startswith(prefix)
        ),
        "",
    )
def _resolve_province_city(text: str) -> str:
    """Resolve *text* to a "<province><city>" display string.

    Two strategies are tried:

    1. If *text* starts with a key of ``PROVINCE_ALIASES``, the rest of the
       text is searched for a known city. The result is the province display
       name followed by the city, or the province alone when no city is found.
    2. Otherwise *text* itself is searched for a known city, and the city is
       prefixed with its province from ``CITY_TO_PROVINCE`` when one is
       registered.

    Returns:
        The resolved display string, or ``""`` when neither a province nor a
        city is recognised.
    """
    for province_alias, province_display in PROVINCE_ALIASES.items():
        if not text.startswith(province_alias):
            continue
        # Trim administrative-suffix characters and separators from the FRONT
        # only. Trimming the end as well would eat the final "州" of names
        # such as "广州" or "杭州" (that character is in the trim set), so the
        # city would no longer match. Trailing text is harmless because the
        # city lookup is prefix-based. \uff0c is the fullwidth comma.
        remainder = text[len(province_alias):].lstrip("省市地区自治州盟,\uff0c、")
        if not remainder:
            return province_display
        city = _resolve_city_name(remainder)
        return f"{province_display}{city}" if city else province_display
    city = _resolve_city_name(text)
    if city:
        province = CITY_TO_PROVINCE.get(city)
        return f"{province}{city}" if province else city
    return ""
def _resolve_city_name(text: str) -> str:
    """Return the display name of the known city that *text* starts with.

    Leading and trailing "," / "、" separators are ignored. Keys of
    ``CITY_TO_PROVINCE`` are tried longest first, so a longer name wins over a
    shorter name that is also a prefix of it. Returns ``""`` when the trimmed
    text is empty or starts with no known city.
    """
    candidate = text.strip(",、")
    if candidate:
        longest_first = sorted(CITY_TO_PROVINCE, key=len, reverse=True)
        for name in longest_first:
            if candidate.startswith(name):
                return _display_city_name(name)
    return ""
def _display_city_name(city: str) -> str:
    """Return the short display form of *city*.

    The full prefecture name "伊犁哈萨克自治州" is shortened to "伊犁"; every
    other name is returned unchanged.
    """
    if city == "伊犁哈萨克自治州":
        return "伊犁"
    # NOTE(review): this previously went through str.removesuffix(""), which
    # never removes anything. If an administrative suffix was meant to be
    # stripped here, it has to be specified explicitly.
    return city