1. 修改了一些bug
2. 做了一些调整,比如启动脚本,支持了tensorboard
This commit is contained in:
@@ -204,44 +204,67 @@ def get_trained_models():
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
# 使用 /app/base/saves 目录(容器内路径)
|
||||
saves_base_path = '/app/base/saves'
|
||||
# 本地开发时的备用路径
|
||||
local_saves_path = os.path.join(PROJECT_ROOT, 'saves')
|
||||
# 多个可能的路径
|
||||
potential_paths = [
|
||||
'/app/base/saves', # 容器内路径
|
||||
os.path.join(PROJECT_ROOT, 'saves'), # 本地开发路径
|
||||
os.path.join(os.path.dirname(os.path.dirname(PROJECT_ROOT)), 'YG_FT_Base', 'saves'), # 上级目录
|
||||
]
|
||||
|
||||
# 选择存在的路径
|
||||
base_path = saves_base_path if os.path.exists(saves_base_path) else local_saves_path
|
||||
base_path = None
|
||||
for path in potential_paths:
|
||||
logger.info(f"[DEBUG] 检查路径: {path}, exists: {os.path.exists(path)}")
|
||||
if os.path.exists(path):
|
||||
base_path = path
|
||||
break
|
||||
|
||||
logger.info(f"[DEBUG] 已训练模型目录: {base_path}, exists: {os.path.exists(base_path)}")
|
||||
logger.info(f"[DEBUG] 最终使用的路径: {base_path}")
|
||||
|
||||
models = []
|
||||
if os.path.exists(base_path):
|
||||
for item in os.listdir(base_path):
|
||||
item_path = os.path.join(base_path, item)
|
||||
if os.path.isdir(item_path):
|
||||
# 检查是否是模板目录(包含训练方法的子目录)
|
||||
sub_items = []
|
||||
if os.path.exists(item_path):
|
||||
for sub_item in os.listdir(item_path):
|
||||
sub_path = os.path.join(item_path, sub_item)
|
||||
if os.path.isdir(sub_path):
|
||||
# 检查是否包含模型文件(adapter_model.bin 或 pytorch_model.bin 等)
|
||||
has_model = False
|
||||
for f in os.listdir(sub_path):
|
||||
if f.endswith('.bin') or f.endswith('.safetensors'):
|
||||
has_model = True
|
||||
break
|
||||
if has_model:
|
||||
sub_items.append({
|
||||
'name': sub_item,
|
||||
'path': sub_path
|
||||
})
|
||||
if base_path and os.path.exists(base_path):
|
||||
logger.info(f"[DEBUG] 遍历目录: {base_path}")
|
||||
try:
|
||||
# 路径结构: /app/base/saves/{train_method}/{model_name}/
|
||||
# train_method: lora, full, qlora, dpo, cpt 等
|
||||
|
||||
models.append({
|
||||
'name': item,
|
||||
'path': item_path,
|
||||
'train_methods': sub_items
|
||||
})
|
||||
for train_method in os.listdir(base_path):
|
||||
train_method_path = os.path.join(base_path, train_method)
|
||||
if not os.path.isdir(train_method_path):
|
||||
continue
|
||||
|
||||
logger.info(f"[DEBUG] 检查训练方法目录: {train_method}")
|
||||
model_count = 0
|
||||
|
||||
# 遍历模型文件夹
|
||||
for model_name in os.listdir(train_method_path):
|
||||
model_path = os.path.join(train_method_path, model_name)
|
||||
if not os.path.isdir(model_path):
|
||||
continue
|
||||
|
||||
# 检查是否有模型文件
|
||||
try:
|
||||
files = os.listdir(model_path)
|
||||
logger.info(f"[DEBUG] {train_method}/{model_name} 文件: {files[:5]}...")
|
||||
has_model = any(f.endswith('.bin') or f.endswith('.safetensors') for f in files)
|
||||
|
||||
if has_model:
|
||||
logger.info(f"[DEBUG] 找到模型: {train_method}/{model_name}")
|
||||
models.append({
|
||||
'name': model_name,
|
||||
'path': model_path,
|
||||
'train_methods': [{
|
||||
'name': train_method,
|
||||
'path': model_path
|
||||
}]
|
||||
})
|
||||
model_count += 1
|
||||
except Exception as file_err:
|
||||
logger.error(f"[DEBUG] 读取 {model_path} 失败: {file_err}")
|
||||
|
||||
logger.info(f"[DEBUG] {train_method} 找到 {model_count} 个模型")
|
||||
|
||||
except Exception as list_err:
|
||||
logger.error(f"[DEBUG] 遍历目录失败: {list_err}")
|
||||
|
||||
logger.info(f"[DEBUG] 找到 {len(models)} 个已训练模型")
|
||||
|
||||
@@ -249,7 +272,7 @@ def get_trained_models():
|
||||
'code': 0,
|
||||
'data': {
|
||||
'models': models,
|
||||
'base_path': base_path
|
||||
'base_path': base_path or ''
|
||||
}
|
||||
})
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user