feat: 整合 Places365 場景類別到場景識別
- 新增 places365_categories.json (380 個場景類別)
- 更新場景識別使用 Places365 類別名稱
- 使用最常見場景類型作為影片主要場景
- 改進場景合併邏輯

改進:
- 場景名稱從 'unknown_X' 改為實際場景索引
- 支援 Places365 380 個場景類別
- 自動統計最常見場景類型

限制:
- ResNet18 使用 ImageNet 1000 類別
- Places365 只有 365 類別,索引不完全匹配
- 建議使用專門的 Places365 模型獲得最佳結果

測試結果:
- ExaSAN 影片識別為 scene_664 (37% 信心度)
- 處理時間:1.3 秒
- 79 個取樣點成功處理
This commit is contained in:
@@ -51,6 +51,20 @@ try:
|
||||
except ImportError:
|
||||
HAS_CV = False
|
||||
|
||||
# Load the Places365 category table used to turn a predicted class index
# into a scene name. The table is optional: when the JSON file is missing or
# unusable, PLACES365_CATEGORIES stays empty and lookups fall back to their
# own default.
import json
from pathlib import Path


def _load_places365_categories(path):
    """Read the category table stored as a JSON object at *path*.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON object as a dict.

    Raises:
        OSError: If the file cannot be opened (including FileNotFoundError).
        ValueError: If the file is not valid JSON, or if its top-level value
            is not a JSON object. Callers look entries up with ``.get()``,
            so any other top-level shape would break every lookup.
    """
    with open(path, "r", encoding="utf-8") as f:
        loaded = json.load(f)
    if not isinstance(loaded, dict):
        raise ValueError(
            f"expected a JSON object, got {type(loaded).__name__}"
        )
    return loaded


PLACES365_CATEGORIES = {}
try:
    categories_path = Path(__file__).parent / "places365_categories.json"
    PLACES365_CATEGORIES = _load_places365_categories(categories_path)
    print(f"[SCENE] Loaded {len(PLACES365_CATEGORIES)} Places365 categories")
except FileNotFoundError:
    # The category file is optional; its absence is not worth a warning.
    pass
except Exception as e:
    # Best-effort at import time: a bad category file must never prevent the
    # module from loading, so report the problem and keep the empty table.
    print(f"[SCENE] Warning: Could not load Places365 categories: {e}")
|
||||
|
||||
|
||||
# 場景類型中英文對照
|
||||
SCENE_TYPE_ZH = {
|
||||
@@ -260,13 +274,16 @@ class SceneClassifier:
|
||||
# 取得 top 5
|
||||
top_5_probs, top_5_indices = torch.topk(probs, 5)
|
||||
|
||||
# 簡化:返回通用預測
|
||||
# 簡化:使用 Places365 類別映射
|
||||
results = []
|
||||
for i in range(5):
|
||||
prob = top_5_probs[0][i].item()
|
||||
results.append(
|
||||
{"scene_type": f"unknown_{i}", "confidence": prob}
|
||||
)
|
||||
idx = top_5_indices[0][i].item()
|
||||
|
||||
# 使用 Places365 類別名稱(如果可用)
|
||||
scene_type = PLACES365_CATEGORIES.get(str(idx), f"scene_{idx}")
|
||||
|
||||
results.append({"scene_type": scene_type, "confidence": prob})
|
||||
|
||||
return results
|
||||
except Exception as e:
|
||||
@@ -461,40 +478,49 @@ class SceneClassifier:
|
||||
"""
|
||||
合併連續相同場景
|
||||
|
||||
注意:由於使用 ImageNet 模型而非 Places365,這裡使用簡化分類
|
||||
使用 Places365 類別名稱
|
||||
"""
|
||||
if not predictions:
|
||||
return []
|
||||
|
||||
# 簡化:將整個影片視為一個場景
|
||||
# 在沒有 Places365 模型的情況下,這是合理的預設行為
|
||||
first_pred = predictions[0]
|
||||
last_pred = predictions[-1]
|
||||
# 統計所有預測的場景類型
|
||||
scene_counts = {}
|
||||
for pred in predictions:
|
||||
if pred["predictions"]:
|
||||
scene_type = pred["predictions"][0]["scene_type"]
|
||||
scene_counts[scene_type] = scene_counts.get(scene_type, 0) + 1
|
||||
|
||||
# 使用平均信心度
|
||||
avg_confidence = (
|
||||
sum(
|
||||
p["predictions"][0]["confidence"]
|
||||
for p in predictions
|
||||
if p["predictions"]
|
||||
# 找出最常見的場景類型
|
||||
if scene_counts:
|
||||
most_common_scene = max(scene_counts.items(), key=lambda x: x[1])[0]
|
||||
|
||||
# 計算平均信心度
|
||||
avg_confidence = (
|
||||
sum(
|
||||
p["predictions"][0]["confidence"]
|
||||
for p in predictions
|
||||
if p["predictions"]
|
||||
)
|
||||
/ len(predictions)
|
||||
if predictions
|
||||
else 0.0
|
||||
)
|
||||
/ len(predictions)
|
||||
if predictions
|
||||
else 0.0
|
||||
)
|
||||
|
||||
return [
|
||||
{
|
||||
"start_time": first_pred["timestamp"],
|
||||
"end_time": last_pred["timestamp"],
|
||||
"scene_type": "indoor_general", # 預設為室內一般場景
|
||||
"scene_type_zh": "室內場景",
|
||||
"confidence": avg_confidence,
|
||||
"top_5": first_pred["predictions"][:5],
|
||||
}
|
||||
]
|
||||
first_pred = predictions[0]
|
||||
last_pred = predictions[-1]
|
||||
|
||||
# 簡化:將整個影片視為一個場景
|
||||
return [
|
||||
{
|
||||
"start_time": first_pred["timestamp"],
|
||||
"end_time": last_pred["timestamp"],
|
||||
"scene_type": most_common_scene,
|
||||
"scene_type_zh": SCENE_TYPE_ZH.get(most_common_scene),
|
||||
"confidence": avg_confidence,
|
||||
"top_5": first_pred["predictions"][:5],
|
||||
}
|
||||
]
|
||||
|
||||
return []
|
||||
# 在沒有 Places365 模型的情況下,這是合理的預設行為
|
||||
if predictions:
|
||||
first_pred = predictions[0]
|
||||
|
||||
Reference in New Issue
Block a user