chore: backup before migration to new repo

This commit is contained in:
Warren
2026-04-23 16:46:02 +08:00
parent 13dd3b30f3
commit 59809dae1f
40 changed files with 5566 additions and 1783 deletions

View File

@@ -1,12 +1,8 @@
#!/opt/homebrew/bin/python3.11
"""
Story Processor - Generate parent-child chunk hierarchy for RAG
Uses video analysis (ASR, YOLO, OCR) to create parent chunks that summarize child chunks.
Parent-Child Chunk Strategy:
- Parent chunks: Summarize multiple scenes/segments with narrative description
- Child chunks: Individual ASR segments, OCR texts, detected objects
- When embedding: Parent description + Child content for better retrieval
Uses LOCAL video analysis (ASR, YOLO, OCR, Scene) to create parent chunks.
NO cloud API calls - fully offline processing
"""
import sys
@@ -47,57 +43,59 @@ def generate_parent_child_chunks(
cut_data: Dict,
yolo_data: Dict,
ocr_data: Dict,
scene_data: Dict,
parent_chunk_size: int = 5,
) -> Dict[str, Any]:
) -> Dict:
"""
Generate parent-child chunk hierarchy.
Parent chunks summarize multiple child chunks for better RAG retrieval.
Child chunks are individual segments from ASR, scenes from CUT, etc.
Generate parent-child chunk hierarchy using LOCAL data only.
No LLM/API calls - uses template-based narrative generation.
"""
child_chunks = []
parent_chunks = []
# Get source data
asr_segments = asr_data.get("segments", [])
cut_scenes = cut_data.get("scenes", [])
yolo_frames = yolo_data.get("frames", [])
_ocr_frames = ocr_data.get("frames", [])
# Create child chunks from ASR segments
asr_child_ids = []
for i, seg in enumerate(asr_segments):
child_chunk = {
"chunk_id": f"asr_{i:04d}",
"chunk_type": "sentence",
"source": "asr",
"start_time": seg.get("start", 0),
"end_time": seg.get("end", 0),
"text_content": seg.get("text", ""),
"content": seg,
"child_chunk_ids": [],
"parent_chunk_id": None,
}
child_chunks.append(child_chunk)
asr_child_ids.append(child_chunk["chunk_id"])
# Create child chunks from ASR
for seg in asr_data.get("segments", []):
child_chunks.append(
{
"chunk_id": f"asr_{seg.get('start', 0):.1f}_{seg.get('end', 0):.1f}",
"chunk_type": "asr",
"source": "asr",
"start_time": seg.get("start", 0),
"end_time": seg.get("end", 0),
"text_content": seg.get("text", ""),
"content": {
"text": seg.get("text", ""),
"confidence": seg.get("confidence", 0),
},
"child_chunk_ids": [],
"parent_chunk_id": None,
}
)
# Create child chunks from CUT scenes
cut_child_ids = []
for i, scene in enumerate(cut_scenes):
child_chunk = {
"chunk_id": f"cut_{i:04d}",
"chunk_type": "cut",
"source": "cut",
"start_time": scene.get("start_time", scene.get("start", 0)),
"end_time": scene.get("end_time", scene.get("end", 0)),
"text_content": None,
"content": scene,
"child_chunk_ids": [],
"parent_chunk_id": None,
}
child_chunks.append(child_chunk)
cut_child_ids.append(child_chunk["chunk_id"])
for scene in cut_data.get("scenes", []):
child_chunks.append(
{
"chunk_id": f"cut_{scene.get('scene_number', 0)}",
"chunk_type": "cut",
"source": "cut",
"start_time": scene.get("start_time", 0),
"end_time": scene.get("end_time", 0),
"text_content": f"Scene {scene.get('scene_number', 0)}",
"content": {
"scene_number": scene.get("scene_number", 0),
"duration": scene.get("duration", 0),
},
"child_chunk_ids": [],
"parent_chunk_id": None,
}
)
asr_child_ids = [c["chunk_id"] for c in child_chunks if c["source"] == "asr"]
cut_child_ids = [c["chunk_id"] for c in child_chunks if c["source"] == "cut"]
yolo_frames = yolo_data.get("frames", [])
ocr_frames = ocr_data.get("frames", [])
# Group ASR segments into parent chunks
for i in range(0, len(asr_child_ids), parent_chunk_size):
@@ -105,7 +103,6 @@ def generate_parent_child_chunks(
if not batch:
continue
# Collect text from child chunks
batch_texts = []
batch_objects = []
batch_times = []
@@ -118,11 +115,16 @@ def generate_parent_child_chunks(
batch_times.append((child["start_time"], child["end_time"]))
break
# Create parent chunk with narrative description
start_time = batch_times[0][0] if batch_times else 0
end_time = batch_times[-1][1] if batch_times else 0
# Generate narrative description
# Find objects in this time range
for frame in yolo_frames[:50]:
ts = frame.get("timestamp", 0)
if start_time <= ts <= end_time:
for obj in frame.get("objects", []):
batch_objects.append(obj.get("class_name", "unknown"))
narrative = generate_narrative(batch_texts, batch_objects, start_time, end_time)
parent_chunk = {
@@ -136,13 +138,13 @@ def generate_parent_child_chunks(
"description": narrative,
"child_count": len(batch),
"speech_preview": " ".join(batch_texts[:3]) if batch_texts else None,
"detected_objects": list(set(batch_objects))[:5],
},
"child_chunk_ids": batch,
"parent_chunk_id": None,
}
parent_chunks.append(parent_chunk)
# Update child chunks with parent reference
for child_id in batch:
for child in child_chunks:
if child["chunk_id"] == child_id:
@@ -167,14 +169,12 @@ def generate_parent_child_chunks(
start_time = batch_times[0][0] if batch_times else 0
end_time = batch_times[-1][1] if batch_times else 0
# Find objects in this time range from YOLO
for frame in yolo_frames[:100]: # Sample frames
for frame in yolo_frames[:50]:
ts = frame.get("timestamp", 0)
if start_time <= ts <= end_time:
for obj in frame.get("objects", []):
batch_objects.append(obj.get("class_name", "unknown"))
# Generate scene narrative
narrative = generate_scene_narrative(
batch_objects, start_time, end_time, len(batch)
)
@@ -190,14 +190,13 @@ def generate_parent_child_chunks(
"description": narrative,
"child_count": len(batch),
"scenes": batch,
"detected_objects": list(set(batch_objects))[:10],
"detected_objects": list(set(batch_objects))[:5],
},
"child_chunk_ids": batch,
"parent_chunk_id": None,
}
parent_chunks.append(parent_chunk)
# Update child chunks with parent reference
for child_id in batch:
for child in child_chunks:
if child["chunk_id"] == child_id:
@@ -219,27 +218,33 @@ def generate_parent_child_chunks(
def generate_narrative(
texts: List[str], objects: List[str], start: float, end: float
) -> str:
"""Generate narrative description from text snippets"""
if not texts:
"""Generate narrative description from LOCAL text snippets and objects"""
if not texts and not objects:
return f"Video segment from {start:.1f}s to {end:.1f}s"
# Combine and summarize
combined = " ".join(texts)
if len(combined) > 200:
combined = combined[:200] + "..."
parts = []
if texts:
combined = " ".join(texts[:5])
if len(combined) > 150:
combined = combined[:150] + "..."
parts.append(f"Speech: {combined}")
return f"[{start:.0f}s-{end:.0f}s] {combined}"
if objects:
unique_objs = list(set(objects))[:5]
parts.append(f"Visuals: {', '.join(unique_objs)}")
return f"[{start:.0f}s-{end:.0f}s] {' | '.join(parts)}"
def generate_scene_narrative(
objects: List[str], start: float, end: float, scene_count: int
) -> str:
"""Generate scene narrative from detected objects"""
"""Generate scene narrative from LOCAL detected objects"""
unique_objects = list(set(objects))[:5]
if unique_objects:
obj_str = ", ".join(unique_objects)
return f"[{start:.0f}s-{end:.0f}s] Scenes {scene_count} segments. Visual: {obj_str}."
return f"[{start:.0f}s-{end:.0f}s] {scene_count} scenes. Visuals: {obj_str}."
else:
return f"[{start:.0f}s-{end:.0f}s] {scene_count} video scenes."
@@ -251,70 +256,45 @@ def run_story(
if publisher:
publisher.info("story", "STORY_START")
# Load existing JSON files
base_path = os.path.dirname(output_path)
uuid_name = os.path.basename(output_path).split(".")[0]
# Load analysis data
asr_data = {"segments": []}
cut_data = {"scenes": []}
yolo_data = {"frames": []}
ocr_data = {"frames": []}
scene_data = {"scenes": []}
# Load ASR
asr_path = os.path.join(base_path, f"{uuid_name}.asr.json")
if os.path.exists(asr_path):
with open(asr_path) as f:
asr_data = json.load(f)
if publisher:
publisher.info(
"story", f"Loaded ASR: {len(asr_data.get('segments', []))} segments"
)
for name, data_var in [
("asr", asr_data),
("cut", cut_data),
("yolo", yolo_data),
("ocr", ocr_data),
("scene", scene_data),
]:
path = os.path.join(base_path, f"{uuid_name}.{name}.json")
if os.path.exists(path):
with open(path) as f:
data_var.update(json.load(f))
# Load CUT
cut_path = os.path.join(base_path, f"{uuid_name}.cut.json")
if os.path.exists(cut_path):
with open(cut_path) as f:
cut_data = json.load(f)
if publisher:
publisher.info(
"story", f"Loaded CUT: {len(cut_data.get('scenes', []))} scenes"
)
# Load YOLO
yolo_path = os.path.join(base_path, f"{uuid_name}.yolo.json")
if os.path.exists(yolo_path):
with open(yolo_path) as f:
yolo_data = json.load(f)
# Load OCR
ocr_path = os.path.join(base_path, f"{uuid_name}.ocr.json")
if os.path.exists(ocr_path):
with open(ocr_path) as f:
ocr_data = json.load(f)
# Load metadata
metadata = extract_video_metadata(video_path)
if publisher:
publisher.info("story", "Generating parent-child chunks...")
# Generate parent-child hierarchy
result = generate_parent_child_chunks(
asr_data, cut_data, yolo_data, ocr_data, parent_chunk_size
asr_data, cut_data, yolo_data, ocr_data, scene_data, parent_chunk_size
)
result["metadata"] = metadata
result["parent_chunk_size"] = parent_chunk_size
result["video_metadata"] = extract_video_metadata(video_path)
result["processing"] = {
"method": "local_aggregation",
"cloud_api_used": False,
"parent_chunk_size": parent_chunk_size,
}
with open(output_path, "w") as f:
json.dump(result, f, indent=2, ensure_ascii=False)
if publisher:
stats = result["stats"]
publisher.complete(
"story",
f"{stats['total_parent_chunks']} parents, {stats['total_child_chunks']} children",
f"{result['stats']['total_parent_chunks']} parent, {result['stats']['total_child_chunks']} child chunks (LOCAL)",
)
return result
@@ -322,7 +302,7 @@ def run_story(
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Video Story Generator - Parent-Child Chunks"
description="Story Processor - Parent-Child Chunk Hierarchy (LOCAL ONLY)"
)
parser.add_argument("video_path", help="Path to video file")
parser.add_argument("output_path", help="Output JSON path")
@@ -331,7 +311,7 @@ if __name__ == "__main__":
"--parent-chunk-size",
type=int,
default=5,
help="Number of child chunks per parent chunk",
help="Number of child chunks per parent",
)
args = parser.parse_args()
@@ -340,6 +320,6 @@ if __name__ == "__main__":
args.video_path, args.output_path, args.uuid, args.parent_chunk_size
)
print(
f"Story generated: {result['stats']['total_parent_chunks']} parent chunks, "
f"{result['stats']['total_child_chunks']} child chunks"
f"Story generated: {result['stats']['total_parent_chunks']} parent, "
f"{result['stats']['total_child_chunks']} child chunks (LOCAL)"
)