test: add unified probe unit tests (8 Rust + 6 Python), fix pre-existing test compilation errors

This commit is contained in:
Accusys
2026-05-15 14:58:44 +08:00
parent f66557f898
commit 0e73d2a2ce
4 changed files with 169 additions and 3 deletions

View File

@@ -0,0 +1,92 @@
#!/opt/homebrew/bin/python3.11
"""Unit tests for probe_file.py"""
import sys
import json
import os
import unittest
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import probe_file # noqa: E402
class TestProbePDF(unittest.TestCase):
def setUp(self):
self.path = "/tmp/_test_probe.pdf"
if not os.path.exists(self.path):
from PyPDF2 import PdfWriter
w = PdfWriter()
w.add_blank_page(210, 297)
with open(self.path, 'wb') as f:
w.write(f)
def test_pdf_has_pages(self):
result = json.loads(probe_file.probe(self.path))
self.assertIn("document", result)
self.assertGreater(result["document"]["pages"], 0)
def test_pdf_has_streams(self):
result = json.loads(probe_file.probe(self.path))
self.assertEqual(result["streams"], [])
class TestProbeDOCX(unittest.TestCase):
def setUp(self):
self.path = "/tmp/_test_probe.docx"
if not os.path.exists(self.path):
from docx import Document
d = Document()
d.add_paragraph("Test paragraph.")
d.save(self.path)
def test_docx_has_paragraphs(self):
result = json.loads(probe_file.probe(self.path))
self.assertIn("document", result)
self.assertGreater(result["document"]["paragraphs"], 0)
class TestProbeXLSX(unittest.TestCase):
def setUp(self):
self.path = "/tmp/_test_probe.xlsx"
if not os.path.exists(self.path):
import openpyxl
wb = openpyxl.Workbook()
ws = wb.active
ws.title = "Sheet1"
ws.append(["A", 1])
wb.save(self.path)
def test_xlsx_has_sheets(self):
result = json.loads(probe_file.probe(self.path))
self.assertIn("spreadsheet", result)
self.assertGreater(result["spreadsheet"]["sheet_count"], 0)
class TestProbePPTX(unittest.TestCase):
def setUp(self):
self.path = "/tmp/_test_probe.pptx"
if not os.path.exists(self.path):
from pptx import Presentation
prs = Presentation()
slide = prs.slides.add_slide(prs.slide_layouts[0])
slide.shapes.title.text = "Test"
prs.save(self.path)
def test_pptx_has_slides(self):
result = json.loads(probe_file.probe(self.path))
self.assertIn("presentation", result)
self.assertGreater(result["presentation"]["slide_count"], 0)
class TestProbeUnknown(unittest.TestCase):
def test_unknown_extension(self):
path = "/tmp/_test_probe.xyz"
with open(path, 'w') as f:
f.write("test")
result = json.loads(probe_file.probe(path))
os.remove(path)
self.assertEqual(result["streams"], [])
# Script entry point: hand control to the unittest runner only when this file
# is executed directly, not when it is imported.
if __name__ == "__main__":
    unittest.main()

View File

@@ -821,6 +821,8 @@ mod tests {
ocr_text: None,
has_face: None,
speaker_id: None,
chunk_type: None,
co_appears_with_trace_id: None,
min_confidence: Some(0.8),
min_unique_classes: Some(3),
min_spatial_density: Some(0.5),

View File

@@ -4912,16 +4912,19 @@ mod tests {
started_at: Some(
chrono::DateTime::parse_from_rfc3339("2024-01-01T10:00:00Z")
.unwrap()
.with_timezone(&chrono::Utc),
.with_timezone(&chrono::Utc)
.to_rfc3339(),
),
updated_at: Some(
chrono::DateTime::parse_from_rfc3339("2024-01-01T10:05:00Z")
.unwrap()
.with_timezone(&chrono::Utc),
.with_timezone(&chrono::Utc)
.to_rfc3339(),
),
created_at: chrono::DateTime::parse_from_rfc3339("2024-01-01T09:55:00Z")
.unwrap()
.into(),
.with_timezone(&chrono::Utc)
.to_rfc3339(),
processors: vec!["asr".to_string(), "cut".to_string()],
completed_processors: vec!["asr".to_string()],
failed_processors: vec![],

View File

@@ -1,6 +1,75 @@
use std::path::Path;
use std::time::SystemTime;
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Asserts that every file name in `names` is classified as `expected`.
    ///
    /// The failing file name is included in the panic message so a broken
    /// extension mapping can be identified without counting assertion lines.
    fn assert_category(expected: FileCategory, names: &[&str]) {
        for name in names {
            assert_eq!(
                detect_category(Path::new(name)),
                expected,
                "unexpected category for {:?}",
                name
            );
        }
    }

    #[test]
    fn test_detect_category_video() {
        assert_category(
            FileCategory::Video,
            &["video.mp4", "video.mov", "video.mkv", "video.avi"],
        );
    }

    #[test]
    fn test_detect_category_image() {
        assert_category(
            FileCategory::Image,
            &["photo.jpg", "photo.jpeg", "photo.png", "photo.svg", "photo.webp"],
        );
    }

    #[test]
    fn test_detect_category_document() {
        assert_category(
            FileCategory::Document,
            &["doc.pdf", "doc.docx", "doc.pages", "doc.txt"],
        );
    }

    #[test]
    fn test_detect_category_spreadsheet() {
        assert_category(
            FileCategory::Spreadsheet,
            &["data.xlsx", "data.csv", "data.numbers"],
        );
    }

    #[test]
    fn test_detect_category_presentation() {
        assert_category(FileCategory::Presentation, &["deck.pptx", "deck.key"]);
    }

    #[test]
    fn test_detect_category_archive() {
        assert_category(FileCategory::Archive, &["files.zip", "files.tar.gz"]);
    }

    #[test]
    fn test_detect_category_unknown() {
        // Covers both an unrecognised extension and a name with no extension.
        assert_category(FileCategory::Unknown, &["file.xyz", "file"]);
    }

    #[test]
    fn test_base_format_info() {
        // The process id keeps concurrent test runs on the same machine from
        // sharing one fixture; the ".txt" extension is kept because the
        // assertions below expect it.
        let tmp = std::env::temp_dir()
            .join(format!("_test_unified_probe_{}.txt", std::process::id()));
        fs::write(&tmp, b"hello probe").expect("failed to create probe fixture in temp dir");

        let info = base_format_info(&tmp);

        // Remove the fixture before asserting so a failed assertion (which
        // panics) cannot leave the file behind.
        // NOTE(review): assumes `info` is an owned value that no longer needs
        // the file on disk once `base_format_info` has returned — confirm.
        let _ = fs::remove_file(&tmp);

        assert_eq!(info["file_type"], "document");
        assert_eq!(info["format_name"], "txt");
        assert!(!info["size"].as_str().unwrap_or("").is_empty());
        assert!(!info["mtime"].as_str().unwrap_or("").is_empty());
    }
}
/// File category derived from extension
#[derive(Debug, Clone, PartialEq)]
pub enum FileCategory {