Files
markbase/filetree/src/convert.rs
2026-05-30 14:08:55 +08:00

244 lines
7.2 KiB
Rust

use anyhow::{Context, Result};
use std::path::{Path, PathBuf};
use std::process::Command;
/// Directory in which converted previews are cached. The path is relative,
/// so it is resolved against the process's current working directory.
const CACHE_DIR: &str = "data/cache";
// Phase 1: built-in macOS tools (no installation)
/// Extensions that `convert_document` converts to plain text with `textutil`.
const TEXTUTIL_FORMATS: &[&str] = &["docx", "doc", "rtf"];
/// Extensions for which `convert_document` first tries to unzip an embedded
/// preview image, falling back to a `qlmanage` PNG.
const APPLE_FORMATS: &[&str] = &["pages", "key", "numbers"];
// Phase 2: soffice/qlmanage fallback
/// Extensions that `convert_document` converts to PDF with `soffice`, falling
/// back to a `qlmanage` PNG when that fails.
const SOFFICE_FORMATS: &[&str] = &["pptx", "ppt", "xlsx", "xls", "odt", "epub"];
/// Returns `true` when `ext` appears in any of the convertible-format tables
/// (`TEXTUTIL_FORMATS`, `APPLE_FORMATS` or `SOFFICE_FORMATS`).
///
/// The match is exact and case-sensitive; the tables hold lowercase,
/// dot-less extensions such as `"docx"`.
pub fn is_document_ext(ext: &str) -> bool {
    [TEXTUTIL_FORMATS, APPLE_FORMATS, SOFFICE_FORMATS]
        .iter()
        .any(|table| table.contains(&ext))
}
/// Returns `true` when `ext` is one of the `TEXTUTIL_FORMATS` entries.
///
/// The comparison is exact and case-sensitive.
pub fn is_textutil_ext(ext: &str) -> bool {
    TEXTUTIL_FORMATS.iter().any(|&known| known == ext)
}
/// Returns `true` when `ext` is one of the `APPLE_FORMATS` entries.
///
/// The comparison is exact and case-sensitive.
pub fn is_apple_format_ext(ext: &str) -> bool {
    APPLE_FORMATS.iter().any(|candidate| *candidate == ext)
}
pub fn get_cached_preview(file_uuid: &str, ext: &str) -> Option<(PathBuf, &'static str)> {
if TEXTUTIL_FORMATS.contains(&ext) {
get_cached_txt(file_uuid).map(|p| (p, "text/plain; charset=utf-8"))
} else if APPLE_FORMATS.contains(&ext) {
get_cached_jpg(file_uuid).map(|p| (p, "image/jpeg"))
} else {
get_cached_pdf(file_uuid).map(|p| (p, "application/pdf"))
}
}
/// Returns the path `CACHE_DIR/<file_uuid>.txt` if that path exists on disk,
/// otherwise `None`.
pub fn get_cached_txt(file_uuid: &str) -> Option<PathBuf> {
    let candidate = Path::new(CACHE_DIR).join(format!("{}.txt", file_uuid));
    if candidate.exists() {
        Some(candidate)
    } else {
        None
    }
}
pub fn get_cached_jpg(file_uuid: &str) -> Option<PathBuf> {
let p = Path::new(CACHE_DIR).join(format!("{}.jpg", file_uuid));
p.exists().then_some(p)
}
/// Returns the path `CACHE_DIR/<file_uuid>.pdf` if that path exists on disk,
/// otherwise `None`.
pub fn get_cached_pdf(file_uuid: &str) -> Option<PathBuf> {
    let file_name = format!("{}.pdf", file_uuid);
    let cached = Path::new(CACHE_DIR).join(file_name);
    let present = cached.exists();
    present.then(|| cached)
}
/// Returns the path `CACHE_DIR/<file_uuid>.png` if that path exists on disk,
/// otherwise `None`.
pub fn get_cached_png(file_uuid: &str) -> Option<PathBuf> {
    let cached = Path::new(CACHE_DIR).join(format!("{}.png", file_uuid));
    if !cached.exists() {
        return None;
    }
    Some(cached)
}
/// Converts `input_path` into a browser-viewable preview stored in the cache
/// under `file_uuid`, returning the preview path and its MIME type.
///
/// The converter is picked from the lowercased file extension:
///
/// * `TEXTUTIL_FORMATS` — `textutil` plain text; a failure is returned as-is.
/// * `APPLE_FORMATS` — embedded preview unzipped from the package; on failure
///   the error is logged to stderr and a `qlmanage` PNG is produced instead.
/// * `SOFFICE_FORMATS` — `soffice` PDF; on failure the error is logged to
///   stderr and a `qlmanage` PNG is produced instead.
/// * anything else — `qlmanage` PNG.
///
/// # Errors
///
/// Returns the error of the last converter attempted.
pub fn convert_document(input_path: &Path, file_uuid: &str) -> Result<(PathBuf, &'static str)> {
    let ext = input_path
        .extension()
        .and_then(|e| e.to_str())
        .map(str::to_lowercase)
        .unwrap_or_default();
    let ext = ext.as_str();

    // Phase 1: built-in tools (fast, no installation)
    if TEXTUTIL_FORMATS.contains(&ext) {
        return textutil_to_txt(input_path, file_uuid)
            .map(|txt| (txt, "text/plain; charset=utf-8"));
    }

    if APPLE_FORMATS.contains(&ext) {
        match unzip_preview_jpg(input_path, file_uuid) {
            Ok(jpg) => return Ok((jpg, "image/jpeg")),
            Err(e) => eprintln!("[markbase] unzip preview failed for {}: {}", file_uuid, e),
        }
    } else if SOFFICE_FORMATS.contains(&ext) {
        // Phase 2: soffice for Office formats
        match soffice_to_pdf(input_path, file_uuid) {
            Ok(pdf) => return Ok((pdf, "application/pdf")),
            Err(e) => eprintln!("[markbase] soffice failed for {}: {}", file_uuid, e),
        }
    }

    // Shared fallback for failed conversions and unrecognised extensions.
    qlmanage_to_png(input_path, file_uuid, 2048).map(|png| (png, "image/png"))
}
// ─── Phase 1: textutil (macOS built-in, .docx/.doc/.rtf → .txt) ───
fn textutil_to_txt(input_path: &Path, file_uuid: &str) -> Result<PathBuf> {
let cache_dir = Path::new(CACHE_DIR);
std::fs::create_dir_all(cache_dir)?;
let output = cache_dir.join(format!("{}.txt", file_uuid));
if output.exists() {
return Ok(output);
}
let out = Command::new("textutil")
.args(["-convert", "txt", "-output"])
.arg(&output)
.arg(input_path)
.output()
.context("Failed to run textutil")?;
if !out.status.success() {
anyhow::bail!("textutil: {}", String::from_utf8_lossy(&out.stderr).trim());
}
if output.exists() {
Ok(output)
} else {
anyhow::bail!("textutil did not produce {}", output.display())
}
}
// ─── Phase 1: unzip preview.jpg from iWork packages ───
/// Extracts an embedded preview from the zip package at `input_path` and
/// stores it in the cache as `CACHE_DIR/<file_uuid>.jpg`.
///
/// An existing cache file is returned without unzipping again. Otherwise the
/// package is unzipped into the scratch directory `CACHE_DIR/_tmp_<file_uuid>`,
/// the first matching preview file is copied to the cache, and the scratch
/// directory is removed again on every exit path, success or failure.
///
/// # Errors
///
/// Fails when the cache directory cannot be created, `unzip` cannot be
/// started or exits unsuccessfully, the preview cannot be copied, or the
/// package contains none of the known preview file names.
fn unzip_preview_jpg(input_path: &Path, file_uuid: &str) -> Result<PathBuf> {
    let cache_dir = Path::new(CACHE_DIR);
    std::fs::create_dir_all(cache_dir)?;
    let output = cache_dir.join(format!("{}.jpg", file_uuid));
    if output.exists() {
        return Ok(output);
    }

    let tmp = cache_dir.join(format!("_tmp_{}", file_uuid));
    // Best-effort: clear leftovers from an earlier, interrupted extraction.
    let _ = std::fs::remove_dir_all(&tmp);

    let extracted = extract_iwork_preview(input_path, &tmp, &output);
    // Remove the scratch directory regardless of the outcome so a failed
    // unzip or copy does not leave extracted package contents in the cache.
    let _ = std::fs::remove_dir_all(&tmp);

    extracted?;
    Ok(output)
}

/// Unzips `input_path` into `tmp` and copies the first preview file found at
/// the top level of `tmp` to `output`. Does not remove `tmp`.
fn extract_iwork_preview(input_path: &Path, tmp: &Path, output: &Path) -> Result<()> {
    let out = Command::new("unzip")
        .args(["-o", "-d"])
        .arg(tmp)
        .arg(input_path)
        .output()
        .context("Failed to unzip iWork package")?;
    if !out.status.success() {
        anyhow::bail!("unzip: {}", String::from_utf8_lossy(&out.stderr).trim());
    }

    // Candidates are tried in this order; the first one that exists wins.
    // NOTE(review): a `preview.png` or `preview.pdf` hit is stored under the
    // `.jpg` cache name, and `convert_document` labels it `image/jpeg` —
    // confirm this is intended.
    for name in [
        "preview.jpg",
        "preview.png",
        "preview.pdf",
        "preview-web.jpg",
    ]
    .iter()
    {
        let src = tmp.join(name);
        if src.exists() {
            if let Err(err) = std::fs::copy(&src, output) {
                // A partially written file would otherwise be treated as a
                // valid cache hit on the next request.
                let _ = std::fs::remove_file(output);
                return Err(err.into());
            }
            return Ok(());
        }
    }
    anyhow::bail!("no preview found in iWork package")
}
// ─── Phase 2: soffice (LibreOffice, multi-page PDF) ───
/// Converts `input_path` to PDF with headless `soffice`, leaving the result at
/// `CACHE_DIR/<file_uuid>.pdf`.
///
/// An existing cache file is returned without running the tool again. The
/// converter is pointed at the cache directory; the file it is expected to
/// produce there, `<input file stem>.pdf`, is renamed to the uuid-based name.
///
/// # Errors
///
/// Fails when the cache directory cannot be created, `soffice` cannot be
/// started or exits unsuccessfully, the rename fails, or no PDF exists at the
/// expected path afterwards.
fn soffice_to_pdf(input_path: &Path, file_uuid: &str) -> Result<PathBuf> {
    let cache_dir = Path::new(CACHE_DIR);
    std::fs::create_dir_all(cache_dir)?;

    let pdf_path = cache_dir.join(format!("{}.pdf", file_uuid));
    if pdf_path.exists() {
        return Ok(pdf_path);
    }

    let run = Command::new("soffice")
        .arg("--headless")
        .arg("--convert-to")
        .arg("pdf")
        .arg("--outdir")
        .arg(cache_dir)
        .arg(input_path)
        .output()
        .context("Failed to run soffice")?;
    if !run.status.success() {
        anyhow::bail!("soffice: {}", String::from_utf8_lossy(&run.stderr).trim());
    }

    // The tool's output is looked up under the input's stem, not the uuid.
    let stem = match input_path.file_stem().and_then(|s| s.to_str()) {
        Some(stem) => stem,
        None => "unknown",
    };
    let produced = cache_dir.join(format!("{}.pdf", stem));
    if produced.exists() && produced != pdf_path {
        std::fs::rename(&produced, &pdf_path)?;
    }

    if !pdf_path.exists() {
        anyhow::bail!("soffice did not produce {}", pdf_path.display());
    }
    Ok(pdf_path)
}
// ─── Phase 2: qlmanage (Apple QuickLook, PNG thumbnail) ───
/// Renders a PNG thumbnail of `input_path` with `qlmanage`, leaving the
/// result at `CACHE_DIR/<file_uuid>.png`.
///
/// `size` is passed to `qlmanage` through its `-s` option. An existing cache
/// file is returned without running the tool again. The thumbnail is expected
/// in the cache directory as `<input file name>.png` and is renamed to the
/// uuid-based name.
///
/// # Errors
///
/// Fails when the cache directory cannot be created, `qlmanage` cannot be
/// started or exits unsuccessfully, the rename fails, or no PNG exists at the
/// expected path afterwards.
fn qlmanage_to_png(input_path: &Path, file_uuid: &str, size: u32) -> Result<PathBuf> {
    let cache_dir = Path::new(CACHE_DIR);
    std::fs::create_dir_all(cache_dir)?;

    let png_path = cache_dir.join(format!("{}.png", file_uuid));
    if png_path.exists() {
        return Ok(png_path);
    }

    let size_arg = size.to_string();
    let run = Command::new("qlmanage")
        .arg("-t")
        .arg("-s")
        .arg(&size_arg)
        .arg("-o")
        .arg(cache_dir)
        .arg(input_path)
        .output()
        .context("Failed to run qlmanage")?;
    if !run.status.success() {
        anyhow::bail!("qlmanage: {}", String::from_utf8_lossy(&run.stderr).trim());
    }

    // The thumbnail is looked up under the full input file name plus `.png`.
    let source_name = match input_path.file_name().and_then(|s| s.to_str()) {
        Some(name) => name,
        None => "unknown",
    };
    let produced = cache_dir.join(format!("{}.png", source_name));
    if produced.exists() && produced != png_path {
        std::fs::rename(&produced, &png_path)?;
    }

    if !png_path.exists() {
        anyhow::bail!("qlmanage did not produce {}", png_path.display());
    }
    Ok(png_path)
}