llama.cpp verification source 2026-05-22
Some checks are pending
Copilot Setup Steps / copilot-setup-steps (push) Waiting to run
Check Pre-Tokenizer Hashes / pre-tokenizer-hashes (push) Waiting to run
Python check requirements.txt / check-requirements (push) Waiting to run
Python Type-Check / python type-check (push) Waiting to run
Update Operations Documentation / update-ops-docs (push) Waiting to run

This commit is contained in:
2026-05-22 16:44:08 +08:00
commit 8e5a449007
2740 changed files with 1155720 additions and 0 deletions

View File

@@ -0,0 +1 @@
0xffff

View File

@@ -0,0 +1,49 @@
#!/bin/sh
#
# Run llama-bench through `adb shell`.
#
# Configuration is taken from environment variables:
#   B        sub-directory of $basedir holding bin/ and lib/ (default: .)
#   S        adb serial, passed to adb as "-s $S"
#   H        adb host, passed to adb as "-H $H"
#   M        model file name below $basedir/../gguf
#   D        value for --device (default: HTP0)
#   V        value for GGML_HEXAGON_VERBOSE (also appends -v)
#   PROF     value for GGML_HEXAGON_PROFILE (also appends -v)
#   OPSTAGE  value for GGML_HEXAGON_OPSTAGE
#   NHVX     value for GGML_HEXAGON_NHVX
#   NDEV     value for GGML_HEXAGON_NDEV
#   HB       value for GGML_HEXAGON_HOSTBUF
# Command-line arguments are appended to the llama-bench command line.

# Basedir on device
basedir=/data/local/tmp/llama.cpp

# Start from an empty option list so that a cli_opts variable inherited from
# the caller's environment cannot leak into the command line.
cli_opts=

branch=.
[ -n "$B" ] && branch=$B

adbserial=
[ -n "$S" ] && adbserial="-s $S"

adbhost=
[ -n "$H" ] && adbhost="-H $H"

model="Llama-3.2-3B-Instruct-Q4_0.gguf"
[ -n "$M" ] && model="$M"

device="HTP0"
[ -n "$D" ] && device="$D"

verbose=
[ -n "$V" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v"

profile=
[ -n "$PROF" ] && profile="GGML_HEXAGON_PROFILE=$PROF" cli_opts="$cli_opts -v"

opmask=
[ -n "$OPSTAGE" ] && opmask="GGML_HEXAGON_OPSTAGE=$OPSTAGE"

nhvx=
[ -n "$NHVX" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"

ndev=
[ -n "$NDEV" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"

hb=
[ -n "$HB" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"

set -x

# $adbserial / $adbhost are left unquoted on purpose: each is either empty or
# a flag plus its value and must split into separate words.
# $* (not $@) is used because the arguments are spliced into one command
# string; "$@" inside a larger quoted string would break it into several words.
# shellcheck disable=SC2086
adb $adbserial $adbhost shell " \
  cd $basedir; \
  LD_LIBRARY_PATH=$basedir/$branch/lib \
  ADSP_LIBRARY_PATH=$basedir/$branch/lib \
    $ndev $nhvx $opmask $verbose $profile $hb ./$branch/bin/llama-bench --device $device --mmap 0 -m $basedir/../gguf/$model \
      --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
      --ubatch-size 256 -fa 1 -ngl 99 $cli_opts $* \
"

View File

@@ -0,0 +1,78 @@
#!/bin/sh
#
# Run llama-cli through `adb shell`.
#
# Configuration is taken from environment variables:
#   B        sub-directory of $basedir holding bin/ and lib/ (default: .)
#   S        adb serial, passed to adb as "-s $S"
#   H        adb host, passed to adb as "-H $H"
#   M        model file name below $basedir/../gguf
#   D        value for --device (default: HTP0)
#   V        value for GGML_HEXAGON_VERBOSE (also appends -v)
#   SCHED    when non-empty, sets GGML_SCHED_DEBUG=2 (also appends -v)
#   PROF     value for GGML_HEXAGON_PROFILE (also appends -v)
#   OPSTAGE  value for GGML_HEXAGON_OPSTAGE
#   NHVX     value for GGML_HEXAGON_NHVX
#   HMX      value for GGML_HEXAGON_USE_HMX
#   NDEV     value for GGML_HEXAGON_NDEV
#   HB       value for GGML_HEXAGON_HOSTBUF
#   OB       value for GGML_HEXAGON_OPBATCH
#   OQ       value for GGML_HEXAGON_OPQUEUE
#   OF       value for GGML_HEXAGON_OPFILTER
#   VM       value for GGML_HEXAGON_VMEM
#   MB       value for GGML_HEXAGON_MBUF
# Command-line arguments are appended to the llama-cli command line.

# Basedir on device
basedir=/data/local/tmp/llama.cpp

cli_opts=

branch=.
[ -n "$B" ] && branch=$B

adbserial=
[ -n "$S" ] && adbserial="-s $S"

adbhost=
[ -n "$H" ] && adbhost="-H $H"

model="Llama-3.2-3B-Instruct-Q4_0.gguf"
[ -n "$M" ] && model="$M"

device="HTP0"
[ -n "$D" ] && device="$D"

verbose=
[ -n "$V" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v"

sched=
[ -n "$SCHED" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"

profile=
[ -n "$PROF" ] && profile="GGML_HEXAGON_PROFILE=$PROF" cli_opts="$cli_opts -v"

opmask=
[ -n "$OPSTAGE" ] && opmask="GGML_HEXAGON_OPSTAGE=$OPSTAGE"

nhvx=
[ -n "$NHVX" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"

hmx=
[ -n "$HMX" ] && hmx="GGML_HEXAGON_USE_HMX=$HMX"

ndev=
[ -n "$NDEV" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"

hb=
[ -n "$HB" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"

opbatch=
[ -n "$OB" ] && opbatch="GGML_HEXAGON_OPBATCH=$OB"

opqueue=
[ -n "$OQ" ] && opqueue="GGML_HEXAGON_OPQUEUE=$OQ"

opflt=
[ -n "$OF" ] && opflt="GGML_HEXAGON_OPFILTER=$OF"

# VM and MB each get their own variable. A duplicated pair of lines used to
# assign them to opflt as well, which discarded the OPFILTER setting whenever
# VM or MB was set.
vmem=
[ -n "$VM" ] && vmem="GGML_HEXAGON_VMEM=$VM"

mbuf=
[ -n "$MB" ] && mbuf="GGML_HEXAGON_MBUF=$MB"

set -x

# $adbserial / $adbhost are left unquoted on purpose: each is either empty or
# a flag plus its value and must split into separate words.
# $* (not $@) is used because the arguments are spliced into one command
# string; "$@" inside a larger quoted string would break it into several words.
# shellcheck disable=SC2086
adb $adbserial $adbhost shell " \
  cd $basedir; ulimit -c unlimited; \
  LD_LIBRARY_PATH=$basedir/$branch/lib \
  ADSP_LIBRARY_PATH=$basedir/$branch/lib \
    $verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt $vmem $mbuf \
    ./$branch/bin/llama-cli --no-mmap -m $basedir/../gguf/$model \
      --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
      --ctx-size 8192 --ubatch-size 256 -fa on \
      -ngl 99 --device $device $cli_opts $* \
"

View File

@@ -0,0 +1,74 @@
#!/bin/sh
#
# Run llama-completion through `adb shell`.
#
# Configuration is taken from environment variables:
#   B        sub-directory of $basedir holding bin/ and lib/ (default: .)
#   S        adb serial, passed to adb as "-s $S"
#   H        adb host, passed to adb as "-H $H"
#   M        model file name below $basedir/../gguf
#   D        value for --device (default: HTP0)
#   V        value for GGML_HEXAGON_VERBOSE (also appends -v)
#   SCHED    when non-empty, sets GGML_SCHED_DEBUG=2 (also appends -v)
#   PROF     value for GGML_HEXAGON_PROFILE (also appends -v)
#   OPSTAGE  value for GGML_HEXAGON_OPSTAGE
#   NHVX     value for GGML_HEXAGON_NHVX
#   HMX      value for GGML_HEXAGON_USE_HMX
#   NDEV     value for GGML_HEXAGON_NDEV
#   HB       value for GGML_HEXAGON_HOSTBUF
#   OB       value for GGML_HEXAGON_OPBATCH
#   OQ       value for GGML_HEXAGON_OPQUEUE
#   OF       value for GGML_HEXAGON_OPFILTER
#   VM       value for GGML_HEXAGON_VMEM
#   MB       value for GGML_HEXAGON_MBUF
# Command-line arguments are appended to the llama-completion command line.

# Basedir on device
basedir=/data/local/tmp/llama.cpp

cli_opts=

branch=.
[ -n "$B" ] && branch=$B

adbserial=
[ -n "$S" ] && adbserial="-s $S"

adbhost=
[ -n "$H" ] && adbhost="-H $H"

model="Llama-3.2-3B-Instruct-Q4_0.gguf"
[ -n "$M" ] && model="$M"

device="HTP0"
[ -n "$D" ] && device="$D"

verbose=
[ -n "$V" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v"

sched=
[ -n "$SCHED" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"

profile=
[ -n "$PROF" ] && profile="GGML_HEXAGON_PROFILE=$PROF" cli_opts="$cli_opts -v"

opmask=
[ -n "$OPSTAGE" ] && opmask="GGML_HEXAGON_OPSTAGE=$OPSTAGE"

nhvx=
[ -n "$NHVX" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"

hmx=
[ -n "$HMX" ] && hmx="GGML_HEXAGON_USE_HMX=$HMX"

ndev=
[ -n "$NDEV" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"

hb=
[ -n "$HB" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"

opbatch=
[ -n "$OB" ] && opbatch="GGML_HEXAGON_OPBATCH=$OB"

opqueue=
[ -n "$OQ" ] && opqueue="GGML_HEXAGON_OPQUEUE=$OQ"

opflt=
[ -n "$OF" ] && opflt="GGML_HEXAGON_OPFILTER=$OF"

vmem=
[ -n "$VM" ] && vmem="GGML_HEXAGON_VMEM=$VM"

mbuf=
[ -n "$MB" ] && mbuf="GGML_HEXAGON_MBUF=$MB"

set -x

# $adbserial / $adbhost are left unquoted on purpose: each is either empty or
# a flag plus its value and must split into separate words.
# $* (not $@) is used because the arguments are spliced into one command
# string; "$@" inside a larger quoted string would break it into several words.
# shellcheck disable=SC2086
adb $adbserial $adbhost shell " \
  cd $basedir; ulimit -c unlimited; \
  LD_LIBRARY_PATH=$basedir/$branch/lib \
  ADSP_LIBRARY_PATH=$basedir/$branch/lib \
    $verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt $vmem $mbuf \
    ./$branch/bin/llama-completion --no-mmap -m $basedir/../gguf/$model \
      --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
      --ctx-size 8192 --ubatch-size 256 -fa on \
      -ngl 99 -no-cnv --device $device $cli_opts $* \
"

View File

@@ -0,0 +1,71 @@
#!/bin/sh
#
# Run llama-mtmd-cli through `adb shell`.
#
# Configuration is taken from environment variables:
#   B            sub-directory of $basedir holding bin/ and lib/ (default: .)
#   S            adb serial, passed to adb as "-s $S"
#   H            adb host, passed to adb as "-H $H"
#   M            model file name below $basedir/../gguf
#   MMPROJ       mmproj file name below $basedir/../gguf
#   IMG          image file name below $basedir/../gguf (optional)
#   D            value for --device (default: HTP0)
#   V            value for GGML_HEXAGON_VERBOSE
#   E            value for GGML_HEXAGON_EXPERIMENTAL (default: 1)
#   SCHED        when non-empty, sets GGML_SCHED_DEBUG=2 (also appends -v)
#   PROF         value for GGML_HEXAGON_PROFILE
#   OPSTAGE      value for GGML_HEXAGON_OPSTAGE
#   NHVX         value for GGML_HEXAGON_NHVX
#   HMX          value for GGML_HEXAGON_USE_HMX
#   NDEV         value for GGML_HEXAGON_NDEV
#   MTMD_DEVICE  value for MTMD_BACKEND_DEVICE
# Command-line arguments are appended to the llama-mtmd-cli command line.

# Basedir on device
basedir=/data/local/tmp/llama.cpp

cli_opts=

branch=.
[ -n "$B" ] && branch=$B

adbserial=
[ -n "$S" ] && adbserial="-s $S"

adbhost=
[ -n "$H" ] && adbhost="-H $H"

model="gemma-3-4b-it-Q4_0.gguf"
[ -n "$M" ] && model="$M"

mmproj="mmproj-F16.gguf"
[ -n "$MMPROJ" ] && mmproj="$MMPROJ"

image=
[ -n "$IMG" ] && image="$IMG"

# Only pass --image when an image was requested. Without this guard an unset
# IMG produced "--image $basedir/../gguf/", i.e. a directory instead of a file.
image_opt=
[ -n "$image" ] && image_opt="--image $basedir/../gguf/$image"

device="HTP0"
[ -n "$D" ] && device="$D"

verbose=
[ -n "$V" ] && verbose="GGML_HEXAGON_VERBOSE=$V"

experimental="GGML_HEXAGON_EXPERIMENTAL=1"
[ -n "$E" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E"

sched=
[ -n "$SCHED" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"

profile=
[ -n "$PROF" ] && profile="GGML_HEXAGON_PROFILE=$PROF"

opmask=
[ -n "$OPSTAGE" ] && opmask="GGML_HEXAGON_OPSTAGE=$OPSTAGE"

nhvx=
[ -n "$NHVX" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"

hmx=
[ -n "$HMX" ] && hmx="GGML_HEXAGON_USE_HMX=$HMX"

ndev=
[ -n "$NDEV" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"

# MTMD backend device for vision model (defaults to CPU if not set)
mtmd_backend=
[ -n "$MTMD_DEVICE" ] && mtmd_backend="MTMD_BACKEND_DEVICE=$MTMD_DEVICE"

set -x

# $adbserial / $adbhost are left unquoted on purpose: each is either empty or
# a flag plus its value and must split into separate words.
# $* (not $@) is used because the arguments are spliced into one command
# string; "$@" inside a larger quoted string would break it into several words.
# shellcheck disable=SC2086
adb $adbserial $adbhost shell " \
  cd $basedir; ulimit -c unlimited; \
  LD_LIBRARY_PATH=$basedir/$branch/lib \
  ADSP_LIBRARY_PATH=$basedir/$branch/lib \
    $verbose $experimental $sched $opmask $profile $hmx $nhvx $ndev $mtmd_backend \
    ./$branch/bin/llama-mtmd-cli --no-mmap -m $basedir/../gguf/$model \
      --mmproj $basedir/../gguf/$mmproj \
      $image_opt \
      --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
      --ctx-size 8192 --ubatch-size 256 -fa on \
      -ngl 99 --device $device -v $cli_opts $* \
"

View File

@@ -0,0 +1,54 @@
#!/bin/sh
#
# Run an arbitrary tool from $basedir/$branch/bin through `adb shell`.
#
# Usage: <this script> <tool> [tool-args...]
#
# Configuration is taken from environment variables:
#   B        sub-directory of $basedir holding bin/ and lib/ (default: .)
#   S        adb serial, passed to adb as "-s $S"
#   H        adb host, passed to adb as "-H $H"
#   D        device name (default: HTP0); set here but not used in the command
#   V        value for GGML_HEXAGON_VERBOSE
#   SCHED    when non-empty, sets GGML_SCHED_DEBUG=2
#   PROF     value for GGML_HEXAGON_PROFILE
#   OPSTAGE  value for GGML_HEXAGON_OPSTAGE
#   NHVX     value for GGML_HEXAGON_NHVX
#   HMX      value for GGML_HEXAGON_USE_HMX
#   NDEV     value for GGML_HEXAGON_NDEV
#   HB       value for GGML_HEXAGON_HOSTBUF

# Basedir on device
basedir=/data/local/tmp/llama.cpp

cli_opts=

branch=.
[ -n "$B" ] && branch=$B

adbserial=
[ -n "$S" ] && adbserial="-s $S"

adbhost=
[ -n "$H" ] && adbhost="-H $H"

device="HTP0"
[ -n "$D" ] && device="$D"

verbose=
[ -n "$V" ] && verbose="GGML_HEXAGON_VERBOSE=$V"

sched=
[ -n "$SCHED" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"

profile=
[ -n "$PROF" ] && profile="GGML_HEXAGON_PROFILE=$PROF"

opmask=
[ -n "$OPSTAGE" ] && opmask="GGML_HEXAGON_OPSTAGE=$OPSTAGE"

nhvx=
[ -n "$NHVX" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"

hmx=
[ -n "$HMX" ] && hmx="GGML_HEXAGON_USE_HMX=$HMX"

ndev=
[ -n "$NDEV" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"

hb=
[ -n "$HB" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"

# The first argument names the tool. Without it `shift` fails and the command
# would try to execute the bin/ directory itself, so stop with a usage message.
if [ "$#" -lt 1 ]; then
  printf 'usage: %s <tool> [tool-args...]\n' "$0" >&2
  exit 2
fi

set -x

tool=$1; shift

# $adbserial / $adbhost are left unquoted on purpose: each is either empty or
# a flag plus its value and must split into separate words.
# $* (not $@) is used because the arguments are spliced into one command
# string; "$@" inside a larger quoted string would break it into several words.
# shellcheck disable=SC2086
adb $adbserial $adbhost shell " \
  cd $basedir; ulimit -c unlimited; \
  LD_LIBRARY_PATH=$basedir/$branch/lib \
  ADSP_LIBRARY_PATH=$basedir/$branch/lib \
    $verbose $sched $opmask $profile $nhvx $hmx $ndev $hb ./$branch/bin/$tool $* \
"

View File

@@ -0,0 +1,188 @@
#!/usr/bin/env python3
import sys
import os
import re
import argparse
import statistics
import logging
from collections import defaultdict
# Mapping of cli-friendly names to (internal_data_key, Display Header, numeric_sort_key).
# - internal_data_key: key of the formatted (string) value in each report row
# - Display Header:    column title printed in the table
# - numeric_sort_key:  row key used for sorting; the "_sort_*" entries hold the
#                      raw numbers so that sorting is numeric rather than textual
COL_MAP = {
    "op": ("op", "Op", "op"),
    "dims": ("dims", "Dims", "dims"),
    "dtypes": ("dtypes", "DTypes", "dtypes"),
    "count": ("count", "Count", "_sort_count"),
    "max-usec": ("max_usec", "Max usec", "_sort_max_usec"),
    "avg-usec": ("avg_usec", "Avg usec", "_sort_avg_usec"),
    "max-cycles": ("max_cycles", "Max Cycles", "_sort_max_cycles"),
    "avg-cycles": ("avg_cycles", "Avg Cycles", "_sort_avg_cycles"),
    "max-pmu": ("max_pmu", "Max PMU", "_sort_max_pmu"),
    "avg-pmu": ("avg_pmu", "Avg PMU", "_sort_avg_pmu"),
}

# Matches one "profile-op" log line. Named groups: op_name, dims, types, usec,
# cycles and, when the line carries a trailing "pmu [...]" list, pmu.
op_pattern = re.compile(
    r"profile-op\s+(?P<op_name>[A-Z_0-9]+):\s+.*?\s+:\s+(?P<dims>[\d:x\s\->!]+)\s+:\s+(?P<types>[a-z\d_\s\->x]+)\s+:\s+.*?\s+usec\s+(?P<usec>\d+)\s+cycles\s+(?P<cycles>\d+)(?:\s+pmu\s+\[(?P<pmu>[\d,\s]+)\])?"
)

# Logger used for both diagnostics and the report output itself.
logger = logging.getLogger("ggml-hexagon-profile")
def parse_log(file_path, pmu_index=None):
    """Collect every profiled op found in a log.

    Args:
        file_path: Path of the log file, or "-" to read from standard input.
        pmu_index: Position inside the optional ``pmu [...]`` list to extract
            for each op. When None the PMU counters are ignored.

    Returns:
        A list with one dict per matching line. Keys: ``name``, ``dims``,
        ``types`` (strings), ``usec``, ``cycles`` (ints) and ``pmu_val``
        (int, or None when not requested / not present on the line).

    The process exits with status 1 when the input cannot be opened.
    """
    try:
        if file_path != "-":
            f = open(file_path, 'r', encoding='utf-8', errors='ignore')
        else:
            f = os.fdopen(0, 'r', encoding='utf-8', errors='ignore')
    except FileNotFoundError:
        logger.error(f"file '{file_path}' not found.")
        sys.exit(1)
    except OSError as e:
        # Permission problems, directories, etc.: report instead of a traceback.
        logger.error(f"cannot open '{file_path}': {e}")
        sys.exit(1)

    all_ops = []
    # The context manager closes the handle even if parsing raises.
    with f:
        for line in f:
            match = op_pattern.search(line)
            if not match:
                continue

            pmu_val = None
            pmu_raw = match.group('pmu')
            if pmu_raw and pmu_index is not None:
                try:
                    pmu_list = [int(x.strip()) for x in pmu_raw.split(',')]
                    if len(pmu_list) > pmu_index:
                        pmu_val = pmu_list[pmu_index]
                except (ValueError, IndexError):
                    # Malformed counter list: treat the value as unavailable.
                    pmu_val = None

            all_ops.append({
                'name': match.group('op_name'),
                'dims': match.group('dims').strip(),
                'types': match.group('types').strip(),
                'usec': int(match.group('usec')),
                'cycles': int(match.group('cycles')),
                'pmu_val': pmu_val
            })
    return all_ops
def _fit_cell(text, width):
    """Left-justify `text` in exactly `width` columns, shortening it if needed."""
    if len(text) > width:
        if width >= 3:
            text = text[:width - 3] + "..."
        else:
            # No room for an ellipsis; a plain cut keeps the column aligned
            # (a negative slice bound here used to overflow the column).
            text = text[:width]
    return f"{text:<{width}}"


def generate_report(ops, top_n, width_overrides, sort_col, pmu_name=None):
    """Group ops, compute per-group statistics and log them as a table.

    Args:
        ops: List of op dicts as produced by parse_log.
        top_n: Maximum number of groups to print after sorting.
        width_overrides: Dict mapping a COL_MAP column name to a fixed width.
            A width of 0 (or less) hides the column.
        sort_col: COL_MAP column name to sort by. Numeric columns sort
            descending, text columns ascending.
        pmu_name: When set, the PMU columns are shown and "PMU" in their
            headers is replaced by this name.

    Ops are grouped by (name, dims, types). Nothing is returned; the report is
    emitted through the module logger.
    """
    if not ops:
        logger.info("No valid records found.")
        return

    grouped = defaultdict(list)
    for op in ops:
        grouped[(op['name'], op['dims'], op['types'])].append(op)

    group_stats = []
    for (name, dims, types), group_ops in grouped.items():
        usecs = [o['usec'] for o in group_ops]
        cycles = [o['cycles'] for o in group_ops]
        pmu_vals = [o['pmu_val'] for o in group_ops if o['pmu_val'] is not None]

        # Compute each statistic once and reuse it for display and sorting.
        count = len(group_ops)
        max_usec, avg_usec = max(usecs), statistics.mean(usecs)
        max_cycles, avg_cycles = max(cycles), statistics.mean(cycles)
        max_pmu = max(pmu_vals) if pmu_vals else 0
        avg_pmu = statistics.mean(pmu_vals) if pmu_vals else 0

        group_stats.append({
            'op': name,
            'dims': dims,
            'dtypes': types,
            'count': str(count),
            'max_usec': str(max_usec),
            'avg_usec': f"{avg_usec:.2f}",
            'max_cycles': str(max_cycles),
            'avg_cycles': f"{avg_cycles:.2f}",
            'max_pmu': str(max_pmu),
            'avg_pmu': f"{avg_pmu:.2f}",
            # Numeric values for accurate sorting
            '_sort_count': count,
            '_sort_max_usec': max_usec,
            '_sort_avg_usec': avg_usec,
            '_sort_max_cycles': max_cycles,
            '_sort_avg_cycles': avg_cycles,
            '_sort_max_pmu': max_pmu,
            '_sort_avg_pmu': avg_pmu,
        })

    # Sorting logic: numeric fields descending, strings (op/dims/dtypes) ascending.
    actual_sort_key = COL_MAP[sort_col][2]
    is_numeric = actual_sort_key.startswith("_")
    sorted_groups = sorted(group_stats, key=lambda x: x[actual_sort_key], reverse=is_numeric)[:top_n]

    # Define initial column order
    active_cols = ["op", "dims", "dtypes"]
    if pmu_name:
        active_cols += ["max-pmu", "avg-pmu"]
    active_cols += ["max-usec", "avg-usec", "max-cycles", "avg-cycles", "count"]

    final_headers, final_keys, final_widths = [], [], []
    for col_name in active_cols:
        data_key, header_text, _ = COL_MAP[col_name]
        if "pmu" in col_name and pmu_name:
            header_text = header_text.replace("PMU", pmu_name)
        natural_width = max([len(row[data_key]) for row in sorted_groups] + [len(header_text)])
        target_width = width_overrides.get(col_name, natural_width)
        # Width 0 hides the column; negative widths are treated the same way
        # because they are not valid in a format specification.
        if target_width <= 0:
            continue
        final_headers.append(header_text)
        final_keys.append(data_key)
        final_widths.append(target_width)

    # Print Report
    logger.info(f"\n# Profile Report (Top {top_n} Ops sorted by {sort_col})\n")
    header_line = "| " + " | ".join(f"{h:<{w}}" for h, w in zip(final_headers, final_widths)) + " |"
    sep_line = "| " + " | ".join("-" * w for w in final_widths) + " |"
    logger.info(header_line)
    logger.info(sep_line)
    for group in sorted_groups:
        row_vals = [_fit_cell(group[key], w) for key, w in zip(final_keys, final_widths)]
        logger.info("| " + " | ".join(row_vals) + " |")
def main():
    """Command-line entry point: parse options, read the log, print the report."""
    cli = argparse.ArgumentParser(description="Post-process Op profile info.")
    cli.add_argument("logfile")
    cli.add_argument("-n", "--top", type=int, default=100)
    cli.add_argument("--sort", type=str, default="max-usec", choices=list(COL_MAP.keys()))
    cli.add_argument("--pmu-index", type=int)
    cli.add_argument("--pmu-name", type=str)
    cli.add_argument("--width", action='append', default=['dims:40'], help="Override column width, e.g. --width dims:50")
    args = cli.parse_args()

    logging.basicConfig(level=logging.INFO, format='%(message)s')

    # Sort validation: can't sort by PMU if index isn't provided
    have_pmu = args.pmu_index is not None
    if "pmu" in args.sort and not have_pmu:
        logger.error(f"Cannot sort by '{args.sort}' without --pmu-index.")
        sys.exit(1)

    # Each --width entry has the form "<column>:<width>"; later entries win.
    overrides = {}
    for w in (args.width or []):
        try:
            name, val = w.split(':')
            overrides[name.lower()] = int(val)
        except ValueError:
            logger.warning(f"Invalid width format '{w}'")

    # PMU columns are only labelled (and therefore shown) when an index is given.
    if have_pmu:
        final_pmu_name = args.pmu_name or f"#{args.pmu_index}"
    else:
        final_pmu_name = None

    ops = parse_log(args.logfile, pmu_index=args.pmu_index)
    generate_report(ops, args.top, overrides, args.sort, pmu_name=final_pmu_name)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,22 @@
Appium-Python-Client==5.2.4
attrs==25.4.0
certifi==2025.10.5
exceptiongroup==1.3.0
h11==0.16.0
idna==3.11
iniconfig==2.1.0
outcome==1.3.0.post0
packaging==25.0
pluggy==1.6.0
PySocks==1.7.1
pytest==8.4.2
selenium==4.36.0
sniffio==1.3.1
sortedcontainers==2.4.0
tomli==2.3.0
trio==0.31.0
trio-websocket==0.12.2
typing_extensions==4.15.0
urllib3==2.5.0
websocket-client==1.9.0
wsproto==1.2.0

View File

@@ -0,0 +1,401 @@
"""Run llama.cpp Hexagon Android tests in a single QDC Appium job.
Bundles test scripts into one artifact and submits a single QDC job:
1. run_bench_tests_posix.py — llama-cli and llama-bench on CPU / GPU / NPU
(from scripts/snapdragon/qdc/)
Results are written to $GITHUB_STEP_SUMMARY when set (GitHub Actions).
Prerequisites:
pip install /path/to/qualcomm_device_cloud_sdk*.whl
Required environment variables:
QDC_API_KEY API key from QDC UI -> Users -> Settings -> API Keys
Usage:
python run_qdc_jobs.py \\
--pkg-dir pkg-snapdragon/llama.cpp \\
--model-url https://.../Llama-3.2-1B-Instruct-Q4_0.gguf \\
--device SM8750
"""
from __future__ import annotations
import argparse
import logging
import os
import re
import shutil
import sys
import tempfile
import time
import xml.etree.ElementTree as ET
from dataclasses import dataclass, field
from pathlib import Path
from qualcomm_device_cloud_sdk.api import qdc_api # ty: ignore[unresolved-import]
from qualcomm_device_cloud_sdk.logging import configure_logging # ty: ignore[unresolved-import]
from qualcomm_device_cloud_sdk.models import ArtifactType, JobMode, JobState, JobSubmissionParameter, JobType, TestFramework # ty: ignore[unresolved-import]
configure_logging(level=logging.INFO, handlers=[logging.StreamHandler()])
log = logging.getLogger(__name__)

# Seconds slept between polls in wait_for_job and wait_for_log_upload.
POLL_INTERVAL = 30
# Default for --job-timeout, in seconds.
JOB_TIMEOUT = 3600
# Seconds wait_for_log_upload keeps polling before giving up.
LOG_UPLOAD_TIMEOUT = 600
# Seconds wait_for_capacity keeps polling, and the sleep between its polls.
CAPACITY_TIMEOUT = 1800
CAPACITY_POLL = 60
# Default threshold of active jobs used by wait_for_capacity.
MAX_CONCURRENT_JOBS = 5

# Job states that end polling in wait_for_job.
TERMINAL_STATES = {JobState.COMPLETED, JobState.CANCELED}
# Job states counted as "active" by wait_for_capacity.
NON_TERMINAL_STATES = {JobState.DISPATCHED, JobState.RUNNING, JobState.SETUP, JobState.SUBMITTED}

# Locations of the files bundled into the artifact, relative to this script.
_SCRIPTS_DIR = Path(__file__).parent
_TESTS_DIR = _SCRIPTS_DIR / "tests"
_RUN_BENCH = _TESTS_DIR / "run_bench_tests_posix.py"
_RUN_BACKEND_OPS = _TESTS_DIR / "run_backend_ops_posix.py"
_UTILS = _TESTS_DIR / "utils.py"
_CONFTEST = _TESTS_DIR / "conftest.py"
_REQUIREMENTS = _SCRIPTS_DIR / "requirements.txt"

# Matches a pytest result line; group 1 is the test name, group 2 the outcome.
_PYTEST_LINE_RE = re.compile(
    r"(?:[\w/]+\.py::)?(?:\w+::)?([\w\[\].-]+)\s+(PASSED|FAILED|ERROR|SKIPPED)"
)

# Log files that are neither echoed nor kept when collecting job logs.
_EXCLUDED_LOGS = {"qdc_android_whole_host-000.log", "qdc_kernel_host-000.log"}
# Raw values of NON_TERMINAL_STATES, for comparison against job "state" fields.
_NON_TERMINAL_STATE_VALUES = {s.value for s in NON_TERMINAL_STATES}
@dataclass
class JobResult:
    """Outcome of one QDC job, as consumed by write_summary."""

    # Overall verdict for the job.
    passed: bool
    # Per-test outcome: test name -> True when it passed.
    tests: dict[str, bool] = field(default_factory=dict)
    # Collected log contents: file name -> text.
    raw_logs: dict[str, str] = field(default_factory=dict)
    # Failure text for failed tests: test name -> message/details.
    failure_details: dict[str, str] = field(default_factory=dict)
def build_artifact_zip(
    pkg_dir: Path,
    stage_dir: Path,
    *,
    test_mode: str = "bench",
    model_url: str | None = None,
) -> Path:
    """Bundle everything into a single QDC artifact zip.

    Zip structure (extracted by QDC to /qdc/appium/ on the runner):
      llama_cpp_bundle/            installed package (adb pushed to /data/local/tmp/)
      tests/
        utils.py                   shared helpers (paths, run_adb_command, ...)
        conftest.py                shared pytest fixtures (driver)
        test_bench_posix.py        bench + cli tests (<<MODEL_URL>> substituted)
        AND/OR
        test_backend_ops_posix.py  test-backend-ops -b HTP0
      requirements.txt
      pytest.ini

    Args:
        pkg_dir: Installed llama.cpp package directory to bundle.
        stage_dir: Existing scratch directory; the zip is written into it.
        test_mode: "bench", "backend-ops" or "all"; selects the bundled tests.
        model_url: URL substituted for <<MODEL_URL>>; required for bench/all.

    Returns:
        Path of the created archive (``stage_dir / "artifact.zip"``).

    Raises:
        ValueError: if a bench test mode is requested without ``model_url``.
    """
    if test_mode in ("bench", "all") and model_url is None:
        # Explicit exception instead of assert, which is stripped under -O.
        raise ValueError("--model-url is required for bench/all test modes")

    # Stage the contents in a sub-directory so the archive, which is written
    # next to it, is not inside the tree being archived. make_archive walks
    # root_dir and would otherwise pick up the partially written zip itself.
    content_dir = stage_dir / "content"
    content_dir.mkdir()

    shutil.copytree(pkg_dir, content_dir / "llama_cpp_bundle")

    tests_dir = content_dir / "tests"
    tests_dir.mkdir()
    shutil.copy(_UTILS, tests_dir / "utils.py")
    shutil.copy(_CONFTEST, tests_dir / "conftest.py")

    if test_mode in ("bench", "all"):
        # Explicit UTF-8 so the round trip does not depend on the locale.
        bench_source = _RUN_BENCH.read_text(encoding="utf-8")
        (tests_dir / "test_bench_posix.py").write_text(
            bench_source.replace("<<MODEL_URL>>", model_url),
            encoding="utf-8",
        )
    if test_mode in ("backend-ops", "all"):
        shutil.copy(_RUN_BACKEND_OPS, tests_dir / "test_backend_ops_posix.py")

    shutil.copy(_REQUIREMENTS, content_dir / "requirements.txt")
    (content_dir / "pytest.ini").write_text("[pytest]\naddopts = --junitxml=results.xml\n")

    zip_base = str(stage_dir / "artifact")
    shutil.make_archive(zip_base, "zip", content_dir)
    return Path(f"{zip_base}.zip")
def wait_for_job(client, job_id: str, timeout: int) -> str:
    """Poll the job status until it reaches a terminal state.

    Returns the raw status lower-cased. Raises TimeoutError once the
    accumulated sleep time reaches ``timeout`` seconds.
    """
    waited = 0
    while True:
        if waited >= timeout:
            raise TimeoutError(f"Job {job_id} did not finish within {timeout}s")

        raw = qdc_api.get_job_status(client, job_id)
        # Unknown status strings are kept as-is so they simply never match.
        try:
            status = JobState(raw)
        except ValueError:
            status = raw

        if status in TERMINAL_STATES:
            return raw.lower()

        log.info("Job %s: %s", job_id, raw)
        time.sleep(POLL_INTERVAL)
        waited += POLL_INTERVAL
def wait_for_log_upload(client, job_id: str) -> None:
    """Poll until the job's log upload reports "completed" or "failed".

    Gives up with a warning (no exception) after LOG_UPLOAD_TIMEOUT seconds
    of accumulated sleep time.
    """
    finished = {"completed", "failed"}
    waited = 0
    while True:
        if waited > LOG_UPLOAD_TIMEOUT:
            break
        reported = qdc_api.get_job_log_upload_status(client, job_id) or ""
        status = reported.lower()
        if status in finished:
            return
        log.info("Waiting for log upload (status=%s) ...", status)
        time.sleep(POLL_INTERVAL)
        waited += POLL_INTERVAL
    log.warning("Timed out waiting for log upload after %ds", LOG_UPLOAD_TIMEOUT)
def wait_for_capacity(client, max_jobs: int = MAX_CONCURRENT_JOBS) -> None:
    """Block until the user's active (non-terminal) QDC job count is below max_jobs.

    Only the first page (50 entries) of the job list is inspected. The function
    returns without waiting further when the list cannot be retrieved, and
    returns with a warning once CAPACITY_TIMEOUT seconds of sleep have elapsed.
    """
    waited = 0
    while True:
        if waited >= CAPACITY_TIMEOUT:
            break

        jobs_page = qdc_api.get_jobs_list(client, page_number=0, page_size=50)
        if jobs_page is None:
            log.warning("Could not retrieve job list; proceeding without capacity check")
            return

        items = getattr(jobs_page, "data", []) or []
        active = 0
        for job in items:
            if getattr(job, "state", None) in _NON_TERMINAL_STATE_VALUES:
                active += 1

        if active < max_jobs:
            log.info("Active QDC jobs: %d / %d — proceeding", active, max_jobs)
            return

        log.info("Active QDC jobs: %d / %d — waiting %ds ...", active, max_jobs, CAPACITY_POLL)
        time.sleep(CAPACITY_POLL)
        waited += CAPACITY_POLL

    log.warning("Capacity wait timed out after %ds; proceeding anyway", CAPACITY_TIMEOUT)
def _parse_junit_xml(content: str) -> tuple[dict[str, bool], dict[str, str]]:
try:
root = ET.fromstring(content)
except ET.ParseError:
return {}, {}
results: dict[str, bool] = {}
failures: dict[str, str] = {}
for tc in root.iter("testcase"):
name = tc.get("name", "")
if classname := tc.get("classname", ""):
name = f"{classname}.{name}"
failure_el = tc.find("failure")
if failure_el is None:
failure_el = tc.find("error")
results[name] = failure_el is None
if failure_el is not None:
parts = [failure_el.get("message", ""), failure_el.text or ""]
failures[name] = "\n".join(p for p in parts if p).strip()
return results, failures
def _parse_pytest_output(content: str) -> dict[str, bool]:
    """Map each test name found in pytest console output to True when it PASSED.

    When a name occurs more than once, the last occurrence wins.
    """
    return {
        hit.group(1): hit.group(2) == "PASSED"
        for hit in _PYTEST_LINE_RE.finditer(content)
    }
def fetch_logs_and_parse_tests(
    client, job_id: str
) -> tuple[dict[str, bool], dict[str, str], dict[str, str]]:
    """Returns (test_results, raw_logs, failure_details).

    Downloads every log archive of the job, unpacks it and scans the extracted
    files: ``*.xml`` files are parsed as JUnit results, ``*.log`` files (except
    those in _EXCLUDED_LOGS) are echoed, kept as raw logs and scanned for
    pytest result lines. JUnit results take precedence; the pytest-output
    results are only returned when no JUnit results were found.
    """
    log_files = qdc_api.get_job_log_files(client, job_id)
    if not log_files:
        log.warning("No log files returned for job %s", job_id)
        return {}, {}, {}

    test_results: dict[str, bool] = {}
    pytest_fallback: dict[str, bool] = {}
    raw_logs: dict[str, str] = {}
    failure_details: dict[str, str] = {}

    with tempfile.TemporaryDirectory() as tmpdir:
        zip_path = os.path.join(tmpdir, "log.zip")
        for lf in log_files:
            log.info("Downloading log file: %s", lf.filename)
            qdc_api.download_job_log_files(client, lf.filename, zip_path)
            try:
                shutil.unpack_archive(zip_path, tmpdir, "zip")
            except Exception as e:
                # Best effort: a broken archive must not hide the other logs.
                log.warning("Could not unpack %s as zip: %s", lf.filename, e)

        for root_dir, _, files in os.walk(tmpdir):
            for fname in sorted(files):
                is_xml = fname.endswith(".xml")
                is_log = fname.endswith(".log") and fname not in _EXCLUDED_LOGS
                # Decide before reading: the downloaded zip and the excluded
                # host logs are never used, so do not load them into memory.
                if not (is_xml or is_log):
                    continue

                content = Path(root_dir, fname).read_text(errors="replace")
                if is_xml:
                    results, failures = _parse_junit_xml(content)
                    test_results.update(results)
                    failure_details.update(failures)
                else:
                    log.info("--- %s ---", fname)
                    log.info("%s", content)
                    raw_logs[fname] = content
                    pytest_fallback.update(_parse_pytest_output(content))

    return (test_results if test_results else pytest_fallback), raw_logs, failure_details
def write_summary(result: JobResult, title: str = "QDC Test Results") -> None:
    """Append a Markdown report for ``result`` to the GitHub step summary.

    Does nothing when the GITHUB_STEP_SUMMARY environment variable is unset.
    Tests whose name contains "test_install" are left out of the report.

    Args:
        result: Job outcome to report (passed, tests, failure_details, raw_logs).
        title: Heading of the summary section.
    """
    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
    if not summary_path:
        return

    # The pass/fail markers had been reduced to empty strings, which left the
    # Result column blank for every test.
    pass_icon, fail_icon = "✅", "❌"

    icon = pass_icon if result.passed else fail_icon
    lines = [
        f"## {title}\n",
        f"Overall: {icon} {'PASSED' if result.passed else 'FAILED'}\n",
    ]

    reportable = {n: ok for n, ok in result.tests.items() if "test_install" not in n}
    if reportable:
        lines += ["| Test | Result |", "| ---- | ------ |"]
        for name, ok in reportable.items():
            lines.append(f"| `{name}` | {pass_icon if ok else fail_icon} |")
        passed_n = sum(1 for v in reportable.values() if v)
        failed_n = len(reportable) - passed_n
        lines += ["", f"**{passed_n} passed, {failed_n} failed**"]
    else:
        lines.append("_No per-test data available._")

    failed_names = [n for n, ok in reportable.items() if not ok]
    if failed_names:
        lines += ["", "### Failures"]
        for name in failed_names:
            detail = result.failure_details.get(name)
            if detail:
                lines += [
                    f"<details><summary><code>{name}</code></summary>",
                    "",
                    "```",
                    detail,
                    "```",
                    "",
                    "</details>",
                ]

    if result.raw_logs:
        lines += ["", "### Raw Logs"]
        for fname, content in sorted(result.raw_logs.items()):
            lines += [
                f"<details><summary>{fname}</summary>",
                "",
                "```",
                content.rstrip(),
                "```",
                "",
                "</details>",
            ]

    # Explicit UTF-8: the report contains non-ASCII markers and log text.
    with open(summary_path, "a", encoding="utf-8") as f:
        f.write("\n".join(lines) + "\n")
def parse_args() -> argparse.Namespace:
    """Parse the command line; --model-url is mandatory for bench and all."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--pkg-dir",
        required=True,
        type=Path,
        help="Installed llama.cpp package directory (contains bin/ and lib/)",
    )
    parser.add_argument(
        "--model-url",
        help="Direct URL to the GGUF model file (required for --test bench)",
    )
    parser.add_argument(
        "--device",
        required=True,
        help="QDC chipset name, e.g. SM8750",
    )
    parser.add_argument(
        "--test",
        choices=["bench", "backend-ops", "all"],
        default="bench",
        help="Test suite to run (default: bench)",
    )
    parser.add_argument(
        "--job-timeout",
        type=int,
        default=JOB_TIMEOUT,
        metavar="SECONDS",
        help=f"Max seconds to wait for job completion (default: {JOB_TIMEOUT})",
    )

    parsed = parser.parse_args()
    needs_model = parsed.test in ("bench", "all")
    if needs_model and not parsed.model_url:
        parser.error("--model-url is required when --test bench or --test all")
    return parsed
def main() -> int:
    """Build and upload the artifact, run one QDC job and report its outcome.

    Returns:
        0 when the job completed and every parsed test passed; 1 otherwise,
        including missing API key / package directory, unknown device, failed
        upload or submission, and timeout.
    """
    args = parse_args()
    api_key = os.environ.get("QDC_API_KEY")
    if not api_key:
        log.error("QDC_API_KEY environment variable must be set")
        return 1
    if not args.pkg_dir.is_dir():
        log.error("--pkg-dir %s does not exist", args.pkg_dir)
        return 1
    client = qdc_api.get_public_api_client_using_api_key(
        api_key_header=api_key,
        app_name_header="llama-cpp-ci",
        on_behalf_of_header="llama-cpp-ci",
        client_type_header="Python",
    )
    target_id = qdc_api.get_target_id(client, args.device)
    if target_id is None:
        log.error("Could not find QDC target for device %r", args.device)
        return 1
    # The zip lives in the temporary directory, so it must be uploaded before
    # the directory is removed.
    with tempfile.TemporaryDirectory() as tmpdir:
        log.info("Building artifact ...")
        zip_path = build_artifact_zip(
            args.pkg_dir, Path(tmpdir),
            test_mode=args.test, model_url=args.model_url,
        )
        log.info("Uploading artifact (%d MB) ...", zip_path.stat().st_size // 1_000_000)
        artifact_id = qdc_api.upload_file(client, str(zip_path), ArtifactType.TESTSCRIPT)
    if artifact_id is None:
        log.error("Artifact upload failed")
        return 1
    wait_for_capacity(client)
    job_id = qdc_api.submit_job(
        public_api_client=client,
        target_id=target_id,
        job_name="llama.cpp Hexagon tests",
        external_job_id=None,
        job_type=JobType.AUTOMATED,
        job_mode=JobMode.APPLICATION,
        # Seconds converted to whole minutes, at least 1.
        # NOTE(review): presumably QDC expects minutes here — confirm.
        timeout=max(1, args.job_timeout // 60),
        test_framework=TestFramework.APPIUM,
        entry_script=None,
        job_artifacts=[artifact_id],
        monkey_events=None,
        monkey_session_timeout=None,
        job_parameters=[JobSubmissionParameter.WIFIENABLED],
    )
    if job_id is None:
        log.error("Job submission failed")
        return 1
    log.info("Job submitted: %s (device=%s)", job_id, args.device)
    try:
        job_status = wait_for_job(client, job_id, timeout=args.job_timeout)
    except TimeoutError as e:
        log.error("%s", e)
        write_summary(JobResult(passed=False, tests={}), title=f"QDC Job Timed Out ({args.device})")
        return 1
    log.info("Job %s finished: %s", job_id, job_status)
    wait_for_log_upload(client, job_id)
    tests, raw_logs, failure_details = fetch_logs_and_parse_tests(client, job_id)
    # wait_for_job returns the status lower-cased, hence the lower-cased comparison.
    passed = job_status == JobState.COMPLETED.value.lower()
    # When no per-test results could be parsed, the verdict rests on the job state alone.
    if tests:
        passed = passed and all(tests.values())
    if not passed:
        log.error("Job did not complete successfully or tests failed (status=%s)", job_status)
    result = JobResult(passed=passed, tests=tests, raw_logs=raw_logs, failure_details=failure_details)
    if args.test == "backend-ops":
        title = f"Backend Ops — HTP0 ({args.device})"
    elif args.test == "all":
        title = f"QDC Tests ({args.device})"
    else:
        title = f"QDC Test Results ({args.device})"
    write_summary(result, title=title)
    return 0 if passed else 1


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,20 @@
"""Shared pytest fixtures for QDC on-device test runners."""
import os
import pytest
from appium import webdriver
from utils import options, write_qdc_log
@pytest.fixture(scope="session", autouse=True)
def driver():
    """Session-wide Appium driver connected to the local Appium server."""
    appium_url = "http://127.0.0.1:4723/wd/hub"
    session = webdriver.Remote(command_executor=appium_url, options=options)
    return session
def pytest_sessionfinish(session, exitstatus):
    """After the test session, publish the JUnit report as a QDC log file.

    The report path comes from pytest's ``xmlpath`` option, falling back to
    "results.xml". Nothing happens when that file does not exist.
    """
    configured = getattr(session.config.option, "xmlpath", None)
    xml_path = configured or "results.xml"
    if not os.path.exists(xml_path):
        return
    with open(xml_path) as report:
        report_text = report.read()
        write_qdc_log("results.xml", report_text)

View File

@@ -0,0 +1,41 @@
"""
On-device test-backend-ops runner for llama.cpp (HTP0 backend).
Executed by QDC's Appium test framework on the QDC runner.
The runner has ADB access to the allocated device.
"""
import os
import sys
import pytest
from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
@pytest.fixture(scope="session", autouse=True)
def install(driver):
    """Push the bundle once per session unless test-backend-ops is already present."""
    marker_binary = f"{BIN_PATH}/test-backend-ops"
    push_bundle_if_needed(marker_binary)
@pytest.mark.parametrize("type_a", ["mxfp4", "fp16", "q4_0"])
def test_backend_ops_htp0(type_a):
    """Run test-backend-ops for MUL_MAT on HTP0, filtered to one type_a value."""
    if type_a == "q4_0":
        # q4_0 uses a regex filter that also excludes one specific
        # type_b/m/n/k combination.
        pattern_arg = r' -p "^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$"'
    else:
        pattern_arg = f" -p type_a={type_a}"

    base_cmd = f"{CMD_PREFIX} GGML_HEXAGON_HOSTBUF=0 GGML_HEXAGON_EXPERIMENTAL=1 {BIN_PATH}/test-backend-ops -b HTP0 -o MUL_MAT"
    outcome = run_adb_command(base_cmd + pattern_arg, check=False)

    # Publish the output before asserting so it is available on failure too.
    write_qdc_log(f"backend_ops_{type_a}.log", outcome.stdout or "")
    assert outcome.returncode == 0, f"test-backend-ops type_a={type_a} failed (exit {outcome.returncode})"
if __name__ == "__main__":
    # Run this file under pytest, then publish the JUnit report (if one was
    # produced) as a QDC log before exiting with pytest's status.
    this_file = os.path.realpath(__file__)
    ret = pytest.main(["-s", "--junitxml=results.xml", this_file])
    report_exists = os.path.exists("results.xml")
    if report_exists:
        with open("results.xml") as report:
            write_qdc_log("results.xml", report.read())
    sys.exit(ret)

View File

@@ -0,0 +1,76 @@
"""
On-device bench and completion test runner for llama.cpp (CPU, GPU, NPU backends).
Executed by QDC's Appium test framework on the QDC runner.
The runner has ADB access to the allocated device.
Placeholders replaced at artifact creation time by run_qdc_jobs.py:
<<MODEL_URL>> Direct URL to the GGUF model file (downloaded on-device via curl)
"""
import os
import subprocess
import sys
import pytest
from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
# On-device location of the GGUF model used by every test in this file.
MODEL_PATH = "/data/local/tmp/model.gguf"
# Prompt passed to llama-completion via -p.
PROMPT = "What is the capital of France?"
# Options shared by the llama-completion runs.
CLI_OPTS = "--batch-size 128 -n 128 -no-cnv --seed 42"
@pytest.fixture(scope="session", autouse=True)
def install(driver):
    """Push the bundle and make sure the model is present on the device.

    The model is downloaded on-device with curl only when MODEL_PATH does not
    exist yet. <<MODEL_URL>> is a placeholder replaced at artifact creation time.
    """
    push_bundle_if_needed(f"{BIN_PATH}/llama-cli")

    # Skip model download if already present. The probe goes through
    # run_adb_command because a plain `adb shell` exit status is not reliable
    # on older ADB versions (it may be 0 even when `ls` fails), which would
    # silently skip the download.
    probe = run_adb_command(f"ls {MODEL_PATH}", check=False)
    if probe.returncode != 0:
        run_adb_command(f'curl -L -J --output {MODEL_PATH} "<<MODEL_URL>>"')
@pytest.mark.parametrize("device,extra_flags", [
    pytest.param("none", "-ctk q8_0 -ctv q8_0", id="cpu"),
    pytest.param("GPUOpenCL", "", id="gpu"),
    pytest.param("HTP0", "-ctk q8_0 -ctv q8_0", id="npu"),
])
def test_llama_completion(device, extra_flags):
    """Run llama-completion with the fixed prompt on one backend device."""
    command = (
        f'{CMD_PREFIX} {BIN_PATH}/llama-completion'
        f' -m {MODEL_PATH} --device {device} -ngl 99 -t 4 {CLI_OPTS} {extra_flags} -fa on'
        f' -p "{PROMPT}"'
    )
    outcome = run_adb_command(command, check=False)

    # Publish the output before asserting so it is available on failure too.
    captured = outcome.stdout or ""
    write_qdc_log(f"llama_completion_{device}.log", captured)
    assert outcome.returncode == 0, f"llama-completion {device} failed (exit {outcome.returncode})"
# Short tag used in the bench log file name for each --device value.
_DEVICE_LOG_NAME = {"none": "cpu", "GPUOpenCL": "gpu", "HTP0": "htp"}


@pytest.mark.parametrize("device", [
    pytest.param("none", id="cpu"),
    pytest.param("GPUOpenCL", id="gpu"),
    pytest.param("HTP0", id="npu"),
])
def test_llama_bench(device):
    """Run llama-bench on one backend device and require a zero exit status."""
    command = (
        f"{CMD_PREFIX} {BIN_PATH}/llama-bench"
        f" -m {MODEL_PATH} --device {device} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32"
    )
    outcome = run_adb_command(command, check=False)

    # Publish the output before asserting so it is available on failure too.
    log_tag = _DEVICE_LOG_NAME[device]
    write_qdc_log(f"llama_bench_{log_tag}.log", outcome.stdout or "")
    assert outcome.returncode == 0, f"llama-bench {device} failed (exit {outcome.returncode})"
if __name__ == "__main__":
    # Run this file under pytest, then publish the JUnit report (if one was
    # produced) as a QDC log before exiting with pytest's status.
    this_file = os.path.realpath(__file__)
    ret = pytest.main(["-s", "--junitxml=results.xml", this_file])
    report_exists = os.path.exists("results.xml")
    if report_exists:
        with open("results.xml") as report:
            write_qdc_log("results.xml", report.read())
    sys.exit(ret)

View File

@@ -0,0 +1,93 @@
"""Shared helpers for QDC on-device test runners."""
import logging
import os
import subprocess
import tempfile
from appium.options.common import AppiumOptions
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# On-device paths
# ---------------------------------------------------------------------------
# Directory the bundle occupies on the device after being pushed.
BUNDLE_PATH = "/data/local/tmp/llama_cpp_bundle"
# Directory that write_qdc_log pushes log files into.
QDC_LOGS_PATH = "/data/local/tmp/QDC_logs"
LIB_PATH = f"{BUNDLE_PATH}/lib"
BIN_PATH = f"{BUNDLE_PATH}/bin"
# Shell fragment that exports the library paths and marks the binaries
# executable. It ends with "&&", so a command must follow it.
ENV_PREFIX = (
    f"export LD_LIBRARY_PATH={LIB_PATH} && "
    f"export ADSP_LIBRARY_PATH={LIB_PATH} && "
    f"chmod +x {BIN_PATH}/* &&"
)
# ENV_PREFIX preceded by a cd into the bundle directory; also ends with "&&".
CMD_PREFIX = f"cd {BUNDLE_PATH} && {ENV_PREFIX}"
# ---------------------------------------------------------------------------
# Appium session options
# ---------------------------------------------------------------------------
options = AppiumOptions()
options.set_capability("automationName", "UiAutomator2")
options.set_capability("platformName", "Android")
# Taken from the environment; None when ANDROID_DEVICE_VERSION is not set.
options.set_capability("deviceName", os.getenv("ANDROID_DEVICE_VERSION"))
# ---------------------------------------------------------------------------
# ADB helpers
# ---------------------------------------------------------------------------
def run_adb_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProcess:
    """Run ``cmd`` through ``adb shell`` and return its output and exit code.

    Args:
        cmd: Shell command line to execute on the device.
        check: When true, assert that the recovered exit code is 0.

    Returns:
        A ``subprocess.CompletedProcess`` whose ``returncode`` is the on-device
        exit code (falling back to adb's own exit code when no sentinel is
        found) and whose ``stdout`` is the combined stdout/stderr text with the
        sentinel removed.

    Raises:
        AssertionError: If ``check`` is true and the exit code is non-zero.
    """
    # Append exit-code sentinel because `adb shell` doesn't reliably propagate
    # the on-device exit code (older ADB versions always return 0).
    sentinel = "__RC__:"
    raw = subprocess.run(
        ["adb", "shell", f"{cmd}; echo {sentinel}$?"],
        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
    )
    stdout = raw.stdout
    returncode = raw.returncode
    if stdout:
        # Search for the LAST sentinel anywhere in the text rather than only at
        # the start of the last line: a command whose output does not end with
        # a newline leaves the sentinel glued to its final line.
        output, found, code = stdout.rstrip("\n").rpartition(sentinel)
        if found:
            try:
                returncode = int(code)
                # Everything before the sentinel is the command's own output;
                # a command that printed nothing yields "" (no synthetic "\n").
                stdout = output
            except ValueError:
                # Not a real sentinel (no integer after it): keep adb's values.
                pass
    log.info("%s", stdout)
    result = subprocess.CompletedProcess(raw.args, returncode, stdout=stdout)
    if check:
        assert returncode == 0, f"Command failed (exit {returncode})"
    return result
def write_qdc_log(filename: str, content: str) -> None:
    """Push content as a log file to QDC_LOGS_PATH on the device for QDC log collection.

    The content is written to a local temporary file, pushed with ``adb push``
    to ``QDC_LOGS_PATH/filename`` and the temporary file is always removed.
    Log collection is best-effort: a failing ``adb`` step is reported through
    the module logger as a warning instead of raising.

    Args:
        filename: Name of the log file to create inside QDC_LOGS_PATH.
        content: Text to store in the log file.
    """
    mkdir = subprocess.run(
        ["adb", "shell", f"mkdir -p {QDC_LOGS_PATH}"],
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
    )
    if mkdir.returncode != 0:
        log.warning(
            "could not create %s on device (exit %s): %s",
            QDC_LOGS_PATH, mkdir.returncode,
            (mkdir.stdout or b"").decode("utf-8", "replace").strip(),
        )
    tmp_path = None
    try:
        # delete=False: the file has to outlive the `with` block so that adb
        # can read it after it has been closed.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False) as f:
            # Record the path first so the file is removed even if write() fails.
            tmp_path = f.name
            f.write(content)
        push = subprocess.run(
            ["adb", "push", tmp_path, f"{QDC_LOGS_PATH}/{filename}"],
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        )
        if push.returncode != 0:
            log.warning(
                "could not push %s to %s (exit %s): %s",
                filename, QDC_LOGS_PATH, push.returncode,
                (push.stdout or b"").decode("utf-8", "replace").strip(),
            )
    finally:
        if tmp_path is not None:
            os.unlink(tmp_path)
def push_bundle_if_needed(check_binary: str) -> None:
    """Push llama_cpp_bundle to the device if check_binary is not already present.

    Args:
        check_binary: On-device path whose existence (checked with ``ls``)
            indicates that the bundle has already been pushed.
    """
    # Probe through run_adb_command so the on-device exit code of `ls` is used:
    # the raw `adb shell` exit code is always 0 on older ADB versions, which
    # would make a missing bundle look present.
    probe = run_adb_command(f"ls {check_binary}", check=False)
    if probe.returncode == 0:
        return
    push = subprocess.run(
        ["adb", "push", "/qdc/appium/llama_cpp_bundle/", "/data/local/tmp"],
        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
    )
    if push.returncode != 0:
        # Report the failure instead of dropping it silently; callers keep the
        # original contract (no exception is raised from here).
        log.error(
            "adb push of llama_cpp_bundle failed (exit %s): %s",
            push.returncode, (push.stdout or "").strip(),
        )

View File

@@ -0,0 +1,48 @@
#!/usr/bin/env pwsh

# Runs llama-bench.exe from the pkg-snapdragon package directory.
# Every script argument is forwarded to llama-bench.exe unchanged.

# Package directory, relative to the current working directory
$basedir = ".\pkg-snapdragon"

$cli_opts = $args

# Model file name (resolved under $basedir\..\..\gguf); M overrides the default.
$model = if ($null -ne $env:M) { $env:M } else { "Llama-3.2-3B-Instruct-Q4_0.gguf" }

# Value passed to --device; D overrides the default.
$device = if ($null -ne $env:D) { $env:D } else { "HTP0" }

# Short environment variable -> GGML_HEXAGON_* variable it is copied to when set.
$forwarded = [ordered]@{
    V       = "GGML_HEXAGON_VERBOSE"
    PROF    = "GGML_HEXAGON_PROFILE"
    OPSTAGE = "GGML_HEXAGON_OPSTAGE"
    NHVX    = "GGML_HEXAGON_NHVX"
    NDEV    = "GGML_HEXAGON_NDEV"
    HB      = "GGML_HEXAGON_HOSTBUF"
}
foreach ($short in $forwarded.Keys) {
    $value = [Environment]::GetEnvironmentVariable($short)
    if ($null -ne $value) {
        Set-Item -Path "Env:$($forwarded[$short])" -Value $value
    }
}

$env:ADSP_LIBRARY_PATH = "$basedir\lib"

& "$basedir\bin\llama-bench.exe" `
    --mmap 0 -m $basedir\..\..\gguf\$model `
    --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 `
    --batch-size 128 -ngl 99 --device $device $cli_opts

View File

@@ -0,0 +1,53 @@
#!/usr/bin/env pwsh

# Runs llama-cli.exe from the pkg-snapdragon package directory.
#
# Optional environment variables:
#   M       - model file name under $basedir\..\..\gguf
#   D       - value passed to --device
#   V       - copied to GGML_HEXAGON_VERBOSE
#   SCHED   - copied to GGML_SCHED_DEBUG; also appends -v to the tool arguments
#   PROF    - copied to GGML_HEXAGON_PROFILE
#   OPSTAGE - copied to GGML_HEXAGON_OPSTAGE
#   NHVX    - copied to GGML_HEXAGON_NHVX
#   NDEV    - copied to GGML_HEXAGON_NDEV
#   HB      - copied to GGML_HEXAGON_HOSTBUF
# Every script argument is forwarded to llama-cli.exe.

# Package directory, relative to the current working directory
$basedir=".\pkg-snapdragon"

# Keep the forwarded arguments as an array so each stays a separate argument.
$cli_opts=@($args)

$model="Llama-3.2-3B-Instruct-Q4_0.gguf"
if ($null -ne $env:M) {
    $model=$env:M
}

$device="HTP0"
if ($null -ne $env:D) {
    $device=$env:D
}

if ($null -ne $env:V) {
    $env:GGML_HEXAGON_VERBOSE=$env:V
}

if ($null -ne $env:SCHED) {
    # Append -v as a new array element. Interpolating the array into a string
    # ("$cli_opts -v") would hand all forwarded arguments to the executable
    # as one single argument.
    $env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts+="-v"
}

if ($null -ne $env:PROF) {
    $env:GGML_HEXAGON_PROFILE=$env:PROF
}

if ($null -ne $env:OPSTAGE) {
    $env:GGML_HEXAGON_OPSTAGE=$env:OPSTAGE
}

if ($null -ne $env:NHVX) {
    $env:GGML_HEXAGON_NHVX=$env:NHVX
}

if ($null -ne $env:NDEV) {
    $env:GGML_HEXAGON_NDEV=$env:NDEV
}

if ($null -ne $env:HB) {
    $env:GGML_HEXAGON_HOSTBUF=$env:HB
}

$env:ADSP_LIBRARY_PATH="$basedir\lib"

& "$basedir\bin\llama-cli.exe" `
    --no-mmap -m $basedir\..\..\gguf\$model `
    --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 `
    --ctx-size 8192 --ubatch-size 256 -fa on `
    -ngl 99 --device $device $cli_opts

View File

@@ -0,0 +1,53 @@
#!/usr/bin/env pwsh

# Runs llama-completion.exe from the pkg-snapdragon package directory.
#
# Optional environment variables:
#   M       - model file name under $basedir\..\..\gguf
#   D       - value passed to --device
#   V       - copied to GGML_HEXAGON_VERBOSE
#   SCHED   - copied to GGML_SCHED_DEBUG; also appends -v to the tool arguments
#   PROF    - copied to GGML_HEXAGON_PROFILE
#   OPSTAGE - copied to GGML_HEXAGON_OPSTAGE
#   NHVX    - copied to GGML_HEXAGON_NHVX
#   NDEV    - copied to GGML_HEXAGON_NDEV
#   HB      - copied to GGML_HEXAGON_HOSTBUF
# Every script argument is forwarded to llama-completion.exe.

# Package directory, relative to the current working directory
$basedir=".\pkg-snapdragon"

# Keep the forwarded arguments as an array so each stays a separate argument.
$cli_opts=@($args)

$model="Llama-3.2-3B-Instruct-Q4_0.gguf"
if ($null -ne $env:M) {
    $model=$env:M
}

$device="HTP0"
if ($null -ne $env:D) {
    $device=$env:D
}

if ($null -ne $env:V) {
    $env:GGML_HEXAGON_VERBOSE=$env:V
}

if ($null -ne $env:SCHED) {
    # Append -v as a new array element. Interpolating the array into a string
    # ("$cli_opts -v") would hand all forwarded arguments to the executable
    # as one single argument.
    $env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts+="-v"
}

if ($null -ne $env:PROF) {
    $env:GGML_HEXAGON_PROFILE=$env:PROF
}

if ($null -ne $env:OPSTAGE) {
    $env:GGML_HEXAGON_OPSTAGE=$env:OPSTAGE
}

if ($null -ne $env:NHVX) {
    $env:GGML_HEXAGON_NHVX=$env:NHVX
}

if ($null -ne $env:NDEV) {
    $env:GGML_HEXAGON_NDEV=$env:NDEV
}

if ($null -ne $env:HB) {
    $env:GGML_HEXAGON_HOSTBUF=$env:HB
}

$env:ADSP_LIBRARY_PATH="$basedir\lib"

& "$basedir\bin\llama-completion.exe" `
    --no-mmap -m $basedir\..\..\gguf\$model `
    --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 `
    --ctx-size 8192 --batch-size 256 -fa on `
    -ngl 99 -no-cnv --device $device $cli_opts

View File

@@ -0,0 +1,68 @@
#!/usr/bin/env pwsh

# Runs llama-mtmd-cli.exe from the pkg-snapdragon package directory.
#
# Optional environment variables:
#   M           - model file name under $basedir\..\..\gguf
#   MMPROJ      - projector file name under $basedir\..\..\gguf
#   IMG         - image file name under $basedir\..\..\gguf; --image is only
#                 passed when this is set to a non-empty value
#   D           - value passed to --device
#   V           - copied to GGML_HEXAGON_VERBOSE
#   SCHED       - copied to GGML_SCHED_DEBUG; also appends -v to the tool arguments
#   PROF        - copied to GGML_HEXAGON_PROFILE
#   OPSTAGE     - copied to GGML_HEXAGON_OPSTAGE
#   NHVX        - copied to GGML_HEXAGON_NHVX
#   NDEV        - copied to GGML_HEXAGON_NDEV
#   HB          - copied to GGML_HEXAGON_HOSTBUF
#   MTMD_DEVICE - copied to MTMD_BACKEND_DEVICE
# Every script argument is forwarded to llama-mtmd-cli.exe.

# Package directory, relative to the current working directory
$basedir=".\pkg-snapdragon"

# Keep the forwarded arguments as an array so each stays a separate argument.
$cli_opts=@($args)

$model="gemma-3-4b-it-Q4_0.gguf"
if ($null -ne $env:M) {
    $model=$env:M
}

$mmproj="mmproj-F16.gguf"
if ($null -ne $env:MMPROJ) {
    $mmproj=$env:MMPROJ
}

$image=""
if ($null -ne $env:IMG) {
    $image=$env:IMG
}

# Only pass --image when an image was actually requested; with an empty name
# the option would point at the gguf directory itself instead of a file.
$image_opts=@()
if ($image -ne "") {
    $image_opts=@("--image", "$basedir\..\..\gguf\$image")
}

$device="HTP0"
if ($null -ne $env:D) {
    $device=$env:D
}

if ($null -ne $env:V) {
    $env:GGML_HEXAGON_VERBOSE=$env:V
}

if ($null -ne $env:SCHED) {
    # Append -v as a new array element. Interpolating the array into a string
    # ("$cli_opts -v") would hand all forwarded arguments to the executable
    # as one single argument.
    $env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts+="-v"
}

if ($null -ne $env:PROF) {
    $env:GGML_HEXAGON_PROFILE=$env:PROF
}

if ($null -ne $env:OPSTAGE) {
    $env:GGML_HEXAGON_OPSTAGE=$env:OPSTAGE
}

if ($null -ne $env:NHVX) {
    $env:GGML_HEXAGON_NHVX=$env:NHVX
}

if ($null -ne $env:NDEV) {
    $env:GGML_HEXAGON_NDEV=$env:NDEV
}

if ($null -ne $env:HB) {
    $env:GGML_HEXAGON_HOSTBUF=$env:HB
}

if ($null -ne $env:MTMD_DEVICE) {
    $env:MTMD_BACKEND_DEVICE=$env:MTMD_DEVICE
}

$env:ADSP_LIBRARY_PATH="$basedir\lib"

& "$basedir\bin\llama-mtmd-cli.exe" `
    --no-mmap -m $basedir\..\..\gguf\$model `
    --mmproj $basedir\..\..\gguf\$mmproj `
    $image_opts `
    --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 `
    --ctx-size 8192 --ubatch-size 256 -fa on `
    -ngl 99 --device $device -v $cli_opts

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env pwsh

# Runs an arbitrary tool from the pkg-snapdragon bin directory.
#
# Usage: <script> <tool> [tool arguments...]
# The first argument names the executable under $basedir\bin; the remaining
# arguments are forwarded to it.
#
# Optional environment variables:
#   D       - stored in $device (not passed to the tool by this script)
#   V       - copied to GGML_HEXAGON_VERBOSE
#   SCHED   - copied to GGML_SCHED_DEBUG; also appends -v to the tool arguments
#   PROF    - copied to GGML_HEXAGON_PROFILE
#   OPSTAGE - copied to GGML_HEXAGON_OPSTAGE
#   NHVX    - copied to GGML_HEXAGON_NHVX
#   NDEV    - copied to GGML_HEXAGON_NDEV
#   HB      - copied to GGML_HEXAGON_HOSTBUF

# Package directory, relative to the current working directory
$basedir=".\pkg-snapdragon"

if ($args.Count -eq 0) {
    Write-Host "No arguments provided.Expected the tool and argument to run."
    exit -1
}

$tool=$args[0]

# Everything after the tool name, kept as an array so each argument stays
# a separate argument when it is passed on.
$cli_opts=@()
if ($args.Count -gt 1) {
    $cli_opts=@($args[1..($args.Count - 1)])
}

$device="HTP0"
if ($null -ne $env:D) {
    $device=$env:D
}

if ($null -ne $env:V) {
    $env:GGML_HEXAGON_VERBOSE=$env:V
}

if ($null -ne $env:SCHED) {
    # Append -v as a new array element. Interpolating the array into a string
    # ("$cli_opts -v") would hand all forwarded arguments to the executable
    # as one single argument.
    $env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts+="-v"
}

if ($null -ne $env:PROF) {
    $env:GGML_HEXAGON_PROFILE=$env:PROF
}

if ($null -ne $env:OPSTAGE) {
    $env:GGML_HEXAGON_OPSTAGE=$env:OPSTAGE
}

if ($null -ne $env:NHVX) {
    $env:GGML_HEXAGON_NHVX=$env:NHVX
}

if ($null -ne $env:NDEV) {
    $env:GGML_HEXAGON_NDEV=$env:NDEV
}

if ($null -ne $env:HB) {
    $env:GGML_HEXAGON_HOSTBUF=$env:HB
}

$env:ADSP_LIBRARY_PATH="$basedir\lib"

& "$basedir\bin\$tool" `
    $cli_opts

View File

@@ -0,0 +1,105 @@
# Running as Administrator is not strictly required for setting User-scope
# environment variables, but it is recommended when creating directories in
# the C:\ root is restricted by permissions.
# Make every error terminating so the script stops at the first failure.
$ErrorActionPreference = "Stop"
# --- Configuration ---
# Root directory under which both SDKs are installed.
$BaseDir = "C:\Qualcomm"
# SDK 1: Hexagon
# Download URL, parent directory and version-specific target directory.
$HexagonUrl = "https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v6.4.0.2/hexagon-sdk-v6.4.0.2-arm64-wos.tar.xz"
$HexagonParent = Join-Path $BaseDir "Hexagon_SDK"
$HexagonSdkVersion = "6.4.0.2"
$HexagonToolsVersion = "19.0.04"
$HexagonSdkTarget = Join-Path $HexagonParent $HexagonSdkVersion
# Tools directory inside the SDK; exported later as HEXAGON_TOOLS_ROOT.
$HexagonToolsTarget = Join-Path $HexagonSdkTarget "\tools\HEXAGON_Tools\$HexagonToolsVersion"
# SDK 2: OpenCL
# Download URL, parent directory and version-specific target directory.
$OpenCLUrl = "https://github.com/snapdragon-toolchain/opencl-sdk/releases/download/v2.3.2/adreno-opencl-sdk-v2.3.2-arm64-wos.tar.xz"
$OpenCLParent = Join-Path $BaseDir "OpenCL_SDK"
$OpenCLVersion = "2.3.2"
$OpenCLTarget = Join-Path $OpenCLParent $OpenCLVersion
# --- Helper Function ---
function Install-QualcommSDK {
    <#
    .SYNOPSIS
    Downloads and extracts an SDK archive unless its target directory already exists.

    .PARAMETER Url
    URL of the .tar.xz archive to download.

    .PARAMETER ParentDir
    Directory that holds the temporary archive; created if it is missing.

    .PARAMETER TargetDir
    Version-specific directory whose presence means the SDK is already installed.

    .PARAMETER Name
    Display name used in progress and error messages.

    .NOTES
    On failure the target directory is removed (so the next run retries) and an
    error is written; the temporary archive is deleted in every case.
    #>
    param (
        [string]$Url,
        [string]$ParentDir,
        [string]$TargetDir,
        [string]$Name
    )

    # 1. Create Parent Directory
    if (-not (Test-Path -Path $ParentDir)) {
        Write-Host "Creating directory: $ParentDir" -ForegroundColor Cyan
        New-Item -Path $ParentDir -ItemType Directory -Force | Out-Null
    }

    # 2. Check for Specific Version Directory
    if (Test-Path -Path $TargetDir) {
        Write-Host "$Name ($TargetDir) already exists. Skipping download." -ForegroundColor Green
    }
    else {
        Write-Host "$Name not found. preparing to download..." -ForegroundColor Yellow

        # Create the target directory up front
        New-Item -Path $TargetDir -ItemType Directory -Force | Out-Null

        # Define temporary archive path
        $TempFile = Join-Path $ParentDir "temp_sdk.tar.xz"

        try {
            # Download
            Write-Host "Downloading from: $Url"
            Invoke-WebRequest -Uri $Url -OutFile $TempFile

            # Untar
            # Note: We assume Windows includes tar.exe (Win 10 build 17063+)
            Write-Host "Extracting archive to $TargetDir..."

            # -C points at the PARENT of the target directory.
            # NOTE(review): this presumably relies on the archive containing a
            # top-level directory named after the version - confirm.
            tar -xJvf $TempFile -C $TargetDir\..

            # A failing native command does not raise a PowerShell error by
            # itself, so check the exit code explicitly.
            if ($LASTEXITCODE -ne 0) {
                throw "tar exited with code $LASTEXITCODE"
            }

            Write-Host "Extraction complete." -ForegroundColor Green
        }
        catch {
            $failure = $_

            # Cleanup target dir if failed so script tries again next time.
            # This must run BEFORE Write-Error: when $ErrorActionPreference is
            # "Stop", Write-Error terminates and nothing after it executes.
            Remove-Item -Path $TargetDir -Recurse -Force -ErrorAction SilentlyContinue

            Write-Error "Failed to download or extract $Name. Error: $failure"
        }
        finally {
            # Cleanup Archive
            if (Test-Path $TempFile) { Remove-Item $TempFile -Force }
        }
    }
}
# --- Execution ---
# 1. Ensure Base C:\Qualcomm exists
if (-not (Test-Path $BaseDir)) {
New-Item -Path $BaseDir -ItemType Directory -Force | Out-Null
}
# 2. Run Install Logic
# Each call is skipped by Install-QualcommSDK when its target directory already exists.
Install-QualcommSDK -Url $HexagonUrl -ParentDir $HexagonParent -TargetDir $HexagonSdkTarget -Name "Hexagon SDK"
Install-QualcommSDK -Url $OpenCLUrl -ParentDir $OpenCLParent -TargetDir $OpenCLTarget -Name "OpenCL SDK"
# --- Environment Variables ---
# Each variable is stored at User scope and also set for the current session.
Write-Host "`nSetting Environment Variables..." -ForegroundColor Cyan
# Set OPENCL_SDK_ROOT
[System.Environment]::SetEnvironmentVariable('OPENCL_SDK_ROOT', $OpenCLTarget, [System.EnvironmentVariableTarget]::User)
$env:OPENCL_SDK_ROOT = $OpenCLTarget # Set for current session as well
Write-Host "OPENCL_SDK_ROOT set to: $OpenCLTarget"
# Set HEXAGON_SDK_ROOT
[System.Environment]::SetEnvironmentVariable('HEXAGON_SDK_ROOT', $HexagonSdkTarget, [System.EnvironmentVariableTarget]::User)
$env:HEXAGON_SDK_ROOT = $HexagonSdkTarget # Set for current session as well
Write-Host "HEXAGON_SDK_ROOT set to: $HexagonSdkTarget"
# Set HEXAGON_TOOLS_ROOT
[System.Environment]::SetEnvironmentVariable('HEXAGON_TOOLS_ROOT', $HexagonToolsTarget, [System.EnvironmentVariableTarget]::User)
$env:HEXAGON_TOOLS_ROOT = $HexagonToolsTarget # Set for current session as well
Write-Host "HEXAGON_TOOLS_ROOT set to: $HexagonToolsTarget"