llama.cpp verification source 2026-05-22

2026-05-22 16:44:08 +08:00
commit 8e5a449007
2740 changed files with 1155720 additions and 0 deletions
--- a/scripts/snapdragon/adb/llama-cli.farf
+++ b/scripts/snapdragon/adb/llama-cli.farf
@@ -0,0 +1 @@
+0xffff
--- a/scripts/snapdragon/adb/run-bench.sh
+++ b/scripts/snapdragon/adb/run-bench.sh
@@ -0,0 +1,70 @@
+#!/bin/sh
+#
+# Run llama-bench on an Android device through "adb shell".
+#
+# Optional environment variables:
+#   B        build directory below $basedir (default: .)
+#   S        value for "adb -s"
+#   H        value for "adb -H"
+#   M        model file name, looked up in $basedir/../gguf
+#   D        value for --device (default: HTP0)
+#   V        value for GGML_HEXAGON_VERBOSE; also adds -v
+#   PROF     value for GGML_HEXAGON_PROFILE; also adds -v
+#   OPSTAGE  value for GGML_HEXAGON_OPSTAGE
+#   NHVX     value for GGML_HEXAGON_NHVX
+#   NDEV     value for GGML_HEXAGON_NDEV
+#   HB       value for GGML_HEXAGON_HOSTBUF
+#
+# Command-line arguments are appended to the llama-bench command line.
+
+# Basedir on device
+basedir=/data/local/tmp/llama.cpp
+
+# Start empty so a cli_opts value inherited from the environment is not used.
+cli_opts=
+
+branch=.
+[ "$B" != "" ] && branch=$B
+
+adbserial=
+[ "$S" != "" ] && adbserial="-s $S"
+
+adbhost=
+[ "$H" != "" ] && adbhost="-H $H"
+
+model="Llama-3.2-3B-Instruct-Q4_0.gguf"
+[ "$M" != "" ] && model="$M"
+
+device="HTP0"
+[ "$D" != "" ] && device="$D"
+
+verbose=
+[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v"
+
+profile=
+[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF" cli_opts="$cli_opts -v"
+
+opmask=
+[ "$OPSTAGE" != "" ] && opmask="GGML_HEXAGON_OPSTAGE=$OPSTAGE"
+
+nhvx=
+[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"
+
+ndev=
+[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"
+
+hb=
+[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"
+
+set -x
+
+# $adbserial and $adbhost stay unquoted so "-s X" / "-H X" split into two words.
+# $* (rather than $@) keeps the remote command line a single adb argument.
+adb $adbserial $adbhost shell " \
+  cd $basedir;         \
+  LD_LIBRARY_PATH=$basedir/$branch/lib   \
+  ADSP_LIBRARY_PATH=$basedir/$branch/lib \
+    $ndev $nhvx $opmask $verbose $profile $hb ./$branch/bin/llama-bench --device $device --mmap 0 -m $basedir/../gguf/$model \
+        --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
+        --ubatch-size 256 -fa 1 -ngl 99 $cli_opts $*    \
+"
--- a/scripts/snapdragon/adb/run-cli.sh
+++ b/scripts/snapdragon/adb/run-cli.sh
@@ -0,0 +1,100 @@
+#!/bin/sh
+#
+# Run llama-cli on an Android device through "adb shell".
+#
+# Optional environment variables:
+#   B        build directory below $basedir (default: .)
+#   S        value for "adb -s"
+#   H        value for "adb -H"
+#   M        model file name, looked up in $basedir/../gguf
+#   D        value for --device (default: HTP0)
+#   V        value for GGML_HEXAGON_VERBOSE; also adds -v
+#   SCHED    when non-empty, sets GGML_SCHED_DEBUG=2 and adds -v
+#   PROF     value for GGML_HEXAGON_PROFILE; also adds -v
+#   OPSTAGE  value for GGML_HEXAGON_OPSTAGE
+#   NHVX     value for GGML_HEXAGON_NHVX
+#   HMX      value for GGML_HEXAGON_USE_HMX
+#   NDEV     value for GGML_HEXAGON_NDEV
+#   HB       value for GGML_HEXAGON_HOSTBUF
+#   OB       value for GGML_HEXAGON_OPBATCH
+#   OQ       value for GGML_HEXAGON_OPQUEUE
+#   OF       value for GGML_HEXAGON_OPFILTER
+#   VM       value for GGML_HEXAGON_VMEM
+#   MB       value for GGML_HEXAGON_MBUF
+#
+# Command-line arguments are appended to the llama-cli command line.
+
+# Basedir on device
+basedir=/data/local/tmp/llama.cpp
+
+cli_opts=
+
+branch=.
+[ "$B" != "" ] && branch=$B
+
+adbserial=
+[ "$S" != "" ] && adbserial="-s $S"
+
+adbhost=
+[ "$H" != "" ] && adbhost="-H $H"
+
+model="Llama-3.2-3B-Instruct-Q4_0.gguf"
+[ "$M" != "" ] && model="$M"
+
+device="HTP0"
+[ "$D" != "" ] && device="$D"
+
+verbose=
+[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v"
+
+sched=
+[ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"
+
+profile=
+[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF" cli_opts="$cli_opts -v"
+
+opmask=
+[ "$OPSTAGE" != "" ] && opmask="GGML_HEXAGON_OPSTAGE=$OPSTAGE"
+
+nhvx=
+[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"
+
+hmx=
+[ "$HMX" != "" ] && hmx="GGML_HEXAGON_USE_HMX=$HMX"
+
+ndev=
+[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"
+
+hb=
+[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"
+
+opbatch=
+[ "$OB" != "" ] && opbatch="GGML_HEXAGON_OPBATCH=$OB"
+
+opqueue=
+[ "$OQ" != "" ] && opqueue="GGML_HEXAGON_OPQUEUE=$OQ"
+
+opflt=
+[ "$OF" != "" ] && opflt="GGML_HEXAGON_OPFILTER=$OF"
+
+# VM and MB get their own variables; assigning them to $opflt would drop OF.
+vmem=
+[ "$VM" != "" ] && vmem="GGML_HEXAGON_VMEM=$VM"
+
+mbuf=
+[ "$MB" != "" ] && mbuf="GGML_HEXAGON_MBUF=$MB"
+
+set -x
+
+# $adbserial and $adbhost stay unquoted so "-s X" / "-H X" split into two words.
+# $* (rather than $@) keeps the remote command line a single adb argument.
+adb $adbserial $adbhost shell " \
+  cd $basedir; ulimit -c unlimited;        \
+    LD_LIBRARY_PATH=$basedir/$branch/lib   \
+    ADSP_LIBRARY_PATH=$basedir/$branch/lib \
+    $verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt $vmem $mbuf \
+      ./$branch/bin/llama-cli --no-mmap -m $basedir/../gguf/$model \
+         --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1           \
+         --ctx-size 8192 --ubatch-size 256 -fa on                  \
+         -ngl 99 --device $device $cli_opts $*                     \
+"
--- a/scripts/snapdragon/adb/run-completion.sh
+++ b/scripts/snapdragon/adb/run-completion.sh
@@ -0,0 +1,99 @@
+#!/bin/sh
+#
+# Run llama-completion on an Android device through "adb shell".
+#
+# Optional environment variables:
+#   B        build directory below $basedir (default: .)
+#   S        value for "adb -s"
+#   H        value for "adb -H"
+#   M        model file name, looked up in $basedir/../gguf
+#   D        value for --device (default: HTP0)
+#   V        value for GGML_HEXAGON_VERBOSE; also adds -v
+#   SCHED    when non-empty, sets GGML_SCHED_DEBUG=2 and adds -v
+#   PROF     value for GGML_HEXAGON_PROFILE; also adds -v
+#   OPSTAGE  value for GGML_HEXAGON_OPSTAGE
+#   NHVX     value for GGML_HEXAGON_NHVX
+#   HMX      value for GGML_HEXAGON_USE_HMX
+#   NDEV     value for GGML_HEXAGON_NDEV
+#   HB       value for GGML_HEXAGON_HOSTBUF
+#   OB       value for GGML_HEXAGON_OPBATCH
+#   OQ       value for GGML_HEXAGON_OPQUEUE
+#   OF       value for GGML_HEXAGON_OPFILTER
+#   VM       value for GGML_HEXAGON_VMEM
+#   MB       value for GGML_HEXAGON_MBUF
+#
+# Command-line arguments are appended to the llama-completion command line.
+
+# Basedir on device
+basedir=/data/local/tmp/llama.cpp
+
+cli_opts=
+
+branch=.
+[ "$B" != "" ] && branch=$B
+
+adbserial=
+[ "$S" != "" ] && adbserial="-s $S"
+
+adbhost=
+[ "$H" != "" ] && adbhost="-H $H"
+
+model="Llama-3.2-3B-Instruct-Q4_0.gguf"
+[ "$M" != "" ] && model="$M"
+
+device="HTP0"
+[ "$D" != "" ] && device="$D"
+
+verbose=
+[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v"
+
+sched=
+[ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"
+
+profile=
+[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF" cli_opts="$cli_opts -v"
+
+opmask=
+[ "$OPSTAGE" != "" ] && opmask="GGML_HEXAGON_OPSTAGE=$OPSTAGE"
+
+nhvx=
+[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"
+
+hmx=
+[ "$HMX" != "" ] && hmx="GGML_HEXAGON_USE_HMX=$HMX"
+
+ndev=
+[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"
+
+hb=
+[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"
+
+opbatch=
+[ "$OB" != "" ] && opbatch="GGML_HEXAGON_OPBATCH=$OB"
+
+opqueue=
+[ "$OQ" != "" ] && opqueue="GGML_HEXAGON_OPQUEUE=$OQ"
+
+opflt=
+[ "$OF" != "" ] && opflt="GGML_HEXAGON_OPFILTER=$OF"
+
+vmem=
+[ "$VM" != "" ] && vmem="GGML_HEXAGON_VMEM=$VM"
+
+mbuf=
+[ "$MB" != "" ] && mbuf="GGML_HEXAGON_MBUF=$MB"
+
+set -x
+
+# $adbserial and $adbhost stay unquoted so "-s X" / "-H X" split into two words.
+# $* (rather than $@) keeps the remote command line a single adb argument.
+adb $adbserial $adbhost shell " \
+  cd $basedir; ulimit -c unlimited;        \
+    LD_LIBRARY_PATH=$basedir/$branch/lib   \
+    ADSP_LIBRARY_PATH=$basedir/$branch/lib \
+    $verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt $vmem $mbuf \
+      ./$branch/bin/llama-completion --no-mmap -m $basedir/../gguf/$model \
+         --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1                  \
+         --ctx-size 8192 --ubatch-size 256 -fa on                         \
+         -ngl 99 -no-cnv --device $device $cli_opts $*                    \
+"
--- a/scripts/snapdragon/adb/run-mtmd.sh
+++ b/scripts/snapdragon/adb/run-mtmd.sh
@@ -0,0 +1,96 @@
+#!/bin/sh
+#
+# Run llama-mtmd-cli on an Android device through "adb shell".
+#
+# Optional environment variables:
+#   B            build directory below $basedir (default: .)
+#   S            value for "adb -s"
+#   H            value for "adb -H"
+#   M            model file name, looked up in $basedir/../gguf
+#   MMPROJ       projector file name, looked up in $basedir/../gguf
+#   IMG          image file name in $basedir/../gguf; adds --image when set
+#   D            value for --device (default: HTP0)
+#   V            value for GGML_HEXAGON_VERBOSE
+#   E            value for GGML_HEXAGON_EXPERIMENTAL (default: 1)
+#   SCHED        when non-empty, sets GGML_SCHED_DEBUG=2 and adds -v
+#   PROF         value for GGML_HEXAGON_PROFILE
+#   OPSTAGE      value for GGML_HEXAGON_OPSTAGE
+#   NHVX         value for GGML_HEXAGON_NHVX
+#   HMX          value for GGML_HEXAGON_USE_HMX
+#   NDEV         value for GGML_HEXAGON_NDEV
+#   MTMD_DEVICE  value for MTMD_BACKEND_DEVICE
+#
+# Command-line arguments are appended to the llama-mtmd-cli command line.
+
+# Basedir on device
+basedir=/data/local/tmp/llama.cpp
+
+cli_opts=
+
+branch=.
+[ "$B" != "" ] && branch=$B
+
+adbserial=
+[ "$S" != "" ] && adbserial="-s $S"
+
+adbhost=
+[ "$H" != "" ] && adbhost="-H $H"
+
+model="gemma-3-4b-it-Q4_0.gguf"
+[ "$M" != "" ] && model="$M"
+
+mmproj="mmproj-F16.gguf"
+[ "$MMPROJ" != "" ] && mmproj="$MMPROJ"
+
+# Only pass --image when IMG is set; with an empty name the option would
+# point at the gguf directory itself.
+image=
+[ "$IMG" != "" ] && image="--image $basedir/../gguf/$IMG"
+
+device="HTP0"
+[ "$D" != "" ] && device="$D"
+
+verbose=
+[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V"
+
+experimental="GGML_HEXAGON_EXPERIMENTAL=1"
+[ "$E" != "" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E"
+
+sched=
+[ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"
+
+profile=
+[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF"
+
+opmask=
+[ "$OPSTAGE" != "" ] && opmask="GGML_HEXAGON_OPSTAGE=$OPSTAGE"
+
+nhvx=
+[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"
+
+hmx=
+[ "$HMX" != "" ] && hmx="GGML_HEXAGON_USE_HMX=$HMX"
+
+ndev=
+[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"
+
+# MTMD backend device for vision model (defaults to CPU if not set)
+mtmd_backend=
+[ "$MTMD_DEVICE" != "" ] && mtmd_backend="MTMD_BACKEND_DEVICE=$MTMD_DEVICE"
+
+set -x
+
+# $adbserial and $adbhost stay unquoted so "-s X" / "-H X" split into two words.
+# $* (rather than $@) keeps the remote command line a single adb argument.
+adb $adbserial $adbhost shell " \
+  cd $basedir; ulimit -c unlimited;        \
+    LD_LIBRARY_PATH=$basedir/$branch/lib   \
+    ADSP_LIBRARY_PATH=$basedir/$branch/lib \
+    $verbose $experimental $sched $opmask $profile $hmx $nhvx $ndev $mtmd_backend \
+      ./$branch/bin/llama-mtmd-cli --no-mmap -m $basedir/../gguf/$model      \
+         --mmproj $basedir/../gguf/$mmproj                                   \
+         $image                                                              \
+         --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1                     \
+         --ctx-size 8192 --ubatch-size 256 -fa on                            \
+         -ngl 99 --device $device -v $cli_opts $*                            \
+"
--- a/scripts/snapdragon/adb/run-tool.sh
+++ b/scripts/snapdragon/adb/run-tool.sh
@@ -0,0 +1,80 @@
+#!/bin/sh
+#
+# Run a tool from $basedir/$branch/bin on an Android device through "adb shell".
+#
+# Usage: run-tool.sh <tool> [tool arguments...]
+#
+# Optional environment variables:
+#   B        build directory below $basedir (default: .)
+#   S        value for "adb -s"
+#   H        value for "adb -H"
+#   D        sets $device, which the command below does not use
+#   V        value for GGML_HEXAGON_VERBOSE
+#   SCHED    when non-empty, sets GGML_SCHED_DEBUG=2 (the -v it adds to
+#            $cli_opts is not passed to the tool)
+#   PROF     value for GGML_HEXAGON_PROFILE
+#   OPSTAGE  value for GGML_HEXAGON_OPSTAGE
+#   NHVX     value for GGML_HEXAGON_NHVX
+#   HMX      value for GGML_HEXAGON_USE_HMX
+#   NDEV     value for GGML_HEXAGON_NDEV
+#   HB       value for GGML_HEXAGON_HOSTBUF
+
+# Basedir on device
+basedir=/data/local/tmp/llama.cpp
+
+cli_opts=
+
+branch=.
+[ "$B" != "" ] && branch=$B
+
+adbserial=
+[ "$S" != "" ] && adbserial="-s $S"
+
+adbhost=
+[ "$H" != "" ] && adbhost="-H $H"
+
+device="HTP0"
+[ "$D" != "" ] && device="$D"
+
+verbose=
+[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V"
+
+sched=
+[ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"
+
+profile=
+[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF"
+
+opmask=
+[ "$OPSTAGE" != "" ] && opmask="GGML_HEXAGON_OPSTAGE=$OPSTAGE"
+
+nhvx=
+[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"
+
+hmx=
+[ "$HMX" != "" ] && hmx="GGML_HEXAGON_USE_HMX=$HMX"
+
+ndev=
+[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"
+
+hb=
+[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"
+
+# A tool name is required; stop with a usage message when it is missing.
+if [ $# -lt 1 ]; then
+  printf 'usage: %s <tool> [tool arguments...]\n' "$0" >&2
+  exit 1
+fi
+
+set -x
+
+tool=$1; shift
+
+# $adbserial and $adbhost stay unquoted so "-s X" / "-H X" split into two words.
+# $* (rather than $@) keeps the remote command line a single adb argument.
+adb $adbserial $adbhost shell " \
+  cd $basedir; ulimit -c unlimited;        \
+    LD_LIBRARY_PATH=$basedir/$branch/lib   \
+    ADSP_LIBRARY_PATH=$basedir/$branch/lib \
+    $verbose $sched $opmask $profile $nhvx $hmx $ndev $hb ./$branch/bin/$tool $* \
+"
--- a/scripts/snapdragon/ggml-hexagon-profile.py
+++ b/scripts/snapdragon/ggml-hexagon-profile.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import re
+import argparse
+import statistics
+import logging
+
+from collections import defaultdict
+
+# Mapping of cli-friendly names to (internal_data_key, Display Header, numeric_sort_key)
+COL_MAP = {
+    "op":         ("op",         "Op",         "op"),
+    "dims":       ("dims",       "Dims",       "dims"),
+    "dtypes":     ("dtypes",     "DTypes",     "dtypes"),
+    "count":      ("count",      "Count",      "_sort_count"),
+    "max-usec":   ("max_usec",   "Max usec",   "_sort_max_usec"),
+    "avg-usec":   ("avg_usec",   "Avg usec",   "_sort_avg_usec"),
+    "max-cycles": ("max_cycles", "Max Cycles", "_sort_max_cycles"),
+    "avg-cycles": ("avg_cycles", "Avg Cycles", "_sort_avg_cycles"),
+    "max-pmu":    ("max_pmu",    "Max PMU",    "_sort_max_pmu"),
+    "avg-pmu":    ("avg_pmu",    "Avg PMU",    "_sort_avg_pmu"),
+}
+
+op_pattern = re.compile(
+    r"profile-op\s+(?P<op_name>[A-Z_0-9]+):\s+.*?\s+:\s+(?P<dims>[\d:x\s\->!]+)\s+:\s+(?P<types>[a-z\d_\s\->x]+)\s+:\s+.*?\s+usec\s+(?P<usec>\d+)\s+cycles\s+(?P<cycles>\d+)(?:\s+pmu\s+\[(?P<pmu>[\d,\s]+)\])?"
+)
+
+logger = logging.getLogger("ggml-hexagon-profile")
+
+
+def parse_log(file_path, pmu_index=None):
+    """Parse "profile-op" lines of a log ("-" means stdin) into a list of dicts.
+
+    Each dict has name, dims, types, usec, cycles and pmu_val (counter number
+    pmu_index of the line's PMU list, else None). A missing file exits with 1.
+    """
+    try:
+        if file_path != "-":
+            f = open(file_path, 'r', encoding='utf-8', errors='ignore')
+        else:
+            f = os.fdopen(0, 'r', encoding='utf-8', errors='ignore')
+    except FileNotFoundError:
+        logger.error(f"file '{file_path}' not found.")
+        sys.exit(1)
+
+    all_ops = []
+    with f:  # closes the handle even when reading or parsing raises
+        for line in f:
+            match = op_pattern.search(line)
+            if not match: continue
+            pmu_val = None
+            pmu_raw = match.group('pmu')
+            if pmu_raw and pmu_index is not None:
+                try:
+                    pmu_val = [int(x) for x in pmu_raw.split(',')][pmu_index]
+                except (ValueError, IndexError):
+                    pmu_val = None
+            all_ops.append({
+                'name':    match.group('op_name'),
+                'dims':    match.group('dims').strip(),
+                'types':   match.group('types').strip(),
+                'usec':    int(match.group('usec')),
+                'cycles':  int(match.group('cycles')),
+                'pmu_val': pmu_val
+            })
+
+    return all_ops
+
+
+def generate_report(ops, top_n, width_overrides, sort_col, pmu_name=None):
+    """Log a table of per-(op, dims, dtypes) statistics for the parsed ops.
+
+    Rows are sorted by sort_col (a COL_MAP key; numeric columns descending) and
+    cut to top_n. width_overrides maps COL_MAP keys to widths: 0 or less hides
+    the column, longer cells are shortened. PMU columns appear if pmu_name is set.
+    """
+    if not ops:
+        logger.info("No valid records found.")
+        return
+
+    grouped = defaultdict(list)
+    for op in ops:
+        key = (op['name'], op['dims'], op['types'])
+        grouped[key].append(op)
+
+    group_stats = []
+    for (name, dims, types), group_ops in grouped.items():
+        usecs = [o['usec'] for o in group_ops]
+        cycles = [o['cycles'] for o in group_ops]
+        pmu_vals = [o['pmu_val'] for o in group_ops if o['pmu_val'] is not None]
+
+        stats = {
+            'count':      len(group_ops),
+            'max_usec':   max(usecs),
+            'avg_usec':   statistics.mean(usecs),
+            'max_cycles': max(cycles),
+            'avg_cycles': statistics.mean(cycles),
+            'max_pmu':    max(pmu_vals) if pmu_vals else 0,
+            'avg_pmu':    statistics.mean(pmu_vals) if pmu_vals else 0,
+        }
+        row = {'op': name, 'dims': dims, 'dtypes': types}
+        for stat, value in stats.items():
+            # Display text for the table, raw number for accurate sorting
+            row[stat] = f"{value:.2f}" if stat.startswith("avg") else str(value)
+            row[f"_sort_{stat}"] = value
+        group_stats.append(row)
+
+    # Sort numeric fields descending, strings (op/dims/dtypes) ascending
+    actual_sort_key = COL_MAP[sort_col][2]
+    is_numeric    = actual_sort_key.startswith("_")
+    sorted_groups = sorted(group_stats, key=lambda x: x[actual_sort_key], reverse=is_numeric)[:top_n]
+
+    # Define initial column order
+    active_cols = ["op", "dims", "dtypes"]
+    if pmu_name:
+        active_cols += ["max-pmu", "avg-pmu"]
+    active_cols += ["max-usec", "avg-usec", "max-cycles", "avg-cycles", "count"]
+
+    final_headers, final_keys, final_widths = [], [], []
+    for col_name in active_cols:
+        data_key, header_text, _ = COL_MAP[col_name]
+        if "pmu" in col_name and pmu_name:
+            header_text = header_text.replace("PMU", pmu_name)
+
+        natural_width = max([len(row[data_key]) for row in sorted_groups] + [len(header_text)])
+        target_width  = width_overrides.get(col_name, natural_width)
+
+        if target_width <= 0:  # a negative width is not a valid format width; hide it like 0
+            continue
+
+        final_headers.append(header_text)
+        final_keys.append(data_key)
+        final_widths.append(target_width)
+
+    # Print Report
+    logger.info(f"\n# Profile Report (Top {top_n} Ops sorted by {sort_col})\n")
+    header_line = "| " + " | ".join(f"{h:<{final_widths[i]}}" for i, h in enumerate(final_headers)) + " |"
+    sep_line    = "| " + " | ".join("-" * final_widths[i] for i in range(len(final_headers))) + " |"
+    logger.info(header_line)
+    logger.info(sep_line)
+
+    for group in sorted_groups:
+        row_vals = []
+        for i, key in enumerate(final_keys):
+            val, width = group[key], final_widths[i]
+            if len(val) > width:
+                # "..." only fits from width 3 up; narrower columns are just cut
+                val = val[:width - 3] + "..." if width >= 3 else val[:width]
+            row_vals.append(f"{val:<{width}}")
+        logger.info("| " + " | ".join(row_vals) + " |")
+
+
+def main():
+    """Parse the command line, read the log and log the profile report."""
+    parser = argparse.ArgumentParser(description="Post-process Op profile info.")
+    parser.add_argument("logfile")
+    parser.add_argument("-n", "--top", type=int, default=100)
+    parser.add_argument("--sort", type=str, default="max-usec", choices=list(COL_MAP.keys()))
+    parser.add_argument("--pmu-index", type=int)
+    parser.add_argument("--pmu-name", type=str)
+    parser.add_argument("--width", action='append', default=['dims:40'], help="Override column width, e.g. --width dims:50")
+
+    args = parser.parse_args()
+    logging.basicConfig(level=logging.INFO, format='%(message)s')
+
+    # Sort validation: can't sort by PMU if index isn't provided
+    if "pmu" in args.sort and args.pmu_index is None:
+        logger.error(f"Cannot sort by '{args.sort}' without --pmu-index.")
+        sys.exit(1)
+
+    # --width entries are NAME:WIDTH; a later entry for the same name replaces an earlier one
+    overrides = {}
+    if args.width:
+        for w in args.width:
+            try:
+                name, val = w.split(':')
+                overrides[name.lower()] = int(val)
+            except ValueError:
+                logger.warning(f"Invalid width format '{w}'")
+    # A PMU name exists only when --pmu-index is given; it defaults to "#<index>"
+    final_pmu_name = (args.pmu_name or f"#{args.pmu_index}") if args.pmu_index is not None else None
+    ops = parse_log(args.logfile, pmu_index=args.pmu_index)
+    generate_report(ops, args.top, overrides, args.sort, pmu_name=final_pmu_name)
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/snapdragon/qdc/requirements.txt
+++ b/scripts/snapdragon/qdc/requirements.txt
@@ -0,0 +1,22 @@
+Appium-Python-Client==5.2.4
+attrs==25.4.0
+certifi==2025.10.5
+exceptiongroup==1.3.0
+h11==0.16.0
+idna==3.11
+iniconfig==2.1.0
+outcome==1.3.0.post0
+packaging==25.0
+pluggy==1.6.0
+PySocks==1.7.1
+pytest==8.4.2
+selenium==4.36.0
+sniffio==1.3.1
+sortedcontainers==2.4.0
+tomli==2.3.0
+trio==0.31.0
+trio-websocket==0.12.2
+typing_extensions==4.15.0
+urllib3==2.5.0
+websocket-client==1.9.0
+wsproto==1.2.0
--- a/scripts/snapdragon/qdc/run_qdc_jobs.py
+++ b/scripts/snapdragon/qdc/run_qdc_jobs.py
@@ -0,0 +1,401 @@
+"""Run llama.cpp Hexagon Android tests in a single QDC Appium job.
+
+Bundles test scripts into one artifact and submits a single QDC job:
+
+  1. run_bench_tests_posix.py — llama-cli and llama-bench on CPU / GPU / NPU
+                                (from scripts/snapdragon/qdc/)
+
+Results are written to $GITHUB_STEP_SUMMARY when set (GitHub Actions).
+
+Prerequisites:
+  pip install /path/to/qualcomm_device_cloud_sdk*.whl
+
+Required environment variables:
+  QDC_API_KEY   API key from QDC UI -> Users -> Settings -> API Keys
+
+Usage:
+  python run_qdc_jobs.py \\
+      --pkg-dir    pkg-snapdragon/llama.cpp \\
+      --model-url  https://.../Llama-3.2-1B-Instruct-Q4_0.gguf \\
+      --device     SM8750
+"""
+
+from __future__ import annotations
+
+import argparse
+import logging
+import os
+import re
+import shutil
+import sys
+import tempfile
+import time
+import xml.etree.ElementTree as ET
+from dataclasses import dataclass, field
+from pathlib import Path
+
+from qualcomm_device_cloud_sdk.api import qdc_api  # ty: ignore[unresolved-import]
+from qualcomm_device_cloud_sdk.logging import configure_logging  # ty: ignore[unresolved-import]
+from qualcomm_device_cloud_sdk.models import ArtifactType, JobMode, JobState, JobSubmissionParameter, JobType, TestFramework  # ty: ignore[unresolved-import]
+
+configure_logging(level=logging.INFO, handlers=[logging.StreamHandler()])
+log = logging.getLogger(__name__)
+
+POLL_INTERVAL        = 30
+JOB_TIMEOUT          = 3600
+LOG_UPLOAD_TIMEOUT   = 600
+CAPACITY_TIMEOUT     = 1800
+CAPACITY_POLL        = 60
+MAX_CONCURRENT_JOBS  = 5
+TERMINAL_STATES     = {JobState.COMPLETED, JobState.CANCELED}
+NON_TERMINAL_STATES = {JobState.DISPATCHED, JobState.RUNNING, JobState.SETUP, JobState.SUBMITTED}
+
+_SCRIPTS_DIR      = Path(__file__).parent
+_TESTS_DIR        = _SCRIPTS_DIR / "tests"
+_RUN_BENCH        = _TESTS_DIR / "run_bench_tests_posix.py"
+_RUN_BACKEND_OPS  = _TESTS_DIR / "run_backend_ops_posix.py"
+_UTILS            = _TESTS_DIR / "utils.py"
+_CONFTEST         = _TESTS_DIR / "conftest.py"
+_REQUIREMENTS     = _SCRIPTS_DIR / "requirements.txt"
+
+_PYTEST_LINE_RE = re.compile(
+    r"(?:[\w/]+\.py::)?(?:\w+::)?([\w\[\].-]+)\s+(PASSED|FAILED|ERROR|SKIPPED)"
+)
+_EXCLUDED_LOGS = {"qdc_android_whole_host-000.log", "qdc_kernel_host-000.log"}
+_NON_TERMINAL_STATE_VALUES = {s.value for s in NON_TERMINAL_STATES}
+
+
+@dataclass
+class JobResult:  # outcome of one QDC job, as consumed by write_summary
+    passed: bool  # overall verdict for the job
+    tests: dict[str, bool] = field(default_factory=dict)  # test name -> passed
+    raw_logs: dict[str, str] = field(default_factory=dict)  # log file name -> content
+    failure_details: dict[str, str] = field(default_factory=dict)  # test name -> failure text
+
+
+def build_artifact_zip(
+    pkg_dir: Path,
+    stage_dir: Path,
+    *,
+    test_mode: str = "bench",
+    model_url: str | None = None,
+) -> Path:
+    """Bundle everything into a single QDC artifact zip and return its path.
+
+    Zip structure (extracted by QDC to /qdc/appium/ on the runner):
+      llama_cpp_bundle/            installed package (adb pushed to /data/local/tmp/)
+      tests/                       bench and/or backend-ops file, per test_mode
+        utils.py                   shared helpers (paths, run_adb_command, …)
+        conftest.py                shared pytest fixtures (driver)
+        test_bench_posix.py        bench + cli tests (<<MODEL_URL>> substituted)
+        test_backend_ops_posix.py  test-backend-ops -b HTP0
+      requirements.txt
+    Staged under stage_dir/payload so artifact.zip is not zipped into itself.
+    """
+    payload = stage_dir / "payload"
+    shutil.copytree(pkg_dir, payload / "llama_cpp_bundle")  # also creates payload
+
+    tests_dir = payload / "tests"
+    tests_dir.mkdir()
+    shutil.copy(_UTILS,    tests_dir / "utils.py")
+    shutil.copy(_CONFTEST, tests_dir / "conftest.py")
+
+    if test_mode in ("bench", "all"):
+        assert model_url is not None, "--model-url is required for bench/all test modes"
+        bench_src = _RUN_BENCH.read_text(encoding="utf-8").replace("<<MODEL_URL>>", model_url)
+        # UTF-8 on both sides so the copy does not depend on the runner's locale
+        (tests_dir / "test_bench_posix.py").write_text(bench_src, encoding="utf-8")
+    if test_mode in ("backend-ops", "all"):
+        shutil.copy(_RUN_BACKEND_OPS, tests_dir / "test_backend_ops_posix.py")
+
+    shutil.copy(_REQUIREMENTS, payload / "requirements.txt")
+    (payload / "pytest.ini").write_text("[pytest]\naddopts = --junitxml=results.xml\n")
+
+    zip_base = str(stage_dir / "artifact")
+    shutil.make_archive(zip_base, "zip", payload)
+    return Path(f"{zip_base}.zip")
+
+
+def wait_for_job(client, job_id: str, timeout: int) -> str:
+    """Poll until the job is terminal; return its lower-cased state or raise TimeoutError."""
+    deadline = time.monotonic() + timeout  # real elapsed time, so slow API calls count too
+    while time.monotonic() < deadline:
+        raw = qdc_api.get_job_status(client, job_id)
+        try:
+            status = JobState(raw)
+        except ValueError:
+            status = raw
+        if status in TERMINAL_STATES:
+            return raw.lower()
+        log.info("Job %s: %s", job_id, raw)
+        time.sleep(POLL_INTERVAL)
+    raise TimeoutError(f"Job {job_id} did not finish within {timeout}s")
+
+
+def wait_for_log_upload(client, job_id: str) -> None:
+    """Wait for the job's log upload to report completed or failed; warn on timeout."""
+    # One poll per POLL_INTERVAL step from 0 through LOG_UPLOAD_TIMEOUT inclusive
+    for _ in range(0, LOG_UPLOAD_TIMEOUT + 1, POLL_INTERVAL):
+        upload_state = (qdc_api.get_job_log_upload_status(client, job_id) or "").lower()
+        if upload_state in {"completed", "failed"}:
+            return
+        log.info("Waiting for log upload (status=%s) ...", upload_state)
+        time.sleep(POLL_INTERVAL)
+    log.warning("Timed out waiting for log upload after %ds", LOG_UPLOAD_TIMEOUT)
+
+
+def wait_for_capacity(client, max_jobs: int = MAX_CONCURRENT_JOBS) -> None:
+    """Block until the user's active (non-terminal) QDC job count is below max_jobs."""
+    elapsed = 0
+    while elapsed < CAPACITY_TIMEOUT:
+        jobs_page = qdc_api.get_jobs_list(client, page_number=0, page_size=50)  # only this first page of 50 is counted
+        if jobs_page is None:
+            log.warning("Could not retrieve job list; proceeding without capacity check")
+            return
+        items = getattr(jobs_page, "data", []) or []  # tolerate a missing or empty "data" attribute
+        active = sum(1 for j in items if getattr(j, "state", None) in _NON_TERMINAL_STATE_VALUES)
+        if active < max_jobs:
+            log.info("Active QDC jobs: %d / %d — proceeding", active, max_jobs)
+            return
+        log.info("Active QDC jobs: %d / %d — waiting %ds ...", active, max_jobs, CAPACITY_POLL)
+        time.sleep(CAPACITY_POLL)
+        elapsed += CAPACITY_POLL
+    log.warning("Capacity wait timed out after %ds; proceeding anyway", CAPACITY_TIMEOUT)  # a timeout does not block submission
+
+
+def _parse_junit_xml(content: str) -> tuple[dict[str, bool], dict[str, str]]:
+    """Return ({test: passed}, {test: failure text}) from JUnit XML; two empty dicts if unparsable."""
+    outcome: dict[str, bool] = {}
+    details: dict[str, str] = {}
+    try:
+        testcases = ET.fromstring(content).iter("testcase")
+    except ET.ParseError:
+        return outcome, details
+    for case in testcases:
+        label = case.get("name", "")
+        if classname := case.get("classname", ""):
+            label = f"{classname}.{label}"
+        # A <failure> child is preferred over an <error> child; compare with None, not truthiness
+        problem = next((el for el in (case.find("failure"), case.find("error")) if el is not None), None)
+        outcome[label] = problem is None
+        if problem is not None:
+            message = [problem.get("message", ""), problem.text or ""]
+            details[label] = "\n".join(filter(None, message)).strip()
+    return outcome, details
+
+
+def _parse_pytest_output(content: str) -> dict[str, bool]:
+    """Map each test name found on a pytest result line to True if it PASSED, else False."""
+    return {
+        hit.group(1): hit.group(2) == "PASSED" for hit in _PYTEST_LINE_RE.finditer(content)
+    }
+
+
+def fetch_logs_and_parse_tests(
+    client, job_id: str
+) -> tuple[dict[str, bool], dict[str, str], dict[str, str]]:
+    """Download and unpack the job's logs; return (test_results, raw_logs, failure_details)."""
+    log_files = qdc_api.get_job_log_files(client, job_id)
+    if not log_files:
+        log.warning("No log files returned for job %s", job_id)
+        return {}, {}, {}
+
+    test_results: dict[str, bool] = {}
+    pytest_fallback: dict[str, bool] = {}
+    raw_logs: dict[str, str] = {}
+    failure_details: dict[str, str] = {}
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        for lf in log_files:
+            log.info("Downloading log file: %s", lf.filename)
+            zip_path = os.path.join(tmpdir, "log.zip")
+            qdc_api.download_job_log_files(client, lf.filename, zip_path)
+            try:
+                shutil.unpack_archive(zip_path, tmpdir, "zip")
+            except Exception as e:
+                log.warning("Could not unpack %s as zip: %s", lf.filename, e)
+
+        for root_dir, _, files in os.walk(tmpdir):
+            for fname in sorted(files):
+                is_xml = fname.endswith(".xml")
+                if not is_xml and (not fname.endswith(".log") or fname in _EXCLUDED_LOGS):
+                    continue  # decided by name, so log.zip and excluded logs are never read
+                content = Path(root_dir, fname).read_text(errors="replace")
+                if is_xml:
+                    results, failures = _parse_junit_xml(content)
+                    test_results.update(results)
+                    failure_details.update(failures)
+                else:
+                    log.info("--- %s ---", fname)
+                    log.info("%s", content)
+                    raw_logs[fname] = content
+                    pytest_fallback.update(_parse_pytest_output(content))
+    # JUnit XML results win; results scraped from pytest output are only the fallback
+    return (test_results if test_results else pytest_fallback), raw_logs, failure_details
+
+
+def write_summary(result: JobResult, title: str = "QDC Test Results") -> None:
+    """Append a Markdown report of ``result`` to $GITHUB_STEP_SUMMARY; no-op when it is unset."""
+    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
+    if not summary_path:
+        return
+    icon = "✅" if result.passed else "❌"
+
+    lines = [
+        f"## {title}\n",
+        f"Overall: {icon} {'PASSED' if result.passed else 'FAILED'}\n",
+    ]
+    reportable = {n: ok for n, ok in result.tests.items() if "test_install" not in n}  # install entries are left out
+    if reportable:
+        lines += ["| Test | Result |", "| ---- | ------ |"]
+        for name, ok in reportable.items():
+            lines.append(f"| `{name}` | {'✅' if ok else '❌'} |")
+        passed_n = sum(1 for v in reportable.values() if v)
+        failed_n = sum(1 for v in reportable.values() if not v)
+        lines += ["", f"**{passed_n} passed, {failed_n} failed**"]
+    else:
+        lines.append("_No per-test data available._")
+
+    failed_names = [n for n, ok in reportable.items() if not ok]
+    if failed_names:
+        lines += ["", "### Failures"]
+        for name in failed_names:
+            detail = result.failure_details.get(name)
+            if detail:
+                lines += [
+                    f"<details><summary><code>{name}</code></summary>",
+                    "",
+                    "```",
+                    detail,
+                    "```",
+                    "",
+                    "</details>",
+                ]
+
+    if result.raw_logs:
+        lines += ["", "### Raw Logs"]
+        for fname, content in sorted(result.raw_logs.items()):
+            lines += [
+                f"<details><summary>{fname}</summary>",
+                "",
+                "```",
+                content.rstrip(),
+                "```",
+                "",
+                "</details>",
+            ]
+    # UTF-8 explicitly: the report contains emoji that a non-UTF-8 locale cannot encode
+    with open(summary_path, "a", encoding="utf-8") as f:
+        f.write("\n".join(lines) + "\n")
+
+
+def parse_args() -> argparse.Namespace:
+    p = argparse.ArgumentParser(
+        description=__doc__,  # the module docstring doubles as the --help text
+        formatter_class=argparse.RawDescriptionHelpFormatter,  # keep the docstring's line breaks
+    )
+    p.add_argument("--pkg-dir",   required=True, type=Path,
+                   help="Installed llama.cpp package directory (contains bin/ and lib/)")
+    p.add_argument("--model-url",
+                   help="Direct URL to the GGUF model file (required for --test bench)")
+    p.add_argument("--device",    required=True,
+                   help="QDC chipset name, e.g. SM8750")
+    p.add_argument("--test", choices=["bench", "backend-ops", "all"], default="bench",
+                   help="Test suite to run (default: bench)")
+    p.add_argument("--job-timeout", type=int, default=JOB_TIMEOUT, metavar="SECONDS",
+                   help=f"Max seconds to wait for job completion (default: {JOB_TIMEOUT})")
+    args = p.parse_args()
+    if args.test in ("bench", "all") and not args.model_url:  # only backend-ops runs without a model
+        p.error("--model-url is required when --test bench or --test all")  # prints usage and exits
+    return args
+
+
+def main() -> int:
+    """Build, upload and run one QDC job, write the summary, and return 0 if it passed, else 1."""
+    args = parse_args()
+    api_key = os.environ.get("QDC_API_KEY")
+    if not api_key:
+        log.error("QDC_API_KEY environment variable must be set")
+        return 1
+    if not args.pkg_dir.is_dir():
+        log.error("--pkg-dir %s does not exist", args.pkg_dir)
+        return 1
+
+    client = qdc_api.get_public_api_client_using_api_key(
+        api_key_header=api_key,
+        app_name_header="llama-cpp-ci",
+        on_behalf_of_header="llama-cpp-ci",
+        client_type_header="Python",
+    )
+
+    target_id = qdc_api.get_target_id(client, args.device)
+    if target_id is None:
+        log.error("Could not find QDC target for device %r", args.device)
+        return 1
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        log.info("Building artifact ...")
+        zip_path = build_artifact_zip(
+            args.pkg_dir, Path(tmpdir),
+            test_mode=args.test, model_url=args.model_url,
+        )
+        log.info("Uploading artifact (%d MB) ...", zip_path.stat().st_size // 1_000_000)
+        artifact_id = qdc_api.upload_file(client, str(zip_path), ArtifactType.TESTSCRIPT)  # inside the with: the zip lives in tmpdir
+
+    if artifact_id is None:
+        log.error("Artifact upload failed")
+        return 1
+
+    wait_for_capacity(client)
+
+    job_id = qdc_api.submit_job(
+        public_api_client=client,
+        target_id=target_id,
+        job_name="llama.cpp Hexagon tests",
+        external_job_id=None,
+        job_type=JobType.AUTOMATED,
+        job_mode=JobMode.APPLICATION,
+        timeout=max(1, args.job_timeout // 60),  # NOTE(review): seconds // 60, presumably minutes — confirm against the SDK
+        test_framework=TestFramework.APPIUM,
+        entry_script=None,
+        job_artifacts=[artifact_id],
+        monkey_events=None,
+        monkey_session_timeout=None,
+        job_parameters=[JobSubmissionParameter.WIFIENABLED],
+    )
+    if job_id is None:
+        log.error("Job submission failed")
+        return 1
+    log.info("Job submitted: %s  (device=%s)", job_id, args.device)
+
+    try:
+        job_status = wait_for_job(client, job_id, timeout=args.job_timeout)
+    except TimeoutError as e:
+        log.error("%s", e)
+        write_summary(JobResult(passed=False, tests={}), title=f"QDC Job Timed Out ({args.device})")
+        return 1
+    log.info("Job %s finished: %s", job_id, job_status)
+
+    wait_for_log_upload(client, job_id)
+    tests, raw_logs, failure_details = fetch_logs_and_parse_tests(client, job_id)
+
+    passed = job_status == JobState.COMPLETED.value.lower()  # wait_for_job returns the lower-cased status
+    if tests:
+        passed = passed and all(tests.values())  # with no parsed tests, the job state alone decides
+    if not passed:
+        log.error("Job did not complete successfully or tests failed (status=%s)", job_status)
+
+    result = JobResult(passed=passed, tests=tests, raw_logs=raw_logs, failure_details=failure_details)
+    if args.test == "backend-ops":
+        title = f"Backend Ops — HTP0 ({args.device})"
+    elif args.test == "all":
+        title = f"QDC Tests ({args.device})"
+    else:
+        title = f"QDC Test Results ({args.device})"
+    write_summary(result, title=title)
+
+    return 0 if passed else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/scripts/snapdragon/qdc/tests/conftest.py
+++ b/scripts/snapdragon/qdc/tests/conftest.py
@@ -0,0 +1,20 @@
+"""Shared pytest fixtures for QDC on-device test runners."""
+
+import os
+
+import pytest
+from appium import webdriver
+
+from utils import options, write_qdc_log
+
+
+@pytest.fixture(scope="session", autouse=True)
+def driver():
+    return webdriver.Remote(command_executor="http://127.0.0.1:4723/wd/hub", options=options)
+
+
+def pytest_sessionfinish(session, exitstatus):
+    xml_path = getattr(session.config.option, "xmlpath", None) or "results.xml"
+    if os.path.exists(xml_path):
+        with open(xml_path) as f:
+            write_qdc_log("results.xml", f.read())
--- a/scripts/snapdragon/qdc/tests/run_backend_ops_posix.py
+++ b/scripts/snapdragon/qdc/tests/run_backend_ops_posix.py
@@ -0,0 +1,41 @@
+"""
+On-device test-backend-ops runner for llama.cpp (HTP0 backend).
+
+Executed by QDC's Appium test framework on the QDC runner.
+The runner has ADB access to the allocated device.
+"""
+
+import os
+import sys
+
+import pytest
+
+from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
+
+
+@pytest.fixture(scope="session", autouse=True)
+def install(driver):
+    push_bundle_if_needed(f"{BIN_PATH}/test-backend-ops")
+
+
+@pytest.mark.parametrize("type_a", ["mxfp4", "fp16", "q4_0"])
+def test_backend_ops_htp0(type_a):
+    cmd = f"{CMD_PREFIX} GGML_HEXAGON_HOSTBUF=0 GGML_HEXAGON_EXPERIMENTAL=1 {BIN_PATH}/test-backend-ops -b HTP0 -o MUL_MAT"
+    if type_a == "q4_0":
+        cmd += r' -p "^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$"'
+    else:
+        cmd += f" -p type_a={type_a}"
+    result = run_adb_command(
+        cmd,
+        check=False,
+    )
+    write_qdc_log(f"backend_ops_{type_a}.log", result.stdout or "")
+    assert result.returncode == 0, f"test-backend-ops type_a={type_a} failed (exit {result.returncode})"
+
+
+if __name__ == "__main__":
+    ret = pytest.main(["-s", "--junitxml=results.xml", os.path.realpath(__file__)])
+    if os.path.exists("results.xml"):
+        with open("results.xml") as f:
+            write_qdc_log("results.xml", f.read())
+    sys.exit(ret)
--- a/scripts/snapdragon/qdc/tests/run_bench_tests_posix.py
+++ b/scripts/snapdragon/qdc/tests/run_bench_tests_posix.py
@@ -0,0 +1,76 @@
+"""
+On-device bench and completion test runner for llama.cpp (CPU, GPU, NPU backends).
+
+Executed by QDC's Appium test framework on the QDC runner.
+The runner has ADB access to the allocated device.
+
+Placeholders replaced at artifact creation time by run_qdc_jobs.py:
+  <<MODEL_URL>>  Direct URL to the GGUF model file (downloaded on-device via curl)
+"""
+
+import os
+import subprocess
+import sys
+
+import pytest
+
+from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
+
+# On-device location of the GGUF model; the install fixture downloads it here.
+MODEL_PATH = "/data/local/tmp/model.gguf"
+# Prompt passed to llama-completion through -p.
+PROMPT     = "What is the capital of France?"
+# Options added to every llama-completion invocation in this file.
+CLI_OPTS   = "--batch-size 128 -n 128 -no-cnv --seed 42"
+
+
+@pytest.fixture(scope="session", autouse=True)
+def install(driver):
+    push_bundle_if_needed(f"{BIN_PATH}/llama-cli")
+
+    # Skip model download if already present
+    check = subprocess.run(
+        ["adb", "shell", f"ls {MODEL_PATH}"],
+        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+    )
+    if check.returncode != 0:
+        run_adb_command(f'curl -L -J --output {MODEL_PATH} "<<MODEL_URL>>"')
+
+
+@pytest.mark.parametrize("device,extra_flags", [
+    pytest.param("none",      "-ctk q8_0 -ctv q8_0", id="cpu"),
+    pytest.param("GPUOpenCL", "",                     id="gpu"),
+    pytest.param("HTP0",      "-ctk q8_0 -ctv q8_0", id="npu"),
+])
+def test_llama_completion(device, extra_flags):
+    result = run_adb_command(
+        f'{CMD_PREFIX} {BIN_PATH}/llama-completion'
+        f' -m {MODEL_PATH} --device {device} -ngl 99 -t 4 {CLI_OPTS} {extra_flags} -fa on'
+        f' -p "{PROMPT}"',
+        check=False,
+    )
+    write_qdc_log(f"llama_completion_{device}.log", result.stdout or "")
+    assert result.returncode == 0, f"llama-completion {device} failed (exit {result.returncode})"
+
+
+_DEVICE_LOG_NAME = {"none": "cpu", "GPUOpenCL": "gpu", "HTP0": "htp"}
+
+
+@pytest.mark.parametrize("device", [
+    pytest.param("none",      id="cpu"),
+    pytest.param("GPUOpenCL", id="gpu"),
+    pytest.param("HTP0",      id="npu"),
+])
+def test_llama_bench(device):
+    result = run_adb_command(
+        f"{CMD_PREFIX} {BIN_PATH}/llama-bench"
+        f" -m {MODEL_PATH} --device {device} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32",
+        check=False,
+    )
+    write_qdc_log(f"llama_bench_{_DEVICE_LOG_NAME[device]}.log", result.stdout or "")
+    assert result.returncode == 0, f"llama-bench {device} failed (exit {result.returncode})"
+
+
+if __name__ == "__main__":
+    ret = pytest.main(["-s", "--junitxml=results.xml", os.path.realpath(__file__)])
+    if os.path.exists("results.xml"):
+        with open("results.xml") as f:
+            write_qdc_log("results.xml", f.read())
+    sys.exit(ret)
--- a/scripts/snapdragon/qdc/tests/utils.py
+++ b/scripts/snapdragon/qdc/tests/utils.py
@@ -0,0 +1,93 @@
+"""Shared helpers for QDC on-device test runners."""
+
+import logging
+import os
+import subprocess
+import tempfile
+
+from appium.options.common import AppiumOptions
+
+log = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# On-device paths
+# ---------------------------------------------------------------------------
+
+# Root of the pushed llama.cpp bundle on the device.
+BUNDLE_PATH  = "/data/local/tmp/llama_cpp_bundle"
+# Directory on the device where write_qdc_log() places log files.
+QDC_LOGS_PATH = "/data/local/tmp/QDC_logs"
+# Library and executable directories inside the bundle.
+LIB_PATH    = f"{BUNDLE_PATH}/lib"
+BIN_PATH    = f"{BUNDLE_PATH}/bin"
+# Shell fragment that exports both library search paths and marks the bundle
+# binaries executable. It ends with "&&", so a command must follow it.
+ENV_PREFIX  = (
+    f"export LD_LIBRARY_PATH={LIB_PATH} && "
+    f"export ADSP_LIBRARY_PATH={LIB_PATH} && "
+    f"chmod +x {BIN_PATH}/* &&"
+)
+# ENV_PREFIX preceded by a cd into the bundle root; prepend to on-device commands.
+CMD_PREFIX  = f"cd {BUNDLE_PATH} && {ENV_PREFIX}"
+
+# ---------------------------------------------------------------------------
+# Appium session options
+# ---------------------------------------------------------------------------
+
+# Capabilities for the Appium session created in conftest.py.
+options = AppiumOptions()
+options.set_capability("automationName", "UiAutomator2")
+options.set_capability("platformName", "Android")
+# deviceName comes from the ANDROID_DEVICE_VERSION environment variable
+# (None when the variable is unset).
+options.set_capability("deviceName", os.getenv("ANDROID_DEVICE_VERSION"))
+
+# ---------------------------------------------------------------------------
+# ADB helpers
+# ---------------------------------------------------------------------------
+
+
+def run_adb_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProcess:
+    # Append exit-code sentinel because `adb shell` doesn't reliably propagate
+    # the on-device exit code (older ADB versions always return 0).
+    raw = subprocess.run(
+        ["adb", "shell", f"{cmd}; echo __RC__:$?"],
+        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+    )
+    stdout = raw.stdout
+    returncode = raw.returncode
+    if stdout:
+        lines = stdout.rstrip("\n").split("\n")
+        if lines and lines[-1].startswith("__RC__:"):
+            try:
+                returncode = int(lines[-1][7:])
+                stdout = "\n".join(lines[:-1]) + "\n"
+            except ValueError:
+                pass
+    log.info("%s", stdout)
+    result = subprocess.CompletedProcess(raw.args, returncode, stdout=stdout)
+    if check:
+        assert returncode == 0, f"Command failed (exit {returncode})"
+    return result
+
+
+def write_qdc_log(filename: str, content: str) -> None:
+    """Push content as a log file to QDC_LOGS_PATH on the device for QDC log collection."""
+    subprocess.run(
+        ["adb", "shell", f"mkdir -p {QDC_LOGS_PATH}"],
+        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+    )
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False) as f:
+        f.write(content)
+        tmp_path = f.name
+    try:
+        subprocess.run(
+            ["adb", "push", tmp_path, f"{QDC_LOGS_PATH}/{filename}"],
+            stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+        )
+    finally:
+        os.unlink(tmp_path)
+
+
+def push_bundle_if_needed(check_binary: str) -> None:
+    """Push llama_cpp_bundle to the device if check_binary is not already present."""
+    result = subprocess.run(
+        ["adb", "shell", f"ls {check_binary}"],
+        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+    )
+    if result.returncode != 0:
+        subprocess.run(
+            ["adb", "push", "/qdc/appium/llama_cpp_bundle/", "/data/local/tmp"],
+            text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+        )
--- a/scripts/snapdragon/windows/run-bench.ps1
+++ b/scripts/snapdragon/windows/run-bench.ps1
@@ -0,0 +1,48 @@
+#!/usr/bin/env pwsh
+
+# Runs llama-bench.exe from the local Snapdragon package directory.
+#
+# Optional environment variables:
+#   M        model file name, looked up in ..\..\gguf relative to $basedir
+#   D        value passed to --device (default: HTP0)
+#   V, PROF, OPSTAGE, NHVX, NDEV, HB
+#            copied to GGML_HEXAGON_VERBOSE, _PROFILE, _OPSTAGE, _NHVX, _NDEV, _HOSTBUF
+# Any command-line arguments are appended to the llama-bench invocation.
+
+# Basedir on device
+$basedir=".\pkg-snapdragon"
+
+$cli_opts=$args
+
+$model="Llama-3.2-3B-Instruct-Q4_0.gguf"
+if ($null -ne $env:M) {
+    $model=$env:M
+}
+
+$device="HTP0"
+if ($null -ne $env:D) {
+    $device=$env:D
+}
+
+if ($null -ne $env:V) {
+    $env:GGML_HEXAGON_VERBOSE=$env:V
+}
+
+if ($null -ne $env:PROF) {
+    $env:GGML_HEXAGON_PROFILE=$env:PROF
+}
+
+if ($null -ne $env:OPSTAGE) {
+    $env:GGML_HEXAGON_OPSTAGE=$env:OPSTAGE
+}
+
+if ($null -ne $env:NHVX) {
+    $env:GGML_HEXAGON_NHVX=$env:NHVX
+}
+
+if ($null -ne $env:NDEV) {
+    $env:GGML_HEXAGON_NDEV=$env:NDEV
+}
+
+if ($null -ne $env:HB) {
+    $env:GGML_HEXAGON_HOSTBUF=$env:HB
+}
+
+$env:ADSP_LIBRARY_PATH="$basedir\lib"
+
+& "$basedir\bin\llama-bench.exe" `
+    --mmap 0 -m $basedir\..\..\gguf\$model `
+    --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 `
+    --batch-size 128 -ngl 99 --device $device $cli_opts
--- a/scripts/snapdragon/windows/run-cli.ps1
+++ b/scripts/snapdragon/windows/run-cli.ps1
@@ -0,0 +1,53 @@
+#!/usr/bin/env pwsh
+
+# Runs llama-cli.exe from the local Snapdragon package directory.
+#
+# Optional environment variables:
+#   M        model file name, looked up in ..\..\gguf relative to $basedir
+#   D        value passed to --device (default: HTP0)
+#   SCHED    copied to GGML_SCHED_DEBUG; also adds -v to the command line
+#   V, PROF, OPSTAGE, NHVX, NDEV, HB
+#            copied to GGML_HEXAGON_VERBOSE, _PROFILE, _OPSTAGE, _NHVX, _NDEV, _HOSTBUF
+# Any command-line arguments are appended to the llama-cli invocation.
+
+# Basedir on device
+$basedir=".\pkg-snapdragon"
+
+# Keep the extra arguments as an array so that each one reaches the
+# executable as a separate argument.
+$cli_opts=@($args)
+
+$model="Llama-3.2-3B-Instruct-Q4_0.gguf"
+if ($null -ne $env:M) {
+    $model=$env:M
+}
+
+$device="HTP0"
+if ($null -ne $env:D) {
+    $device=$env:D
+}
+
+if ($null -ne $env:V) {
+    $env:GGML_HEXAGON_VERBOSE=$env:V
+}
+
+if ($null -ne $env:SCHED) {
+    # Append -v as its own element; interpolating the array into a string
+    # would merge every argument into a single one.
+    $env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts += "-v"
+}
+
+if ($null -ne $env:PROF) {
+    $env:GGML_HEXAGON_PROFILE=$env:PROF
+}
+
+if ($null -ne $env:OPSTAGE) {
+    $env:GGML_HEXAGON_OPSTAGE=$env:OPSTAGE
+}
+
+if ($null -ne $env:NHVX) {
+    $env:GGML_HEXAGON_NHVX=$env:NHVX
+}
+
+if ($null -ne $env:NDEV) {
+    $env:GGML_HEXAGON_NDEV=$env:NDEV
+}
+
+if ($null -ne $env:HB) {
+    $env:GGML_HEXAGON_HOSTBUF=$env:HB
+}
+
+$env:ADSP_LIBRARY_PATH="$basedir\lib"
+
+& "$basedir\bin\llama-cli.exe" `
+    --no-mmap -m $basedir\..\..\gguf\$model `
+    --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 `
+    --ctx-size 8192 --ubatch-size 256 -fa on `
+    -ngl 99 --device $device $cli_opts
--- a/scripts/snapdragon/windows/run-completion.ps1
+++ b/scripts/snapdragon/windows/run-completion.ps1
@@ -0,0 +1,53 @@
+#!/usr/bin/env pwsh
+
+# Runs llama-completion.exe from the local Snapdragon package directory.
+#
+# Optional environment variables:
+#   M        model file name, looked up in ..\..\gguf relative to $basedir
+#   D        value passed to --device (default: HTP0)
+#   SCHED    copied to GGML_SCHED_DEBUG; also adds -v to the command line
+#   V, PROF, OPSTAGE, NHVX, NDEV, HB
+#            copied to GGML_HEXAGON_VERBOSE, _PROFILE, _OPSTAGE, _NHVX, _NDEV, _HOSTBUF
+# Any command-line arguments are appended to the llama-completion invocation.
+
+# Basedir on device
+$basedir=".\pkg-snapdragon"
+
+# Keep the extra arguments as an array so that each one reaches the
+# executable as a separate argument.
+$cli_opts=@($args)
+
+$model="Llama-3.2-3B-Instruct-Q4_0.gguf"
+if ($null -ne $env:M) {
+    $model=$env:M
+}
+
+$device="HTP0"
+if ($null -ne $env:D) {
+    $device=$env:D
+}
+
+if ($null -ne $env:V) {
+    $env:GGML_HEXAGON_VERBOSE=$env:V
+}
+
+if ($null -ne $env:SCHED) {
+    # Append -v as its own element; interpolating the array into a string
+    # would merge every argument into a single one.
+    $env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts += "-v"
+}
+
+if ($null -ne $env:PROF) {
+    $env:GGML_HEXAGON_PROFILE=$env:PROF
+}
+
+if ($null -ne $env:OPSTAGE) {
+    $env:GGML_HEXAGON_OPSTAGE=$env:OPSTAGE
+}
+
+if ($null -ne $env:NHVX) {
+    $env:GGML_HEXAGON_NHVX=$env:NHVX
+}
+
+if ($null -ne $env:NDEV) {
+    $env:GGML_HEXAGON_NDEV=$env:NDEV
+}
+
+if ($null -ne $env:HB) {
+    $env:GGML_HEXAGON_HOSTBUF=$env:HB
+}
+
+$env:ADSP_LIBRARY_PATH="$basedir\lib"
+
+& "$basedir\bin\llama-completion.exe" `
+    --no-mmap -m $basedir\..\..\gguf\$model `
+    --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 `
+    --ctx-size 8192 --batch-size 256 -fa on `
+    -ngl 99 -no-cnv --device $device $cli_opts
--- a/scripts/snapdragon/windows/run-mtmd.ps1
+++ b/scripts/snapdragon/windows/run-mtmd.ps1
@@ -0,0 +1,68 @@
+#!/usr/bin/env pwsh
+
+# Runs llama-mtmd-cli.exe from the local Snapdragon package directory.
+#
+# Optional environment variables:
+#   M           model file name, looked up in ..\..\gguf relative to $basedir
+#   MMPROJ      multimodal projector file name in the same directory
+#   IMG         image file name in the same directory; --image is omitted when unset
+#   D           value passed to --device (default: HTP0)
+#   SCHED       copied to GGML_SCHED_DEBUG; also adds -v to the command line
+#   MTMD_DEVICE copied to MTMD_BACKEND_DEVICE
+#   V, PROF, OPSTAGE, NHVX, NDEV, HB
+#               copied to GGML_HEXAGON_VERBOSE, _PROFILE, _OPSTAGE, _NHVX, _NDEV, _HOSTBUF
+# Any command-line arguments are appended to the llama-mtmd-cli invocation.
+
+# Basedir on device
+$basedir=".\pkg-snapdragon"
+
+# Keep the extra arguments as an array so that each one reaches the
+# executable as a separate argument.
+$cli_opts=@($args)
+
+$model="gemma-3-4b-it-Q4_0.gguf"
+if ($null -ne $env:M) {
+    $model=$env:M
+}
+
+$mmproj="mmproj-F16.gguf"
+if ($null -ne $env:MMPROJ) {
+    $mmproj=$env:MMPROJ
+}
+
+$image=""
+if ($null -ne $env:IMG) {
+    $image=$env:IMG
+}
+
+# Only pass --image when an image was named; otherwise the option would
+# point at the gguf directory itself.
+$image_opts=@()
+if (-not [string]::IsNullOrEmpty($image)) {
+    $image_opts=@("--image", "$basedir\..\..\gguf\$image")
+}
+
+$device="HTP0"
+if ($null -ne $env:D) {
+    $device=$env:D
+}
+
+if ($null -ne $env:V) {
+    $env:GGML_HEXAGON_VERBOSE=$env:V
+}
+
+if ($null -ne $env:SCHED) {
+    # Append -v as its own element; interpolating the array into a string
+    # would merge every argument into a single one.
+    $env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts += "-v"
+}
+
+if ($null -ne $env:PROF) {
+    $env:GGML_HEXAGON_PROFILE=$env:PROF
+}
+
+if ($null -ne $env:OPSTAGE) {
+    $env:GGML_HEXAGON_OPSTAGE=$env:OPSTAGE
+}
+
+if ($null -ne $env:NHVX) {
+    $env:GGML_HEXAGON_NHVX=$env:NHVX
+}
+
+if ($null -ne $env:NDEV) {
+    $env:GGML_HEXAGON_NDEV=$env:NDEV
+}
+
+if ($null -ne $env:HB) {
+    $env:GGML_HEXAGON_HOSTBUF=$env:HB
+}
+
+if ($null -ne $env:MTMD_DEVICE) {
+    $env:MTMD_BACKEND_DEVICE=$env:MTMD_DEVICE
+}
+
+$env:ADSP_LIBRARY_PATH="$basedir\lib"
+
+& "$basedir\bin\llama-mtmd-cli.exe" `
+    --no-mmap -m $basedir\..\..\gguf\$model `
+    --mmproj $basedir\..\..\gguf\$mmproj `
+    $image_opts `
+    --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 `
+    --ctx-size 8192 --ubatch-size 256 -fa on `
+    -ngl 99 --device $device -v $cli_opts
--- a/scripts/snapdragon/windows/run-tool.ps1
+++ b/scripts/snapdragon/windows/run-tool.ps1
@@ -0,0 +1,56 @@
+#!/usr/bin/env pwsh
+
+# Runs an arbitrary tool from the local Snapdragon package's bin directory.
+#
+# Usage: run-tool.ps1 <tool> [tool arguments...]
+#
+# Optional environment variables:
+#   SCHED    copied to GGML_SCHED_DEBUG; also adds -v to the tool arguments
+#   V, PROF, OPSTAGE, NHVX, NDEV, HB
+#            copied to GGML_HEXAGON_VERBOSE, _PROFILE, _OPSTAGE, _NHVX, _NDEV, _HOSTBUF
+
+# Basedir on device
+$basedir=".\pkg-snapdragon"
+
+if ($args.Count -eq 0) {
+    Write-Host "No arguments provided. Expected the tool and argument to run."
+    exit -1
+}
+
+$tool=$args[0]
+# Tool arguments are kept as an array so that each one reaches the
+# executable as a separate argument.
+$cli_opts=@()
+
+if ($args.Count -gt 1) {
+    $cli_opts=@($args[1..($args.Count - 1)])
+}
+
+# NOTE(review): $device is computed but not passed to the tool below;
+# callers presumably supply --device themselves. Confirm.
+$device="HTP0"
+if ($null -ne $env:D) {
+    $device=$env:D
+}
+
+if ($null -ne $env:V) {
+    $env:GGML_HEXAGON_VERBOSE=$env:V
+}
+
+if ($null -ne $env:SCHED) {
+    # Append -v as its own element; interpolating the array into a string
+    # would merge every argument into a single one.
+    $env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts += "-v"
+}
+
+if ($null -ne $env:PROF) {
+    $env:GGML_HEXAGON_PROFILE=$env:PROF
+}
+
+if ($null -ne $env:OPSTAGE) {
+    $env:GGML_HEXAGON_OPSTAGE=$env:OPSTAGE
+}
+
+if ($null -ne $env:NHVX) {
+    $env:GGML_HEXAGON_NHVX=$env:NHVX
+}
+
+if ($null -ne $env:NDEV) {
+    $env:GGML_HEXAGON_NDEV=$env:NDEV
+}
+
+if ($null -ne $env:HB) {
+    $env:GGML_HEXAGON_HOSTBUF=$env:HB
+}
+
+$env:ADSP_LIBRARY_PATH="$basedir\lib"
+
+& "$basedir\bin\$tool" `
+    $cli_opts
--- a/scripts/snapdragon/windows/setup-build.ps1
+++ b/scripts/snapdragon/windows/setup-build.ps1
@@ -0,0 +1,105 @@
+# Running as Administrator is NOT strictly necessary for User-scope env vars,
+# but it is recommended for creating directories in the C:\ root if permissions are restricted.
+
+# Treat every error as terminating, so the first failure stops the script.
+$ErrorActionPreference = "Stop"
+
+# --- Configuration ---
+# Root directory under which both SDKs are installed.
+$BaseDir = "C:\Qualcomm"
+
+# SDK 1: Hexagon
+# Archive URL, parent directory, and the versioned directories derived from them.
+$HexagonUrl     = "https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v6.4.0.2/hexagon-sdk-v6.4.0.2-arm64-wos.tar.xz"
+$HexagonParent  = Join-Path $BaseDir "Hexagon_SDK"
+$HexagonSdkVersion   = "6.4.0.2"
+$HexagonToolsVersion = "19.0.04"
+$HexagonSdkTarget    = Join-Path $HexagonParent $HexagonSdkVersion
+# Tools directory inside the SDK; exported later as HEXAGON_TOOLS_ROOT.
+$HexagonToolsTarget  = Join-Path $HexagonSdkTarget "\tools\HEXAGON_Tools\$HexagonToolsVersion"
+
+# SDK 2: OpenCL
+# Archive URL, parent directory, and the versioned install directory.
+$OpenCLUrl      = "https://github.com/snapdragon-toolchain/opencl-sdk/releases/download/v2.3.2/adreno-opencl-sdk-v2.3.2-arm64-wos.tar.xz"
+$OpenCLParent   = Join-Path $BaseDir "OpenCL_SDK"
+$OpenCLVersion  = "2.3.2"
+$OpenCLTarget   = Join-Path $OpenCLParent $OpenCLVersion
+
+# --- Helper Function ---
+function Install-QualcommSDK {
+    <#
+    .SYNOPSIS
+        Downloads and unpacks one SDK archive unless its version directory already exists.
+    .PARAMETER Url
+        Location of the .tar.xz archive to download.
+    .PARAMETER ParentDir
+        Directory that holds the temporary archive; created if missing.
+    .PARAMETER TargetDir
+        Versioned SDK directory. Its presence means the SDK is installed.
+    .PARAMETER Name
+        Display name used in progress and error messages.
+    #>
+    param (
+        [string]$Url,
+        [string]$ParentDir,
+        [string]$TargetDir,
+        [string]$Name
+    )
+
+    # 1. Create Parent Directory
+    if (-not (Test-Path -Path $ParentDir)) {
+        Write-Host "Creating directory: $ParentDir" -ForegroundColor Cyan
+        New-Item -Path $ParentDir -ItemType Directory -Force | Out-Null
+    }
+
+    # 2. Check for Specific Version Directory
+    if (Test-Path -Path $TargetDir) {
+        Write-Host "$Name ($TargetDir) already exists. Skipping download." -ForegroundColor Green
+        return
+    }
+
+    Write-Host "$Name not found. preparing to download..." -ForegroundColor Yellow
+
+    # Create the target directory up front
+    New-Item -Path $TargetDir -ItemType Directory -Force | Out-Null
+
+    # Define temporary archive path
+    $TempFile = Join-Path $ParentDir "temp_sdk.tar.xz"
+
+    try {
+        # Download
+        Write-Host "Downloading from: $Url"
+        Invoke-WebRequest -Uri $Url -OutFile $TempFile
+
+        # Untar
+        # Note: We assume Windows includes tar.exe (Win 10 build 17063+)
+        Write-Host "Extracting archive to $TargetDir..."
+
+        # -C points at the PARENT of the target directory; presumably the
+        # archive's top-level folder is the version directory itself - TODO confirm.
+        tar -xJvf $TempFile -C $TargetDir\..
+
+        # tar is a native command, so a failure does not raise an exception;
+        # check its exit code so a broken extraction is not reported as success.
+        if ($LASTEXITCODE -ne 0) {
+            throw "tar exited with code $LASTEXITCODE"
+        }
+
+        Write-Host "Extraction complete." -ForegroundColor Green
+    }
+    catch {
+        $failure = $_
+        # Cleanup target dir if failed so script tries again next time. This
+        # must happen BEFORE Write-Error: the script sets
+        # $ErrorActionPreference = "Stop", which makes Write-Error terminating,
+        # so nothing placed after it in this block would run.
+        Remove-Item -Path $TargetDir -Recurse -Force -ErrorAction SilentlyContinue
+        Write-Error "Failed to download or extract $Name. Error: $failure"
+    }
+    finally {
+        # Cleanup Archive
+        if (Test-Path $TempFile) { Remove-Item $TempFile -Force }
+    }
+}
+
+# --- Execution ---
+
+# 1. Make sure the base directory exists before any SDK directory is created
+if (!(Test-Path $BaseDir)) {
+    New-Item -Path $BaseDir -ItemType Directory -Force | Out-Null
+}
+
+# 2. Install each SDK in order; the hashtable keys match the parameter
+#    names of Install-QualcommSDK and are passed by splatting.
+$sdkInstalls = @(
+    @{ Url = $HexagonUrl; ParentDir = $HexagonParent; TargetDir = $HexagonSdkTarget; Name = "Hexagon SDK" },
+    @{ Url = $OpenCLUrl;  ParentDir = $OpenCLParent;  TargetDir = $OpenCLTarget;     Name = "OpenCL SDK" }
+)
+foreach ($sdkInstall in $sdkInstalls) {
+    Install-QualcommSDK @sdkInstall
+}
+
+# --- Environment Variables ---
+
+Write-Host "`nSetting Environment Variables..." -ForegroundColor Cyan
+
+# One entry per variable: its name, the directory it points to, and the
+# exact status text printed in front of that directory.
+$sdkEnvironment = @(
+    @{ Var = 'OPENCL_SDK_ROOT';    Dir = $OpenCLTarget;       Banner = "OPENCL_SDK_ROOT set to:  " },
+    @{ Var = 'HEXAGON_SDK_ROOT';   Dir = $HexagonSdkTarget;   Banner = "HEXAGON_SDK_ROOT set to: " },
+    @{ Var = 'HEXAGON_TOOLS_ROOT'; Dir = $HexagonToolsTarget; Banner = "HEXAGON_TOOLS_ROOT set to: " }
+)
+
+foreach ($entry in $sdkEnvironment) {
+    # Persist the value at User scope ...
+    [System.Environment]::SetEnvironmentVariable($entry.Var, $entry.Dir, [System.EnvironmentVariableTarget]::User)
+    # ... and set it for the current session as well.
+    Set-Item -Path "Env:$($entry.Var)" -Value $entry.Dir
+    Write-Host "$($entry.Banner)$($entry.Dir)"
+}