llama.cpp verification source 2026-05-22
Some checks are pending
Copilot Setup Steps / copilot-setup-steps (push) Waiting to run
Check Pre-Tokenizer Hashes / pre-tokenizer-hashes (push) Waiting to run
Python check requirements.txt / check-requirements (push) Waiting to run
Python Type-Check / python type-check (push) Waiting to run
Update Operations Documentation / update-ops-docs (push) Waiting to run

This commit is contained in:
2026-05-22 16:44:08 +08:00
commit 8e5a449007
2740 changed files with 1155720 additions and 0 deletions

5
scripts/apple/validate-apps.sh Executable file
View File

@@ -0,0 +1,5 @@
#!/usr/bin/env bash
# validate-apps.sh - Run every Apple platform validation script in sequence.
#
# The validators are located relative to this script's own directory, so the
# wrapper can be started from any working directory. Every validator is
# attempted even when an earlier one fails, and the wrapper exits non-zero if
# at least one of them failed (previously only the last script's status was
# reported, hiding earlier failures).
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
status=0
for platform in ios macos visionos tvos; do
    # Remember the failure but keep going so all platforms get validated.
    "${SCRIPT_DIR}/validate-${platform}.sh" || status=1
done
exit "${status}"

820
scripts/apple/validate-ios.sh Executable file
View File

@@ -0,0 +1,820 @@
#!/usr/bin/env bash
# validate-ios.sh - Validate iOS Application with embedded llama.xcframework using SwiftUI
# Authentication options (optional) (can be set via environment variables)
# To use: export APPLE_ID=your.email@example.com
# export APPLE_PASSWORD=your-app-specific-password
# ./validate-ios.sh
# Default both credentials to the empty string when they are unset or empty;
# values supplied through the environment are left untouched.
: "${APPLE_ID:=}"
: "${APPLE_PASSWORD:=}"
# Abort as soon as any command fails
set -o errexit
# print_usage - write the command-line help text to stdout.
# Takes no arguments; the text is emitted verbatim from a quoted heredoc.
print_usage() {
    cat << 'USAGE'
Usage: ./validate-ios.sh [OPTIONS]

Options:
 --help Show this help message
 --apple-id EMAIL Apple ID email for validation
 --apple-password PWD App-specific password for Apple ID

Environment variables:
 APPLE_ID Apple ID email for validation
 APPLE_PASSWORD App-specific password for Apple ID

Notes:
 - Command line options take precedence over environment variables
 - Authentication is optional. If not provided, alternative validation will be performed
 - For APPLE_PASSWORD, use an app-specific password generated at https://appleid.apple.com/account/manage
USAGE
}
# Parse command line arguments.
# --apple-id and --apple-password each require a value. If the value is
# missing, `shift 2` would fail and `set -e` would end the script without any
# message, so the argument count is checked first and usage is printed.
while [[ $# -gt 0 ]]; do
    case $1 in
        --help)
            print_usage
            exit 0
            ;;
        --apple-id)
            if [[ $# -lt 2 ]]; then
                echo "Missing value for option: $1"
                print_usage
                exit 1
            fi
            APPLE_ID="$2"
            shift 2
            ;;
        --apple-password)
            if [[ $# -lt 2 ]]; then
                echo "Missing value for option: $1"
                print_usage
                exit 1
            fi
            APPLE_PASSWORD="$2"
            shift 2
            ;;
        *)
            echo "Unknown option: $1"
            print_usage
            exit 1
            ;;
    esac
done
# cleanup - ERR trap handler: announce the failure and exit with status 1.
# Temporary files are intentionally left on disk to help with debugging.
cleanup() {
    printf '%s\n' "===== iOS Validation Process Failed ====="
    exit 1
}
# Run the handler whenever a command fails, and keep errexit enabled
trap 'cleanup' ERR
set -o errexit
# Repository root: two directories above the directory holding this script
ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )"
# Configuration
APP_NAME="iOSLlamaTest"
BUNDLE_ID="org.ggml.iOSLlamaTest"
XCFRAMEWORK_PATH="${ROOT_DIR}/build-apple/llama.xcframework"
# Output locations; everything is kept beneath BUILD_DIR
BUILD_DIR="${ROOT_DIR}/validation-builds/ios"
TEMP_DIR="${BUILD_DIR}/temp"
VALIDATION_DIR="${BUILD_DIR}/validation"
ARCHIVE_PATH="${BUILD_DIR}/${APP_NAME}.xcarchive"
IPA_PATH="${BUILD_DIR}/${APP_NAME}.ipa"
# Create necessary directories (one mkdir -p call covers all three)
mkdir -p "${BUILD_DIR}" "${TEMP_DIR}" "${VALIDATION_DIR}"
echo "===== iOS Validation Process Started ====="
# 1. Create a simple test app project
echo "Creating test iOS app project..."
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}"
# Write the app's Info.plist. The heredoc delimiter is unquoted, so the shell
# expands ${APP_NAME} and ${BUNDLE_ID} while the file is being written.
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Info.plist" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>en</string>
<key>CFBundleExecutable</key>
<string>${APP_NAME}</string>
<key>CFBundleIdentifier</key>
<string>${BUNDLE_ID}</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>${APP_NAME}</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
<key>LSRequiresIPhoneOS</key>
<true/>
<key>UILaunchScreen</key>
<dict/>
<key>UIRequiredDeviceCapabilities</key>
<array>
<string>armv7</string>
</array>
<key>UISupportedInterfaceOrientations</key>
<array>
<string>UIInterfaceOrientationPortrait</string>
</array>
</dict>
</plist>
EOF
# Generate the SwiftUI sources of the test app.
# Both files are written from quoted heredocs: the Swift code contains nothing
# for the shell to expand, so it is emitted exactly as written here.
swift_sources_dir="${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources"
mkdir -p "${swift_sources_dir}"
# App.swift - application entry point hosting ContentView in a WindowGroup
cat > "${swift_sources_dir}/App.swift" << 'SWIFT_APP'
import SwiftUI
import llama
@main
struct LlamaTestApp: App {
var body: some Scene {
WindowGroup {
ContentView()
}
}
}
SWIFT_APP
# ContentView.swift - calls llama_context_default_params() and shows n_ctx / n_batch
cat > "${swift_sources_dir}/ContentView.swift" << 'SWIFT_VIEW'
import SwiftUI
import llama
struct ContentView: View {
// Test that we can initialize a llama context params struct
let params = llama_context_default_params()
var body: some View {
VStack(spacing: 20) {
Text("Llama Framework Test")
.font(.largeTitle)
.padding()
Text("llama_context_default_params() created successfully")
.font(.headline)
.multilineTextAlignment(.center)
.padding()
// Display some param values to confirm the framework is working
Text("n_ctx: \(params.n_ctx)")
.font(.body)
Text("n_batch: \(params.n_batch)")
.font(.body)
Spacer()
}
.padding()
}
}
struct ContentView_Previews: PreviewProvider {
static var previews: some View {
ContentView()
}
}
SWIFT_VIEW
# Create project.pbxproj, fixing the framework search paths issues
# The project file is assembled from several consecutive heredocs. Segments
# written with a quoted delimiter ('EOF') are emitted verbatim; segments with
# an unquoted delimiter (EOF) have ${APP_NAME} expanded by the shell.
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj"
# Verbatim segment: file header, PBXBuildFile entries and the "Embed Frameworks"
# copy phase. `>` starts the file afresh, replacing one left by an earlier run.
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 54;
objects = {
/* Begin PBXBuildFile section */
11111111111111111111111 /* App.swift in Sources */ = {isa = PBXBuildFile; fileRef = 22222222222222222222222; };
33333333333333333333333 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 44444444444444444444444; };
55555555555555555555555 /* llama.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
77777777777777777777777 /* llama.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
88888888888888888888888 /* Embed Frameworks */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = "";
dstSubfolderSpec = 10;
files = (
77777777777777777777777 /* llama.xcframework in Embed Frameworks */,
);
name = "Embed Frameworks";
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
EOF
# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
# Expanded segment (appended with >>): file references, including the
# ${APP_NAME}.app product entry.
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
99999999999999999999999 /* ${APP_NAME}.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "${APP_NAME}.app"; sourceTree = BUILT_PRODUCTS_DIR; };
22222222222222222222222 /* App.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = App.swift; sourceTree = "<group>"; };
44444444444444444444444 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
66666666666666666666666 /* llama.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = llama.xcframework; sourceTree = "<group>"; };
/* End PBXFileReference section */
EOF
# Add the rest of the project file with fixed framework search paths
# Verbatim segment: the Frameworks build phase that links llama.xcframework.
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
/* Begin PBXFrameworksBuildPhase section */
BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
55555555555555555555555 /* llama.xcframework in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
EOF
# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
# Expanded segment: the Products group, which names ${APP_NAME}.app.
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
CCCCCCCCCCCCCCCCCCCCCCCC /* Products */ = {
isa = PBXGroup;
children = (
99999999999999999999999 /* ${APP_NAME}.app */,
);
name = Products;
sourceTree = "<group>";
};
EOF
# Verbatim segment: the remaining groups. The app group name and path are
# hard-coded as "iOSLlamaTest" here rather than taken from ${APP_NAME}, so they
# must be kept in sync by hand if APP_NAME is ever changed.
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */ = {
isa = PBXGroup;
children = (
66666666666666666666666 /* llama.xcframework */,
);
name = Frameworks;
sourceTree = "<group>";
};
EEEEEEEEEEEEEEEEEEEEEEEE = {
isa = PBXGroup;
children = (
FFFFFFFFFFFFFFFFFFFFFFFF /* iOSLlamaTest */,
CCCCCCCCCCCCCCCCCCCCCCCC /* Products */,
DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */,
);
sourceTree = "<group>";
};
FFFFFFFFFFFFFFFFFFFFFFFF /* iOSLlamaTest */ = {
isa = PBXGroup;
children = (
1111111111111111111111AA /* Sources */,
AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */,
);
path = "iOSLlamaTest";
sourceTree = "<group>";
};
1111111111111111111111AA /* Sources */ = {
isa = PBXGroup;
children = (
22222222222222222222222 /* App.swift */,
44444444444444444444444 /* ContentView.swift */,
);
path = Sources;
sourceTree = "<group>";
};
/* End PBXGroup section */
EOF
# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
# Expanded segment: the native target (its four build phases, name and product
# reference) followed by the project object that lists that target.
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
/* Begin PBXNativeTarget section */
3333333333333333333333AA /* ${APP_NAME} */ = {
isa = PBXNativeTarget;
buildConfigurationList = 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */;
buildPhases = (
5555555555555555555555AA /* Sources */,
BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */,
6666666666666666666666AA /* Resources */,
88888888888888888888888 /* Embed Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = "${APP_NAME}";
productName = "${APP_NAME}";
productReference = 99999999999999999999999 /* ${APP_NAME}.app */;
productType = "com.apple.product-type.application";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
7777777777777777777777AA /* Project object */ = {
isa = PBXProject;
attributes = {
LastSwiftUpdateCheck = 1240;
LastUpgradeCheck = 1240;
TargetAttributes = {
3333333333333333333333AA = {
CreatedOnToolsVersion = 12.4;
};
};
};
buildConfigurationList = 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */;
compatibilityVersion = "Xcode 12.0";
developmentRegion = en;
hasScannedForEncodings = 0;
knownRegions = (
en,
Base,
);
mainGroup = EEEEEEEEEEEEEEEEEEEEEEEE;
productRefGroup = CCCCCCCCCCCCCCCCCCCCCCCC /* Products */;
projectDirPath = "";
projectRoot = "";
targets = (
3333333333333333333333AA /* ${APP_NAME} */,
);
};
/* End PBXProject section */
EOF
# Add the rest of the file with correct FRAMEWORK_SEARCH_PATHS
# Verbatim segment: Resources and Sources build phases plus the four build
# configurations (project-level Debug/Release, target-level Debug/Release).
# The quoted delimiter is required: the payload contains Xcode references such
# as $(inherited), $(PROJECT_DIR) and $(TARGET_NAME), which an unquoted heredoc
# would run as shell command substitutions.
# The target-level settings hard-code "iOSLlamaTest/Info.plist" and
# "org.ggml.iOSLlamaTest" instead of using ${APP_NAME} / ${BUNDLE_ID}; keep them
# in sync by hand if those variables change.
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
/* Begin PBXResourcesBuildPhase section */
6666666666666666666666AA /* Resources */ = {
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXResourcesBuildPhase section */
/* Begin PBXSourcesBuildPhase section */
5555555555555555555555AA /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
33333333333333333333333 /* ContentView.swift in Sources */,
11111111111111111111111 /* App.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin XCBuildConfiguration section */
9999999999999999999999AA /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_DYNAMIC_NO_PIC = NO;
GCC_NO_COMMON_BLOCKS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
IPHONEOS_DEPLOYMENT_TARGET = 16.4;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = iphoneos;
SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
};
name = Debug;
};
AAAAAAAAAAAAAAAAAAAAABBB /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_NS_ASSERTIONS = NO;
ENABLE_STRICT_OBJC_MSGSEND = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_NO_COMMON_BLOCKS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
IPHONEOS_DEPLOYMENT_TARGET = 16.4;
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
SDKROOT = iphoneos;
SWIFT_COMPILATION_MODE = wholemodule;
SWIFT_OPTIMIZATION_LEVEL = "-O";
VALIDATE_PRODUCT = YES;
};
name = Release;
};
BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Manual;
DEVELOPMENT_TEAM = "";
ENABLE_PREVIEWS = YES;
FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)";
INFOPLIST_FILE = "iOSLlamaTest/Info.plist";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.iOSLlamaTest";
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = "";
SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = "1,2";
};
name = Debug;
};
CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Manual;
DEVELOPMENT_TEAM = "";
ENABLE_PREVIEWS = YES;
FRAMEWORK_SEARCH_PATHS = (
"$(inherited)",
"$(PROJECT_DIR)",
);
INFOPLIST_FILE = "iOSLlamaTest/Info.plist";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.iOSLlamaTest";
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = "";
SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = "1,2";
};
name = Release;
};
/* End XCBuildConfiguration section */
EOF
# Finish the project.pbxproj file
# Expanded segment: the two configuration lists (project and native target),
# both defaulting to Release, then the closing of the objects dictionary and
# the rootObject reference that ends the file.
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
/* Begin XCConfigurationList section */
8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */ = {
isa = XCConfigurationList;
buildConfigurations = (
9999999999999999999999AA /* Debug */,
AAAAAAAAAAAAAAAAAAAAABBB /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */ = {
isa = XCConfigurationList;
buildConfigurations = (
BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */,
CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 7777777777777777777777AA /* Project object */;
}
EOF
# 2. Copy XCFramework to test project
# The framework is copied next to the .xcodeproj, which is where the project's
# llama.xcframework file reference and $(PROJECT_DIR) search path point.
echo "Copying XCFramework to test project..."
cp -R "${XCFRAMEWORK_PATH}" "${TEMP_DIR}/${APP_NAME}/"
# 3. Build and archive the app
echo "Building and archiving test app..."
# From here on, relative paths are resolved inside the generated project directory.
cd "${TEMP_DIR}/${APP_NAME}"
# Create a simple xcscheme file to avoid xcodebuild scheme issues
# The scheme is a shared one named after the app. Its BlueprintIdentifier
# matches the native target's object ID in project.pbxproj, and the archive
# action uses the Release configuration.
mkdir -p "${APP_NAME}.xcodeproj/xcshareddata/xcschemes"
cat > "${APP_NAME}.xcodeproj/xcshareddata/xcschemes/${APP_NAME}.xcscheme" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1240"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>
EOF
# Archive the Release configuration for the iphoneos SDK. Code signing is
# switched off, and PRODUCT_NAME / SWIFT_OPTIMIZATION_LEVEL are overridden on
# the command line.
xcodebuild \
    -project "${APP_NAME}.xcodeproj" \
    -scheme "${APP_NAME}" \
    -sdk iphoneos \
    -configuration Release \
    archive \
    -archivePath "${ARCHIVE_PATH}" \
    CODE_SIGN_IDENTITY="-" \
    CODE_SIGNING_REQUIRED=NO \
    CODE_SIGNING_ALLOWED=NO \
    PRODUCT_NAME="${APP_NAME}" \
    SWIFT_OPTIMIZATION_LEVEL="-Onone" \
    -quiet
# 4. Package the archived .app as an IPA (a zip containing a Payload/ directory)
echo "Creating IPA from archive..."
archived_app="${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app"
payload_app="${TEMP_DIR}/Payload/${APP_NAME}.app"
mkdir -p "${TEMP_DIR}/Payload"
cp -R "${archived_app}" "${TEMP_DIR}/Payload/"
# Log the bundle layout before zipping
echo "App structure:"
ls -la "${payload_app}/"
echo "Frameworks:"
if ! ls -la "${payload_app}/Frameworks/" 2>/dev/null; then
    echo "No Frameworks directory found"
fi
cd "${TEMP_DIR}"
zip -r "${IPA_PATH}" Payload
# Report the embedded provisioning profile, if the archive contains one
echo "Checking provisioning profile (if any)..."
PROVISIONING_PROFILE=$(find "${archived_app}" -name "embedded.mobileprovision" 2>/dev/null)
if [[ -z "$PROVISIONING_PROFILE" ]]; then
    echo "No embedded provisioning profile found (expected for ad-hoc builds)"
else
    echo "Found embedded provisioning profile:"
    if ! security cms -D -i "$PROVISIONING_PROFILE"; then
        echo "Unable to decode provisioning profile"
    fi
fi
# 5. Validate the IPA
#
# Runs `xcrun altool --validate-app` on the IPA, saves its output, and maps the
# outcome to FINAL_VALIDATION_RESULT (0 = pass, 1 = fail), which becomes the
# script's exit status.
echo "Validating IPA..."
VALIDATION_OUTPUT="${VALIDATION_DIR}/validation_output.txt"
# Collect the optional credentials in an array so each value reaches altool as
# exactly one argument. No eval is involved, so quotes, spaces or `$` inside
# the Apple ID or password cannot be reinterpreted by the shell.
AUTH_ARGS=()
if [ -n "$APPLE_ID" ] && [ -n "$APPLE_PASSWORD" ]; then
    echo "Using Apple ID authentication for validation..."
    AUTH_ARGS=(--username "$APPLE_ID" --password "$APPLE_PASSWORD")
else
    echo "No authentication credentials provided. Will perform basic validation."
    echo "To use your personal developer account, you can run the script with:"
    echo " APPLE_ID='your.email@example.com' APPLE_PASSWORD='your-app-specific-password' ./validate-ios.sh"
    echo "Note: You need to create an app-specific password at https://appleid.apple.com/account/manage"
fi
# Run validation with detailed output
echo "Running validation with altool..."
# The pipeline's own status is tee's, so a failing altool does not trip
# `set -e` here; its outcome is classified below from status and output.
xcrun altool --validate-app -f "${IPA_PATH}" --type ios --output-format xml "${AUTH_ARGS[@]}" 2>&1 | tee "${VALIDATION_OUTPUT}"
# PIPESTATUS[0] is altool's exit status; plain $? would only report tee's.
VALIDATION_RESULT=${PIPESTATUS[0]}
# Final validation result
FINAL_VALIDATION_RESULT=0
# Check if validation failed because the app isn't in App Store Connect
if grep -q "No suitable application records were found" "${VALIDATION_OUTPUT}"; then
    echo "⚠️ App Store Connect Warning: The app bundle identifier is not found in App Store Connect"
    echo "This is expected for apps that haven't been registered in App Store Connect yet."
    echo "This doesn't indicate a problem with the build or framework."
    # Perform alternative validation
    echo "Performing alternative validation checks..."
    # Check if IPA was created successfully
    if [ -f "${IPA_PATH}" ] && [ -s "${IPA_PATH}" ]; then
        echo "✅ IPA file created successfully"
    else
        echo "❌ IPA file not created or empty"
        FINAL_VALIDATION_RESULT=1
    fi
    # Check if app binary exists and is executable
    if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ] && [ -x "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ]; then
        echo "✅ App binary exists and is executable"
    else
        echo "❌ App binary missing or not executable"
        FINAL_VALIDATION_RESULT=1
    fi
    # Check if framework was properly embedded
    if [ -d "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework" ]; then
        echo "✅ llama.framework properly embedded"
    else
        echo "❌ llama.framework not properly embedded"
        FINAL_VALIDATION_RESULT=1
    fi
    # Check if framework binary exists
    if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" ]; then
        echo "✅ Framework binary exists"
        # Further validate framework by checking architecture
        ARCHS=$(lipo -info "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" 2>/dev/null | grep -o "arm64\\|armv7\\|x86_64" | tr '\n' ' ')
        if [ -n "$ARCHS" ]; then
            echo "✅ Framework architecture(s): $ARCHS"
        else
            echo "⚠️ Could not determine framework architecture"
        fi
    else
        echo "❌ Framework binary missing"
        FINAL_VALIDATION_RESULT=1
    fi
    if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
        echo "✅ Alternative validation PASSED: App built successfully with embedded framework"
    else
        echo "❌ Alternative validation FAILED: Issues found with the app or framework"
    fi
elif [ "${VALIDATION_RESULT}" -eq 0 ] || { grep -q "You must specify authentication credentials" "${VALIDATION_OUTPUT}" && [ ${#AUTH_ARGS[@]} -eq 0 ]; }; then
    # Pass when altool itself succeeded, or when it only refused to run because
    # no credentials were supplied (authentication is optional for this script).
    echo "✅ iOS Validation PASSED: IPA successfully validated"
    echo "Results saved to ${VALIDATION_OUTPUT}"
else
    # A reported failure must also fail the script, otherwise callers and CI
    # would see exit status 0 despite the "FAILED" message.
    FINAL_VALIDATION_RESULT=1
    echo "❌ iOS Validation FAILED: IPA validation found issues"
    echo "See validation output at ${VALIDATION_OUTPUT}"
    echo ""
    echo "==== VALIDATION ERRORS ===="
    # Try to extract specific errors from the output
    if grep -q "Error" "${VALIDATION_OUTPUT}"; then
        grep -A 5 "Error" "${VALIDATION_OUTPUT}"
    else
        # If no specific error found, show the whole log
        cat "${VALIDATION_OUTPUT}"
    fi
    # Additional debugging: check IPA contents
    echo ""
    echo "==== IPA CONTENTS ===="
    mkdir -p "${TEMP_DIR}/ipa_contents"
    # -o: overwrite files left by an earlier run instead of prompting
    unzip -o -q "${IPA_PATH}" -d "${TEMP_DIR}/ipa_contents"
    ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/"
    # Check for code signing issues
    echo ""
    echo "==== CODE SIGNING INFO ===="
    codesign -vv -d "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app" 2>&1 || echo "Code signing verification failed"
    # Check embedded frameworks
    echo ""
    echo "==== FRAMEWORK INFO ===="
    ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
fi
# Don't clean up on error to allow inspection
if [ $FINAL_VALIDATION_RESULT -ne 0 ]; then
    echo ""
    echo "Temporary files kept for inspection at: ${TEMP_DIR}"
    echo "===== iOS Validation Process Failed ====="
    exit 1
fi
# Clean up temporary files but keep build artifacts
# (the actual removal is currently commented out, so nothing is deleted)
if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
    echo "Cleaning up temporary files..."
    #rm -rf "${TEMP_DIR}"
fi
echo "===== iOS Validation Process Completed ====="
exit $FINAL_VALIDATION_RESULT

781
scripts/apple/validate-macos.sh Executable file
View File

@@ -0,0 +1,781 @@
#!/usr/bin/env bash
# validate-macos.sh - Validate macOS Application with embedded llama.xcframework using SwiftUI
# Authentication options (optional) (can be set via environment variables)
# To use: export APPLE_ID=your.email@example.com
# export APPLE_PASSWORD=your-app-specific-password
# ./validate-macos.sh
# Default both credentials to the empty string when they are unset or empty;
# values supplied through the environment are left untouched.
: "${APPLE_ID:=}"
: "${APPLE_PASSWORD:=}"
# Abort as soon as any command fails
set -o errexit
# print_usage - write the command-line help text to stdout.
# Takes no arguments; the text is emitted verbatim from a quoted heredoc.
print_usage() {
    cat << 'USAGE'
Usage: ./validate-macos.sh [OPTIONS]

Options:
 --help Show this help message
 --apple-id EMAIL Apple ID email for validation
 --apple-password PWD App-specific password for Apple ID

Environment variables:
 APPLE_ID Apple ID email for validation
 APPLE_PASSWORD App-specific password for Apple ID

Notes:
 - Command line options take precedence over environment variables
 - Authentication is optional. If not provided, alternative validation will be performed
 - For APPLE_PASSWORD, use an app-specific password generated at https://appleid.apple.com/account/manage
USAGE
}
# Parse command line arguments.
# --apple-id and --apple-password each require a value. If the value is
# missing, `shift 2` would fail and `set -e` would end the script without any
# message, so the argument count is checked first and usage is printed.
while [[ $# -gt 0 ]]; do
    case $1 in
        --help)
            print_usage
            exit 0
            ;;
        --apple-id)
            if [[ $# -lt 2 ]]; then
                echo "Missing value for option: $1"
                print_usage
                exit 1
            fi
            APPLE_ID="$2"
            shift 2
            ;;
        --apple-password)
            if [[ $# -lt 2 ]]; then
                echo "Missing value for option: $1"
                print_usage
                exit 1
            fi
            APPLE_PASSWORD="$2"
            shift 2
            ;;
        *)
            echo "Unknown option: $1"
            print_usage
            exit 1
            ;;
    esac
done
# cleanup - ERR trap handler: announce the failure and exit with status 1.
# Temporary files are intentionally left on disk to help with debugging.
cleanup() {
    printf '%s\n' "===== macOS Validation Process Failed ====="
    exit 1
}
# Run the handler whenever a command fails, and keep errexit enabled
trap 'cleanup' ERR
set -o errexit
# Repository root: two directories above the directory holding this script
ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )"
# macOS artifacts get their own build tree. This used to point at
# validation-builds/ios, which made the macOS run share temp/ and validation/
# (including validation_output.txt) with validate-ios.sh.
BUILD_DIR="${ROOT_DIR}/validation-builds/macos"
# Configuration
APP_NAME="MacOSLlamaTest"
BUNDLE_ID="org.ggml.MacOSLlamaTest"
XCFRAMEWORK_PATH="${ROOT_DIR}/build-apple/llama.xcframework"
TEMP_DIR="${BUILD_DIR}/temp"
ARCHIVE_PATH="${BUILD_DIR}/${APP_NAME}.xcarchive"
APP_PATH="${BUILD_DIR}/${APP_NAME}.app"
ZIP_PATH="${BUILD_DIR}/${APP_NAME}.zip"
VALIDATION_DIR="${BUILD_DIR}/validation"
# Create necessary directories
mkdir -p "${BUILD_DIR}"
mkdir -p "${TEMP_DIR}"
mkdir -p "${VALIDATION_DIR}"
echo "===== macOS Validation Process Started ====="
# 1. Create a simple test app project
echo "Creating test macOS app project..."
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}"
# Write the app's Info.plist. The heredoc delimiter is unquoted, so the shell
# expands ${APP_NAME} and ${BUNDLE_ID} while the file is being written.
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Info.plist" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>en</string>
<key>CFBundleExecutable</key>
<string>${APP_NAME}</string>
<key>CFBundleIdentifier</key>
<string>${BUNDLE_ID}</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>${APP_NAME}</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
<key>LSMinimumSystemVersion</key>
<string>12.0</string>
<key>NSHumanReadableCopyright</key>
<string>Copyright © 2025 GGML. All rights reserved.</string>
<key>NSPrincipalClass</key>
<string>NSApplication</string>
</dict>
</plist>
EOF
# Generate the SwiftUI sources of the test app.
# Both files are written from quoted heredocs: the Swift code contains nothing
# for the shell to expand, so it is emitted exactly as written here.
swift_sources_dir="${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources"
mkdir -p "${swift_sources_dir}"
# App.swift - application entry point hosting ContentView in a WindowGroup
cat > "${swift_sources_dir}/App.swift" << 'SWIFT_APP'
import SwiftUI
import llama
@main
struct LlamaTestApp: App {
var body: some Scene {
WindowGroup {
ContentView()
}
}
}
SWIFT_APP
# ContentView.swift - macOS variant with a fixed 600x400 frame
cat > "${swift_sources_dir}/ContentView.swift" << 'SWIFT_VIEW'
import SwiftUI
import llama
struct ContentView: View {
// Test that we can initialize a llama context params struct
let params = llama_context_default_params()
var body: some View {
VStack(spacing: 20) {
Text("Llama Framework Test on macOS")
.font(.largeTitle)
.padding()
Text("llama_context_default_params() created successfully")
.font(.headline)
.multilineTextAlignment(.center)
.padding()
// Display some param values to confirm the framework is working
Text("n_ctx: \(params.n_ctx)")
.font(.body)
Text("n_batch: \(params.n_batch)")
.font(.body)
Spacer()
}
.padding()
.frame(width: 600, height: 400)
}
}
struct ContentView_Previews: PreviewProvider {
static var previews: some View {
ContentView()
}
}
SWIFT_VIEW
# Create project.pbxproj, fixing the framework search paths issues
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj"
# First segment of the project file, written verbatim (quoted 'EOF' delimiter):
# file header, PBXBuildFile entries and the "Embed Frameworks" copy phase.
# `>` starts the file afresh, replacing one left by an earlier run.
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 54;
objects = {
/* Begin PBXBuildFile section */
11111111111111111111111 /* App.swift in Sources */ = {isa = PBXBuildFile; fileRef = 22222222222222222222222; };
33333333333333333333333 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 44444444444444444444444; };
55555555555555555555555 /* llama.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
77777777777777777777777 /* llama.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
88888888888888888888888 /* Embed Frameworks */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = "";
dstSubfolderSpec = 10;
files = (
77777777777777777777777 /* llama.xcframework in Embed Frameworks */,
);
name = "Embed Frameworks";
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
EOF
# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
99999999999999999999999 /* ${APP_NAME}.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "${APP_NAME}.app"; sourceTree = BUILT_PRODUCTS_DIR; };
22222222222222222222222 /* App.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = App.swift; sourceTree = "<group>"; };
44444444444444444444444 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
66666666666666666666666 /* llama.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = llama.xcframework; sourceTree = "<group>"; };
/* End PBXFileReference section */
EOF
# Append (verbatim chunk) the PBXFrameworksBuildPhase that links
# llama.xcframework, followed by the opening marker of the PBXGroup section.
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
/* Begin PBXFrameworksBuildPhase section */
BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
55555555555555555555555 /* llama.xcframework in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
EOF
# Append (expanded chunk) the Products group; it lists ${APP_NAME}.app, so the
# delimiter is left unquoted to let the shell substitute the name.
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
CCCCCCCCCCCCCCCCCCCCCCCC /* Products */ = {
isa = PBXGroup;
children = (
99999999999999999999999 /* ${APP_NAME}.app */,
);
name = Products;
sourceTree = "<group>";
};
EOF
# Append (verbatim chunk) the remaining groups: Frameworks, the main group,
# the app group and its Sources sub-group.
# NOTE(review): the app group's name and path are the literal "MacOSLlamaTest"
# because this chunk is not expanded - presumably this has to equal APP_NAME;
# confirm against the place where APP_NAME is set.
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */ = {
isa = PBXGroup;
children = (
66666666666666666666666 /* llama.xcframework */,
);
name = Frameworks;
sourceTree = "<group>";
};
EEEEEEEEEEEEEEEEEEEEEEEE = {
isa = PBXGroup;
children = (
FFFFFFFFFFFFFFFFFFFFFFFF /* MacOSLlamaTest */,
CCCCCCCCCCCCCCCCCCCCCCCC /* Products */,
DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */,
);
sourceTree = "<group>";
};
FFFFFFFFFFFFFFFFFFFFFFFF /* MacOSLlamaTest */ = {
isa = PBXGroup;
children = (
1111111111111111111111AA /* Sources */,
AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */,
);
path = "MacOSLlamaTest";
sourceTree = "<group>";
};
1111111111111111111111AA /* Sources */ = {
isa = PBXGroup;
children = (
22222222222222222222222 /* App.swift */,
44444444444444444444444 /* ContentView.swift */,
);
path = Sources;
sourceTree = "<group>";
};
/* End PBXGroup section */
EOF
# Append (expanded chunk) the PBXNativeTarget and PBXProject objects.
# The target is named after ${APP_NAME} and runs four build phases in order:
# Sources, Frameworks, Resources, Embed Frameworks. The project object lists
# that target as its only target and uses the main group and Products group
# written by the earlier chunks.
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
/* Begin PBXNativeTarget section */
3333333333333333333333AA /* ${APP_NAME} */ = {
isa = PBXNativeTarget;
buildConfigurationList = 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */;
buildPhases = (
5555555555555555555555AA /* Sources */,
BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */,
6666666666666666666666AA /* Resources */,
88888888888888888888888 /* Embed Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = "${APP_NAME}";
productName = "${APP_NAME}";
productReference = 99999999999999999999999 /* ${APP_NAME}.app */;
productType = "com.apple.product-type.application";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
7777777777777777777777AA /* Project object */ = {
isa = PBXProject;
attributes = {
LastSwiftUpdateCheck = 1240;
LastUpgradeCheck = 1240;
TargetAttributes = {
3333333333333333333333AA = {
CreatedOnToolsVersion = 12.4;
};
};
};
buildConfigurationList = 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */;
compatibilityVersion = "Xcode 12.0";
developmentRegion = en;
hasScannedForEncodings = 0;
knownRegions = (
en,
Base,
);
mainGroup = EEEEEEEEEEEEEEEEEEEEEEEE;
productRefGroup = CCCCCCCCCCCCCCCCCCCCCCCC /* Products */;
projectDirPath = "";
projectRoot = "";
targets = (
3333333333333333333333AA /* ${APP_NAME} */,
);
};
/* End PBXProject section */
EOF
# Append (verbatim chunk) the Resources and Sources build phases and the four
# XCBuildConfiguration objects: project-level Debug/Release (compiler warnings,
# SDKROOT = macosx, MACOSX_DEPLOYMENT_TARGET = 12.0) and target-level
# Debug/Release (manual signing, framework search path, runpath, bundle id).
# The delimiter must stay quoted: the body contains Xcode macros such as
# $(inherited), $(PROJECT_DIR) and $(TARGET_NAME), which an unquoted heredoc
# would execute as shell command substitutions.
# NOTE(review): INFOPLIST_FILE and PRODUCT_BUNDLE_IDENTIFIER are the literal
# MacOSLlamaTest values here rather than ${APP_NAME}/${BUNDLE_ID} - presumably
# they match those variables; confirm where they are set.
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
/* Begin PBXResourcesBuildPhase section */
6666666666666666666666AA /* Resources */ = {
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXResourcesBuildPhase section */
/* Begin PBXSourcesBuildPhase section */
5555555555555555555555AA /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
33333333333333333333333 /* ContentView.swift in Sources */,
11111111111111111111111 /* App.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin XCBuildConfiguration section */
9999999999999999999999AA /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_DYNAMIC_NO_PIC = NO;
GCC_NO_COMMON_BLOCKS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
MACOSX_DEPLOYMENT_TARGET = 12.0;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = macosx;
SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
};
name = Debug;
};
AAAAAAAAAAAAAAAAAAAAABBB /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_NS_ASSERTIONS = NO;
ENABLE_STRICT_OBJC_MSGSEND = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_NO_COMMON_BLOCKS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
MACOSX_DEPLOYMENT_TARGET = 12.0;
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
SDKROOT = macosx;
SWIFT_COMPILATION_MODE = wholemodule;
SWIFT_OPTIMIZATION_LEVEL = "-O";
};
name = Release;
};
BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Manual;
COMBINE_HIDPI_IMAGES = YES;
DEVELOPMENT_TEAM = "";
ENABLE_HARDENED_RUNTIME = YES;
ENABLE_PREVIEWS = YES;
FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)";
INFOPLIST_FILE = "MacOSLlamaTest/Info.plist";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/../Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.MacOSLlamaTest";
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = "";
SWIFT_VERSION = 5.0;
};
name = Debug;
};
CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Manual;
COMBINE_HIDPI_IMAGES = YES;
DEVELOPMENT_TEAM = "";
ENABLE_HARDENED_RUNTIME = YES;
ENABLE_PREVIEWS = YES;
FRAMEWORK_SEARCH_PATHS = (
"$(inherited)",
"$(PROJECT_DIR)",
);
INFOPLIST_FILE = "MacOSLlamaTest/Info.plist";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/../Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.MacOSLlamaTest";
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = "";
SWIFT_VERSION = 5.0;
};
name = Release;
};
/* End XCBuildConfiguration section */
EOF
# Finish the project.pbxproj file (expanded chunk): the two
# XCConfigurationList objects - one for the project, one for the target, both
# defaulting to Release - then close the objects dictionary and name the
# PBXProject object as rootObject.
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
/* Begin XCConfigurationList section */
8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */ = {
isa = XCConfigurationList;
buildConfigurations = (
9999999999999999999999AA /* Debug */,
AAAAAAAAAAAAAAAAAAAAABBB /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */ = {
isa = XCConfigurationList;
buildConfigurations = (
BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */,
CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 7777777777777777777777AA /* Project object */;
}
EOF
# 2. Copy XCFramework to test project.
# The framework is placed next to the .xcodeproj, which is where the project's
# "llama.xcframework" file reference and the $(PROJECT_DIR) framework search
# path look for it.
echo "Copying XCFramework to test project..."
cp -R "${XCFRAMEWORK_PATH}" "${TEMP_DIR}/${APP_NAME}/"
# 3. Build and archive the app.
# From here on the working directory is the generated project directory, so the
# project and scheme paths below are relative.
echo "Building and archiving test app..."
cd "${TEMP_DIR}/${APP_NAME}"
# Create a simple xcscheme file to avoid xcodebuild scheme issues.
# The scheme is written as shared data and named after ${APP_NAME}; every
# BuildableReference points at BlueprintIdentifier 3333333333333333333333AA,
# the PBXNativeTarget id written into project.pbxproj. Launch, test and analyze
# use Debug; profile and archive use Release.
mkdir -p "${APP_NAME}.xcodeproj/xcshareddata/xcschemes"
cat > "${APP_NAME}.xcodeproj/xcshareddata/xcschemes/${APP_NAME}.xcscheme" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1240"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>
EOF
# Archive the Release configuration with xcodebuild.
# Code signing is switched off entirely and PRODUCT_NAME is pinned to
# ${APP_NAME} so the archived bundle has a predictable name; Swift
# optimisation is forced to -Onone for this test build.
xcodebuild \
    -project "${APP_NAME}.xcodeproj" \
    -scheme "${APP_NAME}" \
    -sdk macosx \
    -configuration Release \
    archive \
    -archivePath "${ARCHIVE_PATH}" \
    CODE_SIGN_IDENTITY="-" \
    CODE_SIGNING_REQUIRED=NO \
    CODE_SIGNING_ALLOWED=NO \
    PRODUCT_NAME="${APP_NAME}" \
    SWIFT_OPTIMIZATION_LEVEL="-Onone" \
    -quiet
# 4. Create a package for distribution
echo "Creating distributable package from archive..."
# Remove the bundle left by a previous run first: `cp -R src.app dst.app` with
# an existing dst.app copies the new bundle *into* the old one, so the checks
# below would silently validate the stale build.
rm -rf "${APP_PATH:?APP_PATH must be set}"
cp -R "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" "${APP_PATH}"
# Check and log app structure
echo "App structure:"
ls -la "${APP_PATH}"
echo "Frameworks:"
ls -la "${APP_PATH}/Contents/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
# Create a zip file for potential distribution
cd "${BUILD_DIR}"
# `zip -r` updates an existing archive in place and keeps entries that no
# longer exist on disk, so start from a fresh archive.
rm -f "${ZIP_PATH}"
zip -r "${ZIP_PATH}" "${APP_NAME}.app"
# Look for an embedded provisioning profile inside the bundle and, when one is
# present, dump its decoded contents. Unsigned/ad-hoc builds normally have none.
echo "Checking provisioning profile (if any)..."
PROVISIONING_PROFILE=$(find "${APP_PATH}/Contents" -name "embedded.provisionprofile" 2>/dev/null)
if [[ -z "${PROVISIONING_PROFILE}" ]]; then
    echo "No embedded provisioning profile found (expected for ad-hoc builds)"
else
    echo "Found embedded provisioning profile:"
    # Decoding is best effort; a failure here must not abort the validation.
    security cms -D -i "${PROVISIONING_PROFILE}" || echo "Unable to decode provisioning profile"
fi
# 5. Validate the app
echo "Validating macOS app..."
VALIDATION_OUTPUT="${VALIDATION_DIR}/validation_output.txt"
# AUTH_ARGS stays empty unless BOTH credentials were supplied; later steps only
# test whether it is non-empty.
AUTH_ARGS=""
if [[ -z "$APPLE_ID" || -z "$APPLE_PASSWORD" ]]; then
    # Quoted delimiter: the hint text is printed verbatim.
    cat << 'EOF'
No authentication credentials provided. Will perform basic validation.
To use your personal developer account, you can run the script with:
 APPLE_ID='your.email@example.com' APPLE_PASSWORD='your-app-specific-password' ./validate-macos.sh
Note: You need to create an app-specific password at https://appleid.apple.com/account/manage
EOF
else
    echo "Using Apple ID authentication for validation..."
    AUTH_ARGS="--username \"${APPLE_ID}\" --password \"${APPLE_PASSWORD}\""
fi
# For macOS we need to use notarytool or alternative checks because altool
# doesn't support macOS apps in the same way
printf '%s\n' \
    "Note: For macOS, formal notarization process would require Apple Developer credentials." \
    "Performing alternative validation checks..."
# Structural checks on the packaged app. Each failed check prints a message and
# flips FINAL_VALIDATION_RESULT to 1; none of them aborts, so every problem is
# reported in a single run.
FINAL_VALIDATION_RESULT=0
# The bundle directory must exist and the main binary must be non-empty
if [[ ! -d "${APP_PATH}" || ! -s "${APP_PATH}/Contents/MacOS/${APP_NAME}" ]]; then
    echo "❌ App package not created or binary missing"
    FINAL_VALIDATION_RESULT=1
else
    echo "✅ App package created successfully"
fi
# The main binary must be a regular file with the executable bit set
if [[ ! -f "${APP_PATH}/Contents/MacOS/${APP_NAME}" || ! -x "${APP_PATH}/Contents/MacOS/${APP_NAME}" ]]; then
    echo "❌ App binary missing or not executable"
    FINAL_VALIDATION_RESULT=1
else
    echo "✅ App binary exists and is executable"
fi
# The framework must have been copied into Contents/Frameworks
if [[ ! -d "${APP_PATH}/Contents/Frameworks/llama.framework" ]]; then
    echo "❌ llama.framework not properly embedded"
    FINAL_VALIDATION_RESULT=1
else
    echo "✅ llama.framework properly embedded"
fi
# The framework's binary must be present at the versioned location
if [[ ! -f "${APP_PATH}/Contents/Frameworks/llama.framework/Versions/A/llama" ]]; then
    echo "❌ Framework binary missing"
    FINAL_VALIDATION_RESULT=1
else
    echo "✅ Framework binary exists"
    # Informational only: list which of arm64 / x86_64 lipo reports. A miss
    # prints a warning but does not fail the validation.
    ARCHS=$(lipo -info "${APP_PATH}/Contents/Frameworks/llama.framework/Versions/A/llama" 2>/dev/null | grep -o 'arm64\|x86_64' | tr '\n' ' ')
    if [[ -z "$ARCHS" ]]; then
        echo "⚠️ Could not determine framework architecture"
    else
        echo "✅ Framework architecture(s): $ARCHS"
    fi
fi
# Show code signing details; failure is tolerated because the build is unsigned
echo ""
echo "==== CODE SIGNING INFO ===="
codesign -vv -d "${APP_PATH}" 2>&1 || echo "Code signing verification not available (expected for ad-hoc builds)"
# Report the overall result and exit.
# Failure: print the verdict, point at the preserved temp directory and exit 1.
if [ "$FINAL_VALIDATION_RESULT" -ne 0 ]; then
    echo "❌ Validation FAILED: Issues found with the app or framework"
    # Don't clean up on error to allow inspection
    echo ""
    echo "Temporary files kept for inspection at: ${TEMP_DIR}"
    echo "===== macOS Validation Process Failed ====="
    exit 1
fi
# Success from here on. The notarization hint is only shown when credentials
# were supplied (AUTH_ARGS is non-empty only in that case).
if [ -n "$AUTH_ARGS" ]; then
    echo ""
    echo "To notarize this app with Apple (requires Apple Developer account):"
    echo "xcrun notarytool submit \"${ZIP_PATH}\" --apple-id \"your-apple-id\" --password \"your-app-specific-password\" --team-id \"your-team-id\" --wait"
    echo ""
fi
echo "✅ Validation PASSED: macOS app built successfully with embedded framework"
# The temporary project is intentionally left on disk (its removal was
# disabled), so say so instead of claiming that a cleanup took place.
echo "Temporary files kept at: ${TEMP_DIR}"
echo "===== macOS Validation Process Completed ====="
echo "App package available at: ${APP_PATH}"
echo "Zipped app available at: ${ZIP_PATH}"
exit 0

813
scripts/apple/validate-tvos.sh Executable file
View File

@@ -0,0 +1,813 @@
#!/usr/bin/env bash
# validate-tvos.sh - Validate tvOS Application with embedded llama.xcframework using SwiftUI
# Optional Apple credentials. Both may be supplied through the environment:
#   export APPLE_ID=your.email@example.com
#   export APPLE_PASSWORD=your-app-specific-password
#   ./validate-tvos.sh
# When unset (or empty) they are defined as empty strings.
: "${APPLE_ID:=}"
: "${APPLE_PASSWORD:=}"
# Abort the script as soon as any command fails
set -o errexit
# Print the command-line help for validate-tvos.sh to stdout.
# Takes no arguments. The text is emitted from a single quoted heredoc, so it
# is printed exactly as written, with no shell expansion.
print_usage() {
    cat << 'USAGE'
Usage: ./validate-tvos.sh [OPTIONS]

Options:
 --help Show this help message
 --apple-id EMAIL Apple ID email for validation
 --apple-password PWD App-specific password for Apple ID

Environment variables:
 APPLE_ID Apple ID email for validation
 APPLE_PASSWORD App-specific password for Apple ID

Notes:
 - Command line options take precedence over environment variables
 - Authentication is optional. If not provided, alternative validation will be performed
 - For APPLE_PASSWORD, use an app-specific password generated at https://appleid.apple.com/account/manage
USAGE
}
# Parse command line arguments.
#   --help                 print usage and exit 0
#   --apple-id EMAIL       override APPLE_ID
#   --apple-password PWD   override APPLE_PASSWORD
# Any other argument is reported as unknown and the script exits 1.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --help)
            print_usage
            exit 0
            ;;
        --apple-id)
            # Without this guard a trailing "--apple-id" makes `shift 2` fail,
            # and `set -e` then terminates the script with no message at all.
            if [[ $# -lt 2 ]]; then
                echo "Missing value for option: $1"
                print_usage
                exit 1
            fi
            APPLE_ID="$2"
            shift 2
            ;;
        --apple-password)
            # Same guard as for --apple-id.
            if [[ $# -lt 2 ]]; then
                echo "Missing value for option: $1"
                print_usage
                exit 1
            fi
            APPLE_PASSWORD="$2"
            shift 2
            ;;
        *)
            echo "Unknown option: $1"
            print_usage
            exit 1
            ;;
    esac
done
# Error handler: announce the failure and terminate with status 1.
# Despite its name it removes nothing - temporary files are deliberately left
# in place so a failed run can be inspected.
cleanup() {
    printf '%s\n' "===== tvOS Validation Process Failed ====="
    exit 1
}
# Invoke the handler whenever a command fails
trap 'cleanup' ERR
set -o errexit # Exit on any error
# Repository root: two directory levels above the directory of this script.
ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )"
# tvOS gets its own output tree. This previously pointed at
# validation-builds/ios, so the tvOS temp project and validation output shared
# (and overwrote) the directories used by the iOS validation.
BUILD_DIR="${ROOT_DIR}/validation-builds/tvos"
# Configuration
APP_NAME="TVOSLlamaTest"
BUNDLE_ID="org.ggml.TVOSLlamaTest"
XCFRAMEWORK_PATH="${ROOT_DIR}/build-apple/llama.xcframework"
TEMP_DIR="${BUILD_DIR}/temp"
ARCHIVE_PATH="${BUILD_DIR}/${APP_NAME}.xcarchive"
IPA_PATH="${BUILD_DIR}/${APP_NAME}.ipa"
VALIDATION_DIR="${BUILD_DIR}/validation"
# Create necessary directories
mkdir -p "${BUILD_DIR}" "${TEMP_DIR}" "${VALIDATION_DIR}"
echo "===== tvOS Validation Process Started ====="
# 1. Create a simple test app project
echo "Creating test tvOS app project..."
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}"
# Write the app's Info.plist. The heredoc delimiter is unquoted so the shell
# substitutes ${APP_NAME} (executable and bundle name) and ${BUNDLE_ID}
# (bundle identifier). The plist also declares arm64 as a required device
# capability.
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Info.plist" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>en</string>
<key>CFBundleExecutable</key>
<string>${APP_NAME}</string>
<key>CFBundleIdentifier</key>
<string>${BUNDLE_ID}</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>${APP_NAME}</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
<key>UIRequiredDeviceCapabilities</key>
<array>
<string>arm64</string>
</array>
</dict>
</plist>
EOF
# Create SwiftUI app files.
# Both Swift sources are written into <project>/<app>/Sources, the directory
# the generated project's "Sources" group points at.
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources"
# Create App.swift: the @main entry point. It imports llama and only hosts
# ContentView inside a WindowGroup.
# The heredoc delimiter is unquoted, but the body contains no '$' or backticks,
# so the shell writes the text out unchanged.
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/App.swift" << EOF
import SwiftUI
import llama
@main
struct LlamaTestApp: App {
var body: some Scene {
WindowGroup {
ContentView()
}
}
}
EOF
# Create ContentView.swift with tvOS specific elements.
# The view calls llama_context_default_params() and displays n_ctx / n_batch,
# so the app only compiles and links if the llama module is usable.
# Inside an unquoted heredoc a backslash is only special before '$', '`', '\'
# and newline, so the Swift "\(...)" interpolations are preserved as written.
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/ContentView.swift" << EOF
import SwiftUI
import llama
struct ContentView: View {
// Test that we can initialize a llama context params struct
let params = llama_context_default_params()
var body: some View {
VStack(spacing: 40) {
Text("Llama Framework Test on tvOS")
.font(.largeTitle)
.padding()
Text("llama_context_default_params() created successfully")
.font(.headline)
.multilineTextAlignment(.center)
.padding()
// Display some param values to confirm the framework is working
Text("n_ctx: \(params.n_ctx)")
.font(.title2)
Text("n_batch: \(params.n_batch)")
.font(.title2)
Spacer()
}
.padding(50)
// Larger size suitable for TV display
}
}
struct ContentView_Previews: PreviewProvider {
static var previews: some View {
ContentView()
}
}
EOF
# Create project.pbxproj.
# The file is assembled from several heredoc chunks appended in order:
#   - chunks with a quoted delimiter ('EOF') are written verbatim;
#   - chunks with an unquoted delimiter (EOF) let the shell substitute
#     ${APP_NAME}.
# Chunk 1 (verbatim, creates the file with '>'): file header, the PBXBuildFile
# entries for the two Swift sources and for llama.xcframework (once for
# linking, once for embedding), and the "Embed Frameworks" copy phase.
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj"
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 54;
objects = {
/* Begin PBXBuildFile section */
11111111111111111111111 /* App.swift in Sources */ = {isa = PBXBuildFile; fileRef = 22222222222222222222222; };
33333333333333333333333 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 44444444444444444444444; };
55555555555555555555555 /* llama.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
77777777777777777777777 /* llama.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
88888888888888888888888 /* Embed Frameworks */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = "";
dstSubfolderSpec = 10;
files = (
77777777777777777777777 /* llama.xcframework in Embed Frameworks */,
);
name = "Embed Frameworks";
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
EOF
# Chunk 2 (expanded, appended with '>>'): the PBXFileReference entries. Only
# the product reference depends on ${APP_NAME}; the other entries name the
# Swift sources, Info.plist and llama.xcframework.
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
99999999999999999999999 /* ${APP_NAME}.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "${APP_NAME}.app"; sourceTree = BUILT_PRODUCTS_DIR; };
22222222222222222222222 /* App.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = App.swift; sourceTree = "<group>"; };
44444444444444444444444 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
66666666666666666666666 /* llama.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = llama.xcframework; sourceTree = "<group>"; };
/* End PBXFileReference section */
EOF
# Append (verbatim chunk) the PBXFrameworksBuildPhase that links
# llama.xcframework, followed by the opening marker of the PBXGroup section.
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
/* Begin PBXFrameworksBuildPhase section */
BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
55555555555555555555555 /* llama.xcframework in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
EOF
# Append (expanded chunk) the Products group; it lists ${APP_NAME}.app, so the
# delimiter is left unquoted to let the shell substitute the name.
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
CCCCCCCCCCCCCCCCCCCCCCCC /* Products */ = {
isa = PBXGroup;
children = (
99999999999999999999999 /* ${APP_NAME}.app */,
);
name = Products;
sourceTree = "<group>";
};
EOF
# Append (verbatim chunk) the remaining groups: Frameworks, the main group,
# the app group and its Sources sub-group.
# The app group's name and path are the literal "TVOSLlamaTest" because this
# chunk is not expanded; that literal has to stay in sync with the value
# assigned to APP_NAME in this script.
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */ = {
isa = PBXGroup;
children = (
66666666666666666666666 /* llama.xcframework */,
);
name = Frameworks;
sourceTree = "<group>";
};
EEEEEEEEEEEEEEEEEEEEEEEE = {
isa = PBXGroup;
children = (
FFFFFFFFFFFFFFFFFFFFFFFF /* TVOSLlamaTest */,
CCCCCCCCCCCCCCCCCCCCCCCC /* Products */,
DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */,
);
sourceTree = "<group>";
};
FFFFFFFFFFFFFFFFFFFFFFFF /* TVOSLlamaTest */ = {
isa = PBXGroup;
children = (
1111111111111111111111AA /* Sources */,
AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */,
);
path = "TVOSLlamaTest";
sourceTree = "<group>";
};
1111111111111111111111AA /* Sources */ = {
isa = PBXGroup;
children = (
22222222222222222222222 /* App.swift */,
44444444444444444444444 /* ContentView.swift */,
);
path = Sources;
sourceTree = "<group>";
};
/* End PBXGroup section */
EOF
# Append (expanded chunk) the PBXNativeTarget and PBXProject objects.
# The target is named after ${APP_NAME} and runs four build phases in order:
# Sources, Frameworks, Resources, Embed Frameworks. The project object lists
# that target as its only target and uses the main group and Products group
# written by the earlier chunks.
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
/* Begin PBXNativeTarget section */
3333333333333333333333AA /* ${APP_NAME} */ = {
isa = PBXNativeTarget;
buildConfigurationList = 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */;
buildPhases = (
5555555555555555555555AA /* Sources */,
BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */,
6666666666666666666666AA /* Resources */,
88888888888888888888888 /* Embed Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = "${APP_NAME}";
productName = "${APP_NAME}";
productReference = 99999999999999999999999 /* ${APP_NAME}.app */;
productType = "com.apple.product-type.application";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
7777777777777777777777AA /* Project object */ = {
isa = PBXProject;
attributes = {
LastSwiftUpdateCheck = 1240;
LastUpgradeCheck = 1240;
TargetAttributes = {
3333333333333333333333AA = {
CreatedOnToolsVersion = 12.4;
};
};
};
buildConfigurationList = 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */;
compatibilityVersion = "Xcode 12.0";
developmentRegion = en;
hasScannedForEncodings = 0;
knownRegions = (
en,
Base,
);
mainGroup = EEEEEEEEEEEEEEEEEEEEEEEE;
productRefGroup = CCCCCCCCCCCCCCCCCCCCCCCC /* Products */;
projectDirPath = "";
projectRoot = "";
targets = (
3333333333333333333333AA /* ${APP_NAME} */,
);
};
/* End PBXProject section */
EOF
# Add the rest of the file with correct FRAMEWORK_SEARCH_PATHS and tvOS settings
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
/* Begin PBXResourcesBuildPhase section */
6666666666666666666666AA /* Resources */ = {
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXResourcesBuildPhase section */
/* Begin PBXSourcesBuildPhase section */
5555555555555555555555AA /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
33333333333333333333333 /* ContentView.swift in Sources */,
11111111111111111111111 /* App.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin XCBuildConfiguration section */
9999999999999999999999AA /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_DYNAMIC_NO_PIC = NO;
GCC_NO_COMMON_BLOCKS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
TVOS_DEPLOYMENT_TARGET = 15.0;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = appletvos;
SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
};
name = Debug;
};
AAAAAAAAAAAAAAAAAAAAABBB /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_NS_ASSERTIONS = NO;
ENABLE_STRICT_OBJC_MSGSEND = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_NO_COMMON_BLOCKS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
TVOS_DEPLOYMENT_TARGET = 15.0;
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
SDKROOT = appletvos;
SWIFT_COMPILATION_MODE = wholemodule;
SWIFT_OPTIMIZATION_LEVEL = "-O";
VALIDATE_PRODUCT = YES;
};
name = Release;
};
BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Manual;
DEVELOPMENT_TEAM = "";
ENABLE_PREVIEWS = YES;
FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)";
INFOPLIST_FILE = "TVOSLlamaTest/Info.plist";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.TVOSLlamaTest";
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = "";
SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = 3;
};
name = Debug;
};
CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Manual;
DEVELOPMENT_TEAM = "";
ENABLE_PREVIEWS = YES;
FRAMEWORK_SEARCH_PATHS = (
"$(inherited)",
"$(PROJECT_DIR)",
);
INFOPLIST_FILE = "TVOSLlamaTest/Info.plist";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.TVOSLlamaTest";
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = "";
SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = 3;
};
name = Release;
};
/* End XCBuildConfiguration section */
EOF
# Finish the project.pbxproj file
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
/* Begin XCConfigurationList section */
8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */ = {
isa = XCConfigurationList;
buildConfigurations = (
9999999999999999999999AA /* Debug */,
AAAAAAAAAAAAAAAAAAAAABBB /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */ = {
isa = XCConfigurationList;
buildConfigurations = (
BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */,
CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 7777777777777777777777AA /* Project object */;
}
EOF
# 2. Copy XCFramework to test project
echo "Copying XCFramework to test project..."
# Fail early with a readable message instead of a bare `cp` error when the
# framework has not been built yet.
if [ ! -d "${XCFRAMEWORK_PATH}" ]; then
    echo "Error: XCFramework not found at ${XCFRAMEWORK_PATH}" >&2
    echo "===== tvOS Validation Process Failed ====="
    exit 1
fi
# Remove any copy left by an earlier run: `cp -R` into an existing directory
# merges with it, so files dropped from the framework would otherwise linger.
# The :? guards abort instead of expanding to a path rooted at "/".
rm -rf "${TEMP_DIR:?}/${APP_NAME:?}/$(basename "${XCFRAMEWORK_PATH}")"
cp -R "${XCFRAMEWORK_PATH}" "${TEMP_DIR}/${APP_NAME}/"
# 3. Build and archive the app
echo "Building and archiving test app..."
# The following steps use paths relative to the generated project directory.
cd "${TEMP_DIR}/${APP_NAME}"
# Create a simple xcscheme file to avoid xcodebuild scheme issues
mkdir -p "${APP_NAME}.xcodeproj/xcshareddata/xcschemes"
cat > "${APP_NAME}.xcodeproj/xcshareddata/xcschemes/${APP_NAME}.xcscheme" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1240"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>
EOF
# Archive the Release configuration for the tvOS device SDK.
# Build settings passed on the command line: code signing is switched off,
# the product name is pinned to APP_NAME and Swift optimisation is forced
# to -Onone.
xcodebuild \
    -project "${APP_NAME}.xcodeproj" \
    -scheme "${APP_NAME}" \
    -sdk appletvos \
    -configuration Release \
    archive \
    -archivePath "${ARCHIVE_PATH}" \
    CODE_SIGN_IDENTITY="-" \
    CODE_SIGNING_REQUIRED=NO \
    CODE_SIGNING_ALLOWED=NO \
    PRODUCT_NAME="${APP_NAME}" \
    SWIFT_OPTIMIZATION_LEVEL="-Onone" \
    -quiet
# 4. Create IPA from archive
echo "Creating IPA from archive..."
# Start from a clean slate. This script never deletes TEMP_DIR (the final
# `rm -rf` is commented out), so a Payload directory or IPA from an earlier
# run may still exist: `cp -R` would merge into the old app bundle and
# `zip -r` would update the old archive, keeping entries that no longer exist.
rm -rf "${TEMP_DIR:?}/Payload"
rm -f "${IPA_PATH}"
mkdir -p "${TEMP_DIR}/Payload"
cp -R "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" "${TEMP_DIR}/Payload/"
# Check and log app structure before zipping
echo "App structure:"
ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/"
echo "Frameworks:"
# A missing Frameworks directory is reported, not treated as a fatal error here.
ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
# zip is run from TEMP_DIR so the archive's top-level entry is "Payload".
cd "${TEMP_DIR}"
zip -r "${IPA_PATH}" Payload
# Look for an embedded provisioning profile inside the archived app bundle
# and, when one exists, print its decoded contents for the log.
echo "Checking provisioning profile (if any)..."
PROVISIONING_PROFILE=$(find "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" -name "embedded.mobileprovision" 2>/dev/null)
if [ -z "$PROVISIONING_PROFILE" ]; then
    echo "No embedded provisioning profile found (expected for ad-hoc builds)"
else
    echo "Found embedded provisioning profile:"
    # Decoding is best-effort: a failure is reported but does not stop the run.
    if ! security cms -D -i "$PROVISIONING_PROFILE"; then
        echo "Unable to decode provisioning profile"
    fi
fi
# 5. Validate the IPA
#
# Runs `xcrun altool --validate-app` on the IPA, stores the combined
# stdout/stderr in VALIDATION_OUTPUT and classifies the outcome:
#   * app not registered in App Store Connect -> local structural checks
#   * no credentials supplied and altool asks for them -> treated as a pass
#   * altool exited with status 0 -> pass
#   * anything else -> failure, diagnostics are printed and the script exits 1
echo "Validating IPA..."
VALIDATION_OUTPUT="${VALIDATION_DIR}/validation_output.txt"
# Check if authentication credentials are provided.
# The arguments are collected in an array so that credentials containing
# spaces, quotes, `$` or backticks reach altool verbatim; no `eval` and no
# second round of shell parsing is involved.
AUTH_ARGS=()
if [ -n "$APPLE_ID" ] && [ -n "$APPLE_PASSWORD" ]; then
    echo "Using Apple ID authentication for validation..."
    AUTH_ARGS=(--username "$APPLE_ID" --password "$APPLE_PASSWORD")
else
    echo "No authentication credentials provided. Will perform basic validation."
    echo "To use your personal developer account, you can run the script with:"
    echo " APPLE_ID='your.email@example.com' APPLE_PASSWORD='your-app-specific-password' ./validate-tvos.sh"
    echo "Note: You need to create an app-specific password at https://appleid.apple.com/account/manage"
fi
# Run validation with detailed output
echo "Running validation with altool..."
xcrun altool --validate-app -f "${IPA_PATH}" --type tvos --output-format xml "${AUTH_ARGS[@]}" 2>&1 | tee "${VALIDATION_OUTPUT}"
# `$?` after a pipeline is the status of its last command (tee).
# PIPESTATUS[0] is altool's own exit status and must be read immediately.
VALIDATION_RESULT=${PIPESTATUS[0]}
# Final validation result
FINAL_VALIDATION_RESULT=0
# Check if validation failed because the app isn't in App Store Connect
if grep -q "No suitable application records were found" "${VALIDATION_OUTPUT}"; then
    echo "⚠️ App Store Connect Warning: The app bundle identifier is not found in App Store Connect"
    echo "This is expected for apps that haven't been registered in App Store Connect yet."
    echo "This doesn't indicate a problem with the build or framework."
    # Perform alternative validation
    echo "Performing alternative validation checks..."
    # Check if IPA was created successfully
    if [ -f "${IPA_PATH}" ] && [ -s "${IPA_PATH}" ]; then
        echo "✅ IPA file created successfully"
    else
        echo "❌ IPA file not created or empty"
        FINAL_VALIDATION_RESULT=1
    fi
    # Check if app binary exists and is executable
    if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ] && [ -x "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ]; then
        echo "✅ App binary exists and is executable"
    else
        echo "❌ App binary missing or not executable"
        FINAL_VALIDATION_RESULT=1
    fi
    # Check if framework was properly embedded
    if [ -d "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework" ]; then
        echo "✅ llama.framework properly embedded"
    else
        echo "❌ llama.framework not properly embedded"
        FINAL_VALIDATION_RESULT=1
    fi
    # Check if framework binary exists
    if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" ]; then
        echo "✅ Framework binary exists"
        # Further validate framework by checking architecture.
        # An undetermined architecture only produces a warning.
        ARCHS=$(lipo -info "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" 2>/dev/null | grep -o "arm64\\|x86_64" | tr '\n' ' ')
        if [ -n "$ARCHS" ]; then
            echo "✅ Framework architecture(s): $ARCHS"
        else
            echo "⚠️ Could not determine framework architecture"
        fi
    else
        echo "❌ Framework binary missing"
        FINAL_VALIDATION_RESULT=1
    fi
    if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
        echo "✅ Alternative validation PASSED: App built successfully with embedded framework"
    else
        echo "❌ Alternative validation FAILED: Issues found with the app or framework"
    fi
elif grep -q "You must specify authentication credentials" "${VALIDATION_OUTPUT}" && [ ${#AUTH_ARGS[@]} -eq 0 ]; then
    # No credentials were supplied and altool asked for them: kept as a pass,
    # as before. NOTE(review): altool has not actually inspected the IPA on
    # this path and none of the structural checks above run - confirm that
    # this is the intended outcome.
    echo "✅ tvOS Validation PASSED: IPA successfully validated"
    echo "Results saved to ${VALIDATION_OUTPUT}"
elif [ "$VALIDATION_RESULT" -eq 0 ]; then
    # altool itself reported success (e.g. an authenticated validation).
    echo "✅ tvOS Validation PASSED: IPA successfully validated"
    echo "Results saved to ${VALIDATION_OUTPUT}"
else
    # Record the failure so the script exits non-zero below.
    FINAL_VALIDATION_RESULT=1
    echo "❌ tvOS Validation FAILED: IPA validation found issues"
    echo "See validation output at ${VALIDATION_OUTPUT}"
    echo ""
    echo "==== VALIDATION ERRORS ===="
    # Try to extract specific errors from the output
    if grep -q "Error" "${VALIDATION_OUTPUT}"; then
        grep -A 5 "Error" "${VALIDATION_OUTPUT}"
    else
        # If no specific error found, show the whole log
        cat "${VALIDATION_OUTPUT}"
    fi
    # Additional debugging: check IPA contents
    echo ""
    echo "==== IPA CONTENTS ===="
    # Extract into a fresh directory; -o keeps unzip from prompting if files
    # from an earlier run are somehow still present.
    rm -rf "${TEMP_DIR:?}/ipa_contents"
    mkdir -p "${TEMP_DIR}/ipa_contents"
    unzip -q -o "${IPA_PATH}" -d "${TEMP_DIR}/ipa_contents"
    ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/"
    # Check for code signing issues
    echo ""
    echo "==== CODE SIGNING INFO ===="
    codesign -vv -d "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app" 2>&1 || echo "Code signing verification failed"
    # Check embedded frameworks
    echo ""
    echo "==== FRAMEWORK INFO ===="
    ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
fi
# Don't clean up on error to allow inspection
if [ $FINAL_VALIDATION_RESULT -ne 0 ]; then
    echo ""
    echo "Temporary files kept for inspection at: ${TEMP_DIR}"
    echo "===== tvOS Validation Process Failed ====="
    exit 1
fi
# Clean up temporary files but keep build artifacts
# NOTE(review): the removal below is disabled, so TEMP_DIR is currently kept
# even though the message says otherwise - confirm whether it should be
# re-enabled.
if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
    echo "Cleaning up temporary files..."
    #rm -rf "${TEMP_DIR}"
fi
echo "===== tvOS Validation Process Completed ====="
exit $FINAL_VALIDATION_RESULT

View File

@@ -0,0 +1,811 @@
#!/usr/bin/env bash
# validate-visionos.sh - Validate visionOS Application with embedded llama.xcframework using SwiftUI
# Authentication options (optional) (can be set via environment variables)
# To use: export APPLE_ID=your.email@example.com
# export APPLE_PASSWORD=your-app-specific-password
# ./validate-visionos.sh
# Default both credentials to the empty string when the environment does not
# provide them (an already-set, non-empty value is left untouched).
: "${APPLE_ID:=}"
: "${APPLE_PASSWORD:=}"
# Abort the script as soon as any command fails.
set -o errexit
# Print the command-line help text to stdout.
# Takes no arguments; every argument to printf below becomes one output line.
print_usage() {
    printf '%s\n' \
        "Usage: ./validate-visionos.sh [OPTIONS]" \
        "" \
        "Options:" \
        " --help Show this help message" \
        " --apple-id EMAIL Apple ID email for validation" \
        " --apple-password PWD App-specific password for Apple ID" \
        "" \
        "Environment variables:" \
        " APPLE_ID Apple ID email for validation" \
        " APPLE_PASSWORD App-specific password for Apple ID" \
        "" \
        "Notes:" \
        " - Command line options take precedence over environment variables" \
        " - Authentication is optional. If not provided, alternative validation will be performed" \
        " - For APPLE_PASSWORD, use an app-specific password generated at https://appleid.apple.com/account/manage"
}
# Parse command line arguments.
# Recognised options: --help, --apple-id EMAIL, --apple-password PWD.
# Values given here overwrite APPLE_ID / APPLE_PASSWORD taken from the
# environment. Any other argument prints the usage text and exits with 1.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --help)
            print_usage
            exit 0
            ;;
        --apple-id)
            # Without this check a trailing "--apple-id" makes `shift 2` fail
            # and, under `set -e`, the script would stop without any message.
            if [[ $# -lt 2 ]]; then
                echo "Missing value for option: $1"
                print_usage
                exit 1
            fi
            APPLE_ID="$2"
            shift 2
            ;;
        --apple-password)
            if [[ $# -lt 2 ]]; then
                echo "Missing value for option: $1"
                print_usage
                exit 1
            fi
            APPLE_PASSWORD="$2"
            shift 2
            ;;
        *)
            echo "Unknown option: $1"
            print_usage
            exit 1
            ;;
    esac
done
# Failure handler: announce that the validation run failed and terminate the
# script with exit status 1.
cleanup() {
    # Nothing is deleted here on purpose - the temp files stay available for
    # debugging the failed run.
    printf '%s\n' "===== visionOS Validation Process Failed ====="
    exit 1
}
# Route every failing command through cleanup(), then make sure errexit is on.
trap cleanup ERR
set -o errexit
# Repository root: two directory levels above the directory containing this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
BUILD_DIR="${ROOT_DIR}/validation-builds/visionos"
# Configuration
APP_NAME="VisionOSLlamaTest"
BUNDLE_ID="org.ggml.VisionOSLlamaTest"
XCFRAMEWORK_PATH="${ROOT_DIR}/build-apple/llama.xcframework"
TEMP_DIR="${BUILD_DIR}/temp"
ARCHIVE_PATH="${BUILD_DIR}/${APP_NAME}.xcarchive"
IPA_PATH="${BUILD_DIR}/${APP_NAME}.ipa"
VALIDATION_DIR="${BUILD_DIR}/validation"
# Create necessary directories (build root, scratch area, validation output)
for required_dir in "${BUILD_DIR}" "${TEMP_DIR}" "${VALIDATION_DIR}"; do
    mkdir -p "${required_dir}"
done
echo "===== visionOS Validation Process Started ====="
# 1. Create a simple test app project
# The project lives in ${TEMP_DIR}/${APP_NAME}; the inner ${APP_NAME}
# directory created here receives Info.plist.
echo "Creating test visionOS app project..."
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}"
# Write Info.plist. The heredoc delimiter is unquoted, so ${APP_NAME} and
# ${BUNDLE_ID} below are expanded by the shell before the file is written.
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Info.plist" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>en</string>
<key>CFBundleExecutable</key>
<string>${APP_NAME}</string>
<key>CFBundleIdentifier</key>
<string>${BUNDLE_ID}</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>${APP_NAME}</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
</dict>
</plist>
EOF
# Create SwiftUI app files
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources"
# Both Swift files are literal text with nothing for the shell to substitute,
# so their heredoc delimiters are quoted ('EOF'). That keeps Swift string
# interpolation such as \(params.n_ctx), and any `$` or backtick added later,
# out of reach of shell expansion. The files written are byte-identical to
# what the unquoted form produced.
# Create App.swift
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/App.swift" << 'EOF'
import SwiftUI
import llama
@main
struct LlamaTestApp: App {
var body: some Scene {
WindowGroup {
ContentView()
}
}
}
EOF
# Create ContentView.swift with visionOS specific elements
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/ContentView.swift" << 'EOF'
import SwiftUI
import llama
struct ContentView: View {
// Test that we can initialize a llama context params struct
let params = llama_context_default_params()
var body: some View {
VStack(spacing: 20) {
Text("Llama Framework Test on visionOS")
.font(.largeTitle)
.padding()
Text("llama_context_default_params() created successfully")
.font(.headline)
.multilineTextAlignment(.center)
.padding()
// Display some param values to confirm the framework is working
Text("n_ctx: \(params.n_ctx)")
.font(.body)
Text("n_batch: \(params.n_batch)")
.font(.body)
Spacer()
}
.padding()
.frame(width: 500, height: 400)
}
}
struct ContentView_Previews: PreviewProvider {
static var previews: some View {
ContentView()
}
}
EOF
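# Create project.pbxproj. The file is assembled from several heredocs:
# chunks with a quoted delimiter ('EOF') are written verbatim, chunks with an
# unquoted delimiter (EOF) have ${APP_NAME} expanded by the shell.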
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj"
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 54;
objects = {
/* Begin PBXBuildFile section */
11111111111111111111111 /* App.swift in Sources */ = {isa = PBXBuildFile; fileRef = 22222222222222222222222; };
33333333333333333333333 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 44444444444444444444444; };
55555555555555555555555 /* llama.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
77777777777777777777777 /* llama.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
88888888888888888888888 /* Embed Frameworks */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = "";
dstSubfolderSpec = 10;
files = (
77777777777777777777777 /* llama.xcframework in Embed Frameworks */,
);
name = "Embed Frameworks";
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
EOF
# PBXFileReference entries (unquoted heredoc so that ${APP_NAME} is expanded)
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
99999999999999999999999 /* ${APP_NAME}.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "${APP_NAME}.app"; sourceTree = BUILT_PRODUCTS_DIR; };
22222222222222222222222 /* App.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = App.swift; sourceTree = "<group>"; };
44444444444444444444444 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
66666666666666666666666 /* llama.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = llama.xcframework; sourceTree = "<group>"; };
/* End PBXFileReference section */
EOF
# PBXFrameworksBuildPhase section and the start of the PBXGroup section (quoted heredoc, written verbatim)
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
/* Begin PBXFrameworksBuildPhase section */
BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
55555555555555555555555 /* llama.xcframework in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
EOF
# Products group (unquoted heredoc so that ${APP_NAME} is expanded)
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
CCCCCCCCCCCCCCCCCCCCCCCC /* Products */ = {
isa = PBXGroup;
children = (
99999999999999999999999 /* ${APP_NAME}.app */,
);
name = Products;
sourceTree = "<group>";
};
EOF
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */ = {
isa = PBXGroup;
children = (
66666666666666666666666 /* llama.xcframework */,
);
name = Frameworks;
sourceTree = "<group>";
};
EEEEEEEEEEEEEEEEEEEEEEEE = {
isa = PBXGroup;
children = (
FFFFFFFFFFFFFFFFFFFFFFFF /* VisionOSLlamaTest */,
CCCCCCCCCCCCCCCCCCCCCCCC /* Products */,
DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */,
);
sourceTree = "<group>";
};
FFFFFFFFFFFFFFFFFFFFFFFF /* VisionOSLlamaTest */ = {
isa = PBXGroup;
children = (
1111111111111111111111AA /* Sources */,
AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */,
);
path = "VisionOSLlamaTest";
sourceTree = "<group>";
};
1111111111111111111111AA /* Sources */ = {
isa = PBXGroup;
children = (
22222222222222222222222 /* App.swift */,
44444444444444444444444 /* ContentView.swift */,
);
path = Sources;
sourceTree = "<group>";
};
/* End PBXGroup section */
EOF
# PBXNativeTarget and PBXProject sections (unquoted heredoc so that ${APP_NAME} is expanded)
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
/* Begin PBXNativeTarget section */
3333333333333333333333AA /* ${APP_NAME} */ = {
isa = PBXNativeTarget;
buildConfigurationList = 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */;
buildPhases = (
5555555555555555555555AA /* Sources */,
BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */,
6666666666666666666666AA /* Resources */,
88888888888888888888888 /* Embed Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = "${APP_NAME}";
productName = "${APP_NAME}";
productReference = 99999999999999999999999 /* ${APP_NAME}.app */;
productType = "com.apple.product-type.application";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
7777777777777777777777AA /* Project object */ = {
isa = PBXProject;
attributes = {
LastSwiftUpdateCheck = 1510;
LastUpgradeCheck = 1510;
TargetAttributes = {
3333333333333333333333AA = {
CreatedOnToolsVersion = 15.1;
};
};
};
buildConfigurationList = 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */;
compatibilityVersion = "Xcode 15.0";
developmentRegion = en;
hasScannedForEncodings = 0;
knownRegions = (
en,
Base,
);
mainGroup = EEEEEEEEEEEEEEEEEEEEEEEE;
productRefGroup = CCCCCCCCCCCCCCCCCCCCCCCC /* Products */;
projectDirPath = "";
projectRoot = "";
targets = (
3333333333333333333333AA /* ${APP_NAME} */,
);
};
/* End PBXProject section */
EOF
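# Resources/Sources build phases and all XCBuildConfiguration objects.
# Quoted heredoc: Xcode variables such as $(PROJECT_DIR), $(inherited) and
# $(TARGET_NAME) must reach the file unexpanded by the shell.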
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
/* Begin PBXResourcesBuildPhase section */
6666666666666666666666AA /* Resources */ = {
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXResourcesBuildPhase section */
/* Begin PBXSourcesBuildPhase section */
5555555555555555555555AA /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
33333333333333333333333 /* ContentView.swift in Sources */,
11111111111111111111111 /* App.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin XCBuildConfiguration section */
9999999999999999999999AA /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_DYNAMIC_NO_PIC = NO;
GCC_NO_COMMON_BLOCKS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = xros;
SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
XROS_DEPLOYMENT_TARGET = 1.0;
};
name = Debug;
};
AAAAAAAAAAAAAAAAAAAAABBB /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_NS_ASSERTIONS = NO;
ENABLE_STRICT_OBJC_MSGSEND = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_NO_COMMON_BLOCKS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
SDKROOT = xros;
SWIFT_COMPILATION_MODE = wholemodule;
SWIFT_OPTIMIZATION_LEVEL = "-O";
VALIDATE_PRODUCT = YES;
XROS_DEPLOYMENT_TARGET = 1.0;
};
name = Release;
};
BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Manual;
DEVELOPMENT_TEAM = "";
ENABLE_PREVIEWS = YES;
FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)";
INFOPLIST_FILE = "VisionOSLlamaTest/Info.plist";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.VisionOSLlamaTest";
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = "";
SUPPORTED_PLATFORMS = "xros xrsimulator";
SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = "1,2,7";
};
name = Debug;
};
CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Manual;
DEVELOPMENT_TEAM = "";
ENABLE_PREVIEWS = YES;
FRAMEWORK_SEARCH_PATHS = (
"$(inherited)",
"$(PROJECT_DIR)",
);
INFOPLIST_FILE = "VisionOSLlamaTest/Info.plist";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.VisionOSLlamaTest";
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = "";
SUPPORTED_PLATFORMS = "xros xrsimulator";
SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = "1,2,7";
};
name = Release;
};
/* End XCBuildConfiguration section */
EOF
# Finish the project.pbxproj file
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
/* Begin XCConfigurationList section */
8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */ = {
isa = XCConfigurationList;
buildConfigurations = (
9999999999999999999999AA /* Debug */,
AAAAAAAAAAAAAAAAAAAAABBB /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */ = {
isa = XCConfigurationList;
buildConfigurations = (
BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */,
CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 7777777777777777777777AA /* Project object */;
}
EOF
# 2. Copy XCFramework to test project
echo "Copying XCFramework to test project..."
# Fail early with a readable message instead of a bare `cp` error when the
# framework has not been built yet.
if [ ! -d "${XCFRAMEWORK_PATH}" ]; then
    echo "Error: XCFramework not found at ${XCFRAMEWORK_PATH}" >&2
    echo "===== visionOS Validation Process Failed ====="
    exit 1
fi
# TEMP_DIR is a fixed path that survives between runs. Remove any earlier
# copy first: `cp -R` into an existing directory merges with it, so files
# dropped from the framework would otherwise linger.
# The :? guards abort instead of expanding to a path rooted at "/".
rm -rf "${TEMP_DIR:?}/${APP_NAME:?}/$(basename "${XCFRAMEWORK_PATH}")"
cp -R "${XCFRAMEWORK_PATH}" "${TEMP_DIR}/${APP_NAME}/"
# 3. Build and archive the app
echo "Building and archiving test app..."
# The following steps use paths relative to the generated project directory.
cd "${TEMP_DIR}/${APP_NAME}"
# Create a simple xcscheme file to avoid xcodebuild scheme issues
mkdir -p "${APP_NAME}.xcodeproj/xcshareddata/xcschemes"
cat > "${APP_NAME}.xcodeproj/xcshareddata/xcschemes/${APP_NAME}.xcscheme" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1510"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>
EOF
# Now use xcodebuild with an explicitly defined product name for visionOS
# Archives the Release configuration with the xros SDK. Code signing is turned
# off on the command line (identity "-", CODE_SIGNING_REQUIRED/ALLOWED=NO) and
# Swift optimization is disabled via SWIFT_OPTIMIZATION_LEVEL="-Onone".
xcodebuild -project "${APP_NAME}.xcodeproj" -scheme "${APP_NAME}" -sdk xros -configuration Release archive -archivePath "${ARCHIVE_PATH}" CODE_SIGN_IDENTITY="-" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO PRODUCT_NAME="${APP_NAME}" SWIFT_OPTIMIZATION_LEVEL="-Onone" -quiet
# 4. Create IPA from archive
# The IPA is produced by zipping a Payload/ directory that holds the archived .app bundle.
echo "Creating IPA from archive..."
mkdir -p "${TEMP_DIR}/Payload"
cp -R "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" "${TEMP_DIR}/Payload/"
# Check and log app structure before zipping
echo "App structure:"
ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/"
echo "Frameworks:"
# A missing Frameworks directory is only reported here, it does not stop the script
ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
# zip is run from TEMP_DIR so the archive entries start at "Payload/"
cd "${TEMP_DIR}"
zip -r "${IPA_PATH}" Payload
# Check embedded provisioning profile
echo "Checking provisioning profile (if any)..."
PROVISIONING_PROFILE=$(find "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" -name "embedded.mobileprovision" 2>/dev/null)
if [ -n "$PROVISIONING_PROFILE" ]; then
echo "Found embedded provisioning profile:"
# Decoding is best effort: a failure is reported but does not abort the script
security cms -D -i "$PROVISIONING_PROFILE" || echo "Unable to decode provisioning profile"
else
echo "No embedded provisioning profile found (expected for ad-hoc builds)"
fi
# 5. Validate the IPA
#
# Runs `xcrun altool --validate-app` on the IPA, stores the combined
# stdout/stderr in VALIDATION_OUTPUT and classifies the outcome:
#   - app not registered in App Store Connect -> local structural checks
#   - no credentials supplied and altool asks for them -> treated as passed
#   - altool exited with status 0 -> passed
#   - anything else -> failed (diagnostics are printed, script exits 1)
echo "Validating IPA..."
VALIDATION_OUTPUT="${VALIDATION_DIR}/validation_output.txt"
# Check if authentication credentials are provided.
# The credentials are kept as separate array elements so they reach altool
# verbatim. Building a quoted string and running it through eval would let
# characters such as $, `, " or \ inside the password be re-interpreted by
# the shell.
AUTH_ARGS=()
if [ -n "$APPLE_ID" ] && [ -n "$APPLE_PASSWORD" ]; then
    echo "Using Apple ID authentication for validation..."
    AUTH_ARGS=(--username "$APPLE_ID" --password "$APPLE_PASSWORD")
else
    echo "No authentication credentials provided. Will perform basic validation."
    echo "To use your personal developer account, you can run the script with:"
    echo " APPLE_ID='your.email@example.com' APPLE_PASSWORD='your-app-specific-password' ./validate-visionos.sh"
    echo "Note: You need to create an app-specific password at https://appleid.apple.com/account/manage"
fi
# Run validation with detailed output
echo "Running validation with altool..."
# The exit status of a pipeline is that of its last command (tee), so the
# status of altool itself is read from PIPESTATUS right after the pipeline.
if [ "${#AUTH_ARGS[@]}" -gt 0 ]; then
    xcrun altool --validate-app -f "${IPA_PATH}" --type visionos --output-format xml "${AUTH_ARGS[@]}" 2>&1 | tee "${VALIDATION_OUTPUT}"
    VALIDATION_RESULT=${PIPESTATUS[0]}
else
    xcrun altool --validate-app -f "${IPA_PATH}" --type visionos --output-format xml 2>&1 | tee "${VALIDATION_OUTPUT}"
    VALIDATION_RESULT=${PIPESTATUS[0]}
fi
# Final validation result
FINAL_VALIDATION_RESULT=0
# Check if validation failed because the app isn't in App Store Connect
if grep -q "No suitable application records were found" "${VALIDATION_OUTPUT}"; then
    echo "⚠️ App Store Connect Warning: The app bundle identifier is not found in App Store Connect"
    echo "This is expected for apps that haven't been registered in App Store Connect yet."
    echo "This doesn't indicate a problem with the build or framework."
    # Perform alternative validation
    echo "Performing alternative validation checks..."
    # Check if IPA was created successfully
    if [ -f "${IPA_PATH}" ] && [ -s "${IPA_PATH}" ]; then
        echo "✅ IPA file created successfully"
    else
        echo "❌ IPA file not created or empty"
        FINAL_VALIDATION_RESULT=1
    fi
    # Check if app binary exists and is executable
    if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ] && [ -x "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ]; then
        echo "✅ App binary exists and is executable"
    else
        echo "❌ App binary missing or not executable"
        FINAL_VALIDATION_RESULT=1
    fi
    # Check if framework was properly embedded
    if [ -d "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework" ]; then
        echo "✅ llama.framework properly embedded"
    else
        echo "❌ llama.framework not properly embedded"
        FINAL_VALIDATION_RESULT=1
    fi
    # Check if framework binary exists
    if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" ]; then
        echo "✅ Framework binary exists"
        # Further validate framework by checking architecture
        ARCHS=$(lipo -info "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" 2>/dev/null | grep -o "arm64\\|x86_64" | tr '\n' ' ')
        if [ -n "$ARCHS" ]; then
            echo "✅ Framework architecture(s): $ARCHS"
        else
            # An undetectable architecture is only a warning, not a failure
            echo "⚠️ Could not determine framework architecture"
        fi
    else
        echo "❌ Framework binary missing"
        FINAL_VALIDATION_RESULT=1
    fi
    if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
        echo "✅ Alternative validation PASSED: App built successfully with embedded framework"
    else
        echo "❌ Alternative validation FAILED: Issues found with the app or framework"
    fi
elif grep -q "You must specify authentication credentials" "${VALIDATION_OUTPUT}" && [ "${#AUTH_ARGS[@]}" -eq 0 ]; then
    # Without credentials altool only reports that it needs them; the script
    # deliberately treats this as a pass.
    echo "✅ visionOS Validation PASSED: IPA successfully validated"
    echo "Results saved to ${VALIDATION_OUTPUT}"
elif [ "$VALIDATION_RESULT" -eq 0 ]; then
    # altool itself reported success (e.g. an authenticated validation)
    echo "✅ visionOS Validation PASSED: IPA successfully validated"
    echo "Results saved to ${VALIDATION_OUTPUT}"
else
    # Record the failure first so the script exits non-zero even if one of the
    # diagnostic commands below aborts early.
    FINAL_VALIDATION_RESULT=1
    echo "❌ visionOS Validation FAILED: IPA validation found issues"
    echo "See validation output at ${VALIDATION_OUTPUT}"
    echo ""
    echo "==== VALIDATION ERRORS ===="
    # Try to extract specific errors from the output
    if grep -q "Error" "${VALIDATION_OUTPUT}"; then
        grep -A 5 "Error" "${VALIDATION_OUTPUT}"
    else
        # If no specific error found, show the whole log
        cat "${VALIDATION_OUTPUT}"
    fi
    # Additional debugging: check IPA contents
    echo ""
    echo "==== IPA CONTENTS ===="
    mkdir -p "${TEMP_DIR}/ipa_contents"
    unzip -q "${IPA_PATH}" -d "${TEMP_DIR}/ipa_contents"
    ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/"
    # Check for code signing issues
    echo ""
    echo "==== CODE SIGNING INFO ===="
    codesign -vv -d "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app" 2>&1 || echo "Code signing verification failed"
    # Check embedded frameworks
    echo ""
    echo "==== FRAMEWORK INFO ===="
    ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
fi
# Don't clean up on error to allow inspection
if [ $FINAL_VALIDATION_RESULT -ne 0 ]; then
    echo ""
    echo "Temporary files kept for inspection at: ${TEMP_DIR}"
    echo "===== visionOS Validation Process Failed ====="
    exit 1
fi
# Clean up temporary files but keep build artifacts
# NOTE(review): the actual removal is commented out, so TEMP_DIR is currently
# kept even on success — confirm whether that is intended.
if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
    echo "Cleaning up temporary files..."
    #rm -rf "${TEMP_DIR}"
fi
echo "===== visionOS Validation Process Completed ====="
exit $FINAL_VALIDATION_RESULT

82
scripts/bench-models.sh Executable file
View File

@@ -0,0 +1,82 @@
#!/usr/bin/env bash
# Results file; it is truncated (or created) at startup and appended to by every benchmark run.
RESULTS="bench-models-results.txt"
: > "$RESULTS"

# Default argument sets for llama-batched-bench (ARGS_BB) and llama-bench (ARGS_B)
ARGS_BB="-c 270336 -npp 512,4096,8192 -npl 1,2,4,8,16,32 -ntg 32"
ARGS_B="-d 0,4096,8192,16384,32768 -p 2048 -n 32"

QUICK=0
DIO=0

# Recognized flags: --quick and --dio. Anything else is silently ignored.
for opt in "$@"; do
    case "$opt" in
        --quick) QUICK=1 ;;
        --dio)   DIO=1 ;;
    esac
done
# All positional parameters have been consumed at this point
set --

# --quick replaces both argument sets with a much smaller configuration
if [ "$QUICK" -ne 0 ]; then
    ARGS_BB="-c 20480 -npp 512,4096 -npl 1,2,4 -ntg 32"
    ARGS_B="-d 0 -p 2048 -n 32"
fi

# --dio appends the no-mmap / direct-io switches of each tool
if [ "$DIO" -ne 0 ]; then
    ARGS_BB="${ARGS_BB} --no-mmap --direct-io"
    ARGS_B="${ARGS_B} -mmp 0 -dio 1"
fi
# Benchmark one model and append a markdown section for it to $RESULTS.
#
# Arguments:
#   $1  Hugging Face repository (e.g. ggml-org/gpt-oss-20b-GGUF)
#   $2  GGUF file name, passed both as -hff and as -m
#
# Everything written to $RESULTS is also echoed to stdout via tee.
run_model() {
    local HFR=$1
    local HFF=$2

    # The repository name is passed as a printf argument, never as part of the
    # format string, so a '%' or '\' in it is printed literally.
    printf '## %s\n' "$HFR" | tee -a "$RESULTS"
    printf '\n' | tee -a "$RESULTS"
    printf 'Model: https://huggingface.co/%s\n' "$HFR" | tee -a "$RESULTS"
    printf '\n' | tee -a "$RESULTS"

    printf '%s\n' '- `llama-batched-bench`' | tee -a "$RESULTS"
    printf '\n' | tee -a "$RESULTS"

    # ARGS_BB is intentionally left unquoted: it holds several options that
    # must be split into separate words.
    ./bin/llama-batched-bench \
        -hfr "${HFR}" -hff "${HFF}" \
        -m "${HFF}" -fa 1 -ub 2048 \
        ${ARGS_BB} | tee -a "$RESULTS"

    printf '\n' | tee -a "$RESULTS"
    printf '%s\n' '- `llama-bench`' | tee -a "$RESULTS"
    printf '\n' | tee -a "$RESULTS"

    # ARGS_B is intentionally left unquoted for the same reason.
    ./bin/llama-bench \
        -m "${HFF}" -fa 1 -ub 2048 \
        ${ARGS_B} | tee -a "$RESULTS"

    printf '\n' | tee -a "$RESULTS"
    # Extra blank line on the terminal only; it is not written to the results file
    printf '\n'
}
# Built-in model list, one "<hf repo> <gguf file>" pair per entry
DEFAULT_MODELS=(
    "ggml-org/gpt-oss-20b-GGUF gpt-oss-20b-mxfp4.gguf"
    "ggml-org/gpt-oss-120b-GGUF gpt-oss-120b-mxfp4-00001-of-00003.gguf"
    "ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF qwen3-coder-30b-a3b-instruct-q8_0.gguf"
    "ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF qwen2.5-coder-7b-q8_0.gguf"
    "ggml-org/gemma-3-4b-it-qat-GGUF gemma-3-4b-it-qat-Q4_0.gguf"
    "ggml-org/GLM-4.7-Flash-GGUF GLM-4.7-Flash-Q8_0.gguf"
)

for model_entry in "${DEFAULT_MODELS[@]}"; do
    # Split the entry at its single space into repository and file name
    run_model "${model_entry%% *}" "${model_entry#* }"
done

# Optional additional models: one "<hf repo> <gguf file>" pair per line
if [[ -f models-extra.txt ]]; then
    while read -r HFR HFF; do
        # Skip blank lines
        if [[ -z "$HFR" ]]; then
            continue
        fi
        run_model "$HFR" "$HFF"
    done < models-extra.txt
fi

# Print the collected results once more as a single report
printf '\n=====================================\n'
printf '\n'
cat "$RESULTS"
printf '\n'
printf 'Done! Results are written to %s\n' "$RESULTS"
printf '\n'

30
scripts/build-info.sh Executable file
View File

@@ -0,0 +1,30 @@
#!/bin/sh
# Emit C definitions describing the current build on stdout.
#
# Arguments:
#   $1  compiler command used to query the compiler version and target triple
#
# Every value has a fallback ("0" / "unknown") that is kept when the
# corresponding query fails.
CC=$1

build_number="0"
build_commit="unknown"
build_compiler="unknown"
build_target="unknown"

if out=$(git rev-list --count HEAD); then
    # git is broken on WSL so we need to strip extra newlines
    build_number=$(printf '%s' "$out" | tr -d '\n')
fi

if out=$(git rev-parse --short HEAD); then
    build_commit=$(printf '%s' "$out" | tr -d '\n')
fi

# The exit status of this pipeline is the one of `head`, so a failing compiler
# command cannot be detected through it. Test for non-empty output instead so
# that the "unknown" fallback is kept when the compiler cannot be queried.
out=$($CC --version | head -1)
if [ -n "$out" ]; then
    build_compiler=$out
fi

if out=$($CC -dumpmachine); then
    build_target=$out
fi

echo "int LLAMA_BUILD_NUMBER = ${build_number};"
echo "char const *LLAMA_COMMIT = \"${build_commit}\";"
echo "char const *LLAMA_COMPILER = \"${build_compiler}\";"
echo "char const *LLAMA_BUILD_TARGET = \"${build_target}\";"

179
scripts/check-requirements.sh Executable file
View File

@@ -0,0 +1,179 @@
#!/usr/bin/env bash
set -euo pipefail
#
# check-requirements.sh checks all requirements files for each top-level
# convert*.py script.
#
# WARNING: This is quite IO intensive, because a fresh venv is set up for every
# python script. As of 2023-12-22, this writes ~2.7GB of data. An adequately
# sized tmpfs /tmp or ramdisk is recommended if running this frequently.
#
# usage: check-requirements.sh [<working_dir>]
# check-requirements.sh nocleanup [<working_dir>]
#
# where:
# - <working_dir> is a directory that can be used as the base for
# setting up the venvs. Defaults to `/tmp`.
# - 'nocleanup' as the first argument will disable automatic cleanup
# of the files created by this script.
#
# requires:
# - bash >= 3.2.57
# - shellcheck
#
# For each script, it creates a fresh venv, `pip install`s the requirements, and
# finally imports the python script to check for `ImportError`.
#
# Write "<LEVEL>: <message>" to stderr.
#   $1  level label
#   $2  message text
log() {
    local severity=$1 text=$2
    printf >&2 '%s: %s\n' "$severity" "$text"
}

# Log with the DEBUG label
debug() {
    log DEBUG "$@"
}

# Log with the INFO label
info() {
    log INFO "$@"
}

# Log with the FATAL label, then terminate the script with status 1
fatal() {
    log FATAL "$@"
    exit 1
}
# Remove the working directory created by this script, showing progress dots.
# Does nothing unless $workdir is set and names a writable directory.
cleanup() {
# ${workdir+x} expands to "x" only when workdir is set, which keeps this test
# valid under `set -u` even if the function runs before workdir is assigned.
if [[ -n ${workdir+x} && -d $workdir && -w $workdir ]]; then
info "Removing $workdir"
local count=0
# rm -v prints one line per removed entry; instead of echoing each of them,
# a single dot is printed roughly every 750 lines.
rm -rfv -- "$workdir" | while read -r; do
if (( count++ > 750 )); then
printf .
count=0
fi
done
printf '\n'
info "Removed $workdir"
fi
}
# Cleanup is enabled by default; a leading 'nocleanup' argument disables it
# and is removed from the argument list.
do_cleanup=1
if [[ ${1-} == nocleanup ]]; then
do_cleanup=0; shift
fi
if (( do_cleanup )); then
# INT/TERM are turned into a regular exit so that the EXIT trap below runs
trap exit INT TERM
trap cleanup EXIT
fi
# Absolute path of this script
this=$(realpath -- "$0"); readonly this
cd "$(dirname "$this")/.." # PWD should stay in llama.cpp project directory
# The script lints itself; with `set -e` a shellcheck failure aborts the run
shellcheck "$this"
readonly reqs_dir=requirements
# Optional argument: base directory for the venvs (defaults to /tmp)
if [[ ${1+x} ]]; then
tmp_dir=$(realpath -- "$1")
if [[ ! ( -d $tmp_dir && -w $tmp_dir ) ]]; then
fatal "$tmp_dir is not a writable directory"
fi
else
tmp_dir=/tmp
fi
# Fresh, uniquely named working directory below tmp_dir
workdir=$(mktemp -d "$tmp_dir/check-requirements.XXXX"); readonly workdir
info "Working directory: $workdir"
# Install the given requirements file with pip into the currently active
# Python environment, logging before and after.
#   $1  path of the requirements file
check_requirements() {
    local requirements_file=$1

    info "$requirements_file: beginning check"
    pip --disable-pip-version-check install -q -r "$requirements_file"
    info "$requirements_file: OK"
}
# Check one convert script: its requirements file must exist and be listed in
# the top-level requirements.txt, must install into a fresh venv, and the
# script itself must then be importable.
#   $1  path of the python script
check_convert_script() {
local py=$1 # e.g. ./convert_hf_to_gguf.py
local pyname=${py##*/} # e.g. convert_hf_to_gguf.py
pyname=${pyname%.py} # e.g. convert_hf_to_gguf
info "$py: beginning check"
local reqs="$reqs_dir/requirements-$pyname.txt"
if [[ ! -r $reqs ]]; then
fatal "$py missing requirements. Expected: $reqs"
fi
# Check that all sub-requirements are added to top-level requirements.txt
if ! grep -qF "$reqs" requirements.txt; then
fatal "$reqs needs to be added to requirements.txt"
fi
local venv="$workdir/$pyname-venv"
python3 -m venv "$venv"
# The venv is activated inside a subshell so the activation does not leak
# into the rest of the script.
(
# shellcheck source=/dev/null
source "$venv/bin/activate"
check_requirements "$reqs"
# Load the script as a module from its file path; a failing import makes
# python exit non-zero, which aborts the run under `set -e`.
python - "$py" "$pyname" <<'EOF'
import sys
from importlib.machinery import SourceFileLoader
py, pyname = sys.argv[1:]
SourceFileLoader(pyname, py).load_module()
EOF
)
if (( do_cleanup )); then
rm -rf -- "$venv"
fi
info "$py: imports OK"
}
# Marker that exempts a requirements line from the '==' pinning check below
readonly ignore_eq_eq='check_requirements: ignore "=="'
# NOTE(review): globstar is not enabled anywhere in this script, so '**'
# presumably matches a single directory level here — confirm that this covers
# all intended requirements files.
for req in */**/requirements*.txt; do
# Make sure exact release versions aren't being pinned in the requirements
# Filters out the ignore string
if grep -vF "$ignore_eq_eq" "$req" | grep -q '=='; then
tab=$'\t'
cat >&2 <<EOF
FATAL: Avoid pinning exact package versions. Use '~=' instead.
You can suppress this error by appending the following to the line:
$tab# $ignore_eq_eq
EOF
exit 1
fi
done
# Install the top-level requirements.txt into its own venv (in a subshell so
# the activation does not leak)
all_venv="$workdir/all-venv"
python3 -m venv "$all_venv"
(
# shellcheck source=/dev/null
source "$all_venv/bin/activate"
check_requirements requirements.txt
)
if (( do_cleanup )); then
rm -rf -- "$all_venv"
fi
# Check the legacy converter, then every top-level convert_*.py script
check_convert_script examples/convert_legacy_llama.py
for py in convert_*.py; do
# skip convert_hf_to_gguf_update.py
# TODO: the check is failing for some reason:
# https://github.com/ggml-org/llama.cpp/actions/runs/8875330981/job/24364557177?pr=6920
[[ $py == convert_hf_to_gguf_update.py ]] && continue
check_convert_script "$py"
done
info 'Done! No issues found.'

66
scripts/compare-commits.sh Executable file
View File

@@ -0,0 +1,66 @@
#!/usr/bin/env bash
# At least the two commits to compare are required
if [[ $# -lt 2 ]]; then
    echo "usage: ./scripts/compare-commits.sh <commit1> <commit2> [tool] [additional arguments]"
    echo " tool: 'llama-bench' (default) or 'test-backend-ops'"
    echo " additional arguments: passed to the selected tool"
    exit 1
fi

# Abort on the first failing command and trace every command
set -e
set -x

# Parse arguments
commit1=$1
commit2=$2
tool=${3:-llama-bench}
# Everything after the third argument, joined into one space-separated string
additional_args="${*:4}"

# Validate tool argument
case "$tool" in
    llama-bench|test-backend-ops)
        ;;
    *)
        echo "Error: tool must be 'llama-bench' or 'test-backend-ops'"
        exit 1
        ;;
esac

# verify at the start that the compare script has all the necessary dependencies installed
./scripts/compare-llama-bench.py --check

if ! command -v sqlite3 >/dev/null 2>&1; then
    echo "Error: sqlite3 is not installed or not in PATH"
    echo "Please install sqlite3 to use this script"
    exit 1
fi

# Per-tool database file, build target and command line
case "$tool" in
    llama-bench)
        db_file="llama-bench.sqlite"
        target="llama-bench"
        run_args="-o sql -oe md $additional_args"
        ;;
    *) # test-backend-ops
        db_file="test-backend-ops.sqlite"
        target="test-backend-ops"
        run_args="perf --output sql $additional_args"
        ;;
esac

# Start from an empty database
rm -f "$db_file" > /dev/null

# to test a backend, call the script with the corresponding environment variable (e.g. GGML_CUDA=1 ./scripts/compare-commits.sh ...)
if [[ -n "$GGML_CUDA" ]]; then
    CMAKE_OPTS="${CMAKE_OPTS} -DGGML_CUDA=ON"
fi

dir="build-bench"
# Build the selected tool from the currently checked-out commit in a fresh
# build directory and feed the SQL it prints into the sqlite database.
# Reads the globals dir, CMAKE_OPTS, target, run_args and db_file.
run() {
    rm -rf ${dir} > /dev/null
    # CMAKE_OPTS and run_args are deliberately unquoted: each holds several
    # options that must be split into separate words.
    cmake -S . -B ${dir} ${CMAKE_OPTS} > /dev/null
    cmake --build ${dir} --target $target -j $(nproc) > /dev/null
    ${dir}/bin/$target $run_args | sqlite3 "$db_file"
}
# Benchmark both commits one after the other, then compare the recorded results.
# The expansions are quoted so that the user-supplied refs and tool name are
# passed as single words and are never subject to pathname expansion.
git checkout "$commit1" > /dev/null
run
git checkout "$commit2" > /dev/null
run
./scripts/compare-llama-bench.py -b "$commit1" -c "$commit2" --tool "$tool" -i "$db_file"

1102
scripts/compare-llama-bench.py Executable file

File diff suppressed because it is too large Load Diff

279
scripts/compare-logprobs.py Normal file
View File

@@ -0,0 +1,279 @@
import argparse
import requests
import json
from pathlib import Path
import logging
# Module-wide logger; the root logger is configured to emit INFO and above.
logger = logging.getLogger("compare-logprobs")
logging.basicConfig(level=logging.INFO)
DESCRIPTION = """
Compare logits between llama.cpp and another inference engine using OpenAI-compatible server endpoints.
Unlike compare-logits.py, it allows dumping logits from a hosted API endpoint. Useful when it's not possible to run both models locally.
Example usage:
Step 1: Dump logits from two different servers
python scripts/compare-logprobs.py dump logits_llama.log http://localhost:8080/v1/completions
python scripts/compare-logprobs.py dump logits_other.log http://other-engine:8000/v1/completions
(optionally, you can add --api-key <key> if the endpoint requires authentication)
Step 2: Compare the dumped logits
python scripts/compare-logprobs.py compare logits_llama.log logits_other.log report.md
"""
def get_remote_corpus(url: str, length: int) -> list[str]:
    """Download a text and return exactly ``length`` words taken from it.

    The text is split on single spaces. Words that are empty or contain ``<``
    (which could look like special tokens) are dropped. If fewer than
    ``length`` words remain, the word list is repeated until it is long enough.

    Args:
        url: Location of the text to download.
        length: Number of words to return.

    Returns:
        A list with ``length`` words (empty when ``length`` is not positive).

    Raises:
        requests.HTTPError: If the download does not succeed.
        ValueError: If words are requested but the text contains no usable
            word. Without this check the repetition loop would never end.
    """
    response = requests.get(url)
    response.raise_for_status()
    corpus = response.text
    words = [w.strip() for w in corpus.strip().split(" ")]
    words = [w for w in words if "<" not in w]  # make sure nothing looks like special tokens
    words = [w for w in words if len(w) > 0]  # filter out empty strings
    if length > 0 and not words:
        raise ValueError(f"No usable words found in remote corpus {url}")
    while len(words) < length:
        words += words
    return words[:length]
def dump_logits(
    endpoint: str,
    output_path: Path,
    input_words: list[str],
    pattern: list[tuple[bool, int]],
    api_key=None,
):
    """Query a completions endpoint word by word and dump the raw responses.

    The prompt grows by one word of ``input_words`` at a time. ``pattern`` is
    a sequence of ``(get, n)`` pairs: for ``get == True`` a request is sent
    after each of the next ``n`` words and the JSON response is written as one
    line to ``output_path``; for ``get == False`` the next ``n`` words are
    only appended to the prompt.

    Args:
        endpoint: URL of the completions endpoint.
        output_path: File that receives one JSON document per request.
        input_words: Words used to build the prompt. The list is only read,
            never modified.
        pattern: ``(get, n)`` pairs as described above.
        api_key: Optional key, sent as a bearer token when given.

    Raises:
        IndexError: If ``pattern`` consumes more words than are available.
        requests.HTTPError: If the endpoint answers with an error status.
    """
    logger.info(f"Dumping logits to {output_path} from endpoint {endpoint}...")
    words = input_words
    curr_text = ""
    n_total = sum(n for get, n in pattern if get)
    n_done = 0
    i_cur = 0  # index of the next unused word; also stored as "__index" in each record
    i_total = len(words)
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    with output_path.open("w") as f:
        for get, n in pattern:
            if not get:
                # skip n words
                for _ in range(n):
                    curr_text += words[i_cur] + " "
                    i_cur += 1
                continue
            # get n words
            for _ in range(n):
                # Words are read by index instead of pop(0): this leaves the
                # caller's list untouched and avoids shifting the whole list
                # for every word.
                curr_text += words[i_cur] + " "
                payload = {
                    "prompt": curr_text.strip(),
                    "temperature": 0.0,
                    "top_k": 1,
                    "max_tokens": 1,
                    "logprobs": 1,
                    "stream": False,
                }
                response = requests.post(
                    endpoint,
                    json=payload,
                    headers=headers,
                )
                response.raise_for_status()
                data = response.json()
                data["__index"] = i_cur  # add index for easier debugging later
                data = json.dumps(data)
                f.write(f"{data}\n")
                n_done += 1
                i_cur += 1
                logger.info(
                    f"\n\n{data}\n\n[Step: {n_done}/{n_total} | Word: {i_cur}/{i_total}]"
                )
    logger.info(f"Logits dumped to {output_path}")
def get_token_logprobs(data: dict):
    """Return ``(token, logprob)`` of the first generated token of a response.

    Two layouts of ``data["choices"][0]["logprobs"]`` are handled: one with a
    ``"content"`` list holding ``"top_logprobs"`` entries (llama.cpp), and one
    with parallel ``"tokens"`` / ``"token_logprobs"`` lists (vllm).
    """
    choice_logprobs = data["choices"][0]["logprobs"]
    if "content" not in choice_logprobs:
        # vllm case
        tokens, values = choice_logprobs["tokens"], choice_logprobs["token_logprobs"]
        return tokens[0], values[0]
    # llama.cpp case
    best = choice_logprobs["content"][0]["top_logprobs"][0]
    return best["token"], best["logprob"]
def clean_text(text: str) -> str:
    """Quote ``text`` for a markdown table cell.

    Newline, tab and carriage return are shown as their backslash escapes, the
    pipe character is backslash-escaped, and the result is wrapped in single
    quotes.
    """
    escaped = text
    for raw, shown in (("\n", "\\n"), ("\t", "\\t"), ("\r", "\\r"), ("|", "\\|")):
        escaped = escaped.replace(raw, shown)
    return f"'{escaped}'"
def compare_logits(input1: Path, input2: Path, output_path: Path):
    """Compare two dump files line by line and write a markdown report.

    Each line of both inputs is a JSON response. For every pair of non-empty
    lines the first token and its logprob are extracted from both sides and
    listed together with the absolute logprob difference. The table is written
    to ``output_path`` and also logged.

    Raises:
        AssertionError: If the two inputs differ in their number of lines.
    """
    with input1.open("r") as f1, input2.open("r") as f2, output_path.open("w") as fout:
        lines1 = f1.readlines()
        lines2 = f2.readlines()

        header = [
            "idx",
            input1.name,
            "logprob_1",
            input2.name,
            "logprob_2",
            "diff (abs)",
        ]
        rows = []

        assert len(lines1) == len(
            lines2
        ), "Input files must have the same number of lines."

        fout.write("# Logits Comparison Report\n\n")

        for line_no, (raw1, raw2) in enumerate(zip(lines1, lines2)):
            if not raw1.strip() or not raw2.strip():
                continue  # skip empty lines

            data1 = json.loads(raw1)
            data2 = json.loads(raw2)

            idx1 = data1.get("__index", -1)
            idx2 = data2.get("__index", -1)
            if idx1 != idx2:
                logger.warning(
                    f"Warning: Mismatched indices at line {line_no}: {idx1} vs {idx2}"
                )

            token1, logprob1 = get_token_logprobs(data1)
            token2, logprob2 = get_token_logprobs(data2)
            token1 = clean_text(token1)
            token2 = clean_text(token2)
            abs_diff = abs(logprob1 - logprob2)

            rows.append(
                (
                    str(idx1 + 1),
                    token1,
                    f"{logprob1:.4f}",
                    token2,
                    f"{logprob2:.4f}",
                    f"{(abs_diff):.4f}",
                )
            )

        # Every column is as wide as its widest cell, header included
        widths = [
            max([len(title)] + [len(row[col]) for row in rows])
            for col, title in enumerate(header)
        ]

        def render(cells):
            # One table line with left-aligned, padded cells
            return "".join(f"| {cell:<{width}} " for cell, width in zip(cells, widths)) + "|\n"

        output = render(header)
        output += "".join(f"|{'-' * (width + 2)}" for width in widths) + "|\n"
        for row in rows:
            output += render(row)

        logger.info("\n" + output)
        fout.write(output)
    logger.info(f"Report written to {output_path}")
def parse_pattern(pattern: str) -> list[tuple[bool, int]]:
    """Parse a comma-separated ``n_get,n_skip,n_get,...`` pattern.

    Entries at even positions (0, 2, ...) become ``(True, n)`` ("get n words"),
    entries at odd positions become ``(False, n)`` ("skip n words").

    Raises:
        ValueError: If an entry is not an integer.
    """
    return [
        (position % 2 == 0, int(part))
        for position, part in enumerate(pattern.split(","))
    ]
def parse_args() -> argparse.Namespace:
    """Build the command line parser and parse the process arguments.

    Two subcommands are defined, selected through ``args.verb``: ``dump``
    (``output``, ``endpoint``, ``--api-key``, ``--file``, ``--pattern``) and
    ``compare`` (``input1``, ``input2``, ``output``).
    """
    root = argparse.ArgumentParser(
        description=DESCRIPTION, formatter_class=argparse.RawTextHelpFormatter
    )
    verbs = root.add_subparsers(dest="verb", required=True, help="action to perform")

    # dump subcommand
    dump_cmd = verbs.add_parser("dump", help="dump logits from an endpoint")
    dump_cmd.add_argument(
        "output", type=Path, help="output path for dumped logits (.log)"
    )
    dump_cmd.add_argument(
        "endpoint", type=str, help="OAI-compat /completions endpoint"
    )
    # Optional string flags of `dump`: (flag, default, help)
    dump_options = (
        (
            "--api-key",
            None,
            "API key for authentication (if required)",
        ),
        (
            "--file",
            "https://raw.githubusercontent.com/ggml-org/llama.cpp/eaba92c3dcc980ebe753348855d4a5d75c069997/tools/server/README.md",
            "File containing prompt to use instead of the default (can also be an URL)",
        ),
        (
            "--pattern",
            "10,1000,10,4000,10",
            "Pattern n_get,n_skip,... where n_get is number of words to get and n_skip is number of words to skip (num of words, NOT num of tokens)",
        ),
    )
    for flag, default, help_text in dump_options:
        dump_cmd.add_argument(flag, type=str, default=default, help=help_text)

    # compare subcommand
    compare_cmd = verbs.add_parser("compare", help="compare two dumped logits files")
    compare_cmd.add_argument("input1", type=Path, help="first input file (.log)")
    compare_cmd.add_argument("input2", type=Path, help="second input file (.log)")
    compare_cmd.add_argument(
        "output", type=Path, help="output path for comparison report (.md)"
    )

    try:
        return root.parse_args()
    except Exception as err:
        root.print_help()
        raise err
def main():
    """Entry point: run the ``dump`` or ``compare`` subcommand."""
    args = parse_args()

    if args.verb == "compare":
        compare_logits(args.input1, args.input2, args.output)
        return
    if args.verb != "dump":
        raise ValueError(f"Unknown verb: {args.verb}")

    pattern = parse_pattern(args.pattern)
    # Total number of words the pattern consumes, both "get" and "skip"
    required_words = sum(n for _, n in pattern)

    if args.file.startswith("http"):
        input_words = get_remote_corpus(args.file, required_words)
        logger.info(f"Fetched {len(input_words)} words from remote {args.file}")
    else:
        with open(args.file, "r") as f:
            input_words = f.read().strip().split(" ")
        input_words = [w for w in input_words if len(w) > 0]  # filter out empty strings
        if len(input_words) < required_words:
            raise ValueError(
                f"Input file has only {len(input_words)} words, but pattern requires at least {required_words} words."
            )
        logger.info(f"Using {len(input_words)} words")

    dump_logits(args.endpoint, args.output, input_words, pattern, args.api_key)


if __name__ == "__main__":
    main()

201
scripts/create_ops_docs.py Executable file
View File

@@ -0,0 +1,201 @@
#!/usr/bin/env python3
"""
This script parses docs/ops/*.csv and creates the ops.md, which is a table documenting supported operations on various ggml backends.
"""
import csv
import logging
import sys
from pathlib import Path
from collections import defaultdict
class DocsGenerator:
    """Generate a markdown table of operation support per backend.

    The input is every ``*.csv`` file in ``<ggml_root>/docs/ops``. Only rows
    whose ``test_mode`` is ``support`` are used; their ``error_message``
    column carries ``yes`` or ``no``. The resulting table is written to
    ``<ggml_root>/docs/<output_filename>``.
    """

    # Table cell symbols; they match the legend emitted by generate_markdown().
    _SUPPORT_SYMBOLS = {
        "supported": "✅",
        "partially supported": "🟡",
        "unsupported": "❌",
    }

    def __init__(self, ggml_root: str, output_filename: str = "ops.md"):
        self.ggml_root = Path(ggml_root)
        self.ops_dir = self.ggml_root / "docs" / "ops"
        self.output_filename = output_filename
        # backend -> operation -> one bool per parsed test row
        self.backend_support: dict[str, dict[str, list[bool]]] = defaultdict(
            lambda: defaultdict(list)
        )
        self.all_operations: set[str] = set()
        self.all_backends: set[str] = set()
        self.logger = logging.getLogger(__name__)

    def parse_support_files(self) -> None:
        """Parse every CSV file in the ops directory (warns if it is missing)."""
        if not self.ops_dir.exists():
            self.logger.warning(f"ops directory not found: {self.ops_dir}")
            return

        self.logger.info(f"Parsing support files from {self.ops_dir}...")

        for support_file in self.ops_dir.glob("*.csv"):
            self.logger.info(f" Reading: {support_file.name}")
            self._parse_support_file(support_file)

    def _parse_support_file(self, file_path: Path) -> None:
        """Record the support rows of one CSV file; errors are logged, not raised."""
        try:
            with open(file_path, "r", newline='') as f:
                reader = csv.DictReader(f)

                for row in reader:
                    # Skip rows that don't have support mode
                    if row.get('test_mode') != 'support':
                        continue

                    backend_name = row.get('backend_name', '').strip()
                    operation = row.get('op_name', '').strip()
                    supported_str = row.get('error_message', '').strip()  # "yes" or "no"
                    backend_reg_name = row.get('backend_reg_name', '').strip()

                    # Skip invalid or error operations
                    if not operation or not backend_name or operation in [
                        "CONTEXT_ERROR",
                        "BUILD_ERROR",
                    ]:
                        continue

                    is_supported = supported_str.lower() == "yes"

                    # Use backend_reg_name for grouping, fallback to backend_name
                    backend_key = backend_reg_name if backend_reg_name else backend_name

                    self.all_backends.add(backend_key)
                    self.backend_support[backend_key][operation].append(is_supported)
                    self.all_operations.add(operation)

        except Exception as e:
            self.logger.error(f" Error parsing {file_path}: {e}")

    def get_backend_support_status(self, backend: str, operation: str) -> str:
        """Return the support status of ``operation`` on a single backend.

        ``"supported"`` when every recorded row is positive, ``"partially
        supported"`` when only some are, ``"unsupported"`` when none is or no
        row was recorded.
        """
        support_list = self.backend_support[backend].get(operation, [])

        if not support_list:
            return "unsupported"

        all_supported = all(support_list)
        any_supported = any(support_list)

        if all_supported:
            return "supported"
        elif any_supported:
            return "partially supported"
        else:
            return "unsupported"

    def get_support_status(self, operation: str) -> str:
        """Return the support status of ``operation`` across all backends.

        ``"supported"`` when every backend fully supports it, ``"unsupported"``
        when no backend supports it at all, ``"partially supported"`` otherwise.
        """
        if operation not in self.all_operations:
            return "unsupported"

        # The per-backend status is used instead of the truthiness of the
        # result list: a non-empty list of only False values must not count
        # as support.
        statuses = [
            self.get_backend_support_status(backend, operation)
            for backend in self.all_backends
        ]

        if not statuses or all(status == "unsupported" for status in statuses):
            return "unsupported"
        elif all(status == "supported" for status in statuses):
            return "supported"
        else:
            return "partially supported"

    def get_support_symbol(self, status: str) -> str:
        """Return the table symbol for a status (empty string if unknown)."""
        return self._SUPPORT_SYMBOLS.get(status, "")

    def generate_markdown(self) -> str:
        """Return the complete markdown document as a string."""
        lines = []

        lines.append("# GGML Operations")
        lines.append("")
        lines.append("List of GGML operations and backend support status.")
        lines.append("")
        lines.append("## How to add a backend to this table:")
        lines.append("")
        lines.append("1. Run `test-backend-ops support --output csv` with your backend name and redirect output to a csv file in `docs/ops/` (e.g., `docs/ops/CUDA.csv`)")
        lines.append("2. Regenerate `/docs/ops.md` via `./scripts/create_ops_docs.py`")
        lines.append("")
        lines.append("Legend:")
        lines.append("- ✅ Fully supported by this backend")
        lines.append("- 🟡 Partially supported by this backend")
        lines.append("- ❌ Not supported by this backend")
        lines.append("")

        backends = sorted(self.all_backends)
        header = "| Operation |"
        for backend in backends:
            header += f" {backend} |"

        separator = "|-----------|"
        for _ in backends:
            separator += "------|"

        lines.append(header)
        lines.append(separator)

        sorted_operations = sorted(self.all_operations)

        for operation in sorted_operations:
            row = f"| {operation:>32} |"

            for backend in backends:
                status = self.get_backend_support_status(backend, operation)
                # Same symbols as announced in the legend above
                symbol = self.get_support_symbol(status)
                row += f" {symbol} |"

            lines.append(row)

        lines.append("")

        return "\n".join(lines)

    def run(self) -> None:
        """Parse all support files and write the markdown table to disk."""
        self.logger.info("Parsing GGML operation support files...")
        self.parse_support_files()

        if not self.all_operations:
            self.logger.error(
                "No operations found. Make sure to run test-backend-ops support --output csv > docs/ops/file.csv first."
            )
            return

        self.logger.info(
            f"Found {len(self.all_operations)} operations across {len(self.all_backends)} backends"
        )

        self.logger.info("Generating markdown...")
        markdown_content = self.generate_markdown()

        docs_dir = self.ggml_root / "docs"
        docs_dir.mkdir(exist_ok=True)

        ops_file = docs_dir / self.output_filename
        # The table contains emoji, so the encoding is fixed to UTF-8 instead
        # of relying on the platform default.
        with open(ops_file, "w", encoding="utf-8") as f:
            f.write(markdown_content)

        self.logger.info(f"Generated: {ops_file}")
        self.logger.info(f"Operations: {len(self.all_operations)}")
        self.logger.info(f"Backends: {len(self.all_backends)}")
def main():
    """Generate the ops table below the current directory.

    The first command line argument, if present, is the output file name;
    it defaults to ``ops.md``.
    """
    logging.basicConfig(level=logging.INFO)

    output_filename = sys.argv[1] if len(sys.argv) > 1 else "ops.md"

    DocsGenerator(".", output_filename).run()


if __name__ == "__main__":
    main()

202
scripts/debug-test.sh Executable file
View File

@@ -0,0 +1,202 @@
#!/usr/bin/env bash
# Name of this script without its directory part, used in the usage texts
PROG=${0##*/}
# Build directory; it is (re)created below the repository root further down
build_dir="build-ci-debug"
# Print Color Commands
# Each variable holds whatever tput prints for the given capability, so the
# values can be embedded directly in the messages printed further down.
red=$(tput setaf 1)
green=$(tput setaf 2)
yellow=$(tput setaf 3)
blue=$(tput setaf 4)
magenta=$(tput setaf 5)
cyan=$(tput setaf 6)
normal=$(tput sgr0)
# Print Help Message
####################
# Writes the full usage text to stdout. The heredoc delimiter is unquoted, so
# $PROG is expanded inside the text.
print_full_help() {
cat << EOF
Usage: $PROG [OPTION]... <test_regex> (test_number)
Debug specific ctest program.
Options:
-h, --help display this help and exit
-g run in gdb mode
Arguments:
<test_regex> (Mandatory) Supply one regex to the script to filter tests
(test_number) (Optional) Test number to run a specific test
Example:
$PROG test-tokenizer
$PROG test-tokenizer 3
EOF
}
# Print "Error: <message>" followed by a short usage text to stderr, then
# exit with status 1.
#   $1  error message
abort() {
echo "Error: $1" >&2
cat << EOF >&2
Usage: $PROG [OPTION]... <test_regex> (test_number)
Debug specific ctest program.
Refer to --help for full instructions.
EOF
exit 1
}
# Dependency Sanity Check
#########################
# Abort with an error message unless the given command can be found.
#   $1  command name
check_dependency() {
    if ! command -v "$1" >/dev/null 2>&1; then
        abort "$1 is required but not found. Please install it and try again."
    fi
}

check_dependency ctest
check_dependency cmake
# Step 0: Check the args
########################
# Help is only recognized as the very first argument; the x"..." prefix keeps
# the comparison well-formed when $1 is empty or starts with a dash.
if [ x"$1" = x"-h" ] || [ x"$1" = x"--help" ]; then
print_full_help >&2
exit 0
fi
# Parse command-line options
gdb_mode=false
while getopts "g" opt; do
case $opt in
g)
gdb_mode=true
echo "gdb_mode Mode Enabled"
;;
esac
done
# Shift the option parameters
shift $((OPTIND - 1))
# Positional Argument Processing : <test_regex>
if [ -z "${1}" ]; then
abort "Test regex is required"
else
test_suite=${1:-}
fi
# Positional Argument Processing : (test_number)
# Stays empty when not given; the user is then asked for it interactively
test_number=${2:-}
# Step 1: Reset and Setup folder context
########################################
## Sanity check that we are actually in a git repo
# If git fails, repo_root stays empty and the directory test below aborts
repo_root=$(git rev-parse --show-toplevel)
if [ ! -d "$repo_root" ]; then
abort "Not in a Git repository."
fi
## Reset folder to root context of git repo and Create and enter build directory
pushd "$repo_root"
# Any existing build directory is deleted first, so every run builds from scratch
rm -rf "$build_dir" && mkdir "$build_dir" || abort "Failed to make $build_dir"
# Step 2: Setup Build Environment and Compile Test Binaries
###########################################################
# NOTE(review): the build is always configured with -DGGML_CUDA=1 — confirm
# that this is intended on machines without CUDA.
cmake -B "./$build_dir" -DCMAKE_BUILD_TYPE=Debug -DGGML_CUDA=1 || abort "Failed to build environment"
pushd "$build_dir"
make -j || abort "Failed to compile"
popd > /dev/null || exit 1
# Step 3: Find all tests available that matches REGEX
####################################################
# Ctest Gather Tests
# `-R test-tokenizer` : looks for all the test files named `test-tokenizer*` (R=Regex)
# `-N` : "show-only" disables test execution & shows test commands that you can feed to GDB.
# `-V` : Verbose Mode
# The regex is passed to printf as an argument (not as part of the format) and
# to ctest as one quoted word, so '%', '\' or glob characters in it are taken
# literally.
printf '\n\nGathering tests that fit REGEX: %s ...\n' "${test_suite}"
pushd "$build_dir"
# Keep only the "Test #<n>: <name>" lines and extract the trimmed <name>.
# The resulting list is split on whitespace into the array.
tests=($(ctest -R "${test_suite}" -V -N | grep -E " +Test +#[0-9]+" | cut -d':' -f2 | awk '{$1=$1};1'))
if [ ${#tests[@]} -eq 0 ]; then
    abort "No tests available... check your compilation process..."
fi
popd > /dev/null || exit 1
# Step 4: Identify Test Command for Debugging
#############################################
# Select test number
if [ -z "$test_number" ]; then
    # List out available tests
    printf 'Which test would you like to debug?\n'
    id=0
    for s in "${tests[@]}"
    do
        echo "Test# ${id}"
        echo "  $s"
        ((id++))
    done

    # Prompt user which test they wanted to run
    printf '\nRun test#? '
    # -r keeps backslashes in the input literal
    read -r test_number
else
    printf '\nUser Already Requested #%s\n' "${test_number}"
fi

# The selection is used as an array index below, so it has to be a plain
# non-negative number inside the list printed above.
case "$test_number" in
    ''|*[!0-9]*)
        abort "Test number must be a non-negative integer, got '${test_number}'"
        ;;
esac
# Force base 10 so that input such as "08" is not read as an octal number
test_number=$((10#$test_number))
if [ "$test_number" -ge "${#tests[@]}" ]; then
    abort "Test number ${test_number} is out of range (0-$(( ${#tests[@]} - 1 )))"
fi

# Grab all tests commands
pushd "$build_dir"
sIFS=$IFS # Save Initial IFS (Internal Field Separator)
IFS=$'\n' # Change IFS (Internal Field Separator) (So we split ctest output by newline rather than by spaces)
test_args=($(ctest -R "${test_suite}" -V -N | grep "Test command" | cut -d':' -f3 | awk '{$1=$1};1' )) # Get test args
IFS=$sIFS # Reset IFS (Internal Field Separator)
popd > /dev/null || exit 1

# Grab specific test command
single_test_name="${tests[test_number]}"
single_test_command="${test_args[test_number]}"
if [ -z "$single_test_command" ]; then
    abort "Could not determine the command for test #${test_number}"
fi
# Step 5: Execute or GDB Debug
##############################
# Test name and command are passed to printf as arguments so that '%' or '\'
# inside them is printed literally instead of being read as format directives.
printf '%sRunning Test #%s: %s%s\n' "${magenta}" "${test_number}" "${single_test_name}" "${normal}"
printf '%ssingle_test_command: %s%s\n' "${cyan}" "${single_test_command}" "${normal}"

if [ "$gdb_mode" = "true" ]; then
    # Execute debugger
    pushd "$repo_root" || exit 1
    # eval re-parses the command line reported by ctest so that its arguments
    # are split as in a shell
    eval "gdb --args ${single_test_command}"
    popd > /dev/null || exit 1
else
    # Execute Test
    pushd "$repo_root" || exit 1
    eval "${single_test_command}"
    exit_code=$?
    popd > /dev/null || exit 1

    # Print Result
    printf '%sRan Test #%s: %s%s\n' "${blue}" "${test_number}" "${single_test_name}" "${normal}"
    printf '%sCommand: %s%s\n' "${yellow}" "${single_test_command}" "${normal}"
    if [ $exit_code -eq 0 ]; then
        printf '%sTEST PASS%s\n' "${green}" "${normal}"
    else
        printf '%sTEST FAIL%s\n' "${red}" "${normal}"
    fi
fi

# Return to the directory from which the user ran the command.
popd > /dev/null || exit 1

View File

@@ -0,0 +1,105 @@
#!/usr/bin/env python
'''
This script fetches all the models used in the server tests.
This is useful for slow tests that use larger models, to avoid them timing out on the model downloads.
It is meant to be run from the root of the repository.
Example:
python scripts/fetch_server_test_models.py
( cd tools/server/tests && ./tests.sh -v -x -m slow )
'''
import ast
import glob
import logging
import os
from typing import Generator
from pydantic import BaseModel
from typing import Optional
import subprocess
class HuggingFaceModel(BaseModel):
    """One model reference (repository plus optional file) found in a parametrized test."""

    # Repository identifier exactly as written in the test's parameter tuple.
    hf_repo: str
    # File inside the repository; stays None when the parametrize list has no "hf_file" column.
    hf_file: Optional[str] = None

    class Config:
        # Instances are collected into a set by the script's main section, so they must be hashable.
        # NOTE(review): presumably `frozen = True` is what makes pydantic generate __hash__ — confirm
        # against the pydantic version in use.
        frozen = True
def collect_hf_model_test_parameters(test_file) -> Generator[HuggingFaceModel, None, None]:
    """Yield the Hugging Face models named in a test file's ``parametrize`` decorators.

    The file is parsed with ``ast`` (it is never imported or executed). Every
    decorator call whose attribute name is ``parametrize`` and whose argument
    names include ``hf_repo`` contributes one model per tuple in its value list.

    Args:
        test_file: path of the Python test file to inspect.

    Yields:
        HuggingFaceModel built from the ``hf_repo`` (and, if present, ``hf_file``)
        column of each parameter tuple.

    Parse/read failures are logged and end the generator without raising.
    """
    try:
        with open(test_file) as f:
            tree = ast.parse(f.read())
    except Exception as e:
        logging.error(f'collect_hf_model_test_parameters failed on {test_file}: {e}')
        return

    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        for dec in node.decorator_list:
            if not (isinstance(dec, ast.Call) and isinstance(dec.func, ast.Attribute) and dec.func.attr == 'parametrize'):
                continue
            if len(dec.args) < 2:
                # argnames/argvalues passed by keyword: nothing positional to inspect
                logging.warning(f'Skipping parametrize without positional arguments at {test_file}:{node.lineno}')
                continue

            try:
                raw_names = ast.literal_eval(dec.args[0])
            except ValueError:
                logging.warning(f'Skipping parametrize with non-literal argument names at {test_file}:{node.lineno}')
                continue

            # argnames may be a comma-separated string or a list/tuple of strings;
            # surrounding whitespace is not part of the name.
            if isinstance(raw_names, str):
                param_names = [name.strip() for name in raw_names.split(",")]
            elif isinstance(raw_names, (list, tuple)):
                param_names = [str(name).strip() for name in raw_names]
            else:
                logging.warning(f'Skipping parametrize with unsupported argument names at {test_file}:{node.lineno}')
                continue

            if "hf_repo" not in param_names:
                continue

            raw_param_values = dec.args[1]
            if not isinstance(raw_param_values, ast.List):
                logging.warning(f'Skipping non-list parametrize entry at {test_file}:{node.lineno}')
                continue

            hf_repo_idx = param_names.index("hf_repo")
            hf_file_idx = param_names.index("hf_file") if "hf_file" in param_names else None

            for t in raw_param_values.elts:
                if not isinstance(t, ast.Tuple):
                    logging.warning(f'Skipping non-tuple parametrize entry at {test_file}:{node.lineno}')
                    continue
                yield HuggingFaceModel(
                    hf_repo=ast.literal_eval(t.elts[hf_repo_idx]),
                    hf_file=ast.literal_eval(t.elts[hf_file_idx]) if hf_file_idx is not None else None)
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

    # Unique models across all unit test files, in a stable order.
    # `or ''` keeps the sort key comparable when the same repo appears both with and
    # without an explicit file (None cannot be ordered against str).
    models = sorted(
        {
            model
            for test_file in glob.glob('tools/server/tests/unit/test_*.py')
            for model in collect_hf_model_test_parameters(test_file)
        },
        key=lambda m: (m.hf_repo, m.hf_file or ''))

    logging.info(f'Found {len(models)} models in parameterized tests:')
    for m in models:
        logging.info(f' - {m.hf_repo} / {m.hf_file}')

    # The llama-cli binary can be overridden through LLAMA_CLI_BIN_PATH; otherwise it is
    # looked up relative to this script, with a Windows-specific default path.
    cli_path = os.environ.get(
        'LLAMA_CLI_BIN_PATH',
        os.path.join(
            os.path.dirname(__file__),
            '../build/bin/Release/llama-cli.exe' if os.name == 'nt' else '../build/bin/llama-cli'))

    for m in models:
        # Entries containing '<' are skipped (presumably placeholders rather than real names — verify).
        if '<' in m.hf_repo or (m.hf_file is not None and '<' in m.hf_file):
            continue
        if m.hf_file is not None and '-of-' in m.hf_file:
            logging.warning(f'Skipping model at {m.hf_repo} / {m.hf_file} because it is a split file')
            continue
        logging.info(f'Using llama-cli to ensure model {m.hf_repo}/{m.hf_file} was fetched')
        # Generate a single token so llama-cli downloads the model as a side effect.
        cmd = [
            cli_path,
            '-hfr', m.hf_repo,
            *([] if m.hf_file is None else ['-hff', m.hf_file]),
            '-n', '1',
            '-p', 'Hey',
            '--no-warmup',
            '--log-disable',
            '-st']
        if m.hf_file != 'tinyllamas/stories260K.gguf' and 'Mistral-Nemo' not in m.hf_repo:
            cmd += ('-fa', 'on')
        try:
            subprocess.check_call(cmd)
        except subprocess.CalledProcessError:
            logging.error(f'Failed to fetch model at {m.hf_repo} / {m.hf_file} with command:\n {" ".join(cmd)}')
            # SystemExit works even where the interactive `exit` helper is not installed
            raise SystemExit(1)

9
scripts/gen-authors.sh Executable file
View File

@@ -0,0 +1,9 @@
#!/usr/bin/env bash

# Regenerate the AUTHORS file from the commit history of the `master` branch.

# Pass the date as an argument so it is never interpreted as a printf format string.
printf "# date: %s\n" "$(date)" > AUTHORS
printf "# this file is auto-generated by scripts/gen-authors.sh\n\n" >> AUTHORS

# One "Name <email>" entry per distinct author, sorted.
git log --format='%an <%ae>' --reverse --date=short master | awk '!seen[$0]++' | sort >> AUTHORS

# if necessary, update your name here. for example: jdoe -> John Doe
# An attached backup suffix (-i.bak) is accepted by both GNU and BSD sed, unlike `-i ''`.
sed -i.bak 's/^jdoe/John Doe/g' AUTHORS && rm -f AUTHORS.bak

196
scripts/gen-unicode-data.py Normal file
View File

@@ -0,0 +1,196 @@
from __future__ import annotations
import array
import unicodedata
import requests
MAX_CODEPOINTS = 0x110000
UNICODE_DATA_URL = "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt"
# see https://www.unicode.org/L2/L1999/UnicodeData.html
def unicode_data_iter():
    """Download UnicodeData.txt and yield one record per code point.

    Yields:
        Tuples ``(cpt, cpt_lower, cpt_upper, categ, bidir)`` where the case
        mappings are 0 when the file leaves them empty. Ranges that the file
        encodes as a ``<..., First>`` / ``<..., Last>`` pair are expanded so
        every code point in the range is yielded with the range's properties.

    Raises:
        requests.HTTPError: if the download returns an error status.
    """
    # A timeout keeps the generator from hanging forever on a stalled connection.
    res = requests.get(UNICODE_DATA_URL, timeout=60)
    res.raise_for_status()
    data = res.content.decode()

    prev = []

    for line in data.splitlines():
        # e.g.: 0000;<control>;Cc;0;BN;;;;;N;NULL;;;;
        line = line.split(";")

        cpt = int(line[0], base=16)
        assert cpt < MAX_CODEPOINTS

        cpt_lower = int(line[-2] or "0", base=16)
        assert cpt_lower < MAX_CODEPOINTS

        cpt_upper = int(line[-3] or "0", base=16)
        assert cpt_upper < MAX_CODEPOINTS

        categ = line[2].strip()
        assert len(categ) == 2

        bidir = line[4].strip()
        # the bidi class must be present (the original re-checked `categ` here by mistake)
        assert bidir

        name = line[1]
        if name.endswith(", First>"):
            # remember the start of the range; it is emitted when ", Last>" arrives
            prev = (cpt, cpt_lower, cpt_upper, categ, bidir)
            continue
        if name.endswith(", Last>"):
            # both ends must carry the same properties and no case mappings
            assert prev[1:] == (0, 0, categ, bidir)
            for c in range(prev[0], cpt):
                yield (c, cpt_lower, cpt_upper, categ, bidir)

        yield (cpt, cpt_lower, cpt_upper, categ, bidir)
# Bit flags, one per top-level Unicode category group.
# see definition in unicode.h
CODEPOINT_FLAG_UNDEFINED = 0x0001 #
CODEPOINT_FLAG_NUMBER = 0x0002 # \p{N}
CODEPOINT_FLAG_LETTER = 0x0004 # \p{L}
CODEPOINT_FLAG_SEPARATOR = 0x0008 # \p{Z}
CODEPOINT_FLAG_MARK = 0x0010 # \p{M}
CODEPOINT_FLAG_PUNCTUATION = 0x0020 # \p{P}
CODEPOINT_FLAG_SYMBOL = 0x0040 # \p{S}
CODEPOINT_FLAG_CONTROL = 0x0080 # \p{C}

# Maps the two-letter general category read from UnicodeData.txt to its flag.
UNICODE_CATEGORY_TO_FLAG = {
    "Cn": CODEPOINT_FLAG_UNDEFINED, # Undefined
    "Cc": CODEPOINT_FLAG_CONTROL, # Control
    "Cf": CODEPOINT_FLAG_CONTROL, # Format
    "Co": CODEPOINT_FLAG_CONTROL, # Private Use
    "Cs": CODEPOINT_FLAG_CONTROL, # Surrogate
    "Ll": CODEPOINT_FLAG_LETTER, # Lowercase Letter
    "Lm": CODEPOINT_FLAG_LETTER, # Modifier Letter
    "Lo": CODEPOINT_FLAG_LETTER, # Other Letter
    "Lt": CODEPOINT_FLAG_LETTER, # Titlecase Letter
    "Lu": CODEPOINT_FLAG_LETTER, # Uppercase Letter
    "L&": CODEPOINT_FLAG_LETTER, # Cased Letter
    "Mc": CODEPOINT_FLAG_MARK, # Spacing Mark
    "Me": CODEPOINT_FLAG_MARK, # Enclosing Mark
    "Mn": CODEPOINT_FLAG_MARK, # Nonspacing Mark
    "Nd": CODEPOINT_FLAG_NUMBER, # Decimal Number
    "Nl": CODEPOINT_FLAG_NUMBER, # Letter Number
    "No": CODEPOINT_FLAG_NUMBER, # Other Number
    "Pc": CODEPOINT_FLAG_PUNCTUATION, # Connector Punctuation
    "Pd": CODEPOINT_FLAG_PUNCTUATION, # Dash Punctuation
    "Pe": CODEPOINT_FLAG_PUNCTUATION, # Close Punctuation
    "Pf": CODEPOINT_FLAG_PUNCTUATION, # Final Punctuation
    "Pi": CODEPOINT_FLAG_PUNCTUATION, # Initial Punctuation
    "Po": CODEPOINT_FLAG_PUNCTUATION, # Other Punctuation
    "Ps": CODEPOINT_FLAG_PUNCTUATION, # Open Punctuation
    "Sc": CODEPOINT_FLAG_SYMBOL, # Currency Symbol
    "Sk": CODEPOINT_FLAG_SYMBOL, # Modifier Symbol
    "Sm": CODEPOINT_FLAG_SYMBOL, # Math Symbol
    "So": CODEPOINT_FLAG_SYMBOL, # Other Symbol
    "Zl": CODEPOINT_FLAG_SEPARATOR, # Line Separator
    "Zp": CODEPOINT_FLAG_SEPARATOR, # Paragraph Separator
    "Zs": CODEPOINT_FLAG_SEPARATOR, # Space Separator
}
# One unsigned 16-bit flag word per code point; anything UnicodeData.txt does not
# list keeps the UNDEFINED flag.
codepoint_flags = array.array('H', [CODEPOINT_FLAG_UNDEFINED]) * MAX_CODEPOINTS
table_whitespace = []
table_lowercase = []
table_uppercase = []
table_nfd = []

for (cpt, cpt_lower, cpt_upper, categ, bidir) in unicode_data_iter():
    # convert codepoint to unicode character
    char = chr(cpt)

    # codepoint category flags
    codepoint_flags[cpt] = UNICODE_CATEGORY_TO_FLAG[categ]

    # lowercase conversion
    if cpt_lower:
        table_lowercase.append((cpt, cpt_lower))

    # uppercase conversion
    if cpt_upper:
        table_uppercase.append((cpt, cpt_upper))

    # NFD normalization: only the first code point of the decomposition is kept
    norm = ord(unicodedata.normalize('NFD', char)[0])
    if cpt != norm:
        table_nfd.append((cpt, norm))

# whitespaces, see "<White_Space>" https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
table_whitespace.extend(range(0x0009, 0x000D + 1))
table_whitespace.extend(range(0x2000, 0x200A + 1))
table_whitespace.extend([0x0020, 0x0085, 0x00A0, 0x1680, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000])

# sort by codepoint
table_whitespace.sort()
table_lowercase.sort()
table_uppercase.sort()
table_nfd.sort()

# group ranges with same flags: a new (start, flags) entry is added only where the
# flags change, and a final (MAX_CODEPOINTS, 0) entry terminates the list
ranges_flags: list[tuple[int, int]] = [(0, codepoint_flags[0])] # start, flags
for codepoint, flags in enumerate(codepoint_flags):
    if flags != ranges_flags[-1][1]:
        ranges_flags.append((codepoint, flags))
ranges_flags.append((MAX_CODEPOINTS, 0x0000))

# group ranges with same nfd: consecutive code points that normalize to the same
# value extend the last range; anything else opens a new one. The list is seeded
# with (0, 0, 0).
ranges_nfd: list[tuple[int, int, int]] = [(0, 0, 0)] # start, last, nfd
for codepoint, norm in table_nfd:
    start = ranges_nfd[-1][0]
    if ranges_nfd[-1] != (start, codepoint - 1, norm):
        ranges_nfd.append((0, 0, 0)) # dummy, will be replaced below
        start = codepoint
    ranges_nfd[-1] = (start, codepoint, norm)
# Generate 'unicode-data.cpp':
#   python ./scripts/gen-unicode-data.py > unicode-data.cpp


def out(line=""):
    """Write `line` to stdout followed by a newline (an empty line by default)."""
    print(line)  # noqa
# File header of the generated C++ source.
out("""\
// generated with scripts/gen-unicode-data.py
#include "unicode-data.h"
#include <cstdint>
#include <vector>
#include <unordered_map>
#include <unordered_set>
""")

# Flag ranges: each entry starts a run that lasts until the next entry's start.
out("const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags = { // start, flags // last=next_start-1")
for codepoint, flags in ranges_flags:
    out("{0x%06X, 0x%04X}," % (codepoint, flags))
out("};\n")

# Whitespace code points.
out("const std::unordered_set<uint32_t> unicode_set_whitespace = {")
for codepoint in table_whitespace:
    out("0x%06X," % codepoint)
out("};\n")

# Code point -> lowercase mapping.
out("const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase = {")
for tuple_lw in table_lowercase:
    out("{0x%06X, 0x%06X}," % tuple_lw)
out("};\n")

# Code point -> uppercase mapping.
out("const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase = {")
for tuple_up in table_uppercase:
    out("{0x%06X, 0x%06X}," % tuple_up)
out("};\n")

# NFD ranges as (start, last, nfd) triples.
out("const std::vector<range_nfd> unicode_ranges_nfd = { // start, last, nfd")
for triple in ranges_nfd:
    out("{0x%06X, 0x%06X, 0x%06X}," % triple)
out("};\n")

38
scripts/get-flags.mk Normal file
View File

@@ -0,0 +1,38 @@
# Detect the compiler behind $(GF_CC) and derive warning flags for it.
# Outputs: GF_CC_IS_GCC / GF_CC_IS_CLANG / GF_CC_IS_LLVM_CLANG / GF_CC_IS_APPLE_CLANG,
#          GF_CC_VER, GF_CFLAGS, GF_CXXFLAGS.

# No "clang" in the `--version` output: treat the compiler as GCC.
ifeq '' '$(findstring clang,$(shell $(GF_CC) --version))'
GF_CC_IS_GCC = 1
# Version as a zero-padded six-digit MMmmpp number, taken from the first non-empty
# line printed by -dumpfullversion, falling back to -dumpversion.
GF_CC_VER := $(shell { $(GF_CC) -dumpfullversion 2>/dev/null; echo; $(GF_CC) -dumpversion; } | awk -F. '/./ { printf("%02d%02d%02d", $$1, $$2, $$3); exit }')
else
GF_CC_IS_CLANG = 1
# "Apple" in the `--version` output distinguishes Apple clang from LLVM clang.
ifeq '' '$(findstring Apple,$(shell $(GF_CC) --version))'
GF_CC_IS_LLVM_CLANG = 1
else
GF_CC_IS_APPLE_CLANG = 1
endif
# Same MMmmpp encoding, parsed from the "version X.Y.Z" text of `--version`.
GF_CC_VER := \
$(shell $(GF_CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \
| awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
endif
ifeq ($(GF_CC_IS_CLANG), 1)
# clang options
GF_CFLAGS = -Wunreachable-code-break -Wunreachable-code-return
GF_CXXFLAGS = -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi
# -Wdouble-promotion is added only from LLVM clang 3.8.0 / Apple clang 7.3.0 onwards;
# the MMmmpp encoding lets `expr` compare versions as plain integers.
ifneq '' '$(and $(GF_CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(GF_CC_VER) \>= 030800)))'
GF_CFLAGS += -Wdouble-promotion
endif
ifneq '' '$(and $(GF_CC_IS_APPLE_CLANG),$(filter 1,$(shell expr $(GF_CC_VER) \>= 070300)))'
GF_CFLAGS += -Wdouble-promotion
endif
else
# gcc options
GF_CFLAGS = -Wdouble-promotion
GF_CXXFLAGS = -Wno-array-bounds
# added from gcc 7.1.0 onwards
ifeq ($(shell expr $(GF_CC_VER) \>= 070100), 1)
GF_CXXFLAGS += -Wno-format-truncation
endif
# added from gcc 8.1.0 onwards
ifeq ($(shell expr $(GF_CC_VER) \>= 080100), 1)
GF_CXXFLAGS += -Wextra-semi
endif
endif

38
scripts/get-hellaswag.sh Executable file
View File

@@ -0,0 +1,38 @@
#!/bin/sh
# vim: set ts=4 sw=4 et:

# Download the HellaSwag validation text file (if missing) and print how to use it.

FILE="hellaswag_val_full.txt"
URL="https://raw.githubusercontent.com/klosax/hellaswag_text_data/main/$FILE"

# Print each argument on its own line to stderr, then exit with status 1.
die() {
    printf "%s\n" "$@" >&2
    exit 1
}

# Succeed only if `command -v` resolves every name given as an argument.
have_cmd() {
    for cmd; do
        command -v "$cmd" >/dev/null || return
    done
}

# dl URL DEST: fetch URL into DEST unless DEST already exists.
# The download goes to DEST.part and is renamed only on success, so an interrupted
# or failed transfer never leaves a file that later runs would mistake for complete.
dl() {
    [ -f "$2" ] && return

    dl_tmp="$2.part"
    if have_cmd wget; then
        wget "$1" -O "$dl_tmp"
    elif have_cmd curl; then
        # -f: fail on HTTP errors instead of saving the error page
        curl -fL "$1" -o "$dl_tmp"
    else
        die "Please install wget or curl"
    fi
    dl_status=$?

    if [ "$dl_status" -ne 0 ]; then
        rm -f -- "$dl_tmp"
        return "$dl_status"
    fi
    mv -- "$dl_tmp" "$2"
}

if [ ! -f "$FILE" ]; then
    dl "$URL" "$FILE" || exit
fi

cat <<EOF
Usage:
llama-perplexity -m model.gguf -f $FILE --hellaswag [--hellaswag-tasks N] [other params]
EOF

70
scripts/get-pg.sh Executable file
View File

@@ -0,0 +1,70 @@
#!/usr/bin/env bash
# Print the usage text (including the token count obtained for selected values of n)
# and exit with status 1.
function usage {
    echo "usage: $0 <n>"
    echo "note: n is the number of essays to download"
    echo "for specific n, the resulting pg.txt file will have the following number of tokens:"
    echo "n | tokens"
    echo "--- | ---"
    echo "1 | 6230"
    echo "2 | 23619"
    echo "5 | 25859"
    echo "10 | 36888"
    echo "15 | 50188"
    echo "20 | 59094"
    echo "25 | 88764"
    echo "30 | 103121"
    echo "32 | 108338"
    echo "35 | 113403"
    echo "40 | 127699"
    echo "45 | 135896"
    exit 1
}
# Exit the script with an error unless "$1" resolves to an executable file.
function has_cmd {
    # quote the name so it is passed to `command -v` as a single word
    if ! [ -x "$(command -v "$1")" ]; then
        echo "error: $1 is not available" >&2
        exit 1
    fi
}
# check for: curl, html2text, tail, sed, fmt
has_cmd curl
has_cmd html2text
has_cmd tail
has_cmd sed
has_cmd fmt

if [ $# -ne 1 ]; then
    usage
fi

n=$1

# get urls: the first n essay links found in the RSS feed
urls="$(curl http://www.aaronsw.com/2002/feeds/pgessays.rss | grep html | sed -e "s/.*http/http/" | sed -e "s/html.*/html/" | head -n "$n")"

printf "urls:\n%s\n" "$urls"

if [ -f pg.txt ]; then
    rm pg.txt
fi

c=1
# $urls is intentionally unquoted: one URL per whitespace-separated word
for url in $urls; do
    echo "processing $url"

    cc=$(printf "%03d" "$c")

    # overwrite (not append) the per-essay file so re-running the script does not
    # duplicate its content in pg.txt
    curl -L "$url" | html2text | tail -n +4 | sed -E "s/^[[:space:]]+//g" | fmt -w 80 > "pg-$cc-one.txt"

    cat "pg-$cc-one.txt" >> pg.txt

    # snapshot of everything accumulated so far
    cp -v pg.txt "pg-$cc-all.txt"

    c=$((c+1))

    # don't flood the server
    sleep 1
done

echo "done. data in pg.txt"

exit 0

43
scripts/get-wikitext-2.sh Executable file
View File

@@ -0,0 +1,43 @@
#!/bin/sh
# vim: set ts=4 sw=4 et:

# Download and unpack the wikitext-2 raw test set (if missing) and print how to use it.

ZIP="wikitext-2-raw-v1.zip"
FILE="wikitext-2-raw/wiki.test.raw"
URL="https://huggingface.co/datasets/ggml-org/ci/resolve/main/$ZIP"

# Print each argument on its own line to stderr, then exit with status 1.
die() {
    printf "%s\n" "$@" >&2
    exit 1
}

# Succeed only if `command -v` resolves every name given as an argument.
have_cmd() {
    for cmd; do
        command -v "$cmd" >/dev/null || return
    done
}

# dl URL DEST: fetch URL into DEST unless DEST already exists.
# The download goes to DEST.part and is renamed only on success, so an interrupted
# or failed transfer never leaves a file that later runs would mistake for complete.
dl() {
    [ -f "$2" ] && return

    dl_tmp="$2.part"
    if have_cmd wget; then
        wget "$1" -O "$dl_tmp"
    elif have_cmd curl; then
        # -f: fail on HTTP errors instead of saving the error page
        curl -fL "$1" -o "$dl_tmp"
    else
        die "Please install wget or curl"
    fi
    dl_status=$?

    if [ "$dl_status" -ne 0 ]; then
        rm -f -- "$dl_tmp"
        return "$dl_status"
    fi
    mv -- "$dl_tmp" "$2"
}

have_cmd unzip || die "Please install unzip"

if [ ! -f "$FILE" ]; then
    dl "$URL" "$ZIP" || exit
    unzip -o "$ZIP" || exit
    # the archive is no longer needed once extracted
    rm -f -- "$ZIP"
fi

cat <<EOF
Usage:
llama-perplexity -m model.gguf -f $FILE [other params]
EOF

38
scripts/get-winogrande.sh Executable file
View File

@@ -0,0 +1,38 @@
#!/bin/sh
# vim: set ts=4 sw=4 et:

# Download the Winogrande evaluation CSV (if missing) and print how to use it.

FILE="winogrande-debiased-eval.csv"
URL="https://huggingface.co/datasets/ikawrakow/winogrande-eval-for-llama.cpp/raw/main/$FILE"

# Print each argument on its own line to stderr, then exit with status 1.
die() {
    printf "%s\n" "$@" >&2
    exit 1
}

# Succeed only if `command -v` resolves every name given as an argument.
have_cmd() {
    for cmd; do
        command -v "$cmd" >/dev/null || return
    done
}

# dl URL DEST: fetch URL into DEST unless DEST already exists.
# The download goes to DEST.part and is renamed only on success, so an interrupted
# or failed transfer never leaves a file that later runs would mistake for complete.
dl() {
    [ -f "$2" ] && return

    dl_tmp="$2.part"
    if have_cmd wget; then
        wget "$1" -O "$dl_tmp"
    elif have_cmd curl; then
        # -f: fail on HTTP errors instead of saving the error page
        curl -fL "$1" -o "$dl_tmp"
    else
        die "Please install wget or curl"
    fi
    dl_status=$?

    if [ "$dl_status" -ne 0 ]; then
        rm -f -- "$dl_tmp"
        return "$dl_status"
    fi
    mv -- "$dl_tmp" "$2"
}

if [ ! -f "$FILE" ]; then
    dl "$URL" "$FILE" || exit
fi

cat <<EOF
Usage:
llama-perplexity -m model.gguf -f $FILE --winogrande [--winogrande-tasks N] [other params]
EOF

76
scripts/get_chat_template.py Executable file
View File

@@ -0,0 +1,76 @@
#!/usr/bin/env python
'''
Fetches the Jinja chat template of a HuggingFace model.
If a model has multiple chat templates, you can specify the variant name.
Syntax:
./scripts/get_chat_template.py model_id [variant]
Examples:
./scripts/get_chat_template.py CohereForAI/c4ai-command-r-plus tool_use
./scripts/get_chat_template.py microsoft/Phi-3.5-mini-instruct
'''
import json
import re
import sys
def get_chat_template(model_id, variant=None):
    """Return the Jinja chat template stored in a model's tokenizer_config.json.

    Args:
        model_id: Hugging Face model identifier of the form ``owner/name``.
        variant: name of the template to pick when the config holds several;
            when omitted, the variant called ``default`` is used.

    Returns:
        The chat template as a string.

    Raises:
        ValueError: if ``model_id`` is malformed (HTTP fallback path only).
        Exception: if the model is gated (HTTP 401), if no variant was given and
            there is no ``default`` one, or if the requested variant is unknown.
    """
    try:
        # Use huggingface_hub library if available.
        # Allows access to gated models if the user has access and ran `huggingface-cli login`.
        from huggingface_hub import hf_hub_download
        with open(hf_hub_download(repo_id=model_id, filename="tokenizer_config.json"), encoding="utf-8") as f:
            config_str = f.read()
    except ImportError:
        import requests
        # The ID is interpolated into a URL, so validate it with a real check:
        # an `assert` would be stripped when Python runs with -O.
        if not re.match(r"^[\w.-]+/[\w.-]+$", model_id):
            raise ValueError(f"Invalid model ID: {model_id}")
        response = requests.get(f"https://huggingface.co/{model_id}/resolve/main/tokenizer_config.json", timeout=60)
        if response.status_code == 401:
            raise Exception('Access to this model is gated, please request access, authenticate with `huggingface-cli login` and make sure to run `pip install huggingface_hub`')
        response.raise_for_status()
        config_str = response.text

    try:
        config = json.loads(config_str)
    except json.JSONDecodeError:
        # Fix https://huggingface.co/NousResearch/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json
        # (Remove extra '}' near the end of the file)
        config = json.loads(re.sub(r'\}([\n\s]*\}[\n\s]*\],[\n\s]*"clean_up_tokenization_spaces")', r'\1', config_str))

    chat_template = config['chat_template']
    if isinstance(chat_template, str):
        return chat_template
    else:
        # several named templates: index them by name
        variants = {
            ct['name']: ct['template']
            for ct in chat_template
        }

        def format_variants():
            return ', '.join(f'"{v}"' for v in variants.keys())

        if variant is None:
            if 'default' not in variants:
                raise Exception(f'Please specify a chat template variant (one of {format_variants()})')
            variant = 'default'
            sys.stderr.write(f'Note: picked "default" chat template variant (out of {format_variants()})\n')
        elif variant not in variants:
            raise Exception(f"Variant {variant} not found in chat template (found {format_variants()})")

        return variants[variant]
def main(args):
    """Print the chat template for the model named in `args` to stdout.

    `args` holds the model ID followed by an optional variant name; a
    ValueError is raised when it is empty.
    """
    if len(args) < 1:
        raise ValueError("Please provide a model ID and an optional variant name")

    model_id = args[0]
    variant = args[1] if len(args) >= 2 else None

    sys.stdout.write(get_chat_template(model_id, variant))
if __name__ == '__main__':
main(sys.argv[1:])

18
scripts/git-bisect-run.sh Executable file
View File

@@ -0,0 +1,18 @@
#!/usr/bin/env bash

# Build llama-results in a fresh build directory and run it.
# Arguments starting with "-D" go to the CMake configure step; all others are
# passed to llama-results.

cmake_args=()
llama_results_args=()

for arg in "${@}"; do
    if [[ "$arg" == -D* ]]; then
        cmake_args+=("$arg")
    else
        llama_results_args+=("$arg")
    fi
done

dir="build-bisect"
rm -rf "${dir}" > /dev/null
# "${cmake_args[@]}" forwards every collected option; a bare ${cmake_args} would
# expand to the first array element only.
cmake -B "${dir}" -S . "${cmake_args[@]}" > /dev/null
cmake --build "${dir}" -t llama-results -j "$(nproc)" > /dev/null
"${dir}/bin/llama-results" "${llama_results_args[@]}"

19
scripts/git-bisect.sh Executable file
View File

@@ -0,0 +1,19 @@
#!/usr/bin/env bash

# Bisect between a bad and a good commit using scripts/git-bisect-run.sh:
# reference results are first produced at the good commit, then every bisect step
# is checked against them.

if [ $# -lt 2 ]; then
    echo "usage: ./scripts/git-bisect.sh <commit_bad> <commit_good> [additional arguments]"
    echo " additional arguments: passed to CMake if they start with \"-D\", to llama-results otherwise"
    exit 1
fi

set -e
set -x

commit_bad=$1
commit_good=$2
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"

# Expansions are quoted so a checkout path containing spaces still works.
git checkout "${commit_good}"
"${script_dir}/git-bisect-run.sh" --output results.gguf "${@:3}"
git bisect start "${commit_bad}" "${commit_good}"
git bisect run "${script_dir}/git-bisect-run.sh" --output results.gguf --check "${@:3}"
git bisect reset

112
scripts/hf.sh Executable file
View File

@@ -0,0 +1,112 @@
#!/usr/bin/env bash
#
# Shortcut for downloading HF models
#
# Usage:
# ./llama-cli -m $(./scripts/hf.sh https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf)
# ./llama-cli -m $(./scripts/hf.sh --url https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/blob/main/mixtral-8x7b-v0.1.Q4_K_M.gguf)
# ./llama-cli -m $(./scripts/hf.sh --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf)
#
# all logs go to stderr
function log {
    echo "$@" 1>&2
}

# Print the command-line synopsis to stderr and exit with status 1.
function usage {
    log "Usage: $0 [[--url] <url>] [--repo <repo>] [--file <file>] [--outdir <dir>] [-h|--help]"
    exit 1
}
# check for curl or wget
# has_cmd NAME: return 1 unless NAME resolves to an executable file.
function has_cmd {
    # quote the name so it is passed to `command -v` as a single word
    if ! [ -x "$(command -v "$1")" ]; then
        return 1
    fi
}

# $cmd is a printf template that is later filled with: output dir, file name, URL
if has_cmd wget; then
    cmd="wget -q -c -O %s/%s %s"
elif has_cmd curl; then
    cmd="curl -C - -f --output-dir %s -o %s -L %s"
else
    log "[E] curl or wget not found"
    exit 1
fi
url=""
repo=""
file=""
outdir="."

# require_value OPTION REMAINING: abort with the usage text when OPTION is the last
# argument. Without this check `shift 2` fails, nothing is consumed, and the
# parsing loop below never terminates.
function require_value {
    if [[ $2 -lt 2 ]]; then
        log "[E] missing value for $1"
        usage
    fi
}

# parse args
while [[ $# -gt 0 ]]; do
    case "$1" in
        --url)
            require_value "$1" $#
            url="$2"
            shift 2
            ;;
        --repo)
            require_value "$1" $#
            repo="$2"
            shift 2
            ;;
        --file)
            require_value "$1" $#
            file="$2"
            shift 2
            ;;
        --outdir)
            require_value "$1" $#
            outdir="$2"
            shift 2
            ;;
        -h|--help)
            usage
            ;;
        *)
            # a bare argument is taken as the URL
            url="$1"
            shift
            ;;
    esac
done
# --repo together with --file takes precedence over any URL given
if [ -n "$repo" ] && [ -n "$file" ]; then
    url="https://huggingface.co/$repo/resolve/main/$file"
fi

if [ -z "$url" ]; then
    log "[E] missing --url"
    usage
fi

# check if the URL is a HuggingFace model, and if so, try to download it
is_url=false

if [[ ${#url} -gt 22 ]]; then
    if [[ ${url:0:22} == "https://huggingface.co" ]]; then
        is_url=true
    fi
fi

if [ "$is_url" = false ]; then
    log "[E] invalid URL, must start with https://huggingface.co"
    # this is an error: report it through a non-zero exit status
    exit 1
fi

# replace "blob/main" with "resolve/main"
url=${url/blob\/main/resolve\/main}

basename=$(basename "$url")

log "[+] attempting to download $basename"

if [ -n "$cmd" ]; then
    # fill the downloader template chosen earlier in the script
    cmd=$(printf "$cmd" "$outdir" "$basename" "$url")
    log "[+] $cmd"
    # NOTE: $cmd is word-split on purpose here, so paths containing spaces are not supported
    if $cmd; then
        # the only stdout output: path of the downloaded file
        echo "$outdir/$basename"
        exit 0
    fi
fi

log "[-] failed to download"

exit 1

View File

@@ -0,0 +1,178 @@
#!/usr/bin/env python3
import sys
from collections import defaultdict
import re
def parse_log_file(filepath):
    """Collect per-function VGPR usage from a compiler remark log.

    Three kinds of ``remark:`` lines are recognised: ``Function Name: <name>``,
    ``VGPRs: <n>`` and ``VGPRs Spill: <n>``. A VGPR or spill line is attributed
    to the function most recently reported at the same ``path:line`` location;
    lines whose location matches no function are ignored.

    Args:
        filepath: path of the log file to read.

    Returns:
        A defaultdict mapping function name to
        ``{'vgprs': int, 'spill': int, 'location': 'file:line'}``.

    Exits the process with status 1 if the file does not exist.
    """
    functions = defaultdict(lambda: {'vgprs': 0, 'spill': 0, 'location': ''})
    # "path:line" -> name of the function most recently reported at that location.
    # A dict lookup replaces a backwards scan over every function seen so far.
    latest_func_at = {}

    try:
        with open(filepath, 'r') as f:
            for line in f:
                # Match function name lines
                func_match = re.search(r'remark: ([^:]+):(\d+):\d+: Function Name: (\S+)', line)
                if func_match:
                    location = func_match.group(1) + ':' + func_match.group(2)
                    func_name = func_match.group(3)
                    # Keep just the filename and line number for reporting
                    functions[func_name]['location'] = location.split('/')[-1]
                    latest_func_at[location] = func_name
                    continue

                # Match VGPR usage lines
                vgpr_match = re.search(r'remark: ([^:]+):(\d+):\d+:\s+VGPRs: (\d+)', line)
                if vgpr_match:
                    location = vgpr_match.group(1) + ':' + vgpr_match.group(2)
                    func_name = latest_func_at.get(location)
                    if func_name is not None:
                        functions[func_name]['vgprs'] = int(vgpr_match.group(3))
                    continue

                # Match VGPR spill lines
                spill_match = re.search(r'remark: ([^:]+):(\d+):\d+:\s+VGPRs Spill: (\d+)', line)
                if spill_match:
                    location = spill_match.group(1) + ':' + spill_match.group(2)
                    func_name = latest_func_at.get(location)
                    if func_name is not None:
                        functions[func_name]['spill'] = int(spill_match.group(3))
                    continue
    except FileNotFoundError:
        print(f"Error: File {filepath} not found", file=sys.stderr)  # noqa: NP100
        sys.exit(1)

    return functions
def main():
    """Report functions whose VGPR usage plus spill exceeds 256.

    Reads the log file named by the first command-line argument. Functions on the
    built-in ignore list are printed first, tagged ``[IGNORED]``; all others are
    printed in red and make the process exit with status 1.
    """
    if len(sys.argv) < 2:
        print("Usage: ./vgpr_check.py <log_file>", file=sys.stderr)  # noqa: NP100
        sys.exit(1)

    log_file = sys.argv[1]

    # Mangled names of functions that are known to exceed the limit.
    ignored = {
        '_ZL21gated_linear_attn_f32ILi128EEviiiifPKfS1_S1_S1_S1_Pf',
        '_ZL18flash_attn_ext_f16ILi64ELi64ELi16ELi2ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi80ELi80ELi16ELi2ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi96ELi96ELi16ELi2ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi64ELi64ELi32ELi1ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL13rwkv_wkv7_f32ILi128EEviiiiPKfS1_S1_S1_S1_S1_S1_Pf',
        '_ZL18flash_attn_ext_f16ILi80ELi80ELi16ELi1ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi112ELi112ELi16ELi2ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi80ELi80ELi32ELi1ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi96ELi96ELi16ELi1ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi16ELi2ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi16ELi2ELb1ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi96ELi96ELi32ELi1ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi112ELi112ELi16ELi1ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi112ELi112ELi32ELi1ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi16ELi1ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi16ELi1ELb1ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi80ELi80ELi2ELi8ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi96ELi96ELi2ELi8ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi112ELi112ELi2ELi8ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi2ELi8ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi2ELi8ELb1ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi112ELi112ELi16ELi4ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi16ELi4ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi16ELi4ELb1ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi32ELi2ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi96ELi96ELi4ELi4ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi112ELi112ELi4ELi4ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi4ELi4ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi4ELi4ELb1ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi80ELi80ELi4ELi8ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi4ELi8ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi96ELi96ELi64ELi1ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi112ELi112ELi64ELi1ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi64ELi1ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi64ELi1ELb1ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi64ELi64ELi8ELi4ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi80ELi80ELi8ELi4ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi96ELi96ELi8ELi4ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi112ELi112ELi8ELi4ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi80ELi80ELi8ELi2ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi8ELi4ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi8ELi4ELb1ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi96ELi96ELi8ELi2ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi112ELi112ELi8ELi2ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi8ELi2ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi8ELi2ELb1ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi112ELi112ELi8ELi8ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi8ELi8ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi8ELi8ELb1ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL24mul_mat_q_stream_k_fixupIL9ggml_type22ELi8ELb1EEvPKiS2_PfPKfiiimimimi',
        '_ZL9mul_mat_qIL9ggml_type3ELi32ELb0EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type3ELi48ELb0EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type20ELi32ELb1EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type17ELi64ELb0EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL18flash_attn_ext_f16ILi80ELi80ELi4ELi4ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL15flash_attn_tileILi256ELi256ELi32ELi1ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL9mul_mat_qIL9ggml_type19ELi112ELb1EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type17ELi112ELb1EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type22ELi112ELb1EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type19ELi128ELb0EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type19ELi128ELb1EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type7ELi112ELb1EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type3ELi128ELb0EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type3ELi128ELb1EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type7ELi128ELb0EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type7ELi128ELb1EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type11ELi112ELb0EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type11ELi112ELb1EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL24mul_mat_q_stream_k_fixupIL9ggml_type11ELi128ELb0EEvPKiS2_PfPKfiiimimimi',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi32ELi1ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL9mul_mat_qIL9ggml_type2ELi112ELb0EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL18flash_attn_ext_f16ILi112ELi112ELi32ELi2ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi112ELi112ELi4ELi8ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi32ELi1ELb1ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi32ELi2ELb1ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi128ELi128ELi4ELi8ELb1ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_f16ILi96ELi96ELi4ELi8ELb0ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL18flash_attn_ext_vecILi128ELi2EL9ggml_type2ELS0_2ELb0EEvPKcS2_S2_S2_S2_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS6_IjLj3EEiiiiiiiiiiiliiliiiiil',
        '_ZL9mul_mat_qIL9ggml_type10ELi16ELb1EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type12ELi128ELb1EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type40ELi112ELb0EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type40ELi112ELb1EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type40ELi128ELb0EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii',
        '_ZL9mul_mat_qIL9ggml_type40ELi128ELb1EEvPKcPKiS4_S4_PfS5_iiiiiiiiiiiiiiiii'
    }

    functions = parse_log_file(log_file)

    # Every function over the limit, in name order, paired with its total.
    over_limit = []
    for name, info in sorted(functions.items()):
        total = int(info['vgprs']) + int(info['spill'])
        if total > 256:
            over_limit.append((name, info, total))

    # First pass: known offenders, reported but not counted as failures.
    for name, info, total in over_limit:
        if name in ignored:
            location = info.get('location', log_file)
            print(f"{location}: {name} - Total VGPRs: {total} ({info['vgprs']} + {info['spill']}) [IGNORED]")  # noqa: NP100

    # Second pass: new offenders, printed in red; any of them fails the check.
    red = "\033[91m"
    reset = "\033[0m"
    found_issues = False
    for name, info, total in over_limit:
        if name in ignored:
            continue
        location = info.get('location', log_file)
        print(f"{red}{location}: {name} - Total VGPRs: {total} ({info['vgprs']} + {info['spill']}) {reset}")  # noqa: NP100
        found_issues = True

    sys.exit(1 if found_issues else 0)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,19 @@
:: MIT license
:: Copyright (C) 2024 Intel Corporation
:: SPDX-License-Identifier: MIT
::
:: Downloads an installer web image, unpacks it and runs its bootstrapper.
::   %1 - URL of the web image executable to download
::   %2 - optional value forwarded to the bootstrapper's --components option
:: The script exits with the exit code of the bootstrapper.
set URL=%1
set COMPONENTS=%2
:: Download the web image into %TEMP%; curl retries up to 5 times, 5 seconds apart.
curl.exe --output %TEMP%\webimage.exe --url %URL% --retry 5 --retry-delay 5
:: Run the downloaded image and wait for it to finish.
:: NOTE(review): -s/-x/-f presumably mean silent, extract-only and target folder - confirm against the installer documentation.
start /b /wait %TEMP%\webimage.exe -s -x -f webimage_extracted --log extract.log
:: The downloaded image is no longer needed once it has been run.
del %TEMP%\webimage.exe
:: Only pass --components when a second argument was supplied.
if "%COMPONENTS%"=="" (
webimage_extracted\bootstrapper.exe -s --action install --eula=accept -p=NEED_VS2017_INTEGRATION=0 -p=NEED_VS2019_INTEGRATION=0 -p=NEED_VS2022_INTEGRATION=0 --log-dir=.
) else (
webimage_extracted\bootstrapper.exe -s --action install --components=%COMPONENTS% --eula=accept -p=NEED_VS2017_INTEGRATION=0 -p=NEED_VS2019_INTEGRATION=0 -p=NEED_VS2022_INTEGRATION=0 --log-dir=.
)
:: Remember the bootstrapper's exit code now: the cleanup below overwrites ERRORLEVEL.
set installer_exit_code=%ERRORLEVEL%
rd /s/q "webimage_extracted"
exit /b %installer_exit_code%

505
scripts/jinja/jinja-tester.py Executable file
View File

@@ -0,0 +1,505 @@
#!/usr/bin/env python3
import sys
import json
import argparse
import jinja2.ext as jinja2_ext
from PySide6.QtWidgets import (
QApplication,
QMainWindow,
QWidget,
QVBoxLayout,
QHBoxLayout,
QLabel,
QPlainTextEdit,
QTextEdit,
QPushButton,
QFileDialog,
)
from PySide6.QtGui import QColor, QColorConstants, QTextCursor, QTextFormat
from PySide6.QtCore import Qt, QRect, QSize
from jinja2 import TemplateSyntaxError
from jinja2.sandbox import ImmutableSandboxedEnvironment
from datetime import datetime
from typing import Callable
def _collect_jinja_construct(tokens, start, end_token_type):
    """Concatenate token values from ``tokens[start]`` up to the first ``end_token_type`` token.

    Returns ``(text, end_index)``. ``end_index`` is the index of the closing
    token, or ``None`` when the construct is never closed (malformed template);
    in that case ``text`` lacks a closing delimiter.
    """
    text = tokens[start][2]
    j = start + 1
    while j < len(tokens) and tokens[j][1] != end_token_type:
        text += tokens[j][2]
        j += 1
    if j >= len(tokens):
        return text, None
    return text + tokens[j][2], j


def format_template_content(template_content):
    """Format the Jinja template content using Jinja2's lexer.

    Every ``{% ... %}`` block is moved onto its own line, indented by two
    spaces per nesting level of ``if``/``for``/``macro``/``call``/``block``,
    and given ``-`` whitespace-control markers so that the added line breaks
    do not end up in the rendered output. ``{{ ... }}`` expressions and plain
    template data are copied through unchanged.

    Args:
        template_content: The template source text.

    Returns:
        The reformatted template. Blank or whitespace-only input is returned
        as is, and the trailing whitespace of the input is preserved.
    """
    if not template_content.strip():
        return template_content

    env = ImmutableSandboxedEnvironment()
    tc_rstrip = template_content.rstrip()
    tokens = list(env.lex(tc_rstrip))

    start_control_tokens = ["if", "for", "macro", "call", "block"]
    end_control_tokens = ["end" + t for t in start_control_tokens]

    result = ""
    indent_level = 0
    i = 0
    while i < len(tokens):
        _, token_type, token_value = tokens[i]

        if token_type == "block_begin":
            construct_content, end_index = _collect_jinja_construct(tokens, i, "block_end")
            if end_index is None:
                # Malformed template, just add the token
                result += token_value
            else:
                # The first non-whitespace token after "{%" is the statement keyword.
                # The scan cannot run off the list: it stops at the block_end token at the latest.
                instr = i + 1
                while tokens[instr][1] == "whitespace":
                    instr += 1
                instruction_token = tokens[instr][2]
                i = end_index  # Skip to the end token

                stripped_content = construct_content.strip()
                is_control_start = any(
                    instruction_token.startswith(kw) for kw in start_control_tokens
                )
                is_control_end = any(
                    instruction_token.startswith(kw) for kw in end_control_tokens
                )

                # For control end blocks, decrease indent BEFORE adding the content
                if is_control_end:
                    indent_level = max(0, indent_level - 1)

                # Remove all previous whitespace before this block
                result = result.rstrip()

                # Start a new, indented line unless this is the very first output
                added_newline = bool(result)
                if added_newline:
                    result += "\n" + "  " * indent_level  # Use 2 spaces per indent level

                # Add '-' after '{%' so the inserted newline/indent is stripped at render time.
                # An explicit '-' or '+' modifier chosen by the author is left untouched.
                if (
                    added_newline
                    and stripped_content.startswith("{%")
                    and not stripped_content.startswith(("{%-", "{%+"))
                ):
                    stripped_content = "{%-" + stripped_content[2:]

                # Add '-' before '%}' as well, but only if more content follows this block
                if (
                    stripped_content.endswith("%}")
                    and not stripped_content.endswith(("-%}", "+%}"))
                    and i + 1 < len(tokens)
                    and tokens[i + 1][1] != "eof"
                ):
                    stripped_content = stripped_content[:-2] + "-%}"

                result += stripped_content

                # For control start blocks, increase indent AFTER adding the content
                if is_control_start:
                    indent_level += 1

        elif token_type == "variable_begin":
            construct_content, end_index = _collect_jinja_construct(tokens, i, "variable_end")
            if end_index is None:
                # Malformed template, just add the token
                result += token_value
            else:
                # Variable constructs are left alone: no indent or whitespace changes
                result += construct_content
                i = end_index  # Skip to the end token

        else:
            # Template data and any other token are preserved as is
            result += token_value

        i += 1

    # Clean up trailing newlines and spaces
    result = result.rstrip()

    # Copy the newline / space count from the original
    if (trailing_length := len(template_content) - len(tc_rstrip)):
        result += template_content[-trailing_length:]

    return result
# ------------------------
# Line Number Widget
# ------------------------
class LineNumberArea(QWidget):
    """Gutter widget for a code editor.

    The widget keeps a reference to its editor and forwards both its size
    hint and its painting to that editor.
    """

    def __init__(self, editor):
        super().__init__(editor)
        self.code_editor = editor

    def sizeHint(self):
        """Return the width requested by the editor; the height is left at 0."""
        gutter_width = self.code_editor.line_number_area_width()
        return QSize(gutter_width, 0)

    def paintEvent(self, event):
        """Let the owning editor draw the line numbers."""
        self.code_editor.line_number_area_paint_event(event)
class CodeEditor(QPlainTextEdit):
    """Plain-text editor with a line-number gutter and highlighting helpers."""

    def __init__(self):
        super().__init__()
        self.line_number_area = LineNumberArea(self)

        # Keep the gutter and the current-line highlight in sync with the document.
        self.blockCountChanged.connect(self.update_line_number_area_width)
        self.updateRequest.connect(self.update_line_number_area)
        self.cursorPositionChanged.connect(self.highlight_current_line)

        self.update_line_number_area_width(0)
        self.highlight_current_line()

    def line_number_area_width(self):
        """Return the gutter width: room for the largest line number plus 3 pixels."""
        digit_count = len(str(self.blockCount()))
        return 3 + self.fontMetrics().horizontalAdvance("9") * digit_count

    def update_line_number_area_width(self, _):
        """Reserve space for the gutter on the left of the viewport."""
        self.setViewportMargins(self.line_number_area_width(), 0, 0, 0)

    def update_line_number_area(self, rect, dy):
        """Scroll or repaint the gutter in response to an editor update request."""
        gutter = self.line_number_area
        if dy:
            gutter.scroll(0, dy)
        else:
            gutter.update(0, rect.y(), gutter.width(), rect.height())

        if rect.contains(self.viewport().rect()):
            self.update_line_number_area_width(0)

    def resizeEvent(self, event):
        """Resize the gutter together with the editor."""
        super().resizeEvent(event)
        contents = self.contentsRect()
        gutter_geometry = QRect(
            contents.left(), contents.top(), self.line_number_area_width(), contents.height()
        )
        self.line_number_area.setGeometry(gutter_geometry)

    def line_number_area_paint_event(self, event):
        """Draw the numbers of all visible lines that intersect the repaint area."""
        from PySide6.QtGui import QPainter

        painter = QPainter(self.line_number_area)
        painter.fillRect(event.rect(), QColorConstants.LightGray)

        block = self.firstVisibleBlock()
        block_number = block.blockNumber()
        offset_geometry = self.blockBoundingGeometry(block).translated(self.contentOffset())
        top = int(offset_geometry.top())
        bottom = top + int(self.blockBoundingRect(block).height())

        # Walk the text blocks downwards until the repaint area has been passed.
        while block.isValid() and top <= event.rect().bottom():
            if block.isVisible() and bottom >= event.rect().top():
                painter.setPen(QColorConstants.Black)
                painter.drawText(
                    0,
                    top,
                    self.line_number_area.width() - 2,
                    self.fontMetrics().height(),
                    Qt.AlignmentFlag.AlignRight,
                    str(block_number + 1),
                )

            block = block.next()
            top = bottom
            bottom = top + int(self.blockBoundingRect(block).height())
            block_number += 1

    def highlight_current_line(self):
        """Replace all extra selections with a highlight of the cursor line.

        A read-only editor ends up with no extra selections at all.
        """
        selections = []
        if not self.isReadOnly():
            current = QTextEdit.ExtraSelection()
            current.format.setBackground(QColorConstants.Yellow.lighter(160))  # pyright: ignore[reportAttributeAccessIssue] # ty: ignore[unresolved-attribute]
            current.format.setProperty(QTextFormat.Property.FullWidthSelection, True)  # pyright: ignore[reportAttributeAccessIssue] # ty: ignore[unresolved-attribute]
            current.cursor = self.textCursor()  # pyright: ignore[reportAttributeAccessIssue] # ty: ignore[unresolved-attribute]
            current.cursor.clearSelection()  # pyright: ignore[reportAttributeAccessIssue] # ty: ignore[unresolved-attribute]
            selections.append(current)
        self.setExtraSelections(selections)

    def _append_extra_selection(self, cursor: QTextCursor, color: QColor):
        """Add one more extra selection for ``cursor`` with a lightened ``color`` background."""
        extra = QTextEdit.ExtraSelection()
        extra.format.setBackground(color.lighter(160))  # pyright: ignore[reportAttributeAccessIssue] # ty: ignore[unresolved-attribute]
        extra.cursor = cursor  # pyright: ignore[reportAttributeAccessIssue] # ty: ignore[unresolved-attribute]
        self.setExtraSelections(self.extraSelections() + [extra])

    def highlight_position(self, lineno: int, col: int, color: QColor):
        """Highlight the character at 1-based (``lineno``, ``col``); unknown lines are ignored."""
        block = self.document().findBlockByLineNumber(lineno - 1)
        if not block.isValid():
            return

        cursor = QTextCursor(block)
        line_text = block.text()
        cursor.setPosition(block.position() + max(0, col - 1))
        # Only extend the selection over a character when the column lies inside the line.
        if col <= len(line_text):
            cursor.movePosition(
                QTextCursor.MoveOperation.NextCharacter,
                QTextCursor.MoveMode.KeepAnchor,
            )
        self._append_extra_selection(cursor, color)

    def highlight_line(self, lineno: int, color: QColor):
        """Highlight the whole 1-based line ``lineno``; unknown lines are ignored."""
        block = self.document().findBlockByLineNumber(lineno - 1)
        if not block.isValid():
            return

        cursor = QTextCursor(block)
        cursor.select(QTextCursor.SelectionType.LineUnderCursor)
        self._append_extra_selection(cursor, color)

    def clear_highlighting(self):
        """Drop every highlight except the current-line one."""
        self.highlight_current_line()
# ------------------------
# Main App
# ------------------------
class JinjaTester(QMainWindow):
    """Main window: template and JSON context editors, rendered output and action buttons."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("Jinja Template Tester")
        self.resize(1200, 800)

        central = QWidget()
        root_layout = QVBoxLayout(central)

        # -------- Top input area: template on the left, context on the right --------
        editors_row = QHBoxLayout()

        template_column = QVBoxLayout()
        template_column.addWidget(QLabel("Jinja2 Template"))
        self.template_edit = CodeEditor()
        template_column.addWidget(self.template_edit)
        editors_row.addLayout(template_column)

        default_context = """
{
"add_generation_prompt": true,
"bos_token": "",
"eos_token": "",
"messages": [
{
"role": "user",
"content": "What is the capital of Poland?"
}
]
}
""".strip()
        context_column = QVBoxLayout()
        context_column.addWidget(QLabel("Context (JSON)"))
        self.json_edit = CodeEditor()
        self.json_edit.setPlainText(default_context)
        context_column.addWidget(self.json_edit)
        editors_row.addLayout(context_column)

        root_layout.addLayout(editors_row)

        # -------- Rendered output area --------
        root_layout.addWidget(QLabel("Rendered Output"))
        self.output_edit = QPlainTextEdit()
        self.output_edit.setReadOnly(True)
        root_layout.addWidget(self.output_edit)

        # -------- Action buttons --------
        buttons_row = QHBoxLayout()

        self.load_btn = QPushButton("Load Template")
        self.load_btn.clicked.connect(self.load_template)
        buttons_row.addWidget(self.load_btn)

        self.format_btn = QPushButton("Format")
        self.format_btn.clicked.connect(self.format_template)
        buttons_row.addWidget(self.format_btn)

        self.render_btn = QPushButton("Render")
        self.render_btn.clicked.connect(self.render_template)
        buttons_row.addWidget(self.render_btn)

        root_layout.addLayout(buttons_row)

        # Status label below buttons
        self.status_label = QLabel("Ready")
        root_layout.addWidget(self.status_label)

        self.setCentralWidget(central)

    def render_template(self):
        """Render the template with the JSON context and show the result or the error.

        Syntax errors highlight the offending template line in red, runtime
        errors that can be traced back to a template line in orange.
        """
        self.template_edit.clear_highlighting()
        self.output_edit.clear()

        template_str = self.template_edit.toPlainText()
        json_str = self.json_edit.toPlainText()

        # An empty context editor means "no variables".
        try:
            context = json.loads(json_str) if json_str.strip() else {}
        except Exception as e:
            self.status_label.setText(f"❌ JSON Error: {e}")
            return

        def raise_exception(text: str) -> str:
            raise RuntimeError(text)

        def tojson(x, indent=None, separators=None, sort_keys=False, ensure_ascii=False):
            return json.dumps(
                x,
                indent=indent,
                separators=separators,
                sort_keys=sort_keys,
                ensure_ascii=ensure_ascii,
            )

        def strftime_now(format: str) -> str:
            return datetime.now().strftime(format)

        env = ImmutableSandboxedEnvironment(
            trim_blocks=True,
            lstrip_blocks=True,
            extensions=[jinja2_ext.loopcontrols],
        )
        env.filters["tojson"] = tojson
        env.globals["strftime_now"] = strftime_now
        env.globals["raise_exception"] = raise_exception  # ty: ignore[invalid-assignment]

        try:
            rendered = env.from_string(template_str).render(context)
            self.output_edit.setPlainText(rendered)
            self.status_label.setText("✅ Render successful")
        except TemplateSyntaxError as e:
            self.status_label.setText(f"❌ Syntax Error (line {e.lineno}): {e.message}")
            if e.lineno:
                self.template_edit.highlight_line(e.lineno, QColor("red"))
        except Exception as e:
            # Catch all runtime errors and look for the first traceback frame
            # that belongs to the compiled template to get a line number.
            lineno = None
            tb = e.__traceback__
            while tb:
                if tb.tb_frame.f_code.co_filename == "<template>":
                    lineno = tb.tb_lineno
                    break
                tb = tb.tb_next

            if lineno:
                error_msg = f"Runtime Error at line {lineno} in template: {type(e).__name__}: {e}"
                self.template_edit.highlight_line(lineno, QColor("orange"))
            else:
                error_msg = f"Runtime Error: {type(e).__name__}: {e}"

            self.output_edit.setPlainText(error_msg)
            self.status_label.setText(f"{error_msg}")

    def load_template(self):
        """Load a Jinja template from a file using a file dialog."""
        file_path, _ = QFileDialog.getOpenFileName(
            self,
            "Load Jinja Template",
            "",
            "Template Files (*.jinja *.j2 *.html *.txt);;All Files (*)",
        )
        # An empty path means the dialog was dismissed without a choice.
        if not file_path:
            return

        try:
            with open(file_path, "r", encoding="utf-8") as file:
                content = file.read()
            self.template_edit.setPlainText(content)
            self.status_label.setText(f"✅ Loaded template from {file_path}")
        except Exception as e:
            self.status_label.setText(f"❌ Error loading file: {str(e)}")

    def format_template(self):
        """Format the Jinja template using Jinja2's lexer for proper parsing."""
        try:
            template_content = self.template_edit.toPlainText()
            if not template_content.strip():
                self.status_label.setText("⚠️ Template is empty")
                return

            self.template_edit.setPlainText(format_template_content(template_content))
            self.status_label.setText("✅ Template formatted")
        except Exception as e:
            self.status_label.setText(f"❌ Error formatting template: {str(e)}")
if __name__ == "__main__":
    if len(sys.argv) <= 1:
        # GUI mode: no command-line arguments were given
        app = QApplication(sys.argv)
        window = JinjaTester()
        window.show()
        sys.exit(app.exec())
    else:
        # CLI mode: format or render a template file without opening a window
        parser = argparse.ArgumentParser(description="Jinja Template Tester")
        parser.add_argument(
            "--template", required=True, help="Path to Jinja template file"
        )
        parser.add_argument("--context", required=True, help="JSON string for context")
        parser.add_argument(
            "--action",
            choices=["format", "render"],
            default="render",
            help="Action to perform",
        )
        args = parser.parse_args()

        # Load template
        with open(args.template, "r", encoding="utf-8") as f:
            template_content = f.read()

        # Load JSON and fill in the variables a template may expect
        context = json.loads(args.context)
        for key, fallback in (
            ("bos_token", ""),
            ("eos_token", ""),
            ("add_generation_prompt", False),
        ):
            context.setdefault(key, fallback)

        env = ImmutableSandboxedEnvironment()

        if args.action == "format":
            print(format_template_content(template_content))  # noqa: NP100
        elif args.action == "render":
            print(env.from_string(template_content).render(context))  # noqa: NP100

View File

@@ -0,0 +1,2 @@
PySide6
jinja2

93
scripts/pr2wt.sh Executable file
View File

@@ -0,0 +1,93 @@
#!/usr/bin/env bash
# initialize a new worktree from a PR number:
#
# - creates a new remote using the fork's clone URL
# - creates a local branch tracking the remote branch
# - creates a new worktree in a parent folder, suffixed with "-pr-$PR"
#
# sample usage:
# ./scripts/pr2wt.sh 12345
# ./scripts/pr2wt.sh 12345 opencode
# ./scripts/pr2wt.sh 12345 "cmake -B build && cmake --build build"
# ./scripts/pr2wt.sh 12345 "bash -l"
# print the command synopsis and abort with exit status 1
usage() {
    printf 'usage: %s <pr_number> [cmd]\n' "$0"
    exit 1
}
# check we are in the right directory
if [[ ! -f "scripts/pr2wt.sh" ]]; then
    echo "error: this script must be run from the root of the repository"
    exit 1
fi

if [[ $# -lt 1 || $# -gt 2 ]]; then
    usage
fi

PR=$1
[[ "$PR" =~ ^[0-9]+$ ]] || { echo "error: PR number must be numeric"; exit 1; }

# prefer the 'upstream' remote, fall back to 'origin'
url_origin=$(git config --get remote.upstream.url 2>/dev/null) || \
url_origin=$(git config --get remote.origin.url) || {
    echo "error: no remote named 'upstream' or 'origin' in this repository"
    exit 1
}

# Extract org/repo from either https or ssh format.
if [[ "$url_origin" =~ ^git@ ]]; then
    org_repo=$(echo "$url_origin" | cut -d: -f2)
else
    org_repo=$(echo "$url_origin" | cut -d/ -f4-)
fi
org_repo=${org_repo%.git}

echo "org/repo: $org_repo"

# query the PR metadata; stop here instead of continuing with empty values
meta=$(curl -sSLf -H "Accept: application/vnd.github+json" "https://api.github.com/repos/$org_repo/pulls/$PR") || {
    echo "error: failed to fetch the metadata of PR $PR from $org_repo"
    exit 1
}

url_remote=$(echo "$meta" | jq -r '.head.repo.clone_url')
head_ref=$(echo "$meta" | jq -r '.head.ref')

# jq prints "null" for missing fields, e.g. when the source repository no longer exists
if [[ -z "$url_remote" || "$url_remote" == "null" || -z "$head_ref" || "$head_ref" == "null" ]]; then
    echo "error: could not determine the source repository and branch of PR $PR"
    exit 1
fi

echo "url: $url_remote"
echo "head_ref: $head_ref"

url_remote_cur=$(git config --get "remote.pr/$PR.url" 2>/dev/null || true)

# (re)create the remote only when it is missing or points somewhere else
if [[ "$url_remote_cur" != "$url_remote" ]]; then
    git remote rm "pr/$PR" 2> /dev/null
    git remote add "pr/$PR" "$url_remote"
fi

git fetch "pr/$PR" "$head_ref" || {
    echo "error: failed to fetch $head_ref from pr/$PR"
    exit 1
}

dir=$(basename "$(pwd)")

# best effort: both commands fail harmlessly when the worktree already exists from an earlier run
git branch -D "pr/$PR" 2> /dev/null
git worktree add -b "pr/$PR" "../$dir-pr-$PR" "pr/$PR/$head_ref" 2> /dev/null

og_path=$(pwd)
# without this check an empty wt_path would make the 'cd' below switch to $HOME
wt_path=$(cd "../$dir-pr-$PR" 2> /dev/null && pwd) || {
    echo "error: failed to create the git worktree in ../$dir-pr-$PR"
    exit 1
}

echo "git worktree created in $wt_path"

cd "$wt_path" || exit 1

# pi agent setup in the worktree
if [[ -f "$og_path/.pi/SYSTEM.md" && ! -f ".pi/SYSTEM.md" ]]; then
    mkdir -p .pi
    ln -sfn "$og_path/.pi/SYSTEM.md" .pi/SYSTEM.md
fi

git branch --set-upstream-to="pr/$PR/$head_ref"
git pull --ff-only || {
    echo "error: failed to pull pr/$PR"
    exit 1
}

# optional second argument: a command line to run inside the new worktree
if [[ $# -eq 2 ]]; then
    echo "executing: $2"
    eval "$2"
fi

110
scripts/serve-static.js Normal file
View File

@@ -0,0 +1,110 @@
const http = require('http');
const fs = require('fs').promises;
const path = require('path');
// This file is used for testing wasm build from emscripten
// Example build command:
// emcmake cmake -B build-wasm -DGGML_WEBGPU=ON -DLLAMA_OPENSSL=OFF
// cmake --build build-wasm --target test-backend-ops -j
const PORT = 8080;
const STATIC_DIR = path.join(__dirname, '../build-wasm/bin');
console.log(`Serving static files from: ${STATIC_DIR}`);
const mimeTypes = {
'.html': 'text/html',
'.js': 'text/javascript',
'.css': 'text/css',
'.png': 'image/png',
'.jpg': 'image/jpeg',
'.gif': 'image/gif',
'.svg': 'image/svg+xml',
'.json': 'application/json',
'.woff': 'font/woff',
'.woff2': 'font/woff2',
};
/**
 * Escape the characters that have a special meaning in HTML text and
 * double/single-quoted attribute values.
 *
 * @param {string} text - Arbitrary text.
 * @returns {string} Text that is safe to embed in HTML.
 */
function escapeHtml(text) {
    return String(text)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}

/**
 * Build an HTML page that lists the entries of a directory.
 *
 * Directories get a trailing "/" in both link and label. Entries that cannot
 * be stat'ed (for example dangling symlinks) are listed as plain files
 * instead of failing the whole listing.
 *
 * @param {string} dirPath - Directory on disk to list.
 * @param {string} reqUrl - Request URL shown in the heading; a parent link is
 *     added unless it is "/".
 * @returns {Promise<string>} The HTML document.
 */
async function generateDirListing(dirPath, reqUrl) {
    const files = await fs.readdir(dirPath);
    let html = `
<!DOCTYPE html>
<html>
<head>
    <title>Directory Listing</title>
    <style>
        body { font-family: Arial, sans-serif; padding: 20px; }
        ul { list-style: none; padding: 0; }
        li { margin: 5px 0; }
        a { text-decoration: none; color: #0066cc; }
        a:hover { text-decoration: underline; }
    </style>
</head>
<body>
    <h1>Directory: ${escapeHtml(reqUrl)}</h1>
    <ul>
`;
    if (reqUrl !== '/') {
        html += `<li><a href="../">../ (Parent Directory)</a></li>`;
    }
    for (const file of files) {
        const filePath = path.join(dirPath, file);
        // A failing stat must not abort the listing; treat the entry as a file.
        const stats = await fs.stat(filePath).catch(() => null);
        const suffix = stats !== null && stats.isDirectory() ? '/' : '';
        // File names are untrusted: URL-encode the link, HTML-escape the label.
        const link = encodeURIComponent(file) + suffix;
        html += `<li><a href="${link}">${escapeHtml(file)}${suffix}</a></li>`;
    }
    html += `
    </ul>
</body>
</html>
`;
    return html;
}
// HTTP server for the static files below STATIC_DIR.
//
// Every response disables caching and carries the COOP/COEP headers
// (presumably to give the wasm test pages a cross-origin isolated context).
// Directories are answered with their index.html or, if there is none, with a
// generated listing. Requests that resolve outside STATIC_DIR are rejected.
const server = http.createServer(async (req, res) => {
    try {
        // Set COOP and COEP headers
        res.setHeader('Cross-Origin-Opener-Policy', 'same-origin');
        res.setHeader('Cross-Origin-Embedder-Policy', 'require-corp');
        res.setHeader('Cache-Control', 'no-store, no-cache, must-revalidate, proxy-revalidate');
        res.setHeader('Pragma', 'no-cache');
        res.setHeader('Expires', '0');

        // Only the path part of the URL names a file; drop query string and fragment.
        const urlPath = req.url.split(/[?#]/)[0];

        let relPath;
        try {
            relPath = decodeURIComponent(urlPath);
        } catch {
            // Malformed percent-encoding is a client error, not a server error.
            res.writeHead(400, { 'Content-Type': 'text/plain' });
            res.end('400 Bad Request');
            return;
        }

        // Refuse paths that escape the served directory (e.g. via "..").
        const filePath = path.join(STATIC_DIR, relPath);
        const relToRoot = path.relative(STATIC_DIR, filePath);
        if (relToRoot === '..' || relToRoot.startsWith('..' + path.sep) || path.isAbsolute(relToRoot)) {
            res.writeHead(403, { 'Content-Type': 'text/plain' });
            res.end('403 Forbidden');
            return;
        }

        const stats = await fs.stat(filePath);

        if (stats.isDirectory()) {
            const indexPath = path.join(filePath, 'index.html');
            try {
                const indexData = await fs.readFile(indexPath);
                res.writeHead(200, { 'Content-Type': 'text/html' });
                res.end(indexData);
            } catch {
                // No index.html, generate directory listing
                const dirListing = await generateDirListing(filePath, urlPath);
                res.writeHead(200, { 'Content-Type': 'text/html' });
                res.end(dirListing);
            }
        } else {
            const ext = path.extname(filePath).toLowerCase();
            const contentType = mimeTypes[ext] || 'application/octet-stream';
            const data = await fs.readFile(filePath);
            res.writeHead(200, { 'Content-Type': contentType });
            res.end(data);
        }
    } catch (err) {
        if (err.code === 'ENOENT') {
            res.writeHead(404, { 'Content-Type': 'text/plain' });
            res.end('404 Not Found');
        } else {
            res.writeHead(500, { 'Content-Type': 'text/plain' });
            res.end('500 Internal Server Error');
        }
    }
});

server.listen(PORT, () => {
    console.log(`Server running at http://localhost:${PORT}/`);
});

298
scripts/server-bench.py Executable file
View File

@@ -0,0 +1,298 @@
#!/usr/bin/env python3
import argparse
import json
import os
import random
import sqlite3
import subprocess
from time import sleep, time
from typing import Optional, Union
import datasets
import logging
import matplotlib.pyplot as plt
import numpy as np
import requests
from tqdm.contrib.concurrent import thread_map
logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger("server-bench")
def get_prompts_text(dataset_name: str, n_prompts: int) -> Optional[list[str]]:
    """Load text prompts from a named dataset.

    Only "mmlu" (case-insensitive) is known; it yields the "question" column
    of the "test" split of "cais/mmlu". Any other name returns None.
    A non-negative ``n_prompts`` truncates the result to that many prompts.
    """
    if dataset_name.lower() != "mmlu":
        return None

    logger.info("Loading MMLU dataset...")
    questions = datasets.load_dataset("cais/mmlu", "all")["test"]["question"]  # type: ignore
    return questions[:n_prompts] if n_prompts >= 0 else questions
def get_prompt_lengths_rng(n_prompts: int, prompt_length_min: int, prompt_length_max: int, seed_offset: int) -> list[int]:
    """Draw ``n_prompts`` random prompt lengths from [prompt_length_min, prompt_length_max].

    With a non-negative ``seed_offset`` the global RNG is re-seeded before
    every draw with a seed derived from the offset and the prompt index, which
    makes the result reproducible. A negative offset leaves the RNG untouched.
    """
    assert n_prompts >= 0
    reseed = seed_offset >= 0
    lengths: list[int] = []
    index = 0
    while index < n_prompts:
        if reseed:
            random.seed(3 * (seed_offset + 1000 * index) + 0)
        lengths.append(random.randint(prompt_length_min, prompt_length_max))
        index += 1
    return lengths
def get_prompts_rng(prompt_lengths: list[int]) -> list[list[int]]:
    """Build one synthetic prompt per entry of ``prompt_lengths``.

    Each prompt is a list of that many random integers from [100, 10000],
    drawn from the global RNG in prompt order.
    """
    prompts: list[list[int]] = []
    for length in prompt_lengths:
        token_ids: list[int] = []
        for _ in range(length):
            token_ids.append(random.randint(100, 10000))
        prompts.append(token_ids)
    return prompts
def get_server(path_server: str, path_log: Optional[str]) -> dict:
    """Return a handle to a running llama.cpp server, starting one if necessary.

    Args:
        path_server: Either an ``http://``/``https://`` address of an already
            running server, or the path of the server binary to launch.
        path_log: File that receives the output of a launched server, with
            ``{port}`` replaced by the port; ``None`` discards the output.

    Returns:
        A dict with the keys ``"process"`` (the ``Popen`` object, ``None`` for
        an external server), ``"address"`` and ``"fout"`` (the log target,
        ``None`` for an external server).

    Raises:
        RuntimeError: If the launched server exits, or if 10 health checks
            fail to connect. The server process is terminated and the log file
            closed before the error propagates.
    """
    if path_server.startswith("http://") or path_server.startswith("https://"):
        return {"process": None, "address": path_server, "fout": None}

    # Host and port are passed to the server via environment variables.
    if os.environ.get("LLAMA_ARG_HOST") is None:
        logger.info("LLAMA_ARG_HOST not explicitly set, using 127.0.0.1")
        os.environ["LLAMA_ARG_HOST"] = "127.0.0.1"
    if os.environ.get("LLAMA_ARG_PORT") is None:
        logger.info("LLAMA_ARG_PORT not explicitly set, using 8080")
        os.environ["LLAMA_ARG_PORT"] = "8080"
    hostname: Optional[str] = os.environ.get("LLAMA_ARG_HOST")
    port: Optional[str] = os.environ.get("LLAMA_ARG_PORT")
    assert hostname is not None
    assert port is not None
    address: str = f"http://{hostname}:{port}"
    logger.info(f"Starting the llama.cpp server under {address}...")

    fout = open(path_log.format(port=port), "w") if path_log is not None else subprocess.DEVNULL
    process = None
    try:
        process = subprocess.Popen([path_server], stdout=fout, stderr=subprocess.STDOUT)

        # Poll the health endpoint once per second until it answers with 200.
        n_failures: int = 0
        while True:
            try:
                sleep(1.0)
                exit_code = process.poll()
                if exit_code is not None:
                    log_hint = f", see {path_log.format(port=port)}" if path_log else ""
                    raise RuntimeError(f"llama.cpp server exited unexpectedly with exit code {exit_code}{log_hint}")
                response = requests.get(f"{address}/health")
                if response.status_code == 200:
                    break
            except requests.ConnectionError:
                # Only refused connections count as failures; other status codes keep polling.
                n_failures += 1
                if n_failures >= 10:
                    raise RuntimeError("llama.cpp server is not healthy after 10 seconds")
    except BaseException:
        # The caller never gets a handle on failure, so clean up here to avoid
        # leaving an orphaned server process and an open log file behind.
        if process is not None and process.poll() is None:
            process.terminate()
            process.wait()
        if hasattr(fout, "close"):
            fout.close()
        raise

    return {"process": process, "address": address, "fout": fout}
def get_prompt_length(data: dict) -> int:
    """Return the number of tokens of ``data["prompt"]`` once the chat template is applied.

    Two requests are sent through ``data["session"]`` to
    ``data["server_address"]``: ``/apply-template`` to render the prompt as a
    single user message, then ``/tokenize`` (with special tokens) on the result.
    An HTTP error status raises via ``raise_for_status``.
    """
    session = data["session"]
    base_url: str = data["server_address"]

    template_payload = {"messages": [{"role": "user", "content": data["prompt"], "stream": True}]}
    template_response = session.post(f"{base_url}/apply-template", json=template_payload)
    template_response.raise_for_status()
    rendered_prompt: str = json.loads(template_response.text)["prompt"]

    tokenize_payload = {"content": rendered_prompt, "add_special": True}
    tokenize_response = session.post(f"{base_url}/tokenize", json=tokenize_payload)
    tokenize_response.raise_for_status()
    return len(json.loads(tokenize_response.text)["tokens"])
def send_prompt(data: dict) -> tuple[float, list[float]]:
    """Send one streaming completion request and time the arrival of its events.

    Depending on ``data`` the prompt goes to ``/v1/completions`` (external
    server), straight to ``/completion`` (synthetic prompt), or through
    ``/apply-template`` first and then to ``/completion``.

    Returns:
        ``(t_submit, arrival_times)``: the submission time and one timestamp
        per ``data: `` line of the stream. The last such line is always
        dropped, and one more when the second-to-last line carries "timings".
    """
    session = data["session"]
    server_address: str = data["server_address"]

    t_submit = time()
    if data["external_server"]:
        endpoint = "/v1/completions"
        payload: dict = {
            "prompt": data["prompt"], "ignore_eos": True,
            "seed": data["seed"], "max_tokens": data["n_predict"], "stream": True}
    elif data["synthetic_prompt"]:
        endpoint = "/completion"
        payload = {
            "prompt": data["prompt"], "ignore_eos": True, "cache_prompt": False,
            "seed": data["seed"], "n_predict": data["n_predict"], "stream": True}
    else:
        # Text prompts are rendered with the chat template before completion.
        template_response = session.post(
            f"{server_address}/apply-template",
            json={"messages": [{"role": "user", "content": data["prompt"], "stream": True}]}
        )
        template_response.raise_for_status()
        rendered_prompt: str = json.loads(template_response.text)["prompt"]

        endpoint = "/completion"
        payload = {"prompt": rendered_prompt, "seed": data["seed"], "n_predict": data["n_predict"], "stream": True}

    response = session.post(f"{server_address}{endpoint}", json=payload, stream=True)
    response.raise_for_status()

    event_lines = []
    arrival_times: list[float] = []
    for raw_line in response.iter_lines(decode_unicode=False):
        if raw_line.startswith(b"data: "):
            event_lines.append(raw_line)
            arrival_times.append(time())

    # Number of trailing events that are not counted as generated tokens.
    n_trailing = 1
    if len(event_lines) > 1 and "timings" in json.loads(event_lines[-2][6:]):
        n_trailing = 2

    return (t_submit, arrival_times[:-n_trailing])
def benchmark(
        path_server: str, path_log: Optional[str], path_db: Optional[str], name: Optional[str], prompt_source: str, n_prompts: int,
        n_predict: int, n_predict_min: int, seed_offset: int):
    """Run the server benchmark, log the statistics and write the plots.

    Args:
        path_server: Server binary to launch, or the address of a running server.
        path_log: Log file for a launched server (see ``get_server``).
        path_db: Optional sqlite database that gets one result row appended.
        name: Optional label for the plots and the database row.
        prompt_source: ``"mmlu"`` or ``"rng-MIN-MAX"`` for synthetic prompts.
        n_prompts: Number of prompts to send.
        n_predict: Maximum number of tokens to generate per prompt.
        n_predict_min: Minimum number of tokens to generate per prompt; only
            used for synthetic prompts, otherwise replaced by ``n_predict``.
        seed_offset: Base for the per-prompt seeds; negative means unseeded.

    Side effects: sets ``LLAMA_ARG_*`` environment variables that are not set
    yet, and writes ``prompt_time.png`` and ``gen_rate.png`` to the current
    working directory.
    """
    external_server: bool = path_server.startswith("http://") or path_server.startswith("https://")
    if os.environ.get("LLAMA_ARG_N_PARALLEL") is None:
        logger.info("LLAMA_ARG_N_PARALLEL not explicitly set, using 32")
        os.environ["LLAMA_ARG_N_PARALLEL"] = "32"

    parallel: int = int(os.environ.get("LLAMA_ARG_N_PARALLEL"))  # type: ignore
    prompts: Union[None, list[str], list[list[int]]] = get_prompts_text(prompt_source, n_prompts)
    # Unknown text datasets fall through to synthetic "rng-MIN-MAX" prompts.
    synthetic_prompts: bool = prompts is None
    prompt_n = []

    if synthetic_prompts:
        prompt_source_split: list[str] = prompt_source.split("-")
        assert len(prompt_source_split) == 3
        assert prompt_source_split[0].lower() == "rng"
        prompt_length_min: int = int(prompt_source_split[1])
        prompt_length_max: int = int(prompt_source_split[2])
        logger.info("Generating random prompts...")
        prompt_n = get_prompt_lengths_rng(n_prompts, prompt_length_min, prompt_length_max, seed_offset)
        prompts = get_prompts_rng(prompt_n)
    else:
        n_predict_min = n_predict

    if not external_server and os.environ.get("LLAMA_ARG_CTX_SIZE") is None:
        # 5% headroom on top of generation length plus the longest prompt
        # (2048 tokens are assumed for text prompts).
        context_per_slot: int = int(1.05 * (n_predict + (np.max(prompt_n) if synthetic_prompts else 2048)))
        context_total: int = context_per_slot * parallel
        os.environ["LLAMA_ARG_CTX_SIZE"] = str(context_total)
        logger.info(f"LLAMA_ARG_CTX_SIZE not explicitly set, using {context_total} ({context_per_slot} per slot).")

    server: Optional[dict] = None
    session = None
    try:
        server = get_server(path_server, path_log)
        server_address: str = server["address"]
        assert external_server == (server["process"] is None)

        adapter = requests.adapters.HTTPAdapter(pool_connections=parallel, pool_maxsize=parallel)  # type: ignore
        session = requests.Session()
        session.mount("http://", adapter)
        session.mount("https://", adapter)

        data: list[dict] = []

        assert isinstance(prompts, list)
        for i, p in enumerate(prompts):
            if seed_offset >= 0:
                random.seed(3 * (seed_offset + 1000 * i) + 1)
            data.append({
                "session": session, "server_address": server_address, "external_server": external_server, "prompt": p,
                "synthetic_prompt": synthetic_prompts, "n_predict": random.randint(n_predict_min, n_predict),
                "seed": (3 * (seed_offset + 1000 * i) + 2) if seed_offset >= 0 else -1})

        if not synthetic_prompts:
            logger.info("Getting the prompt lengths...")
            prompt_n = [get_prompt_length(d) for d in data]

        logger.info("Starting the benchmark...\n")
        t0 = time()
        results: list[tuple[float, list[float]]] = thread_map(send_prompt, data, max_workers=parallel, chunksize=1)
    finally:
        if server is not None and server["process"] is not None:
            server["process"].terminate()
            server["process"].wait()
        # The log target is a real file object only when a server was launched with a log path.
        if server is not None and hasattr(server.get("fout"), "close"):
            server["fout"].close()
        if session is not None:
            session.close()

    prompt_t = []
    token_t = []
    depth_sum: int = 0
    for pn, (t_submit, tat) in zip(prompt_n, results):
        # Time to first token of this request.
        prompt_t.append(tat[0] - t_submit)
        token_t += tat
        n_tokens: int = len(tat)
        # Accumulates prompt length + position over all generated tokens.
        depth_sum += n_tokens * pn
        depth_sum += n_tokens * (n_tokens + 1) // 2
    assert len(token_t) > 0
    prompt_n = np.array(prompt_n, dtype=np.int64)
    prompt_t = np.array(prompt_t, dtype=np.float64)
    token_t = np.array(token_t, dtype=np.float64)

    # Make token arrival times relative to the benchmark start.
    token_t -= t0
    token_t_last = np.max(token_t)

    logger.info("")
    logger.info(f"Benchmark duration: {token_t_last:.2f} s")
    logger.info(f"Request throughput: {n_prompts / token_t_last:.2f} requests/s = {n_prompts / (token_t_last/60):.2f} requests/min")
    logger.info(f"Total prompt length: {np.sum(prompt_n)} tokens")
    logger.info(f"Average prompt length: {np.mean(prompt_n):.2f} tokens")
    logger.info(f"Average prompt latency: {1e3 * np.mean(prompt_t):.2f} ms")
    logger.info(f"Average prompt speed: {np.sum(prompt_n) / np.sum(prompt_t):.2f} tokens/s")
    logger.info(f"Total generated tokens: {token_t.shape[0]}")
    logger.info(f"Average generation depth: {depth_sum / token_t.shape[0]:.2f} tokens")
    logger.info(f"Average total generation speed: {token_t.shape[0] / token_t_last:.2f} tokens/s")
    logger.info(f"Average generation speed per slot: {token_t.shape[0] / (parallel * token_t_last):.2f} tokens/s / slot")

    if path_db is not None:
        con = sqlite3.connect(path_db)
        try:
            cursor = con.cursor()
            cursor.execute(
                "CREATE TABLE IF NOT EXISTS server_bench"
                "(name TEXT, n_parallel INTEGER, prompt_source TEXT, n_prompts INTEGER, "
                "n_predict INTEGER, n_predict_min INTEGER, seed_offset INTEGER, runtime REAL);")
            cursor.execute(
                "INSERT INTO server_bench VALUES (?, ?, ?, ?, ?, ?, ?, ?);",
                [name, parallel, prompt_source, n_prompts, n_predict, n_predict_min, seed_offset, token_t_last])
            con.commit()
        finally:
            # Release the database handle even if one of the statements fails.
            con.close()

    # Scatter plot: time to first token over prompt length.
    plt.figure()
    plt.scatter(prompt_n, 1e3 * prompt_t, s=10.0, marker=".", alpha=0.25)
    plt.xlim(0, 1.05e0 * np.max(prompt_n))
    plt.ylim(0, 1.05e3 * np.max(prompt_t))
    plt.title(name or "")
    plt.xlabel("Prompt length [tokens]")
    plt.ylabel("Time to first token [ms]")
    plt.savefig("prompt_time.png", dpi=240)

    # Histogram: tokens received per one-second bin.
    bin_max = np.ceil(token_t_last) + 1
    plt.figure()
    plt.hist(token_t, np.arange(0, bin_max))
    plt.xlim(0, bin_max + 1)
    plt.title(name or "")
    plt.xlabel("Time [s]")
    plt.ylabel("Num. tokens generated per second")
    plt.savefig("gen_rate.png", dpi=240)
if __name__ == "__main__":
    # Every option maps 1:1 onto a keyword argument of benchmark().
    parser = argparse.ArgumentParser(
        description="Tool for benchmarking the throughput of the llama.cpp HTTP server. "
        "Results are printed to console and visualized as plots (saved to current working directory). "
        "To pass arguments such as the model path to the server, set the corresponding environment variables (see llama-server --help). "
        "The reported numbers are the speeds as observed by the Python script and may differ from the performance reported by the server, "
        "particularly when the server is fast vs. the network or Python script (e.g. when serving a very small model).")
    parser.add_argument("--path_server", type=str, default="llama-server", help="Path to the llama.cpp server binary")
    parser.add_argument(
        "--path_log", type=str, default="server-bench-{port}.log",
        help="Path to the log file for the server output, {port} is replaced by the server port")
    parser.add_argument("--path_db", type=str, default=None, help="Path to an sqlite database to store the benchmark results in")
    parser.add_argument("--name", type=str, default=None, help="Name to label plots and database entries with")
    parser.add_argument(
        "--prompt_source", type=str, default="rng-1024-2048",
        help="How to get the prompts for the benchmark, either 'mmlu' for MMLU questions or "
        "rng-MIN-MAX for synthetic prompts with random lengths in the interval [MIN, MAX]")
    parser.add_argument("--n_prompts", type=int, default=100, help="Number of prompts to evaluate")
    parser.add_argument("--n_predict", type=int, default=2048, help="Max. number of tokens to predict per prompt")
    parser.add_argument(
        "--n_predict_min", type=int, default=1024,
        help="Min. number of tokens to predict per prompt (supported for synthetic prompts only)")
    parser.add_argument("--seed_offset", type=int, default=0, help="Offset for determining the seeds for pseudorandom prompt/generation lengths. "
                        "Correlations between seeds can occur when set >= 1000. Negative values mean no seed.")
    args = parser.parse_args()
    benchmark(**vars(args))

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,202 @@
import argparse
import json
import requests
import logging
import sys
# Log to stdout without an implicit trailing newline, so streamed chunks can be
# written back-to-back; callers add "\n" explicitly where a line break is wanted.
handler = logging.StreamHandler(sys.stdout)
handler.terminator = ""
logging.basicConfig(
    handlers=[handler],
    format="%(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("server-test-model")
def run_query(url, messages, tools=None, stream=False, tool_choice=None):
    """Send one chat-completion request and collect the model's reply.

    Args:
        url: Full URL of the chat completions endpoint.
        messages: List of chat messages to send.
        tools: Optional list of tool definitions; only sent when truthy.
        stream: Request a streamed reply and assemble it chunk by chunk.
        tool_choice: Optional tool_choice value; only sent when truthy.

    Returns:
        A dict with the keys "content" (str), "reasoning_content" (str) and
        "tool_calls" (list), or None when the HTTP request failed.
    """
    payload = {
        "messages": messages,
        "stream": stream,
        "max_tokens": 5000,
    }
    if tools:
        payload["tools"] = tools
    if tool_choice:
        payload["tool_choice"] = tool_choice

    try:
        response = requests.post(url, json=payload, stream=stream)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        if e.response is not None:
            logger.info(f"Response error: {e} for {e.response.content}\n")
        else:
            logger.info(f"Error connecting to server: {e}\n")
        return None

    full_content = ""
    reasoning_content = ""
    tool_calls = []

    if stream:
        logger.info(f"--- Streaming response (Tools: {bool(tools)}) ---\n")
        for line in response.iter_lines():
            if not line:
                continue
            decoded_line = line.decode("utf-8")
            if not decoded_line.startswith("data: "):
                continue
            data_str = decoded_line[6:]
            if data_str == "[DONE]":
                break
            try:
                data = json.loads(data_str)
            except json.JSONDecodeError:
                logger.info(f"Failed to decode JSON: {data_str}\n")
                continue
            if "choices" not in data or not data["choices"]:
                continue
            # `or {}` / `or []` below: a field may be present but JSON null.
            delta = data["choices"][0].get("delta") or {}

            # Content
            content_chunk = delta.get("content")
            if content_chunk:
                full_content += content_chunk
                logger.info(content_chunk)

            # Reasoning (printed in italics)
            reasoning_chunk = delta.get("reasoning_content")
            if reasoning_chunk:
                reasoning_content += reasoning_chunk
                logger.info(f"\x1B[3m{reasoning_chunk}\x1B[0m")

            # Tool calls arrive as fragments keyed by "index"; concatenate them.
            for tc in delta.get("tool_calls") or []:
                index = tc.get("index")
                if index is None:
                    continue
                while len(tool_calls) <= index:
                    # Using "function" as type default but could be flexible
                    tool_calls.append(
                        {
                            "id": "",
                            "type": "function",
                            "function": {"name": "", "arguments": ""},
                        }
                    )
                entry = tool_calls[index]
                if tc.get("id"):
                    entry["id"] += tc["id"]
                fn = tc.get("function") or {}
                if fn.get("name"):
                    entry["function"]["name"] += fn["name"]
                if fn.get("arguments"):
                    entry["function"]["arguments"] += fn["arguments"]
        logger.info("\n--- End of Stream ---\n")
    else:
        logger.info(f"--- Non-streaming response (Tools: {bool(tools)}) ---\n")
        data = response.json()
        if "choices" in data and len(data["choices"]) > 0:
            message = data["choices"][0].get("message") or {}
            # Normalise JSON null to empty values so callers never see None
            # (a tool-call reply typically carries "content": null).
            full_content = message.get("content") or ""
            reasoning_content = message.get("reasoning_content") or ""
            tool_calls = message.get("tool_calls") or []
            logger.info(full_content)
        logger.info("--- End of Response ---\n")

    return {
        "content": full_content,
        "reasoning_content": reasoning_content,
        "tool_calls": tool_calls,
    }
def test_chat(url, stream):
logger.info(f"\n=== Testing Chat (Stream={stream}) ===\n")
messages = [{"role": "user", "content": "What is the capital of France?"}]
result = run_query(url, messages, stream=stream)
if result:
if result["content"]:
logger.info("PASS: Output received.\n")
else:
logger.info("WARN: No content received (valid if strict tool call, but unexpected here).\n")
if result.get("reasoning_content"):
logger.info(f"INFO: Reasoning content detected ({len(result['reasoning_content'])} chars).\n")
else:
logger.info("INFO: No reasoning content detected (Standard model behavior).\n")
else:
logger.info("FAIL: No result.\n")
def test_tool_call(url, stream):
logger.info(f"\n=== Testing Tool Call (Stream={stream}) ===\n")
messages = [
{
"role": "user",
"content": "What is the weather in London? Please use the get_weather tool.",
}
]
tools = [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
},
}
]
result = run_query(url, messages, tools=tools, tool_choice="auto", stream=stream)
if result:
tcs = result.get("tool_calls")
if tcs and len(tcs) > 0:
logger.info("PASS: Tool calls detected.")
for tc in tcs:
func = tc.get("function", {})
logger.info(f" Tool: {func.get('name')}, Args: {func.get('arguments')}\n")
else:
logger.info(f"FAIL: No tool calls. Content: {result['content']}\n")
if result.get("reasoning_content"):
logger.info(
f"INFO: Reasoning content detected during tool call ({len(result['reasoning_content'])} chars).\n"
)
else:
logger.info("FAIL: Query failed.\n")
def main():
    """Parse --host/--port and run the chat and tool-call tests.

    Both tests run first in non-streaming mode, then again in streaming mode.
    """
    parser = argparse.ArgumentParser(description="Test llama-server functionality.")
    parser.add_argument("--host", default="localhost", help="Server host")
    parser.add_argument("--port", default=8080, type=int, help="Server port")
    args = parser.parse_args()

    base_url = f"http://{args.host}:{args.port}/v1/chat/completions"
    logger.info(f"Testing server at {base_url}\n")

    # Non-streaming pass first, streaming pass second.
    for stream in (False, True):
        test_chat(base_url, stream=stream)
        test_tool_call(base_url, stream=stream)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,991 @@
#!/usr/bin/env python3
"""
Test parallel tool-calling capability via chat completions endpoint.
Only run this against models that actually support parallel tool calls — this
script does not attempt to toggle that setting on the server. Each scenario is
explicitly worded so that a capable model SHOULD emit multiple tool calls in a
single assistant turn (either the same tool N times, or several different
tools at once).
Each test case contains:
- tools: list of tool definitions (OpenAI-compatible)
- messages: initial conversation messages
- mock_tool_responses: dict mapping tool_name -> callable(arguments) -> str (JSON)
- expected_parallel: dict describing what constitutes a successful parallel turn
{"min_parallel": int, # minimum tool_calls in one turn
"require_same_tool": Optional[str], # all parallel calls must be this tool
"require_distinct_tools": Optional[int], # >= N distinct tool names in one turn
"min_distinct_args_key": Optional[str], # arg key whose values must differ across the parallel calls
"min_distinct_args_count": int} # required number of distinct values for that key (defaults to min_parallel)
- validate: callable(turns, all_tool_calls, final_content) -> (passed, reason)
"""
import argparse
import json
import requests
import sys
# ---------------------------------------------------------------------------
# Color / formatting helpers
# ---------------------------------------------------------------------------
# ANSI SGR escape sequences used to style terminal output.
# Attributes
RESET = "\033[0m"
BOLD = "\033[1m"
DIM = "\033[2m"
# Foreground colours
CYAN = "\033[36m"
YELLOW = "\033[33m"
GREEN = "\033[32m"
RED = "\033[31m"
BLUE = "\033[34m"
WHITE = "\033[97m"
MAGENTA = "\033[35m"
def _print(text="", end="\n"):
sys.stdout.write(text + end)
sys.stdout.flush()
def print_header(title):
    """Print `title` between two 60-column horizontal rules.

    The title line is right-padded with spaces up to 58 characters.
    """
    # The rule glyph was an empty string, which made both rules blank lines.
    bar = "─" * 60
    _print(f"\n{BOLD}{CYAN}{bar}{RESET}")
    _print(
        f"{BOLD}{CYAN}{WHITE}{title}{CYAN}{' ' * max(0, 58 - len(title))}{RESET}"
    )
    _print(f"{BOLD}{CYAN}{bar}{RESET}")
def print_turn_banner(turn_idx, n_calls):
    """Print a banner for one assistant turn, showing its index and tool-call count.

    Turns with two or more calls are highlighted in magenta, others are dimmed.
    """
    color = MAGENTA if n_calls >= 2 else DIM
    # Keep a separator between the two numbers; without it "turn 0" with
    # 3 calls rendered as the ambiguous "turn 03 tool call(s)".
    _print(f"\n {BOLD}{color}▶ turn {turn_idx} · {n_calls} tool call(s){RESET}")
def print_tool_call(name, args):
    """Print one tool invocation as `name(<JSON-encoded args>)`."""
    rendered_args = json.dumps(args)
    line = f" {BOLD}{YELLOW}{name}{RESET}{DIM}({rendered_args}){RESET}"
    _print(line)
def print_tool_result(result):
    """Print a preview of a tool result, truncated to 140 characters.

    An ellipsis is appended when the result was cut, so a truncated preview
    can be told apart from a complete one.
    """
    # The truncation marker was an empty string in both branches (a no-op).
    preview = result[:140] + ("…" if len(result) > 140 else "")
    _print(f" {DIM}{BLUE}{preview}{RESET}")
def print_model_output(text):
    """Write model text to stdout as-is (no added newline) and flush."""
    stream = sys.stdout
    stream.write(text)
    stream.flush()
def print_pass(reason):
    """Print a green PASS marker followed by `reason`."""
    marker = f"{BOLD}{GREEN}✔ PASS{RESET}"
    _print(f"\n{marker} {reason}")
def print_fail(reason):
    """Print a red FAIL marker followed by `reason`."""
    marker = f"{BOLD}{RED}✘ FAIL{RESET}"
    _print(f"\n{marker} {reason}")
def print_info(msg):
    """Print an informational message in dim text."""
    styled = f"{DIM}{msg}{RESET}"
    _print(styled)
def print_warn(msg):
    """Print a warning message in bold yellow text."""
    styled = f"{BOLD}{YELLOW}{msg}{RESET}"
    _print(styled)
# ---------------------------------------------------------------------------
# HTTP helpers
# ---------------------------------------------------------------------------
def chat_completion(url, messages, tools=None, stream=False):
    """POST one chat-completion request and return the assembled assistant reply.

    Args:
        url: Full URL of the chat completions endpoint.
        messages: List of chat messages to send.
        tools: Optional list of tool definitions. When truthy, they are sent
            together with tool_choice="auto".
        stream: Request a streamed reply and assemble it chunk by chunk.

    Returns:
        A dict with "content" (str) and "tool_calls" (list), plus
        "reasoning_content" (str) when the reply carried any; None when the
        HTTP request failed. Content is echoed to stdout as it is received.
    """
    payload = {
        "messages": messages,
        "stream": stream,
        "max_tokens": 4096,
    }
    if tools:
        payload["tools"] = tools
        payload["tool_choice"] = "auto"

    try:
        response = requests.post(url, json=payload, stream=stream)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        body = e.response.content if (e.response is not None) else b""
        print_fail(f"Request error: {e} | body: {body}")
        return None

    full_content = ""
    reasoning_content = ""
    tool_calls: list[dict] = []

    if stream:
        for line in response.iter_lines():
            if not line:
                continue
            decoded = line.decode("utf-8")
            if not decoded.startswith("data: "):
                continue
            data_str = decoded[6:]
            if data_str == "[DONE]":
                break
            try:
                data = json.loads(data_str)
            except json.JSONDecodeError:
                continue
            choices = data.get("choices") or []
            if not choices:
                continue
            # `or` fallbacks below: a field may be present but JSON null, in
            # which case dict.get's default is not applied.
            delta = choices[0].get("delta") or {}
            if delta.get("reasoning_content"):
                reasoning_content += delta["reasoning_content"]
            if delta.get("content"):
                full_content += delta["content"]
                print_model_output(delta["content"])
            # Tool calls arrive as fragments keyed by "index"; concatenate them.
            for tc in delta.get("tool_calls") or []:
                idx = tc.get("index") or 0
                while len(tool_calls) <= idx:
                    tool_calls.append(
                        {
                            "id": "",
                            "type": "function",
                            "function": {"name": "", "arguments": ""},
                        }
                    )
                slot = tool_calls[idx]
                if tc.get("id"):
                    slot["id"] += tc["id"]
                fn = tc.get("function") or {}
                if fn.get("name"):
                    slot["function"]["name"] += fn["name"]
                if fn.get("arguments"):
                    slot["function"]["arguments"] += fn["arguments"]
    else:
        data = response.json()
        choices = data.get("choices") or []
        if choices:
            msg = choices[0].get("message") or {}
            full_content = msg.get("content") or ""
            reasoning_content = msg.get("reasoning_content") or ""
            tool_calls = msg.get("tool_calls") or []
        if full_content:
            print_model_output(full_content)

    result = {"content": full_content, "tool_calls": tool_calls}
    if reasoning_content:
        result["reasoning_content"] = reasoning_content
    return result
def run_agentic_loop(url, messages, tools, mock_tool_responses, stream, max_turns=6):
    """
    Drive the multi-turn tool-call loop, but record each turn's tool calls
    separately so parallelism can be validated.

    Each requested tool call is answered by the matching callable in
    `mock_tool_responses` (or by an "Unknown tool" error payload), and the
    conversation is re-sent until the model replies without tool calls, the
    request fails, or `max_turns` is exhausted.

    Returns (turns, all_tool_calls, final_content) where `turns` is a list
    of dicts: {"index": int, "tool_calls": [...], "content": str}.
    `final_content` is None when a request failed or the turn limit was hit.
    """
    msgs = list(messages)  # copy so the caller's list is not mutated
    turns: list[dict] = []
    all_tool_calls: list[dict] = []

    for turn_idx in range(max_turns):
        result = chat_completion(url, msgs, tools=tools, stream=stream)
        if result is None:
            return turns, all_tool_calls, None

        tcs = result.get("tool_calls") or []
        content = result.get("content") or ""
        turns.append(
            {"index": turn_idx, "tool_calls": list(tcs), "content": content}
        )

        if not tcs:
            # Final answer: print a visual separator when there is text.
            if content:
                _print(f"\n{DIM}{'·' * 60}{RESET}")
                _print(f"{DIM} model response:{RESET}\n")
            return turns, all_tool_calls, content

        print_turn_banner(turn_idx, len(tcs))
        all_tool_calls.extend(tcs)

        assistant_msg: dict = {
            "role": "assistant",
            "content": content,
            "tool_calls": tcs,
        }
        reasoning = result.get("reasoning_content")
        if reasoning:
            assistant_msg["reasoning_content"] = reasoning
        msgs.append(assistant_msg)

        for tc in tcs:
            tool_name = tc["function"]["name"]
            try:
                args = json.loads(tc["function"]["arguments"])
            except json.JSONDecodeError:
                args = {}
            # Mocks look values up with args.get(); valid JSON that is not an
            # object (e.g. a bare string or list) is treated like no arguments.
            if not isinstance(args, dict):
                args = {}

            print_tool_call(tool_name, args)

            mock_fn = mock_tool_responses.get(tool_name)
            if mock_fn:
                tool_result = mock_fn(args)
            else:
                tool_result = json.dumps({"error": f"Unknown tool: {tool_name}"})

            print_tool_result(tool_result)
            msgs.append(
                {
                    "role": "tool",
                    "tool_call_id": tc.get("id", ""),
                    "content": tool_result,
                }
            )

    return turns, all_tool_calls, None
# ---------------------------------------------------------------------------
# Parallelism helpers
# ---------------------------------------------------------------------------
def _best_parallel_turn(turns):
"""Return the turn (dict) with the most tool calls, or None if no tools."""
tool_turns = [t for t in turns if t["tool_calls"]]
if not tool_turns:
return None
return max(tool_turns, key=lambda t: len(t["tool_calls"]))
def _distinct_tool_names(turn):
return {tc["function"]["name"] for tc in turn["tool_calls"]}
def _distinct_arg_values(turn, key):
values = set()
for tc in turn["tool_calls"]:
try:
args = json.loads(tc["function"]["arguments"])
except json.JSONDecodeError:
continue
v = args.get(key)
if v is not None:
if isinstance(v, str):
values.add(v.strip().lower())
else:
values.add(v)
return values
def _check_parallel(turns, expected):
"""
Check that at least one turn satisfies the parallel-call expectations.
Returns (ok, reason).
"""
best = _best_parallel_turn(turns)
if best is None:
return False, "No tool calls were made at all"
min_parallel = expected.get("min_parallel", 2)
if len(best["tool_calls"]) < min_parallel:
by_turn = [len(t["tool_calls"]) for t in turns]
return False, (
f"No turn had >= {min_parallel} parallel tool calls "
f"(per-turn counts: {by_turn})"
)
require_same = expected.get("require_same_tool")
if require_same is not None:
names = [tc["function"]["name"] for tc in best["tool_calls"]]
if any(n != require_same for n in names):
return False, (
f"Parallel turn mixed tools; expected all {require_same!r}, got {names}"
)
require_distinct = expected.get("require_distinct_tools")
if require_distinct is not None:
distinct = _distinct_tool_names(best)
if len(distinct) < require_distinct:
return False, (
f"Parallel turn had only {len(distinct)} distinct tool names "
f"({distinct}); need >= {require_distinct}"
)
distinct_key = expected.get("min_distinct_args_key")
distinct_count = expected.get("min_distinct_args_count", min_parallel)
if distinct_key is not None:
values = _distinct_arg_values(best, distinct_key)
if len(values) < distinct_count:
return False, (
f"Parallel turn had only {len(values)} distinct {distinct_key!r} "
f"values ({values}); need >= {distinct_count}"
)
return True, (
f"Parallel turn had {len(best['tool_calls'])} calls across "
f"{len(_distinct_tool_names(best))} distinct tool(s)"
)
# ---------------------------------------------------------------------------
# Test case runner
# ---------------------------------------------------------------------------
def run_test(url, test_case, stream):
    """Run one test case in the given mode and print a PASS/FAIL verdict.

    The case fails when the server produced no turn at all, when no turn meets
    the case's "expected_parallel" requirements, or when the case's own
    "validate" callable rejects the outcome.

    Returns the pass/fail result.
    """
    mode = "stream" if stream else "non-stream"
    print_header(f"{test_case['name']} [{mode}]")

    turns, all_tool_calls, final_content = run_agentic_loop(
        url,
        messages=test_case["messages"],
        tools=test_case["tools"],
        mock_tool_responses=test_case["mock_tool_responses"],
        stream=stream,
    )
    if not turns:
        print_fail("No response from server.")
        return False

    # Parallelism is checked before the case-specific validation.
    parallel_ok, parallel_reason = _check_parallel(turns, test_case["expected_parallel"])
    if not parallel_ok:
        print_fail(parallel_reason)
        return False

    passed, reason = test_case["validate"](turns, all_tool_calls, final_content)
    if not passed:
        print_fail(reason)
    else:
        print_pass(f"{parallel_reason}; {reason}")
    return passed
# ---------------------------------------------------------------------------
# Test case definitions
# ---------------------------------------------------------------------------
# ---- Test 1: Multi-file read (same tool, multiple distinct paths) ----
_FILE_TOOLS = [
{
"type": "function",
"function": {
"name": "read_file",
"description": (
"Read the full contents of a file from the local filesystem. "
"Call this tool in parallel when asked to read several files — "
"each path needs its own call."
),
"parameters": {
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "Absolute or repo-relative path to a file",
},
},
"required": ["path"],
},
},
},
]
_FILE_CONTENTS = {
"config/database.yml": "host: db.internal\nport: 5432\nuser: svc_app\n",
"config/redis.yml": "host: cache.internal\nport: 6379\ndb: 0\n",
"config/queue.yml": "broker: rabbitmq.internal\nport: 5672\nvhost: prod\n",
"config/auth.yml": "provider: oidc\nissuer: https://auth.internal\n",
}
def _read_file_mock(args):
path = args.get("path", "")
norm = path.lstrip("./").lstrip("/")
content = _FILE_CONTENTS.get(norm)
if content is None:
for k, v in _FILE_CONTENTS.items():
if path.endswith(k):
content = v
break
if content is None:
return json.dumps({"path": path, "error": "not found"})
return json.dumps({"path": path, "content": content})
# Scenario: one tool (read_file) called four times in a single turn, each call
# with a different "path".
MULTIFILE_READ_TEST = {
    "name": "Parallel multi-file read (same tool, 4 distinct paths)",
    "tools": _FILE_TOOLS,
    "messages": [
        {
            "role": "user",
            "content": (
                "Please read all four of these config files so I can review them "
                "together: config/database.yml, config/redis.yml, config/queue.yml, "
                "and config/auth.yml. Call read_file for every path in parallel in "
                "a single batch — do NOT read them one by one sequentially across "
                "turns. After you have all four, give me a one-line summary of each."
            ),
        }
    ],
    "mock_tool_responses": {"read_file": _read_file_mock},
    "expected_parallel": {
        "min_parallel": 4,
        "require_same_tool": "read_file",
        "min_distinct_args_key": "path",
        "min_distinct_args_count": 4,
    },
    # Wrapped in a lambda because _validate_multifile is defined further down;
    # the name is only resolved when the validator is called.
    "validate": lambda turns, tcs, content: _validate_multifile(turns, tcs, content),
}
def _validate_multifile(turns, tcs, content):
del turns
if not content:
return False, "No final summary produced"
return True, f"{len(tcs)} total read_file calls; content length={len(content)}"
# ---- Test 2: Batch TODO marking (same tool, N calls in one turn) ----
_TODO_TOOLS = [
{
"type": "function",
"function": {
"name": "mark_todo_complete",
"description": (
"Mark a single TODO item as complete by ID. When the user wants "
"several items marked at once, call this tool in parallel — "
"one call per item — rather than sequentially across turns."
),
"parameters": {
"type": "object",
"properties": {
"todo_id": {
"type": "string",
"description": "Identifier of the TODO item",
},
"note": {
"type": "string",
"description": "Optional completion note",
},
},
"required": ["todo_id"],
},
},
},
]
_TODO_DB = {
"T-101": "Draft onboarding doc",
"T-102": "Update dependency lockfile",
"T-103": "Fix flaky login test",
"T-104": "Rotate service credentials",
"T-105": "Archive Q4 reports",
}
def _mark_todo_mock(args):
tid = args.get("todo_id", "")
if tid in _TODO_DB:
return json.dumps({"todo_id": tid, "title": _TODO_DB[tid], "status": "done"})
return json.dumps({"todo_id": tid, "error": "unknown id"})
# Scenario: one tool (mark_todo_complete) called five times in a single turn,
# each call with a different "todo_id".
TODO_BATCH_TEST = {
    "name": "Batch TODO completion (same tool, 5 IDs in one turn)",
    "tools": _TODO_TOOLS,
    "messages": [
        {
            "role": "user",
            "content": (
                "I finished every item on today's list. Please mark all of the "
                "following TODOs as complete, in one parallel batch: T-101, T-102, "
                "T-103, T-104, T-105. Don't mark them one at a time across separate "
                "turns — issue all five mark_todo_complete calls at once. Afterwards "
                "confirm which ones succeeded."
            ),
        }
    ],
    "mock_tool_responses": {"mark_todo_complete": _mark_todo_mock},
    "expected_parallel": {
        "min_parallel": 5,
        "require_same_tool": "mark_todo_complete",
        "min_distinct_args_key": "todo_id",
        "min_distinct_args_count": 5,
    },
    # Wrapped in a lambda because _validate_todo is defined further down.
    "validate": lambda turns, tcs, content: _validate_todo(turns, tcs, content),
}
def _validate_todo(turns, tcs, content):
del turns
if not content:
return False, "No confirmation summary produced"
return True, f"{len(tcs)} total mark_todo_complete calls"
# ---- Test 3: Multi-city weather (same tool, N parallel locations) ----
_WEATHER_TOOLS = [
{
"type": "function",
"function": {
"name": "get_weather",
"description": (
"Fetch current weather for ONE city. When the user asks about "
"several cities, call this tool in parallel — one call per city — "
"instead of sequentially."
),
"parameters": {
"type": "object",
"properties": {
"city": {"type": "string", "description": "City name"},
"units": {
"type": "string",
"enum": ["metric", "imperial"],
"default": "metric",
},
},
"required": ["city"],
},
},
},
]
_WEATHER_DB = {
"tokyo": {"city": "Tokyo", "temp_c": 18.4, "condition": "partly cloudy", "humidity": 64},
"london": {"city": "London", "temp_c": 9.1, "condition": "overcast", "humidity": 81},
"new york": {"city": "New York", "temp_c": 12.7, "condition": "clear", "humidity": 55},
"paris": {"city": "Paris", "temp_c": 11.3, "condition": "light rain", "humidity": 78},
}
def _weather_mock(args):
city = args.get("city", "").strip().lower()
if city.startswith("new york"):
city = "new york"
if city in _WEATHER_DB:
return json.dumps(_WEATHER_DB[city])
return json.dumps({"city": args.get("city", ""), "error": "unknown city"})
# Scenario: one tool (get_weather) called four times in a single turn, each
# call with a different "city".
MULTI_WEATHER_TEST = {
    "name": "Parallel multi-city weather (same tool, 4 cities)",
    "tools": _WEATHER_TOOLS,
    "messages": [
        {
            "role": "user",
            "content": (
                "I'm comparing today's weather across four cities for a travel "
                "decision: Tokyo, London, New York, and Paris. Please call "
                "get_weather for all four in parallel in a single turn — don't "
                "fetch them one at a time. Then rank them from warmest to coolest."
            ),
        }
    ],
    "mock_tool_responses": {"get_weather": _weather_mock},
    "expected_parallel": {
        "min_parallel": 4,
        "require_same_tool": "get_weather",
        "min_distinct_args_key": "city",
        "min_distinct_args_count": 4,
    },
    # Wrapped in a lambda because _validate_weather is defined further down.
    "validate": lambda turns, tcs, content: _validate_weather(turns, tcs, content),
}
def _validate_weather(turns, tcs, content):
del turns
if not content or not any(
kw in content.lower() for kw in ("warmest", "rank", "hot", "cool")
):
return False, f"Final content missing a ranking: {content!r}"
return True, f"{len(tcs)} total get_weather calls; ranking produced"
# ---- Test 4: Trip planning (different tools, parallel in one turn) ----
_TRIP_TOOLS = [
{
"type": "function",
"function": {
"name": "search_flights",
"description": "Search one-way flights between two airports on a given date.",
"parameters": {
"type": "object",
"properties": {
"from_airport": {"type": "string", "description": "IATA code, e.g. SFO"},
"to_airport": {"type": "string", "description": "IATA code, e.g. JFK"},
"date": {"type": "string", "description": "YYYY-MM-DD"},
},
"required": ["from_airport", "to_airport", "date"],
},
},
},
{
"type": "function",
"function": {
"name": "search_hotels",
"description": "Search hotels in a city for a date range.",
"parameters": {
"type": "object",
"properties": {
"city": {"type": "string"},
"check_in": {"type": "string", "description": "YYYY-MM-DD"},
"check_out": {"type": "string", "description": "YYYY-MM-DD"},
"max_price": {"type": "integer"},
},
"required": ["city", "check_in", "check_out"],
},
},
},
{
"type": "function",
"function": {
"name": "search_restaurants",
"description": "Search restaurants in a city by cuisine.",
"parameters": {
"type": "object",
"properties": {
"city": {"type": "string"},
"cuisine": {"type": "string"},
},
"required": ["city"],
},
},
},
]
_FLIGHTS_RESULT = {
"results": [
{"flight": "UA 1552", "depart": "08:15", "arrive": "16:45", "price": 389},
{"flight": "AA 20", "depart": "10:00", "arrive": "18:35", "price": 412},
]
}
_HOTELS_RESULT = {
"results": [
{"name": "Midtown Grand", "nightly_rate": 245, "rating": 4.3},
{"name": "Harbour Boutique", "nightly_rate": 312, "rating": 4.6},
]
}
_RESTAURANTS_RESULT = {
"results": [
{"name": "Trattoria Nona", "cuisine": "italian", "rating": 4.5},
{"name": "Osteria Blu", "cuisine": "italian", "rating": 4.4},
]
}
TRIP_PLAN_TEST = {
"name": "Trip planning (3 different tools in parallel)",
"tools": _TRIP_TOOLS,
"messages": [
{
"role": "user",
"content": (
"I'm flying from SFO to JFK on 2026-06-12 and staying four nights "
"(check out 2026-06-16). I'd also like some Italian restaurant "
"suggestions in New York. Please call search_flights, search_hotels, "
"and search_restaurants in parallel — all three in a single turn, "
"since they don't depend on each other. Then give me a concise "
"travel summary."
),
}
],
"mock_tool_responses": {
"search_flights": lambda _: json.dumps(_FLIGHTS_RESULT),
"search_hotels": lambda _: json.dumps(_HOTELS_RESULT),
"search_restaurants": lambda _: json.dumps(_RESTAURANTS_RESULT),
},
"expected_parallel": {
"min_parallel": 3,
"require_distinct_tools": 3,
},
"validate": lambda turns, tcs, content: _validate_trip(turns, tcs, content),
}
def _validate_trip(turns, tcs, content):
del turns
names = {tc["function"]["name"] for tc in tcs}
required = {"search_flights", "search_hotels", "search_restaurants"}
missing = required - names
if missing:
return False, f"Missing tool calls: {missing}"
if not content:
return False, "No travel summary produced"
return True, f"All three tools called; summary length={len(content)}"
# ---- Test 5: Portfolio check (same tool, parallel tickers) ----
_STOCK_TOOLS = [
{
"type": "function",
"function": {
"name": "get_stock_quote",
"description": (
"Get the latest quote for ONE ticker. When the user asks about "
"multiple tickers, call this tool in parallel — one per symbol — "
"rather than sequentially."
),
"parameters": {
"type": "object",
"properties": {
"symbol": {"type": "string", "description": "Ticker symbol"},
},
"required": ["symbol"],
},
},
},
]
_STOCK_DB = {
"AAPL": {"symbol": "AAPL", "price": 218.45, "change_pct": "+0.8%"},
"MSFT": {"symbol": "MSFT", "price": 421.10, "change_pct": "+1.2%"},
"GOOGL":{"symbol": "GOOGL","price": 175.22, "change_pct": "-0.3%"},
"AMZN": {"symbol": "AMZN", "price": 189.76, "change_pct": "+0.5%"},
"NVDA": {"symbol": "NVDA", "price": 140.88, "change_pct": "+2.4%"},
}
def _stock_mock(args):
sym = args.get("symbol", "").strip().upper()
if sym in _STOCK_DB:
return json.dumps(_STOCK_DB[sym])
return json.dumps({"symbol": sym, "error": "unknown ticker"})
# Scenario: one tool (get_stock_quote) called five times in a single turn,
# each call with a different "symbol".
PORTFOLIO_TEST = {
    "name": "Portfolio check (same tool, 5 tickers in parallel)",
    "tools": _STOCK_TOOLS,
    "messages": [
        {
            "role": "user",
            "content": (
                "Pull the latest quote for every ticker in my portfolio — AAPL, "
                "MSFT, GOOGL, AMZN, and NVDA — in a single parallel batch. These "
                "lookups are independent, so please don't chain them across turns. "
                "Once you have all five, tell me which ticker had the biggest "
                "percentage change today."
            ),
        }
    ],
    "mock_tool_responses": {"get_stock_quote": _stock_mock},
    "expected_parallel": {
        "min_parallel": 5,
        "require_same_tool": "get_stock_quote",
        "min_distinct_args_key": "symbol",
        "min_distinct_args_count": 5,
    },
    # Wrapped in a lambda because _validate_portfolio is defined further down.
    "validate": lambda turns, tcs, content: _validate_portfolio(turns, tcs, content),
}
def _validate_portfolio(turns, tcs, content):
del turns
if not content or ("nvda" not in content.lower() and "NVDA" not in content):
return False, f"Expected NVDA to be identified as the biggest mover: {content!r}"
return True, f"{len(tcs)} total quotes pulled"
# ---- Test 6: Mixed — translate + dictionary in parallel for the same word ----
_LANG_TOOLS = [
{
"type": "function",
"function": {
"name": "translate_text",
"description": "Translate a short text into a target language.",
"parameters": {
"type": "object",
"properties": {
"text": {"type": "string"},
"target_language": {"type": "string",
"description": "ISO 639-1 language code, e.g. 'es'"},
},
"required": ["text", "target_language"],
},
},
},
{
"type": "function",
"function": {
"name": "get_definition",
"description": "Get the English dictionary definition of a word.",
"parameters": {
"type": "object",
"properties": {
"word": {"type": "string"},
},
"required": ["word"],
},
},
},
{
"type": "function",
"function": {
"name": "get_synonyms",
"description": "Get English synonyms for a word.",
"parameters": {
"type": "object",
"properties": {
"word": {"type": "string"},
},
"required": ["word"],
},
},
},
]
def _translate_mock(args):
t = args.get("text", "")
lang = args.get("target_language", "")
return json.dumps({"source": t, "target_language": lang, "translation": f"[{lang}] {t}"})
def _definition_mock(args):
w = args.get("word", "")
return json.dumps({
"word": w,
"definition": f"A standard dictionary definition of {w!r}.",
})
def _synonyms_mock(args):
w = args.get("word", "")
return json.dumps({
"word": w,
"synonyms": ["synonym_a", "synonym_b", "synonym_c"],
})
# Scenario: three different look-up tools called together in a single turn.
LANG_TOOLKIT_TEST = {
    "name": "Language toolkit (translate + definition + synonyms in parallel)",
    "tools": _LANG_TOOLS,
    "messages": [
        {
            "role": "user",
            "content": (
                "For the English word 'resilient', I need three independent "
                "look-ups at once: (a) translate it into Spanish, (b) fetch its "
                "dictionary definition, and (c) list its synonyms. These three "
                "calls don't depend on each other — please issue them in parallel "
                "in a single turn. Then present the combined results as a short "
                "language note."
            ),
        }
    ],
    "mock_tool_responses": {
        "translate_text": _translate_mock,
        "get_definition": _definition_mock,
        "get_synonyms": _synonyms_mock,
    },
    "expected_parallel": {
        "min_parallel": 3,
        "require_distinct_tools": 3,
    },
    # Wrapped in a lambda because _validate_lang is defined further down.
    "validate": lambda turns, tcs, content: _validate_lang(turns, tcs, content),
}
def _validate_lang(turns, tcs, content):
del turns
names = {tc["function"]["name"] for tc in tcs}
required = {"translate_text", "get_definition", "get_synonyms"}
missing = required - names
if missing:
return False, f"Missing tool calls: {missing}"
if not content:
return False, "No language note produced"
return True, f"All three lookup tools called; note length={len(content)}"
# ---------------------------------------------------------------------------
# All test cases
# ---------------------------------------------------------------------------
# Every scenario, in the order it is run.
ALL_TEST_CASES = [
    MULTIFILE_READ_TEST,
    TODO_BATCH_TEST,
    MULTI_WEATHER_TEST,
    TRIP_PLAN_TEST,
    PORTFOLIO_TEST,
    LANG_TOOLKIT_TEST,
]
# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------
def main():
    """Run the selected test cases in the selected modes and exit with a status.

    Non-streaming tests run before streaming tests. --no-stream and
    --stream-only each drop one of the two modes and cannot be combined.
    --test keeps only the cases whose name contains the given substring.

    Exits with status 0 when every executed test passed, 1 otherwise (also
    when --test matched nothing).
    """
    parser = argparse.ArgumentParser(
        description=(
            "Test llama-server parallel tool-calling capability. Run this only "
            "against models configured for parallel tool calls — this script "
            "does not configure that itself."
        )
    )
    parser.add_argument("--host", default="localhost")
    parser.add_argument("--port", default=8080, type=int)
    # Passing both flags used to select no mode at all, so zero tests ran and
    # the script still exited 0. argparse now rejects the combination.
    mode_group = parser.add_mutually_exclusive_group()
    mode_group.add_argument(
        "--no-stream", action="store_true", help="Disable streaming mode tests"
    )
    mode_group.add_argument(
        "--stream-only", action="store_true", help="Only run streaming mode tests"
    )
    parser.add_argument(
        "--test",
        help="Run only the test whose name contains this substring (case-insensitive)",
    )
    args = parser.parse_args()

    url = f"http://{args.host}:{args.port}/v1/chat/completions"
    print_info(f"Testing server at {url}")
    print_warn(
        "This script expects the target model to emit multiple tool calls in a "
        "single assistant turn. Run it only against parallel-tool-capable models."
    )

    modes: list[bool] = []
    if not args.stream_only:
        modes.append(False)
    if not args.no_stream:
        modes.append(True)

    cases: list[dict] = ALL_TEST_CASES
    if args.test:
        name_filter = args.test.lower()
        cases = [c for c in cases if name_filter in str(c["name"]).lower()]
        if not cases:
            print_fail(f"No test cases matched '{args.test}'")
            sys.exit(1)

    total = 0
    passed = 0
    for stream in modes:
        for case in cases:
            total += 1
            if run_test(url, case, stream=stream):
                passed += 1

    color = GREEN if passed == total else RED
    # The rule glyph was an empty string, which printed two blank lines.
    _print(f"\n{BOLD}{color}{'─' * 60}{RESET}")
    _print(f"{BOLD}{color} Results: {passed}/{total} passed{RESET}")
    _print(f"{BOLD}{color}{'─' * 60}{RESET}\n")
    sys.exit(0 if passed == total else 1)


if __name__ == "__main__":
    main()

1040
scripts/server-test-structured.py Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
0xffff

View File

@@ -0,0 +1,49 @@
#!/bin/sh
#
# Run llama-bench on an Android device through adb.
#
# Optional environment variables:
#   B        directory under $basedir that holds bin/ and lib/ (default: .)
#   S        adb device serial, passed to adb as "-s"
#   H        adb host, passed to adb as "-H"
#   M        model file name, looked up under $basedir/../gguf
#   D        value passed to --device (default: HTP0)
#   V        sets GGML_HEXAGON_VERBOSE and adds -v
#   PROF     sets GGML_HEXAGON_PROFILE and adds -v
#   OPSTAGE  sets GGML_HEXAGON_OPSTAGE
#   NHVX     sets GGML_HEXAGON_NHVX
#   NDEV     sets GGML_HEXAGON_NDEV
#   HB       sets GGML_HEXAGON_HOSTBUF
# Any command-line arguments are appended to the llama-bench invocation.

# Basedir on device
basedir=/data/local/tmp/llama.cpp

# Start from an empty option list so that a cli_opts value inherited from the
# caller's environment cannot leak into the command line.
cli_opts=

branch=.
[ "$B" != "" ] && branch=$B

adbserial=
[ "$S" != "" ] && adbserial="-s $S"

adbhost=
[ "$H" != "" ] && adbhost="-H $H"

model="Llama-3.2-3B-Instruct-Q4_0.gguf"
[ "$M" != "" ] && model="$M"

device="HTP0"
[ "$D" != "" ] && device="$D"

verbose=
[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v"

profile=
[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF" cli_opts="$cli_opts -v"

opmask=
[ "$OPSTAGE" != "" ] && opmask="GGML_HEXAGON_OPSTAGE=$OPSTAGE"

nhvx=
[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"

ndev=
[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"

hb=
[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"

# Echo the final adb command before running it.
set -x

adb $adbserial $adbhost shell " \
  cd $basedir; \
  LD_LIBRARY_PATH=$basedir/$branch/lib \
  ADSP_LIBRARY_PATH=$basedir/$branch/lib \
    $ndev $nhvx $opmask $verbose $profile $hb ./$branch/bin/llama-bench --device $device --mmap 0 -m $basedir/../gguf/$model \
        --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
        --ubatch-size 256 -fa 1 -ngl 99 $cli_opts $@ \
"

View File

@@ -0,0 +1,78 @@
#!/bin/sh
#
# Run llama-cli on an adb-attached device.
#
# Optional environment variables:
#   B        build sub-directory under basedir (default ".")
#   S / H    adb serial (-s) / adb host (-H)
#   M        model file name under $basedir/../gguf
#   D        value passed to --device (default HTP0)
#   SCHED    when non-empty, sets GGML_SCHED_DEBUG=2 and adds -v
#   V, PROF, OPSTAGE, NHVX, HMX, NDEV, HB, OB, OQ, OF, VM, MB
#            forwarded as the matching GGML_HEXAGON_* variables
# Any command-line arguments are appended to the llama-cli invocation.

# Basedir on device
basedir=/data/local/tmp/llama.cpp

cli_opts=

branch=.
[ "$B" != "" ] && branch=$B

adbserial=
[ "$S" != "" ] && adbserial="-s $S"

adbhost=
[ "$H" != "" ] && adbhost="-H $H"

model="Llama-3.2-3B-Instruct-Q4_0.gguf"
[ "$M" != "" ] && model="$M"

device="HTP0"
[ "$D" != "" ] && device="$D"

# V, SCHED and PROF additionally turn on the tool's own -v flag
verbose=
[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v"

sched=
[ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"

profile=
[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF" cli_opts="$cli_opts -v"

opmask=
[ "$OPSTAGE" != "" ] && opmask="GGML_HEXAGON_OPSTAGE=$OPSTAGE"

nhvx=
[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"

hmx=
[ "$HMX" != "" ] && hmx="GGML_HEXAGON_USE_HMX=$HMX"

ndev=
[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"

hb=
[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"

opbatch=
[ "$OB" != "" ] && opbatch="GGML_HEXAGON_OPBATCH=$OB"

opqueue=
[ "$OQ" != "" ] && opqueue="GGML_HEXAGON_OPQUEUE=$OQ"

opflt=
[ "$OF" != "" ] && opflt="GGML_HEXAGON_OPFILTER=$OF"

# VM and MB get their own variables; they must not overwrite opflt, otherwise
# an OF filter is silently dropped whenever VM or MB is also set.
vmem=
[ "$VM" != "" ] && vmem="GGML_HEXAGON_VMEM=$VM"

mbuf=
[ "$MB" != "" ] && mbuf="GGML_HEXAGON_MBUF=$MB"

# Trace the final adb command line
set -x

adb $adbserial $adbhost shell " \
cd $basedir; ulimit -c unlimited; \
LD_LIBRARY_PATH=$basedir/$branch/lib \
ADSP_LIBRARY_PATH=$basedir/$branch/lib \
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt $vmem $mbuf \
./$branch/bin/llama-cli --no-mmap -m $basedir/../gguf/$model \
--poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
--ctx-size 8192 --ubatch-size 256 -fa on \
-ngl 99 --device $device $cli_opts $@ \
"

View File

@@ -0,0 +1,74 @@
#!/bin/sh
#
# Launch llama-completion on an adb-attached device.
# Short environment variables pick the build, adb target, model, device and
# the GGML_* debug knobs; command-line arguments are appended to the tool's
# own arguments.

# Basedir on device
basedir=/data/local/tmp/llama.cpp

cli_opts=

# Build sub-directory, adb target, model and device (each with its default)
branch="${B:-.}"
adbserial="${S:+-s $S}"
adbhost="${H:+-H $H}"
model="${M:-Llama-3.2-3B-Instruct-Q4_0.gguf}"
device="${D:-HTP0}"

# Knobs that also switch on the tool's -v flag
verbose=
if [ -n "$V" ]; then
    verbose="GGML_HEXAGON_VERBOSE=$V"
    cli_opts="$cli_opts -v"
fi

sched=
if [ -n "$SCHED" ]; then
    sched="GGML_SCHED_DEBUG=2"
    cli_opts="$cli_opts -v"
fi

profile=
if [ -n "$PROF" ]; then
    profile="GGML_HEXAGON_PROFILE=$PROF"
    cli_opts="$cli_opts -v"
fi

# Plain pass-through knobs: empty unless the short variable is non-empty
opmask="${OPSTAGE:+GGML_HEXAGON_OPSTAGE=$OPSTAGE}"
nhvx="${NHVX:+GGML_HEXAGON_NHVX=$NHVX}"
hmx="${HMX:+GGML_HEXAGON_USE_HMX=$HMX}"
ndev="${NDEV:+GGML_HEXAGON_NDEV=$NDEV}"
hb="${HB:+GGML_HEXAGON_HOSTBUF=$HB}"
opbatch="${OB:+GGML_HEXAGON_OPBATCH=$OB}"
opqueue="${OQ:+GGML_HEXAGON_OPQUEUE=$OQ}"
opflt="${OF:+GGML_HEXAGON_OPFILTER=$OF}"
vmem="${VM:+GGML_HEXAGON_VMEM=$VM}"
mbuf="${MB:+GGML_HEXAGON_MBUF=$MB}"

# Trace the final adb command line
set -x

adb $adbserial $adbhost shell " \
cd $basedir; ulimit -c unlimited; \
LD_LIBRARY_PATH=$basedir/$branch/lib \
ADSP_LIBRARY_PATH=$basedir/$branch/lib \
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt $vmem $mbuf \
./$branch/bin/llama-completion --no-mmap -m $basedir/../gguf/$model \
--poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
--ctx-size 8192 --ubatch-size 256 -fa on \
-ngl 99 -no-cnv --device $device $cli_opts $@ \
"

View File

@@ -0,0 +1,71 @@
#!/bin/sh
#
# Launch llama-mtmd-cli (multimodal CLI) on an adb-attached device.
# Short environment variables pick the build, adb target, model, projector,
# image, device and the GGML_* debug knobs; command-line arguments are
# appended to the tool's own arguments.

# Basedir on device
basedir=/data/local/tmp/llama.cpp

cli_opts=

# Build sub-directory and adb target
branch="${B:-.}"
adbserial="${S:+-s $S}"
adbhost="${H:+-H $H}"

# Model, projector and image file names, all resolved under $basedir/../gguf
model="${M:-gemma-3-4b-it-Q4_0.gguf}"
mmproj="${MMPROJ:-mmproj-F16.gguf}"
# NOTE(review): with IMG unset, --image receives the bare gguf directory path - confirm this is intended
image="$IMG"

device="${D:-HTP0}"

verbose="${V:+GGML_HEXAGON_VERBOSE=$V}"

# Experimental mode is on unless E overrides the value
experimental="GGML_HEXAGON_EXPERIMENTAL=${E:-1}"

# SCHED is the only knob here that also appends -v to cli_opts
sched=
if [ -n "$SCHED" ]; then
    sched="GGML_SCHED_DEBUG=2"
    cli_opts="$cli_opts -v"
fi

profile="${PROF:+GGML_HEXAGON_PROFILE=$PROF}"
opmask="${OPSTAGE:+GGML_HEXAGON_OPSTAGE=$OPSTAGE}"
nhvx="${NHVX:+GGML_HEXAGON_NHVX=$NHVX}"
hmx="${HMX:+GGML_HEXAGON_USE_HMX=$HMX}"
ndev="${NDEV:+GGML_HEXAGON_NDEV=$NDEV}"

# MTMD backend device for vision model (defaults to CPU if not set)
mtmd_backend="${MTMD_DEVICE:+MTMD_BACKEND_DEVICE=$MTMD_DEVICE}"

# Trace the final adb command line
set -x

adb $adbserial $adbhost shell " \
cd $basedir; ulimit -c unlimited; \
LD_LIBRARY_PATH=$basedir/$branch/lib \
ADSP_LIBRARY_PATH=$basedir/$branch/lib \
$verbose $experimental $sched $opmask $profile $hmx $nhvx $ndev $mtmd_backend \
./$branch/bin/llama-mtmd-cli --no-mmap -m $basedir/../gguf/$model \
--mmproj $basedir/../gguf/$mmproj \
--image $basedir/../gguf/$image \
--poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
--ctx-size 8192 --ubatch-size 256 -fa on \
-ngl 99 --device $device -v $cli_opts $@ \
"

View File

@@ -0,0 +1,54 @@
#!/bin/sh
#
# Run an arbitrary tool from the on-device bin/ directory over adb.
#
# Usage: <script> <tool> [tool-args...]
#
# Optional environment variables:
#   B        build sub-directory under basedir (default ".")
#   S / H    adb serial (-s) / adb host (-H)
#   SCHED    when non-empty, sets GGML_SCHED_DEBUG=2
#   V, PROF, OPSTAGE, NHVX, HMX, NDEV, HB
#            forwarded as the matching GGML_HEXAGON_* variables

# The first argument names the binary to run; fail early with a usage message
# instead of invoking "./bin/" on the device.
if [ $# -lt 1 ]; then
    echo "usage: $0 <tool> [tool-args...]" >&2
    exit 1
fi

# Basedir on device
basedir=/data/local/tmp/llama.cpp

cli_opts=

branch=.
[ "$B" != "" ] && branch=$B

adbserial=
[ "$S" != "" ] && adbserial="-s $S"

adbhost=
[ "$H" != "" ] && adbhost="-H $H"

device="HTP0"
[ "$D" != "" ] && device="$D"

verbose=
[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V"

sched=
[ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"

profile=
[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF"

opmask=
[ "$OPSTAGE" != "" ] && opmask="GGML_HEXAGON_OPSTAGE=$OPSTAGE"

nhvx=
[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"

hmx=
[ "$HMX" != "" ] && hmx="GGML_HEXAGON_USE_HMX=$HMX"

ndev=
[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"

hb=
[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"

# Trace the final adb command line
set -x

# Split off the tool name; the remaining arguments go to the tool itself
tool=$1; shift

adb $adbserial $adbhost shell " \
cd $basedir; ulimit -c unlimited; \
LD_LIBRARY_PATH=$basedir/$branch/lib \
ADSP_LIBRARY_PATH=$basedir/$branch/lib \
$verbose $sched $opmask $profile $nhvx $hmx $ndev $hb ./$branch/bin/$tool $@ \
"

View File

@@ -0,0 +1,188 @@
#!/usr/bin/env python3
import sys
import os
import re
import argparse
import statistics
import logging
from collections import defaultdict
# Mapping of cli-friendly column names to a 3-tuple:
#   (key of the display string in a per-group stats dict,
#    table header text,
#    key used when sorting by this column).
# For the numeric columns the sort key is the "_sort_*" entry holding the raw
# number; for op/dims/dtypes the display string itself is the sort key.
COL_MAP = {
    "op": ("op", "Op", "op"),
    "dims": ("dims", "Dims", "dims"),
    "dtypes": ("dtypes", "DTypes", "dtypes"),
    "count": ("count", "Count", "_sort_count"),
    "max-usec": ("max_usec", "Max usec", "_sort_max_usec"),
    "avg-usec": ("avg_usec", "Avg usec", "_sort_avg_usec"),
    "max-cycles": ("max_cycles", "Max Cycles", "_sort_max_cycles"),
    "avg-cycles": ("avg_cycles", "Avg Cycles", "_sort_avg_cycles"),
    "max-pmu": ("max_pmu", "Max PMU", "_sort_max_pmu"),
    "avg-pmu": ("avg_pmu", "Avg PMU", "_sort_avg_pmu"),
}

# Matches one "profile-op" log record and captures, by name: the op name, the
# dims field, the types field, the usec and cycles counters and, when present,
# the comma-separated list inside "pmu [...]".
op_pattern = re.compile(
    r"profile-op\s+(?P<op_name>[A-Z_0-9]+):\s+.*?\s+:\s+(?P<dims>[\d:x\s\->!]+)\s+:\s+(?P<types>[a-z\d_\s\->x]+)\s+:\s+.*?\s+usec\s+(?P<usec>\d+)\s+cycles\s+(?P<cycles>\d+)(?:\s+pmu\s+\[(?P<pmu>[\d,\s]+)\])?"
)

# Module logger; main() configures it to print bare messages at INFO level.
logger = logging.getLogger("ggml-hexagon-profile")
def parse_log(file_path, pmu_index=None):
    """Extract op-profile records from a log.

    Args:
        file_path: Path of the log to read, or "-" to read from file descriptor 0 (stdin).
        pmu_index: Position of the counter to pick from each record's ``pmu [...]``
            list. When None, no PMU value is extracted.

    Returns:
        A list with one dict per matching line, holding 'name', 'dims', 'types'
        (strings), 'usec', 'cycles' (ints) and 'pmu_val' (int, or None when the
        record has no usable PMU value at ``pmu_index``).

    The process exits with status 1 when the input cannot be opened.
    """
    try:
        if file_path != "-":
            f = open(file_path, 'r', encoding='utf-8', errors='ignore')
        else:
            f = os.fdopen(0, 'r', encoding='utf-8', errors='ignore')
    except FileNotFoundError:
        logger.error(f"file '{file_path}' not found.")
        sys.exit(1)
    except OSError as e:
        # e.g. a directory or an unreadable file: report it instead of a traceback
        logger.error(f"cannot open '{file_path}': {e}")
        sys.exit(1)

    all_ops = []
    # The context manager closes the handle even if a record fails to parse.
    with f:
        for line in f:
            match = op_pattern.search(line)
            if not match:
                continue

            pmu_raw = match.group('pmu')
            pmu_val = None
            if pmu_raw and pmu_index is not None:
                try:
                    pmu_list = [int(x.strip()) for x in pmu_raw.split(',')]
                    if len(pmu_list) > pmu_index:
                        pmu_val = pmu_list[pmu_index]
                except (ValueError, IndexError):
                    # Malformed counter list or out-of-range index: treat as "no PMU value"
                    pmu_val = None

            all_ops.append({
                'name': match.group('op_name'),
                'dims': match.group('dims').strip(),
                'types': match.group('types').strip(),
                'usec': int(match.group('usec')),
                'cycles': int(match.group('cycles')),
                'pmu_val': pmu_val
            })

    return all_ops
def generate_report(ops, top_n, width_overrides, sort_col, pmu_name=None):
    """Aggregate parsed op records and log them as a Markdown-style table.

    Records are grouped by (name, dims, types); each group gets count, max/avg
    usec, max/avg cycles and max/avg PMU statistics.

    Args:
        ops: List of record dicts as produced by parse_log().
        top_n: Maximum number of groups to print after sorting.
        width_overrides: Dict of column name (a COL_MAP key) -> width. A width of 0
            hides the column; cell values longer than the width are truncated.
        sort_col: COL_MAP key to sort by. Numeric columns sort descending,
            text columns ascending.
        pmu_name: When truthy, the PMU columns are shown and "PMU" in their
            headers is replaced by this name.
    """
    if not ops:
        logger.info("No valid records found.")
        return

    grouped = defaultdict(list)
    for op in ops:
        key = (op['name'], op['dims'], op['types'])
        grouped[key].append(op)

    group_stats = []
    for (name, dims, types), group_ops in grouped.items():
        usecs = [o['usec'] for o in group_ops]
        cycles = [o['cycles'] for o in group_ops]
        pmu_vals = [o['pmu_val'] for o in group_ops if o['pmu_val'] is not None]

        group_stats.append({
            'op': name,
            'dims': dims,
            'dtypes': types,
            'count': str(len(group_ops)),
            'max_usec': str(max(usecs)),
            'avg_usec': f"{statistics.mean(usecs):.2f}",
            'max_cycles': str(max(cycles)),
            'avg_cycles': f"{statistics.mean(cycles):.2f}",
            'max_pmu': str(max(pmu_vals)) if pmu_vals else "0",
            'avg_pmu': f"{statistics.mean(pmu_vals):.2f}" if pmu_vals else "0.00",
            # Numeric values for accurate sorting
            '_sort_count': len(group_ops),
            '_sort_max_usec': max(usecs),
            '_sort_avg_usec': statistics.mean(usecs),
            '_sort_max_cycles': max(cycles),
            '_sort_avg_cycles': statistics.mean(cycles),
            '_sort_max_pmu': max(pmu_vals) if pmu_vals else 0,
            '_sort_avg_pmu': statistics.mean(pmu_vals) if pmu_vals else 0
        })

    # Sorting logic
    actual_sort_key = COL_MAP[sort_col][2]
    # We sort numeric fields descending, strings (op/dims) ascending
    is_numeric = actual_sort_key.startswith("_") or actual_sort_key == "count"
    sorted_groups = sorted(group_stats, key=lambda x: x[actual_sort_key], reverse=is_numeric)[:top_n]

    # Define initial column order
    active_cols = ["op", "dims", "dtypes"]
    if pmu_name:
        active_cols += ["max-pmu", "avg-pmu"]
    active_cols += ["max-usec", "avg-usec", "max-cycles", "avg-cycles", "count"]

    final_headers, final_keys, final_widths = [], [], []
    for col_name in active_cols:
        data_key, header_text, _ = COL_MAP[col_name]
        if "pmu" in col_name and pmu_name:
            header_text = header_text.replace("PMU", pmu_name)

        # Default width: widest cell or the header, whichever is longer
        natural_width = max([len(row[data_key]) for row in sorted_groups] + [len(header_text)])
        target_width = width_overrides.get(col_name, natural_width)
        if target_width == 0:
            # Width 0 means "hide this column"
            continue

        final_headers.append(header_text)
        final_keys.append(data_key)
        final_widths.append(target_width)

    # Print Report
    logger.info(f"\n# Profile Report (Top {top_n} Ops sorted by {sort_col})\n")
    header_line = "| " + " | ".join(f"{h:<{final_widths[i]}}" for i, h in enumerate(final_headers)) + " |"
    sep_line = "| " + " | ".join("-" * final_widths[i] for i in range(len(final_headers))) + " |"
    logger.info(header_line)
    logger.info(sep_line)

    for group in sorted_groups:
        row_vals = []
        for i, key in enumerate(final_keys):
            val = group[key]
            width = final_widths[i]
            if len(val) > width:
                if width >= 3:
                    val = val[:width - 3] + "..."
                else:
                    # No room for an ellipsis: a negative slice bound here would
                    # produce a cell wider than the column, so cut hard instead.
                    val = val[:width]
            row_vals.append(f"{val:<{width}}")
        logger.info("| " + " | ".join(row_vals) + " |")
def main():
    """Command-line entry point: parse options, read the log, print the report."""
    parser = argparse.ArgumentParser(description="Post-process Op profile info.")
    parser.add_argument("logfile")
    parser.add_argument("-n", "--top", type=int, default=100)
    parser.add_argument("--sort", type=str, default="max-usec", choices=list(COL_MAP.keys()))
    parser.add_argument("--pmu-index", type=int)
    parser.add_argument("--pmu-name", type=str)
    parser.add_argument("--width", action='append', default=['dims:40'], help="Override column width, e.g. --width dims:50")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, format='%(message)s')

    # Sort validation: can't sort by PMU if index isn't provided
    if "pmu" in args.sort and args.pmu_index is None:
        logger.error(f"Cannot sort by '{args.sort}' without --pmu-index.")
        sys.exit(1)

    # Each --width value is "column:width"; malformed entries are reported and skipped
    overrides = {}
    if args.width:
        for w in args.width:
            try:
                name, val = w.split(':')
                width = int(val)
                if width < 0:
                    # A negative width is not a valid format width and would
                    # crash the table formatting later on.
                    raise ValueError("negative width")
                overrides[name.lower()] = width
            except ValueError:
                logger.warning(f"Invalid width format '{w}'")

    # PMU columns are labelled with --pmu-name, or "#<index>" when no name is given
    final_pmu_name = (args.pmu_name or f"#{args.pmu_index}") if args.pmu_index is not None else None

    ops = parse_log(args.logfile, pmu_index=args.pmu_index)
    generate_report(ops, args.top, overrides, args.sort, pmu_name=final_pmu_name)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,22 @@
Appium-Python-Client==5.2.4
attrs==25.4.0
certifi==2025.10.5
exceptiongroup==1.3.0
h11==0.16.0
idna==3.11
iniconfig==2.1.0
outcome==1.3.0.post0
packaging==25.0
pluggy==1.6.0
PySocks==1.7.1
pytest==8.4.2
selenium==4.36.0
sniffio==1.3.1
sortedcontainers==2.4.0
tomli==2.3.0
trio==0.31.0
trio-websocket==0.12.2
typing_extensions==4.15.0
urllib3==2.5.0
websocket-client==1.9.0
wsproto==1.2.0

View File

@@ -0,0 +1,401 @@
"""Run llama.cpp Hexagon Android tests in a single QDC Appium job.
Bundles test scripts into one artifact and submits a single QDC job:
1. run_bench_tests_posix.py — llama-cli and llama-bench on CPU / GPU / NPU
(from scripts/snapdragon/qdc/)
Results are written to $GITHUB_STEP_SUMMARY when set (GitHub Actions).
Prerequisites:
pip install /path/to/qualcomm_device_cloud_sdk*.whl
Required environment variables:
QDC_API_KEY API key from QDC UI -> Users -> Settings -> API Keys
Usage:
python run_qdc_jobs.py \\
--pkg-dir pkg-snapdragon/llama.cpp \\
--model-url https://.../Llama-3.2-1B-Instruct-Q4_0.gguf \\
--device SM8750
"""
from __future__ import annotations
import argparse
import logging
import os
import re
import shutil
import sys
import tempfile
import time
import xml.etree.ElementTree as ET
from dataclasses import dataclass, field
from pathlib import Path
from qualcomm_device_cloud_sdk.api import qdc_api # ty: ignore[unresolved-import]
from qualcomm_device_cloud_sdk.logging import configure_logging # ty: ignore[unresolved-import]
from qualcomm_device_cloud_sdk.models import ArtifactType, JobMode, JobState, JobSubmissionParameter, JobType, TestFramework # ty: ignore[unresolved-import]
# Route the QDC SDK's logging to a stream handler at INFO level.
configure_logging(level=logging.INFO, handlers=[logging.StreamHandler()])
log = logging.getLogger(__name__)

# Seconds slept between polls in wait_for_job() and wait_for_log_upload()
POLL_INTERVAL = 30
# Default for --job-timeout, in seconds
JOB_TIMEOUT = 3600
# Seconds wait_for_log_upload() keeps polling before giving up
LOG_UPLOAD_TIMEOUT = 600
# Seconds wait_for_capacity() keeps polling, and the sleep between its polls
CAPACITY_TIMEOUT = 1800
CAPACITY_POLL = 60
# Default upper bound on active jobs used by wait_for_capacity()
MAX_CONCURRENT_JOBS = 5

# Job states treated as finished by wait_for_job(), and those counted as still active
TERMINAL_STATES = {JobState.COMPLETED, JobState.CANCELED}
NON_TERMINAL_STATES = {JobState.DISPATCHED, JobState.RUNNING, JobState.SETUP, JobState.SUBMITTED}

# Locations of the test sources bundled into the artifact, relative to this script
_SCRIPTS_DIR = Path(__file__).parent
_TESTS_DIR = _SCRIPTS_DIR / "tests"
_RUN_BENCH = _TESTS_DIR / "run_bench_tests_posix.py"
_RUN_BACKEND_OPS = _TESTS_DIR / "run_backend_ops_posix.py"
_UTILS = _TESTS_DIR / "utils.py"
_CONFTEST = _TESTS_DIR / "conftest.py"
_REQUIREMENTS = _SCRIPTS_DIR / "requirements.txt"

# Matches a pytest result line: captures the test name (optionally preceded by
# "file.py::" and "Class::") and its PASSED/FAILED/ERROR/SKIPPED outcome.
_PYTEST_LINE_RE = re.compile(
    r"(?:[\w/]+\.py::)?(?:\w+::)?([\w\[\].-]+)\s+(PASSED|FAILED|ERROR|SKIPPED)"
)

# Log files skipped by fetch_logs_and_parse_tests()
_EXCLUDED_LOGS = {"qdc_android_whole_host-000.log", "qdc_kernel_host-000.log"}
# Raw values of the non-terminal states, compared against each listed job's "state" attribute
_NON_TERMINAL_STATE_VALUES = {s.value for s in NON_TERMINAL_STATES}
@dataclass
class JobResult:
    """Outcome of one QDC job as consumed by write_summary()."""

    # Overall verdict for the job
    passed: bool
    # Per-test outcome: test name -> True when it passed
    tests: dict[str, bool] = field(default_factory=dict)
    # Log file name -> full text of that log
    raw_logs: dict[str, str] = field(default_factory=dict)
    # Test name -> failure message/text for tests that failed
    failure_details: dict[str, str] = field(default_factory=dict)
def build_artifact_zip(
    pkg_dir: Path,
    stage_dir: Path,
    *,
    test_mode: str = "bench",
    model_url: str | None = None,
) -> Path:
    """Bundle everything into a single QDC artifact zip.

    Zip structure (extracted by QDC to /qdc/appium/ on the runner):
      llama_cpp_bundle/             installed package (adb pushed to /data/local/tmp/)
      tests/
        utils.py                    shared helpers (paths, run_adb_command, …)
        conftest.py                 shared pytest fixtures (driver)
        test_bench_posix.py         bench + cli tests (<<MODEL_URL>> substituted)
        AND/OR
        test_backend_ops_posix.py   test-backend-ops -b HTP0
      requirements.txt
      pytest.ini

    Args:
        pkg_dir: Installed llama.cpp package directory to copy into the bundle.
        stage_dir: Existing scratch directory; the zip is written to
            ``stage_dir / "artifact.zip"``.
        test_mode: "bench", "backend-ops" or "all" - selects which test files are bundled.
        model_url: URL substituted for ``<<MODEL_URL>>``; required for "bench" and "all".

    Returns:
        Path of the created zip file.

    Raises:
        ValueError: if ``model_url`` is missing for a mode that needs it.
    """
    # Stage the payload in its own sub-directory: the archive is written next to
    # it, so make_archive's walk of the payload can never pick up the zip itself.
    content_dir = stage_dir / "content"
    content_dir.mkdir()

    shutil.copytree(pkg_dir, content_dir / "llama_cpp_bundle")

    tests_dir = content_dir / "tests"
    tests_dir.mkdir()
    shutil.copy(_UTILS, tests_dir / "utils.py")
    shutil.copy(_CONFTEST, tests_dir / "conftest.py")

    if test_mode in ("bench", "all"):
        # Explicit check rather than assert, so it still runs under "python -O"
        if model_url is None:
            raise ValueError("--model-url is required for bench/all test modes")
        (tests_dir / "test_bench_posix.py").write_text(
            _RUN_BENCH.read_text().replace("<<MODEL_URL>>", model_url)
        )
    if test_mode in ("backend-ops", "all"):
        shutil.copy(_RUN_BACKEND_OPS, tests_dir / "test_backend_ops_posix.py")

    shutil.copy(_REQUIREMENTS, content_dir / "requirements.txt")
    # Make every pytest run in the bundle emit a JUnit report
    (content_dir / "pytest.ini").write_text("[pytest]\naddopts = --junitxml=results.xml\n")

    zip_base = str(stage_dir / "artifact")
    shutil.make_archive(zip_base, "zip", content_dir)
    return Path(f"{zip_base}.zip")
def wait_for_job(client, job_id: str, timeout: int) -> str:
    """Poll a job until it reaches a terminal state.

    Returns the lower-cased raw status once it maps to a state in
    TERMINAL_STATES. Raises TimeoutError when the accumulated sleep time
    reaches ``timeout`` seconds first.
    """
    waited = 0
    while waited < timeout:
        raw_status = qdc_api.get_job_status(client, job_id)

        # Statuses that are not JobState members are compared as-is
        try:
            state = JobState(raw_status)
        except ValueError:
            state = raw_status

        if state in TERMINAL_STATES:
            return raw_status.lower()

        log.info("Job %s: %s", job_id, raw_status)
        time.sleep(POLL_INTERVAL)
        waited += POLL_INTERVAL

    raise TimeoutError(f"Job {job_id} did not finish within {timeout}s")
def wait_for_log_upload(client, job_id: str) -> None:
    """Poll until the job's log upload reports "completed" or "failed".

    Gives up with a warning (no exception) once LOG_UPLOAD_TIMEOUT seconds of
    sleep time have been spent.
    """
    finished = {"completed", "failed"}
    # One poll at 0, POLL_INTERVAL, ... up to and including LOG_UPLOAD_TIMEOUT
    for _ in range(0, LOG_UPLOAD_TIMEOUT + 1, POLL_INTERVAL):
        upload_state = qdc_api.get_job_log_upload_status(client, job_id) or ""
        upload_state = upload_state.lower()
        if upload_state in finished:
            return
        log.info("Waiting for log upload (status=%s) ...", upload_state)
        time.sleep(POLL_INTERVAL)

    log.warning("Timed out waiting for log upload after %ds", LOG_UPLOAD_TIMEOUT)
def wait_for_capacity(client, max_jobs: int = MAX_CONCURRENT_JOBS) -> None:
    """Block until the user's active (non-terminal) QDC job count is below max_jobs.

    Only the first page (50 entries) of the job list is inspected. If the list
    cannot be retrieved, or CAPACITY_TIMEOUT seconds elapse, the function logs
    a warning and returns so the caller proceeds anyway.
    """
    for _ in range(0, CAPACITY_TIMEOUT, CAPACITY_POLL):
        page = qdc_api.get_jobs_list(client, page_number=0, page_size=50)
        if page is None:
            log.warning("Could not retrieve job list; proceeding without capacity check")
            return

        jobs = getattr(page, "data", []) or []
        running = [
            job for job in jobs
            if getattr(job, "state", None) in _NON_TERMINAL_STATE_VALUES
        ]
        active = len(running)

        if active < max_jobs:
            log.info("Active QDC jobs: %d / %d — proceeding", active, max_jobs)
            return

        log.info("Active QDC jobs: %d / %d — waiting %ds ...", active, max_jobs, CAPACITY_POLL)
        time.sleep(CAPACITY_POLL)

    log.warning("Capacity wait timed out after %ds; proceeding anyway", CAPACITY_TIMEOUT)
def _parse_junit_xml(content: str) -> tuple[dict[str, bool], dict[str, str]]:
try:
root = ET.fromstring(content)
except ET.ParseError:
return {}, {}
results: dict[str, bool] = {}
failures: dict[str, str] = {}
for tc in root.iter("testcase"):
name = tc.get("name", "")
if classname := tc.get("classname", ""):
name = f"{classname}.{name}"
failure_el = tc.find("failure")
if failure_el is None:
failure_el = tc.find("error")
results[name] = failure_el is None
if failure_el is not None:
parts = [failure_el.get("message", ""), failure_el.text or ""]
failures[name] = "\n".join(p for p in parts if p).strip()
return results, failures
def _parse_pytest_output(content: str) -> dict[str, bool]:
    """Map each test name found by _PYTEST_LINE_RE in ``content`` to True when its
    outcome is PASSED. If a name occurs more than once, the last occurrence wins.
    """
    return {
        hit.group(1): hit.group(2) == "PASSED"
        for hit in _PYTEST_LINE_RE.finditer(content)
    }
def fetch_logs_and_parse_tests(
    client, job_id: str
) -> tuple[dict[str, bool], dict[str, str], dict[str, str]]:
    """Download the job's log archives and extract test outcomes from them.

    Returns (test_results, raw_logs, failure_details):
      test_results     test name -> passed; taken from JUnit XML files when any
                       yielded results, otherwise from pytest result lines in .log files
      raw_logs         .log file name -> content (files in _EXCLUDED_LOGS are skipped)
      failure_details  test name -> failure text from JUnit XML
    All three are empty when the job has no log files.
    """
    log_files = qdc_api.get_job_log_files(client, job_id)
    if not log_files:
        log.warning("No log files returned for job %s", job_id)
        return {}, {}, {}

    test_results: dict[str, bool] = {}
    pytest_fallback: dict[str, bool] = {}
    raw_logs: dict[str, str] = {}
    failure_details: dict[str, str] = {}

    with tempfile.TemporaryDirectory() as tmpdir:
        for index, lf in enumerate(log_files):
            log.info("Downloading log file: %s", lf.filename)
            # Each archive gets its own download path and extraction directory so
            # files from earlier archives are not walked (and logged) again.
            zip_path = os.path.join(tmpdir, f"log-{index}.zip")
            extract_dir = os.path.join(tmpdir, f"log-{index}")
            os.makedirs(extract_dir)

            qdc_api.download_job_log_files(client, lf.filename, zip_path)
            try:
                shutil.unpack_archive(zip_path, extract_dir, "zip")
            except Exception as e:
                # Best effort: keep going with whatever was extracted, if anything
                log.warning("Could not unpack %s as zip: %s", lf.filename, e)

            for root_dir, _, files in os.walk(extract_dir):
                for fname in sorted(files):
                    is_xml = fname.endswith(".xml")
                    is_log = fname.endswith(".log")
                    # Only read files that are actually consumed below
                    if not (is_xml or is_log):
                        continue
                    if is_log and fname in _EXCLUDED_LOGS:
                        continue

                    fpath = os.path.join(root_dir, fname)
                    content = Path(fpath).read_text(errors="replace")
                    if is_xml:
                        results, failures = _parse_junit_xml(content)
                        test_results.update(results)
                        failure_details.update(failures)
                    else:
                        log.info("--- %s ---", fname)
                        log.info("%s", content)
                        raw_logs[fname] = content
                        pytest_fallback.update(_parse_pytest_output(content))

    return (test_results if test_results else pytest_fallback), raw_logs, failure_details
def write_summary(result: JobResult, title: str = "QDC Test Results") -> None:
    """Append a Markdown report for ``result`` to the GitHub step summary.

    Does nothing when the GITHUB_STEP_SUMMARY environment variable is unset or
    empty. The report contains the overall verdict, a per-test table (tests
    whose name contains "test_install" are left out), collapsible failure
    details and collapsible raw logs.
    """
    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
    if not summary_path:
        return

    # Status marks; without them the verdict line and the Result column are blank
    pass_mark, fail_mark = "✅", "❌"

    icon = pass_mark if result.passed else fail_mark
    lines = [
        f"## {title}\n",
        f"Overall: {icon} {'PASSED' if result.passed else 'FAILED'}\n",
    ]

    reportable = {n: ok for n, ok in result.tests.items() if "test_install" not in n}
    if reportable:
        lines += ["| Test | Result |", "| ---- | ------ |"]
        for name, ok in reportable.items():
            lines.append(f"| `{name}` | {pass_mark if ok else fail_mark} |")
        passed_n = sum(1 for v in reportable.values() if v)
        failed_n = sum(1 for v in reportable.values() if not v)
        lines += ["", f"**{passed_n} passed, {failed_n} failed**"]
    else:
        lines.append("_No per-test data available._")

    failed_names = [n for n, ok in reportable.items() if not ok]
    if failed_names:
        lines += ["", "### Failures"]
        for name in failed_names:
            detail = result.failure_details.get(name)
            if detail:
                lines += [
                    f"<details><summary><code>{name}</code></summary>",
                    "",
                    "```",
                    detail,
                    "```",
                    "",
                    "</details>",
                ]

    if result.raw_logs:
        lines += ["", "### Raw Logs"]
        for fname, content in sorted(result.raw_logs.items()):
            lines += [
                f"<details><summary>{fname}</summary>",
                "",
                "```",
                content.rstrip(),
                "```",
                "",
                "</details>",
            ]

    # Explicit UTF-8: the report contains non-ASCII marks and arbitrary log text
    with open(summary_path, "a", encoding="utf-8") as f:
        f.write("\n".join(lines) + "\n")
def parse_args() -> argparse.Namespace:
    """Build the command-line parser, parse sys.argv and validate the result.

    Exits through argparse's error path when --model-url is missing for a test
    mode that needs it.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--pkg-dir",
        required=True,
        type=Path,
        help="Installed llama.cpp package directory (contains bin/ and lib/)",
    )
    parser.add_argument(
        "--model-url",
        help="Direct URL to the GGUF model file (required for --test bench)",
    )
    parser.add_argument(
        "--device",
        required=True,
        help="QDC chipset name, e.g. SM8750",
    )
    parser.add_argument(
        "--test",
        choices=["bench", "backend-ops", "all"],
        default="bench",
        help="Test suite to run (default: bench)",
    )
    parser.add_argument(
        "--job-timeout",
        type=int,
        default=JOB_TIMEOUT,
        metavar="SECONDS",
        help=f"Max seconds to wait for job completion (default: {JOB_TIMEOUT})",
    )

    parsed = parser.parse_args()

    # The bench suite downloads a model on-device, so it needs a URL
    needs_model = parsed.test in ("bench", "all")
    if needs_model and not parsed.model_url:
        parser.error("--model-url is required when --test bench or --test all")
    return parsed
def main() -> int:
    """Build and upload the test artifact, run one QDC job and report the outcome.

    Returns 0 when the job completed and every parsed test passed, 1 otherwise
    (including missing API key, missing package directory, unknown device,
    failed upload/submission and job timeout).
    """
    args = parse_args()

    api_key = os.environ.get("QDC_API_KEY")
    if not api_key:
        log.error("QDC_API_KEY environment variable must be set")
        return 1
    if not args.pkg_dir.is_dir():
        log.error("--pkg-dir %s does not exist", args.pkg_dir)
        return 1

    client = qdc_api.get_public_api_client_using_api_key(
        api_key_header=api_key,
        app_name_header="llama-cpp-ci",
        on_behalf_of_header="llama-cpp-ci",
        client_type_header="Python",
    )

    target_id = qdc_api.get_target_id(client, args.device)
    if target_id is None:
        log.error("Could not find QDC target for device %r", args.device)
        return 1

    # The zip lives in the temporary directory, so it must be uploaded before the block exits
    with tempfile.TemporaryDirectory() as tmpdir:
        log.info("Building artifact ...")
        zip_path = build_artifact_zip(
            args.pkg_dir, Path(tmpdir),
            test_mode=args.test, model_url=args.model_url,
        )
        log.info("Uploading artifact (%d MB) ...", zip_path.stat().st_size // 1_000_000)
        artifact_id = qdc_api.upload_file(client, str(zip_path), ArtifactType.TESTSCRIPT)

    if artifact_id is None:
        log.error("Artifact upload failed")
        return 1

    # Hold off submitting while too many of the user's jobs are still active
    wait_for_capacity(client)

    job_id = qdc_api.submit_job(
        public_api_client=client,
        target_id=target_id,
        job_name="llama.cpp Hexagon tests",
        external_job_id=None,
        job_type=JobType.AUTOMATED,
        job_mode=JobMode.APPLICATION,
        # --job-timeout is in seconds; presumably this parameter is in minutes - TODO confirm against the SDK
        timeout=max(1, args.job_timeout // 60),
        test_framework=TestFramework.APPIUM,
        entry_script=None,
        job_artifacts=[artifact_id],
        monkey_events=None,
        monkey_session_timeout=None,
        job_parameters=[JobSubmissionParameter.WIFIENABLED],
    )
    if job_id is None:
        log.error("Job submission failed")
        return 1
    log.info("Job submitted: %s (device=%s)", job_id, args.device)

    try:
        job_status = wait_for_job(client, job_id, timeout=args.job_timeout)
    except TimeoutError as e:
        log.error("%s", e)
        write_summary(JobResult(passed=False, tests={}), title=f"QDC Job Timed Out ({args.device})")
        return 1
    log.info("Job %s finished: %s", job_id, job_status)

    wait_for_log_upload(client, job_id)
    tests, raw_logs, failure_details = fetch_logs_and_parse_tests(client, job_id)

    # Pass requires the COMPLETED state and, when any tests were parsed, all of them passing
    passed = job_status == JobState.COMPLETED.value.lower()
    if tests:
        passed = passed and all(tests.values())
    if not passed:
        log.error("Job did not complete successfully or tests failed (status=%s)", job_status)

    result = JobResult(passed=passed, tests=tests, raw_logs=raw_logs, failure_details=failure_details)
    if args.test == "backend-ops":
        title = f"Backend Ops — HTP0 ({args.device})"
    elif args.test == "all":
        title = f"QDC Tests ({args.device})"
    else:
        title = f"QDC Test Results ({args.device})"
    write_summary(result, title=title)

    return 0 if passed else 1


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,20 @@
"""Shared pytest fixtures for QDC on-device test runners."""
import os
import pytest
from appium import webdriver
from utils import options, write_qdc_log
@pytest.fixture(scope="session", autouse=True)
def driver():
    """Session-wide Appium driver connected to the local Appium server.

    The session is closed when the test session ends, so it is not left open
    on the server.
    """
    session_driver = webdriver.Remote(command_executor="http://127.0.0.1:4723/wd/hub", options=options)
    try:
        yield session_driver
    finally:
        session_driver.quit()
def pytest_sessionfinish(session, exitstatus):
    """pytest hook: publish the JUnit report as "results.xml" via write_qdc_log().

    The report path comes from the session's ``xmlpath`` option, falling back
    to "results.xml"; nothing happens if that file does not exist.
    """
    report_path = getattr(session.config.option, "xmlpath", None) or "results.xml"
    if not os.path.exists(report_path):
        return
    with open(report_path) as report:
        report_text = report.read()
        write_qdc_log("results.xml", report_text)

View File

@@ -0,0 +1,41 @@
"""
On-device test-backend-ops runner for llama.cpp (HTP0 backend).
Executed by QDC's Appium test framework on the QDC runner.
The runner has ADB access to the allocated device.
"""
import os
import sys
import pytest
from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
@pytest.fixture(scope="session", autouse=True)
def install(driver):
    """Once per session: make sure the bundle is on the device, using the
    test-backend-ops binary as the presence marker."""
    marker_binary = f"{BIN_PATH}/test-backend-ops"
    push_bundle_if_needed(marker_binary)
@pytest.mark.parametrize("type_a", ["mxfp4", "fp16", "q4_0"])
def test_backend_ops_htp0(type_a):
    """Run test-backend-ops for MUL_MAT on HTP0, filtered to one ``type_a``.

    The q4_0 run uses a regex filter that additionally excludes one specific
    type_b/m/n/k combination. Output is stored as backend_ops_<type_a>.log and
    the test fails on a non-zero exit code.
    """
    base_cmd = f"{CMD_PREFIX} GGML_HEXAGON_HOSTBUF=0 GGML_HEXAGON_EXPERIMENTAL=1 {BIN_PATH}/test-backend-ops -b HTP0 -o MUL_MAT"
    case_filter = (
        r' -p "^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$"'
        if type_a == "q4_0"
        else f" -p type_a={type_a}"
    )

    outcome = run_adb_command(base_cmd + case_filter, check=False)

    write_qdc_log(f"backend_ops_{type_a}.log", outcome.stdout or "")
    assert outcome.returncode == 0, f"test-backend-ops type_a={type_a} failed (exit {outcome.returncode})"
if __name__ == "__main__":
    # Direct execution: run this file under pytest, publish the JUnit report
    # if one was written, then exit with pytest's status.
    this_file = os.path.realpath(__file__)
    exit_code = pytest.main(["-s", "--junitxml=results.xml", this_file])
    if os.path.exists("results.xml"):
        with open("results.xml") as report:
            write_qdc_log("results.xml", report.read())
    sys.exit(exit_code)

View File

@@ -0,0 +1,76 @@
"""
On-device bench and completion test runner for llama.cpp (CPU, GPU, NPU backends).
Executed by QDC's Appium test framework on the QDC runner.
The runner has ADB access to the allocated device.
Placeholders replaced at artifact creation time by run_qdc_jobs.py:
<<MODEL_URL>> Direct URL to the GGUF model file (downloaded on-device via curl)
"""
import os
import subprocess
import sys
import pytest
from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
# On-device location the model is downloaded to and loaded from
MODEL_PATH = "/data/local/tmp/model.gguf"
# Prompt passed to llama-completion via -p
PROMPT = "What is the capital of France?"
# Options shared by every llama-completion run
CLI_OPTS = "--batch-size 128 -n 128 -no-cnv --seed 42"
@pytest.fixture(scope="session", autouse=True)
def install(driver):
    """Once per session: push the bundle if needed and download the model if it is missing."""
    push_bundle_if_needed(f"{BIN_PATH}/llama-cli")

    # Skip model download if already present.
    # Probe through run_adb_command() rather than a bare "adb shell": that helper
    # recovers the on-device exit code, which adb itself may not propagate.
    probe = run_adb_command(f"ls {MODEL_PATH}", check=False)
    if probe.returncode != 0:
        run_adb_command(f'curl -L -J --output {MODEL_PATH} "<<MODEL_URL>>"')
@pytest.mark.parametrize("device,extra_flags", [
    pytest.param("none", "-ctk q8_0 -ctv q8_0", id="cpu"),
    pytest.param("GPUOpenCL", "", id="gpu"),
    pytest.param("HTP0", "-ctk q8_0 -ctv q8_0", id="npu"),
])
def test_llama_completion(device, extra_flags):
    """Run llama-completion on the given device with the shared prompt and options.

    Output is stored as llama_completion_<device>.log; the test fails on a
    non-zero exit code.
    """
    command = (
        f'{CMD_PREFIX} {BIN_PATH}/llama-completion'
        f' -m {MODEL_PATH} --device {device} -ngl 99 -t 4 {CLI_OPTS} {extra_flags} -fa on'
        f' -p "{PROMPT}"'
    )
    outcome = run_adb_command(command, check=False)

    log_name = f"llama_completion_{device}.log"
    write_qdc_log(log_name, outcome.stdout or "")
    assert outcome.returncode == 0, f"llama-completion {device} failed (exit {outcome.returncode})"
# --device value -> short tag used in the bench log file name
_DEVICE_LOG_NAME = {"none": "cpu", "GPUOpenCL": "gpu", "HTP0": "htp"}


@pytest.mark.parametrize("device", [
    pytest.param("none", id="cpu"),
    pytest.param("GPUOpenCL", id="gpu"),
    pytest.param("HTP0", id="npu"),
])
def test_llama_bench(device):
    """Run llama-bench on the given device.

    Output is stored as llama_bench_<tag>.log (tag from _DEVICE_LOG_NAME); the
    test fails on a non-zero exit code.
    """
    command = (
        f"{CMD_PREFIX} {BIN_PATH}/llama-bench"
        f" -m {MODEL_PATH} --device {device} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32"
    )
    outcome = run_adb_command(command, check=False)

    tag = _DEVICE_LOG_NAME[device]
    write_qdc_log(f"llama_bench_{tag}.log", outcome.stdout or "")
    assert outcome.returncode == 0, f"llama-bench {device} failed (exit {outcome.returncode})"
if __name__ == "__main__":
    # Direct execution: run this file under pytest, publish the JUnit report
    # if one was written, then exit with pytest's status.
    this_file = os.path.realpath(__file__)
    exit_code = pytest.main(["-s", "--junitxml=results.xml", this_file])
    if os.path.exists("results.xml"):
        with open("results.xml") as report:
            write_qdc_log("results.xml", report.read())
    sys.exit(exit_code)

View File

@@ -0,0 +1,93 @@
"""Shared helpers for QDC on-device test runners."""
import logging
import os
import subprocess
import tempfile
from appium.options.common import AppiumOptions
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# On-device paths
# ---------------------------------------------------------------------------
# Where the bundle ends up on the device, and the directory log files are pushed to
BUNDLE_PATH = "/data/local/tmp/llama_cpp_bundle"
QDC_LOGS_PATH = "/data/local/tmp/QDC_logs"
LIB_PATH = f"{BUNDLE_PATH}/lib"
BIN_PATH = f"{BUNDLE_PATH}/bin"

# Shell prefix that exports the library paths and marks the binaries executable.
# It ends with "&&" so the actual command can be appended directly.
ENV_PREFIX = (
    f"export LD_LIBRARY_PATH={LIB_PATH} && "
    f"export ADSP_LIBRARY_PATH={LIB_PATH} && "
    f"chmod +x {BIN_PATH}/* &&"
)
# Same as ENV_PREFIX, but first changes into the bundle directory
CMD_PREFIX = f"cd {BUNDLE_PATH} && {ENV_PREFIX}"

# ---------------------------------------------------------------------------
# Appium session options
# ---------------------------------------------------------------------------
options = AppiumOptions()
options.set_capability("automationName", "UiAutomator2")
options.set_capability("platformName", "Android")
# deviceName is read from the ANDROID_DEVICE_VERSION environment variable (None if unset)
options.set_capability("deviceName", os.getenv("ANDROID_DEVICE_VERSION"))
# ---------------------------------------------------------------------------
# ADB helpers
# ---------------------------------------------------------------------------
def run_adb_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProcess:
    """Run ``cmd`` through ``adb shell`` and return its result.

    stderr is merged into stdout. The returned CompletedProcess carries the
    on-device exit code when it could be recovered from the sentinel line
    (otherwise adb's own return code) and stdout without that sentinel line.
    With ``check=True`` a non-zero exit code raises AssertionError.
    """
    sentinel = "__RC__:"

    # Append exit-code sentinel because `adb shell` doesn't reliably propagate
    # the on-device exit code (older ADB versions always return 0).
    proc = subprocess.run(
        ["adb", "shell", f"{cmd}; echo __RC__:$?"],
        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
    )

    output = proc.stdout
    exit_code = proc.returncode
    if output:
        # Split off the last line; everything before it is the real output
        body, _, last_line = output.rstrip("\n").rpartition("\n")
        if last_line.startswith(sentinel):
            try:
                exit_code = int(last_line[len(sentinel):])
            except ValueError:
                # Not a number after the sentinel: keep adb's code and the full output
                pass
            else:
                output = body + "\n"

    log.info("%s", output)
    completed = subprocess.CompletedProcess(proc.args, exit_code, stdout=output)
    if check:
        assert exit_code == 0, f"Command failed (exit {exit_code})"
    return completed
def write_qdc_log(filename: str, content: str) -> None:
    """Push content as a log file to QDC_LOGS_PATH on the device for QDC log collection.

    The content goes through a local temporary file, which is removed
    afterwards. The adb commands' results are not checked.
    """
    quiet = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}

    # Make sure the target directory exists on the device
    subprocess.run(["adb", "shell", f"mkdir -p {QDC_LOGS_PATH}"], **quiet)

    with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False) as scratch:
        scratch.write(content)
        local_path = scratch.name

    remote_path = f"{QDC_LOGS_PATH}/{filename}"
    try:
        subprocess.run(["adb", "push", local_path, remote_path], **quiet)
    finally:
        os.unlink(local_path)
def push_bundle_if_needed(check_binary: str) -> None:
    """Push llama_cpp_bundle to the device if check_binary is not already present.

    A failed push is logged but does not raise.
    """
    # Probe through run_adb_command(): it recovers the on-device exit code,
    # which a bare `adb shell` may not propagate.
    probe = run_adb_command(f"ls {check_binary}", check=False)
    if probe.returncode == 0:
        return

    pushed = subprocess.run(
        ["adb", "push", "/qdc/appium/llama_cpp_bundle/", "/data/local/tmp"],
        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
    )
    if pushed.returncode != 0:
        log.error("adb push of llama_cpp_bundle failed (exit %d): %s", pushed.returncode, pushed.stdout)

View File

@@ -0,0 +1,48 @@
#!/usr/bin/env pwsh

# Runs llama-bench.exe from the Snapdragon package with a fixed set of defaults.
# Any command-line arguments are forwarded to llama-bench after the defaults.
#
# Optional environment overrides:
#   M        model file name, resolved under ..\..\gguf relative to $basedir
#   D        value passed to --device (default: HTP0)
#   V, PROF, OPSTAGE, NHVX, NDEV, HB
#            copied to the matching GGML_HEXAGON_* variable (see table below)

# Basedir on device
$basedir = ".\pkg-snapdragon"

$cli_opts = $args

$model  = if ($null -ne $env:M) { $env:M } else { "Llama-3.2-3B-Instruct-Q4_0.gguf" }
$device = if ($null -ne $env:D) { $env:D } else { "HTP0" }

# Short variable name -> GGML variable it is copied to (only when the short one is set).
$forwarded = [ordered]@{
    V       = "GGML_HEXAGON_VERBOSE"
    PROF    = "GGML_HEXAGON_PROFILE"
    OPSTAGE = "GGML_HEXAGON_OPSTAGE"
    NHVX    = "GGML_HEXAGON_NHVX"
    NDEV    = "GGML_HEXAGON_NDEV"
    HB      = "GGML_HEXAGON_HOSTBUF"
}
foreach ($short in $forwarded.Keys) {
    $value = [System.Environment]::GetEnvironmentVariable($short)
    if ($null -ne $value) {
        Set-Item -Path "Env:$($forwarded[$short])" -Value $value
    }
}

$env:ADSP_LIBRARY_PATH = "$basedir\lib"

& "$basedir\bin\llama-bench.exe" `
    --mmap 0 -m $basedir\..\..\gguf\$model `
    --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 `
    --batch-size 128 -ngl 99 --device $device $cli_opts

View File

@@ -0,0 +1,53 @@
#!/usr/bin/env pwsh

# Runs llama-cli.exe from the Snapdragon package with a fixed set of defaults.
# Any command-line arguments are forwarded to llama-cli after the defaults.
#
# Optional environment overrides:
#   M        model file name, resolved under ..\..\gguf relative to $basedir
#   D        value passed to --device (default: HTP0)
#   V        copied to GGML_HEXAGON_VERBOSE
#   SCHED    copied to GGML_SCHED_DEBUG; also appends -v to the forwarded options
#   PROF     copied to GGML_HEXAGON_PROFILE
#   OPSTAGE  copied to GGML_HEXAGON_OPSTAGE
#   NHVX     copied to GGML_HEXAGON_NHVX
#   NDEV     copied to GGML_HEXAGON_NDEV
#   HB       copied to GGML_HEXAGON_HOSTBUF

# Basedir on device
$basedir=".\pkg-snapdragon"

# Keep the forwarded options as an array so each one reaches the tool as its own argument.
$cli_opts=@($args)

$model="Llama-3.2-3B-Instruct-Q4_0.gguf"
if ($null -ne $env:M) {
    $model=$env:M
}

$device="HTP0"
if ($null -ne $env:D) {
    $device=$env:D
}

if ($null -ne $env:V) {
    $env:GGML_HEXAGON_VERBOSE=$env:V
}

if ($null -ne $env:SCHED) {
    # Append -v as a new array element. Interpolating the array into a string
    # ("$cli_opts -v") would collapse every option into one single argument.
    $env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts+="-v"
}

if ($null -ne $env:PROF) {
    $env:GGML_HEXAGON_PROFILE=$env:PROF
}

if ($null -ne $env:OPSTAGE) {
    $env:GGML_HEXAGON_OPSTAGE=$env:OPSTAGE
}

if ($null -ne $env:NHVX) {
    $env:GGML_HEXAGON_NHVX=$env:NHVX
}

if ($null -ne $env:NDEV) {
    $env:GGML_HEXAGON_NDEV=$env:NDEV
}

if ($null -ne $env:HB) {
    $env:GGML_HEXAGON_HOSTBUF=$env:HB
}

$env:ADSP_LIBRARY_PATH="$basedir\lib"

& "$basedir\bin\llama-cli.exe" `
    --no-mmap -m $basedir\..\..\gguf\$model `
    --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 `
    --ctx-size 8192 --ubatch-size 256 -fa on `
    -ngl 99 --device $device $cli_opts

View File

@@ -0,0 +1,53 @@
#!/usr/bin/env pwsh

# Runs llama-completion.exe from the Snapdragon package with a fixed set of defaults.
# Any command-line arguments are forwarded to llama-completion after the defaults.
#
# Optional environment overrides:
#   M        model file name, resolved under ..\..\gguf relative to $basedir
#   D        value passed to --device (default: HTP0)
#   V        copied to GGML_HEXAGON_VERBOSE
#   SCHED    copied to GGML_SCHED_DEBUG; also appends -v to the forwarded options
#   PROF     copied to GGML_HEXAGON_PROFILE
#   OPSTAGE  copied to GGML_HEXAGON_OPSTAGE
#   NHVX     copied to GGML_HEXAGON_NHVX
#   NDEV     copied to GGML_HEXAGON_NDEV
#   HB       copied to GGML_HEXAGON_HOSTBUF

# Basedir on device
$basedir=".\pkg-snapdragon"

# Keep the forwarded options as an array so each one reaches the tool as its own argument.
$cli_opts=@($args)

$model="Llama-3.2-3B-Instruct-Q4_0.gguf"
if ($null -ne $env:M) {
    $model=$env:M
}

$device="HTP0"
if ($null -ne $env:D) {
    $device=$env:D
}

if ($null -ne $env:V) {
    $env:GGML_HEXAGON_VERBOSE=$env:V
}

if ($null -ne $env:SCHED) {
    # Append -v as a new array element. Interpolating the array into a string
    # ("$cli_opts -v") would collapse every option into one single argument.
    $env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts+="-v"
}

if ($null -ne $env:PROF) {
    $env:GGML_HEXAGON_PROFILE=$env:PROF
}

if ($null -ne $env:OPSTAGE) {
    $env:GGML_HEXAGON_OPSTAGE=$env:OPSTAGE
}

if ($null -ne $env:NHVX) {
    $env:GGML_HEXAGON_NHVX=$env:NHVX
}

if ($null -ne $env:NDEV) {
    $env:GGML_HEXAGON_NDEV=$env:NDEV
}

if ($null -ne $env:HB) {
    $env:GGML_HEXAGON_HOSTBUF=$env:HB
}

$env:ADSP_LIBRARY_PATH="$basedir\lib"

& "$basedir\bin\llama-completion.exe" `
    --no-mmap -m $basedir\..\..\gguf\$model `
    --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 `
    --ctx-size 8192 --batch-size 256 -fa on `
    -ngl 99 -no-cnv --device $device $cli_opts

View File

@@ -0,0 +1,68 @@
#!/usr/bin/env pwsh

# Runs llama-mtmd-cli.exe from the Snapdragon package with a fixed set of defaults.
# Any command-line arguments are forwarded to llama-mtmd-cli after the defaults.
#
# Optional environment overrides:
#   M            model file name, resolved under ..\..\gguf relative to $basedir
#   MMPROJ       projector file name, resolved under the same gguf directory
#   IMG          image file name, resolved under the same gguf directory
#   D            value passed to --device (default: HTP0)
#   V            copied to GGML_HEXAGON_VERBOSE
#   SCHED        copied to GGML_SCHED_DEBUG; also appends -v to the forwarded options
#   PROF         copied to GGML_HEXAGON_PROFILE
#   OPSTAGE      copied to GGML_HEXAGON_OPSTAGE
#   NHVX         copied to GGML_HEXAGON_NHVX
#   NDEV         copied to GGML_HEXAGON_NDEV
#   HB           copied to GGML_HEXAGON_HOSTBUF
#   MTMD_DEVICE  copied to MTMD_BACKEND_DEVICE

# Basedir on device
$basedir=".\pkg-snapdragon"

# Keep the forwarded options as an array so each one reaches the tool as its own argument.
$cli_opts=@($args)

$model="gemma-3-4b-it-Q4_0.gguf"
if ($null -ne $env:M) {
    $model=$env:M
}

$mmproj="mmproj-F16.gguf"
if ($null -ne $env:MMPROJ) {
    $mmproj=$env:MMPROJ
}

# NOTE(review): when IMG is unset, $image stays empty and the --image argument
# below resolves to the gguf directory itself - confirm this is intended.
$image=""
if ($null -ne $env:IMG) {
    $image=$env:IMG
}

$device="HTP0"
if ($null -ne $env:D) {
    $device=$env:D
}

if ($null -ne $env:V) {
    $env:GGML_HEXAGON_VERBOSE=$env:V
}

if ($null -ne $env:SCHED) {
    # Append -v as a new array element. Interpolating the array into a string
    # ("$cli_opts -v") would collapse every option into one single argument.
    $env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts+="-v"
}

if ($null -ne $env:PROF) {
    $env:GGML_HEXAGON_PROFILE=$env:PROF
}

if ($null -ne $env:OPSTAGE) {
    $env:GGML_HEXAGON_OPSTAGE=$env:OPSTAGE
}

if ($null -ne $env:NHVX) {
    $env:GGML_HEXAGON_NHVX=$env:NHVX
}

if ($null -ne $env:NDEV) {
    $env:GGML_HEXAGON_NDEV=$env:NDEV
}

if ($null -ne $env:HB) {
    $env:GGML_HEXAGON_HOSTBUF=$env:HB
}

if ($null -ne $env:MTMD_DEVICE) {
    $env:MTMD_BACKEND_DEVICE=$env:MTMD_DEVICE
}

$env:ADSP_LIBRARY_PATH="$basedir\lib"

& "$basedir\bin\llama-mtmd-cli.exe" `
    --no-mmap -m $basedir\..\..\gguf\$model `
    --mmproj $basedir\..\..\gguf\$mmproj `
    --image $basedir\..\..\gguf\$image `
    --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 `
    --ctx-size 8192 --ubatch-size 256 -fa on `
    -ngl 99 --device $device -v $cli_opts

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env pwsh

# Runs an arbitrary tool from the Snapdragon package's bin directory.
#
# Usage: <this script> <tool> [tool arguments...]
#   The first argument names the executable under $basedir\bin; the remaining
#   arguments are forwarded to it.
#
# Optional environment overrides:
#   D        read into $device (not passed to the tool by this script)
#   V        copied to GGML_HEXAGON_VERBOSE
#   SCHED    copied to GGML_SCHED_DEBUG; also appends -v to the forwarded options
#   PROF     copied to GGML_HEXAGON_PROFILE
#   OPSTAGE  copied to GGML_HEXAGON_OPSTAGE
#   NHVX     copied to GGML_HEXAGON_NHVX
#   NDEV     copied to GGML_HEXAGON_NDEV
#   HB       copied to GGML_HEXAGON_HOSTBUF

# Basedir on device
$basedir=".\pkg-snapdragon"

if ($args.Count -eq 0) {
    Write-Host "No arguments provided.Expected the tool and argument to run."
    exit -1
}

$tool=$args[0]

# Everything after the tool name, kept as an array so each option reaches the
# tool as its own argument (@() guarantees an array even for a single option).
$cli_opts=@()
if ($args.Count -gt 1) {
    $cli_opts=@($args[1..($args.Count - 1)])
}

$device="HTP0"
if ($null -ne $env:D) {
    $device=$env:D
}

if ($null -ne $env:V) {
    $env:GGML_HEXAGON_VERBOSE=$env:V
}

if ($null -ne $env:SCHED) {
    # Append -v as a new array element. Interpolating the array into a string
    # ("$cli_opts -v") would collapse every option into one single argument.
    $env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts+="-v"
}

if ($null -ne $env:PROF) {
    $env:GGML_HEXAGON_PROFILE=$env:PROF
}

if ($null -ne $env:OPSTAGE) {
    $env:GGML_HEXAGON_OPSTAGE=$env:OPSTAGE
}

if ($null -ne $env:NHVX) {
    $env:GGML_HEXAGON_NHVX=$env:NHVX
}

if ($null -ne $env:NDEV) {
    $env:GGML_HEXAGON_NDEV=$env:NDEV
}

if ($null -ne $env:HB) {
    $env:GGML_HEXAGON_HOSTBUF=$env:HB
}

$env:ADSP_LIBRARY_PATH="$basedir\lib"

& "$basedir\bin\$tool" `
    $cli_opts

View File

@@ -0,0 +1,105 @@
# Running as Administrator is NOT strictly necessary for User-scope env vars,
# but it is recommended for creating directories in the C:\ root if permissions are restricted.

# Treat every PowerShell error as terminating.
$ErrorActionPreference = "Stop"

# --- Configuration ---
# Root directory that holds both SDKs.
$BaseDir = "C:\Qualcomm"

# SDK 1: Hexagon
$HexagonSdkVersion   = "6.4.0.2"
$HexagonToolsVersion = "19.0.04"
$HexagonUrl          = "https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v6.4.0.2/hexagon-sdk-v6.4.0.2-arm64-wos.tar.xz"
$HexagonParent       = Join-Path -Path $BaseDir -ChildPath "Hexagon_SDK"
# <parent>\<sdk version>
$HexagonSdkTarget    = Join-Path -Path $HexagonParent -ChildPath $HexagonSdkVersion
# <sdk target>\tools\HEXAGON_Tools\<tools version>
$HexagonToolsTarget  = Join-Path -Path $HexagonSdkTarget -ChildPath "\tools\HEXAGON_Tools\$HexagonToolsVersion"

# SDK 2: OpenCL
$OpenCLVersion = "2.3.2"
$OpenCLUrl     = "https://github.com/snapdragon-toolchain/opencl-sdk/releases/download/v2.3.2/adreno-opencl-sdk-v2.3.2-arm64-wos.tar.xz"
$OpenCLParent  = Join-Path -Path $BaseDir -ChildPath "OpenCL_SDK"
# <parent>\<sdk version>
$OpenCLTarget  = Join-Path -Path $OpenCLParent -ChildPath $OpenCLVersion
# --- Helper Function ---
# Downloads and unpacks one SDK archive unless its version directory already exists.
#
# Parameters:
#   Url        archive (.tar.xz) to download
#   ParentDir  directory holding all versions of this SDK; created when missing
#   TargetDir  version-specific directory; its presence means "already installed"
#   Name       human-readable SDK name used in messages
#
# On failure the target directory is removed again so the next run retries, and
# an error is reported through Write-Error.
function Install-QualcommSDK {
    param (
        [string]$Url,
        [string]$ParentDir,
        [string]$TargetDir,
        [string]$Name
    )

    # 1. Create Parent Directory
    if (-not (Test-Path -Path $ParentDir)) {
        Write-Host "Creating directory: $ParentDir" -ForegroundColor Cyan
        New-Item -Path $ParentDir -ItemType Directory -Force | Out-Null
    }

    # 2. Check for Specific Version Directory
    if (Test-Path -Path $TargetDir) {
        Write-Host "$Name ($TargetDir) already exists. Skipping download." -ForegroundColor Green
        return
    }

    Write-Host "$Name not found. preparing to download..." -ForegroundColor Yellow

    # Create the target directory
    New-Item -Path $TargetDir -ItemType Directory -Force | Out-Null

    # Define temporary archive path
    $TempFile = Join-Path $ParentDir "temp_sdk.tar.xz"

    try {
        # Download
        Write-Host "Downloading from: $Url"
        Invoke-WebRequest -Uri $Url -OutFile $TempFile

        # Untar
        # Note: We assume Windows includes tar.exe (Win 10 build 17063+)
        Write-Host "Extracting archive to $TargetDir..."

        # -C points at the PARENT of the target directory.
        # NOTE(review): presumably the archive carries a top-level folder named
        # after the version, so its contents land in $TargetDir - confirm.
        tar -xJvf $TempFile -C $TargetDir\..

        # tar is a native command: a non-zero exit code does not raise a
        # PowerShell error by itself, so check it explicitly.
        if ($LASTEXITCODE -ne 0) {
            throw "tar exited with code $LASTEXITCODE"
        }

        Write-Host "Extraction complete." -ForegroundColor Green
    }
    catch {
        # Cleanup target dir if failed so script tries again next time.
        # This must happen BEFORE Write-Error: when $ErrorActionPreference is
        # "Stop" (as this script sets it), Write-Error terminates the catch block.
        Remove-Item -Path $TargetDir -Recurse -Force -ErrorAction SilentlyContinue
        Write-Error "Failed to download or extract $Name. Error: $_"
    }
    finally {
        # Cleanup Archive
        if (Test-Path $TempFile) { Remove-Item $TempFile -Force }
    }
}
# --- Execution ---

# 1. Ensure Base C:\Qualcomm exists
if (-not (Test-Path $BaseDir)) {
    New-Item -Path $BaseDir -ItemType Directory -Force | Out-Null
}

# 2. Run Install Logic
Install-QualcommSDK -Url $HexagonUrl -ParentDir $HexagonParent -TargetDir $HexagonSdkTarget -Name "Hexagon SDK"
Install-QualcommSDK -Url $OpenCLUrl -ParentDir $OpenCLParent -TargetDir $OpenCLTarget -Name "OpenCL SDK"

# --- Environment Variables ---
Write-Host "`nSetting Environment Variables..." -ForegroundColor Cyan

# Variable name -> directory it points at, in the order they are set and reported.
$sdkVariables = [ordered]@{
    OPENCL_SDK_ROOT    = $OpenCLTarget
    HEXAGON_SDK_ROOT   = $HexagonSdkTarget
    HEXAGON_TOOLS_ROOT = $HexagonToolsTarget
}

foreach ($entry in $sdkVariables.GetEnumerator()) {
    # Persist for the current user ...
    [System.Environment]::SetEnvironmentVariable($entry.Key, $entry.Value, [System.EnvironmentVariableTarget]::User)
    # ... and set for the current session as well
    Set-Item -Path "Env:$($entry.Key)" -Value $entry.Value
    Write-Host "$($entry.Key) set to: $($entry.Value)"
}

158
scripts/sync-ggml-am.sh Executable file
View File

@@ -0,0 +1,158 @@
#!/usr/bin/env bash
#
# Synchronize ggml changes to llama.cpp
#
# Usage:
#
#   $ cd /path/to/llama.cpp
#   $ ./scripts/sync-ggml-am.sh -skip hash0,hash1,hash2... -C 3
#

set -e

# Work from the llama.cpp repository root (the parent of this script's directory).
sd=$(dirname "$0")
cd "$sd/../"

SRC_LLAMA=$(pwd)

# Resolve the sibling ggml checkout. `&&` makes the substitution fail when the
# directory is missing; with `cd ../ggml; pwd` the failed cd was ignored and the
# current directory was printed instead, so the check below could never trigger.
if ! SRC_GGML=$(cd ../ggml 2>/dev/null && pwd) || [ ! -d "$SRC_GGML" ]; then
    echo "ggml not found at $SRC_LLAMA/../ggml"
    exit 1
fi

# Commit recorded by the previous sync; only newer ggml commits are considered.
lc=$(cat "$SRC_LLAMA/scripts/sync-ggml.last")
echo "Syncing ggml changes since commit $lc"
# commit hashes to leave out (set with -skip)
to_skip=""

# context for git patches in number of lines (set with -C)
ctx="8"

# Consume the arguments left to right; each recognised flag takes the next word
# as its value. Words that match no flag are dropped without a message, and an
# empty argument ends the parsing.
while [ -n "$1" ]; do
    case "$1" in
        -skip ) shift; to_skip=$1 ;;
        -C )    shift; ctx=$1 ;;
    esac
    shift
done
cd $SRC_GGML

# Scratch files, both created in the llama.cpp root.
commits_file=$SRC_LLAMA/ggml-commits
patch_file=$SRC_LLAMA/ggml-src.patch

# Show what is about to be synced, then record the hashes oldest-first.
# Commits whose summary contains "(llama/<digits>)" are left out
# (presumably these originated in llama.cpp - TODO confirm).
git log --oneline $lc..HEAD
git log --oneline $lc..HEAD --reverse | grep -v "(llama/[0-9]*)" | cut -d' ' -f1 > $commits_file

# Nothing to do when the list is empty.
if [ ! -s $commits_file ]; then
    rm -v $commits_file
    echo "No new commits"
    exit 0
fi

# Start from a clean patch file.
if [ -f $patch_file ]; then
    rm -v $patch_file
fi

# Append one patch per commit, restricted to the paths that are synced.
while read c; do
    if [ -n "$to_skip" ] && [[ $to_skip == *"$c"* ]]; then
        echo "Skipping $c"
        continue
    fi

    git format-patch -U${ctx} -k $c~1..$c --stdout -- \
        CMakeLists.txt \
        src/CMakeLists.txt \
        cmake/BuildTypes.cmake \
        cmake/GitVars.cmake \
        cmake/common.cmake \
        cmake/ggml-config.cmake.in \
        src/ggml-cpu/cmake/FindSIMD.cmake \
        src/ggml* \
        include/ggml*.h \
        include/gguf*.h \
        tests/test-opt.cpp \
        tests/test-quantize-fns.cpp \
        tests/test-quantize-perf.cpp \
        tests/test-backend-ops.cpp \
        LICENSE \
        scripts/gen-authors.sh \
        >> $patch_file
done < $commits_file

rm -v $commits_file

# delete files if empty
if [ ! -s $patch_file ]; then
    rm -v $patch_file
fi
# Back in the llama.cpp root: rewrite the collected patch and apply it.
cd $SRC_LLAMA

if [ -f $SRC_LLAMA/ggml-src.patch ]; then
    # replace PR numbers
    #
    # Subject: some text (#1234)
    # Subject: some text (ggml/1234)
    # First pass: "(#N)" on Subject lines.
    cat ggml-src.patch | sed -e 's/^Subject: \(.*\) (#\([0-9]*\))/Subject: \1 (ggml\/\2)/' > ggml-src.patch.tmp
    mv ggml-src.patch.tmp ggml-src.patch

    # Second pass: any line that ends in "(#N)".
    cat ggml-src.patch | sed -e 's/^\(.*\) (#\([0-9]*\))$/\1 (ggml\/\2)/' > ggml-src.patch.tmp
    mv ggml-src.patch.tmp ggml-src.patch

    # replace filenames:
    #
    # CMakeLists.txt -> ggml/CMakeLists.txt
    # src/CMakeLists.txt -> ggml/src/CMakeLists.txt
    # cmake/BuildTypes.cmake -> ggml/cmake/BuildTypes.cmake
    # cmake/GitVars.cmake -> ggml/cmake/GitVars.cmake
    # cmake/common.cmake -> ggml/cmake/common.cmake
    # cmake/ggml-config.cmake.in -> ggml/cmake/ggml-config.cmake.in
    # src/ggml-cpu/cmake/FindSIMD.cmake -> ggml/src/ggml-cpu/cmake/FindSIMD.cmake
    #
    # src/ggml* -> ggml/src/ggml*
    #
    # include/ggml*.h -> ggml/include/ggml*.h
    # include/gguf*.h -> ggml/include/gguf*.h
    #
    # tests/test*.cpp -> tests/
    #
    # LICENSE -> LICENSE
    # scripts/gen-authors.sh -> scripts/gen-authors.sh

    # Each expression only matches a path that directly follows whitespace or an
    # " a/" / " b/" diff prefix. The last three expressions replace a match with
    # itself, so paths under tests/, LICENSE and scripts/gen-authors.sh keep
    # their location.
    cat ggml-src.patch | sed -E \
        -e 's/([[:space:]]| [ab]\/)CMakeLists.txt/\1ggml\/CMakeLists.txt/g' \
        -e 's/([[:space:]]| [ab]\/)src\/CMakeLists.txt/\1ggml\/src\/CMakeLists.txt/g' \
        -e 's/([[:space:]]| [ab]\/)cmake\/BuildTypes.cmake/\1ggml\/cmake\/BuildTypes.cmake/g' \
        -e 's/([[:space:]]| [ab]\/)cmake\/GitVars.cmake/\1ggml\/cmake\/GitVars.cmake/g' \
        -e 's/([[:space:]]| [ab]\/)cmake\/common.cmake/\1ggml\/cmake\/common.cmake/g' \
        -e 's/([[:space:]]| [ab]\/)cmake\/ggml-config.cmake.in/\1ggml\/cmake\/ggml-config.cmake.in/g' \
        -e 's/([[:space:]]| [ab]\/)src\/ggml-cpu\/cmake\/FindSIMD.cmake/\1ggml\/src\/ggml-cpu\/cmake\/FindSIMD.cmake/g' \
        -e 's/([[:space:]]| [ab]\/)src\/ggml(.*)/\1ggml\/src\/ggml\2/g' \
        -e 's/([[:space:]]| [ab]\/)include\/ggml(.*)\.h/\1ggml\/include\/ggml\2.h/g' \
        -e 's/([[:space:]]| [ab]\/)include\/gguf(.*)\.h/\1ggml\/include\/gguf\2.h/g' \
        -e 's/([[:space:]]| [ab]\/)tests\/(.*)\.cpp/\1tests\/\2.cpp/g' \
        -e 's/([[:space:]]| [ab]\/)LICENSE/\1LICENSE/g' \
        -e 's/([[:space:]]| [ab]\/)scripts\/gen-authors\.sh/\1scripts\/gen-authors.sh/g' \
        > ggml-src.patch.tmp
    mv ggml-src.patch.tmp ggml-src.patch

    # Apply the rewritten patch series with the same context size used to create it.
    git am -C${ctx} ggml-src.patch

    rm -v $SRC_LLAMA/ggml-src.patch
fi

# update last commit
# Records the current ggml HEAD as the starting point for the next sync.
cd $SRC_GGML
git log -1 --format=%H > $SRC_LLAMA/scripts/sync-ggml.last

echo "Done"

exit 0

1
scripts/sync-ggml.last Normal file
View File

@@ -0,0 +1 @@
628249b398293fc8d2fa81a449ae2920a02c6523

20
scripts/sync-ggml.sh Executable file
View File

@@ -0,0 +1,20 @@
#!/usr/bin/env bash

# Copies the ggml sources from a sibling ../ggml checkout into this repository.
# Paths are relative to the current directory, so run it from the llama.cpp root.

# Build files
cp -rpv ../ggml/CMakeLists.txt       ./ggml/CMakeLists.txt
cp -rpv ../ggml/src/CMakeLists.txt   ./ggml/src/CMakeLists.txt

cp -rpv ../ggml/cmake/*              ./ggml/cmake/
cp -rpv ../ggml/src/ggml-cpu/cmake/* ./ggml/src/ggml-cpu/cmake/

# Sources and public headers
cp -rpv ../ggml/src/ggml*            ./ggml/src/

cp -rpv ../ggml/include/ggml*.h      ./ggml/include/
cp -rpv ../ggml/include/gguf*.h      ./ggml/include/

# Tests shared with ggml
cp -rpv ../ggml/tests/test-opt.cpp           ./tests/test-opt.cpp
cp -rpv ../ggml/tests/test-quantize-fns.cpp  ./tests/test-quantize-fns.cpp
cp -rpv ../ggml/tests/test-quantize-perf.cpp ./tests/test-quantize-perf.cpp
cp -rpv ../ggml/tests/test-backend-ops.cpp   ./tests/test-backend-ops.cpp

# The LICENSE comes from the ggml checkout like every other file here
# (it was previously read from ../LICENSE, one directory too high).
cp -rpv ../ggml/LICENSE                ./LICENSE
cp -rpv ../ggml/scripts/gen-authors.sh ./scripts/gen-authors.sh

43
scripts/sync_vendor.py Executable file
View File

@@ -0,0 +1,43 @@
#!/usr/bin/env python3
import urllib.request
import os
import sys
import subprocess
# Git ref of cpp-httplib that the httplib files are fetched from.
HTTPLIB_VERSION = "refs/tags/v0.44.0"

# Maps each upstream URL to the path (relative to the current directory) it is saved to.
vendor = {
    "https://github.com/nlohmann/json/releases/latest/download/json.hpp":     "vendor/nlohmann/json.hpp",
    "https://github.com/nlohmann/json/releases/latest/download/json_fwd.hpp": "vendor/nlohmann/json_fwd.hpp",

    "https://raw.githubusercontent.com/nothings/stb/refs/heads/master/stb_image.h": "vendor/stb/stb_image.h",

    # not using latest tag to avoid this issue: https://github.com/ggml-org/llama.cpp/pull/17179#discussion_r2515877926
    # "https://github.com/mackron/miniaudio/raw/refs/tags/0.11.24/miniaudio.h": "vendor/miniaudio/miniaudio.h",
    "https://github.com/mackron/miniaudio/raw/9634bedb5b5a2ca38c1ee7108a9358a4e233f14d/miniaudio.h": "vendor/miniaudio/miniaudio.h",

    f"https://raw.githubusercontent.com/yhirose/cpp-httplib/{HTTPLIB_VERSION}/httplib.h": "httplib.h",
    f"https://raw.githubusercontent.com/yhirose/cpp-httplib/{HTTPLIB_VERSION}/split.py":  "split.py",
    f"https://raw.githubusercontent.com/yhirose/cpp-httplib/{HTTPLIB_VERSION}/LICENSE":   "vendor/cpp-httplib/LICENSE",

    "https://raw.githubusercontent.com/sheredom/subprocess.h/b49c56e9fe214488493021017bf3954b91c7c1f5/subprocess.h": "vendor/sheredom/subprocess.h",
}

# Files that are downloaded only as input for the split step and removed afterwards.
_scratch_files = ("split.py", "httplib.h")

try:
    for url, filename in vendor.items():
        print(f"downloading {url} to {filename}") # noqa: NP100
        urllib.request.urlretrieve(url, filename)

    print("Splitting httplib.h...") # noqa: NP100
    try:
        # split.py is run from the current directory, where httplib.h was saved.
        subprocess.check_call([
            sys.executable, "split.py",
            "--extension", "cpp",
            "--out", "vendor/cpp-httplib"
        ])
    except Exception as e:
        print(f"Error: {e}") # noqa: NP100
        sys.exit(1)
finally:
    # The cleanup covers the downloads too, so a download that fails after
    # httplib.h / split.py were fetched does not leave them behind.
    for scratch in _scratch_files:
        if os.path.exists(scratch):
            os.remove(scratch)

379
scripts/tool_bench.py Executable file
View File

@@ -0,0 +1,379 @@
#!/usr/bin/env uv run
'''
Simplistic tool call benchmarks for llama-server and ollama.
Essentially runs the tests at tools/server/tests/unit/test_tool_call.py N times, at different temperatures and on different backends (current llama-server, baseline llama-server and ollama),
and plots the results of multiple runs (from same .jsonl file or multiple ones) as a success rate heatmap.
Simple usage example:
cmake -B build && cmake --build build --config Release -j -t llama-server
export LLAMA_SERVER_BIN_PATH=$PWD/build/bin/llama-server
export LLAMA_CACHE=${LLAMA_CACHE:-$HOME/Library/Caches/llama.cpp}
./scripts/tool_bench.py run --n 10 --temp -1 --temp 0 --temp 1 --temp 2 --temp 5 --llama-baseline $PWD/buildMaster/bin/llama-server --output qwen14b.jsonl --hf bartowski/Qwen2.5-14B-Instruct-GGUF:Q4_K_L
./scripts/tool_bench.py run --n 30 --temp -1 --temp 0 --temp 1 --model "Qwen 2.5 1.5B Q4_K_M" --output qwen1.5b.jsonl --hf bartowski/Qwen2.5-1.5B-Instruct-GGUF --ollama qwen2.5:1.5b-instruct-q4_K_M
./scripts/tool_bench.py run --n 30 --temp -1 --temp 0 --temp 1 --model "Qwen 2.5 Coder 7B Q4_K_M" --output qwenc7b.jsonl --hf bartowski/Qwen2.5-Coder-7B-Instruct-GGUF --ollama qwen2.5-coder:7b
./scripts/tool_bench.py plot *.jsonl # Opens window w/ heatmap
./scripts/tool_bench.py plot qwen*.jsonl --output qwen.png # Saves heatmap to qwen.png
(please see ./scripts/tool_bench.sh for a more complete example)
'''
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "pytest",
# "pandas",
# "matplotlib",
# "seaborn",
# "requests",
# "wget",
# "typer",
# ]
# ///
from contextlib import contextmanager
from pathlib import Path
import re
from statistics import mean, median
from typing import Annotated, Dict, List, Optional, Tuple
import atexit
import json
import logging
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import subprocess
import sys
import time
import typer
sys.path.insert(0, Path(__file__).parent.parent.as_posix())
if True:
from tools.server.tests.utils import ServerProcess
from tools.server.tests.unit.test_tool_call import do_test_calc_result, do_test_hello_world, do_test_weather
@contextmanager
def scoped_server(sp: ServerProcess):
    """Context manager that stops ``sp`` when the ``with`` block is left.

    The server is stopped on normal exit and when the block raises. While the
    block is running, an ``atexit`` hook acts as a safety net in case the
    interpreter exits first. ``sp.stop()`` is called at most once.

    Args:
        sp: Object exposing a ``stop()`` method; it is yielded unchanged.
    """
    def stop():
        nonlocal sp
        if sp is not None:
            sp.stop()
            sp = None  # type: ignore

    atexit.register(stop)
    try:
        yield sp
    finally:
        # Runs for exceptions too; previously a failing block left the server
        # running until interpreter exit.
        stop()
        # The hook has nothing left to do; drop it so hooks do not pile up.
        atexit.unregister(stop)
# Log at INFO and above, prefixing each message with a timestamp and the level name.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Typer application object; functions decorated with @app.command() are registered on it.
app = typer.Typer()
@app.command()
def plot(files: List[Path], output: Optional[Path] = None, test_regex: Optional[str] = None, server_regex: Optional[str] = None):
lines: List[Dict] = []
for file in files:
if not file.exists():
logger.error(f"File not found: {file}")
continue
try:
with file.open() as f:
raw_data = f.read()
logger.info(f"Reading {file} ({len(raw_data)} bytes)")
for line_num, line in enumerate(raw_data.split('\n'), 1):
line = line.strip()
if not line:
continue
try:
record = json.loads(line)
lines.append(record)
except json.JSONDecodeError as e:
logger.warning(f"Invalid JSON at {file}:{line_num} - {e}")
except Exception as e:
logger.error(f"Error processing {file}: {e}")
if not lines:
raise Exception("No valid data was loaded")
data_dict: Dict[Tuple, float] = {}
models: List[str] = []
temps = set()
tests = set()
server_names = set()
total_counts = set()
for rec in lines:
try:
model = rec["model"]
temp = rec["temp"]
server_name = rec["server_name"]
test = rec["test"]
success = rec["success_ratio"]
success_count = rec["success_count"]
failure_count = rec["failure_count"]
total_count = success_count + failure_count
total_counts.add(total_count)
if test_regex and not re.search(test_regex, test):
continue
if server_regex and not re.search(server_regex, server_name):
continue
data_dict[(model, temp, server_name, test)] = success
if model not in models:
models.append(model)
temps.add(temp)
tests.add(test)
server_names.add(server_name)
except KeyError as e:
logger.warning(f"Missing required field in record: {e}")
if len(total_counts) > 1:
logger.warning(f"Total counts are not consistent: {total_counts}")
# Sort the collected values
temps = list(sorted(temps, key=lambda x: x if x is not None else -1))
tests = list(sorted(tests))
server_names = list(sorted(server_names))
logger.info(f"Processed {len(lines)} lines")
logger.info(f"Found {len(data_dict)} valid data points")
logger.info(f"Models: {models}")
logger.info(f"Temperatures: {temps}")
logger.info(f"Tests: {tests}")
logger.info(f"Servers: {server_names}")
matrix: list[list[float]] = []
index: list[str] = []
all_cols = [
(server_name, test)
for server_name in server_names
for test in tests
]
for model in models:
for temp in temps:
index.append(f"{model} @ {temp}")
row_vals = [
data_dict.get((model, temp, server_name, test), np.nan)
for server_name, test in all_cols
]
matrix.append(row_vals)
columns: list[str] = [f"{server_name}\n{test}" for server_name, test in all_cols]
df = pd.DataFrame(matrix, index=np.array(index), columns=np.array(columns))
plt.figure(figsize=(12, 6))
sns.heatmap(
df, annot=True, cmap="RdYlGn", vmin=0.0, vmax=1.0, cbar=True, fmt=".2f", center=0.5, square=True, linewidths=0.5,
cbar_kws={"label": "Success Ratio"},
)
plt.title(f"Tool Call Bench (n = {str(min(total_counts)) if len(total_counts) == 1 else f'{min(total_counts)}-{max(total_counts)}'})\nSuccess Ratios by Server & Test", pad=20)
plt.xlabel("Server & Test", labelpad=10)
plt.ylabel("Model @ Temperature", labelpad=10)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
if output:
plt.savefig(output, dpi=300, bbox_inches='tight')
logger.info(f"Plot saved to {output}")
else:
plt.show()
@app.command()
def run(
    output: Annotated[Path, typer.Option(help="Output JSON file")],
    model: Annotated[Optional[str], typer.Option(help="Name of the model to test (server agnostic)")] = None,
    hf: Annotated[Optional[str], typer.Option(help="GGUF huggingface model repo id (+ optional quant) to test w/ llama-server")] = None,
    chat_template: Annotated[Optional[str], typer.Option(help="Chat template override for llama-server")] = None,
    chat_template_file: Annotated[Optional[str], typer.Option(help="Chat template file override for llama-server")] = None,
    ollama: Annotated[Optional[str], typer.Option(help="Ollama model tag to test")] = None,
    llama_baseline: Annotated[Optional[str], typer.Option(help="llama-server baseline binary path to use as baseline")] = None,
    n: Annotated[int, typer.Option(help="Number of times to run each test")] = 10,
    temp: Annotated[Optional[List[float]], typer.Option(help="Set of temperatures to test")] = None,
    top_p: Annotated[Optional[float], typer.Option(help="top_p")] = None,
    top_k: Annotated[Optional[int], typer.Option(help="top_k")] = None,
    ctk: Annotated[Optional[str], typer.Option(help="ctk")] = None,
    ctv: Annotated[Optional[str], typer.Option(help="ctv")] = None,
    fa: Annotated[Optional[bool], typer.Option(help="fa")] = None,
    seed: Annotated[Optional[int], typer.Option(help="Random seed")] = None,
    port: Annotated[int, typer.Option(help="llama-server port")] = 8084,
    force: Annotated[bool, typer.Option(help="Force overwrite of output file")] = False,
    append: Annotated[bool, typer.Option(help="Append to output file")] = False,

    test_hello_world: Annotated[bool, typer.Option(help="Whether to run the hello world test")] = True,
    test_weather: Annotated[bool, typer.Option(help="Whether to run the weather test")] = True,
    test_calc_result: Annotated[bool, typer.Option(help="Whether to run the calc result test")] = False,
):
    # Runs the selected tool-call tests n times per temperature against
    # llama-server (plus an optional baseline binary) and/or ollama, and writes
    # one JSON line per (server, temperature, test) to `output`.
    #
    # A negative --temp value means "leave the temperature unset" and is
    # recorded as null. When --model is omitted, the name is derived from --hf
    # (text after the last "/") or, failing that, from --ollama.

    n_predict = 512  # High because of DeepSeek R1
    n_ctx = 2048

    if model is None:
        if hf is not None:
            model = hf.split("/")[-1]
        elif ollama is not None:
            model = ollama

    # Refuse to clobber an existing output file unless --force or --append is given.
    assert force or append or not output.exists(), f"Output file already exists: {output}; use --force to overwrite"

    with output.open('a' if append else 'w') as output_file:

        def run(server: ServerProcess, *, server_name: str, model_id: str, temp: Optional[float] = None, output_kwargs={}, request_kwargs={}):
            # Runs every enabled test n times against `server` at one
            # temperature and appends one result record per test.
            # Work on a copy so the caller's dict (and the shared default) is never mutated.
            request_kwargs = {**request_kwargs}
            if temp is not None:
                request_kwargs['temperature'] = temp
            if top_p is not None:
                request_kwargs['top_p'] = top_p
            if top_k is not None:
                request_kwargs['top_k'] = top_k
            if seed is not None:
                request_kwargs['seed'] = seed

            request_kwargs['cache_prompt'] = False

            tests = {}
            if test_hello_world:
                tests["hello world"] = lambda server: do_test_hello_world(server, **request_kwargs)
            if test_weather:
                tests["weather"] = lambda server: do_test_weather(server, **request_kwargs)
            if test_calc_result:
                tests["calc result"] = lambda server: do_test_calc_result(server, None, 512, **request_kwargs)

            for test_name, test in tests.items():
                success_count = 0
                failure_count = 0
                failures = []
                success_times = []
                failure_times = []
                logger.info(f"Running {test_name} ({server_name}, {model}): ")
                for i in range(n):
                    start_time = time.time()

                    def elapsed():
                        return time.time() - start_time

                    try:
                        test(server)
                        success_times.append(elapsed())
                        success_count += 1
                        logger.info('success')
                    except Exception as e:
                        # Any exception counts as a failed attempt; its message is kept.
                        logger.error(f'failure: {e}')
                        failure_count += 1
                        failure_times.append(elapsed())
                        failures.append(str(e))
                        # import traceback
                        # traceback.print_exc()
                output_file.write(json.dumps({**output_kwargs, **dict(
                    model=model,
                    server_name=server_name,
                    model_id=model_id,
                    test=test_name,
                    # Use this call's own temperature instead of reaching for
                    # the loop variable of the enclosing function.
                    temp=temp,
                    top_p=top_p,
                    top_k=top_k,
                    ctk=ctk,
                    ctv=ctv,
                    seed=seed,
                    success_ratio=float(success_count) / n,
                    avg_time=mean(success_times + failure_times),
                    median_time=median(success_times + failure_times),
                    success_count=success_count,
                    success_times=success_times,
                    failure_count=failure_count,
                    failure_times=failure_times,
                    failures=list(set(failures)),
                )}) + '\n')
                # Flush after every record so partial results survive an interrupted run.
                output_file.flush()

        # Negative temperatures map to None, i.e. "do not send a temperature".
        for t in [None] if temp is None else [t if t >= 0 else None for t in temp]:
            if hf is not None:

                # (display name, binary path); a path of None is passed through as server_path.
                servers: list[Tuple[str, Optional[str]]] = [('llama-server', None)]
                if llama_baseline is not None:
                    servers.append(('llama-server (baseline)', llama_baseline))

                for server_name, server_path in servers:
                    server = ServerProcess()
                    server.n_ctx = n_ctx
                    server.n_slots = 1
                    server.jinja = True
                    server.ctk = ctk
                    server.ctv = ctv
                    # An unset --fa (None) is treated like False here.
                    server.fa = "on" if fa else "off"
                    server.n_predict = n_predict
                    server.model_hf_repo = hf
                    server.model_hf_file = None
                    server.chat_template = chat_template
                    server.chat_template_file = chat_template_file
                    server.server_path = server_path
                    if port is not None:
                        server.server_port = port
                    # server.debug = True

                    with scoped_server(server):
                        server.start(timeout_seconds=15 * 60)
                        for ignore_chat_grammar in [False]:
                            run(
                                server,
                                server_name=server_name,
                                model_id=hf,
                                temp=t,
                                output_kwargs=dict(
                                    chat_template=chat_template,
                                    chat_template_file=chat_template_file,
                                ),
                                request_kwargs=dict(
                                    ignore_chat_grammar=ignore_chat_grammar,
                                ),
                            )

            if ollama is not None:
                # No server.start() here: the object only carries host/port for
                # the requests (presumably an ollama daemon is already
                # listening on 11434 - TODO confirm).
                server = ServerProcess()
                server.server_port = 11434
                server.server_host = "localhost"
                subprocess.check_call(["ollama", "pull", ollama])

                with scoped_server(server):
                    run(
                        server,
                        server_name="ollama",
                        model_id=ollama,
                        temp=t,
                        output_kwargs=dict(
                            chat_template=None,
                            chat_template_file=None,
                        ),
                        request_kwargs=dict(
                            model=ollama,
                            max_tokens=n_predict,
                            num_ctx = n_ctx,
                        ),
                    )
# Dispatch to the Typer application only when the file is executed as a script.
if __name__ == "__main__":
    app()

66
scripts/tool_bench.sh Executable file
View File

@@ -0,0 +1,66 @@
#!/usr/bin/env bash
#
# Builds the project, runs scripts/tool_bench.py for a list of models and then
# plots every ../*.jsonl result file to a .png next to it.
# Extra arguments given to this script are appended to the common arguments of
# every run.
set -euo pipefail

cmake --build build -j

export LLAMA_CACHE=${LLAMA_CACHE:-$HOME/Library/Caches/llama.cpp}
export LLAMA_SERVER_BIN_PATH=$PWD/build/bin/llama-server

if [ ! -x "$LLAMA_SERVER_BIN_PATH" ]; then
    echo "Could not find llama-server binary at $LLAMA_SERVER_BIN_PATH"
    exit 1
fi
if [ ! -d "$LLAMA_CACHE" ]; then
    echo "Could not find llama cache at $LLAMA_CACHE, please set LLAMA_CACHE explicitly."
    exit 1
fi

# Arguments shared by every run below. The llama-server found on PATH is used
# as the baseline binary.
export ARGS=(
    --llama-baseline="$(which llama-server)"
    --n 30
    --temp -1 # Leaves temperature parameter unset (use the server's default, e.g. 0.6 for ollama)
    --temp 0
    --temp 0.5
    --temp 0.75
    --temp 1
    --temp 1.5
    --temp 2
    --temp 5
    "$@"
)

# "${ARGS[@]}" is quoted so every array element (including caller-supplied
# arguments that contain spaces) is passed on as exactly one argument.
./scripts/tool_bench.py run "${ARGS[@]}" --model "Qwen 2.5 Coder 0.5B Q4_K_M" --output ../qwenc0.5b.jsonl --hf bartowski/Qwen2.5-Coder-0.5B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:0.5b-instruct-q4_K_M
./scripts/tool_bench.py run "${ARGS[@]}" --model "Qwen 2.5 Coder 1.5B Q4_K_M" --output ../qwenc1.5b.jsonl --hf bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:1.5b-instruct-q4_K_M
./scripts/tool_bench.py run "${ARGS[@]}" --model "Qwen 2.5 Coder 3B Q4_K_M" --output ../qwenc3b.jsonl --hf bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:3b-instruct-q4_K_M
./scripts/tool_bench.py run "${ARGS[@]}" --model "Qwen 2.5 Coder 7B Q4_K_M" --output ../qwenc7b.jsonl --hf bartowski/Qwen2.5-Coder-7B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:7b-instruct-q4_K_M
./scripts/tool_bench.py run "${ARGS[@]}" --model "Qwen 2.5 Coder 32B Q4_K_M" --output ../qwenc32b.jsonl --hf bartowski/Qwen2.5-Coder-32B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:32B-instruct-q4_K_M
./scripts/tool_bench.py run "${ARGS[@]}" --model "Qwen 2.5 1.5B Q4_K_M" --output ../qwen1.5b.jsonl --hf bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M --ollama qwen2.5:1.5b-instruct-q4_K_M
./scripts/tool_bench.py run "${ARGS[@]}" --model "Qwen 2.5 3B Q4_K_M" --output ../qwen3b.jsonl --hf bartowski/Qwen2.5-3B-Instruct-GGUF:Q4_K_M --ollama qwen2.5:3b-instruct-q4_K_M
./scripts/tool_bench.py run "${ARGS[@]}" --model "Qwen 2.5 7B Q4_K_M" --output ../qwen7b.jsonl --hf bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M --ollama qwen2.5:7b-instruct-q4_K_M
./scripts/tool_bench.py run "${ARGS[@]}" --model "Llama 3.2 Instruct 1B Q4_K_M" --output ../llama1b.jsonl --hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M --ollama llama3.2:1b-instruct-q4_K_M
./scripts/tool_bench.py run "${ARGS[@]}" --model "Llama 3.2 Instruct 3B Q4_K_M" --output ../llama3b.jsonl --hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M --ollama llama3.2:3b-instruct-q4_K_M
./scripts/tool_bench.py run "${ARGS[@]}" --model "Llama 3.1 Instruct 8B Q4_K_M" --output ../llama8b.jsonl --hf bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M --ollama llama3.1:8b-instruct-q4_K_M
./scripts/tool_bench.py run "${ARGS[@]}" --model "Llama 3.3 70B Q4_K_M" --output ../llama70b.jsonl --hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M
./scripts/tool_bench.py run "${ARGS[@]}" --model "Mistral Nemo Q4_K_M" --output ../nemo.jsonl --hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M --ollama mistral-nemo:12b-instruct-2407-q4_K_M
./scripts/tool_bench.py run "${ARGS[@]}" --model "Hermes 3 Llama 3.1 8B Q4_K_M" --output ../hermes3.jsonl --hf bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M --ollama hermes3:8b-llama3.1-q4_K_M --chat-template-file <( python scripts/get_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use )
./scripts/tool_bench.py run "${ARGS[@]}" --model "Hermes 2 Pro Llama 3 8B Q4_K_M" --output ../hermes2.jsonl --hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M --ollama hermes2:8b-llama3-q4_K_M --chat-template-file <( python scripts/get_chat_template.py NousResearch/Hermes-2-Pro-Llama-3-8B tool_use )
./scripts/tool_bench.py run "${ARGS[@]}" --model "Functionary Small V3.2 Q4_K_M" --output ../funct3.2.jsonl --hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
./scripts/tool_bench.py run "${ARGS[@]}" --model "FireFunction V2 IQ1_M" --output ../firef2.jsonl --hf bartowski/firefunction-v2-GGUF:IQ1_M --chat-template-file <( python scripts/get_chat_template.py fireworks-ai/llama-3-firefunction-v2 tool_use )
./scripts/tool_bench.py run "${ARGS[@]}" --model "Command R7B 12-2024 Q6_K_L" --output ../c4ai.jsonl --hf bartowski/c4ai-command-r7b-12-2024-GGUF:Q6_K_L --chat-template-file <( python scripts/get_chat_template.py CohereForAI/c4ai-command-r7b-12-2024 tool_use )
./scripts/tool_bench.py run "${ARGS[@]}" --model "Gemma 2 2B Q8_0" --output ../gemma2.jsonl --hf bartowski/gemma-2-2b-it-GGUF:Q8_0
./scripts/tool_bench.py run "${ARGS[@]}" --model "Phi 4 Instruct Q4_K_M" --output ../phi4.jsonl --hf bartowski/phi-4-GGUF:Q4_K_M # --ollama phi4
./scripts/tool_bench.py run "${ARGS[@]}" --model "Phi 3.5 Mini Instruct Q4_K_M" --output ../phi3.5.jsonl --hf bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M # --ollama phi3.5:3.8b-mini-instruct-q4_K_M
# ./scripts/tool_bench.py run "${ARGS[@]}" --model "DeepSeek R1 Distill Qwen 7B Q6_K_L" --output ../dsqw7.jsonl --hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K_L --chat-template-file <( python scripts/get_chat_template.py NousResearch/DeepSeek-R1-Distill-Qwen-7B tool_use )
# ./scripts/tool_bench.py run "${ARGS[@]}" --model "DeepSeek R1 Distill Qwen 32B Q4_K_M" --output ../dsqw32.jsonl --hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M --chat-template-file <( python scripts/get_chat_template.py NousResearch/DeepSeek-R1-Distill-Qwen-32B tool_use )

# Plot each result file on its own; a failing plot does not abort the loop.
for f in ../*.jsonl; do
    ./scripts/tool_bench.py plot "$f" --output "${f%.jsonl}.png" || true
done

View File

@@ -0,0 +1,84 @@
#!/usr/bin/env python3
import logging
import os
import hashlib
logger = logging.getLogger("verify-checksum-models")
def sha256sum(file):
    """Return the hex-encoded SHA-256 digest of the file at path ``file``.

    The file is opened unbuffered and read into one reusable 16 MB buffer,
    so the whole content is never held in memory at once.
    """
    chunk_bytes = 16 * 1024 * 1024  # 16 MB block size
    digest = hashlib.sha256()
    # One preallocated buffer, refilled in place by readinto() on every pass
    window = memoryview(bytearray(chunk_bytes))
    with open(file, 'rb', buffering=0) as stream:
        filled = stream.readinto(window)
        while filled:  # a falsy byte count ends the loop
            # Hash only the part of the buffer that was filled by this read
            digest.update(window[:filled])
            filled = stream.readinto(window)
    return digest.hexdigest()
# Script body: verify every file listed in <repo>/SHA256SUMS and print a table
# with one row per entry ("V" = checksum matches, "X" = file is missing).

# Without a configured handler the INFO progress messages below are dropped;
# basicConfig sends them to stderr, leaving the stdout table untouched.
logging.basicConfig(level=logging.INFO)

# Define the path to the llama directory (parent folder of script directory)
llama_path = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))

# Define the file with the list of hashes and filenames
hash_list_file = os.path.join(llama_path, "SHA256SUMS")

# Check if the hash list file exists
if not os.path.exists(hash_list_file):
    logger.error(f"Hash list file not found: {hash_list_file}")
    # SystemExit does not depend on the exit() helper injected by the site module
    raise SystemExit(1)

# Read the hash file content and split it into an array of lines
with open(hash_list_file, "r") as f:
    hash_list = f.read().splitlines()

# Create an array to store the results
results = []

# Loop over each line in the hash list
for line in hash_list:
    # Blank lines carry no entry; skip them instead of failing to unpack
    if not line.strip():
        continue

    # Split the line into hash and filename at the first space
    hash_value, separator, filename = line.partition(" ")

    # sha256sum writes "<hash> <flag><name>" where the flag is ' ' (text mode)
    # or '*' (binary mode); drop the flag when present so both the two-space
    # and the single-space layouts yield the bare file name
    if filename[:1] in (" ", "*"):
        filename = filename[1:]

    if not separator or not filename:
        logger.warning(f"Skipping malformed line in {hash_list_file}: {line!r}")
        continue

    # Get the full path of the file by joining the llama path and the filename
    file_path = os.path.join(llama_path, filename)

    # Informing user of the progress of the integrity check
    logger.info(f"Verifying the checksum of {file_path}")

    if os.path.exists(file_path):
        file_missing = ""
        # Compare the SHA256 checksum of the file with the expected hash
        valid_checksum = "V" if sha256sum(file_path) == hash_value else ""
    else:
        file_missing = "X"
        valid_checksum = ""

    # Add the results to the array
    results.append({
        "filename": filename,
        "valid checksum": valid_checksum,
        "file missing": file_missing
    })

# Print column headers for results table
print("filename".ljust(40) + "valid checksum".center(20) + "file missing".center(20)) # noqa: NP100
print("-" * 80) # noqa: NP100

# Output the results as a table
for r in results:
    print(f"{r['filename']:40} {r['valid checksum']:^20} {r['file missing']:^20}") # noqa: NP100

58
scripts/wc2wt.sh Executable file
View File

@@ -0,0 +1,58 @@
#!/usr/bin/env bash
# initialize a new worktree from a branch name:
#
# - creates a new branch from current HEAD
# - creates a new worktree in a parent folder, suffixed with the branch name
#
# sample usage:
# ./scripts/wc2wt.sh gg/new-feature-foo-bar
# ./scripts/wc2wt.sh gg/new-feature-foo-bar opencode
# ./scripts/wc2wt.sh gg/new-feature-foo-bar "cmake -B build && cmake --build build"
# ./scripts/wc2wt.sh gg/new-feature-foo-bar "bash -l"
# Print the expected command-line syntax and terminate with exit status 1.
function usage() {
    printf '%s\n' "usage: $0 <branch_name> [cmd]"
    exit 1
}
# check we are in the right directory
if [[ ! -f "scripts/wc2wt.sh" ]]; then
    echo "error: this script must be run from the root of the repository"
    exit 1
fi

if [[ $# -lt 1 || $# -gt 2 ]]; then
    usage
fi

BRANCH=$1

if [[ -z "$BRANCH" ]]; then
    echo "error: branch name must not be empty"
    exit 1
fi

# remember the original checkout; quoted so a path containing spaces is not word-split
og_path=$(pwd)
dir=$(basename "$og_path")

# sanitize branch name for directory name (replace / with -)
dir_suffix=${BRANCH//\//-}

# the new worktree lives next to the current checkout
wt_dir="../$dir-$dir_suffix"

# stop if the worktree cannot be created (e.g. the branch or directory already
# exists); otherwise the steps below would run inside the original checkout
if ! git worktree add -b "$BRANCH" "$wt_dir" HEAD; then
    echo "error: failed to create worktree for branch '$BRANCH'"
    exit 1
fi

wt_path=$(cd "$wt_dir" && pwd) || exit 1

echo "git worktree created in $wt_path"

cd "$wt_path" || exit 1

# pi agent setup in the worktree
if [[ -f "$og_path/.pi/SYSTEM.md" && ! -f ".pi/SYSTEM.md" ]]; then
    mkdir -p .pi
    ln -sfn "$og_path/.pi/SYSTEM.md" .pi/SYSTEM.md
fi

if [[ $# -eq 2 ]]; then
    echo "executing: $2"
    # $2 is a shell command line given by the invoking user, so it is evaluated
    # as-is inside the new worktree
    eval "$2"
fi

16
scripts/xxd.cmake Normal file
View File

@@ -0,0 +1,16 @@
# CMake equivalent of `xxd -i ${INPUT} ${OUTPUT}`
# Usage: cmake -DINPUT=tools/server/public/index.html -DOUTPUT=tools/server/index.html.hpp -P scripts/xxd.cmake
#
# Writes to OUTPUT a C array `unsigned char <name>[]` holding the bytes of
# INPUT, plus `unsigned int <name>_len` with the byte count. <name> is the
# file name of INPUT turned into a C identifier.

# Empty defaults; values passed with -D on the command line are kept.
set(INPUT "" CACHE STRING "Input File")
set(OUTPUT "" CACHE STRING "Output File")

# Build the identifier from the file name only (directory part dropped).
# MAKE_C_IDENTIFIER maps every non-alphanumeric character to '_' (and prefixes
# '_' when the name starts with a digit), so names containing characters other
# than '.' and '-' also produce a valid C identifier.
get_filename_component(filename "${INPUT}" NAME)
string(MAKE_C_IDENTIFIER "${filename}" name)

# Read the file as one hex string (two lowercase hex digits per byte) and turn
# every byte into a "0xNN," array element.
file(READ "${INPUT}" hex_data HEX)
string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," hex_sequence "${hex_data}")

# Quoted: for an empty input file hex_data is empty, and an unquoted expansion
# would drop the argument and make string(LENGTH) fail.
string(LENGTH "${hex_data}" hex_len)
math(EXPR len "${hex_len} / 2")

file(WRITE "${OUTPUT}" "unsigned char ${name}[] = {${hex_sequence}};\nunsigned int ${name}_len = ${len};\n")