diff --git a/.github/workflows/continuous.yaml b/.github/workflows/continuous.yaml index bd8cf1aa..f532ae2c 100644 --- a/.github/workflows/continuous.yaml +++ b/.github/workflows/continuous.yaml @@ -19,6 +19,7 @@ jobs: #################### Unix: + if: false # DIAGNOSTIC: temporarily disabled to iterate fast on Windows ARM64 Debug Eigen alignment investigation name: ${{ matrix.os }} (${{ matrix.compiler }}, ${{ matrix.config }}, ${{ matrix.sanitizer }}Sanitizer) runs-on: ${{ matrix.os }} strategy: @@ -195,14 +196,17 @@ jobs: #################### Windows: - name: windows-2025 (${{ matrix.config }}) - runs-on: windows-2025 + name: ${{ matrix.os }} (${{ matrix.config }}) + runs-on: ${{ matrix.os }} env: SCCACHE_GHA_ENABLED: "true" strategy: fail-fast: false matrix: - config: [Release, Debug] + # DIAGNOSTIC: only the failing config (Win ARM64 Debug) while we hunt the + # Eigen alignment issue in serialization2. + os: [windows-11-arm] + config: [Debug] steps: - name: Show disk space run: Get-PSDrive @@ -214,9 +218,26 @@ jobs: fetch-depth: 0 - uses: actions/setup-python@v5 + if: matrix.os != 'windows-11-arm' with: python-version: 3.13 + # On windows-11-arm the hostedtoolcache Python ships only the interpreter binary; + # it lacks include/ headers and libs/python3XX.lib, so CMake cannot satisfy the + # Development.Module component. Use uv instead: it pulls python-build-standalone + # distributions which include full dev files. uv defaults to x64-emulated Python + # on ARM64 Windows (uv PR #13724), so we must pin the aarch64 specifier. 
+ - uses: astral-sh/setup-uv@v6 + if: matrix.os == 'windows-11-arm' + + - name: Install native ARM64 Python via uv + if: matrix.os == 'windows-11-arm' + shell: pwsh + run: | + uv python install cpython-3.13-windows-aarch64 + $pyExe = (uv python find cpython-3.13-windows-aarch64).Trim() + echo "PYTHON_ARM64_EXE=$($pyExe -replace '\\', '/')" >> $env:GITHUB_ENV + - name: Install Ninja uses: seanmiddleditch/gha-setup-ninja@master @@ -227,10 +248,6 @@ jobs: # starving sccache of requests until the default 600s timeout kills the server. echo "SCCACHE_IDLE_TIMEOUT=0" >> ${env:GITHUB_ENV} - - name: Select embree isa (Windows) - if: runner.os == 'Windows' - run: echo "embree_max_isa=AVX2" >> ${env:GITHUB_ENV} - - name: Get number of CPU cores uses: SimenB/github-actions-cpu-cores@v1 id: cpu-cores @@ -238,22 +255,54 @@ jobs: - name: Sccache uses: mozilla-actions/sccache-action@v0.0.10 - # We run configure + build in the same step, since they both need to call VsDevCmd - # Also, cmd uses ^ to break commands into multiple lines (in powershell this is `) - - name: Configure and build - shell: cmd + - name: Set x64 vars + if: matrix.os == 'windows-2025' + run: | + echo "BUILD_DIR=D:/build" >> ${env:GITHUB_ENV} + echo "ARCH=x64" >> ${env:GITHUB_ENV} + + - name: Set arm64 vars + if: matrix.os == 'windows-11-arm' + run: | + echo "BUILD_DIR=C:/build" >> ${env:GITHUB_ENV} + echo "ARCH=arm64" >> ${env:GITHUB_ENV} + + - name: Setup MSVC Developer Command Prompt + uses: ilammy/msvc-dev-cmd@v1 + with: + arch: ${{ env.ARCH }} + + # Cmd uses ^ to break commands into multiple lines, powershell uses ` + - name: Configure + if: matrix.os != 'windows-11-arm' + run: | + cmake --version + cmake -G Ninja ` + -DCMAKE_BUILD_TYPE=${{ matrix.config }} ` + -DLAGRANGE_JENKINS=ON ` + -DLAGRANGE_ALL=ON ` + -DLAGRANGE_POLYSCOPE_MOCK_BACKEND=ON ` + -B ${{ env.BUILD_DIR }} ` + -S . 
+ + # DIAGNOSTIC: minimal configure for Win ARM64 — enable only the failing module + # (serialization2) so we get a fast iteration loop while we capture cpptrace + # stack traces for the Eigen alignment assertion. LAGRANGE_ALL=OFF, no Python. + - name: Configure (ARM64) + if: matrix.os == 'windows-11-arm' run: | - call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\VsDevCmd.bat" -arch=x64 cmake --version - cmake -G Ninja ^ - -DCMAKE_BUILD_TYPE=${{ matrix.config }} ^ - -DLAGRANGE_JENKINS=ON ^ - -DLAGRANGE_ALL=ON ^ - -DLAGRANGE_POLYSCOPE_MOCK_BACKEND=ON ^ - -DEMBREE_MAX_ISA=${{ env.embree_max_isa }} ^ - -B "D:/build" ^ + cmake -G Ninja ` + -DCMAKE_BUILD_TYPE=${{ matrix.config }} ` + -DLAGRANGE_JENKINS=ON ` + -DLAGRANGE_ALL=OFF ` + -DLAGRANGE_MODULE_SERIALIZATION2=ON ` + -DLAGRANGE_MODULE_PYTHON=OFF ` + -B ${{ env.BUILD_DIR }} ` -S . - cmake --build "D:/build" -j ${{ steps.cpu-cores.outputs.count }} + + - name: Build + run: cmake --build ${{ env.BUILD_DIR }} -j ${{ steps.cpu-cores.outputs.count }} - name: Sccache stats if: always() @@ -265,4 +314,6 @@ jobs: run: Get-PSDrive - name: Tests - run: cd "D:/build"; ctest --verbose -j ${{ steps.cpu-cores.outputs.count }} + # DIAGNOSTIC: scope ctest to the serialization2 suite (the only one firing the assert). + # --verbose is critical: cpptrace writes to stderr and we need it captured in the log. 
+ run: cd ${{ env.BUILD_DIR }}; ctest --verbose -j ${{ steps.cpu-cores.outputs.count }} -R "serialization2|serialize_" diff --git a/cmake/recipes/external/Boost.cmake b/cmake/recipes/external/Boost.cmake index ac371ef1..fe826840 100644 --- a/cmake/recipes/external/Boost.cmake +++ b/cmake/recipes/external/Boost.cmake @@ -79,12 +79,15 @@ option(BOOST_IOSTREAMS_ENABLE_BZIP2 "Boost.Iostreams: Enable BZip2 support" OFF) option(BOOST_IOSTREAMS_ENABLE_LZMA "Boost.Iostreams: Enable LZMA support" OFF) option(BOOST_IOSTREAMS_ENABLE_ZSTD "Boost.Iostreams: Enable Zstd support" OFF) -set(BOOST_PATCHES "") +set(BOOST_PATCHES) if(EMSCRIPTEN) # Wasm doesn't have rounding mode control yet, so we trick Boost::interval into thinking it has. # https://github.com/WebAssembly/rounding-mode-control # https://github.com/boostorg/interval/issues/44 - set(BOOST_PATCHES PATCHES Boost.wasm.patch) + list(APPEND BOOST_PATCHES Boost.wasm.patch) +endif() +if(WIN32) + list(APPEND BOOST_PATCHES Boost.winarm.patch) endif() # Modern CMake target support was added in Boost 1.82.0 @@ -95,7 +98,7 @@ CPMAddPackage( URL https://github.com/boostorg/boost/releases/download/boost-1.84.0/boost-1.84.0.tar.xz URL_HASH SHA256=2e64e5d79a738d0fa6fb546c6e5c2bd28f88d268a2a080546f74e5ff98f29d0e EXCLUDE_FROM_ALL ON - ${BOOST_PATCHES} + PATCHES ${BOOST_PATCHES} ) # Due to MKL, we may require the release runtime (/MD) even when compiling in Debug mode. 
diff --git a/cmake/recipes/external/Boost.winarm.patch b/cmake/recipes/external/Boost.winarm.patch new file mode 100644 index 00000000..fc6681aa --- /dev/null +++ b/cmake/recipes/external/Boost.winarm.patch @@ -0,0 +1,132 @@ +Submodule libs/context contains modified content +diff --git i/libs/context/CMakeLists.txt w/libs/context/CMakeLists.txt +index dca5349..bd064b1 100644 +--- i/libs/context/CMakeLists.txt ++++ w/libs/context/CMakeLists.txt +@@ -12,7 +12,7 @@ list(APPEND CMAKE_MODULE_PATH ${boost_context_SOURCE_DIR}/cmake) + + ## Binary format + +-if(WIN32) ++if(WIN32 OR CYGWIN) + set(_default_binfmt pe) + elseif(APPLE) + set(_default_binfmt mach-o) +@@ -31,7 +31,7 @@ math(EXPR _bits "${CMAKE_SIZEOF_VOID_P}*8") + + if(CMAKE_SYSTEM_PROCESSOR MATCHES "^[Aa][Rr][Mm]" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") + set(_default_abi aapcs) +-elseif(WIN32) ++elseif(WIN32 OR CYGWIN) + set(_default_abi ms) + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^mips") + if(_bits EQUAL 32) +@@ -60,6 +60,8 @@ elseif(_bits EQUAL 32) + set(_default_arch arm) + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^mips") + set(_default_arch mips32) ++ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc32)") ++ set(_default_arch ppc32) + else() + set(_default_arch i386) + endif() +@@ -69,6 +71,8 @@ else() + set(_default_arch arm64) + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^mips") + set(_default_arch mips64) ++ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc64)") ++ set(_default_arch ppc64) + else() + set(_default_arch x86_64) + endif() +@@ -89,18 +93,22 @@ if(MSVC) + else() + set(_default_asm masm) + endif() ++elseif(BOOST_CONTEXT_ARCHITECTURE STREQUAL arm64 AND MINGW) ++ set(_default_asm armclang) + else() + set(_default_asm gas) + endif() + +-set(BOOST_CONTEXT_ASSEMBLER "${_default_asm}" CACHE STRING "Boost.Context assembler (masm, gas, armasm)") +-set_property(CACHE BOOST_CONTEXT_ASSEMBLER PROPERTY STRINGS masm gas armasm) ++set(BOOST_CONTEXT_ASSEMBLER "${_default_asm}" CACHE STRING 
"Boost.Context assembler (masm, gas, armasm, armclang)") ++set_property(CACHE BOOST_CONTEXT_ASSEMBLER PROPERTY STRINGS masm gas armasm armclang) + + unset(_default_asm) + + ## Assembler source suffix + +-if(BOOST_CONTEXT_BINARY_FORMAT STREQUAL pe) ++if(BOOST_CONTEXT_ASSEMBLER STREQUAL armclang) ++ set(_default_ext .S) ++elseif(BOOST_CONTEXT_BINARY_FORMAT STREQUAL pe) + set(_default_ext .asm) + elseif(BOOST_CONTEXT_ASSEMBLER STREQUAL gas) + set(_default_ext .S) +@@ -133,18 +141,22 @@ message(STATUS "Boost.Context: " + "implementation ${BOOST_CONTEXT_IMPLEMENTATION}") + + # Enable the right assembler +- ++set(ASM_LANGUAGE) + if(BOOST_CONTEXT_IMPLEMENTATION STREQUAL "fcontext") +- if(BOOST_CONTEXT_ASSEMBLER STREQUAL gas) ++ if(BOOST_CONTEXT_ASSEMBLER STREQUAL gas OR BOOST_CONTEXT_ASSEMBLER STREQUAL armclang) + if(CMAKE_CXX_PLATFORM_ID MATCHES "Cygwin") +- enable_language(ASM-ATT) ++ set(ASM_LANGUAGE ASM-ATT) + else() +- enable_language(ASM) ++ set(ASM_LANGUAGE ASM) + endif() + elseif(BOOST_CONTEXT_ASSEMBLER STREQUAL armasm) +- enable_language(ASM_ARMASM) ++ if(MSVC) ++ set(ASM_LANGUAGE ASM_MARMASM) ++ else() ++ set(ASM_LANGUAGE ASM_ARMASM) ++ endif() + else() +- enable_language(ASM_MASM) ++ set(ASM_LANGUAGE ASM_MASM) + endif() + endif() + +@@ -170,13 +182,29 @@ if(BOOST_CONTEXT_IMPLEMENTATION STREQUAL "fcontext") + + set(IMPL_SOURCES ${ASM_SOURCES}) + +- if(BOOST_CONTEXT_ASSEMBLER STREQUAL masm AND BOOST_CONTEXT_ARCHITECTURE STREQUAL i386) +- set_source_files_properties(${ASM_SOURCES} PROPERTIES COMPILE_FLAGS "/safeseh") +- endif() ++ if(BOOST_CONTEXT_ASSEMBLER STREQUAL masm) + +- if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") +- set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "-x" "assembler-with-cpp") +- endif() ++ set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "/nologo") ++ ++ if(MSVC AND NOT(MSVC_VERSION LESS 1936) AND NOT(CMAKE_CXX_SIMULATE_VERSION)) ++ set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "/quiet") 
++ endif() ++ ++ if(BOOST_CONTEXT_ARCHITECTURE STREQUAL i386) ++ set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "/safeseh") ++ endif() ++ ++ # armasm doesn't support most of these options ++ elseif(NOT BOOST_CONTEXT_ASSEMBLER STREQUAL armasm) # masm ++ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") ++ set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "-x" "assembler-with-cpp") ++ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") ++ set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "-Wno-unused-command-line-argument") ++ endif() ++ endif() # masm ++ ++ enable_language(${ASM_LANGUAGE}) ++ set_source_files_properties(${ASM_SOURCES} PROPERTIES LANGUAGE ${ASM_LANGUAGE}) + else() + set(IMPL_SOURCES + src/continuation.cpp diff --git a/cmake/recipes/external/Eigen3.cmake b/cmake/recipes/external/Eigen3.cmake index 726e6053..d328b5d6 100644 --- a/cmake/recipes/external/Eigen3.cmake +++ b/cmake/recipes/external/Eigen3.cmake @@ -46,6 +46,36 @@ if(EIGEN_DONT_VECTORIZE) target_compile_definitions(Eigen3_Eigen INTERFACE EIGEN_DONT_VECTORIZE) endif() +# Diagnostic only — TEMPORARY: on Windows ARM64 Debug, force-include a header that overrides +# eigen_assert with a non-fatal handler. When the assertion comes from DenseStorage.h (i.e. the +# plain_array<> alignment check) we capture a cpptrace stack trace so we can pinpoint the call +# site that constructs a misaligned fixed-size Eigen object. All other eigen_assert failures +# still abort, so unrelated invariants are not masked. +# +# The cpptrace dependency stays in a separate static library (lagrange_eigen_align_diag) so it +# does not leak into every Eigen consumer's interface. 
+if(WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64" AND MSVC AND CMAKE_BUILD_TYPE STREQUAL "Debug")
+    include(cpptrace)
+    add_library(lagrange_eigen_align_diag STATIC
+        ${CMAKE_CURRENT_LIST_DIR}/eigen_alignment_diag.cpp
+        ${CMAKE_CURRENT_LIST_DIR}/eigen_alignment_diag.h
+    )
+    target_include_directories(lagrange_eigen_align_diag PUBLIC ${CMAKE_CURRENT_LIST_DIR})
+    target_compile_definitions(lagrange_eigen_align_diag PUBLIC LAGRANGE_DIAG_EIGEN_ALIGN=1)
+    target_link_libraries(lagrange_eigen_align_diag PRIVATE cpptrace::cpptrace)
+    set_target_properties(lagrange_eigen_align_diag PROPERTIES FOLDER third_party)
+
+    set(_lagrange_eigen_diag_header "${CMAKE_CURRENT_LIST_DIR}/eigen_alignment_diag.h")
+    # Wrap with $<BUILD_INTERFACE:...> so install(EXPORT Eigen_Targets) does not see the
+    # diagnostic target (which is intentionally not part of the export set).
+    target_compile_options(Eigen3_Eigen INTERFACE
+        "$<BUILD_INTERFACE:/FI${_lagrange_eigen_diag_header}>")
+    target_compile_definitions(Eigen3_Eigen INTERFACE
+        $<BUILD_INTERFACE:LAGRANGE_DIAG_EIGEN_ALIGN=1>)
+    target_link_libraries(Eigen3_Eigen INTERFACE
+        $<BUILD_INTERFACE:lagrange_eigen_align_diag>)
+endif()
+
 if(EIGEN_WITH_MKL)
     # TODO: Checks that, on 64bits systems, `MKL::MKL` is using the LP64 interface
     # (by looking at the compile definition of the target)
diff --git a/cmake/recipes/external/eigen_alignment_diag.cpp b/cmake/recipes/external/eigen_alignment_diag.cpp
new file mode 100644
index 00000000..7a64c885
--- /dev/null
+++ b/cmake/recipes/external/eigen_alignment_diag.cpp
@@ -0,0 +1,80 @@
+// See eigen_alignment_diag.h. Defined in a separate translation unit so the
+// cpptrace dependency stays private to this static library and does not leak
+// into every consumer of Eigen3::Eigen.
+#include "eigen_alignment_diag.h"
+
+#ifdef LAGRANGE_DIAG_EIGEN_ALIGN
+
+#include <cpptrace/cpptrace.hpp>
+
+#include <atomic>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <exception>
+#include <mutex>
+#include <string>
+
+namespace lagrange_diag {
+namespace {
+std::atomic<int>& trace_count()
+{
+    static std::atomic<int> n{0};
+    return n;
+}
+
+std::mutex& trace_mutex()
+{
+    static std::mutex m;
+    return m;
+}
+
+bool is_alignment_assert(const char* file)
+{
+    if (!file) return false;
+    return std::strstr(file, "DenseStorage") != nullptr;
+}
+} // namespace
+
+// eigen_assert hook: DenseStorage failures log a trace (first 30 only) and return; all others abort.
+void eigen_assert_handler(const char* expr, const char* file, int line)
+{
+    if (is_alignment_assert(file)) {
+        // Alignment assertion: log + capture trace, but DO NOT abort. ARM64 NEON
+        // tolerates unaligned access; we only need to find where the misaligned
+        // plain_array<> is being constructed.
+        int n = trace_count().fetch_add(1);
+        if (n < 30) {
+            std::lock_guard<std::mutex> lock(trace_mutex());
+            std::fprintf(
+                stderr,
+                "\n[EIGEN_DIAG #%d] %s:%d %s\n",
+                n,
+                file,
+                line,
+                expr ? expr : "(null)");
+            std::fflush(stderr);
+            try {
+                auto trace = cpptrace::generate_trace(/*skip*/ 1, /*max*/ 64);
+                std::string s = trace.to_string(/*color*/ false);
+                std::fprintf(stderr, "%s\n", s.c_str());
+                std::fprintf(stderr, "[EIGEN_DIAG #%d] frames=%zu\n", n, trace.frames.size());
+            } catch (const std::exception& e) {
+                std::fprintf(stderr, "[EIGEN_DIAG #%d] cpptrace exception: %s\n", n, e.what());
+            } catch (...) {
+                std::fprintf(stderr, "[EIGEN_DIAG #%d] cpptrace unknown exception\n", n);
+            }
+            std::fflush(stderr);
+        }
+        return;
+    }
+    // Any other Eigen assertion stays fatal (like assert()) so unrelated bugs are not masked.
+    std::fprintf(stderr, "Eigen assertion failed: %s at %s:%d\n", // null passed to %s is UB
+        expr ? expr : "(null)", file ? file : "(null)", line);
+    std::fflush(stderr);
+    std::abort();
+}
+
+} // namespace lagrange_diag
+
+#endif // LAGRANGE_DIAG_EIGEN_ALIGN
diff --git a/cmake/recipes/external/eigen_alignment_diag.h b/cmake/recipes/external/eigen_alignment_diag.h
new file mode 100644
index 00000000..f14bb931
--- /dev/null
+++ b/cmake/recipes/external/eigen_alignment_diag.h
@@ -0,0 +1,25 @@
+// Diagnostic Eigen assertion override for Windows ARM64 Debug investigation.
+// Replaces eigen_assert with a non-fatal handler that captures a stack trace
+// via cpptrace whenever the failing call originates from DenseStorage.h (i.e.
+// the alignment check on plain_array<>). The actual cpptrace call lives in
+// eigen_alignment_diag.cpp so this header has no dependency on cpptrace.
+//
+// Force-included via /FI on MSVC (see cmake/recipes/external/Eigen3.cmake).
+// Pre-defining eigen_assert here works because Eigen/src/Core/util/Macros.h
+// guards its own definition with #ifndef eigen_assert.
+#pragma once + +#ifdef LAGRANGE_DIAG_EIGEN_ALIGN + +namespace lagrange_diag { +void eigen_assert_handler(const char* expr, const char* file, int line); +} // namespace lagrange_diag + +#define eigen_assert(x) \ + do { \ + if (!(x)) { \ + ::lagrange_diag::eigen_assert_handler(#x, __FILE__, __LINE__); \ + } \ + } while (0) + +#endif // LAGRANGE_DIAG_EIGEN_ALIGN diff --git a/cmake/recipes/external/embree-winarm.patch b/cmake/recipes/external/embree-winarm.patch new file mode 100644 index 00000000..9102a25b --- /dev/null +++ b/cmake/recipes/external/embree-winarm.patch @@ -0,0 +1,24 @@ +diff --git i/common/sys/intrinsics.h w/common/sys/intrinsics.h +--- i/common/sys/intrinsics.h ++++ w/common/sys/intrinsics.h +@@ -92,6 +92,6 @@ + #if defined(__X86_64__) || defined (__aarch64__) || defined(_M_ARM64) + __forceinline size_t bsf(size_t v) { +-#if defined(__AVX2__) ++#if defined(__AVX2__) && !defined(_M_ARM64) + return _tzcnt_u64(v); + #else + unsigned long r = 0; _BitScanForward64(&r,v); return r; +@@ -142,5 +142,5 @@ + __forceinline size_t bsr(size_t v) { +-#if defined(__AVX2__) ++#if defined(__AVX2__) && !defined(_M_ARM64) + return 63 -_lzcnt_u64(v); + #else + unsigned long r = 0; _BitScanReverse64(&r, v); return r; +@@ -534,4 +534,4 @@ +-#if defined(__AVX2__) && !defined(__aarch64__) ++#if defined(__AVX2__) && !defined(__aarch64__) && !defined(_M_ARM64) + __forceinline unsigned int pext(unsigned int a, unsigned int b) { return _pext_u32(a, b); } + __forceinline unsigned int pdep(unsigned int a, unsigned int b) { return _pdep_u32(a, b); } + #if defined(__X86_64__) diff --git a/cmake/recipes/external/embree.cmake b/cmake/recipes/external/embree.cmake index a323f51c..129c9b90 100644 --- a/cmake/recipes/external/embree.cmake +++ b/cmake/recipes/external/embree.cmake @@ -33,8 +33,10 @@ option(EMBREE_RAY_PACKETS "Enable the usage packed ray." 
# Match embree's platform detection logic for arm if(APPLE AND CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64" AND CMAKE_OSX_ARCHITECTURES STREQUAL "") OR ("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES)) + message(STATUS "Setting arm version of Embree") set(EMBREE_ARM ON) elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64") + message(STATUS "Setting arm version of Embree") set(EMBREE_ARM ON) endif() @@ -120,9 +122,22 @@ function(embree_import_target) # https://github.com/RenderKit/embree/issues/486 set(EMBREE_PATCHES PATCHES embree.patch) endif() + set(EMBREE_URL RenderKit/embree) + if(WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64" AND EMBREE_VERSION STREQUAL "v4.4.0") + message(STATUS "Testing winarm version of embree 4") + set(EMBREE_VERSION 03d8ec87213176a7e91c92a18d42e15a8a9bbbc8) + set(EMBREE_URL dousse-adobe/embree) + # The dousse-adobe fork guards x86 BMI/LZCNT/PEXT intrinsics with !defined(__aarch64__) + # (GCC/Clang macro) but misses _M_ARM64 (MSVC macro), causing build failures on Windows + # ARM64. We use git apply --ignore-whitespace instead of CPM's PATCHES (patch -p1) because + # git-cloned files may have CRLF line endings on Windows, which confuses patch.exe. 
+ find_package(Git REQUIRED QUIET) + set(EMBREE_ARM64_PATCH "${CMAKE_CURRENT_LIST_DIR}/embree-winarm.patch") + set(EMBREE_PATCHES PATCH_COMMAND "${GIT_EXECUTABLE}" apply --ignore-whitespace "${EMBREE_ARM64_PATCH}") + endif() CPMAddPackage( NAME embree - GITHUB_REPOSITORY RenderKit/embree + GITHUB_REPOSITORY ${EMBREE_URL} GIT_TAG ${EMBREE_VERSION} ${EMBREE_PATCHES} ) diff --git a/cmake/recipes/external/gklib.cmake b/cmake/recipes/external/gklib.cmake index d62b7ad5..322eb070 100644 --- a/cmake/recipes/external/gklib.cmake +++ b/cmake/recipes/external/gklib.cmake @@ -19,14 +19,14 @@ include(CPM) CPMAddPackage( NAME gklib GITHUB_REPOSITORY KarypisLab/GKlib - GIT_TAG 67c6e4322bb326a04727995775c3eafc47d7a252 + GIT_TAG e2856c2f595b153ca1ce9258c5301dbabc4f39f5 DOWNLOAD_ONLY ON ) -file(GLOB INC_FILES "${gklib_SOURCE_DIR}/*.h" ) -file(GLOB SRC_FILES "${gklib_SOURCE_DIR}/*.c" ) +file(GLOB INC_FILES "${gklib_SOURCE_DIR}/include/*.h" ) +file(GLOB SRC_FILES "${gklib_SOURCE_DIR}/src/*.c" ) if(NOT MSVC) - list(REMOVE_ITEM SRC_FILES "${gklib_SOURCE_DIR}/gkregex.c") + list(REMOVE_ITEM SRC_FILES "${gklib_SOURCE_DIR}/src/gkregex.c") endif() add_library(GKlib STATIC ${INC_FILES} ${SRC_FILES}) @@ -35,11 +35,18 @@ add_library(GKlib::GKlib ALIAS GKlib) if(MSVC) target_compile_definitions(GKlib PUBLIC USE_GKREGEX) target_compile_definitions(GKlib PUBLIC "__thread=__declspec(thread)") + # gk_ms_stdint.h / gk_ms_inttypes.h are 2006-era polyfills for pre-VS2010 MSVC. + # Modern MSVC (VS2010+) ships natively, but on ARM64 it defines + # int_fast16_t as 'int' (32-bit) while the polyfill defines it as 'int16_t', + # causing a redefinition error. Suppress the polyfills via their include guards + # and force-include the real system header so the types are still available. 
+ target_compile_definitions(GKlib PUBLIC _MSC_STDINT_H_ _MSC_INTTYPES_H_) + target_compile_options(GKlib PUBLIC "/FIstdint.h" "/FIinttypes.h") endif() include(GNUInstallDirs) target_include_directories(GKlib SYSTEM PUBLIC - "$" + "$" "$" ) diff --git a/cmake/recipes/external/simde.cmake b/cmake/recipes/external/simde.cmake index cffa746e..9a5f6cae 100644 --- a/cmake/recipes/external/simde.cmake +++ b/cmake/recipes/external/simde.cmake @@ -19,14 +19,8 @@ include(CPM) CPMAddPackage( NAME simde GITHUB_REPOSITORY simd-everywhere/simde - GIT_TAG 48edfa906d835525e2061fbf6062b7c326d66840 + GIT_TAG 1747b2482589fe894d49989159421da08c2a8bcd ) -add_library(simde::simde INTERFACE IMPORTED GLOBAL) -target_include_directories(simde::simde INTERFACE "${simde_SOURCE_DIR}") - # Enables native aliases. Not ideal but makes it easier to convert old code. -target_compile_definitions(simde::simde INTERFACE SIMDE_ENABLE_NATIVE_ALIASES) - -# Uncomment this line to ensure code can be compiled without native SIMD (i.e. emulates everything) -# target_compile_definitions(simde::simde INTERFACE SIMDE_NO_NATIVE) +target_compile_definitions(simde INTERFACE SIMDE_ENABLE_NATIVE_ALIASES) diff --git a/cmake/recipes/external/winding-number-winarm.patch b/cmake/recipes/external/winding-number-winarm.patch new file mode 100644 index 00000000..23d22d36 --- /dev/null +++ b/cmake/recipes/external/winding-number-winarm.patch @@ -0,0 +1,50 @@ +diff --git i/VM_SSEFunc.h w/VM_SSEFunc.h +--- i/VM_SSEFunc.h ++++ w/VM_SSEFunc.h +@@ -39,8 +39,26 @@ + #include + #include + +-typedef simde__m128 v4sf; +-typedef simde__m128i v4si; ++#if defined(_MSC_VER) && defined(_M_ARM64) ++// On MSVC ARM64, simde__m128 and simde__m128i are both __n128 (the native NEON type). ++// Plain typedefs make v4sf==v4si, breaking all overloaded functions (vm_shuffle, vm_extract, ++// etc.). Use distinct wrapper structs with implicit conversions to/from the simde types. 
++struct v4sf { ++ simde__m128 _v; ++ v4sf() = default; ++ SYS_FORCE_INLINE v4sf(simde__m128 v) noexcept : _v(v) {} ++ SYS_FORCE_INLINE operator simde__m128() const noexcept { return _v; } ++}; ++struct v4si { ++ simde__m128i _v; ++ v4si() = default; ++ SYS_FORCE_INLINE v4si(simde__m128i v) noexcept : _v(v) {} ++ SYS_FORCE_INLINE operator simde__m128i() const noexcept { return _v; } ++}; ++#else ++typedef simde__m128 v4sf; ++typedef simde__m128i v4si; ++#endif + + #define CPU_HAS_SIMD_INSTR 1 + #define VM_SSE_STYLE 1 +@@ -59,7 +77,7 @@ + // MSVC has problems casting between __m128 and __m128i, so we implement a + // custom casting routine specifically for windows. + +-#if defined(_MSC_VER) ++#if defined(_MSC_VER) && !defined(_M_ARM64) + + static SYS_FORCE_INLINE v4sf + vm_v4sf(const v4si &a) +@@ -249,6 +267,6 @@ + vm_splats(float a, float b, float c, float d) + { + return vm_shuffle<0,2,0,2>( +- vm_shuffle<0>(simde_mm_set_ss(a), simde_mm_set_ss(b)), +- vm_shuffle<0>(simde_mm_set_ss(c), simde_mm_set_ss(d))); ++ vm_shuffle<0>(V4SF(simde_mm_set_ss(a)), V4SF(simde_mm_set_ss(b))), ++ vm_shuffle<0>(V4SF(simde_mm_set_ss(c)), V4SF(simde_mm_set_ss(d)))); + } diff --git a/cmake/recipes/external/winding_number.cmake b/cmake/recipes/external/winding_number.cmake index 970709f4..7e703d4c 100644 --- a/cmake/recipes/external/winding_number.cmake +++ b/cmake/recipes/external/winding_number.cmake @@ -19,10 +19,20 @@ lagrange_find_package(TBB CONFIG REQUIRED) include(simde) include(CPM) +set(WINDINGNUMBER_PATCHES "") +if(MSVC AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64") + # On MSVC ARM64, simde__m128 and simde__m128i are both __n128, making plain typedefs + # identical and breaking all overloaded functions. Patch VM_SSEFunc.h to use distinct + # wrapper structs instead. Use git apply --ignore-whitespace for robust CRLF handling. 
+ find_package(Git REQUIRED QUIET) + set(_wn_patch "${CMAKE_CURRENT_LIST_DIR}/winding-number-winarm.patch") + set(WINDINGNUMBER_PATCHES PATCH_COMMAND "${GIT_EXECUTABLE}" apply --ignore-whitespace "${_wn_patch}") +endif() CPMAddPackage( NAME WindingNumber GITHUB_REPOSITORY jdumas/WindingNumber GIT_TAG a48b8f555b490afe7aab9159c7daaf83fa2cdf8e + ${WINDINGNUMBER_PATCHES} ) set_target_properties(WindingNumber PROPERTIES FOLDER third_party) diff --git a/modules/solver/CMakeLists.txt b/modules/solver/CMakeLists.txt index f7a2dd56..8c6a65e1 100644 --- a/modules/solver/CMakeLists.txt +++ b/modules/solver/CMakeLists.txt @@ -21,13 +21,17 @@ if(NOT EMSCRIPTEN AND (NOT LAGRANGE_NO_INTERNAL OR NOT SKBUILD)) # Note: For now we avoid using MKL in our open-source Python bindings, to avoid bloating up the size of # the uploaded wheels. The long-term solution is to depend on the PyPI package for MKL at build-time. - include(blas) # Accelerate on macOS, MKL on other platforms - if(APPLE) - target_compile_definitions(lagrange_solver INTERFACE LA_SOLVER_ACCELERATE) - else() - target_compile_definitions(lagrange_solver INTERFACE LA_SOLVER_MKL) + # Intel MKL has no Windows ARM64 support; fall back to Eigen's SimplicialLDLT on that platform + # (DirectSolver.h uses SimplicialLDLT when LA_SOLVER_MKL is not defined). + if(NOT (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64")) + include(blas) # Accelerate on macOS, MKL on other platforms + if(APPLE) + target_compile_definitions(lagrange_solver INTERFACE LA_SOLVER_ACCELERATE) + else() + target_compile_definitions(lagrange_solver INTERFACE LA_SOLVER_MKL) + endif() + target_link_libraries(lagrange_solver INTERFACE BLAS::BLAS) endif() - target_link_libraries(lagrange_solver INTERFACE BLAS::BLAS) endif() if(USE_SANITIZER MATCHES "([Tt]hread)")