diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index aa70001..59604ce 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -41,9 +41,6 @@ jobs: run: | echo "build-output-dir=${{ github.workspace }}/build" >> \ "$GITHUB_OUTPUT" - - name: Install OpenMP - run: sudo apt -y install libomp-18-dev - if: matrix.cpp_compiler == 'clang++' - name: Configure CMake run: > cmake -B ${{ steps.strings.outputs.build-output-dir }} -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -S ${{ github.workspace }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 7d7be87..34ed041 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,11 @@ cmake_minimum_required(VERSION 3.20) +project( + LAS++ + VERSION 0.0.0 + LANGUAGES CXX) + option(LASPP_DEBUG_ASSERTS "Enable debugging asserts" ON) option(LASPP_BUILD_TESTS "Build tests" ON) # Default benchmarks to OFF if this is a subproject (to avoid downloading @@ -18,11 +23,6 @@ option(LASPP_AGGRESSIVE_OPTIMIZATIONS "Enable aggressive performance optimizations (-march=native, etc.)" ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) - -project( - LAS++ - VERSION 0.0.0 - LANGUAGES CXX) set(COPYRIGHT "Copyright (c) 2024 Trailblaze Software. All rights reserved.") if(LASPP_BUILD_TESTS OR LASPP_BUILD_BENCHMARK) @@ -86,81 +86,103 @@ endif() # ── Performance optimization flags for aggressive Release builds # ────────────────────────── -if(CMAKE_BUILD_TYPE MATCHES "Release" AND LASPP_AGGRESSIVE_OPTIMIZATIONS) - if(MSVC) - # Apply only to Release configuration using generator expressions - add_compile_options("$<$:/O2>" "$<$:/Ob2>" - "$<$:/GL>") - add_link_options("$<$:/LTCG>") - if(HAVE_AVX2) - add_compile_options("$<$:/arch:AVX2>") - message(STATUS "AVX2 optimizations enabled for Release builds") - endif() - message( - STATUS "Aggressive performance optimizations enabled for Release builds") - elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") - # For single-config generators (Unix Makefiles), CMAKE_BUILD_TYPE check is - # sufficient - add_compile_options("-O3" "-march=native" "-flto" "-funroll-loops") - add_link_options("-flto") - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - add_compile_options("-fomit-frame-pointer" "-fno-semantic-interposition") +# Only apply when this is the top-level project to avoid affecting consuming +# projects +if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) + if(CMAKE_BUILD_TYPE MATCHES "Release" AND LASPP_AGGRESSIVE_OPTIMIZATIONS) + if(MSVC) + # Apply only to Release configuration using generator expressions + add_compile_options( + "$<$:/O2>" "$<$:/Ob2>" + "$<$:/GL>") + add_link_options("$<$:/LTCG>") + if(HAVE_AVX2) + add_compile_options("$<$:/arch:AVX2>") + message(STATUS "AVX2 optimizations enabled for Release builds") + endif() + message( + STATUS "Aggressive performance optimizations enabled for Release builds" + ) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + # For single-config generators (Unix Makefiles), CMAKE_BUILD_TYPE check is + # sufficient + add_compile_options("-O3" "-march=native" "-flto" "-funroll-loops") + add_link_options("-flto") + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + add_compile_options("-fomit-frame-pointer" + "-fno-semantic-interposition") + endif() + message( + STATUS "Aggressive performance optimizations enabled for Release builds" + ) endif() + elseif(CMAKE_BUILD_TYPE MATCHES "Release") message( - STATUS "Aggressive performance optimizations enabled for Release builds") + STATUS + "Using standard Release optimizations (LASPP_AGGRESSIVE_OPTIMIZATIONS=OFF)" + ) endif() -elseif(CMAKE_BUILD_TYPE MATCHES "Release") - message( - STATUS - "Using standard Release optimizations (LASPP_AGGRESSIVE_OPTIMIZATIONS=OFF)" - ) endif() set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) -if(WIN32) - if(${CMAKE_CXX_COMPILER} MATCHES ".*mingw.*") - add_compile_options("-fstrict-enums" "-Wall" "-Wextra" "-Wpedantic" - "-Werror") - else() - add_compile_options( - "/W4" - "/WX" - # C4267/C4244: size_t/type narrowing inside MSVC STL template bodies. - "/wd4267" # size_t narrowing in MSVC STL template - # instantiations - "/wd4244") # type narrowing in MSVC STL template instantiations - endif() -elseif(UNIX) - if(NOT CMAKE_BUILD_TYPE MATCHES "Release") - add_compile_options("-Werror") - endif() - if(CMAKE_BUILD_TYPE MATCHES "Debug") - add_compile_options("-fsanitize=address,undefined" - "-fno-sanitize-recover=all") - add_link_options("-fsanitize=address,undefined") - endif() - add_compile_options( - "-fstrict-enums" - "-Wall" - "-Wextra" - "-Wpedantic" - "-Wdouble-promotion" - "-Wshadow" - "-Wpointer-arith" - "-Wstrict-aliasing" - "-Wnull-dereference" - "-Wdouble-promotion" - "-Wold-style-cast" - "-Wconversion" - "-Wsign-conversion") - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + +# Only apply strict compiler flags when this is the top-level project. +if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) + if(WIN32) + if(${CMAKE_CXX_COMPILER} MATCHES ".*mingw.*") + add_compile_options("-fstrict-enums" "-Wall" "-Wextra" "-Wpedantic" + "-Werror") + else() + add_compile_options( + "/W4" + "/WX" + # C4267/C4244: size_t/type narrowing inside MSVC STL template bodies. + "/wd4267" # size_t narrowing in MSVC STL template + # instantiations + "/wd4244") # type narrowing in MSVC STL template + # instantiations + endif() + elseif(UNIX) + if(NOT CMAKE_BUILD_TYPE MATCHES "Release") + add_compile_options("-Werror") + endif() + if(CMAKE_BUILD_TYPE MATCHES "Debug") + # Clang's static ASan conflicts with WSL2's address space layout (Windows + # kernel mappings can overlap with ASan's shadow memory region), causing + # random segfaults during ASan initialization. Use UBSan only for Clang; + # GCC's dynamic ASan works fine. + if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + add_compile_options("-fsanitize=undefined" "-fno-sanitize-recover=all") + add_link_options("-fsanitize=undefined") + else() + add_compile_options("-fsanitize=address,undefined" + "-fno-sanitize-recover=all") + add_link_options("-fsanitize=address,undefined") + endif() + endif() add_compile_options( - "-Wduplicated-branches" "-Wlogical-op" "-Wrestrict" "-Wcast-align=strict" - "-Wduplicated-cond" "-Wuseless-cast") + "-fstrict-enums" + "-Wall" + "-Wextra" + "-Wpedantic" + "-Wdouble-promotion" + "-Wshadow" + "-Wpointer-arith" + "-Wstrict-aliasing" + "-Wnull-dereference" + "-Wdouble-promotion" + "-Wold-style-cast" + "-Wconversion" + "-Wsign-conversion") + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + add_compile_options( + "-Wduplicated-branches" "-Wlogical-op" "-Wrestrict" + "-Wcast-align=strict" "-Wduplicated-cond" "-Wuseless-cast") + endif() + else() + message(FATAL_ERROR "Unsupported platform") endif() -else() - message(FATAL_ERROR "Unsupported platform") endif() set(LIBRARY_NAME las++) @@ -169,6 +191,13 @@ add_library(${LIBRARY_NAME} INTERFACE) target_include_directories(${LIBRARY_NAME} INTERFACE src) +# Public headers use __VA_OPT__ (C++20), which requires MSVC's conformant +# preprocessor. Attach this as an INTERFACE option so consumers that add las++ +# via add_subdirectory() inherit it automatically, regardless of whether they +# set /Zc:preprocessor themselves. +target_compile_options(${LIBRARY_NAME} + INTERFACE $<$:/Zc:preprocessor>) + if(LASPP_DEBUG_ASSERTS) message(WARNING "Enabling LAS++ debugging asserts. May be slow.") target_compile_definitions(${LIBRARY_NAME} INTERFACE LASPP_DEBUG_ASSERTS) @@ -203,6 +232,10 @@ if(LASPP_BUILD_TESTS) target_link_libraries(${TEST_NAME} PRIVATE ${LIBRARY_NAME}) add_test(NAME ${TEST_NAME} COMMAND ${TEST_NAME}) target_compile_definitions(${TEST_NAME} PRIVATE LASPP_DEBUG_ASSERTS) + # MSVC needs /bigobj for large translation units in debug mode + if(MSVC) + target_compile_options(${TEST_NAME} PRIVATE $<$:/bigobj>) + endif() endforeach() if(TARGET test_laszip_interop) @@ -239,6 +272,14 @@ if(LASPP_BUILD_TESTS) endif() link_target_to_laszip(test_laszip_interop) + + # LASzip's LASinterval allocator has a new/delete type-size mismatch that + # triggers ASan when laszip_write_indexed_point is used. This is a bug in + # LASzip itself (third-party code), so we suppress only that specific check + # rather than disabling address sanitization for the whole test. + set_tests_properties( + test_laszip_interop PROPERTIES ENVIRONMENT + "ASAN_OPTIONS=new_delete_type_mismatch=0") endif() endif() diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index f6bf3fd..ea63006 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -7,12 +7,31 @@ add_executable(${LAS2LAS++_EXE_NAME} las2las++.cpp) target_link_libraries(${LAS2LAS++_EXE_NAME} ${LIBRARY_NAME}) +# MSVC needs /bigobj for large translation units in debug mode +if(MSVC) + target_compile_options(${LAS2LAS++_EXE_NAME} + PRIVATE $<$:/bigobj>) +endif() + # Utility to create test files for benchmarking add_executable(create_test_file create_test_file.cpp) target_link_libraries(create_test_file PRIVATE ${LIBRARY_NAME}) +set(INSPECT_VLRS_EXE_NAME las++-inspect-vlrs) + +add_executable(${INSPECT_VLRS_EXE_NAME} inspect_vlrs.cpp) + +target_link_libraries(${INSPECT_VLRS_EXE_NAME} ${LIBRARY_NAME}) + +set(VALIDATE_SPATIAL_INDEX_EXE_NAME las++-validate-spatial-index) + +add_executable(${VALIDATE_SPATIAL_INDEX_EXE_NAME} validate_spatial_index.cpp) + +target_link_libraries(${VALIDATE_SPATIAL_INDEX_EXE_NAME} ${LIBRARY_NAME}) + install( - TARGETS ${LAS2LAS++_EXE_NAME} + TARGETS ${LAS2LAS++_EXE_NAME} ${INSPECT_VLRS_EXE_NAME} + ${VALIDATE_SPATIAL_INDEX_EXE_NAME} DESTINATION bin COMPONENT applications) diff --git a/apps/create_test_file.cpp b/apps/create_test_file.cpp index 14bd495..2493e04 100644 --- a/apps/create_test_file.cpp +++ b/apps/create_test_file.cpp @@ -73,12 +73,12 @@ int main(int argc, char* argv[]) { // Format 1 with compression (0x81) LASWriter writer(ofs, 0x81); - writer.header().transform().m_scale_factors.x() = 0.01; - writer.header().transform().m_scale_factors.y() = 0.01; - writer.header().transform().m_scale_factors.z() = 0.01; - writer.header().transform().m_offsets.x() = 0.0; - writer.header().transform().m_offsets.y() = 0.0; - writer.header().transform().m_offsets.z() = 0.0; + writer.header().transform().scale_factors().x() = 0.01; + writer.header().transform().scale_factors().y() = 0.01; + writer.header().transform().scale_factors().z() = 0.01; + writer.header().transform().offsets().x() = 0.0; + writer.header().transform().offsets().y() = 0.0; + writer.header().transform().offsets().z() = 0.0; writer.write_points(std::span(points), 50000); diff --git a/apps/inspect_vlrs.cpp b/apps/inspect_vlrs.cpp new file mode 100644 index 0000000..2f52f6f --- /dev/null +++ b/apps/inspect_vlrs.cpp @@ -0,0 +1,91 @@ +/* + * SPDX-FileCopyrightText: (c) 2025-2026 Trailblaze Software, all rights reserved + * SPDX-License-Identifier: MIT + */ + +#include +#include +#include +#include +#include + +#include "las_reader.hpp" +#include "spatial_index.hpp" +#include "utilities/assert.hpp" +#include "utilities/printing.hpp" +#include "vlr.hpp" + +using namespace laspp; + +int main(int argc, char* argv[]) { + if (argc != 2) { + std::cerr << "Usage: " << argv[0] << " " << std::endl; + return 1; + } + + std::string filename = argv[1]; + std::filesystem::path file_path(filename); + LASReader reader(file_path); + + std::cout << "=== VLRs ===" << std::endl; + const auto& vlrs = reader.vlr_headers(); + for (size_t i = 0; i < vlrs.size(); ++i) { + const auto& vlr = vlrs[i]; + std::cout << "VLR " << i << ":" << std::endl; + std::cout << indented(vlr, " "); + std::cout << " Is LAZ VLR: " << (vlr.is_laz_vlr() ? "yes" : "no") << std::endl; + std::cout << " Is LAStools spatial index VLR: " + << (vlr.is_lastools_spatial_index_vlr() ? "yes" : "no") << std::endl; + std::cout << std::endl; + } + + std::cout << "=== EVLRs ===" << std::endl; + const auto& evlrs = reader.evlr_headers(); + for (size_t i = 0; i < evlrs.size(); ++i) { + const auto& evlr = evlrs[i]; + std::cout << "EVLR " << i << ":" << std::endl; + std::cout << indented(evlr, " "); + std::cout << " Is LAZ VLR: " << (evlr.is_laz_vlr() ? "yes" : "no") << std::endl; + std::cout << " Is LAStools spatial index EVLR: " + << (evlr.is_lastools_spatial_index_evlr() ? "yes" : "no") << std::endl; + + // If it's a LAStools spatial index, show more details + if (evlr.is_lastools_spatial_index_evlr()) { + LASPP_ASSERT(reader.has_lastools_spatial_index(), + "Failed to load LAStools spatial index from EVLR."); + std::cout << "Spatial index: " << std::endl; + const auto& index = reader.lastools_spatial_index(); + const auto& quadtree = index.quadtree_header(); + + std::cout << " Spatial index header:" << std::endl; + std::cout << indented(quadtree, " "); + std::cout << " Number of cells: " << index.num_cells() << std::endl; + + // Show cell details + size_t total_intervals = 0; + for (const auto& [cell_index, cell] : index.cells()) { + total_intervals += cell.intervals.size(); + } + std::cout << " Total intervals: " << total_intervals << std::endl; + std::cout << " Average intervals per cell: " << std::fixed << std::setprecision(1) + << (index.num_cells() > 0 ? static_cast(total_intervals) / + static_cast(index.num_cells()) + : 0.0) + << std::endl; + + // Show first few cells as examples + std::cout << indented(limited_map(index.cells(), 3), " ") << std::endl; + } + std::cout << std::endl; + } + + if (reader.has_lastools_spatial_index()) { + const auto& spatial_index = reader.lastools_spatial_index(); + std::cout << "Loaded LAStools spatial index with " << spatial_index.num_cells() << " cells." + << std::endl; + std::cout << "Spatial index details:" << std::endl; + std::cout << indented(spatial_index, " ") << std::endl; + } + + return 0; +} diff --git a/apps/las2las++.cpp b/apps/las2las++.cpp index f89eb31..125406b 100644 --- a/apps/las2las++.cpp +++ b/apps/las2las++.cpp @@ -3,89 +3,43 @@ * SPDX-License-Identifier: MIT */ -#include #include #include #include +#include -#include "las_point.hpp" #include "las_reader.hpp" #include "las_writer.hpp" +#include "utilities/assert.hpp" -class LASPoint { - std::array position; - double gps_time; - - public: - void operator=(const laspp::LASPointFormat0& point) { - position[0] = point.x; - position[1] = point.y; - position[2] = point.z; - } - - void operator=(const laspp::GPSTime& point) { gps_time = point; } - - operator laspp::LASPointFormat0() const { - laspp::LASPointFormat0 point; - point.x = position[0]; - point.y = position[1]; - point.z = position[2]; - point.intensity = 0; - point.bit_byte = 0; - point.classification_byte = 0; - point.scan_angle_rank = 0; - point.user_data = 0; - point.point_source_id = 0; - return point; - } - - operator laspp::GPSTime() const { return laspp::GPSTime(gps_time); } +int main(int argc, char* argv[]) { + bool add_spatial_index_flag = false; + int file_arg_start = 1; - friend std::ostream& operator<<(std::ostream& os, const LASPoint& point) { - os << "Position: (" << point.position[0] << ", " << point.position[1] << ", " - << point.position[2] << ")"; - os << " GPS Time: " << point.gps_time; - return os; + for (int i = 1; i < argc; ++i) { + if (std::string(argv[i]) == "--add-spatial-index" || std::string(argv[i]) == "-s") { + add_spatial_index_flag = true; + file_arg_start++; + } } -}; - -template -void read_and_write_points(laspp::LASReader& reader, laspp::LASWriter& writer) { - std::vector points(reader.num_points()); - reader.read_chunks(points, {0, reader.num_chunks()}); - std::cout << points[0] << std::endl; - std::cout << points[points.size() - 1] << std::endl; - writer.write_points(points); -} -int main(int argc, char* argv[]) { - if (argc != 3) { - std::cerr << "Usage: " << argv[0] << " " << std::endl; + if (argc - file_arg_start != 2) { + std::cerr << "Usage: " << argv[0] << " [--add-spatial-index|-s] " + << std::endl; + std::cerr + << " --add-spatial-index, -s: Add spatial index to output file (points will be reordered)" + << std::endl; return 1; } - std::filesystem::path in_file(argv[1]); - std::ifstream ifs(in_file, std::ios::binary); - LASPP_ASSERT(ifs.is_open(), "Failed to open ", in_file); - laspp::LASReader reader(ifs); - std::cout << reader.header() << std::endl; - - if (reader.geo_keys()) { - std::cout << "GeoTIFF Projection Info:" << std::endl; - std::cout << *reader.geo_keys() << std::endl; - } - - auto vlrs = reader.vlr_headers(); - for (const auto& vlr : vlrs) { - std::cout << vlr << std::endl; - } + std::string in_file_str = argv[file_arg_start]; + std::string out_file_str = argv[file_arg_start + 1]; - auto evlrs = reader.evlr_headers(); - for (const auto& evlr : evlrs) { - std::cout << evlr << std::endl; - } + std::filesystem::path in_file(in_file_str); + laspp::LASReader reader(in_file); + std::cout << reader.header() << std::endl; - std::filesystem::path out_file(argv[2]); + std::filesystem::path out_file(out_file_str); std::fstream ofs; ofs.open(out_file, std::ios::binary | std::ios::in | std::ios::out | std::ios::trunc); LASPP_ASSERT(ofs.is_open(), "Failed to open ", out_file); @@ -109,28 +63,32 @@ int main(int argc, char* argv[]) { } laspp::LASWriter writer(ofs, point_format); - for (const auto& vlr : reader.vlr_headers()) { - if (vlr.is_laz_vlr()) { - continue; - } - writer.write_vlr(vlr, reader.read_vlr_data(vlr)); - } - - writer.header().transform() = reader.header().transform(); - - { - std::vector points(reader.num_points()); - - // reader.read_chunks(points, {0, reader.num_chunks()}); - // std::cout << points[0] << std::endl; - // std::cout << points[1] << std::endl; - // std::cout << points[1000] << std::endl; - // std::cout << points[points.size() - 1] << std::endl; - - // writer.write_points(points); - LASPP_SWITCH_OVER_POINT_TYPE(reader.header().point_format(), read_and_write_points, reader, - writer); - } + // Copy everything from reader to writer + writer.copy_from_reader(reader, add_spatial_index_flag); + + LASPP_ASSERT_EQ(reader.num_points(), writer.header().num_points(), + "Number of points in output file does not match input file"); + LASPP_ASSERT_EQ(reader.header().num_points_by_return(), writer.header().num_points_by_return(), + "Number of points by return in output file does not match input file"); + LASPP_ASSERT_LE(reader.header().bounds().min_x(), writer.header().bounds().min_x(), + "Output file min x is less than input file min x"); + LASPP_ASSERT_GE(reader.header().bounds().max_x(), writer.header().bounds().max_x(), + "Output file max x is greater than input file max x"); + LASPP_ASSERT_LE(reader.header().bounds().min_y(), writer.header().bounds().min_y(), + "Output file min y is less than input file min y"); + LASPP_ASSERT_GE(reader.header().bounds().max_y(), writer.header().bounds().max_y(), + "Output file max y is greater than input file max y"); + LASPP_ASSERT_LE(reader.header().bounds().min_z(), writer.header().bounds().min_z(), + "Output file min z is less than input file min z"); + LASPP_ASSERT_GE(reader.header().bounds().max_z(), writer.header().bounds().max_z(), + "Output file max z is greater than input file max z"); + LASPP_ASSERT_EQ(reader.header().transform().scale_factors(), + writer.header().transform().scale_factors(), + "Scale factors in output file do not match input file"); + LASPP_ASSERT_EQ(reader.header().transform().offsets(), writer.header().transform().offsets(), + "Offsets in output file do not match input file"); + + std::cout << writer.header() << std::endl; } return 0; diff --git a/apps/validate_spatial_index.cpp b/apps/validate_spatial_index.cpp new file mode 100644 index 0000000..d09e9dd --- /dev/null +++ b/apps/validate_spatial_index.cpp @@ -0,0 +1,232 @@ +/* + * SPDX-FileCopyrightText: (c) 2025-2026 Trailblaze Software, all rights reserved + * SPDX-License-Identifier: MIT + */ + +#include +#include +#include +#include +#include + +#include "las_point.hpp" +#include "las_reader.hpp" +#include "spatial_index.hpp" +#include "utilities/assert.hpp" + +using namespace laspp; + +// Check if a point index is within any interval of a cell +static bool point_in_intervals(uint32_t point_index, const CellIntervals& cell) { + for (const auto& interval : cell.intervals) { + if (point_index >= interval.start && point_index <= interval.end) { + return true; + } + } + return false; +} + +template +void validate_spatial_index_for_points(LASReader& reader, const QuadtreeSpatialIndex& index) { + // Get scale and offset for coordinate conversion + double scale_x = reader.header().transform().scale_factors().x(); + double offset_x = reader.header().transform().offsets().x(); + double scale_y = reader.header().transform().scale_factors().y(); + double offset_y = reader.header().transform().offsets().y(); + + // Read all points + std::vector points(reader.num_points()); + reader.read_chunks(points, {0, reader.num_chunks()}); + + // Statistics + size_t total_points = points.size(); + size_t points_in_correct_intervals = 0; + size_t points_in_correct_bounds = 0; + size_t points_not_in_intervals = 0; + size_t points_with_missing_cells = 0; + size_t total_interval_range = 0; // Sum of all interval ranges across all cells + + // Get all cells from the spatial index + const auto& cells = index.cells(); + + // Calculate total interval range across all cells + for (const auto& [cell_index, cell] : cells) { + for (const auto& interval : cell.intervals) { + total_interval_range += (interval.end - interval.start + 1); + } + } + + // Debug: collect sample of calculated vs stored cell indices + std::map calculated_cell_indices; + std::map stored_cell_indices; + for (const auto& [cell_index, cell] : cells) { + stored_cell_indices[cell_index] = cell.number_points; + } + + // Validate each point + for (size_t i = 0; i < points.size(); ++i) { + const auto& point = points[i]; + uint32_t point_index = static_cast(i); + + // Read x, y using memcpy to avoid alignment issues with packed structures + int32_t x_raw, y_raw; + std::memcpy(&x_raw, &point.x, sizeof(x_raw)); + std::memcpy(&y_raw, &point.y, sizeof(y_raw)); + double x = int32_to_double(x_raw, scale_x, offset_x); + double y = int32_to_double(y_raw, scale_y, offset_y); + + // Get the cell index for this point (searching up the tree if needed) + int32_t cell_index = index.find_cell_index(x, y); + if (cell_index < 0) { + std::cerr << "ERROR: No cell found for point " << i << " at (" << x << ", " << y << ")" + << std::endl; + points_with_missing_cells++; + continue; + } + LASPP_ASSERT(index.get_cell_bounds(cell_index).contains(x, y), + "find_cell_index returned a cell that does not contain the point", x, " ", y, " ", + cell_index); + calculated_cell_indices[cell_index]++; + + // find_cell_index already found the cell, so it must exist + const auto& cell = cells.at(cell_index); + + // Check if point is in the intervals for this cell + bool in_intervals = point_in_intervals(point_index, cell); + if (in_intervals) { + points_in_correct_intervals++; + } else { + points_not_in_intervals++; + } + + // Check if point is in the bounds for this cell + Bound2D cell_bounds = index.get_cell_bounds(cell_index); + bool in_bounds = cell_bounds.contains(x, y); + if (in_bounds) { + points_in_correct_bounds++; + } + } + + // Report results + std::cout << "=== Spatial Index Validation Results ===" << std::endl; + std::cout << "Total points: " << total_points << std::endl; + std::cout << "Unique calculated cell indices: " << calculated_cell_indices.size() << std::endl; + std::cout << "Unique stored cell indices: " << stored_cell_indices.size() << std::endl; + + // Debug: show sample of mismatched cell indices + if (points_with_missing_cells > 0) { + std::cout << std::endl << "=== Debug: Sample of Calculated Cell Indices ===" << std::endl; + size_t sample_count = 0; + for (const auto& [cell_index, count] : calculated_cell_indices) { + if (stored_cell_indices.find(cell_index) == stored_cell_indices.end()) { + std::cout << " Calculated cell index " << cell_index << " (used by " << count + << " points) - NOT in spatial index" << std::endl; + sample_count++; + if (sample_count >= 10) { + std::cout << " ... (showing first 10)" << std::endl; + break; + } + } + } + std::cout << std::endl << "=== Debug: Sample of Stored Cell Indices ===" << std::endl; + sample_count = 0; + for (const auto& [cell_index, count] : stored_cell_indices) { + if (calculated_cell_indices.find(cell_index) == calculated_cell_indices.end()) { + std::cout << " Stored cell index " << cell_index << " (has " << count + << " points) - NOT calculated by any point" << std::endl; + sample_count++; + if (sample_count >= 10) { + std::cout << " ... (showing first 10)" << std::endl; + break; + } + } + } + std::cout << std::endl; + } + std::cout << "Points in correct intervals: " << points_in_correct_intervals << " (" << std::fixed + << std::setprecision(2) + << (100.0 * static_cast(points_in_correct_intervals) / + static_cast(total_points)) + << "%)" << std::endl; + std::cout << "Points in correct bounds: " << points_in_correct_bounds << " (" + << (100.0 * static_cast(points_in_correct_bounds) / + static_cast(total_points)) + << "%)" << std::endl; + std::cout << "Points NOT in intervals: " << points_not_in_intervals << " (" + << (100.0 * static_cast(points_not_in_intervals) / + static_cast(total_points)) + << "%)" << std::endl; + std::cout << "Points with missing cells: " << points_with_missing_cells << " (" + << (100.0 * static_cast(points_with_missing_cells) / + static_cast(total_points)) + << "%)" << std::endl; + std::cout << std::endl; + std::cout << "=== Efficiency Analysis ===" << std::endl; + std::cout << "Total points: " << total_points << std::endl; + std::cout << "Total interval range: " << total_interval_range << std::endl; + + if (total_interval_range > 0) { + double efficiency = + 100.0 * (static_cast(total_points) / static_cast(total_interval_range)); + std::cout << "Efficiency: " << std::fixed << std::setprecision(2) << efficiency << "%" + << std::endl; + std::cout << " (Total points / Sum of all interval ranges)" << std::endl; + std::cout << " (Higher is better - indicates more compact intervals)" << std::endl; + } else { + std::cout << "Efficiency: N/A (no intervals found)" << std::endl; + } + + // Validation checks + bool all_valid = true; + if (points_not_in_intervals > 0) { + std::cerr << "ERROR: " << points_not_in_intervals + << " points are not in the intervals for their cell!" << std::endl; + all_valid = false; + } + if (points_with_missing_cells > 0) { + std::cerr << "ERROR: " << points_with_missing_cells + << " points have cell indices that don't exist in the spatial index!" << std::endl; + all_valid = false; + } + + if (all_valid) { + std::cout << std::endl << "✓ All points are in the correct intervals!" << std::endl; + } else { + std::cout << std::endl << "✗ Validation failed!" << std::endl; + } +} + +int main(int argc, char* argv[]) { + if (argc != 2) { + std::cerr << "Usage: " << argv[0] << " " << std::endl; + std::cerr << " Validates the spatial index in a LAS/LAZ file" << std::endl; + return 1; + } + + std::string filename = argv[1]; + std::filesystem::path file_path(filename); + LASReader reader(file_path); + + // Check if spatial index exists + if (!reader.has_lastools_spatial_index()) { + std::cerr << "ERROR: No spatial index found in file!" << std::endl; + std::cerr << " (Checked both EVLRs and .lax file)" << std::endl; + return 1; + } + + const auto& index = reader.lastools_spatial_index(); + const auto& header = index.quadtree_header(); + std::cout << "Spatial index found with " << index.num_cells() << " cells" << std::endl; + std::cout << "Quadtree levels: " << header.levels << std::endl; + std::cout << "Level index: " << header.level_index << std::endl; + std::cout << "Implicit levels: " << header.implicit_levels << std::endl; + std::cout << "Bounds: [" << header.min_x << ", " << header.min_y << "] to [" << header.max_x + << ", " << header.max_y << "]" << std::endl; + std::cout << std::endl; + + // Validate based on point format + LASPP_SWITCH_OVER_POINT_TYPE(reader.header().point_format(), validate_spatial_index_for_points, + reader, index); + + return 0; +} diff --git a/cmake/SetupLaszip.cmake b/cmake/SetupLaszip.cmake index 515d3ae..364652c 100644 --- a/cmake/SetupLaszip.cmake +++ b/cmake/SetupLaszip.cmake @@ -58,15 +58,16 @@ function(setup_laszip) # Quiet warnings coming from LASzip itself. if(NOT MSVC) - target_compile_options( - ${_lz_lib} - PRIVATE -Wno-maybe-uninitialized -Wno-format -Wno-format-security - -Wno-switch -Wno-stringop-truncation -Wno-format-overflow) + # Common flags for both GCC and Clang + set(_common_flags -Wno-format -Wno-format-security -Wno-switch) + # GCC-specific flags + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + list(APPEND _common_flags -Wno-maybe-uninitialized + -Wno-stringop-truncation -Wno-format-overflow) + endif() + target_compile_options(${_lz_lib} PRIVATE ${_common_flags}) if(_lz_api) - target_compile_options( - ${_lz_api} - PRIVATE -Wno-maybe-uninitialized -Wno-format -Wno-format-security - -Wno-switch -Wno-stringop-truncation -Wno-format-overflow) + target_compile_options(${_lz_api} PRIVATE ${_common_flags}) endif() endif() endfunction() diff --git a/cmake/SetupLazperf.cmake b/cmake/SetupLazperf.cmake index a33b1da..a7d0e44 100644 --- a/cmake/SetupLazperf.cmake +++ b/cmake/SetupLazperf.cmake @@ -50,16 +50,15 @@ function(setup_lazperf) # Quiet warnings coming from LAZperf itself. if(NOT MSVC) - target_compile_options( - lazperf - PRIVATE -Wno-maybe-uninitialized - -Wno-format - -Wno-format-security - -Wno-switch - -Wno-stringop-truncation - -Wno-format-overflow - -Wno-sign-conversion - -Wno-conversion) + # Common flags for both GCC and Clang + set(_common_flags -Wno-format -Wno-format-security -Wno-switch + -Wno-sign-conversion -Wno-conversion) + # GCC-specific flags + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + list(APPEND _common_flags -Wno-maybe-uninitialized + -Wno-stringop-truncation -Wno-format-overflow) + endif() + target_compile_options(lazperf PRIVATE ${_common_flags}) endif() endfunction() diff --git a/src/example_custom_las_point.hpp b/src/example_custom_las_point.hpp index fed4555..9d899d3 100644 --- a/src/example_custom_las_point.hpp +++ b/src/example_custom_las_point.hpp @@ -6,6 +6,8 @@ #pragma once #include +#include +#include #include "las_point.hpp" diff --git a/src/las_header.hpp b/src/las_header.hpp index b66e4aa..4d20c04 100644 --- a/src/las_header.hpp +++ b/src/las_header.hpp @@ -6,14 +6,16 @@ #pragma once +#include #include #include #include #include -#include #include +#include #include #include +#include #include "las_point.hpp" #include "utilities/assert.hpp" @@ -49,8 +51,12 @@ class Vector3D { double& y() { return m_data[1]; } double& z() { return m_data[2]; } + const double& x() const { return m_data[0]; } + const double& y() const { return m_data[1]; } + const double& z() const { return m_data[2]; } + double& operator[](size_t i) { return m_data[i]; } - double operator[](size_t i) const { return m_data[i]; } + const double& operator[](size_t i) const { return m_data[i]; } explicit Vector3D(std::istream& in_stream) { LASPP_CHECK_READ(in_stream, m_data.data(), sizeof(m_data)); @@ -60,16 +66,19 @@ class Vector3D { Vector3D() = default; + bool operator==(const Vector3D& other) const = default; + friend std::ostream& operator<<(std::ostream& os, const Vector3D& vec) { os << "(" << vec.m_data[0] << ", " << vec.m_data[1] << ", " << vec.m_data[2] << ")"; return os; } }; -struct Transform { +class Transform { Vector3D m_scale_factors = {0.001, 0.001, 0.001}; Vector3D m_offsets = {0, 0, 0}; + public: explicit Transform(std::istream& in_stream) : m_scale_factors(in_stream), m_offsets(in_stream) {} Transform(const Vector3D& scale_factors, const Vector3D& offsets) @@ -77,6 +86,17 @@ struct Transform { Transform() = default; + const Vector3D& scale_factors() const { return m_scale_factors; } + Vector3D& scale_factors() { return m_scale_factors; } + const Vector3D& offsets() const { return m_offsets; } + Vector3D& offsets() { return m_offsets; } + + Vector3D transform_point(int x, int y, int z) const { + return Vector3D(x * m_scale_factors.x() + m_offsets.x(), + y * m_scale_factors.y() + m_offsets.y(), + z * m_scale_factors.z() + m_offsets.z()); + } + friend std::ostream& operator<<(std::ostream& os, const Transform& transform) { os << "Scale factors: " << transform.m_scale_factors << std::endl; os << "Offsets: " << transform.m_offsets << std::endl; @@ -84,6 +104,10 @@ struct Transform { } }; +inline double int32_to_double(int32_t coord, double scale, double offset) { + return coord * scale + offset; +} + template void string_to_arr(const std::string& str, char (&arr)[N]) { for (size_t i = 0; i < N; ++i) { @@ -151,6 +175,45 @@ struct LASPP_PACKED LASHeader14Packed : public LASHeaderPacked { static_assert(sizeof(LASHeaderPacked) == 227); static_assert(sizeof(LASHeader14Packed) == 375); +class Bound2D { + double m_min_x; + double m_min_y; + double m_max_x; + double m_max_y; + + public: + Bound2D() + : m_min_x(std::numeric_limits::max()), + m_min_y(std::numeric_limits::max()), + m_max_x(std::numeric_limits::lowest()), + m_max_y(std::numeric_limits::lowest()) {} + + Bound2D(double min_x, double min_y, double max_x, double max_y) + : m_min_x(min_x), m_min_y(min_y), m_max_x(max_x), m_max_y(max_y) {} + + double min_x() const { return m_min_x; } + double min_y() const { return m_min_y; } + double max_x() const { return m_max_x; } + double max_y() const { return m_max_y; } + + void update(double x, double y) { + m_min_x = std::min(m_min_x, x); + m_min_y = std::min(m_min_y, y); + m_max_x = std::max(m_max_x, x); + m_max_y = std::max(m_max_y, y); + } + + bool contains(double x, double y) const { + return x >= m_min_x && x <= m_max_x && y >= m_min_y && y <= m_max_y; + } + + friend std::ostream& operator<<(std::ostream& os, const Bound2D& bound) { + os << "Bounds2D: [" << bound.m_min_x << ", " << bound.m_min_y << "] to [" << bound.m_max_x + << ", " << bound.m_max_y << "]" << std::endl; + return os; + } +}; + class Bound3D { Vector3D m_min; Vector3D m_max; @@ -179,6 +242,13 @@ class Bound3D { } } + bool contains(const Vector3D& pos) const { + for (size_t i = 0; i < 3; ++i) { + if (pos[i] < m_min[i] || pos[i] > m_max[i]) return false; + } + return true; + } + friend std::ostream& operator<<(std::ostream& os, const Bound3D& bound) { os << "Bounds: " << bound.m_min << " to " << bound.m_max << std::endl; return os; @@ -317,12 +387,26 @@ class LASHeader { std::array transform(std::array pos) { return {{ - m_transform.m_scale_factors.x() * pos[0] + m_transform.m_offsets.x(), - m_transform.m_scale_factors.y() * pos[1] + m_transform.m_offsets.y(), - m_transform.m_scale_factors.z() * pos[2] + m_transform.m_offsets.z(), + m_transform.scale_factors().x() * pos[0] + m_transform.offsets().x(), + m_transform.scale_factors().y() * pos[1] + m_transform.offsets().y(), + m_transform.scale_factors().z() * pos[2] + m_transform.offsets().z(), }}; } + std::array num_points_by_return() const { + std::array counts = {0}; + if (m_legacy_number_of_point_records == 0) { + for (size_t i = 0; i < 15; ++i) { + counts[i] = static_cast(m_number_of_points_by_return[i]); + } + } else { + for (size_t i = 0; i < 5; ++i) { + counts[i] = static_cast(m_legacy_number_of_points_by_return[i]); + } + } + return counts; + } + void update_bounds(std::array pos) { m_bounds.update(transform(pos)); } void set_point_format(uint8_t point_format, uint16_t num_extra_bytes) { @@ -357,7 +441,7 @@ class LASHeader { const Transform& transform() const { return m_transform; } uint8_t point_format() const { return m_point_data_record_format; } - int num_extra_bytes() const { + uint16_t num_extra_bytes() const { return m_point_data_record_length - size_of_point_format(m_point_data_record_format); } diff --git a/src/las_point.hpp b/src/las_point.hpp index 731d675..0c9a52d 100644 --- a/src/las_point.hpp +++ b/src/las_point.hpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include diff --git a/src/las_reader.hpp b/src/las_reader.hpp index bd14344..9d3ef9a 100644 --- a/src/las_reader.hpp +++ b/src/las_reader.hpp @@ -6,15 +6,12 @@ #pragma once #include -#include -#include -#include #include -#include #include #include #include #include +#include #include #include #include @@ -29,6 +26,7 @@ #include "laz/chunktable.hpp" #include "laz/laz_reader.hpp" #include "laz/stream.hpp" +#include "spatial_index.hpp" #include "utilities/assert.hpp" #include "utilities/env.hpp" #include "utilities/memory_mapped_file.hpp" @@ -51,11 +49,13 @@ class LASReader { std::optional m_mapped_file; // Memory-mapped file (faster path) std::optional m_mapped_buffer; // Stream buffer for mapped file std::optional m_mapped_stream; // Stream view of mapped file + std::optional m_file_path; // File path for .lax file lookup LASHeader m_header; std::optional m_laz_reader; std::optional m_math_wkt; std::optional m_coordinate_wkt; std::optional m_las_geo_keys; + std::optional m_spatial_index; std::vector m_vlr_headers; std::vector m_evlr_headers; @@ -179,13 +179,55 @@ class LASReader { } std::vector read_evlr_headers() { - return read_record_headers( + auto evlrs = read_record_headers( static_cast(m_header.EVLR_offset()), m_header.EVLR_count()); + + // Check for spatial index EVLR + for (const auto& evlr : evlrs) { + if (evlr.is_lastools_spatial_index_evlr()) { + std::vector data = read_evlr_data(evlr); + std::stringstream ss; + ss.write(reinterpret_cast(data.data()), static_cast(data.size())); + ss.seekg(0); // Rewind to beginning for reading + LASPP_ASSERT(!m_spatial_index.has_value(), "Multiple spatial index EVLRs found"); + try { + m_spatial_index.emplace(ss); + } catch (const std::runtime_error&) { + // Corrupted or unrecognised EVLR payload: leave m_spatial_index empty so + // the file can still be opened (mirrors the .lax fallback behaviour). + // Note: std::ios_base::failure (truncated payload) is also caught here + // because on libstdc++ it derives from std::system_error → std::runtime_error. + } + break; + } + } + + // If no spatial index found in EVLRs, try to read from .lax file + if (!m_spatial_index.has_value() && m_file_path.has_value()) { + std::filesystem::path lax_path = *m_file_path; + lax_path.replace_extension(".lax"); + + if (std::filesystem::exists(lax_path)) { + std::ifstream lax_file(lax_path, std::ios::binary); + if (lax_file.is_open()) { + try { + m_spatial_index.emplace(lax_file); + } catch (const std::runtime_error&) { + // Corrupted or unrecognised .lax content: leave m_spatial_index empty + // so the file can still be read without its spatial index. + // Note: std::ios_base::failure (truncated file) is also caught here + // because on libstdc++ it derives from std::system_error → std::runtime_error. + } + } + } + } + + return evlrs; } public: // Constructor from file path - uses memory mapping for optimal performance - explicit LASReader(const std::filesystem::path& file_path) { + explicit LASReader(const std::filesystem::path& file_path) : m_file_path(file_path) { // Allow forcing stream-based I/O via environment variable (for benchmarking/comparison) // Any non-empty value other than "0" enables stream-based I/O. auto disable_mmap = utilities::get_env("LASPP_DISABLE_MMAP"); @@ -230,8 +272,10 @@ class LASReader { } // Constructor from istream - uses stream-based I/O (backward compatibility) - explicit LASReader(std::istream& input_stream) + explicit LASReader(std::istream& input_stream, + const std::optional& file_path = std::nullopt) : m_input_stream(&input_stream), + m_file_path(file_path), m_header(read_header(*m_input_stream)), m_vlr_headers(read_vlr_headers()), m_evlr_headers(read_evlr_headers()) { @@ -308,6 +352,13 @@ class LASReader { } std::optional geo_keys() const { return m_las_geo_keys; } + bool has_lastools_spatial_index() const { return m_spatial_index.has_value(); } + + const QuadtreeSpatialIndex& lastools_spatial_index() const { + LASPP_ASSERT(m_spatial_index.has_value(), "Spatial index not available"); + return *m_spatial_index; + } + template std::span read_chunk(std::span output_location, size_t chunk_index) { if (header().is_laz_compressed()) { @@ -333,45 +384,71 @@ class LASReader { template std::span read_chunks(std::span output_location, std::pair chunk_indexes) { if (header().is_laz_compressed()) { - size_t compressed_start_offset = - m_laz_reader->chunk_table().chunk_offset(chunk_indexes.first); - size_t total_compressed_size = - m_laz_reader->chunk_table().compressed_chunk_size(chunk_indexes.second - 1) + - m_laz_reader->chunk_table().chunk_offset(chunk_indexes.second - 1) - - compressed_start_offset; + const auto& chunk_table = m_laz_reader->chunk_table(); size_t total_n_points = - (chunk_indexes.second == m_laz_reader->chunk_table().num_chunks() + (chunk_indexes.second == chunk_table.num_chunks() ? header().num_points() - : m_laz_reader->chunk_table().decompressed_chunk_offsets()[chunk_indexes.second]) - - m_laz_reader->chunk_table().decompressed_chunk_offsets()[chunk_indexes.first]; + : chunk_table.decompressed_chunk_offsets()[chunk_indexes.second]) - + chunk_table.decompressed_chunk_offsets()[chunk_indexes.first]; LASPP_ASSERT_GE(output_location.size(), total_n_points); - size_t file_data_offset = header().offset_to_point_data() + compressed_start_offset; - auto buf = get_bytes(file_data_offset, total_compressed_size); - std::vector chunk_indices(chunk_indexes.second - chunk_indexes.first); std::iota(chunk_indices.begin(), chunk_indices.end(), chunk_indexes.first); - // Use thread pool for parallel execution (respects LASPP_NUM_THREADS environment variable) - utilities::parallel_for(size_t{0}, chunk_indices.size(), [&](size_t idx) { - size_t chunk_index = chunk_indices[idx]; - size_t start_offset = - m_laz_reader->chunk_table().chunk_offset(chunk_index) - compressed_start_offset; - size_t compressed_chunk_size = - m_laz_reader->chunk_table().compressed_chunk_size(chunk_index); - std::span compressed_chunk = - buf.data.subspan(start_offset, compressed_chunk_size); - - size_t point_offset = - m_laz_reader->chunk_table().decompressed_chunk_offsets()[chunk_index] - - m_laz_reader->chunk_table().decompressed_chunk_offsets()[chunk_indexes.first]; - size_t n_points = m_laz_reader->chunk_table().points_per_chunk()[chunk_index]; - if (chunk_index == 0) { - LASPP_ASSERT_EQ(point_offset, 0u); - } - m_laz_reader->decompress_chunk(compressed_chunk, - output_location.subspan(point_offset, n_points)); - }); + if (m_mapped_file.has_value()) { + // Memory-mapped path: read contiguous block once, then decompress chunks in parallel + size_t compressed_start_offset = chunk_table.chunk_offset(chunk_indexes.first); + size_t total_compressed_size = chunk_table.compressed_chunk_size(chunk_indexes.second - 1) + + chunk_table.chunk_offset(chunk_indexes.second - 1) - + compressed_start_offset; + size_t file_data_offset = header().offset_to_point_data() + compressed_start_offset; + auto buf = get_bytes(file_data_offset, total_compressed_size); + + utilities::parallel_for(size_t{0}, chunk_indices.size(), [&](size_t idx) { + size_t chunk_index = chunk_indices[idx]; + size_t start_offset = chunk_table.chunk_offset(chunk_index) - compressed_start_offset; + size_t compressed_chunk_size = chunk_table.compressed_chunk_size(chunk_index); + std::span compressed_chunk = + buf.data.subspan(start_offset, compressed_chunk_size); + + size_t point_offset = chunk_table.decompressed_chunk_offsets()[chunk_index] - + chunk_table.decompressed_chunk_offsets()[chunk_indexes.first]; + size_t n_points = chunk_table.points_per_chunk()[chunk_index]; + if (chunk_index == 0) { + LASPP_ASSERT_EQ(point_offset, 0u); + } + m_laz_reader->decompress_chunk(compressed_chunk, + output_location.subspan(point_offset, n_points)); + }); + } else { + // Stream-based path: read chunks in parallel (with mutex protection) and decompress. + // This overlaps I/O and decompression - while one thread decompresses, others can read. + std::mutex stream_mutex; + utilities::parallel_for(size_t{0}, chunk_indices.size(), [&](size_t idx) { + size_t chunk_index = chunk_indices[idx]; + size_t file_data_offset = + header().offset_to_point_data() + chunk_table.chunk_offset(chunk_index); + size_t compressed_chunk_size = chunk_table.compressed_chunk_size(chunk_index); + + // Read chunk data with stream mutex protection + std::vector compressed_buffer; + { + std::lock_guard lock(stream_mutex); + auto buf = get_bytes(file_data_offset, compressed_chunk_size); + compressed_buffer = {buf.data.begin(), buf.data.end()}; + } + + // Decompress without holding the lock (allows other threads to read while we decompress) + size_t point_offset = chunk_table.decompressed_chunk_offsets()[chunk_index] - + chunk_table.decompressed_chunk_offsets()[chunk_indexes.first]; + size_t n_points = chunk_table.points_per_chunk()[chunk_index]; + if (chunk_index == 0) { + LASPP_ASSERT_EQ(point_offset, 0u); + } + m_laz_reader->decompress_chunk(std::span(compressed_buffer), + output_location.subspan(point_offset, n_points)); + }); + } return output_location.subspan(0, total_n_points); } LASPP_ASSERT(chunk_indexes.first == 0); @@ -388,6 +465,130 @@ class LASReader { auto buf = get_bytes(evlr.global_offset(), evlr.record_length_after_header); return {buf.data.begin(), buf.data.end()}; } + + // Get chunk indices that contain the given point intervals + // Returns a sorted list of unique chunk indices. + // Uses binary search on the monotonically-increasing decompressed_chunk_offsets array + std::vector get_chunk_indices_from_intervals( + const std::vector& intervals) const { + std::vector chunk_indices; + + if (header().is_laz_compressed() && m_laz_reader.has_value()) { + const auto& chunk_table = m_laz_reader->chunk_table(); + const auto& decompressed_offsets = chunk_table.decompressed_chunk_offsets(); + const size_t n_chunks = decompressed_offsets.size(); + + if (n_chunks == 0 || intervals.empty()) { + return chunk_indices; + } + + for (const auto& interval : intervals) { + const size_t a = static_cast(interval.start); + const size_t b = static_cast(interval.end); + + // First overlapping chunk: the first chunk i where chunk_end[i] >= a. + // chunk_end[i] = decompressed_offsets[i+1] - 1 (for i < n_chunks-1), so + // chunk_end[i] >= a <=> decompressed_offsets[i+1] > a. + // Find the first position j in [begin+1, end) where offsets[j] > a; + // then i = j - 1 is the first chunk whose end reaches a. + // When j == end (a is past all offset values), first_chunk = n_chunks - 1 + // (the last chunk, whose end extends to total_points - 1). + auto first_it = + std::upper_bound(decompressed_offsets.begin() + 1, decompressed_offsets.end(), a); + size_t first_chunk = static_cast(first_it - decompressed_offsets.begin()) - 1; + + // Last overlapping chunk (exclusive upper bound): the first chunk i where + // chunk_start[i] = decompressed_offsets[i] > b. + auto last_it = + std::upper_bound(decompressed_offsets.begin(), decompressed_offsets.end(), b); + size_t last_chunk_exclusive = static_cast(last_it - decompressed_offsets.begin()); + + for (size_t i = first_chunk; i < last_chunk_exclusive; ++i) { + chunk_indices.push_back(i); + } + } + + // Remove duplicates and sort + std::sort(chunk_indices.begin(), chunk_indices.end()); + chunk_indices.erase(std::unique(chunk_indices.begin(), chunk_indices.end()), + chunk_indices.end()); + } else { + // For non-LAZ files, there's only one chunk (index 0) + if (!intervals.empty()) { + chunk_indices.push_back(0); + } + } + + return chunk_indices; + } + + // Read a list of chunks (not necessarily contiguous), decompressing in parallel. + template + std::span read_chunks_list(std::span output_location, + const std::vector& chunk_indices) { + if (chunk_indices.empty()) { + return output_location.subspan(0, 0); + } + + if (header().is_laz_compressed()) { + const auto& chunk_table = m_laz_reader->chunk_table(); + const auto& points_per_chunk_vec = chunk_table.points_per_chunk(); + + // Pre-compute per-chunk output offsets in a single sequential pass. + size_t total_points = 0; + std::vector output_offsets(chunk_indices.size()); + for (size_t i = 0; i < chunk_indices.size(); ++i) { + output_offsets[i] = total_points; + total_points += points_per_chunk_vec[chunk_indices[i]]; + } + + LASPP_ASSERT_GE(output_location.size(), total_points); + + if (m_mapped_file.has_value()) { + // Memory-mapped path: get_bytes is zero-copy and thread-safe. + // Decompress all chunks in parallel — each call uses only local state. + utilities::parallel_for(size_t{0}, chunk_indices.size(), [&](size_t i) { + const size_t chunk_idx = chunk_indices[i]; + const size_t file_data_offset = + header().offset_to_point_data() + chunk_table.chunk_offset(chunk_idx); + auto buf = get_bytes(file_data_offset, chunk_table.compressed_chunk_size(chunk_idx)); + m_laz_reader->decompress_chunk( + buf.data, + output_location.subspan(output_offsets[i], points_per_chunk_vec[chunk_idx])); + }); + } else { + // Stream-based path: read chunks in parallel (with mutex protection) and decompress. + // This overlaps I/O and decompression - while one thread decompresses, others can read. + std::mutex stream_mutex; + utilities::parallel_for(size_t{0}, chunk_indices.size(), [&](size_t i) { + const size_t chunk_idx = chunk_indices[i]; + const size_t file_data_offset = + header().offset_to_point_data() + chunk_table.chunk_offset(chunk_idx); + const size_t compressed_size = chunk_table.compressed_chunk_size(chunk_idx); + + // Read chunk data with stream mutex protection + std::vector compressed_buffer; + { + std::lock_guard lock(stream_mutex); + auto buf = get_bytes(file_data_offset, compressed_size); + compressed_buffer = {buf.data.begin(), buf.data.end()}; + } + + // Decompress without holding the lock (allows other threads to read while we decompress) + m_laz_reader->decompress_chunk( + std::span(compressed_buffer), + output_location.subspan(output_offsets[i], points_per_chunk_vec[chunk_idx])); + }); + } + + return output_location.subspan(0, total_points); + } else { + // For non-LAZ files, there's only one chunk + LASPP_ASSERT(chunk_indices.size() == 1 && chunk_indices[0] == 0, + "Non-LAZ files should only have chunk index 0"); + return read_chunk(output_location, 0); + } + } }; } // namespace laspp diff --git a/src/las_writer.hpp b/src/las_writer.hpp index decf457..76ab662 100644 --- a/src/las_writer.hpp +++ b/src/las_writer.hpp @@ -6,18 +6,22 @@ #pragma once #include -#include +#include #include #include +#include #include #include #include "example_custom_las_point.hpp" #include "las_header.hpp" #include "las_point.hpp" +#include "las_reader.hpp" #include "laz/laz_vlr.hpp" #include "laz/laz_writer.hpp" +#include "spatial_index.hpp" #include "utilities/assert.hpp" +#include "utilities/thread_pool.hpp" #include "vlr.hpp" namespace laspp { @@ -80,6 +84,19 @@ class LASWriter { const LASHeader& header() const { return m_header; } LASHeader& header() { return m_header; } + // Copy header metadata from another header (preserves writer-managed fields) + void copy_header_metadata(const LASHeader& source_header) { + m_header.m_file_source_id = source_header.m_file_source_id; + m_header.m_global_encoding = source_header.m_global_encoding; + m_header.m_project_id_1 = source_header.m_project_id_1; + m_header.m_project_id_2 = source_header.m_project_id_2; + m_header.m_project_id_3 = source_header.m_project_id_3; + std::memcpy(m_header.m_project_id_4, source_header.m_project_id_4, + sizeof(m_header.m_project_id_4)); + std::memcpy(m_header.m_system_id, source_header.m_system_id, sizeof(m_header.m_system_id)); + m_header.transform() = source_header.transform(); + } + void write_vlr(const LASVLR& vlr, std::span data) { LASPP_ASSERT_EQ(m_stage, WritingStage::VLRS); LASPP_ASSERT_EQ(vlr.record_length_after_header, data.size()); @@ -188,21 +205,18 @@ class LASWriter { static_assert(is_copy_assignable()); static_assert(is_copy_fromable()); - std::vector point_indices(points.size()); - std::iota(point_indices.begin(), point_indices.end(), size_t{0}); - // Parallel copy from user point type into the serialisable PointType buffer. - std::for_each(std::execution::par_unseq, point_indices.begin(), point_indices.end(), - [&](size_t i) { - copy_if_possible(points_to_write[i], points[i]); - copy_if_possible(points_to_write[i], points[i]); - copy_if_possible(points_to_write[i], points[i]); - copy_if_possible(points_to_write[i], points[i]); - copy_if_possible(points_to_write[i], points[i]); - }); + utilities::parallel_for(size_t{0}, points.size(), [&](size_t i) { + copy_if_possible(points_to_write[i], points[i]); + copy_if_possible(points_to_write[i], points[i]); + copy_if_possible(points_to_write[i], points[i]); + copy_if_possible(points_to_write[i], points[i]); + copy_if_possible(points_to_write[i], points[i]); + }); // Parallel reduction to accumulate per-return counts and bounding box. - if constexpr (std::is_base_of_v) { + if constexpr (std::is_base_of_v || + std::is_base_of_v) { struct PointStats { size_t points_by_return[15]{}; int32_t min_pos[3]{std::numeric_limits::max(), std::numeric_limits::max(), @@ -210,28 +224,51 @@ class LASWriter { int32_t max_pos[3]{std::numeric_limits::lowest(), std::numeric_limits::lowest(), std::numeric_limits::lowest()}; + + void combine(const PointStats& other) { + for (size_t j = 0; j < 15; j++) { + points_by_return[j] += other.points_by_return[j]; + } + for (size_t j = 0; j < 3; j++) { + min_pos[j] = std::min(min_pos[j], other.min_pos[j]); + max_pos[j] = std::max(max_pos[j], other.max_pos[j]); + } + } }; - PointStats result = std::transform_reduce( - std::execution::par_unseq, point_indices.begin(), point_indices.end(), PointStats{}, - [](PointStats a, const PointStats& b) { - for (size_t j = 0; j < 15; j++) a.points_by_return[j] += b.points_by_return[j]; - for (size_t j = 0; j < 3; j++) { - a.min_pos[j] = std::min(a.min_pos[j], b.min_pos[j]); - a.max_pos[j] = std::max(a.max_pos[j], b.max_pos[j]); + PointStats result; + // Use chunking internally: process 1000 points per chunk for better cache locality + constexpr size_t reduction_chunk_size = 1000; + const size_t num_chunks = (points.size() + reduction_chunk_size - 1) / reduction_chunk_size; + utilities::parallel_for_reduction( + size_t{0}, num_chunks, result, [&](size_t chunk_idx, PointStats& local_stats) { + // Process all points in this chunk + const size_t chunk_start = chunk_idx * reduction_chunk_size; + const size_t chunk_end = std::min(chunk_start + reduction_chunk_size, points.size()); + for (size_t i = chunk_start; i < chunk_end; ++i) { + // Accumulate stats for this point + if constexpr (std::is_base_of_v) { + if (points_to_write[i].bit_byte.return_number < 16 && + points_to_write[i].bit_byte.return_number > 0) { + local_stats.points_by_return[points_to_write[i].bit_byte.return_number - 1]++; + } + } else if constexpr (std::is_base_of_v) { + if (points_to_write[i].return_number < 16 && points_to_write[i].return_number > 0) { + local_stats.points_by_return[points_to_write[i].return_number - 1]++; + } + } + // Read x, y, z using memcpy to avoid alignment issues with packed structures + int32_t x, y, z; + std::memcpy(&x, &points_to_write[i].x, sizeof(x)); + std::memcpy(&y, &points_to_write[i].y, sizeof(y)); + std::memcpy(&z, &points_to_write[i].z, sizeof(z)); + local_stats.min_pos[0] = std::min(local_stats.min_pos[0], x); + local_stats.min_pos[1] = std::min(local_stats.min_pos[1], y); + local_stats.min_pos[2] = std::min(local_stats.min_pos[2], z); + local_stats.max_pos[0] = std::max(local_stats.max_pos[0], x); + local_stats.max_pos[1] = std::max(local_stats.max_pos[1], y); + local_stats.max_pos[2] = std::max(local_stats.max_pos[2], z); } - return a; - }, - [&](size_t i) { - PointStats s; - if (points_to_write[i].bit_byte.return_number < 16 && - points_to_write[i].bit_byte.return_number > 0) { - s.points_by_return[points_to_write[i].bit_byte.return_number - 1] = 1; - } - s.min_pos[0] = s.max_pos[0] = points_to_write[i].x; - s.min_pos[1] = s.max_pos[1] = points_to_write[i].y; - s.min_pos[2] = s.max_pos[2] = points_to_write[i].z; - return s; }); for (size_t j = 0; j < 15; j++) points_by_return[j] = result.points_by_return[j]; @@ -311,7 +348,7 @@ class LASWriter { } public: - void write_evlr(const LASEVLR& evlr, const std::vector& data) { + void write_evlr(const LASEVLR& evlr, const std::span& data) { write_chunktable(); LASPP_ASSERT_LE(m_stage, WritingStage::EVLRS); if (m_stage < WritingStage::EVLRS) { @@ -326,6 +363,130 @@ class LASWriter { header().m_number_of_extended_variable_length_records++; } + void write_lastools_spatial_index(const QuadtreeSpatialIndex& spatial_index) { + // Write spatial index to a buffer first to get the size + std::stringstream index_stream; + spatial_index.write(index_stream); + std::vector index_data((std::istreambuf_iterator(index_stream)), + std::istreambuf_iterator()); + + // Create EVLR header + LASEVLR evlr; + evlr.reserved = 0; + string_to_arr("LAStools", evlr.user_id); + evlr.record_id = 30; + evlr.record_length_after_header = index_data.size(); + string_to_arr("LAX spatial indexing (LASindex)", evlr.description); + + write_evlr(evlr, std::span(reinterpret_cast(index_data.data()), index_data.size())); + } + + private: + template + void copy_points_with_spatial_index(LASReader& reader, bool add_spatial_index) { + if (!add_spatial_index) { + // Streaming fast path: process one batch of chunks at a time. + // Each batch is decompressed in parallel (read_chunks uses parallel_for + // internally), so throughput matches the bulk path while peak memory is + // bounded to O(batch_size * chunk_pts) instead of O(total_points). + const auto ppc = reader.points_per_chunk(); + if (ppc.empty()) return; + + const size_t n_chunks = reader.num_chunks(); + const size_t batch_size = 20 * utilities::get_num_threads(); + + // Compute the exact maximum point count across all batches so the buffer + // is neither over- nor under-allocated (matters for single-chunk non-LAZ + // files where max_chunk_pts == num_points). + size_t max_batch_pts = 0; + for (size_t b = 0; b < n_chunks; b += batch_size) { + size_t pts = 0; + for (size_t i = b, end = std::min(b + batch_size, n_chunks); i < end; ++i) pts += ppc[i]; + max_batch_pts = std::max(max_batch_pts, pts); + } + + std::vector batch_buf(max_batch_pts); + for (size_t b = 0; b < n_chunks; b += batch_size) { + auto pts = + reader.read_chunks(batch_buf, {b, std::min(b + batch_size, n_chunks)}); + write_points(pts); + } + return; + } + + // Spatial-index path: reordering requires the full point set in memory. + std::vector points(reader.num_points()); + reader.read_chunks(points, {0, reader.num_chunks()}); + + double scale_x = reader.header().transform().scale_factors().x(); + double offset_x = reader.header().transform().offsets().x(); + double scale_y = reader.header().transform().scale_factors().y(); + double offset_y = reader.header().transform().offsets().y(); + + QuadtreeSpatialIndex temp_index(reader.header(), points); + + std::vector> point_cell_pairs; + point_cell_pairs.reserve(points.size()); + + for (size_t i = 0; i < points.size(); ++i) { + int32_t x_raw; + int32_t y_raw; + std::memcpy(&x_raw, &points[i].x, sizeof(x_raw)); + std::memcpy(&y_raw, &points[i].y, sizeof(y_raw)); + double x = int32_to_double(x_raw, scale_x, offset_x); + double y = int32_to_double(y_raw, scale_y, offset_y); + int32_t cell_index = temp_index.get_cell_index(x, y); + point_cell_pairs.push_back({cell_index, i}); + } + + // Sort by cell index, then by original index + std::sort(point_cell_pairs.begin(), point_cell_pairs.end()); + + std::vector reordered_points; + reordered_points.reserve(points.size()); + for (const auto& pair : point_cell_pairs) { + reordered_points.push_back(points[pair.second]); + } + points = std::move(reordered_points); + + QuadtreeSpatialIndex spatial_index(reader.header(), points); + + write_points(points, 50000); + write_lastools_spatial_index(spatial_index); + } + + public: + // Copy all data from a reader to this writer + void copy_from_reader(LASReader& reader, bool add_spatial_index = false) { + // Copy header metadata (preserves writer-managed fields) + copy_header_metadata(reader.header()); + + // Copy VLRs (skip LAZ VLR since compression is handled by point format; + // skip existing LAStools spatial index VLR when we're building a new one) + for (const auto& vlr : reader.vlr_headers()) { + if (vlr.is_laz_vlr()) { + continue; + } + if (add_spatial_index && vlr.is_lastools_spatial_index_vlr()) { + continue; // Skip existing spatial index, we'll write a new one + } + write_vlr(vlr, reader.read_vlr_data(vlr)); + } + + // Read and write points (with optional spatial index) + LASPP_SWITCH_OVER_POINT_TYPE(reader.header().point_format(), copy_points_with_spatial_index, + reader, add_spatial_index); + + // Copy EVLRs (skip spatial index EVLR if we're adding a new one) + for (const auto& evlr : reader.evlr_headers()) { + if (add_spatial_index && evlr.is_lastools_spatial_index_evlr()) { + continue; // Skip existing spatial index, we'll write a new one + } + auto data = reader.read_evlr_data(evlr); + write_evlr(evlr, data); + } + } + ~LASWriter() { write_chunktable(); write_header(); diff --git a/src/laz/bytes14_encoder.hpp b/src/laz/bytes14_encoder.hpp new file mode 100644 index 0000000..7f7df9e --- /dev/null +++ b/src/laz/bytes14_encoder.hpp @@ -0,0 +1,105 @@ +/* + * SPDX-FileCopyrightText: (c) 2025-2026 Trailblaze Software, all rights reserved + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include +#include +#include + +#include "laz/layered_stream.hpp" +#include "laz/stream.hpp" +#include "laz/symbol_encoder.hpp" +#include "utilities/assert.hpp" + +namespace laspp { + +class Bytes14Encoder { + struct Context { + bool initialized; + std::vector last_value; + std::vector> byte_encoders; // One encoder per byte position + + Context() : initialized(false) {} + + void initialize(size_t num_bytes) { + if (byte_encoders.empty()) { + byte_encoders.resize(num_bytes); + } + } + }; + + std::array m_contexts; + uint8_t m_active_context; + uint8_t m_last_value_context; + size_t m_num_bytes; + + Context& ensure_context(uint8_t idx) { + if (!m_contexts[idx].initialized) { + LASPP_ASSERT(m_contexts[m_active_context].initialized, + "Cannot switch to uninitialized context when no initialized context exists."); + m_contexts[idx].last_value = m_contexts[m_active_context].last_value; + m_contexts[idx].initialize(m_num_bytes); + m_contexts[idx].initialized = true; + } + m_active_context = idx; + return m_contexts[idx]; + } + + public: + static constexpr int NUM_LAYERS = 1; + + using EncodedType = std::vector; + + const EncodedType& last_value() const { return m_contexts[m_last_value_context].last_value; } + + explicit Bytes14Encoder(const std::vector& initial_bytes, uint8_t context) + : m_active_context(context), m_num_bytes(initial_bytes.size()) { + m_contexts[context].initialize(m_num_bytes); + m_contexts[context].last_value = initial_bytes; + m_contexts[context].initialized = true; + m_last_value_context = context; + } + + public: + std::vector decode(LayeredInStreams& in_streams, uint8_t context_idx) { + InStream& in_stream = in_streams[0]; + // Yet another cursed bug in LASzip where the last item + // is not switched to the new context + m_last_value_context = m_contexts[context_idx].initialized ? m_active_context : context_idx; + std::vector& last_value = m_contexts[m_last_value_context].last_value; + Context& context = ensure_context(context_idx); + + for (size_t i = 0; i < m_num_bytes; i++) { + uint8_t diff = static_cast(context.byte_encoders[i].decode_symbol(in_stream)); + last_value[i] = static_cast(static_cast(last_value[i]) + diff); + } + + return last_value; + } + + void encode(LayeredOutStreams& out_streams, const std::vector& bytes, + uint8_t context_idx) { + OutStream& out_stream = out_streams[0]; + // Yet another cursed bug in LASzip where the last item + // is not switched to the new context + std::vector& last_value = m_contexts[context_idx].initialized + ? m_contexts[m_active_context].last_value + : m_contexts[context_idx].last_value; + Context& context = ensure_context(context_idx); + + LASPP_ASSERT_EQ(bytes.size(), m_num_bytes); + + // Encode deltas for each byte + for (size_t i = 0; i < m_num_bytes; i++) { + uint8_t diff = static_cast(bytes[i]) - static_cast(last_value[i]); + context.byte_encoders[i].encode_symbol(out_stream, diff); + } + + last_value = bytes; + } +}; + +} // namespace laspp diff --git a/src/laz/chunktable.hpp b/src/laz/chunktable.hpp index 46c7595..6a82e27 100644 --- a/src/laz/chunktable.hpp +++ b/src/laz/chunktable.hpp @@ -38,6 +38,8 @@ struct LASPP_PACKED LAZChunkTableHeader { } }; +#pragma pack(pop) + class LAZChunkTable : LAZChunkTableHeader { std::vector m_compressed_chunk_size; std::optional m_constant_chunk_size; @@ -153,6 +155,4 @@ class LAZChunkTable : LAZChunkTableHeader { } }; -#pragma pack(pop) - } // namespace laspp diff --git a/src/laz/layered_stream.hpp b/src/laz/layered_stream.hpp index 683eefc..e1e83d3 100644 --- a/src/laz/layered_stream.hpp +++ b/src/laz/layered_stream.hpp @@ -57,8 +57,8 @@ class LayeredInStreams { // Dummy buffer for empty layers (InStream requires at least 4 bytes to initialize). // This is only used for layers with size 0 or < 4, which should never be read from // (non_empty() check prevents access). - static constexpr std::array s_empty_layer_buffer = {std::byte{0}, std::byte{0}, - std::byte{0}, std::byte{0}}; + static constexpr std::array s_empty_layer_buffer = { + {std::byte{0}, std::byte{0}, std::byte{0}, std::byte{0}}}; public: LayeredInStreams(std::span& layer_sizes, diff --git a/src/laz/laz_reader.hpp b/src/laz/laz_reader.hpp index fa45ffa..038ff64 100644 --- a/src/laz/laz_reader.hpp +++ b/src/laz/laz_reader.hpp @@ -137,6 +137,7 @@ class LAZReader { case LAZItemType::Wavepacket13: case LAZItemType::RGBNIR14: case LAZItemType::Wavepacket14: + case LAZItemType::Byte14: default: LASPP_FAIL("Currently unsupported LAZ item type: ", LAZItemType(record.item_type), " (", static_cast(record.item_type), ")"); diff --git a/src/laz/laz_writer.hpp b/src/laz/laz_writer.hpp index 8f8c075..3f2dd9c 100644 --- a/src/laz/laz_writer.hpp +++ b/src/laz/laz_writer.hpp @@ -7,7 +7,6 @@ #include #include -#include #include #include #include @@ -158,6 +157,7 @@ class LAZWriter { case LAZItemType::Wavepacket13: case LAZItemType::RGBNIR14: case LAZItemType::Wavepacket14: + case LAZItemType::Byte14: default: LASPP_FAIL("Currently unsupported LAZ item type: ", static_cast(record.item_type)); diff --git a/src/laz/stream.hpp b/src/laz/stream.hpp index 502f053..694f21f 100644 --- a/src/laz/stream.hpp +++ b/src/laz/stream.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "utilities/assert.hpp" @@ -40,6 +41,29 @@ class PointerStreamBuffer : public std::streambuf { #endif } + // Override xsgetn to ensure proper bounds checking when reading + std::streamsize xsgetn(char* s, std::streamsize n) override { + char* base = eback(); + char* current = gptr(); + char* end = egptr(); + if (current == nullptr || end == nullptr || base == nullptr) { + return 0; // No data available + } + + std::streamsize available = end - current; + std::streamsize to_read = (n < available) ? n : available; + + if (to_read > 0) { + std::memcpy(s, current, static_cast(to_read)); + // Update get pointer + setg(base, current + to_read, end); + } + + // Return the number of bytes actually read + // If to_read < n, LASPP_CHECK_READ will detect this via gcount() + return to_read; + } + protected: // Support seekg() / tellg() so that LASReader can seek within mapped memory. pos_type seekoff(off_type off, std::ios_base::seekdir dir, @@ -69,7 +93,7 @@ class PointerStreamBuffer : public std::streambuf { // Safe to convert to pointer now char* new_gptr = const_cast(base) + new_index; setg(const_cast(base), new_gptr, const_cast(end)); - return pos_type(static_cast(new_index)); + return pos_type(new_index); } pos_type seekpos(pos_type pos, std::ios_base::openmode which = std::ios_base::in) override { @@ -99,8 +123,9 @@ class InStream : StreamVariables { // Owned buffer used only by the std::istream& constructor. std::vector m_owned_data; - [[nodiscard]] std::byte read_byte() noexcept { - LASPP_ASSERT(m_ptr < m_end, "InStream: read past end of buffer"); + [[nodiscard]] std::byte read_byte() { + if (m_ptr >= m_end) [[unlikely]] + throw std::runtime_error("InStream: read past end of buffer"); return *m_ptr++; } @@ -112,7 +137,7 @@ class InStream : StreamVariables { // Fast path: construct directly from in-memory data. No copies, no virtual dispatch. // Requires at least 4 bytes to initialize the range coder state. - InStream(const std::byte* data, size_t size) noexcept : m_ptr(data), m_end(data + size) { + InStream(const std::byte* data, size_t size) : m_ptr(data), m_end(data + size) { LASPP_ASSERT_GE(size, 4u, "InStream: buffer too small (need at least 4 bytes)"); for (int i = 0; i < 4; i++) { m_value <<= 8; @@ -151,9 +176,13 @@ class InStream : StreamVariables { // Renormalise the range coder and return the current value. Reads 0, 1, 2 or 3 bytes // from the in-memory pointer — the common case (length >= 2^24) reads nothing, so this // is very cheap on average. - uint32_t get_value() noexcept { + // + // The bounds checks use [[unlikely]] so the compiler keeps them out of the hot path and + // the branch predictor treats them as never-taken. + uint32_t get_value() { if (m_length < (1u << 8)) { - LASPP_ASSERT(m_ptr + 3 <= m_end, "InStream: read past end of buffer (need 3 bytes)"); + if (m_ptr + 3 > m_end) [[unlikely]] + throw std::runtime_error("InStream: read past end of buffer (need 3 bytes)"); m_value <<= 24; m_length <<= 24; // Read 3 bytes big-endian. @@ -162,7 +191,8 @@ class InStream : StreamVariables { static_cast(static_cast(m_ptr[2])); m_ptr += 3; } else if (m_length < (1u << 16)) { - LASPP_ASSERT(m_ptr + 2 <= m_end, "InStream: read past end of buffer (need 2 bytes)"); + if (m_ptr + 2 > m_end) [[unlikely]] + throw std::runtime_error("InStream: read past end of buffer (need 2 bytes)"); m_value <<= 16; m_length <<= 16; // Read 2 bytes big-endian. @@ -170,7 +200,8 @@ class InStream : StreamVariables { static_cast(static_cast(m_ptr[1])); m_ptr += 2; } else if (m_length < (1u << 24)) { - LASPP_ASSERT(m_ptr < m_end, "InStream: read past end of buffer (need 1 byte)"); + if (m_ptr >= m_end) [[unlikely]] + throw std::runtime_error("InStream: read past end of buffer (need 1 byte)"); m_value <<= 8; m_length <<= 8; m_value |= static_cast(static_cast(*m_ptr++)); @@ -252,7 +283,7 @@ class OutStream : StreamVariables { } void update_range(uint32_t lower, uint32_t upper) { - LASPP_ASSERT(!m_finalized); + LASPP_DEBUG_ASSERT(!m_finalized); if ((static_cast(m_base) + static_cast(lower)) >= (static_cast(1) << 32)) { propogate_carry(); diff --git a/src/laz/symbol_encoder.hpp b/src/laz/symbol_encoder.hpp index 64c7006..ec6e82a 100644 --- a/src/laz/symbol_encoder.hpp +++ b/src/laz/symbol_encoder.hpp @@ -166,7 +166,7 @@ class SymbolEncoder { } void encode_symbol(OutStream& stream, uint_fast16_t symbol) { - LASPP_ASSERT_LT(symbol, NSymbols); + LASPP_DEBUG_ASSERT_LT(symbol, NSymbols); uint32_t l_tmp = (stream.length() >> 15); stream.update_range(distribution[symbol] * l_tmp, symbol < NSymbols - 1 ? (distribution[symbol + 1] * l_tmp) diff --git a/src/laz/tests/test_bytes14_encoder.cpp b/src/laz/tests/test_bytes14_encoder.cpp new file mode 100644 index 0000000..617ff39 --- /dev/null +++ b/src/laz/tests/test_bytes14_encoder.cpp @@ -0,0 +1,151 @@ +/* + * SPDX-FileCopyrightText: (c) 2025-2026 Trailblaze Software, all rights reserved + * SPDX-License-Identifier: MIT + */ + +#include +#include +#include + +#include "laz/bytes14_encoder.hpp" +#include "laz/layered_stream.hpp" +#include "utilities/assert.hpp" + +using namespace laspp; + +int main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) { + { + std::vector> values; + values.emplace_back(std::vector{std::byte(0), std::byte(1), std::byte(2)}); + values.emplace_back(std::vector{std::byte(7), std::byte(255), std::byte(5)}); + values.emplace_back(std::vector{std::byte(0), std::byte(254), std::byte(27)}); + values.emplace_back(std::vector{std::byte(0), std::byte(1), std::byte(2)}); + values.emplace_back(std::vector{std::byte(7), std::byte(0), std::byte(52)}); + + std::string combined_data; + + { + LayeredOutStreams out_streams; + Bytes14Encoder encoder(values[0], 0); + for (size_t i = 1; i < values.size(); i++) { + encoder.encode(out_streams, values[i], 0); + } + + std::stringstream combined_stream = out_streams.combined_stream(); + combined_data = combined_stream.str(); + } + + std::span size_span(reinterpret_cast(combined_data.data()), + Bytes14Encoder::NUM_LAYERS * sizeof(uint32_t)); + std::span data_span( + reinterpret_cast(combined_data.data()) + size_span.size(), + combined_data.size() - size_span.size()); + + LayeredInStreams in_streams(size_span, data_span); + + Bytes14Encoder decoder(values[0], 0); + LASPP_ASSERT_EQ(decoder.last_value(), values[0]); + for (size_t i = 1; i < values.size(); i++) { + std::vector decoded = decoder.decode(in_streams, 0); + LASPP_ASSERT_EQ(decoded, values[i]); + LASPP_ASSERT_EQ(decoder.last_value(), values[i]); + } + } + + { + std::mt19937_64 gen(42); + std::vector> random_values; + random_values.reserve(100); + + // Generate random byte arrays of varying sizes + for (size_t i = 0; i < 100; i++) { + size_t num_bytes = (i % 10) + 1; // Sizes from 1 to 10 bytes + std::vector bytes(num_bytes); + for (size_t j = 0; j < num_bytes; j++) { + bytes[j] = static_cast(gen() % 256); + } + random_values.push_back(bytes); + } + + // Use first value as initial value + std::vector initial_value = random_values[0]; + + std::string combined_data; + + { + LayeredOutStreams out_streams; + Bytes14Encoder encoder(initial_value, 0); + for (size_t i = 1; i < random_values.size(); i++) { + // Pad or truncate to match initial size for this test + std::vector value = random_values[i]; + if (value.size() != initial_value.size()) { + value.resize(initial_value.size()); + } + encoder.encode(out_streams, value, 0); + } + + std::stringstream combined_stream = out_streams.combined_stream(); + combined_data = combined_stream.str(); + } + + std::span size_span(reinterpret_cast(combined_data.data()), + Bytes14Encoder::NUM_LAYERS * sizeof(uint32_t)); + std::span data_span( + reinterpret_cast(combined_data.data()) + size_span.size(), + combined_data.size() - size_span.size()); + + LayeredInStreams in_streams(size_span, data_span); + + Bytes14Encoder decoder(initial_value, 0); + for (size_t i = 1; i < random_values.size(); i++) { + std::vector expected = random_values[i]; + if (expected.size() != initial_value.size()) { + expected.resize(initial_value.size()); + } + std::vector decoded = decoder.decode(in_streams, 0); + LASPP_ASSERT_EQ(decoded, expected); + } + } + + { + // Test context switching + std::vector initial_value{std::byte(0), std::byte(1), std::byte(2)}; + std::vector> values; + values.emplace_back(std::vector{std::byte(10), std::byte(11), std::byte(12)}); + values.emplace_back(std::vector{std::byte(20), std::byte(21), std::byte(22)}); + values.emplace_back(std::vector{std::byte(30), std::byte(31), std::byte(32)}); + + std::string combined_data; + + { + LayeredOutStreams out_streams; + Bytes14Encoder encoder(initial_value, 0); + encoder.encode(out_streams, values[0], 0); + encoder.encode(out_streams, values[1], 1); // Switch to context 1 + encoder.encode(out_streams, values[2], 2); // Switch to context 2 + + std::stringstream combined_stream = out_streams.combined_stream(); + combined_data = combined_stream.str(); + } + + std::span size_span(reinterpret_cast(combined_data.data()), + Bytes14Encoder::NUM_LAYERS * sizeof(uint32_t)); + std::span data_span( + reinterpret_cast(combined_data.data()) + size_span.size(), + combined_data.size() - size_span.size()); + + LayeredInStreams in_streams(size_span, data_span); + + Bytes14Encoder decoder(initial_value, 0); + std::vector decoded1 = decoder.decode(in_streams, 0); + LASPP_ASSERT_EQ(decoded1, values[0]); + + std::vector decoded2 = decoder.decode(in_streams, 1); + LASPP_ASSERT_EQ(decoded2, values[1]); + + std::vector decoded3 = decoder.decode(in_streams, 2); + LASPP_ASSERT_EQ(decoded3, values[2]); + } + + return 0; +} diff --git a/src/laz/tests/test_laszip_interop.cpp b/src/laz/tests/test_laszip_interop.cpp index a6b2bcc..e777e94 100644 --- a/src/laz/tests/test_laszip_interop.cpp +++ b/src/laz/tests/test_laszip_interop.cpp @@ -363,13 +363,7 @@ void write_points_with_laspp(const std::vector& points, const std::files LASPP_ASSERT(writer.header().is_laz_compressed(), "File should be LAZ compressed"); // Set scale factors and offsets (LASWriter handles point counts and bounds automatically) - const double scale = 0.27; - writer.header().transform().m_scale_factors.x() = scale; - writer.header().transform().m_scale_factors.y() = scale; - writer.header().transform().m_scale_factors.z() = scale; - writer.header().transform().m_offsets.x() = 146; - writer.header().transform().m_offsets.y() = -25.4; - writer.header().transform().m_offsets.z() = 512; + writer.header().transform() = Transform({0.27, 0.35, -26}, {146, -25.4, 512}); writer.write_points(std::span(points.data(), points.size()), ChunkSize); } @@ -561,6 +555,136 @@ void run_laszip_internal_roundtrip(size_t n_points) { } } +// Test: LASzip writes a .laz with a spatial-index sidecar (.lax); +// las++ reads the sidecar and verifies structural + geometric integrity. +// +// Two point clusters 700 units apart ensure the spatial index produces at least +// two distinct cells. scale = 1.0, offset = 0.0 → integer coord == real coord. +void test_spatial_index_laszip_creates_laspp_reads() { + constexpr int32_t kNPerCluster = 200; + constexpr double kClusterA = 150.0; // centre of cluster A, indices [0, 199] + constexpr double kClusterB = 850.0; // centre of cluster B, indices [200, 399] + + std::vector points; + points.reserve(static_cast(kNPerCluster * 2)); + for (int32_t i = 0; i < kNPerCluster; ++i) { + LASPointFormat0 pt{}; + pt.x = static_cast(kClusterA) - kNPerCluster / 2 + i; // [50, 249] + pt.y = pt.x; + points.push_back(pt); + } + for (int32_t i = 0; i < kNPerCluster; ++i) { + LASPointFormat0 pt{}; + pt.x = static_cast(kClusterB) - kNPerCluster / 2 + i; // [750, 949] + pt.y = pt.x; + points.push_back(pt); + } + + TempFile temp_laz("spatial_interop"); + auto lax_path = temp_laz.path(); + lax_path.replace_extension(".lax"); + struct LaxGuard { + std::filesystem::path p; + ~LaxGuard() { + std::error_code ec; + std::filesystem::remove(p, ec); + } + } lax_guard{lax_path}; + + // ── Write with LASzip, enabling spatial index creation ─────────────────── + { + laszip_POINTER writer; + LASPP_ASSERT_EQ(laszip_create(&writer), 0); + + laszip_header* hdr; + LASPP_ASSERT_EQ(laszip_get_header_pointer(writer, &hdr), 0); + std::memset(hdr, 0, sizeof(laszip_header)); + hdr->version_major = 1; + hdr->version_minor = 2; + hdr->header_size = 227; + hdr->offset_to_point_data = 227; + hdr->point_data_format = static_cast(LASPointFormat0::PointFormat); + hdr->point_data_record_length = static_cast(sizeof(LASPointFormat0)); + hdr->number_of_point_records = static_cast(points.size()); + hdr->extended_number_of_point_records = points.size(); + // scale=1.0, offset=0.0 → integer coordinate == real coordinate + hdr->x_scale_factor = 1.0; + hdr->y_scale_factor = 1.0; + hdr->z_scale_factor = 1.0; + // bounding box covers both clusters + hdr->min_x = 50.0; + hdr->max_x = 949.0; + hdr->min_y = 50.0; + hdr->max_y = 949.0; + hdr->min_z = 0.0; + hdr->max_z = 0.0; + LASPP_ASSERT_EQ(laszip_set_header(writer, hdr), 0); + + // laszip_create_spatial_index must be called after set_header and before open_writer + LASPP_ASSERT_EQ(laszip_create_spatial_index(writer, 1 /*create*/, 0 /*append*/), 0); + LASPP_ASSERT_EQ(laszip_open_writer(writer, temp_laz.path().string().c_str(), 1 /*compress*/), + 0); + + laszip_point* las_pt; + LASPP_ASSERT_EQ(laszip_get_point_pointer(writer, &las_pt), 0); + for (size_t i = 0; i < points.size(); ++i) { + populate_laszip_point(points[i], *las_pt); + // laszip_write_indexed_point feeds coordinates into the spatial index + LASPP_ASSERT_EQ(laszip_write_indexed_point(writer), 0, "indexed write failed at ", i); + } + + LASPP_ASSERT_EQ(laszip_close_writer(writer), 0); // writes the .lax sidecar + LASPP_ASSERT_EQ(laszip_destroy(writer), 0); + } + + LASPP_ASSERT(std::filesystem::exists(lax_path), "LASzip did not create the .lax sidecar"); + + // ── Read with las++ via file path (auto-discovers the .lax) ────────────── + LASReader reader(temp_laz.path()); + LASPP_ASSERT(reader.has_lastools_spatial_index(), + "las++ should load the LASzip-generated .lax sidecar"); + + const auto& index = reader.lastools_spatial_index(); + LASPP_ASSERT_GT(index.num_cells(), 0u, "spatial index must contain at least one cell"); + + // Every interval endpoint must reference a valid point position + const auto n_pts = static_cast(points.size()); + for (const auto& [cell_idx, cell] : index.cells()) { + for (const auto& iv : cell.intervals) { + LASPP_ASSERT_LT(iv.start, n_pts, "interval start out of range in cell ", cell_idx); + LASPP_ASSERT_LT(iv.end, n_pts, "interval end out of range in cell ", cell_idx); + LASPP_ASSERT_LE(iv.start, iv.end, "interval start > end in cell ", cell_idx); + } + } + + // The clusters are ~700 units apart; with LASzip's default cell size of 100 + // they must fall in different cells + const int32_t cell_a = index.find_cell_index(kClusterA, kClusterA); + const int32_t cell_b = index.find_cell_index(kClusterB, kClusterB); + LASPP_ASSERT_GE(cell_a, 0, "no cell found for cluster A centre"); + LASPP_ASSERT_GE(cell_b, 0, "no cell found for cluster B centre"); + LASPP_ASSERT_NE(cell_a, cell_b, "clusters A and B must map to different cells"); + + // Cell bounds must geometrically contain the cluster centres + const Bound2D ba = index.get_cell_bounds(cell_a); + const Bound2D bb = index.get_cell_bounds(cell_b); + LASPP_ASSERT(ba.contains(kClusterA, kClusterA), "cell for cluster A must contain its centre"); + LASPP_ASSERT(bb.contains(kClusterB, kClusterB), "cell for cluster B must contain its centre"); + + // Cluster-A write-order indices are [0, kNPerCluster); the cell that contains + // cluster A's centre must reference at least one of those indices + bool cell_a_has_cluster_a_pt = false; + for (const auto& iv : index.cells().at(cell_a).intervals) { + if (iv.start < static_cast(kNPerCluster) || + iv.end < static_cast(kNPerCluster)) { + cell_a_has_cluster_a_pt = true; + break; + } + } + LASPP_ASSERT(cell_a_has_cluster_a_pt, + "cell for cluster A must reference at least one cluster-A point index"); +} + } // namespace int main() { @@ -613,5 +737,8 @@ int main() { run_laspp_file_roundtrip(MediumPointCount); run_laspp_file_roundtrip(LargePointCount); + // Spatial index interop: LASzip writes .lax sidecar, las++ reads and validates it + test_spatial_index_laszip_creates_laspp_reads(); + return 0; } diff --git a/src/laz/tests/test_spatial_index.cpp b/src/laz/tests/test_spatial_index.cpp new file mode 100644 index 0000000..1e1f8de --- /dev/null +++ b/src/laz/tests/test_spatial_index.cpp @@ -0,0 +1,408 @@ +/* + * SPDX-FileCopyrightText: (c) 2025-2026 Trailblaze Software, all rights reserved + * SPDX-License-Identifier: MIT + */ + +#include +#include +#include + +#include "las_header.hpp" +#include "las_point.hpp" +#include "spatial_index.hpp" +#include "utilities/assert.hpp" + +using namespace laspp; + +struct TestInterval { + uint32_t start; + uint32_t end; +}; + +// Write a minimal spatial index stream. The quadtree header uses valid bounds and the given +// levels. The interval section contains the provided cells; each cell carries the given +// number_points header field and intervals verbatim (no fixup), allowing injection of bad data. +static std::stringstream make_spatial_index_stream( + uint32_t levels, float min_x, float max_x, float min_y, float max_y, + // Each entry: {cell_index, number_points_field, intervals} + const std::vector>>& cells = {}) { + std::stringstream ss; + + auto write_u32 = [&](uint32_t v) { ss.write(reinterpret_cast(&v), 4); }; + auto write_i32 = [&](int32_t v) { ss.write(reinterpret_cast(&v), 4); }; + auto write_f32 = [&](float v) { ss.write(reinterpret_cast(&v), 4); }; + + // LASX outer header + ss.write("LASX", 4); + write_u32(0); // version + + // QuadtreeHeader + ss.write("LASS", 4); + write_u32(0); // type + ss.write("LASQ", 4); + write_u32(0); // version + write_u32(levels); + write_u32(0); // level_index + write_u32(0); // implicit_levels + write_f32(min_x); + write_f32(max_x); + write_f32(min_y); + write_f32(max_y); + + // Interval section + ss.write("LASV", 4); + write_u32(0); // version + write_u32(static_cast(cells.size())); + for (const auto& [cell_index, number_points, intervals] : cells) { + write_i32(cell_index); + write_u32(static_cast(intervals.size())); + write_u32(number_points); // written verbatim — may not match actual intervals + for (const auto& iv : intervals) { + write_u32(iv.start); + write_u32(iv.end); + } + } + + return ss; +} + +int main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) { + // Test default constructor + { + QuadtreeSpatialIndex index; + const auto& header = index.quadtree_header(); + + LASPP_ASSERT(memcmp(header.spatial_signature, "LASS", 4) == 0); + LASPP_ASSERT_EQ(header.type, 0u); + LASPP_ASSERT(memcmp(header.quadtree_signature, "LASQ", 4) == 0); + LASPP_ASSERT_EQ(header.version, 0u); + LASPP_ASSERT_EQ(header.levels, 0u); + LASPP_ASSERT_EQ(header.level_index, 0u); + LASPP_ASSERT_EQ(header.implicit_levels, 0u); + LASPP_ASSERT_EQ(header.min_x, 0.0f); + LASPP_ASSERT_EQ(header.max_x, 0.0f); + LASPP_ASSERT_EQ(header.min_y, 0.0f); + LASPP_ASSERT_EQ(header.max_y, 0.0f); + LASPP_ASSERT_EQ(index.num_cells(), 0); + } + + // Test constructor from header and points + { + LASHeader header; + const_cast(header.bounds()).update({0.0, 0.0, 0.0}); + const_cast(header.bounds()).update({100.0, 100.0, 0.0}); + + // Create points at different locations to generate cells. + // With scale 0.001: x = (i%10)*10000 -> 0-90, y = (i/10)*10000 -> 0-90 (i < 100). + std::vector points(100); + for (size_t i = 0; i < points.size(); ++i) { + points[i].x = static_cast((i % 10) * 10000); // 0.001 scale -> 0-90 + points[i].y = static_cast((i / 10) * 10000); // 0.001 scale -> 0-90 + points[i].z = 0; + } + + QuadtreeSpatialIndex index(header, points); + + const auto& quadtree_header = index.quadtree_header(); + LASPP_ASSERT_EQ(quadtree_header.min_x, 0.0f); + LASPP_ASSERT_EQ(quadtree_header.min_y, 0.0f); + LASPP_ASSERT_EQ(quadtree_header.max_x, 100.0f); + LASPP_ASSERT_EQ(quadtree_header.max_y, 100.0f); + LASPP_ASSERT_GT(quadtree_header.levels, 0u); + LASPP_ASSERT_GT(index.num_cells(), 0u); + } + + // Test write and read round-trip + { + std::stringstream ss; + { + LASHeader header; + const_cast(header.bounds()).update({1.0, 2.0, 0.0}); + const_cast(header.bounds()).update({3.0, 4.0, 0.0}); + + std::vector points(150); + for (size_t i = 0; i < points.size(); ++i) { + points[i].x = static_cast(1000 + (i % 10) * 100); // 0.001 scale + points[i].y = static_cast(2000 + (i / 10) * 100); + points[i].z = 0; + } + + // Use a smaller tile_size to generate multiple levels + // With bounds [1,2] to [3,4], max_dim = 2.0 + // With tile_size = 0.1, ratio = 2.0 / 0.1 = 20, so levels = ceil(log2(20)) = 5 + QuadtreeSpatialIndex spatial_index(header, points, 0.1); + + // Write to stream + spatial_index.write(ss); + } + + // Read back + QuadtreeSpatialIndex read_index(ss); + + // Verify header + const auto& read_header = read_index.quadtree_header(); + LASPP_ASSERT(memcmp(read_header.spatial_signature, "LASS", 4) == 0); + LASPP_ASSERT_EQ(read_header.type, 0u); + LASPP_ASSERT(memcmp(read_header.quadtree_signature, "LASQ", 4) == 0); + LASPP_ASSERT_EQ(read_header.version, 0u); + LASPP_ASSERT_EQ(read_header.min_x, 1.0f); + LASPP_ASSERT_EQ(read_header.min_y, 2.0f); + LASPP_ASSERT_EQ(read_header.max_x, 3.0f); + LASPP_ASSERT_EQ(read_header.max_y, 4.0f); + // With tile_size=0.1 and max_dim=2.0, ratio=20, so levels = ceil(log2(20)) = 5 + LASPP_ASSERT_EQ(read_header.levels, 5u); + LASPP_ASSERT_GT(read_index.num_cells(), 0u); + } + + // Test get_cell_index + { + LASHeader header; + const_cast(header.bounds()).update({0.0, 0.0, 0.0}); + const_cast(header.bounds()).update({100.0, 100.0, 0.0}); + + std::vector points; // Empty points, just testing get_cell_index + QuadtreeSpatialIndex index(header, points, 25.0); // tile_size=25 gives 2 levels + + // Test cell index calculation for different positions + // With 2 levels, level_offset[2] = 5, so cells are at indices 5-20 + // Cell 0 (path): [0, 25) x [0, 25) -> index 5 (5 + 0) + int32_t cell0 = index.get_cell_index(10.0, 10.0); + LASPP_ASSERT_EQ(cell0, 5); + + // Cell 1 (path): [25, 50) x [0, 25) -> index 6 (5 + 1) + int32_t cell1 = index.get_cell_index(30.0, 10.0); + LASPP_ASSERT_EQ(cell1, 6); + + // Cell 2 (path): [0, 25) x [25, 50) -> index 7 (5 + 2) + int32_t cell2 = index.get_cell_index(10.0, 30.0); + LASPP_ASSERT_EQ(cell2, 7); + + // Cell 3 (path): [25, 50) x [25, 50) -> index 8 (5 + 3) + int32_t cell3 = index.get_cell_index(30.0, 30.0); + LASPP_ASSERT_EQ(cell3, 8); + } + + // Test get_cell_bounds + { + LASHeader header; + const_cast(header.bounds()).update({0.0, 0.0, 0.0}); + const_cast(header.bounds()).update({100.0, 100.0, 0.0}); + + std::vector points; + // Use tile_size=25 to get 2 levels: ceil(log2(100/25)) = ceil(log2(4)) = 2 + QuadtreeSpatialIndex index(header, points, 25.0); + + // Test bounds for root cell (index 0) + Bound2D root_bounds = index.get_cell_bounds(0); + LASPP_ASSERT_EQ(root_bounds.min_x(), 0.0); + LASPP_ASSERT_EQ(root_bounds.min_y(), 0.0); + LASPP_ASSERT_EQ(root_bounds.max_x(), 100.0); + LASPP_ASSERT_EQ(root_bounds.max_y(), 100.0); + + // Test bounds for level 1 cells + // Cell index 1 (cell_path=0) -> bottom-left quadrant [0, 50) x [0, 50) + int32_t cell1 = index.get_cell_index_at_level(25.0, 25.0, 1); + Bound2D bounds1 = index.get_cell_bounds(cell1); + LASPP_ASSERT_EQ(bounds1.min_x(), 0.0); + LASPP_ASSERT_EQ(bounds1.min_y(), 0.0); + LASPP_ASSERT_EQ(bounds1.max_x(), 50.0); + LASPP_ASSERT_EQ(bounds1.max_y(), 50.0); + + // Cell index 2 (cell_path=1) -> right-bottom quadrant [50, 100) x [0, 50) + int32_t cell2 = index.get_cell_index_at_level(75.0, 25.0, 1); + Bound2D bounds2 = index.get_cell_bounds(cell2); + LASPP_ASSERT_EQ(bounds2.min_x(), 50.0); + LASPP_ASSERT_EQ(bounds2.min_y(), 0.0); + LASPP_ASSERT_EQ(bounds2.max_x(), 100.0); + LASPP_ASSERT_EQ(bounds2.max_y(), 50.0); + } + + // Test find_cell_index with adaptive quadtree + { + LASHeader header; + const_cast(header.bounds()).update({0.0, 0.0, 0.0}); + const_cast(header.bounds()).update({100.0, 100.0, 0.0}); + + // Create points that will generate specific cells + std::vector points(200); + // Points in bottom-left quadrant + for (size_t i = 0; i < 100; ++i) { + points[i].x = static_cast(i * 100); // 0-99 * 0.001 = 0-0.099 + points[i].y = static_cast(i * 100); + points[i].z = 0; + } + // Points in top-right quadrant + for (size_t i = 100; i < 200; ++i) { + points[i].x = static_cast(50000 + (i - 100) * 100); // 50-59.9 + points[i].y = static_cast(50000 + (i - 100) * 100); + points[i].z = 0; + } + + QuadtreeSpatialIndex spatial_index(header, points); + + // find_cell_index should find the most specific cell containing the point + int32_t cell1 = spatial_index.find_cell_index(25.0, 25.0); + LASPP_ASSERT_GE(cell1, 0); + Bound2D bounds1 = spatial_index.get_cell_bounds(cell1); + LASPP_ASSERT(bounds1.contains(25.0, 25.0)); + + int32_t cell2 = spatial_index.find_cell_index(75.0, 75.0); + LASPP_ASSERT_GE(cell2, 0); + Bound2D bounds2 = spatial_index.get_cell_bounds(cell2); + LASPP_ASSERT(bounds2.contains(75.0, 75.0)); + } + + // Test get_cell_level_from_index + { + LASHeader header; + const_cast(header.bounds()).update({0.0, 0.0, 0.0}); + const_cast(header.bounds()).update({100.0, 100.0, 0.0}); + + std::vector points; + QuadtreeSpatialIndex index(header, points, 25.0); // 2 levels + + // Root level (index 0) -> level 0 + uint32_t level0 = index.get_cell_level_from_index(0); + LASPP_ASSERT_EQ(level0, 0u); + + // Level 1 cells (indices 1-4) -> level 1 (first subdivision) + int32_t cell1 = index.get_cell_index_at_level(25.0, 25.0, 1); + uint32_t level1 = index.get_cell_level_from_index(cell1); + LASPP_ASSERT_EQ(level1, 1u); + + // Level 2 cells (indices 5-20) -> level 2 (second subdivision) + int32_t cell2 = index.get_cell_index_at_level(12.5, 12.5, 2); + uint32_t level2 = index.get_cell_level_from_index(cell2); + LASPP_ASSERT_EQ(level2, 2u); + } + + // Test get_cell_index_at_level + { + LASHeader header; + const_cast(header.bounds()).update({0.0, 0.0, 0.0}); + const_cast(header.bounds()).update({100.0, 100.0, 0.0}); + + std::vector points; + QuadtreeSpatialIndex index(header, points, 25.0); // 2 levels + + // Test getting cell index at specific level + int32_t cell_level1 = index.get_cell_index_at_level(30.0, 30.0, 1); + int32_t cell_level2 = index.get_cell_index_at_level(30.0, 30.0, 2); + + LASPP_ASSERT_GE(cell_level1, 1); + LASPP_ASSERT_GE(cell_level2, 5); + + // Level 2 should be more specific (higher index due to level offset) + LASPP_ASSERT_GT(cell_level2, cell_level1); + } + + // Test that levels > 16 is rejected when loading from stream + { + bool caught = false; + try { + auto ss = make_spatial_index_stream(17u, 0.0f, 100.0f, 0.0f, 100.0f); + QuadtreeSpatialIndex bad_index(ss); + } catch (const std::runtime_error& e) { + caught = true; + std::string msg = e.what(); + LASPP_ASSERT(msg.find("levels") != std::string::npos, + "Expected 'levels' in error message, got: ", msg); + } + LASPP_ASSERT(caught, "Expected std::runtime_error for levels > 16"); + } + + // Test that inverted X bounds are rejected when loading from stream + { + bool caught = false; + try { + auto ss = make_spatial_index_stream(4u, 100.0f, 0.0f, 0.0f, 100.0f); // min_x > max_x + QuadtreeSpatialIndex bad_index(ss); + } catch (const std::runtime_error& e) { + caught = true; + std::string msg = e.what(); + LASPP_ASSERT(msg.find("Inverted bounds") != std::string::npos, + "Expected 'Inverted bounds' in error message, got: ", msg); + } + LASPP_ASSERT(caught, "Expected std::runtime_error for inverted X bounds"); + } + + // Test that inverted Y bounds are rejected when loading from stream + { + bool caught = false; + try { + auto ss = make_spatial_index_stream(4u, 0.0f, 100.0f, 100.0f, 0.0f); // min_y > max_y + QuadtreeSpatialIndex bad_index(ss); + } catch (const std::runtime_error& e) { + caught = true; + std::string msg = e.what(); + LASPP_ASSERT(msg.find("Inverted bounds") != std::string::npos, + "Expected 'Inverted bounds' in error message, got: ", msg); + } + LASPP_ASSERT(caught, "Expected std::runtime_error for inverted Y bounds"); + } + + // Test that a well-formed cell with valid intervals loads correctly + { + // Cell 5 (level-2 leaf), intervals [0,4] and [10,14] -> 10 points total + auto ss = make_spatial_index_stream(2u, 0.0f, 100.0f, 0.0f, 100.0f, + {{5, 10u, {{0u, 4u}, {10u, 14u}}}}); + QuadtreeSpatialIndex ok_index(ss); + LASPP_ASSERT_EQ(ok_index.num_cells(), 1u); + const auto& cell = ok_index.cells().at(5); + LASPP_ASSERT_EQ(cell.number_points, 10u); + LASPP_ASSERT_EQ(cell.intervals.size(), 2u); + } + + // Test that an inverted interval (start > end) is rejected + { + bool caught = false; + try { + // Interval [20, 10] is inverted; number_points field is set consistently to avoid + // masking the start>end error with a point-count mismatch error first. + auto ss = make_spatial_index_stream(2u, 0.0f, 100.0f, 0.0f, 100.0f, {{5, 0u, {{20u, 10u}}}}); + QuadtreeSpatialIndex bad_index(ss); + } catch (const std::runtime_error& e) { + caught = true; + std::string msg = e.what(); + LASPP_ASSERT(msg.find("start") != std::string::npos && msg.find("end") != std::string::npos, + "Expected 'start'/'end' in error message, got: ", msg); + } + LASPP_ASSERT(caught, "Expected std::runtime_error for inverted interval start > end"); + } + + // Test that a number_points mismatch is rejected + { + bool caught = false; + try { + // Intervals [0,4] give 5 points; number_points header says 99 -> mismatch + auto ss = make_spatial_index_stream(2u, 0.0f, 100.0f, 0.0f, 100.0f, {{5, 99u, {{0u, 4u}}}}); + QuadtreeSpatialIndex bad_index(ss); + } catch (const std::runtime_error& e) { + caught = true; + std::string msg = e.what(); + LASPP_ASSERT(msg.find("mismatch") != std::string::npos || + msg.find("number_points") != std::string::npos, + "Expected 'mismatch'/'number_points' in error message, got: ", msg); + } + LASPP_ASSERT(caught, "Expected std::runtime_error for number_points mismatch"); + } + + // Test that duplicate cell_index entries in the stream are rejected + { + bool caught = false; + try { + // Two cells both claiming cell_index 5; each has a valid, consistent interval. + auto ss = make_spatial_index_stream(2u, 0.0f, 100.0f, 0.0f, 100.0f, + {{5, 5u, {{0u, 4u}}}, {5, 3u, {{10u, 12u}}}}); + QuadtreeSpatialIndex bad_index(ss); + } catch (const std::runtime_error& e) { + caught = true; + std::string msg = e.what(); + LASPP_ASSERT(msg.find("Duplicate") != std::string::npos, + "Expected 'Duplicate' in error message, got: ", msg); + } + LASPP_ASSERT(caught, "Expected std::runtime_error for duplicate cell_index"); + } + + return 0; +} diff --git a/src/spatial_index.hpp b/src/spatial_index.hpp new file mode 100644 index 0000000..9e080eb --- /dev/null +++ b/src/spatial_index.hpp @@ -0,0 +1,624 @@ +/* + * SPDX-FileCopyrightText: (c) 2025-2026 Trailblaze Software, all rights reserved + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "las_header.hpp" +#include "las_point.hpp" +#include "utilities/assert.hpp" +#include "utilities/macros.hpp" +#include "utilities/printing.hpp" + +namespace laspp { + +/** + * LAStools quadtree-based spatial index format. + * + * This implements the LAStools spatial index format which uses: + * - A quadtree structure for spatial organization + * - Interval lists per cell to identify point ranges + * + * Format: + * - "LASX" signature (4 bytes) + * - version (U32, 4 bytes, little endian, currently 0) + * - Quadtree: + * - "LASS" signature (4 bytes) + * - type (U32, 4 bytes, little endian, value = 0 for quadtree) + * - "LASQ" signature (4 bytes) + * - version (U32, 4 bytes, little endian, currently 0) + * - levels (U32, 4 bytes, little endian) + * - level_index (U32, 4 bytes, little endian, default 0) + * - implicit_levels (U32, 4 bytes, little endian, default 0) + * - min_x (F32, 4 bytes, little endian) + * - max_x (F32, 4 bytes, little endian) + * - min_y (F32, 4 bytes, little endian) + * - max_y (F32, 4 bytes, little endian) + * - Interval: + * - "LASV" signature (4 bytes) + * - version (U32, 4 bytes, little endian, currently 0) + * - number_cells (U32, 4 bytes, little endian) + * - For each cell: + * - cell_index (I32, 4 bytes, little endian) + * - number_intervals (U32, 4 bytes, little endian) + * - number_points (U32, 4 bytes, little endian) + * - For each interval: + * - start (U32, 4 bytes, little endian) + * - end (U32, 4 bytes, little endian) + */ + +#pragma pack(push, 1) + +struct LASPP_PACKED QuadtreeHeader { + char spatial_signature[4]; // "LASS" + uint32_t type; // 0 = LAS_SPATIAL_QUAD_TREE + char quadtree_signature[4]; // "LASQ" + uint32_t version; // 0 + uint32_t levels; + uint32_t level_index; // default 0 + uint32_t implicit_levels; // default 0 + float min_x; + float max_x; + float min_y; + float max_y; + + friend std::ostream& operator<<(std::ostream& os, const QuadtreeHeader& header) { + os << "Spatial signature: " << std::string(header.spatial_signature, 4) << std::endl; + os << "Type: " << header.type << std::endl; + os << "Quadtree signature: " << std::string(header.quadtree_signature, 4) << std::endl; + os << "Version: " << header.version << std::endl; + os << "Levels: " << header.levels << std::endl; + os << "Level index: " << header.level_index << std::endl; + os << "Implicit levels: " << header.implicit_levels << std::endl; + os << "Min X: " << header.min_x << std::endl; + os << "Max X: " << header.max_x << std::endl; + os << "Min Y: " << header.min_y << std::endl; + os << "Max Y: " << header.max_y << std::endl; + return os; + } +}; + +struct LASPP_PACKED IntervalCellData { + int32_t cell_index; + uint32_t number_intervals; + uint32_t number_points; + // Followed by number_intervals pairs of (start, end) U32 values +}; + +#pragma pack(pop) + +struct PointInterval { + uint32_t start; + uint32_t end; + + friend std::ostream& operator<<(std::ostream& os, const PointInterval& interval) { + os << "[" << interval.start << ", " << interval.end << "]"; + return os; + } +}; + +struct CellIntervals { + int32_t cell_index; + uint32_t number_points; + std::vector intervals; + + friend std::ostream& operator<<(std::ostream& os, const CellIntervals& cell) { + os << "Cell index: " << cell.cell_index << std::endl; + os << "Number of points: " << cell.number_points << std::endl; + os << "Number of intervals: " << cell.intervals.size(); + if (cell.intervals.size() == 1) { + os << " (" << cell.intervals[0] << ")"; + } + os << std::endl; + return os; + } +}; + +class QuadtreeSpatialIndex { + QuadtreeHeader m_quadtree_header; + std::map m_cells; + + // Calculate level offset for a given level + // level_offset[0] = 0 + // level_offset[l+1] = level_offset[l] + ((1<16 levels would + // cause calculate_level_offset and get_cell_index* to produce shifts/overflows. + if (m_quadtree_header.levels > 16u) { + throw std::runtime_error( + "Invalid quadtree levels in spatial index: " + std::to_string(m_quadtree_header.levels) + + " (maximum supported is 16)"); + } + + // Validate bounds: inverted bounds make all cell-index calculations nonsensical. + if (m_quadtree_header.min_x > m_quadtree_header.max_x || + m_quadtree_header.min_y > m_quadtree_header.max_y) { + throw std::runtime_error("Inverted bounds in spatial index quadtree header: x=[" + + std::to_string(m_quadtree_header.min_x) + ", " + + std::to_string(m_quadtree_header.max_x) + "] y=[" + + std::to_string(m_quadtree_header.min_y) + ", " + + std::to_string(m_quadtree_header.max_y) + "]"); + } + + // Read interval header + char interval_signature[4]; + LASPP_CHECK_READ(is, interval_signature, 4); + LASPP_ASSERT(memcmp(interval_signature, "LASV", 4) == 0, "Invalid interval signature"); + + uint32_t interval_version; + LASPP_CHECK_READ(is, &interval_version, 4); + LASPP_ASSERT_EQ(interval_version, 0u); + + uint32_t number_cells; + LASPP_CHECK_READ(is, &number_cells, 4); + + // Sanity check: prevent reading unreasonably large values from corrupted files + // A reasonable upper limit: 1 million cells (each cell has at least 12 bytes of header) + constexpr uint32_t MAX_REASONABLE_CELLS = 1000000; + if (number_cells > MAX_REASONABLE_CELLS) { + throw std::runtime_error("Invalid number_cells in spatial index: " + + std::to_string(number_cells)); + } + + // Read cells + for (uint32_t i = 0; i < number_cells; ++i) { + // Read packed struct field by field to avoid alignment issues + int32_t cell_index; + uint32_t number_intervals; + uint32_t number_points; + LASPP_CHECK_READ(is, &cell_index, sizeof(int32_t)); + LASPP_CHECK_READ(is, &number_intervals, sizeof(uint32_t)); + LASPP_CHECK_READ(is, &number_points, sizeof(uint32_t)); + + // Sanity check: prevent reading unreasonably large values from corrupted files + // A reasonable upper limit: 1 million intervals per cell + constexpr uint32_t MAX_REASONABLE_INTERVALS = 1000000; + if (number_intervals > MAX_REASONABLE_INTERVALS) { + throw std::runtime_error("Invalid number_intervals in spatial index cell: " + + std::to_string(number_intervals)); + } + + CellIntervals cell; + cell.cell_index = cell_index; + cell.number_points = number_points; + cell.intervals.reserve(number_intervals); + + // Read intervals + uint64_t computed_points = 0; + for (uint32_t j = 0; j < number_intervals; ++j) { + PointInterval interval; + LASPP_CHECK_READ(is, &interval.start, 4); + LASPP_CHECK_READ(is, &interval.end, 4); + + if (interval.start > interval.end) { + throw std::runtime_error( + "Invalid interval in spatial index cell " + std::to_string(cell_index) + ": start (" + + std::to_string(interval.start) + ") > end (" + std::to_string(interval.end) + ")"); + } + + computed_points += static_cast(interval.end - interval.start) + 1u; + cell.intervals.push_back(interval); + } + + if (computed_points != static_cast(number_points)) { + throw std::runtime_error("Point count mismatch in spatial index cell " + + std::to_string(cell_index) + + ": number_points=" + std::to_string(number_points) + + " but sum of interval lengths=" + std::to_string(computed_points)); + } + + auto [it, inserted] = m_cells.try_emplace(cell_index, std::move(cell)); + if (!inserted) { + throw std::runtime_error("Duplicate cell_index " + std::to_string(cell_index) + + " in spatial index interval section"); + } + } + } + + // Build spatial index from points + template + explicit QuadtreeSpatialIndex(const LASHeader& header, const std::vector& points = {}, + double tile_size = 50.0) { + // Initialize header signatures + memcpy(m_quadtree_header.spatial_signature, "LASS", 4); + m_quadtree_header.type = 0; // LAS_SPATIAL_QUAD_TREE + memcpy(m_quadtree_header.quadtree_signature, "LASQ", 4); + m_quadtree_header.version = 0; + m_quadtree_header.level_index = 0; + m_quadtree_header.implicit_levels = 0; + + // Get bounds from header + double min_x = header.bounds().min_x(); + double min_y = header.bounds().min_y(); + double max_x = header.bounds().max_x(); + double max_y = header.bounds().max_y(); + + // Calculate appropriate quadtree levels based on tile_size + double dx = max_x - min_x; + double dy = max_y - min_y; + double max_dim = std::max(dx, dy); + + uint32_t levels = 0; + if (max_dim > 0 && tile_size > 0) { + double ratio = max_dim / tile_size; + if (ratio >= 1.0) { + levels = static_cast(std::ceil(std::log2(ratio))); + // Limit to 16 levels max: uint32_t can only represent 16 levels (32 bits / 2 bits per + // level) + levels = std::max(1u, std::min(levels, 16u)); + } else { + // If max_dim < tile_size, we only need 1 level (root) + levels = 1; + } + } else { + levels = 4; // Default + } + + m_quadtree_header.levels = levels; + m_quadtree_header.min_x = static_cast(min_x); + m_quadtree_header.min_y = static_cast(min_y); + m_quadtree_header.max_x = static_cast(max_x); + m_quadtree_header.max_y = static_cast(max_y); + + // Get scale and offset for coordinate conversion + double scale_x = header.transform().scale_factors().x(); + double offset_x = header.transform().offsets().x(); + double scale_y = header.transform().scale_factors().y(); + double offset_y = header.transform().offsets().y(); + + // Group points by quadtree cell + std::map> cell_to_points; + for (size_t i = 0; i < points.size(); ++i) { + // Read x, y using memcpy to avoid alignment issues with packed structures + int32_t x_int, y_int; + std::memcpy(&x_int, &points[i].x, sizeof(x_int)); + std::memcpy(&y_int, &points[i].y, sizeof(y_int)); + double x = int32_to_double(x_int, scale_x, offset_x); + double y = int32_to_double(y_int, scale_y, offset_y); + + int32_t cell_index = get_cell_index(x, y); + cell_to_points[cell_index].push_back(static_cast(i)); + } + + // Build intervals for each cell + // Intervals are consecutive point ranges within each cell + for (const auto& [cell_index, point_indices] : cell_to_points) { + std::vector intervals; + uint32_t interval_start = static_cast(point_indices[0]); + uint32_t interval_end = interval_start; + + for (size_t i = 1; i < point_indices.size(); ++i) { + if (point_indices[i] == interval_end + 1) { + // Consecutive point, extend interval + interval_end = static_cast(point_indices[i]); + } else { + // Gap found, save current interval and start new one + intervals.push_back({interval_start, interval_end}); + interval_start = static_cast(point_indices[i]); + interval_end = interval_start; + } + } + // Add final interval + intervals.push_back({interval_start, interval_end}); + + add_cell(cell_index, std::move(intervals)); + } + } + + private: + void set_bounds(float min_x, float min_y, float max_x, float max_y) { + m_quadtree_header.min_x = min_x; + m_quadtree_header.min_y = min_y; + m_quadtree_header.max_x = max_x; + m_quadtree_header.max_y = max_y; + } + + void set_levels(uint32_t levels) { m_quadtree_header.levels = levels; } + + void add_cell(int32_t cell_index, std::vector&& intervals) { + CellIntervals cell; + cell.cell_index = cell_index; + cell.number_points = 0; + for (const auto& interval : intervals) { + cell.number_points += (interval.end - interval.start + 1); + } + cell.intervals = std::move(intervals); + LASPP_ASSERT(m_cells.find(cell_index) == m_cells.end(), + "Cell index already exists in spatial index"); + m_cells.emplace(cell_index, std::move(cell)); + } + + public: + void write(std::ostream& os) const { + // Write "LASX" signature + os.write("LASX", 4); + + // Write version + uint32_t version = 0; + os.write(reinterpret_cast(&version), 4); + + // Write quadtree header + os.write(reinterpret_cast(&m_quadtree_header), sizeof(QuadtreeHeader)); + + // Write interval header + os.write("LASV", 4); + uint32_t interval_version = 0; + os.write(reinterpret_cast(&interval_version), 4); + + uint32_t number_cells = static_cast(m_cells.size()); + os.write(reinterpret_cast(&number_cells), 4); + + // Write cells + for (const auto& [cell_index, cell] : m_cells) { + IntervalCellData cell_data; + cell_data.cell_index = cell_index; + cell_data.number_intervals = static_cast(cell.intervals.size()); + cell_data.number_points = cell.number_points; + os.write(reinterpret_cast(&cell_data), sizeof(IntervalCellData)); + + // Write intervals + for (const auto& interval : cell.intervals) { + os.write(reinterpret_cast(&interval.start), 4); + os.write(reinterpret_cast(&interval.end), 4); + } + } + } + + const QuadtreeHeader& quadtree_header() const { return m_quadtree_header; } + const std::map& cells() const { return m_cells; } + + size_t num_cells() const { return m_cells.size(); } + + // Get cell index for a point at a specific level (public for testing) + int32_t get_cell_index_at_level(double x, double y, uint32_t target_level) const { + if (target_level == 0 || m_quadtree_header.levels == 0) return 0; + if (target_level > m_quadtree_header.levels) { + target_level = m_quadtree_header.levels; + } + + double min_x = static_cast(m_quadtree_header.min_x); + double min_y = static_cast(m_quadtree_header.min_y); + double max_x = static_cast(m_quadtree_header.max_x); + double max_y = static_cast(m_quadtree_header.max_y); + + double dx = max_x - min_x; + double dy = max_y - min_y; + if (dx <= 0 || dy <= 0) return 0; + + // Build the quadtree path (cell index within the target level) + uint32_t cell_path = 0; + double cell_size_x = dx; + double cell_size_y = dy; + + // Build index from root (MSB) to target level (LSB) + for (uint32_t level = 0; level < target_level; ++level) { + cell_size_x /= 2.0; + cell_size_y /= 2.0; + + // Determine quadrant at this level + uint32_t quadrant = 0; + if (x >= min_x + cell_size_x) { + quadrant |= 1; // Right half (x bit) + min_x += cell_size_x; + } + if (y >= min_y + cell_size_y) { + quadrant |= 2; // Top half (y bit) + min_y += cell_size_y; + } + + // Place bits at the correct position: root at MSB, leaves at LSB + uint32_t shift = 2 * (target_level - 1 - level); + cell_path |= (quadrant << shift); + } + + // Add level offset for the target level + uint32_t level_offset = calculate_level_offset(target_level); + return static_cast(level_offset + cell_path); + } + + // Determine which level a cell index belongs to + // Returns the 0-based level: 0 = root (cell_index 0), 1 = first subdivision, 2 = second, etc. + // Level offsets: offset[0]=0, offset[1]=1, offset[2]=5, offset[3]=21, etc. + uint32_t get_cell_level_from_index(int32_t cell_index) const { + if (m_quadtree_header.levels == 0 || cell_index == 0) return 0; + + for (uint32_t level = m_quadtree_header.levels; level > 0; --level) { + if (static_cast(cell_index) >= calculate_level_offset(level)) { + return level; + } + } + return 0; // Root level + } + + // Find the cell index for a point, searching up the quadtree if the exact cell doesn't exist + // Returns the cell index of the nearest ancestor that exists in the spatial index + // Returns -1 if no cell is found (shouldn't happen for valid spatial indexes) + // This handles adaptive quadtrees where cells may be merged at higher levels + int32_t find_cell_index(double x, double y) const { + // Start from the deepest level and work up + for (uint32_t level = m_quadtree_header.levels; level > 0; --level) { + int32_t cell_index = get_cell_index_at_level(x, y, level); + if (m_cells.find(cell_index) != m_cells.end()) { + return cell_index; + } + } + // Check root level + if (m_cells.find(0) != m_cells.end()) { + return 0; + } + return -1; // No cell found (shouldn't happen) + } + + // Get cell index for a given x, y coordinate + // Returns the LAStools cell index which includes level offset + // Quadtree encoding: root level at MSB, leaf level at LSB + // Each level adds 2 bits: x bit (bit 1 of level), y bit (bit 0 of level) + int32_t get_cell_index(double x, double y) const { + if (m_quadtree_header.levels == 0) return 0; + + double min_x = static_cast(m_quadtree_header.min_x); + double min_y = static_cast(m_quadtree_header.min_y); + double max_x = static_cast(m_quadtree_header.max_x); + double max_y = static_cast(m_quadtree_header.max_y); + + double dx = max_x - min_x; + double dy = max_y - min_y; + if (dx <= 0 || dy <= 0) return 0; + + // Build the quadtree path (cell index within the deepest level) + uint32_t cell_path = 0; + double cell_size_x = dx; + double cell_size_y = dy; + + // Build index from root (MSB) to leaf (LSB) + for (uint32_t level = 0; level < m_quadtree_header.levels; ++level) { + cell_size_x /= 2.0; + cell_size_y /= 2.0; + + // Determine quadrant at this level + uint32_t quadrant = 0; + if (x >= min_x + cell_size_x) { + quadrant |= 1; // Right half (x bit) + min_x += cell_size_x; + } + if (y >= min_y + cell_size_y) { + quadrant |= 2; // Top half (y bit) + min_y += cell_size_y; + } + + // Place bits at the correct position: root at MSB, leaves at LSB + // For level 0 (root): shift by 2*(levels-1) to put at MSB + // For level n-1 (leaf): shift by 0 to put at LSB + uint32_t shift = 2 * (m_quadtree_header.levels - 1 - level); + cell_path |= (quadrant << shift); + } + + // Add level offset for the deepest level (leaf level) + // All cells are at the deepest level in our implementation + uint32_t level_offset = calculate_level_offset(m_quadtree_header.levels); + return static_cast(level_offset + cell_path); + } + + // Get cell bounds from cell index (reverse of get_cell_index) + // The cell_index includes level offset, so we need to determine which level it's at + // This handles adaptive quadtrees where cells can be at any level + Bound2D get_cell_bounds(int32_t cell_index) const { + if (m_quadtree_header.levels == 0 || cell_index == 0) { + // Root cell (index 0) or no levels - return full bounds + return Bound2D(static_cast(m_quadtree_header.min_x), + static_cast(m_quadtree_header.min_y), + static_cast(m_quadtree_header.max_x), + static_cast(m_quadtree_header.max_y)); + } + + // get_cell_level_from_index returns 0=root, 1=first subdivision, 2=second, etc. + uint32_t cell_level = get_cell_level_from_index(cell_index); + uint32_t level_offset = calculate_level_offset(cell_level); + uint32_t cell_path = static_cast(cell_index) - level_offset; + + double min_x = static_cast(m_quadtree_header.min_x); + double min_y = static_cast(m_quadtree_header.min_y); + double max_x = static_cast(m_quadtree_header.max_x); + double max_y = static_cast(m_quadtree_header.max_y); + + double dx = max_x - min_x; + double dy = max_y - min_y; + if (dx <= 0 || dy <= 0) { + return Bound2D(min_x, min_y, max_x, max_y); + } + + double cell_size_x = dx; + double cell_size_y = dy; + double current_min_x = min_x; + double current_min_y = min_y; + + // Extract bits from cell_path to determine cell position. + // The path is built with root at MSB, so we extract from MSB to LSB. + uint32_t num_levels_to_process = cell_level; + for (uint32_t level = 0; level < num_levels_to_process; ++level) { + uint32_t shift = 2 * (num_levels_to_process - 1 - level); + uint32_t bits = (cell_path >> shift) & 3; + + cell_size_x /= 2.0; + cell_size_y /= 2.0; + + if (bits & 1) { // X bit + current_min_x += cell_size_x; + } + if (bits & 2) { // Y bit + current_min_y += cell_size_y; + } + } + + return Bound2D(current_min_x, current_min_y, current_min_x + cell_size_x, + current_min_y + cell_size_y); + } + + friend std::ostream& operator<<(std::ostream& os, const QuadtreeSpatialIndex& index) { + os << "Quadtree Header:" << std::endl; + os << index.m_quadtree_header << std::endl; + os << "Cells:" << index.m_cells << std::endl; + return os; + } +}; + +} // namespace laspp diff --git a/src/tests/test_bound2d.cpp b/src/tests/test_bound2d.cpp new file mode 100644 index 0000000..62e7866 --- /dev/null +++ b/src/tests/test_bound2d.cpp @@ -0,0 +1,128 @@ +/* + * SPDX-FileCopyrightText: (c) 2025-2026 Trailblaze Software, all rights reserved + * SPDX-License-Identifier: MIT + */ + +#include +#include + +#include "las_header.hpp" +#include "utilities/assert.hpp" + +using namespace laspp; + +int main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) { + // Test default constructor + { + Bound2D bound; + LASPP_ASSERT_EQ(bound.min_x(), std::numeric_limits::max()); + LASPP_ASSERT_EQ(bound.min_y(), std::numeric_limits::max()); + LASPP_ASSERT_EQ(bound.max_x(), std::numeric_limits::lowest()); + LASPP_ASSERT_EQ(bound.max_y(), std::numeric_limits::lowest()); + } + + // Test parameterized constructor + { + Bound2D bound(10.0, 20.0, 30.0, 40.0); + LASPP_ASSERT_EQ(bound.min_x(), 10.0); + LASPP_ASSERT_EQ(bound.min_y(), 20.0); + LASPP_ASSERT_EQ(bound.max_x(), 30.0); + LASPP_ASSERT_EQ(bound.max_y(), 40.0); + } + + // Test update method + { + Bound2D bound; + bound.update(5.0, 10.0); + LASPP_ASSERT_EQ(bound.min_x(), 5.0); + LASPP_ASSERT_EQ(bound.min_y(), 10.0); + LASPP_ASSERT_EQ(bound.max_x(), 5.0); + LASPP_ASSERT_EQ(bound.max_y(), 10.0); + + bound.update(15.0, 20.0); + LASPP_ASSERT_EQ(bound.min_x(), 5.0); + LASPP_ASSERT_EQ(bound.min_y(), 10.0); + LASPP_ASSERT_EQ(bound.max_x(), 15.0); + LASPP_ASSERT_EQ(bound.max_y(), 20.0); + + bound.update(3.0, 25.0); + LASPP_ASSERT_EQ(bound.min_x(), 3.0); + LASPP_ASSERT_EQ(bound.min_y(), 10.0); + LASPP_ASSERT_EQ(bound.max_x(), 15.0); + LASPP_ASSERT_EQ(bound.max_y(), 25.0); + } + + // Test update with multiple points + { + Bound2D bound; + bound.update(0.0, 0.0); + bound.update(10.0, 5.0); + bound.update(5.0, 10.0); + bound.update(-5.0, -10.0); + bound.update(15.0, 20.0); + + LASPP_ASSERT_EQ(bound.min_x(), -5.0); + LASPP_ASSERT_EQ(bound.min_y(), -10.0); + LASPP_ASSERT_EQ(bound.max_x(), 15.0); + LASPP_ASSERT_EQ(bound.max_y(), 20.0); + } + + // Test stream output + { + Bound2D bound(1.5, 2.5, 3.5, 4.5); + std::stringstream ss; + ss << bound; + std::string output = ss.str(); + LASPP_ASSERT(output.find("1.5") != std::string::npos); + LASPP_ASSERT(output.find("2.5") != std::string::npos); + LASPP_ASSERT(output.find("3.5") != std::string::npos); + LASPP_ASSERT(output.find("4.5") != std::string::npos); + } + + // Test with negative coordinates + { + Bound2D bound(-10.0, -20.0, -5.0, -15.0); + LASPP_ASSERT_EQ(bound.min_x(), -10.0); + LASPP_ASSERT_EQ(bound.min_y(), -20.0); + LASPP_ASSERT_EQ(bound.max_x(), -5.0); + LASPP_ASSERT_EQ(bound.max_y(), -15.0); + + bound.update(-15.0, -25.0); + LASPP_ASSERT_EQ(bound.min_x(), -15.0); + LASPP_ASSERT_EQ(bound.min_y(), -25.0); + LASPP_ASSERT_EQ(bound.max_x(), -5.0); + LASPP_ASSERT_EQ(bound.max_y(), -15.0); + } + + // Test with very large coordinates + { + double large = 1e10; + Bound2D bound(-large, -large, large, large); + LASPP_ASSERT_EQ(bound.min_x(), -large); + LASPP_ASSERT_EQ(bound.min_y(), -large); + LASPP_ASSERT_EQ(bound.max_x(), large); + LASPP_ASSERT_EQ(bound.max_y(), large); + } + + // Test with zero-size bounds (point) + { + Bound2D bound; + bound.update(5.0, 10.0); + bound.update(5.0, 10.0); // Same point + LASPP_ASSERT_EQ(bound.min_x(), 5.0); + LASPP_ASSERT_EQ(bound.min_y(), 10.0); + LASPP_ASSERT_EQ(bound.max_x(), 5.0); + LASPP_ASSERT_EQ(bound.max_y(), 10.0); + } + + // Test with zero coordinates + { + Bound2D bound(0.0, 0.0, 0.0, 0.0); + LASPP_ASSERT_EQ(bound.min_x(), 0.0); + LASPP_ASSERT_EQ(bound.min_y(), 0.0); + LASPP_ASSERT_EQ(bound.max_x(), 0.0); + LASPP_ASSERT_EQ(bound.max_y(), 0.0); + } + + return 0; +} diff --git a/src/tests/test_reader.cpp b/src/tests/test_reader.cpp index 105b02f..4d9a304 100644 --- a/src/tests/test_reader.cpp +++ b/src/tests/test_reader.cpp @@ -12,6 +12,7 @@ #include "las_header.hpp" #include "las_reader.hpp" #include "las_writer.hpp" +#include "spatial_index.hpp" #include "vlr.hpp" using namespace laspp; @@ -298,6 +299,109 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) { } } + // Test get_chunk_indices_from_intervals and read_chunks_list + { + std::stringstream las_stream; + std::stringstream laz_stream; + + // Create a file with 100 points + { + LASWriter las_writer(las_stream, 0, 0); + LASWriter laz_writer(laz_stream, 128, 0); // LAZ format + + std::vector points(100); + for (size_t i = 0; i < points.size(); i++) { + points[i].x = static_cast(i); + points[i].y = static_cast(i); + points[i].z = static_cast(i); + } + + las_writer.write_points(std::span(points)); + laz_writer.write_points(std::span(points)); + } + + // Test with LAZ file (multiple chunks) + { + laz_stream.seekg(0); + LASReader reader(laz_stream); + + // Test get_chunk_indices_from_intervals + // Assuming chunks are: [0-19], [20-80], [81-99] (based on typical chunking) + std::vector intervals; + + // Interval that spans only first chunk + intervals.push_back({0, 10}); + auto chunk_indices = reader.get_chunk_indices_from_intervals(intervals); + LASPP_ASSERT_GE(chunk_indices.size(), 1u); + LASPP_ASSERT_EQ(chunk_indices[0], 0u); + + // Interval that spans multiple chunks + intervals.clear(); + intervals.push_back({10, 50}); + chunk_indices = reader.get_chunk_indices_from_intervals(intervals); + LASPP_ASSERT_GE(chunk_indices.size(), 1u); + + // Multiple intervals + intervals.clear(); + intervals.push_back({0, 5}); + intervals.push_back({50, 60}); + intervals.push_back({90, 99}); + chunk_indices = reader.get_chunk_indices_from_intervals(intervals); + LASPP_ASSERT_GE(chunk_indices.size(), 1u); + + // Test read_chunks_list + if (chunk_indices.size() > 0) { + // Calculate total points needed + size_t total_points = 0; + const auto& points_per_chunk = reader.points_per_chunk(); + for (size_t chunk_idx : chunk_indices) { + total_points += points_per_chunk[chunk_idx]; + } + + std::vector points(total_points); + auto result = reader.read_chunks_list(points, chunk_indices); + LASPP_ASSERT_EQ(result.size(), total_points); + + // Verify we got valid points + for (size_t i = 0; i < result.size(); ++i) { + LASPP_ASSERT_GE(result[i].x, 0); + } + } + + // Test with empty intervals + intervals.clear(); + chunk_indices = reader.get_chunk_indices_from_intervals(intervals); + LASPP_ASSERT_EQ(chunk_indices.size(), 0u); + + std::vector empty_points(0); + auto empty_result = reader.read_chunks_list(empty_points, chunk_indices); + LASPP_ASSERT_EQ(empty_result.size(), 0u); + } + + // Test with LAS file (single chunk) + { + las_stream.seekg(0); + LASReader reader(las_stream); + + std::vector intervals; + intervals.push_back({0, 50}); + intervals.push_back({50, 99}); + + auto chunk_indices = reader.get_chunk_indices_from_intervals(intervals); + LASPP_ASSERT_EQ(chunk_indices.size(), 1u); + LASPP_ASSERT_EQ(chunk_indices[0], 0u); + + std::vector points(100); + auto result = reader.read_chunks_list(points, chunk_indices); + LASPP_ASSERT_EQ(result.size(), 100u); + + // Verify points + for (size_t i = 0; i < result.size(); ++i) { + LASPP_ASSERT_EQ(result[i].x, static_cast(i)); + } + } + } + // Test both memory-mapped and stream-based I/O paths { TempFile temp_file("test_io_paths"); diff --git a/src/tests/test_reader_constructors.cpp b/src/tests/test_reader_constructors.cpp new file mode 100644 index 0000000..e42bd30 --- /dev/null +++ b/src/tests/test_reader_constructors.cpp @@ -0,0 +1,233 @@ +/* + * SPDX-FileCopyrightText: (c) 2025-2026 Trailblaze Software, all rights reserved + * SPDX-License-Identifier: MIT + */ + +#include +#include +#include + +#include "las_header.hpp" +#include "las_reader.hpp" +#include "las_writer.hpp" +#include "spatial_index.hpp" +#include "utilities/assert.hpp" + +using namespace laspp; + +int main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) { + // Test LASReader constructor from stream + { + std::stringstream stream; + { + LASWriter writer(stream, 0, 0); + std::vector points(10); + for (size_t i = 0; i < points.size(); ++i) { + points[i].x = int(i) * 1000; + points[i].y = int(i) * 1000; + points[i].z = int(i) * 100; + } + writer.write_points(std::span(points)); + } + + stream.seekg(0); + LASReader reader(stream); + LASPP_ASSERT_EQ(reader.header().num_points(), 10u); + LASPP_ASSERT_EQ(reader.header().point_format(), 0); + } + + // Test LASReader constructor from stream with file_path (for .lax lookup) + { + std::stringstream stream; + { + LASWriter writer(stream, 0, 0); + std::vector points(10); + for (size_t i = 0; i < points.size(); ++i) { + points[i].x = int(i) * 1000; + points[i].y = int(i) * 1000; + points[i].z = int(i) * 100; + } + writer.write_points(std::span(points)); + } + + stream.seekg(0); + std::filesystem::path dummy_path("dummy.las"); + LASReader reader(stream, dummy_path); + LASPP_ASSERT_EQ(reader.header().num_points(), 10u); + // Should try to look for dummy.lax but won't find it (which is fine) + LASPP_ASSERT(!reader.has_lastools_spatial_index()); + } + + // Test LASReader constructor from filename (creates temporary file) + { + // Create a temporary file + std::filesystem::path temp_file = + std::filesystem::temp_directory_path() / "test_las_reader.las"; + + { + std::fstream file_stream(temp_file, std::ios::binary | std::ios::out | std::ios::trunc); + LASWriter writer(file_stream, 0, 0); + std::vector points(10); + for (size_t i = 0; i < points.size(); ++i) { + points[i].x = int(i) * 1000; + points[i].y = int(i) * 1000; + points[i].z = int(i) * 100; + } + writer.write_points(std::span(points)); + } + + // Test reading from filename + { + LASReader reader(temp_file); + LASPP_ASSERT_EQ(reader.header().num_points(), 10u); + LASPP_ASSERT_EQ(reader.header().point_format(), 0); + } // Ensure reader is destroyed before removing file + + // Clean up + std::filesystem::remove(temp_file); + } + + // Test reading spatial index from .lax file + { + // Create a temporary LAS file + std::filesystem::path temp_file = + std::filesystem::temp_directory_path() / "test_lax_reader.las"; + std::filesystem::path lax_file = std::filesystem::temp_directory_path() / "test_lax_reader.lax"; + + { + std::fstream file_stream(temp_file, std::ios::binary | std::ios::out | std::ios::trunc); + LASWriter writer(file_stream, 0, 0); + std::vector points(10); + for (size_t i = 0; i < points.size(); ++i) { + points[i].x = int(i) * 1000; + points[i].y = int(i) * 1000; + points[i].z = int(i) * 100; + } + writer.write_points(std::span(points)); + } + + // Create a .lax file with a spatial index + { + std::ofstream lax_stream(lax_file, std::ios::binary); + LASHeader header; + const_cast(header.bounds()).update({0.0, 0.0, 0.0}); + const_cast(header.bounds()).update({100.0, 100.0, 0.0}); + std::vector points(10); + for (size_t i = 0; i < points.size(); ++i) { + points[i].x = static_cast(i * 1000); + points[i].y = static_cast(i * 1000); + points[i].z = 0; + } + // Use tile_size to control levels: with bounds [0, 100] and tile_size=25, we get 2 levels + QuadtreeSpatialIndex index(header, points, 25.0); + index.write(lax_stream); + } + + // Test reading from filename - should find .lax file + { + LASReader reader(temp_file); + LASPP_ASSERT(reader.has_lastools_spatial_index()); + const auto& spatial_index = reader.lastools_spatial_index(); + LASPP_ASSERT_GT(spatial_index.num_cells(), 0u); + // The actual number of levels depends on the tile_size calculation + LASPP_ASSERT_GT(spatial_index.quadtree_header().levels, 0u); + } // Ensure reader is destroyed before removing files + + // Clean up + std::filesystem::remove(temp_file); + std::filesystem::remove(lax_file); + } + + // Test that .lax file is ignored if spatial index exists in EVLR + { + // Create a temporary LAS file with spatial index in EVLR + std::filesystem::path temp_file = std::filesystem::temp_directory_path() / "test_lax_evlr.las"; + std::filesystem::path lax_file = std::filesystem::temp_directory_path() / "test_lax_evlr.lax"; + + { + std::fstream file_stream(temp_file, std::ios::binary | std::ios::out | std::ios::trunc); + LASWriter writer(file_stream, 0, 0); + std::vector points(10); + for (size_t i = 0; i < points.size(); ++i) { + points[i].x = int(i) * 1000; + points[i].y = int(i) * 1000; + points[i].z = int(i) * 100; + } + writer.write_points(std::span(points)); + + // Add spatial index to EVLR + LASHeader spatial_header; + const_cast(spatial_header.bounds()).update({0.0, 0.0, 0.0}); + const_cast(spatial_header.bounds()).update({100.0, 100.0, 0.0}); + // Use tile_size=12.5 to get 3 levels: ceil(log2(100/12.5)) = ceil(log2(8)) = 3 + QuadtreeSpatialIndex index(spatial_header, points, 12.5); + writer.write_lastools_spatial_index(index); + } + + // Create a different .lax file + { + std::ofstream lax_stream(lax_file, std::ios::binary); + LASHeader lax_header; + const_cast(lax_header.bounds()).update({0.0, 0.0, 0.0}); + const_cast(lax_header.bounds()).update({100.0, 100.0, 0.0}); + std::vector lax_points(10); + for (size_t i = 0; i < lax_points.size(); ++i) { + lax_points[i].x = static_cast(i * 1000); + lax_points[i].y = static_cast(i * 1000); + lax_points[i].z = 0; + } + QuadtreeSpatialIndex lax_index(lax_header, lax_points); + lax_index.write(lax_stream); + } + + // Test reading - should use EVLR spatial index, not .lax + { + LASReader reader(temp_file); + LASPP_ASSERT(reader.has_lastools_spatial_index()); + const auto& spatial_index = reader.lastools_spatial_index(); + LASPP_ASSERT_EQ(spatial_index.quadtree_header().levels, 3u); // From EVLR, not .lax + } // Ensure reader is destroyed before removing files + + // Clean up + std::filesystem::remove(temp_file); + std::filesystem::remove(lax_file); + } + + // Test that corrupted .lax file doesn't crash + { + std::filesystem::path temp_file = + std::filesystem::temp_directory_path() / "test_corrupt_lax.las"; + std::filesystem::path lax_file = + std::filesystem::temp_directory_path() / "test_corrupt_lax.lax"; + + { + std::fstream file_stream(temp_file, std::ios::binary | std::ios::out | std::ios::trunc); + LASWriter writer(file_stream, 0, 0); + std::vector points(10); + for (size_t i = 0; i < points.size(); ++i) { + points[i].x = int(i) * 1000; + points[i].y = int(i) * 1000; + points[i].z = int(i) * 100; + } + writer.write_points(std::span(points)); + } + + // Create a corrupted .lax file + { + std::ofstream lax_stream(lax_file, std::ios::binary); + lax_stream << "CORRUPTED DATA"; + } + + // Should not crash, just not find spatial index + { + LASReader reader(temp_file); + LASPP_ASSERT(!reader.has_lastools_spatial_index()); + } // Ensure reader is destroyed before removing files + + // Clean up + std::filesystem::remove(temp_file); + std::filesystem::remove(lax_file); + } + + return 0; +} diff --git a/src/tests/test_transform_accessors.cpp b/src/tests/test_transform_accessors.cpp new file mode 100644 index 0000000..e8ecddc --- /dev/null +++ b/src/tests/test_transform_accessors.cpp @@ -0,0 +1,69 @@ +/* + * SPDX-FileCopyrightText: (c) 2025-2026 Trailblaze Software, all rights reserved + * SPDX-License-Identifier: MIT + */ + +#include "las_header.hpp" +#include "utilities/assert.hpp" + +using namespace laspp; + +int main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) { + // Test non-const Transform::offsets() and scale_factors() + { + Transform transform; + + // Test non-const offsets() + transform.offsets().x() = 100.0; + transform.offsets().y() = 200.0; + transform.offsets().z() = 300.0; + + LASPP_ASSERT_EQ(transform.offsets().x(), 100.0); + LASPP_ASSERT_EQ(transform.offsets().y(), 200.0); + LASPP_ASSERT_EQ(transform.offsets().z(), 300.0); + + // Test non-const scale_factors() + transform.scale_factors().x() = 0.01; + transform.scale_factors().y() = 0.02; + transform.scale_factors().z() = 0.03; + + LASPP_ASSERT_EQ(transform.scale_factors().x(), 0.01); + LASPP_ASSERT_EQ(transform.scale_factors().y(), 0.02); + LASPP_ASSERT_EQ(transform.scale_factors().z(), 0.03); + } + + // Test through LASHeader + { + LASHeader header; + + // Test non-const access through header + header.transform().offsets().x() = 500.0; + header.transform().offsets().y() = 600.0; + header.transform().offsets().z() = 700.0; + + LASPP_ASSERT_EQ(header.transform().offsets().x(), 500.0); + LASPP_ASSERT_EQ(header.transform().offsets().y(), 600.0); + LASPP_ASSERT_EQ(header.transform().offsets().z(), 700.0); + + header.transform().scale_factors().x() = 0.1; + header.transform().scale_factors().y() = 0.2; + header.transform().scale_factors().z() = 0.3; + + LASPP_ASSERT_EQ(header.transform().scale_factors().x(), 0.1); + LASPP_ASSERT_EQ(header.transform().scale_factors().y(), 0.2); + LASPP_ASSERT_EQ(header.transform().scale_factors().z(), 0.3); + } + + // Test const access still works + { + Transform transform; + transform.offsets().x() = 10.0; + transform.scale_factors().x() = 0.5; + + const Transform& const_transform = transform; + LASPP_ASSERT_EQ(const_transform.offsets().x(), 10.0); + LASPP_ASSERT_EQ(const_transform.scale_factors().x(), 0.5); + } + + return 0; +} diff --git a/src/tests/test_writer_copy_from_reader.cpp b/src/tests/test_writer_copy_from_reader.cpp new file mode 100644 index 0000000..7570143 --- /dev/null +++ b/src/tests/test_writer_copy_from_reader.cpp @@ -0,0 +1,448 @@ +/* + * SPDX-FileCopyrightText: (c) 2025-2026 Trailblaze Software, all rights reserved + * SPDX-License-Identifier: MIT + */ + +#include + +#include "las_header.hpp" +#include "las_point.hpp" +#include "las_reader.hpp" +#include "las_writer.hpp" +#include "spatial_index.hpp" +#include "utilities/assert.hpp" +#include "vlr.hpp" + +using namespace laspp; + +int main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) { + // Test copy_from_reader without spatial index + { + std::stringstream input_stream; + { + LASWriter writer(input_stream, 0, 0); + writer.copy_header_metadata(LASHeader()); + writer.header().transform() = Transform({0.001, 0.001, 0.001}, {0.0, 0.0, 0.0}); + + std::vector points; + for (int i = 0; i < 100; ++i) { + LASPointFormat0 point; + point.x = i * 1000; + point.y = i * 1000; + point.z = i * 100; + point.intensity = 100; + point.bit_byte = 0; + point.classification_byte = 0; + point.scan_angle_rank = 0; + point.user_data = 0; + point.point_source_id = 0; + points.push_back(point); + } + writer.write_points(std::span(points)); + } + + // Read it back + input_stream.seekg(0); + LASReader reader(input_stream); + + // Copy to new writer + std::stringstream output_stream; + { + LASWriter writer(output_stream, 0, 0); + writer.copy_from_reader(reader, false); + } + + // Verify the copy + output_stream.seekg(0); + LASReader output_reader(output_stream); + LASPP_ASSERT_EQ(output_reader.header().num_points(), 100u); + LASPP_ASSERT_EQ(output_reader.header().point_format(), 0); + + std::vector output_points(100); + output_reader.read_chunks(output_points, {0, output_reader.num_chunks()}); + + for (size_t i = 0; i < 100; ++i) { + LASPP_ASSERT_EQ(output_points[i].x, static_cast(i * 1000)); + LASPP_ASSERT_EQ(output_points[i].y, static_cast(i * 1000)); + LASPP_ASSERT_EQ(output_points[i].z, static_cast(i * 100)); + } + } + + // Test copy_from_reader with spatial index + { + std::stringstream input_stream; + { + LASWriter writer(input_stream, 0, 0); + writer.copy_header_metadata(LASHeader()); + writer.header().transform() = Transform({0.001, 0.001, 0.001}, {0.0, 0.0, 0.0}); + // Use const_cast to get non-const access to bounds for testing + // x: (i%10)*10000 * 0.001 → [0, 90], y: (i/10)*10000 * 0.001 → [0, 190], z: i*10 * 0.001 → + // [0, 1.99] + const_cast(writer.header().bounds()).update({0.0, 0.0, 0.0}); + const_cast(writer.header().bounds()).update({90.0, 190.0, 1.99}); + + std::vector points; + for (int i = 0; i < 200; ++i) { + LASPointFormat0 point; + point.x = (i % 10) * 10000; // Spread across 0-90 + point.y = (i / 10) * 10000; // Spread across 0-190 + point.z = i * 10; + point.intensity = 100; + point.bit_byte = 0; + point.classification_byte = 0; + point.scan_angle_rank = 0; + point.user_data = 0; + point.point_source_id = 0; + points.push_back(point); + } + writer.write_points(std::span(points)); + } + + // Read it back + input_stream.seekg(0); + LASReader reader(input_stream); + + // Copy to new writer with spatial index + std::stringstream output_stream; + { + LASWriter writer(output_stream, 0, 0); + writer.copy_from_reader(reader, true); + } + + // Verify the copy has spatial index + output_stream.seekg(0); + LASReader output_reader(output_stream); + LASPP_ASSERT_EQ(output_reader.header().num_points(), 200u); + LASPP_ASSERT(output_reader.has_lastools_spatial_index()); + + const auto& spatial_index = output_reader.lastools_spatial_index(); + LASPP_ASSERT_GT(spatial_index.num_cells(), 0); + + // Verify points were reordered (check that spatial index is valid) + std::vector output_points(200); + output_reader.read_chunks(output_points, {0, output_reader.num_chunks()}); + LASPP_ASSERT_EQ(output_points.size(), 200u); + } + + // Test copy_from_reader with VLRs + { + std::stringstream input_stream; + { + LASWriter writer(input_stream, 0, 0); + writer.copy_header_metadata(LASHeader()); + + // Add a VLR + LASVLR vlr; + vlr.reserved = 0; + string_to_arr("TEST_USER", vlr.user_id); + vlr.record_id = 12345; + vlr.record_length_after_header = 10; + string_to_arr("Test VLR", vlr.description); + std::vector vlr_data(10, std::byte{0x42}); + writer.write_vlr(vlr, vlr_data); + + std::vector points(10); + writer.write_points(std::span(points)); + } + + // Read it back + input_stream.seekg(0); + LASReader reader(input_stream); + + // Copy to new writer + std::stringstream output_stream; + { + LASWriter writer(output_stream, 0, 0); + writer.copy_from_reader(reader, false); + } + + // Verify VLR was copied + output_stream.seekg(0); + LASReader output_reader(output_stream); + const auto& vlrs = output_reader.vlr_headers(); + LASPP_ASSERT_EQ(vlrs.size(), 1u); + LASPP_ASSERT_EQ(std::string(vlrs[0].user_id, 16).substr(0, 9), "TEST_USER"); + LASPP_ASSERT_EQ(vlrs[0].record_id, 12345u); + } + + // Test copy_from_reader with EVLRs + { + std::stringstream input_stream; + { + LASWriter writer(input_stream, 0, 0); + writer.copy_header_metadata(LASHeader()); + + std::vector points(10); + writer.write_points(std::span(points)); + + // Add an EVLR + LASEVLR evlr; + evlr.reserved = 0; + string_to_arr("TEST_EVLR", evlr.user_id); + evlr.record_id = 54321; + evlr.record_length_after_header = 20; + string_to_arr("Test EVLR", evlr.description); + std::vector evlr_data(20, std::byte{0x99}); + writer.write_evlr(evlr, evlr_data); + } + + // Read it back + input_stream.seekg(0); + LASReader reader(input_stream); + + // Copy to new writer + std::stringstream output_stream; + { + LASWriter writer(output_stream, 0, 0); + writer.copy_from_reader(reader, false); + } + + // Verify EVLR was copied + output_stream.seekg(0); + LASReader output_reader(output_stream); + const auto& evlrs = output_reader.evlr_headers(); + LASPP_ASSERT_EQ(evlrs.size(), 1u); + LASPP_ASSERT_EQ(std::string(evlrs[0].user_id, 16).substr(0, 9), "TEST_EVLR"); + LASPP_ASSERT_EQ(evlrs[0].record_id, 54321u); + } + + // Test copy_from_reader skips existing spatial index when adding new one + { + std::stringstream input_stream; + { + LASWriter writer(input_stream, 0, 0); + writer.copy_header_metadata(LASHeader()); + writer.header().transform() = Transform({0.001, 0.001, 0.001}, {0.0, 0.0, 0.0}); + // Use const_cast to get non-const access to bounds for testing + const_cast(writer.header().bounds()).update({0.0, 0.0, 0.0}); + const_cast(writer.header().bounds()).update({100.0, 100.0, 10.0}); + + std::vector points; + for (int i = 0; i < 50; ++i) { + LASPointFormat0 point; + point.x = i * 1000; + point.y = i * 1000; + point.z = 0; + point.intensity = 0; + point.bit_byte = 0; + point.classification_byte = 0; + point.scan_angle_rank = 0; + point.user_data = 0; + point.point_source_id = 0; + points.push_back(point); + } + writer.write_points(std::span(points)); + + // Add an old spatial index + LASHeader old_header; + const_cast(old_header.bounds()).update({0.0, 0.0, 0.0}); + const_cast(old_header.bounds()).update({100.0, 100.0, 0.0}); + QuadtreeSpatialIndex old_index(old_header, points); + writer.write_lastools_spatial_index(old_index); + } + + // Read it back + input_stream.seekg(0); + LASReader reader(input_stream); + LASPP_ASSERT(reader.has_lastools_spatial_index()); + + // Copy to new writer with new spatial index + std::stringstream output_stream; + { + LASWriter writer(output_stream, 0, 0); + writer.copy_from_reader(reader, true); + } + + // Verify only one spatial index exists (the new one) + output_stream.seekg(0); + LASReader output_reader(output_stream); + LASPP_ASSERT(output_reader.has_lastools_spatial_index()); + const auto& evlrs = output_reader.evlr_headers(); + size_t spatial_index_count = 0; + for (const auto& evlr : evlrs) { + if (evlr.is_lastools_spatial_index_evlr()) { + spatial_index_count++; + } + } + LASPP_ASSERT_EQ(spatial_index_count, 1u); + } + + // Test copy_from_reader with different point formats + { + std::stringstream input_stream; + { + LASWriter writer(input_stream, 1, 0); // Format 1 with GPS time + writer.copy_header_metadata(LASHeader()); + writer.header().transform() = Transform({0.001, 0.001, 0.001}, {0.0, 0.0, 0.0}); + + std::vector points; + for (int i = 0; i < 50; ++i) { + LASPointFormat1 point; + point.x = i * 1000; + point.y = i * 1000; + point.z = i * 100; + point.intensity = 100; + point.bit_byte = 0; + point.classification_byte = 0; + point.scan_angle_rank = 0; + point.user_data = 0; + point.point_source_id = 0; + point.gps_time = GPSTime(static_cast(i)).gps_time; + points.push_back(point); + } + writer.write_points(std::span(points)); + } + + // Read it back + input_stream.seekg(0); + LASReader reader(input_stream); + + // Copy to new writer + std::stringstream output_stream; + { + LASWriter writer(output_stream, 1, 0); + writer.copy_from_reader(reader, false); + } + + // Verify the copy + output_stream.seekg(0); + LASReader output_reader(output_stream); + LASPP_ASSERT_EQ(output_reader.header().num_points(), 50u); + LASPP_ASSERT_EQ(output_reader.header().point_format(), 1); + + std::vector output_points(50); + output_reader.read_chunks(output_points, {0, output_reader.num_chunks()}); + + for (size_t i = 0; i < 50; ++i) { + LASPP_ASSERT_EQ(output_points[i].x, static_cast(i * 1000)); + LASPP_ASSERT_EQ(output_points[i].gps_time.f64, static_cast(i)); + } + } + + // Test copy_from_reader with LAZ compression + { + std::stringstream input_stream; + { + LASWriter writer(input_stream, 128, 0); // Format 0 with LAZ compression + writer.copy_header_metadata(LASHeader()); + writer.header().transform() = Transform({0.001, 0.001, 0.001}, {0.0, 0.0, 0.0}); + + std::vector points; + for (int i = 0; i < 100; ++i) { + LASPointFormat0 point; + point.x = i * 1000; + point.y = i * 1000; + point.z = i * 100; + point.intensity = 100; + point.bit_byte = 0; + point.classification_byte = 0; + point.scan_angle_rank = 0; + point.user_data = 0; + point.point_source_id = 0; + points.push_back(point); + } + writer.write_points(std::span(points)); + } + + // Read it back + input_stream.seekg(0); + LASReader reader(input_stream); + + // Copy to new writer (uncompressed) + std::stringstream output_stream; + { + LASWriter writer(output_stream, 0, 0); // Uncompressed + writer.copy_from_reader(reader, false); + } + + // Verify the copy + output_stream.seekg(0); + LASReader output_reader(output_stream); + LASPP_ASSERT_EQ(output_reader.header().num_points(), 100u); + LASPP_ASSERT_EQ(output_reader.header().point_format(), 0); + LASPP_ASSERT(!output_reader.header().is_laz_compressed()); + + std::vector output_points(100); + output_reader.read_chunks(output_points, {0, output_reader.num_chunks()}); + + for (size_t i = 0; i < 100; ++i) { + LASPP_ASSERT_EQ(output_points[i].x, static_cast(i * 1000)); + } + } + + // Test copy_from_reader skips existing spatial index stored as a VLR. + // LAS 1.2-era files can carry the LAStools index as a VLR (record_id 30) + // rather than an EVLR. When add_spatial_index=true the old VLR must be + // dropped so only the freshly-built EVLR appears in the output. + { + std::stringstream input_stream; + { + LASWriter writer(input_stream, 0, 0); + writer.copy_header_metadata(LASHeader()); + writer.header().transform() = Transform({0.001, 0.001, 0.001}, {0.0, 0.0, 0.0}); + const_cast(writer.header().bounds()).update({0.0, 0.0, 0.0}); + const_cast(writer.header().bounds()).update({100.0, 100.0, 10.0}); + + // Plant a dummy LAStools spatial-index VLR (record_id 30) before points. + LASVLR index_vlr; + index_vlr.reserved = 0; + string_to_arr("LAStools", index_vlr.user_id); + index_vlr.record_id = 30; + const std::byte dummy_byte{0}; + index_vlr.record_length_after_header = 1; + string_to_arr("LAX spatial index", index_vlr.description); + writer.write_vlr(index_vlr, std::span(&dummy_byte, 1)); + + std::vector points; + for (int i = 0; i < 50; ++i) { + LASPointFormat0 point; + point.x = i * 1000; + point.y = i * 1000; + point.z = 0; + point.intensity = 0; + point.bit_byte = 0; + point.classification_byte = 0; + point.scan_angle_rank = 0; + point.user_data = 0; + point.point_source_id = 0; + points.push_back(point); + } + writer.write_points(std::span(points)); + } + + // Verify the input has exactly one spatial-index VLR. + input_stream.seekg(0); + LASReader reader(input_stream); + size_t in_vlr_count = 0; + for (const auto& vlr : reader.vlr_headers()) { + if (vlr.is_lastools_spatial_index_vlr()) in_vlr_count++; + } + LASPP_ASSERT_EQ(in_vlr_count, 1u); + + // Copy with add_spatial_index=true — the old VLR must NOT be forwarded. + std::stringstream output_stream; + { + LASWriter writer(output_stream, 0, 0); + writer.copy_from_reader(reader, true); + } + + output_stream.seekg(0); + LASReader output_reader(output_stream); + + // No LAStools spatial-index VLR in the output. + size_t out_vlr_count = 0; + for (const auto& vlr : output_reader.vlr_headers()) { + if (vlr.is_lastools_spatial_index_vlr()) out_vlr_count++; + } + LASPP_ASSERT_EQ(out_vlr_count, 0u); + + // Exactly one spatial-index EVLR (the freshly-built one). + size_t out_evlr_count = 0; + for (const auto& evlr : output_reader.evlr_headers()) { + if (evlr.is_lastools_spatial_index_evlr()) out_evlr_count++; + } + LASPP_ASSERT_EQ(out_evlr_count, 1u); + } + + return 0; +} diff --git a/src/utilities/assert.hpp b/src/utilities/assert.hpp index e7be6c3..2e39eb5 100644 --- a/src/utilities/assert.hpp +++ b/src/utilities/assert.hpp @@ -5,8 +5,11 @@ #pragma once +#include + #include #include +#include #include #include #include @@ -18,7 +21,6 @@ #define LASPP_HAS_BUILTIN(x) 0 #endif -#ifdef LASPP_DEBUG_ASSERTS #include #include #if defined(_MSC_VER) || LASPP_HAS_BUILTIN(__builtin_source_location) @@ -31,7 +33,6 @@ using source_location = std::experimental::source_location; #endif #include "printing.hpp" -#endif namespace laspp { @@ -45,7 +46,6 @@ inline size_t to_size_t(T value) { } } -#ifdef LASPP_DEBUG_ASSERTS template std::optional OptionalString(Args &&...args) { if constexpr (sizeof...(args) == 0) { @@ -57,11 +57,6 @@ std::optional OptionalString(Args &&...args) { } } -#define LASPP_ASSERT(condition, ...) \ - if (!(condition)) \ - laspp::_LASPP_FAIL_ASSERT(#condition, laspp::OptionalString(__VA_ARGS__), \ - std::source_location::current()); - inline void _LASPP_FAIL_ASSERT(const std::string &condition_str, const std::optional &message, const std::source_location &loc = std::source_location::current()) { @@ -74,6 +69,22 @@ inline void _LASPP_FAIL_ASSERT(const std::string &condition_str, throw std::runtime_error(ss.str()); } +template +inline void _LASPP_FAILBinOp(const A &a, const B &b, const std::string &a_str, + const std::string &b_str, const std::string &nop, + const std::optional &message, + const std::source_location &loc = std::source_location::current()) { + std::stringstream ss; + ss << a << " " << nop << " " << b; + laspp::_LASPP_FAIL_ASSERT(a_str + " " + nop + " " + b_str, + message ? (ss.str() + " " + *message) : ss.str(), loc); +} + +#define LASPP_ASSERT(condition, ...) \ + if (!(condition)) \ + laspp::_LASPP_FAIL_ASSERT(#condition, laspp::OptionalString(__VA_ARGS__), \ + std::source_location::current()); + #ifdef _MSC_VER #define LASPP_ASSERT_BIN_OP(a, b, op, nop, ...) \ { \ @@ -95,34 +106,49 @@ inline void _LASPP_FAIL_ASSERT(const std::string &condition_str, } #endif -template -inline void _LASPP_FAILBinOp(const A &a, const B &b, const std::string &a_str, - const std::string &b_str, const std::string &nop, - const std::optional &message, - const std::source_location &loc = std::source_location::current()) { - std::stringstream ss; - ss << a << " " << nop << " " << b; - laspp::_LASPP_FAIL_ASSERT(a_str + " " + nop + " " + b_str, - message ? (ss.str() + " " + *message) : ss.str(), loc); -} - +// LASPP_DEBUG_ASSERT compiles out entirely in release (zero overhead on hot paths). +// In debug builds it is identical to LASPP_ASSERT. +#ifdef LASPP_DEBUG_ASSERTS +#define LASPP_DEBUG_ASSERT(condition, ...) LASPP_ASSERT(condition __VA_OPT__(, ) __VA_ARGS__) +#define LASPP_DEBUG_ASSERT_BIN_OP(a, b, op, nop, ...) \ + LASPP_ASSERT_BIN_OP(a, b, op, nop __VA_OPT__(, ) __VA_ARGS__) #else -#define LASPP_ASSERT(condition, ...) -#define LASPP_ASSERT_BIN_OP(a, b, op, nop, ...) +#define LASPP_DEBUG_ASSERT(condition, ...) +#define LASPP_DEBUG_ASSERT_BIN_OP(a, b, op, nop, ...) #endif -#define LASPP_FAIL(...) \ - LASPP_ASSERT(false, __VA_ARGS__); \ +#define LASPP_FAIL(...) \ + LASPP_ASSERT(false __VA_OPT__(, ) __VA_ARGS__); \ UNREACHABLE() // LCOV_EXCL_LINE #define LASPP_UNIMPLEMENTED(...) LASPP_FAIL("LASPP_UNIMPLEMENTED") -#define LASPP_ASSERT_GE(expr, val, ...) LASPP_ASSERT_BIN_OP(expr, val, >=, <, __VA_ARGS__) -#define LASPP_ASSERT_LE(expr, val, ...) LASPP_ASSERT_BIN_OP(expr, val, <=, >, __VA_ARGS__) -#define LASPP_ASSERT_GT(expr, val, ...) LASPP_ASSERT_BIN_OP(expr, val, >, <=, __VA_ARGS__) -#define LASPP_ASSERT_LT(expr, val, ...) LASPP_ASSERT_BIN_OP(expr, val, <, >=, __VA_ARGS__) -#define LASPP_ASSERT_EQ(expr, val, ...) LASPP_ASSERT_BIN_OP(expr, val, ==, !=, __VA_ARGS__) -#define LASPP_ASSERT_NE(expr, val, ...) LASPP_ASSERT_BIN_OP(expr, val, !=, ==, __VA_ARGS__) +#define LASPP_ASSERT_GE(expr, val, ...) \ + LASPP_ASSERT_BIN_OP(expr, val, >=, < __VA_OPT__(, ) __VA_ARGS__) +#define LASPP_ASSERT_LE(expr, val, ...) \ + LASPP_ASSERT_BIN_OP(expr, val, <=, > __VA_OPT__(, ) __VA_ARGS__) +#define LASPP_ASSERT_GT(expr, val, ...) \ + LASPP_ASSERT_BIN_OP(expr, val, >, <= __VA_OPT__(, ) __VA_ARGS__) +#define LASPP_ASSERT_LT(expr, val, ...) \ + LASPP_ASSERT_BIN_OP(expr, val, <, >= __VA_OPT__(, ) __VA_ARGS__) +#define LASPP_ASSERT_EQ(expr, val, ...) \ + LASPP_ASSERT_BIN_OP(expr, val, ==, != __VA_OPT__(, ) __VA_ARGS__) +#define LASPP_ASSERT_NE(expr, val, ...) \ + LASPP_ASSERT_BIN_OP(expr, val, !=, == __VA_OPT__(, ) __VA_ARGS__) + +// Debug-only binary-op shorthands — compile out entirely in release builds. +#define LASPP_DEBUG_ASSERT_GE(expr, val, ...) \ + LASPP_DEBUG_ASSERT_BIN_OP(expr, val, >=, < __VA_OPT__(, ) __VA_ARGS__) +#define LASPP_DEBUG_ASSERT_LE(expr, val, ...) \ + LASPP_DEBUG_ASSERT_BIN_OP(expr, val, <=, > __VA_OPT__(, ) __VA_ARGS__) +#define LASPP_DEBUG_ASSERT_GT(expr, val, ...) \ + LASPP_DEBUG_ASSERT_BIN_OP(expr, val, >, <= __VA_OPT__(, ) __VA_ARGS__) +#define LASPP_DEBUG_ASSERT_LT(expr, val, ...) \ + LASPP_DEBUG_ASSERT_BIN_OP(expr, val, <, >= __VA_OPT__(, ) __VA_ARGS__) +#define LASPP_DEBUG_ASSERT_EQ(expr, val, ...) \ + LASPP_DEBUG_ASSERT_BIN_OP(expr, val, ==, != __VA_OPT__(, ) __VA_ARGS__) +#define LASPP_DEBUG_ASSERT_NE(expr, val, ...) \ + LASPP_DEBUG_ASSERT_BIN_OP(expr, val, !=, == __VA_OPT__(, ) __VA_ARGS__) #if defined(_MSC_VER) && !defined(__clang__) // MSVC #define UNREACHABLE() __assume(false) @@ -212,12 +238,26 @@ static_assert(f_arr(DEBRACKET((std::array{{4, 4}})))); } \ } +// Macros to suppress useless-cast warnings only for GCC (Clang doesn't support -Wuseless-cast) +#if defined(__GNUC__) && !defined(__clang__) +#define LASPP_PUSH_WARNING_DISABLE_USELESS_CAST \ + _Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Wuseless-cast\"") +#define LASPP_POP_WARNING _Pragma("GCC diagnostic pop") +#else +#define LASPP_PUSH_WARNING_DISABLE_USELESS_CAST +#define LASPP_POP_WARNING +#endif + // LASPP_CHECK_SEEK checks if a seek operation succeeded // Parameters: stream, offset, direction (e.g., std::ios::beg, std::ios::cur, std::ios::end) +// Note: Cast to std::streamoff is needed for sign conversion (size_t -> signed) and is +// harmless when types already match (may trigger -Wuseless-cast in some cases) #define LASPP_CHECK_SEEK(stream, offset, direction) \ { \ auto &laspp_check_stream = (stream); \ + LASPP_PUSH_WARNING_DISABLE_USELESS_CAST \ laspp_check_stream.seekg(static_cast(offset), direction); \ + LASPP_POP_WARNING \ if (!laspp_check_stream.good()) { \ std::stringstream laspp_error_msg; \ laspp_error_msg << "Failed to seek in stream to offset " << offset; \ diff --git a/src/utilities/printing.hpp b/src/utilities/printing.hpp index 9febe9e..70d726e 100644 --- a/src/utilities/printing.hpp +++ b/src/utilities/printing.hpp @@ -6,11 +6,16 @@ #pragma once #include +#include #include #include +#include +#include #include #include #include +#include +#include #include #include @@ -52,8 +57,11 @@ inline std::ostream& operator<<(std::ostream& os, const std::array& arr) { template inline std::ostream& operator<<(std::ostream& os, const std::set& set) { os << "{"; + bool first = true; for (const T& elem : set) { - os << elem << ", "; + if (!first) os << ", "; + os << elem; + first = false; } return os << "}"; } @@ -76,4 +84,112 @@ inline std::ostream& operator<<(std::ostream& os, const std::tuple& tup std::apply([&os](const Args&... args) { ((os << args << ", "), ...); }, tup); return os; } + +template +inline std::ostream& operator<<(std::ostream& os, const std::map& map) { + os << "{"; + bool first = true; + for (const auto& [k, v] : map) { + if (!first) os << ", "; + os << k << ": " << v; + first = false; + } + return os << "}"; +} + +/** + * Wrapper class to limit the number of items printed from a map + * Usage: std::cout << limited_map(map, 5) << std::endl; + */ +template +class LimitedMap { + const std::map& m_map; + size_t m_limit; + bool m_show_remaining; + + public: + LimitedMap(const std::map& map, size_t limit, bool show_remaining = true) + : m_map(map), m_limit(limit), m_show_remaining(show_remaining) {} + + friend std::ostream& operator<<(std::ostream& os, const LimitedMap& limited) { + size_t count = 0; + for (const auto& kv : limited.m_map) { + if (count++ >= limited.m_limit) { + break; + } + os << kv.first << ": " << kv.second << '\n'; + } + if (limited.m_show_remaining && limited.m_map.size() > limited.m_limit) { + os << "... (" << (limited.m_map.size() - limited.m_limit) << " more)" << '\n'; + } + return os; + } +}; + +/** + * Helper function to create a LimitedMap wrapper + * Usage: std::cout << indented(limited_map(map, 5), " ") << std::endl; + */ +template +inline LimitedMap limited_map(const std::map& map, size_t limit, + bool show_remaining = true) { + return LimitedMap(map, limit, show_remaining); +} + +/** + * Wrapper class to add indentation prefix to each line of output from operator<< + * Usage: std::cout << indented(vlr, " ") << std::endl; + */ +template +class Indented { + const T& m_value; + std::string m_indent; + + public: + Indented(const T& value, const std::string& indent) : m_value(value), m_indent(indent) {} + + friend std::ostream& operator<<(std::ostream& os, const Indented& indented) { + // Capture the output from the object's operator<< + std::ostringstream ss; + ss << indented.m_value; + std::string output = ss.str(); + + // Add indentation to each line + if (output.empty()) { + return os; + } + + size_t start = 0; + while (start < output.length()) { + size_t end = output.find('\n', start); + if (end == std::string::npos) { + // Last line (no trailing newline) + if (start < output.length()) { + os << indented.m_indent; + os.write(output.data() + start, static_cast(output.length() - start)); + } + break; + } + + // Add indentation prefix to this line + os << indented.m_indent; + os.write(output.data() + start, static_cast(end - start)); + os << '\n'; + + start = end + 1; + } + + return os; + } +}; + +/** + * Helper function to create an Indented wrapper + * Usage: std::cout << indented(vlr, " ") << std::endl; + */ +template +inline Indented indented(const T& value, const std::string& indent) { + return Indented(value, indent); +} + } // namespace laspp diff --git a/src/utilities/tests/test_printing.cpp b/src/utilities/tests/test_printing.cpp new file mode 100644 index 0000000..275fb5d --- /dev/null +++ b/src/utilities/tests/test_printing.cpp @@ -0,0 +1,152 @@ +/* + * SPDX-FileCopyrightText: (c) 2025-2026 Trailblaze Software, all rights reserved + * SPDX-License-Identifier: MIT + */ + +#include +#include +#include +#include + +#include "utilities/assert.hpp" +#include "utilities/printing.hpp" + +using namespace laspp; + +// Simple test class with operator<< +struct TestStruct { + int value; + friend std::ostream& operator<<(std::ostream& os, const TestStruct& ts) { + return os << "TestStruct(" << ts.value << ")"; + } +}; + +int main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) { + // Test Indented with simple string output + { + std::ostringstream oss; + std::string test_str = "line1\nline2\nline3"; + oss << test_str; + std::string result = oss.str(); + LASPP_ASSERT_EQ(result, "line1\nline2\nline3"); + } + + // Test Indented with single line + { + std::ostringstream oss; + TestStruct ts{42}; + oss << indented(ts, " "); + std::string result = oss.str(); + LASPP_ASSERT_EQ(result, " TestStruct(42)"); + } + + // Test Indented with multiple lines + { + std::ostringstream oss; + std::string multi_line = "first\nsecond\nthird"; + oss << indented(multi_line, " "); + std::string result = oss.str(); + LASPP_ASSERT_EQ(result, " first\n second\n third"); + } + + // Test Indented with empty string + { + std::ostringstream oss; + std::string empty = ""; + oss << indented(empty, " "); + std::string result = oss.str(); + LASPP_ASSERT_EQ(result, ""); + } + + // Test Indented with trailing newline + { + std::ostringstream oss; + std::string with_newline = "line1\nline2\n"; + oss << indented(with_newline, " "); + std::string result = oss.str(); + LASPP_ASSERT_EQ(result, " line1\n line2\n"); + } + + // Test Indented with custom indent + { + std::ostringstream oss; + std::string text = "a\nb\nc"; + oss << indented(text, " "); + std::string result = oss.str(); + LASPP_ASSERT_EQ(result, " a\n b\n c"); + } + + // Test Indented with vector + { + std::ostringstream oss; + std::vector vec = {1, 2, 3}; + oss << indented(vec, " "); + std::string result = oss.str(); + LASPP_ASSERT_EQ(result, " [1, 2, 3]"); + } + + // Test LimitedMap with small map + { + std::map map = {{1, "one"}, {2, "two"}, {3, "three"}}; + std::ostringstream oss; + oss << limited_map(map, 2); + std::string result = oss.str(); + LASPP_ASSERT(result.find("one") != std::string::npos); + LASPP_ASSERT(result.find("two") != std::string::npos); + LASPP_ASSERT(result.find("three") == std::string::npos); // Should be limited + LASPP_ASSERT(result.find("more") != std::string::npos); // Should show remaining + } + + // Test LimitedMap with limit larger than map size + { + std::map map = {{1, "one"}, {2, "two"}}; + std::ostringstream oss; + oss << limited_map(map, 10); + std::string result = oss.str(); + LASPP_ASSERT(result.find("one") != std::string::npos); + LASPP_ASSERT(result.find("two") != std::string::npos); + LASPP_ASSERT(result.find("more") == std::string::npos); // Should not show remaining + } + + // Test LimitedMap with show_remaining = false + { + std::map map = {{1, "one"}, {2, "two"}, {3, "three"}}; + std::ostringstream oss; + oss << limited_map(map, 1, false); + std::string result = oss.str(); + LASPP_ASSERT(result.find("one") != std::string::npos); + LASPP_ASSERT(result.find("more") == std::string::npos); // Should not show remaining + } + + // Test LimitedMap with empty map + { + std::map map; + std::ostringstream oss; + oss << limited_map(map, 5); + std::string result = oss.str(); + LASPP_ASSERT_EQ(result, ""); + } + + // Test LimitedMap with limit of 0 + { + std::map map = {{1, "one"}, {2, "two"}}; + std::ostringstream oss; + oss << limited_map(map, 0); + std::string result = oss.str(); + LASPP_ASSERT(result.find("one") == std::string::npos); + LASPP_ASSERT(result.find("more") != std::string::npos); // Should show remaining + } + + // Test combination: indented limited_map + { + std::map map = {{1, "one"}, {2, "two"}, {3, "three"}}; + std::ostringstream oss; + oss << indented(limited_map(map, 2), " "); + std::string result = oss.str(); + LASPP_ASSERT(result.find(" 1: one") != std::string::npos); + LASPP_ASSERT(result.find(" 2: two") != std::string::npos); + LASPP_ASSERT(result.find(" ...") != std::string::npos); + } + + return 0; +} diff --git a/src/utilities/thread_pool.hpp b/src/utilities/thread_pool.hpp index 63bff3e..6fd6ffe 100644 --- a/src/utilities/thread_pool.hpp +++ b/src/utilities/thread_pool.hpp @@ -13,10 +13,14 @@ #include #include #include +#include #include +#include #include +#include #include +#include "assert.hpp" #include "env.hpp" namespace laspp { @@ -91,6 +95,7 @@ class ThreadPool { // chunk_size: number of indices to process per atomic increment (default: 1) template void parallel_for(size_t begin, size_t end, Func func, size_t chunk_size = 1) { + LASPP_ASSERT(chunk_size > 0, "chunk_size must be positive"); if (begin >= end) { return; } @@ -133,6 +138,64 @@ class ThreadPool { completion_latch->wait(); } + // Execute a reduction over [begin, end), accumulating into `result` via T::combine(const T&). + // `func` is called as func(index, thread_local_T) for each index. + // Each worker thread builds its own local T (default-constructed), then merges into `result` + // under a mutex once all its assigned work is done. + template + void parallel_for_reduction(size_t begin, size_t end, T& result, Func func, + size_t chunk_size = 1) { + LASPP_ASSERT(chunk_size > 0, "chunk_size must be positive"); + if (begin >= end) { + return; + } + + size_t requested_threads = get_num_threads(); + ensure_threads(requested_threads); + size_t active_threads = std::min(requested_threads, m_max_threads); + + const size_t total_work = end - begin; + const size_t num_chunks = (total_work + chunk_size - 1) / chunk_size; // Ceiling division + + // Latch counts down once per active thread (after it finishes work + combine) + auto completion_latch = std::make_shared(active_threads); + auto next_chunk = std::make_shared>(0); + auto combine_mutex = std::make_shared(); + + for (size_t i = 0; i < active_threads; ++i) { + enqueue([next_chunk, num_chunks, begin, end, chunk_size, func, completion_latch, + combine_mutex, &result]() mutable { + T local{}; + bool has_work = false; + + while (true) { + size_t chunk_idx = next_chunk->fetch_add(1); + if (chunk_idx >= num_chunks) { + break; + } + + has_work = true; + size_t chunk_begin = begin + chunk_idx * chunk_size; + size_t chunk_end = std::min(chunk_begin + chunk_size, end); + for (size_t idx = chunk_begin; idx < chunk_end; ++idx) { + func(idx, local); + } + } + + // Merge thread-local result into shared result under mutex before signalling done + if (has_work) { + std::lock_guard lock(*combine_mutex); + result.combine(local); + } + + completion_latch->count_down(1); + }); + } + + // Block until all threads have finished work and combined their local results + completion_latch->wait(); + } + private: void enqueue(std::function task) { { @@ -170,5 +233,11 @@ void parallel_for(size_t begin, size_t end, Func func, size_t chunk_size = 1) { get_thread_pool().parallel_for(begin, end, func, chunk_size); } +// Convenience function for parallel_for_reduction using the global thread pool +template +void parallel_for_reduction(size_t begin, size_t end, T& result, Func func, size_t chunk_size = 1) { + get_thread_pool().parallel_for_reduction(begin, end, result, func, chunk_size); +} + } // namespace utilities } // namespace laspp diff --git a/src/vlr.hpp b/src/vlr.hpp index 570bbe5..9d71bf6 100644 --- a/src/vlr.hpp +++ b/src/vlr.hpp @@ -5,11 +5,15 @@ #pragma once +#include + #include +#include #include #include #include #include +#include #include #include @@ -142,6 +146,17 @@ class LASGeoKeys { } }; +// Fixed-width LAS string (user_id, description, ...): may omit a trailing NUL. +inline std::string_view las_packed_string(std::string_view raw) { + const auto z = raw.find('\0'); + return z == std::string_view::npos ? raw : raw.substr(0, z); +} + +template +inline std::string_view las_packed_string(const char (&arr)[N]) { + return las_packed_string(std::string_view(arr, N)); +} + #pragma pack(push, 1) struct LASPP_PACKED ExtraBytesInfo { @@ -189,14 +204,14 @@ struct LASPP_PACKED LASVariableLengthRecord { friend std::ostream& operator<<(std::ostream& os, const LASVariableLengthRecord& vlr) { os << "Reserved: " << vlr.reserved << std::endl; - os << "User ID: " << vlr.user_id << std::endl; + os << "User ID: \"" << las_packed_string(vlr.user_id) << "\"" << std::endl; os << "Record ID: " << vlr.record_id << std::endl; os << "Record length after header: " << vlr.record_length_after_header << std::endl; - os << "Description: " << vlr.description << std::endl; + os << "Description: \"" << las_packed_string(vlr.description) << "\"" << std::endl; return os; } - bool is_projection() const { return std::string(user_id) == "LASF_Projection"; } + bool is_projection() const { return las_packed_string(user_id) == "LASF_Projection"; } bool is_ogc_math_transform_wkt() const { return is_projection() && record_id == 2111; } @@ -208,7 +223,7 @@ struct LASPP_PACKED LASVariableLengthRecord { bool is_geo_ascii_params() const { return is_projection() && record_id == 34737; } - bool is_spec() const { return std::string(user_id) == "LASF_Spec"; } + bool is_spec() const { return las_packed_string(user_id) == "LASF_Spec"; } bool is_classification_lookup() const { return is_spec() && record_id == 0; } @@ -231,9 +246,13 @@ struct LASPP_PACKED LASVariableLengthRecord { bool is_waveform_data_packets() const { return is_spec() && record_id == 65535; } bool is_laz_vlr() const { + const std::string_view uid = las_packed_string(user_id); return (reserved == 0 || reserved == 0xAABB) && // 43707 - (std::string(user_id) == "LAZ encoded" || std::string(user_id) == "laszip encoded") && - record_id == 22204; + (uid == "LAZ encoded" || uid == "laszip encoded") && record_id == 22204; + } + + bool is_lastools_spatial_index_vlr() const { + return las_packed_string(user_id) == "LAStools" && record_id == 30; } }; @@ -260,12 +279,22 @@ struct LASPP_PACKED LASExtendedVariableLengthRecord { friend std::ostream& operator<<(std::ostream& os, const LASExtendedVariableLengthRecord& evlr) { os << "Reserved: " << evlr.reserved << std::endl; - os << "User ID: " << evlr.user_id << std::endl; + os << "User ID: \"" << las_packed_string(evlr.user_id) << "\"" << std::endl; os << "Record ID: " << evlr.record_id << std::endl; os << "Record length after header: " << evlr.record_length_after_header << std::endl; - os << "Description: " << evlr.description << std::endl; + os << "Description: \"" << las_packed_string(evlr.description) << "\"" << std::endl; return os; } + + bool is_laz_vlr() const { + const std::string_view uid = las_packed_string(user_id); + return (reserved == 0 || reserved == 0xAABB) && + (uid == "LAZ encoded" || uid == "laszip encoded") && record_id == 22204; + } + + bool is_lastools_spatial_index_evlr() const { + return las_packed_string(user_id) == "LAStools" && record_id == 30; + } }; using LASEVLR = LASExtendedVariableLengthRecord;