diff --git a/Common/include/containers/CLookUpTable.hpp b/Common/include/containers/CLookUpTable.hpp index e7d8fd17e6e5..2458da32f424 100644 --- a/Common/include/containers/CLookUpTable.hpp +++ b/Common/include/containers/CLookUpTable.hpp @@ -35,6 +35,7 @@ #include "../../Common/include/option_structure.hpp" #include "CFileReaderLUT.hpp" #include "CTrapezoidalMap.hpp" +#include "CTrapezoidalMapFAST.hpp" /*! * \brief Look up table. @@ -100,10 +101,16 @@ class CLookUpTable { su2vector> hull; /*! \brief - * Trapezoidal map objects for the table levels. + * Trapezoidal map objects for the table levels (original SU2 implementation). */ su2vector trap_map_x_y; + /*! \brief + * Memory-efficient trapezoidal map objects for LUT_FAST. + */ + su2vector trap_map_x_y_FAST; + bool use_fast_trap_map_{false}; + /*! \brief * Vector of all the weight factors for the interpolation. */ @@ -319,6 +326,23 @@ class CLookUpTable { public: CLookUpTable(const std::string& file_name_lut, std::string name_CV1_in, std::string name_CV2_in); + /*! + * \brief Build the original SU2 trapezoidal map (DAG-based). + * Use EnableFastTrapMap() for large tables instead. + */ + void BuildOriginalTrapMap(); + + /*! + * \brief Enable Memory-efficient trapezoidal map (LUT_FAST mode). + * This builds band-based maps with O(n) memory instead of the original O(n log n) DAG. + */ + void EnableFastTrapMap(); + + /*! + * \brief Check if fast trapezoidal map is enabled. + */ + inline bool UsingFastTrapMap() const { return use_fast_trap_map_; } + /*! * \brief Print information to screen. */ diff --git a/Common/include/containers/CTrapezoidalMapFAST.hpp b/Common/include/containers/CTrapezoidalMapFAST.hpp new file mode 100644 index 000000000000..7b18f4919ac1 --- /dev/null +++ b/Common/include/containers/CTrapezoidalMapFAST.hpp @@ -0,0 +1,617 @@ +/*! + * \file CTrapezoidalMapFAST.hpp + * \brief Fast trapezoidal map implementation for 2D LUT queries in SU2. + * Based on Pedro Gomes' (pcarruscag) LUT implementation: + * https://github.com/pcarruscag/LUT + */ + +#ifndef CTRAPEZOIDALMAP_FAST_HPP +#define CTRAPEZOIDALMAP_FAST_HPP + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "../basic_types/datatype_structure.hpp" + +namespace su2_lut { + +using IntT = int32_t; +using RealT = su2double; +using PassiveReal = passivedouble; + +/*--- Simple Matrix container (row-major) ---*/ +template +struct Matrix { + std::vector data; + + void resize(size_t rows, size_t) { data.resize(rows * N); } + size_t rows() const { return data.size() / N; } + + const T& operator()(size_t i, size_t j) const { return data[i * N + j]; } + T& operator()(size_t i, size_t j) { return data[i * N + j]; } +}; + +using Matrix2i = Matrix; +using Matrix3i = Matrix; +using VectorInt = std::vector; +using VectorReal = std::vector; + +/*--- Trapezoidal Map structure ---*/ +struct TrapezoidalMap { + VectorInt offsets, edge_id; + VectorReal x_bands, edge_y; +}; + +/*! + * \brief Orders points by ascending x coordinates and updates triangle indices. + */ +inline void ReorderPoints(Matrix3i& triangles, VectorReal& x, VectorReal& y) { + const IntT n_pts = static_cast(x.size()); + + // Create sort permutation based on (x, y) coordinates + std::vector perm(n_pts); + std::iota(perm.begin(), perm.end(), 0); + std::sort(perm.begin(), perm.end(), + [&x, &y](const auto i, const auto j) { return x[i] != x[j] ? x[i] < x[j] : y[i] < y[j]; }); + + // Reorder coordinate arrays + auto reorder = [n_pts, &perm](const auto& v) { + VectorReal tmp(n_pts); + for (IntT i = 0; i < n_pts; ++i) { + tmp[i] = v[perm[i]]; + } + return tmp; + }; + x = reorder(x); + y = reorder(y); + + // Build inverse permutation to update triangle indices + std::vector inv_perm(n_pts); + for (IntT i = 0; i < n_pts; ++i) { + inv_perm[perm[i]] = i; + } + + // Update triangle connectivity + for (IntT i = 0; i < static_cast(triangles.rows()); ++i) { + for (IntT j = 0; j < 3; ++j) { + triangles(i, j) = inv_perm[triangles(i, j)]; + } + } +} + +/*! + * \brief Check if two edges are equal (same point IDs). + */ +inline bool EdgesEqual(const std::array& a, const std::array& b) { + return a[0] == b[0] && a[1] == b[1]; +} + +/*! + * \brief Extract unique edges from triangles. + */ +inline void ExtractEdges(const Matrix3i& triangles, Matrix2i& edge_pts, Matrix2i& edge_faces) { + // Extract edges from triangles + std::vector> edges; + edges.resize(3 * triangles.rows()); + + for (IntT i_tri = 0; i_tri < static_cast(triangles.rows()); ++i_tri) { + for (IntT i = 0; i < 3; ++i) { + const IntT j = (i + 1) % 3; + const IntT i_pt = std::min(triangles(i_tri, i), triangles(i_tri, j)); + const IntT j_pt = std::max(triangles(i_tri, i), triangles(i_tri, j)); + edges[3 * i_tri + i] = {i_pt, j_pt, i_tri}; + } + } + + // Sort to identify duplicates + std::sort(edges.begin(), edges.end(), + [](const auto& a, const auto& b) { return a[0] != b[0] ? (a[0] < b[0]) : (a[1] < b[1]); }); + + // Count unique edges + IntT n_edges = 1; + for (IntT i = 1; i < static_cast(edges.size()); ++i) { + n_edges += static_cast(!EdgesEqual(edges[i], edges[i - 1])); + } + + // Map edge points and edge faces + edge_pts.resize(n_edges, 2); + edge_faces.resize(n_edges, 2); + IntT pos = 0; + + auto new_edge = [&](const auto& edge) { + edge_pts(pos, 0) = edge[0]; + edge_pts(pos, 1) = edge[1]; + edge_faces(pos, 0) = edge[2]; + edge_faces(pos, 1) = -1; + ++pos; + }; + + new_edge(edges[0]); + for (IntT i = 1; i < static_cast(edges.size()); ++i) { + if (EdgesEqual(edges[i], edges[i - 1])) { + edge_faces(pos - 1, 1) = edges[i][2]; + } else { + new_edge(edges[i]); + } + } +} + +/*! + * \brief Detects bands based on unique x-coordinates. + * Uses coarse banding to limit memory for large tables. + * Returns tuple of (n_bands, pt_to_band, x_bands). + */ +inline auto DetectBands(const VectorReal& x, IntT max_bands = 0) { + // Safety check: empty input + if (x.empty()) { + return std::make_tuple(IntT{0}, std::vector{}, VectorReal{}); + } + + // First pass: count unique x-coordinates + IntT n_unique = 1; + for (IntT i = 1; i < static_cast(x.size()); ++i) { + if (x[i] != x[i - 1]) n_unique++; + } + + // Determine if we need to use coarser bands + if (max_bands <= 0) { + max_bands = std::min(IntT{5000}, static_cast(4.0 * std::sqrt(static_cast(x.size())))); + } + + const bool use_coarse_bands = (n_unique > max_bands); + + if (!use_coarse_bands) { + // Original algorithm: use all unique x-coordinates as band boundaries + std::vector pt_to_band(x.size()); + pt_to_band[0] = 0; + IntT n_bands = 0; + + for (IntT i = 1; i < static_cast(x.size()); ++i) { + n_bands += static_cast(x[i] != x[i - 1]); + pt_to_band[i] = n_bands; + } + + VectorReal x_bands(n_bands + 1); + IntT pos = 0; + x_bands[pos] = x[0]; + for (IntT i = 1; i < static_cast(x.size()); ++i) { + if (x[i] != x_bands[pos]) { + x_bands[++pos] = x[i]; + } + } + + return std::make_tuple(n_bands, std::move(pt_to_band), std::move(x_bands)); + } else { + // Coarse banding: divide x-range into max_bands equal intervals + const RealT x_min = x.front(); + const RealT x_max = x.back(); + const RealT x_range = x_max - x_min; + + // Handle degenerate case where all x are the same + if (x_range < 1e-30) { + std::vector pt_to_band(x.size(), 0); + VectorReal x_bands = {x_min, x_max}; + return std::make_tuple(IntT{1}, std::move(pt_to_band), std::move(x_bands)); + } + + const IntT n_bands = max_bands; + const RealT band_width = x_range / max_bands; + + // Build band boundaries + VectorReal x_bands(max_bands + 1); + for (IntT i = 0; i <= max_bands; ++i) { + x_bands[i] = x_min + i * band_width; + } + x_bands[max_bands] = x_max; + + // Map each point to its band + std::vector pt_to_band(x.size()); + for (IntT i = 0; i < static_cast(x.size()); ++i) { + IntT band = static_cast(SU2_TYPE::GetValue((x[i] - x_min) / band_width)); + band = std::max(IntT{0}, std::min(band, max_bands - 1)); + pt_to_band[i] = band; + } + + return std::make_tuple(n_bands, std::move(pt_to_band), std::move(x_bands)); + } +} + +/*! + * \brief Builds the trapezoidal map for efficient 2D queries. + */ +inline void BuildTrapezoidalMap(const Matrix2i& edge_pts, const VectorReal& x, const VectorReal& y, + TrapezoidalMap& map) { + auto& x_bands = map.x_bands; + auto& offsets = map.offsets; + auto& edge_id = map.edge_id; + auto& edge_y = map.edge_y; + + const auto [n_bands, pt_to_band, bands] = DetectBands(x); + x_bands = std::move(bands); + + // Safety check + if (n_bands <= 0 || x_bands.size() < 2) { + x_bands.clear(); + offsets.clear(); + edge_id.clear(); + edge_y.clear(); + return; + } + + // Helper function to find band index for an x-coordinate + auto find_band = [&x_bands, n_bands](RealT x_val) -> IntT { + auto it = std::lower_bound(x_bands.begin(), x_bands.end(), x_val); + IntT idx = static_cast(it - x_bands.begin()); + idx = std::max(IntT{0}, idx - 1); + idx = std::min(idx, n_bands - 1); + return idx; + }; + + // Count edges per band + auto& counts = offsets; + counts.clear(); + counts.resize(n_bands + 1, 0); + + for (IntT i = 0; i < static_cast(edge_pts.rows()); ++i) { + const IntT pt_0 = edge_pts(i, 0); + const IntT pt_1 = edge_pts(i, 1); + const RealT x_0 = x[pt_0]; + const RealT x_1 = x[pt_1]; + + const IntT band_0 = find_band(x_0); + const IntT band_1 = find_band(x_1); + + const IntT min_band = std::min(band_0, band_1); + const IntT max_band = std::max(band_0, band_1); + + for (IntT j = min_band; j <= max_band && j < n_bands; ++j) { + ++counts[j + 1]; + } + } + + // Convert counts to offsets (CSR format) + for (IntT i = 2; i < static_cast(offsets.size()); ++i) { + offsets[i] += offsets[i - 1]; + } + + // Check memory requirement + const size_t total_entries = static_cast(offsets.back()); + const size_t memory_mb = total_entries * (sizeof(IntT) + sizeof(RealT)) / (1024 * 1024); + + if (memory_mb > 2048) { + x_bands.clear(); + offsets.clear(); + edge_id.clear(); + edge_y.clear(); + return; + } + + // Allocate storage + edge_id.resize(offsets.back()); + edge_y.resize(offsets.back()); + auto pos = offsets; + + // Store edges in each band + for (IntT i_edge = 0; i_edge < static_cast(edge_pts.rows()); ++i_edge) { + const IntT pt_0 = edge_pts(i_edge, 0); + const IntT pt_1 = edge_pts(i_edge, 1); + const RealT x_0 = x[pt_0], y_0 = y[pt_0]; + const RealT x_1 = x[pt_1], y_1 = y[pt_1]; + + const IntT band_0 = find_band(x_0); + const IntT band_1 = find_band(x_1); + + if (band_0 == band_1) continue; + + const IntT min_band = std::min(band_0, band_1); + const IntT max_band = std::max(band_0, band_1); + const RealT dy_dx = (y_1 - y_0) / (x_1 - x_0); + + for (IntT j = min_band; j < max_band && j < n_bands; ++j) { + if (pos[j] < static_cast(edge_id.size())) { + edge_id[pos[j]] = i_edge; + const RealT x_mid = (x_bands[j] + x_bands[j + 1]) / 2; + edge_y[pos[j]] = y_0 + dy_dx * (x_mid - x_0); + ++pos[j]; + } + } + } + + // Sort edges in each band by y coordinate + std::vector> tmp; + for (IntT i = 0; i < n_bands; ++i) { + const IntT begin = offsets[i]; + const IntT end = offsets[i + 1]; + if (begin >= end) continue; + + tmp.resize(end - begin); + for (auto k = begin; k < end; ++k) { + tmp[k - begin] = {edge_id[k], edge_y[k]}; + } + std::sort(tmp.begin(), tmp.end(), [](const auto& a, const auto& b) { return a.second < b.second; }); + for (auto k = begin; k < end; ++k) { + edge_id[k] = tmp[k - begin].first; + edge_y[k] = tmp[k - begin].second; + } + } +} + +/*! + * \brief Query the trapezoidal map for edges bounding a point. + * Returns pair of (edge_below, edge_above). Either can be -1 if at boundary. + */ +inline auto QueryTrapezoidalMap(const TrapezoidalMap& map, const Matrix2i& edge_pts, const VectorReal& x_coords, + const VectorReal& y_coords, const RealT& x, const RealT& y) { + // Safety check + if (map.x_bands.size() < 2 || map.offsets.empty()) { + return std::make_pair(IntT{-1}, IntT{-1}); + } + + const IntT n_bands = static_cast(map.x_bands.size()) - 1; + + // Find the band containing x + const auto& x_bands = map.x_bands; + auto it = std::lower_bound(x_bands.begin(), x_bands.end(), x); + IntT d = static_cast(it - x_bands.begin()); + IntT center_band = std::min(std::max(IntT{0}, d - 1), n_bands - 1); + + // Search in current band AND adjacent bands + RealT best_y_below = -1e300; + RealT best_y_above = 1e300; + IntT edge_below = -1; + IntT edge_above = -1; + + for (IntT band_offset = -1; band_offset <= 1; ++band_offset) { + IntT band_idx = center_band + band_offset; + if (band_idx < 0 || band_idx >= n_bands) continue; + if (band_idx + 1 >= static_cast(map.offsets.size())) continue; + + const IntT begin = map.offsets[band_idx]; + const IntT end = map.offsets[band_idx + 1]; + + if (begin < 0 || end > static_cast(map.edge_id.size()) || begin >= end) { + continue; + } + + for (IntT k = begin; k < end; ++k) { + const IntT e_id = map.edge_id[k]; + + const IntT p0 = edge_pts(e_id, 0); + const IntT p1 = edge_pts(e_id, 1); + const RealT x0 = x_coords[p0], y0 = y_coords[p0]; + const RealT x1 = x_coords[p1], y1 = y_coords[p1]; + + // Check if query x is within edge's x-extent + if (x < std::min(x0, x1) - 1e-10 || x > std::max(x0, x1) + 1e-10) { + continue; + } + + // Compute y-position of edge at query x + RealT edge_y_at_x; + const RealT dx = x1 - x0; + if (std::abs(SU2_TYPE::GetValue(dx)) < 1e-30) { + edge_y_at_x = (y0 + y1) / 2.0; + } else { + const RealT t = (x - x0) / dx; + edge_y_at_x = y0 + t * (y1 - y0); + } + + if (edge_y_at_x <= y + 1e-10 && edge_y_at_x > best_y_below) { + best_y_below = edge_y_at_x; + edge_below = e_id; + } + if (edge_y_at_x >= y - 1e-10 && edge_y_at_x < best_y_above) { + best_y_above = edge_y_at_x; + edge_above = e_id; + } + } + } + + return std::make_pair(edge_below, edge_above); +} + +/*! + * \brief Get triangles adjacent to two query edges (up to 3 triangles). + */ +inline auto AdjacentTriangles(const IntT edge_0, const IntT edge_1, const Matrix2i& edge_faces) { + std::array tris = {-1, -1, -1}; + IntT pos = 0; + + auto insert = [&tris, &pos](const IntT t) { + if (t < 0) return; + for (IntT i = 0; i < pos; ++i) { + if (t == tris[i]) return; + } + tris[pos++] = t; + }; + + auto get_tris = [&edge_faces](const IntT e) { + if (e < 0) return std::array{IntT{-1}, IntT{-1}}; + return std::array{edge_faces(e, 0), edge_faces(e, 1)}; + }; + + for (const auto e : {edge_0, edge_1}) { + for (const auto t : get_tris(e)) { + insert(t); + } + } + return tris; +} + +/*! + * \brief Compute barycentric coordinates of point (x, y) in triangle. + */ +inline auto TriangleCoords(const IntT i_tri, const Matrix3i& triangles, const VectorReal& x, const VectorReal& y, + const RealT x_q, const RealT y_q) { + const IntT p0 = triangles(i_tri, 0); + const IntT p1 = triangles(i_tri, 1); + const IntT p2 = triangles(i_tri, 2); + + const RealT x0 = x[p0], y0 = y[p0]; + const RealT x1 = x[p1], y1 = y[p1]; + const RealT x2 = x[p2], y2 = y[p2]; + + const RealT dx1 = x1 - x0, dy1 = y1 - y0; + const RealT dx2 = x2 - x0, dy2 = y2 - y0; + + auto cross = [](const RealT ux, const RealT uy, const RealT vx, const RealT vy) { return ux * vy - uy * vx; }; + + const RealT det = cross(dx1, dy1, dx2, dy2); + if (std::abs(SU2_TYPE::GetValue(det)) < 1e-30) { + return std::array{RealT{0}, RealT{0}, RealT{0}}; + } + + const RealT inv_det = 1.0 / det; + const RealT a = (cross(x_q, y_q, dx2, dy2) - cross(x0, y0, dx2, dy2)) * inv_det; + const RealT b = (cross(x0, y0, dx1, dy1) - cross(x_q, y_q, dx1, dy1)) * inv_det; + + return std::array{1 - a - b, a, b}; +} + +/*! + * \brief Check if point is inside triangle. + */ +inline bool InTriangle(const std::array& coords, const RealT tol = 0.0) { + return coords[0] >= -tol && coords[1] >= -tol && coords[2] >= -tol; +} + +/*! + * \brief Find triangle containing point using the trapezoidal map. + */ +inline IntT FindTriangle(const TrapezoidalMap& map, const Matrix3i& triangles, const Matrix2i& edge_pts, + const Matrix2i& edge_faces, const VectorReal& x, const VectorReal& y, const RealT x_q, + const RealT y_q, std::array& bary_out) { + const auto [e_below, e_above] = QueryTrapezoidalMap(map, edge_pts, x, y, x_q, y_q); + const auto candidates = AdjacentTriangles(e_below, e_above, edge_faces); + + const RealT tol = 1e-12; + for (const auto t : candidates) { + if (t < 0) continue; + + const auto coords = TriangleCoords(t, triangles, x, y, x_q, y_q); + if (InTriangle(coords, tol)) { + bary_out = coords; + return t; + } + } + + bary_out = {0.0, 0.0, 0.0}; + return -1; +} + +} // namespace su2_lut + +/*! + * \class CTrapezoidalMapFAST + * \brief Wrapper class for SU2 integration. + */ +class CTrapezoidalMapFAST { + private: + su2_lut::Matrix3i triangles; + su2_lut::Matrix2i edge_pts, edge_faces; + su2_lut::VectorReal x_coords, y_coords; + su2_lut::TrapezoidalMap map; + + unsigned long n_points; + unsigned long n_triangles; + bool build_successful; + + public: + CTrapezoidalMapFAST() : n_points(0), n_triangles(0), build_successful(false) {} + + /*! + * \brief Build the trapezoidal map from mesh data. + */ + bool Build(unsigned long num_points, unsigned long num_triangles, const su2double* x, const su2double* y, + const unsigned long* connectivity) { + n_points = num_points; + n_triangles = num_triangles; + build_successful = false; + + if (num_points == 0 || num_triangles == 0) { + x_coords.clear(); + y_coords.clear(); + triangles.resize(0, 3); + edge_pts.resize(0, 2); + edge_faces.resize(0, 2); + map.x_bands.clear(); + map.offsets.clear(); + map.edge_id.clear(); + map.edge_y.clear(); + return false; + } + + // Copy coordinates + x_coords.assign(x, x + num_points); + y_coords.assign(y, y + num_points); + + // Copy triangle connectivity + triangles.resize(num_triangles, 3); + for (size_t i = 0; i < num_triangles; ++i) { + unsigned long v0 = connectivity[3 * i + 0]; + unsigned long v1 = connectivity[3 * i + 1]; + unsigned long v2 = connectivity[3 * i + 2]; + + if (v0 >= num_points || v1 >= num_points || v2 >= num_points) { + triangles(i, 0) = 0; + triangles(i, 1) = 0; + triangles(i, 2) = 0; + } else { + triangles(i, 0) = static_cast(v0); + triangles(i, 1) = static_cast(v1); + triangles(i, 2) = static_cast(v2); + } + } + + // Build the map + su2_lut::ReorderPoints(triangles, x_coords, y_coords); + su2_lut::ExtractEdges(triangles, edge_pts, edge_faces); + su2_lut::BuildTrapezoidalMap(edge_pts, x_coords, y_coords, map); + + if (map.x_bands.empty() || map.offsets.empty()) { + build_successful = false; + return false; + } + + build_successful = true; + return true; + } + + bool IsBuilt() const { return build_successful; } + + /*! + * \brief Find the triangle containing a query point. + */ + bool FindTriangle(su2double val_x, su2double val_y, unsigned long& triangle_id, + std::array& bary_coords) const { + if (n_triangles == 0 || n_points == 0 || map.x_bands.empty()) { + bary_coords = {0.0, 0.0, 0.0}; + return false; + } + + std::array bary; + su2_lut::IntT tri_id = + su2_lut::FindTriangle(map, triangles, edge_pts, edge_faces, x_coords, y_coords, val_x, val_y, bary); + + if (tri_id >= 0) { + triangle_id = static_cast(tri_id); + bary_coords = {bary[0], bary[1], bary[2]}; + return true; + } + return false; + } + + unsigned long GetNTriangles() const { return n_triangles; } + unsigned long GetNPoints() const { return n_points; } + unsigned long GetNEdges() const { return edge_pts.rows(); } + unsigned long GetNBands() const { return map.x_bands.size() > 0 ? map.x_bands.size() - 1 : 0; } +}; + +#endif // CTRAPEZOIDALMAP_FAST_HPP diff --git a/Common/include/option_structure.hpp b/Common/include/option_structure.hpp index 202062fd243d..2e24be709da1 100644 --- a/Common/include/option_structure.hpp +++ b/Common/include/option_structure.hpp @@ -599,12 +599,14 @@ MakePair("ONESPECIES", ONESPECIES) */ enum class ENUM_DATADRIVEN_METHOD { LUT = 0, - MLP = 1 + MLP = 1, + LUT_FAST = 2 /* New LUT implementation based on Pedro's code */ }; static const MapType DataDrivenMethod_Map = { MakePair("LUT", ENUM_DATADRIVEN_METHOD::LUT) MakePair("MLP", ENUM_DATADRIVEN_METHOD::MLP) + MakePair("LUT_FAST", ENUM_DATADRIVEN_METHOD::LUT_FAST) }; /*! diff --git a/Common/src/containers/CLookUpTable.cpp b/Common/src/containers/CLookUpTable.cpp index 98722a6b5b28..005ea10d17c6 100644 --- a/Common/src/containers/CLookUpTable.cpp +++ b/Common/src/containers/CLookUpTable.cpp @@ -46,10 +46,7 @@ CLookUpTable::CLookUpTable(const string& var_file_name_lut, string name_CV1_in, FindTableLimits(name_CV1, name_CV2); - if (rank == MASTER_NODE) - cout << "Detecting all unique edges and setting edge to triangle connectivity " - "..." - << endl; + if (rank == MASTER_NODE) cout << "Detecting all unique edges and setting edge to triangle connectivity ..." << endl; IdentifyUniqueEdges(); @@ -60,18 +57,25 @@ CLookUpTable::CLookUpTable(const string& var_file_name_lut, string name_CV1_in, /* Add additional variable index which will always result in zero when looked up. */ idx_null = names_var.size(); + /*--- NOTE: Trapezoidal map is NOT built here anymore! ---*/ + /*--- Call EnableFastTrapMap() for LUT_FAST (efficient O(n) memory) ---*/ + /*--- Call BuildOriginalTrapMap() for standard LUT (original SU2 implementation) ---*/ + + ComputeInterpCoeffs(); + + if (rank == MASTER_NODE) cout << "LUT fluid model ready for use" << endl; +} + +void CLookUpTable::BuildOriginalTrapMap() { + /*--- Build the original SU2 trapezoidal map (DAG-based, O(n log n) memory) ---*/ + /*--- WARNING: This can use a lot of memory for large tables! ---*/ + if (rank == MASTER_NODE) switch (table_dim) { case 2: - cout << "Building a trapezoidal map for the (" + name_CV1 + ", " + name_CV2 + - ") " - "space ..." - << endl; + cout << "Building a trapezoidal map for the (" + name_CV1 + ", " + name_CV2 + ") space ..." << endl; break; case 3: - cout << "Building trapezoidal map stack for the (" + name_CV1 + ", " + name_CV2 + - ") " - "space ..." - << endl; + cout << "Building trapezoidal map stack for the (" + name_CV1 + ", " + name_CV2 + ") space ..." << endl; break; default: break; @@ -82,11 +86,13 @@ CLookUpTable::CLookUpTable(const string& var_file_name_lut, string name_CV1_in, unsigned short barwidth = 65; bool display_map_info = (n_table_levels < 2); double tmap_memory_footprint = 0; + for (auto i_level = 0ul; i_level < n_table_levels; i_level++) { trap_map_x_y[i_level] = CTrapezoidalMap(GetDataP(name_CV1, i_level), GetDataP(name_CV2, i_level), table_data[i_level].cols(), edges[i_level], edge_to_triangle[i_level], display_map_info); tmap_memory_footprint += trap_map_x_y[i_level].GetMemoryFootprint(); + /* Display a progress bar to monitor table generation process */ if (rank == MASTER_NODE) { su2double progress = su2double(i_level) / n_table_levels; @@ -114,10 +120,68 @@ CLookUpTable::CLookUpTable(const string& var_file_name_lut, string name_CV1_in, cout << "Trapezoidal map memory footprint: " << tmap_memory_footprint << " MB\n"; cout << "Table data memory footprint: " << memory_footprint_data << " MB\n" << endl; } +} - ComputeInterpCoeffs(); +void CLookUpTable::EnableFastTrapMap() { + /*--- Build Memory-efficient trapezoidal map (band-based, O(n) memory) ---*/ - if (rank == MASTER_NODE) cout << "LUT fluid model ready for use" << endl; + use_fast_trap_map_ = true; + + trap_map_x_y_FAST.resize(n_table_levels); + + if (rank == MASTER_NODE) { + cout << endl; + cout << "+--------------------------------------------------+" << endl; + cout << "| Building Memory-efficient trapezoidal maps |" << endl; + cout << "| (O(n) memory, O(log n) queries) |" << endl; + cout << "+--------------------------------------------------+" << endl; + } + + su2double startTime = SU2_MPI::Wtime(); + + for (unsigned long i_level = 0; i_level < n_table_levels; ++i_level) { + const auto n_pts = n_points[i_level]; + const auto n_tris = n_triangles[i_level]; + + if (rank == MASTER_NODE) { + cout << " Level " << i_level << ": " << n_pts << " points, " << n_tris << " triangles ... " << flush; + } + + // Get point coordinates + std::vector x_coords(n_pts); + std::vector y_coords(n_pts); + + for (unsigned long i_point = 0; i_point < n_pts; ++i_point) { + x_coords[i_point] = table_data[i_level][idx_CV1][i_point]; + y_coords[i_point] = table_data[i_level][idx_CV2][i_point]; + } + + // Get triangle connectivity + std::vector tri_conn(3 * n_tris); + for (unsigned long i_tri = 0; i_tri < n_tris; ++i_tri) { + tri_conn[3 * i_tri + 0] = triangles[i_level][i_tri][0]; + tri_conn[3 * i_tri + 1] = triangles[i_level][i_tri][1]; + tri_conn[3 * i_tri + 2] = triangles[i_level][i_tri][2]; + } + + // Build FAST trapezoidal map for this level + trap_map_x_y_FAST[i_level].Build(n_pts, n_tris, x_coords.data(), y_coords.data(), tri_conn.data()); + + if (rank == MASTER_NODE) { + cout << "done." << endl; + } + } + + su2double stopTime = SU2_MPI::Wtime(); + + if (rank == MASTER_NODE) { + cout << "+--------------------------------------------------+" << endl; + cout << "| FAST trapezoidal maps built successfully! |" << endl; + cout << "| Build time: " << setw(10) << setprecision(3) << (stopTime - startTime) << " seconds |" + << endl; + cout << "+--------------------------------------------------+" << endl; + cout << endl; + } } void CLookUpTable::LoadTableRaw(const string& var_file_name_lut) { @@ -617,17 +681,30 @@ bool CLookUpTable::LookUp_XY(const vector& idx_var, vector= *limits_table_x[iLevel].first && val_CV1 <= *limits_table_x[iLevel].second) && - (val_CV2 >= *limits_table_y[iLevel].first && val_CV2 <= *limits_table_y[iLevel].second)) { - /* if so, try to find the triangle that holds the (prog, enth) point */ - id_triangle = trap_map_x_y[iLevel].GetTriangle(val_CV1, val_CV2); - - /* check if point is inside a triangle (if table domain is non-rectangular, - * the previous range check might be true but the point could still be outside of the domain) */ - return IsInTriangle(val_CV1, val_CV2, id_triangle, iLevel); + /*--- Use Memory-efficient trapezoidal map if enabled (LUT_FAST) ---*/ + if (use_fast_trap_map_ && iLevel < trap_map_x_y_FAST.size()) { + std::array bary_coords; + if (trap_map_x_y_FAST[iLevel].FindTriangle(val_CV1, val_CV2, id_triangle, bary_coords)) { + return true; + } + // Fall through to bounds check if not found (point may be outside domain) } + + /*--- Use original trapezoidal map if available ---*/ + if (!use_fast_trap_map_ && iLevel < trap_map_x_y.size() && trap_map_x_y.size() > 0) { + /* check if x value is in table x-dimension range + * and if y is in table y-dimension table range */ + if ((val_CV1 >= *limits_table_x[iLevel].first && val_CV1 <= *limits_table_x[iLevel].second) && + (val_CV2 >= *limits_table_y[iLevel].first && val_CV2 <= *limits_table_y[iLevel].second)) { + /* if so, try to find the triangle that holds the (prog, enth) point */ + id_triangle = trap_map_x_y[iLevel].GetTriangle(val_CV1, val_CV2); + + /* check if point is inside a triangle (if table domain is non-rectangular, + * the previous range check might be true but the point could still be outside of the domain) */ + return IsInTriangle(val_CV1, val_CV2, id_triangle, iLevel); + } + } + return false; } diff --git a/SU2_CFD/src/fluid/CFluidFlamelet.cpp b/SU2_CFD/src/fluid/CFluidFlamelet.cpp index f6fd529df832..12ae1a9dc54b 100644 --- a/SU2_CFD/src/fluid/CFluidFlamelet.cpp +++ b/SU2_CFD/src/fluid/CFluidFlamelet.cpp @@ -55,7 +55,8 @@ CFluidFlamelet::CFluidFlamelet(CConfig* config, su2double value_pressure_operati scalars_vector.resize(n_scalars); table_scalar_names.resize(n_scalars); - for (auto iCV = 0u; iCV < n_control_vars; iCV++) table_scalar_names[iCV] = flamelet_options.controlling_variable_names[iCV]; + for (auto iCV = 0u; iCV < n_control_vars; iCV++) + table_scalar_names[iCV] = flamelet_options.controlling_variable_names[iCV]; /*--- auxiliary species transport equations---*/ for (auto i_aux = 0u; i_aux < n_user_scalars; i_aux++) { @@ -64,10 +65,11 @@ CFluidFlamelet::CFluidFlamelet(CConfig* config, su2double value_pressure_operati controlling_variable_names.resize(n_control_vars); for (auto iCV = 0u; iCV < n_control_vars; iCV++) - controlling_variable_names[iCV] =flamelet_options.controlling_variable_names[iCV]; + controlling_variable_names[iCV] = flamelet_options.controlling_variable_names[iCV]; passive_specie_names.resize(n_user_scalars); - for (auto i_aux = 0u; i_aux < n_user_scalars; i_aux++) passive_specie_names[i_aux] = flamelet_options.user_scalar_names[i_aux]; + for (auto i_aux = 0u; i_aux < n_user_scalars; i_aux++) + passive_specie_names[i_aux] = flamelet_options.user_scalar_names[i_aux]; switch (Kind_DataDriven_Method) { case ENUM_DATADRIVEN_METHOD::LUT: @@ -78,7 +80,23 @@ CFluidFlamelet::CFluidFlamelet(CConfig* config, su2double value_pressure_operati } look_up_table = new CLookUpTable(datadriven_fluid_options.datadriven_filenames[0], table_scalar_names[I_PROGVAR], table_scalar_names[I_ENTH]); + /*--- Build the original trapezoidal map (can use lots of memory!) ---*/ + look_up_table->BuildOriginalTrapMap(); + break; + + case ENUM_DATADRIVEN_METHOD::LUT_FAST: + if (rank == MASTER_NODE) { + cout << "**************************************************" << endl; + cout << "*** initializing the FAST lookup table ***" << endl; + cout << "*** (Memory-efficient trapezoidal map) ***" << endl; + cout << "**************************************************" << endl; + } + look_up_table = new CLookUpTable(datadriven_fluid_options.datadriven_filenames[0], table_scalar_names[I_PROGVAR], + table_scalar_names[I_ENTH]); + /*--- Build Memory-efficient trapezoidal map (O(n) memory) ---*/ + look_up_table->EnableFastTrapMap(); break; + default: if (rank == MASTER_NODE) { cout << "***********************************************" << endl; @@ -86,7 +104,8 @@ CFluidFlamelet::CFluidFlamelet(CConfig* config, su2double value_pressure_operati cout << "***********************************************" << endl; } #ifdef USE_MLPCPP - lookup_mlp = new MLPToolbox::CLookUp_ANN(datadriven_fluid_options.n_filenames, datadriven_fluid_options.datadriven_filenames); + lookup_mlp = new MLPToolbox::CLookUp_ANN(datadriven_fluid_options.n_filenames, + datadriven_fluid_options.datadriven_filenames); if ((rank == MASTER_NODE)) lookup_mlp->DisplayNetworkInfo(); #else SU2_MPI::Error("SU2 was not compiled with MLPCpp enabled (-Denable-mlpcpp=true).", CURRENT_FUNCTION); @@ -104,8 +123,10 @@ CFluidFlamelet::CFluidFlamelet(CConfig* config, su2double value_pressure_operati } CFluidFlamelet::~CFluidFlamelet() { - if (Kind_DataDriven_Method == ENUM_DATADRIVEN_METHOD::LUT) + if (Kind_DataDriven_Method == ENUM_DATADRIVEN_METHOD::LUT || + Kind_DataDriven_Method == ENUM_DATADRIVEN_METHOD::LUT_FAST) delete look_up_table; + #ifdef USE_MLPCPP if (Kind_DataDriven_Method == ENUM_DATADRIVEN_METHOD::MLP) { delete iomap_TD; @@ -113,7 +134,7 @@ CFluidFlamelet::~CFluidFlamelet() { delete iomap_LookUp; delete lookup_mlp; if (preferential_diffusion) delete iomap_PD; - } + } #endif } @@ -184,8 +205,7 @@ void CFluidFlamelet::PreprocessLookUp(CConfig* config) { size_t n_sources = n_control_vars + 2 * n_user_scalars; varnames_Sources.resize(n_sources); val_vars_Sources.resize(n_sources); - for (auto iCV = 0u; iCV < n_control_vars; iCV++) - varnames_Sources[iCV] = flamelet_options.cv_source_names[iCV]; + for (auto iCV = 0u; iCV < n_control_vars; iCV++) varnames_Sources[iCV] = flamelet_options.cv_source_names[iCV]; /*--- No source term for enthalpy ---*/ /*--- For the auxiliary equations, we use a positive (production) and a negative (consumption) term: @@ -206,7 +226,8 @@ void CFluidFlamelet::PreprocessLookUp(CConfig* config) { } else { varnames_LookUp.resize(n_lookups); val_vars_LookUp.resize(n_lookups); - for (auto iLookup = 0u; iLookup < n_lookups; iLookup++) varnames_LookUp[iLookup] = flamelet_options.lookup_names[iLookup]; + for (auto iLookup = 0u; iLookup < n_lookups; iLookup++) + varnames_LookUp[iLookup] = flamelet_options.lookup_names[iLookup]; } /*--- Preferential diffusion scalars ---*/ @@ -226,7 +247,10 @@ void CFluidFlamelet::PreprocessLookUp(CConfig* config) { preferential_diffusion = flamelet_options.preferential_diffusion; switch (Kind_DataDriven_Method) { case ENUM_DATADRIVEN_METHOD::LUT: - preferential_diffusion = look_up_table->CheckForVariables(varnames_PD); + case ENUM_DATADRIVEN_METHOD::LUT_FAST: + if (preferential_diffusion) { + preferential_diffusion = look_up_table->CheckForVariables(varnames_PD); + } break; case ENUM_DATADRIVEN_METHOD::MLP: #ifdef USE_MLPCPP @@ -259,10 +283,10 @@ void CFluidFlamelet::PreprocessLookUp(CConfig* config) { } #endif } else { - for (auto iVar=0u; iVar < varnames_TD.size(); iVar++) { + for (auto iVar = 0u; iVar < varnames_TD.size(); iVar++) { LUT_idx_TD.push_back(look_up_table->GetIndexOfVar(varnames_TD[iVar])); } - for (auto iVar=0u; iVar < varnames_Sources.size(); iVar++) { + for (auto iVar = 0u; iVar < varnames_Sources.size(); iVar++) { unsigned long LUT_idx; if (noSource(varnames_Sources[iVar])) { LUT_idx = look_up_table->GetNullIndex(); @@ -271,16 +295,16 @@ void CFluidFlamelet::PreprocessLookUp(CConfig* config) { } LUT_idx_Sources.push_back(LUT_idx); } - for (auto iVar=0u; iVar < varnames_LookUp.size(); iVar++) { + for (auto iVar = 0u; iVar < varnames_LookUp.size(); iVar++) { unsigned long LUT_idx; if (noSource(varnames_LookUp[iVar])) LUT_idx = look_up_table->GetNullIndex(); - else + else LUT_idx = look_up_table->GetIndexOfVar(varnames_LookUp[iVar]); LUT_idx_LookUp.push_back(LUT_idx); } if (preferential_diffusion) { - for (auto iVar=0u; iVar < varnames_PD.size(); iVar++) { + for (auto iVar = 0u; iVar < varnames_PD.size(); iVar++) { LUT_idx_PD.push_back(look_up_table->GetIndexOfVar(varnames_PD[iVar])); } } @@ -291,7 +315,7 @@ unsigned long CFluidFlamelet::EvaluateDataSet(const vector& input_sca vector& output_refs) { AD::StartPreacc(); for (auto iVar = 0u; iVar < input_scalar.size(); iVar++) AD::SetPreaccIn(input_scalar[iVar]); - + su2double val_enth = input_scalar[I_ENTH]; su2double val_prog = input_scalar[I_PROGVAR]; su2double val_mixfrac = include_mixture_fraction ? input_scalar[I_MIXFRAC] : 0.0; @@ -326,12 +350,12 @@ unsigned long CFluidFlamelet::EvaluateDataSet(const vector& input_sca default: break; } - /*--- Add all quantities and their names to the look up vectors. ---*/ bool inside; switch (Kind_DataDriven_Method) { case ENUM_DATADRIVEN_METHOD::LUT: + case ENUM_DATADRIVEN_METHOD::LUT_FAST: if (output_refs.size() != LUT_idx.size()) SU2_MPI::Error(string("Output vector size incompatible with manifold lookup operation."), CURRENT_FUNCTION); if (include_mixture_fraction) { @@ -339,8 +363,10 @@ unsigned long CFluidFlamelet::EvaluateDataSet(const vector& input_sca } else { inside = look_up_table->LookUp_XY(LUT_idx, output_refs, val_prog, val_enth); } - if (inside) extrapolation = 0; - else extrapolation = 1; + if (inside) + extrapolation = 0; + else + extrapolation = 1; break; case ENUM_DATADRIVEN_METHOD::MLP: refs_vars.resize(output_refs.size()); diff --git a/UnitTests/Common/containers/CLookupTable_tests.cpp b/UnitTests/Common/containers/CLookupTable_tests.cpp index 4b9f06eb9469..fa5668bdc6ac 100644 --- a/UnitTests/Common/containers/CLookupTable_tests.cpp +++ b/UnitTests/Common/containers/CLookupTable_tests.cpp @@ -35,11 +35,12 @@ #include "../../../Common/include/containers/CLookUpTable.hpp" #include "../../../Common/include/containers/CFileReaderLUT.hpp" +/*--- Original 2D LUT test (using default trapezoidal map) ---*/ TEST_CASE("LUTreader", "[tabulated chemistry]") { /*--- smaller and trivial lookup table ---*/ CLookUpTable look_up_table("src/SU2/UnitTests/Common/containers/lookuptable.drg", "ProgressVariable", "EnthalpyTot"); - + look_up_table.BuildOriginalTrapMap(); /*--- string names of the controlling variables ---*/ string name_CV1 = "ProgressVariable"; @@ -84,11 +85,122 @@ TEST_CASE("LUTreader", "[tabulated chemistry]") { CHECK(look_up_dat == Approx(1.1738796125)); } +/*--- Original 3D LUT test (using default trapezoidal map) ---*/ TEST_CASE("LUTreader_3D", "[tabulated chemistry]") { /*--- smaller and trivial lookup table ---*/ CLookUpTable look_up_table("src/SU2/UnitTests/Common/containers/lookuptable_3D.drg", "ProgressVariable", "EnthalpyTot"); + look_up_table.BuildOriginalTrapMap(); + /*--- string names of the controlling variables ---*/ + + string name_CV1 = "ProgressVariable"; + string name_CV2 = "EnthalpyTot"; + + /*--- look up a single value for density ---*/ + + su2double prog = 0.55; + su2double enth = -0.5; + su2double mfrac = 0.5; + string look_up_tag = "Density"; + unsigned long idx_tag = look_up_table.GetIndexOfVar(look_up_tag); + su2double look_up_dat; + look_up_table.LookUp_XYZ(idx_tag, &look_up_dat, prog, enth, mfrac); + CHECK(look_up_dat == Approx(1.02)); + + /*--- look up a single value for viscosity ---*/ + + prog = 0.6; + enth = 0.9; + mfrac = 0.8; + look_up_tag = "Viscosity"; + idx_tag = look_up_table.GetIndexOfVar(look_up_tag); + look_up_table.LookUp_XYZ(idx_tag, &look_up_dat, prog, enth, mfrac); + CHECK(look_up_dat == Approx(0.0000674286)); + + /* find the table limits */ + + auto limitsEnth = look_up_table.GetTableLimitsY(); + CHECK(SU2_TYPE::GetValue(*limitsEnth.first) == Approx(-1.0)); + CHECK(SU2_TYPE::GetValue(*limitsEnth.second) == Approx(1.0)); + + auto limitsProgvar = look_up_table.GetTableLimitsX(); + CHECK(SU2_TYPE::GetValue(*limitsProgvar.first) == Approx(0.0)); + CHECK(SU2_TYPE::GetValue(*limitsProgvar.second) == Approx(1.0)); + + /* lookup value outside of lookup table */ + + prog = 1.10; + enth = 1.1; + mfrac = 2.0; + look_up_tag = "Density"; + idx_tag = look_up_table.GetIndexOfVar(look_up_tag); + look_up_table.LookUp_XYZ(idx_tag, &look_up_dat, prog, enth, mfrac); + CHECK(look_up_dat == Approx(1.1738796125)); +} + +/*--- 2D LUT test using Pedro's FAST trapezoidal map (LUT_FAST) ---*/ +TEST_CASE("LUTreader_FAST", "[tabulated chemistry]") { + /*--- smaller and trivial lookup table ---*/ + + CLookUpTable look_up_table("src/SU2/UnitTests/Common/containers/lookuptable.drg", "ProgressVariable", "EnthalpyTot"); + + /*--- Enable Pedro's FAST trapezoidal map ---*/ + look_up_table.EnableFastTrapMap(); + + /*--- string names of the controlling variables ---*/ + + string name_CV1 = "ProgressVariable"; + string name_CV2 = "EnthalpyTot"; + + /*--- look up a single value for density ---*/ + + su2double prog = 0.55; + su2double enth = -0.5; + string look_up_tag = "Density"; + unsigned long idx_tag = look_up_table.GetIndexOfVar(look_up_tag); + su2double look_up_dat; + look_up_table.LookUp_XY(idx_tag, &look_up_dat, prog, enth); + CHECK(look_up_dat == Approx(1.02)); + + /*--- look up a single value for viscosity ---*/ + + prog = 0.6; + enth = 0.9; + look_up_tag = "Viscosity"; + idx_tag = look_up_table.GetIndexOfVar(look_up_tag); + look_up_table.LookUp_XY(idx_tag, &look_up_dat, prog, enth); + CHECK(look_up_dat == Approx(0.0000674286)); + + /* find the table limits */ + + auto limitsEnth = look_up_table.GetTableLimitsY(); + CHECK(SU2_TYPE::GetValue(*limitsEnth.first) == Approx(-1.0)); + CHECK(SU2_TYPE::GetValue(*limitsEnth.second) == Approx(1.0)); + + auto limitsProgvar = look_up_table.GetTableLimitsX(); + CHECK(SU2_TYPE::GetValue(*limitsProgvar.first) == Approx(0.0)); + CHECK(SU2_TYPE::GetValue(*limitsProgvar.second) == Approx(1.0)); + + /* lookup value outside of lookup table */ + + prog = 1.10; + enth = 1.1; + look_up_tag = "Density"; + idx_tag = look_up_table.GetIndexOfVar(look_up_tag); + look_up_table.LookUp_XY(idx_tag, &look_up_dat, prog, enth); + CHECK(look_up_dat == Approx(1.1738796125)); +} + +/*--- 3D LUT test using Pedro's FAST trapezoidal map (LUT_FAST) ---*/ +TEST_CASE("LUTreader_3D_FAST", "[tabulated chemistry]") { + /*--- smaller and trivial lookup table ---*/ + + CLookUpTable look_up_table("src/SU2/UnitTests/Common/containers/lookuptable_3D.drg", "ProgressVariable", + "EnthalpyTot"); + + /*--- Enable Pedro's FAST trapezoidal map ---*/ + look_up_table.EnableFastTrapMap(); /*--- string names of the controlling variables ---*/