diff --git a/CMakeLists.txt b/CMakeLists.txt index 67e2337210..0c42fbdeb2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -136,6 +136,7 @@ set(BOUT_SOURCES ./include/bout/field_accessor.hxx ./include/bout/field_data.hxx ./include/bout/field_factory.hxx + ./include/bout/fieldops.hxx ./include/bout/fieldgroup.hxx ./include/bout/fieldperp.hxx ./include/bout/fv_ops.hxx @@ -206,6 +207,7 @@ set(BOUT_SOURCES ./include/bout/template_combinations.hxx ./include/bout/tokamak_coordinates.hxx ./include/bout/traits.hxx + ./include/bout/twiddle.hxx ./include/bout/unused.hxx ./include/bout/utils.hxx ./include/bout/vecops.hxx @@ -430,10 +432,17 @@ if(BOUT_GENERATE_FIELDOPS) "clang-format not found, but you have requested to generate code!" ) endif() + if(BOUT_ENABLE_RAJA) + set(GEN_LOOP_EXEC "raja") + elseif(BOUT_ENABLE_OPENMP) + set(GEN_LOOP_EXEC "openmp") + else() + set(GEN_LOOP_EXEC "serial") + endif() add_custom_command( OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/field/generated_fieldops.cxx - COMMAND ${Python3_EXECUTABLE} gen_fieldops.py --filename - generated_fieldops.cxx.tmp + COMMAND ${Python3_EXECUTABLE} gen_fieldops.py --loop-exec ${GEN_LOOP_EXEC} + --filename generated_fieldops.cxx.tmp COMMAND ${ClangFormat_BIN} generated_fieldops.cxx.tmp -i COMMAND ${CMAKE_COMMAND} -E rename generated_fieldops.cxx.tmp generated_fieldops.cxx diff --git a/README.md b/README.md index c9e76a1ff4..84062638fa 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ Homepage found at [http://boutproject.github.io/](http://boutproject.github.io/) BOUT++ needs the following: -* A C++17 compiler +* A C++20 compiler * MPI * NetCDF @@ -113,7 +113,7 @@ You can convert the CITATION.cff file into a Bibtex file as follows: See [CONTRIBUTING.md](CONTRIBUTING.md) and the [manual page](https://bout-dev.readthedocs.io/en/stable/developer_docs/contributing.html) ## License -Copyright 2010-2024 BOUT++ contributors +Copyright 2010-2026 BOUT++ contributors BOUT++ is free software: you can redistribute it 
and/or modify it under the terms of the GNU Lesser General Public License as published by diff --git a/examples/elm-pb-outerloop/elm_pb_outerloop.cxx b/examples/elm-pb-outerloop/elm_pb_outerloop.cxx index 22a7eaa295..dbad416281 100644 --- a/examples/elm-pb-outerloop/elm_pb_outerloop.cxx +++ b/examples/elm-pb-outerloop/elm_pb_outerloop.cxx @@ -1008,7 +1008,8 @@ class ELMpb : public PhysicsModel { vacuum_trans *= pnorm; // Transitions from 0 in core to 1 in vacuum - vac_mask = (1.0 - tanh((P0 - vacuum_pressure) / vacuum_trans)) / 2.0; + Field2D tanh_res = tanh((P0 - vacuum_pressure) / vacuum_trans); + vac_mask = (1.0 - tanh_res) / 2.0; if (spitzer_resist) { // Use Spitzer resistivity @@ -1169,7 +1170,7 @@ class ELMpb : public PhysicsModel { // Only if not restarting: Check initial perturbation // Set U to zero where P0 < vacuum_pressure - U = where(P0 - vacuum_pressure, U, 0.0); + U = where(Field2D{P0 - vacuum_pressure}, U, 0.0); if (constn0) { ubyn = U; @@ -1796,7 +1797,8 @@ class ELMpb : public PhysicsModel { ddt(U) -= 0.5 * Upara2 * bracket(Pi0, Dperp2Phi, bm_exb) / B0; Field3D B0phi = B0 * phi; mesh->communicate(B0phi); - Field3D B0phi0 = B0 * phi0; + Field2D res = B0 * phi0; + Field3D B0phi0 = res; mesh->communicate(B0phi0); ddt(U) += 0.5 * Upara2 * bracket(B0phi, Dperp2Pi0, bm_exb) / B0; ddt(U) += 0.5 * Upara2 * bracket(B0phi0, Dperp2Pi, bm_exb) / B0; diff --git a/examples/elm-pb/elm_pb.cxx b/examples/elm-pb/elm_pb.cxx index 9eb7396987..62cc970869 100644 --- a/examples/elm-pb/elm_pb.cxx +++ b/examples/elm-pb/elm_pb.cxx @@ -6,9 +6,14 @@ *******************************************************************************/ #include +#include +#include #include +#include #include #include +#include +#include #include #include #include @@ -16,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -246,8 +252,8 @@ class ELMpb : public PhysicsModel { std::unique_ptr phiSolver{nullptr}; std::unique_ptr aparSolver{nullptr}; - const Field2D 
N0tanh(BoutReal n0_height, BoutReal n0_ave, BoutReal n0_width, - BoutReal n0_center, BoutReal n0_bottom_x) { + Field2D N0tanh(BoutReal n0_height, BoutReal n0_ave, BoutReal n0_width, + BoutReal n0_center, BoutReal n0_bottom_x) { Field2D result; result.allocate(); @@ -1138,7 +1144,7 @@ class ELMpb : public PhysicsModel { // Only if not restarting: Check initial perturbation // Set U to zero where P0 < vacuum_pressure - U = where(P0 - vacuum_pressure, U, 0.0); + U = where(Field2D{P0 - vacuum_pressure}, U, 0.0); if (constn0) { ubyn = U; @@ -1202,7 +1208,7 @@ class ELMpb : public PhysicsModel { // Perform communications mesh->communicate(comms); - Coordinates* metric = mesh->getCoordinates(); + const Coordinates* metric = mesh->getCoordinates(); //////////////////////////////////////////// // Transitions from 0 in core to 1 in vacuum @@ -1698,10 +1704,10 @@ class ELMpb : public PhysicsModel { // Vacuum solution if (relax_j_vac) { // Calculate the J and Psi profile we're aiming for - Field3D Jtarget = Jpar * (1.0 - vac_mask); // Zero in vacuum + const Field3D Jtarget = Jpar * (1.0 - vac_mask); // Zero in vacuum // Invert laplacian for Psi - Field3D Psitarget = aparSolver->solve(Jtarget); + const Field3D Psitarget = aparSolver->solve(Jtarget); // Add a relaxation term in the vacuum ddt(Psi) = @@ -1832,7 +1838,7 @@ class ELMpb : public PhysicsModel { ddt(U) -= 0.5 * Upara2 * bracket(Pi0, Dperp2Phi, bm_exb) / B0; Field3D B0phi = B0 * phi; mesh->communicate(B0phi); - Field3D B0phi0 = B0 * phi0; + Field2D B0phi0 = B0 * phi0; mesh->communicate(B0phi0); ddt(U) += 0.5 * Upara2 * bracket(B0phi, Dperp2Pi0, bm_exb) / B0; ddt(U) += 0.5 * Upara2 * bracket(B0phi0, Dperp2Pi, bm_exb) / B0; diff --git a/externalpackages/cpptrace b/externalpackages/cpptrace index d4dbf01eca..027f9aee2d 160000 --- a/externalpackages/cpptrace +++ b/externalpackages/cpptrace @@ -1 +1 @@ -Subproject commit d4dbf01ecaa820ede6a47c832c322a408b8cb78a +Subproject commit 027f9aee2d34dbe1c98f26224e1fbe1654cb4aae 
diff --git a/include/bout/array.hxx b/include/bout/array.hxx index 82677fd38a..bb98ff6b0b 100644 --- a/include/bout/array.hxx +++ b/include/bout/array.hxx @@ -68,6 +68,7 @@ struct ArrayData { auto& rm = umpire::ResourceManager::getInstance(); #if BOUT_HAS_CUDA auto allocator = rm.getAllocator(umpire::resource::Pinned); + //auto allocator = rm.getAllocator(umpire::resource::Unified); #else auto allocator = rm.getAllocator("HOST"); #endif diff --git a/include/bout/assert.hxx b/include/bout/assert.hxx index 653c44ed42..28bff1f2ec 100644 --- a/include/bout/assert.hxx +++ b/include/bout/assert.hxx @@ -2,16 +2,16 @@ * Defines a macro ASSERT which throws a BoutException if a given * condition is false. Whether the assertion is tested depends on * the checking level, so assetions can be removed for optimised runs. - * + * * ASSERT ( condition ) * * level - An integer known at compile time. * condition tested if level >= CHECK * * condition - The expression to test - * + * * e.g. ASSERT2( condition ) will only test condition if CHECK >= 2 - * + * */ #ifndef BOUT_ASSERT_H @@ -40,6 +40,7 @@ if (!(condition)) { \ throw BoutException("Assertion failed in {:s}, line {:d}: {:s}", __FILE__, __LINE__, \ #condition); \ + abort(); \ } #else // CHECKLEVEL >= 1 #define ASSERT1(condition) diff --git a/include/bout/bout_types.hxx b/include/bout/bout_types.hxx index c1f06fca7c..7747b937b3 100644 --- a/include/bout/bout_types.hxx +++ b/include/bout/bout_types.hxx @@ -1,8 +1,8 @@ /************************************************************************** - * Copyright 2010 B.D.Dudson, S.Farley, M.V.Umansky, X.Q.Xu + * Copyright 2010 - 2026 BOUT++ contributors + * + * Contact Ben Dudson, dudson2@llnl.gov * - * Contact Ben Dudson, bd512@york.ac.uk - * * This file is part of BOUT++. 
* * BOUT++ is free software: you can redistribute it and/or modify @@ -22,6 +22,8 @@ #ifndef BOUT_TYPES_H #define BOUT_TYPES_H +#include "bout/build_config.hxx" + #include #include @@ -140,4 +142,15 @@ struct enumWrapper { /// Boundary condition function using FuncPtr = BoutReal (*)(BoutReal t, BoutReal x, BoutReal y, BoutReal z); +template +struct Constant { + T val; + struct View { + T v; + View(T v) : v(v) {} + BOUT_HOST_DEVICE T operator()(int) const { return v; } + }; + operator View() const { return {val}; } +}; + #endif // BOUT_TYPES_H diff --git a/include/bout/build_config.hxx b/include/bout/build_config.hxx index c10cd07746..abc78e2cf0 100644 --- a/include/bout/build_config.hxx +++ b/include/bout/build_config.hxx @@ -52,10 +52,22 @@ constexpr auto use_msgstack = static_cast(BOUT_USE_MSGSTACK); #define BOUT_HOST_DEVICE __host__ __device__ #define BOUT_HOST __host__ #define BOUT_DEVICE __device__ +#define BOUT_FORCEINLINE __forceinline__ +#elif defined(_MSC_VER) +#define BOUT_HOST_DEVICE +#define BOUT_HOST +#define BOUT_DEVICE +#define BOUT_FORCEINLINE __forceinline +#elif defined(__clang__) || defined(__GNUC__) +#define BOUT_HOST_DEVICE +#define BOUT_HOST +#define BOUT_DEVICE +#define BOUT_FORCEINLINE inline __attribute__((always_inline)) #else #define BOUT_HOST_DEVICE #define BOUT_HOST #define BOUT_DEVICE +#define BOUT_FORCEINLINE inline #endif #endif // BOUT_BUILD_OPTIONS_HXX diff --git a/include/bout/coordinates_accessor.hxx b/include/bout/coordinates_accessor.hxx index 532351d57a..2376ab5039 100644 --- a/include/bout/coordinates_accessor.hxx +++ b/include/bout/coordinates_accessor.hxx @@ -31,7 +31,7 @@ /// -> If Coordinates data is changed, the cache should be cleared /// by calling CoordinatesAccessor::clear() struct CoordinatesAccessor { - CoordinatesAccessor() = delete; + CoordinatesAccessor() {} /// Constructor from Coordinates /// Copies data from coords, doesn't modify it diff --git a/include/bout/field.hxx b/include/bout/field.hxx index 
4ad69f5b78..a8e1952546 100644 --- a/include/bout/field.hxx +++ b/include/bout/field.hxx @@ -3,9 +3,9 @@ * \brief field base class definition for differencing methods * ************************************************************************** - * Copyright 2010 B.D.Dudson, S.Farley, M.V.Umansky, X.Q.Xu + * Copyright 2010 - 2026 BOUT++ contributors * - * Contact: Ben Dudson, bd512@york.ac.uk + * Contact: Ben Dudson, dudson2@llnl.gov * * This file is part of BOUT++. * @@ -44,6 +44,8 @@ class Field; #include #include +#include "bout/fieldops.hxx" + class Mesh; /// Base class for scalar fields @@ -181,8 +183,30 @@ inline bool areFieldsCompatible(const Field& field1, const Field& field2) { #field2, toString((field2).getDirections())); \ } +#define ASSERT1_EXPR_COMPATIBLE(expr1, expr2) \ + if ((expr1).getLocation() != (expr2).getLocation()) { \ + throw BoutException("Error in {:s}:{:d}\nFields at different position:" \ + "`{:s}` at {:s}, `{:s}` at {:s}", \ + __FILE__, __LINE__, #expr1, toString((expr1).getLocation()), \ + #expr2, toString((expr2).getLocation())); \ + } \ + if ((expr1).getMesh() != (expr2).getMesh()) { \ + throw BoutException("Error in {:s}:{:d}\nFields are on different Meshes:" \ + "`{:s}` at {:p}, `{:s}` at {:p}", \ + __FILE__, __LINE__, #expr1, \ + static_cast((expr1).getMesh()), #expr2, \ + static_cast((expr2).getMesh())); \ + } \ + if (!areDirectionsCompatible((expr1).getDirections(), (expr2).getDirections())) { \ + throw BoutException("Error in {:s}:{:d}\nFields at different directions:" \ + "`{:s}` at {:s}, `{:s}` at {:s}", \ + __FILE__, __LINE__, #expr1, toString((expr1).getDirections()), \ + #expr2, toString((expr2).getDirections())); \ + } + #else #define ASSERT1_FIELDS_COMPATIBLE(field1, field2) ; +#define ASSERT1_EXPR_COMPATIBLE(expr1, expr2) ; #endif /// Return an empty shell field of some type derived from Field, with metadata @@ -341,6 +365,24 @@ inline BoutReal min(const T& f, bool allpe = false, return result; } +template +inline BoutReal 
min(const BinaryExpr& f, bool allpe = false, + const std::string& rgn = "RGN_NOBNDRY") { + const auto& region = f.getMesh()->template getRegion(rgn); + const auto reduction_view = + makeReductionView(static_cast::View>(f), + region.getLinearIndices()); + BoutReal result = + bout::reduce::Min::finalize(reduceExpr(reduction_view)); + + if (allpe) { + BoutReal localresult = result; + MPI_Allreduce(&localresult, &result, 1, MPI_DOUBLE, MPI_MIN, BoutComm::get()); + } + + return result; +} + /// Returns true if all elements of \p f over \p region are equal. By /// default only checks the local processor, use \p allpe to check /// globally @@ -425,6 +467,24 @@ inline BoutReal max(const T& f, bool allpe = false, return result; } +template +inline BoutReal max(const BinaryExpr& f, bool allpe = false, + const std::string& rgn = "RGN_NOBNDRY") { + const auto& region = f.getMesh()->template getRegion(rgn); + const auto reduction_view = + makeReductionView(static_cast::View>(f), + region.getLinearIndices()); + BoutReal result = + bout::reduce::Max::finalize(reduceExpr(reduction_view)); + + if (allpe) { + BoutReal localresult = result; + MPI_Allreduce(&localresult, &result, 1, MPI_DOUBLE, MPI_MAX, BoutComm::get()); + } + + return result; +} + /// Mean of \p f, excluding the boundary/guard cells by default (can /// be changed with \p rgn argument). 
/// @@ -460,6 +520,25 @@ inline BoutReal mean(const T& f, bool allpe = false, return result / static_cast(count); } +template +inline BoutReal mean(const BinaryExpr& f, bool allpe = false, + const std::string& rgn = "RGN_NOBNDRY") { + const auto& region = f.getMesh()->template getRegion(rgn); + const auto reduction_view = + makeReductionView(static_cast::View>(f), + region.getLinearIndices()); + auto state = reduceExpr(reduction_view); + + if (allpe) { + BoutReal localsum = state.sum; + int localcount = state.count; + MPI_Allreduce(&localsum, &state.sum, 1, MPI_DOUBLE, MPI_SUM, BoutComm::get()); + MPI_Allreduce(&localcount, &state.count, 1, MPI_INT, MPI_SUM, BoutComm::get()); + } + + return bout::reduce::Mean::finalize(state); +} + /// Exponent: pow(lhs, lhs) is \p lhs raised to the power of \p rhs /// /// This loops over the entire domain, including guard/boundary cells by @@ -526,29 +605,146 @@ T pow(BoutReal lhs, const T& rhs, const std::string& rgn = "RGN_ALL") { * */ class Field3DParallel; +class FieldPerp; + +namespace bout::detail { +template +std::optional getPerpYIndex(const T& value) { + if constexpr (std::is_same_v, ::FieldPerp>) { + return value.getIndex(); + } else { + return std::nullopt; + } +} + +template +std::optional getPerpYIndex(const BinaryExpr& expr) { + if constexpr (std::is_same_v) { + return expr.getIndex(); + } else { + return std::nullopt; + } +} +} // namespace bout::detail + #ifdef FIELD_FUNC #error This macro has already been defined #else -#define FIELD_FUNC(_name, func) \ - template > \ - inline T _name(const T& f, const std::string& rgn = "RGN_ALL") { \ - /* Check if the input is allocated */ \ - checkData(f); \ - /* Define and allocate the output result */ \ - T result{emptyFrom(f)}; \ - BOUT_FOR(d, result.getRegion(rgn)) { result[d] = func(f[d]); } \ - if constexpr (std::is_base_of_v) { \ - for (int i = 0; i < f.numberParallelSlices(); ++i) { \ - result.yup(i) = func(f.yup(i)); \ - result.ydown(i) = func(f.ydown(i)); \ - } \ - } 
\ - result.name = std::string(#_name "(") + f.name + std::string(")"); \ - checkData(result); \ - return result; \ +#define FIELD_FUNC(name, func) \ + namespace bout::op { \ + struct name { \ + template \ + BOUT_HOST_DEVICE BoutReal operator()(int idx, const LView& L, const RView&) const { \ + return func(L(idx)); \ + } \ + }; \ + }; \ + template > \ + inline auto name(const T& f, const std::string& rgn = "RGN_ALL") { \ + if constexpr (std::is_same_v) { \ + /* Check if the input is allocated */ \ + checkData(f); \ + /* Define and allocate the output result */ \ + T result{emptyFrom(f)}; \ + BOUT_FOR(d, result.getRegion(rgn)) { result[d] = func(f[d]); } \ + for (int i = 0; i < f.numberParallelSlices(); ++i) { \ + result.yup(i) = func(f.yup(i)); \ + result.ydown(i) = func(f.ydown(i)); \ + } \ + result.name = std::string(#name "(") + f.name + std::string(")"); \ + checkData(result); \ + return result; \ + } else { \ + return BinaryExpr{static_cast(f), \ + static_cast(f), \ + bout::op::name{}, \ + f.getMesh(), \ + f.getLocation(), \ + f.getDirections(), \ + std::nullopt, \ + f.getRegion(rgn), \ + bout::detail::getPerpYIndex(f)}; \ + } \ + } \ + template \ + inline auto name(const BinaryExpr& f) { \ + return BinaryExpr, BinaryExpr, \ + bout::op::name>{ \ + static_cast::View>(f), \ + static_cast::View>(f), \ + bout::op::name{}, \ + f.getMesh(), \ + f.getLocation(), \ + f.getDirections(), \ + f.getRegionID(), \ + f.indices, \ + bout::detail::getPerpYIndex(f)}; \ + } \ + template \ + inline auto name(const BinaryExpr& f, const std::string& rgn) { \ + return name(ResT{f}, rgn); \ } #endif +namespace bout::op { +struct Square { + template + BOUT_HOST_DEVICE BoutReal operator()(int idx, const LView& L, const RView&) const { + const BoutReal value = L(idx); + return ::SQ(value); + } +}; +}; // namespace bout::op + +template > +inline auto SQ(const T& f, const std::string& rgn = "RGN_ALL") { + if constexpr (std::is_same_v) { + checkData(f); + T result{emptyFrom(f)}; + if 
(f.hasParallelSlices() and !result.hasParallelSlices()) { + result.splitParallelSlices(); + } + BOUT_FOR(d, result.getRegion(rgn)) { result[d] = ::SQ(f[d]); } + for (size_t i = 0; i < f.numberParallelSlices(); ++i) { + result.yup(i) = SQ(f.yup(i), rgn); + result.ydown(i) = SQ(f.ydown(i), rgn); + } + result.name = std::string("SQ(") + f.name + std::string(")"); + checkData(result); + return result; + } else { + return BinaryExpr{static_cast(f), + static_cast(f), + bout::op::Square{}, + f.getMesh(), + f.getLocation(), + f.getDirections(), + std::nullopt, + f.getRegion(rgn), + bout::detail::getPerpYIndex(f)}; + } +} + +template +inline auto SQ(const BinaryExpr& f) { + return BinaryExpr, BinaryExpr, + bout::op::Square>{ + static_cast::View>(f), + static_cast::View>(f), + bout::op::Square{}, + f.getMesh(), + f.getLocation(), + f.getDirections(), + f.getRegionID(), + f.indices, + bout::detail::getPerpYIndex(f)}; +} + +template +inline auto SQ(const BinaryExpr& f, const std::string& rgn) { + return SQ(ResT{f}, rgn); +} + /// Square root of \p f over region \p rgn /// /// This loops over the entire domain, including guard/boundary cells by diff --git a/include/bout/field2d.hxx b/include/bout/field2d.hxx index efa878ef0a..0a670fa0d0 100644 --- a/include/bout/field2d.hxx +++ b/include/bout/field2d.hxx @@ -45,6 +45,8 @@ class Field2D; #include #include +#include "bout/fieldops.hxx" + #if BOUT_HAS_RAJA #include "RAJA/RAJA.hpp" // using RAJA lib #endif @@ -52,6 +54,15 @@ class Field2D; class Field3D; class Mesh; +template +struct is_expr_field2d> + : std::integral_constant> + && is_expr_field2d_v>) + || (is_expr_constant_v> + && is_expr_field2d_v>) + || (is_expr_field2d_v> + && is_expr_constant_v>)> {}; + /*! 
* \brief 2D X-Y scalar fields * @@ -99,6 +110,14 @@ public: DirectionTypes directions_in = {YDirectionType::Standard, ZDirectionType::Average}); + template < + typename ResT, typename L, typename R, typename Func, + typename = std::enable_if_t<(is_expr_field2d_v && is_expr_field2d_v) + || (is_expr_constant_v && is_expr_field2d_v) + || (is_expr_field2d_v && is_expr_constant_v)>> + Field2D(const BinaryExpr& expr) + : Field2D(evaluateBinaryExpr(expr), expr.getMesh(), expr.getLocation(), + expr.getDirections()) {} /*! * Destructor */ @@ -167,6 +186,21 @@ public: */ Field2D& operator=(BoutReal rhs); + template + std::enable_if_t, Field2D&> + operator=(const BinaryExpr& expr) { + if (!isAllocated() || getMesh() != expr.getMesh()) { + *this = Field2D{expr}; + return *this; + } + + setLocation(expr.getLocation()); + setDirections(expr.getDirections()); + allocate(); + expr.evaluate(&data[0]); + return *this; + } + ///////////////////////////////////////////////////////// // Data access @@ -238,22 +272,32 @@ public: return operator()(jx, jy); } - /// In-place addition. Copy-on-write used if data is shared + Field2D& operator*=(const Field2D& rhs); + Field2D& operator/=(const Field2D& rhs); Field2D& operator+=(const Field2D& rhs); - /// In-place addition. Copy-on-write used if data is shared - Field2D& operator+=(BoutReal rhs); - /// In-place subtraction. Copy-on-write used if data is shared Field2D& operator-=(const Field2D& rhs); - /// In-place subtraction. Copy-on-write used if data is shared - Field2D& operator-=(BoutReal rhs); - /// In-place multiplication. Copy-on-write used if data is shared - Field2D& operator*=(const Field2D& rhs); - /// In-place multiplication. Copy-on-write used if data is shared Field2D& operator*=(BoutReal rhs); - /// In-place division. Copy-on-write used if data is shared - Field2D& operator/=(const Field2D& rhs); - /// In-place division. 
Copy-on-write used if data is shared Field2D& operator/=(BoutReal rhs); + Field2D& operator+=(BoutReal rhs); + Field2D& operator-=(BoutReal rhs); + +#define FIELD2D_OP_EQUALS(OP_SYM) \ + template \ + std::enable_if_t || is_expr_constant_v, Field2D&> \ + operator OP_SYM## = (R rhs) { \ + if (data.unique()) { \ + auto expr = (*this)OP_SYM rhs; \ + expr.evaluate(&data[0]); \ + } else { \ + (*this) = (*this)OP_SYM rhs; \ + } \ + return *this; \ + } + + FIELD2D_OP_EQUALS(+) + FIELD2D_OP_EQUALS(-) + FIELD2D_OP_EQUALS(*) + FIELD2D_OP_EQUALS(/) // FieldData virtual functions FieldType field_type() const override { return FieldType::field2d; } @@ -284,7 +328,40 @@ public: Field2D& asField3DParallel() { return *this; } const Field2D& asField3DParallel() const { return *this; } + struct View { + BoutReal* data; + int mul = 1; + int div = 1; + BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal operator()(int idx) const { + return data[(idx * mul / div)]; + } + BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal& operator[](int idx) const { + return data[(idx * mul) / div]; + } + + View& setScale(int mul, int div) { + this->mul = mul; + this->div = div; + return *this; + } + }; + operator View() { return View{&data[0]}; } + operator View() const { return View{const_cast(&data[0])}; } + + BOUT_DEVICE inline BoutReal operator()(int i) { return View()(i); } + BOUT_DEVICE inline BoutReal operator()(int i) const { return View()(i); } + private: + template + static Array evaluateBinaryExpr(const BinaryExpr& expr) { + const auto* mesh = expr.getMesh(); + ASSERT1(mesh != nullptr); + + Array data{mesh->LocalNx * mesh->LocalNy}; + expr.evaluate(&data[0]); + return data; + } + /// Internal data array. 
Handles allocation/freeing of memory Array data; @@ -301,31 +378,177 @@ FieldPerp operator-(const Field2D& lhs, const FieldPerp& rhs); FieldPerp operator*(const Field2D& lhs, const FieldPerp& rhs); FieldPerp operator/(const Field2D& lhs, const FieldPerp& rhs); -Field2D operator+(const Field2D& lhs, const Field2D& rhs); -Field2D operator-(const Field2D& lhs, const Field2D& rhs); -Field2D operator*(const Field2D& lhs, const Field2D& rhs); -Field2D operator/(const Field2D& lhs, const Field2D& rhs); +#define FIELD2D_FIELD2D_FIELD2D_OP(OP_SYM, OP_TYPE) \ + template \ + std::enable_if_t && is_expr_field2d_v, \ + BinaryExpr> \ + operator OP_SYM(const L& lhs, const R& rhs) { \ + return BinaryExpr{ \ + static_cast(lhs), \ + static_cast(rhs), \ + bout::op::OP_TYPE{}, \ + lhs.getMesh(), \ + lhs.getLocation(), \ + lhs.getDirections(), \ + std::nullopt, \ + lhs.getMesh()->getRegion2D("RGN_ALL")}; \ + } -Field3D operator+(const Field2D& lhs, const Field3D& rhs); -Field3D operator-(const Field2D& lhs, const Field3D& rhs); -Field3D operator*(const Field2D& lhs, const Field3D& rhs); -Field3D operator/(const Field2D& lhs, const Field3D& rhs); +FIELD2D_FIELD2D_FIELD2D_OP(+, Add) +FIELD2D_FIELD2D_FIELD2D_OP(-, Sub) +FIELD2D_FIELD2D_FIELD2D_OP(*, Mul) +FIELD2D_FIELD2D_FIELD2D_OP(/, Div) + +#define FIELD3D_FIELD2D_FIELD3D_OP(OP_SYM, OP_TYPE) \ + template \ + std::enable_if_t && is_expr_field3d_v, \ + BinaryExpr> \ + operator OP_SYM(const L& lhs, const R& rhs) { \ + ASSERT1_EXPR_COMPATIBLE(lhs, rhs); \ + auto regionID = rhs.getRegionID(); \ + int mesh_nz = rhs.getMesh()->LocalNz; \ + return BinaryExpr{ \ + static_cast(lhs).setScale(1, mesh_nz), \ + static_cast(rhs), \ + bout::op::OP_TYPE{}, \ + rhs.getMesh(), \ + rhs.getLocation(), \ + rhs.getDirections(), \ + regionID, \ + rhs.getMesh()->getRegion("RGN_ALL")}; \ + } -Field2D operator+(const Field2D& lhs, BoutReal rhs); -Field2D operator-(const Field2D& lhs, BoutReal rhs); -Field2D operator*(const Field2D& lhs, BoutReal rhs); -Field2D 
operator/(const Field2D& lhs, BoutReal rhs); +FIELD3D_FIELD2D_FIELD3D_OP(+, Add) +FIELD3D_FIELD2D_FIELD3D_OP(-, Sub) +FIELD3D_FIELD2D_FIELD3D_OP(*, Mul) +FIELD3D_FIELD2D_FIELD3D_OP(/, Div) + +#define FIELD2D_FIELD2D_BOUTREAL_OP(OP_SYM, OP_TYPE) \ + template \ + std::enable_if_t && is_expr_constant_v, \ + BinaryExpr, bout::op::OP_TYPE>> \ + operator OP_SYM(const L& lhs, R rhs) { \ + return BinaryExpr, bout::op::OP_TYPE>{ \ + static_cast(lhs), \ + static_cast::View>(rhs), \ + bout::op::OP_TYPE{}, \ + lhs.getMesh(), \ + lhs.getLocation(), \ + lhs.getDirections(), \ + std::nullopt, \ + lhs.getMesh()->getRegion2D("RGN_ALL")}; \ + } -Field2D operator+(BoutReal lhs, const Field2D& rhs); -Field2D operator-(BoutReal lhs, const Field2D& rhs); -Field2D operator*(BoutReal lhs, const Field2D& rhs); -Field2D operator/(BoutReal lhs, const Field2D& rhs); +FIELD2D_FIELD2D_BOUTREAL_OP(+, Add) +FIELD2D_FIELD2D_BOUTREAL_OP(-, Sub) +FIELD2D_FIELD2D_BOUTREAL_OP(*, Mul) +FIELD2D_FIELD2D_BOUTREAL_OP(/, Div) + +#define FIELD2D_BOUTREAL_FIELD2D_OP(OP_SYM, OP_TYPE) \ + template \ + std::enable_if_t && is_expr_field2d_v, \ + BinaryExpr, R, bout::op::OP_TYPE>> \ + operator OP_SYM(L lhs, const R& rhs) { \ + return BinaryExpr, R, bout::op::OP_TYPE>{ \ + static_cast::View>(lhs), \ + static_cast(rhs), \ + bout::op::OP_TYPE{}, \ + rhs.getMesh(), \ + rhs.getLocation(), \ + rhs.getDirections(), \ + std::nullopt, \ + rhs.getMesh()->getRegion2D("RGN_ALL")}; \ + } + +FIELD2D_BOUTREAL_FIELD2D_OP(+, Add) +FIELD2D_BOUTREAL_FIELD2D_OP(-, Sub) +FIELD2D_BOUTREAL_FIELD2D_OP(*, Mul) +FIELD2D_BOUTREAL_FIELD2D_OP(/, Div) + +template +std::enable_if_t && is_expr_field2d_v, + BinaryExpr> +if_else(bool condition, const L& lhs, const R& rhs) { + return BinaryExpr{ + static_cast(lhs), + static_cast(rhs), + bout::op::IfElse{condition}, + lhs.getMesh(), + lhs.getLocation(), + lhs.getDirections(), + std::nullopt, + lhs.getMesh()->getRegion2D("RGN_ALL")}; +} + +template +std::enable_if_t && is_expr_field3d_v, + 
BinaryExpr> +if_else(bool condition, const L& lhs, const R& rhs) { + ASSERT1_EXPR_COMPATIBLE(lhs, rhs); + auto regionID = rhs.getRegionID(); + int mesh_nz = rhs.getMesh()->LocalNz; + return BinaryExpr{ + static_cast(lhs).setScale(1, mesh_nz), + static_cast(rhs), + bout::op::IfElse{condition}, + rhs.getMesh(), + rhs.getLocation(), + rhs.getDirections(), + regionID, + rhs.getMesh()->getRegion("RGN_ALL")}; +} + +template +std::enable_if_t && is_expr_constant_v, + BinaryExpr, bout::op::IfElse>> +if_else(bool condition, const L& lhs, R rhs) { + return BinaryExpr, bout::op::IfElse>{ + static_cast(lhs), + static_cast::View>(rhs), + bout::op::IfElse{condition}, + lhs.getMesh(), + lhs.getLocation(), + lhs.getDirections(), + std::nullopt, + lhs.getMesh()->getRegion2D("RGN_ALL")}; +} + +template +std::enable_if_t && is_expr_field2d_v, + BinaryExpr, R, bout::op::IfElse>> +if_else(bool condition, L lhs, const R& rhs) { + return BinaryExpr, R, bout::op::IfElse>{ + static_cast::View>(lhs), + static_cast(rhs), + bout::op::IfElse{condition}, + rhs.getMesh(), + rhs.getLocation(), + rhs.getDirections(), + std::nullopt, + rhs.getMesh()->getRegion2D("RGN_ALL")}; +} + +template || is_expr_field3d_v>> +auto if_else_zero(bool condition, const L& lhs) { + return if_else(condition, lhs, 0.0); +} /*! * Unary minus. Returns the negative of given field, * iterates over whole domain including guard/boundary cells. 
*/ -Field2D operator-(const Field2D& f); +inline auto operator-(const Field2D& f) { + return BinaryExpr, Field2D, bout::op::Mul>{ + static_cast::View>(-1.0), + static_cast(f), + bout::op::Mul{}, + f.getMesh(), + f.getLocation(), + f.getDirections(), + std::nullopt, + f.getRegion("RGN_ALL")}; +} // Non-member functions diff --git a/include/bout/field3d.hxx b/include/bout/field3d.hxx index fabe2b646d..09c7f1ce55 100644 --- a/include/bout/field3d.hxx +++ b/include/bout/field3d.hxx @@ -31,6 +31,7 @@ class Field3D; #include "bout/array.hxx" #include "bout/assert.hxx" #include "bout/bout_types.hxx" +#include "bout/build_config.hxx" #include "bout/field.hxx" #include "bout/field2d.hxx" #include "bout/field_data.hxx" @@ -43,6 +44,7 @@ class Field3D; #include #include #include +#include #include #include @@ -50,6 +52,8 @@ class Mesh; class Options; class Field3DParallel; +#include "bout/fieldops.hxx" + /// Class for 3D X-Y-Z scalar fields /*! This class represents a scalar field defined over the mesh. 
@@ -195,8 +199,15 @@ public: Field3D(Array data, Mesh* localmesh, CELL_LOC location = CELL_CENTRE, DirectionTypes directions_in = {YDirectionType::Standard, ZDirectionType::Standard}); + template || is_expr_field3d_v>> + Field3D(const BinaryExpr& expr) + : Field3D(evaluateBinaryExpr(expr), expr.getMesh(), expr.getLocation(), + expr.getDirections()) { + setRegion(expr.getRegionID()); + } /// Destructor - ~Field3D() override; + ~Field3D() override { delete deriv; } /// Data type stored in this field using value_type = BoutReal; @@ -444,39 +455,111 @@ public: return &data[(jx * ny + jy) * nz]; } + struct View { + BoutReal* data; + int mul = 1; + int div = 1; + BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal operator()(int idx) const { + return data[(idx * mul) / div]; + } + BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal& operator[](int idx) const { + return data[(idx * mul) / div]; + } + + View& setScale(int mul, int div) { + this->mul = mul; + this->div = div; + return *this; + } + }; + operator View() { return View{&data[0]}; } + operator View() const { return View{const_cast(&data[0])}; } + //operator View() const { return View{&data[0]}; } + ///////////////////////////////////////////////////////// // Operators /// Assignment operators ///@{ Field3D& operator=(const Field3D& rhs); - Field3D& operator=(Field3D&& rhs) noexcept; + Field3D& operator=(Field3D&& rhs) noexcept { + track(rhs, "operator="); + + // Move parallel slices or delete existing ones. 
+ yup_fields = std::move(rhs.yup_fields); + ydown_fields = std::move(rhs.ydown_fields); + + // Move the data and data sizes + nx = rhs.nx; + ny = rhs.ny; + nz = rhs.nz; + regionID = rhs.regionID; + + data = std::move(rhs.data); + + // Move base slice last + Field::operator=(std::move(rhs)); + + return *this; + } Field3D& operator=(const Field2D& rhs); /// return void, as only part initialised void operator=(const FieldPerp& rhs); Field3D& operator=(BoutReal val); - ///@} + template + std::enable_if_t, Field3D&> + operator=(const BinaryExpr& expr) { + if (!isAllocated() || getMesh() != expr.getMesh()) { + *this = Field3D{expr}; + return *this; + } + + clearParallelSlices(); + setRegion(expr.getRegionID()); + setLocation(expr.getLocation()); + setDirections(expr.getDirections()); + allocate(); + expr.evaluate(&data[0]); + return *this; + } - /// Addition operators - ///@{ - Field3D& operator+=(const Field3D& rhs); - Field3D& operator+=(const Field2D& rhs); - Field3D& operator+=(BoutReal rhs); ///@} - /// Subtraction operators - ///@{ - Field3D& operator-=(const Field3D& rhs); - Field3D& operator-=(const Field2D& rhs); - Field3D& operator-=(BoutReal rhs); +#define FIELD3D_OP_EQUALS(OP_SYM) \ + template \ + std::enable_if_t< \ + is_expr_field3d_v || is_expr_field2d_v || is_expr_constant_v, Field3D&> \ + operator OP_SYM## = (const R& rhs) { \ + if (data.unique()) { \ + clearParallelSlices(); \ + auto expr = (*this)OP_SYM rhs; \ + expr.evaluate(&data[0]); \ + } else { \ + (*this) = (*this)OP_SYM rhs; \ + } \ + return *this; \ + } + + FIELD3D_OP_EQUALS(+) + FIELD3D_OP_EQUALS(-) + FIELD3D_OP_EQUALS(*) + FIELD3D_OP_EQUALS(/) + ///@} - /// Multiplication operators - ///@{ Field3D& operator*=(const Field3D& rhs); + Field3D& operator+=(const Field3D& rhs); + Field3D& operator-=(const Field3D& rhs); + Field3D& operator*=(const Field3DParallel& rhs); + Field3D& operator/=(const Field3DParallel& rhs); + Field3D& operator+=(const Field3DParallel& rhs); + Field3D& operator-=(const 
Field3DParallel& rhs); Field3D& operator*=(const Field2D& rhs); + Field3D& operator+=(const Field2D& rhs); + Field3D& operator-=(const Field2D& rhs); Field3D& operator*=(BoutReal rhs); - ///@} + Field3D& operator+=(BoutReal rhs); + Field3D& operator-=(BoutReal rhs); /// Division operators ///@{ @@ -582,35 +665,189 @@ protected: template > void _track(const T& change, std::string operation); void _track(const BoutReal& change, std::string operation); + + template + static Array evaluateBinaryExpr(const BinaryExpr& expr) { + const auto* mesh = expr.getMesh(); + ASSERT1(mesh != nullptr); + + Array data{mesh->LocalNx * mesh->LocalNy * mesh->LocalNz}; + expr.evaluate(&data[0]); + return data; + } }; // Non-member overloaded operators +template +constexpr bool always_false = false; + // Binary operators FieldPerp operator+(const Field3D& lhs, const FieldPerp& rhs); FieldPerp operator-(const Field3D& lhs, const FieldPerp& rhs); FieldPerp operator*(const Field3D& lhs, const FieldPerp& rhs); FieldPerp operator/(const Field3D& lhs, const FieldPerp& rhs); -Field3D operator+(const Field3D& lhs, const Field3D& rhs); -Field3D operator-(const Field3D& lhs, const Field3D& rhs); -Field3D operator*(const Field3D& lhs, const Field3D& rhs); -Field3D operator/(const Field3D& lhs, const Field3D& rhs); +#define FIELD3D_FIELD3D_FIELD3D_OP(OP_SYM, OP_TYPE) \ + template && is_expr_field3d_v>> \ + BinaryExpr operator OP_SYM(const L& lhs, \ + const R& rhs) { \ + ASSERT1_EXPR_COMPATIBLE(lhs, rhs); \ + auto regionID = \ + lhs.getMesh()->getCommonRegion(lhs.getRegionID(), rhs.getRegionID()); \ + return BinaryExpr{ \ + static_cast(lhs), \ + static_cast(rhs), \ + bout::op::OP_TYPE{}, \ + lhs.getMesh(), \ + lhs.getLocation(), \ + lhs.getDirections(), \ + regionID, \ + (regionID.has_value() ? 
lhs.getMesh()->getRegion(regionID.value()) \ + : lhs.getMesh()->getRegion("RGN_ALL"))}; \ + } + +FIELD3D_FIELD3D_FIELD3D_OP(+, Add) +FIELD3D_FIELD3D_FIELD3D_OP(-, Sub) +FIELD3D_FIELD3D_FIELD3D_OP(*, Mul) +FIELD3D_FIELD3D_FIELD3D_OP(/, Div) + +#define FIELD3D_FIELD3D_FIELD2D_OP(OP_SYM, OP_TYPE) \ + template \ + std::enable_if_t && is_expr_field2d_v, \ + BinaryExpr> \ + operator OP_SYM(const L& lhs, const R& rhs) { \ + ASSERT1_EXPR_COMPATIBLE(lhs, rhs); \ + auto regionID = lhs.getRegionID(); \ + int mesh_nz = lhs.getMesh()->LocalNz; \ + return BinaryExpr{ \ + static_cast(lhs), \ + static_cast(rhs).setScale(1, mesh_nz), \ + bout::op::OP_TYPE{}, \ + lhs.getMesh(), \ + lhs.getLocation(), \ + lhs.getDirections(), \ + regionID, \ + lhs.getMesh()->getRegion("RGN_ALL")}; \ + } -Field3D operator+(const Field3D& lhs, const Field2D& rhs); -Field3D operator-(const Field3D& lhs, const Field2D& rhs); -Field3D operator*(const Field3D& lhs, const Field2D& rhs); -Field3D operator/(const Field3D& lhs, const Field2D& rhs); +FIELD3D_FIELD3D_FIELD2D_OP(+, Add) +FIELD3D_FIELD3D_FIELD2D_OP(-, Sub) +FIELD3D_FIELD3D_FIELD2D_OP(*, Mul) +FIELD3D_FIELD3D_FIELD2D_OP(/, Div) + +#define FIELD3D_FIELD3D_BOUTREAL_OP(OP_SYM, OP_TYPE) \ + template \ + std::enable_if_t && is_expr_constant_v, \ + BinaryExpr, bout::op::OP_TYPE>> \ + operator OP_SYM(const L& lhs, R rhs) { \ + auto regionID = lhs.getRegionID(); \ + return BinaryExpr, bout::op::OP_TYPE>{ \ + static_cast(lhs), \ + static_cast::View>(rhs), \ + bout::op::OP_TYPE{}, \ + lhs.getMesh(), \ + lhs.getLocation(), \ + lhs.getDirections(), \ + regionID, \ + lhs.getMesh()->getRegion("RGN_ALL")}; \ + } -Field3D operator+(const Field3D& lhs, BoutReal rhs); -Field3D operator-(const Field3D& lhs, BoutReal rhs); -Field3D operator*(const Field3D& lhs, BoutReal rhs); -Field3D operator/(const Field3D& lhs, BoutReal rhs); +FIELD3D_FIELD3D_BOUTREAL_OP(+, Add) +FIELD3D_FIELD3D_BOUTREAL_OP(-, Sub) +FIELD3D_FIELD3D_BOUTREAL_OP(*, Mul) 
+FIELD3D_FIELD3D_BOUTREAL_OP(/, Div) + +#define FIELD3D_BOUTREAL_FIELD3D_OP(OP_SYM, OP_TYPE) \ + template \ + std::enable_if_t && is_expr_field3d_v, \ + BinaryExpr, R, bout::op::OP_TYPE>> \ + operator OP_SYM(const L& lhs, const R& rhs) { \ + auto regionID = rhs.getRegionID(); \ + return BinaryExpr, R, bout::op::OP_TYPE>{ \ + static_cast::View>(lhs), \ + static_cast(rhs), \ + bout::op::OP_TYPE{}, \ + rhs.getMesh(), \ + rhs.getLocation(), \ + rhs.getDirections(), \ + regionID, \ + rhs.getMesh()->getRegion("RGN_ALL")}; \ + } -Field3D operator+(BoutReal lhs, const Field3D& rhs); -Field3D operator-(BoutReal lhs, const Field3D& rhs); -Field3D operator*(BoutReal lhs, const Field3D& rhs); -Field3D operator/(BoutReal lhs, const Field3D& rhs); +FIELD3D_BOUTREAL_FIELD3D_OP(+, Add) +FIELD3D_BOUTREAL_FIELD3D_OP(-, Sub) +FIELD3D_BOUTREAL_FIELD3D_OP(*, Mul) +FIELD3D_BOUTREAL_FIELD3D_OP(/, Div) + +template && is_expr_field3d_v>> +BinaryExpr if_else(bool condition, const L& lhs, + const R& rhs) { + ASSERT1_EXPR_COMPATIBLE(lhs, rhs); + auto regionID = lhs.getMesh()->getCommonRegion(lhs.getRegionID(), rhs.getRegionID()); + return BinaryExpr{ + static_cast(lhs), + static_cast(rhs), + bout::op::IfElse{condition}, + lhs.getMesh(), + lhs.getLocation(), + lhs.getDirections(), + regionID, + (regionID.has_value() ? 
lhs.getMesh()->getRegion(regionID.value()) + : lhs.getMesh()->getRegion("RGN_ALL"))}; +} + +template +std::enable_if_t && is_expr_field2d_v, + BinaryExpr> +if_else(bool condition, const L& lhs, const R& rhs) { + ASSERT1_EXPR_COMPATIBLE(lhs, rhs); + auto regionID = lhs.getRegionID(); + int mesh_nz = lhs.getMesh()->LocalNz; + return BinaryExpr{ + static_cast(lhs), + static_cast(rhs).setScale(1, mesh_nz), + bout::op::IfElse{condition}, + lhs.getMesh(), + lhs.getLocation(), + lhs.getDirections(), + regionID, + lhs.getMesh()->getRegion("RGN_ALL")}; +} + +template +std::enable_if_t && is_expr_constant_v, + BinaryExpr, bout::op::IfElse>> +if_else(bool condition, const L& lhs, R rhs) { + auto regionID = lhs.getRegionID(); + return BinaryExpr, bout::op::IfElse>{ + static_cast(lhs), + static_cast::View>(rhs), + bout::op::IfElse{condition}, + lhs.getMesh(), + lhs.getLocation(), + lhs.getDirections(), + regionID, + lhs.getMesh()->getRegion("RGN_ALL")}; +} + +template +std::enable_if_t && is_expr_field3d_v, + BinaryExpr, R, bout::op::IfElse>> +if_else(bool condition, const L& lhs, const R& rhs) { + auto regionID = rhs.getRegionID(); + return BinaryExpr, R, bout::op::IfElse>{ + static_cast::View>(lhs), + static_cast(rhs), + bout::op::IfElse{condition}, + rhs.getMesh(), + rhs.getLocation(), + rhs.getDirections(), + regionID, + rhs.getMesh()->getRegion("RGN_ALL")}; +} Field3DParallel operator+(const Field3D& lhs, const Field3DParallel& rhs); Field3DParallel operator-(const Field3D& lhs, const Field3DParallel& rhs); @@ -641,7 +878,18 @@ Field3DParallel operator/(const Field3DParallel& lhs, BoutReal rhs); * Unary minus. Returns the negative of given field, * iterates over whole domain including guard/boundary cells. 
*/ -Field3D operator-(const Field3D& f); +inline auto operator-(const Field3D& f) { + auto regionID = f.getRegionID(); + return BinaryExpr, Field3D, bout::op::Mul>{ + static_cast::View>(-1.0), + static_cast(f), + bout::op::Mul{}, + f.getMesh(), + f.getLocation(), + f.getDirections(), + regionID, + f.getRegion("RGN_ALL")}; +} // Non-member functions @@ -843,32 +1091,37 @@ Field3DParallel Field3D::asField3DParallel() { } Field3DParallel Field3D::asField3DParallel() const { return Field3DParallel(*this); } -inline Field3D operator+(const Field2D& lhs, const Field3DParallel& rhs) { - return lhs + rhs.asField3D(); -} -inline Field3D operator-(const Field2D& lhs, const Field3DParallel& rhs) { - return lhs + rhs.asField3D(); -} -inline Field3D operator*(const Field2D& lhs, const Field3DParallel& rhs) { - return lhs + rhs.asField3D(); -} -inline Field3D operator/(const Field2D& lhs, const Field3DParallel& rhs) { - return lhs + rhs.asField3D(); +inline Field3D& Field3D::operator*=(const Field3DParallel& rhs) { + return (*this) *= rhs.asField3D(); } -inline Field3D operator+(const Field3DParallel& lhs, const Field2D& rhs) { - return lhs.asField3D() + rhs; +inline Field3D& Field3D::operator/=(const Field3DParallel& rhs) { + return (*this) /= rhs.asField3D(); } -inline Field3D operator-(const Field3DParallel& lhs, const Field2D& rhs) { - return lhs.asField3D() - rhs; -} -inline Field3D operator*(const Field3DParallel& lhs, const Field2D& rhs) { - return lhs.asField3D() * rhs; + +inline Field3D& Field3D::operator+=(const Field3DParallel& rhs) { + return (*this) += rhs.asField3D(); } -inline Field3D operator/(const Field3DParallel& lhs, const Field2D& rhs) { - return lhs.asField3D() / rhs; + +inline Field3D& Field3D::operator-=(const Field3DParallel& rhs) { + return (*this) -= rhs.asField3D(); } +template +struct is_expr_field3d> + : std::integral_constant>::value + || is_expr_field3d_v>> {}; + +Field3D operator+(const Field2D& lhs, const Field3DParallel& rhs); +Field3D 
operator-(const Field2D& lhs, const Field3DParallel& rhs); +Field3D operator*(const Field2D& lhs, const Field3DParallel& rhs); +Field3D operator/(const Field2D& lhs, const Field3DParallel& rhs); + +Field3D operator+(const Field3DParallel& lhs, const Field2D& rhs); +Field3D operator-(const Field3DParallel& lhs, const Field2D& rhs); +Field3D operator*(const Field3DParallel& lhs, const Field2D& rhs); +Field3D operator/(const Field3DParallel& lhs, const Field2D& rhs); + inline Field3DParallel filledFrom(const Field3DParallel& f, const std::function& func) { diff --git a/include/bout/field_accessor.hxx b/include/bout/field_accessor.hxx index 69b58da979..7c0ba2eb7c 100644 --- a/include/bout/field_accessor.hxx +++ b/include/bout/field_accessor.hxx @@ -57,10 +57,16 @@ struct FieldAccessor { /// Constructor from Field3D /// /// @param[in] f The field to access. Must already be allocated - explicit FieldAccessor(FieldType& f) : coords(f.getCoordinates()) { + explicit FieldAccessor(FieldType& f) { ASSERT0(f.getLocation() == location); ASSERT0(f.isAllocated()); + if (auto* Coords = f.getCoordinates()) { + coords = CoordinatesAccessor{Coords}; + } else { + coords = CoordinatesAccessor{}; + } + data = BoutRealArray{&f(0, 0, 0)}; // Field size @@ -81,15 +87,20 @@ struct FieldAccessor { ddt = BoutRealArray{&(f.timeDeriv()->operator()(0, 0, 0))}; } + explicit FieldAccessor(const FieldType& f) : FieldAccessor(const_cast(f)) {} + /// Provide shorthand for access to field data. /// Does not convert between 3D and 2D indices, /// so fa[i] is equivalent to fa.data[i]. 
/// BOUT_HOST_DEVICE inline const BoutReal& operator[](int ind) const { return data[ind]; } + BOUT_HOST_DEVICE inline BoutReal& operator[](int ind) { return data[ind]; } + BOUT_DEVICE inline BoutReal operator()(int i) const { return data[i]; } BOUT_HOST_DEVICE inline const BoutReal& operator[](const Ind3D& ind) const { return data[ind.ind]; } + BOUT_HOST_DEVICE inline BoutReal& operator[](const Ind3D& ind) { return data[ind.ind]; } // Pointers to the field data arrays // These are wrapped in BoutRealArray types so they can be indexed with Ind3D or int @@ -115,6 +126,9 @@ struct FieldAccessor { template using Field2DAccessor = FieldAccessor; +template +using Field3DAccessor = FieldAccessor; + /// Syntactic sugar for time derivative of a field /// /// Usage: @@ -130,4 +144,28 @@ BOUT_HOST_DEVICE inline BoutRealArray& ddt(const FieldAccessor(fa.ddt); } +struct FieldPerpAccessor { + FieldPerpAccessor() = delete; + + int nx, nz; + int yindex; + BoutReal* data; + + explicit FieldPerpAccessor(const FieldPerp& f) { + ASSERT0(f.isAllocated()); + + data = BoutRealArray{const_cast(&f(0, 0, 0))}; + + // Field size + nx = f.getNx(); + nz = f.getNz(); + + yindex = f.getIndex(); + } + + BOUT_HOST_DEVICE int getIndex() const { return yindex; } + BOUT_HOST_DEVICE inline const BoutReal& operator[](int ind) const { return data[ind]; } + BOUT_HOST_DEVICE inline BoutReal& operator[](int ind) { return data[ind]; } +}; + #endif diff --git a/include/bout/fieldops.hxx b/include/bout/fieldops.hxx new file mode 100644 index 0000000000..53e28042c9 --- /dev/null +++ b/include/bout/fieldops.hxx @@ -0,0 +1,428 @@ +#pragma once +#ifndef BOUT_FIELDOPS_HXX +#define BOUT_FIELDOPS_HXX + +#include "bout/array.hxx" +#include "bout/assert.hxx" +#include "bout/bout_types.hxx" +#include "bout/build_config.hxx" +#include "bout/build_defines.hxx" +#include "bout/region.hxx" + +#include +#include +#include +#include + +#if BOUT_HAS_CUDA +#include +#endif + +class Mesh; +class Field3D; +class Field3DParallel; 
+class Field2D; +class FieldPerp; + +template +struct is_expr_field2d : std::false_type {}; + +template <> +struct is_expr_field2d : std::true_type {}; + +template +inline constexpr bool is_expr_field2d_v = is_expr_field2d>::value; + +// Base template: nothing is an expression by default +template +struct is_expr_field3d : std::false_type {}; + +template <> +struct is_expr_field3d : std::true_type {}; + +template <> +struct is_expr_field3d : std::true_type {}; + +template +struct is_expr_fieldperp : std::false_type {}; + +template <> +struct is_expr_fieldperp : std::true_type {}; + +template +inline constexpr bool is_expr_fieldperp_v = is_expr_fieldperp>::value; + +// Helper variable template +template +inline constexpr bool is_expr_field3d_v = is_expr_field3d>::value; + +template +struct is_expr_constant : std::bool_constant> {}; + +template +inline constexpr bool is_expr_constant_v = is_expr_constant>::value; + +template +struct is_expr_constant> + : std::integral_constant>> {}; + +constexpr int THREADS = 128; +namespace bout { +namespace op { +struct Assign { + int scale = 1; + int offset = 0; + template + BOUT_HOST_DEVICE void operator()(int idx, BoutReal* out, const Expr& expr) const { + out[(idx * scale) + offset] = expr.lhs(idx) + expr.rhs(idx); + } +}; + +struct Add { + template + BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal operator()(int idx, const LView& L, + const RView& R) const { + return L(idx) + R(idx); + } + BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal operator()(BoutReal a, BoutReal b) const { + return a + b; + } +}; +struct Sub { + template + BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal operator()(int idx, const LView& L, + const RView& R) const { + return L(idx) - R(idx); + } + BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal operator()(BoutReal a, BoutReal b) const { + return a - b; + } +}; +struct Mul { + template + BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal operator()(int idx, const LView& L, + const RView& R) const { + return L(idx) * R(idx); + } + 
BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal operator()(BoutReal a, BoutReal b) const { + return a * b; + } +}; +struct Div { + template + BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal operator()(int idx, const LView& L, + const RView& R) const { + return L(idx) / R(idx); + } + BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal operator()(BoutReal a, BoutReal b) const { + return a / b; + } +}; +struct IfElse { + bool condition; + + template + BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal operator()(int idx, const LView& L, + const RView& R) const { + return condition ? L(idx) : R(idx); + } + BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal operator()(BoutReal a, BoutReal b) const { + return condition ? a : b; + } +}; +}; // namespace op + +namespace reduce { + +struct Min { + struct State { + BoutReal value; + }; + + BOUT_HOST_DEVICE static State identity() { + return {std::numeric_limits::infinity()}; + } + BOUT_HOST_DEVICE static void accumulate(State& state, BoutReal value) { + state.value = value < state.value ? value : state.value; + } + BOUT_HOST_DEVICE static void combine(State& state, const State& other) { + state.value = other.value < state.value ? other.value : state.value; + } + static BoutReal finalize(const State& state) { return state.value; } +}; + +struct Max { + struct State { + BoutReal value; + }; + + BOUT_HOST_DEVICE static State identity() { + return {-std::numeric_limits::infinity()}; + } + BOUT_HOST_DEVICE static void accumulate(State& state, BoutReal value) { + state.value = value > state.value ? value : state.value; + } + BOUT_HOST_DEVICE static void combine(State& state, const State& other) { + state.value = other.value > state.value ? 
other.value : state.value; + } + static BoutReal finalize(const State& state) { return state.value; } +}; + +struct Mean { + struct State { + BoutReal sum; + int count; + }; + + BOUT_HOST_DEVICE static State identity() { return {0.0, 0}; } + BOUT_HOST_DEVICE static void accumulate(State& state, BoutReal value) { + state.sum += value; + state.count += 1; + } + BOUT_HOST_DEVICE static void combine(State& state, const State& other) { + state.sum += other.sum; + state.count += other.count; + } + static BoutReal finalize(const State& state) { + return state.sum / static_cast(state.count); + } +}; + +} // namespace reduce +}; // namespace bout + +template +struct ReductionView { + ExprView expr; + const int* indices; + int num_indices; + + BOUT_HOST_DEVICE BOUT_FORCEINLINE int size() const { return num_indices; } + BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal valueAtRegionPos(int idx) const { + return expr(indices[idx]); + } +}; + +template +ReductionView makeReductionView(const ExprView& expr, + const Array& indices) { + return ReductionView{expr, indices.size() > 0 ? 
&indices[0] : nullptr, + indices.size()}; +} + +#if BOUT_HAS_CUDA && defined(__CUDACC__) +template +__global__ void __launch_bounds__(THREADS) evaluatorExpr(BoutReal* out, const Expr expr) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + int e = expr.size(); + + // Out-of-bounds version + if (tid >= e) { + return; + } + int idx = expr.regionIdx(tid); + out[idx] = expr(idx); // single‐pass fusion + + // Grid-strided loop + //int stride = blockDim.x * gridDim.x; + //for (int i = tid; i < e; i += stride) { + // int idx = expr.regionIdx(i); + // out[idx] = expr(idx); // single‐pass fusion + //} +} + +template +__global__ void __launch_bounds__(THREADS) + reducerExpr(typename Reducer::State* partials, const ExprView expr) { + using State = typename Reducer::State; + + __shared__ State shared[THREADS]; + + const int tid = threadIdx.x; + const int global = blockIdx.x * blockDim.x + tid; + const int stride = blockDim.x * gridDim.x; + + State local = Reducer::identity(); + + for (int i = global; i < expr.size(); i += stride) { + Reducer::accumulate(local, expr.valueAtRegionPos(i)); + } + + shared[tid] = local; + __syncthreads(); + + for (int offset = blockDim.x / 2; offset > 0; offset /= 2) { + if (tid < offset) { + Reducer::combine(shared[tid], shared[tid + offset]); + } + __syncthreads(); + } + + if (tid == 0) { + partials[blockIdx.x] = shared[0]; + } +} +#endif + +#if BOUT_HAS_CUDA && defined(__CUDACC__) +struct StreamsRAII { + std::vector streams; + + cudaStream_t get() { + cudaStream_t stream = 0; + + if (streams.empty()) { + if (cudaStreamCreate(&stream) != cudaSuccess) { + throw BoutException("Failed to create CUDA stream"); + } + } else { + stream = streams.back(); + streams.pop_back(); + } + + return stream; + } + + void put(cudaStream_t stream) { streams.push_back(stream); } + + ~StreamsRAII() { + for (auto& stream : streams) { + cudaStreamDestroy(stream); + } + } + + StreamsRAII() = default; + StreamsRAII(const StreamsRAII&) = delete; + 
StreamsRAII(StreamsRAII&&) = delete; + StreamsRAII& operator=(const StreamsRAII&) = delete; + StreamsRAII& operator=(StreamsRAII&&) = delete; +}; +inline struct StreamsRAII streams; +#endif + +template +auto reduceExpr(const ExprView& expr_view) -> typename Reducer::State { + using State = typename Reducer::State; + + ASSERT1(expr_view.size() > 0); + +#if BOUT_HAS_CUDA && defined(__CUDACC__) + cudaStream_t stream = streams.get(); + int blocks = (expr_view.size() + THREADS - 1) / THREADS; + blocks = blocks < 1024 ? blocks : 1024; + Array partials(blocks); + + reducerExpr<<>>(&partials[0], expr_view); + cudaStreamSynchronize(stream); + streams.put(stream); + + State result = Reducer::identity(); + for (int i = 0; i < blocks; ++i) { + Reducer::combine(result, partials[i]); + } + return result; +#else + State result = Reducer::identity(); + for (int i = 0; i < expr_view.size(); ++i) { + Reducer::accumulate(result, expr_view.valueAtRegionPos(i)); + } + return result; +#endif +} + +template +struct BinaryExpr { + typename L::View lhs; + typename R::View rhs; + Array indices; + Func f; + + Mesh* mesh; + CELL_LOC location = CELL_CENTRE; + DirectionTypes directions; + std::optional regionID; + std::optional yindex; + + template + BinaryExpr(const typename L::View& lhs, const typename R::View& rhs, Func f, Mesh* mesh, + CELL_LOC location, DirectionTypes directions, std::optional regionID, + const Region& region, std::optional yindex = std::nullopt) + : lhs(lhs), rhs(rhs), indices(region.getLinearIndices()), f(f), mesh(mesh), + location(location), directions(directions), regionID(regionID), yindex(yindex) {} + + BinaryExpr(const typename L::View& lhs, const typename R::View& rhs, Func f, Mesh* mesh, + CELL_LOC location, DirectionTypes directions, std::optional regionID, + const Array& indices, std::optional yindex = std::nullopt) + : lhs(lhs), rhs(rhs), indices(indices), f(f), mesh(mesh), location(location), + directions(directions), regionID(regionID), yindex(yindex) {} + + 
BinaryExpr& operator=(const BinaryExpr&) = delete; + BinaryExpr& operator=(BinaryExpr&&) = delete; + + BOUT_HOST_DEVICE BOUT_FORCEINLINE int size() const { return indices.size(); } + BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal operator()(int idx) const { + return f(idx, lhs, rhs); // single‐pass fusion + } + template + BOUT_HOST_DEVICE BOUT_FORCEINLINE auto operator[](const IndType& d) const + -> decltype(d.ind, BoutReal{}) { + if constexpr (std::is_same_v) { + return operator()(d.ind / d.nz); + } else { + return operator()(d.ind); + } + } + BOUT_HOST_DEVICE BOUT_FORCEINLINE int regionIdx(int idx) const { return indices[idx]; } + + //operator ResT() { return ResT{*this}; } + struct View { + typename L::View lhs; + typename R::View rhs; + const int* indices; + int num_indices; + Func f; + int mul = 1; + int div = 1; + + View& setScale(int mul, int div) { + this->mul = mul; + this->div = div; + return *this; + } + BOUT_HOST_DEVICE BOUT_FORCEINLINE int size() const { return num_indices; } + BOUT_HOST_DEVICE BOUT_FORCEINLINE int regionIdx(int idx) const { + return indices[idx]; + } + BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal operator()(int idx) const { + return f((idx * mul) / div, lhs, rhs); // single‐pass fusion + //return f(lhs((idx * mul) / div), rhs((idx * mul) / div)); // single‐pass fusion + } + }; + + operator View() { return View{lhs, rhs, &indices[0], indices.size(), f}; } + operator View() const { return View{lhs, rhs, &indices[0], indices.size(), f}; } + + void evaluate(BoutReal* data) const { +#if BOUT_HAS_CUDA && defined(__CUDACC__) + cudaStream_t stream = streams.get(); + int blocks = (size() + THREADS - 1) / THREADS; + evaluatorExpr<<>>(&data[0], static_cast(*this)); + cudaStreamSynchronize(stream); + streams.put(stream); +#else + int e = size(); + for (int i = 0; i < e; ++i) { + int idx = regionIdx(i); + data[idx] = operator()(idx); // single‐pass fusion + } +#endif + } + + Mesh* getMesh() const { return mesh; } + CELL_LOC getLocation() const { return 
location; } + DirectionTypes getDirections() const { return directions; } + std::optional getRegionID() const { return regionID; }; + int getIndex() const { return yindex.value_or(-1); } +}; + +#endif // BOUT_FIELDSOPS_HXX diff --git a/include/bout/fieldperp.hxx b/include/bout/fieldperp.hxx index bc28cf9ce9..36a116e1b5 100644 --- a/include/bout/fieldperp.hxx +++ b/include/bout/fieldperp.hxx @@ -2,9 +2,9 @@ * Class for 2D X-Z slices * ************************************************************************** - * Copyright 2010 B.D.Dudson, S.Farley, M.V.Umansky, X.Q.Xu + * Copyright 2010 - 2026 BOUT++ contributors * - * Contact: Ben Dudson, bd512@york.ac.uk + * Contact: Ben Dudson, dudson2@llnl.gov * * This file is part of BOUT++. * @@ -23,24 +23,27 @@ * **************************************************************************/ -#include -#include -#include class FieldPerp; #ifndef BOUT_FIELDPERP_H #define BOUT_FIELDPERP_H -#include "bout/field.hxx" - #include "bout/array.hxx" #include "bout/assert.hxx" +#include "bout/bout_types.hxx" +#include "bout/build_config.hxx" +#include "bout/field.hxx" +#include "bout/fieldops.hxx" #include "bout/region.hxx" - #include "bout/unused.hxx" +#include "bout/utils.hxx" +#include +#include #include #include +#include +#include class Field2D; // #include "bout/field2d.hxx" class Field3D; // #include "bout/field3d.hxx" @@ -90,6 +93,15 @@ public: DirectionTypes directions_in = {YDirectionType::Standard, ZDirectionType::Standard}); + template < + typename ResT, typename L, typename R, typename Func, + typename = std::enable_if_t<(is_expr_fieldperp_v && is_expr_fieldperp_v) + || (is_expr_constant_v && is_expr_fieldperp_v) + || (is_expr_fieldperp_v && is_expr_constant_v)>> + FieldPerp(const BinaryExpr& expr) + : FieldPerp(evaluateBinaryExpr(expr), expr.getMesh(), expr.getLocation(), + expr.getIndex(), expr.getDirections()) {} + ~FieldPerp() override = default; /*! 
@@ -98,6 +110,21 @@ public: FieldPerp& operator=(const FieldPerp& rhs); FieldPerp& operator=(FieldPerp&& rhs) = default; FieldPerp& operator=(BoutReal rhs); + template + std::enable_if_t || is_expr_constant_v, FieldPerp&> + operator=(const BinaryExpr& expr) { + if (!isAllocated() || getMesh() != expr.getMesh()) { + *this = FieldPerp{expr}; + return *this; + } + + setLocation(expr.getLocation()); + setDirections(expr.getDirections()); + setIndex(expr.getIndex()); + allocate(); + expr.evaluate(&data[0]); + return *this; + } /// Return a Region reference to use to iterate over this field const Region& getRegion(REGION region) const; @@ -223,7 +250,7 @@ public: jx, jz, nx, nz); } #endif - return data[jx * nz + jz]; + return data[(jx * nz) + jz]; } /*! @@ -240,7 +267,7 @@ public: jx, jz, nx, nz); } #endif - return data[jx * nz + jz]; + return data[(jx * nz) + jz]; } /*! @@ -309,7 +336,37 @@ public: int size() const override { return nx * nz; }; + struct View { + BoutReal* data; + int mul = 1; + int div = 1; + BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal operator()(int idx) const { + return data[(idx * mul) / div]; + } + BOUT_HOST_DEVICE BOUT_FORCEINLINE BoutReal& operator[](int idx) const { + return data[(idx * mul) / div]; + } + + View& setScale(int mul, int div) { + this->mul = mul; + this->div = div; + return *this; + } + }; + operator View() { return View{&data[0]}; } + operator View() const { return View{const_cast(&data[0])}; } + private: + template + static Array evaluateBinaryExpr(const BinaryExpr& expr) { + const auto* mesh = expr.getMesh(); + ASSERT1(mesh != nullptr); + + Array data{mesh->LocalNx * mesh->LocalNz}; + expr.evaluate(&data[0]); + return data; + } + /// The Y index at which this FieldPerp is defined int yindex{-1}; @@ -352,7 +409,18 @@ FieldPerp operator/(BoutReal lhs, const FieldPerp& rhs); * Unary minus. Returns the negative of given field, * iterates over whole domain including guard/boundary cells. 
*/ -FieldPerp operator-(const FieldPerp& f); +inline auto operator-(const FieldPerp& f) { + return BinaryExpr, FieldPerp, bout::op::Mul>{ + static_cast::View>(-1.0), + static_cast(f), + bout::op::Mul{}, + f.getMesh(), + f.getLocation(), + f.getDirections(), + std::nullopt, + f.getRegion("RGN_ALL"), + f.getIndex()}; +} /// Create a FieldPerp by slicing a 3D field at a given y const FieldPerp sliceXZ(const Field3D& f, int y); @@ -396,4 +464,13 @@ bool operator==(const FieldPerp& a, const FieldPerp& b); /// Output a string describing a FieldPerp to a stream std::ostream& operator<<(std::ostream& out, const FieldPerp& value); +template +struct is_expr_fieldperp> + : std::integral_constant> + && is_expr_fieldperp_v>) + || (is_expr_constant_v> + && is_expr_fieldperp_v>) + || (is_expr_fieldperp_v> + && is_expr_constant_v>)> {}; + #endif diff --git a/include/bout/interpolation.hxx b/include/bout/interpolation.hxx index 1753dfe0c0..40ff603712 100644 --- a/include/bout/interpolation.hxx +++ b/include/bout/interpolation.hxx @@ -2,9 +2,9 @@ * Functions to interpolate between cell locations (e.g. lower Y and centred) * ************************************************************************** - * Copyright 2010 B.D.Dudson, S.Farley, M.V.Umansky, X.Q.Xu + * Copyright 2010 - 2026 BOUT++ contributors * - * Contact: Ben Dudson, bd512@york.ac.uk + * Contact: Ben Dudson, dudson2@llnl.gov * * This file is part of BOUT++. * @@ -26,9 +26,19 @@ #ifndef BOUT_INTERP_H #define BOUT_INTERP_H +#include "bout/assert.hxx" +#include "bout/bout_types.hxx" +#include "bout/boutexception.hxx" +#include "bout/field2d.hxx" +#include "bout/field3d.hxx" #include "bout/mesh.hxx" +#include "bout/msg_stack.hxx" +#include "bout/region.hxx" #include "bout/stencils.hxx" +#include +#include + /// Perform interpolation between centre -> shifted or vice-versa /*! 
Interpolate using 4th-order staggered formula @@ -55,15 +65,15 @@ inline BoutReal interp(const stencil& s) { @param[in] region Region where output will be calculated */ template -const T interp_to(const T& var, CELL_LOC loc, const std::string region = "RGN_ALL") { - +std::enable_if_t || bout::utils::is_Field3D_v, const T> +interp_to(const T& var, CELL_LOC loc, const std::string& region = "RGN_ALL") { static_assert(bout::utils::is_Field2D_v || bout::utils::is_Field3D_v, "interp_to must be templated with one of Field2D or Field3D."); ASSERT1(loc != CELL_DEFAULT); // doesn't make sense to interplote to CELL_DEFAULT Mesh* fieldmesh = var.getMesh(); - if ((loc != CELL_CENTRE) && (fieldmesh->StaggerGrids == false)) { + if ((loc != CELL_CENTRE) && !fieldmesh->StaggerGrids) { throw BoutException("Asked to interpolate, but StaggerGrids is disabled!"); } @@ -72,7 +82,7 @@ const T interp_to(const T& var, CELL_LOC loc, const std::string region = "RGN_AL return var; } - // NOTE: invalidateGuards() is called in Field3D::alloctate() if the data + // NOTE: invalidateGuards() is called in Field3D::allocate() if the data // block is not already allocated, so will be called here if // region==RGN_NOBNDRY T result{emptyFrom(var).setLocation(loc)}; @@ -203,4 +213,16 @@ const T interp_to(const T& var, CELL_LOC loc, const std::string region = "RGN_AL return result; } +template +std::enable_if_t && !bout::utils::is_Field3D_v, const Field3D> +interp_to(const E& expr, CELL_LOC loc, const std::string& rgn = "RGN_ALL") { + return interp_to(Field3D{expr}, loc, rgn); +} + +template +std::enable_if_t && !bout::utils::is_Field2D_v, const Field2D> +interp_to(const E& expr, CELL_LOC loc, const std::string& rgn = "RGN_ALL") { + return interp_to(Field2D{expr}, loc, rgn); +} + #endif // BOUT_INTERP_H diff --git a/include/bout/mesh.hxx b/include/bout/mesh.hxx index 1453f51f5f..929e9d5aa7 100644 --- a/include/bout/mesh.hxx +++ b/include/bout/mesh.hxx @@ -789,6 +789,12 @@ public: return {(indPerp.ind - 
jz) * LocalNy + LocalNz * jy + jz, LocalNy, LocalNz}; } + BOUT_HOST_DEVICE int flatIndPerpto3D(const int& flatIndPerp, const int nz, + int jy = 0) const { + int jz = flatIndPerp % nz; + return (flatIndPerp - jz) * LocalNy + LocalNz * jy + jz; + } + /// Converts an Ind3D to an Ind2D representing a 2D index using a lookup -- to be used with care Ind2D map3Dto2D(const Ind3D& ind3D) { return {indexLookup3Dto2D[ind3D.ind], LocalNy, 1}; diff --git a/include/bout/options.hxx b/include/bout/options.hxx index 03e95488a8..87503b0a22 100644 --- a/include/bout/options.hxx +++ b/include/bout/options.hxx @@ -74,19 +74,19 @@ class Options; * which can be used as a map. * * Options options; - * + * * // Set values * options["key"] = 1.0; * * // Get values. Throws BoutException if not found - * int val = options["key"]; // Sets val to 1 + * int val = options["key"]; // Sets val to 1 * * // Return as specified type. Throws BoutException if not found * BoutReal var = options["key"].as(); * * // A default value can be used if key is not found * BoutReal value = options["pi"].withDefault(3.14); - * + * * // Assign value with source label. Throws if already has a value from same source * options["newkey"].assign(1.0, "some source"); * @@ -94,7 +94,7 @@ class Options; * options["newkey"].force(2.0, "some source"); * * A legacy interface is also supported: - * + * * options.set("key", 1.0, "code"); // Sets a key from source "code" * * int val; @@ -119,9 +119,9 @@ class Options; * * Each Options object can also contain any number of sections, which are * themselves Options objects. - * + * * Options §ion = options["section"]; - * + * * which can be nested: * * options["section"]["subsection"]["value"] = 3; @@ -134,13 +134,13 @@ class Options; * * e.g. 
* options->getSection("section")->getSection("subsection")->set("value", 3); - * + * * Options also know about their parents: * * Options &parent = section.parent(); - * + * * or - * + * * Options *parent = section->getParent(); * * Root options object @@ -150,8 +150,8 @@ class Options; * there is a global singleton Options object which can be accessed with a static function * * Options &root = Options::root(); - * - * or + * + * or * * Options *root = Options::getRoot(); * @@ -193,7 +193,7 @@ public: /// @param[in] parent Parent object /// @param[in] sectionName Name of the section, including path from the root Options(Options* parent_instance, std::string full_name) - : parent_instance(parent_instance), full_name(std::move(full_name)){}; + : parent_instance(parent_instance), full_name(std::move(full_name)) {}; /// Initialise with a value /// These enable Options to be constructed using initializer lists @@ -441,6 +441,13 @@ public: return inputvalue; } + template + ResT operator=(const BinaryExpr& expr) { + ResT value{expr}; + assign(value); + return value; + } + /// Assign a value to the option. 
/// This will throw an exception if already has a value /// diff --git a/include/bout/petsc_interface.hxx b/include/bout/petsc_interface.hxx index 2ce71d0549..830a7f4122 100644 --- a/include/bout/petsc_interface.hxx +++ b/include/bout/petsc_interface.hxx @@ -355,7 +355,7 @@ public: ASSERT2(positions.size() == weights.size()); #if CHECK > 2 for (const auto val : weights) { - ASSERT3(finite(val)); + ASSERT3(std::isfinite(val)); } #endif if (positions.empty()) { @@ -376,25 +376,25 @@ public: if (this == &other) { return *this; } - ASSERT3(finite(static_cast(other))); + ASSERT3(std::isfinite(static_cast(other))); *this = static_cast(other); return *this; } Element& operator=(BoutReal val) { - ASSERT3(finite(val)); + ASSERT3(std::isfinite(val)); value = val; setValues(val, INSERT_VALUES); return *this; } Element& operator+=(BoutReal val) { - ASSERT3(finite(val)); + ASSERT3(std::isfinite(val)); auto columnPosition = std::find(positions.begin(), positions.end(), petscCol); if (columnPosition != positions.end()) { const int index = std::distance(positions.begin(), columnPosition); value += weights[index] * val; - ASSERT3(finite(value)); + ASSERT3(std::isfinite(value)); } setValues(val, ADD_VALUES); return *this; diff --git a/include/bout/rajalib.hxx b/include/bout/rajalib.hxx index b9f6913459..29bab8f23b 100644 --- a/include/bout/rajalib.hxx +++ b/include/bout/rajalib.hxx @@ -14,6 +14,7 @@ */ #pragma once +#include "bout/array.hxx" #ifndef RAJALIB_H #define RAJALIB_H @@ -23,6 +24,15 @@ #include "RAJA/RAJA.hpp" // using RAJA lib +#if BOUT_HAS_CUDA +// TODO: Make configurable +const int CUDA_BLOCK_SIZE = 256; +using EXEC_POL = RAJA::cuda_exec; +//using EXEC_POL = RAJA::loop_exec; +#else // not BOUT_USE_CUDA +using EXEC_POL = RAJA::loop_exec; +#endif // end BOUT_USE_CUDA + /// Wrapper around RAJA::forall /// Enables computations to be done on CPU or GPU (CUDA). 
/// @@ -81,7 +91,7 @@ struct RajaForAll { // Note: must be a local variable const int* _ob_i_ind_raw = &_ob_i_ind[0]; RAJA::forall(RAJA::RangeSegment(0, _ob_i_ind.size()), - [=] RAJA_DEVICE(int id) { + [=] RAJA_DEVICE(int id) mutable { // Look up index and call user function f(_ob_i_ind_raw[id]); }); @@ -127,7 +137,7 @@ private: /// to create variables which shadow the class members. /// #define BOUT_FOR_RAJA(index, region, ...) \ - RajaForAll(region) << [ =, ##__VA_ARGS__ ] RAJA_DEVICE(int index) + RajaForAll(region) << [ =, ##__VA_ARGS__ ] RAJA_DEVICE(int index) mutable #else // BOUT_HAS_RAJA diff --git a/include/bout/region.hxx b/include/bout/region.hxx index e00ad6d41d..88829fc65e 100644 --- a/include/bout/region.hxx +++ b/include/bout/region.hxx @@ -49,6 +49,7 @@ #include #include +#include "bout/array.hxx" #include "bout/assert.hxx" #include "bout/bout_types.hxx" #include "bout/boutexception.hxx" @@ -170,8 +171,8 @@ struct SpecificInd { int ny = -1, nz = -1; ///< Sizes of y and z dimensions SpecificInd() = default; - SpecificInd(int i, int ny, int nz) : ind(i), ny(ny), nz(nz){}; - explicit SpecificInd(int i) : ind(i){}; + SpecificInd(int i, int ny, int nz) : ind(i), ny(ny), nz(nz) {}; + explicit SpecificInd(int i) : ind(i) {}; /// Allow explicit conversion to an int explicit operator int() const { return ind; } @@ -491,10 +492,9 @@ template class Region { // Following prevents a Region being created with anything other // than Ind2D, Ind3D or IndPerp as template type - static_assert( - std::is_base_of_v< - Ind2D, T> || std::is_base_of_v || std::is_base_of_v, - "Region must be templated with one of IndPerp, Ind2D or Ind3D"); + static_assert(std::is_base_of_v || std::is_base_of_v + || std::is_base_of_v, + "Region must be templated with one of IndPerp, Ind2D or Ind3D"); public: using data_type = T; @@ -570,7 +570,7 @@ public: }; Region(RegionIndices& indices, int maxregionblocksize = MAXREGIONBLOCKSIZE) - : indices(indices), 
blocks(getContiguousBlocks(maxregionblocksize)){}; + : indices(indices), blocks(getContiguousBlocks(maxregionblocksize)) {}; // We need to first set the blocks, and only after that call getRegionIndices. // Do not put in the member initialisation @@ -595,17 +595,28 @@ public: const ContiguousBlocks& getBlocks() const { return blocks; }; const RegionIndices& getIndices() const { return indices; }; + const Array& getLinearIndices() const { + if (linearIndices.empty()) { + linearIndices = Array(indices.size()); + for (size_type i = 0; i < indices.size(); ++i) { + linearIndices[i] = indices[i].ind; + } + } + return linearIndices; + } /// Set the indices and ensure blocks updated void setIndices(RegionIndices& indicesIn, int maxregionblocksize = MAXREGIONBLOCKSIZE) { indices = indicesIn; blocks = getContiguousBlocks(maxregionblocksize); + invalidateLinearIndices(); }; /// Set the blocks and ensure indices updated void setBlocks(ContiguousBlocks& blocksIn) { blocks = blocksIn; indices = getRegionIndices(); + invalidateLinearIndices(); }; /// Return a new Region that has the same indices as this one but @@ -829,10 +840,13 @@ public: // sorted this would prevent this usage. 
private: - RegionIndices indices; //< Flattened indices - ContiguousBlocks blocks; //< Contiguous sections of flattened indices - int ny = -1; //< Size of y dimension - int nz = -1; //< Size of z dimension + RegionIndices indices; //< Flattened indices + ContiguousBlocks blocks; //< Contiguous sections of flattened indices + int ny = -1; //< Size of y dimension + int nz = -1; //< Size of z dimension + mutable Array linearIndices; //< Cached flattened integer indices + + void invalidateLinearIndices() const { linearIndices.clear(); } /// Helper function to create a RegionIndices, given the start and end /// points in x, y, z, and the total y, z lengths diff --git a/include/bout/single_index_ops.hxx b/include/bout/single_index_ops.hxx index 60bd78bc36..c29d1a471f 100644 --- a/include/bout/single_index_ops.hxx +++ b/include/bout/single_index_ops.hxx @@ -7,17 +7,6 @@ #include "field_accessor.hxx" -#if BOUT_HAS_RAJA -//-- RAJA CUDA settings--------------------------------------------------------start -#if BOUT_HAS_CUDA -const int CUDA_BLOCK_SIZE = 256; // TODO: Make configurable -using EXEC_POL = RAJA::cuda_exec; -#else // not BOUT_USE_CUDA -using EXEC_POL = RAJA::loop_exec; -#endif // end BOUT_USE_CUDA -////-----------CUDA settings------------------------------------------------------end -#endif // end BOUT_HAS_RAJA - // Ind3D: i.zp(): BOUT_HOST_DEVICE inline int i_zp(const int id, const int nz) { int jz = id % nz; diff --git a/include/bout/twiddle.hxx b/include/bout/twiddle.hxx new file mode 100644 index 0000000000..6da72dd8ff --- /dev/null +++ b/include/bout/twiddle.hxx @@ -0,0 +1,1025 @@ +__constant__ double2 c_twiddle_16[16] = { + {1.0000000000000000, -0.0000000000000000}, // k=0 + {0.9238795325112867, -0.3826834323650898}, // k=1 + {0.7071067811865476, -0.7071067811865475}, // k=2 + {0.3826834323650898, -0.9238795325112867}, // k=3 + {0.0000000000000001, -1.0000000000000000}, // k=4 + {-0.3826834323650897, -0.9238795325112867}, // k=5 + {-0.7071067811865475, 
-0.7071067811865476}, // k=6 + {-0.9238795325112867, -0.3826834323650899}, // k=7 + {-1.0000000000000000, -0.0000000000000001}, // k=8 + {-0.9238795325112868, 0.3826834323650897}, // k=9 + {-0.7071067811865477, 0.7071067811865475}, // k=10 + {-0.3826834323650903, 0.9238795325112865}, // k=11 + {-0.0000000000000002, 1.0000000000000000}, // k=12 + {0.3826834323650900, 0.9238795325112866}, // k=13 + {0.7071067811865474, 0.7071067811865477}, // k=14 + {0.9238795325112865, 0.3826834323650904}, // k=15 +}; + +__constant__ double2 c_twiddle_32[32] = { + {1.0000000000000000, -0.0000000000000000}, // k=0 + {0.9807852804032304, -0.1950903220161282}, // k=1 + {0.9238795325112867, -0.3826834323650898}, // k=2 + {0.8314696123025452, -0.5555702330196022}, // k=3 + {0.7071067811865476, -0.7071067811865475}, // k=4 + {0.5555702330196023, -0.8314696123025452}, // k=5 + {0.3826834323650898, -0.9238795325112867}, // k=6 + {0.1950903220161283, -0.9807852804032304}, // k=7 + {0.0000000000000001, -1.0000000000000000}, // k=8 + {-0.1950903220161282, -0.9807852804032304}, // k=9 + {-0.3826834323650897, -0.9238795325112867}, // k=10 + {-0.5555702330196020, -0.8314696123025455}, // k=11 + {-0.7071067811865475, -0.7071067811865476}, // k=12 + {-0.8314696123025453, -0.5555702330196022}, // k=13 + {-0.9238795325112867, -0.3826834323650899}, // k=14 + {-0.9807852804032304, -0.1950903220161286}, // k=15 + {-1.0000000000000000, -0.0000000000000001}, // k=16 + {-0.9807852804032304, 0.1950903220161284}, // k=17 + {-0.9238795325112868, 0.3826834323650897}, // k=18 + {-0.8314696123025455, 0.5555702330196020}, // k=19 + {-0.7071067811865477, 0.7071067811865475}, // k=20 + {-0.5555702330196022, 0.8314696123025452}, // k=21 + {-0.3826834323650903, 0.9238795325112865}, // k=22 + {-0.1950903220161287, 0.9807852804032303}, // k=23 + {-0.0000000000000002, 1.0000000000000000}, // k=24 + {0.1950903220161283, 0.9807852804032304}, // k=25 + {0.3826834323650900, 0.9238795325112866}, // k=26 + 
{0.5555702330196018, 0.8314696123025455}, // k=27 + {0.7071067811865474, 0.7071067811865477}, // k=28 + {0.8314696123025452, 0.5555702330196022}, // k=29 + {0.9238795325112865, 0.3826834323650904}, // k=30 + {0.9807852804032303, 0.1950903220161287}, // k=31 +}; + +__constant__ double2 c_twiddle_64[64] = { + {1.0000000000000000, -0.0000000000000000}, // k=0 + {0.9951847266721969, -0.0980171403295606}, // k=1 + {0.9807852804032304, -0.1950903220161282}, // k=2 + {0.9569403357322088, -0.2902846772544623}, // k=3 + {0.9238795325112867, -0.3826834323650898}, // k=4 + {0.8819212643483550, -0.4713967368259976}, // k=5 + {0.8314696123025452, -0.5555702330196022}, // k=6 + {0.7730104533627370, -0.6343932841636455}, // k=7 + {0.7071067811865476, -0.7071067811865475}, // k=8 + {0.6343932841636455, -0.7730104533627370}, // k=9 + {0.5555702330196023, -0.8314696123025452}, // k=10 + {0.4713967368259978, -0.8819212643483549}, // k=11 + {0.3826834323650898, -0.9238795325112867}, // k=12 + {0.2902846772544623, -0.9569403357322089}, // k=13 + {0.1950903220161283, -0.9807852804032304}, // k=14 + {0.0980171403295608, -0.9951847266721968}, // k=15 + {0.0000000000000001, -1.0000000000000000}, // k=16 + {-0.0980171403295606, -0.9951847266721969}, // k=17 + {-0.1950903220161282, -0.9807852804032304}, // k=18 + {-0.2902846772544622, -0.9569403357322089}, // k=19 + {-0.3826834323650897, -0.9238795325112867}, // k=20 + {-0.4713967368259977, -0.8819212643483550}, // k=21 + {-0.5555702330196020, -0.8314696123025455}, // k=22 + {-0.6343932841636454, -0.7730104533627371}, // k=23 + {-0.7071067811865475, -0.7071067811865476}, // k=24 + {-0.7730104533627370, -0.6343932841636455}, // k=25 + {-0.8314696123025453, -0.5555702330196022}, // k=26 + {-0.8819212643483549, -0.4713967368259979}, // k=27 + {-0.9238795325112867, -0.3826834323650899}, // k=28 + {-0.9569403357322088, -0.2902846772544624}, // k=29 + {-0.9807852804032304, -0.1950903220161286}, // k=30 + {-0.9951847266721968, -0.0980171403295608}, 
// k=31 + {-1.0000000000000000, -0.0000000000000001}, // k=32 + {-0.9951847266721969, 0.0980171403295606}, // k=33 + {-0.9807852804032304, 0.1950903220161284}, // k=34 + {-0.9569403357322089, 0.2902846772544621}, // k=35 + {-0.9238795325112868, 0.3826834323650897}, // k=36 + {-0.8819212643483550, 0.4713967368259976}, // k=37 + {-0.8314696123025455, 0.5555702330196020}, // k=38 + {-0.7730104533627371, 0.6343932841636453}, // k=39 + {-0.7071067811865477, 0.7071067811865475}, // k=40 + {-0.6343932841636459, 0.7730104533627367}, // k=41 + {-0.5555702330196022, 0.8314696123025452}, // k=42 + {-0.4713967368259979, 0.8819212643483549}, // k=43 + {-0.3826834323650903, 0.9238795325112865}, // k=44 + {-0.2902846772544624, 0.9569403357322088}, // k=45 + {-0.1950903220161287, 0.9807852804032303}, // k=46 + {-0.0980171403295605, 0.9951847266721969}, // k=47 + {-0.0000000000000002, 1.0000000000000000}, // k=48 + {0.0980171403295601, 0.9951847266721969}, // k=49 + {0.1950903220161283, 0.9807852804032304}, // k=50 + {0.2902846772544621, 0.9569403357322089}, // k=51 + {0.3826834323650900, 0.9238795325112866}, // k=52 + {0.4713967368259976, 0.8819212643483550}, // k=53 + {0.5555702330196018, 0.8314696123025455}, // k=54 + {0.6343932841636456, 0.7730104533627369}, // k=55 + {0.7071067811865474, 0.7071067811865477}, // k=56 + {0.7730104533627367, 0.6343932841636459}, // k=57 + {0.8314696123025452, 0.5555702330196022}, // k=58 + {0.8819212643483548, 0.4713967368259979}, // k=59 + {0.9238795325112865, 0.3826834323650904}, // k=60 + {0.9569403357322088, 0.2902846772544625}, // k=61 + {0.9807852804032303, 0.1950903220161287}, // k=62 + {0.9951847266721969, 0.0980171403295605}, // k=63 +}; + +__constant__ double2 c_twiddle_128[128] = { + {1.0000000000000000, -0.0000000000000000}, // k=0 + {0.9987954562051724, -0.0490676743274180}, // k=1 + {0.9951847266721969, -0.0980171403295606}, // k=2 + {0.9891765099647810, -0.1467304744553617}, // k=3 + {0.9807852804032304, -0.1950903220161282}, // 
k=4 + {0.9700312531945440, -0.2429801799032639}, // k=5 + {0.9569403357322088, -0.2902846772544623}, // k=6 + {0.9415440651830208, -0.3368898533922201}, // k=7 + {0.9238795325112867, -0.3826834323650898}, // k=8 + {0.9039892931234433, -0.4275550934302821}, // k=9 + {0.8819212643483550, -0.4713967368259976}, // k=10 + {0.8577286100002721, -0.5141027441932217}, // k=11 + {0.8314696123025452, -0.5555702330196022}, // k=12 + {0.8032075314806449, -0.5956993044924334}, // k=13 + {0.7730104533627370, -0.6343932841636455}, // k=14 + {0.7409511253549591, -0.6715589548470183}, // k=15 + {0.7071067811865476, -0.7071067811865475}, // k=16 + {0.6715589548470183, -0.7409511253549591}, // k=17 + {0.6343932841636455, -0.7730104533627370}, // k=18 + {0.5956993044924335, -0.8032075314806448}, // k=19 + {0.5555702330196023, -0.8314696123025452}, // k=20 + {0.5141027441932217, -0.8577286100002721}, // k=21 + {0.4713967368259978, -0.8819212643483549}, // k=22 + {0.4275550934302822, -0.9039892931234433}, // k=23 + {0.3826834323650898, -0.9238795325112867}, // k=24 + {0.3368898533922201, -0.9415440651830208}, // k=25 + {0.2902846772544623, -0.9569403357322089}, // k=26 + {0.2429801799032640, -0.9700312531945440}, // k=27 + {0.1950903220161283, -0.9807852804032304}, // k=28 + {0.1467304744553617, -0.9891765099647810}, // k=29 + {0.0980171403295608, -0.9951847266721968}, // k=30 + {0.0490676743274181, -0.9987954562051724}, // k=31 + {0.0000000000000001, -1.0000000000000000}, // k=32 + {-0.0490676743274180, -0.9987954562051724}, // k=33 + {-0.0980171403295606, -0.9951847266721969}, // k=34 + {-0.1467304744553616, -0.9891765099647810}, // k=35 + {-0.1950903220161282, -0.9807852804032304}, // k=36 + {-0.2429801799032639, -0.9700312531945440}, // k=37 + {-0.2902846772544622, -0.9569403357322089}, // k=38 + {-0.3368898533922199, -0.9415440651830208}, // k=39 + {-0.3826834323650897, -0.9238795325112867}, // k=40 + {-0.4275550934302819, -0.9039892931234434}, // k=41 + {-0.4713967368259977, 
-0.8819212643483550}, // k=42 + {-0.5141027441932217, -0.8577286100002721}, // k=43 + {-0.5555702330196020, -0.8314696123025455}, // k=44 + {-0.5956993044924334, -0.8032075314806449}, // k=45 + {-0.6343932841636454, -0.7730104533627371}, // k=46 + {-0.6715589548470184, -0.7409511253549590}, // k=47 + {-0.7071067811865475, -0.7071067811865476}, // k=48 + {-0.7409511253549589, -0.6715589548470186}, // k=49 + {-0.7730104533627370, -0.6343932841636455}, // k=50 + {-0.8032075314806448, -0.5956993044924335}, // k=51 + {-0.8314696123025453, -0.5555702330196022}, // k=52 + {-0.8577286100002720, -0.5141027441932218}, // k=53 + {-0.8819212643483549, -0.4713967368259979}, // k=54 + {-0.9039892931234433, -0.4275550934302820}, // k=55 + {-0.9238795325112867, -0.3826834323650899}, // k=56 + {-0.9415440651830207, -0.3368898533922203}, // k=57 + {-0.9569403357322088, -0.2902846772544624}, // k=58 + {-0.9700312531945440, -0.2429801799032641}, // k=59 + {-0.9807852804032304, -0.1950903220161286}, // k=60 + {-0.9891765099647810, -0.1467304744553618}, // k=61 + {-0.9951847266721968, -0.0980171403295608}, // k=62 + {-0.9987954562051724, -0.0490676743274180}, // k=63 + {-1.0000000000000000, -0.0000000000000001}, // k=64 + {-0.9987954562051724, 0.0490676743274177}, // k=65 + {-0.9951847266721969, 0.0980171403295606}, // k=66 + {-0.9891765099647810, 0.1467304744553616}, // k=67 + {-0.9807852804032304, 0.1950903220161284}, // k=68 + {-0.9700312531945440, 0.2429801799032638}, // k=69 + {-0.9569403357322089, 0.2902846772544621}, // k=70 + {-0.9415440651830208, 0.3368898533922201}, // k=71 + {-0.9238795325112868, 0.3826834323650897}, // k=72 + {-0.9039892931234434, 0.4275550934302818}, // k=73 + {-0.8819212643483550, 0.4713967368259976}, // k=74 + {-0.8577286100002721, 0.5141027441932216}, // k=75 + {-0.8314696123025455, 0.5555702330196020}, // k=76 + {-0.8032075314806449, 0.5956993044924332}, // k=77 + {-0.7730104533627371, 0.6343932841636453}, // k=78 + {-0.7409511253549591, 
0.6715589548470184}, // k=79 + {-0.7071067811865477, 0.7071067811865475}, // k=80 + {-0.6715589548470187, 0.7409511253549589}, // k=81 + {-0.6343932841636459, 0.7730104533627367}, // k=82 + {-0.5956993044924331, 0.8032075314806451}, // k=83 + {-0.5555702330196022, 0.8314696123025452}, // k=84 + {-0.5141027441932218, 0.8577286100002720}, // k=85 + {-0.4713967368259979, 0.8819212643483549}, // k=86 + {-0.4275550934302825, 0.9039892931234431}, // k=87 + {-0.3826834323650903, 0.9238795325112865}, // k=88 + {-0.3368898533922199, 0.9415440651830208}, // k=89 + {-0.2902846772544624, 0.9569403357322088}, // k=90 + {-0.2429801799032641, 0.9700312531945440}, // k=91 + {-0.1950903220161287, 0.9807852804032303}, // k=92 + {-0.1467304744553623, 0.9891765099647809}, // k=93 + {-0.0980171403295605, 0.9951847266721969}, // k=94 + {-0.0490676743274180, 0.9987954562051724}, // k=95 + {-0.0000000000000002, 1.0000000000000000}, // k=96 + {0.0490676743274177, 0.9987954562051724}, // k=97 + {0.0980171403295601, 0.9951847266721969}, // k=98 + {0.1467304744553619, 0.9891765099647809}, // k=99 + {0.1950903220161283, 0.9807852804032304}, // k=100 + {0.2429801799032638, 0.9700312531945440}, // k=101 + {0.2902846772544621, 0.9569403357322089}, // k=102 + {0.3368898533922196, 0.9415440651830209}, // k=103 + {0.3826834323650900, 0.9238795325112866}, // k=104 + {0.4275550934302821, 0.9039892931234433}, // k=105 + {0.4713967368259976, 0.8819212643483550}, // k=106 + {0.5141027441932216, 0.8577286100002722}, // k=107 + {0.5555702330196018, 0.8314696123025455}, // k=108 + {0.5956993044924329, 0.8032075314806453}, // k=109 + {0.6343932841636456, 0.7730104533627369}, // k=110 + {0.6715589548470183, 0.7409511253549591}, // k=111 + {0.7071067811865474, 0.7071067811865477}, // k=112 + {0.7409511253549589, 0.6715589548470187}, // k=113 + {0.7730104533627367, 0.6343932841636459}, // k=114 + {0.8032075314806451, 0.5956993044924332}, // k=115 + {0.8314696123025452, 0.5555702330196022}, // k=116 + 
{0.8577286100002720, 0.5141027441932219}, // k=117 + {0.8819212643483548, 0.4713967368259979}, // k=118 + {0.9039892931234431, 0.4275550934302825}, // k=119 + {0.9238795325112865, 0.3826834323650904}, // k=120 + {0.9415440651830208, 0.3368898533922200}, // k=121 + {0.9569403357322088, 0.2902846772544625}, // k=122 + {0.9700312531945440, 0.2429801799032642}, // k=123 + {0.9807852804032303, 0.1950903220161287}, // k=124 + {0.9891765099647809, 0.1467304744553624}, // k=125 + {0.9951847266721969, 0.0980171403295605}, // k=126 + {0.9987954562051724, 0.0490676743274181}, // k=127 +}; + +__constant__ double2 c_twiddle_256[256] = { + {1.0000000000000000, -0.0000000000000000}, // k=0 + {0.9996988186962042, -0.0245412285229123}, // k=1 + {0.9987954562051724, -0.0490676743274180}, // k=2 + {0.9972904566786902, -0.0735645635996674}, // k=3 + {0.9951847266721969, -0.0980171403295606}, // k=4 + {0.9924795345987100, -0.1224106751992162}, // k=5 + {0.9891765099647810, -0.1467304744553617}, // k=6 + {0.9852776423889412, -0.1709618887603012}, // k=7 + {0.9807852804032304, -0.1950903220161282}, // k=8 + {0.9757021300385286, -0.2191012401568698}, // k=9 + {0.9700312531945440, -0.2429801799032639}, // k=10 + {0.9637760657954398, -0.2667127574748984}, // k=11 + {0.9569403357322088, -0.2902846772544623}, // k=12 + {0.9495281805930367, -0.3136817403988915}, // k=13 + {0.9415440651830208, -0.3368898533922201}, // k=14 + {0.9329927988347390, -0.3598950365349881}, // k=15 + {0.9238795325112867, -0.3826834323650898}, // k=16 + {0.9142097557035307, -0.4052413140049899}, // k=17 + {0.9039892931234433, -0.4275550934302821}, // k=18 + {0.8932243011955153, -0.4496113296546065}, // k=19 + {0.8819212643483550, -0.4713967368259976}, // k=20 + {0.8700869911087115, -0.4928981922297840}, // k=21 + {0.8577286100002721, -0.5141027441932217}, // k=22 + {0.8448535652497071, -0.5349976198870972}, // k=23 + {0.8314696123025452, -0.5555702330196022}, // k=24 + {0.8175848131515837, -0.5758081914178453}, // k=25 
+ {0.8032075314806449, -0.5956993044924334}, // k=26 + {0.7883464276266063, -0.6152315905806268}, // k=27 + {0.7730104533627370, -0.6343932841636455}, // k=28 + {0.7572088465064846, -0.6531728429537768}, // k=29 + {0.7409511253549591, -0.6715589548470183}, // k=30 + {0.7242470829514670, -0.6895405447370668}, // k=31 + {0.7071067811865476, -0.7071067811865475}, // k=32 + {0.6895405447370669, -0.7242470829514669}, // k=33 + {0.6715589548470183, -0.7409511253549591}, // k=34 + {0.6531728429537768, -0.7572088465064845}, // k=35 + {0.6343932841636455, -0.7730104533627370}, // k=36 + {0.6152315905806268, -0.7883464276266062}, // k=37 + {0.5956993044924335, -0.8032075314806448}, // k=38 + {0.5758081914178453, -0.8175848131515837}, // k=39 + {0.5555702330196023, -0.8314696123025452}, // k=40 + {0.5349976198870973, -0.8448535652497070}, // k=41 + {0.5141027441932217, -0.8577286100002721}, // k=42 + {0.4928981922297841, -0.8700869911087113}, // k=43 + {0.4713967368259978, -0.8819212643483549}, // k=44 + {0.4496113296546066, -0.8932243011955153}, // k=45 + {0.4275550934302822, -0.9039892931234433}, // k=46 + {0.4052413140049899, -0.9142097557035307}, // k=47 + {0.3826834323650898, -0.9238795325112867}, // k=48 + {0.3598950365349883, -0.9329927988347388}, // k=49 + {0.3368898533922201, -0.9415440651830208}, // k=50 + {0.3136817403988916, -0.9495281805930367}, // k=51 + {0.2902846772544623, -0.9569403357322089}, // k=52 + {0.2667127574748984, -0.9637760657954398}, // k=53 + {0.2429801799032640, -0.9700312531945440}, // k=54 + {0.2191012401568698, -0.9757021300385286}, // k=55 + {0.1950903220161283, -0.9807852804032304}, // k=56 + {0.1709618887603014, -0.9852776423889412}, // k=57 + {0.1467304744553617, -0.9891765099647810}, // k=58 + {0.1224106751992163, -0.9924795345987100}, // k=59 + {0.0980171403295608, -0.9951847266721968}, // k=60 + {0.0735645635996675, -0.9972904566786902}, // k=61 + {0.0490676743274181, -0.9987954562051724}, // k=62 + {0.0245412285229123, 
-0.9996988186962042}, // k=63 + {0.0000000000000001, -1.0000000000000000}, // k=64 + {-0.0245412285229121, -0.9996988186962042}, // k=65 + {-0.0490676743274180, -0.9987954562051724}, // k=66 + {-0.0735645635996673, -0.9972904566786902}, // k=67 + {-0.0980171403295606, -0.9951847266721969}, // k=68 + {-0.1224106751992162, -0.9924795345987100}, // k=69 + {-0.1467304744553616, -0.9891765099647810}, // k=70 + {-0.1709618887603012, -0.9852776423889412}, // k=71 + {-0.1950903220161282, -0.9807852804032304}, // k=72 + {-0.2191012401568697, -0.9757021300385286}, // k=73 + {-0.2429801799032639, -0.9700312531945440}, // k=74 + {-0.2667127574748983, -0.9637760657954398}, // k=75 + {-0.2902846772544622, -0.9569403357322089}, // k=76 + {-0.3136817403988914, -0.9495281805930367}, // k=77 + {-0.3368898533922199, -0.9415440651830208}, // k=78 + {-0.3598950365349882, -0.9329927988347388}, // k=79 + {-0.3826834323650897, -0.9238795325112867}, // k=80 + {-0.4052413140049897, -0.9142097557035307}, // k=81 + {-0.4275550934302819, -0.9039892931234434}, // k=82 + {-0.4496113296546067, -0.8932243011955152}, // k=83 + {-0.4713967368259977, -0.8819212643483550}, // k=84 + {-0.4928981922297840, -0.8700869911087115}, // k=85 + {-0.5141027441932217, -0.8577286100002721}, // k=86 + {-0.5349976198870970, -0.8448535652497072}, // k=87 + {-0.5555702330196020, -0.8314696123025455}, // k=88 + {-0.5758081914178453, -0.8175848131515837}, // k=89 + {-0.5956993044924334, -0.8032075314806449}, // k=90 + {-0.6152315905806267, -0.7883464276266063}, // k=91 + {-0.6343932841636454, -0.7730104533627371}, // k=92 + {-0.6531728429537765, -0.7572088465064847}, // k=93 + {-0.6715589548470184, -0.7409511253549590}, // k=94 + {-0.6895405447370669, -0.7242470829514669}, // k=95 + {-0.7071067811865475, -0.7071067811865476}, // k=96 + {-0.7242470829514668, -0.6895405447370671}, // k=97 + {-0.7409511253549589, -0.6715589548470186}, // k=98 + {-0.7572088465064846, -0.6531728429537766}, // k=99 + {-0.7730104533627370, 
-0.6343932841636455}, // k=100 + {-0.7883464276266062, -0.6152315905806269}, // k=101 + {-0.8032075314806448, -0.5956993044924335}, // k=102 + {-0.8175848131515836, -0.5758081914178454}, // k=103 + {-0.8314696123025453, -0.5555702330196022}, // k=104 + {-0.8448535652497071, -0.5349976198870972}, // k=105 + {-0.8577286100002720, -0.5141027441932218}, // k=106 + {-0.8700869911087113, -0.4928981922297841}, // k=107 + {-0.8819212643483549, -0.4713967368259979}, // k=108 + {-0.8932243011955152, -0.4496113296546069}, // k=109 + {-0.9039892931234433, -0.4275550934302820}, // k=110 + {-0.9142097557035307, -0.4052413140049899}, // k=111 + {-0.9238795325112867, -0.3826834323650899}, // k=112 + {-0.9329927988347388, -0.3598950365349883}, // k=113 + {-0.9415440651830207, -0.3368898533922203}, // k=114 + {-0.9495281805930367, -0.3136817403988914}, // k=115 + {-0.9569403357322088, -0.2902846772544624}, // k=116 + {-0.9637760657954398, -0.2667127574748985}, // k=117 + {-0.9700312531945440, -0.2429801799032641}, // k=118 + {-0.9757021300385285, -0.2191012401568700}, // k=119 + {-0.9807852804032304, -0.1950903220161286}, // k=120 + {-0.9852776423889412, -0.1709618887603012}, // k=121 + {-0.9891765099647810, -0.1467304744553618}, // k=122 + {-0.9924795345987100, -0.1224106751992163}, // k=123 + {-0.9951847266721968, -0.0980171403295608}, // k=124 + {-0.9972904566786902, -0.0735645635996677}, // k=125 + {-0.9987954562051724, -0.0490676743274180}, // k=126 + {-0.9996988186962042, -0.0245412285229123}, // k=127 + {-1.0000000000000000, -0.0000000000000001}, // k=128 + {-0.9996988186962042, 0.0245412285229121}, // k=129 + {-0.9987954562051724, 0.0490676743274177}, // k=130 + {-0.9972904566786902, 0.0735645635996675}, // k=131 + {-0.9951847266721969, 0.0980171403295606}, // k=132 + {-0.9924795345987100, 0.1224106751992161}, // k=133 + {-0.9891765099647810, 0.1467304744553616}, // k=134 + {-0.9852776423889413, 0.1709618887603010}, // k=135 + {-0.9807852804032304, 0.1950903220161284}, // 
k=136 + {-0.9757021300385286, 0.2191012401568698}, // k=137 + {-0.9700312531945440, 0.2429801799032638}, // k=138 + {-0.9637760657954400, 0.2667127574748983}, // k=139 + {-0.9569403357322089, 0.2902846772544621}, // k=140 + {-0.9495281805930368, 0.3136817403988912}, // k=141 + {-0.9415440651830208, 0.3368898533922201}, // k=142 + {-0.9329927988347390, 0.3598950365349881}, // k=143 + {-0.9238795325112868, 0.3826834323650897}, // k=144 + {-0.9142097557035307, 0.4052413140049897}, // k=145 + {-0.9039892931234434, 0.4275550934302818}, // k=146 + {-0.8932243011955153, 0.4496113296546067}, // k=147 + {-0.8819212643483550, 0.4713967368259976}, // k=148 + {-0.8700869911087115, 0.4928981922297839}, // k=149 + {-0.8577286100002721, 0.5141027441932216}, // k=150 + {-0.8448535652497072, 0.5349976198870969}, // k=151 + {-0.8314696123025455, 0.5555702330196020}, // k=152 + {-0.8175848131515837, 0.5758081914178453}, // k=153 + {-0.8032075314806449, 0.5956993044924332}, // k=154 + {-0.7883464276266063, 0.6152315905806267}, // k=155 + {-0.7730104533627371, 0.6343932841636453}, // k=156 + {-0.7572088465064848, 0.6531728429537765}, // k=157 + {-0.7409511253549591, 0.6715589548470184}, // k=158 + {-0.7242470829514670, 0.6895405447370668}, // k=159 + {-0.7071067811865477, 0.7071067811865475}, // k=160 + {-0.6895405447370671, 0.7242470829514668}, // k=161 + {-0.6715589548470187, 0.7409511253549589}, // k=162 + {-0.6531728429537771, 0.7572088465064842}, // k=163 + {-0.6343932841636459, 0.7730104533627367}, // k=164 + {-0.6152315905806273, 0.7883464276266059}, // k=165 + {-0.5956993044924331, 0.8032075314806451}, // k=166 + {-0.5758081914178452, 0.8175848131515838}, // k=167 + {-0.5555702330196022, 0.8314696123025452}, // k=168 + {-0.5349976198870973, 0.8448535652497070}, // k=169 + {-0.5141027441932218, 0.8577286100002720}, // k=170 + {-0.4928981922297842, 0.8700869911087113}, // k=171 + {-0.4713967368259979, 0.8819212643483549}, // k=172 + {-0.4496113296546069, 0.8932243011955152}, // 
k=173 + {-0.4275550934302825, 0.9039892931234431}, // k=174 + {-0.4052413140049904, 0.9142097557035305}, // k=175 + {-0.3826834323650903, 0.9238795325112865}, // k=176 + {-0.3598950365349879, 0.9329927988347390}, // k=177 + {-0.3368898533922199, 0.9415440651830208}, // k=178 + {-0.3136817403988915, 0.9495281805930367}, // k=179 + {-0.2902846772544624, 0.9569403357322088}, // k=180 + {-0.2667127574748985, 0.9637760657954398}, // k=181 + {-0.2429801799032641, 0.9700312531945440}, // k=182 + {-0.2191012401568701, 0.9757021300385285}, // k=183 + {-0.1950903220161287, 0.9807852804032303}, // k=184 + {-0.1709618887603017, 0.9852776423889411}, // k=185 + {-0.1467304744553623, 0.9891765099647809}, // k=186 + {-0.1224106751992160, 0.9924795345987101}, // k=187 + {-0.0980171403295605, 0.9951847266721969}, // k=188 + {-0.0735645635996674, 0.9972904566786902}, // k=189 + {-0.0490676743274180, 0.9987954562051724}, // k=190 + {-0.0245412285229124, 0.9996988186962042}, // k=191 + {-0.0000000000000002, 1.0000000000000000}, // k=192 + {0.0245412285229120, 0.9996988186962042}, // k=193 + {0.0490676743274177, 0.9987954562051724}, // k=194 + {0.0735645635996670, 0.9972904566786902}, // k=195 + {0.0980171403295601, 0.9951847266721969}, // k=196 + {0.1224106751992156, 0.9924795345987101}, // k=197 + {0.1467304744553619, 0.9891765099647809}, // k=198 + {0.1709618887603013, 0.9852776423889412}, // k=199 + {0.1950903220161283, 0.9807852804032304}, // k=200 + {0.2191012401568697, 0.9757021300385286}, // k=201 + {0.2429801799032638, 0.9700312531945440}, // k=202 + {0.2667127574748982, 0.9637760657954400}, // k=203 + {0.2902846772544621, 0.9569403357322089}, // k=204 + {0.3136817403988911, 0.9495281805930368}, // k=205 + {0.3368898533922196, 0.9415440651830209}, // k=206 + {0.3598950365349876, 0.9329927988347391}, // k=207 + {0.3826834323650900, 0.9238795325112866}, // k=208 + {0.4052413140049900, 0.9142097557035306}, // k=209 + {0.4275550934302821, 0.9039892931234433}, // k=210 + 
{0.4496113296546066, 0.8932243011955153}, // k=211 + {0.4713967368259976, 0.8819212643483550}, // k=212 + {0.4928981922297839, 0.8700869911087115}, // k=213 + {0.5141027441932216, 0.8577286100002722}, // k=214 + {0.5349976198870969, 0.8448535652497072}, // k=215 + {0.5555702330196018, 0.8314696123025455}, // k=216 + {0.5758081914178449, 0.8175848131515840}, // k=217 + {0.5956993044924329, 0.8032075314806453}, // k=218 + {0.6152315905806270, 0.7883464276266061}, // k=219 + {0.6343932841636456, 0.7730104533627369}, // k=220 + {0.6531728429537768, 0.7572088465064846}, // k=221 + {0.6715589548470183, 0.7409511253549591}, // k=222 + {0.6895405447370668, 0.7242470829514670}, // k=223 + {0.7071067811865474, 0.7071067811865477}, // k=224 + {0.7242470829514667, 0.6895405447370672}, // k=225 + {0.7409511253549589, 0.6715589548470187}, // k=226 + {0.7572088465064842, 0.6531728429537771}, // k=227 + {0.7730104533627367, 0.6343932841636459}, // k=228 + {0.7883464276266059, 0.6152315905806274}, // k=229 + {0.8032075314806451, 0.5956993044924332}, // k=230 + {0.8175848131515837, 0.5758081914178452}, // k=231 + {0.8314696123025452, 0.5555702330196022}, // k=232 + {0.8448535652497070, 0.5349976198870973}, // k=233 + {0.8577286100002720, 0.5141027441932219}, // k=234 + {0.8700869911087113, 0.4928981922297843}, // k=235 + {0.8819212643483548, 0.4713967368259979}, // k=236 + {0.8932243011955151, 0.4496113296546070}, // k=237 + {0.9039892931234431, 0.4275550934302825}, // k=238 + {0.9142097557035305, 0.4052413140049904}, // k=239 + {0.9238795325112865, 0.3826834323650904}, // k=240 + {0.9329927988347390, 0.3598950365349880}, // k=241 + {0.9415440651830208, 0.3368898533922200}, // k=242 + {0.9495281805930367, 0.3136817403988915}, // k=243 + {0.9569403357322088, 0.2902846772544625}, // k=244 + {0.9637760657954398, 0.2667127574748986}, // k=245 + {0.9700312531945440, 0.2429801799032642}, // k=246 + {0.9757021300385285, 0.2191012401568702}, // k=247 + {0.9807852804032303, 
0.1950903220161287}, // k=248 + {0.9852776423889411, 0.1709618887603018}, // k=249 + {0.9891765099647809, 0.1467304744553624}, // k=250 + {0.9924795345987100, 0.1224106751992160}, // k=251 + {0.9951847266721969, 0.0980171403295605}, // k=252 + {0.9972904566786902, 0.0735645635996674}, // k=253 + {0.9987954562051724, 0.0490676743274181}, // k=254 + {0.9996988186962042, 0.0245412285229124}, // k=255 +}; + +__constant__ double2 c_twiddle_512[512] = { + {1.0000000000000000, -0.0000000000000000}, // k=0 + {0.9999247018391445, -0.0122715382857199}, // k=1 + {0.9996988186962042, -0.0245412285229123}, // k=2 + {0.9993223845883495, -0.0368072229413588}, // k=3 + {0.9987954562051724, -0.0490676743274180}, // k=4 + {0.9981181129001492, -0.0613207363022086}, // k=5 + {0.9972904566786902, -0.0735645635996674}, // k=6 + {0.9963126121827780, -0.0857973123444399}, // k=7 + {0.9951847266721969, -0.0980171403295606}, // k=8 + {0.9939069700023561, -0.1102222072938831}, // k=9 + {0.9924795345987100, -0.1224106751992162}, // k=10 + {0.9909026354277800, -0.1345807085071262}, // k=11 + {0.9891765099647810, -0.1467304744553617}, // k=12 + {0.9873014181578584, -0.1588581433338614}, // k=13 + {0.9852776423889412, -0.1709618887603012}, // k=14 + {0.9831054874312163, -0.1830398879551410}, // k=15 + {0.9807852804032304, -0.1950903220161282}, // k=16 + {0.9783173707196277, -0.2071113761922186}, // k=17 + {0.9757021300385286, -0.2191012401568698}, // k=18 + {0.9729399522055602, -0.2310581082806711}, // k=19 + {0.9700312531945440, -0.2429801799032639}, // k=20 + {0.9669764710448521, -0.2548656596045146}, // k=21 + {0.9637760657954398, -0.2667127574748984}, // k=22 + {0.9604305194155658, -0.2785196893850531}, // k=23 + {0.9569403357322088, -0.2902846772544623}, // k=24 + {0.9533060403541939, -0.3020059493192281}, // k=25 + {0.9495281805930367, -0.3136817403988915}, // k=26 + {0.9456073253805213, -0.3253102921622629}, // k=27 + {0.9415440651830208, -0.3368898533922201}, // k=28 + 
{0.9373390119125750, -0.3484186802494346}, // k=29 + {0.9329927988347390, -0.3598950365349881}, // k=30 + {0.9285060804732156, -0.3713171939518375}, // k=31 + {0.9238795325112867, -0.3826834323650898}, // k=32 + {0.9191138516900578, -0.3939920400610481}, // k=33 + {0.9142097557035307, -0.4052413140049899}, // k=34 + {0.9091679830905224, -0.4164295600976372}, // k=35 + {0.9039892931234433, -0.4275550934302821}, // k=36 + {0.8986744656939538, -0.4386162385385277}, // k=37 + {0.8932243011955153, -0.4496113296546065}, // k=38 + {0.8876396204028539, -0.4605387109582400}, // k=39 + {0.8819212643483550, -0.4713967368259976}, // k=40 + {0.8760700941954066, -0.4821837720791227}, // k=41 + {0.8700869911087115, -0.4928981922297840}, // k=42 + {0.8639728561215868, -0.5035383837257176}, // k=43 + {0.8577286100002721, -0.5141027441932217}, // k=44 + {0.8513551931052652, -0.5245896826784689}, // k=45 + {0.8448535652497071, -0.5349976198870972}, // k=46 + {0.8382247055548381, -0.5453249884220465}, // k=47 + {0.8314696123025452, -0.5555702330196022}, // k=48 + {0.8245893027850253, -0.5657318107836131}, // k=49 + {0.8175848131515837, -0.5758081914178453}, // k=50 + {0.8104571982525948, -0.5857978574564389}, // k=51 + {0.8032075314806449, -0.5956993044924334}, // k=52 + {0.7958369046088836, -0.6055110414043255}, // k=53 + {0.7883464276266063, -0.6152315905806268}, // k=54 + {0.7807372285720945, -0.6248594881423863}, // k=55 + {0.7730104533627370, -0.6343932841636455}, // k=56 + {0.7651672656224590, -0.6438315428897914}, // k=57 + {0.7572088465064846, -0.6531728429537768}, // k=58 + {0.7491363945234594, -0.6624157775901718}, // k=59 + {0.7409511253549591, -0.6715589548470183}, // k=60 + {0.7326542716724128, -0.6806009977954530}, // k=61 + {0.7242470829514670, -0.6895405447370668}, // k=62 + {0.7157308252838186, -0.6983762494089729}, // k=63 + {0.7071067811865476, -0.7071067811865475}, // k=64 + {0.6983762494089729, -0.7157308252838186}, // k=65 + {0.6895405447370669, 
-0.7242470829514669}, // k=66 + {0.6806009977954531, -0.7326542716724128}, // k=67 + {0.6715589548470183, -0.7409511253549591}, // k=68 + {0.6624157775901718, -0.7491363945234593}, // k=69 + {0.6531728429537768, -0.7572088465064845}, // k=70 + {0.6438315428897915, -0.7651672656224590}, // k=71 + {0.6343932841636455, -0.7730104533627370}, // k=72 + {0.6248594881423865, -0.7807372285720944}, // k=73 + {0.6152315905806268, -0.7883464276266062}, // k=74 + {0.6055110414043255, -0.7958369046088835}, // k=75 + {0.5956993044924335, -0.8032075314806448}, // k=76 + {0.5857978574564389, -0.8104571982525948}, // k=77 + {0.5758081914178453, -0.8175848131515837}, // k=78 + {0.5657318107836132, -0.8245893027850253}, // k=79 + {0.5555702330196023, -0.8314696123025452}, // k=80 + {0.5453249884220465, -0.8382247055548380}, // k=81 + {0.5349976198870973, -0.8448535652497070}, // k=82 + {0.5245896826784688, -0.8513551931052652}, // k=83 + {0.5141027441932217, -0.8577286100002721}, // k=84 + {0.5035383837257176, -0.8639728561215867}, // k=85 + {0.4928981922297841, -0.8700869911087113}, // k=86 + {0.4821837720791228, -0.8760700941954066}, // k=87 + {0.4713967368259978, -0.8819212643483549}, // k=88 + {0.4605387109582400, -0.8876396204028539}, // k=89 + {0.4496113296546066, -0.8932243011955153}, // k=90 + {0.4386162385385277, -0.8986744656939538}, // k=91 + {0.4275550934302822, -0.9039892931234433}, // k=92 + {0.4164295600976373, -0.9091679830905223}, // k=93 + {0.4052413140049899, -0.9142097557035307}, // k=94 + {0.3939920400610481, -0.9191138516900578}, // k=95 + {0.3826834323650898, -0.9238795325112867}, // k=96 + {0.3713171939518376, -0.9285060804732155}, // k=97 + {0.3598950365349883, -0.9329927988347388}, // k=98 + {0.3484186802494345, -0.9373390119125750}, // k=99 + {0.3368898533922201, -0.9415440651830208}, // k=100 + {0.3253102921622630, -0.9456073253805213}, // k=101 + {0.3136817403988916, -0.9495281805930367}, // k=102 + {0.3020059493192282, -0.9533060403541938}, // k=103 + 
{0.2902846772544623, -0.9569403357322089}, // k=104 + {0.2785196893850531, -0.9604305194155658}, // k=105 + {0.2667127574748984, -0.9637760657954398}, // k=106 + {0.2548656596045146, -0.9669764710448521}, // k=107 + {0.2429801799032640, -0.9700312531945440}, // k=108 + {0.2310581082806713, -0.9729399522055601}, // k=109 + {0.2191012401568698, -0.9757021300385286}, // k=110 + {0.2071113761922186, -0.9783173707196277}, // k=111 + {0.1950903220161283, -0.9807852804032304}, // k=112 + {0.1830398879551411, -0.9831054874312163}, // k=113 + {0.1709618887603014, -0.9852776423889412}, // k=114 + {0.1588581433338614, -0.9873014181578584}, // k=115 + {0.1467304744553617, -0.9891765099647810}, // k=116 + {0.1345807085071262, -0.9909026354277800}, // k=117 + {0.1224106751992163, -0.9924795345987100}, // k=118 + {0.1102222072938832, -0.9939069700023561}, // k=119 + {0.0980171403295608, -0.9951847266721968}, // k=120 + {0.0857973123444399, -0.9963126121827780}, // k=121 + {0.0735645635996675, -0.9972904566786902}, // k=122 + {0.0613207363022086, -0.9981181129001492}, // k=123 + {0.0490676743274181, -0.9987954562051724}, // k=124 + {0.0368072229413590, -0.9993223845883495}, // k=125 + {0.0245412285229123, -0.9996988186962042}, // k=126 + {0.0122715382857199, -0.9999247018391445}, // k=127 + {0.0000000000000001, -1.0000000000000000}, // k=128 + {-0.0122715382857198, -0.9999247018391445}, // k=129 + {-0.0245412285229121, -0.9996988186962042}, // k=130 + {-0.0368072229413589, -0.9993223845883495}, // k=131 + {-0.0490676743274180, -0.9987954562051724}, // k=132 + {-0.0613207363022085, -0.9981181129001492}, // k=133 + {-0.0735645635996673, -0.9972904566786902}, // k=134 + {-0.0857973123444398, -0.9963126121827780}, // k=135 + {-0.0980171403295606, -0.9951847266721969}, // k=136 + {-0.1102222072938831, -0.9939069700023561}, // k=137 + {-0.1224106751992162, -0.9924795345987100}, // k=138 + {-0.1345807085071261, -0.9909026354277800}, // k=139 + {-0.1467304744553616, -0.9891765099647810}, 
// k=140 + {-0.1588581433338613, -0.9873014181578584}, // k=141 + {-0.1709618887603012, -0.9852776423889412}, // k=142 + {-0.1830398879551409, -0.9831054874312163}, // k=143 + {-0.1950903220161282, -0.9807852804032304}, // k=144 + {-0.2071113761922184, -0.9783173707196277}, // k=145 + {-0.2191012401568697, -0.9757021300385286}, // k=146 + {-0.2310581082806711, -0.9729399522055602}, // k=147 + {-0.2429801799032639, -0.9700312531945440}, // k=148 + {-0.2548656596045145, -0.9669764710448521}, // k=149 + {-0.2667127574748983, -0.9637760657954398}, // k=150 + {-0.2785196893850529, -0.9604305194155659}, // k=151 + {-0.2902846772544622, -0.9569403357322089}, // k=152 + {-0.3020059493192281, -0.9533060403541939}, // k=153 + {-0.3136817403988914, -0.9495281805930367}, // k=154 + {-0.3253102921622629, -0.9456073253805214}, // k=155 + {-0.3368898533922199, -0.9415440651830208}, // k=156 + {-0.3484186802494344, -0.9373390119125750}, // k=157 + {-0.3598950365349882, -0.9329927988347388}, // k=158 + {-0.3713171939518375, -0.9285060804732156}, // k=159 + {-0.3826834323650897, -0.9238795325112867}, // k=160 + {-0.3939920400610480, -0.9191138516900578}, // k=161 + {-0.4052413140049897, -0.9142097557035307}, // k=162 + {-0.4164295600976370, -0.9091679830905225}, // k=163 + {-0.4275550934302819, -0.9039892931234434}, // k=164 + {-0.4386162385385274, -0.8986744656939539}, // k=165 + {-0.4496113296546067, -0.8932243011955152}, // k=166 + {-0.4605387109582401, -0.8876396204028539}, // k=167 + {-0.4713967368259977, -0.8819212643483550}, // k=168 + {-0.4821837720791227, -0.8760700941954066}, // k=169 + {-0.4928981922297840, -0.8700869911087115}, // k=170 + {-0.5035383837257175, -0.8639728561215868}, // k=171 + {-0.5141027441932217, -0.8577286100002721}, // k=172 + {-0.5245896826784687, -0.8513551931052652}, // k=173 + {-0.5349976198870970, -0.8448535652497072}, // k=174 + {-0.5453249884220462, -0.8382247055548382}, // k=175 + {-0.5555702330196020, -0.8314696123025455}, // k=176 + 
{-0.5657318107836132, -0.8245893027850252}, // k=177 + {-0.5758081914178453, -0.8175848131515837}, // k=178 + {-0.5857978574564389, -0.8104571982525948}, // k=179 + {-0.5956993044924334, -0.8032075314806449}, // k=180 + {-0.6055110414043254, -0.7958369046088836}, // k=181 + {-0.6152315905806267, -0.7883464276266063}, // k=182 + {-0.6248594881423862, -0.7807372285720946}, // k=183 + {-0.6343932841636454, -0.7730104533627371}, // k=184 + {-0.6438315428897913, -0.7651672656224591}, // k=185 + {-0.6531728429537765, -0.7572088465064847}, // k=186 + {-0.6624157775901719, -0.7491363945234593}, // k=187 + {-0.6715589548470184, -0.7409511253549590}, // k=188 + {-0.6806009977954530, -0.7326542716724128}, // k=189 + {-0.6895405447370669, -0.7242470829514669}, // k=190 + {-0.6983762494089728, -0.7157308252838187}, // k=191 + {-0.7071067811865475, -0.7071067811865476}, // k=192 + {-0.7157308252838186, -0.6983762494089729}, // k=193 + {-0.7242470829514668, -0.6895405447370671}, // k=194 + {-0.7326542716724127, -0.6806009977954532}, // k=195 + {-0.7409511253549589, -0.6715589548470186}, // k=196 + {-0.7491363945234591, -0.6624157775901720}, // k=197 + {-0.7572088465064846, -0.6531728429537766}, // k=198 + {-0.7651672656224590, -0.6438315428897914}, // k=199 + {-0.7730104533627370, -0.6343932841636455}, // k=200 + {-0.7807372285720945, -0.6248594881423863}, // k=201 + {-0.7883464276266062, -0.6152315905806269}, // k=202 + {-0.7958369046088835, -0.6055110414043257}, // k=203 + {-0.8032075314806448, -0.5956993044924335}, // k=204 + {-0.8104571982525947, -0.5857978574564390}, // k=205 + {-0.8175848131515836, -0.5758081914178454}, // k=206 + {-0.8245893027850251, -0.5657318107836135}, // k=207 + {-0.8314696123025453, -0.5555702330196022}, // k=208 + {-0.8382247055548381, -0.5453249884220464}, // k=209 + {-0.8448535652497071, -0.5349976198870972}, // k=210 + {-0.8513551931052652, -0.5245896826784689}, // k=211 + {-0.8577286100002720, -0.5141027441932218}, // k=212 + 
{-0.8639728561215867, -0.5035383837257177}, // k=213 + {-0.8700869911087113, -0.4928981922297841}, // k=214 + {-0.8760700941954065, -0.4821837720791229}, // k=215 + {-0.8819212643483549, -0.4713967368259979}, // k=216 + {-0.8876396204028538, -0.4605387109582402}, // k=217 + {-0.8932243011955152, -0.4496113296546069}, // k=218 + {-0.8986744656939539, -0.4386162385385275}, // k=219 + {-0.9039892931234433, -0.4275550934302820}, // k=220 + {-0.9091679830905224, -0.4164295600976372}, // k=221 + {-0.9142097557035307, -0.4052413140049899}, // k=222 + {-0.9191138516900578, -0.3939920400610482}, // k=223 + {-0.9238795325112867, -0.3826834323650899}, // k=224 + {-0.9285060804732155, -0.3713171939518377}, // k=225 + {-0.9329927988347388, -0.3598950365349883}, // k=226 + {-0.9373390119125748, -0.3484186802494348}, // k=227 + {-0.9415440651830207, -0.3368898533922203}, // k=228 + {-0.9456073253805212, -0.3253102921622633}, // k=229 + {-0.9495281805930367, -0.3136817403988914}, // k=230 + {-0.9533060403541939, -0.3020059493192280}, // k=231 + {-0.9569403357322088, -0.2902846772544624}, // k=232 + {-0.9604305194155658, -0.2785196893850532}, // k=233 + {-0.9637760657954398, -0.2667127574748985}, // k=234 + {-0.9669764710448521, -0.2548656596045147}, // k=235 + {-0.9700312531945440, -0.2429801799032641}, // k=236 + {-0.9729399522055601, -0.2310581082806713}, // k=237 + {-0.9757021300385285, -0.2191012401568700}, // k=238 + {-0.9783173707196275, -0.2071113761922188}, // k=239 + {-0.9807852804032304, -0.1950903220161286}, // k=240 + {-0.9831054874312163, -0.1830398879551409}, // k=241 + {-0.9852776423889412, -0.1709618887603012}, // k=242 + {-0.9873014181578584, -0.1588581433338615}, // k=243 + {-0.9891765099647810, -0.1467304744553618}, // k=244 + {-0.9909026354277800, -0.1345807085071263}, // k=245 + {-0.9924795345987100, -0.1224106751992163}, // k=246 + {-0.9939069700023561, -0.1102222072938832}, // k=247 + {-0.9951847266721968, -0.0980171403295608}, // k=248 + 
{-0.9963126121827780, -0.0857973123444402}, // k=249 + {-0.9972904566786902, -0.0735645635996677}, // k=250 + {-0.9981181129001492, -0.0613207363022085}, // k=251 + {-0.9987954562051724, -0.0490676743274180}, // k=252 + {-0.9993223845883495, -0.0368072229413588}, // k=253 + {-0.9996988186962042, -0.0245412285229123}, // k=254 + {-0.9999247018391445, -0.0122715382857200}, // k=255 + {-1.0000000000000000, -0.0000000000000001}, // k=256 + {-0.9999247018391445, 0.0122715382857198}, // k=257 + {-0.9996988186962042, 0.0245412285229121}, // k=258 + {-0.9993223845883495, 0.0368072229413586}, // k=259 + {-0.9987954562051724, 0.0490676743274177}, // k=260 + {-0.9981181129001492, 0.0613207363022082}, // k=261 + {-0.9972904566786902, 0.0735645635996675}, // k=262 + {-0.9963126121827780, 0.0857973123444399}, // k=263 + {-0.9951847266721969, 0.0980171403295606}, // k=264 + {-0.9939069700023561, 0.1102222072938830}, // k=265 + {-0.9924795345987100, 0.1224106751992161}, // k=266 + {-0.9909026354277800, 0.1345807085071261}, // k=267 + {-0.9891765099647810, 0.1467304744553616}, // k=268 + {-0.9873014181578584, 0.1588581433338612}, // k=269 + {-0.9852776423889413, 0.1709618887603010}, // k=270 + {-0.9831054874312164, 0.1830398879551406}, // k=271 + {-0.9807852804032304, 0.1950903220161284}, // k=272 + {-0.9783173707196277, 0.2071113761922186}, // k=273 + {-0.9757021300385286, 0.2191012401568698}, // k=274 + {-0.9729399522055602, 0.2310581082806711}, // k=275 + {-0.9700312531945440, 0.2429801799032638}, // k=276 + {-0.9669764710448522, 0.2548656596045145}, // k=277 + {-0.9637760657954400, 0.2667127574748983}, // k=278 + {-0.9604305194155659, 0.2785196893850529}, // k=279 + {-0.9569403357322089, 0.2902846772544621}, // k=280 + {-0.9533060403541940, 0.3020059493192278}, // k=281 + {-0.9495281805930368, 0.3136817403988912}, // k=282 + {-0.9456073253805213, 0.3253102921622630}, // k=283 + {-0.9415440651830208, 0.3368898533922201}, // k=284 + {-0.9373390119125750, 0.3484186802494346}, // 
k=285 + {-0.9329927988347390, 0.3598950365349881}, // k=286 + {-0.9285060804732156, 0.3713171939518374}, // k=287 + {-0.9238795325112868, 0.3826834323650897}, // k=288 + {-0.9191138516900578, 0.3939920400610479}, // k=289 + {-0.9142097557035307, 0.4052413140049897}, // k=290 + {-0.9091679830905225, 0.4164295600976369}, // k=291 + {-0.9039892931234434, 0.4275550934302818}, // k=292 + {-0.8986744656939540, 0.4386162385385273}, // k=293 + {-0.8932243011955153, 0.4496113296546067}, // k=294 + {-0.8876396204028539, 0.4605387109582401}, // k=295 + {-0.8819212643483550, 0.4713967368259976}, // k=296 + {-0.8760700941954066, 0.4821837720791227}, // k=297 + {-0.8700869911087115, 0.4928981922297839}, // k=298 + {-0.8639728561215868, 0.5035383837257175}, // k=299 + {-0.8577286100002721, 0.5141027441932216}, // k=300 + {-0.8513551931052653, 0.5245896826784687}, // k=301 + {-0.8448535652497072, 0.5349976198870969}, // k=302 + {-0.8382247055548382, 0.5453249884220461}, // k=303 + {-0.8314696123025455, 0.5555702330196020}, // k=304 + {-0.8245893027850253, 0.5657318107836132}, // k=305 + {-0.8175848131515837, 0.5758081914178453}, // k=306 + {-0.8104571982525948, 0.5857978574564389}, // k=307 + {-0.8032075314806449, 0.5956993044924332}, // k=308 + {-0.7958369046088836, 0.6055110414043254}, // k=309 + {-0.7883464276266063, 0.6152315905806267}, // k=310 + {-0.7807372285720946, 0.6248594881423862}, // k=311 + {-0.7730104533627371, 0.6343932841636453}, // k=312 + {-0.7651672656224591, 0.6438315428897913}, // k=313 + {-0.7572088465064848, 0.6531728429537765}, // k=314 + {-0.7491363945234593, 0.6624157775901718}, // k=315 + {-0.7409511253549591, 0.6715589548470184}, // k=316 + {-0.7326542716724128, 0.6806009977954530}, // k=317 + {-0.7242470829514670, 0.6895405447370668}, // k=318 + {-0.7157308252838187, 0.6983762494089728}, // k=319 + {-0.7071067811865477, 0.7071067811865475}, // k=320 + {-0.6983762494089730, 0.7157308252838185}, // k=321 + {-0.6895405447370671, 0.7242470829514668}, // 
k=322 + {-0.6806009977954532, 0.7326542716724126}, // k=323 + {-0.6715589548470187, 0.7409511253549589}, // k=324 + {-0.6624157775901720, 0.7491363945234590}, // k=325 + {-0.6531728429537771, 0.7572088465064842}, // k=326 + {-0.6438315428897915, 0.7651672656224590}, // k=327 + {-0.6343932841636459, 0.7730104533627367}, // k=328 + {-0.6248594881423865, 0.7807372285720944}, // k=329 + {-0.6152315905806273, 0.7883464276266059}, // k=330 + {-0.6055110414043257, 0.7958369046088835}, // k=331 + {-0.5956993044924331, 0.8032075314806451}, // k=332 + {-0.5857978574564391, 0.8104571982525947}, // k=333 + {-0.5758081914178452, 0.8175848131515838}, // k=334 + {-0.5657318107836135, 0.8245893027850251}, // k=335 + {-0.5555702330196022, 0.8314696123025452}, // k=336 + {-0.5453249884220468, 0.8382247055548379}, // k=337 + {-0.5349976198870973, 0.8448535652497070}, // k=338 + {-0.5245896826784694, 0.8513551931052649}, // k=339 + {-0.5141027441932218, 0.8577286100002720}, // k=340 + {-0.5035383837257180, 0.8639728561215865}, // k=341 + {-0.4928981922297842, 0.8700869911087113}, // k=342 + {-0.4821837720791226, 0.8760700941954067}, // k=343 + {-0.4713967368259979, 0.8819212643483549}, // k=344 + {-0.4605387109582399, 0.8876396204028540}, // k=345 + {-0.4496113296546069, 0.8932243011955152}, // k=346 + {-0.4386162385385276, 0.8986744656939538}, // k=347 + {-0.4275550934302825, 0.9039892931234431}, // k=348 + {-0.4164295600976372, 0.9091679830905224}, // k=349 + {-0.4052413140049904, 0.9142097557035305}, // k=350 + {-0.3939920400610482, 0.9191138516900577}, // k=351 + {-0.3826834323650903, 0.9238795325112865}, // k=352 + {-0.3713171939518378, 0.9285060804732155}, // k=353 + {-0.3598950365349879, 0.9329927988347390}, // k=354 + {-0.3484186802494348, 0.9373390119125748}, // k=355 + {-0.3368898533922199, 0.9415440651830208}, // k=356 + {-0.3253102921622633, 0.9456073253805212}, // k=357 + {-0.3136817403988915, 0.9495281805930367}, // k=358 + {-0.3020059493192285, 0.9533060403541938}, // 
k=359 + {-0.2902846772544624, 0.9569403357322088}, // k=360 + {-0.2785196893850536, 0.9604305194155657}, // k=361 + {-0.2667127574748985, 0.9637760657954398}, // k=362 + {-0.2548656596045143, 0.9669764710448522}, // k=363 + {-0.2429801799032641, 0.9700312531945440}, // k=364 + {-0.2310581082806709, 0.9729399522055602}, // k=365 + {-0.2191012401568701, 0.9757021300385285}, // k=366 + {-0.2071113761922185, 0.9783173707196277}, // k=367 + {-0.1950903220161287, 0.9807852804032303}, // k=368 + {-0.1830398879551410, 0.9831054874312163}, // k=369 + {-0.1709618887603017, 0.9852776423889411}, // k=370 + {-0.1588581433338615, 0.9873014181578583}, // k=371 + {-0.1467304744553623, 0.9891765099647809}, // k=372 + {-0.1345807085071264, 0.9909026354277800}, // k=373 + {-0.1224106751992160, 0.9924795345987101}, // k=374 + {-0.1102222072938833, 0.9939069700023561}, // k=375 + {-0.0980171403295605, 0.9951847266721969}, // k=376 + {-0.0857973123444402, 0.9963126121827780}, // k=377 + {-0.0735645635996674, 0.9972904566786902}, // k=378 + {-0.0613207363022090, 0.9981181129001492}, // k=379 + {-0.0490676743274180, 0.9987954562051724}, // k=380 + {-0.0368072229413593, 0.9993223845883494}, // k=381 + {-0.0245412285229124, 0.9996988186962042}, // k=382 + {-0.0122715382857205, 0.9999247018391445}, // k=383 + {-0.0000000000000002, 1.0000000000000000}, // k=384 + {0.0122715382857201, 0.9999247018391445}, // k=385 + {0.0245412285229120, 0.9996988186962042}, // k=386 + {0.0368072229413590, 0.9993223845883495}, // k=387 + {0.0490676743274177, 0.9987954562051724}, // k=388 + {0.0613207363022086, 0.9981181129001492}, // k=389 + {0.0735645635996670, 0.9972904566786902}, // k=390 + {0.0857973123444399, 0.9963126121827780}, // k=391 + {0.0980171403295601, 0.9951847266721969}, // k=392 + {0.1102222072938829, 0.9939069700023561}, // k=393 + {0.1224106751992156, 0.9924795345987101}, // k=394 + {0.1345807085071260, 0.9909026354277800}, // k=395 + {0.1467304744553619, 0.9891765099647809}, // k=396 + 
{0.1588581433338612, 0.9873014181578584}, // k=397 + {0.1709618887603013, 0.9852776423889412}, // k=398 + {0.1830398879551406, 0.9831054874312164}, // k=399 + {0.1950903220161283, 0.9807852804032304}, // k=400 + {0.2071113761922181, 0.9783173707196278}, // k=401 + {0.2191012401568697, 0.9757021300385286}, // k=402 + {0.2310581082806706, 0.9729399522055603}, // k=403 + {0.2429801799032638, 0.9700312531945440}, // k=404 + {0.2548656596045140, 0.9669764710448523}, // k=405 + {0.2667127574748982, 0.9637760657954400}, // k=406 + {0.2785196893850533, 0.9604305194155658}, // k=407 + {0.2902846772544621, 0.9569403357322089}, // k=408 + {0.3020059493192281, 0.9533060403541939}, // k=409 + {0.3136817403988911, 0.9495281805930368}, // k=410 + {0.3253102921622629, 0.9456073253805213}, // k=411 + {0.3368898533922196, 0.9415440651830209}, // k=412 + {0.3484186802494345, 0.9373390119125750}, // k=413 + {0.3598950365349876, 0.9329927988347391}, // k=414 + {0.3713171939518374, 0.9285060804732156}, // k=415 + {0.3826834323650900, 0.9238795325112866}, // k=416 + {0.3939920400610479, 0.9191138516900579}, // k=417 + {0.4052413140049900, 0.9142097557035306}, // k=418 + {0.4164295600976369, 0.9091679830905225}, // k=419 + {0.4275550934302821, 0.9039892931234433}, // k=420 + {0.4386162385385273, 0.8986744656939540}, // k=421 + {0.4496113296546066, 0.8932243011955153}, // k=422 + {0.4605387109582396, 0.8876396204028542}, // k=423 + {0.4713967368259976, 0.8819212643483550}, // k=424 + {0.4821837720791222, 0.8760700941954069}, // k=425 + {0.4928981922297839, 0.8700869911087115}, // k=426 + {0.5035383837257178, 0.8639728561215866}, // k=427 + {0.5141027441932216, 0.8577286100002722}, // k=428 + {0.5245896826784691, 0.8513551931052651}, // k=429 + {0.5349976198870969, 0.8448535652497072}, // k=430 + {0.5453249884220465, 0.8382247055548380}, // k=431 + {0.5555702330196018, 0.8314696123025455}, // k=432 + {0.5657318107836131, 0.8245893027850253}, // k=433 + {0.5758081914178449, 
0.8175848131515840}, // k=434 + {0.5857978574564388, 0.8104571982525949}, // k=435 + {0.5956993044924329, 0.8032075314806453}, // k=436 + {0.6055110414043253, 0.7958369046088837}, // k=437 + {0.6152315905806270, 0.7883464276266061}, // k=438 + {0.6248594881423861, 0.7807372285720946}, // k=439 + {0.6343932841636456, 0.7730104533627369}, // k=440 + {0.6438315428897912, 0.7651672656224592}, // k=441 + {0.6531728429537768, 0.7572088465064846}, // k=442 + {0.6624157775901715, 0.7491363945234596}, // k=443 + {0.6715589548470183, 0.7409511253549591}, // k=444 + {0.6806009977954527, 0.7326542716724131}, // k=445 + {0.6895405447370668, 0.7242470829514670}, // k=446 + {0.6983762494089724, 0.7157308252838190}, // k=447 + {0.7071067811865474, 0.7071067811865477}, // k=448 + {0.7157308252838188, 0.6983762494089727}, // k=449 + {0.7242470829514667, 0.6895405447370672}, // k=450 + {0.7326542716724129, 0.6806009977954530}, // k=451 + {0.7409511253549589, 0.6715589548470187}, // k=452 + {0.7491363945234594, 0.6624157775901718}, // k=453 + {0.7572088465064842, 0.6531728429537771}, // k=454 + {0.7651672656224588, 0.6438315428897915}, // k=455 + {0.7730104533627367, 0.6343932841636459}, // k=456 + {0.7807372285720944, 0.6248594881423865}, // k=457 + {0.7883464276266059, 0.6152315905806274}, // k=458 + {0.7958369046088833, 0.6055110414043257}, // k=459 + {0.8032075314806451, 0.5956993044924332}, // k=460 + {0.8104571982525947, 0.5857978574564391}, // k=461 + {0.8175848131515837, 0.5758081914178452}, // k=462 + {0.8245893027850251, 0.5657318107836136}, // k=463 + {0.8314696123025452, 0.5555702330196022}, // k=464 + {0.8382247055548377, 0.5453249884220468}, // k=465 + {0.8448535652497070, 0.5349976198870973}, // k=466 + {0.8513551931052649, 0.5245896826784694}, // k=467 + {0.8577286100002720, 0.5141027441932219}, // k=468 + {0.8639728561215864, 0.5035383837257181}, // k=469 + {0.8700869911087113, 0.4928981922297843}, // k=470 + {0.8760700941954067, 0.4821837720791226}, // k=471 + 
{0.8819212643483548, 0.4713967368259979}, // k=472 + {0.8876396204028539, 0.4605387109582399}, // k=473 + {0.8932243011955151, 0.4496113296546070}, // k=474 + {0.8986744656939538, 0.4386162385385277}, // k=475 + {0.9039892931234431, 0.4275550934302825}, // k=476 + {0.9091679830905224, 0.4164295600976373}, // k=477 + {0.9142097557035305, 0.4052413140049904}, // k=478 + {0.9191138516900577, 0.3939920400610483}, // k=479 + {0.9238795325112865, 0.3826834323650904}, // k=480 + {0.9285060804732155, 0.3713171939518378}, // k=481 + {0.9329927988347390, 0.3598950365349880}, // k=482 + {0.9373390119125748, 0.3484186802494349}, // k=483 + {0.9415440651830208, 0.3368898533922200}, // k=484 + {0.9456073253805212, 0.3253102921622634}, // k=485 + {0.9495281805930367, 0.3136817403988915}, // k=486 + {0.9533060403541936, 0.3020059493192286}, // k=487 + {0.9569403357322088, 0.2902846772544625}, // k=488 + {0.9604305194155657, 0.2785196893850537}, // k=489 + {0.9637760657954398, 0.2667127574748986}, // k=490 + {0.9669764710448522, 0.2548656596045144}, // k=491 + {0.9700312531945440, 0.2429801799032642}, // k=492 + {0.9729399522055602, 0.2310581082806710}, // k=493 + {0.9757021300385285, 0.2191012401568702}, // k=494 + {0.9783173707196277, 0.2071113761922185}, // k=495 + {0.9807852804032303, 0.1950903220161287}, // k=496 + {0.9831054874312163, 0.1830398879551410}, // k=497 + {0.9852776423889411, 0.1709618887603018}, // k=498 + {0.9873014181578583, 0.1588581433338616}, // k=499 + {0.9891765099647809, 0.1467304744553624}, // k=500 + {0.9909026354277800, 0.1345807085071264}, // k=501 + {0.9924795345987100, 0.1224106751992160}, // k=502 + {0.9939069700023561, 0.1102222072938834}, // k=503 + {0.9951847266721969, 0.0980171403295605}, // k=504 + {0.9963126121827780, 0.0857973123444403}, // k=505 + {0.9972904566786902, 0.0735645635996674}, // k=506 + {0.9981181129001492, 0.0613207363022091}, // k=507 + {0.9987954562051724, 0.0490676743274181}, // k=508 + {0.9993223845883494, 
0.0368072229413594}, // k=509 + {0.9996988186962042, 0.0245412285229124}, // k=510 + {0.9999247018391445, 0.0122715382857206}, // k=511 +}; diff --git a/include/bout/utils.hxx b/include/bout/utils.hxx index 5088a025c1..3dbae60d74 100644 --- a/include/bout/utils.hxx +++ b/include/bout/utils.hxx @@ -8,7 +8,7 @@ * Copyright 2010 - 2026 BOUT++ contributors * * Contact: Ben Dudson, dudson2@llnl.gov - * + * * This file is part of BOUT++. * * BOUT++ is free software: you can redistribute it and/or modify @@ -29,12 +29,11 @@ #ifndef BOUT_UTILS_H #define BOUT_UTILS_H -#include "bout/build_config.hxx" - #include "bout/array.hxx" #include "bout/assert.hxx" #include "bout/bout_types.hxx" #include "bout/boutexception.hxx" +#include "bout/build_config.hxx" #include "bout/region.hxx" #include "bout/unused.hxx" @@ -46,6 +45,11 @@ #include #include #include +#include +#include +#include + +class Field; #ifdef _MSC_VER // finite is not actually standard C++, it's a BSD extention for C @@ -436,15 +440,13 @@ inline BoutReal randomu() { * Calculate the square of a variable \p t * i.e. t * t */ -template -inline T SQ(const T& t) { +template >>> +inline auto SQ(const T& t) { return t * t; } -template <> -BOUT_HOST_DEVICE inline BoutReal SQ(const BoutReal& t) { - return t * t; -} +BOUT_HOST_DEVICE inline BoutReal SQ(const BoutReal& t) { return t * t; } /*! * Round \p x to the nearest integer @@ -484,7 +486,7 @@ inline bool is_pow2(int x) { return x && !((x - 1) & x); } /*! * Return the sign of a number \p a - * by testing if a > 0 + * by testing if a > 0 */ template T SIGN(T a) { // Return +1 or -1 (0 -> +1) @@ -511,7 +513,7 @@ inline void checkData(BoutReal f) { } #else /// Ignored with disabled CHECK; Throw an exception if \p f is not finite -inline void checkData(BoutReal UNUSED(f)){}; +inline void checkData(BoutReal UNUSED(f)) {}; #endif /*! @@ -587,7 +589,7 @@ BoutReal stringToReal(const std::string& s); /*! 
* Convert a string to an int - * + * * Throws BoutException if can't be done */ int stringToInt(const std::string& s); @@ -604,7 +606,7 @@ std::list& strsplit(const std::string& s, char delim, /*! * Split a string on a given delimiter - * + * * @param[in] s The string to split (not modified by call) * @param[in] delim The delimiter to split on (single char) */ @@ -612,7 +614,7 @@ std::list strsplit(const std::string& s, char delim); /*! * Strips leading and trailing spaces from a string - * + * * @param[in] s The string to trim (not modified) * @param[in] c Collection of characters to remove */ @@ -620,7 +622,7 @@ std::string trim(const std::string& s, const std::string& c = " \t\r"); /*! * Strips leading spaces from a string - * + * * @param[in] s The string to trim (not modified) * @param[in] c Collection of characters to remove */ @@ -628,7 +630,7 @@ std::string trimLeft(const std::string& s, const std::string& c = " \t"); /*! * Strips leading spaces from a string - * + * * @param[in] s The string to trim (not modified) * @param[in] c Collection of characters to remove */ @@ -636,7 +638,7 @@ std::string trimRight(const std::string& s, const std::string& c = " \t\r"); /*! * Strips the comments from a string - * + * * @param[in] s The string to trim (not modified) * @param[in] c Collection of characters to remove */ diff --git a/include/bout/vector3d.hxx b/include/bout/vector3d.hxx index 655a85ca73..73d3912838 100644 --- a/include/bout/vector3d.hxx +++ b/include/bout/vector3d.hxx @@ -3,12 +3,10 @@ * * \brief Class for 3D vectors. Built on the Field3D class. * - * \author B. Dudson, October 2007 - * ************************************************************************** - * Copyright 2010 B.D.Dudson, S.Farley, M.V.Umansky, X.Q.Xu + * Copyright 2010 - 2026 BOUT++ contributors * - * Contact: Ben Dudson, bd512@york.ac.uk + * Contact: Ben Dudson, dudson2@llnl.gov * * This file is part of BOUT++. 
* diff --git a/manual/sphinx/developer_docs/data_types.rst b/manual/sphinx/developer_docs/data_types.rst index f9411dcb39..7feb3945aa 100644 --- a/manual/sphinx/developer_docs/data_types.rst +++ b/manual/sphinx/developer_docs/data_types.rst @@ -280,7 +280,7 @@ The region to iterate over can be over ``Field2D``, ``Field3D``, or - `RGN_NOY`, which skips the y boundaries and guard cells New regions can be created and modified, see section below. - + A standard C++ range for loop can also be used, but this is unlikely to OpenMP parallelise or vectorise:: @@ -306,7 +306,7 @@ For loops inside parallel regions, there is ``BOUT_FOR_INNER``:: } ... } - + If a more general OpenMP directive is needed, there is ``BOUT_FOR_OMP``:: @@ -314,7 +314,7 @@ If a more general OpenMP directive is needed, there is BOUT_FOR_OMP(i, region, parallel for reduction(max:result)) { result = f[i] > result ? f[i] : result; } - + The iterator provides access to the x, y, z indices:: Field3D f(0.0); @@ -385,14 +385,14 @@ good performance on typical x86_64 hardware. Some simple diagnostics are printed at the start of the BOUT++ output which may help. For example the ``blob2d`` example prints:: - Registered region 3D RGN_ALL: - Total blocks : 1040, min(count)/max(count) : 64 (1040)/ 64 (1040), Max imbalance : 1, Small block count : 0 + Registered region 3D RGN_ALL: + Total blocks : 1040, min(count)/max(count) : 64 (1040)/ 64 (1040), Max imbalance : 1, Small block count : 0 In this case all blocks are the same size, so the ``Max imbalance`` (ratio of maximum to minimum block size) is 1. The ``Small block count`` is currently defined as the number of blocks with a size less than half the maximum block size. Ideally all blocks should be a -similar size, so that work is evenly balanced between threads. +similar size, so that work is evenly balanced between threads. Creating new regions ~~~~~~~~~~~~~~~~~~~~ @@ -422,7 +422,7 @@ in the mask (i.e. 
set subtraction):: or:: auto region = mask(mesh->getRegion2D("RGN_ALL"), mesh->getRegion2D("RGN_GUARDS")); - + The above example would produce a region containing all the indices in ``RGN_ALL`` which are not in ``RGN_GUARDS``. @@ -444,7 +444,7 @@ In the current implementation overwriting a region, by attempting to add a region which already exists, is not allowed, and will result in a ``BoutException`` being thrown. This restriction may be removed in future. - + .. _sec-rangeiterator: Iterating over ranges @@ -493,33 +493,73 @@ initialised in the constructor. .. _sec-fieldops: -Field2D/Field3D Arithmetic Operators ------------------------------------- - -The arithmetic operators (``+``, ``-``, ``/``, ``*``) for `Field2D` -and `Field3D` are generated automatically using the `Jinja`_ -templating system. This requires Python 3 (2.7 may work, but only 3 is -supported). - -Because this is fairly low-level code, and we don't expect it to -change very much, the generated code is kept in the git -repository. This has the benefit that Python and Jinja are not needed -to build BOUT++, only to change the ``Field`` operator code. - -.. warning:: You should not modify the generated code - directly. Instead, modify the template and re-generate - the code. If you commit changes to the template and/or - driver, make sure to re-generate the code and commit it - as well - -The Jinja template is in ``src/field/gen_fieldops.jinja``, and the -driver is ``src/field/gen_fieldops.py``. The driver loops over every -combination of `BoutReal`, `Field2D`, `Field3D` (collectively just -"fields" here) with the arithmetic operators, and uses the template to -generate the appropriate code. There is some logic in the template to -handle certain combinations of the input fields: for example, for the -binary infix operators, only check the two arguments are on identical -meshes if neither is `BoutReal`. 
+Field expressions and generated operators +----------------------------------------- + +At user level, field algebra now looks more uniform than it used to: +ordinary arithmetic and many unary algebraic operators can be combined +into lazy expressions and only materialized when a concrete field or +scalar result is needed. + +This implementation is split into two layers. + +``BinaryExpr`` and views +~~~~~~~~~~~~~~~~~~~~~~~~ + +The lazy-expression layer lives in ``include/bout/fieldops.hxx``. The +central type is ``BinaryExpr``, which stores: + +- views of the left and right expression operands +- the operation functor +- mesh and metadata needed to check compatibility and materialize the + result +- a cached list of linear region indices describing where the + expression is valid + +`Field2D`, `Field3D`, and `FieldPerp` act as expression leaves by +providing lightweight ``View`` types. Those views are the device- and +backend-friendly objects used by the expression evaluator. + +Materialization happens when a field is constructed or assigned from an +expression, when an expression is stored in `Options`, or when a scalar +reduction such as ``min`` or ``mean`` is requested. The same mechanism +is also used to propagate metadata such as mesh, staggered location, +directions, and `FieldPerp` y-index. + +The unary algebraic helpers in ``include/bout/field.hxx`` build on the +same mechanism. Functions such as ``sqrt``, ``abs``, ``SQ``, +``if_else``, ``if_else_zero``, ``min``, ``max``, and ``mean`` can all +operate directly on lazy expressions. + +Generated eager operators +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The eager arithmetic operators and in-place update paths are still +generated automatically using the `Jinja`_ templating system. 
The main +files are: + +- ``src/field/gen_fieldops.jinja`` +- ``src/field/gen_fieldops.py`` +- ``src/field/generated_fieldops.cxx`` + +The generated code handles the broad matrix of combinations between +`BoutReal`, `Field2D`, `Field3D`, `Field3DParallel`, and `FieldPerp`, +including several mixed-rank and in-place cases where hand-maintaining +all overloads would be error-prone. + +The generated loops now also depend on the configured execution backend. +At configure time, the generator is told whether to emit RAJA-based, +OpenMP-based, or serial loop bodies for the eager paths. + +Because this is low-level code, the generated source is kept in the git +repository. Python and Jinja are therefore only needed when changing the +operator generator itself, not for an ordinary build. + +.. warning:: + + Do not edit ``generated_fieldops.cxx`` directly. Instead, modify the + template or generator, then regenerate the file and commit both the + source change and the regenerated output. To install Jinja: @@ -527,16 +567,17 @@ To install Jinja: $ pip3 install --user Jinja2 -To re-generate the code, there is a ``make`` target for -``gen_fieldops.cxx`` in ``src/field/makefile``. This also tries to -apply ``clang-format`` in order to keep to a consistent code style. +To regenerate the code, use the target for ``gen_fieldops.cxx`` in +``src/field/makefile`` or the corresponding CMake-driven generation +path. This also applies ``clang-format`` to keep the output consistent. -.. note:: ``clang-format`` is bundled with ``clang``. This should be - available through your system package manager. If you do not - have sufficient privileges on your system, you can install - it from the source `clang`_. One of the BOUT++ maintainers - can help apply it for you too. +.. note:: + + ``clang-format`` is bundled with ``clang``. This should be available + through your system package manager. 
If you do not have sufficient + privileges on your system, you can install it from the source + `clang`_. One of the BOUT++ maintainers can also help apply it for + you. .. _Jinja: http://jinja.pocoo.org/ .. _clang: https://clang.llvm.org/ - diff --git a/manual/sphinx/index.rst b/manual/sphinx/index.rst index 9408d05057..6a5a48ba34 100644 --- a/manual/sphinx/index.rst +++ b/manual/sphinx/index.rst @@ -15,10 +15,12 @@ The documentation is divided into the following sections: * :ref:`model-outputs` * :ref:`bout-interfaces` - + + * :ref:`performance-and-accelerators` + * :ref:`developer-docs` - + .. toctree:: :maxdepth: 2 :caption: Getting started @@ -30,25 +32,24 @@ The documentation is divided into the following sections: user_docs/advanced_install user_docs/running_bout user_docs/new_in_v5 - + .. toctree:: :maxdepth: 2 :caption: BOUT++ models :name: bout-models - + user_docs/physics_models user_docs/makefiles user_docs/variable_init user_docs/boundary_options user_docs/testing - user_docs/gpu_support user_docs/adios2 - + .. toctree:: :maxdepth: 2 :caption: Model inputs :name: model-inputs - + user_docs/bout_options user_docs/input_grids @@ -56,19 +57,20 @@ The documentation is divided into the following sections: :maxdepth: 2 :caption: Model outputs :name: model-outputs - + user_docs/output_and_post user_docs/python_boutpp - + .. toctree:: :maxdepth: 2 :caption: BOUT++ interfaces :name: bout-interfaces - + user_docs/time_integration user_docs/parallel-transforms user_docs/laplacian user_docs/differential_operators + user_docs/field_expressions user_docs/algebraic_operators user_docs/staggered_grids user_docs/eigenvalue_solver @@ -76,6 +78,13 @@ The documentation is divided into the following sections: user_docs/invertable_operator user_docs/petsc +.. toctree:: + :maxdepth: 2 + :caption: Performance and accelerators + :name: performance-and-accelerators + + user_docs/gpu_support + .. 
toctree:: :maxdepth: 1 :caption: Field-aligned coordinate systems diff --git a/manual/sphinx/user_docs/algebraic_operators.rst b/manual/sphinx/user_docs/algebraic_operators.rst index b2089f9ec3..b8c40d4dc5 100644 --- a/manual/sphinx/user_docs/algebraic_operators.rst +++ b/manual/sphinx/user_docs/algebraic_operators.rst @@ -1,28 +1,39 @@ .. _sec-algebraic-ops: Algebraic operators -========================= +=================== BOUT++ provides a wide variety of algebraic operators acting on fields. -The algebraic operators are listed in :numref:`tab-algebraic-ops`. -For a completely up-to-date list, see the ``Non-member functions`` -part of :doc:`field2d.hxx<../_breathe_autogen/file/field2d_8hxx>`, -:doc:`field3d.hxx<../_breathe_autogen/file/field3d_8hxx>`, +Most of these operators can participate in the lazy field-expression +system described in :ref:`sec-field-expressions`. In practice this means +you can usually write ordinary algebraic code and let BOUT++ delay +evaluation until assignment or reduction. + +For a completely up-to-date list, see the ``Non-member functions`` part +of :doc:`field2d.hxx<../_breathe_autogen/file/field2d_8hxx>`, +:doc:`field3d.hxx<../_breathe_autogen/file/field3d_8hxx>`, and :doc:`fieldperp.hxx<../_breathe_autogen/file/fieldperp_8hxx>`. +Common operators +---------------- + .. _tab-algebraic-ops: .. 
table:: Algebraic operators - +------------------------------------------+------------------------------------------------------+ - | Name | Description | + +------------------------------------------+------------------------------------------------------+ + | Name | Description | +==========================================+======================================================+ - | ``min(f, allpe=true, region)`` | Minimum (optionally over all processes) | + | ``min(f, allpe=true, region)`` | Minimum (optionally over all processes) | +------------------------------------------+------------------------------------------------------+ | ``max(f, allpe=true, region)`` | Maximum (optionally over all processes) | +------------------------------------------+------------------------------------------------------+ + | ``mean(f, allpe=true, region)`` | Mean (optionally over all processes) | + +------------------------------------------+------------------------------------------------------+ | ``pow(lhs, rhs, region)`` | :math:`\mathtt{lhs}^\mathtt{rhs}` | +------------------------------------------+------------------------------------------------------+ + | ``SQ(f, region)`` | Square of ``f`` | + +------------------------------------------+------------------------------------------------------+ | ``sqrt(f, region)`` | :math:`\sqrt{(f)}` | +------------------------------------------+------------------------------------------------------+ | ``abs(f, region)`` | :math:`|f|` | @@ -65,24 +76,43 @@ part of :doc:`field2d.hxx<../_breathe_autogen/file/field2d_8hxx>`, | | of `f` as opposed to the AC, alternating current, or | | | fluctuating part) | +------------------------------------------+------------------------------------------------------+ + | ``if_else(cond, lhs, rhs)`` | Select between two algebraic branches | + +------------------------------------------+------------------------------------------------------+ + | ``if_else_zero(cond, expr)`` | Select either ``expr`` or zero 
| + +------------------------------------------+------------------------------------------------------+ + +These operators can usually be combined directly in expressions:: + + Field3D rhs = sqrt(SQ(n) + SQ(T)); + Field3D masked = if_else(use_drive, source * profile, sink * profile); + BoutReal max_error = max(abs(lhs - rhs), true); + +Reductions such as ``min``, ``max``, and ``mean`` can operate directly +on an expression, so an intermediate field is often unnecessary. + +Region arguments +---------------- -These operators take a ``region`` argument, whose values can be [#]_ (see -:ref:`sec-iterating`) +These operators take a ``region`` argument. Common values are [#]_ (see +:ref:`sec-iterating`): -- `RGN_ALL`, which is the whole mesh; +- ``RGN_ALL``, which is the whole mesh +- ``RGN_NOBNDRY``, which skips all boundaries +- ``RGN_NOX``, which skips the x boundaries +- ``RGN_NOY``, which skips the y boundaries -- `RGN_NOBNDRY`, which skips all boundaries; +The default is usually ``RGN_ALL``. Restricting the region can improve +performance when guard-cell values will not be used. -- `RGN_NOX`, which skips the x boundaries +When a region-limited expression is materialized into a field, only the +selected region is guaranteed to contain valid values. This is the same +performance-oriented convention used by other field operators. -- `RGN_NOY`, which skips the y boundaries +Further reading +--------------- -The default value for the region argument is `RGN_ALL` which should work in all -cases. However, the region argument can be used for optimization, to skip -calculations in guard cells if it is known that those results will not be -needed (for example, if no derivatives of the result will be calculated). Since -these operators can be relatively expensive compared to addition, subtraction, -multiplication this can be a useful performance improvement. +- :ref:`sec-field-expressions` +- :ref:`sec-gpusupport` -.. 
[#] More regions may be added in future, for example to act on only subsets of the - physical domain. +.. [#] More regions may be added in future, for example to act on only + subsets of the physical domain. diff --git a/manual/sphinx/user_docs/field_expressions.rst b/manual/sphinx/user_docs/field_expressions.rst new file mode 100644 index 0000000000..62a50988d6 --- /dev/null +++ b/manual/sphinx/user_docs/field_expressions.rst @@ -0,0 +1,151 @@ +.. _sec-field-expressions: + +Field Expressions +================= + +BOUT++ field algebra now supports *lazy expressions* for many common +operations. Instead of creating a temporary field for every ``+``, +``-``, ``*``, ``/``, ``sqrt`` or ``abs``, BOUT++ can keep the expression +symbolic and evaluate it only when a concrete field or scalar result is +needed. + +This keeps ordinary model code readable while reducing temporary +allocations and extra loops over the mesh. It is especially helpful for +accelerator backends, where launching fewer kernels matters. + +What stays lazy +--------------- + +The following operations can form lazy expressions over `Field2D`, +`Field3D`, `Field3DParallel`, and `FieldPerp` where the combination makes +sense: + +- Arithmetic operators: ``+``, ``-``, ``*``, ``/`` +- Unary algebraic operators such as ``sqrt``, ``abs``, ``exp``, ``log``, + ``sin``, ``cos``, ``tan``, ``sinh``, ``cosh``, ``tanh``, ``floor``, + and ``SQ`` +- Simple conditionals with ``if_else`` and ``if_else_zero`` +- Reductions such as ``min``, ``max``, and ``mean`` + +For example:: + + Field3D n, T; + Field3D result; + + result = sqrt(SQ(n) + SQ(T)); + +The right-hand side can stay lazy until the assignment to ``result``. + +When evaluation happens +----------------------- + +An expression is evaluated when BOUT++ needs actual storage or a scalar +answer. 
Common triggers are: + +- assigning to a field +- constructing a field from an expression +- assigning a field expression into an `Options` object +- calling scalar reductions such as ``min``, ``max``, or ``mean`` + +Examples:: + + Field3D result = n + T; + options["rhs"] = n + T; + BoutReal max_value = max(abs(n + T), true); + +Region-limited expressions +-------------------------- + +Many algebraic operators take a ``region`` argument, usually defaulting +to ``RGN_ALL``. A lazy expression keeps track of that region. + +Only values inside the requested region are guaranteed to be valid after +materialization. This is useful for skipping guard-cell work when the +result will only be used in a smaller region:: + + Field3D interior = abs(n, "RGN_NOBNDRY"); + +As with other region-limited field operations in BOUT++, code that later +uses guard cells should communicate or otherwise fill those cells before +relying on them. + +Metadata propagation +-------------------- + +When an expression is materialized into a field, BOUT++ propagates the +field metadata carried by the expression: + +- mesh pointer +- cell location +- field directions +- for `FieldPerp`, the y-index + +This means expressions are intended to behave like ordinary field +operations in user code. Compatibility checks still apply: combining +fields on different meshes or incompatible staggered locations is an +error. 
+ +Mixed field types +----------------- + +Several mixed-type combinations are supported directly: + +- `Field2D` with `Field3D`: the 2D quantity is broadcast in ``z`` +- `FieldPerp` with matching perpendicular data: the operation uses the + `FieldPerp` y-index +- expressions involving metric components may return + `Coordinates::FieldMetric`, which is `Field2D` or `Field3D` depending + on how BOUT++ was built + +In practice, this means code such as:: + + Coordinates::FieldMetric grad = coords->J / coords->g_22; + Field3D rhs = density * temperature + background_2d; + +can use the same algebraic style even when metric dimensionality or +field rank differs. + +Conditionals +------------ + +``if_else`` selects between two algebraic branches without forcing the +branches to be precomputed:: + + Field3D rhs = if_else(use_source, source * density, sink * density); + +``if_else_zero(condition, expr)`` is a shorthand for selecting either an +expression or zero:: + + Field3D rhs = if_else_zero(include_drive, drive * profile); + +This is particularly convenient when optional source terms are enabled +or disabled by compile-time or run-time logic. + +Reductions on expressions +------------------------- + +Reductions can operate directly on expressions instead of requiring an +intermediate field:: + + BoutReal rms = sqrt(mean(SQ(n - n0), true, "RGN_NOBNDRY")); + BoutReal max_error = max(abs(lhs - rhs), true); + +This is often clearer than explicitly constructing a temporary field, +and it avoids extra storage. + +Relation to GPU execution +------------------------- + +Lazy field expressions are the high-level path to reducing temporary +work. They are a good default when ordinary field algebra expresses the +operation clearly. + +For more control, especially when you want to fuse derivative operators +into a single explicit loop, see :ref:`sec-gpusupport`. 
+ +See also +-------- + +- :doc:`algebraic_operators` +- :doc:`gpu_support` +- :doc:`differential_operators` diff --git a/manual/sphinx/user_docs/gpu_support.rst b/manual/sphinx/user_docs/gpu_support.rst index cc0cba8def..670c1a9b32 100644 --- a/manual/sphinx/user_docs/gpu_support.rst +++ b/manual/sphinx/user_docs/gpu_support.rst @@ -3,68 +3,92 @@ GPU support =========== -This section describes work in progress to develop GPU support in -BOUT++ models. It includes both configuration and compilation on GPU -systems, but also ways to write physics models which are designed to -give higher performance. These methods may also be beneficial for CPU -architectures, but have fewer safety checks, less functionality and -run-time flexibility than the field operators. +This section describes the main ways to run BOUT++ work efficiently on +GPUs or other accelerator-style backends. -To use the single index operators and the ``BOUT_FOR_RAJA`` loop macro:: +There are now two complementary levels of optimization: + +1. Write ordinary field algebra and let BOUT++ keep many algebraic + expressions lazy until assignment or reduction. +2. Drop down to explicit `RAJA` loops and single-index operators when + you want complete control over loop fusion and kernel structure. + +The first approach is usually the best starting point. The second is for +hot loops where you want to manually combine derivative operators, +accessors, and run-time captures in one kernel. + +Automatic fusion with field expressions +--------------------------------------- + +Many algebraic operations on fields can now be represented as lazy +expressions. This keeps user code close to the familiar field-based +style while reducing temporary fields and extra passes over memory. + +Typical examples are: + +.. 
code-block:: cpp + + Field3D rhs = sqrt(SQ(n) + SQ(T)); + ddt(n) = source * profile - sink * n; + BoutReal max_error = max(abs(lhs - rhs), true); + +This is the highest-level route to better execution behavior, and it is +usually the most maintainable. See :ref:`sec-field-expressions` for the +details of what stays lazy and when evaluation happens. + +Lazy expressions mainly help with *algebraic* fusion. If your hot path +is dominated by differential operators and you need to fuse those +operators into a single explicit loop, use the lower-level approach +described below. + +Manual fusion with RAJA loops +----------------------------- + +To use the single-index operators and the ``BOUT_FOR_RAJA`` loop macro:: #include "bout/single_index_ops.hxx" #include "bout/rajalib.hxx" -To run parts of a physics model RHS function on a GPU, the basic -outline of the code is to (optionally) first copy any class member -variables which will be used in the loop into local variables -(see below for an alternative method):: +To run part of a physics-model RHS on a GPU, start by copying any class +member variables needed inside the loop into local variables, or capture +them explicitly:: - auto _setting = setting; // Create a local variable to capture + auto _setting = setting; -Then create a `FieldAccessor` to efficiently access field and -coordinate system data inside the loop:: +Then create `FieldAccessor` objects to read and write field data inside +the loop:: auto n_acc = FieldAccessor<>(n); auto phi_acc = FieldAccessor<>(phi); -There are also ``Field2DAccessor``s for accessing ``Field2D`` -types. If fields are staggered, then the expected location should be -passed as a template parameter:: +There are also ``Field2DAccessor`` objects for `Field2D`. If fields are +staggered, the expected location can be supplied as a template +parameter:: auto Jpar_acc = FieldAccessor<CELL_YLOW>(Jpar); -which enables the cell location to be checked in the operators at -compile time rather than run time.
+Finally the loop itself can be written as:: -Finally the loop itself can be written something like:: + Field3D result; + auto result_acc = FieldAccessor<>(result); BOUT_FOR_RAJA(i, region) { - ddt(n_acc)[i] = -bracket(phi_acc, n_acc, i) - 2 * DDZ(n_acc, i); - /* ... */ + result_acc[i] = -bracket(phi_acc, n_acc, i) - 2.0 * DDZ(n_acc, i); }; Note the semicolon after the closing brace, which is needed because -this is the body of a lambda function. Inside the body of the loop, -the operators like ``bracket`` and ``DDZ`` calculate the derivatives -at a single index ``i``. These are "single index operators` and are -defined in ``bout/single_index_ops.hxx``. - -Any class member variables which are used inside the loop must be captured -as a local variable. If this is not done, then the code will probably compile, -but may produce an illegal memory access error at runtime on the GPU. To -capture the class member, you can copy any class member variables which -will be used in the loop into local variables:: +this is the body of a lambda function. Inside the loop, operators such +as ``bracket`` and ``DDZ`` act at a single index ``i``. These are the +single-index operators defined in ``bout/single_index_ops.hxx``. - auto _setting = setting; // Create a local variable to capture - -and then use ``_setting`` rather than ``setting`` inside the loop. -Alternatively, add variables to be captured to a CAPTURE argument to -the ``BOUT_FOR_RAJA`` loop:: +Any class member variables used inside the loop must be captured +carefully. Otherwise the code may compile but fail at run time on the +GPU. Instead of using ``this`` implicitly, either shadow members with +local variables or add them to the capture list:: BOUT_FOR_RAJA(i, region, CAPTURE(setting)) { ddt(n_acc)[i] = -bracket(phi_acc, n_acc, i) - 2 * DDZ(n_acc, i); - /* ... code which uses `setting` ... */ + /* ... code that uses `setting` ... 
*/ }; If RAJA is not available, the ``BOUT_FOR_RAJA`` macro will revert to @@ -75,10 +99,26 @@ Note: An important difference between ``BOUT_FOR`` and ``BOUT_FOR_RAJA`` (apart from the closing semicolon) is that the type of the index ``i`` is different inside the loop: ``BOUT_FOR`` uses ``SpecificInd`` types (typically ``Ind3D``), but ``BOUT_FOR_RAJA`` -uses ``int``. ``SpecificInd`` can be explicitly cast to ``int`` so +uses ``int``. ``SpecificInd`` can be explicitly cast to ``int`` so use ``static_cast<int>(i)`` to ensure that it's an integer both with and without RAJA. This might (hopefully) change in future versions. +Choosing between the two approaches +----------------------------------- + +Use lazy field expressions when: + +- the code is mostly algebraic combinations of existing fields +- readability matters more than extracting the last bit of performance +- you want a clear default path that still maps well to accelerator + backends + +Use explicit RAJA loops and single-index operators when: + +- a hot loop is dominated by derivatives +- you want to combine many operations into one kernel manually +- you need direct control over captures, data access, or loop structure + Examples -------- @@ -115,8 +155,12 @@ Notes: CMake configuration ------------------- -To compile BOUT++ components into GPU kernels a few different pieces need to be configured to work together: -RAJA, Umpire, and a CUDA compiler. +To compile BOUT++ components into GPU kernels, a few different pieces +need to work together: RAJA, Umpire, and a CUDA-capable compiler. + +The generated eager field-operator code also selects a loop backend at +configure time. If RAJA is enabled it uses RAJA loops, otherwise it +falls back to OpenMP or serial loops depending on the build. .. _tab-gpusupport-cmake: @@ -136,6 +180,25 @@ RAJA, Umpire, and a CUDA compiler.
| BOUT_ENABLE_WARNINGS | nvcc has incompatible warning flags | On (turn Off for CUDA) | +----------------------+-----------------------------------------+------------------------+ +Shifted metric on GPUs +---------------------- + +When BOUT++ is built with CUDA, the shifted-metric parallel transform +has a CUDA implementation of its toroidal ``shiftZ`` work used while +calculating parallel slices during communication. + +This is most relevant when using: + +.. code-block:: cfg + + [mesh:paralleltransform] + type = shifted + calcParallelSlices_on_communicate = true + +The current implementation is specialized for supported power-of-two +``LocalNz`` values. If parallel slices are disabled on communicate, as in +the aligned-transform workflow, this precomputed-slice path is not used. + Single index operators ---------------------- @@ -263,7 +326,7 @@ likely that the results might be architecture dependent. To minimise the number of times this data needs to be copied from individual fields into the single array, and then copied from CPU to -GPU, ``CoordinatesAccessor``s are cached. A map (``coords_store`` +GPU, ``CoordinatesAccessor``\ s are cached. A map (``coords_store`` defined in ``coordinates_accessor.cxx``) associates ``Array`` objects (containing the array of data) to ``Coordinates`` pointers. If a ``CoordinatesAccessor`` is constructed @@ -314,10 +377,10 @@ This is a `good talk by John Lakos [ACCU 2017] on memory allocators Future work ----------- -Indices -~~~~~~~ - +The GPU path is still evolving. The main long-term direction is to let more +ordinary field code map efficiently onto accelerator backends, so that manual +kernel construction is only needed for the most performance-critical cases. Setting up a RAJA loop to run on a GPU is still cumbersome and inefficient due to the need to transform CPU data structures into a form which can be passed to and used on the GPU.
In the ``bout/rajalib.hxx`` header there is code like:: @@ -332,7 +395,7 @@ is code like:: auto _ob_i_ind_raw = &_ob_i_ind[0]; which is creating a raw pointer (``_ob_i_ind_raw``) to an array of -``int``s which are allocated using Umpire. The original ``indices`` +``int``\ s which are allocated using Umpire. The original ``indices`` are allocated using ``new`` and are inside a C++ ``std::vector``. The RAJA loop then uses this array like this:: diff --git a/manual/sphinx/user_docs/parallel-transforms.rst b/manual/sphinx/user_docs/parallel-transforms.rst index 3ee3eccfb8..9d9b94af9e 100644 --- a/manual/sphinx/user_docs/parallel-transforms.rst +++ b/manual/sphinx/user_docs/parallel-transforms.rst @@ -120,6 +120,21 @@ Note that here :math:`\theta_0` does not need to be constant in X (radius), since it is only the relative shifts between Y locations which matters. +When BOUT++ is built with CUDA, the shifted-metric implementation also +has a GPU path for the ``shiftZ`` work used to calculate parallel +slices during communication. This is most useful in the standard +shifted-metric workflow with + +.. code-block:: cfg + + [mesh:paralleltransform] + type = shifted + calcParallelSlices_on_communicate = true + +If ``calcParallelSlices_on_communicate = false`` is used, BOUT++ is in +the aligned-transform mode described below, so those precomputed +parallel slices are not generated on communicate. + Special handling is needed for parallel boundary conditions, see :ref:`sec-parallel-bc-shifted-metric`. 
diff --git a/src/field/field2d.cxx b/src/field/field2d.cxx index b363eeef07..61799e3444 100644 --- a/src/field/field2d.cxx +++ b/src/field/field2d.cxx @@ -331,9 +331,6 @@ void Field2D::swapData(Field2D& other) { std::swap(data, other.data); } ////////////// NON-MEMBER OVERLOADED OPERATORS ////////////// -// Unary minus -Field2D operator-(const Field2D& f) { return -1.0 * f; } - //////////////// NON-MEMBER FUNCTIONS ////////////////// namespace { diff --git a/src/field/field3d.cxx b/src/field/field3d.cxx index 633eb42e9b..7915579440 100644 --- a/src/field/field3d.cxx +++ b/src/field/field3d.cxx @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -84,6 +85,38 @@ Field3D::Field3D(const Field3D& f) } } +Field3D operator+(const Field2D& lhs, const Field3DParallel& rhs) { + return lhs + rhs.asField3D(); +} + +Field3D operator-(const Field2D& lhs, const Field3DParallel& rhs) { + return lhs - rhs.asField3D(); +} + +Field3D operator*(const Field2D& lhs, const Field3DParallel& rhs) { + return lhs * rhs.asField3D(); +} + +Field3D operator/(const Field2D& lhs, const Field3DParallel& rhs) { + return lhs / rhs.asField3D(); +} + +Field3D operator+(const Field3DParallel& lhs, const Field2D& rhs) { + return lhs.asField3D() + rhs; +} + +Field3D operator-(const Field3DParallel& lhs, const Field2D& rhs) { + return lhs.asField3D() - rhs; +} + +Field3D operator*(const Field3DParallel& lhs, const Field2D& rhs) { + return lhs.asField3D() * rhs; +} + +Field3D operator/(const Field3DParallel& lhs, const Field2D& rhs) { + return lhs.asField3D() / rhs; +} + Field3D::Field3D(const Field2D& f) : Field(f) { nx = fieldmesh->LocalNx; @@ -122,8 +155,6 @@ Field3D::Field3D(Array data_in, Mesh* localmesh, CELL_LOC datalocation ASSERT1(data.size() == nx * ny * nz); } -Field3D::~Field3D() { delete deriv; } - Field3D& Field3D::allocate() { if (data.empty()) { if (!fieldmesh) { @@ -280,27 +311,6 @@ Field3D& Field3D::operator=(const Field3D& rhs) { return *this; } -Field3D& 
Field3D::operator=(Field3D&& rhs) noexcept { - track(rhs, "operator="); - - // Move parallel slices or delete existing ones. - yup_fields = std::move(rhs.yup_fields); - ydown_fields = std::move(rhs.ydown_fields); - - // Move the data and data sizes - nx = rhs.nx; - ny = rhs.ny; - nz = rhs.nz; - regionID = rhs.regionID; - - data = std::move(rhs.data); - - // Move base slice last - Field::operator=(std::move(rhs)); - - return *this; -} - Field3D& Field3D::operator=(const Field2D& rhs) { track(rhs, "operator="); @@ -676,8 +686,6 @@ void Field3D::swapData(Field3D& other) { std::swap(data, other.data); } * NON-MEMBER OVERLOADED OPERATORS ***************************************************************/ -Field3D operator-(const Field3D& f) { return -1.0 * f; } - //////////////// NON-MEMBER FUNCTIONS ////////////////// Field3D pow(const Field3D& lhs, const Field2D& rhs, const std::string& rgn) { @@ -910,7 +918,10 @@ bool operator==(const Field3D& a, const Field3D& b) { if (!a.isAllocated() || !b.isAllocated()) { return false; } - return min(abs(a - b)) < 1e-10; + // Compare magnitudes; a signed minimum would accept any negative difference. + Field3D Sub = a - b; + Field3D AbsSub = abs(Sub); + return min(AbsSub) < 1e-10; } std::ostream& operator<<(std::ostream& out, const Field3D& value) { diff --git a/src/field/fieldperp.cxx b/src/field/fieldperp.cxx index d5f0d93706..b7b2d9d731 100644 --- a/src/field/fieldperp.cxx +++ b/src/field/fieldperp.cxx @@ -2,7 +2,7 @@ * Class for 2D X-Z slices * ************************************************************************** - * Copyright 2010 - 2025 BOUT++ developers + * Copyright 2010 - 2026 BOUT++ developers * * Contact: Ben Dudson, dudson2@llnl.gov * @@ -150,9 +150,6 @@ FieldPerp fromFieldAligned(const FieldPerp& f, const std::string& region) { ////////////// NON-MEMBER OVERLOADED OPERATORS ////////////// -// Unary minus -FieldPerp operator-(const FieldPerp& f) { return -1.0 * f; } - ///////////////////////////////////////////////// // functions diff --git a/src/field/gen_fieldops.jinja b/src/field/gen_fieldops.jinja index
ca171eaf8d..913acadf7b 100644 --- a/src/field/gen_fieldops.jinja +++ b/src/field/gen_fieldops.jinja @@ -1,3 +1,6 @@ +{% set use_parallel_arg = lhs.field_type == "Field3DParallel" or rhs.field_type == "Field3DParallel" %} +{% set use_raja_path = region_loop == "BOUT_FOR_RAJA" and not use_parallel_arg %} + // Provide the C++ wrapper for {{operator_name}} of {{lhs}} and {{rhs}} {{out}} operator{{operator}}(const {{lhs.passByReference}}, const {{rhs.passByReference}}) { {% if lhs != "BoutReal" and rhs != "BoutReal" %} @@ -8,6 +11,26 @@ checkData({{lhs.name}}); checkData({{rhs.name}}); + {% if use_raja_path %} + {% if out.field_type == "FieldPerp" %} + auto {{out.name}}_acc = FieldPerpAccessor{ {{out.name}} }; + {% else %} + auto {{out.name}}_acc = FieldAccessor({{out.name}}); + {% endif %} + {% if lhs.field_type == "FieldPerp" %} + auto {{lhs.name}}_acc = FieldPerpAccessor{ {{lhs.name}} }; + {% elif lhs.field_type == "BoutReal" %} + {% else %} + auto {{lhs.name}}_acc = FieldAccessor({{lhs.name}}); + {% endif %} + {% if rhs.field_type == "FieldPerp" %} + auto {{rhs.name}}_acc = FieldPerpAccessor{ {{rhs.name}} }; + {% elif rhs.field_type == "BoutReal" %} + {% else %} + auto {{rhs.name}}_acc = FieldAccessor({{rhs.name}}); + {% endif %} + {% endif %} + {% if out.region_type == "3D" %} {% if lhs.region_type == rhs.region_type == "3D" %} {{out.name}}.setRegion({{lhs.name}}.getMesh()->getCommonRegion({{lhs.name}}.getRegionID(), @@ -23,9 +46,9 @@ {{ rhs.assertParallelSlices }} {{out.name}}.splitParallelSlices(); {% if lhs.region_type == "3D" %} - for (size_t i{0} ; i < {{lhs.name}}.numberParallelSlices() ; ++i) { + for (size_t i{0}; i < {{lhs.name}}.numberParallelSlices(); ++i) { {% else %} - for (size_t i{0} ; i < {{rhs.name}}.numberParallelSlices() ; ++i) { + for (size_t i{0}; i < {{rhs.name}}.numberParallelSlices(); ++i) { {% endif %} {{out.name}}.yup(i) = {{lhs.yup}} {{operator}} {{rhs.yup}}; {{out.name}}.ydown(i) = {{lhs.ydown}} {{operator}} {{rhs.ydown}}; @@ -35,46 
+58,83 @@ {% endif %} {% endif %} - {% if (out == "Field3D") and ((lhs == "Field2D") or (rhs =="Field2D")) %} + {% if (out == "Field3D") and ((lhs == "Field2D") or (rhs == "Field2D")) %} + {% if use_raja_path %} + int mesh_nz = {{lhs.name if lhs.field_type != "BoutReal" else rhs.name}}_acc.mesh_nz; + {% else %} Mesh *localmesh = {{lhs.name if lhs.field_type != "BoutReal" else rhs.name}}.getMesh(); + {% endif %} - {% if (lhs == "Field2D") %} + {% if lhs == "Field2D" %} {{region_loop}}({{index_var}}, {{lhs.name}}.getRegion({{region_name}})) { {% else %} {{region_loop}}({{index_var}}, {{rhs.name}}.getRegion({{region_name}})) { {% endif %} + {% if use_raja_path %} + const auto {{mixed_base_ind}} = {{index_var}} * mesh_nz; + {% else %} const auto {{mixed_base_ind}} = localmesh->ind2Dto3D({{index_var}}); - {% if (operator == "/") and (rhs == "Field2D") %} - const auto tmp = 1.0 / {{rhs.mixed_index}}; - for (int {{jz_var}} = 0; {{jz_var}} < localmesh->LocalNz; ++{{jz_var}}){ - {{out.mixed_index}} = {{lhs.mixed_index}} * tmp; + {% endif %} + {% if (operator == "/") and (rhs == "Field2D") %} + {% if use_raja_path %} + const auto tmp = 1.0 / {{rhs.mixed_index_acc}}; + for (int {{jz_var}} = 0; {{jz_var}} < mesh_nz; ++{{jz_var}}) { + {{out.mixed_index_acc}} = {{lhs.mixed_index_acc}} * tmp; {% else %} - for (int {{jz_var}} = 0; {{jz_var}} < localmesh->LocalNz; ++{{jz_var}}){ - {{out.mixed_index}} = {{lhs.mixed_index}} {{operator}} {{rhs.mixed_index}}; + const auto tmp = 1.0 / {{rhs.mixed_index}}; + for (int {{jz_var}} = 0; {{jz_var}} < localmesh->LocalNz; ++{{jz_var}}) { + {{out.mixed_index}} = {{lhs.mixed_index}} * tmp; {% endif %} + {% else %} + {% if use_raja_path %} + for (int {{jz_var}} = 0; {{jz_var}} < mesh_nz; ++{{jz_var}}) { + {{out.mixed_index_acc}} = {{lhs.mixed_index_acc}} {{operator}} {{rhs.mixed_index_acc}}; + {% else %} + for (int {{jz_var}} = 0; {{jz_var}} < localmesh->LocalNz; ++{{jz_var}}) { + {{out.mixed_index}} = {{lhs.mixed_index}} {{operator}} 
{{rhs.mixed_index}}; + {% endif %} + {% endif %} } - } - {% elif out == "FieldPerp" and (lhs == "Field2D" or lhs == "Field3D" or rhs == "Field2D" or rhs == "Field3D")%} + }{% if use_raja_path %};{% endif %} + {% elif out == "FieldPerp" and (lhs == "Field2D" or lhs == "Field3D" or rhs == "Field2D" or rhs == "Field3D") %} Mesh *localmesh = {{lhs.name if lhs.field_type != "BoutReal" else rhs.name}}.getMesh(); {{region_loop}}({{index_var}}, {{out.name}}.getRegion({{region_name}})) { - int yind = {{lhs.name if lhs == "FieldPerp" else rhs.name}}.getIndex(); - const auto {{mixed_base_ind}} = localmesh->indPerpto3D({{index_var}}, yind); - {% if lhs != "FieldPerp" %} - {{out.index}} = {{lhs.base_index}} {{operator}} {{rhs.index}}; - {% else %} - {{out.index}} = {{lhs.index}} {{operator}} {{rhs.base_index}}; - {% endif %} - } + {% if use_raja_path %} + int yind = {{lhs.name if lhs == "FieldPerp" else rhs.name}}_acc.getIndex(); + const auto {{mixed_base_ind}} = localmesh->flatIndPerpto3D({{index_var}}, {{out.name}}_acc.nz, yind); + {% if lhs != "FieldPerp" %} + {{out.index_acc}} = {{lhs.base_index_acc}} {{operator}} {{rhs.index_acc}}; + {% else %} + {{out.index_acc}} = {{lhs.index_acc}} {{operator}} {{rhs.base_index_acc}}; + {% endif %} + {% else %} + int yind = {{lhs.name if lhs == "FieldPerp" else rhs.name}}.getIndex(); + const auto {{mixed_base_ind}} = localmesh->indPerpto3D({{index_var}}, yind); + {% if lhs != "FieldPerp" %} + {{out.index}} = {{lhs.base_index}} {{operator}} {{rhs.index}}; + {% else %} + {{out.index}} = {{lhs.index}} {{operator}} {{rhs.base_index}}; + {% endif %} + {% endif %} + }{% if use_raja_path %};{% endif %} {% elif (operator == "/") and (rhs == "BoutReal") %} - const auto tmp = 1.0 / {{rhs.index}}; - {{region_loop}}({{index_var}}, {{out.name}}.getValidRegionWithDefault({{region_name}})) { - {{out.index}} = {{lhs.index}} * tmp; - } + const auto tmp = 1.0 / {{rhs.index}}; + {{region_loop}}({{index_var}}, 
{{out.name}}.getValidRegionWithDefault({{region_name}})) { + {% if use_raja_path %} + {{out.index_acc}} = {{lhs.index_acc}} * tmp; + {% else %} + {{out.index}} = {{lhs.index}} * tmp; + {% endif %} + }{% if use_raja_path %};{% endif %} {% else %} {{region_loop}}({{index_var}}, {{out.name}}.getValidRegionWithDefault({{region_name}})) { - {{out.index}} = {{lhs.index}} {{operator}} {{rhs.index}}; - } + {% if use_raja_path %} + {{out.index_acc}} = {{lhs.index_acc}} {{operator}} {{rhs.index_acc}}; + {% else %} + {{out.index}} = {{lhs.index}} {{operator}} {{rhs.index}}; + {% endif %} + }{% if use_raja_path %};{% endif %} {% endif %} checkData({{out.name}}); @@ -90,47 +150,74 @@ ASSERT1_FIELDS_COMPATIBLE(*this, rhs); {% endif %} - // Delete existing parallel slices. We don't update parallel slices, so any - // that currently exist will be incorrect. - clearParallelSlices(); - checkData(*this); - checkData({{rhs.name}}); + // Delete existing parallel slices. We don't update parallel slices, so any + // that currently exist will be incorrect. 
+ clearParallelSlices(); + checkData(*this); + checkData({{rhs.name}}); - {% if lhs.region_type == rhs.region_type == "3D" %} - regionID = fieldmesh->getCommonRegion(regionID, {{rhs.name}}.getRegionID()); + {% if use_raja_path %} + auto this_acc = FieldAccessor(*this); + {% if rhs.field_type == "FieldPerp" %} + auto {{rhs.name}}_acc = FieldPerpAccessor{ {{rhs.name}} }; + {% elif rhs.field_type == "BoutReal" %} + {% else %} + auto {{rhs.name}}_acc = FieldAccessor({{rhs.name}}); {% endif %} + {% endif %} + {% if lhs.region_type == rhs.region_type == "3D" %} + regionID = fieldmesh->getCommonRegion(regionID, {{rhs.name}}.getRegionID()); + {% endif %} - {% if (lhs == "Field3D") and (rhs =="Field2D") %} + {% if rhs == "Field2D" %} + {% if use_raja_path %} + int mesh_nz = fieldmesh->LocalNz; + {% endif %} {{region_loop}}({{index_var}}, {{rhs.name}}.getRegion({{region_name}})) { + {% if use_raja_path %} + const auto {{mixed_base_ind}} = {{index_var}} * mesh_nz; + {% else %} const auto {{mixed_base_ind}} = fieldmesh->ind2Dto3D({{index_var}}); - {% if (operator == "/") and (rhs == "Field2D") %} - const auto tmp = 1.0 / {{rhs.mixed_index}}; - for (int {{jz_var}} = 0; {{jz_var}} < fieldmesh->LocalNz; ++{{jz_var}}){ - (*this)[{{mixed_base_ind}} + {{jz_var}}] *= tmp; + {% endif %} + {% if operator == "/" %} + {% if use_raja_path %} + const auto tmp = 1.0 / {{rhs.mixed_index_acc}}; + for (int {{jz_var}} = 0; {{jz_var}} < mesh_nz; ++{{jz_var}}) { + this_acc[{{mixed_base_ind}} + {{jz_var}}] *= tmp; {% else %} - for (int {{jz_var}} = 0; {{jz_var}} < fieldmesh->LocalNz; ++{{jz_var}}){ - (*this)[{{mixed_base_ind}} + {{jz_var}}] {{operator}}= {{rhs.index}}; + const auto tmp = 1.0 / {{rhs.mixed_index}}; + for (int {{jz_var}} = 0; {{jz_var}} < fieldmesh->LocalNz; ++{{jz_var}}) { + (*this)[{{mixed_base_ind}} + {{jz_var}}] *= tmp; {% endif %} + {% else %} + {% if use_raja_path %} + for (int {{jz_var}} = 0; {{jz_var}} < mesh_nz; ++{{jz_var}}) { + this_acc[{{mixed_base_ind}} + {{jz_var}}] 
{{operator}}= {{rhs.index_acc}}; + {% else %} + for (int {{jz_var}} = 0; {{jz_var}} < fieldmesh->LocalNz; ++{{jz_var}}) { + (*this)[{{mixed_base_ind}} + {{jz_var}}] {{operator}}= {{rhs.index}}; + {% endif %} + {% endif %} } - } - {% elif rhs == "FieldPerp" and (lhs == "Field3D" or lhs == "Field2D")%} - Mesh *localmesh = this->getMesh(); - - {{region_loop}}({{index_var}}, {{rhs.name}}.getRegion({{region_name}})) { - int yind = {{rhs.name}}.getIndex(); - const auto {{mixed_base_ind}} = localmesh->indPerpto3D({{index_var}}, yind); - (*this)[{{base_ind_var}}] {{operator}}= {{rhs.index}}; - } - {% elif (operator == "/") and (lhs == "Field3D" or lhs == "Field2D") and (rhs =="BoutReal") %} + }{% if use_raja_path %};{% endif %} + {% elif (operator == "/") and (rhs == "BoutReal") %} const auto tmp = 1.0 / {{rhs.index}}; {{region_loop}}({{index_var}}, this->getRegion({{region_name}})) { + {% if use_raja_path %} + this_acc[{{index_var}}] *= tmp; + {% else %} (*this)[{{index_var}}] *= tmp; - } + {% endif %} + }{% if use_raja_path %};{% endif %} {% else %} {{region_loop}}({{index_var}}, this->getRegion({{region_name}})) { - (*this)[{{index_var}}] {{operator}}= {{rhs.index}}; - } + {% if use_raja_path %} + this_acc[{{index_var}}] {{operator}}= {{rhs.index_acc}}; + {% else %} + (*this)[{{index_var}}] {{operator}}= {{rhs.index}}; + {% endif %} + }{% if use_raja_path %};{% endif %} {% endif %} {% if lhs.region_type == "3D" %} @@ -169,12 +256,12 @@ {% if lhs == "Field3DParallel" and (rhs.region_type == "3D" or rhs == "BoutReal") %} if (this->isFci()) { if (isRef) { - for (size_t i{0} ; i < yup_fields.size() ; ++i) { + for (size_t i{0}; i < yup_fields.size(); ++i) { yup(i).update_{{operator_name}}_inplace({{rhs.name}}{% if rhs == "Field3D" %}.yup(i){% endif %}); ydown(i).update_{{operator_name}}_inplace({{rhs.name}}{% if rhs == "Field3D" %}.ydown(i){% endif %}); } } else { - for (size_t i{0} ; i < yup_fields.size() ; ++i) { + for (size_t i{0}; i < yup_fields.size(); ++i) { yup(i) 
{{operator}}= {{rhs.name}}{% if rhs == "Field3D" %}.yup(i){% endif %}; ydown(i) {{operator}}= {{rhs.name}}{% if rhs == "Field3D" %}.ydown(i){% endif %}; } @@ -186,49 +273,88 @@ checkData(*this); checkData({{rhs.name}}); + {% if use_raja_path %} + {% if lhs.field_type == "FieldPerp" %} + auto this_acc = FieldPerpAccessor{(*this)}; + {% else %} + auto this_acc = FieldAccessor(*this); + {% endif %} + {% if rhs.field_type == "FieldPerp" %} + auto {{rhs.name}}_acc = FieldPerpAccessor{ {{rhs.name}} }; + {% elif rhs.field_type == "BoutReal" %} + {% else %} + auto {{rhs.name}}_acc = FieldAccessor({{rhs.name}}); + {% endif %} + {% endif %} + {% if lhs.region_type == rhs.region_type == "3D" %} regionID = fieldmesh->getCommonRegion(regionID, {{rhs.name}}.getRegionID()); {% endif %} - - {% if (lhs == "Field3D") and (rhs =="Field2D") %} + {% if (lhs == "Field3D") and (rhs == "Field2D") %} + {% if use_raja_path %} + int mesh_nz = fieldmesh->LocalNz; + {% endif %} {{region_loop}}({{index_var}}, {{rhs.name}}.getRegion({{region_name}})) { + {% if use_raja_path %} + const auto {{mixed_base_ind}} = {{index_var}} * mesh_nz; + {% else %} const auto {{mixed_base_ind}} = fieldmesh->ind2Dto3D({{index_var}}); - {% if (operator == "/") and (rhs == "Field2D") %} - const auto tmp = 1.0 / {{rhs.mixed_index}}; - for (int {{jz_var}} = 0; {{jz_var}} < fieldmesh->LocalNz; ++{{jz_var}}){ - (*this)[{{mixed_base_ind}} + {{jz_var}}] *= tmp; + {% endif %} + {% if operator == "/" %} + {% if use_raja_path %} + const auto tmp = 1.0 / {{rhs.mixed_index_acc}}; + for (int {{jz_var}} = 0; {{jz_var}} < mesh_nz; ++{{jz_var}}) { + this_acc[{{mixed_base_ind}} + {{jz_var}}] *= tmp; {% else %} - for (int {{jz_var}} = 0; {{jz_var}} < fieldmesh->LocalNz; ++{{jz_var}}){ - (*this)[{{mixed_base_ind}} + {{jz_var}}] {{operator}}= {{rhs.index}}; + const auto tmp = 1.0 / {{rhs.mixed_index}}; + for (int {{jz_var}} = 0; {{jz_var}} < fieldmesh->LocalNz; ++{{jz_var}}) { + (*this)[{{mixed_base_ind}} + {{jz_var}}] *= tmp; {% 
endif %} + {% else %} + {% if use_raja_path %} + for (int {{jz_var}} = 0; {{jz_var}} < mesh_nz; ++{{jz_var}}) { + this_acc[{{mixed_base_ind}} + {{jz_var}}] {{operator}}= {{rhs.index_acc}}; + {% else %} + for (int {{jz_var}} = 0; {{jz_var}} < fieldmesh->LocalNz; ++{{jz_var}}) { + (*this)[{{mixed_base_ind}} + {{jz_var}}] {{operator}}= {{rhs.index}}; + {% endif %} + {% endif %} } - } - {% elif lhs == "FieldPerp" and (rhs == "Field3D" or rhs == "Field2D")%} + }{% if use_raja_path %};{% endif %} + {% elif lhs == "FieldPerp" and (rhs == "Field3D" or rhs == "Field2D") %} Mesh *localmesh = this->getMesh(); + {% if use_raja_path %} + int yind = this_acc.getIndex(); + {% endif %} {{region_loop}}({{index_var}}, this->getRegion({{region_name}})) { - int yind = this->getIndex(); - const auto {{mixed_base_ind}} = localmesh->indPerpto3D({{index_var}}, yind); - (*this)[{{index_var}}] {{operator}}= {{rhs.base_index}}; - } - {% elif rhs == "FieldPerp" and (lhs == "Field3D" or lhs == "Field2D")%} - Mesh *localmesh = this->getMesh(); - - {{region_loop}}({{index_var}}, {{rhs.name}}.getRegion({{region_name}})) { - int yind = {{rhs.name}}.getIndex(); - const auto {{mixed_base_ind}} = localmesh->indPerpto3D({{index_var}}, yind); - (*this)[{{base_ind_var}}] {{operator}}= {{rhs.index}}; - } - {% elif (operator == "/") and (lhs == "Field3D" or lhs == "Field2D") and (rhs =="BoutReal") %} + {% if use_raja_path %} + const auto {{mixed_base_ind}} = localmesh->flatIndPerpto3D({{index_var}}, this_acc.nz, yind); + this_acc[{{index_var}}] {{operator}}= {{rhs.base_index_acc}}; + {% else %} + int yind = this->getIndex(); + const auto {{mixed_base_ind}} = localmesh->indPerpto3D({{index_var}}, yind); + (*this)[{{index_var}}] {{operator}}= {{rhs.base_index}}; + {% endif %} + }{% if use_raja_path %};{% endif %} + {% elif (operator == "/") and (lhs == "Field3D" or lhs == "Field2D") and (rhs == "BoutReal") %} const auto tmp = 1.0 / {{rhs.index}}; {{region_loop}}({{index_var}}, 
this->getRegion({{region_name}})) { + {% if use_raja_path %} + this_acc[{{index_var}}] *= tmp; + {% else %} (*this)[{{index_var}}] *= tmp; - } + {% endif %} + }{% if use_raja_path %};{% endif %} {% else %} {{region_loop}}({{index_var}}, this->getRegion({{region_name}})) { - (*this)[{{index_var}}] {{operator}}= {{rhs.index}}; - } + {% if use_raja_path %} + this_acc[{{index_var}}] {{operator}}= {{rhs.index_acc}}; + {% else %} + (*this)[{{index_var}}] {{operator}}= {{rhs.index}}; + {% endif %} + }{% if use_raja_path %};{% endif %} {% endif %} {% if lhs.region_type == "3D" %} diff --git a/src/field/gen_fieldops.py b/src/field/gen_fieldops.py index 91fd4082f0..6610286af5 100755 --- a/src/field/gen_fieldops.py +++ b/src/field/gen_fieldops.py @@ -13,13 +13,12 @@ from __future__ import print_function -from builtins import object - import argparse import contextlib -from copy import deepcopy as copy import itertools import sys +from builtins import object +from copy import deepcopy as copy try: import jinja2 @@ -131,6 +130,17 @@ def index(self): else: return "{self.name}[{self.index_var}]".format(self=self) + @property + def index_acc(self): + """Returns "<name>_acc[{index_var}]" for an accessor-based index, except if + field_type is BoutReal, in which case just returns "<name>" + + """ + if self.field_type == "BoutReal": + return "{self.name}".format(self=self) + else: + return "{self.name}_acc[{self.index_var}]".format(self=self) + @property def mixed_index(self): """Returns "<name>[{index_var} + {jz_var}]" if field_type is Field3D, @@ -146,6 +156,21 @@ def mixed_index(self): else: # Field2D return "{self.name}[{self.index_var}]".format(self=self) + @property + def mixed_index_acc(self): + """Returns "<name>_acc[{mixed_base_ind_var} + {jz_var}]" if field_type is + Field3D, self.index_acc if Field2D or just returns "<name>" for BoutReal + + """ + if self.field_type == "BoutReal": + return "{self.name}".format(self=self) + elif self.field_type == "Field3D": + return
"{self.name}_acc[{self.mixed_base_ind_var} + {self.jz_var}]".format( + self=self + ) + else: # Field2D + return "{self.name}_acc[{self.index_var}]".format(self=self) + @property def base_index(self): """Returns "[{mixed_base_ind_var}]" if field_type is Field3D, Field2D or FieldPerp @@ -157,6 +182,17 @@ def base_index(self): else: return "{self.name}[{self.mixed_base_ind_var}]".format(self=self) + @property + def base_index_acc(self): + """Returns "_acc[{mixed_base_ind_var}]" for an accessor if field_type is + Field3D, Field2D or FieldPerp or just returns "" for BoutReal + + """ + if self.field_type == "BoutReal": + return "{self.name}".format(self=self) + else: + return "{self.name}_acc[{self.mixed_base_ind_var}]".format(self=self) + @property def yup(self): """Returns {{name}}.yup(i) if it is a field with parallel slices. @@ -228,11 +264,11 @@ def returnType(f1, f2): ) # By default use OpenMP enabled loops but allow to disable parser.add_argument( - "--no-openmp", - action="store_false", - default=False, - dest="noOpenMP", - help="Don't use OpenMP compatible loops", + "--loop-exec", + default="openmp", + dest="loop_exec", + choices=["serial", "openmp", "raja"], + help="Choose the loop execution method. 
Default is OpenMP", ) args = parser.parse_args() @@ -243,10 +279,16 @@ def returnType(f1, f2): mixed_base_ind_var = "base_ind" region_name = '"RGN_ALL"' - if args.noOpenMP: + if args.loop_exec == "openmp": + region_loop = "BOUT_FOR" + elif args.loop_exec == "raja": + region_loop = "BOUT_FOR_RAJA" + header += "#include \n" + header += "#include \n" + elif args.loop_exec == "serial": region_loop = "BOUT_FOR_SERIAL" else: - region_loop = "BOUT_FOR" + raise ValueError("Unknown loop execution method") # Declare what fields we currently support: field3D = Field( diff --git a/src/field/generated_fieldops.cxx b/src/field/generated_fieldops.cxx index 181c15c709..d47b2a7a89 100644 --- a/src/field/generated_fieldops.cxx +++ b/src/field/generated_fieldops.cxx @@ -8,7 +8,6 @@ #include #include #include -#include // Provide the C++ wrapper for multiplication of Field3D and Field3D Field3D operator*(const Field3D& lhs, const Field3D& rhs) { @@ -20,10 +19,9 @@ Field3D operator*(const Field3D& lhs, const Field3D& rhs) { result.setRegion(lhs.getMesh()->getCommonRegion(lhs.getRegionID(), rhs.getRegionID())); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] * rhs[index]; } - checkData(result); return result; } @@ -42,8 +40,7 @@ Field3D& Field3D::update_multiplication_inplace(const Field3D& rhs) { regionID = fieldmesh->getCommonRegion(regionID, rhs.getRegionID()); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs[index]; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs[index]; } track(rhs, "operator*="); #if BOUT_USE_TRACK name = fmt::format("{:s} *= {:s}", this->name, rhs.name); @@ -69,8 +66,7 @@ Field3D& Field3D::operator*=(const Field3D& rhs) { regionID = fieldmesh->getCommonRegion(regionID, rhs.getRegionID()); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs[index]; } - + BOUT_FOR_SERIAL(index, 
this->getRegion("RGN_ALL")) { (*this)[index] *= rhs[index]; } track(rhs, "operator*="); checkData(*this); @@ -92,10 +88,9 @@ Field3D operator/(const Field3D& lhs, const Field3D& rhs) { result.setRegion(lhs.getMesh()->getCommonRegion(lhs.getRegionID(), rhs.getRegionID())); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] / rhs[index]; } - checkData(result); return result; } @@ -114,8 +109,7 @@ Field3D& Field3D::update_division_inplace(const Field3D& rhs) { regionID = fieldmesh->getCommonRegion(regionID, rhs.getRegionID()); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] /= rhs[index]; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] /= rhs[index]; } track(rhs, "operator/="); #if BOUT_USE_TRACK name = fmt::format("{:s} /= {:s}", this->name, rhs.name); @@ -141,8 +135,7 @@ Field3D& Field3D::operator/=(const Field3D& rhs) { regionID = fieldmesh->getCommonRegion(regionID, rhs.getRegionID()); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] /= rhs[index]; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] /= rhs[index]; } track(rhs, "operator/="); checkData(*this); @@ -164,10 +157,9 @@ Field3D operator+(const Field3D& lhs, const Field3D& rhs) { result.setRegion(lhs.getMesh()->getCommonRegion(lhs.getRegionID(), rhs.getRegionID())); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] + rhs[index]; } - checkData(result); return result; } @@ -186,8 +178,7 @@ Field3D& Field3D::update_addition_inplace(const Field3D& rhs) { regionID = fieldmesh->getCommonRegion(regionID, rhs.getRegionID()); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs[index]; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs[index]; } track(rhs, 
"operator+="); #if BOUT_USE_TRACK name = fmt::format("{:s} += {:s}", this->name, rhs.name); @@ -213,8 +204,7 @@ Field3D& Field3D::operator+=(const Field3D& rhs) { regionID = fieldmesh->getCommonRegion(regionID, rhs.getRegionID()); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs[index]; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs[index]; } track(rhs, "operator+="); checkData(*this); @@ -236,10 +226,9 @@ Field3D operator-(const Field3D& lhs, const Field3D& rhs) { result.setRegion(lhs.getMesh()->getCommonRegion(lhs.getRegionID(), rhs.getRegionID())); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] - rhs[index]; } - checkData(result); return result; } @@ -258,8 +247,7 @@ Field3D& Field3D::update_subtraction_inplace(const Field3D& rhs) { regionID = fieldmesh->getCommonRegion(regionID, rhs.getRegionID()); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs[index]; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs[index]; } track(rhs, "operator-="); #if BOUT_USE_TRACK name = fmt::format("{:s} -= {:s}", this->name, rhs.name); @@ -285,8 +273,7 @@ Field3D& Field3D::operator-=(const Field3D& rhs) { regionID = fieldmesh->getCommonRegion(regionID, rhs.getRegionID()); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs[index]; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs[index]; } track(rhs, "operator-="); checkData(*this); @@ -310,13 +297,12 @@ Field3D operator*(const Field3D& lhs, const Field2D& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, rhs.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, rhs.getRegion("RGN_ALL")) { const auto base_ind = localmesh->ind2Dto3D(index); for (int jz = 0; jz < localmesh->LocalNz; ++jz) { result[base_ind + jz] = lhs[base_ind + jz] * rhs[index]; } } - 
checkData(result); return result; } @@ -333,13 +319,12 @@ Field3D& Field3D::update_multiplication_inplace(const Field2D& rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, rhs.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, rhs.getRegion("RGN_ALL")) { const auto base_ind = fieldmesh->ind2Dto3D(index); for (int jz = 0; jz < fieldmesh->LocalNz; ++jz) { (*this)[base_ind + jz] *= rhs[index]; } } - track(rhs, "operator*="); #if BOUT_USE_TRACK name = fmt::format("{:s} *= {:s}", this->name, rhs.name); @@ -363,13 +348,12 @@ Field3D& Field3D::operator*=(const Field2D& rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, rhs.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, rhs.getRegion("RGN_ALL")) { const auto base_ind = fieldmesh->ind2Dto3D(index); for (int jz = 0; jz < fieldmesh->LocalNz; ++jz) { (*this)[base_ind + jz] *= rhs[index]; } } - track(rhs, "operator*="); checkData(*this); @@ -393,14 +377,13 @@ Field3D operator/(const Field3D& lhs, const Field2D& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, rhs.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, rhs.getRegion("RGN_ALL")) { const auto base_ind = localmesh->ind2Dto3D(index); const auto tmp = 1.0 / rhs[index]; for (int jz = 0; jz < localmesh->LocalNz; ++jz) { result[base_ind + jz] = lhs[base_ind + jz] * tmp; } } - checkData(result); return result; } @@ -417,14 +400,13 @@ Field3D& Field3D::update_division_inplace(const Field2D& rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, rhs.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, rhs.getRegion("RGN_ALL")) { const auto base_ind = fieldmesh->ind2Dto3D(index); const auto tmp = 1.0 / rhs[index]; for (int jz = 0; jz < fieldmesh->LocalNz; ++jz) { (*this)[base_ind + jz] *= tmp; } } - track(rhs, "operator/="); #if BOUT_USE_TRACK name = fmt::format("{:s} /= {:s}", this->name, rhs.name); @@ -448,14 +430,13 @@ Field3D& Field3D::operator/=(const Field2D& rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, rhs.getRegion("RGN_ALL")) { + 
BOUT_FOR_SERIAL(index, rhs.getRegion("RGN_ALL")) { const auto base_ind = fieldmesh->ind2Dto3D(index); const auto tmp = 1.0 / rhs[index]; for (int jz = 0; jz < fieldmesh->LocalNz; ++jz) { (*this)[base_ind + jz] *= tmp; } } - track(rhs, "operator/="); checkData(*this); @@ -479,13 +460,12 @@ Field3D operator+(const Field3D& lhs, const Field2D& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, rhs.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, rhs.getRegion("RGN_ALL")) { const auto base_ind = localmesh->ind2Dto3D(index); for (int jz = 0; jz < localmesh->LocalNz; ++jz) { result[base_ind + jz] = lhs[base_ind + jz] + rhs[index]; } } - checkData(result); return result; } @@ -502,13 +482,12 @@ Field3D& Field3D::update_addition_inplace(const Field2D& rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, rhs.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, rhs.getRegion("RGN_ALL")) { const auto base_ind = fieldmesh->ind2Dto3D(index); for (int jz = 0; jz < fieldmesh->LocalNz; ++jz) { (*this)[base_ind + jz] += rhs[index]; } } - track(rhs, "operator+="); #if BOUT_USE_TRACK name = fmt::format("{:s} += {:s}", this->name, rhs.name); @@ -532,13 +511,12 @@ Field3D& Field3D::operator+=(const Field2D& rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, rhs.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, rhs.getRegion("RGN_ALL")) { const auto base_ind = fieldmesh->ind2Dto3D(index); for (int jz = 0; jz < fieldmesh->LocalNz; ++jz) { (*this)[base_ind + jz] += rhs[index]; } } - track(rhs, "operator+="); checkData(*this); @@ -562,13 +540,12 @@ Field3D operator-(const Field3D& lhs, const Field2D& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, rhs.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, rhs.getRegion("RGN_ALL")) { const auto base_ind = localmesh->ind2Dto3D(index); for (int jz = 0; jz < localmesh->LocalNz; ++jz) { result[base_ind + jz] = lhs[base_ind + jz] - rhs[index]; } } - checkData(result); return result; } @@ -585,13 +562,12 @@ Field3D& 
Field3D::update_subtraction_inplace(const Field2D& rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, rhs.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, rhs.getRegion("RGN_ALL")) { const auto base_ind = fieldmesh->ind2Dto3D(index); for (int jz = 0; jz < fieldmesh->LocalNz; ++jz) { (*this)[base_ind + jz] -= rhs[index]; } } - track(rhs, "operator-="); #if BOUT_USE_TRACK name = fmt::format("{:s} -= {:s}", this->name, rhs.name); @@ -615,13 +591,12 @@ Field3D& Field3D::operator-=(const Field2D& rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, rhs.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, rhs.getRegion("RGN_ALL")) { const auto base_ind = fieldmesh->ind2Dto3D(index); for (int jz = 0; jz < fieldmesh->LocalNz; ++jz) { (*this)[base_ind + jz] -= rhs[index]; } } - track(rhs, "operator-="); checkData(*this); @@ -643,12 +618,11 @@ FieldPerp operator*(const Field3D& lhs, const FieldPerp& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, result.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getRegion("RGN_ALL")) { int yind = rhs.getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); result[index] = lhs[base_ind] * rhs[index]; } - checkData(result); return result; } @@ -663,12 +637,11 @@ FieldPerp operator/(const Field3D& lhs, const FieldPerp& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, result.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getRegion("RGN_ALL")) { int yind = rhs.getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); result[index] = lhs[base_ind] / rhs[index]; } - checkData(result); return result; } @@ -683,12 +656,11 @@ FieldPerp operator+(const Field3D& lhs, const FieldPerp& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, result.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getRegion("RGN_ALL")) { int yind = rhs.getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); result[index] = lhs[base_ind] + rhs[index]; } - checkData(result); 
return result; } @@ -703,12 +675,11 @@ FieldPerp operator-(const Field3D& lhs, const FieldPerp& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, result.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getRegion("RGN_ALL")) { int yind = rhs.getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); result[index] = lhs[base_ind] - rhs[index]; } - checkData(result); return result; } @@ -722,10 +693,9 @@ Field3D operator*(const Field3D& lhs, const BoutReal rhs) { result.setRegion(lhs.getRegionID()); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] * rhs; } - checkData(result); return result; } @@ -741,8 +711,7 @@ Field3D& Field3D::update_multiplication_inplace(const BoutReal rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs; } track(rhs, "operator*="); #if BOUT_USE_TRACK name = fmt::format("{:s} *= {:s}", this->name, "BR"); @@ -765,8 +734,7 @@ Field3D& Field3D::operator*=(const BoutReal rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs; } track(rhs, "operator*="); checkData(*this); @@ -788,10 +756,9 @@ Field3D operator/(const Field3D& lhs, const BoutReal rhs) { result.setRegion(lhs.getRegionID()); const auto tmp = 1.0 / rhs; - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] * tmp; } - checkData(result); return result; } @@ -808,8 +775,7 @@ Field3D& Field3D::update_division_inplace(const BoutReal rhs) { checkData(rhs); const auto tmp = 1.0 / rhs; - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] *= tmp; } - + 
BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] *= tmp; } track(rhs, "operator/="); #if BOUT_USE_TRACK name = fmt::format("{:s} /= {:s}", this->name, "BR"); @@ -833,8 +799,7 @@ Field3D& Field3D::operator/=(const BoutReal rhs) { checkData(rhs); const auto tmp = 1.0 / rhs; - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] *= tmp; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] *= tmp; } track(rhs, "operator/="); checkData(*this); @@ -855,10 +820,9 @@ Field3D operator+(const Field3D& lhs, const BoutReal rhs) { result.setRegion(lhs.getRegionID()); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] + rhs; } - checkData(result); return result; } @@ -874,8 +838,7 @@ Field3D& Field3D::update_addition_inplace(const BoutReal rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs; } track(rhs, "operator+="); #if BOUT_USE_TRACK name = fmt::format("{:s} += {:s}", this->name, "BR"); @@ -898,8 +861,7 @@ Field3D& Field3D::operator+=(const BoutReal rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs; } track(rhs, "operator+="); checkData(*this); @@ -920,10 +882,9 @@ Field3D operator-(const Field3D& lhs, const BoutReal rhs) { result.setRegion(lhs.getRegionID()); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] - rhs; } - checkData(result); return result; } @@ -939,8 +900,7 @@ Field3D& Field3D::update_subtraction_inplace(const BoutReal rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { 
(*this)[index] -= rhs; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs; } track(rhs, "operator-="); #if BOUT_USE_TRACK name = fmt::format("{:s} -= {:s}", this->name, "BR"); @@ -963,8 +923,7 @@ Field3D& Field3D::operator-=(const BoutReal rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs; } track(rhs, "operator-="); checkData(*this); @@ -988,13 +947,12 @@ Field3D operator*(const Field2D& lhs, const Field3D& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, lhs.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, lhs.getRegion("RGN_ALL")) { const auto base_ind = localmesh->ind2Dto3D(index); for (int jz = 0; jz < localmesh->LocalNz; ++jz) { result[base_ind + jz] = lhs[index] * rhs[base_ind + jz]; } } - checkData(result); return result; } @@ -1011,13 +969,12 @@ Field3D operator/(const Field2D& lhs, const Field3D& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, lhs.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, lhs.getRegion("RGN_ALL")) { const auto base_ind = localmesh->ind2Dto3D(index); for (int jz = 0; jz < localmesh->LocalNz; ++jz) { result[base_ind + jz] = lhs[index] / rhs[base_ind + jz]; } } - checkData(result); return result; } @@ -1034,13 +991,12 @@ Field3D operator+(const Field2D& lhs, const Field3D& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, lhs.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, lhs.getRegion("RGN_ALL")) { const auto base_ind = localmesh->ind2Dto3D(index); for (int jz = 0; jz < localmesh->LocalNz; ++jz) { result[base_ind + jz] = lhs[index] + rhs[base_ind + jz]; } } - checkData(result); return result; } @@ -1057,13 +1013,12 @@ Field3D operator-(const Field2D& lhs, const Field3D& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, lhs.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, lhs.getRegion("RGN_ALL")) { const auto base_ind = 
localmesh->ind2Dto3D(index); for (int jz = 0; jz < localmesh->LocalNz; ++jz) { result[base_ind + jz] = lhs[index] - rhs[base_ind + jz]; } } - checkData(result); return result; } @@ -1076,10 +1031,9 @@ Field2D operator*(const Field2D& lhs, const Field2D& rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] * rhs[index]; } - checkData(result); return result; } @@ -1094,7 +1048,7 @@ Field2D& Field2D::operator*=(const Field2D& rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs[index]; } + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs[index]; } checkData(*this); @@ -1112,10 +1066,9 @@ Field2D operator/(const Field2D& lhs, const Field2D& rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] / rhs[index]; } - checkData(result); return result; } @@ -1130,7 +1083,7 @@ Field2D& Field2D::operator/=(const Field2D& rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] /= rhs[index]; } + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] /= rhs[index]; } checkData(*this); @@ -1148,10 +1101,9 @@ Field2D operator+(const Field2D& lhs, const Field2D& rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] + rhs[index]; } - checkData(result); return result; } @@ -1166,7 +1118,7 @@ Field2D& Field2D::operator+=(const Field2D& rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs[index]; } + BOUT_FOR_SERIAL(index, 
this->getRegion("RGN_ALL")) { (*this)[index] += rhs[index]; } checkData(*this); @@ -1184,10 +1136,9 @@ Field2D operator-(const Field2D& lhs, const Field2D& rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] - rhs[index]; } - checkData(result); return result; } @@ -1202,7 +1153,7 @@ Field2D& Field2D::operator-=(const Field2D& rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs[index]; } + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs[index]; } checkData(*this); @@ -1222,12 +1173,11 @@ FieldPerp operator*(const Field2D& lhs, const FieldPerp& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, result.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getRegion("RGN_ALL")) { int yind = rhs.getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); result[index] = lhs[base_ind] * rhs[index]; } - checkData(result); return result; } @@ -1242,12 +1192,11 @@ FieldPerp operator/(const Field2D& lhs, const FieldPerp& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, result.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getRegion("RGN_ALL")) { int yind = rhs.getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); result[index] = lhs[base_ind] / rhs[index]; } - checkData(result); return result; } @@ -1262,12 +1211,11 @@ FieldPerp operator+(const Field2D& lhs, const FieldPerp& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, result.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getRegion("RGN_ALL")) { int yind = rhs.getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); result[index] = lhs[base_ind] + rhs[index]; } - checkData(result); return result; } @@ -1282,12 +1230,11 @@ FieldPerp operator-(const Field2D& lhs, const FieldPerp& rhs) { Mesh* localmesh = 
lhs.getMesh(); - BOUT_FOR(index, result.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getRegion("RGN_ALL")) { int yind = rhs.getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); result[index] = lhs[base_ind] - rhs[index]; } - checkData(result); return result; } @@ -1299,10 +1246,9 @@ Field2D operator*(const Field2D& lhs, const BoutReal rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] * rhs; } - checkData(result); return result; } @@ -1316,7 +1262,7 @@ Field2D& Field2D::operator*=(const BoutReal rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs; } + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs; } checkData(*this); @@ -1334,10 +1280,9 @@ Field2D operator/(const Field2D& lhs, const BoutReal rhs) { checkData(rhs); const auto tmp = 1.0 / rhs; - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] * tmp; } - checkData(result); return result; } @@ -1352,7 +1297,7 @@ Field2D& Field2D::operator/=(const BoutReal rhs) { checkData(rhs); const auto tmp = 1.0 / rhs; - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] *= tmp; } + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] *= tmp; } checkData(*this); @@ -1369,10 +1314,9 @@ Field2D operator+(const Field2D& lhs, const BoutReal rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] + rhs; } - checkData(result); return result; } @@ -1386,7 +1330,7 @@ Field2D& Field2D::operator+=(const BoutReal rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) 
{ (*this)[index] += rhs; } + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs; } checkData(*this); @@ -1403,10 +1347,9 @@ Field2D operator-(const Field2D& lhs, const BoutReal rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] - rhs; } - checkData(result); return result; } @@ -1420,7 +1363,7 @@ Field2D& Field2D::operator-=(const BoutReal rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs; } + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs; } checkData(*this); @@ -1440,12 +1383,11 @@ FieldPerp operator*(const FieldPerp& lhs, const Field3D& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, result.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getRegion("RGN_ALL")) { int yind = lhs.getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); result[index] = lhs[index] * rhs[base_ind]; } - checkData(result); return result; } @@ -1462,7 +1404,7 @@ FieldPerp& FieldPerp::operator*=(const Field3D& rhs) { Mesh* localmesh = this->getMesh(); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { int yind = this->getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); (*this)[index] *= rhs[base_ind]; @@ -1486,12 +1428,11 @@ FieldPerp operator/(const FieldPerp& lhs, const Field3D& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, result.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getRegion("RGN_ALL")) { int yind = lhs.getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); result[index] = lhs[index] / rhs[base_ind]; } - checkData(result); return result; } @@ -1508,7 +1449,7 @@ FieldPerp& FieldPerp::operator/=(const Field3D& rhs) { Mesh* localmesh = this->getMesh(); - BOUT_FOR(index, 
this->getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { int yind = this->getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); (*this)[index] /= rhs[base_ind]; @@ -1532,12 +1473,11 @@ FieldPerp operator+(const FieldPerp& lhs, const Field3D& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, result.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getRegion("RGN_ALL")) { int yind = lhs.getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); result[index] = lhs[index] + rhs[base_ind]; } - checkData(result); return result; } @@ -1554,7 +1494,7 @@ FieldPerp& FieldPerp::operator+=(const Field3D& rhs) { Mesh* localmesh = this->getMesh(); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { int yind = this->getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); (*this)[index] += rhs[base_ind]; @@ -1578,12 +1518,11 @@ FieldPerp operator-(const FieldPerp& lhs, const Field3D& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, result.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getRegion("RGN_ALL")) { int yind = lhs.getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); result[index] = lhs[index] - rhs[base_ind]; } - checkData(result); return result; } @@ -1600,7 +1539,7 @@ FieldPerp& FieldPerp::operator-=(const Field3D& rhs) { Mesh* localmesh = this->getMesh(); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { int yind = this->getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); (*this)[index] -= rhs[base_ind]; @@ -1624,12 +1563,11 @@ FieldPerp operator*(const FieldPerp& lhs, const Field2D& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, result.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getRegion("RGN_ALL")) { int yind = lhs.getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); result[index] = 
lhs[index] * rhs[base_ind]; } - checkData(result); return result; } @@ -1646,7 +1584,7 @@ FieldPerp& FieldPerp::operator*=(const Field2D& rhs) { Mesh* localmesh = this->getMesh(); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { int yind = this->getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); (*this)[index] *= rhs[base_ind]; @@ -1670,12 +1608,11 @@ FieldPerp operator/(const FieldPerp& lhs, const Field2D& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, result.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getRegion("RGN_ALL")) { int yind = lhs.getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); result[index] = lhs[index] / rhs[base_ind]; } - checkData(result); return result; } @@ -1692,7 +1629,7 @@ FieldPerp& FieldPerp::operator/=(const Field2D& rhs) { Mesh* localmesh = this->getMesh(); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { int yind = this->getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); (*this)[index] /= rhs[base_ind]; @@ -1716,12 +1653,11 @@ FieldPerp operator+(const FieldPerp& lhs, const Field2D& rhs) { Mesh* localmesh = lhs.getMesh(); - BOUT_FOR(index, result.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getRegion("RGN_ALL")) { int yind = lhs.getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); result[index] = lhs[index] + rhs[base_ind]; } - checkData(result); return result; } @@ -1738,7 +1674,7 @@ FieldPerp& FieldPerp::operator+=(const Field2D& rhs) { Mesh* localmesh = this->getMesh(); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { int yind = this->getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); (*this)[index] += rhs[base_ind]; @@ -1762,12 +1698,11 @@ FieldPerp operator-(const FieldPerp& lhs, const Field2D& rhs) { Mesh* localmesh = lhs.getMesh(); - 
BOUT_FOR(index, result.getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getRegion("RGN_ALL")) { int yind = lhs.getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); result[index] = lhs[index] - rhs[base_ind]; } - checkData(result); return result; } @@ -1784,7 +1719,7 @@ FieldPerp& FieldPerp::operator-=(const Field2D& rhs) { Mesh* localmesh = this->getMesh(); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { int yind = this->getIndex(); const auto base_ind = localmesh->indPerpto3D(index, yind); (*this)[index] -= rhs[base_ind]; @@ -1806,10 +1741,9 @@ FieldPerp operator*(const FieldPerp& lhs, const FieldPerp& rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] * rhs[index]; } - checkData(result); return result; } @@ -1824,7 +1758,7 @@ FieldPerp& FieldPerp::operator*=(const FieldPerp& rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs[index]; } + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs[index]; } checkData(*this); @@ -1842,10 +1776,9 @@ FieldPerp operator/(const FieldPerp& lhs, const FieldPerp& rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] / rhs[index]; } - checkData(result); return result; } @@ -1860,7 +1793,7 @@ FieldPerp& FieldPerp::operator/=(const FieldPerp& rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] /= rhs[index]; } + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] /= rhs[index]; } checkData(*this); @@ -1878,10 +1811,9 @@ FieldPerp operator+(const FieldPerp& lhs, const FieldPerp& rhs) { checkData(lhs); 
checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] + rhs[index]; } - checkData(result); return result; } @@ -1896,7 +1828,7 @@ FieldPerp& FieldPerp::operator+=(const FieldPerp& rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs[index]; } + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs[index]; } checkData(*this); @@ -1914,10 +1846,9 @@ FieldPerp operator-(const FieldPerp& lhs, const FieldPerp& rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] - rhs[index]; } - checkData(result); return result; } @@ -1932,7 +1863,7 @@ FieldPerp& FieldPerp::operator-=(const FieldPerp& rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs[index]; } + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs[index]; } checkData(*this); @@ -1949,10 +1880,9 @@ FieldPerp operator*(const FieldPerp& lhs, const BoutReal rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] * rhs; } - checkData(result); return result; } @@ -1966,7 +1896,7 @@ FieldPerp& FieldPerp::operator*=(const BoutReal rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs; } + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs; } checkData(*this); @@ -1984,10 +1914,9 @@ FieldPerp operator/(const FieldPerp& lhs, const BoutReal rhs) { checkData(rhs); const auto tmp = 1.0 / rhs; - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + 
BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] * tmp; } - checkData(result); return result; } @@ -2001,7 +1930,7 @@ FieldPerp& FieldPerp::operator/=(const BoutReal rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] /= rhs; } + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] /= rhs; } checkData(*this); @@ -2018,10 +1947,9 @@ FieldPerp operator+(const FieldPerp& lhs, const BoutReal rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] + rhs; } - checkData(result); return result; } @@ -2035,7 +1963,7 @@ FieldPerp& FieldPerp::operator+=(const BoutReal rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs; } + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs; } checkData(*this); @@ -2052,10 +1980,9 @@ FieldPerp operator-(const FieldPerp& lhs, const BoutReal rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] - rhs; } - checkData(result); return result; } @@ -2069,7 +1996,7 @@ FieldPerp& FieldPerp::operator-=(const BoutReal rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs; } + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs; } checkData(*this); @@ -2088,10 +2015,9 @@ Field3D operator*(const BoutReal lhs, const Field3D& rhs) { result.setRegion(rhs.getRegionID()); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs * rhs[index]; } - checkData(result); return result; } @@ -2105,10 
+2031,9 @@ Field3D operator/(const BoutReal lhs, const Field3D& rhs) { result.setRegion(rhs.getRegionID()); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs / rhs[index]; } - checkData(result); return result; } @@ -2122,10 +2047,9 @@ Field3D operator+(const BoutReal lhs, const Field3D& rhs) { result.setRegion(rhs.getRegionID()); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs + rhs[index]; } - checkData(result); return result; } @@ -2139,10 +2063,9 @@ Field3D operator-(const BoutReal lhs, const Field3D& rhs) { result.setRegion(rhs.getRegionID()); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs - rhs[index]; } - checkData(result); return result; } @@ -2154,10 +2077,9 @@ Field2D operator*(const BoutReal lhs, const Field2D& rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs * rhs[index]; } - checkData(result); return result; } @@ -2169,10 +2091,9 @@ Field2D operator/(const BoutReal lhs, const Field2D& rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs / rhs[index]; } - checkData(result); return result; } @@ -2184,10 +2105,9 @@ Field2D operator+(const BoutReal lhs, const Field2D& rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs + rhs[index]; } - checkData(result); return result; } @@ -2199,10 +2119,9 @@ Field2D 
operator-(const BoutReal lhs, const Field2D& rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs - rhs[index]; } - checkData(result); return result; } @@ -2214,10 +2133,9 @@ FieldPerp operator*(const BoutReal lhs, const FieldPerp& rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs * rhs[index]; } - checkData(result); return result; } @@ -2229,10 +2147,9 @@ FieldPerp operator/(const BoutReal lhs, const FieldPerp& rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs / rhs[index]; } - checkData(result); return result; } @@ -2244,10 +2161,9 @@ FieldPerp operator+(const BoutReal lhs, const FieldPerp& rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs + rhs[index]; } - checkData(result); return result; } @@ -2259,10 +2175,9 @@ FieldPerp operator-(const BoutReal lhs, const FieldPerp& rhs) { checkData(lhs); checkData(rhs); - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs - rhs[index]; } - checkData(result); return result; } @@ -2302,10 +2217,9 @@ Field3DParallel operator*(const Field3D& lhs, const Field3DParallel& rhs) { } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] * rhs[index]; } - checkData(result); return result; } @@ -2345,10 +2259,9 @@ Field3DParallel operator/(const Field3D& 
lhs, const Field3DParallel& rhs) { } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] / rhs[index]; } - checkData(result); return result; } @@ -2388,10 +2301,9 @@ Field3DParallel operator+(const Field3D& lhs, const Field3DParallel& rhs) { } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] + rhs[index]; } - checkData(result); return result; } @@ -2431,10 +2343,9 @@ Field3DParallel operator-(const Field3D& lhs, const Field3DParallel& rhs) { } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] - rhs[index]; } - checkData(result); return result; } @@ -2474,10 +2385,9 @@ Field3DParallel operator*(const Field3DParallel& lhs, const Field3D& rhs) { } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] * rhs[index]; } - checkData(result); return result; } @@ -2509,8 +2419,7 @@ Field3DParallel& Field3DParallel::operator*=(const Field3D& rhs) { regionID = fieldmesh->getCommonRegion(regionID, rhs.getRegionID()); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs[index]; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs[index]; } track(rhs, "operator*="); checkData(*this); @@ -2557,10 +2466,9 @@ Field3DParallel operator/(const Field3DParallel& lhs, const Field3D& rhs) { } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] / rhs[index]; } - checkData(result); return result; } @@ -2592,8 +2500,7 @@ Field3DParallel& Field3DParallel::operator/=(const Field3D& rhs) { 
regionID = fieldmesh->getCommonRegion(regionID, rhs.getRegionID()); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] /= rhs[index]; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] /= rhs[index]; } track(rhs, "operator/="); checkData(*this); @@ -2640,10 +2547,9 @@ Field3DParallel operator+(const Field3DParallel& lhs, const Field3D& rhs) { } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] + rhs[index]; } - checkData(result); return result; } @@ -2675,8 +2581,7 @@ Field3DParallel& Field3DParallel::operator+=(const Field3D& rhs) { regionID = fieldmesh->getCommonRegion(regionID, rhs.getRegionID()); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs[index]; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs[index]; } track(rhs, "operator+="); checkData(*this); @@ -2723,10 +2628,9 @@ Field3DParallel operator-(const Field3DParallel& lhs, const Field3D& rhs) { } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] - rhs[index]; } - checkData(result); return result; } @@ -2758,8 +2662,7 @@ Field3DParallel& Field3DParallel::operator-=(const Field3D& rhs) { regionID = fieldmesh->getCommonRegion(regionID, rhs.getRegionID()); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs[index]; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs[index]; } track(rhs, "operator-="); checkData(*this); @@ -2806,10 +2709,9 @@ Field3DParallel operator*(const Field3DParallel& lhs, const Field3DParallel& rhs } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] * rhs[index]; } - checkData(result); return result; } @@ -2841,8 
+2743,7 @@ Field3DParallel& Field3DParallel::operator*=(const Field3DParallel& rhs) { regionID = fieldmesh->getCommonRegion(regionID, rhs.getRegionID()); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs[index]; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs[index]; } track(rhs, "operator*="); checkData(*this); @@ -2889,10 +2790,9 @@ Field3DParallel operator/(const Field3DParallel& lhs, const Field3DParallel& rhs } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] / rhs[index]; } - checkData(result); return result; } @@ -2924,8 +2824,7 @@ Field3DParallel& Field3DParallel::operator/=(const Field3DParallel& rhs) { regionID = fieldmesh->getCommonRegion(regionID, rhs.getRegionID()); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] /= rhs[index]; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] /= rhs[index]; } track(rhs, "operator/="); checkData(*this); @@ -2972,10 +2871,9 @@ Field3DParallel operator+(const Field3DParallel& lhs, const Field3DParallel& rhs } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] + rhs[index]; } - checkData(result); return result; } @@ -3007,8 +2905,7 @@ Field3DParallel& Field3DParallel::operator+=(const Field3DParallel& rhs) { regionID = fieldmesh->getCommonRegion(regionID, rhs.getRegionID()); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs[index]; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs[index]; } track(rhs, "operator+="); checkData(*this); @@ -3055,10 +2952,9 @@ Field3DParallel operator-(const Field3DParallel& lhs, const Field3DParallel& rhs } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, 
result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] - rhs[index]; } - checkData(result); return result; } @@ -3090,8 +2986,7 @@ Field3DParallel& Field3DParallel::operator-=(const Field3DParallel& rhs) { regionID = fieldmesh->getCommonRegion(regionID, rhs.getRegionID()); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs[index]; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs[index]; } track(rhs, "operator-="); checkData(*this); @@ -3132,10 +3027,9 @@ Field3DParallel operator*(const Field3DParallel& lhs, const BoutReal rhs) { } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] * rhs; } - checkData(result); return result; } @@ -3164,8 +3058,7 @@ Field3DParallel& Field3DParallel::operator*=(const BoutReal rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] *= rhs; } track(rhs, "operator*="); checkData(*this); @@ -3207,10 +3100,9 @@ Field3DParallel operator/(const Field3DParallel& lhs, const BoutReal rhs) { } const auto tmp = 1.0 / rhs; - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] * tmp; } - checkData(result); return result; } @@ -3239,8 +3131,7 @@ Field3DParallel& Field3DParallel::operator/=(const BoutReal rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] /= rhs; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] /= rhs; } track(rhs, "operator/="); checkData(*this); @@ -3281,10 +3172,9 @@ Field3DParallel operator+(const Field3DParallel& lhs, const BoutReal rhs) { } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, 
result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] + rhs; } - checkData(result); return result; } @@ -3313,8 +3203,7 @@ Field3DParallel& Field3DParallel::operator+=(const BoutReal rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] += rhs; } track(rhs, "operator+="); checkData(*this); @@ -3355,10 +3244,9 @@ Field3DParallel operator-(const Field3DParallel& lhs, const BoutReal rhs) { } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs[index] - rhs; } - checkData(result); return result; } @@ -3387,8 +3275,7 @@ Field3DParallel& Field3DParallel::operator-=(const BoutReal rhs) { checkData(*this); checkData(rhs); - BOUT_FOR(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs; } - + BOUT_FOR_SERIAL(index, this->getRegion("RGN_ALL")) { (*this)[index] -= rhs; } track(rhs, "operator-="); checkData(*this); @@ -3428,10 +3315,9 @@ Field3DParallel operator*(const BoutReal lhs, const Field3DParallel& rhs) { } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs * rhs[index]; } - checkData(result); return result; } @@ -3464,10 +3350,9 @@ Field3DParallel operator/(const BoutReal lhs, const Field3DParallel& rhs) { } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs / rhs[index]; } - checkData(result); return result; } @@ -3500,10 +3385,9 @@ Field3DParallel operator+(const BoutReal lhs, const Field3DParallel& rhs) { } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs + rhs[index]; } - 
checkData(result); return result; } @@ -3536,10 +3420,9 @@ Field3DParallel operator-(const BoutReal lhs, const Field3DParallel& rhs) { } } - BOUT_FOR(index, result.getValidRegionWithDefault("RGN_ALL")) { + BOUT_FOR_SERIAL(index, result.getValidRegionWithDefault("RGN_ALL")) { result[index] = lhs - rhs[index]; } - checkData(result); return result; } diff --git a/src/field/vecops.cxx b/src/field/vecops.cxx index 1677187fcd..672e8f6c09 100644 --- a/src/field/vecops.cxx +++ b/src/field/vecops.cxx @@ -1,12 +1,11 @@ /************************************************************************** * Operators on vector objects - * B.Dudson, October 2007 * ************************************************************************** - * Copyright 2010 B.D.Dudson, S.Farley, M.V.Umansky, X.Q.Xu + * Copyright 2010 - 2026 BOUT++ contributors + * + * Contact: Ben Dudson, dudson2@llnl.gov * - * Contact: Ben Dudson, bd512@york.ac.uk - * * This file is part of BOUT++. * * BOUT++ is free software: you can redistribute it and/or modify @@ -161,8 +160,8 @@ Coordinates::FieldMetric Div(const Vector2D& v, CELL_LOC outloc, vcn.toContravariant(); Coordinates::FieldMetric result = DDX(metric->J * vcn.x, outloc, method); - result += DDY(metric->J * vcn.y, outloc, method); - result += DDZ(metric->J * vcn.z, outloc, method); + result += DDY(Coordinates::FieldMetric{metric->J * vcn.y}, outloc, method); + result += DDZ(Coordinates::FieldMetric{metric->J * vcn.z}, outloc, method); result /= metric->J; return result; @@ -186,7 +185,7 @@ Field3D Div(const Vector3D& v, CELL_LOC outloc, const std::string& method) { Vector3D vcn = v; vcn.toContravariant(); - auto vcnJy = vcn.y.getCoordinates()->J * vcn.y; + Field3D vcnJy = vcn.y.getCoordinates()->J * vcn.y; if (v.y.hasParallelSlices()) { // If v.y has parallel slices then we are using ShiftedMetric (with // mesh:calcParallelSlices_on_communicate=true) or FCI, so we should calculate @@ -195,8 +194,8 @@ Field3D Div(const Vector3D& v, CELL_LOC outloc, const 
std::string& method) { } auto result = DDY(vcnJy, outloc, method); - result += DDX(vcn.x.getCoordinates()->J * vcn.x, outloc, method); - result += DDZ(vcn.z.getCoordinates()->J * vcn.z, outloc, method); + result += DDX(Field3D{vcn.x.getCoordinates()->J * vcn.x}, outloc, method); + result += DDZ(Field3D{vcn.z.getCoordinates()->J * vcn.z}, outloc, method); result /= metric->J; return result; @@ -224,10 +223,12 @@ Coordinates::FieldMetric Div(const Vector2D& v, const Field2D& f, CELL_LOC outlo Vector2D vcn = v; vcn.toContravariant(); - Coordinates::FieldMetric result = - FDDX(vcn.x.getCoordinates()->J * vcn.x, f, outloc, method); - result += FDDY(vcn.y.getCoordinates()->J * vcn.y, f, outloc, method); - result += FDDZ(vcn.z.getCoordinates()->J * vcn.z, f, outloc, method); + Coordinates::FieldMetric result = FDDX( + Coordinates::FieldMetric{vcn.x.getCoordinates()->J * vcn.x}, f, outloc, method); + result += FDDY(Coordinates::FieldMetric{vcn.y.getCoordinates()->J * vcn.y}, f, outloc, + method); + result += FDDZ(Coordinates::FieldMetric{vcn.z.getCoordinates()->J * vcn.z}, f, outloc, + method); result /= metric->J; return result; @@ -249,9 +250,9 @@ Field3D Div(const Vector3D& v, const Field3D& f, CELL_LOC outloc, Vector3D vcn = v; vcn.toContravariant(); - Field3D result = FDDX(vcn.x.getCoordinates()->J * vcn.x, f, outloc, method); - result += FDDY(vcn.y.getCoordinates()->J * vcn.y, f, outloc, method); - result += FDDZ(vcn.z.getCoordinates()->J * vcn.z, f, outloc, method); + Field3D result = FDDX(Field3D{vcn.x.getCoordinates()->J * vcn.x}, f, outloc, method); + result += FDDY(Field3D{vcn.y.getCoordinates()->J * vcn.y}, f, outloc, method); + result += FDDZ(Field3D{vcn.z.getCoordinates()->J * vcn.z}, f, outloc, method); result /= metric->J; return result; diff --git a/src/invert/laplace/impls/naulin/naulin_laplace.cxx b/src/invert/laplace/impls/naulin/naulin_laplace.cxx index bf8711245e..ae8e78d1ff 100644 --- a/src/invert/laplace/impls/naulin/naulin_laplace.cxx +++ 
b/src/invert/laplace/impls/naulin/naulin_laplace.cxx @@ -18,9 +18,9 @@ * ========= * ************************************************************************** - * Copyright 2018 B.D.Dudson, M. Loiten, J. Omotani + * Copyright 2018 - 2026 BOUT++ contributors * - * Contact: Ben Dudson, benjamin.dudson@york.ac.uk + * Contact: Ben Dudson, dudson2@llnl.gov * * This file is part of BOUT++. * diff --git a/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx b/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx index 3be5e43e63..9966ad654d 100644 --- a/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx +++ b/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx @@ -4,9 +4,9 @@ * Using PETSc Solvers * ************************************************************************** - * Copyright 2013 J. Buchanan, J.Omotani + * Copyright 2013 - 2026 BOUT++ contributors * - * Contact: Ben Dudson, bd512@york.ac.uk + * Contact: Ben Dudson, dudson2@llnl.gov * * This file is part of BOUT++. * @@ -120,39 +120,64 @@ LaplacePetsc3dAmg::LaplacePetsc3dAmg(Options* opt, const CELL_LOC loc, Mesh* mes // Set up boundary conditions in operator const bool inner_X_neumann = isInnerBoundaryFlagSet(INVERT_AC_GRAD); - const auto inner_X_BC = inner_X_neumann ? -1. / coords->dx / sqrt(coords->g_11) : 0.5; - const auto inner_X_BC_plus = inner_X_neumann ? -inner_X_BC : 0.5; - - BOUT_FOR_SERIAL(i, indexer->getRegionInnerX()) { - operator3D(i, i) = inner_X_BC[i]; - operator3D(i, i.xp()) = inner_X_BC_plus[i]; + if (inner_X_neumann) { + // This is a BinaryExpr that is only evaluated when needed + const auto inner_X_BC = -1. 
/ coords->dx / sqrt(coords->g_11); + BOUT_FOR_SERIAL(i, indexer->getRegionInnerX()) { + const BoutReal bc = inner_X_BC[i]; + operator3D(i, i) = bc; + operator3D(i, i.xp()) = -bc; + } + } else { + BOUT_FOR_SERIAL(i, indexer->getRegionInnerX()) { + operator3D(i, i) = 0.5; + operator3D(i, i.xp()) = 0.5; + } } const bool outer_X_neumann = isOuterBoundaryFlagSet(INVERT_AC_GRAD); - const auto outer_X_BC = outer_X_neumann ? 1. / coords->dx / sqrt(coords->g_11) : 0.5; - const auto outer_X_BC_minus = outer_X_neumann ? -outer_X_BC : 0.5; - - BOUT_FOR_SERIAL(i, indexer->getRegionOuterX()) { - operator3D(i, i) = outer_X_BC[i]; - operator3D(i, i.xm()) = outer_X_BC_minus[i]; + if (outer_X_neumann) { + const auto outer_X_BC = 1. / coords->dx / sqrt(coords->g_11); + BOUT_FOR_SERIAL(i, indexer->getRegionOuterX()) { + const BoutReal bc = outer_X_BC[i]; + operator3D(i, i) = bc; + operator3D(i, i.xm()) = -bc; + } + } else { + BOUT_FOR_SERIAL(i, indexer->getRegionOuterX()) { + operator3D(i, i) = 0.5; + operator3D(i, i.xm()) = 0.5; + } } const bool lower_Y_neumann = flagSet(lower_boundary_flags, INVERT_AC_GRAD); - const auto lower_Y_BC = lower_Y_neumann ? -1. / coords->dy / sqrt(coords->g_22) : 0.5; - const auto lower_Y_BC_plus = lower_Y_neumann ? -lower_Y_BC : 0.5; - - BOUT_FOR_SERIAL(i, indexer->getRegionLowerY()) { - operator3D(i, i) = lower_Y_BC[i]; - operator3D(i, i.yp()) = lower_Y_BC_plus[i]; + if (lower_Y_neumann) { + const auto lower_Y_BC = -1. / coords->dy / sqrt(coords->g_22); + BOUT_FOR_SERIAL(i, indexer->getRegionLowerY()) { + const BoutReal bc = lower_Y_BC[i]; + operator3D(i, i) = bc; + operator3D(i, i.yp()) = -bc; + } + } else { + BOUT_FOR_SERIAL(i, indexer->getRegionLowerY()) { + operator3D(i, i) = 0.5; + operator3D(i, i.yp()) = 0.5; + } } const bool upper_Y_neumann = flagSet(upper_boundary_flags, INVERT_AC_GRAD); - const auto upper_Y_BC = upper_Y_neumann ? 1. / coords->dy / sqrt(coords->g_22) : 0.5; - const auto upper_Y_BC_minus = upper_Y_neumann ? 
-upper_Y_BC : 0.5; - - BOUT_FOR_SERIAL(i, indexer->getRegionUpperY()) { - operator3D(i, i) = upper_Y_BC[i]; - operator3D(i, i.ym()) = upper_Y_BC_minus[i]; + if (upper_Y_neumann) { + const auto upper_Y_BC = 1. / coords->dy / sqrt(coords->g_22); + BOUT_FOR_SERIAL(i, indexer->getRegionUpperY()) { + const BoutReal bc = upper_Y_BC[i]; + operator3D(i, i) = bc; + operator3D(i, i.ym()) = -bc; + } + } else { + BOUT_FOR_SERIAL(i, indexer->getRegionUpperY()) { + operator3D(i, i) = 0.5; + operator3D(i, i.ym()) = 0.5; + } } } @@ -274,7 +299,7 @@ void LaplacePetsc3dAmg::updateMatrix3D() { const Field3D dc_dx = issetC ? DDX(C2) : Field3D(); const Field3D dc_dy = issetC ? DDY(C2) : Field3D(); const Field3D dc_dz = issetC ? DDZ(C2) : Field3D(); - const auto dJ_dy = DDY(coords->J / coords->g_22); + const auto dJ_dy = DDY(Coordinates::FieldMetric{coords->J / coords->g_22}); // Set up the matrix for the internal points on the grid. // Boundary conditions were set in the constructor. @@ -359,7 +384,7 @@ void LaplacePetsc3dAmg::updateMatrix3D() { // Must add these (rather than assign) so that elements used in // interpolation don't overwrite each other. 
BOUT_FOR_SERIAL(l, indexer->getRegionNobndry()) { - BoutReal C_df_dy = (coords->G2[l] - dJ_dy[l] / coords->J[l]); + BoutReal C_df_dy = coords->G2[l] - (dJ_dy[l] / coords->J[l]); if (issetD) { C_df_dy *= D[l]; } @@ -370,7 +395,7 @@ void LaplacePetsc3dAmg::updateMatrix3D() { / C1[l]; } - BoutReal C_d2f_dy2 = (coords->g22[l] - 1.0 / coords->g_22[l]); + BoutReal C_d2f_dy2 = coords->g22[l] - (1.0 / coords->g_22[l]); if (issetD) { C_d2f_dy2 *= D[l]; } diff --git a/src/mesh/coordinates.cxx b/src/mesh/coordinates.cxx index 2e1c4ad389..057ffa65b5 100644 --- a/src/mesh/coordinates.cxx +++ b/src/mesh/coordinates.cxx @@ -4,6 +4,7 @@ * given the contravariant metric tensor terms **************************************************************************/ +#include "bout/coordinates_accessor.hxx" #include "bout/field3d.hxx" #include "bout/field_data.hxx" #include @@ -785,7 +786,6 @@ Coordinates::Coordinates(Mesh* mesh, Options* options, const CELL_LOC loc, } else { Bxy = interpolateAndExtrapolate(Bxy, location, extrapolate_x, extrapolate_y, false, transform.get()); - output_warn.write("\tMaximum difference in Bxy is %e\n", max(abs(Bxy - Bcalc))); } @@ -1106,7 +1106,7 @@ int Coordinates::geometry(bool recalculate_staggered, if (localmesh->get(d2x, "d2x" + suffix, 0.0, false, location)) { output_warn.write( "\tWARNING: differencing quantity 'd2x' not found. Calculating from dx\n"); - d1_dx = bout::derivatives::index::DDX(1. / dx); // d/di(1/dx) + d1_dx = bout::derivatives::index::DDX(FieldMetric{1. / dx}); // d/di(1/dx) localmesh->communicate_no_slices(d1_dx); d1_dx = @@ -1141,7 +1141,7 @@ int Coordinates::geometry(bool recalculate_staggered, if (localmesh->get(d2z, "d2z" + suffix, 0.0, false)) { output_warn.write( "\tWARNING: differencing quantity 'd2z' not found. Calculating from dz\n"); - d1_dz = bout::derivatives::index::DDZ(1. / dz); + d1_dz = bout::derivatives::index::DDZ(FieldMetric{1. 
/ dz}); localmesh->communicate_no_slices(d1_dz); d1_dz = interpolateAndExtrapolate(d1_dz, location, true, true, true, transform.get()); @@ -1160,7 +1160,7 @@ int Coordinates::geometry(bool recalculate_staggered, if (localmesh->get(d2x, "d2x", 0.0, false)) { output_warn.write( "\tWARNING: differencing quantity 'd2x' not found. Calculating from dx\n"); - d1_dx = bout::derivatives::index::DDX(1. / dx); // d/di(1/dx) + d1_dx = bout::derivatives::index::DDX(FieldMetric{1. / dx}); // d/di(1/dx) localmesh->communicate_no_slices(d1_dx); d1_dx = @@ -1175,7 +1175,7 @@ int Coordinates::geometry(bool recalculate_staggered, if (localmesh->get(d2y, "d2y", 0.0, false)) { output_warn.write( "\tWARNING: differencing quantity 'd2y' not found. Calculating from dy\n"); - d1_dy = DDY(1. / dy); // d/di(1/dy) + d1_dy = DDY(FieldMetric{1. / dy}); // d/di(1/dy) localmesh->communicate_no_slices(d1_dy); d1_dy = @@ -1191,7 +1191,7 @@ int Coordinates::geometry(bool recalculate_staggered, if (localmesh->get(d2z, "d2z", 0.0, false)) { output_warn.write( "\tWARNING: differencing quantity 'd2z' not found. Calculating from dz\n"); - d1_dz = bout::derivatives::index::DDZ(1. / dz); + d1_dz = bout::derivatives::index::DDZ(FieldMetric{1. 
/ dz}); localmesh->communicate_no_slices(d1_dz); d1_dz = @@ -1213,10 +1213,11 @@ int Coordinates::geometry(bool recalculate_staggered, localmesh->recalculateStaggeredCoordinates(); } - // Invalidate and recalculate cached variables + // Invalidate and recalculate cached variables and any accessor zlength_cache.reset(); Grad2_par2_DDY_invSgCache.clear(); invSgCache.reset(); + CoordinatesAccessor::clear(this); return 0; } @@ -1344,8 +1345,8 @@ int Coordinates::jacobian() { const bool extrapolate_x = not localmesh->sourceHasXBoundaryGuards(); const bool extrapolate_y = not localmesh->sourceHasYBoundaryGuards(); - auto g = g11 * g22 * g33 + 2.0 * g12 * g13 * g23 - g11 * g23 * g23 - g22 * g13 * g13 - - g33 * g12 * g12; + const FieldMetric g = g11 * g22 * g33 + 2.0 * g12 * g13 * g23 - g11 * g23 * g23 + - g22 * g13 * g13 - g33 * g12 * g12; // Check that g is positive bout::checkPositive(g, "The determinant of g^ij", "RGN_NOBNDRY"); @@ -1578,7 +1579,7 @@ Coordinates::FieldMetric Coordinates::Div_par(const Field2D& f, CELL_LOC outloc, // Coordinates object auto Bxy_floc = f.getCoordinates()->Bxy; - return Bxy * Grad_par(f / Bxy_floc, outloc, method); + return Bxy * Grad_par(FieldMetric{f / Bxy_floc}, outloc, method); } Field3D Coordinates::Div_par(const Field3DParallel& f, CELL_LOC outloc, @@ -1771,7 +1772,8 @@ FieldPerp Coordinates::Delp2(const FieldPerp& f, CELL_LOC outloc, bool useFFT) { Coordinates::FieldMetric Coordinates::Laplace_par(const Field2D& f, CELL_LOC outloc) { ASSERT1(location == outloc || outloc == CELL_DEFAULT); - return D2DY2(f, outloc) / g_22 + DDY(J / g_22, outloc) * DDY(f, outloc) / J; + return D2DY2(f, outloc) / g_22 + + DDY(FieldMetric{J / g_22}, outloc) * DDY(f, outloc) / J; } Field3D Coordinates::Laplace_par(const Field3DParallel& f, CELL_LOC outloc) { @@ -2034,7 +2036,6 @@ const Coordinates::FieldMetric& Coordinates::g_22_yhigh() const { return *_g_22_yhigh; } _g_22_yhigh.emplace(emptyFrom(g_22)); - //_g_22_yhigh->setLocation(CELL_YHIGH); auto* 
mesh = Bxy.getMesh(); if (Bxy.isFci()) { if (mesh->get(_g_22_yhigh.value(), "g_22_cell_yhigh", 0.0, false) != 0) { @@ -2050,7 +2051,7 @@ const Coordinates::FieldMetric& Coordinates::g_22_yhigh() const { } void Coordinates::_compute_cell_area_x() const { - const auto area_centre = sqrt(g_22 * g_33 - SQ(g_23)) * dy * dz; + const FieldMetric area_centre = sqrt(g_22 * g_33 - SQ(g_23)) * dy * dz; _cell_area_xlow.emplace(emptyFrom(area_centre)); _cell_area_xhigh.emplace(emptyFrom(area_centre)); // We cannot setLocation, as that would trigger the computation of staggered @@ -2066,7 +2067,7 @@ void Coordinates::_compute_cell_area_x() const { void Coordinates::_compute_cell_area_y() const { auto* mesh = Bxy.getMesh(); if (g_11.isFci()) { - const auto jxz_centre = sqrt(g_11 * g_33 - SQ(g_13)); + const FieldMetric jxz_centre = sqrt(g_11 * g_33 - SQ(g_13)); auto jxz_ylow = emptyFrom(jxz_centre); auto jxz_yhigh = emptyFrom(jxz_centre); @@ -2094,7 +2095,7 @@ void Coordinates::_compute_cell_area_y() const { _cell_area_yhigh.emplace(jxz_yhigh * dx * dz); } else { // Field aligned - const auto area_centre = sqrt(g_11 * g_33 - SQ(g_13)) * dx * dz; + const FieldMetric area_centre = sqrt(g_11 * g_33 - SQ(g_13)) * dx * dz; _cell_area_ylow.emplace(emptyFrom(area_centre)); _cell_area_yhigh.emplace(emptyFrom(area_centre)); // We cannot setLocation, as that would trigger the computation of staggered @@ -2115,12 +2116,11 @@ void Coordinates::_compute_cell_area_y() const { } void Coordinates::_compute_cell_area_z() const { - const auto area_centre = sqrt(g_11 * g_22 - SQ(g_12)) * dx * dy; + const FieldMetric area_centre = sqrt(g_11 * g_22 - SQ(g_12)) * dx * dy; _cell_area_zlow.emplace(emptyFrom(area_centre)); _cell_area_zhigh.emplace(emptyFrom(area_centre)); // We cannot setLocation, as that would trigger the computation of staggered // metrics. 
- //ASSERT0(mesh->zstart > 0); BOUT_FOR(i, area_centre.getRegion("RGN_NOZ")) { (*_cell_area_zlow)[i] = 0.5 * (area_centre[i] + area_centre[i.zm()]); (*_cell_area_zhigh)[i] = 0.5 * (area_centre[i] + area_centre[i.zp()]); diff --git a/src/mesh/coordinates_accessor.cxx b/src/mesh/coordinates_accessor.cxx index 0ce4b664b5..efc27e9715 100644 --- a/src/mesh/coordinates_accessor.cxx +++ b/src/mesh/coordinates_accessor.cxx @@ -1,6 +1,6 @@ #include "bout/coordinates_accessor.hxx" -#include - +#include "bout/build_defines.hxx" +#include "bout/macro_for_each.hxx" #include "bout/mesh.hxx" #include @@ -41,12 +41,15 @@ CoordinatesAccessor::CoordinatesAccessor(const Coordinates* coords) { // Copy data from Coordinates variable into data array // Uses the symbol to look up the corresponding Offset -#define COPY_STRIPE1(symbol) \ - data[stripe_size * ind.ind + static_cast(Offset::symbol)] = coords->symbol[ind]; +#define COPY_STRIPE1(symbol) \ + if (coords->symbol.isAllocated()) \ + data[stripe_size * ind.ind + static_cast(Offset::symbol)] = coords->symbol[ind]; // Implement copy for each argument -#define COPY_STRIPE(...) \ - { MACRO_FOR_EACH(COPY_STRIPE1, __VA_ARGS__) } +#define COPY_STRIPE(...) 
\ + { \ + MACRO_FOR_EACH(COPY_STRIPE1, __VA_ARGS__) \ + } // Iterate over all points in the field // Note this could be 2D or 3D, depending on FieldMetric type @@ -55,10 +58,15 @@ CoordinatesAccessor::CoordinatesAccessor(const Coordinates* coords) { COPY_STRIPE(d1_dx, d1_dy, d1_dz); COPY_STRIPE(J); - data[stripe_size * ind.ind + static_cast(Offset::B)] = coords->Bxy[ind]; - data[stripe_size * ind.ind + static_cast(Offset::Byup)] = coords->Bxy.yup()[ind]; - data[stripe_size * ind.ind + static_cast(Offset::Bydown)] = - coords->Bxy.ydown()[ind]; + if (coords->Bxy.isAllocated()) { + data[stripe_size * ind.ind + static_cast(Offset::B)] = coords->Bxy[ind]; + if (coords->Bxy.yup().isAllocated()) + data[stripe_size * ind.ind + static_cast(Offset::Byup)] = + coords->Bxy.yup()[ind]; + if (coords->Bxy.ydown().isAllocated()) + data[stripe_size * ind.ind + static_cast(Offset::Bydown)] = + coords->Bxy.ydown()[ind]; + } COPY_STRIPE(G1, G3); COPY_STRIPE(g11, g12, g13, g22, g23, g33); diff --git a/src/mesh/difops.cxx b/src/mesh/difops.cxx index 09433b0685..3f7aec08bf 100644 --- a/src/mesh/difops.cxx +++ b/src/mesh/difops.cxx @@ -2,9 +2,9 @@ * Various differential operators defined on BOUT grid * ************************************************************************** - * Copyright 2010 B.D.Dudson, S.Farley, M.V.Umansky, X.Q.Xu + * Copyright 2010 - 2026 BOUT++ contributors * - * Contact: Ben Dudson, bd512@york.ac.uk + * Contact: Ben Dudson, dudson2@llnl.gov * * This file is part of BOUT++. 
* @@ -26,10 +26,16 @@ #include "bout/build_defines.hxx" #include +#include +#include +#include #include #include #include +#include +#include #include +#include #include #include #include @@ -284,7 +290,8 @@ Field3D Div_par_flux(const Field3D& v, const Field3D& f, CELL_LOC outloc, auto Bxy_floc = f.getCoordinates()->Bxy; if (!f.hasParallelSlices()) { - return metric->Bxy * FDDY(v, f / Bxy_floc, outloc, method) / sqrt(metric->g_22); + Field3D f_B = f / Bxy_floc; + return metric->Bxy * FDDY(v, f_B, outloc, method) / sqrt(metric->g_22); } // Need to modify yup and ydown fields @@ -799,7 +806,7 @@ Field3D bracket(const Field2D& f, const Field3D& g, BRACKET_METHOD method, break; case BRACKET_SIMPLE: { // Use a subset of terms for comparison to BOUT-06 - result = VDDZ(-DDX(f, outloc), g, outloc); + result = VDDZ(Field3D{-DDX(f, outloc)}, g, outloc); break; } default: { diff --git a/src/mesh/interpolation_xz.cxx b/src/mesh/interpolation_xz.cxx index ec8bcc0502..04d20769e4 100644 --- a/src/mesh/interpolation_xz.cxx +++ b/src/mesh/interpolation_xz.cxx @@ -24,16 +24,20 @@ **************************************************************************/ #include "parallel/fci_comm.hxx" +#include +#include +#include +#include #include #include +#include #include #include +#include void printLocation(const Field3D& var) { output << toString(var.getLocation()); } void printLocation(const Field2D& var) { output << toString(var.getLocation()); } -const char* strLocation(CELL_LOC loc) { return toString(loc).c_str(); } - const Field3D interpolate(const Field3D& f, const Field3D& delta_x, const Field3D& delta_z) { XZLagrange4pt interpolateMethod{f.getMesh()}; @@ -46,7 +50,7 @@ const Field3D interpolate(const Field2D& f, const Field3D& delta_x, } const Field3D interpolate(const Field2D& f, const Field3D& delta_x) { - Mesh* mesh = f.getMesh(); + const Mesh* mesh = f.getMesh(); ASSERT1(mesh == delta_x.getMesh()); Field3D result{emptyFrom(delta_x)}; diff --git a/src/mesh/parallel/fci.cxx 
b/src/mesh/parallel/fci.cxx index 4855c25c2a..aebfeb654c 100644 --- a/src/mesh/parallel/fci.cxx +++ b/src/mesh/parallel/fci.cxx @@ -471,7 +471,7 @@ void FCITransform::outputVars(Options& output_options) { void FCITransform::loadParallelMetrics([[maybe_unused]] Coordinates* coords) { #if BOUT_USE_METRIC_3D output_info.write("\tLoading parallel metrics\n"); - const auto JB0 = coords->J * coords->Bxy; + const Coordinates::FieldMetric JB0 = coords->J * coords->Bxy; coords->J.splitParallelSlices(); coords->J.disallowCalcParallelSlices(); coords->J.resetRegionParallel(true); diff --git a/src/mesh/parallel/shiftedmetric.cxx b/src/mesh/parallel/shiftedmetric.cxx index 64c6d9a2ce..705c48e944 100644 --- a/src/mesh/parallel/shiftedmetric.cxx +++ b/src/mesh/parallel/shiftedmetric.cxx @@ -17,6 +17,11 @@ #include +#if BOUT_HAS_CUDA +#include +#include +#endif + ShiftedMetric::ShiftedMetric(Mesh& m, CELL_LOC location_in, Field2D zShift_, BoutReal zlength_in, Options* opt) : ParallelTransform(m, opt), location(location_in), zShift(std::move(zShift_)), @@ -39,8 +44,8 @@ void ShiftedMetric::checkInputGrid() { "Should be 'shiftedmetric'."); } } // else: parallel_transform variable not found in grid input, indicates older input - // file or grid from options so must rely on the user having ensured the type is - // correct + // file or grid from options so must rely on the user having ensured the type is + // correct } void ShiftedMetric::outputVars(Options& output_options) { @@ -223,6 +228,260 @@ void ShiftedMetric::shiftZ(const BoutReal* in, const dcomplex* phs, BoutReal* ou irfft(&cmplx[0], mesh.LocalNz, out); // Reverse FFT } +#if BOUT_HAS_CUDA +// Bit-reversal +__device__ inline unsigned int bit_reverse(unsigned int x, unsigned int log2n) { + unsigned int result = 0; +#pragma unroll + for (unsigned int i = 0; i < log2n; i++) { + result = (result << 1) | (x & 1); + x >>= 1; + } + return result; +} + +// Block-level cooperative FFT +// Multiple threads cooperate on each FFT using 
shared memory +template +__global__ void fft_block_cooperative(const BoutReal** __restrict__ in, + BoutReal** __restrict__ out, + const double2** __restrict__ blocks_phs, + const int nbatches, const int nblocks) { + + constexpr int LOG2_NZ = __builtin_ctz(NZ); + constexpr double INV_NZ = 1.0 / (double)NZ; + constexpr int NMODES = (NZ / 2) + 1; + + // Shared memory for FFTS_PER_BLOCK FFTs + // Each FFT needs NZ complex values + __shared__ double2 shared_fft[FFTS_PER_BLOCK][NZ]; + + // Select twiddles based on size + const double2* twiddles; + if constexpr (NZ == 16) { + twiddles = c_twiddle_16; + } else if constexpr (NZ == 64) { + twiddles = c_twiddle_64; + } else if constexpr (NZ == 128) { + twiddles = c_twiddle_128; + } else if constexpr (NZ == 256) { + twiddles = c_twiddle_256; + } else if constexpr (NZ == 512) { + twiddles = c_twiddle_512; + } else { + static_assert(NZ == 16 || NZ == 64 || NZ == 128 || NZ == 256 || NZ == 512, + "Unsupported NZ"); + } + + // Each block processes FFTS_PER_BLOCK FFTs + const int fft_id_in_block = + threadIdx.y; // Which FFT this thread works on (0 to FFTS_PER_BLOCK-1) + const int global_fft_id = blockIdx.x * FFTS_PER_BLOCK + fft_id_in_block; + + if (global_fft_id >= nblocks * nbatches) + return; + + const int block = global_fft_id / nbatches; + const int batch = global_fft_id % nbatches; + + const double* __restrict__ in_line = in[block] + batch * NZ; + double* __restrict__ out_line = out[block] + batch * NZ; + const double2* __restrict__ phs = blocks_phs[block]; + + // Thread ID within the FFT computation + const int tid = threadIdx.x; + const int threads_per_fft = blockDim.x; // All threads in x-dimension work on same FFT + + // ===== LOAD INPUT WITH BIT-REVERSAL ===== + // Each thread loads some elements (strided) + for (int i = tid; i < NZ; i += threads_per_fft) { + const unsigned int rev_i = bit_reverse(i, LOG2_NZ); + shared_fft[fft_id_in_block][rev_i].x = in_line[i]; + shared_fft[fft_id_in_block][rev_i].y = 0.0; + } + 
__syncthreads(); + + // ===== FORWARD FFT: Cooley-Tukey DIT in Shared Memory ===== + for (int stage = 0; stage < LOG2_NZ; ++stage) { + const int m = 1 << (stage + 1); + const int m_half = m >> 1; + + // Each thread processes multiple butterflies + for (int k = tid; k < NZ / 2; k += threads_per_fft) { + const int butterfly_group = k / m_half; + const int j = k % m_half; + const int idx_top = butterfly_group * m + j; + const int idx_bot = idx_top + m_half; + + // Twiddle factor + const int twiddle_k = (j * NZ) / m; + const double wr = twiddles[twiddle_k].x; + const double wi = twiddles[twiddle_k].y; + + // Load from shared memory + const double top_r = shared_fft[fft_id_in_block][idx_top].x; + const double top_i = shared_fft[fft_id_in_block][idx_top].y; + const double bot_r = shared_fft[fft_id_in_block][idx_bot].x; + const double bot_i = shared_fft[fft_id_in_block][idx_bot].y; + + // Butterfly: t = W * bottom + const double t_r = wr * bot_r - wi * bot_i; + const double t_i = wr * bot_i + wi * bot_r; + + // Write back + shared_fft[fft_id_in_block][idx_top].x = top_r + t_r; + shared_fft[fft_id_in_block][idx_top].y = top_i + t_i; + shared_fft[fft_id_in_block][idx_bot].x = top_r - t_r; + shared_fft[fft_id_in_block][idx_bot].y = top_i - t_i; + } + __syncthreads(); + } + + // ===== APPLY PHASE SHIFT ===== + for (int k = tid; k < NMODES; k += threads_per_fft) { + const double2 ph = phs[batch * NMODES + k]; + const double real = shared_fft[fft_id_in_block][k].x; + const double imag = shared_fft[fft_id_in_block][k].y; + shared_fft[fft_id_in_block][k].x = real * ph.x - imag * ph.y; + shared_fft[fft_id_in_block][k].y = real * ph.y + imag * ph.x; + } + + for (int k = tid + NMODES; k < NZ; k += threads_per_fft) { + const int kk = NZ - k; + const double2 tmp = phs[batch * NMODES + kk]; + const double real = shared_fft[fft_id_in_block][k].x; + const double imag = shared_fft[fft_id_in_block][k].y; + shared_fft[fft_id_in_block][k].x = real * tmp.x + imag * tmp.y; + 
shared_fft[fft_id_in_block][k].y = -real * tmp.y + imag * tmp.x; + } + __syncthreads(); + + // ===== INVERSE FFT: Conjugate, FFT, Conjugate ===== + // Conjugate input + for (int i = tid; i < NZ; i += threads_per_fft) { + shared_fft[fft_id_in_block][i].y = -shared_fft[fft_id_in_block][i].y; + } + __syncthreads(); + + // Bit-reverse with standard swap to avoid temp array + // This is tricky but saves memory + for (int i = tid; i < NZ / 2; i += threads_per_fft) { + const unsigned int rev_i = bit_reverse(i, LOG2_NZ); + if (i < rev_i) { // Only swap once per pair + double2 temp = shared_fft[fft_id_in_block][i]; + shared_fft[fft_id_in_block][i] = shared_fft[fft_id_in_block][rev_i]; + shared_fft[fft_id_in_block][rev_i] = temp; + } + } + __syncthreads(); + + // Forward FFT again (for inverse) + for (int stage = 0; stage < LOG2_NZ; ++stage) { + const int m = 1 << (stage + 1); + const int m_half = m >> 1; + + for (int k = tid; k < NZ / 2; k += threads_per_fft) { + const int butterfly_group = k / m_half; + const int j = k % m_half; + const int idx_top = butterfly_group * m + j; + const int idx_bot = idx_top + m_half; + + const int twiddle_k = (j * NZ) / m; + const double wr = twiddles[twiddle_k].x; + const double wi = twiddles[twiddle_k].y; + + const double top_r = shared_fft[fft_id_in_block][idx_top].x; + const double top_i = shared_fft[fft_id_in_block][idx_top].y; + const double bot_r = shared_fft[fft_id_in_block][idx_bot].x; + const double bot_i = shared_fft[fft_id_in_block][idx_bot].y; + + const double t_r = wr * bot_r - wi * bot_i; + const double t_i = wr * bot_i + wi * bot_r; + + shared_fft[fft_id_in_block][idx_top].x = top_r + t_r; + shared_fft[fft_id_in_block][idx_top].y = top_i + t_i; + shared_fft[fft_id_in_block][idx_bot].x = top_r - t_r; + shared_fft[fft_id_in_block][idx_bot].y = top_i - t_i; + } + __syncthreads(); + } + + // Store output (conjugate and normalize) + for (int i = tid; i < NZ; i += threads_per_fft) { + out_line[i] = shared_fft[fft_id_in_block][i].x * 
INV_NZ; + } +} + +// Launcher for block-level cooperative FFT +static void shiftZ_block_fft(const int Nz, const BoutReal** in, BoutReal** out, + const double2** phs, int nblocks, int nbatches, + cudaStream_t stream = 0) { + if ((Nz & (Nz - 1)) != 0) { + fprintf(stderr, "Error: Nz=%d must be power of 2\n", Nz); + return; + } + + const int total_ffts = nblocks * nbatches; + + if (Nz == 16) { + constexpr int FFTS_PER_BLOCK = 16; + constexpr int THREADS_PER_FFT = 16; + + dim3 block(THREADS_PER_FFT, FFTS_PER_BLOCK); + dim3 grid((total_ffts + FFTS_PER_BLOCK - 1) / FFTS_PER_BLOCK); + + fft_block_cooperative<16, FFTS_PER_BLOCK> + <<>>(in, out, phs, nbatches, nblocks); + } else if (Nz == 64) { + constexpr int FFTS_PER_BLOCK = 4; + constexpr int THREADS_PER_FFT = 64; + + dim3 block(THREADS_PER_FFT, FFTS_PER_BLOCK); + dim3 grid((total_ffts + FFTS_PER_BLOCK - 1) / FFTS_PER_BLOCK); + + fft_block_cooperative<64, FFTS_PER_BLOCK> + <<>>(in, out, phs, nbatches, nblocks); + + } else if (Nz == 128) { + constexpr int FFTS_PER_BLOCK = 2; + constexpr int THREADS_PER_FFT = 128; + + dim3 block(THREADS_PER_FFT, FFTS_PER_BLOCK); + dim3 grid((total_ffts + FFTS_PER_BLOCK - 1) / FFTS_PER_BLOCK); + + fft_block_cooperative<128, FFTS_PER_BLOCK> + <<>>(in, out, phs, nbatches, nblocks); + + } else if (Nz == 256) { + constexpr int FFTS_PER_BLOCK = 1; + constexpr int THREADS_PER_FFT = 256; + + dim3 block(THREADS_PER_FFT, FFTS_PER_BLOCK); + dim3 grid(total_ffts); + + fft_block_cooperative<256, FFTS_PER_BLOCK> + <<>>(in, out, phs, nbatches, nblocks); + + } else if (Nz == 512) { + constexpr int FFTS_PER_BLOCK = 1; + constexpr int THREADS_PER_FFT = 512; + + dim3 block(THREADS_PER_FFT, FFTS_PER_BLOCK); + dim3 grid(total_ffts); + + fft_block_cooperative<512, FFTS_PER_BLOCK> + <<>>(in, out, phs, nbatches, nblocks); + } else { + throw std::runtime_error("Unsupported Nz " + std::to_string(Nz) + " for block FFT"); + } + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + throw 
std::runtime_error(std::string("Block FFT failed: ") + cudaGetErrorString(err)); + } +} +#endif + void ShiftedMetric::calcParallelSlices(Field3D& f) { if (f.getDirectionY() == YDirectionType::Aligned) { // Cannot calculate parallel slices for field-aligned fields, so return without @@ -232,9 +491,76 @@ void ShiftedMetric::calcParallelSlices(Field3D& f) { f.splitParallelSlices(); +#if BOUT_HAS_CUDA + auto& region = mesh.getRegion2D("RGN_NOY"); + static size_t nblocks = region.getBlocks().size(); + if (nblocks != region.getBlocks().size()) { + throw BoutException("Number of blocks changed in ShiftedMetric::calcParallelSlices"); + } + + static struct StreamRAII { + cudaStream_t stream = 0; + StreamRAII() { + if (cudaStreamCreate(&stream) != cudaSuccess) { + throw BoutException("Failed to create CUDA stream"); + } + } + + cudaStream_t get() const { return stream; } + + void synchronize() const { cudaStreamSynchronize(stream); } + + ~StreamRAII() { cudaStreamDestroy(stream); } + } stream; + + // Vector of Arrays for each phase. 
+ std::vector> blocks_in_phase; + std::vector> blocks_out_phase; + std::vector> phs_in_phase; + for (const auto& phase : parallel_slice_phases) { auto& f_slice = f.ynext(phase.y_offset); f_slice.allocate(); + + size_t block_idx = 0; + int nbatches = + region.getBlocks().cbegin()->second.ind - region.getBlocks().cbegin()->first.ind; + + Array& blocks_in = blocks_in_phase.emplace_back(nblocks); + Array& blocks_out = blocks_out_phase.emplace_back(nblocks); + Array& phs_in = phs_in_phase.emplace_back(nblocks); + + for (auto block = region.getBlocks().cbegin(), end = region.getBlocks().cend(); + block < end; ++block) { + auto idx_s = block->first; + auto idx_e = block->second; + int inner_nbatches = idx_e.ind - idx_s.ind; + if (inner_nbatches != nbatches) { + throw BoutException( + "Non-uniform number of batches in ShiftedMetric::calcParallelSlices"); + } + const int ix = idx_s.x(); + const int iy = idx_s.y(); + const int iy_offset = iy + phase.y_offset; + + blocks_in[block_idx] = &f(ix, iy_offset, 0); + blocks_out[block_idx] = &f_slice(ix, iy_offset, 0); + phs_in[block_idx] = reinterpret_cast(&phase.phase_shift(ix, iy, 0)); + + block_idx++; + } + + shiftZ_block_fft(mesh.LocalNz, &blocks_in[0], &blocks_out[0], &phs_in[0], nblocks, + nbatches, stream.get()); + } + + // Synchronize to ensure all shifts are complete. 
+ stream.synchronize(); +#else + for (const auto& phase : parallel_slice_phases) { + auto& f_slice = f.ynext(phase.y_offset); + f_slice.allocate(); + BOUT_FOR(i, mesh.getRegion2D("RGN_NOY")) { const int ix = i.x(); const int iy = i.y(); @@ -243,6 +569,7 @@ void ShiftedMetric::calcParallelSlices(Field3D& f) { &(f_slice(ix, iy_offset, 0))); } } +#endif } std::vector diff --git a/src/sys/derivs.cxx b/src/sys/derivs.cxx index 2b606b9e5d..e449dbcd30 100644 --- a/src/sys/derivs.cxx +++ b/src/sys/derivs.cxx @@ -17,9 +17,9 @@ * Div(v*f) * ************************************************************************** - * Copyright 2010 B.D.Dudson, S.Farley, M.V.Umansky, X.Q.Xu + * Copyright 2010 - 2026 BOUT++ contributors * - * Contact: Ben Dudson, bd512@york.ac.uk + * Contact: Ben Dudson, dudson2@llnl.gov * * This file is part of BOUT++. * @@ -38,16 +38,23 @@ * **************************************************************************/ +#include +#include #include #include +#include #include #include +#include +#include #include #include #include #include #include +#include + /******************************************************************************* * First central derivatives *******************************************************************************/ @@ -96,7 +103,7 @@ Coordinates::FieldMetric DDZ(const Field2D& f, CELL_LOC UNUSED(outloc), Vector3D DDZ(const Vector3D& v, CELL_LOC outloc, const std::string& method, const std::string& region) { Vector3D result(v.getMesh()); - Coordinates* metric = v.x.getCoordinates(outloc); + const Coordinates* metric = v.x.getCoordinates(outloc); if (v.covariant) { // From equation (2.6.32) in D'Haeseleer @@ -148,7 +155,7 @@ Vector2D DDZ(const Vector2D& v, CELL_LOC UNUSED(outloc), Field3D D2DX2(const Field3D& f, CELL_LOC outloc, const std::string& method, const std::string& region) { - Coordinates* coords = f.getCoordinates(outloc); + const Coordinates* coords = f.getCoordinates(outloc); Field3D result = 
bout::derivatives::index::D2DX2(f, outloc, method, region) / SQ(coords->dx); @@ -167,9 +174,9 @@ Field3D D2DX2(const Field3D& f, CELL_LOC outloc, const std::string& method, Coordinates::FieldMetric D2DX2(const Field2D& f, CELL_LOC outloc, const std::string& method, const std::string& region) { - Coordinates* coords = f.getCoordinates(outloc); + const Coordinates* coords = f.getCoordinates(outloc); - auto result = + Coordinates::FieldMetric result = bout::derivatives::index::D2DX2(f, outloc, method, region) / SQ(coords->dx); if (coords->non_uniform) { @@ -185,7 +192,7 @@ Coordinates::FieldMetric D2DX2(const Field2D& f, CELL_LOC outloc, Field3D D2DY2(const Field3D& f, CELL_LOC outloc, const std::string& method, const std::string& region) { - Coordinates* coords = f.getCoordinates(outloc); + const Coordinates* coords = f.getCoordinates(outloc); Field3D result = bout::derivatives::index::D2DY2(f, outloc, method, region) / SQ(coords->dy); @@ -204,9 +211,9 @@ Field3D D2DY2(const Field3D& f, CELL_LOC outloc, const std::string& method, Coordinates::FieldMetric D2DY2(const Field2D& f, CELL_LOC outloc, const std::string& method, const std::string& region) { - Coordinates* coords = f.getCoordinates(outloc); + const Coordinates* coords = f.getCoordinates(outloc); - auto result = + Coordinates::FieldMetric result = bout::derivatives::index::D2DY2(f, outloc, method, region) / SQ(coords->dy); if (coords->non_uniform) { // Correction for non-uniform f.getMesh() @@ -286,7 +293,7 @@ Coordinates::FieldMetric D2DXDY(const Field2D& f, CELL_LOC outloc, const std::string& method, const std::string& region, const std::string& dfdy_boundary_condition, const std::string& dfdy_region) { - std::string dy_region = dfdy_region.empty() ? region : dfdy_region; + const std::string dy_region = dfdy_region.empty() ? region : dfdy_region; // If staggering in x, take y-derivative at f's location. 
const auto y_location = @@ -311,7 +318,7 @@ Coordinates::FieldMetric D2DXDY(const Field2D& f, CELL_LOC outloc, Field3D D2DXDY(const Field3D& f, CELL_LOC outloc, const std::string& method, const std::string& region, const std::string& dfdy_boundary_condition, const std::string& dfdy_region) { - std::string dy_region = dfdy_region.empty() ? region : dfdy_region; + const std::string dy_region = dfdy_region.empty() ? region : dfdy_region; // If staggering in x, take y-derivative at f's location. const auto y_location = diff --git a/tests/integrated/test-petsc_laplace/test_petsc_laplace.cxx b/tests/integrated/test-petsc_laplace/test_petsc_laplace.cxx index 39b1918480..04182293e7 100644 --- a/tests/integrated/test-petsc_laplace/test_petsc_laplace.cxx +++ b/tests/integrated/test-petsc_laplace/test_petsc_laplace.cxx @@ -99,7 +99,7 @@ void check_laplace(int test_num, std::string_view test_name, Laplacian& invert, template Field3D forward_laplace(const Field3D& field, const T& acoef, const T& ccoef, const T& dcoef) { - auto bcoef = + Field3D bcoef = dcoef * Delp2(field) + Grad_perp(ccoef) * Grad_perp(field) / ccoef + acoef * field; apply_flat_boundary(bcoef); return bcoef; diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 4ba304b484..4963aaf1f0 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -60,6 +60,7 @@ set(serial_tests_source ./field/test_field3d.cxx ./field/test_field_factory.cxx ./field/test_fieldgroup.cxx + ./field/test_if_else.cxx ./field/test_fieldperp.cxx ./field/test_initialprofiles.cxx ./field/test_vector2d.cxx diff --git a/tests/unit/field/test_field2d.cxx b/tests/unit/field/test_field2d.cxx index cf9a0c1f25..a91acd4a40 100644 --- a/tests/unit/field/test_field2d.cxx +++ b/tests/unit/field/test_field2d.cxx @@ -853,7 +853,7 @@ TEST_F(Field2DTest, InvalidateGuards) { sum = 0; for (const auto& i : field) { - if (!finite(field[i])) { + if (!std::isfinite(field[i])) { sum++; } } @@ -1175,6 +1175,19 @@ 
TEST_F(Field2DTest, Sqrt) { EXPECT_TRUE(IsFieldEqual(sqrt(field), 4.0)); } +TEST_F(Field2DTest, SQExpressionUsesSquareOp) { + Field2D field; + + field = 2.0; + const auto expr = field + 1.0; + + EXPECT_TRUE( + (std::is_same_v, + BinaryExpr, + std::decay_t, bout::op::Square>>)); + EXPECT_TRUE(IsFieldEqual(SQ(expr), 9.0)); +} + TEST_F(Field2DTest, Abs) { Field2D field; @@ -1182,6 +1195,29 @@ TEST_F(Field2DTest, Abs) { EXPECT_TRUE(IsFieldEqual(abs(field), 31.0)); } +TEST_F(Field2DTest, AbsExpressionUsesAbsOp) { + Field2D field; + + field = -2.0; + const auto expr = field + 1.0; + + EXPECT_TRUE((std::is_same_v, + BinaryExpr, + std::decay_t, bout::op::abs>>)); + EXPECT_TRUE(IsFieldEqual(abs(expr), 1.0)); + EXPECT_TRUE(IsFieldEqual(abs(expr, "RGN_ALL"), 1.0)); +} + +TEST_F(Field2DTest, RegionLimitedExpressionConstructsField2D) { + Field2D field; + + field = -31.0; + + Field2D result = abs(field, "RGN_NOBNDRY"); + + EXPECT_TRUE(IsFieldEqual(result, 31.0, "RGN_NOBNDRY")); +} + TEST_F(Field2DTest, Exp) { Field2D field; @@ -1298,6 +1334,21 @@ TEST_F(Field2DTest, Min) { EXPECT_EQ(min(field, true, "RGN_ALL"), -99.0); } +TEST_F(Field2DTest, MinBinaryExpr) { + Field2D field; + + field = 50.0; + field(0, 0) = -99.0; + field(1, 1) = 60.0; + field(1, 2) = 40.0; + field(2, 4) = 99.0; + + const auto expr = field / 2.0 - 5.0; + + EXPECT_EQ(min(expr, false), 15.0); + EXPECT_EQ(min(expr, false, "RGN_ALL"), -54.5); +} + TEST_F(Field2DTest, Max) { Field2D field; @@ -1315,6 +1366,21 @@ TEST_F(Field2DTest, Max) { EXPECT_EQ(max(field, true, "RGN_ALL"), 99.0); } +TEST_F(Field2DTest, MaxBinaryExpr) { + Field2D field; + + field = 50.0; + field(0, 0) = -99.0; + field(1, 1) = 40.0; + field(1, 2) = 60.0; + field(2, 4) = 99.0; + + const auto expr = field / 2.0 - 5.0; + + EXPECT_EQ(max(expr, false), 25.0); + EXPECT_EQ(max(expr, false, "RGN_ALL"), 44.5); +} + TEST_F(Field2DTest, Swap) { WithQuietOutput quiet{output_info}; @@ -1436,6 +1502,23 @@ TEST_F(Field2DTest, OperatorEqualsField2D) { 
EXPECT_EQ(field.getDirectionZ(), field2.getDirectionZ()); } +TEST_F(Field2DTest, OperatorEqualsBinaryExprCopiesMetadata) { + Field2D source{ + mesh_staggered, CELL_XLOW, {YDirectionType::Aligned, ZDirectionType::Average}}; + source = 4.; + + Field2D target(mesh_staggered); + target = 0.; + + target = sqrt(source); + + EXPECT_EQ(target.getMesh(), source.getMesh()); + EXPECT_EQ(target.getLocation(), source.getLocation()); + EXPECT_EQ(target.getDirectionY(), source.getDirectionY()); + EXPECT_EQ(target.getDirectionZ(), source.getDirectionZ()); + EXPECT_TRUE(IsFieldEqual(target, 2.)); +} + TEST_F(Field2DTest, EmptyFrom) { // Create field with non-default arguments so we can check they get copied // to 'field2'. diff --git a/tests/unit/field/test_field3d.cxx b/tests/unit/field/test_field3d.cxx index 7672ec7dae..905b182018 100644 --- a/tests/unit/field/test_field3d.cxx +++ b/tests/unit/field/test_field3d.cxx @@ -1211,7 +1211,7 @@ TEST_F(Field3DTest, InvalidateGuards) { sum = 0; for (const auto& i : field) { - if (!finite(field[i])) { + if (!std::isfinite(field[i])) { sum++; } } @@ -1949,6 +1949,36 @@ TEST_F(Field3DTest, Sqrt) { EXPECT_TRUE(IsFieldEqual(sqrt(field), 4.0)); } +TEST_F(Field3DTest, SQExpressionUsesSquareOp) { + Field3D field; + + field = 2.0; + const auto expr = field + 1.0; + + EXPECT_TRUE( + (std::is_same_v, + BinaryExpr, + std::decay_t, bout::op::Square>>)); + EXPECT_TRUE(IsFieldEqual(SQ(expr), 9.0)); +} + +TEST_F(Field3DTest, SQField3DParallelPreservesParallelSlices) { + Field3DParallel field; + + field = 2.0; + field.splitParallelSlices(); + field.yup() = 3.0; + field.ydown() = 4.0; + + const auto squared = SQ(field); + + EXPECT_TRUE((std::is_same_v, Field3DParallel>)); + EXPECT_TRUE(squared.hasParallelSlices()); + EXPECT_TRUE(IsFieldEqual(squared, 4.0)); + EXPECT_TRUE(IsFieldEqual(squared.yup(), 9.0)); + EXPECT_TRUE(IsFieldEqual(squared.ydown(), 16.0)); +} + TEST_F(Field3DTest, Abs) { Field3D field; @@ -1956,6 +1986,29 @@ TEST_F(Field3DTest, Abs) { 
EXPECT_TRUE(IsFieldEqual(abs(field), 31.0)); } +TEST_F(Field3DTest, AbsExpressionUsesAbsOp) { + Field3D field; + + field = -2.0; + const auto expr = field + 1.0; + + EXPECT_TRUE((std::is_same_v, + BinaryExpr, + std::decay_t, bout::op::abs>>)); + EXPECT_TRUE(IsFieldEqual(abs(expr), 1.0)); + EXPECT_TRUE(IsFieldEqual(abs(expr, "RGN_ALL"), 1.0)); +} + +TEST_F(Field3DTest, RegionLimitedExpressionConstructsField3D) { + Field3D field; + + field = -31.0; + + Field3D result = abs(field, "RGN_NOBNDRY"); + + EXPECT_TRUE(IsFieldEqual(result, 31.0, "RGN_NOBNDRY")); +} + TEST_F(Field3DTest, Exp) { Field3D field; @@ -2072,6 +2125,21 @@ TEST_F(Field3DTest, Min) { EXPECT_EQ(min(field, true, "RGN_ALL"), -99.0); } +TEST_F(Field3DTest, MinBinaryExpr) { + Field3D field; + + field = 50.0; + field(0, 0, 0) = -99.0; + field(1, 1, 1) = 60.0; + field(1, 2, 2) = 40.0; + field(2, 4, 3) = 99.0; + + const auto expr = field / 2.0 - 5.0; + + EXPECT_EQ(min(expr, false), 15.0); + EXPECT_EQ(min(expr, false, "RGN_ALL"), -54.5); +} + TEST_F(Field3DTest, Max) { Field3D field; @@ -2089,6 +2157,21 @@ TEST_F(Field3DTest, Max) { EXPECT_EQ(max(field, true, "RGN_ALL"), 99.0); } +TEST_F(Field3DTest, MaxBinaryExpr) { + Field3D field; + + field = 50.0; + field(0, 0, 0) = -99.0; + field(1, 1, 1) = 40.0; + field(1, 2, 2) = 60.0; + field(2, 4, 3) = 99.0; + + const auto expr = field / 2.0 - 5.0; + + EXPECT_EQ(max(expr, false), 25.0); + EXPECT_EQ(max(expr, false, "RGN_ALL"), 44.5); +} + TEST_F(Field3DTest, Mean) { Field3D field; @@ -2108,6 +2191,24 @@ TEST_F(Field3DTest, Mean) { EXPECT_EQ(mean(field, true, "RGN_ALL"), mean_value_all); } +TEST_F(Field3DTest, MeanBinaryExpr) { + Field3D field; + + field = 50.0; + field(0, 0, 0) = 1.0; + field(1, 1, 1) = 40.0; + field(1, 2, 2) = 60.0; + field(2, 4, 3) = 109.0; + + const int npoints_all = nx * ny * nz; + const BoutReal mean_value_nobndry = 103.0; + const BoutReal mean_value_all = 103.0 + 20.0 / npoints_all; + const auto expr = field * 2.0 + 3.0; + + EXPECT_EQ(mean(expr, 
false), mean_value_nobndry); + EXPECT_EQ(mean(expr, false, "RGN_ALL"), mean_value_all); +} + TEST_F(Field3DTest, DC) { Field3D field; @@ -2416,6 +2517,25 @@ TEST_F(Field3DTest, OperatorEqualsField3D) { EXPECT_EQ(field.getDirectionZ(), field2.getDirectionZ()); } +TEST_F(Field3DTest, OperatorEqualsBinaryExprCopiesMetadata) { + Field3D source{ + mesh_staggered, CELL_XLOW, {YDirectionType::Aligned, ZDirectionType::Average}}; + source = 9.; + + Field3D target(mesh_staggered); + target = 0.; + target.splitParallelSlices(); + + target = sqrt(source); + + EXPECT_EQ(target.getMesh(), source.getMesh()); + EXPECT_EQ(target.getLocation(), source.getLocation()); + EXPECT_EQ(target.getDirectionY(), source.getDirectionY()); + EXPECT_EQ(target.getDirectionZ(), source.getDirectionZ()); + EXPECT_FALSE(target.hasParallelSlices()); + EXPECT_TRUE(IsFieldEqual(target, 3.)); +} + TEST_F(Field3DTest, EmptyFrom) { // Create field with non-default arguments so we can check they get copied // to 'field2'. diff --git a/tests/unit/field/test_fieldperp.cxx b/tests/unit/field/test_fieldperp.cxx index 8caafa96e4..46f07d589f 100644 --- a/tests/unit/field/test_fieldperp.cxx +++ b/tests/unit/field/test_fieldperp.cxx @@ -849,7 +849,7 @@ TEST_F(FieldPerpTest, InvalidateGuards) { sum = 0; for (const auto& i : field) { - if (!finite(field[i])) { + if (!std::isfinite(field[i])) { sum++; } } @@ -1577,6 +1577,14 @@ TEST_F(FieldPerpTest, Sqrt) { EXPECT_TRUE(IsFieldEqual(sqrt(field), 4.0)); } +TEST_F(FieldPerpTest, SQFieldPerp) { + FieldPerp field; + field.setIndex(0); + + field = 3.0; + EXPECT_TRUE(IsFieldEqual(SQ(field), 9.0)); +} + TEST_F(FieldPerpTest, Abs) { FieldPerp field; field.setIndex(0); @@ -1585,6 +1593,17 @@ TEST_F(FieldPerpTest, Abs) { EXPECT_TRUE(IsFieldEqual(abs(field), 31.0)); } +TEST_F(FieldPerpTest, RegionLimitedExpressionConstructsFieldPerp) { + FieldPerp field; + field.setIndex(0); + + field = -31.0; + + FieldPerp result = abs(field, "RGN_NOX"); + + EXPECT_TRUE(IsFieldEqual(result, 31.0, 
"RGN_NOX")); +} + TEST_F(FieldPerpTest, Exp) { FieldPerp field; field.setIndex(0); @@ -1730,6 +1749,22 @@ TEST_F(FieldPerpTest, Max) { EXPECT_EQ(max(field, true, "RGN_ALL"), 99.0); } +TEST_F(FieldPerpTest, MaxBinaryExpr) { + FieldPerp field; + field.setIndex(0); + + field = 50.0; + field(0, 0) = -99.0; + field(1, 1) = 40.0; + field(1, 2) = 60.0; + field(2, 4) = 99.0; + + const auto expr = field / 2.0 - 5.0; + + EXPECT_EQ(max(expr, false), 25.0); + EXPECT_EQ(max(expr, false, "RGN_ALL"), 44.5); +} + TEST_F(FieldPerpTest, OperatorEqualsFieldPerp) { FieldPerp field; @@ -1749,6 +1784,42 @@ TEST_F(FieldPerpTest, OperatorEqualsFieldPerp) { EXPECT_EQ(field.getDirectionZ(), field2.getDirectionZ()); } +TEST_F(FieldPerpTest, ConstructFromBinaryExprCopiesMetadata) { + FieldPerp source{ + mesh_staggered, CELL_XLOW, 3, {YDirectionType::Aligned, ZDirectionType::Average}}; + source = 4.; + + FieldPerp result{sqrt(source)}; + + EXPECT_EQ(result.getMesh(), source.getMesh()); + EXPECT_EQ(result.getLocation(), source.getLocation()); + EXPECT_EQ(result.getIndex(), source.getIndex()); + EXPECT_EQ(result.getDirectionY(), source.getDirectionY()); + EXPECT_EQ(result.getDirectionZ(), source.getDirectionZ()); + EXPECT_TRUE(IsFieldEqual(result, 2.)); +} + +TEST_F(FieldPerpTest, OperatorEqualsBinaryExprCopiesMetadata) { + FieldPerp source{ + mesh_staggered, CELL_XLOW, 3, {YDirectionType::Aligned, ZDirectionType::Average}}; + source = 4.; + + FieldPerp target{mesh_staggered, + CELL_CENTRE, + 1, + {YDirectionType::Standard, ZDirectionType::Standard}}; + target = 0.; + + target = sqrt(source); + + EXPECT_EQ(target.getMesh(), source.getMesh()); + EXPECT_EQ(target.getLocation(), source.getLocation()); + EXPECT_EQ(target.getIndex(), source.getIndex()); + EXPECT_EQ(target.getDirectionY(), source.getDirectionY()); + EXPECT_EQ(target.getDirectionZ(), source.getDirectionZ()); + EXPECT_TRUE(IsFieldEqual(target, 2.)); +} + TEST_F(FieldPerpTest, EmptyFrom) { // Create field with non-default arguments so we 
can check they get copied // to 'field2'. diff --git a/tests/unit/field/test_if_else.cxx b/tests/unit/field/test_if_else.cxx new file mode 100644 index 0000000000..c608aa79db --- /dev/null +++ b/tests/unit/field/test_if_else.cxx @@ -0,0 +1,57 @@ +#include "gtest/gtest.h" + +#include "test_extras.hxx" +#include "bout/field2d.hxx" +#include "bout/field3d.hxx" + +#include "fake_mesh_fixture.hxx" + +#include + +using IfElseTest = FakeMeshFixture; + +TEST_F(IfElseTest, Field2DChoosesSelectedBranch) { + const Field2D lhs{makeField( + [](const Ind2D& i) { return static_cast(i.x() + i.y()); })}; + const Field2D rhs{makeField( + [](const Ind2D& i) { return static_cast(10 + i.x() - i.y()); })}; + + const auto expr = if_else(true, lhs, rhs); + + static_assert(std::is_same_v, + BinaryExpr>); + EXPECT_TRUE(IsFieldEqual(expr, lhs)); + EXPECT_TRUE(IsFieldEqual(if_else(false, lhs, rhs), rhs)); +} + +TEST_F(IfElseTest, Field3DMixesField2DAndField3D) { + const Field2D lhs{makeField( + [](const Ind2D& i) { return static_cast(i.x() + 2 * i.y()); })}; + const Field3D rhs{makeField( + [](const Ind3D& i) { return static_cast(100 + i.x() + i.y() + i.z()); })}; + + const auto expr = if_else(true, lhs, rhs); + const Field3D expected{lhs}; + + static_assert(std::is_same_v, + BinaryExpr>); + EXPECT_TRUE(IsFieldEqual(expr, expected)); + EXPECT_TRUE(IsFieldEqual(if_else(false, lhs, rhs), rhs)); +} + +TEST_F(IfElseTest, IfElseZeroKeepsExpressionWhenConditionTrue) { + const Field3D field{makeField( + [](const Ind3D& i) { return static_cast(1 + i.x() + i.y() + i.z()); })}; + const auto source = 2.0 * field + 1.0; + + EXPECT_TRUE(IsFieldEqual(if_else_zero(true, source), source)); + EXPECT_TRUE(IsFieldEqual(if_else_zero(false, source), 0.0)); +} + +TEST_F(IfElseTest, InactiveBranchIsNotEvaluatedThroughMaskedArithmetic) { + const Field2D lhs{makeField( + [](const Ind2D& i) { return static_cast(1 + i.x() + i.y()); })}; + const Field2D rhs{filledFrom(lhs, BoutNaN)}; + + 
EXPECT_TRUE(IsFieldEqual(if_else(true, lhs, rhs), lhs)); +} diff --git a/tests/unit/include/bout/test_region.cxx b/tests/unit/include/bout/test_region.cxx index 00137c1ce7..fe66524735 100644 --- a/tests/unit/include/bout/test_region.cxx +++ b/tests/unit/include/bout/test_region.cxx @@ -111,6 +111,51 @@ TEST_F(RegionTest, regionFromIndices) { } } +TEST_F(RegionTest, getLinearIndices) { + Region region(0, mesh->LocalNx - 1, 0, mesh->LocalNy - 1, 0, mesh->LocalNz - 1, + mesh->LocalNy, mesh->LocalNz); + + const auto& indices = region.getIndices(); + const auto& linearIndices = region.getLinearIndices(); + + ASSERT_EQ(linearIndices.size(), indices.size()); + for (int i = 0; i < linearIndices.size(); ++i) { + EXPECT_EQ(linearIndices[i], indices[i].ind); + } +} + +TEST_F(RegionTest, getLinearIndicesUpdatedAfterSetIndices) { + Region::RegionIndices indicesIn{{0, 1, 1}, {2, 1, 1}, {4, 1, 1}}; + Region region(indicesIn); + + const auto& initialLinearIndices = region.getLinearIndices(); + ASSERT_EQ(initialLinearIndices.size(), 3); + EXPECT_EQ(initialLinearIndices[0], 0); + EXPECT_EQ(initialLinearIndices[1], 2); + EXPECT_EQ(initialLinearIndices[2], 4); + + Region::RegionIndices newIndices{{1, 1, 1}, {3, 1, 1}}; + region.setIndices(newIndices); + + const auto& updatedLinearIndices = region.getLinearIndices(); + ASSERT_EQ(updatedLinearIndices.size(), 2); + EXPECT_EQ(updatedLinearIndices[0], 1); + EXPECT_EQ(updatedLinearIndices[1], 3); +} + +TEST_F(RegionTest, getLinearIndicesUpdatedAfterSetBlocks) { + Region::ContiguousBlocks blocks{{Ind3D{1, 1, 1}, Ind3D{3, 1, 1}}, + {Ind3D{5, 1, 1}, Ind3D{6, 1, 1}}}; + Region region; + region.setBlocks(blocks); + + const auto& linearIndices = region.getLinearIndices(); + ASSERT_EQ(linearIndices.size(), 3); + EXPECT_EQ(linearIndices[0], 1); + EXPECT_EQ(linearIndices[1], 2); + EXPECT_EQ(linearIndices[2], 5); +} + TEST_F(RegionTest, regionFromBlocks) { Region region(0, mesh->LocalNx - 1, 0, mesh->LocalNy - 1, 0, mesh->LocalNz - 1, mesh->LocalNy, 
mesh->LocalNz); diff --git a/tests/unit/invert/laplace/test_laplace_cyclic.cxx b/tests/unit/invert/laplace/test_laplace_cyclic.cxx index a0d99f66c9..586dd80adf 100644 --- a/tests/unit/invert/laplace/test_laplace_cyclic.cxx +++ b/tests/unit/invert/laplace/test_laplace_cyclic.cxx @@ -10,6 +10,7 @@ #include "bout/invert_laplace.hxx" #include "gtest/gtest.h" +#include "bout/bout_types.hxx" #include "bout/derivs.hxx" #include "bout/difops.hxx" #include "bout/field2d.hxx" @@ -17,7 +18,6 @@ #include "bout/griddata.hxx" #include "bout/mesh.hxx" #include "bout/options.hxx" -#include "bout/vecops.hxx" #include "fake_mesh_fixture.hxx" @@ -29,14 +29,13 @@ class CyclicForwardOperator { CyclicForwardOperator(bool xin_neumann, bool xout_neumann) : inner_x_neumann(xin_neumann), outer_x_neumann(xout_neumann), - a(0.0), c1(1.0), c2(1.0), d(1.0), ex(0.0), ez(0.0) { - coords = mesh->getCoordinates(CELL_CENTER); - } + a(0.0), c1(1.0), c2(1.0), d(1.0), ex(0.0), ez(0.0), + coords(mesh->getCoordinates(CELL_CENTER)) {} - const Field3D operator()(Field3D& f) { - auto result = d * Delp2(f) - + (coords->g11 * DDX(f) + coords->g13 * DDZ(f)) * DDX(c2) / c1 + a * f - + ex * DDX(f) + ez * DDZ(f); + Field3D operator()(Field3D& f) { + Field3D result = d * Delp2(f) + + (coords->g11 * DDX(f) + coords->g13 * DDZ(f)) * DDX(c2) / c1 + + a * f + ex * DDX(f) + ez * DDZ(f); applyBoundaries(result, f); return result; } @@ -45,7 +44,7 @@ class CyclicForwardOperator { CyclicForwardOperator(); bool inner_x_neumann, outer_x_neumann; // If false then use Dirichlet conditions - void applyBoundaries(Field3D& newF, const Field3D& f) { + void applyBoundaries(Field3D& newF, const Field3D& f) const { BOUT_FOR(i, f.getMesh()->getRegion3D("RGN_INNER_X")) { if (inner_x_neumann) { newF[i] = (f[i.xp()] - f[i]) / coords->dx[i] / sqrt(coords->g_11[i]); @@ -86,14 +85,14 @@ class CyclicTest : public FakeMeshFixture, coef3.allocate(); BOUT_FOR(i, mesh->getRegion2D("RGN_ALL")) { - BoutReal x = i.x() / (BoutReal)nx - 0.5; - 
BoutReal y = i.y() / (BoutReal)ny - 0.5; + const BoutReal x = i.x() / (BoutReal)nx - 0.5; + const BoutReal y = i.y() / (BoutReal)ny - 0.5; coef2[i] = x + y; } BOUT_FOR(i, mesh->getRegion3D("RGN_ALL")) { - BoutReal x = i.x() / (BoutReal)nx - 0.5; - BoutReal y = i.y() / (BoutReal)ny - 0.5; - BoutReal z = i.z() / (BoutReal)nz - 0.5; + const BoutReal x = i.x() / (BoutReal)nx - 0.5; + const BoutReal y = i.y() / (BoutReal)ny - 0.5; + const BoutReal z = i.z() / (BoutReal)nz - 0.5; f3[i] = 1e3 * exp(-0.5 * sqrt(x * x + y * y + z * z) / sigmasq); coef3[i] = x + y + sin(2 * 3.14159265358979323846 * z); } diff --git a/tests/unit/invert/laplace/test_laplace_hypre3d.cxx b/tests/unit/invert/laplace/test_laplace_hypre3d.cxx index 3b1bbc5d39..a721b96833 100644 --- a/tests/unit/invert/laplace/test_laplace_hypre3d.cxx +++ b/tests/unit/invert/laplace/test_laplace_hypre3d.cxx @@ -1,5 +1,7 @@ #include "bout/build_defines.hxx" +#if BOUT_HAS_HYPRE + #include #include @@ -18,8 +20,6 @@ #include "bout/options.hxx" #include "bout/vecops.hxx" -#if BOUT_HAS_HYPRE - #include "fake_mesh_fixture.hxx" // The unit tests use the global mesh @@ -39,9 +39,9 @@ class ForwardOperator { } const Field3D operator()(Field3D& f) { - auto result = d * Laplace_perp(f, CELL_DEFAULT, "free", "RGN_NOY") - + (Grad(f) * Grad(c2) - DDY(c2) * DDY(f) / coords->g_22) / c1 + a * f - + ex * DDX(f) + ez * DDZ(f); + Field3D result = d * Laplace_perp(f, CELL_DEFAULT, "free", "RGN_NOY") + + (Grad(f) * Grad(c2) - DDY(c2) * DDY(f) / coords->g_22) / c1 + a * f + + ex * DDX(f) + ez * DDZ(f); applyBoundaries(result, f); return result; } diff --git a/tests/unit/invert/laplace/test_laplace_petsc3damg.cxx b/tests/unit/invert/laplace/test_laplace_petsc3damg.cxx index 157ec22c84..846cb9107f 100644 --- a/tests/unit/invert/laplace/test_laplace_petsc3damg.cxx +++ b/tests/unit/invert/laplace/test_laplace_petsc3damg.cxx @@ -39,9 +39,9 @@ class ForwardOperator { } const Field3D operator()(Field3D& f) { - auto result = d * Laplace_perp(f, 
CELL_DEFAULT, "free", "RGN_NOY") - + (Grad(f) * Grad(c2) - DDY(c2) * DDY(f) / coords->g_22) / c1 + a * f - + ex * DDX(f) + ez * DDZ(f); + Field3D result = d * Laplace_perp(f, CELL_DEFAULT, "free", "RGN_NOY") + + (Grad(f) * Grad(c2) - DDY(c2) * DDY(f) / coords->g_22) / c1 + a * f + + ex * DDX(f) + ez * DDZ(f); applyBoundaries(result, f); return result; } diff --git a/tests/unit/solver/test_nvector.cxx b/tests/unit/solver/test_nvector.cxx index dc45008caa..18901b067f 100644 --- a/tests/unit/solver/test_nvector.cxx +++ b/tests/unit/solver/test_nvector.cxx @@ -130,7 +130,7 @@ TYPED_TEST(BoutNVectorTest, LinearAndPointwiseOperations) { auto vz = makeNVector(BoutNVector::create(this->sunctx, z, true)); N_VLinearSum(2.0, vx.get(), -1.0, vy.get(), vz.get()); - auto expected = 2.0 * x - y; + TypeParam expected = 2.0 * x - y; EXPECT_TRUE(IsFieldEqual(z, expected)); N_VProd(vx.get(), vy.get(), vz.get()); diff --git a/tests/unit/sys/test_options_fields.cxx b/tests/unit/sys/test_options_fields.cxx index 0c6ec953af..f94ed30386 100644 --- a/tests/unit/sys/test_options_fields.cxx +++ b/tests/unit/sys/test_options_fields.cxx @@ -36,6 +36,36 @@ TEST_F(OptionsFieldTest, StoreField2D) { EXPECT_TRUE(options.isValue()); } +TEST_F(OptionsFieldTest, StoreEvaluatedField3DExpression) { + Field3D lhs = 1.0; + Field3D rhs = 2.0; + lhs(0, 1, 1) = 3.0; + rhs(0, 1, 1) = 4.0; + + Options options; + options = lhs + rhs; + + Field3D stored = options; + + EXPECT_DOUBLE_EQ(stored(0, 1, 0), 3.0); + EXPECT_DOUBLE_EQ(stored(0, 1, 1), 7.0); +} + +TEST_F(OptionsFieldTest, StoreEvaluatedField2DExpression) { + Field2D lhs = 1.0; + Field2D rhs = 2.0; + lhs(0, 1) = 3.0; + rhs(0, 1) = 4.0; + + Options options; + options = lhs + rhs; + + Field2D stored = options; + + EXPECT_DOUBLE_EQ(stored(0, 0), 3.0); + EXPECT_DOUBLE_EQ(stored(0, 1), 7.0); +} + TEST_F(OptionsFieldTest, RetrieveField3D) { Field3D field = 1.0; field(0, 1, 1) = 2.0; diff --git a/tests/unit/test_extras.cxx b/tests/unit/test_extras.cxx index 
b1caf038d3..30526f55c4 100644 --- a/tests/unit/test_extras.cxx +++ b/tests/unit/test_extras.cxx @@ -1,4 +1,5 @@ #include "test_extras.hxx" +#include "fake_mesh_fixture.hxx" #include "bout/bout_types.hxx" #include "bout/field2d.hxx" #include "bout/field3d.hxx" @@ -42,3 +43,42 @@ void fillField(Field2D& f, std::vector> values) { } } } + +using TestExtrasFieldExpr = FakeMeshFixture; + +TEST_F(TestExtrasFieldExpr, IsFieldEqualHandlesBinaryExprOnEitherSide) { + const Field2D field{1.0}; + const Field2D expected{3.0}; + + EXPECT_TRUE(IsFieldEqual(field + 2.0, expected)); + EXPECT_TRUE(IsFieldEqual(expected, field + 2.0)); +} + +TEST_F(TestExtrasFieldExpr, BinaryExprCanBeIndexedWithRegionIndex) { + const Field3D lhs{ + makeField([](const Ind3D& i) { return static_cast(i.x()); })}; + const Field3D rhs{ + makeField([](const Ind3D& i) { return static_cast(i.y()); })}; + + const auto expr = lhs + 2.0 * rhs; + Field3D result{emptyFrom(lhs)}; + + BOUT_FOR_SERIAL(i, result.getRegion("RGN_ALL")) { result[i] = expr[i]; } + + BOUT_FOR(i, result.getRegion("RGN_ALL")) { + EXPECT_DOUBLE_EQ(result[i], lhs[i] + 2.0 * rhs[i]); + } +} + +TEST_F(TestExtrasFieldExpr, Field2DBinaryExprCanBeIndexedWithInd3D) { + const Field2D lhs{ + makeField([](const Ind2D& i) { return static_cast(i.x()); })}; + const Field2D rhs{ + makeField([](const Ind2D& i) { return static_cast(i.y()); })}; + + const auto expr = lhs + 2.0 * rhs; + + BOUT_FOR(i, lhs.getMesh()->getRegion3D("RGN_ALL")) { + EXPECT_DOUBLE_EQ(expr[i], lhs[i] + 2.0 * rhs[i]); + } +} diff --git a/tests/unit/test_extras.hxx b/tests/unit/test_extras.hxx index dcc6cb3187..4b2226a54c 100644 --- a/tests/unit/test_extras.hxx +++ b/tests/unit/test_extras.hxx @@ -7,15 +7,17 @@ #include #include #include +#include #include #include "bout/bout_types.hxx" #include "bout/field.hxx" +#include "bout/field2d.hxx" +#include "bout/field3d.hxx" +#include "bout/fieldops.hxx" +#include "bout/fieldperp.hxx" #include "bout/region.hxx" -class Field2D; -class Field3D; 
- static constexpr BoutReal BoutRealTolerance{1e-15}; // FFTs have a slightly looser tolerance than other functions static constexpr BoutReal FFTTolerance{1.e-12}; @@ -29,6 +31,23 @@ void fillField(Field2D& f, std::vector> values); using bout::utils::EnableIfField; +template +inline constexpr bool isFieldOrFieldExpr_v = + bout::utils::is_Field_v> || is_expr_field2d_v + || is_expr_field3d_v || is_expr_fieldperp_v; + +template >>> +auto evaluateFieldExpr(const T& field) -> const T& { + return field; +} + +template >> +auto evaluateFieldExpr(const BinaryExpr& expr) -> ResT { + return ResT{expr}; +} + /// Returns a field filled with the result of \p fill_function at each point /// Arbitrary arguments can be passed to the field constructor template > @@ -80,15 +99,19 @@ auto inline getIndexXYZ(const IndPerp& index) -> std::string { } /// Is \p field equal to \p reference, with a tolerance of \p tolerance? -template > +template && isFieldOrFieldExpr_v>> auto IsFieldEqual(const T& field, const U& reference, const std::string& region = "RGN_ALL", BoutReal tolerance = BoutRealTolerance) -> ::testing::AssertionResult { - for (auto i : field.getRegion(region)) { - if (fabs(field[i] - reference[i]) > tolerance) { + const auto& evaluated_field = evaluateFieldExpr(field); + const auto& evaluated_reference = evaluateFieldExpr(reference); + + for (auto i : evaluated_field.getRegion(region)) { + if (fabs(evaluated_field[i] - evaluated_reference[i]) > tolerance) { return ::testing::AssertionFailure() - << getFieldType(field) << "(" << getIndexXYZ(i) << ") == " << field[i] - << "; Expected: " << reference[i]; + << getFieldType(evaluated_field) << "(" << getIndexXYZ(i) + << ") == " << evaluated_field[i] << "; Expected: " << evaluated_reference[i]; } } return ::testing::AssertionSuccess(); @@ -96,15 +119,17 @@ auto IsFieldEqual(const T& field, const U& reference, /// Is \p field equal to \p reference, with a tolerance of \p tolerance? 
/// Overload for BoutReals -template > +template >> auto IsFieldEqual(const T& field, BoutReal reference, const std::string& region = "RGN_ALL", BoutReal tolerance = BoutRealTolerance) -> ::testing::AssertionResult { - for (auto i : field.getRegion(region)) { - if (fabs(field[i] - reference) > tolerance) { + const auto& evaluated_field = evaluateFieldExpr(field); + + for (auto i : evaluated_field.getRegion(region)) { + if (fabs(evaluated_field[i] - reference) > tolerance) { return ::testing::AssertionFailure() - << getFieldType(field) << "(" << getIndexXYZ(i) << ") == " << field[i] - << "; Expected: " << reference; + << getFieldType(evaluated_field) << "(" << getIndexXYZ(i) + << ") == " << evaluated_field[i] << "; Expected: " << reference; } } return ::testing::AssertionSuccess();