From ed1ec90287618a458cfe6be09e9b47ab708d66c2 Mon Sep 17 00:00:00 2001 From: ThorstenB Date: Sun, 1 Apr 2012 00:36:41 +0200 Subject: [PATCH] YASim performance optimization Several functions of YASim's math wrapper are hotspots. Allow compiler optimization/inlining. --- src/FDM/YASim/CMakeLists.txt | 1 - src/FDM/YASim/Math.cpp | 264 ----------------------------------- src/FDM/YASim/Math.hpp | 193 ++++++++++++++++++++----- 3 files changed, 157 insertions(+), 301 deletions(-) delete mode 100644 src/FDM/YASim/Math.cpp diff --git a/src/FDM/YASim/CMakeLists.txt b/src/FDM/YASim/CMakeLists.txt index f5c11e2a1..dcd4ee5a9 100644 --- a/src/FDM/YASim/CMakeLists.txt +++ b/src/FDM/YASim/CMakeLists.txt @@ -13,7 +13,6 @@ set(COMMON Integrator.cpp Jet.cpp Launchbar.cpp - Math.cpp Model.cpp PistonEngine.cpp PropEngine.cpp diff --git a/src/FDM/YASim/Math.cpp b/src/FDM/YASim/Math.cpp deleted file mode 100644 index 2c53c3559..000000000 --- a/src/FDM/YASim/Math.cpp +++ /dev/null @@ -1,264 +0,0 @@ -#include - -#include "Math.hpp" -namespace yasim { - -float Math::clamp(float val, float min, float max) -{ - if(val < min) return min; - if(val > max) return max; - return val; -} - -float Math::abs(float f) -{ - return (float)::fabs(f); -} - -float Math::sqrt(float f) -{ - return (float)::sqrt(f); -} - -float Math::ceil(float f) -{ - return (float)::ceil(f); -} - -float Math::acos(float f) -{ - return (float)::acos(f); -} - -float Math::asin(float f) -{ - return (float)::asin(f); -} - -float Math::cos(float f) -{ - return (float)::cos(f); -} - -float Math::sin(float f) -{ - return (float)::sin(f); -} - -float Math::tan(float f) -{ - return (float)::tan(f); -} - -float Math::atan(float f) -{ - return (float)::atan(f); -} - -float Math::atan2(float y, float x) -{ - return (float)::atan2(y, x); -} - -double Math::floor(double x) -{ - return ::floor(x); -} - -double Math::abs(double f) -{ - return ::fabs(f); -} - -double Math::sqrt(double f) -{ - return ::sqrt(f); -} - -float Math::pow(double base, double exp) -{ - return (float)::pow(base, exp); -} - -float Math::exp(float f) -{ - return (float)::exp(f); -} - -double Math::ceil(double f) -{ - return ::ceil(f); -} - -double Math::cos(double f) -{ - return ::cos(f); -} - -double Math::sin(double f) -{ - return ::sin(f); -} - -double Math::tan(double f) -{ - return ::tan(f); -} - -double Math::atan2(double y, double x) -{ - return ::atan2(y, x); -} - -void Math::set3(float* v, float* out) -{ - out[0] = v[0]; - out[1] = v[1]; - out[2] = v[2]; -} - -float Math::dot3(float* a, float* b) -{ - return a[0]*b[0] + a[1]*b[1] + a[2]*b[2]; -} - -void Math::mul3(float scalar, float* v, float* out) -{ - out[0] = scalar * v[0]; - out[1] = scalar * v[1]; - out[2] = scalar * v[2]; -} - -void Math::add3(float* a, float* b, float* out) -{ - out[0] = a[0] + b[0]; - out[1] = a[1] + b[1]; - out[2] = a[2] + b[2]; -} - -void Math::sub3(float* a, float* b, float* out) -{ - out[0] = a[0] - b[0]; - out[1] = a[1] - b[1]; - out[2] = a[2] - b[2]; -} - -float Math::mag3(float* v) -{ - return sqrt(dot3(v, v)); -} - -void Math::unit3(float* v, float* out) -{ - float imag = 1/mag3(v); - mul3(imag, v, out); -} - -void Math::cross3(float* a, float* b, float* out) -{ - float ax=a[0], ay=a[1], az=a[2]; - float bx=b[0], by=b[1], bz=b[2]; - out[0] = ay*bz - by*az; - out[1] = az*bx - bz*ax; - out[2] = ax*by - bx*ay; -} - -void Math::mmul33(float* a, float* b, float* out) -{ - float tmp[9]; - tmp[0] = a[0]*b[0] + a[1]*b[3] + a[2]*b[6]; - tmp[3] = a[3]*b[0] + a[4]*b[3] + a[5]*b[6]; - tmp[6] = a[6]*b[0] + a[7]*b[3] + a[8]*b[6]; - - tmp[1] = a[0]*b[1] + a[1]*b[4] + a[2]*b[7]; - tmp[4] = a[3]*b[1] + a[4]*b[4] + a[5]*b[7]; - tmp[7] = a[6]*b[1] + a[7]*b[4] + a[8]*b[7]; - - tmp[2] = a[0]*b[2] + a[1]*b[5] + a[2]*b[8]; - tmp[5] = a[3]*b[2] + a[4]*b[5] + a[5]*b[8]; - tmp[8] = a[6]*b[2] + a[7]*b[5] + a[8]*b[8]; - - int i; - for(i=0; i<9; i++) - out[i] = tmp[i]; -} - -void Math::vmul33(float* m, float* v, float* out) -{ - float x = v[0], y = v[1], z = v[2]; - out[0] = x*m[0] + y*m[1] + z*m[2]; - out[1] = x*m[3] + y*m[4] + z*m[5]; - out[2] = x*m[6] + y*m[7] + z*m[8]; -} - -void Math::tmul33(float* m, float* v, float* out) -{ - float x = v[0], y = v[1], z = v[2]; - out[0] = x*m[0] + y*m[3] + z*m[6]; - out[1] = x*m[1] + y*m[4] + z*m[7]; - out[2] = x*m[2] + y*m[5] + z*m[8]; -} - -void Math::invert33(float* m, float* out) -{ - // Compute the inverse as the adjoint matrix times 1/(det M). - // A, B ... I are the cofactors of a b c - // d e f - // g h i - float a=m[0], b=m[1], c=m[2]; - float d=m[3], e=m[4], f=m[5]; - float g=m[6], h=m[7], i=m[8]; - - float A = (e*i - h*f); - float B = -(d*i - g*f); - float C = (d*h - g*e); - float D = -(b*i - h*c); - float E = (a*i - g*c); - float F = -(a*h - g*b); - float G = (b*f - e*c); - float H = -(a*f - d*c); - float I = (a*e - d*b); - - float id = 1/(a*A + b*B + c*C); - - out[0] = id*A; out[1] = id*D; out[2] = id*G; - out[3] = id*B; out[4] = id*E; out[5] = id*H; - out[6] = id*C; out[7] = id*F; out[8] = id*I; -} - -void Math::trans33(float* m, float* out) -{ - // 0 1 2 Elements 0, 4, and 8 are the same - // 3 4 5 Swap elements 1/3, 2/6, and 5/7 - // 6 7 8 - out[0] = m[0]; - out[4] = m[4]; - out[8] = m[8]; - - float tmp = m[1]; - out[1] = m[3]; - out[3] = tmp; - - tmp = m[2]; - out[2] = m[6]; - out[6] = tmp; - - tmp = m[5]; - out[5] = m[7]; - out[7] = tmp; -} - -void Math::ortho33(float* x, float* y, - float* xOut, float* yOut, float* zOut) -{ - float x0[3], y0[3]; - set3(x, x0); - set3(y, y0); - - unit3(x0, xOut); - cross3(xOut, y0, zOut); - unit3(zOut, zOut); - cross3(zOut, xOut, yOut); -} - -}; // namespace yasim diff --git a/src/FDM/YASim/Math.hpp b/src/FDM/YASim/Math.hpp index e729f6d72..ab1f39b2d 100644 --- a/src/FDM/YASim/Math.hpp +++ b/src/FDM/YASim/Math.hpp @@ -1,79 +1,200 @@ #ifndef _MATH_HPP #define _MATH_HPP +#include + namespace yasim { class Math { public: // Dumb utilities - static float clamp(float val, float min, float max); + static inline float clamp(float val, float min, float max) { + if(val < min) return min; + if(val > max) return max; + return val; + } // Simple wrappers around library routines - static float abs(float f); - static float sqrt(float f); - static float ceil(float f); - static float sin(float f); - static float cos(float f); - static float tan(float f); - static float atan(float f); - static float atan2(float y, float x); - static float asin(float f); - static float acos(float f); - static float exp(float f); - static float sqr(float f) {return f*f;} + static inline float abs(float f) { return (float)::fabs(f); } + static inline float sqrt(float f) { return (float)::sqrt(f); } + static inline float ceil(float f) { return (float)::ceil(f); } + static inline float sin(float f) { return (float)::sin(f); } + static inline float cos(float f) { return (float)::cos(f); } + static inline float tan(float f) { return (float)::tan(f); } + static inline float atan(float f) { return (float)::atan(f); } + static inline float atan2(float y, float x) { return (float)::atan2(y,x); } + static inline float asin(float f) { return (float)::asin(f); } + static inline float acos(float f) { return (float)::acos(f); } + static inline float exp(float f) { return (float)::exp(f); } + static inline float sqr(float f) { return f*f; } // Takes two args and runs afoul of the Koenig rules. - static float pow(double base, double exp); + static inline float pow(double base, double exp) { return (float)::pow(base, exp); } // double variants of the above - static double abs(double f); - static double sqrt(double f); - static double ceil(double f); - static double sin(double f); - static double cos(double f); - static double tan(double f); - static double atan2(double y, double x); - static double floor(double x); + static inline double abs(double f) { return ::fabs(f); } + static inline double sqrt(double f) { return ::sqrt(f); } + static inline double ceil(double f) { return ::ceil(f); } + static inline double sin(double f) { return ::sin(f); } + static inline double cos(double f) { return ::cos(f); } + static inline double tan(double f) { return ::tan(f); } + static inline double atan2(double y, double x) { return ::atan2(y,x); } + static inline double floor(double x) { return ::floor(x); } // Some 3D vector stuff. In all cases, it is permissible for the // "out" vector to be the same as one of the inputs. - static void set3(float* v, float* out); - static float dot3(float* a, float* b); - static void cross3(float* a, float* b, float* out); - static void mul3(float scalar, float* v, float* out); - static void add3(float* a, float* b, float* out); - static void sub3(float* a, float* b, float* out); - static float mag3(float* v); - static void unit3(float* v, float* out); + static inline void set3(float* v, float* out) { + out[0] = v[0]; + out[1] = v[1]; + out[2] = v[2]; + } + + static inline float dot3(float* a, float* b) { + return a[0]*b[0] + a[1]*b[1] + a[2]*b[2]; + } + + static inline void cross3(float* a, float* b, float* out) { + float ax=a[0], ay=a[1], az=a[2]; + float bx=b[0], by=b[1], bz=b[2]; + out[0] = ay*bz - by*az; + out[1] = az*bx - bz*ax; + out[2] = ax*by - bx*ay; + } + + static inline void mul3(float scalar, float* v, float* out) + { + out[0] = scalar * v[0]; + out[1] = scalar * v[1]; + out[2] = scalar * v[2]; + } + + static inline void add3(float* a, float* b, float* out){ + out[0] = a[0] + b[0]; + out[1] = a[1] + b[1]; + out[2] = a[2] + b[2]; + } + + static inline void sub3(float* a, float* b, float* out) { + out[0] = a[0] - b[0]; + out[1] = a[1] - b[1]; + out[2] = a[2] - b[2]; + } + + static inline float mag3(float* v) { + return sqrt(dot3(v, v)); + } + + static inline void unit3(float* v, float* out) { + float imag = 1/mag3(v); + mul3(imag, v, out); + } // Matrix array convention: 0 1 2 // 3 4 5 // 6 7 8 // Multiply two matrices - static void mmul33(float* a, float* b, float* out); + static void mmul33(float* a, float* b, float* out) { + float tmp[9]; + tmp[0] = a[0]*b[0] + a[1]*b[3] + a[2]*b[6]; + tmp[3] = a[3]*b[0] + a[4]*b[3] + a[5]*b[6]; + tmp[6] = a[6]*b[0] + a[7]*b[3] + a[8]*b[6]; + + tmp[1] = a[0]*b[1] + a[1]*b[4] + a[2]*b[7]; + tmp[4] = a[3]*b[1] + a[4]*b[4] + a[5]*b[7]; + tmp[7] = a[6]*b[1] + a[7]*b[4] + a[8]*b[7]; + + tmp[2] = a[0]*b[2] + a[1]*b[5] + a[2]*b[8]; + tmp[5] = a[3]*b[2] + a[4]*b[5] + a[5]*b[8]; + tmp[8] = a[6]*b[2] + a[7]*b[5] + a[8]*b[8]; + + for(int i=0; i<9; ++i) + out[i] = tmp[i]; + } // Multiply by vector - static void vmul33(float* m, float* v, float* out); + static inline void vmul33(float* m, float* v, float* out) { + float x = v[0], y = v[1], z = v[2]; + out[0] = x*m[0] + y*m[1] + z*m[2]; + out[1] = x*m[3] + y*m[4] + z*m[5]; + out[2] = x*m[6] + y*m[7] + z*m[8]; + } // Multiply the vector by the matrix transpose. Or pre-multiply the // matrix by v as a row vector. Same thing. - static void tmul33(float* m, float* v, float* out); + static inline void tmul33(float* m, float* v, float* out) { + float x = v[0], y = v[1], z = v[2]; + out[0] = x*m[0] + y*m[3] + z*m[6]; + out[1] = x*m[1] + y*m[4] + z*m[7]; + out[2] = x*m[2] + y*m[5] + z*m[8]; + } // Invert matrix - static void invert33(float* m, float* out); + static void invert33(float* m, float* out) { + // Compute the inverse as the adjoint matrix times 1/(det M). + // A, B ... I are the cofactors of a b c + // d e f + // g h i + float a=m[0], b=m[1], c=m[2]; + float d=m[3], e=m[4], f=m[5]; + float g=m[6], h=m[7], i=m[8]; + + float A = (e*i - h*f); + float B = -(d*i - g*f); + float C = (d*h - g*e); + float D = -(b*i - h*c); + float E = (a*i - g*c); + float F = -(a*h - g*b); + float G = (b*f - e*c); + float H = -(a*f - d*c); + float I = (a*e - d*b); + + float id = 1/(a*A + b*B + c*C); + + out[0] = id*A; out[1] = id*D; out[2] = id*G; + out[3] = id*B; out[4] = id*E; out[5] = id*H; + out[6] = id*C; out[7] = id*F; out[8] = id*I; + } // Transpose matrix (for an orthonormal orientation matrix, this // is the same as the inverse). - static void trans33(float* m, float* out); + static inline void trans33(float* m, float* out) { + // 0 1 2 Elements 0, 4, and 8 are the same + // 3 4 5 Swap elements 1/3, 2/6, and 5/7 + // 6 7 8 + out[0] = m[0]; + out[4] = m[4]; + out[8] = m[8]; + + float tmp = m[1]; + out[1] = m[3]; + out[3] = tmp; + + tmp = m[2]; + out[2] = m[6]; + out[6] = tmp; + + tmp = m[5]; + out[5] = m[7]; + out[7] = tmp; + } // Generates an orthonormal basis: // xOut becomes the unit vector in the direction of x // yOut is perpendicular to xOut in the x/y plane // zOut becomes the unit vector: (xOut cross yOut) static void ortho33(float* x, float* y, - float* xOut, float* yOut, float* zOut); + float* xOut, float* yOut, float* zOut) { + float x0[3], y0[3]; + set3(x, x0); + set3(y, y0); + + unit3(x0, xOut); + cross3(xOut, y0, zOut); + unit3(zOut, zOut); + cross3(zOut, xOut, yOut); + } }; }; // namespace yasim -- 2.39.5