Geontech
diff --git a/CMakeLists.txt b/CMakeLists.txt
Lines changed: 1 addition & 1 deletion
diff --git a/docker/Dockerfile.rocky9 b/docker/Dockerfile.rocky9
Lines changed: 1 addition & 1 deletion
diff --git a/examples/write.json b/examples/write.json
Lines changed: 2 additions & 19 deletions
diff --git a/src/components/exp_smooth/CMakeLists.txt b/src/components/exp_smooth/CMakeLists.txt
Lines changed: 1 addition & 1 deletion
diff --git a/src/components/exp_smooth/work.hpp b/src/components/exp_smooth/work.hpp
Lines changed: 106 additions & 20 deletions
diff --git a/src/components/fft/CMakeLists.txt b/src/components/fft/CMakeLists.txt
Lines changed: 1 addition & 2 deletions
diff --git a/src/components/fft/apply_window.hpp b/src/components/fft/apply_window.hpp
Lines changed: 0 additions & 44 deletions
@@ -27,7 +27,7 @@ include(FetchContent)
 # composite dependency
 FetchContent_Declare(composite
  GIT_REPOSITORY https://github.com/geontech/composite.git
- GIT_TAG v0.3.0
+ GIT_TAG v0.3.3
 )
 # vrtgen dependency
 FetchContent_Declare(vrtgen
 
@@ -28,7 +28,7 @@ WORKDIR /opt/composite-comps
 RUN set -ex; \
  . /opt/rh/gcc-toolset-13/enable; \
  cmake -B docker-build \
- -DCOMPOSITE_USE_NATS=ON \
+ -DCOMPOSITE_USE_NATS=OFF \
  -DCOMPOSITE_INSTALL=ON \
  -DCMAKE_INSTALL_PREFIX=/opt/usr/local \
  -DCMAKE_BUILD_TYPE=Release; \
 
@@ -23,23 +23,6 @@
  "value" : "9999"
  }
  ]
- },
- {
- "name" : "histogram",
- "properties" : [
- {
- "name" : "transport",
- "value" : "vita49" 
- },
- {
- "name" : "sample_rate",
- "value" : "32768"
- },
- {
- "name" : "adc_bits",
- "value" : "16"
- }
- ]
  }
  ],
  "connections" : [
@@ -49,9 +32,9 @@
  "port" : "data_out"
  },
  "input" : {
- "component" : "histogram",
+ "component" : "stov",
  "port" : "data_in"
  }
  }
  ]
 }
@@ -33,7 +33,7 @@ set(CMAKE_CXX_FLAGS_RELEASE_INIT "-O3")
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
 # Custom compile options
-add_compile_options(-march=cascadelake)
+add_compile_options(-march=x86-64-v2 -mtune=generic)  # baseline ISA only; work.hpp picks its AVX2/AVX-512 kernels at runtime via per-function target attributes
 
 # Library
 add_library(exp_smooth MODULE
 
@@ -28,58 +28,144 @@ class work {};
 
 template <>
 class work<float> {
- using psd_data_t = aligned::aligned_mem<float>;
+ using data_type = aligned::aligned_mem<float>;
+ static constexpr auto STRIDE_256 = std::size_t{256u / 8u / sizeof(float)};
+ static constexpr auto STRIDE_512 = std::size_t{512u / 8u / sizeof(float)};
 public:
  explicit work(float alpha) {
- m_alpha_vec = _mm512_set1_ps(alpha);
- m_one_minus_alpha_vec = _mm512_set1_ps(1 - alpha);
+ // Initialize the function pointer based on CPU features
+ if (__builtin_cpu_supports("avx512f")) {
+ init_avx512(alpha);
+ } else if (__builtin_cpu_supports("avx2") && __builtin_cpu_supports("fma")) {
+ init_avx2(alpha);
+ }
+ }
+
+ auto process(data_type* curr_psd, data_type* prev_psd) const -> void {
+ (this->*process_func)(curr_psd, prev_psd);
+ }
+
+private:
+ [[gnu::target("avx2,fma")]]
+ auto init_avx2(double alpha) -> void {
+ process_func = &work::process_avx2;
+ m_alpha_vec_256 = _mm256_set1_ps(alpha);
+ m_one_minus_alpha_vec_256 = _mm256_set1_ps(1 - alpha);
+ }
+
+ [[gnu::target("avx512f")]]
+ auto init_avx512(double alpha) -> void {
+ process_func = &work::process_avx512;
+ m_alpha_vec_512 = _mm512_set1_ps(alpha);
+ m_one_minus_alpha_vec_512 = _mm512_set1_ps(1 - alpha);
  }
 
- auto process(psd_data_t* curr_psd, psd_data_t* prev_psd) const -> void {
- for (auto i=0u; i < curr_psd->size(); i += 16) {
+ [[gnu::target("avx2,fma")]]
+ auto process_avx2(data_type* curr_psd, data_type* prev_psd) const -> void {
+ for (auto i=0u; i < curr_psd->size(); i += STRIDE_256) {
+ // Load data
+ auto curr_data = _mm256_load_ps(curr_psd->data() + i);
+ auto prev_data = _mm256_load_ps(prev_psd->data() + i);
+ // Multiply current data by alpha
+ curr_data = _mm256_mul_ps(curr_data, m_alpha_vec_256);
+ // Multiply prev by (1-alpha) and add to current
+ curr_data = _mm256_fmadd_ps(prev_data, m_one_minus_alpha_vec_256, curr_data);
+ // Store result into psd
+ _mm256_store_ps(curr_psd->data() + i, curr_data);
+ }
+ }
+
+ [[gnu::target("avx512f")]]
+ auto process_avx512(data_type* curr_psd, data_type* prev_psd) const -> void {
+ for (auto i=0u; i < curr_psd->size(); i += STRIDE_512) {
  // Load data
  auto curr_data = _mm512_load_ps(curr_psd->data() + i);
  auto prev_data = _mm512_load_ps(prev_psd->data() + i);
  // Multiply current data by alpha
- curr_data = _mm512_mul_ps(curr_data, m_alpha_vec);
+ curr_data = _mm512_mul_ps(curr_data, m_alpha_vec_512);
  // Multiply prev by (1-alpha) and add to current
- curr_data = _mm512_fmadd_ps(prev_data, m_one_minus_alpha_vec, curr_data);
+ curr_data = _mm512_fmadd_ps(prev_data, m_one_minus_alpha_vec_512, curr_data);
  // Store result into psd
  _mm512_store_ps(curr_psd->data() + i, curr_data);
  }
  }
 
-private:
- __m512 m_alpha_vec;
- __m512 m_one_minus_alpha_vec;
+ auto (work::*process_func)(data_type*, data_type*) const -> void;
+ __m256 m_alpha_vec_256;
+ __m512 m_alpha_vec_512;
+ __m256 m_one_minus_alpha_vec_256;
+ __m512 m_one_minus_alpha_vec_512;
 
 }; // class work<float>
 
 template <>
 class work<double> {
- using psd_data_t = aligned::aligned_mem<double>;
+ using data_type = aligned::aligned_mem<double>;
+ static constexpr auto STRIDE_256 = std::size_t{256u / 8u / sizeof(double)};
+ static constexpr auto STRIDE_512 = std::size_t{512u / 8u / sizeof(double)};
 public:
  explicit work(double alpha) {
- m_alpha_vec = _mm512_set1_pd(alpha);
- m_one_minus_alpha_vec = _mm512_set1_pd(1 - alpha);
+ // Initialize the function pointer based on CPU features
+ if (__builtin_cpu_supports("avx512f")) {
+ init_avx512(alpha);
+ } else if (__builtin_cpu_supports("avx2") && __builtin_cpu_supports("fma")) {
+ init_avx2(alpha);
+ }
+ }
+
+ auto process(data_type* curr_psd, data_type* prev_psd) const -> void {
+ (this->*process_func)(curr_psd, prev_psd);
+ }
+
+private:
+ [[gnu::target("avx2,fma")]]
+ auto init_avx2(double alpha) -> void {
+ process_func = &work::process_avx2;
+ m_alpha_vec_256 = _mm256_set1_pd(alpha);
+ m_one_minus_alpha_vec_256 = _mm256_set1_pd(1 - alpha);
+ }
+
+ [[gnu::target("avx512f")]]
+ auto init_avx512(double alpha) -> void {
+ process_func = &work::process_avx512;
+ m_alpha_vec_512 = _mm512_set1_pd(alpha);
+ m_one_minus_alpha_vec_512 = _mm512_set1_pd(1 - alpha);
  }
 
- auto process(psd_data_t* curr_psd, psd_data_t* prev_psd) const -> void {
- for (auto i=0u; i < curr_psd->size(); i += 8) {
+ [[gnu::target("avx2,fma")]]
+ auto process_avx2(data_type* curr_psd, data_type* prev_psd) const -> void {
+ for (auto i=0u; i < curr_psd->size(); i += STRIDE_256) {
+ // Load data
+ auto curr_data = _mm256_load_pd(curr_psd->data() + i);
+ auto prev_data = _mm256_load_pd(prev_psd->data() + i);
+ // Multiply current data by alpha
+ curr_data = _mm256_mul_pd(curr_data, m_alpha_vec_256);
+ // Multiply prev by (1-alpha) and add to current
+ curr_data = _mm256_fmadd_pd(prev_data, m_one_minus_alpha_vec_256, curr_data);
+ // Store result into psd
+ _mm256_store_pd(curr_psd->data() + i, curr_data);
+ }
+ }
+
+ [[gnu::target("avx512f")]]
+ auto process_avx512(data_type* curr_psd, data_type* prev_psd) const -> void {
+ for (auto i=0u; i < curr_psd->size(); i += STRIDE_512) {
  // Load data
  auto curr_data = _mm512_load_pd(curr_psd->data() + i);
  auto prev_data = _mm512_load_pd(prev_psd->data() + i);
  // Multiply current data by alpha
- curr_data = _mm512_mul_pd(curr_data, m_alpha_vec);
+ curr_data = _mm512_mul_pd(curr_data, m_alpha_vec_512);
  // Multiply prev by (1-alpha) and add to current
- curr_data = _mm512_fmadd_pd(prev_data, m_one_minus_alpha_vec, curr_data);
+ curr_data = _mm512_fmadd_pd(prev_data, m_one_minus_alpha_vec_512, curr_data);
  // Store result into psd
  _mm512_store_pd(curr_psd->data() + i, curr_data);
  }
  }
 
-private:
- __m512d m_alpha_vec;
- __m512d m_one_minus_alpha_vec;
+ auto (work::*process_func)(data_type*, data_type*) const -> void;
+ __m256d m_alpha_vec_256;
+ __m512d m_alpha_vec_512;
+ __m256d m_one_minus_alpha_vec_256;
+ __m512d m_one_minus_alpha_vec_512;
 
 }; // class work<double>
@@ -33,7 +33,7 @@ set(CMAKE_CXX_FLAGS_RELEASE_INIT "-O3")
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
 # Custom compile options
-add_compile_options(-march=cascadelake -ffast-math)
+add_compile_options(-march=x86-64-v2 -mtune=generic -ffast-math)  # NOTE(review): baseline no longer enables AVX-512 - confirm no remaining fft source relies on it unconditionally
 
 # Library
 add_library(fft MODULE
@@ -43,7 +43,6 @@ add_library(fft MODULE
 target_include_directories(fft
  PRIVATE
  ${PROJECT_SOURCE_DIR}/../../../include
- ${vrtgen_SOURCE_DIR}/include
 )
 target_link_libraries(fft
  PRIVATE
Original file line number	Diff line number	Diff line change
`@@ -27,7 +27,7 @@ include(FetchContent)`
`27`	`27`	`# composite dependency`
`28`	`28`	`FetchContent_Declare(composite`
`29`	`29`	`GIT_REPOSITORY https://github.com/geontech/composite.git`
`30`		`- GIT_TAG v0.3.0`
	`30`	`+ GIT_TAG v0.3.3`
`31`	`31`	`)`
`32`	`32`	`# vrtgen dependency`
`33`	`33`	`FetchContent_Declare(vrtgen`