On Ubuntu 20.04, OpenMP does not exist as a standalone runtime package with an independent version number. Its version is determined by the compiler implementation (GCC or Clang), since OpenMP support is built into the compiler.
Check the OpenMP version supported by your toolchain:
CMake:
# Locate the compiler's built-in OpenMP support; configuration fails if it is absent.
find_package(OpenMP REQUIRED)
# OpenMP_CXX_VERSION: the OpenMP specification version the C++ compiler implements.
message(STATUS "OpenMP_CXX_VERSION: ${OpenMP_CXX_VERSION}")
# OpenMP_CXX_SPEC_DATE: the yyyymm date of that specification
# (the same value the compiler predefines as the _OPENMP macro).
message(STATUS "OpenMP_CXX_SPEC_DATE: ${OpenMP_CXX_SPEC_DATE}")
Runtime check:
#include <iostream>
// _OPENMP is predefined by the compiler only when OpenMP is enabled
// (e.g. -fopenmp), so guard the header and the macro use: the original
// unguarded version fails to compile in a non-OpenMP build.
#ifdef _OPENMP
#include <omp.h>
#endif

// Prints the OpenMP spec-date macro and the maximum thread count.
int main()
{
#ifdef _OPENMP
    std::cout << "_OPENMP macro: " << _OPENMP << std::endl;
    std::cout << "Max threads: " << omp_get_max_threads() << std::endl;
#else
    std::cout << "OpenMP is not enabled in this build." << std::endl;
#endif
    return 0;
}
Usage examples:
// Parallel loop
#pragma omp parallel for
for (int i = 0; i < size; ++i) {
// Code that can be executed in parallel
data[i] *= 2; // Example: Doubling each element
// Code that cannot be executed in parallel; {} is not necessary, it's used to improve readability and avoid unintended behavior.
#pragma omp critical
{
result += data[i]; // Example: Accumulating to a shared variable
}
}
// Parallel loop with reduction
#pragma omp parallel for reduction(+:result)
for (int i = 0; i < size; ++i) {
// Code that can be executed in parallel
data[i] *= 2; // Example: Doubling each element
// Reduction operation
result += data[i];
}
Benefits of Using reduction:
- Reduces overhead of the critical section.
- Automatically handles thread-local copies and merging.
Use the reduction clause for simpler operations and critical for more complex shared operations.
Set the number of threads used for parallel execution:
// OpenMP does not guarantee a fixed traversal order.
#pragma omp parallel for num_threads(4)
for (int i = 0; i < size; ++i) {
data[i] *= 2; // Example operation: doubling each element
int thread_id = omp_get_thread_num();
std::cout << "Thread " << thread_id << " processed index " << i << "\n";
}
If traversal order is important, use ordered:
#pragma omp parallel for ordered
for (int i = 0; i < 10; i++) {
#pragma omp ordered
printf("Ordered execution: i = %d\n", i);
}