/*
Copyright (c) 2003-2014 Erwin Coumans  http://bullet.googlecode.com

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/


#include "btThreads.h"
#include "btQuickprof.h"
#include <algorithm>  // for min and max


#if BT_USE_OPENMP && BT_THREADSAFE

#include <omp.h>

#endif // #if BT_USE_OPENMP && BT_THREADSAFE


#if BT_USE_PPL && BT_THREADSAFE

// use Microsoft Parallel Patterns Library (installed with Visual Studio 2010 and later)
#include <ppl.h>  // if you get a compile error here, check whether your version of Visual Studio includes PPL
// Visual Studio 2010 and later should come with it
#include <concrtrm.h>  // for GetProcessorCount()

#endif // #if BT_USE_PPL && BT_THREADSAFE


#if BT_USE_TBB && BT_THREADSAFE

// use Intel Threading Building Blocks for thread management
#define __TBB_NO_IMPLICIT_LINKAGE 1
#include <tbb/tbb.h>
#include <tbb/task_scheduler_init.h>
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>

#endif // #if BT_USE_TBB && BT_THREADSAFE


#if BT_THREADSAFE
//
// Lightweight spin-mutex based on atomics.
// Using ordinary system-provided mutexes like Windows critical sections was noticeably slower,
// presumably because a failed lock attempt puts the thread to sleep and triggers costly
// context switching.
//

#if __cplusplus >= 201103L

// for anything claiming full C++11 compliance, use C++11 atomics
// on GCC or Clang you need to compile with -std=c++11
#define USE_CPP11_ATOMICS 1

#elif defined( _MSC_VER )

// on MSVC, use intrinsics instead
#define USE_MSVC_INTRINSICS 1

#elif defined( __GNUC__ ) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))

// available since GCC 4.7 and some versions of clang
// todo: check for clang
#define USE_GCC_BUILTIN_ATOMICS 1

#elif defined( __GNUC__ ) && (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)

// available since GCC 4.1
#define USE_GCC_BUILTIN_ATOMICS_OLD 1

#endif


#if USE_CPP11_ATOMICS

#include <atomic>
#include <thread>

#define THREAD_LOCAL_STATIC thread_local static

bool btSpinMutex::tryLock()
{
    std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock);
    int expected = 0;
    return std::atomic_compare_exchange_weak_explicit( aDest, &expected, int(1), std::memory_order_acq_rel, std::memory_order_acquire );
}

void btSpinMutex::lock()
{
    // note: this lock does not sleep the thread
    while (! tryLock())
    {
        // spin
    }
}

void btSpinMutex::unlock()
{
    std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock);
    std::atomic_store_explicit( aDest, int(0), std::memory_order_release );
}


#elif USE_MSVC_INTRINSICS

#define WIN32_LEAN_AND_MEAN

#include <windows.h>
#include <intrin.h>

#define THREAD_LOCAL_STATIC __declspec( thread ) static


bool btSpinMutex::tryLock()
{
    volatile long* aDest = reinterpret_cast<long*>(&mLock);
    return ( 0 == _InterlockedCompareExchange( aDest, 1, 0) );
}

void btSpinMutex::lock()
{
    // note: this lock does not sleep the thread
    while (! tryLock())
    {
        // spin
    }
}

void btSpinMutex::unlock()
{
    volatile long* aDest = reinterpret_cast<long*>( &mLock );
    _InterlockedExchange( aDest, 0 );
}

#elif USE_GCC_BUILTIN_ATOMICS

#define THREAD_LOCAL_STATIC static __thread


bool btSpinMutex::tryLock()
{
    int expected = 0;
    bool weak = false;
    const int memOrderSuccess = __ATOMIC_ACQ_REL;
    const int memOrderFail = __ATOMIC_ACQUIRE;
    return __atomic_compare_exchange_n(&mLock, &expected, int(1), weak, memOrderSuccess, memOrderFail);
}

void btSpinMutex::lock()
{
    // note: this lock does not sleep the thread
    while (! tryLock())
    {
        // spin
    }
}

void btSpinMutex::unlock()
{
    __atomic_store_n(&mLock, int(0), __ATOMIC_RELEASE);
}

#elif USE_GCC_BUILTIN_ATOMICS_OLD


#define THREAD_LOCAL_STATIC static __thread

bool btSpinMutex::tryLock()
{
    return __sync_bool_compare_and_swap(&mLock, int(0), int(1));
}

void btSpinMutex::lock()
{
    // note: this lock does not sleep the thread
    while (! tryLock())
    {
        // spin
    }
}

void btSpinMutex::unlock()
{
    // write 0
    __sync_fetch_and_and(&mLock, int(0));
}

#else //#elif USE_MSVC_INTRINSICS

#error "no threading primitives defined -- unknown platform"

#endif  //#else //#elif USE_MSVC_INTRINSICS

#else //#if BT_THREADSAFE

// These should not be called ever
void btSpinMutex::lock()
{
    btAssert( !"unimplemented btSpinMutex::lock() called" );
}

void btSpinMutex::unlock()
{
    btAssert( !"unimplemented btSpinMutex::unlock() called" );
}

bool btSpinMutex::tryLock()
{
    btAssert( !"unimplemented btSpinMutex::tryLock() called" );
    return true;
}

#define THREAD_LOCAL_STATIC static

#endif // #else //#if BT_THREADSAFE
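
// Usage sketch (illustrative, not part of the Bullet API): btSpinMutex is meant
// for very short critical sections, since waiting threads spin instead of sleeping.
// The names below are hypothetical.
//
//   static btSpinMutex sMutex;         // protects sSharedTotal
//   static int sSharedTotal = 0;
//
//   void addToTotal( int localValue )  // callable from any thread
//   {
//       sMutex.lock();
//       sSharedTotal += localValue;    // keep the critical section short
//       sMutex.unlock();
//   }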


struct ThreadsafeCounter
{
    unsigned int mCounter;
    btSpinMutex mMutex;

    ThreadsafeCounter()
    {
        mCounter = 0;
        --mCounter;  // first count should come back 0
    }

    unsigned int getNext()
    {
        // no need to optimize this with atomics, it is only called ONCE per thread!
        mMutex.lock();
        mCounter++;
        if ( mCounter >= BT_MAX_THREAD_COUNT )
        {
            btAssert( !"thread counter exceeded" );
            // wrap back to the first worker index
            mCounter = 1;
        }
        unsigned int val = mCounter;
        mMutex.unlock();
        return val;
    }
};


static btITaskScheduler* gBtTaskScheduler;
static int gThreadsRunningCounter = 0;  // useful for detecting if we are trying to do nested parallel-for calls
static btSpinMutex gThreadsRunningCounterMutex;
static ThreadsafeCounter gThreadCounter;

//
// BT_DETECT_BAD_THREAD_INDEX tries to detect when multiple threads are assigned the same thread index.
//
// BT_DETECT_BAD_THREAD_INDEX is a developer option to test whether
// certain assumptions about how the task scheduler manages its threads
// hold true.
// The main assumption is:
//   - when the threadpool is resized, the task scheduler either
//      1. destroys all worker threads and creates all new ones in the correct number, OR
//      2. never destroys a worker thread
//
// We make that assumption because we can't easily enumerate the worker threads of a task scheduler
// to assign nice sequential thread-indexes. We also do not get notified if a worker thread is destroyed,
// so we can't tell when a thread-index is no longer being used.
// We allocate thread-indexes as needed with a sequential global thread counter.
//
// Our simple thread-counting scheme falls apart if the task scheduler destroys some threads but
// continues to re-use other threads and the application repeatedly resizes the thread pool of the
// task scheduler.
// In order to prevent the thread-counter from exceeding the global max (BT_MAX_THREAD_COUNT), we
// wrap the thread counter back to 1. This should only happen if the worker threads have all been
// destroyed and re-created.
//
// BT_DETECT_BAD_THREAD_INDEX only works for Win32 right now,
// but could be adapted to work with pthreads
#define BT_DETECT_BAD_THREAD_INDEX 0

#if BT_DETECT_BAD_THREAD_INDEX

typedef DWORD ThreadId_t;
const static ThreadId_t kInvalidThreadId = 0;
ThreadId_t gDebugThreadIds[ BT_MAX_THREAD_COUNT ];

static ThreadId_t getDebugThreadId()
{
    return GetCurrentThreadId();
}

#endif // #if BT_DETECT_BAD_THREAD_INDEX


// return a unique index per thread, main thread is 0, worker threads are in [1, BT_MAX_THREAD_COUNT)
unsigned int btGetCurrentThreadIndex()
{
    const unsigned int kNullIndex = ~0U;
    THREAD_LOCAL_STATIC unsigned int sThreadIndex = kNullIndex;
    if ( sThreadIndex == kNullIndex )
    {
        sThreadIndex = gThreadCounter.getNext();
        btAssert( sThreadIndex < BT_MAX_THREAD_COUNT );
    }
#if BT_DETECT_BAD_THREAD_INDEX
    if ( gBtTaskScheduler && sThreadIndex > 0 )
    {
        ThreadId_t tid = getDebugThreadId();
        // if not set
        if ( gDebugThreadIds[ sThreadIndex ] == kInvalidThreadId )
        {
            // set it
            gDebugThreadIds[ sThreadIndex ] = tid;
        }
        else
        {
            if ( gDebugThreadIds[ sThreadIndex ] != tid )
            {
                // this could indicate the task scheduler is breaking our assumptions about
                // how threads are managed when threadpool is resized
                btAssert( !"there are 2 or more threads with the same thread-index!" );
                __debugbreak();
            }
        }
    }
#endif // #if BT_DETECT_BAD_THREAD_INDEX
    return sThreadIndex;
}
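
// Usage sketch (illustrative, not part of this file): the per-thread index allows
// lock-free accumulation into disjoint slots of a fixed-size array. The names
// below are hypothetical.
//
//   static int sPerThreadHitCount[ BT_MAX_THREAD_COUNT ];  // one slot per thread index
//
//   void recordHit()  // callable from any worker thread
//   {
//       // no mutex needed; no two simultaneously-running threads share an index
//       sPerThreadHitCount[ btGetCurrentThreadIndex() ]++;
//   }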

bool btIsMainThread()
{
    return btGetCurrentThreadIndex() == 0;
}

void btResetThreadIndexCounter()
{
    // for when all current worker threads are destroyed
    btAssert( btIsMainThread() );
    gThreadCounter.mCounter = 0;
}

btITaskScheduler::btITaskScheduler( const char* name )
{
    m_name = name;
    m_savedThreadCounter = 0;
    m_isActive = false;
}

void btITaskScheduler::activate()
{
    // gThreadCounter is used to assign a thread-index to each worker thread in a task scheduler.
    // The main thread is always thread-index 0, and worker threads are numbered from 1 to 63 (BT_MAX_THREAD_COUNT-1).
    // The thread-indexes need to be unique amongst the threads that can be running simultaneously.
    // Since only one task scheduler can be used at a time, it is OK for a pair of threads that belong to different
    // task schedulers to share the same thread index because they can't be running at the same time.
    // So each task scheduler needs to keep its own thread counter value.
    if ( !m_isActive )
    {
        gThreadCounter.mCounter = m_savedThreadCounter;  // restore saved thread counter
        m_isActive = true;
    }
}

void btITaskScheduler::deactivate()
{
    if ( m_isActive )
    {
        m_savedThreadCounter = gThreadCounter.mCounter;  // save thread counter
        m_isActive = false;
    }
}

void btPushThreadsAreRunning()
{
    gThreadsRunningCounterMutex.lock();
    gThreadsRunningCounter++;
    gThreadsRunningCounterMutex.unlock();
}

void btPopThreadsAreRunning()
{
    gThreadsRunningCounterMutex.lock();
    gThreadsRunningCounter--;
    gThreadsRunningCounterMutex.unlock();
}

bool btThreadsAreRunning()
{
    return gThreadsRunningCounter != 0;
}


void btSetTaskScheduler( btITaskScheduler* ts )
{
    int threadId = btGetCurrentThreadIndex();  // make sure we call this on the main thread at least once before any workers run
    if ( threadId != 0 )
    {
        btAssert( !"btSetTaskScheduler must be called from the main thread!" );
        return;
    }
    if ( gBtTaskScheduler )
    {
        // deactivate old task scheduler
        gBtTaskScheduler->deactivate();
    }
    gBtTaskScheduler = ts;
    if ( ts )
    {
        // activate new task scheduler
        ts->activate();
    }
}


btITaskScheduler* btGetTaskScheduler()
{
    return gBtTaskScheduler;
}


void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body )
{
#if BT_THREADSAFE

#if BT_DETECT_BAD_THREAD_INDEX
    if ( !btThreadsAreRunning() )
    {
        // clear out thread ids
        for ( int i = 0; i < BT_MAX_THREAD_COUNT; ++i )
        {
            gDebugThreadIds[ i ] = kInvalidThreadId;
        }
    }
#endif // #if BT_DETECT_BAD_THREAD_INDEX

    btAssert( gBtTaskScheduler != NULL );  // call btSetTaskScheduler() with a valid task scheduler first!
    gBtTaskScheduler->parallelFor( iBegin, iEnd, grainSize, body );

#else // #if BT_THREADSAFE

    // non-parallel version of btParallelFor
    btAssert( !"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE" );
    body.forLoop( iBegin, iEnd );

#endif // #if BT_THREADSAFE
}
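
// Usage sketch (illustrative, not part of this file): implement btIParallelForBody
// and dispatch it through btParallelFor. The body type below is hypothetical.
//
//   struct SumSquaresBody : public btIParallelForBody
//   {
//       mutable btScalar mPartialSum[ BT_MAX_THREAD_COUNT ];  // one slot per thread index
//
//       virtual void forLoop( int iBegin, int iEnd ) const BT_OVERRIDE
//       {
//           btScalar sum = 0;
//           for ( int i = iBegin; i < iEnd; ++i )
//           {
//               sum += btScalar( i ) * btScalar( i );
//           }
//           mPartialSum[ btGetCurrentThreadIndex() ] += sum;  // disjoint slots, no locking
//       }
//   };
//
//   // each invocation of forLoop covers at most grainSize (here 64) indices
//   SumSquaresBody body;
//   btParallelFor( 0, 1000, 64, body );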


///
/// btTaskSchedulerSequential -- non-threaded implementation of task scheduler
///                              (really just useful for comparing single-threaded vs multithreaded performance)
///
class btTaskSchedulerSequential : public btITaskScheduler
{
public:
    btTaskSchedulerSequential() : btITaskScheduler( "Sequential" ) {}
    virtual int getMaxNumThreads() const BT_OVERRIDE { return 1; }
    virtual int getNumThreads() const BT_OVERRIDE { return 1; }
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE {}
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelFor_sequential" );
        body.forLoop( iBegin, iEnd );
    }
};


#if BT_USE_OPENMP && BT_THREADSAFE
///
/// btTaskSchedulerOpenMP -- wrapper around OpenMP task scheduler
///
class btTaskSchedulerOpenMP : public btITaskScheduler
{
    int m_numThreads;
public:
    btTaskSchedulerOpenMP() : btITaskScheduler( "OpenMP" )
    {
        m_numThreads = 0;
    }
    virtual int getMaxNumThreads() const BT_OVERRIDE
    {
        return omp_get_max_threads();
    }
    virtual int getNumThreads() const BT_OVERRIDE
    {
        return m_numThreads;
    }
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
    {
        // With OpenMP, because it is a standard with various implementations, we can't
        // know for sure if every implementation has the same behavior of destroying all
        // previous threads when resizing the threadpool
        m_numThreads = ( std::max )( 1, ( std::min )( int( BT_MAX_THREAD_COUNT ), numThreads ) );
        omp_set_num_threads( 1 );  // hopefully, all previous threads get destroyed here
        omp_set_num_threads( m_numThreads );
        m_savedThreadCounter = 0;
        if ( m_isActive )
        {
            btResetThreadIndexCounter();
        }
    }
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelFor_OpenMP" );
        btPushThreadsAreRunning();
#pragma omp parallel for schedule( static, 1 )
        for ( int i = iBegin; i < iEnd; i += grainSize )
        {
            BT_PROFILE( "OpenMP_job" );
            body.forLoop( i, ( std::min )( i + grainSize, iEnd ) );
        }
        btPopThreadsAreRunning();
    }
};
#endif // #if BT_USE_OPENMP && BT_THREADSAFE


#if BT_USE_TBB && BT_THREADSAFE
///
/// btTaskSchedulerTBB -- wrapper around Intel Threading Building Blocks task scheduler
///
class btTaskSchedulerTBB : public btITaskScheduler
{
    int m_numThreads;
    tbb::task_scheduler_init* m_tbbSchedulerInit;

public:
    btTaskSchedulerTBB() : btITaskScheduler( "IntelTBB" )
    {
        m_numThreads = 0;
        m_tbbSchedulerInit = NULL;
    }
    ~btTaskSchedulerTBB()
    {
        if ( m_tbbSchedulerInit )
        {
            delete m_tbbSchedulerInit;
            m_tbbSchedulerInit = NULL;
        }
    }

    virtual int getMaxNumThreads() const BT_OVERRIDE
    {
        return tbb::task_scheduler_init::default_num_threads();
    }
    virtual int getNumThreads() const BT_OVERRIDE
    {
        return m_numThreads;
    }
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
    {
        m_numThreads = ( std::max )( 1, ( std::min )( int(BT_MAX_THREAD_COUNT), numThreads ) );
        if ( m_tbbSchedulerInit )
        {
            // destroys all previous threads
            delete m_tbbSchedulerInit;
            m_tbbSchedulerInit = NULL;
        }
        m_tbbSchedulerInit = new tbb::task_scheduler_init( m_numThreads );
        m_savedThreadCounter = 0;
        if ( m_isActive )
        {
            btResetThreadIndexCounter();
        }
    }
    struct BodyAdapter
    {
        const btIParallelForBody* mBody;

        void operator()( const tbb::blocked_range<int>& range ) const
        {
            BT_PROFILE( "TBB_job" );
            mBody->forLoop( range.begin(), range.end() );
        }
    };
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelFor_TBB" );
        // TBB dispatch
        BodyAdapter tbbBody;
        tbbBody.mBody = &body;
        btPushThreadsAreRunning();
        tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ),
            tbbBody,
            tbb::simple_partitioner()
        );
        btPopThreadsAreRunning();
    }
};
#endif // #if BT_USE_TBB && BT_THREADSAFE


#if BT_USE_PPL && BT_THREADSAFE
///
/// btTaskSchedulerPPL -- wrapper around the Microsoft Parallel Patterns Library task scheduler
///
class btTaskSchedulerPPL : public btITaskScheduler
{
    int m_numThreads;
public:
    btTaskSchedulerPPL() : btITaskScheduler( "PPL" )
    {
        m_numThreads = 0;
    }
    virtual int getMaxNumThreads() const BT_OVERRIDE
    {
        return concurrency::GetProcessorCount();
    }
    virtual int getNumThreads() const BT_OVERRIDE
    {
        return m_numThreads;
    }
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
    {
        // capping the thread count for PPL due to a thread-index issue
        const int maxThreadCount = (std::min)(int(BT_MAX_THREAD_COUNT), 31);
        m_numThreads = ( std::max )( 1, ( std::min )( maxThreadCount, numThreads ) );
        using namespace concurrency;
        if ( CurrentScheduler::Id() != -1 )
        {
            CurrentScheduler::Detach();
        }
        SchedulerPolicy policy;
        {
            // PPL seems to destroy threads when the threadpool is shrunk, but keeps reusing old threads;
            // force it to destroy old threads
            policy.SetConcurrencyLimits( 1, 1 );
            CurrentScheduler::Create( policy );
            CurrentScheduler::Detach();
        }
        policy.SetConcurrencyLimits( m_numThreads, m_numThreads );
        CurrentScheduler::Create( policy );
        m_savedThreadCounter = 0;
        if ( m_isActive )
        {
            btResetThreadIndexCounter();
        }
    }
    struct BodyAdapter
    {
        const btIParallelForBody* mBody;
        int mGrainSize;
        int mIndexEnd;

        void operator()( int i ) const
        {
            BT_PROFILE( "PPL_job" );
            mBody->forLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
        }
    };
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelFor_PPL" );
        // PPL dispatch
        BodyAdapter pplBody;
        pplBody.mBody = &body;
        pplBody.mGrainSize = grainSize;
        pplBody.mIndexEnd = iEnd;
        btPushThreadsAreRunning();
        // note: MSVC 2010 doesn't support partitioner args, so avoid them
        concurrency::parallel_for( iBegin,
            iEnd,
            grainSize,
            pplBody
        );
        btPopThreadsAreRunning();
    }
};
#endif // #if BT_USE_PPL && BT_THREADSAFE


// get the non-threaded task scheduler (always available)
btITaskScheduler* btGetSequentialTaskScheduler()
{
    static btTaskSchedulerSequential sTaskScheduler;
    return &sTaskScheduler;
}


// get the OpenMP task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetOpenMPTaskScheduler()
{
#if BT_USE_OPENMP && BT_THREADSAFE
    static btTaskSchedulerOpenMP sTaskScheduler;
    return &sTaskScheduler;
#else
    return NULL;
#endif
}


// get the Intel TBB task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetTBBTaskScheduler()
{
#if BT_USE_TBB && BT_THREADSAFE
    static btTaskSchedulerTBB sTaskScheduler;
    return &sTaskScheduler;
#else
    return NULL;
#endif
}


// get the PPL task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetPPLTaskScheduler()
{
#if BT_USE_PPL && BT_THREADSAFE
    static btTaskSchedulerPPL sTaskScheduler;
    return &sTaskScheduler;
#else
    return NULL;
#endif
}
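
// Usage sketch (illustrative, not part of this file): pick the first available
// multithreaded scheduler, fall back to the sequential one, and install it before
// stepping the simulation.
//
//   btITaskScheduler* ts = btGetTBBTaskScheduler();
//   if ( !ts ) ts = btGetOpenMPTaskScheduler();
//   if ( !ts ) ts = btGetPPLTaskScheduler();
//   if ( !ts ) ts = btGetSequentialTaskScheduler();  // always available
//   ts->setNumThreads( ts->getMaxNumThreads() );
//   btSetTaskScheduler( ts );  // must be called from the main thread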