/*
Copyright (c) 2003-2014 Erwin Coumans  http://bullet.googlecode.com

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#include "btThreads.h"
 | 
						|
#include "btQuickprof.h"
 | 
						|
#include <algorithm>  // for min and max
 | 
						|
 | 
						|
 | 
						|
#if BT_USE_OPENMP && BT_THREADSAFE
 | 
						|
 | 
						|
#include <omp.h>
 | 
						|
 | 
						|
#endif // #if BT_USE_OPENMP && BT_THREADSAFE
 | 
						|
 | 
						|
 | 
						|
#if BT_USE_PPL && BT_THREADSAFE
 | 
						|
 | 
						|
// use Microsoft Parallel Patterns Library (installed with Visual Studio 2010 and later)
 | 
						|
#include <ppl.h>  // if you get a compile error here, check whether your version of Visual Studio includes PPL
 | 
						|
// Visual Studio 2010 and later should come with it
 | 
						|
#include <concrtrm.h>  // for GetProcessorCount()
 | 
						|
 | 
						|
#endif // #if BT_USE_PPL && BT_THREADSAFE
 | 
						|
 | 
						|
 | 
						|
#if BT_USE_TBB && BT_THREADSAFE
 | 
						|
 | 
						|
// use Intel Threading Building Blocks for thread management
 | 
						|
#define __TBB_NO_IMPLICIT_LINKAGE 1
 | 
						|
#include <tbb/tbb.h>
 | 
						|
#include <tbb/task_scheduler_init.h>
 | 
						|
#include <tbb/parallel_for.h>
 | 
						|
#include <tbb/blocked_range.h>
 | 
						|
 | 
						|
#endif // #if BT_USE_TBB && BT_THREADSAFE
 | 
						|
 | 
						|
 | 
						|
#if BT_THREADSAFE
//
// Lightweight spin-mutex based on atomics.
// Ordinary system-provided mutexes (e.g. Windows critical sections) were noticeably slower,
// presumably because a failed lock attempt puts the thread to sleep and triggers costly
// context switching.
//

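// Usage sketch (illustrative only; 'sCount' and 'sCountMutex' are hypothetical names,
// not part of this file). A btSpinMutex protects a short critical section without
// putting the waiting thread to sleep:
//
//     static btSpinMutex sCountMutex;
//     static int sCount = 0;
//
//     void incrementShared()
//     {
//         sCountMutex.lock();    // spins (does not sleep) until acquired
//         ++sCount;
//         sCountMutex.unlock();
//     }
//
// Because waiters spin, this is only appropriate for very short critical sections.
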
#if __cplusplus >= 201103L

// for anything claiming full C++11 compliance, use C++11 atomics
// on GCC or Clang you need to compile with -std=c++11
#define USE_CPP11_ATOMICS 1

#elif defined( _MSC_VER )

// on MSVC, use intrinsics instead
#define USE_MSVC_INTRINSICS 1

#elif defined( __GNUC__ ) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))

// available since GCC 4.7 and some versions of clang
// todo: check for clang
#define USE_GCC_BUILTIN_ATOMICS 1

#elif defined( __GNUC__ ) && (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)

// available since GCC 4.1
#define USE_GCC_BUILTIN_ATOMICS_OLD 1

#endif

#if USE_CPP11_ATOMICS

#include <atomic>
#include <thread>

#define THREAD_LOCAL_STATIC thread_local static

bool btSpinMutex::tryLock()
{
    std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock);
    int expected = 0;
    return std::atomic_compare_exchange_weak_explicit( aDest, &expected, int(1), std::memory_order_acq_rel, std::memory_order_acquire );
}

void btSpinMutex::lock()
{
    // note: this lock does not sleep the thread
    while (! tryLock())
    {
        // spin
    }
}

void btSpinMutex::unlock()
{
    std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock);
    std::atomic_store_explicit( aDest, int(0), std::memory_order_release );
}

#elif USE_MSVC_INTRINSICS

#define WIN32_LEAN_AND_MEAN

#include <windows.h>
#include <intrin.h>

#define THREAD_LOCAL_STATIC __declspec( thread ) static

bool btSpinMutex::tryLock()
{
    volatile long* aDest = reinterpret_cast<long*>(&mLock);
    return ( 0 == _InterlockedCompareExchange( aDest, 1, 0) );
}

void btSpinMutex::lock()
{
    // note: this lock does not sleep the thread
    while (! tryLock())
    {
        // spin
    }
}

void btSpinMutex::unlock()
{
    volatile long* aDest = reinterpret_cast<long*>( &mLock );
    _InterlockedExchange( aDest, 0 );
}

#elif USE_GCC_BUILTIN_ATOMICS

#define THREAD_LOCAL_STATIC static __thread

bool btSpinMutex::tryLock()
{
    int expected = 0;
    bool weak = false;
    const int memOrderSuccess = __ATOMIC_ACQ_REL;
    const int memOrderFail = __ATOMIC_ACQUIRE;
    return __atomic_compare_exchange_n(&mLock, &expected, int(1), weak, memOrderSuccess, memOrderFail);
}

void btSpinMutex::lock()
{
    // note: this lock does not sleep the thread
    while (! tryLock())
    {
        // spin
    }
}

void btSpinMutex::unlock()
{
    __atomic_store_n(&mLock, int(0), __ATOMIC_RELEASE);
}

#elif USE_GCC_BUILTIN_ATOMICS_OLD

#define THREAD_LOCAL_STATIC static __thread

bool btSpinMutex::tryLock()
{
    return __sync_bool_compare_and_swap(&mLock, int(0), int(1));
}

void btSpinMutex::lock()
{
    // note: this lock does not sleep the thread
    while (! tryLock())
    {
        // spin
    }
}

void btSpinMutex::unlock()
{
    // write 0
    __sync_fetch_and_and(&mLock, int(0));
}

#else //#elif USE_GCC_BUILTIN_ATOMICS_OLD

#error "no threading primitives defined -- unknown platform"

#endif  //#else //#elif USE_GCC_BUILTIN_ATOMICS_OLD

#else //#if BT_THREADSAFE

// These should never be called
void btSpinMutex::lock()
{
    btAssert( !"unimplemented btSpinMutex::lock() called" );
}

void btSpinMutex::unlock()
{
    btAssert( !"unimplemented btSpinMutex::unlock() called" );
}

bool btSpinMutex::tryLock()
{
    btAssert( !"unimplemented btSpinMutex::tryLock() called" );
    return true;
}

#define THREAD_LOCAL_STATIC static

#endif // #else //#if BT_THREADSAFE

struct ThreadsafeCounter
{
    unsigned int mCounter;
    btSpinMutex mMutex;

    ThreadsafeCounter()
    {
        mCounter = 0;
        --mCounter;  // first count should come back 0
    }

    unsigned int getNext()
    {
        // no need to optimize this with atomics, it is only called ONCE per thread!
        mMutex.lock();
        mCounter++;
        if ( mCounter >= BT_MAX_THREAD_COUNT )
        {
            btAssert( !"thread counter exceeded" );
            // wrap back to the first worker index
            mCounter = 1;
        }
        unsigned int val = mCounter;
        mMutex.unlock();
        return val;
    }
};

static btITaskScheduler* gBtTaskScheduler;
static int gThreadsRunningCounter = 0;  // useful for detecting nested parallel-for calls
static btSpinMutex gThreadsRunningCounterMutex;
static ThreadsafeCounter gThreadCounter;

//
// BT_DETECT_BAD_THREAD_INDEX tries to detect when multiple threads are assigned the same thread index.
//
// It is a developer option to test whether certain assumptions about how the
// task scheduler manages its threads hold true.
// The main assumption is:
//   - when the threadpool is resized, the task scheduler either
//      1. destroys all worker threads and creates all new ones in the correct number, OR
//      2. never destroys a worker thread
//
// We make that assumption because we can't easily enumerate the worker threads of a task scheduler
// to assign nice sequential thread-indexes. We also do not get notified if a worker thread is destroyed,
// so we can't tell when a thread-index is no longer being used.
// We allocate thread-indexes as needed with a sequential global thread counter.
//
// Our simple thread-counting scheme falls apart if the task scheduler destroys some threads but
// continues to re-use other threads while the application repeatedly resizes the thread pool of the
// task scheduler.
// In order to prevent the thread counter from exceeding the global max (BT_MAX_THREAD_COUNT), we
// wrap the thread counter back to 1. This should only happen if the worker threads have all been
// destroyed and re-created.
//
// BT_DETECT_BAD_THREAD_INDEX only works for Win32 right now,
// but could be adapted to work with pthreads.
#define BT_DETECT_BAD_THREAD_INDEX 0

#if BT_DETECT_BAD_THREAD_INDEX

typedef DWORD ThreadId_t;
const static ThreadId_t kInvalidThreadId = 0;
ThreadId_t gDebugThreadIds[ BT_MAX_THREAD_COUNT ];

static ThreadId_t getDebugThreadId()
{
    return GetCurrentThreadId();
}

#endif // #if BT_DETECT_BAD_THREAD_INDEX

// return a unique index per thread, main thread is 0, worker threads are in [1, BT_MAX_THREAD_COUNT)
unsigned int btGetCurrentThreadIndex()
{
    const unsigned int kNullIndex = ~0U;
    THREAD_LOCAL_STATIC unsigned int sThreadIndex = kNullIndex;
    if ( sThreadIndex == kNullIndex )
    {
        sThreadIndex = gThreadCounter.getNext();
        btAssert( sThreadIndex < BT_MAX_THREAD_COUNT );
    }
#if BT_DETECT_BAD_THREAD_INDEX
    if ( gBtTaskScheduler && sThreadIndex > 0 )
    {
        ThreadId_t tid = getDebugThreadId();
        // if not set
        if ( gDebugThreadIds[ sThreadIndex ] == kInvalidThreadId )
        {
            // set it
            gDebugThreadIds[ sThreadIndex ] = tid;
        }
        else
        {
            if ( gDebugThreadIds[ sThreadIndex ] != tid )
            {
                // this could indicate the task scheduler is breaking our assumptions about
                // how threads are managed when the threadpool is resized
                btAssert( !"there are 2 or more threads with the same thread-index!" );
                __debugbreak();
            }
        }
    }
#endif // #if BT_DETECT_BAD_THREAD_INDEX
    return sThreadIndex;
}

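// Usage sketch (illustrative only; 'sPartialCounts' and 'countHit' are hypothetical,
// not part of Bullet). The per-thread index enables lock-free accumulation into
// per-thread slots:
//
//     static int sPartialCounts[ BT_MAX_THREAD_COUNT ];
//
//     void countHit()
//     {
//         // each thread touches only its own slot, so no mutex is needed
//         sPartialCounts[ btGetCurrentThreadIndex() ]++;
//     }
//
// The main thread can sum the slots once all workers have finished.
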
bool btIsMainThread()
{
    return btGetCurrentThreadIndex() == 0;
}

void btResetThreadIndexCounter()
{
    // for when all current worker threads are destroyed
    btAssert( btIsMainThread() );
    gThreadCounter.mCounter = 0;
}

btITaskScheduler::btITaskScheduler( const char* name )
{
    m_name = name;
    m_savedThreadCounter = 0;
    m_isActive = false;
}

void btITaskScheduler::activate()
{
    // gThreadCounter is used to assign a thread-index to each worker thread in a task scheduler.
    // The main thread is always thread-index 0, and worker threads are numbered from 1 to 63 (BT_MAX_THREAD_COUNT-1).
    // The thread-indexes need to be unique amongst the threads that can be running simultaneously.
    // Since only one task scheduler can be used at a time, it is OK for a pair of threads that belong to different
    // task schedulers to share the same thread index because they can't be running at the same time.
    // So each task scheduler needs to keep its own thread counter value.
    if ( !m_isActive )
    {
        gThreadCounter.mCounter = m_savedThreadCounter;  // restore saved thread counter
        m_isActive = true;
    }
}

void btITaskScheduler::deactivate()
{
    if ( m_isActive )
    {
        m_savedThreadCounter = gThreadCounter.mCounter;  // save thread counter
        m_isActive = false;
    }
}

void btPushThreadsAreRunning()
{
    gThreadsRunningCounterMutex.lock();
    gThreadsRunningCounter++;
    gThreadsRunningCounterMutex.unlock();
}

void btPopThreadsAreRunning()
{
    gThreadsRunningCounterMutex.lock();
    gThreadsRunningCounter--;
    gThreadsRunningCounterMutex.unlock();
}

bool btThreadsAreRunning()
{
    return gThreadsRunningCounter != 0;
}

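// Usage sketch (illustrative only; 'runLoop' is a hypothetical helper, not part of
// Bullet). Callers can consult btThreadsAreRunning() to avoid dispatching a nested
// parallel-for from inside a worker thread:
//
//     void runLoop( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body )
//     {
//         if ( btThreadsAreRunning() )
//         {
//             body.forLoop( iBegin, iEnd );  // already parallel; run this range inline
//         }
//         else
//         {
//             btParallelFor( iBegin, iEnd, grainSize, body );
//         }
//     }
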
void btSetTaskScheduler( btITaskScheduler* ts )
{
    int threadId = btGetCurrentThreadIndex();  // make sure we call this on the main thread at least once before any workers run
    if ( threadId != 0 )
    {
        btAssert( !"btSetTaskScheduler must be called from the main thread!" );
        return;
    }
    if ( gBtTaskScheduler )
    {
        // deactivate old task scheduler
        gBtTaskScheduler->deactivate();
    }
    gBtTaskScheduler = ts;
    if ( ts )
    {
        // activate new task scheduler
        ts->activate();
    }
}

btITaskScheduler* btGetTaskScheduler()
{
    return gBtTaskScheduler;
}

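// Setup sketch (illustrative only): pick the first scheduler this build provides and
// install it from the main thread. The btGet*TaskScheduler() factories are defined at
// the end of this file; the sequential scheduler is always available as a fallback:
//
//     btITaskScheduler* ts = btGetTBBTaskScheduler();
//     if ( !ts ) ts = btGetOpenMPTaskScheduler();
//     if ( !ts ) ts = btGetPPLTaskScheduler();
//     if ( !ts ) ts = btGetSequentialTaskScheduler();  // always non-null
//     ts->setNumThreads( ts->getMaxNumThreads() );
//     btSetTaskScheduler( ts );
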
void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body )
{
#if BT_THREADSAFE

#if BT_DETECT_BAD_THREAD_INDEX
    if ( !btThreadsAreRunning() )
    {
        // clear out thread ids
        for ( int i = 0; i < BT_MAX_THREAD_COUNT; ++i )
        {
            gDebugThreadIds[ i ] = kInvalidThreadId;
        }
    }
#endif // #if BT_DETECT_BAD_THREAD_INDEX

    btAssert( gBtTaskScheduler != NULL );  // call btSetTaskScheduler() with a valid task scheduler first!
    gBtTaskScheduler->parallelFor( iBegin, iEnd, grainSize, body );

#else // #if BT_THREADSAFE

    // non-parallel version of btParallelFor
    btAssert( !"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE" );
    body.forLoop( iBegin, iEnd );

#endif // #if BT_THREADSAFE
}

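// Usage sketch (illustrative only; 'ScaleBody', 'someArray' and 'arraySize' are
// hypothetical names). A parallel-for body derives from btIParallelForBody and
// processes the half-open range [iBegin, iEnd) it is handed:
//
//     struct ScaleBody : public btIParallelForBody
//     {
//         btScalar* mData;
//         virtual void forLoop( int iBegin, int iEnd ) const BT_OVERRIDE
//         {
//             for ( int i = iBegin; i < iEnd; ++i )
//             {
//                 mData[ i ] *= btScalar( 2 );  // per-element work
//             }
//         }
//     };
//
//     ScaleBody body;
//     body.mData = someArray;
//     btParallelFor( 0, arraySize, 64, body );  // grainSize of 64 elements per job
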
///
/// btTaskSchedulerSequential -- non-threaded implementation of task scheduler
///                              (really just useful for testing performance of single threaded vs multi)
///
class btTaskSchedulerSequential : public btITaskScheduler
{
public:
    btTaskSchedulerSequential() : btITaskScheduler( "Sequential" ) {}
    virtual int getMaxNumThreads() const BT_OVERRIDE { return 1; }
    virtual int getNumThreads() const BT_OVERRIDE { return 1; }
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE {}
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelFor_sequential" );
        body.forLoop( iBegin, iEnd );
    }
};

#if BT_USE_OPENMP && BT_THREADSAFE
///
/// btTaskSchedulerOpenMP -- wrapper around the OpenMP task scheduler
///
class btTaskSchedulerOpenMP : public btITaskScheduler
{
    int m_numThreads;
public:
    btTaskSchedulerOpenMP() : btITaskScheduler( "OpenMP" )
    {
        m_numThreads = 0;
    }
    virtual int getMaxNumThreads() const BT_OVERRIDE
    {
        return omp_get_max_threads();
    }
    virtual int getNumThreads() const BT_OVERRIDE
    {
        return m_numThreads;
    }
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
    {
        // OpenMP is a standard with many implementations, so we can't know for sure
        // that every implementation destroys all previous threads when the threadpool is resized
        m_numThreads = ( std::max )( 1, ( std::min )( int( BT_MAX_THREAD_COUNT ), numThreads ) );
        omp_set_num_threads( 1 );  // hopefully, all previous threads get destroyed here
        omp_set_num_threads( m_numThreads );
        m_savedThreadCounter = 0;
        if ( m_isActive )
        {
            btResetThreadIndexCounter();
        }
    }
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelFor_OpenMP" );
        btPushThreadsAreRunning();
#pragma omp parallel for schedule( static, 1 )
        for ( int i = iBegin; i < iEnd; i += grainSize )
        {
            BT_PROFILE( "OpenMP_job" );
            body.forLoop( i, ( std::min )( i + grainSize, iEnd ) );
        }
        btPopThreadsAreRunning();
    }
};
#endif // #if BT_USE_OPENMP && BT_THREADSAFE

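// Worked example of the chunking above (illustrative): with iBegin=0, iEnd=100 and
// grainSize=32, the loop dispatches jobs covering [0,32), [32,64), [64,96) and
// [96,100); std::min clamps the final chunk to iEnd.
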
#if BT_USE_TBB && BT_THREADSAFE
///
/// btTaskSchedulerTBB -- wrapper around the Intel Threading Building Blocks task scheduler
///
class btTaskSchedulerTBB : public btITaskScheduler
{
    int m_numThreads;
    tbb::task_scheduler_init* m_tbbSchedulerInit;

public:
    btTaskSchedulerTBB() : btITaskScheduler( "IntelTBB" )
    {
        m_numThreads = 0;
        m_tbbSchedulerInit = NULL;
    }
    ~btTaskSchedulerTBB()
    {
        if ( m_tbbSchedulerInit )
        {
            delete m_tbbSchedulerInit;
            m_tbbSchedulerInit = NULL;
        }
    }

    virtual int getMaxNumThreads() const BT_OVERRIDE
    {
        return tbb::task_scheduler_init::default_num_threads();
    }
    virtual int getNumThreads() const BT_OVERRIDE
    {
        return m_numThreads;
    }
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
    {
        m_numThreads = ( std::max )( 1, ( std::min )( int( BT_MAX_THREAD_COUNT ), numThreads ) );
        if ( m_tbbSchedulerInit )
        {
            // destroys all previous threads
            delete m_tbbSchedulerInit;
            m_tbbSchedulerInit = NULL;
        }
        m_tbbSchedulerInit = new tbb::task_scheduler_init( m_numThreads );
        m_savedThreadCounter = 0;
        if ( m_isActive )
        {
            btResetThreadIndexCounter();
        }
    }
    struct BodyAdapter
    {
        const btIParallelForBody* mBody;

        void operator()( const tbb::blocked_range<int>& range ) const
        {
            BT_PROFILE( "TBB_job" );
            mBody->forLoop( range.begin(), range.end() );
        }
    };
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelFor_TBB" );
        // TBB dispatch
        BodyAdapter tbbBody;
        tbbBody.mBody = &body;
        btPushThreadsAreRunning();
        tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ),
            tbbBody,
            tbb::simple_partitioner()
        );
        btPopThreadsAreRunning();
    }
};
#endif // #if BT_USE_TBB && BT_THREADSAFE

#if BT_USE_PPL && BT_THREADSAFE
///
/// btTaskSchedulerPPL -- wrapper around the Microsoft Parallel Patterns Library task scheduler
///
class btTaskSchedulerPPL : public btITaskScheduler
{
    int m_numThreads;
public:
    btTaskSchedulerPPL() : btITaskScheduler( "PPL" )
    {
        m_numThreads = 0;
    }
    virtual int getMaxNumThreads() const BT_OVERRIDE
    {
        return concurrency::GetProcessorCount();
    }
    virtual int getNumThreads() const BT_OVERRIDE
    {
        return m_numThreads;
    }
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
    {
        // cap the thread count for PPL due to a thread-index issue
        const int maxThreadCount = ( std::min )( int( BT_MAX_THREAD_COUNT ), 31 );
        m_numThreads = ( std::max )( 1, ( std::min )( maxThreadCount, numThreads ) );
        using namespace concurrency;
        if ( CurrentScheduler::Id() != -1 )
        {
            CurrentScheduler::Detach();
        }
        SchedulerPolicy policy;
        {
            // PPL seems to destroy threads when the threadpool is shrunk, but keeps reusing old threads;
            // force it to destroy old threads
            policy.SetConcurrencyLimits( 1, 1 );
            CurrentScheduler::Create( policy );
            CurrentScheduler::Detach();
        }
        policy.SetConcurrencyLimits( m_numThreads, m_numThreads );
        CurrentScheduler::Create( policy );
        m_savedThreadCounter = 0;
        if ( m_isActive )
        {
            btResetThreadIndexCounter();
        }
    }
    struct BodyAdapter
    {
        const btIParallelForBody* mBody;
        int mGrainSize;
        int mIndexEnd;

        void operator()( int i ) const
        {
            BT_PROFILE( "PPL_job" );
            mBody->forLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
        }
    };
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelFor_PPL" );
        // PPL dispatch
        BodyAdapter pplBody;
        pplBody.mBody = &body;
        pplBody.mGrainSize = grainSize;
        pplBody.mIndexEnd = iEnd;
        btPushThreadsAreRunning();
        // note: MSVC 2010 doesn't support partitioner args, so avoid them
        concurrency::parallel_for( iBegin,
            iEnd,
            grainSize,
            pplBody
        );
        btPopThreadsAreRunning();
    }
};
#endif // #if BT_USE_PPL && BT_THREADSAFE

// get the non-threaded task scheduler (always available)
btITaskScheduler* btGetSequentialTaskScheduler()
{
    static btTaskSchedulerSequential sTaskScheduler;
    return &sTaskScheduler;
}

// get the OpenMP task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetOpenMPTaskScheduler()
{
#if BT_USE_OPENMP && BT_THREADSAFE
    static btTaskSchedulerOpenMP sTaskScheduler;
    return &sTaskScheduler;
#else
    return NULL;
#endif
}

// get the Intel TBB task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetTBBTaskScheduler()
{
#if BT_USE_TBB && BT_THREADSAFE
    static btTaskSchedulerTBB sTaskScheduler;
    return &sTaskScheduler;
#else
    return NULL;
#endif
}

// get the PPL task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetPPLTaskScheduler()
{
#if BT_USE_PPL && BT_THREADSAFE
    static btTaskSchedulerPPL sTaskScheduler;
    return &sTaskScheduler;
#else
    return NULL;
#endif
}