Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/numa threads #75

Open
wants to merge 6 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions bladebit.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@
<ClInclude Include="src\Types.h"/>
<ClInclude Include="src\Util.h"/>
<ClInclude Include="src\util\Log.h"/>
<ClInclude Include="src\Version.h"/>
<ClInclude Include="src\View.h"/>
</ItemGroup>
<ItemGroup>
Expand Down Expand Up @@ -344,6 +345,12 @@
<ExcludedFromBuild Condition="'$(Configuration)'=='debug.arm'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)'=='release.arm'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="src\test\test_numa_sort.cpp">
<ExcludedFromBuild Condition="'$(Configuration)'=='release'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)'=='debug'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)'=='debug.arm'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)'=='release.arm'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="src\threading\Semaphore.cpp"/>
<ClCompile Include="src\threading\Thread.cpp"/>
<ClCompile Include="src\threading\ThreadPool.cpp"/>
Expand Down Expand Up @@ -455,6 +462,7 @@
<ClInclude Include="src/Types.h"/>
<ClInclude Include="src/Util.h"/>
<ClInclude Include="src/util/Log.h"/>
<ClInclude Include="src/Version.h"/>
<ClInclude Include="src/View.h"/>
</ItemGroup>

Expand Down
9 changes: 9 additions & 0 deletions bladebit.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,9 @@
<ClInclude Include="src\util\Log.h">
<Filter>src\util</Filter>
</ClInclude>
<ClInclude Include="src\Version.h">
<Filter>src</Filter>
</ClInclude>
<ClInclude Include="src\View.h">
<Filter>src</Filter>
</ClInclude>
Expand Down Expand Up @@ -462,6 +465,9 @@
<ClInclude Include="src/util/Log.h">
<Filter>src/util</Filter>
</ClInclude>
<ClInclude Include="src/Version.h">
<Filter>src</Filter>
</ClInclude>
<ClInclude Include="src/View.h">
<Filter>src</Filter>
</ClInclude>
Expand Down Expand Up @@ -557,6 +563,9 @@
<ClCompile Include="src\test\test_main.cpp">
<Filter>src\test</Filter>
</ClCompile>
<ClCompile Include="src\test\test_numa_sort.cpp">
<Filter>src\test</Filter>
</ClCompile>
<ClCompile Include="src\threading\Semaphore.cpp">
<Filter>src\threading</Filter>
</ClCompile>
Expand Down
1 change: 1 addition & 0 deletions project.js
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ bladebit.configs.test = () => config({
,src: [

'src/test/test_main.cpp'
,'src/test/test_numa_sort.cpp'
// ,'src/test/TestNuma.cpp'
]

Expand Down
4 changes: 4 additions & 0 deletions src/Globals.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ struct Span
: values( values )
, length( length )
{}

/// Unchecked element access by unsigned index.
/// Returns a mutable reference even though the method is const.
inline T& operator[]( unsigned int index ) const
{
    return this->values[index];
}
/// Unchecked element access by signed index.
/// Returns a mutable reference even though the method is const.
inline T& operator[]( int index ) const
{
    return this->values[index];
}

};

typedef Span<unsigned char> ByteSpan;
13 changes: 10 additions & 3 deletions src/SysHost.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ ImplementFlagOps( VProtect );

struct NumaInfo
{
uint nodeCount; // How many NUMA nodes in the system
uint cpuCount; // Total cpu count used by nodes
Span<uint>* cpuIds; // CPU ids of each node
uint nodeCount; // How many NUMA nodes in the system
uint cpuCount; // Total cpu count used by nodes
Span<uint>* cpuIds; // CPU ids of each node
byte* cpuToNodeMap; // Gets the node a CPU belongs to for a given cpu id.
};

class SysHost
Expand Down Expand Up @@ -69,4 +70,10 @@ class SysHost
/// Set interleave NUMA mode for the specified memory regions.
/// NOTE: Pages must not yet be faulted.
static bool NumaSetMemoryInterleavedMode( void* ptr, size_t size );

/// Get the NUMA node that a memory page belongs to.
/// Returns a negative value upon failure.
/// NOTE: Pages must first be faulted on Linux.
static int NumaGetNodeFromPage( void* ptr );

};
37 changes: 28 additions & 9 deletions src/algorithm/RadixSort.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,19 @@ class RadixSort256
template<uint32 ThreadCount, typename T1, typename TK>
static void SortWithKey( ThreadPool& pool, T1* input, T1* tmp, TK* keyInput, TK* keyTmp, uint64 length );

/// Sorts `length` uint64 entries from `input`, using `tmp` as the secondary buffer, without a key.
/// Forwards to DoSort with MaxIter = 5, so the sort is limited to 5 iterations
/// instead of sizeof( uint64 ).
/// NOTE(review): presumably only the low 5 bytes of each entry are significant - confirm.
template<uint32 ThreadCount>
static void SortY( ThreadPool& pool, uint64* input, uint64* tmp, uint64 length );

/// Keyed variant of SortY: sorts `length` uint64 entries together with a uint32 key buffer pair.
/// Forwards to DoSort in SortAndGenKey mode with MaxIter = 5, so the sort is limited to
/// 5 iterations instead of sizeof( uint64 ).
/// NOTE(review): presumably only the low 5 bytes of each entry are significant - confirm.
template<uint32 ThreadCount>
static void SortYWithKey( ThreadPool& pool, uint64* input, uint64* tmp, uint32* keyInput, uint32* keyTmp, uint64 length );

private:

template<uint32 ThreadCount, SortMode Mode, typename T1, typename TK>
template<uint32 ThreadCount, SortMode Mode, typename T1, typename TK, int MaxIter = sizeof( T1 )>
static void DoSort( ThreadPool& pool, T1* input, T1* tmp, TK* keyInput, TK* keyTmp, uint64 length );

template<typename T1, typename T2, bool IsKeyed>
template<typename T1, typename T2, bool IsKeyed, int MaxIter = 0>
static void RadixSortThread( SortJob<T1,T2>* job );

};


Expand All @@ -69,7 +74,21 @@ inline void RadixSort256::SortWithKey( ThreadPool& pool, T1* input, T1* tmp, TK*
}

//-----------------------------------------------------------
template<uint32 ThreadCount, RadixSort256::SortMode Mode, typename T1, typename TK>
/// Un-keyed sort of `length` uint64 entries in `input`, with `tmp` as the secondary buffer.
/// The sort is capped at 5 iterations through DoSort's MaxIter template argument.
template<uint32 ThreadCount>
inline void RadixSort256::SortY( ThreadPool& pool, uint64* input, uint64* tmp, uint64 length )
{
    // Iteration cap handed to DoSort in place of its sizeof( T1 ) default.
    constexpr int yMaxIter = 5;

    // No key is sorted here: the key type is void and both key buffers are null.
    DoSort<ThreadCount, ModeSingle, uint64, void, yMaxIter>(
        pool,
        input, tmp,
        nullptr, nullptr,
        length );
}

//-----------------------------------------------------------
/// Keyed sort of `length` uint64 entries in `input`, carrying the uint32 key buffers along.
/// The sort is capped at 5 iterations through DoSort's MaxIter template argument.
template<uint32 ThreadCount>
inline void RadixSort256::SortYWithKey( ThreadPool& pool, uint64* input, uint64* tmp, uint32* keyInput, uint32* keyTmp, uint64 length )
{
    // Iteration cap handed to DoSort in place of its sizeof( T1 ) default.
    constexpr int yMaxIter = 5;

    DoSort<ThreadCount, SortAndGenKey, uint64, uint32, yMaxIter>(
        pool,
        input, tmp,
        keyInput, keyTmp,
        length );
}

//-----------------------------------------------------------
template<uint32 ThreadCount, RadixSort256::SortMode Mode, typename T1, typename TK, int MaxIter>
void inline RadixSort256::DoSort( ThreadPool& pool, T1* input, T1* tmp, TK* keyInput, TK* keyTmp, uint64 length )
{
const uint threadCount = ThreadCount > pool.ThreadCount() ? pool.ThreadCount() : ThreadCount;
Expand Down Expand Up @@ -110,22 +129,22 @@ void inline RadixSort256::DoSort( ThreadPool& pool, T1* input, T1* tmp, TK* keyI
jobs[threadCount-1].length += trailingEntries;

if constexpr ( Mode == SortAndGenKey )
pool.RunJob( RadixSortThread<T1, TK, true>, jobs, threadCount );
pool.RunJob( RadixSortThread<T1, TK, true, MaxIter>, jobs, threadCount );
else
pool.RunJob( RadixSortThread<T1, TK, false>, jobs, threadCount );
pool.RunJob( RadixSortThread<T1, TK, false, MaxIter>, jobs, threadCount );
}

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"

//-----------------------------------------------------------
template<typename T1, typename T2, bool IsKeyed>
template<typename T1, typename T2, bool IsKeyed, int MaxIter>
void RadixSort256::RadixSortThread( SortJob<T1, T2>* job )
{
constexpr uint Radix = 256;

const uint32 iterations = sizeof( T1 );
const uint32 shiftBase = 8;
constexpr uint32 iterations = MaxIter > 0 ? MaxIter : sizeof( T1 );
const uint32 shiftBase = 8;

uint32 shift = 0;

Expand Down
2 changes: 1 addition & 1 deletion src/memplot/FxSort.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ inline void SortFx(
GenSortKey<MAX_JOBS>( pool, length, sortKey );

// Sort on y along with the sort key
RadixSort256::SortWithKey<MAX_JOBS>( pool,
RadixSort256::SortYWithKey<MAX_JOBS>( pool,
yBuffer, yTmp,
sortKey, sortKeyTmp,
length
Expand Down
Loading