Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 37 additions & 30 deletions src/coreclr/debug/daccess/daccess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1544,24 +1544,24 @@ DAC_INSTANCE*
DacInstanceManager::Add(DAC_INSTANCE* inst)
{
_ASSERTE(inst != NULL);
#ifdef _DEBUG
bool isInserted = (m_hash.find(inst->addr) == m_hash.end());
#endif //_DEBUG
DAC_INSTANCE *(&target) = m_hash[inst->addr];
_ASSERTE(!isInserted || target == NULL);
if( target != NULL )
const KeyValuePair<TADDR, DAC_INSTANCE*>* pEntry = m_hash.LookupPtr(inst->addr);
if (pEntry != NULL)
{
DAC_INSTANCE* existing = pEntry->Value();

//This is necessary to preserve the semantics of Supersede, however, it
//is more or less dead code.
inst->next = target;
target = inst;
inst->next = existing;

//verify descending order
_ASSERTE(inst->size >= target->size);
_ASSERTE(inst->size >= existing->size);

m_hash.ReplacePtr(pEntry, KeyValuePair<TADDR, DAC_INSTANCE*>(inst->addr, inst));
}
else
{
target = inst;
inst->next = NULL;
m_hash.Add(KeyValuePair<TADDR, DAC_INSTANCE*>(inst->addr, inst));
}

return inst;
Expand Down Expand Up @@ -1803,15 +1803,13 @@ DacInstanceManager::Find(TADDR addr)
DAC_INSTANCE*
DacInstanceManager::Find(TADDR addr)
{
    // Look up the cached instance chain head for this exact target address.
    // Returns NULL when no instance has been cached at 'addr'.
    const KeyValuePair<TADDR, DAC_INSTANCE*>* pEntry = m_hash.LookupPtr(addr);
    if (pEntry != NULL)
    {
        return pEntry->Value();
    }

    return NULL;
}
#endif // if defined(DAC_HASHTABLE)

Expand Down Expand Up @@ -1887,12 +1885,11 @@ DacInstanceManager::Supersede(DAC_INSTANCE* inst)
// later cleanup.
//

DacInstanceHashIterator iter = m_hash.find(inst->addr);
if( iter == m_hash.end() )
const KeyValuePair<TADDR, DAC_INSTANCE*>* pEntry = m_hash.LookupPtr(inst->addr);
if (pEntry == NULL)
return;

DAC_INSTANCE** bucket = &(iter->second);
DAC_INSTANCE* cur = *bucket;
DAC_INSTANCE* cur = pEntry->Value();
DAC_INSTANCE* prev = NULL;
//walk through the chain looking for this particular instance
while (cur)
Expand All @@ -1901,7 +1898,15 @@ DacInstanceManager::Supersede(DAC_INSTANCE* inst)
{
if (!prev)
{
*bucket = inst->next;
// Removing the head of the chain.
if (inst->next != NULL)
{
m_hash.ReplacePtr(pEntry, KeyValuePair<TADDR, DAC_INSTANCE*>(inst->addr, inst->next));
}
else
{
m_hash.Remove(inst->addr);
}
}
else
{
Expand Down Expand Up @@ -1982,7 +1987,7 @@ void DacInstanceManager::Flush(bool fSaveBlock)
}
}
#else //DAC_HASHTABLE
m_hash.clear();
m_hash.RemoveAll();
#endif //DAC_HASHTABLE

InitEmpty();
Expand Down Expand Up @@ -2021,18 +2026,17 @@ DacInstanceManager::ClearEnumMemMarker(void)
void
DacInstanceManager::ClearEnumMemMarker(void)
{
ULONG i;
DAC_INSTANCE* inst;

DacInstanceHashIterator end = m_hash.end();
DacInstanceHashIterator end = m_hash.End();
/* REVISIT_TODO Fri 10/20/2006
* This might have an issue, since it might miss chained entries off of
* ->next. However, ->next is going away, and for all intents and
* purposes, this never happens.
*/
for( DacInstanceHashIterator cur = m_hash.begin(); cur != end; ++cur )
for (DacInstanceHashIterator cur = m_hash.Begin(); cur != end; ++cur)
{
cur->second->enumMem = 0;
cur->Value()->enumMem = 0;
}

for (inst = m_superseded; inst; inst = inst->next)
Expand Down Expand Up @@ -2144,10 +2148,10 @@ DacInstanceManager::DumpAllInstances(
int numInBucket = 0;
#endif // #if defined(DAC_MEASURE_PERF)

DacInstanceHashIterator end = m_hash.end();
for (DacInstanceHashIterator cur = m_hash.begin(); end != cur; ++cur)
DacInstanceHashIterator end = m_hash.End();
for (DacInstanceHashIterator cur = m_hash.Begin(); end != cur; ++cur)
{
inst = cur->second;
inst = cur->Value();

// Only report those we intended to.
// So far, only metadata is excluded!
Expand Down Expand Up @@ -3270,6 +3274,9 @@ ClrDataAccess::Flush(void)
//
m_mdImports.Flush();

// Free cached patch entries for thread unwinding
m_patchCache.Flush();

// Free instance memory.
m_instances.Flush();

Expand Down
71 changes: 48 additions & 23 deletions src/coreclr/debug/daccess/dacfn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1505,43 +1505,68 @@ HRESULT DacReplacePatchesInHostMemory(MemoryRange range, PVOID pBuffer)
return S_OK;
}

if (g_dacImpl == nullptr)
{
return E_UNEXPECTED;
}

// Cache the patches as the target is not running during DAC operations, and hash
// table iteration is pretty slow.
const SArray<DacPatchCacheEntry>& entries = g_dacImpl->m_patchCache.GetEntries();

CORDB_ADDRESS address = (CORDB_ADDRESS)(dac_cast<TADDR>(range.StartAddress()));
SIZE_T cbSize = range.Size();

for (COUNT_T i = 0; i < entries.GetCount(); i++)
{
const DacPatchCacheEntry& entry = entries[i];

if (IsPatchInRequestedRange(address, cbSize, entry.address))
{
CORDbgSetInstructionEx(reinterpret_cast<PBYTE>(pBuffer), address, entry.address, entry.opcode, cbSize);
}
}

return S_OK;
}

// Returns the cached debugger patch entries, lazily populating the cache
// on first access (Populate is a no-op once m_isPopulated is set).
const SArray<DacPatchCacheEntry>& DacPatchCache::GetEntries()
{
    Populate();
    return m_entries;
}

// Reads the target's debugger patch table into m_entries. Runs at most once
// per Flush cycle: the target is not running during DAC operations, so the
// patch set is stable while we enumerate memory. Entries with a NULL patch
// address (unbound patches) are skipped.
void DacPatchCache::Populate()
{
    SUPPORTS_DAC;

    if (m_isPopulated)
    {
        return;
    }

    // Clear any stale entries from previous failed population attempts.
    m_entries.Clear();

    HASHFIND info;

    DebuggerPatchTable * pTable = DebuggerController::GetPatchTable();
    DebuggerControllerPatch * pPatch = pTable->GetFirstPatch(&info);

    // <PERF>
    // The unwinder needs to read the stack very often to restore pushed registers, retrieve the
    // return address, etc. However, stack addresses should never be patched.
    // One way to optimize this code is to pass the stack base and the stack limit of the thread
    // to the patch-replacement path and use those two values to filter out stack addresses.
    //
    // Another thing we can do is instead of enumerating the patches, we could enumerate the address.
    // This is more efficient when we have a large number of patches and a small memory range. Perhaps
    // we could do a hybrid approach, i.e. use the size of the range and the number of patches to dynamically
    // determine which enumeration is more efficient.
    // </PERF>
    while (pPatch != NULL)
    {
        CORDB_ADDRESS patchAddress = (CORDB_ADDRESS)dac_cast<TADDR>(pPatch->address);

        if (patchAddress != NULL)
        {
            DacPatchCacheEntry entry;
            entry.address = patchAddress;
            entry.opcode = pPatch->opcode;
            m_entries.Append(entry);
        }

        pPatch = pTable->GetNextPatch(&info);
    }

    m_isPopulated = true;
}
114 changes: 62 additions & 52 deletions src/coreclr/debug/daccess/dacimpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,8 @@
#include "gcinterface.dac.h"
//---------------------------------------------------------------------------------------
// Setting DAC_HASHTABLE tells the DAC to use the hand rolled hashtable for
// storing code:DAC_INSTANCE . Otherwise, the DAC uses the STL unordered_map to.
// storing code:DAC_INSTANCE . Otherwise, the DAC uses SHash.

#define DAC_HASHTABLE

#ifndef DAC_HASHTABLE
#pragma push_macro("return")
#undef return
#include <unordered_map>
#pragma pop_macro("return")
#endif //DAC_HASHTABLE
extern CRITICAL_SECTION g_dacCritSec;

// Convert between CLRDATA_ADDRESS and TADDR.
Expand Down Expand Up @@ -732,54 +724,28 @@ class DacInstanceManager
HashInstanceKeyBlock* m_hash[DAC_INSTANCE_HASH_SIZE];
#else //DAC_HASHTABLE

// We're going to use the STL unordered_map for our instance hash.
// This has the benefit of scaling to different workloads appropriately (as opposed to having a
// fixed number of buckets).

class DacHashCompare : public std::hash_compare<TADDR>
// SHash-based hash table for DAC instances, keyed by target address.
// The custom hash function avoids pseudo-randomizing in favor of a simple
// shift that clusters nearby addresses together. This improves access
// locality and has a significant positive impact on minidump library
// performance when enumerating entries during dump generation (as dbghelp
// has to coalesce adjacent memory regions).
class DacInstanceSHashTraits : public NonDacAwareSHashTraits<MapSHashTraits<TADDR, DAC_INSTANCE*>>
{
public:
// Custom hash function
// The default hash function uses a pseudo-randomizing function to get a random
// distribution. In our case, we'd actually like a more even distribution to get
// better access locality (see comments for DAC_INSTANCE_HASH_BITS).
//
// Also, when enumerating the hash during dump generation, clustering nearby addresses
// together can have a significant positive impact on the performance of the minidump
// library (at least the un-optimized version 6.5 linked into our dw20.exe - on Vista+
// we use the OS's version 6.7+ with radically improved perf characteristics). Having
// a random distribution is actually the worst-case because it means most blocks won't
// be merged until near the end, and a large number of intermediate blocks will have to
// be searched with each call.
//
// The default pseudo-randomizing function also requires a call to ldiv which shows up as
// a 3%-5% perf hit in most perf-sensitive scenarios, so this should also always be
// faster.
inline size_t operator()(const TADDR& keyval) const
typedef MapSHashTraits<TADDR, DAC_INSTANCE*> PARENT;
typedef PARENT::element_t element_t;
typedef PARENT::count_t count_t;
typedef PARENT::key_t key_t;

static count_t Hash(key_t k)
{
return (unsigned)(keyval >>DAC_INSTANCE_HASH_SHIFT);
return (count_t)(k >> DAC_INSTANCE_HASH_SHIFT);
}

// Explicitly bring in the two-argument comparison function from the base class (just less-than)
// This is necessary because once we override one form of operator() above, we don't automatically
// get the others by C++ inheritance rules.
using std::hash_compare<TADDR>::operator();

#ifdef NIDUMP_CUSTOMIZED_DAC_HASH // not set
//this particular number is supposed to be roughly the same amount of
//memory as the old code (buckets * number of entries in the old
//blocks.)
//disabled for now. May tweak implementation later. It turns out that
//having a large number of initial buckets is excellent for nidump, but it
// is terrible for most other scenarios due to the cost of clearing them at
// every Flush. Once there is a better perf suite, we can tweak these values more.
static const size_t min_buckets = DAC_INSTANCE_HASH_SIZE * 256;
#endif

};
typedef std::unordered_map<TADDR, DAC_INSTANCE*, DacHashCompare > DacInstanceHash;
typedef DacInstanceHash::value_type DacInstanceHashValue;
typedef DacInstanceHash::iterator DacInstanceHashIterator;

typedef SHash<DacInstanceSHashTraits> DacInstanceHash;
typedef DacInstanceHash::Iterator DacInstanceHashIterator;
DacInstanceHash m_hash;
#endif //DAC_HASHTABLE

Expand All @@ -795,6 +761,48 @@ class DacStreamManager;
#endif // FEATURE_MINIMETADATA_IN_TRIAGEDUMPS


//----------------------------------------------------------------------------
//
// DacPatchCache - Caches debugger breakpoint patches from the target process.
//
// DacReplacePatchesInHostMemory needs to iterate the target's patch hash table
// to replace breakpoint instructions with original opcodes. This iteration is
// expensive because it walks all hash buckets and entries and it will hit either a
// hash lookup in the instance cache or a cache miss + remote read. Since the target
// is not running during DAC operations, the patches should not change while we are performing
// memory enumeration, but if they do we'll miss them until the next time Flush() gets called.
//
//----------------------------------------------------------------------------

// One cached debugger patch: the patched target address and the original
// opcode the patch displaced (used to undo the patch in read buffers).
struct DacPatchCacheEntry
{
    CORDB_ADDRESS address;  // target address where the patch is applied
    PRD_TYPE opcode;        // original opcode saved by the debugger patch
};

// Lazily-populated cache of the target's debugger patches (see the
// DacPatchCache comment block above for motivation).
class DacPatchCache
{
public:
    DacPatchCache()
        : m_isPopulated(false)
    {
    }

    // Returns the cached entries, populating the cache on first use.
    const SArray<DacPatchCacheEntry>& GetEntries();

    // Discards all cached entries; the next GetEntries call re-reads the
    // target's patch table. Called from ClrDataAccess::Flush.
    void Flush()
    {
        m_entries.Clear();
        m_isPopulated = false;
    }

private:
    // Fills m_entries from the target's patch table; no-op once populated.
    void Populate();

    SArray<DacPatchCacheEntry> m_entries;  // cached patch address/opcode pairs
    bool m_isPopulated;                    // true once m_entries is valid
};

//----------------------------------------------------------------------------
//
// ClrDataAccess.
Expand Down Expand Up @@ -1408,6 +1416,8 @@ class ClrDataAccess
ULONG32 m_instanceAge;
bool m_debugMode;

DacPatchCache m_patchCache;

#ifdef FEATURE_MINIMETADATA_IN_TRIAGEDUMPS

protected:
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/debug/daccess/enummem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2000,6 +2000,15 @@ ClrDataAccess::EnumMemoryRegions(IN ICLRDataEnumMemoryRegionsCallback* callback,
{
DacLogMessage("EnumMemoryRegions(CLRDATA_ENUM_MEM_HEAP2)\n");
}
else if (g_EnableFastHeapDumps != 0)
{
// When the target process had DOTNET_EnableFastHeapDumps set,
// use HEAP2 which dumps loader heap pages in bulk instead of
// relying only on per-structure heap enumeration. This is
// significantly faster for large heap dump scenarios.
flags = CLRDATA_ENUM_MEM_HEAP2;
DacLogMessage("EnumMemoryRegions(CLRDATA_ENUM_MEM_HEAP promoted to HEAP2 via DOTNET_EnableFastHeapDumps)\n");
}
else
{
flags = CLRDATA_ENUM_MEM_HEAP;
Expand Down
Loading
Loading