Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 37 additions & 30 deletions src/coreclr/debug/daccess/daccess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1544,24 +1544,24 @@ DAC_INSTANCE*
DacInstanceManager::Add(DAC_INSTANCE* inst)
{
_ASSERTE(inst != NULL);
#ifdef _DEBUG
bool isInserted = (m_hash.find(inst->addr) == m_hash.end());
#endif //_DEBUG
DAC_INSTANCE *(&target) = m_hash[inst->addr];
_ASSERTE(!isInserted || target == NULL);
if( target != NULL )
const KeyValuePair<TADDR, DAC_INSTANCE*>* pEntry = m_hash.LookupPtr(inst->addr);
if (pEntry != NULL)
{
DAC_INSTANCE* existing = pEntry->Value();

//This is necessary to preserve the semantics of Supersede, however, it
//is more or less dead code.
inst->next = target;
target = inst;
inst->next = existing;

//verify descending order
_ASSERTE(inst->size >= target->size);
_ASSERTE(inst->size >= existing->size);

m_hash.ReplacePtr(pEntry, KeyValuePair<TADDR, DAC_INSTANCE*>(inst->addr, inst));
}
else
{
target = inst;
inst->next = NULL;
m_hash.Add(KeyValuePair<TADDR, DAC_INSTANCE*>(inst->addr, inst));
}

return inst;
Expand Down Expand Up @@ -1803,15 +1803,13 @@ DacInstanceManager::Find(TADDR addr)
DAC_INSTANCE*
DacInstanceManager::Find(TADDR addr)
{
    // Look up the cached instance chain head for this exact target address.
    // Returns NULL when no instance has been cached at 'addr'.
    const KeyValuePair<TADDR, DAC_INSTANCE*>* pEntry = m_hash.LookupPtr(addr);
    if (pEntry != NULL)
    {
        return pEntry->Value();
    }

    return NULL;
}
#endif // if defined(DAC_HASHTABLE)

Expand Down Expand Up @@ -1887,12 +1885,11 @@ DacInstanceManager::Supersede(DAC_INSTANCE* inst)
// later cleanup.
//

DacInstanceHashIterator iter = m_hash.find(inst->addr);
if( iter == m_hash.end() )
const KeyValuePair<TADDR, DAC_INSTANCE*>* pEntry = m_hash.LookupPtr(inst->addr);
if (pEntry == NULL)
return;

DAC_INSTANCE** bucket = &(iter->second);
DAC_INSTANCE* cur = *bucket;
DAC_INSTANCE* cur = pEntry->Value();
DAC_INSTANCE* prev = NULL;
//walk through the chain looking for this particular instance
while (cur)
Expand All @@ -1901,7 +1898,15 @@ DacInstanceManager::Supersede(DAC_INSTANCE* inst)
{
if (!prev)
{
*bucket = inst->next;
// Removing the head of the chain.
if (inst->next != NULL)
{
m_hash.ReplacePtr(pEntry, KeyValuePair<TADDR, DAC_INSTANCE*>(inst->addr, inst->next));
}
else
{
m_hash.Remove(inst->addr);
}
}
else
{
Expand Down Expand Up @@ -1982,7 +1987,7 @@ void DacInstanceManager::Flush(bool fSaveBlock)
}
}
#else //DAC_HASHTABLE
m_hash.clear();
m_hash.RemoveAll();
#endif //DAC_HASHTABLE

InitEmpty();
Expand Down Expand Up @@ -2021,18 +2026,17 @@ DacInstanceManager::ClearEnumMemMarker(void)
void
DacInstanceManager::ClearEnumMemMarker(void)
{
ULONG i;
DAC_INSTANCE* inst;

DacInstanceHashIterator end = m_hash.end();
DacInstanceHashIterator end = m_hash.End();
/* REVISIT_TODO Fri 10/20/2006
* This might have an issue, since it might miss chained entries off of
* ->next. However, ->next is going away, and for all intents and
* purposes, this never happens.
*/
for( DacInstanceHashIterator cur = m_hash.begin(); cur != end; ++cur )
for (DacInstanceHashIterator cur = m_hash.Begin(); cur != end; ++cur)
{
cur->second->enumMem = 0;
cur->Value()->enumMem = 0;
}

for (inst = m_superseded; inst; inst = inst->next)
Expand Down Expand Up @@ -2144,10 +2148,10 @@ DacInstanceManager::DumpAllInstances(
int numInBucket = 0;
#endif // #if defined(DAC_MEASURE_PERF)

DacInstanceHashIterator end = m_hash.end();
for (DacInstanceHashIterator cur = m_hash.begin(); end != cur; ++cur)
DacInstanceHashIterator end = m_hash.End();
for (DacInstanceHashIterator cur = m_hash.Begin(); end != cur; ++cur)
{
inst = cur->second;
inst = cur->Value();

// Only report those we intended to.
// So far, only metadata is excluded!
Expand Down Expand Up @@ -3270,6 +3274,9 @@ ClrDataAccess::Flush(void)
//
m_mdImports.Flush();

// Free cached patch entries for thread unwinding
m_patchCache.Flush();

// Free instance memory.
m_instances.Flush();

Expand Down
71 changes: 48 additions & 23 deletions src/coreclr/debug/daccess/dacfn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1505,43 +1505,68 @@ HRESULT DacReplacePatchesInHostMemory(MemoryRange range, PVOID pBuffer)
return S_OK;
}

if (g_dacImpl == nullptr)
{
return E_UNEXPECTED;
}

// Cache the patches as the target is not running during DAC operations, and hash
// table iteration is pretty slow.
const SArray<DacPatchCacheEntry>& entries = g_dacImpl->m_patchCache.GetEntries();

CORDB_ADDRESS address = (CORDB_ADDRESS)(dac_cast<TADDR>(range.StartAddress()));
SIZE_T cbSize = range.Size();

for (COUNT_T i = 0; i < entries.GetCount(); i++)
{
const DacPatchCacheEntry& entry = entries[i];

if (IsPatchInRequestedRange(address, cbSize, entry.address))
{
CORDbgSetInstructionEx(reinterpret_cast<PBYTE>(pBuffer), address, entry.address, entry.opcode, cbSize);
}
}

return S_OK;
}

// Returns the cached debugger patch entries, lazily populating the cache
// on first access (Populate is a no-op once m_isPopulated is set).
const SArray<DacPatchCacheEntry>& DacPatchCache::GetEntries()
{
    Populate();
    return m_entries;
}

// Reads the target's debugger patch table into m_entries. Runs at most once
// per Flush cycle: the target is not running during DAC operations, so the
// patch set is stable while we enumerate memory. Entries with a NULL patch
// address (unbound patches) are skipped.
void DacPatchCache::Populate()
{
    SUPPORTS_DAC;

    if (m_isPopulated)
    {
        return;
    }

    // Clear any stale entries from previous failed population attempts.
    m_entries.Clear();

    HASHFIND info;

    DebuggerPatchTable * pTable = DebuggerController::GetPatchTable();
    DebuggerControllerPatch * pPatch = pTable->GetFirstPatch(&info);

    // <PERF>
    // The unwinder needs to read the stack very often to restore pushed registers, retrieve the
    // return address, etc. However, stack addresses should never be patched.
    // One way to optimize this code is to pass the stack base and the stack limit of the thread
    // to the patch-replacement path and use those two values to filter out stack addresses.
    //
    // Another thing we can do is instead of enumerating the patches, we could enumerate the address.
    // This is more efficient when we have a large number of patches and a small memory range. Perhaps
    // we could do a hybrid approach, i.e. use the size of the range and the number of patches to dynamically
    // determine which enumeration is more efficient.
    // </PERF>
    while (pPatch != NULL)
    {
        CORDB_ADDRESS patchAddress = (CORDB_ADDRESS)dac_cast<TADDR>(pPatch->address);

        if (patchAddress != NULL)
        {
            DacPatchCacheEntry entry;
            entry.address = patchAddress;
            entry.opcode = pPatch->opcode;
            m_entries.Append(entry);
        }

        pPatch = pTable->GetNextPatch(&info);
    }

    m_isPopulated = true;
}
114 changes: 62 additions & 52 deletions src/coreclr/debug/daccess/dacimpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,8 @@
#include "gcinterface.dac.h"
//---------------------------------------------------------------------------------------
// Setting DAC_HASHTABLE tells the DAC to use the hand rolled hashtable for
// storing code:DAC_INSTANCE . Otherwise, the DAC uses the STL unordered_map to.
// storing code:DAC_INSTANCE . Otherwise, the DAC uses SHash.

#define DAC_HASHTABLE

#ifndef DAC_HASHTABLE
#pragma push_macro("return")
#undef return
#include <unordered_map>
#pragma pop_macro("return")
#endif //DAC_HASHTABLE
extern CRITICAL_SECTION g_dacCritSec;

// Convert between CLRDATA_ADDRESS and TADDR.
Expand Down Expand Up @@ -732,54 +724,28 @@ class DacInstanceManager
HashInstanceKeyBlock* m_hash[DAC_INSTANCE_HASH_SIZE];
#else //DAC_HASHTABLE

// We're going to use the STL unordered_map for our instance hash.
// This has the benefit of scaling to different workloads appropriately (as opposed to having a
// fixed number of buckets).

class DacHashCompare : public std::hash_compare<TADDR>
// SHash-based hash table for DAC instances, keyed by target address.
// The custom hash function avoids pseudo-randomizing in favor of a simple
// shift that clusters nearby addresses together. This improves access
// locality and has a significant positive impact on minidump library
// performance when enumerating entries during dump generation (as dbghelp
// has to coalesce adjacent memory regions).
class DacInstanceSHashTraits : public NonDacAwareSHashTraits<MapSHashTraits<TADDR, DAC_INSTANCE*>>
{
public:
// Custom hash function
// The default hash function uses a pseudo-randomizing function to get a random
// distribution. In our case, we'd actually like a more even distribution to get
// better access locality (see comments for DAC_INSTANCE_HASH_BITS).
//
// Also, when enumerating the hash during dump generation, clustering nearby addresses
// together can have a significant positive impact on the performance of the minidump
// library (at least the un-optimized version 6.5 linked into our dw20.exe - on Vista+
// we use the OS's version 6.7+ with radically improved perf characteristics). Having
// a random distribution is actually the worst-case because it means most blocks won't
// be merged until near the end, and a large number of intermediate blocks will have to
// be searched with each call.
//
// The default pseudo-randomizing function also requires a call to ldiv which shows up as
// a 3%-5% perf hit in most perf-sensitive scenarios, so this should also always be
// faster.
inline size_t operator()(const TADDR& keyval) const
typedef MapSHashTraits<TADDR, DAC_INSTANCE*> PARENT;
typedef PARENT::element_t element_t;
typedef PARENT::count_t count_t;
typedef PARENT::key_t key_t;

static count_t Hash(key_t k)
{
return (unsigned)(keyval >>DAC_INSTANCE_HASH_SHIFT);
return (count_t)(k >> DAC_INSTANCE_HASH_SHIFT);
}

// Explicitly bring in the two-argument comparison function from the base class (just less-than)
// This is necessary because once we override one form of operator() above, we don't automatically
// get the others by C++ inheritance rules.
using std::hash_compare<TADDR>::operator();

#ifdef NIDUMP_CUSTOMIZED_DAC_HASH // not set
//this particular number is supposed to be roughly the same amount of
//memory as the old code (buckets * number of entries in the old
//blocks.)
//disabled for now. May tweak implementation later. It turns out that
//having a large number of initial buckets is excellent for nidump, but it
// is terrible for most other scenarios due to the cost of clearing them at
// every Flush. Once there is a better perf suite, we can tweak these values more.
static const size_t min_buckets = DAC_INSTANCE_HASH_SIZE * 256;
#endif

};
typedef std::unordered_map<TADDR, DAC_INSTANCE*, DacHashCompare > DacInstanceHash;
typedef DacInstanceHash::value_type DacInstanceHashValue;
typedef DacInstanceHash::iterator DacInstanceHashIterator;

typedef SHash<DacInstanceSHashTraits> DacInstanceHash;
typedef DacInstanceHash::Iterator DacInstanceHashIterator;
DacInstanceHash m_hash;
#endif //DAC_HASHTABLE

Expand All @@ -795,6 +761,48 @@ class DacStreamManager;
#endif // FEATURE_MINIMETADATA_IN_TRIAGEDUMPS


//----------------------------------------------------------------------------
//
// DacPatchCache - Caches debugger breakpoint patches from the target process.
//
// DacReplacePatchesInHostMemory needs to iterate the target's patch hash table
// to replace breakpoint instructions with original opcodes. This iteration is
// expensive because it walks all hash buckets and entries and it will hit either a
// hash lookup in the instance cache or a cache miss + remote read. Since the target
// is not running during DAC operations, the patches should not change while we are performing
// memory enumeration, but if they do we'll miss them until the next time Flush() gets called.
//
//----------------------------------------------------------------------------

// One cached debugger patch: the patched target address and the original
// opcode the patch displaced (used to undo the patch in read buffers).
struct DacPatchCacheEntry
{
    CORDB_ADDRESS address;  // target address where the patch is applied
    PRD_TYPE opcode;        // original opcode saved by the debugger patch
};

// Lazily-populated cache of the target's debugger patches (see the
// DacPatchCache comment block above for motivation).
class DacPatchCache
{
public:
    DacPatchCache()
        : m_isPopulated(false)
    {
    }

    // Returns the cached entries, populating the cache on first use.
    const SArray<DacPatchCacheEntry>& GetEntries();

    // Discards all cached entries; the next GetEntries call re-reads the
    // target's patch table. Called from ClrDataAccess::Flush.
    void Flush()
    {
        m_entries.Clear();
        m_isPopulated = false;
    }

private:
    // Fills m_entries from the target's patch table; no-op once populated.
    void Populate();

    SArray<DacPatchCacheEntry> m_entries;  // cached patch address/opcode pairs
    bool m_isPopulated;                    // true once m_entries is valid
};

//----------------------------------------------------------------------------
//
// ClrDataAccess.
Expand Down Expand Up @@ -1408,6 +1416,8 @@ class ClrDataAccess
ULONG32 m_instanceAge;
bool m_debugMode;

DacPatchCache m_patchCache;

#ifdef FEATURE_MINIMETADATA_IN_TRIAGEDUMPS

protected:
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/debug/daccess/enummem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2000,6 +2000,15 @@ ClrDataAccess::EnumMemoryRegions(IN ICLRDataEnumMemoryRegionsCallback* callback,
{
DacLogMessage("EnumMemoryRegions(CLRDATA_ENUM_MEM_HEAP2)\n");
}
else if (g_EnableFastHeapDumps != 0)
{
// When the target process had DOTNET_EnableFastHeapDumps set,
// use HEAP2 which dumps loader heap pages in bulk instead of
// relying only on per-structure heap enumeration. This is
// significantly faster for large heap dump scenarios.
flags = CLRDATA_ENUM_MEM_HEAP2;
DacLogMessage("EnumMemoryRegions(CLRDATA_ENUM_MEM_HEAP promoted to HEAP2 via DOTNET_EnableFastHeapDumps)\n");
}
else
{
flags = CLRDATA_ENUM_MEM_HEAP;
Expand Down
Loading
Loading