Work around Metal memory leak by using HostMemory for text and audio state context.

yichunk · copybara-github · commit 41200bd5c894 · 2026-05-01T10:32:53.000-07:00
LiteRT-LM-PiperOrigin-RevId: 908794945
diff --git a/runtime/executor/BUILD b/runtime/executor/BUILD
@@ -929,9 +929,11 @@ cc_library(
         "@litert//litert/cc:litert_tensor_buffer_types",
         "//runtime/components:model_resources",
         "//runtime/engine:io_types",
+        "//runtime/util:convert_tensor_buffer",
         "//runtime/util:file_util",
         "//runtime/util:litert_status_util",
         "//runtime/util:scoped_file",
+        "//runtime/util:tensor_buffer_util",
         "@litert//tflite/types:half",
     ] + select({
         "@litert//litert:litert_link_capi_so": [
diff --git a/runtime/executor/audio_litert_compiled_model_executor.cc b/runtime/executor/audio_litert_compiled_model_executor.cc
@@ -56,9 +56,10 @@
 #include "runtime/executor/executor_settings_base.h"
 #include "runtime/executor/litert_compiled_model_executor_utils.h"
 #include "runtime/executor/llm_executor_io_types.h"
+#include "runtime/util/convert_tensor_buffer.h"
 #include "runtime/util/file_util.h"
 #include "runtime/util/scoped_file.h"
-#include "runtime/util/status_macros.h"  //NOLINT
+#include "runtime/util/tensor_buffer_util.h"
 #include "tflite/types/half.h"  // from @litert
 
 namespace litert::lm {
@@ -838,8 +839,7 @@ AudioLiteRtCompiledModelExecutor::AudioStreamingEncoder::CreateNewContext() {
       // state.
       continue;
     }
-    LITERT_ASSIGN_OR_RETURN(auto new_buffer, compiled_model_.CreateInputBuffer(
-                                                 signature.Key(), name));
+    LITERT_ASSIGN_OR_RETURN(auto new_buffer, CopyTensorBuffer(env_, buffer));
     if (name == kPrevMaskName) {
       LITERT_ASSIGN_OR_RETURN(auto prev_mask_type, buffer.TensorType());
       LITERT_ASSIGN_OR_RETURN(int prev_mask_size,
@@ -860,15 +860,13 @@ absl::StatusOr<std::unique_ptr<AudioStreamingContext>>
 AudioLiteRtCompiledModelExecutor::AudioStreamingEncoder::CloneContext() {
   absl::flat_hash_map<absl::string_view, ::litert::TensorBuffer> state_buffers;
   LITERT_ASSIGN_OR_RETURN(auto signature, compiled_model_.GetSignature(0));
-  for (auto& [name, buffer] : input_buffers_map_) {
+  for (const auto& [name, buffer] : input_buffers_map_) {
     if (name == kSegmentValuesName || name == kSegmentMaskName) {
       // Skip the segment values and mask buffers as they are not part of the
       // state.
       continue;
     }
-    LITERT_ASSIGN_OR_RETURN(auto new_buffer, compiled_model_.CreateInputBuffer(
-                                                 signature.Key(), name));
-    RETURN_IF_ERROR(CopyBuffer(buffer, new_buffer));
+    LITERT_ASSIGN_OR_RETURN(auto new_buffer, CopyTensorBuffer(env_, buffer));
     state_buffers[name] = std::move(new_buffer);
   }
   auto audio_streaming_context =
@@ -890,8 +888,28 @@ AudioLiteRtCompiledModelExecutor::AudioStreamingEncoder::RestoreContext(
       // state.
       continue;
     }
-    LITERT_ASSIGN_OR_RETURN(auto buffer_copy, buffer.Duplicate());
-    input_buffers_map_[name] = std::move(buffer_copy);
+
+    if (input_buffers_map_[name].IsMetalMemory()) {
+      // b/505373949#comment13: A temporary fix for Metal memory leak.
+      LITERT_ASSIGN_OR_RETURN(auto tensor_type, buffer.TensorType());
+      if (tensor_type.ElementType() == ElementType::Float32) {
+        LITERT_ASSIGN_OR_RETURN(auto data_span,
+                                ReferTensorBufferAsSpan<float>(buffer));
+        LITERT_RETURN_IF_ERROR(
+            input_buffers_map_[name].Write<float>(data_span));
+      } else if (tensor_type.ElementType() == ElementType::Bool) {
+        LITERT_ASSIGN_OR_RETURN(auto data_span,
+                                ReferTensorBufferAsSpan<bool>(buffer));
+        LITERT_RETURN_IF_ERROR(input_buffers_map_[name].Write<bool>(data_span));
+      } else {
+        return absl::InvalidArgumentError(
+            absl::StrCat("Unsupported element type for state buffer: ",
+                         tensor_type.ElementType()));
+      }
+    } else {
+      LITERT_ASSIGN_OR_RETURN(auto buffer_copy, buffer.Duplicate());
+      input_buffers_map_[name] = std::move(buffer_copy);
+    }
   }
   return absl::OkStatus();
 }
diff --git a/runtime/util/BUILD b/runtime/util/BUILD
@@ -350,6 +350,7 @@ cc_library(
         "@com_google_absl//absl/types:span",
         "@litert//litert/cc:litert_macros",
         "@litert//litert/cc:litert_ranked_tensor_type",
+        "@litert//litert/cc:litert_tensor_buffer_types",
     ] + select({
         "@litert//litert:litert_link_capi_so": [
             "@litert//litert/cc:litert_api_with_dynamic_runtime",
diff --git a/runtime/util/tensor_buffer_util.cc b/runtime/util/tensor_buffer_util.cc
@@ -15,13 +15,15 @@
 #include "runtime/util/tensor_buffer_util.h"
 
 #include <cstring>
+#include <memory>
 #include <utility>
 #include <vector>
 
 #include "absl/status/statusor.h"  // from @com_google_absl
 #include "litert/cc/litert_environment.h"  // from @litert
 #include "litert/cc/litert_macros.h"  // from @litert
 #include "litert/cc/litert_tensor_buffer.h"  // from @litert
+#include "litert/cc/litert_tensor_buffer_types.h"  // from @litert
 
 namespace litert::lm {
 
@@ -48,9 +50,23 @@ absl::StatusOr<::litert::TensorBuffer> CopyTensorBuffer(
   LITERT_ASSIGN_OR_RETURN(auto buffer_type, tensor_buffer.BufferType());
   LITERT_ASSIGN_OR_RETURN(auto size, tensor_buffer.PackedSize());
 
-  LITERT_ASSIGN_OR_RETURN(auto output_tensor_buffer,
-                          ::litert::TensorBuffer::CreateManaged(
-                              env, buffer_type, tensor_type, size));
+  std::unique_ptr<::litert::TensorBuffer> output_tensor_buffer;
+  if (tensor_buffer.IsMetalMemory()) {
+    // b/505373949#comment13: A temporary fix to create a host memory buffer to
+    // copy from the metal memory buffer to avoid memory leak:
+    LITERT_ASSIGN_OR_RETURN(
+        auto buffer,
+        ::litert::TensorBuffer::CreateManaged(
+            env, ::litert::TensorBufferType::kHostMemory, tensor_type, size));
+    output_tensor_buffer =
+        std::make_unique<::litert::TensorBuffer>(std::move(buffer));
+  } else {
+    LITERT_ASSIGN_OR_RETURN(
+        auto buffer, ::litert::TensorBuffer::CreateManaged(env, buffer_type,
+                                                           tensor_type, size));
+    output_tensor_buffer =
+        std::make_unique<::litert::TensorBuffer>(std::move(buffer));
+  }
 
   LITERT_ASSIGN_OR_RETURN(
       auto src_lock_and_addr,
@@ -59,11 +75,11 @@ absl::StatusOr<::litert::TensorBuffer> CopyTensorBuffer(
   LITERT_ASSIGN_OR_RETURN(
       auto dst_lock_and_addr,
       ::litert::TensorBufferScopedLock::Create(
-          output_tensor_buffer, ::litert::TensorBuffer::LockMode::kWrite));
+          *output_tensor_buffer, ::litert::TensorBuffer::LockMode::kWrite));
 
   std::memcpy(dst_lock_and_addr.second, src_lock_and_addr.second, size);
 
-  return std::move(output_tensor_buffer);
+  return std::move(*output_tensor_buffer);
 }
 
 }  // namespace litert::lm