diff --git a/libavcodec/d3d12va_decode.c b/libavcodec/d3d12va_decode.c
index 312d6e1ed7..4645ffe122 100644
--- a/libavcodec/d3d12va_decode.c
+++ b/libavcodec/d3d12va_decode.c
@@ -41,6 +41,101 @@ typedef struct HelperObjects {
     uint64_t fence_value;
 } HelperObjects;
 
+typedef struct ReferenceFrame {
+    ID3D12Resource *resource;
+    int            used;
+    ID3D12Resource *output_resource;
+} ReferenceFrame;
+
+static ID3D12Resource *get_reference_only_resource(AVCodecContext *avctx, ID3D12Resource *output_resource)
+{
+    D3D12VADecodeContext   *ctx          = D3D12VA_DECODE_CONTEXT(avctx);
+    AVHWFramesContext      *frames_ctx   = D3D12VA_FRAMES_CONTEXT(avctx);
+    AVD3D12VADeviceContext *device_hwctx = ctx->device_ctx;
+    int i = 0;
+    ID3D12Resource *resource = NULL;
+    D3D12_HEAP_PROPERTIES props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
+    D3D12_RESOURCE_DESC desc;
+    ReferenceFrame *reference_only_map = ctx->reference_only_map;
+    if (reference_only_map == NULL) {
+        av_log(avctx, AV_LOG_ERROR, "Reference frames are not allocated!\n");
+        return NULL;
+    }
+
+    // find unused resource
+    for (i = 0; i < ctx->max_num_ref; i++) {
+        if (!reference_only_map[i].used && reference_only_map[i].resource != NULL) {
+            reference_only_map[i].used = 1;
+            resource = reference_only_map[i].resource;
+            reference_only_map[i].output_resource = output_resource;
+            return resource;
+        }
+    }
+
+    // find space to allocate
+    for (i = 0; i < ctx->max_num_ref; i++) {
+        if (reference_only_map[i].resource == NULL)
+            break;
+    }
+
+    if (i == ctx->max_num_ref) {
+        av_log(avctx, AV_LOG_ERROR, "No space for new Reference frame!\n");
+        return NULL;
+    }
+
+    // allocate frame
+    output_resource->lpVtbl->GetDesc(output_resource, &desc);
+    desc.Flags = D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
+
+    if (FAILED(ID3D12Device_CreateCommittedResource(device_hwctx->device, &props, D3D12_HEAP_FLAG_NONE, &desc,
+        D3D12_RESOURCE_STATE_COMMON, NULL, &IID_ID3D12Resource, (void **)&reference_only_map[i].resource))) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create D3D12 Reference Resource!\n");
+        return NULL;
+    }
+
+    reference_only_map[i].used = 1;
+    resource = reference_only_map[i].resource;
+    reference_only_map[i].output_resource = output_resource;
+
+    return resource;
+}
+
+static void free_reference_only_resources(AVCodecContext *avctx)
+{
+    D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
+    int i;
+    ReferenceFrame *reference_only_map = ctx->reference_only_map;
+    if (reference_only_map != NULL) {
+        for (i = 0; i < ctx->max_num_ref; i++) {
+            if (reference_only_map[i].resource != NULL) {
+                D3D12_OBJECT_RELEASE(reference_only_map[i].resource);
+            }
+        }
+        av_freep(&ctx->reference_only_map);
+        av_freep(&ctx->ref_only_resources);
+    }
+}
+
+static void prepare_reference_only_resources(AVCodecContext *avctx)
+{
+    D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
+    int i, j;
+    ReferenceFrame *reference_only_map = ctx->reference_only_map;
+    if (reference_only_map == NULL)
+        return;
+    memset(ctx->ref_only_resources, 0, ctx->max_num_ref * sizeof(*(ctx->ref_only_resources)));
+    for (j = 0; j < ctx->max_num_ref; j++) {
+        for (i = 0; i < ctx->max_num_ref; i++) {
+            if (reference_only_map[j].used && reference_only_map[j].output_resource == ctx->ref_resources[i]) {
+                ctx->ref_only_resources[i] = reference_only_map[j].resource;
+                break;
+            }
+        }
+        if (i == ctx->max_num_ref)
+            reference_only_map[j].used = 0;
+    }
+}
+
 int ff_d3d12va_get_suitable_max_bitstream_size(AVCodecContext *avctx)
 {
     AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx);
@@ -250,6 +345,18 @@ static int d3d12va_create_decoder(AVCodecContext *avctx)
         return AVERROR_PATCHWELCOME;
     }
 
+    ctx->reference_only_map = NULL;
+    ctx->ref_only_resources = NULL;
+    if (feature.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATIONS_REQUIRED) {
+        av_log(avctx, AV_LOG_VERBOSE, "Reference-Only Allocations are required for this D3D12 decoder configuration.\n");
+        ctx->reference_only_map = av_calloc(ctx->max_num_ref + 1, sizeof(ReferenceFrame));
+            if (!ctx->reference_only_map)
+                return AVERROR(ENOMEM);
+        ctx->ref_only_resources = av_calloc(ctx->max_num_ref, sizeof(*ctx->ref_only_resources));
+            if (!ctx->ref_only_resources)
+                return AVERROR(ENOMEM);
+    }
+
     desc = (D3D12_VIDEO_DECODER_DESC) {
         .NodeMask = 0,
         .Configuration = ctx->cfg,
@@ -394,6 +501,7 @@ av_cold int ff_d3d12va_decode_uninit(AVCodecContext *avctx)
 
         av_log(avctx, AV_LOG_VERBOSE, "Total number of command allocators reused: %d\n", num_allocator);
     }
+    free_reference_only_resources(avctx);
 
     av_fifo_freep2(&ctx->objects_queue);
 
@@ -412,14 +520,15 @@ static inline int d3d12va_update_reference_frames_state(AVCodecContext *avctx, D
                                                         ID3D12Resource *current_resource, int state_before, int state_end)
 {
     D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
+    ID3D12Resource **ref_resources = ctx->ref_only_resources ? ctx->ref_only_resources : ctx->ref_resources;
 
     int num_barrier = 0;
     for (int i = 0; i < ctx->max_num_ref; i++) {
-        if (((ctx->used_mask >> i) & 0x1) && ctx->ref_resources[i] && ctx->ref_resources[i] != current_resource) {
+        if (((ctx->used_mask >> i) & 0x1) && ref_resources[i] && ref_resources[i] != current_resource) {
             barriers[num_barrier].Type  = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
             barriers[num_barrier].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
-            barriers[num_barrier].Transition = (D3D12_RESOURCE_TRANSITION_BARRIER){
-                .pResource   = ctx->ref_resources[i],
+            barriers[num_barrier].Transition = (D3D12_RESOURCE_TRANSITION_BARRIER) {
+                .pResource   = ref_resources[i],
                 .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
                 .StateBefore = state_before,
                 .StateAfter  = state_end,
@@ -440,8 +549,9 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
     D3D12VADecodeContext   *ctx               = D3D12VA_DECODE_CONTEXT(avctx);
     ID3D12Resource         *buffer            = NULL;
     ID3D12CommandAllocator *command_allocator = NULL;
-    AVD3D12VAFrame         *f                 = (AVD3D12VAFrame *)frame->data[0];
-    ID3D12Resource         *resource          = (ID3D12Resource *)f->texture;
+    AVD3D12VAFrame         *f                 = (AVD3D12VAFrame*)frame->data[0];
+    ID3D12Resource         *output_resource   = (ID3D12Resource*)f->texture;
+    ID3D12Resource         *ref_resource      = NULL;
 
     ID3D12VideoDecodeCommandList *cmd_list = ctx->command_list;
     D3D12_RESOURCE_BARRIER barriers[32] = { 0 };
@@ -466,25 +576,55 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
     D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS output_args = {
         .ConversionArguments = { 0 },
         .OutputSubresource   = 0,
-        .pOutputTexture2D    = resource,
+        .pOutputTexture2D    = output_resource,
     };
 
+    memset(ctx->ref_subresources, 0, sizeof(UINT) * ctx->max_num_ref);
+    input_args.ReferenceFrames.NumTexture2Ds = ctx->max_num_ref;
+    input_args.ReferenceFrames.pSubresources = ctx->ref_subresources;
+
+    if (ctx->reference_only_map) {
+        ref_resource = get_reference_only_resource(avctx, output_resource);
+        if (ref_resource == NULL) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to get reference frame!\n");
+            goto fail;
+        }
+        prepare_reference_only_resources(avctx);
+
+        output_args.ConversionArguments.Enable               = 1;
+        input_args.ReferenceFrames.ppTexture2Ds  = ctx->ref_only_resources;
+        output_args.ConversionArguments.pReferenceTexture2D  = ref_resource;
+        output_args.ConversionArguments.ReferenceSubresource = 0;
+    } else {
+        ref_resource = output_resource;
+        input_args.ReferenceFrames.ppTexture2Ds  = ctx->ref_resources;
+    }
+
     UINT num_barrier = 1;
     barriers[0] = (D3D12_RESOURCE_BARRIER) {
         .Type  = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
         .Transition = {
-            .pResource   = resource,
+            .pResource   = output_resource,
             .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
             .StateBefore = D3D12_RESOURCE_STATE_COMMON,
             .StateAfter  = D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
         },
     };
 
-    memset(ctx->ref_subresources, 0, sizeof(UINT) * ctx->max_num_ref);
-    input_args.ReferenceFrames.NumTexture2Ds = ctx->max_num_ref;
-    input_args.ReferenceFrames.ppTexture2Ds  = ctx->ref_resources;
-    input_args.ReferenceFrames.pSubresources = ctx->ref_subresources;
+    if (ctx->reference_only_map) {
+        barriers[1] = (D3D12_RESOURCE_BARRIER) {
+            .Type  = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+            .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+            .Transition = {
+                .pResource   = ref_resource,
+                .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
+                .StateBefore = D3D12_RESOURCE_STATE_COMMON,
+                .StateAfter  = D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
+            },
+        };
+        num_barrier++;
+    }
 
     ret = d3d12va_fence_completion(&f->sync_ctx);
     if (ret < 0)
@@ -505,7 +645,7 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
 
     DX_CHECK(ID3D12VideoDecodeCommandList_Reset(cmd_list, command_allocator));
 
-    num_barrier += d3d12va_update_reference_frames_state(avctx, &barriers[1], resource, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_DECODE_READ);
+    num_barrier += d3d12va_update_reference_frames_state(avctx, &barriers[num_barrier], ref_resource, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_DECODE_READ);
 
     ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier, barriers);
 
diff --git a/libavcodec/d3d12va_decode.h b/libavcodec/d3d12va_decode.h
index b64994760a..c771004222 100644
--- a/libavcodec/d3d12va_decode.h
+++ b/libavcodec/d3d12va_decode.h
@@ -119,6 +119,19 @@ typedef struct D3D12VADecodeContext {
      * Private to the FFmpeg AVHWAccel implementation
      */
     unsigned report_id;
+
+    /**
+     * The Reference-Only feature in DirectX 12 is a memory optimization
+     * technique designed for video decoding/encoding scenarios.
+     * This feature requires that reference resources must be allocated
+     * with the `D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY` resource flag.
+     * Reference textures must also be separated from output textures.
+     * reference_only_map used as a storage for reference only frames
+     * ref_only_resources used as a shadow for  ref_resources
+     */
+    void *reference_only_map;
+    ID3D12Resource **ref_only_resources;
+
 } D3D12VADecodeContext;
 
 /**