<?xml version="1.0" encoding="UTF-8"?>
<?latexml searchpaths="/home/japhy/scienceReplication.artiswrong.com/paper_files/arxiv/2507.06426/latex_extracted"?>
<!--  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% --><!--  %2345678901234567890123456789012345678901234567890123456789012345678901234567890 --><!--  %1         2         3         4         5         6         7         8 --><?latexml class="ieeeconf" options="letterpaper, 10 pt, conference"?>
<!--  %\documentclass[a4paper, 10pt, conference]{ieeeconf}      % Use this line for a4 paper --><!--  %This command is only needed if --><!--  %you want to use the \thanks command --><!--  %Needed to meet printer requirements. --><!--  %In case you encounter the following error: --><!--  %Error 1010 The PDF file may be corrupt (unable to open PDF file) OR --><!--  %Error 1000 An error occurred while parsing a contents stream. Unable to analyze the PDF file. --><!--  %This is a known problem with pdfLaTeX conversion filter. The file cannot be opened with acrobat reader --><!--  %Please use one of the alternatives below to circumvent this error by uncommenting one or the other --><!--  %\pdfobjcompresslevel=0 --><!--  %\pdfminorversion=4 --><!--  %See the \addtolength command later in the file to balance the column lengths --><!--  %on the last page of the document --><!--  %**** root.tex Line 25 **** --><!--  %The following packages can be found on http:\\www.ctan.org --><!--  %\usepackage{graphics} % for pdf, bitmapped graphics files --><!--  %\usepackage{epsfig} % for postscript graphics files --><!--  %\usepackage{mathptmx} % assumes new font selection scheme installed --><!--  %\usepackage{times} % assumes new font selection scheme installed --><!--  %\usepackage{amsmath} % assumes amsmath package installed --><!--  %\usepackage{amssymb}  % assumes amsmath package installed --><?latexml package="graphicx"?>
<?latexml package="comment"?>
<?latexml package="float"?>
<?latexml package="hyperref"?>
<?latexml package="geometry"?>
<!--  %a4paper, --><!--  %total={170mm,257mm}, --><!--  %left=48pt, --><!--  %right=48pt, --><!--  %top=57pt, --><!--  %bottom=43pt, --><!--  %nominal 72pt --><!--  %**** root.tex Line 50 **** --><!--  %<-this % stops a space --><!--  %<-this % stops a space --><!--  %**** root.tex Line 75 **** --><?latexml RelaxNGSchema="LaTeXML"?>
<document xmlns="http://dlmf.nist.gov/LaTeXML" class="ltx_authors_1line">
  <resource src="LaTeXML.css" type="text/css"/>
  <resource src="ltx-article.css" type="text/css"/>
  <title font="bold" fontsize="173%">Evaluating Robots Like Human Infants: A Case Study of <break/>Learned Bipedal Locomotion
</title>
  <creator role="author">
    <personname>Devin Crowley<Math mode="inline" tex="{}^{1}" text="^1" xml:id="m1">
        <XMath>
          <XMApp role="FLOATSUPERSCRIPT" scriptpos="1">
            <XMTok fontsize="70%" meaning="1" role="NUMBER">1</XMTok>
          </XMApp>
        </XMath>
      </Math>, Whitney G. Cole<Math mode="inline" tex="{}^{2}" text="^2" xml:id="m2">
        <XMath>
          <XMApp role="FLOATSUPERSCRIPT" scriptpos="1">
            <XMTok fontsize="70%" meaning="2" role="NUMBER">2</XMTok>
          </XMApp>
        </XMath>
      </Math>, Christina M. Hospodar<Math mode="inline" tex="{}^{3}" text="^3" xml:id="m3">
        <XMath>
          <XMApp role="FLOATSUPERSCRIPT" scriptpos="1">
            <XMTok fontsize="70%" meaning="3" role="NUMBER">3</XMTok>
          </XMApp>
        </XMath>
      </Math>, Ruiting Shen<Math mode="inline" tex="{}^{4}" text="^4" xml:id="m4">
        <XMath>
          <XMApp role="FLOATSUPERSCRIPT" scriptpos="1">
            <XMTok fontsize="70%" meaning="4" role="NUMBER">4</XMTok>
          </XMApp>
        </XMath>
      </Math>, Karen E. Adolph<Math mode="inline" tex="{}^{5}" text="^5" xml:id="m5">
        <XMath>
          <XMApp role="FLOATSUPERSCRIPT" scriptpos="1">
            <XMTok fontsize="70%" meaning="5" role="NUMBER">5</XMTok>
          </XMApp>
        </XMath>
      </Math>, and Alan Fern<Math mode="inline" tex="{}^{6}" text="^6" xml:id="m6">
        <XMath>
          <XMApp role="FLOATSUPERSCRIPT" scriptpos="1">
            <XMTok fontsize="70%" meaning="6" role="NUMBER">6</XMTok>
          </XMApp>
        </XMath>
      </Math>





</personname>
    <contact role="thanks">*This work was supported under NSF grant number 2321851.</contact>
    <contact role="thanks"><Math mode="inline" tex="{}^{1}" text="^1" xml:id="m7">
        <XMath>
          <XMApp role="FLOATSUPERSCRIPT" scriptpos="1">
            <XMTok fontsize="70%" meaning="1" role="NUMBER">1</XMTok>
          </XMApp>
        </XMath>
      </Math>Devin Crowley is with the Department of Electrical Engineering and Computer Sciences,
Oregon State University
<text font="typewriter" fontsize="90%">crowleyd@oregonstate.edu</text></contact>
    <contact role="thanks"><Math mode="inline" tex="{}^{2}" text="^2" xml:id="m8">
        <XMath>
          <XMApp role="FLOATSUPERSCRIPT" scriptpos="1">
            <XMTok fontsize="70%" meaning="2" role="NUMBER">2</XMTok>
          </XMApp>
        </XMath>
      </Math>Whitney G. Cole is with the Department of Psychology, New York University
<text font="typewriter" fontsize="90%">wgcole@nyu.edu</text></contact>
    <contact role="thanks"><Math mode="inline" tex="{}^{3}" text="^3" xml:id="m9">
        <XMath>
          <XMApp role="FLOATSUPERSCRIPT" scriptpos="1">
            <XMTok fontsize="70%" meaning="3" role="NUMBER">3</XMTok>
          </XMApp>
        </XMath>
      </Math>Christina M. Hospodar is with the Department of Psychology, New York University
<text font="typewriter" fontsize="90%">christina.hospodar@nyu.edu</text></contact>
    <contact role="thanks"><Math mode="inline" tex="{}^{4}" text="^4" xml:id="m10">
        <XMath>
          <XMApp role="FLOATSUPERSCRIPT" scriptpos="1">
            <XMTok fontsize="70%" meaning="4" role="NUMBER">4</XMTok>
          </XMApp>
        </XMath>
      </Math>Ruiting Shen is with the Department of Psychology, New York University
<text font="typewriter" fontsize="90%">rs8422@nyu.edu</text></contact>
    <contact role="thanks"><Math mode="inline" tex="{}^{5}" text="^5" xml:id="m11">
        <XMath>
          <XMApp role="FLOATSUPERSCRIPT" scriptpos="1">
            <XMTok fontsize="70%" meaning="5" role="NUMBER">5</XMTok>
          </XMApp>
        </XMath>
      </Math>Karen E. Adolph is with the Department of Psychology, New York University
<text font="typewriter" fontsize="90%">karen.adolph@nyu.edu</text></contact>
    <contact role="thanks"><Math mode="inline" tex="{}^{6}" text="^6" xml:id="m12">
        <XMath>
          <XMApp role="FLOATSUPERSCRIPT" scriptpos="1">
            <XMTok fontsize="70%" meaning="6" role="NUMBER">6</XMTok>
          </XMApp>
        </XMath>
      </Math>Alan Fern is with the Department of Electrical Engineering and Computer Sciences, Oregon State University
<text font="typewriter" fontsize="90%">alan.fern@oregonstate.edu</text></contact>
  </creator>
  <abstract name="Abstract">
    <p>Typically, learned robot controllers are trained via relatively unsystematic regimens and evaluated with coarse-grained outcome measures such as average cumulative reward. The typical approach is useful to compare learning algorithms but provides limited insight into the effects of different training regimens and little understanding about the richness and complexity of learned behaviors. Likewise,
human infants and other animals are “trained” via unsystematic regimens, but in contrast, developmental psychologists evaluate their performance in highly-controlled experiments with fine-grained measures such as success, speed of walking, and prospective adjustments. However, the study of learned behavior in human infants is limited by the practical constraints of training and testing babies. Here, we present a case study that applies methods from developmental psychology to study the learned behavior of the simulated bipedal robot Cassie. Following research on infant walking, we systematically designed reinforcement learning training regimens and tested the resulting controllers in simulated environments analogous to those used for babies—but without the practical constraints. Results reveal new insights into the behavioral impact of different training regimens and the development of Cassie’s learned behaviors relative to infants who are learning to walk. This interdisciplinary baby-robot approach provides inspiration for future research designed to systematically test effects of training on the development of complex learned robot behaviors.</p>
  </abstract>
<!--  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
     %keywords:
     %- bioinspired robot learning
     %- reinforcement learning
     %- performance evaluation and benchmarking
     %This paper presents a case study in applying high-level evaluation methods from a different field to assess a learned robot controller and systematically analyze the effects of different training regimens.
     %===============================================================================-->  <figure inlist="lof" labels="LABEL:fig:human_robot_composite" placement="!htb" xml:id="S0.F1">
    <tags>
      <tag>Fig. 1</tag>
      <tag role="autoref">Figure 1</tag>
      <tag role="refnum">1</tag>
      <tag role="typerefnum">Fig. 1</tag>
    </tags>
<!--  %**** root.tex Line 100 **** -->    <graphics candidates="figures/human_robot_composite.png" class="ltx_centering" graphic="figures/human_robot_composite.png" options="width=433.62pt" xml:id="S0.F1.g1"/>
    <toccaption class="ltx_centering"><tag close=" ">1</tag>Test environments with infants in the real world and the bipedal robot Cassie in simulation. Left-to-right: slopes, drop-offs, gaps, and bridges.</toccaption>
    <caption class="ltx_centering"><tag close=": ">Fig. 1</tag>Test environments with infants in the real world and the bipedal robot Cassie in simulation. Left-to-right: slopes, drop-offs, gaps, and bridges.</caption>
  </figure>
<!--  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
     %% Section: INTRODUCTION
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%-->  <section inlist="toc" labels="LABEL:sec:introduction" xml:id="S1">
    <tags>
      <tag>I</tag>
      <tag role="autoref">section I</tag>
      <tag role="refnum">I</tag>
      <tag role="typerefnum">§I</tag>
    </tags>
    <title><tag close=" ">I</tag><text font="smallcaps">Introduction</text></title>
    <para xml:id="S1.p1">
      <p>Consider a visually-guided bipedal robot trained in simulation to walk over challenging terrain with varied elevations and obstacles in the path. Typically, training regimens and test situations are not manipulated systematically, and performance evaluations report only crude metrics (e.g., average cumulative reward over multiple test environments). Albeit useful to roughly rank learned controllers,
the typical approach to training, testing, and evaluation cannot reveal how different training regimens affect the complex details of learned behaviors like gait modifications while approaching and navigating varied terrain. For example, two different controllers with similar reward functions may nonetheless behave very differently when walking down a steep slope. One controller might leverage its vision and adjust its gait prospectively while approaching the slope, whereas the other controller may adjust reactively after stepping on the slope.</p>
    </para>
    <para xml:id="S1.p2">
      <p>Inspired by developmental research with human infants, we advocate for a more systematic approach to training and testing, and a more detailed evaluation of the learned behaviors. To promote this approach, this paper presents a case study that adapts developmental research on infant locomotion to learned, visually-guided locomotion controllers for the Cassie bipedal robot. In particular, we used simulated test environments with the experimental apparatuses from infant studies (slopes, drop-offs, gaps, and bridges) and conducted similar experiments and analyses <cite class="ltx_citemacro_cite">[<bibref bibrefs="adolph2019annreview,adolph2021visualcliff" separator="," yyseparator=","/>]</cite> (see Fig. <ref labelref="LABEL:fig:human_robot_composite"/>).</p>
    </para>
    <para xml:id="S1.p3">
      <p>Testing in simulation removes the constraints posed by real-world robot experiments, allowing fine-grained comparisons of different training regimens. Simulation-based work is often accompanied by a sim-to-real transfer method to produce a controller that functions in the real world. However, addressing the reality gap is orthogonal to the investigations of this work. We test in simulation without transfer to the real world because our objective is to study the development of locomotion capabilities, not to produce a working real-world controller.</p>
    </para>
    <para xml:id="S1.p4">
      <p>This robot evaluation approach can advance behavioral research with robots by guiding robot training regimens and informing decisions about which controller works best in real-world operating environments.
In addition, this approach may inform behavioral research with infants by revealing the benefits and constraints of reinforcement learning (RL) under various training regimens—regimens that would be unethical or impractical for human babies.</p>
    </para>
<!--  %**** root.tex Line 125 **** 
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     %% Section: BACKGROUND
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%-->  </section>
  <section inlist="toc" labels="LABEL:sec:background" xml:id="S2">
    <tags>
      <tag>II</tag>
      <tag role="autoref">section II</tag>
      <tag role="refnum">II</tag>
      <tag role="typerefnum">§II</tag>
    </tags>
    <title><tag close=" ">II</tag><text font="smallcaps">Background</text></title>
    <para xml:id="S2.p1">
      <p>Our case study follows prior work on learning locomotion controllers for the Cassie robot and analyzes behavior using established methods from research with walking infants.
<!--  %The case study presented in this paper follows prior work learning locomotion controllers for the Cassie robot and analyzing the resulting behavior using established methods from the study of human infants. --></p>
    </para>
    <subsection inlist="toc" xml:id="S2.SS1">
      <tags>
        <tag>II-A</tag>
        <tag role="autoref">subsection II-A</tag>
        <tag role="refnum">II-A</tag>
        <tag role="typerefnum">§II-A</tag>
      </tags>
      <title><tag close=" ">II-A</tag><text font="italic">Learning in the Development of Infant Locomotion</text></title>
      <para xml:id="S2.SS1.p1">
        <p>How do babies learn to navigate varied terrain such as steep slopes, high drop-offs, wide gaps, and narrow bridges, as illustrated in Fig. <ref labelref="LABEL:fig:human_robot_composite"/>? Infants learn to walk amidst continual changes in their environments and skills. Features of the environment are variable (ground surfaces can be flat or sloping, rigid or deformable, high-traction or slippery; the path can be clear or cluttered with obstacles and elevations) and infants’ walking skill improves dramatically over the first several months after walk onset <cite class="ltx_citemacro_cite">[<bibref bibrefs="adolph2019annreview,adolph2018tics,hospodar2024wires" separator="," yyseparator=","/>]</cite>. During natural locomotion, infants’ walking paths are curved, not straight; their steps are omnidirectional, not forward; and their locomotion is intermittent rather than continuous <cite class="ltx_citemacro_cite">[<bibref bibrefs="cole2016bouts,lee2018cost,hospodar2021practice,adolph2012learntowalk" separator="," yyseparator=","/>]</cite>. Infants’ everyday life creates a natural training regimen—with toys scattered on the ground, laundry heaps in the corner, furniture obstructing the path, and so on. For infants, learning to walk means learning to modify gait from step to step to navigate a varied environment.</p>
      </para>
      <para xml:id="S2.SS1.p2">
        <p>Developmental researchers test novice and experienced walking infants on challenging terrain, such as slopes, drop-offs, gaps, and bridges of varying difficulties, because the obstacles are novel (no baby encounters steep slopes, narrow bridges, etc. during everyday life). Thus, researchers can test generalization from everyday experience to novel situations based on whether babies modify their gait prospectively while approaching the obstacle or reactively after stepping onto the obstacle. In all cases, the best solution is to modify gait prospectively on approach. Reactive adjustments while traversing the obstacles are less optimal because gravity takes over and gait modifications entail fighting to keep the body over the moving base of support. The strongest evidence for prospective control is changes in foot placement, step length, and walking speed before stepping onto the obstacle—rather than modifying gait after stepping onto the obstacle.</p>
      </para>
      <para xml:id="S2.SS1.p3">
        <p>Prior work suggests the ability to modify gait develops with walking experience <cite class="ltx_citemacro_cite">[<bibref bibrefs="adolph2019annreview,adolph2018tics,hospodar2024wires" separator="," yyseparator=","/>]</cite>. Novice infants walk blithely over the brink of impossibly steep slopes, high drop-offs, wide gaps, and narrow bridges (requiring rescue from an experimenter). After several months of everyday walking experience—notably, with no prior experience on the test obstacles—infants modify their gait prospectively. For every obstacle, experienced infants slow down and shorten their steps during approach. For slopes, their initial steps on the slope are short and slow, they widen their base of support, and brake forward momentum from step to step <cite class="ltx_citemacro_cite">[<bibref bibrefs="adolph1997learning,gill2009change" separator="," yyseparator=","/>]</cite>. For drop-offs, they place their stance foot close to the brink so that their moving foot can stretch down to the bottom of the precipice all the while keeping their body upright as it drops vertically onto their moving foot <cite class="ltx_citemacro_cite">[<bibref bibrefs="kretch2013cliff" separator="," yyseparator=","/>]</cite>. For gaps, they place their stance foot close to the brink of the gap, increase step length with the moving foot, and place their moving foot close to the far side of the gap. For bridges, infants place their body at the near side of the bridge, and take short, slow, narrow steps <cite class="ltx_citemacro_cite">[<bibref bibrefs="kretch2013bridge,kretch2017organization" separator="," yyseparator=","/>]</cite>.</p>
      </para>
      <para xml:id="S2.SS1.p4">
        <p>Developmental researchers hypothesize that infants learn to modify gait prospectively as they accumulate everyday experience navigating an ever-changing environment. Practice doing the same thing repeatedly (e.g., stepping on a treadmill) and training to the test (e.g., repeated practice walking down slopes) do not contribute to the development of prospective gait modifications <cite class="ltx_citemacro_cite">[<bibref bibrefs="gill2009change" separator="," yyseparator=","/>]</cite>. However, developmental researchers cannot control infants’ natural, everyday training regimens and must accommodate practical limitations for testing infants. Babies can complete only a few dozen trials before becoming tired or fussy, and often learn gait modifications during testing. Such limitations do not exist for simulated robots.
Thus, simulated robots provide a powerful, highly-controlled platform for testing effects of experience on motor learning, with the potential to serve as a surrogate for studying how infants learn to walk over varied terrain.
<!--  %**** root.tex Line 150 **** --></p>
      </para>
    </subsection>
    <subsection inlist="toc" xml:id="S2.SS2">
      <tags>
        <tag>II-B</tag>
        <tag role="autoref">subsection II-B</tag>
        <tag role="refnum">II-B</tag>
        <tag role="typerefnum">§II-B</tag>
      </tags>
      <title><tag close=" ">II-B</tag><text font="italic">RL for Bipedal Locomotion</text></title>
      <para xml:id="S2.SS2.p1">
        <p><text font="bold">Controller Architecture</text>
We tested the behavior of controllers for the Cassie bipedal robot trained with RL using a learning framework from prior work <cite class="ltx_citemacro_cite">[<bibref bibrefs="duan2023learning" separator="," yyseparator=","/>]</cite>, lightly adapted for purely simulation-based experimentation. This framework layers a visually-guided residual controller on top of the outputs of a frozen blind controller trained only on flat terrain. The controllers are represented as recurrent long short-term memory (LSTM) neural networks. Their outputs are proportional-derivative (PD) control targets, set at 50 Hz for 10 actuated joints centered around a standing pose. A static PD controller uses these targets to set the motor torques at 2000 Hz. A schematic of this learning framework is shown in Fig. <ref labelref="LABEL:fig:rl_schematic"/>. Training a slower controller to set targets for a faster PD controller is commonly used in RL for locomotion <cite class="ltx_citemacro_cite">[<bibref bibrefs="peng2017learning,xie2018feedback,tan2018AgileLocomotion,hwangbo2019learning,TsounisDeepGait" separator="," yyseparator=","/>]</cite> and has strong precedent on the Cassie platform <cite class="ltx_citemacro_cite">[<bibref bibrefs="duan2023learning,siekmann2020learning,siekmann2020simtoreal,siekmann2021stairs" separator="," yyseparator=","/>]</cite>.</p>
      </para>
      <para xml:id="S2.SS2.p2">
        <p>The input to the blind controller includes: (1) a 35-dimensional vector containing the positions and velocities of all joints, plus the pelvis orientation and rotational velocity; (2) a clock signal that dictates the cadence of the footsteps; (3) gait parameters that modulate the clock signal; and (4) commands for forward speed, lateral speed, and turn rate. A grid of ground-truth terrain heights taken from the simulated terrain serves as additional input to the visually-guided controller. The grid covers a 1 m wide by 1.5 m long rectangle in front of Cassie, sampled as 20 by 30 height values, respectively.</p>
      </para>
      <para xml:id="S2.SS2.p3">
        <p>In addition to PD target residuals, the visually-guided controller outputs the clock progression speed and the phase-offset between the feet. This allows the controller to modulate the frequency of footsteps and adjust the left-right cadence. In principle, this means the controller can learn to produce asymmetric 2-beat patterns like skipping rather than performing only the basic, symmetrical, left-right gait pattern.
Both blind and visually-guided controllers use the same reward function. It encourages adherence to the commands, minimized motor torque, and footsteps in accordance with the clock signal.
See <cite class="ltx_citemacro_cite">[<bibref bibrefs="duan2023learning" separator="," yyseparator=","/>]</cite> for further details.</p>
      </para>
      <figure inlist="lof" labels="LABEL:fig:rl_schematic" placement="!htb" xml:id="S2.F2">
        <tags>
          <tag>Fig. 2</tag>
          <tag role="autoref">Figure 2</tag>
          <tag role="refnum">2</tag>
          <tag role="typerefnum">Fig. 2</tag>
        </tags>
        <graphics candidates="figures/rl_schematic.png" class="ltx_centering" graphic="figures/rl_schematic.png" options="width=433.62pt" xml:id="S2.F2.g1"/>
        <toccaption class="ltx_centering"><tag close=" ">2</tag>Controller schematic. The controller consists of a visually-guided component, trained on varied terrain, which modulates the output of a blind component trained on only flat ground.</toccaption>
        <caption class="ltx_centering"><tag close=": ">Fig. 2</tag>Controller schematic. The controller consists of a visually-guided component, trained on varied terrain, which modulates the output of a blind component trained on only flat ground.</caption>
      </figure>
      <para xml:id="S2.SS2.p4">
        <p><text font="bold">Simulation Training</text>
The controllers are trained using the actor-critic proximal policy optimization (PPO) algorithm with gradient clipping, a standard model-free RL algorithm <cite class="ltx_citemacro_cite">[<bibref bibrefs="schulman2017proximal" separator="," yyseparator=","/>]</cite>. The simulator used for both training and testing is the MuJoCo physics engine <cite class="ltx_citemacro_cite">[<bibref bibrefs="todorov2012mujoco" separator="," yyseparator=","/>]</cite> using a model of the Cassie robot. Because we examined behavior only in simulation, we did not use dynamics randomization to aid in sim-to-real transfer as in prior work with Cassie <cite class="ltx_citemacro_cite">[<bibref bibrefs="duan2023learning,siekmann2020learning,siekmann2020simtoreal,siekmann2021stairs,yu2022dynamic,dao2023simtoreal,crowley2023gaits" separator="," yyseparator=","/>]</cite>.</p>
      </para>
<!--  %**** root.tex Line 175 **** 
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     %% Section: EXPERIMENTAL SETUP
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%-->    </subsection>
  </section>
  <section inlist="toc" labels="LABEL:sec:experimental_setup" xml:id="S3">
    <tags>
      <tag>III</tag>
      <tag role="autoref">section III</tag>
      <tag role="refnum">III</tag>
      <tag role="typerefnum">§III</tag>
    </tags>
    <title><tag close=" ">III</tag><text font="smallcaps">Experimental Setup</text></title>
    <para xml:id="S3.p1">
      <p>Our analyses evaluate the performance and behavior of several visually-guided controllers, differing in the distribution of training terrains. All controllers output residual PD targets added to the outputs of the same pre-trained blind controller described in Section <ref labelref="LABEL:sec:background"/>.</p>
    </para>
    <subsection inlist="toc" xml:id="S3.SS1">
      <tags>
        <tag>III-A</tag>
        <tag role="autoref">subsection III-A</tag>
        <tag role="refnum">III-A</tag>
        <tag role="typerefnum">§III-A</tag>
      </tags>
      <title><tag close=" ">III-A</tag><text font="italic">Training Regimens</text></title>
      <para xml:id="S3.SS1.p1">
        <p>We trained Cassie on <text font="italic">standard</text>, <text font="italic">multi-test obstacle</text>, <text font="italic">combined standard and multi-test</text>, and <text font="italic">single-test obstacle</text> terrains. The standard training terrains replicate prior robotics work <cite class="ltx_citemacro_cite">[<bibref bibrefs="duan2023learning" separator="," yyseparator=","/>]</cite> on which our learning framework is based: flat, hills, ridges, blocks, and stairs, shown in Fig. <ref labelref="LABEL:fig:icra_terrains"/>. The multi-test obstacle terrains are recreations of test apparatuses used in experiments with infants: slopes, drop-offs, gaps, and bridges, shown in Fig. <ref labelref="LABEL:fig:human_robot_composite"/> for infants in the real world and Cassie in simulation.</p>
      </para>
      <figure inlist="lof" labels="LABEL:fig:icra_terrains" placement="!htb" xml:id="S3.F3">
        <tags>
          <tag>Fig. 3</tag>
          <tag role="autoref">Figure 3</tag>
          <tag role="refnum">3</tag>
          <tag role="typerefnum">Fig. 3</tag>
        </tags>
        <graphics candidates="figures/terrains_icra_row.png" class="ltx_centering" graphic="figures/terrains_icra_row.png" options="width=433.62pt" xml:id="S3.F3.g1"/>
        <toccaption class="ltx_centering"><tag close=" ">3</tag>Standard terrains. Left-to-right: flat, hills, ridges, blocks, and stairs.</toccaption>
        <caption class="ltx_centering"><tag close=": ">Fig. 3</tag>Standard terrains. Left-to-right: flat, hills, ridges, blocks, and stairs.</caption>
      </figure>
      <para xml:id="S3.SS1.p2">
        <p>Table <ref labelref="LABEL:tab:curricula"/> shows the frequency of each terrain for the standard, multi-test obstacle, combined standard and multi-test, and single-test obstacle training regimens. Cassie received 20k iterations for the single-test obstacle regimen and 110k iterations for each of the others. The standard regimen uses a range of commands for forward speed, lateral speed, and turn rate. The test-obstacle regimens use only one command: straight forward at <Math mode="inline" tex="0.8\frac{m}{s}" text="0.8 * (m / s)" xml:id="S3.SS1.p2.m1">
            <XMath>
              <XMApp>
                <XMTok meaning="times" role="MULOP">⁢</XMTok>
                <XMTok meaning="0.8" role="NUMBER">0.8</XMTok>
                <XMApp>
                  <XMTok mathstyle="text" meaning="divide" role="FRACOP"/>
                  <XMTok font="italic" fontsize="70%" role="UNKNOWN">m</XMTok>
                  <XMTok font="italic" fontsize="70%" role="UNKNOWN">s</XMTok>
                </XMApp>
              </XMApp>
            </XMath>
          </Math>.</p>
      </para>
<!--  %**** root.tex Line 200 **** -->      <table inlist="lot" labels="LABEL:tab:curricula" placement="tb" xml:id="S3.T1">
        <tags>
          <tag>TABLE I</tag>
          <tag role="autoref">Table I</tag>
          <tag role="refnum">I</tag>
          <tag role="typerefnum">TABLE I</tag>
        </tags>
        <toccaption><tag close=" ">I</tag>Terrain frequencies by training regimen</toccaption>
        <caption><tag close=": ">TABLE I</tag>Terrain frequencies by training regimen</caption>
        <tabular class="ltx_centering ltx_guessed_headers" rowsep="1.0pt" vattach="middle">
          <thead>
            <tr>
              <td align="center" border="l rr t" thead="column row"><rule height="10.8pt" width="0.0pt"/> Terrain set</td>
              <td align="center" border="l rr t" colspan="5" thead="column">Standard</td>
              <td align="center" border="l r t" colspan="4" thead="column">Test Obstacles</td>
            </tr>
            <tr>
              <td align="center" border="l rr tt" thead="column row"><rule height="10.8pt" width="0.0pt"/> Terrain</td>
              <td align="center" border="r tt" thead="column">Flat</td>
              <td align="center" border="r tt" thead="column">Hills</td>
              <td align="center" border="r tt" thead="column">Ridges</td>
              <td align="center" border="r tt" thead="column">Blocks</td>
              <td align="center" border="rr tt" thead="column row">Stairs</td>
              <td align="center" border="r tt" thead="column">Slopes</td>
              <td align="center" border="r tt" thead="column">Drop-offs</td>
              <td align="center" border="r tt" thead="column">Bridges</td>
              <td align="center" border="r tt" thead="column">Gaps</td>
            </tr>
          </thead>
          <tbody>
            <tr>
              <td align="center" border="l rr tt" thead="row"><rule height="10.8pt" width="0.0pt"/> Standard</td>
              <td align="center" border="r tt">3%</td>
              <td align="center" border="r tt">7%</td>
              <td align="center" border="r tt">20%</td>
              <td align="center" border="r tt">35%</td>
              <td align="center" border="rr tt" thead="row">35%</td>
              <td align="center" border="r tt">0%</td>
              <td align="center" border="r tt">0%</td>
              <td align="center" border="r tt">0%</td>
              <td align="center" border="r tt">0%</td>
            </tr>
            <tr>
              <td align="center" border="l rr" thead="row">Multi-test</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="rr" thead="row">0%</td>
              <td align="center" border="r">25%</td>
              <td align="center" border="r">25%</td>
              <td align="center" border="r">25%</td>
              <td align="center" border="r">25%</td>
            </tr>
            <tr>
              <td align="center" border="l rr" thead="row">Combined</td>
              <td align="center" border="r">1.5%</td>
              <td align="center" border="r">3.5%</td>
              <td align="center" border="r">10%</td>
              <td align="center" border="r">17.5%</td>
              <td align="center" border="rr" thead="row">17.5%</td>
              <td align="center" border="r">12.5%</td>
              <td align="center" border="r">12.5%</td>
              <td align="center" border="r">12.5%</td>
              <td align="center" border="r">12.5%</td>
            </tr>
            <tr>
              <td align="center" border="l rr" thead="row">Single-test</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="rr" thead="row">0%</td>
              <td align="center" border="r">100%</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="r">0%</td>
            </tr>
            <tr>
              <td align="center" border="l rr" thead="row">Single-test</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="rr" thead="row">0%</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="r">100%</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="r">0%</td>
            </tr>
            <tr>
              <td align="center" border="l rr" thead="row">Single-test</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="rr" thead="row">0%</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="r">0%</td>
              <td align="center" border="r">100%</td>
              <td align="center" border="r">0%</td>
            </tr>
            <tr>
              <td align="center" border="b l rr" thead="row">Single-test</td>
              <td align="center" border="b r">0%</td>
              <td align="center" border="b r">0%</td>
              <td align="center" border="b r">0%</td>
              <td align="center" border="b r">0%</td>
              <td align="center" border="b rr" thead="row">0%</td>
              <td align="center" border="b r">0%</td>
              <td align="center" border="b r">0%</td>
              <td align="center" border="b r">0%</td>
              <td align="center" border="b r">100%</td>
            </tr>
          </tbody>
        </tabular>
<!--  %**** root.tex Line 225 **** 
     %˝-->      </table>
    </subsection>
    <subsection inlist="toc" xml:id="S3.SS2">
      <tags>
        <tag>III-B</tag>
        <tag role="autoref">subsection III-B</tag>
        <tag role="refnum">III-B</tag>
        <tag role="typerefnum">§III-B</tag>
      </tags>
      <title><tag close=" ">III-B</tag><text font="italic">Testing Setup</text></title>
      <para xml:id="S3.SS2.p1">
        <p>Our controller evaluation mirrored tests with infants, so we used only the four obstacle terrains. Thus, terrains used in the obstacle training regimens are identical to those used in testing.</p>
      </para>
      <para xml:id="S3.SS2.p2">
        <p>Cassie began each test trial facing the obstacle (slope, drop-off, gap, or bridge) at a distance of 3-3.5m, with a lateral offset of 0-0.25m. Each obstacle had a difficulty parameter, ranging from 0-1, randomized in training, that linearly adjusted the relevant property for each terrain. Cassie received 50 trials at each of 101 difficulty levels, for a total of 5050 trials per obstacle. The downward angle of slopes (1.5m long) ranged from 0-90°, the step-down height of drop-offs ranged from 0-1.5m, bridge width (1.5m long) ranged from 0.02-1.02m, and gap width ranged from 0-1m.</p>
      </para>
<!--  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
     %% Section: BEHAVIORAL ANALYSIS RESULTS
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%-->    </subsection>
  </section>
  <section inlist="toc" labels="LABEL:sec:behavioral_analysis_results" xml:id="S4">
    <tags>
      <tag>IV</tag>
      <tag role="autoref">section IV</tag>
      <tag role="refnum">IV</tag>
      <tag role="typerefnum">§IV</tag>
    </tags>
    <title><tag close=" ">IV</tag><text font="smallcaps">Behavioral Analysis Results</text></title>
<!--  %**** root.tex Line 250 **** -->    <figure inlist="lof" labels="LABEL:fig:megafigure" placement="!htb" xml:id="S4.F4">
      <tags>
        <tag>Fig. 4</tag>
        <tag role="autoref">Figure 4</tag>
        <tag role="refnum">4</tag>
        <tag role="typerefnum">Fig. 4</tag>
      </tags>
      <graphics candidates="figures/CassieFig_PaperSize_2024-06-05-1.png" class="ltx_centering" graphic="figures/CassieFig_PaperSize_2024-06-05-1.png" options="height=0.0pt" xml:id="S4.F4.g1"/>
      <toccaption class="ltx_centering"><tag close=" ">4</tag>Evaluation on slopes, drop-offs, gaps, and bridges. Each column shows behavioral results for each test obstacle across continual, systematic increase in difficulty. Curves show blind, standard, multi-test obstacle, combined standard and multi-test obstacle, and single-test obstacle training regimens. Top row: Success rates for navigating the obstacles. Second row: Average speed of walking on or over the obstacle. Third row: Average speed of last two steps prior to the obstacle. Bottom row: Placement of last step relative to the edge of the obstacle.</toccaption>
      <caption class="ltx_centering"><tag close=": ">Fig. 4</tag>Evaluation on slopes, drop-offs, gaps, and bridges. Each column shows behavioral results for each test obstacle across continual, systematic increase in difficulty. Curves show blind, standard, multi-test obstacle, combined standard and multi-test obstacle, and single-test obstacle training regimens. Top row: Success rates for navigating the obstacles. Second row: Average speed of walking on or over the obstacle. Third row: Average speed of last two steps prior to the obstacle. Bottom row: Placement of last step relative to the edge of the obstacle.</caption>
    </figure>
    <subsection inlist="toc" xml:id="S4.SS1">
      <tags>
        <tag>IV-A</tag>
        <tag role="autoref">subsection IV-A</tag>
        <tag role="refnum">IV-A</tag>
        <tag role="typerefnum">§IV-A</tag>
      </tags>
      <title><tag close=" ">IV-A</tag><text font="italic">Success Rate</text></title>
      <para xml:id="S4.SS1.p1">
        <p>Training improved Cassie’s success at navigating obstacles, but performance varied depending on the obstacle (Fig. <ref labelref="LABEL:fig:megafigure"/>, top row). Notably, the single-test obstacle regimens (green curves) ensured greater success on every obstacle compared with the other regimens. On slopes and drop-offs, every training regimen improved success relative to the blind controller (gray curves), and on the drop-offs, the multi-test and combined regimens improved performance relative to the standard regimen (red curve). However, on gaps and bridges, results were mixed. On gaps, the standard, multi-test, and combined regimens performed equivalently to the blind controller. And on bridges—even at the lowest difficulty level—blind, standard, multi-test, and combined regimens produced success rates of <Math mode="inline" tex="&lt;78\%" text="absent less 78percent" xml:id="S4.SS1.p1.m1">
            <XMath>
              <XMApp>
                <XMTok meaning="less-than" role="RELOP">&lt;</XMTok>
                <XMTok meaning="absent"/>
                <XMApp>
                  <XMTok meaning="percent" role="POSTFIX">%</XMTok>
                  <XMTok meaning="78" role="NUMBER">78</XMTok>
                </XMApp>
              </XMApp>
            </XMath>
          </Math>. Yet, training to the test in the single-test obstacle regimen demonstrates that the gap and bridge obstacles were learnable.</p>
      </para>
    </subsection>
    <subsection inlist="toc" xml:id="S4.SS2">
      <tags>
        <tag>IV-B</tag>
        <tag role="autoref">subsection IV-B</tag>
        <tag role="refnum">IV-B</tag>
        <tag role="typerefnum">§IV-B</tag>
      </tags>
      <title><tag close=" ">IV-B</tag><text font="italic">Gait Modifications Mid-Obstacle</text></title>
      <para xml:id="S4.SS2.p1">
        <p>Successful walking on more difficult obstacles was achieved in part by modifying gait after stepping on or over the obstacle (Fig. <ref labelref="LABEL:fig:megafigure"/>, second row)—that is, in the multiple steps on the slope or bridge and the single step to cross the drop-off or gap. On slopes, for example, the speed of the blind controller (gray curve) increased with difficulty as gravity pulled Cassie down steeper slopes; speed peaked at <Math mode="inline" tex="\sim" text="similar-to" xml:id="S4.SS2.p1.m1">
            <XMath>
              <XMTok meaning="similar-to" name="sim" role="RELOP">∼</XMTok>
            </XMath>
          </Math>30% difficulty and Cassie failed thereafter. The standard, multi-test, and combined training controllers also increased speed with difficulty, but speed peaked at <Math mode="inline" tex="\sim" text="similar-to" xml:id="S4.SS2.p1.m2">
            <XMath>
              <XMTok meaning="similar-to" name="sim" role="RELOP">∼</XMTok>
            </XMath>
          </Math>22% difficulty—before the peak of the blind controller—then decreased on steeper slopes as Cassie began using a braking strategy, resulting in success on steeper slopes than the blind controller could manage. The single-test training controller initially peaked even earlier at <Math mode="inline" tex="\sim" text="similar-to" xml:id="S4.SS2.p1.m3">
            <XMath>
              <XMTok meaning="similar-to" name="sim" role="RELOP">∼</XMTok>
            </XMath>
          </Math>11% difficulty, before implementing a braking strategy. The second peak for the single-test controller resulted from Cassie slipping down the slope, and speed decreased as Cassie began jumping down the slope at even more difficult increments.</p>
      </para>
      <para xml:id="S4.SS2.p2">
        <p>On drop-offs and gaps, speed increased with difficulty. For drop-offs, increased speed likely reflects effects of gravity pulling the body down, but for gaps, it likely reflects Cassie launching its body to span larger gaps. Consistent with the poor success rate on bridges, Cassie increased speed on narrower bridges—making narrower bridges more challenging.</p>
      </para>
      <para xml:id="S4.SS2.p3">
        <p>Although mid-obstacle gait modifications indicate that Cassie modified its gait to cope with more difficult obstacles, we cannot definitively categorize such adjustments as prospective. Increased speed to launch over a gap is produced before crossing (i.e., prospective), but decreased speed after stepping onto the slope could be in reaction to feeling the slant (i.e., reactive), and increased speed on the drop-off may be entirely out of Cassie’s control (i.e., neither prospective nor reactive). The best test of planning, therefore, is gait modifications <text font="italic">prior</text> to encountering the obstacle.</p>
      </para>
    </subsection>
    <subsection inlist="toc" labels="LABEL:sec:gait_modifications_prior_to_the_obstacle" xml:id="S4.SS3">
      <tags>
        <tag>IV-C</tag>
        <tag role="autoref">subsection IV-C</tag>
        <tag role="refnum">IV-C</tag>
        <tag role="typerefnum">§IV-C</tag>
      </tags>
      <title><tag close=" ">IV-C</tag><text font="italic">Gait Modifications Prior to the Obstacle</text></title>
<!--  %**** root.tex Line 275 **** -->      <para xml:id="S4.SS3.p1">
        <p>Cassie did not show compelling evidence of prospective speed adjustments prior to obstacles (Fig. <ref labelref="LABEL:fig:megafigure"/>, row 3). Speed and step length in the preceding two steps were constant or decreased only slightly across difficulty levels (e.g., from <Math mode="inline" tex="\sim" text="similar-to" xml:id="S4.SS3.p1.m1">
            <XMath>
              <XMTok meaning="similar-to" name="sim" role="RELOP">∼</XMTok>
            </XMath>
          </Math>0.83 m/s at 0 difficulty to <Math mode="inline" tex="\sim" text="similar-to" xml:id="S4.SS3.p1.m2">
            <XMath>
              <XMTok meaning="similar-to" name="sim" role="RELOP">∼</XMTok>
            </XMath>
          </Math>0.76 m/s at 50% difficulty).</p>
      </para>
      <para xml:id="S4.SS3.p2">
        <p>Cassie did, however, show evidence of prospective gait modifications based on foot placement (Fig. <ref labelref="LABEL:fig:megafigure"/>, row 4).
Cassie placed its last step prior to the obstacle closer to the edge as difficulty increased for the single-test regimen on slopes, drop-offs, and gaps, and for the standard, multi-test, and combined regimens on drop-offs and gaps. Placing the foot close to the edge is crucial, especially for drop-offs and gaps, because it shortens the size of the step needed to cross. For drop-offs, the standard, multi-test, and combined regimens’ last step landed <Math mode="inline" tex="\sim" text="similar-to" xml:id="S4.SS3.p2.m1">
            <XMath>
              <XMTok meaning="similar-to" name="sim" role="RELOP">∼</XMTok>
            </XMath>
          </Math>0.15m from the edge at difficulty 0, but dropped to <Math mode="inline" tex="\sim" text="similar-to" xml:id="S4.SS3.p2.m2">
            <XMath>
              <XMTok meaning="similar-to" name="sim" role="RELOP">∼</XMTok>
            </XMath>
          </Math>0.08m on drop-offs at difficulty <Math mode="inline" tex="\sim" text="similar-to" xml:id="S4.SS3.p2.m3">
            <XMath>
              <XMTok meaning="similar-to" name="sim" role="RELOP">∼</XMTok>
            </XMath>
          </Math>25-100%.</p>
      </para>
<!--  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
     %% Section: DISCUSSION
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%-->    </subsection>
  </section>
  <section inlist="toc" labels="LABEL:sec:discussion" xml:id="S5">
    <tags>
      <tag>V</tag>
      <tag role="autoref">section V</tag>
      <tag role="refnum">V</tag>
      <tag role="typerefnum">§V</tag>
    </tags>
    <title><tag close=" ">V</tag><text font="smallcaps">Discussion</text></title>
    <para xml:id="S5.p1">
      <p>We applied experimental methods from research with human infants to the simulated bipedal robot Cassie. Systematic manipulation of Cassie’s training regimens and tests of performance outcomes revealed differential effects on learning based on a detailed characterization of behavior. Training specifically to the test (single-test obstacle regimen) resulted in higher success rates and more prospective gait modifications than training on a variety of non-test terrains (standard regimen), a variety of test obstacles (multi-test obstacle regimen), or a combination of non-test and test obstacles (combined regimen).</p>
    </para>
    <subsection inlist="toc" labels="LABEL:sec:effects_of_training" xml:id="S5.SS1">
      <tags>
        <tag>V-A</tag>
        <tag role="autoref">subsection V-A</tag>
        <tag role="refnum">V-A</tag>
        <tag role="typerefnum">§V-A</tag>
      </tags>
      <title><tag close=" ">V-A</tag><text font="italic">Effects of Training</text></title>
      <para xml:id="S5.SS1.p1">
        <p>Cassie showed markedly superior performance on each test obstacle when trained exclusively on that obstacle compared to the other training regimens. This indicates limited generalizability between these terrains, despite qualitative similarities. Hills are akin to slopes; ridges, blocks, and stairs are akin to drop-offs. Even the test obstacles approximate each other at some levels. The steepest slope is identical to the highest drop-off, and the widest gap approximates the gap around the bridge. The bridge obstacle is the most distinctive, requiring Cassie to not only handle the terrain in front of it, but to adjust navigation and guide foot placement to keep from falling off. We see from the poor single-test performance on bridges that it is also the most difficult. This may be explained somewhat by the reward function encouraging an uncompromising heading.</p>
      </para>
      <para xml:id="S5.SS1.p2">
        <p>Interestingly, the success rates are scarcely improved from the standard regimen to the multi-test and combined regimens where Cassie is exposed to the test obstacles. The single-test controllers were trained for 20k iterations on the obstacle they would be evaluated on, whereas the controllers trained on the standard, multi-test obstacle, and combined standard and multi-test regimens were trained for 110k iterations spread across various terrains. The multi-test regimen therefore received 27.5k iterations on each test obstacle, but still shows inferior performance to single-test. This indicates that the variety of experience is hindering Cassie’s ability to learn the best strategies for each obstacle or possibly that longer training runs or larger models are required.
<!--  %**** root.tex Line 300 **** --></p>
      </para>
      <para xml:id="S5.SS1.p3">
        <p>This interpretation is corroborated by the speeds mid-obstacle in Fig. <ref labelref="LABEL:fig:megafigure"/>, row 2. The single-test (green) curves stand apart from the others, indicating a different strategy was learned. This is most easily seen on slopes (column 1), where a braking behavior is adopted earliest (at a lower difficulty) by the single-test controller. Even more telling is the shift where it speeds up again, indicating a change in strategy not seen in the other controllers.</p>
      </para>
      <para xml:id="S5.SS1.p4">
        <p>As discussed in Section <ref labelref="LABEL:sec:gait_modifications_prior_to_the_obstacle"/>, Cassie does not modulate its speed preceding an obstacle, but it does adjust foot placement, as illustrated in the last two rows of Fig. <ref labelref="LABEL:fig:megafigure"/>. The more the speed deviates from the commanded speed, the more reward is lost, so an inflexible reward function may account for the consistent speeds. However, the intentional foot placement is most clearly seen on the gap obstacle in the bottom row of Fig. <ref labelref="LABEL:fig:megafigure"/>. Curiously, the single-test step lengths are larger and more similar to the blind controller’s, whereas the other regimens produce shorter preceding footsteps, possibly indicating a reluctance to step off the ledge. This, combined with their low success rate, indicates that they haven’t discovered an effective strategy (stepping across the gap) and are falling back on optimized failure modes.</p>
      </para>
    </subsection>
    <subsection inlist="toc" xml:id="S5.SS2">
      <tags>
        <tag>V-B</tag>
        <tag role="autoref">subsection V-B</tag>
        <tag role="refnum">V-B</tag>
        <tag role="typerefnum">§V-B</tag>
      </tags>
      <title><tag close=" ">V-B</tag><text font="italic">Differences Between Babies and Robots</text></title>
      <para xml:id="S5.SS2.p1">
        <p>Cassie demonstrated motor skills more advanced than any infant—jumping down high drop-offs and recovering from near-catastrophic falls. But we tested simulated Cassie. Real Cassie—with a physical body—would have required repairs after such feats. Babies also must deal with the consequences of errors, but cannot be taken to the shop for repairs. Instead, their body is built to cope with frequent errors in learning to walk: infants are small, low to the ground, and move slowly, decreasing impact forces produced by a fall <cite class="ltx_citemacro_cite">[<bibref bibrefs="han2021impact" separator="," yyseparator=","/>]</cite>.</p>
      </para>
      <para xml:id="S5.SS2.p2">
        <p>Moreover, every baby learns things Cassie did not learn: Infants use a wide range of prospective gait modifications, invent and use alternative strategies for obstacles where modifications are insufficient, and avoid crossing impossible obstacles <cite class="ltx_citemacro_cite">[<bibref bibrefs="adolph2019annreview,adolph2018tics,hospodar2024wires" separator="," yyseparator=","/>]</cite>. Most critical, babies generalize learning. No infant experiences single-test training. To acquire behavioral flexibility and prospective control of locomotion, infants must generalize from everyday experiences. In this regard, any 18-month-old can run circles around Cassie. Infants’ greater flexibility and adaptability may result from more powerful learning mechanisms than pure RL. One hypothesis is that infants are “learning to learn”. That is, they learn to generate and gather the relevant information and use it to guide their actions from moment to moment.</p>
      </para>
<!--  %=============================================================================== -->    </subsection>
  </section>
  <section inlist="toc" labels="LABEL:sec:limitations" xml:id="S6">
    <tags>
      <tag>VI</tag>
      <tag role="autoref">section VI</tag>
      <tag role="refnum">VI</tag>
      <tag role="typerefnum">§VI</tag>
    </tags>
    <title><tag close=" ">VI</tag><text font="smallcaps">Limitations and Future Work</text></title>
    <para xml:id="S6.p1">
      <p>Our case study tested controller behavior only at a single point in training with a given regimen. Future work should investigate the development of learned behaviors at varied points in training to understand the trajectory of learning.</p>
    </para>
    <para xml:id="S6.p2">
      <p>In contrast to infants, Cassie did not prospectively modify speed while approaching obstacles. Apparently, infants’ natural training regimen teaches them to prospectively modify their speed to better cope with obstacles even as they are learning to walk. Decreased speed is useful for walking down steep slopes, high drop-offs, and narrow bridges, and increased speed is useful to leap over wide gaps. However, Cassie received an explicit, fixed speed command, and was encouraged to abide by it in the reward function. Thus, speed adjustments must yield greater improvements in the reward than the reward lost due to the speed error, or else those behaviors won’t be learned. Our fixed speed command may have precluded Cassie’s discovery of speed adjustment. To improve robot learning and to better understand infant walking, future work should consider relaxing the speed command to give the controller greater flexibility to modify gait and adopt novel strategies.</p>
    </para>
<!--  %**** root.tex Line 325 **** 
     %===============================================================================-->  </section>
  <section inlist="toc" labels="LABEL:sec:conclusion" xml:id="S7">
    <tags>
      <tag>VII</tag>
      <tag role="autoref">section VII</tag>
      <tag role="refnum">VII</tag>
      <tag role="typerefnum">§VII</tag>
    </tags>
    <title><tag close=" ">VII</tag><text font="smallcaps">Conclusions</text></title>
    <para xml:id="S7.p1">
      <p>The simulated bipedal robot Cassie learned to modify its gait via precise foot placement just prior to the obstacle and by modifying speed while walking on or stepping over the obstacle. However, systematic training and testing based on methods from developmental research with human infants revealed that every training regimen produced more limited generalization and less adaptive behavioral modifications than expected, but a greater ability to jump, launch, and recover balance after large-amplitude movements and near falls. Likely, babies beat bots because they are “learning to learn” rather than responding solely to rewards.</p>
    </para>
<!--  %=============================================================================== 
     %This command serves to balance the column lengths
     %on the last page of the document manually. It shortens
     %the textheight of the last page by a suitable amount.
     %This command does not take effect until the next page
     %so it should come on the page before the last. Make
     %sure that you do not shorten the textheight too much.
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     %**** root.tex Line 350 ****
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     %Generated by IEEEtran.bst, version: 1.14 (2015/08/26)-->  </section>
  <bibliography xml:id="bib">
    <title>References</title>
    <biblist>
      <bibitem key="adolph2019annreview" xml:id="bib.bib1">
        <tags>
          <tag>[1]</tag>
          <tag role="autoref">1</tag>
          <tag role="refnum">1</tag>
        </tags>
        <bibblock>
K. E. Adolph and J. E. Hoch, “Motor development: Embodied, embedded, enculturated, and enabling,” <emph font="italic">Annual review of psychology</emph>, vol. 70, pp. 141–164, 2019.
<!--  %**** root.bbl Line 25 **** --></bibblock>
      </bibitem>
      <bibitem key="adolph2021visualcliff" xml:id="bib.bib2">
        <tags>
          <tag>[2]</tag>
          <tag role="autoref">2</tag>
          <tag role="refnum">2</tag>
        </tags>
        <bibblock>
K. Adolph, B. Kaplan, and K. Kretch, “Infants on the edge: Beyond the visual cliff,” in <emph font="italic">Revisiting the classic studies: Developmental psychology</emph>.   Sage Publications, 2021.
</bibblock>
      </bibitem>
      <bibitem key="adolph2018tics" xml:id="bib.bib3">
        <tags>
          <tag>[3]</tag>
          <tag role="autoref">3</tag>
          <tag role="refnum">3</tag>
        </tags>
        <bibblock>
K. E. Adolph, J. E. Hoch, and W. G. Cole, “Development (of walking): 15 suggestions,” <emph font="italic">Trends in Cognitive Sciences</emph>, vol. 22, no. 8, pp. 699–711, 2018.
</bibblock>
      </bibitem>
      <bibitem key="hospodar2024wires" xml:id="bib.bib4">
        <tags>
          <tag>[4]</tag>
          <tag role="autoref">4</tag>
          <tag role="refnum">4</tag>
        </tags>
        <bibblock>
C. M. Hospodar and K. E. Adolph, “The development of gait and mobility: Form and function in infant locomotion,” <emph font="italic">Wiley Interdisciplinary Reviews: Cognitive Science</emph>, p. e1677, 2024.
</bibblock>
      </bibitem>
      <bibitem key="cole2016bouts" xml:id="bib.bib5">
        <tags>
          <tag>[5]</tag>
          <tag role="autoref">5</tag>
          <tag role="refnum">5</tag>
        </tags>
        <bibblock>
W. G. Cole, S. R. Robinson, and K. E. Adolph, “Bouts of steps: The organization of infant exploration,” <emph font="italic">Developmental psychobiology</emph>, vol. 58, no. 3, pp. 341–354, 2016.
</bibblock>
      </bibitem>
      <bibitem key="lee2018cost" xml:id="bib.bib6">
        <tags>
          <tag>[6]</tag>
          <tag role="autoref">6</tag>
          <tag role="refnum">6</tag>
        </tags>
        <bibblock>
D. K. Lee, W. G. Cole, L. Golenia, and K. E. Adolph, “The cost of simplifying complex developmental phenomena: A new perspective on learning to walk,” <emph font="italic">Developmental science</emph>, vol. 21, no. 4, p. e12615, 2018.
</bibblock>
      </bibitem>
      <bibitem key="hospodar2021practice" xml:id="bib.bib7">
        <tags>
          <tag>[7]</tag>
          <tag role="autoref">7</tag>
          <tag role="refnum">7</tag>
        </tags>
        <bibblock>
C. M. Hospodar, J. E. Hoch, D. K. Lee, P. E. Shrout, and K. E. Adolph, “Practice and proficiency: Factors that facilitate infant walking skill,” <emph font="italic">Developmental psychobiology</emph>, vol. 63, no. 7, p. e22187, 2021.
</bibblock>
      </bibitem>
      <bibitem key="adolph2012learntowalk" xml:id="bib.bib8">
        <tags>
          <tag>[8]</tag>
          <tag role="autoref">8</tag>
          <tag role="refnum">8</tag>
        </tags>
        <bibblock>
K. E. Adolph, W. G. Cole, M. Komati, J. S. Garciaguirre, D. Badaly, J. M. Lingeman, G. L. Chan, and R. B. Sotsky, “How do you learn to walk? Thousands of steps and dozens of falls per day,” <emph font="italic">Psychological science</emph>, vol. 23, no. 11, pp. 1387–1394, 2012.
</bibblock>
      </bibitem>
      <bibitem key="adolph1997learning" xml:id="bib.bib9">
        <tags>
          <tag>[9]</tag>
          <tag role="autoref">9</tag>
          <tag role="refnum">9</tag>
        </tags>
        <bibblock>
K. E. Adolph, B. I. Bertenthal, S. M. Boker, E. C. Goldfield, and E. J. Gibson, “Learning in the development of infant locomotion,” <emph font="italic">Monographs of the society for research in child development</emph>, pp. i–162, 1997.
<!--  %**** root.bbl Line 50 **** --></bibblock>
      </bibitem>
      <bibitem key="gill2009change" xml:id="bib.bib10">
        <tags>
          <tag>[10]</tag>
          <tag role="autoref">10</tag>
          <tag role="refnum">10</tag>
        </tags>
        <bibblock>
S. V. Gill, K. E. Adolph, and B. Vereijken, “Change in action: How infants learn to walk down slopes,” <emph font="italic">Developmental science</emph>, vol. 12, no. 6, pp. 888–902, 2009.
</bibblock>
      </bibitem>
      <bibitem key="kretch2013cliff" xml:id="bib.bib11">
        <tags>
          <tag>[11]</tag>
          <tag role="autoref">11</tag>
          <tag role="refnum">11</tag>
        </tags>
        <bibblock>
K. S. Kretch and K. E. Adolph, “Cliff or step? Posture-specific learning at the edge of a drop-off,” <emph font="italic">Child development</emph>, vol. 84, no. 1, pp. 226–240, 2013.
</bibblock>
      </bibitem>
      <bibitem key="kretch2013bridge" xml:id="bib.bib12">
        <tags>
          <tag>[12]</tag>
          <tag role="autoref">12</tag>
          <tag role="refnum">12</tag>
        </tags>
        <bibblock>
——, “No bridge too high: Infants decide whether to cross based on the probability of falling not the severity of the potential fall,” <emph font="italic">Developmental science</emph>, vol. 16, no. 3, pp. 336–351, 2013.
</bibblock>
      </bibitem>
      <bibitem key="kretch2017organization" xml:id="bib.bib13">
        <tags>
          <tag>[13]</tag>
          <tag role="autoref">13</tag>
          <tag role="refnum">13</tag>
        </tags>
        <bibblock>
——, “The organization of exploratory behaviors in infant locomotor planning,” <emph font="italic">Developmental science</emph>, vol. 20, no. 4, p. e12421, 2017.
</bibblock>
      </bibitem>
      <bibitem key="duan2023learning" xml:id="bib.bib14">
        <tags>
          <tag>[14]</tag>
          <tag role="autoref">14</tag>
          <tag role="refnum">14</tag>
        </tags>
        <bibblock>
H. Duan, B. Pandit, M. S. Gadde, B. van Marum, J. Dao, C. Kim, and A. Fern, “Learning vision-based bipedal locomotion for challenging terrain,” 2023.
</bibblock>
      </bibitem>
      <bibitem key="peng2017learning" xml:id="bib.bib15">
        <tags>
          <tag>[15]</tag>
          <tag role="autoref">15</tag>
          <tag role="refnum">15</tag>
        </tags>
        <bibblock>
X. B. Peng and M. van de Panne, “Learning locomotion skills using DeepRL: Does the choice of action space matter?” in <emph font="italic">Proceedings of the ACM SIGGRAPH/Eurographics Symposium on Computer Animation</emph>.   ACM, 2017, p. 12.
</bibblock>
      </bibitem>
      <bibitem key="xie2018feedback" xml:id="bib.bib16">
        <tags>
          <tag>[16]</tag>
          <tag role="autoref">16</tag>
          <tag role="refnum">16</tag>
        </tags>
        <bibblock>
Z. Xie, G. Berseth, P. Clary, J. Hurst, and M. van de Panne, “Feedback control for Cassie with deep reinforcement learning,” in <emph font="italic">2018 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)</emph>.   IEEE, 2018, pp. 1241–1246.
</bibblock>
      </bibitem>
      <bibitem key="tan2018AgileLocomotion" xml:id="bib.bib17">
        <tags>
          <tag>[17]</tag>
          <tag role="autoref">17</tag>
          <tag role="refnum">17</tag>
        </tags>
        <bibblock>
J. Tan, T. Zhang, E. Coumans, A. Iscen, Y. Bai, D. Hafner, S. Bohez, and V. Vanhoucke, “Sim-to-real: Learning agile locomotion for quadruped robots,” in <emph font="italic">Proc. of Robotics: Science and Systems XIV</emph>.   Pittsburgh, Pennsylvania: Robotics: Science and Systems Foundation, 6 2018. [Online]. Available: <ref class="ltx_url" font="typewriter" href="http://www.roboticsproceedings.org/rss14/p10.html">http://www.roboticsproceedings.org/rss14/p10.html</ref>
<!--  %**** root.bbl Line 75 **** --></bibblock>
      </bibitem>
      <bibitem key="hwangbo2019learning" xml:id="bib.bib18">
        <tags>
          <tag>[18]</tag>
          <tag role="autoref">18</tag>
          <tag role="refnum">18</tag>
        </tags>
        <bibblock>
J. Hwangbo, J. Lee, A. Dosovitskiy, D. Bellicoso, V. Tsounis, V. Koltun, and M. Hutter, “<ref class="ltx_href" href="https://robotics.sciencemag.org/content/4/26/eaau5872">Learning agile and dynamic motor skills for legged robots</ref>,” <emph font="italic">Science Robotics</emph>, vol. 4, no. 26, 2019. [Online]. Available: <ref class="ltx_url" font="typewriter" href="https://robotics.sciencemag.org/content/4/26/eaau5872">https://robotics.sciencemag.org/content/4/26/eaau5872</ref>
</bibblock>
      </bibitem>
      <bibitem key="TsounisDeepGait" xml:id="bib.bib19">
        <tags>
          <tag>[19]</tag>
          <tag role="autoref">19</tag>
          <tag role="refnum">19</tag>
        </tags>
        <bibblock>
V. Tsounis, M. Alge, J. Lee, F. Farshidian, and M. Hutter, “DeepGait: Planning and control of quadrupedal gaits using deep reinforcement learning,” <emph font="italic">IEEE Robotics and Automation Letters</emph>, vol. 5, no. 2, pp. 3699–3706, 2020.
</bibblock>
      </bibitem>
      <bibitem key="siekmann2020learning" xml:id="bib.bib20">
        <tags>
          <tag>[20]</tag>
          <tag role="autoref">20</tag>
          <tag role="refnum">20</tag>
        </tags>
        <bibblock>
J. Siekmann, S. Valluri, J. Dao, L. Bermillo, H. Duan, A. Fern, and J. Hurst, “<ref class="ltx_href" href="https://roboticsconference.org/2020/program/papers/31.html">Learning memory-based control for human-scale bipedal locomotion</ref>,” in <emph font="italic">Proceedings of Robotics: Science and Systems</emph>, 7 2020.
</bibblock>
      </bibitem>
      <bibitem key="siekmann2020simtoreal" xml:id="bib.bib21">
        <tags>
          <tag>[21]</tag>
          <tag role="autoref">21</tag>
          <tag role="refnum">21</tag>
        </tags>
        <bibblock>
J. Siekmann, Y. Godse, A. Fern, and J. Hurst, “<ref class="ltx_href" href="https://arxiv.org/abs/2011.01387">Sim-to-Real Learning of All Common Bipedal Gaits via Periodic Reward Composition</ref>,” in <emph font="italic">IEEE International Conference on Robotics and Automation (ICRA)</emph>, 2021.
</bibblock>
      </bibitem>
      <bibitem key="siekmann2021stairs" xml:id="bib.bib22">
        <tags>
          <tag>[22]</tag>
          <tag role="autoref">22</tag>
          <tag role="refnum">22</tag>
        </tags>
        <bibblock>
J. Siekmann, K. Green, J. Warila, A. Fern, and J. Hurst, “Blind Bipedal Stair Traversal via Sim-to-Real Reinforcement Learning,” in <emph font="italic">Proceedings of Robotics: Science and Systems</emph>, vol. abs/2105.08328, Virtual, 7 2021. [Online]. Available: <ref class="ltx_url" font="typewriter" href="https://arxiv.org/abs/2105.08328">https://arxiv.org/abs/2105.08328</ref>
</bibblock>
      </bibitem>
      <bibitem key="schulman2017proximal" xml:id="bib.bib23">
        <tags>
          <tag>[23]</tag>
          <tag role="autoref">23</tag>
          <tag role="refnum">23</tag>
        </tags>
        <bibblock>
J. Schulman, F. Wolski, P. Dhariwal, A. Radford, and O. Klimov, “<ref class="ltx_href" href="https://arxiv.org/abs/1707.06347">Proximal Policy Optimization Algorithms</ref>,” 2017. [Online]. Available: <ref class="ltx_url" font="typewriter" href="https://arxiv.org/abs/1707.06347">https://arxiv.org/abs/1707.06347</ref>
<!--  %**** root.bbl Line 100 **** --></bibblock>
      </bibitem>
      <bibitem key="todorov2012mujoco" xml:id="bib.bib24">
        <tags>
          <tag>[24]</tag>
          <tag role="autoref">24</tag>
          <tag role="refnum">24</tag>
        </tags>
        <bibblock>
E. Todorov, T. Erez, and Y. Tassa, “<ref class="ltx_href" href="https://ieeexplore.ieee.org/abstract/document/6386109">MuJoCo: A physics engine for model-based control</ref>,” in <emph font="italic">2012 IEEE/RSJ International Conference on Intelligent Robots and Systems</emph>.   IEEE, 2012, pp. 5026–5033.
</bibblock>
      </bibitem>
      <bibitem key="yu2022dynamic" xml:id="bib.bib25">
        <tags>
          <tag>[25]</tag>
          <tag role="autoref">25</tag>
          <tag role="refnum">25</tag>
        </tags>
        <bibblock>
F. Yu, R. Batke, J. Dao, J. Hurst, K. Green, and A. Fern, “Dynamic bipedal maneuvers through sim-to-real reinforcement learning,” 2022.
</bibblock>
      </bibitem>
      <bibitem key="dao2023simtoreal" xml:id="bib.bib26">
        <tags>
          <tag>[26]</tag>
          <tag role="autoref">26</tag>
          <tag role="refnum">26</tag>
        </tags>
        <bibblock>
J. Dao, H. Duan, and A. Fern, “Sim-to-real learning for humanoid box loco-manipulation,” 2023.
</bibblock>
      </bibitem>
      <bibitem key="crowley2023gaits" xml:id="bib.bib27">
        <tags>
          <tag>[27]</tag>
          <tag role="autoref">27</tag>
          <tag role="refnum">27</tag>
        </tags>
        <bibblock>
D. Crowley, J. Dao, H. Duan, K. Green, J. Hurst, and A. Fern, “Optimizing bipedal locomotion for the 100m dash with comparison to human running,” in <emph font="italic">2023 IEEE International Conference on Robotics and Automation (ICRA)</emph>, 2023, pp. 12 205–12 211.
</bibblock>
      </bibitem>
      <bibitem key="han2021impact" xml:id="bib.bib28">
        <tags>
          <tag>[28]</tag>
          <tag role="autoref">28</tag>
          <tag role="refnum">28</tag>
        </tags>
        <bibblock>
D. Han and K. E. Adolph, “The impact of errors in infant development: Falling like a baby,” <emph font="italic">Developmental Science</emph>, vol. 24, no. 5, p. e13069, 2021.
</bibblock>
      </bibitem>
    </biblist>
  </bibliography>
<!--  %.bib --></document>
