<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<!-- NOTE(review): this doctype has no system identifier, which selects quirks-mode rendering.
     It is kept unchanged on purpose: switching doctypes would alter how the table-based layout is sized. -->
<html lang="en">

<!-- ======================================================================= -->
<!-- jQuery 1.3.2, fetched directly from Google Hosted Libraries over HTTPS.
     The previous http:// jsapi loader is blocked as mixed content when the page is served
     over HTTPS, which left `google` undefined and made the google.load() call throw. -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.3.2/jquery.min.js"></script>
<style>
  /* ---- Page frame: 1100px-wide body with auto side margins ---- */
  body {
    width: 1100px;
    margin-left: auto;
    margin-right: auto;
    font-family: "Titillium Web", "HelveticaNeue-Light", "Helvetica Neue Light", "Helvetica Neue", Helvetica, Arial, "Lucida Grande", sans-serif;
    font-size: 18px;
    font-weight: 300;
  }

  h1 {
    font-weight: 300;
  }

  /* ---- Rounded boxes and media (10px corner radius throughout) ---- */
  .disclaimerbox {
    padding: 20px;
    background-color: #eee;
    border: 1px solid #eee;
    -webkit-border-radius: 10px;
    -moz-border-radius: 10px;
    border-radius: 10px;
  }

  /* Header thumbnails: videos and images share the same fixed height and frame. */
  video.header-vid,
  img.header-img {
    height: 140px;
    border: 1px solid black;
    -webkit-border-radius: 10px;
    -moz-border-radius: 10px;
    border-radius: 10px;
  }

  img.rounded {
    border: 1px solid #eee;
    -webkit-border-radius: 10px;
    -moz-border-radius: 10px;
    border-radius: 10px;
  }

  /* ---- Links ---- */
  a:link,
  a:visited {
    color: #1367a7;
    text-decoration: none;
  }

  a:hover {
    color: #208799;
  }

  td.dl-link {
    height: 160px;
    font-size: 22px;
    text-align: center;
  }

  /* ---- Stacked-paper effect (modified from: http://css-tricks.com/snippets/css/layered-paper/) ----
     Each extra sheet is a solid white shadow plus a soft dark one, offset 5px further than the last. */
  .layered-paper-big {
    margin-left: 10px;
    margin-right: 45px;
    box-shadow:
      0 0 1px 1px rgba(0, 0, 0, 0.35),     /* top sheet: shadow */
      5px 5px 0 0 #fff,                     /* 2nd sheet */
      5px 5px 1px 1px rgba(0, 0, 0, 0.35),  /* 2nd sheet: shadow */
      10px 10px 0 0 #fff,                   /* 3rd sheet */
      10px 10px 1px 1px rgba(0, 0, 0, 0.35),/* 3rd sheet: shadow */
      15px 15px 0 0 #fff,                   /* 4th sheet */
      15px 15px 1px 1px rgba(0, 0, 0, 0.35),/* 4th sheet: shadow */
      20px 20px 0 0 #fff,                   /* 5th sheet */
      20px 20px 1px 1px rgba(0, 0, 0, 0.35),/* 5th sheet: shadow */
      25px 25px 0 0 #fff,                   /* 6th sheet */
      25px 25px 1px 1px rgba(0, 0, 0, 0.35);/* 6th sheet: shadow */
  }

  /* Smaller variant of the same effect: three sheets instead of six. */
  .layered-paper {
    margin-top: 5px;
    margin-right: 30px;
    margin-bottom: 5px;
    margin-left: 10px;
    box-shadow:
      0 0 1px 1px rgba(0, 0, 0, 0.35),     /* top sheet: shadow */
      5px 5px 0 0 #fff,                     /* 2nd sheet */
      5px 5px 1px 1px rgba(0, 0, 0, 0.35),  /* 2nd sheet: shadow */
      10px 10px 0 0 #fff,                   /* 3rd sheet */
      10px 10px 1px 1px rgba(0, 0, 0, 0.35);/* 3rd sheet: shadow */
  }

  /* Shifts the element down by half its container and back up by half its own height. */
  .vert-cent {
    position: relative;
    top: 50%;
    transform: translateY(-50%);
  }

  /* Section divider: a 1px line that fades to transparent at both ends. */
  hr {
    height: 1px;
    border: 0;
    background-image: linear-gradient(to right, rgba(0, 0, 0, 0), rgba(0, 0, 0, 0.75), rgba(0, 0, 0, 0));
  }

  #authors td {
    padding-top: 30px;
    padding-bottom: 5px;
  }
</style>
<!-- ======================================================================= -->

<!-- Start : Google Analytics Code -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-64069893-3"></script>
<script>
  // Command queue shared with the gtag.js script requested above; reuse it if it already exists.
  window.dataLayer = window.dataLayer || [];

  // Pushes the raw `arguments` object of each call onto the queue.
  function gtag() {
    dataLayer.push(arguments);
  }

  gtag('js', new Date());
  gtag('config', 'UA-64069893-3');
</script>
<!-- End : Google Analytics Code -->

<!-- Local helper script; presumably provides togglebib() / hideallbibs(), which are called further down the page. -->
<script src="resources/hidebib.js"></script>
<!-- Titillium Web webfont (first entry of the body font stack). -->
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Titillium+Web:400,600,400italic,600italic,300,300italic">
<head>
  <!-- NOTE(review): scripts and styles placed before this tag already open the head implicitly,
       so this charset declaration sits well past the first 1024 bytes of the file.
       Consider moving <head> (and this <meta>) to the top of the document. -->
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <link rel="icon" type="image/png" href="resources/seal_icon.png">
  <title>Large-Scale Study of Curiosity-Driven Learning</title>
  <meta name="HandheldFriendly" content="True">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <link rel="canonical" href="https://pathak22.github.io/large-scale-curiosity/">
  <meta name="referrer" content="no-referrer-when-downgrade">

  <!-- Open Graph link-preview metadata -->
  <meta property="og:site_name" content="Large-Scale Curiosity">
  <meta property="og:type" content="video.other">
  <meta property="og:title" content="Large-Scale Study of Curiosity-Driven Learning">
  <meta property="og:description" content="Burda*, Edwards*, Pathak* et al. (* equal contribution, alphabetical order) Large-Scale Study of Curiosity-Driven Learning. 2018.">
  <meta property="og:url" content="https://pathak22.github.io/large-scale-curiosity/">
  <meta property="og:image" content="https://pathak22.github.io/large-scale-curiosity/resources/teaser.jpg">
  <meta property="og:image:width" content="1600">
  <meta property="og:image:height" content="900">
  <meta property="og:video" content="https://www.youtube.com/v/l1FqtAHfJLI">
  <meta property="article:publisher" content="http://people.eecs.berkeley.edu/~pathak/">

  <!-- Twitter card metadata.
       NOTE(review): twitter:card is declared twice with different values ("summary_large_image"
       here, "player" below). Both are kept because it is unclear which one the crawler honours;
       pick one once that is confirmed. -->
  <meta name="twitter:card" content="summary_large_image">
  <meta name="twitter:title" content="Large-Scale Study of Curiosity-Driven Learning">
  <meta name="twitter:description" content="Burda*, Edwards*, Pathak* et al. (* equal contribution, alphabetical order) Large-Scale Study of Curiosity-Driven Learning. 2018.">
  <meta name="twitter:url" content="https://pathak22.github.io/large-scale-curiosity/">
  <meta name="twitter:image" content="https://pathak22.github.io/large-scale-curiosity/resources/teaser.jpg">
  <meta name="twitter:label1" content="Written by">
  <meta name="twitter:data1" content="Deepak Pathak">
  <meta name="twitter:label2" content="Filed under">
  <meta name="twitter:data2" content="">
  <meta name="twitter:site" content="@pathak">

  <script src="https://www.youtube.com/iframe_api"></script>
  <!-- Player-card fields. The "&" in the player URL is escaped as &amp;; the parsed URL is unchanged. -->
  <meta name="twitter:card" content="player">
  <meta name="twitter:player" content="https://www.youtube.com/embed/l1FqtAHfJLI?rel=0&amp;showinfo=0">
  <meta name="twitter:player:width" content="640">
  <meta name="twitter:player:height" content="360">
</head>

<body>
<!-- Page title, rendered as two centred lines -->
<br>
<center>
  <span style="font-size:44px;font-weight:bold;">Large-Scale Study of<br>Curiosity-Driven Learning</span>
</center>
<br>
<!-- First author row: names (with the * marker explained in the footnote below) over affiliations.
     Rows are closed with </tr>; the former "<tr/>" was parsed as a new opening tag and produced empty extra rows. -->
<table align="center" width="800px" cellpadding="0" cellspacing="0">
  <tr>
    <td align="center" width="200px">
      <center><span style="font-size:22px"><a href="https://sites.google.com/site/yburda/" target="_blank" rel="noopener">Yuri Burda&nbsp;&#42;</a></span></center>
    </td>
    <td align="center" width="200px">
      <center><span style="font-size:22px"><a href="https://github.com/harri-edwards/" target="_blank" rel="noopener">Harri Edwards&nbsp;&#42;</a></span></center>
    </td>
    <td align="center" width="200px">
      <center><span style="font-size:22px"><a href="https://people.eecs.berkeley.edu/~pathak/" target="_blank" rel="noopener">Deepak Pathak&nbsp;&#42;</a></span></center>
    </td>
  </tr>
  <tr>
    <td align="center" width="200px">
      <center><span style="font-size:20px">OpenAI</span></center>
    </td>
    <td align="center" width="200px">
      <center><span style="font-size:20px">OpenAI</span></center>
    </td>
    <td align="center" width="200px">
      <center><span style="font-size:20px">UC Berkeley</span></center>
    </td>
  </tr>
</table><br>
<!-- Second author row: names over affiliations.
     Rows are closed with </tr>; the former "<tr/>" was parsed as a new opening tag and produced empty extra rows. -->
<table align="center" width="800px" cellpadding="0" cellspacing="0">
  <tr>
    <td align="center" width="200px">
      <center><span style="font-size:22px"><a href="http://homepages.inf.ed.ac.uk/amos/" target="_blank" rel="noopener">Amos Storkey</a></span></center>
    </td>
    <td align="center" width="200px">
      <center><span style="font-size:22px"><a href="https://people.eecs.berkeley.edu/~trevor/" target="_blank" rel="noopener">Trevor Darrell</a></span></center>
    </td>
    <td align="center" width="200px">
      <center><span style="font-size:22px"><a href="https://people.eecs.berkeley.edu/~efros/" target="_blank" rel="noopener">Alexei A. Efros</a></span></center>
    </td>
  </tr>
  <tr>
    <td align="center" width="200px">
      <center><span style="font-size:20px">Univ. of Edinburgh</span></center>
    </td>
    <td align="center" width="200px">
      <center><span style="font-size:20px">UC Berkeley</span></center>
    </td>
    <td align="center" width="200px">
      <center><span style="font-size:20px">UC Berkeley</span></center>
    </td>
  </tr>
</table>
<!-- Footnote explaining the * marker next to the author names.
     The row is closed with </tr> (was "<tr/>", which opened a second, empty row). -->
<table align="center" width="600px" style="padding-top:20px;padding-bottom:20px">
  <tr>
    <td align="center" width="600px"><center><span style="font-size:20px">* alphabetical ordering, equal contribution</span></center></td>
  </tr>
</table>
<!-- <table align=center width=600px>
<tr>
<td align=center width=650px>
<center><span style="font-size:22px"><a href="" target="_blank">To be submitted</a></span></center></td>
<tr/>
</table> -->
<!-- Quick links to the paper PDF and the code repository.
     The row is closed with </tr> (was "<tr/>", which opened a second, empty row). -->
<table align="center" width="700px">
  <tr>
    <td align="center" width="200px"><center><span style="font-size:22px"><a href="resources/largeScaleCuriosity2018.pdf">[Download Paper]</a></span></center></td>
    <td align="center" width="200px"><center><span style="font-size:22px"><a href="https://github.com/openai/large-scale-curiosity">[GitHub Code]</a></span></center></td>
  </tr>
</table><br>

<!-- Embedded YouTube video. The iframe carries a title so assistive technology can announce what the frame contains. -->
<table align="center" width="300px">
  <tr><td align="center" width="300px">
    <iframe width="768" height="432" src="https://www.youtube.com/embed/l1FqtAHfJLI" title="Large-Scale Study of Curiosity-Driven Learning - video" frameborder="0" allowfullscreen></iframe>
  </td></tr>
</table>
<br>

<!-- Abstract.
     NOTE(review): the style attribute used to end with "text-align=center", which is invalid CSS
     ("=" instead of ":") and was never applied. It is removed so the shipped left-aligned rendering
     is explicit; use "text-align: center" if centring was actually intended. -->
<div style="width:800px; margin:0 auto;">
Reinforcement learning algorithms rely on carefully engineering environment rewards that are extrinsic to the agent. However, annotating each environment with hand-designed, dense rewards is not scalable, motivating the need for developing reward functions that are intrinsic to the agent. Curiosity is a type of intrinsic reward function which uses prediction error as reward signal.<br><br>In this paper:<br>
(a) We perform the first large-scale study of purely curiosity-driven learning, i.e. <i>without any extrinsic rewards</i>, across 54 standard benchmark environments, including the Atari game suite. Our results show surprisingly good performance, and a high degree of alignment between the intrinsic curiosity objective and the hand-designed extrinsic rewards of many game environments.<br>
(b) We investigate the effect of using different feature spaces for computing prediction error and show that random features are sufficient for many popular RL game benchmarks, but learned features appear to generalize better (e.g. to novel game levels in Super Mario Bros.).<br>
(c) We demonstrate limitations of the prediction-based rewards in stochastic setups.
</div>
<br><hr>

<!-- Section: overview of the environments, download link for the game-play videos, and the teaser image.
     NOTE(review): the intro <div> used to carry "text-align=center", which is invalid CSS and was never
     applied; it is removed so the shipped left-aligned rendering is explicit. -->
<center><h1>Curiosity-Driven Learning Without Extrinsic Rewards</h1></center>
<div style="width:800px; margin:0 auto;">
A snapshot of the 54 environments investigated in the paper. We show that agents are able to make progress using no extrinsic reward, or end-of-episode signal, and only using curiosity.
</div><br>
<center><span style="font-size:22px"><a href="resources/gameplay_videos.zip">[Click here to download game-play videos of all 54 environments]<br>[no reward, only curiosity] [32MB]</a></span></center>
<!-- Spacer paragraph. It used to sit directly inside <table>, where the parser relocates it to just
     before the table anyway; placing it here keeps the same DOM with valid markup. -->
<p style="margin-top:4px;"></p>
<table align="center" width="1000px">
  <tr><td width="1000px">
    <center><a href="resources/teaser.jpg"><img src="resources/teaser.jpg" height="350" alt="Snapshot of the 54 environments investigated in the paper"></a><br></center>
  </td></tr>
</table>
<br><hr>

<!-- Section: pointer to the released implementation.
     NOTE(review): the intro <div> used to carry "text-align=center", which is invalid CSS and was never
     applied; it is removed so the shipped left-aligned rendering is explicit. Use "text-align: center"
     if centring was actually intended. -->
<center id="sourceCode"><h1>Source Code and Environment</h1></center>
<div style="width:800px; margin:0 auto;">
We have released the TensorFlow based implementation on the github page. Try our code!
</div>
<table align="center" width="900px">
  <tr>
    <td width="300px" align="center">
      <span style="font-size:28px"><a href="https://github.com/openai/large-scale-curiosity">[GitHub]</a></span>
    </td>
  </tr>
</table>
<br><hr>

<!-- Section: paper thumbnail and links, citation text, and the toggleable BibTeX entry.
     The heading used to sit directly inside <table>, where the parser relocates it to just before
     the table anyway; placing it here keeps the same DOM with valid markup. -->
<center><h1>Paper and Bibtex</h1></center>
<table align="center" width="850px">
  <tr>
    <td width="250px" align="left">
      <a href="resources/largeScaleCuriosity2018.pdf"><img style="height:150px" src="resources/thumbnail.jpeg" alt="Large-Scale Study of Curiosity-Driven Learning (paper PDF)"></a>
      <center>
        <!-- One span around both links: the two original spans were never closed. -->
        <span style="font-size:20pt"><a href="resources/largeScaleCuriosity2018.pdf">[Paper]</a>&nbsp;
        <a href="https://arxiv.org/abs/1808.04355">[ArXiv]</a></span>
      </center>
    </td>
    <td width="50px" align="center">
    </td>
    <td width="550px" align="left">
      <p style="text-align:left;"><b><span style="font-size:20pt">Citation</span></b><br><span style="font-size:6px;">&nbsp;<br></span> <span style="font-size:15pt">Yuri Burda, Harri Edwards, Deepak Pathak,<br>Amos Storkey, Trevor Darrell and Alexei A. Efros. <b>Large-Scale Study of Curiosity-Driven Learning<br></b> In <i>arXiv:1808.04355</i> 2018.</span></p>
      <!-- The argument passed to togglebib() is the id of the BibTeX container in the next row. -->
      <span style="font-size:20pt"><a shape="rect" href="javascript:togglebib('largeScaleCuriosity2018_bib')" class="togglebib">[Bibtex]</a></span>
    </td>
  </tr>
  <tr>
    <td width="250px" align="left">
    </td>
    <td width="50px" align="center">
    </td>
    <td width="550px" align="left">
      <!-- Keep the class, id and <pre> structure as they are: the external hidebib.js presumably relies on them. -->
      <div class="paper" id="largeScaleCuriosity2018_bib">
<pre>
@inproceedings{pathak18largescale,
Author = {Burda, Yuri and
Edwards, Harri and Pathak, Deepak and
Storkey, Amos and Darrell, Trevor and
Efros, Alexei A.},
Title = {Large-Scale Study of
Curiosity-Driven Learning},
Booktitle = {arXiv:1808.04355},
Year = {2018}
}</pre>
      </div>
    </td>
  </tr>
</table>
<br><hr>

<!-- Section: a single related-work entry linking to its project website -->
<center><h1>Related Work</h1></center>
<table align="center" width="900px">
  <tr>
    <td align="center" width="300px">
      <span style="font-size:20px">Pathak, Agrawal, Efros, Darrell. Curiosity-driven Exploration by Self-supervised Prediction.<br>In ICML 2017.<a href="https://pathak22.github.io/noreward-rl/" target="_blank">[website]</a></span>
    </td>
  </tr>
</table>
<br><hr>

<!-- Section: acknowledgements. The former <left> wrapper is not an HTML element and had no effect
     on alignment, so it is dropped; the text keeps its default left alignment. -->
<table align="center" width="800px">
  <tr><td width="800px">
    <center><h1>Acknowledgements</h1></center>
    We would like to thank Chris Lu for help in designing the Unity environments, Phillip Isola and Alex Nichols for feedback on an early draft of the paper. We are grateful to the members of BAIR and OpenAI for fruitful discussions and comments. DP is supported by the Facebook graduate fellowship.<br>
  </td></tr>
</table>
<br><br>
<!-- Runs once the markup above has been parsed. hideallbibs() is not defined in this file;
     it is expected to come from resources/hidebib.js and presumably hides the BibTeX block initially. -->
<script>hideallbibs();</script>
</body>
</html>