<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	xmlns:georss="http://www.georss.org/georss" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:media="http://search.yahoo.com/mrss/"
	>

<channel>
	<title>Andreas Gal</title>
	<atom:link href="http://andreasgal.com/feed/" rel="self" type="application/rss+xml" />
	<link>http://andreasgal.com</link>
	<description>Engineering the Open Web Platform.</description>
	<lastBuildDate>Sun, 01 Apr 2012 16:23:23 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.com/</generator>
<cloud domain='andreasgal.com' port='80' path='/?rsscloud=notify' registerProcedure='' protocol='http-post' />
<image>
		<url>http://s2.wp.com/i/buttonw-com.png</url>
		<title>Andreas Gal</title>
		<link>http://andreasgal.com</link>
	</image>
	<atom:link rel="search" type="application/opensearchdescription+xml" href="http://andreasgal.com/osd.xml" title="Andreas Gal" />
	<atom:link rel='hub' href='http://andreasgal.com/?pushpress=hub'/>
		<item>
		<title>Boot 2 Gecko @ JSConf US 2012</title>
		<link>http://andreasgal.com/2012/04/01/boot-2-gecko-jsconf-us-2012/</link>
		<comments>http://andreasgal.com/2012/04/01/boot-2-gecko-jsconf-us-2012/#comments</comments>
		<pubDate>Sun, 01 Apr 2012 10:35:46 +0000</pubDate>
		<dc:creator>Andreas</dc:creator>
				<category><![CDATA[Mozilla]]></category>

		<guid isPermaLink="false">http://andreasgal.com/?p=238</guid>
		<description><![CDATA[The Boot 2 Gecko team is attending JSConf US 2012, including B2G&#8217;s tech lead Chris Jones and the Gaia (Phone UX) lead Vivien Nicolas. We have a bunch of devices with us, so flag us down if you have any &#8230; <a href="http://andreasgal.com/2012/04/01/boot-2-gecko-jsconf-us-2012/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=238&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>The <a href="https://wiki.mozilla.org/B2G" target="_blank">Boot 2 Gecko</a> team is attending <a href="http://2012.jsconf.us/" target="_blank">JSConf US 2012</a>, including B2G&#8217;s tech lead <a href="http://blog.mozilla.com/cjones/" target="_blank">Chris Jones</a> and the Gaia (Phone UX) lead <a href="http://www.flickr.com/photos/paulrouget/3677602237/" target="_blank">Vivien Nicolas</a>. We have a bunch of devices with us, so flag us down if you have any questions or would like to play with a B2G phone.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/andreasgal.wordpress.com/238/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/andreasgal.wordpress.com/238/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/andreasgal.wordpress.com/238/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/andreasgal.wordpress.com/238/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/andreasgal.wordpress.com/238/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/andreasgal.wordpress.com/238/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/andreasgal.wordpress.com/238/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/andreasgal.wordpress.com/238/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/andreasgal.wordpress.com/238/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/andreasgal.wordpress.com/238/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/andreasgal.wordpress.com/238/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/andreasgal.wordpress.com/238/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/andreasgal.wordpress.com/238/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/andreasgal.wordpress.com/238/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=238&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://andreasgal.com/2012/04/01/boot-2-gecko-jsconf-us-2012/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
	
		<media:content url="" medium="image">
			<media:title type="html">andreasgal</media:title>
		</media:content>
	</item>
		<item>
		<title>Mozilla Project vs Mozilla Corporation</title>
		<link>http://andreasgal.com/2012/03/12/mozilla-project-vs-mozilla-corporation/</link>
		<comments>http://andreasgal.com/2012/03/12/mozilla-project-vs-mozilla-corporation/#comments</comments>
		<pubDate>Tue, 13 Mar 2012 05:38:27 +0000</pubDate>
		<dc:creator>Andreas</dc:creator>
				<category><![CDATA[Mozilla]]></category>

		<guid isPermaLink="false">http://andreasgal.com/?p=234</guid>
		<description><![CDATA[We went through a lengthy thread on hardware-decoding and H.264 on dev-platform today. It was heated but mostly civil. We are all pretty passionate about open standards, so an intense debate was to be expected. One issue I keep running &#8230; <a href="http://andreasgal.com/2012/03/12/mozilla-project-vs-mozilla-corporation/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=234&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>We went through a lengthy thread on hardware-decoding and H.264 on dev-platform today. It was heated but mostly civil. We are all pretty passionate about open standards, so an intense debate was to be expected. One issue I keep running into whenever I take some idea to a public forum like dev-platform is my standing with the Mozilla Project and the Mozilla Corporation. I am the Director of Research for the Mozilla Corporation. I can speak with some authority about research stuff. I am also a contributor to the Mozilla Project. I own a couple modules, and I am a peer to another few. When I posted to dev-platform today about codecs, I was wearing my Mozilla Project contributor hat. Me being a director at the Mozilla Corporation buys me exactly zero standing and authority with the Mozilla Project. The Mozilla Project has a governance structure. Peers and module owners make decisions. Since we are talking about codecs here, and I am neither a peer nor a module owner, I have as much authority as any random contributor on whether we take the course I plotted in my email or not. I am absolutely convinced I am right, but I still don&#8217;t get to make the call. The decision is with the module owners. So next time you read something I write, keep in mind: I am allowed to have a lot of opinions, but I have actually very little authority over the Mozilla Project. The authority rests with a large experienced group of module owners, not any one individual, even less so someone hired for a job (like a director). Instead, module ownership is based on merit and contribution to the project. And that&#8217;s why Mozilla rocks.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/andreasgal.wordpress.com/234/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/andreasgal.wordpress.com/234/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/andreasgal.wordpress.com/234/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/andreasgal.wordpress.com/234/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/andreasgal.wordpress.com/234/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/andreasgal.wordpress.com/234/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/andreasgal.wordpress.com/234/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/andreasgal.wordpress.com/234/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/andreasgal.wordpress.com/234/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/andreasgal.wordpress.com/234/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/andreasgal.wordpress.com/234/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/andreasgal.wordpress.com/234/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/andreasgal.wordpress.com/234/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/andreasgal.wordpress.com/234/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=234&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://andreasgal.com/2012/03/12/mozilla-project-vs-mozilla-corporation/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
	
		<media:content url="" medium="image">
			<media:title type="html">andreasgal</media:title>
		</media:content>
	</item>
		<item>
		<title>Boot 2 Gecko at Mobile World Congress 2012</title>
		<link>http://andreasgal.com/2012/02/21/boot-2-gecko-at-mobile-world-congress-2012/</link>
		<comments>http://andreasgal.com/2012/02/21/boot-2-gecko-at-mobile-world-congress-2012/#comments</comments>
		<pubDate>Wed, 22 Feb 2012 07:39:44 +0000</pubDate>
		<dc:creator>Andreas</dc:creator>
				<category><![CDATA[Mozilla]]></category>

		<guid isPermaLink="false">http://andreasgal.com/?p=222</guid>
		<description><![CDATA[Mozilla and the Boot 2 Gecko team are attending Mobile World Congress 2012. Visit us at our booth (7B96) in Hall 7.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=222&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Mozilla and the Boot 2 Gecko team are attending Mobile World Congress 2012. Visit us at our booth (7B96) in Hall 7.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/andreasgal.wordpress.com/222/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/andreasgal.wordpress.com/222/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/andreasgal.wordpress.com/222/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/andreasgal.wordpress.com/222/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/andreasgal.wordpress.com/222/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/andreasgal.wordpress.com/222/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/andreasgal.wordpress.com/222/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/andreasgal.wordpress.com/222/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/andreasgal.wordpress.com/222/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/andreasgal.wordpress.com/222/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/andreasgal.wordpress.com/222/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/andreasgal.wordpress.com/222/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/andreasgal.wordpress.com/222/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/andreasgal.wordpress.com/222/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=222&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://andreasgal.com/2012/02/21/boot-2-gecko-at-mobile-world-congress-2012/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="" medium="image">
			<media:title type="html">andreasgal</media:title>
		</media:content>
	</item>
		<item>
		<title>pdf.js: Rendering PDF with HTML5 and JavaScript</title>
		<link>http://andreasgal.com/2011/06/15/pdf-js/</link>
		<comments>http://andreasgal.com/2011/06/15/pdf-js/#comments</comments>
		<pubDate>Thu, 16 Jun 2011 02:21:22 +0000</pubDate>
		<dc:creator>Andreas</dc:creator>
				<category><![CDATA[Mozilla]]></category>

		<guid isPermaLink="false">http://andreasgal.com/?p=206</guid>
		<description><![CDATA[Update: I updated the links again. pdf.js has moved to a new location on github. Why? While traveling to the Firefox 4 launch parties in Seoul and Taipei all the way from California, we killed a lot of time by &#8230; <a href="http://andreasgal.com/2011/06/15/pdf-js/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=206&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p><strong>Update:</strong> I updated the links again. pdf.js has moved to a new location on github.</p>
<h2>Why?</h2>
<p>While traveling to the Firefox 4 launch parties in Seoul and Taipei all the way from California, we killed a lot of time by brainstorming cool things to do with the web platform. Like many before us, we were wondering why nobody had implemented a PDF reader in HTML5/JavaScript. The kinds of operations a PDF reader needs to be fast at &#8211;render text, draw lines, blit images&#8211; need to be fast in browsers too, so browsers are already highly optimized for them.</p>
<p>Building an HTML5-based PDF renderer would also answer the question of whether the web platform and in particular canvas and SVG APIs are complete enough to efficiently and faithfully render PDFs.</p>
<p>Displaying PDFs directly in the browser would definitely improve the user&#8217;s experience. There are literally millions (billions?) of PDFs floating around the web, and on many devices loading PDFs switches to a different application (e.g. Preview on OS X and PDF View on Android). Also, external PDF readers and many plugins don&#8217;t support important PDF features well, including content links and fetch-as-you-go (HTTP range requests).</p>
<p>External readers and plugins are also forced to reinvent their own user interaction paradigms, meaning for example that users might scroll HTML pages in one way with one set of heuristics in the browser, but a totally different way in an external PDF reader.</p>
<p>It&#8217;s important to note that we&#8217;re not trying to promote PDF to a first-class web citizen like HTML5 is. Instead we hope that a browser-<a href="http://arewenativeyet.com/">native</a> PDF renderer written on the web platform allows web technologies to subsume PDF.</p>
<h2>Benefits</h2>
<p>The traditional approach to rendering PDFs in a browser is to use a native-code plugin, either Adobe&#8217;s own PDF Reader or other commercial renderers, or some open source alternative (e.g. poppler). From a security perspective, this enlarges the trusted code base, and because of that Google&#8217;s Chrome browser goes through quite some pain to sandbox the PDF renderer to avoid code injection attacks. An HTML5-based implementation is completely immune to this class of problems.</p>
<h2>Project Status</h2>
<p>We have been developing pdf.js in the open (on <a href="http://github.com/">github.com</a>), albeit quietly, for about a month now. We were waiting on the completion of some major features (Type1 fonts, gradients, etc.) before communicating <a href="https://github.com/mozilla/pdf.js">pdf.js</a> more broadly. We&#8217;ve been taken by surprise by the early and intense interest in our work, so we decided to blog and talk about our project earlier than we initially planned.</p>
<p>As part of our project plan, we are initially focused on achieving pixel-perfect rendering of a single PDF paper, a <a href="http://people.mozilla.org/~gal/compressed.tracemonkey-pldi-09.pdf">2009 paper on Trace Compilation we submitted to the ACM SIGPLAN PLDI conference</a>. As the Tracemonkey work described in the paper led the way for JavaScript JITs, so we hope pdf.js opens the door to implementing legacy formats on top of the web platform.</p>
<p>If you want to see a demo of <a href="https://github.com/mozilla/pdf.js">pdf.js</a>, click on this <a href="http://mozilla.github.com/pdf.js/web/viewer.html">link</a>. There are still glitches and rendering artifacts, but you will get the picture. We are still missing Type1 PostScript fonts, which Vivien Nicolas is working on.</p>
<p>Along the way, we had to add some new interfaces to the HTML5 canvas element, and figure out how to implement some difficult features of the PDF spec in JavaScript. See Chris&#8217;s <a href="http://blog.mozilla.com/cjones/2011/06/15/overview-of-pdf-js-guts/">post</a> for a general technological overview, and Shaon&#8217;s <a href="https://sbarman.wordpress.com/">post</a> for details on rendering &#8220;shading patterns&#8221;.</p>
<h2>What&#8217;s next?</h2>
<p>We intend to use <a href="https://github.com/mozilla/pdf.js">pdf.js</a> to render PDFs &#8220;natively&#8221;, within Firefox itself. Our most immediate goal is to implement the most commonly used PDF features so we can render a large majority of the PDFs found on the web. We believe we can reach that point in less than 3 months (the entire code so far is less than one month old, and it already renders a large set of PDF features).</p>
<p>Initially we will make a Firefox extension available to interested users that enables inline PDF rendering using <a href="https://github.com/mozilla/pdf.js">pdf.js</a>, but our ultimate goal is of course shipping <a href="https://github.com/mozilla/pdf.js">pdf.js</a> with Firefox. This will result in a substantial usability but also security improvement for our users. <a href="https://github.com/mozilla/pdf.js">pdf.js</a> uses only safe web languages and doesn&#8217;t contain any native code pieces attackers could exploit.</p>
<h2>Open Source</h2>
<p>We want <a href="https://github.com/mozilla/pdf.js">pdf.js</a> to be a community driven and governed open-source project. We&#8217;ll use it for Firefox, but we think there are many cool applications for it. We would love to see it embedded in other browsers or web applications; because it&#8217;s written only in standards-compliant web technologies, the code will run in any compliant browser. We are licensing pdf.js under a very liberal 3-clause BSD license and we welcome external contributors. We are looking forward to your ideas or code to make pdf.js better! Take a look at our <a href="https://github.com/mozilla/pdf.js">github</a> and <a href="https://wiki.mozilla.org/PDF.js">our wiki</a>, or talk to us on IRC in <a href="irc://irc.mozilla.org/pdfjs">#pdfjs</a>.</p>
<p>Chris Jones and Andreas Gal (and the pdf.js team)</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/andreasgal.wordpress.com/206/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/andreasgal.wordpress.com/206/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/andreasgal.wordpress.com/206/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/andreasgal.wordpress.com/206/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/andreasgal.wordpress.com/206/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/andreasgal.wordpress.com/206/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/andreasgal.wordpress.com/206/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/andreasgal.wordpress.com/206/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/andreasgal.wordpress.com/206/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/andreasgal.wordpress.com/206/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/andreasgal.wordpress.com/206/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/andreasgal.wordpress.com/206/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/andreasgal.wordpress.com/206/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/andreasgal.wordpress.com/206/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=206&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://andreasgal.com/2011/06/15/pdf-js/feed/</wfw:commentRss>
		<slash:comments>198</slash:comments>
	
		<media:content url="" medium="image">
			<media:title type="html">andreasgal</media:title>
		</media:content>
	</item>
		<item>
		<title>User data in the cloud: Lessons from the Sony debacle</title>
		<link>http://andreasgal.com/2011/05/02/user-data/</link>
		<comments>http://andreasgal.com/2011/05/02/user-data/#comments</comments>
		<pubDate>Mon, 02 May 2011 07:07:53 +0000</pubDate>
		<dc:creator>Andreas</dc:creator>
				<category><![CDATA[Mozilla]]></category>
		<category><![CDATA[Research]]></category>

		<guid isPermaLink="false">http://andreasgal.wordpress.com/?p=169</guid>
		<description><![CDATA[Two weeks ago our friends at Sony managed to get personal information of 70 million users stolen from them. I got one of the notification emails a couple days ago myself (I must have signed up for the Playstation Network &#8230; <a href="http://andreasgal.com/2011/05/02/user-data/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=169&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p><a href="http://andreasgal.files.wordpress.com/2011/04/safety.jpg"><img class="size-full wp-image-170 alignright" title="Don't try to lift more than you are able" src="http://andreasgal.files.wordpress.com/2011/04/safety.jpg?w=640" alt=""   /></a>Two weeks ago our friends at Sony managed to get personal information of 70 million users <a title="Sony Playstation network breach" href="http://www.unwiredview.com/2011/05/02/sony-execs-hang-heads-in-shame-over-playstation-network-breach/">stolen from them</a>. I got one of the notification emails a couple days ago myself (I must have signed up for the Playstation Network when I installed the PS3 we bought to do our Cell VM work back at UCI.) In this instance, Sony is a shining example of how <em><strong>NOT</strong></em> to handle user data.</p>
<h2>What&#8217;s user data?</h2>
<p>User data is any piece of identifiable data about a user. It can be all sorts of obvious stuff like your name, address, birth date, passwords (all of these Sony managed to lose), but also less obvious data such as usage history, communications, and what not. Whether Sony lost the latter is not clear. Some of the information Sony lost clearly should never have been stored in the first place. I understand why Sony asked me for my birth date when I signed up for PSN. Some jurisdictions want you to be a certain age before you can engage in virtual mayhem and violence that is modern video gaming. But why the hell did Sony store this information in a database? Why not just flag my account as &#8220;remembers the first techno song being played on the radio, definitely old enough&#8221;?</p>
<h2>Risks</h2>
<p>Storing data is always risky. If you store any data in the cloud, eventually someone will break into it. Investing a lot of money (expensive equipment, expensive practices, expensive staff) helps delay that day of reckoning, but only within limits. There are a lot of financial incentives to steal this kind of data. Personal information of 70 million people is a fantastic starting point for all sorts of phishing attacks. And even if only one out of 10,000 people getting that Nigerian email falls for it, that&#8217;s still plenty of people to take advantage of. Sony took a risk storing user data. Unfortunately, it was not a well calculated risk. They could have easily reduced the fallout from a breach by storing less information, i.e. by not storing the birth date, or maybe even not storing personal information at all! Shocking proposition, I know.</p>
<h2>Not knowing is bliss</h2>
<p>Why does Sony need the names of its PSN subscribers in the PSN user database to begin with? Let people choose a handle and a password. If you want to personalize the experience, let users <em>choose</em> their name. Dear PSN, you may call me Tracemonkey now. You really don&#8217;t need to know my real name and address. As for payment information, I think it&#8217;s ok if PSN asks me once a year to re-enter my credit card info, which is then briefly processed but never stored. Had they followed this simple principle of touching (and storing) as little user data as possible, they would have saved themselves a lot of legal trouble and liability.</p>
<h2>Browsers</h2>
<p>I am not ranting about this topic out of thin air. Web browsers handle a lot of personal information, and it&#8217;s tempting for browser vendors to get in on the whole social networks online transactions online identity game. A number of people at Mozilla don&#8217;t seem too comfortable with hosting on our infrastructure any user data ever. It&#8217;s really scary and risky after all (just ask Sony). I think that&#8217;s wrong. We absolutely should get into the key areas of social and identity. Why? Because the state of the art is crappy, and we can do better.</p>
<h2>Microsoft Passport anyone?</h2>
<p>Web identity is a total mess. I have at least 30 accounts in various places with different account names and passwords (well at least I try). Various organizations and services have tried to establish a single login. Microsoft Passport was one of the earlier ones. I am really glad that didn&#8217;t work out. Can you imagine the evil empire owning all your personal data and online identity? Microsoft has a lot of incentives to use and abuse such a powerful position, and it certainly does. I still get emails from Microsoft about my Passport account on a regular basis, almost a decade later. It&#8217;s usually an invitation to try this new Microsoft feature Y or maybe try chatting with my passport account or &#8230; well whatever. Microsoft sits on a lot of data, and it&#8217;s tempting to monetize it. And of course it&#8217;s not just them. Everyone else is just as bad. Ever noticed how Google and Facebook customize ads for you based on the data they have about you? Creepy. This is why I think Mozilla can do a lot better. We don&#8217;t have any hidden agenda. We don&#8217;t have any extra services we want to sell you. We don&#8217;t have to monetize data we store to turn a profit and make shareholders happy. We simply don&#8217;t have any shareholders. We are a company owned by a non-profit foundation that wants to make the web a better place. This puts us in a much better position to do what&#8217;s best for our users, instead of what&#8217;s best for our quarterly statement (we don&#8217;t publish any of those in case you didn&#8217;t notice.)</p>
<h2>Playing it safe</h2>
<p>We are currently in the process of figuring out how exactly Mozilla should handle user data. I have exactly zero authority when it comes to these kinds of decisions, but Mozilla is a pretty open and democratic place and we tend to discuss this stuff pretty openly, giving anyone a voice who wants to speak up. I think it&#8217;s imperative that we follow a couple guiding principles as we explore ways to better serve our users using services such as identity or social:</p>
<ol>
<li><strong>Always keep the users&#8217; best interest in mind (and only the users&#8217; best interests).</strong> I don&#8217;t care if we can ship a feature faster or cheaper if we store more user data (or maybe store it not encrypted instead of encrypted). Our new Sync service is a great example for this. It&#8217;s a total pain in the butt to encrypt the browser history on the client before it is uploaded to our services, but it&#8217;s the right thing to do. It means that in case of Sync we can never see any of your browser history, even if we tried, and your data is safe by default.</li>
<li><strong>Always store as little data as possible in the cloud.</strong> If there is a way to implement a feature completely in the client without us ever having to see user data, that&#8217;s always the right approach, even if it&#8217;s harder. This is exactly the issue we are facing with our new F1 social browsing feature. It allows you to share websites on Facebook/Twitter/etc as you visit them. It&#8217;s a really cool feature&#8211;I use it all the time. Unfortunately, the protocol Facebook/Twitter/etc offer to authenticate and access their APIs (<em>oauth</em>) is totally broken, and conceptually doesn&#8217;t really work for client applications. oauth requires the client (Firefox/F1) and Facebook/Twitter/etc to negotiate a shared secret (called the consumer key). With a pure client solution this secret can never be kept (someone could peek into Firefox/F1 and extract the key). It seems Facebook has blacklisted consumer keys before because people checked them into open-source repositories. The only alternative to this is to put the key behind a service Mozilla runs and then let Firefox/F1 post via that service, but that means we would be able to see (in theory, not intentionally of course) all the Facebook/Twitter/etc status updates of millions of people every day. That&#8217;s wrong. As tempting and quick as it would be to set up a Mozilla service that keeps the key hidden and posts for users, we should never put ourselves in a position where we handle user data without an overwhelming need for it. In this particular instance we should simply negotiate with Facebook/Twitter/etc to not enforce the shared secret rule (Twitter already doesn&#8217;t it seems, since there are so many Twitter client apps out there), and maybe in parallel we should work on better protocols than <em>oauth</em>.</li>
</ol>
<h2>Going fast</h2>
<p>As we were discussing these various architectural aspects of how to handle user data (or how not to handle it) the last few weeks, some people were tempted to go the easy route and store a lot more user data (in particular in the clear) than necessary because it might get us to market faster. I think this is wrong for the above two principles, but it&#8217;s also wrong because it will <em><strong>NOT</strong></em> get us to market faster. Dealing with user data from an infrastructure perspective is a total pain. To handle or even store things like Facebook/Twitter/etc account authentication tokens or user contacts, we have to build out a serious security infrastructure. We need to hire expensive, highly trained personnel and we have to seriously tighten our security practices. That doesn&#8217;t mean we are unsafe right now. It just means our current practices match our current threat scenarios. For example we have external IT administrators who don&#8217;t even work for Mozilla administering our source code repository access controls. They are simply Mozilla project volunteers. Considering the limited risks, this is acceptable. When it comes to storing user data, entirely different standards will be needed. And getting all that sorted out and implemented will require a lot of audits, careful planning &#8230; and a lot of time. So if you want to go fast, go zero user data. Or encrypt the user data on the client so all we get to see are blobs of meaningless zeros and ones. That&#8217;s how Sync works, and we got it up and running within a couple months. That&#8217;s how you go fast.</p>
<h2>What&#8217;s next?</h2>
<p>We are currently discussing what our process will be to store user data. Expect people with actual authority to make decisions (and to talk about them) to start talking about this publicly in a few weeks. I already know that the result of our internal deliberations will be a policy that will focus on what&#8217;s best for our users, and that will minimize risks for them (and in the end, for us). And expect a safe and secure implementation of F1 to show up in your browser really soon. You can already <a title="F1 sharing" href="http://f1.mozillamessaging.com/">try out the prototype now</a>. It really rocks.</p>
<p><strong>This blog post represents my personal opinion, not the official position of Mozilla.</strong></p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/andreasgal.wordpress.com/169/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/andreasgal.wordpress.com/169/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/andreasgal.wordpress.com/169/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/andreasgal.wordpress.com/169/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/andreasgal.wordpress.com/169/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/andreasgal.wordpress.com/169/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/andreasgal.wordpress.com/169/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/andreasgal.wordpress.com/169/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/andreasgal.wordpress.com/169/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/andreasgal.wordpress.com/169/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/andreasgal.wordpress.com/169/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/andreasgal.wordpress.com/169/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/andreasgal.wordpress.com/169/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/andreasgal.wordpress.com/169/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=169&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://andreasgal.com/2011/05/02/user-data/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
	
		<media:content url="" medium="image">
			<media:title type="html">andreasgal</media:title>
		</media:content>

		<media:content url="http://andreasgal.files.wordpress.com/2011/04/safety.jpg" medium="image">
			<media:title type="html">Don&#039;t try to lift more than you are able</media:title>
		</media:content>
	</item>
		<item>
		<title>Compartments</title>
		<link>http://andreasgal.com/2010/10/13/compartments/</link>
		<comments>http://andreasgal.com/2010/10/13/compartments/#comments</comments>
		<pubDate>Thu, 14 Oct 2010 00:26:40 +0000</pubDate>
		<dc:creator>Andreas</dc:creator>
				<category><![CDATA[Research]]></category>

		<guid isPermaLink="false">http://andreasgal.wordpress.com/?p=130</guid>
		<description><![CDATA[Heap We have implemented a major change to the way Firefox manages JavaScript objects. JavaScript objects include script-instantiated objects such as Arrays or Date objects, but also include JavaScript representations of Document Object Model (DOM) elements, such as input fields &#8230; <a href="http://andreasgal.com/2010/10/13/compartments/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=130&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<h1>Heap</h1>
<p>We have implemented a major change to the way Firefox manages JavaScript objects. JavaScript objects include script-instantiated objects such as Arrays or Date objects, but also include JavaScript representations of Document Object Model (<strong>DOM</strong>) elements, such as input fields or <em>DIV</em> elements. In the past, Firefox held all JavaScript objects in a single JavaScript heap. This heap is occasionally garbage collected, which means the browser walks the entire object graph in the heap and determines which objects are still reachable and which are not. Unreachable objects are de-allocated and space is reclaimed.</p>
<p><a href="http://andreasgal.files.wordpress.com/2010/10/figure1.png"><img class="alignnone size-full wp-image-132" title="Firefox 3.6 single heap model." src="http://andreasgal.files.wordpress.com/2010/10/mochup1.png?w=640" alt="Firefox 3.6 single heap model."   /></a></p>
<p>Having all JavaScript objects in the browser congregate in a single heap is suboptimal for a number of reasons. If a user has multiple windows (or tabs) open, and one of these windows (or tabs) created a lot of objects, it is likely that many of these objects are no longer reachable (<em>garbage</em>). When the browser detects such a state, it will initiate a garbage collection. Unfortunately though, since objects from different windows (or tabs) are intermixed in the heap, the browser has to walk the <strong>entire</strong> heap. If a number of idle windows are open, this can be quite wasteful, since those windows haven&#8217;t really created any garbage, so whenever a window with heavy activity triggers a garbage collection, much of the garbage collection time is spent walking unrelated parts of the global object graph.</p>
<p>In Firefox this problem is even more pronounced than in other browsers, because our UI code (also called <strong>chrome</strong> code, not to be confused with Google Chrome) is implemented in JavaScript, and there are a lot of chrome (UI) objects alive at any given moment. These UI objects tend to stick around and every time a web content window causes a garbage collection, Firefox spends a lot of time figuring out whether chrome objects are still alive instead of being able to focus on the active web content window.</p>
<p><a href="http://andreasgal.files.wordpress.com/2010/10/figure2.png"><img class="alignnone size-full wp-image-133" title="Firefox 4 Compartmentalized JavaScript Heaps" src="http://andreasgal.files.wordpress.com/2010/10/mockup2.png?w=640" alt="Firefox 4 Compartmentalized JavaScript Heaps"   /></a></p>
<h2>Compartments</h2>
<p>For Firefox 4 we changed the way JavaScript objects are managed. Our JavaScript engine SpiderMonkey (sometimes also called TraceMonkey and JägerMonkey, which are SpiderMonkey&#8217;s trace-compilation and baseline Just-in-Time compilers) now supports multiple JavaScript heaps, which we also call <strong>compartments</strong>. All objects that belong to a certain <strong>origin</strong> (such as &#8220;http://mail.google.com/&#8221; or &#8220;http://www.bank.com/&#8221;) are placed into a separate compartment. This has a couple very important implications.</p>
<ul>
<li><em>All objects created by a website reside within the same compartment and hence are located in the same memory region.</em> This improves <strong>cache utilization</strong> by reducing false sharing of cache lines. False sharing occurs when we are trying to operate on an object and we have to read an entire cache line of data into the CPU cache. In the old model JavaScript objects could be co-located with arbitrary other JavaScript objects from other origins. Such cross origin objects are used together very infrequently, which reduces the number of cache hits we get. In the new model most objects touched by a website are tightly packed next to each other in memory, with no cross origin objects in between.</li>
<li><em>JavaScript objects (including JavaScript functions, which are objects as well) are only allowed to touch objects in the same compartment.</em> This invariant is very useful for security purposes. The JavaScript engine enforces this requirement at a very low level. It means that a &#8220;google.com&#8221; object can never accidentally leak into an untrusted website such as &#8220;evil.com&#8221;. Only a special object type can cross compartment boundaries. We call these objects <strong>cross compartment wrappers</strong>. We track the creation of these cross compartment wrappers, and thus the JavaScript engine knows at all times what objects from a compartment are kept alive by outside references (through cross compartment wrappers). This allows us to garbage collect individual compartments, in addition to a global collection. We simply assume all objects referenced from outside the compartment to be live, and then walk the object graph inside the compartment. Objects that are found to be disconnected from the graph are discarded. With this new <strong>per-compartment garbage collection</strong> we shortcut having to walk unrelated heap areas of a window (or tab) that triggered a garbage collection.</li>
</ul>
<h1>Wrappers</h1>
<p>Wrappers are not a new concept in Firefox, or browsers in general. In the past we have already used them to regulate how windows (or tabs) pass objects to each other. In the past, when another window (or tab or iframe) tried to touch an object that belongs to a different window, we handed it a wrapper object instead. That wrapper object dynamically checks at access time whether the accessor window (also called the <strong>subject</strong>) is permitted to access the target object. If one Google Mail window is trying to access another Google Mail window, the access is permitted, because these two windows (or tabs or iframes) are <strong>same origin</strong> and hence it&#8217;s safe to permit this access. If an untrusted website obtains a reference to a Google Mail DOM element, we hand it the same wrapper, and if it ever tries to access the Google Mail DOM Element the wrapper will at access time deny the property access because the untrusted website &#8220;evil.com&#8221; is cross origin with &#8220;google.com&#8221;.</p>
<p><a href="http://andreasgal.files.wordpress.com/2010/10/figure3.png"><img class="alignnone size-full wp-image-134" title="Firefox 3.6 Shared Wrappers" src="http://andreasgal.files.wordpress.com/2010/10/mockup3.png?w=640" alt="Firefox 3.6 Shared Wrappers"   /></a></p>
<p>A disadvantage of the Firefox 3.6 wrapper approach (which is similar to the way other browsers utilize wrappers) was the fact that these wrappers had to be injected manually at the right places in the C++ browser implementation, and each wrapper had to do a dynamic security check at access time. With compartments we can do a lot better:</p>
<ul>
<li>Since all objects belonging to the same origin are within the same compartment, <strong>and</strong> no object from a different origin is in that compartment, we can let all objects within a compartment touch other objects in the same compartment without a wrapper in between. Keep in mind that this doesn&#8217;t just apply to windows but also to iframes. A single Google Mail session often uses dozens of iframes that all heavily exchange objects with each other. In the past we had to inject wrappers in between that kept performing dynamic security checks. This is no longer necessary, and there is an observable speedup when using iframe heavy web applications such as Google Mail.</li>
<li>Since all cross origin objects are in a different compartment, any cross origin access that needs to perform a security check can only happen through a cross compartment wrapper. Such a cross compartment wrapper always lives in a source compartment, and accesses a single destination object. When we create a cross compartment wrapper, we consult with the <strong>wrapper factory </strong>to see what kind of security policy should be applied. When &#8220;evil.com&#8221; obtains a reference to a &#8220;google.com&#8221; object, for example, we have to create a wrapper to that object in the &#8220;evil.com&#8221; compartment. When that wrapper is created the wrapper factory will tell us to apply a stringent cross origin security policy, which makes it impossible for &#8220;evil.com&#8221; to glean information from the &#8220;google.com&#8221; window. In contrast to our old wrappers, this security policy is <strong>static</strong>. Since only &#8220;evil.com&#8221; objects ever see this wrapper, and it only points to one single DOM element in the destination compartment, the policy doesn&#8217;t have to be re-checked at access time. Instead, every time &#8220;evil.com&#8221; attempts to read information from the DOM element, the access is denied without even comparing the two origins.</li>
</ul>
<p><a href="http://andreasgal.files.wordpress.com/2010/10/figure4.png"><img class="alignnone size-full wp-image-135" title="Firefox 4 Cross Compartment Wrapper" src="http://andreasgal.files.wordpress.com/2010/10/mockup4.png?w=640" alt="Firefox 4 Cross Compartment Wrapper"   /></a></p>
<h1>Brain Transplants</h1>
<p>A particularly interesting oddity of the JavaScript DOM representation is the existence of two objects for each DOM window (or tab or iframe), the <strong>inner</strong> window and the <strong>outer</strong> window. This split was implemented by web browsers a few years ago to securely deal with windows being navigated to a new URL. When such a navigation occurs, the inner window object inside the outer window is replaced with a new object, whereas the actual reference to <em>window</em> (which is the outer window) remains unchanged. If such a navigation takes the window to a new origin, we allocate the inner window in the appropriate new compartment. This of course creates now the problem that the outer window can possibly no longer directly point to the new inner window, because it is in a different compartment.</p>
<p>We solve this problem through brain transplants. Whenever an outer window navigates, we copy it into the new destination compartment. The object in the old compartment is transformed into a cross compartment wrapper that points to the newly created object in the destination compartment. So the term brain transplants is very appropriate here. We are essentially transplanting the guts of the outer window object into a new object hull in the same compartment we allocated the inner object in.</p>
<h1>Processes</h1>
<p>Some readers might wonder how compartments compare to per-tab processes as they are used by Google Chrome and Internet Explorer. Compartments are similar in many ways, but also very different. Both processes and compartments shield JavaScript objects against each other. The most important distinction is that processes offer a stronger separation enforced by the processor hardware, while compartments offer a pure software guarantee. However, on the upside compartments allow much more efficient cross compartment communication than processes do. With compartments cross origin websites can still communicate with each other with a small overhead (governed by certain cross origin access policy), while with processes cross-process JavaScript object access is either impossible or extremely expensive. In a modern browser you will likely see both forms of separation being applied. Two web sites that never have to talk to each other can live in separate processes, while cross origin websites that do want to communicate can use compartments to enhance security and performance.</p>
<h1>Future</h1>
<p>We have landed the main compartments patch and current nightly builds (and Beta 7) are running with per-origin compartment JavaScript heaps. Some of the functionality described above will not ship until Beta 8, most importantly per-compartment garbage collections. Those currently still happen for all compartments at once. The foundation we laid with the compartments work will also enable a number of future extensions. Since we now cleanly separate objects belonging to different tabs, future changes to our JavaScript engine will permit us to not only perform JavaScript garbage collection for individual compartments, but we will also be able to do so in the background on a different thread for tabs with inactive content (i.e. no event handler is firing at the moment).</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/andreasgal.wordpress.com/130/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/andreasgal.wordpress.com/130/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/andreasgal.wordpress.com/130/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/andreasgal.wordpress.com/130/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/andreasgal.wordpress.com/130/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/andreasgal.wordpress.com/130/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/andreasgal.wordpress.com/130/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/andreasgal.wordpress.com/130/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/andreasgal.wordpress.com/130/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/andreasgal.wordpress.com/130/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/andreasgal.wordpress.com/130/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/andreasgal.wordpress.com/130/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/andreasgal.wordpress.com/130/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/andreasgal.wordpress.com/130/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=130&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://andreasgal.com/2010/10/13/compartments/feed/</wfw:commentRss>
		<slash:comments>128</slash:comments>
	
		<media:content url="" medium="image">
			<media:title type="html">andreasgal</media:title>
		</media:content>

		<media:content url="http://andreasgal.files.wordpress.com/2010/10/mochup1.png" medium="image">
			<media:title type="html">Firefox 3.6 single heap model.</media:title>
		</media:content>

		<media:content url="http://andreasgal.files.wordpress.com/2010/10/mockup2.png" medium="image">
			<media:title type="html">FIrefox 4 Compartmentalized JavaScript Heaps</media:title>
		</media:content>

		<media:content url="http://andreasgal.files.wordpress.com/2010/10/mockup3.png" medium="image">
			<media:title type="html">Firefox 3.6 Shared Wrappers</media:title>
		</media:content>

		<media:content url="http://andreasgal.files.wordpress.com/2010/10/mockup4.png" medium="image">
			<media:title type="html">Firefox 4 Cross Compartment Wrapper</media:title>
		</media:content>
	</item>
		<item>
		<title>Narcissus/Zaphod JavaScript Research VM for Firefox 4</title>
		<link>http://andreasgal.com/2010/09/17/narcissuszaphod-javascript-vm-for-firefox-4/</link>
		<comments>http://andreasgal.com/2010/09/17/narcissuszaphod-javascript-vm-for-firefox-4/#comments</comments>
		<pubDate>Fri, 17 Sep 2010 18:58:23 +0000</pubDate>
		<dc:creator>Andreas</dc:creator>
				<category><![CDATA[Research]]></category>

		<guid isPermaLink="false">http://andreasgal.wordpress.com/?p=120</guid>
		<description><![CDATA[Our research intern Tom Austin released the first version of the Narcissus JavaScript Virtual Machine for Firefox 4. Narcissus is a JavaScript virtual machine written in JavaScript. Tom&#8217;s Firefox extension Zaphod allows using Narcissus as the default JavaScript engine in &#8230; <a href="http://andreasgal.com/2010/09/17/narcissuszaphod-javascript-vm-for-firefox-4/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=120&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<div id="attachment_121" class="wp-caption alignleft" style="width: 310px"><a href="http://andreasgal.files.wordpress.com/2010/09/zaphod-beeblebrox-001.jpg"><img class="size-medium wp-image-121 " title="Zaphod-Beeblebrox-001" src="http://andreasgal.files.wordpress.com/2010/09/zaphod-beeblebrox-001.jpg?w=300&#038;h=180" alt="Zaphod Beeblebrox" width="300" height="180" /></a><p class="wp-caption-text">Zaphod</p></div>
<div>
<p>Our research intern Tom Austin released the first version of the Narcissus JavaScript Virtual Machine for Firefox 4. Narcissus is a JavaScript virtual machine written in <strong>JavaScript</strong>. Tom&#8217;s Firefox extension <a href="https://mozillalabs.com/zaphod/" target="_self">Zaphod</a> allows using Narcissus as the default JavaScript engine in Firefox 4. This opens up the world of JavaScript language and virtual machine research to JavaScript programmers. It is no longer necessary to modify complex C++ code to implement new prototype language features for JavaScript (i.e. modules, type annotations, etc.). Similarly, Narcissus/Zaphod can also be used to try out new JavaScript optimizations and static analysis. Since Zaphod runs directly in Firefox, such experiments are no longer limited to simple command line JavaScript shell test cases. Zaphod can run complex websites and all the JS code on them.</p>
<p>Stay tuned for future updates to Zaphod and Narcissus. We have bold plans for both. Our <a href="http://doctorjs.org/" target="_self">static analysis</a> pass for Narcissus will soon be integrated with Zaphod, as well as our new Static Single Assignment-form Narcissus AST/intermediate representation.</p>
<p><em>Want to work on cool research projects in the JavaScript/Web space? Join Mozilla Research as research intern. We are looking for highly talented PhD students for Winter 2010/2011 and Summer 2011. Contact me at <strong>gal@mozilla.com</strong>.<br />
</em></p>
</div>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/andreasgal.wordpress.com/120/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/andreasgal.wordpress.com/120/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/andreasgal.wordpress.com/120/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/andreasgal.wordpress.com/120/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/andreasgal.wordpress.com/120/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/andreasgal.wordpress.com/120/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/andreasgal.wordpress.com/120/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/andreasgal.wordpress.com/120/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/andreasgal.wordpress.com/120/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/andreasgal.wordpress.com/120/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/andreasgal.wordpress.com/120/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/andreasgal.wordpress.com/120/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/andreasgal.wordpress.com/120/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/andreasgal.wordpress.com/120/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=120&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://andreasgal.com/2010/09/17/narcissuszaphod-javascript-vm-for-firefox-4/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
	
		<media:content url="" medium="image">
			<media:title type="html">andreasgal</media:title>
		</media:content>

		<media:content url="http://andreasgal.files.wordpress.com/2010/09/zaphod-beeblebrox-001.jpg?w=300" medium="image">
			<media:title type="html">Zaphod-Beeblebrox-001</media:title>
		</media:content>
	</item>
		<item>
		<title>TraceMonkey vs V8</title>
		<link>http://andreasgal.com/2008/09/03/tracemonkey-vs-v8/</link>
		<comments>http://andreasgal.com/2008/09/03/tracemonkey-vs-v8/#comments</comments>
		<pubDate>Wed, 03 Sep 2008 09:09:00 +0000</pubDate>
		<dc:creator>Andreas</dc:creator>
				<category><![CDATA[Trace Compilation]]></category>

		<guid isPermaLink="false">http://andreasgal.wordpress.com/?p=109</guid>
		<description><![CDATA[Update: I got a lot of comments on my post. I am trying to answer them as they come in, so check back after you leave a comment. Brendan Eich and Mike Shaver have posted an update on our progress &#8230; <a href="http://andreasgal.com/2008/09/03/tracemonkey-vs-v8/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=109&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Update: <em>I got a lot of comments on my post. I am trying to answer them as they come in, so check back after you leave a comment.</em></p>
<p><a href="http://weblogs.mozillazine.org/roadmap/" target="_self">Brendan Eich</a> and <a href="http://shaver.off.net/diary/" target="_self">Mike Shaver</a> have posted an update on our progress on TraceMonkey. There has been a lot of buzz around Google&#8217;s new Chrome browsers and its V8 JavaScript VM. Some voices have claimed that V8 is several times faster than TraceMonkey. We did some head to head comparisons and these claims don&#8217;t match our observations.</p>
<p><img class="aligncenter" src="http://weblogs.mozillazine.org/roadmap/tm-v8-sunspider-totals.png" alt="" width="624" height="530" /></p>
<p>We used Apple&#8217;s SunSpider benchmarks for our tests. Depending on the OS and machine configuration we are 1.18x to 1.28x faster than V8. Since V8 is only available for Windows, we didn&#8217;t perform any tests on MacOSX and Linux, both of which we support already. Our latest builds also work on ARM, by the way.</p>
<p>I am sure you can derive different results by tweaking the benchmarks or designing entirely new custom benchmarks altogether, but since SunSpider has been used fairly intensively in the past two years to measure the evolution of JavaScript performance in Safari, Firefox, Opera, and IE, I think SunSpider is probably the most reliable cross-platform benchmarking tool at this point (which doesn&#8217;t say that it&#8217;s a particularly good one, it&#8217;s just the best we have right now.)</p>
<p>Talking about IE, our tests also indicate that we are about 15 times faster than IE 7, and about 5 times faster than IE 8 beta on the SunSpider aggregate scores.</p>
<p>If you want to give TraceMonkey a try, take a look at our <a href="http://ftp.mozilla.org/pub/mozilla.org/firefox/nightly/latest-trunk/" target="_self">nightly builds</a>. You can enable the JIT in the about:config <a href="http://ejohn.org/blog/tracemonkey/" target="_self">settings</a>. The nightly builds are certainly not ready yet for wide-spread use, but we have improved stability significantly since our initial preview release. Firefox with TraceMonkey enabled is now my default browser, and I am writing this post with it.</p>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/andreasgal.wordpress.com/109/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/andreasgal.wordpress.com/109/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/andreasgal.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/andreasgal.wordpress.com/109/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/andreasgal.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/andreasgal.wordpress.com/109/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/andreasgal.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/andreasgal.wordpress.com/109/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/andreasgal.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/andreasgal.wordpress.com/109/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/andreasgal.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/andreasgal.wordpress.com/109/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/andreasgal.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/andreasgal.wordpress.com/109/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/andreasgal.wordpress.com/109/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/andreasgal.wordpress.com/109/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=109&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://andreasgal.com/2008/09/03/tracemonkey-vs-v8/feed/</wfw:commentRss>
		<slash:comments>40</slash:comments>
	
		<media:content url="" medium="image">
			<media:title type="html">andreasgal</media:title>
		</media:content>

		<media:content url="http://weblogs.mozillazine.org/roadmap/tm-v8-sunspider-totals.png" medium="image" />
	</item>
		<item>
		<title>Tracing the Web</title>
		<link>http://andreasgal.com/2008/08/22/tracing-the-web/</link>
		<comments>http://andreasgal.com/2008/08/22/tracing-the-web/#comments</comments>
		<pubDate>Fri, 22 Aug 2008 19:57:19 +0000</pubDate>
		<dc:creator>Andreas</dc:creator>
				<category><![CDATA[Trace Compilation]]></category>

		<guid isPermaLink="false">http://andreasgal.wordpress.com/?p=69</guid>
		<description><![CDATA[We have landed! For the last two months I have been working with Mozilla on a just-in-time compiler for the JavaScript engine in Firefox, and a few hours ago this project (codenamed TraceMonkey) has landed in the main Firefox development &#8230; <a href="http://andreasgal.com/2008/08/22/tracing-the-web/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=69&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p><strong>We have landed!<br />
</strong></p>
<p>For the last two months I have been working with Mozilla on a just-in-time compiler for the JavaScript engine in <a title="Firefox" href="http://www.mozilla.com/en-US/firefox/" target="_blank">Firefox</a>, and a few hours ago this project (codenamed TraceMonkey) has <a title="TraceMonkey has landed in mozilla-central." href="http://hg.mozilla.org/index.cgi/mozilla-central/rev/74a9a3453bd9" target="_blank">landed</a> in the main Firefox development tree.</p>
<p>TraceMonkey is a <a href="http://base.google.com/base_media?q=hand1017890191470242229&amp;size=8" target="_blank">trace-based</a> JIT compiler and it pushes the envelope on JavaScript performance. On average, we speed up Apple&#8217;s popular <a href="http://webkit.org/perf/sunspider-0.9/sunspider.html">SunSpider</a> benchmarks by a factor of 4.6 over the last release of Firefox. The overall runtime of SunSpider improved by about 1.83x (parts of SunSpider exercise things like the regular expression engine which is outside of the scope of JIT compilation, hence the lesser overall speedup). For the SunSpider ubench suite, which focuses on core JavaScript language features, we achieve a speedup of 22x. Whichever metric you choose to apply, Firefox now has the fastest JavaScript engine in the world.</p>
<div id="attachment_76" class="wp-caption alignright" style="width: 624px"><img class="size-full wp-image-76" src="http://andreasgal.files.wordpress.com/2008/08/tracingmonkey0014.jpg?w=640" alt="TraceMonkey Performance relative to Firefox 3.0"   /><p class="wp-caption-text">TraceMonkey Performance relative to Firefox 3.0</p></div>
<p><a href="http://blog.mozilla.com/schrep/2008/08/22/what-can-you-do-when-your-browser-is-7-times-faster/" target="_blank">Mike Schroepfer</a> put together a <a href="http://people.mozilla.com/~schrep/tm-image-adjustment.swf" target="_blank">demo</a> showing the real-world performance impact of TraceMonkey. You should also check out <a href="http://weblogs.mozillazine.org/roadmap/" target="_blank">Brendan Eich&#8217;s</a> and <a href="http://shaver.off.net/diary/" target="_blank">Mike Shaver&#8217;s blog</a> post about TraceMonkey, as well as <a href="http://www.bailopan.net/blog/?p=84" target="_blank">David Anderson&#8217;s updates</a> on our 64-bit x86 port (yes, we do 64-bit!).</p>
<p><strong>Dynamic Compilation with Traces </strong></p>
<p>Traditional just-in-time compilers (like Sun&#8217;s Hotspot VM) are in their design and structure very similar to static compilers (like GCC). They observe which methods get executed frequently, and translate the method into native machine code once a certain threshold has been reached. While such methods often contain performance-critical parts (such as loops), they often also contain slow paths and non-loopy code, which barely if at all contributes to the runtime of the method. A whole-method compiler, however, has to always analyze and translate the entire method, even if parts of it are not particularly &#8220;compilation-worthy&#8221;.</p>
<p>Trace-based compilation takes a very different approach. We monitor the interpretation of bytecode instruction by the virtual machine and scan for frequently taken backwards branches, which are an indicator for loops in the underlying program. Once we identify such a loop start point, we follow the interpreter as it executes the program and record the sequence of bytecode instructions that get executed along the way. Since we start at a loop header, the interpreter will eventually return to this entry point once it completed an iteration through the loop. The resulting linear sequence of instructions is what we call a trace.</p>
<p>Traces represent a single iteration through a loop, and can span multiple methods and program modules. If a function is invoked from inside a loop, we follow the function call and inline the instructions executed inside the called method. Function calls themselves are never actually recorded. We merely verify at runtime that the same conditions that caused that function to be activated still hold.</p>
<p><strong>Trace Trees and Nested Trace Trees</strong></p>
<p>TraceMonkey uses a particular flavor of trace-based compilation which I described in my dissertation: <a href="http://base.google.com/base_media?q=hand1017890191470242229&amp;size=8" target="_blank">Trace Trees</a>. Loops often consist of more than a single performance-relevant path, and Trace Trees allow us to capture all of these and organize them in a tree-shaped data structure which can be compiled trace-by-trace yet produces a globally optimized result for the entire loop. To deal with nested loops, these trees can also be nested inside of each other, with outer loop trees <em>calling</em> the inner loop tree.</p>
<div id="attachment_83" class="wp-caption alignright" style="width: 178px"><img class="size-medium wp-image-83" src="http://andreasgal.files.wordpress.com/2008/08/cfg.png?w=168&#038;h=240" alt="" width="168" height="240" /><p class="wp-caption-text">Control-Flow Graph representation of a loop with a nested condition.</p></div>
<div id="attachment_84" class="wp-caption alignright" style="width: 245px"><img class="size-medium wp-image-84" src="http://andreasgal.files.wordpress.com/2008/08/tree.png?w=235&#038;h=300" alt="" width="235" height="300" /><p class="wp-caption-text">Trace Tree for the code shown in the Control-Flow Graph. Traces are recorded starting at the loop header (A) and connect back to A after completing an iteration.</p></div>
<p>A particular advantage of Trace Trees is the fact that they always represent a loop and thus enter function frame and leave function frame operations are always balanced <em>as long as we stay on trace</em>. Thus, we can actually completely optimize away the overhead of function calls. As long as we stay on trace (which in case of a loop we usually do for many iterations), we don&#8217;t construct and destroy function frames. Instead, we simply execute the inlined trace we recorded. Function frames for inlined calls are only constructed should we detect that we have to leave the trace (for example because we reached the end of the loop).</p>
<p><strong>Type Specialization</strong></p>
<p>Trace Trees by their very nature are the result of a control-flow speculation. We speculate that loops tend to execute the same sequence of instructions over and over, which is usually true for many applications. In TraceMonkey we go a step further and also speculate on types.</p>
<p>JavaScript in contrast to Java or C/C++ is a dynamically typed language. Variables are declared by name only, and their type will be determined automatically once a value is assigned to them. Assigning values with different types to a variable changes the type of the variable on the fly to match the new value&#8217;s type. Executing such dynamically typed code has been traditionally fairly expensive. Type specialization eliminates much of this overhead.</p>
<p><a href="http://shaver.off.net/diary/" target="_self">Mike Shaver</a> ran some benchmarks, comparing the performance of simple loops written in JavaScript and C. Our JIT generates code that is roughly equivalent to the performance of unoptimized C code (gcc -O0). We achieve this through aggressive type speculation. Whenever we see a program assign only integers to a variable, for example, we specialize the generated machine code to hold that variable in an integer machine register. Guards in the traces ensure that the type doesn&#8217;t unexpectedly change, in which case we leave the trace and let the interpreter handle this (unexpected and often infrequent) case.</p>
<p>Type specialization removes much of the principal overhead associated with dynamically typed languages, and as we further improve our JIT we expect to get fairly close to the performance of statically typed languages such as Java or C.</p>
<p><strong>Traces Everywhere</strong></p>
<p>Our work on TraceMonkey was done in close collaboration with Adobe&#8217;s <a href="http://www.mozilla.org/projects/tamarin/" target="_blank">Tamarin Tracing</a> project. In fact, TraceMonkey and Tamarin Tracing share the same core tracing backend (nanojit), which was contributed by Adobe. Adobe has been criticized in the last few months for the slow performance of Tamarin Tracing on untyped JavaScript code. However, Tamarin Tracing is first and foremost a JIT compiler for ActionScript, a <em>typed</em> JavaScript dialect. While Tamarin Tracing does run untyped code, it&#8217;s not particularly optimized (yet) for this task.</p>
<p>TraceMonkey shows the full potential of Adobe&#8217;s nanojit backend when combined with a VM that was specifically designed and optimized for untyped JavaScript code (SpiderMonkey), and we expect much of our work to make its way into nanojit and Tamarin Tracing.</p>
<p><strong>People</strong></p>
<p>TraceMonkey was a tremendous group effort of a large group of extremely talented people. Much of the recent advances in the area of nested trees, aggressive type speculation and runtime type inference are based on work done by graduate students at our research group at UC Irvine (Michael Bebenita, Mason Chang, Marcelo Contra, Gregor Wagner and others). Our research was generously funded by a grant from the National Science Foundation (Principal Investigator <a href="http://www.ics.uci.edu/~franz/" target="_blank">Professor Michael Franz</a>, Program Director <a href="http://www.nsf.gov/staff/staff_bio.jsp?lan=hgill&amp;org=NSF" target="_blank">Dr. Helen Gill</a>) as well as grants and donations from Microsoft, Sun Microsystems, Intel, and last but not least Mozilla.</p>
<p>For me, it has been an amazing opportunity to spend the last two months here at Mozilla, turning our research ideas into actual product code. It&#8217;s hard to describe what it feels like to work alongside people like <a href="http://weblogs.mozillazine.org/roadmap/" target="_blank">Brendan Eich</a>, the inventor of JavaScript, or <a href="http://shaver.off.net/diary/" target="_blank">Mike Shaver,</a> Mozilla&#8217;s new VP Engineering and life-long JavaScript VM veteran. And even interns around here are rocket scientists. David Anderson, one of Mozilla&#8217;s interns, wrote a complete 64-bit backend for us over the summer, making TraceMonkey the first JavaScript JIT capable of targeting x86-64.</p>
<p>TraceMonkey was developed in close collaboration with Edwin Smith and his Tamarin Tracing team at Adobe, and the web at large owes Adobe a great deal of gratitude for open-sourcing the Tamarin and Tamarin Tracing VMs, allowing Mozilla to build TraceMonkey on top of Tamarin Tracing&#8217;s nanojit backend. nanojit is a small and highly efficient trace-based just-in-time compiler backend that is language agnostic and highly portable, and I think it has a bright future. It has just landed in Firefox, and hopefully we will see it pop up in a future release of Adobe&#8217;s Flash Player soon.</p>
<p><strong>The Road Ahead</strong></p>
<p>Landing in the central Firefox repository was a big step for us, but there is also definitively a lot of work ahead of us. We are now at the point where we trace a lot of code in benchmarks and on the web, but there is a lot more coverage we will add over time.</p>
<p>Also, we are far away from having exhausted all the potential of trace compilation and we plan to add many features and optimizations over the next few months. Our current speedups are just the beginning of what&#8217;s possible:</p>
<ul>
<li>Improve register allocation and code generation in nanojit.</li>
<li>Runtime analysis of builtins (machine code) to reduce spill overhead of builtin calls (Gregor Wagner from UCI did some work on this recently.)</li>
<li>Bring performance of the ARM backend up to par with x86 and x86-64 backends and add a PowerPC backend (joint work with Adobe).</li>
<li>Add tree-recompilation and parallel compilation (based on our prior work on <a href="http://www.ics.uci.edu/~franz/Site/pubs-pdf/ICS-TR-07-12.pdf" target="_blank">Parallel Dynamic Compilation</a>, Mohammad Haghighat from Intel has been looking into this for nanojit).</li>
<li>Add more advanced trace optimization techniques like <a href="http://andreasgal.com/2008/02/28/tree-folding/" target="_blank">Tree Folding</a>, <a href="http://andreasgal.com/2007/11/08/more-precise-load-propagation/" target="_blank">Load Propagation</a> and <a href="http://andreasgal.com/2007/09/20/improvements-to-the-singleton-analysis-pass/" target="_blank">Escape Analysis</a>.</li>
</ul>
<p>Our goal is to eventually close the performance gap between JavaScript and traditional desktop languages, and we believe that for many applications this will be possible.</p>
<p>In parallel to our work with Mozilla on JavaScript performance, we also have a number of exciting tracing-related projects going on at UC Irvine. <a href="http://www.masonchang.com" target="_blank">Mason Chang</a>, one of our graduate students, is currently working with Adobe on the Tamarin Tracing VM, adding context threading and trace visualisation. <a href="http://michael.bebenita.com/">Michael Bebenita</a> from UCI is currently interning with Sun Microsystems and has been making great progress integrating our Java trace compiler into Maxine, and we plan on switching to Maxine as our main research platform for Java compilation. Alexander Yermolovich (also UC Irvine) is working with Adobe this summer on an exciting project involving fast execution of rich dynamic content that Adobe will hopefully announce to the public soon.</p>
<p>If you are interested in their work, check out their blogs (linked from my website). For further reading material on traces and trace compilation you can also take a look at my earlier <a href="http://wordpress.com/tag/trace-compilation/" target="_blank">blog posts</a> on this topic.</p>
<p><strong>Update:</strong> Mason Chang did some <a href="http://www.masonchang.com/2008/08/tracemonkey-vs-squirrelfish.html" target="_blank">benchmarks</a> comparing TraceMonkey to Apple&#8217;s WebKit/SquirrelFish VM. Looks like we are on average 2.5x faster than SquirrelFish (about 15% faster on total runtime).</p>
<p><em>I am looking for a tenure-track faculty position for Fall 2009 to continue my research on virtual machines, dynamic compilation and type-safe languages.</em></p>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/andreasgal.wordpress.com/69/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/andreasgal.wordpress.com/69/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/andreasgal.wordpress.com/69/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/andreasgal.wordpress.com/69/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/andreasgal.wordpress.com/69/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/andreasgal.wordpress.com/69/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/andreasgal.wordpress.com/69/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/andreasgal.wordpress.com/69/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/andreasgal.wordpress.com/69/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/andreasgal.wordpress.com/69/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/andreasgal.wordpress.com/69/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/andreasgal.wordpress.com/69/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/andreasgal.wordpress.com/69/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/andreasgal.wordpress.com/69/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/andreasgal.wordpress.com/69/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/andreasgal.wordpress.com/69/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=69&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://andreasgal.com/2008/08/22/tracing-the-web/feed/</wfw:commentRss>
		<slash:comments>50</slash:comments>
	
		<media:content url="" medium="image">
			<media:title type="html">andreasgal</media:title>
		</media:content>

		<media:content url="http://andreasgal.files.wordpress.com/2008/08/tracingmonkey0014.jpg" medium="image">
			<media:title type="html">TraceMonkey Performance relative to Firefox 3.0</media:title>
		</media:content>

		<media:content url="http://andreasgal.files.wordpress.com/2008/08/cfg.png?w=210" medium="image" />

		<media:content url="http://andreasgal.files.wordpress.com/2008/08/tree.png?w=235" medium="image" />
	</item>
		<item>
		<title>Trace-Trees FAQ</title>
		<link>http://andreasgal.com/2008/06/02/trace-trees-faq/</link>
		<comments>http://andreasgal.com/2008/06/02/trace-trees-faq/#comments</comments>
		<pubDate>Mon, 02 Jun 2008 09:01:26 +0000</pubDate>
		<dc:creator>Andreas</dc:creator>
				<category><![CDATA[Trace Compilation]]></category>

		<guid isPermaLink="false">http://andreasgal.wordpress.com/?p=67</guid>
		<description><![CDATA[Dave Roberts sent me a couple of questions about trace trees after he saw our work mentioned on Steve Yegge&#8217;s blog. I figured my answers might be interesting to more people than just Dave.  Most of your papers on trace-trees &#8230; <a href="http://andreasgal.com/2008/06/02/trace-trees-faq/">Continue reading <span class="meta-nav">&#8594;</span></a><img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=67&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Dave Roberts sent me a couple of questions about trace trees after he saw our work mentioned on <a href="http://steve-yegge.blogspot.com/2008/05/dynamic-languages-strike-back.html">Steve Yegge&#8217;s blog</a>. I figured my answers might be interesting to more people than just Dave. </p>
<blockquote><p>Most of your papers on trace-trees just describe the behavior of the technique with respect to a single trace tree. That is, as described, you basically find the first inner loop in the program and then trace and compile that, extending it as you find other paths that branch from it. That&#8217;s fine, but how does the system behave with respect to large programs that have many such loops? I&#8217;m assuming that you&#8217;re compiling loops in many methods across a large such program. Are you saving the trace results across all that activity? In other words, if you find a hot loop in method A, then when you finally exit that method and later find a hot loop in method B, do you throw away the work you did for method A and recreate it later, or are you building up bits of compiled code throughout the long-term program run? I assume the latter, but didn&#8217;t really know.</p></blockquote>
<p>Our code initially runs through an interpreter in a bytecode format. In principle, each bytecode can be the anchor for a trace tree. The code is interpreted until a particular potential anchor becomes &#8220;hot&#8221; enough to host a tree. At that point we will record a trace and execute it and then subsequently try to extend the tree whenever we side-exit from it. We only grow the tree with traces that connect back to the same loop header the tree is anchored at, either through a direct path through the loop, or some path going through some outer loop. This is not always possible, i.e. if 2 loops are nested inside a loop, at which point we have to generate nested trees where an outer tree calls the inner trees (since we can&#8217;t easily form a path through the inner and outer loop at the same time, we would get stuck looping in the other inner loop and the trace would get very long). We use various abort conditions to restrict the maximum size of a trace we want to attach to a tree. With an unlimited trace length the entire program would eventually attach to each tree we start, which is counter-intuitive. We want each tree to represent one hot code region.</p>
<blockquote><p>Assuming you&#8217;re building up bits of code long-term, are there any issues reentering the compiled code from the interpreter when you next execute method A? The papers always describe entering the compiled code as an act that happens right after you record the trace and compile it, but they don&#8217;t really talk about the issues of reentering the same code later. How is this done.</p></blockquote>
<p>Yes, we compile the trace (or tree) and then re-enter it every time the interpreter runs across its anchor point. In our language (JVML) the bytecode is statically typed in that at each point in the program (so for each bytecode instruction) all variables (local variable slots and stack slots) have one unique type. The recorded and compiled trace is compiled with that fixed type distribution and knows how to pull the values from the interpreter stack and local variable frame. Constant values are detected by the optimizer and directly embedded in the trace instead of reading them from the interpreter frame. One could even speculate on certain values. Once you see a boolean value in the local variable frame being true for N iterations we could just re-compile the tree assuming that value is always true, and then insert a guard that ensures that this specialized tree is only executed if that slot really contains a boolean true value.</p>
<blockquote>
<div><span class="921420701-22052008"><span style="font-family:Arial;font-size:x-small;">What about the case where method A contains a loop and calls method B in the loop. Method B also has a loop inside it. Perhaps like the following, in pseudo-Java code:</span></span></div>
<div><span class="921420701-22052008"></span> </div>
<div><span class="921420701-22052008"><span style="font-family:Arial;font-size:x-small;">public int methodA(int a) {</span></span></div>
<div><span class="921420701-22052008">    <span style="font-family:Arial;font-size:x-small;">// complex way of calculating a^3</span></span></div>
<div><span class="921420701-22052008">    <span style="font-family:Arial;font-size:x-small;">sum = 0;</span></span></div>
<div><span class="921420701-22052008">    <span style="font-family:Arial;font-size:x-small;">for (i = 0; i &lt; a; i++) {</span></span></div>
<div><span class="921420701-22052008">        <span style="font-family:Arial;font-size:x-small;">sum += methodB(a);</span></span></div>
<div><span class="921420701-22052008">    <span style="font-family:Arial;font-size:x-small;">}</span></span></div>
<div><span class="921420701-22052008">    <span style="font-family:Arial;font-size:x-small;">return sum;</span></span></div>
<div><span class="921420701-22052008"><span style="font-family:Arial;font-size:x-small;">}</span></span></div>
<div><span class="921420701-22052008"></span> </div>
<div><span class="921420701-22052008"><span style="font-family:Arial;font-size:x-small;">public int methodB(int b) {</span></span></div>
<div><span class="921420701-22052008">    <span style="font-family:Arial;font-size:x-small;">// complex way of calculating b^2</span></span></div>
<div><span class="921420701-22052008">    <span style="font-family:Arial;font-size:x-small;">sum = 0;</span></span></div>
<div><span class="921420701-22052008">    <span style="font-family:Arial;font-size:x-small;">for (i = 0; i &lt; b; i++) {</span></span></div>
<div><span class="921420701-22052008">        <span style="font-family:Arial;font-size:x-small;">sum += b;</span></span></div>
<div><span class="921420701-22052008">    <span style="font-family:Arial;font-size:x-small;">}</span></span></div>
<div><span class="921420701-22052008">    <span style="font-family:Arial;font-size:x-small;">return sum;</span></span></div>
<div><span class="921420701-22052008"><span style="font-family:Arial;font-size:x-small;">}</span></span></div>
<div><span class="921420701-22052008"></span> </div>
<div><span class="921420701-22052008"><span style="font-family:Arial;font-size:x-small;">You would expect the system to detect the loop in B first and compile that. When B gets called again from A, you would expect the interpreter to re-enter the compiled code.</span></span></div>
<div><span class="921420701-22052008"></span> </div>
<div><span class="921420701-22052008"><span style="font-family:Arial;font-size:x-small;">At some point, however, the system will detect the loop in A and then trace and compile that. When that happens, the trace starting in A would inline B, right? And while it&#8217;s tracing through the inlined B, does it just ignore the fact that there is already a compiled trace for the loop in B, unrolling it because it doesn&#8217;t return to the loop head in A? If the trace gets too long, because the loop in B might be much larger than in A, then the trace aborts. Is there a way to make the trace starting in A recognize that it has reached a spot where there is already an old trace in B, and the right behavior might be to somehow incorporate that previous trace instead of completely unrolling the loop in B.</span></span></div>
</blockquote>
<div>You hit the nail on the head. That&#8217;s exactly what we do :) We call this &#8220;nested trace trees&#8221; and it&#8217;s Michael Bebenita&#8217;s brainchild. In my original dissertation work I only traced through and compiled the inner loop. The rest of the code was interpreted. As long as the inner loop is a lot hotter than the outer code calling it, this still gives a decent speedup. But in certain cases this of course fails. Michael extended this approach as follows. The inner loop is usually hotter and will trigger a tree being recorded for the inner loop. Eventually the outer loop triggers a tree to be recorded starting at its own header. We follow the trace inside the invoked method and then detect that we reached a point where we already have a tree (the inner tree). Instead of following the inner tree (which we as you pointed out wouldn&#8217;t be able to record without excessive unrolling), we call it (literally call it, like a method call). There are actually two ways to do this call. Either we compile the outer tree and the inner tree together, teaching the inner tree to directly read the values from the registers and spill locations the outer tree holds its context values (we call this welding), or by spilling all values the inner tree needs from the outer tree onto the stack and then using a more generic invocation mechanism. The latter allows the machine code generated for the inner tree to be reused (saving code space), while the former approach is faster. The nested trace tree construct permits a number of optimizations to be communicated between trees, i.e. whether values that a tree gets handed in from an outer tree escape the tree, allowing global analysis and optimization.</div>
<blockquote>
<div><span class="921420701-22052008"><span style="font-family:Arial;font-size:x-small;">Otherwise it seems like:</span></span></div>
<div><span class="921420701-22052008"><span style="font-family:Arial;font-size:x-small;">  a. you could waste a lot of time trying to keep tracing the loop starting in A and have B blow out the length of your trace buffer. Since tracing is slower than simply interpreting, this would be a net loss in speed.</span></span></div>
<div><span class="921420701-22052008"><span style="font-family:Arial;font-size:x-small;">  b. if you try to unroll another loop fully, even if it doesn&#8217;t result in your trace buffer length being exceeded, it&#8217;s a good way to get very long traces, but the compiled speed of those traces may not be much faster than calling the compiled code in B anyway.</span></span></div>
</blockquote>
<div>You are correct. Long traces and excessive &#8220;outerlining&#8221; (inlining of outer loop parts) rarely pay off, mostly because the outer loop parts are less hot than the inner paths, but now they compete for the same register resources as the inner paths. </div>
<blockquote>
<div><span class="921420701-22052008"><span style="font-family:Arial;font-size:x-small;">  c. it would then seem that loops that occur higher up in the call tree would get pretty large generally, which would bloat things up overall. either that or they wouldn&#8217;t get compiled at all because the traces would all be too long, which means you&#8217;d spend a lot more time doing interpreting.</span></span></div>
</blockquote>
<div>Yes. We are currently playing with the parameters and never outerlining at all and only nesting trees seems to be mostly almost as fast as outerlining.</div>
<blockquote>
<div>After how many iterations of a loop do you start tracing? You probably don&#8217;t want to do it after 1 loop, but you probably don&#8217;t want to wait until 50 or 100 either. Are we talking small, single-digit numbers here, or 10 or 20 times through the loop?</div>
</blockquote>
<div>We use 2-3 digit numbers to start a tree. The Tamarin Tracing team is using even smaller numbers (low 2 digits). It&#8217;s basically a function of how much overhead compilation incurs vs interpretation. Tamarin&#8217;s interpreter is really slow (being worked on intensively though), so they try to compile as early as possible.</div>
<blockquote>
<div>
<div><span class="921420701-22052008"><span style="font-family:Arial;font-size:x-small;">You talked about tree folding in this recent blog post. Have you guys written anything about that, or is it too new? It would be interesting to understand the complexity of trying to fold the trees back. One of the nice things about the original trace tree algorithm was that it was relatively simple in concept: just trace a tree and then run a simplified form of SSA over it to compile it.</span></span></div>
<div><span class="921420701-22052008"><span style="font-family:Arial;font-size:x-small;"><a href="http://andreasgal.com/2008/02/28/tree-folding/">http://andreasgal.com/2008/02/28/tree-folding/</a></span></span></div>
</div>
</blockquote>
<div>A paper on folding is planned for CGO, and we plan on submitting a paper on nested trace trees to PLDI. We were spectacularly unsuccessful selling our trace-compilation work at either venue in the past though, so we will publish the papers in parallel as a technical report. Just check the tech report section of my publications shortly after the respective deadlines. We will also have a submission for VEE. There are a lot of conferences coming up over the summer, and we have a lot of unpublished research piled up.</div>
<blockquote>
<div>Does tree folding complicate your SSA analysis considerably?</div>
</blockquote>
<div>No, it&#8217;s a pre-pass that happens right after a trace was added to a tree. It&#8217;s the only destructive/tree modifying optimization. It starts with the old tree state and the new trace and it produces a tree that merges traces as much as possible. That new tree then replaces the old tree. The representation is largely unchanged and the folding implementation doesn&#8217;t touch any of the backend code. The biggest issue with folding is that we have to run (side-exit) along most paths of a deeply branchy code area until everything has been folded, so we get quite a few compilation runs. The nasty 3D Cube example from sunspider (JavaScript benchmark) requires some 63 compiler runs for a fairly compact source code loop with nested if-statements inside. Our compiler is very fast though, so this might be tolerable.</div>
<blockquote>
<div>About 8 years ago, we looked at using Insignia&#8217;s GeodeVM in a commercial embedded project I was working on. Their VM was really quite fast. I remember them saying that they would try to identify hot pieces of code and would compile those to native code, but that they would do that on a sub-method basis. I think you mentioned Geode in one of the papers as related work. Do you know what they do versus your trace-tree technique?</div>
</blockquote>
<div>I know about Insignia&#8217;s work only from marketing material and through third party gossip. From what I understand, Insignia uses a bytecode to native code compiler to compile all of the bytecode to native code and then compresses the entire compilation result using gzip. The code is fast to execute, but is at the same time pretty compact since it&#8217;s stored in a compressed format. In other words it&#8217;s a Java VM for embedded systems similarly to my first implementation of a JVM trace compiler, but otherwise largely unrelated as far as the actual approach is concerned. If anyone from Insignia wants to correct me, please go ahead :)</div>
<br /><img alt="" border="0" src="http://feeds.wordpress.com/1.0/categories/andreasgal.wordpress.com/67/" /> <img alt="" border="0" src="http://feeds.wordpress.com/1.0/tags/andreasgal.wordpress.com/67/" /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/andreasgal.wordpress.com/67/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/andreasgal.wordpress.com/67/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/andreasgal.wordpress.com/67/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/andreasgal.wordpress.com/67/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/andreasgal.wordpress.com/67/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/andreasgal.wordpress.com/67/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/andreasgal.wordpress.com/67/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/andreasgal.wordpress.com/67/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/andreasgal.wordpress.com/67/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/andreasgal.wordpress.com/67/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/andreasgal.wordpress.com/67/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/andreasgal.wordpress.com/67/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/andreasgal.wordpress.com/67/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/andreasgal.wordpress.com/67/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=andreasgal.com&amp;blog=891661&amp;post=67&amp;subd=andreasgal&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://andreasgal.com/2008/06/02/trace-trees-faq/feed/</wfw:commentRss>
		<slash:comments>8</slash:comments>
	
		<media:content url="" medium="image">
			<media:title type="html">andreasgal</media:title>
		</media:content>
	</item>
	</channel>
</rss>
